From e955285ed22d58a52c5a029db2db8a91bf9fd5cd Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:25:03 +0530 Subject: [PATCH 1/8] Add files via upload --- .../121_C2_RL_EXPT_3_SEM_VII.ipynb | 884 ++++++++++++++++++ 1 file changed, 884 insertions(+) create mode 100644 chapter_05_monte_carlo_methods/121_C2_RL_EXPT_3_SEM_VII.ipynb diff --git a/chapter_05_monte_carlo_methods/121_C2_RL_EXPT_3_SEM_VII.ipynb b/chapter_05_monte_carlo_methods/121_C2_RL_EXPT_3_SEM_VII.ipynb new file mode 100644 index 0000000..24d109a --- /dev/null +++ b/chapter_05_monte_carlo_methods/121_C2_RL_EXPT_3_SEM_VII.ipynb @@ -0,0 +1,884 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JuDWA5loOsFM", + "outputId": "b9cdd35b-2703-4828-ad14-494bafe11476" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting gymnasium\n", + " Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)\n", + "Collecting farama-notifications>=0.0.1 (from gymnasium)\n", + " Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)\n", + "Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/953.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m501.8/953.9 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n", + "Installing collected packages: farama-notifications, gymnasium\n", + "Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1\n" + ] + } + ], + "source": [ + "# @title Installing Dependencies\n", + "\n", + "!pip install gymnasium" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "cbt-NEiMO1bA" + }, + "outputs": [], + "source": [ + "# @title Importing Dependencies\n", + "\n", + "import random\n", + "import numpy as np\n", + "import gymnasium as gym\n", + "from gym import error, spaces, utils\n", + "from gym.utils import seeding" + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Set up the game\n", + "# Define the rank and suit of a card\n", + "\n", + "import enum\n", + "\n", + "ranks = {\n", + " \"two\": 2,\n", + " \"three\": 3,\n", + " \"four\": 4,\n", + " \"five\": 5,\n", + " \"six\": 6,\n", + " \"seven\": 7,\n", + " \"eight\": 8,\n", + " \"nine\": 9,\n", + " \"ten\": 10,\n", + " \"jack\": 10,\n", + " \"queen\": 10,\n", + " \"king\": 10,\n", + " \"ace\": (1, 11),\n", + "}\n", + "\n", + "class Suit(enum.Enum):\n", + " spades = \"spades\"\n", + " clubs = \"clubs\"\n", + " diamonds = \"diamonds\"\n", + " hearts = \"hearts\"\n", + "\n", + "# Now to define the card and deck\n", + "# Implement the shuffle, peek and deal functions for the deck\n", + "\n", + "class Card:\n", + " def __init__(self, suit, rank, value):\n", + " self.suit = suit\n", + " self.rank = rank\n", + " self.value = value\n", + "\n", + " def __str__(self):\n", + " return self.rank + \" of \" + self.suit.value\n", + "\n", + "class Deck:\n", + " def __init__(self, num=1):\n", + " self.cards = []\n", + " for i in range(num):\n", + " for suit in Suit:\n", + " for rank, value in ranks.items():\n", + " 
self.cards.append(Card(suit, rank, value))\n", + "\n", + " def shuffle(self):\n", + " random.shuffle(self.cards)\n", + "\n", + " def deal(self):\n", + " return self.cards.pop(0)\n", + "\n", + " def peek(self):\n", + " if len(self.cards) > 0:\n", + " return self.cards[0]\n", + "\n", + " def add_to_bottom(self, card):\n", + " self.cards.append(card)\n", + "\n", + " def __str__(self):\n", + " result = \"\"\n", + " for card in self.cards:\n", + " result += str(card) + \"\\n\"\n", + " return result\n", + "\n", + " def __len__(self):\n", + " return len(self.cards)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D3df2N-JrKiE", + "outputId": "a36dc6fc-51b2-4fb0-bfb5-380e143cc4b4" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", + " and should_run_async(code)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Define the logic for evaluating the value of the player's hand\n", + "'''\n", + "An ace is used as 11 whenever possible. 
It is 1 only if the other option would make the player bust.\n", + "'''\n", + "\n", + "def player_eval(player_hand):\n", + " num_ace = 0\n", + " # use_one means that every ace in the hand is counted as one\n", + " use_one = 0\n", + " for card in player_hand:\n", + " if card.rank == \"ace\":\n", + " num_ace += 1\n", + " use_one += card.value[0] # use 1 for Ace\n", + " else:\n", + " use_one += card.value\n", + "\n", + " if num_ace > 0:\n", + " # Define player policy for Aces:\n", + " # Make Aces 11 if they get you to the range [18, 21]\n", + " # Otherwise, use one.\n", + "\n", + " ace_counter = 0\n", + " while ace_counter < num_ace:\n", + " # Only add by 10 because 1 is already added before\n", + " use_eleven = use_one + 10\n", + " if use_eleven > 21:\n", + " return use_one\n", + " elif use_eleven >= 18 and use_eleven <= 21:\n", + " return use_eleven\n", + " else:\n", + " # This allows for some Aces to be 11 and others to be 1.\n", + " use_one = use_eleven\n", + " ace_counter += 1\n", + " return use_one\n", + " else:\n", + " return use_one" + ], + "metadata": { + "id": "SWrENBc-vEUL" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Define the logic for evaluating the value of the dealer's hand\n", + "\n", + "# This follows the same, official rules every time.\n", + "# Still there is a need to figure out what happens if there are multiple Aces.\n", + "def dealer_eval(player_hand):\n", + " num_ace = 0\n", + " use_one = 0\n", + " for card in player_hand:\n", + " if card.rank == \"ace\":\n", + " num_ace += 1\n", + " use_one += card.value[0] # use 1 for Ace\n", + " else:\n", + " use_one += card.value\n", + "\n", + " if num_ace > 0:\n", + " # See if using 11 instead of 1 for the Ace gets the dealer's hand value closer to the [17, 21] range.\n", + " # The dealer will follow Hard 17 rules. 
This means that the dealer will not hit again if the Ace yields a 17.\n", + " # This also means that Aces initially declared as 11 can be changed to 1 as new cards come.\n", + "\n", + " ace_counter = 0\n", + " while ace_counter < num_ace:\n", + " # Only add 10 because 1 is already added before\n", + " use_eleven = use_one + 10\n", + "\n", + " if use_eleven > 21:\n", + " return use_one\n", + " elif use_eleven >= 17 and use_eleven <= 21:\n", + " return use_eleven\n", + " else:\n", + " # The case where even using Ace as eleven is less than 17.\n", + " use_one = use_eleven\n", + " ace_counter += 1\n", + " return use_one\n", + " else:\n", + " return use_one" + ], + "metadata": { + "id": "YzDYaCCVx3AH" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Define logic for the dealer's turn\n", + "\n", + "def dealer_turn(dealer_hand, deck):\n", + " # Calculate the dealer hand's value.\n", + " dealer_value = dealer_eval(dealer_hand)\n", + "\n", + " # Define dealer policy (which is fixed to the official rules)\n", + " # The dealer keeps hitting until their total is 17 or more\n", + " while dealer_value < 17:\n", + " dealer_hand.append(deck.deal())\n", + " dealer_value = dealer_eval(dealer_hand)\n", + "\n", + " return dealer_value, dealer_hand, deck" + ], + "metadata": { + "id": "YetM4Xsixlrf" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Define the OpenAI Gym Environment for Blackjack\n", + "\n", + "INITIAL_BALANCE = 1000\n", + "NUM_DECKS = 6\n", + "\n", + "class BlackjackEnv(gym.Env):\n", + " metadata = {'render.modes': ['human']}\n", + "\n", + " def __init__(self):\n", + " super(BlackjackEnv, self).__init__()\n", + "\n", + " # Initialize the blackjack deck\n", + " self.bj_deck = Deck(NUM_DECKS)\n", + "\n", + " self.player_hand = []\n", + " self.dealer_hand = []\n", + "\n", + " self.reward_options = {\"lose\": -100, \"tie\": 0, \"win\": 100}\n", + "\n", + " 
self.action_space = spaces.Discrete(2)\n", + "\n", + " '''\n", + " First element of tuple is the range of possible hand values for the player (3 through 20). This is the possible range of values that the player will actually have to make a decision for. Any player hand value 21 or above already has automatic valuations, and needs no input from an AI agent.\n", + " However, we also need to add all the hand values that the agent could possibly end up in when they bust. Maybe the agent can glean some correlations based on what hand value they bust at, so this should be in the observation space. Also, the layout of OpenAI Gym environment class makes us have to include the bust-value in the step() function because we need to return that done is true alongside the final obs, which is the bust-value.\n", + " '''\n", + " # Second element of the tuple is the range of possible values for the dealer's upcard (2 through 11)\n", + " self.observation_space = spaces.Tuple((spaces.Discrete(18), spaces.Discrete(10)))\n", + " self.done = False\n", + "\n", + " def _take_action(self, action):\n", + " if action == 0: #hit\n", + " self.player_hand.append(self.bj_deck.deal())\n", + " # recalculate the value of the player's hand after any changes to the hand\n", + " self.player_value = player_eval(self.player_hand)\n", + "\n", + " def step(self, action):\n", + " self._take_action(action)\n", + "\n", + " # End the episode/game if the player stands or has a hand value >= 21.\n", + " self.done = action == 1 or self.player_value >= 21\n", + "\n", + " # rewards are 0 when the player hits and is still below 21, and they keep playing\n", + " rewards = 0\n", + "\n", + " if self.done:\n", + " # Calculate rewards\n", + " if self.player_value > 21:\n", + " rewards = self.reward_options['lose']\n", + " elif self.player_value == 21:\n", + " rewards = self.reward_options['win']\n", + " else:\n", + " # begin the dealer turn phase\n", + " dealer_value, self.dealer_hand, self.bj_deck = 
dealer_turn(self.dealer_hand, self.bj_deck)\n", + " # End the dealer turn phase\n", + "\n", + " # Final comparison\n", + " if dealer_value > 21:\n", + " rewards = self.reward_options['win']\n", + " elif dealer_value == 21:\n", + " rewards = self.reward_options['lose']\n", + " else:\n", + " # both the dealer's and the player's values are less than 21\n", + " if self.player_value > dealer_value:\n", + " rewards = self.reward_options['win']\n", + " elif self.player_value < dealer_value:\n", + " rewards = self.reward_options['lose']\n", + " else:\n", + " rewards = self.reward_options['tie']\n", + " self.balance += rewards\n", + " # Subtract 2 to fit into the possible observation range.\n", + " # This makes the possible range of 3 through 20 into 1 through 18.\n", + " player_value_obs = self.player_value - 2\n", + " # Get the value of the dealer's upcard; this value is what the agent sees.\n", + " # Subtract 1 to fit into the possible observation range of 1 to 10\n", + " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", + " # The state is represented as a player hand-value + dealer upcard pair.\n", + " obs = np.array([player_value_obs, upcard_value_obs])\n", + " return obs, rewards, self.done, {}\n", + "\n", + " def reset(self):\n", + " # reset the game to an initial state\n", + " # add the player and dealer cards back into the deck\n", + " self.bj_deck.cards += self.player_hand + self.dealer_hand\n", + "\n", + " # shuffle before beginning. 
Only shuffle once before the start of each game.\n", + " self.bj_deck.shuffle()\n", + " self.balance = INITIAL_BALANCE\n", + " self.done = False\n", + "\n", + " # returns the start stage for the agent\n", + " # deal 2 cards to the agent and the dealer\n", + " self.player_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", + " self.dealer_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", + " self.dealer_upcard = self.dealer_hand[0]\n", + "\n", + " # calculate the value of the agent's hand\n", + " self.player_value = player_eval(self.player_hand)\n", + "\n", + " # subtract by 2 to fit into the possible observation range. This makes the possible range of 3 through 20 into 1 through 18\n", + " player_value_obs = self.player_value - 2\n", + "\n", + " # get the value of the dealer's upcard; this value is what the agent sees. Subtract by 1 to fit the possible observation range of 1 to 10.\n", + " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", + "\n", + " # the state is represented as a player hand-value + dealer upcard pair.\n", + " obs = np.array([player_value_obs, upcard_value_obs])\n", + " return obs\n", + "\n", + " def render(self, mode='human', close=False):\n", + " # convert the player hand into a format that is easy to read and understand.\n", + " hand_list = []\n", + " for card in self.player_hand:\n", + " hand_list.append(card.rank)\n", + "\n", + " # recalculate the value of the dealer upcard.\n", + " upcard_value = dealer_eval([self.dealer_upcard])\n", + "\n", + " print(f'Balance: {self.balance}')\n", + " print(f'Player Hand: {hand_list}')\n", + " print(f'Dealer Upcard: {upcard_value}')\n", + " print(f'Done: {self.done}')" + ], + "metadata": { + "id": "am91D0LgqNnf" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Test the OpenAI Gym Blackjack Environment\n", + "\n", + "env = BlackjackEnv()\n", + "\n", + "total_rewards = 0\n", + "NUM_EPISODES = 1000\n", + "\n", + "for _ in 
range(NUM_EPISODES):\n", + " env.reset()\n", + " episode_reward = 0\n", + " while env.done == False:\n", + " action = env.action_space.sample()\n", + " new_state, reward, done, desc = env.step(action)\n", + " episode_reward += reward\n", + " total_rewards += episode_reward\n", + "\n", + "avg_reward = total_rewards / NUM_EPISODES\n", + "print(f'Average Reward: {avg_reward}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ttzs7UQb2qCP", + "outputId": "94c0576f-abdf-49ff-eead-38312653e263" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Reward: -32.2\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title A function to map a state from the Blackjack environment to the proper index in the Q and prob tables\n", + "\n", + "'''\n", + "Given a state, derive the corresponding index in the Q-table. This state is a player hand value and dealer upcard pair, so the hashing formula must be used to allocate the indices of the Q-table properly.\n", + "'''\n", + "def get_Q_state_index(state):\n", + " '''\n", + " The env already subtracts 2 from the player value when it returns the state. Subtract 1 more to fit with the array indexing that starts at 0.\n", + " '''\n", + " initial_player_value = state[0] - 1\n", + " '''\n", + " The upcard value is already subtracted by 1 in the env when it returns the state. 
dealer_upcard will be subtracted by 1 to fit with the array indexing that starts at 0.\n", + " '''\n", + " dealer_upcard = state[1] - 1\n", + " return(env.observation_space[1].n * (initial_player_value)) + (dealer_upcard)" + ], + "metadata": { + "id": "etl-JD8lHlJB" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Functions to get and update the probability of taking the best action for a given state\n", + "def get_prob_of_best_action(env, state, Q, prob):\n", + " # Use the mapping function to figure out which index of Q corresponds to the player hand value and dealer upcard value that defines each state.\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " # Use this index in the Q 2-D array to get a 2-element array that yields the current Q-values for hitting (index 0) and standing (index 1) in this state. Use the np.argmax() function to find the index of the action that yields the maximum rewards, i.e., the best action we are looking for.\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " # Retrieve the probability of the best action using the state/action pair as indices of the 'prob' array, which stores the probability of taking an action (hit or stand) for a given state/action pair.\n", + " return prob[Q_state_index][best_action]\n", + "\n", + "def update_prob_of_best_action(env, state, Q, prob, epsilon):\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " '''\n", + " Slightly alter the probability of this best action being taken by using epsilon. Epsilon starts at 1.0, and slowly decays over time. Therefore, as per the equation below, the AI agent will use the probability listed for the best action in the 'prob' table during the beginning of the algorithm. 
As time goes on, the likelihood that the best action is taken is increased from what is listed in the 'prob' table.\n", + " This allows for the exploration of other moves in the beginning of the algorithm, but exploitation later for a greater reward.\n", + " '''\n", + " prob[Q_state_index][best_action] = min(1, prob[Q_state_index][best_action] + 1 - epsilon)\n", + "\n", + " other_action = 1 if best_action == 0 else 0\n", + " prob[Q_state_index][other_action] = 1 - prob[Q_state_index][best_action]\n", + "\n", + " return prob" + ], + "metadata": { + "id": "qazyxoCbYLhz" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title play_game() function\n", + "'''\n", + "Simulates one round of Blackjack.\n", + "\n", + "First, use env.reset() to set up a new round of Blackjack.\n", + "\n", + "If the player is already dealt a blackjack by luck, then this episode is just scrapped. There is nothing the AI agent can learn from this episode since it did not have to make any decisions.\n", + "\n", + "Otherwise, the AI agent finds the best action available given the current state of the game and its knowledge so far. This knowledge is stored in the Q-table defined in the run_mc() function. Then, the AI agent retrieves the probability that it should take this action from the prob table. The AI agent applies this probability and chooses its action given this state. The action is sent to the environment, and a reward is returned. This state-action-reward sequence that just occurred is stored in the episode variable as a tuple. This process is repeated until the current episode is over. Each episode will yield roughly 1-3 tuples in the episode variable since Blackjack rounds are usually resolved after 1-3 decisions by the player.\n", + "\n", + "The Q-values in the Q-table associated with each state-action pair that was seen in this episode will be updated after this episode based on the state-action-reward tuples returned by this function. 
Then, the corresponding probabilities in the prob variable are also modified to reflect this change in Q-values.\n", + "'''\n", + "\n", + "def play_game(env, Q, prob):\n", + " '''\n", + " The episode list can contain numerous state->action->reward tuples because a round of Blackjack is not always resolved in one turn. However, there will be no state that has a player hand value that exceeds 20, since only initial states BEFORE actions are made are used when storing state->action->reward tuples.\n", + " '''\n", + " episode = []\n", + " state = env.reset()\n", + " while env.done == False:\n", + " if state[0] == 19:\n", + " # Player was dealt blackjack right from the beginning; player_value already subtracted by 2 to get state[0]\n", + " # No analysis done for this useless episode\n", + " next_state, reward, env.done, info = env.step(1)\n", + " else:\n", + " # Get the index in Q that corresponds to the current state\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " # Use the index to get the possible actions, and use np.argmax() to get the index of the action that has the highest current Q value. Index 0 is hit, 1 is stand.\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " # Go to the prob table to retrieve the probability of this action. 
This uses the same Q_state_index used for finding the state index of the Q-table.\n", + " prob_of_best_action = get_prob_of_best_action(env, state, Q, prob)\n", + "\n", + " action_to_take = None\n", + "\n", + " if random.uniform(0, 1) < prob_of_best_action:\n", + " # Take the best action\n", + " action_to_take = best_action\n", + " else:\n", + " # Take the other action\n", + " action_to_take = 1 if best_action == 0 else 0\n", + "\n", + " # The action is performed by the agent, and the next state, rewards and done information is returned.\n", + " next_state, reward, env.done, info = env.step(action_to_take)\n", + "\n", + " # Log the state->action->reward sequence\n", + " episode.append((state, action_to_take, reward))\n", + "\n", + " # Update the state for the next decision made by the agent.\n", + " state = next_state\n", + "\n", + " return episode" + ], + "metadata": { + "id": "HmFb1gcWFFcm" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title update_Q() function\n", + "'''\n", + "This function iterates through the state-action-reward tuples in episode and updates the Q-values of the corresponding state-action pairs in Q.\n", + "'''\n", + "\n", + "def update_Q(env, episode, Q, alpha, gamma):\n", + " '''\n", + " This is where the algorithm hinges on being first visit or every visit. First visit will be used because if you want first-visit, you need to use the cumulative reward of the entire episode when updating a Q-value for ALL of the state/action pairs in the episode, even the first state/action pair. In this algorithm, an episode is a round of Blackjack. Although the bulk of the reward may come from the 2nd or 3rd decision, deciding to hit on the 1st decision is what enabled the future situations to even occur, so it is important to include the entire cumulative reward. 
We can reduce the impact of the rewards of the future decisions by lowering gamma, which will lower the G value for our early state/action pair in which we hit and did not get any immediate rewards. This will make our agent consider future rewards, and not just look at each state in isolation despite having hit previously.\n", + " If you want Every-Visit MC, do not use the cumulative rewards when updating Q-values and just use the immediate reward in this episode for each state/action pair.\n", + " '''\n", + " step = 0\n", + " for state, action, reward in episode:\n", + " # calculate the cumulative reward of taking this action in this state.\n", + " # Start from the immediate rewards, and use all the rewards from the subsequent states.\n", + " # Do not use rewards from previous states\n", + "\n", + " total_reward = 0\n", + " gamma_exp = 0\n", + " for curr_step in range(step, len(episode)):\n", + " curr_reward = episode[curr_step][2]\n", + " total_reward += (gamma ** gamma_exp) * curr_reward\n", + " gamma_exp += 1\n", + "\n", + " # Update the Q-value\n", + " Q_state_index = get_Q_state_index(state)\n", + " curr_Q_value = Q[Q_state_index][action]\n", + " Q[Q_state_index][action] = curr_Q_value + alpha * (total_reward - curr_Q_value)\n", + " # update step to start further down the episode the next time.\n", + " step += 1\n", + " return Q" + ], + "metadata": { + "id": "R08jdGkVbQD0" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title update_prob() function\n", + "def update_prob(env, episode, Q, prob, epsilon):\n", + " for state, action, reward in episode:\n", + " '''\n", + " Update the probabilities of the actions that can be taken given the current state. 
The goal is that the new update in Q has changed what the best action is, and epsilon will be used to create a small increase in the probability that the new, better action is chosen.\n", + " '''\n", + " prob = update_prob_of_best_action(env, state, Q, prob, epsilon)\n", + " return prob" + ], + "metadata": { + "id": "4UA7u4nWdRFL" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title First-Visit Monte Carlo Algorithm\n", + "# run_mc() to run the First Visit Monte Carlo Algorithm\n", + "'''\n", + "This function initializes the key variables for the algorithm and runs the learning algorithm for the AI agent for num_episodes episodes. An episode is a simulation of one game of Blackjack using the OpenAI Gym environment defined above.\n", + "Q, or the Q-table, is a 2-D NumPy array in which the rows are the different possible states and the columns are the different possible actions for each state. The values stored in each cell refer to the value, or Q-value, of selecting some action (the column index) given some state (the row index). These Q-values start at 0 for every state-action pair, and are updated by the update_Q() function after each episode to reflect the rewards received in an episode where the state-action pair occurred. All Q-values initially start at 0.\n", + "\n", + "prob, or the prob table, has the same structure as Q, but the cell values refer to the probability of selecting some action (the column index) given some state (the row index). These probabilities are updated by the update_prob() function after the Q-values are updated after each episode. All action probabilities start at 0.5 (or 50%).\n", + "\n", + "alpha defines the weight given to each new change in Q-value within the update_Q() function. A smaller alpha means that a new reward logged in an episode for some state-action pair has less impact on the current Q-value for that state-action pair. The converse is also true. 
Therefore, alpha essentially defines how fast the AI agent learns (learning rate).\n", + "\n", + "epsilon defines the weight given to each new change in the action probabilities within the update_prob() function. A larger epsilon reduces the amount by which an action probability is changed after some change in Q-values. The converse is also true. An epsilon of 1 means that no change will occur in the action probability, regardless of the magnitude of changes in Q-values.\n", + "\n", + "epsilon is decayed by the decay value after every episode. The lowest value epsilon can reach is epsilon_min.\n", + "\n", + "gamma is the rate used to discount future rewards yielded by a certain state-action pair in the episode. Since a round (or episode) of Blackjack can have more than 1 decision made, there can be numerous state-action pairs that are seen in one episode. However, only the final decision (or state-action pair) yields an immediate reward from the environment. All previous state-action pairs had no rewards. So, the final reward must be used to modify the Q-values of the earlier state-action pairs. Since the final reward was only partially made possible by the earlier state-action pairs, the final reward is discounted using gamma to account for this.\n", + "\n", + "Once these important variables are defined, this function runs the First-Visit Monte Carlo algorithm for the Blackjack environment.\n", + "\n", + "The function runs num_episodes episodes.\n", + "\n", + "In each episode, epsilon is decayed by the decay rate (in the code below this happens right after the game is played and before the Q and prob tables are updated).\n", + "\n", + "Then, the game of Blackjack is played out through the play_game() function. This function returns a list of the state-action-reward tuples that occurred during the game. 
These tuples represent the actions the AI agent had to take given some state, and the rewards that resulted.\n", + "\n", + "These tuples are used to modify the Q-values in Q through the update_Q() function.\n", + "\n", + "Then, the tuples are used to modify the probability distributions in prob of the two actions (hit or stand) for any states that were encountered in the episode. This is done through the update_prob() function.\n", + "\n", + "After this process is done for each episode, the function returns the modified Q and prob tables. These tables are an imprint of the learning that has taken place by the AI agent through the First-Visit Monte Carlo algorithm.\n", + "'''\n", + "\n", + "def run_mc(env, num_episodes):\n", + " '''\n", + " observation_space[0] is the 18 possible player values. (3-20)\n", + " observation_space[1] is the 10 possible dealer upcards. (2-11)\n", + " Combining these together yields all possible states.\n", + " Multiplying this with hit/stand yields all possible state-action pairs.\n", + " This is the Q map.\n", + " '''\n", + " Q = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16)\n", + "\n", + " # This map contains the probability distributions for each action (hit or stand) given a state. The state (combination of player hand value and dealer upcard value) index in this array yields a 2-element array. 
The 0th index of this 2-element array refers to the probability of hit and the 1st index is the probability of stand.\n", + " prob = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16) + 0.5\n", + "\n", + " # The learning rate; very small to avoid making quick, large changes in our policy.\n", + " alpha = 0.001\n", + "\n", + " epsilon = 1\n", + "\n", + " # The rate by which epsilon will decay over time.\n", + " # Since the probability that decides which option has the highest Q-value is 1 - epsilon + probability, this decay will make sure that the better option is taken more often in the long run.\n", + " # This allows the algorithm to explore in the early stages and exploit in the later stages.\n", + " decay = 0.9999\n", + "\n", + " # The lowest value that epsilon can go to. Although the decay seems slow, it grows exponentially, and this is magnified when running thousands of episodes.\n", + " epsilon_min = 0.9\n", + "\n", + " gamma = 0.8\n", + "\n", + " for _ in range(num_episodes):\n", + " episode = play_game(env, Q, prob)\n", + "\n", + " epsilon = max(epsilon * decay, epsilon_min)\n", + "\n", + " Q = update_Q(env, episode, Q, alpha, gamma)\n", + "\n", + " prob = update_prob(env, episode, Q, prob, epsilon)\n", + "\n", + " return Q, prob" + ], + "metadata": { + "id": "PnRTa7FZ3RnJ" + }, + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Run First-Visit Monte Carlo Reinforcement Learning Algorithm\n", + "import time\n", + "env = BlackjackEnv()\n", + "\n", + "start_time = time.time()\n", + "new_Q, new_prob = run_mc(env, 1000000)\n", + "end_time = time.time()\n", + "\n", + "print(f'Total time for learning: {end_time - start_time} s.')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UKx3psLBd3Yl", + "outputId": "2181c312-d684-4525-bb48-ed723de63333" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + 
"name": "stdout", + "text": [ + "Total time for learning: 230.13713669776917 s.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title best_policy() function\n", + "'''\n", + "This function takes the new Q-table returned and keeps only the action that yields the highest value for a given state. This is represented in three different ways: binary, string and colors.\n", + "0 is hit and 1 is stand in the binary representation of the results.\n", + "H is hit and S is stand in the string representation of the results.\n", + "Green is hit and Red is stand in the color representation of the results.\n", + "'''\n", + "def best_policy(Q):\n", + " best_policy_binary = []\n", + " best_policy_string = []\n", + " best_policy_colors = []\n", + " for i in range(len(Q)):\n", + " best_policy_binary.append(np.argmax(Q[i]))\n", + " best_policy_string.append('H' if np.argmax(Q[i]) == 0 else 'S')\n", + " best_policy_colors.append('g' if np.argmax(Q[i]) == 0 else 'r')\n", + "\n", + " return best_policy_binary, best_policy_string, best_policy_colors" + ], + "metadata": { + "id": "MNbxnlFKeRgQ" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Create DataFrame with Player Value as Rows and Dealer Upcard as Columns\n", + "import pandas as pd\n", + "new_Q_binary, new_Q_string, new_Q_colors = best_policy(new_Q)\n", + "\n", + "df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "color_df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "for s in range(3, 21):\n", + " # possible player values in the range 3 to 20\n", + " start = env.observation_space[1].n * (s - 3)\n", + " end = start + 10\n", + " df.loc[s] = (new_Q_string[start:end])\n", + " color_df.loc[s] = (new_Q_colors[start:end])" + ], + "metadata": { + "id": "lZIV2WxWfR9z" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Create Chart Graphic for the Results\n", + "import matplotlib.pyplot as plt\n", + 
"\n", + "fig, ax = plt.subplots()\n", + "\n", + "# hide axes\n", + "fig.patch.set_visible(False)\n", + "ax.set_axis_off()\n", + "ax.axis('tight')\n", + "\n", + "ax.table(cellText=df.values, cellColours=color_df.values, cellLoc='center', rowLabels=df.index, colLabels=df.columns, loc='center')\n", + "\n", + "fig.tight_layout()\n", + "\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "wR8JVHEef5Cf", + "outputId": "02c77629-9a81-4150-9561-c0255ad152ea" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABM9UlEQVR4nO3de3xU9Z3/8dcM4VaI2HALUw0GbWGgQklVjNoCFcEsjSgsFhdtaNS2FmuRrW3ht+MdEfZRpRcWqnWBNhUftZVIbZBV5GYkSrCp0mYh0FQqKggl0kQllDm/P06SJSGQ5OScOTPf834+HvOAnMye+bx2Evz2nJk5IcuyEBEREZHUF/Z7ABERERFxhxZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIdL8fPBQKJQF9PNzhgTqBtT7PUSCqNVMajVTUFqD0glqNdUhy7L2tXUn3xZ2oVAoKxwO74rH4z38miGRwuEw8Xjc7zESQq1mUquZgtIalE5Qq6nC4fDHoVBoaFuLOz+P2PWLx+M9ioqKiEajPo7hvZKSEmKxGGo1i1rNpFbzBKUT1GqqyspKbrzxxh7YZzmTdmEHQDQaJScnx+8xPFVZWQmo1TRqNZNazROUTlCr6M0TIiIiIsYIxMJu4cKFXHzxxaSnpzNgwACuvfZadu3a5fdYnli2bBkjR47krLPO4qyzziI3N5d169b5PVZCPPzww4RCIebMmeP3KK679957CYVCzW7Dhg3zeyzP7N+/nxtvvJG+ffvSs2dPLrzwQsrLy/0ey3XnnXfeKc9rKBRi9uzZfo/muhMnThCLxcjOzqZnz56cf/75PPDAA1iW5fdorvvHP/7BnDlzGDx4MD179uSyyy5j+/btfo/VaVu2bCE/P59IJEIoFKK4uLjZ9y3L4u6772bQoEH07NmTCRMmUFVV5c+wndRW6zPPPMPEiRPp27cvoVCIiooKX+ZsTSAWdps3b2b27NmUlZXxwgsvcPz4cSZOnEhdXZ3fo7nunHPO4eGHH2bHjh2Ul5fzpS99iSlTpvCnP/3J79E8tX37dn72s58xcuRIv0fxzIgRI3j33Xebbi+//LLfI3niyJEjXH755XTt2pV169bx5z//mR/+8Id88pOf9Hs0123fvr3Zc/rCCy8AMH36dJ8nc9+iRYtYtmwZP/3pT6msrGTRokUsXryYn/zkJ36P5rpbbrmFF154gV/+8pe8+eabTJw4kQkTJrB//36/R+uUuro6Ro0axdKlS1v9/uLFi/nxj3/M8uXLefXVV+nVqxeTJk3i448/TvCknddWa11dHVdccQWLFi1K8GRt8/01donw/PPPN/t65cqVDBgwgB07dvDFL37Rp6m8kZ+f3+zrBQsWsGzZMsrKyhgxYoRPU3mrtraWmTNn8vjjj/Pggw/6PY5n0tLSyMzM9HsMzy1atIhzzz2XFStWNG3Lzs72cSLv9O/fv9nXDz/8MOeffz5jx471aSLvvPLKK0yZMoXJkycD9tHK1atX89prr/k8mbs++ugjfvvb3/Lss882/ffl3nvv5Xe/+x3Lli1L6X+j8vLyyMvLa/V7lmWxZMkS/uM//oMpU6YA8Itf/IKBAwdSXFzMjBkzE
jlqp52pFeCmm24C4K9//WuCJmq/QByxa+mDDz4AICMjw+dJvHXixAmeeuop6urqyM3N9Xscz8yePZvJkyczYcIEv0fxVFVVFZFIhCFDhjBz5kz27Wvz44xS0tq1a7nooouYPn06AwYMYPTo0Tz++ON+j+W5+vp6ioqKKCwsJBQK+T2O6y677DI2bNjA7t27AfjjH//Iyy+/fMb/eKaif/7zn5w4cYIePZp/klfPnj2NPcoOUF1dzXvvvdfs3+E+ffowZswYtm3b5uNkwROII3Yni8fjzJkzh8svv5zPfvazfo/jiTfffJPc3Fw+/vhjevfuzZo1axg+fLjfY3niqaee4vXXXzfi9StnMmbMGFauXMnQoUN59913ue+++/jCF77Azp07SU9P93s8V/3lL39h2bJlzJ07l/nz57N9+3buuOMOunXrRkFBgd/jeaa4uJiamhpmzZrl9yie+MEPfsDRo0cZNmwYXbp04cSJEyxYsICZM2f6PZqr0tPTyc3N5YEHHiAajTJw4EBWr17Ntm3buOCCC/wezzPvvfceAAMHDmy2feDAgU3fk8QI3MJu9uzZ7Ny50+j/5TR06FAqKir44IMP+M1vfkNBQQGbN282bnH3t7/9je985zu88MILp/yvY9OcfFRj5MiRjBkzhsGDB/PrX/+am2++2cfJ3BePx7nooot46KGHABg9ejQ7d+5k+fLlRi/snnjiCfLy8ohEIn6P4olf//rX/OpXv+LJJ59kxIgRVFRUMGfOHCKRiHHP6y9/+UsKCwv51Kc+RZcuXcjJyeGGG25gx44dfo8mARCoU7G33347zz33HBs3buScc87xexzPdOvWjQsuuIDPf/7zLFy4kFGjRvGjH/3I77Fct2PHDg4ePEhOTg5paWmkpaWxefNmfvzjH5OWlsaJEyf8HtEzZ599Np/5zGfYs2eP36O4btCgQaf8j5BoNGrsqWeAt956ixdffJFbbrnF71E8c9ddd/GDH/yAGTNmcOGFF3LTTTdx5513snDhQr9Hc93555/P5s2bqa2t5W9/+xuvvfYax48fZ8iQIX6P5pnG1/8eOHCg2fYDBw4E4rXBySQQCzvLsrj99ttZs2YNL730krEvxD6deDzOsWPH/B7DdVdeeSVvvvkmFRUVTbeLLrqImTNnUlFRQZcuXfwe0TO1tbXs3buXQYMG+T2K6y6//PJTPo5o9+7dDB482KeJvLdixQoGDBjQ9MYCE3344YeEw83/k9OlSxejLwfVq1cvBg0axJEjR1i/fn3TmwpMlJ2dTWZmJhs2bGjadvToUV599VWjX+OdjAJxKnb27Nk8+eSTPPvss6Snpzed7+/Tpw89e/b0eTp3zZs3j7y8PLKysvjHP/7Bk08+yaZNm1i/fr3fo7kuPT39lNdJ9urVi759+xr3+snvfve75OfnM3jwYN555x3uueceunTpwg033OD3aK678847ueyyy3jooYe4/vrree2113jsscd47LHH/B7NE/F4nBUrVlBQUEBamrn/JOfn57NgwQKysrIYMWIEf/jDH3jkkUcoLCz0ezTXrV+/HsuyGDp0KHv27OGuu+5i2LBhfO1rX/N7tE6pra1tdpagurqaiooKMjIyyMrKYs6cOTz44IN8+tOfJjs7m1gsRiQS4dprr/VvaIfaav373//Ovn37eOeddwCa/sdoZmam70cozf1X5CTLli0DYNy4cc22r1ixwrgXKh88eJCvfvWrvPvuu/Tp04eRI0eyfv16rrrqKr9Hk054++23ueGGGzh8+DD9+/fniiuuoKys7JSPyzDBxRdfzJo1a5g3bx73338/2dnZLFmyxLgX2Td68cUX2bdvn5ELnJP95Cc/IRaL8a1vfYuDBw8SiUT4xje+wd133+33aK774IMPmDdvHm+//TYZGRlMmzaNBQsW0LVrV79H65Ty8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPPp+Rro
NtqXbt2bbOFeuPHudxzzz3ce++9CZ21pUAs7Ez8ZPPTeeKJJ/wewVebNm3yewRPPPXUU36PkFBf/vKX+fKXv+z3GAkxceLEQPwblZ6ezpIlS1iyZInfo3ju+uuv5/rrr/d7DNeNGzfujD+roVCI+++/n/vvvz+BU3mjrdZZs2Yl7YGhQLzGTkRERCQItLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYot3vig2FQllAPxcfexhASUkJlZWVLu42+ZSWlgJqNY1azaRW8wSlE9Rqqurq6nbfN9Set9mHQqGscDi8Kx6Pu/thNCHA/Hf529RqJrWaSa3mCUonqNVUdmuuZVllZ7pbe4/Y9YvH4z2KioqIRqOdHw57hR2LxWAq7h4HTEZVwEbUahq1mkmt5glKJ6jVVIeAZwCob+uuHfqA4mg0Sk5OjrOhWmg6bNoPiLiyy+R1qOFPtZpFrWZSq3mC0glqFb15QkRERMQUjhZ2y5YtY+TIkZx11lmcddZZ5Obmsm7dOrdnc88aYHUr26uBe4GPEjqNt9Sq1lQWlE5QK6g11ak1KVsdLezOOeccHn74YXbs2EF5eTlf+tKXmDJlCn/605/cnk9ERERE2qlDr7FrlJ+f3+zrBQsWsGzZMsrKyhgxYoQrg4mIiIhIxzha2J3sxIkTPP3009TV1ZGbm+vGTCIiIiLigOOF3Ztvvklubi4ff/wxvXv3Zs2aNQwfPtzN2dy1G1jQYpupn32jVjMFpTUonaBWtaY+tSYdxwu7oUOHUlFRwQcffMBvfvMbCgoK2Lx5c/Iu7rKByS227afxc2HMola1prKgdIJa1Zr61Jp0rY4Xdt26deOCCy4A4POf/zzbt2/nRz/6ET/72c9cG85VXYG+LbYd9WOQBFCrmYLSGpROUKtaU59ak45rn2MXj8c5duyYW7sTERERkQ5ydMRu3rx55OXlkZWVxT/+8Q+efPJJNm3axPr1692eT0RERETaydHC7uDBg3z1q1/l3XffpU+fPowcOZL169dz1VVXuT2fiIiIiLSTo4XdE0884fYc3rruNNuzsT8x2iRqVWsqC0onqBXUmurUmpStulasiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEB1680RJSQmVlZWuPHBpaan9lyrgkCu7TF77Gv5Uq1nUaia1miconaBWUx1p/11DltX2hc5CodCl4XB4Wzwe78RUpwoD7u4xiYVIymvKeUKtZgpQq/5tMlBQOiFQrUH6XW1ozbUsq+xM92vvEbv6eDxOUVER0Wi089NhH/2LxWIUAe7sMXmVADELmAr083kYr1UBG1GraQLWGt+I/m0yScB+foPUGpTf1UrgRvuv9W3dt0OnYqPRKDk5OY6GaqnxlG4UcGePyavp5HU/IOLjIInQeDhcrWYJYKv+bTJIAH9+g9QahN/VjtCbJ0REREQM4crC7uGHHyYUCjFnzhw3dueJ94HbgCygO5AJTAJK/RzKC2uA1a1sr8b+dOyPEjqNt9RqXmtQOk8SiH+bgvS8qtXMVlLnd9XRJcVOtn37dn72s58xcuRIN+bxzDTsE9OrgCHAAWADcNjPoUQk8PRvk0hqSJXf1U4t7Gpra5k5cyaPP/44Dz74oFszua4G2ApsAsY2bBsMXOLTPCIioH+bRFJFDanzu9qpU7GzZ89m8uTJTJgwwa15PNG74VYMHPN3FBGRJvq3SSQ1pNLvquMjdk899RSvv/4627dvd3MeT6QBK4FbgeXY754ZC8wAkvsEskO7gQUttpn6mUZqNU9QOgnYv00Bel7V6scg3kql31VHC7u//e1vfOc73+GFF16gR48ebs/kiWnAZOxDqWXAOmAx8HNgln9jeSMbO/Zk+4FnfJjFa2o1rzUonQ0C829TkJ5XtRrZmiq/q44Wdjt27ODgwYPNPtPuxIkTbNmyhZ/+9KccO3aMLl26uDakW3oAVzXcYsAtwD0k1xPiiq5A3xbbjvoxSAKo1
TxB6TxJIP5tCtLzqlZjpcLvqqOF3ZVXXsmbb77ZbNvXvvY1hg0bxve///2kXNS1Zjj2+XIRkWSif5tEUkMy/q46Wtilp6fz2c9+ttm2Xr160bdv31O2J4PDwHSgEPtceDpQjn0IdYqPc4lIsOnfJpHUkEq/q53+HLtU0BsYAzwK7AWOA+divwhyvo9ziUiw6d8mkdSQSr+rri3sNm3a5NauXNcdWNhwM951p9mejf1J4CZRq3mtQelsEJh/m4L0vKrVyNZU+l3VtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNv6lCjjk4yCJsK/hT7WaJYCt+rfJIAH8+Q1SaxB+V6s7cN+QZbV9UbdQKHRpOBzeFo/HnU/VijDg7h6TWAgjr5/XGj2vhgpQq36GzaPn1ExBel4bWnMtyyo70/3ae8SuPh6PU1RURDQa7fx02Ef/YrEYRYA7e0xeJUDMAqYC/XwexmtVEN+InlfTVAEbCUyrfoYNo+fUTAF6XiuBG+2/1rd13w6dio1Go82uD9sZjad0o4A7e0xeTYeI+wERHwdJhIZD/3peDdN4SidArfoZNoieUzMF6HntCL15QkRERMQQjhZ29957L6FQqNlt2LBhbs/mqveB24As7E+QzgQmcdKLh02xBljdyvZq7E8C/yih03hOzytmPa9B6TxJIH6GA/a8BuI5BT2vJOfz6viSYiNGjODFF1/8vx2lJfdlZ6dhn5heBQwBDgAbsC/sK6lLz6ukOv0Mm0fPqZlS5Xl1vBpLS0sjMzPTzVk8UwNsBTYBYxu2DQYu8WkecUcNel4ltdWgn2HT1KDn1EQ1pM7z6vg1dlVVVUQiEYYMGcLMmTPZt29f2/9HPundcCsGjvk7irhIz6ukOv0Mm0fPqZlS6Xl1dMRuzJgxrFy5kqFDh/Luu+9y33338YUvfIGdO3eSnp7u9oydlgasBG4FlmO/e2YsMAMY6d9Y3tkNLGixzcDPNNLzipHPa2A6CdjPcECe10A9p6DnleR7Xh0t7PLy8pr+PnLkSMaMGcPgwYP59a9/zc033+zacG6aBkzGPpRaBqwDFgM/B2b5N5Y3srFjT7YfeMaHWTym5xXzntegdDYIzM9wgJ7XwDynoOeV5HteXfm4k7PPPpvPfOYz7Nmzx43deaYHcBUQA17BfiLu8XMgr3QF+ra4Jd+BVNfoeTVMUDpPEoif4YA9r4F4TkHPK8n3vLqysKutrWXv3r0MGjTIjd0lzHCgzu8hxHV6XiXV6WfYPHpOzZSMz6ujU7Hf/e53yc/PZ/Dgwbzzzjvcc889dOnShRtuuMHt+VxxGJgOFGKfC08HyrEPoU7xcS7pHD2vkur0M2wePadmSqXn1dHC7u233+aGG27g8OHD9O/fnyuuuIKysjL69+/v9nyu6A2MAR4F9gLHgXOxXwQ538e5pHP0vEqq08+wefScmimVnldHC7unnnrK7Tk81R1Y2HAz3nWn2Z6N/UngBtHzinnPa1A6GwTmZzhAz2tgnlPQ85qkdK1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lJ7n4A7e0xepY1/qQIO+ThIIjRcMljPq2EaLwUdoFb9DBtEz6mZAvS8VnfgviHLavuibqFQ6NJwOLwtHo87n6oVYcDdPSavILUSwshrBbZKrWZSq3mC0glqNZXdmmtZVtmZ7tbeI3b18XicoqIiotFo54fDPvoXi8UoAtzZY/Iqwb78SGBaLWAq0M/nYbxWBWxEraZRq3mC0glqNdUhGq+/W9/WXTt0KjYajZKTk+NsqBYaT+lGAXf2mLwaDxEHqZV+QMTHQRKh8TSHWs2iVvMEpRPUKnrzhIiIiIgptLATERERMYTjhd3+/fu58cYb6du3Lz179uTCCy+kvLzczdlc9T5wG5CFfWmQTGASJ72DyCCBaV0DrG5lezX25
Ww+Sug03gpKa1A6Qa2g1lSn1qRsdXSt2CNHjnD55Zczfvx41q1bR//+/amqquKTn/yk2/O5Zhr2Kw5XAUOAA8AG4LCfQ3kkSK0iIiLyfxwt7BYtWsS5557LihUrmrZlZ2e7NpTbaoCtwCZgbMO2wcAlPs3jpRqC0yoiIiLNOToVu3btWi666CKmT5/OgAEDGD16NI8//rjbs7mmd8OtGDjm7yieC1KriIiINOfoiN1f/vIXli1bxty5c5k/fz7bt2/njjvuoFu3bhQUFLg9Y6elASuBW4Hl2B85MhaYAYz0byxPBKkVgN3AghbbTP2wyqC0BqUT1KrW1KfWpONoYRePx7nooot46KGHABg9ejQ7d+5k+fLlSbmwA/t1Z5OxT1OWAeuAxcDPgVn+jeWJILWSjR17sv00fpCjWYLSGpROUKtaU59ak67V0cJu0KBBDB8+vNm2aDTKb3/7W1eG8koP4KqGWwy4BbgHAxc7BKi1K9C3xbajfgySAEFpDUonqFWtqU+tScfRa+wuv/xydu3a1Wzb7t27GTx4sCtDJcpwoM7vIRIkSK0iIiJB5eiI3Z133slll13GQw89xPXXX89rr73GY489xmOPPeb2fK44DEwHCrFfZ5YOlGOfnpzi41xeCFKriIiINOdoYXfxxRezZs0a5s2bx/333092djZLlixh5syZbs/nit7AGOBRYC9wHDgX+w0G832cywtBahUREZHmHC3sAL785S/z5S9/2c1ZPNMdWNhwM12QWrnuNNuzsT8J3CRBaQ1KJ6gV1Jrq1JqUrbpWrIiIiIghtLATERERMYQWdiIiIiKG0MJORERExBAdevNESUkJlZWVrjxwaWmpvU/AnT0mr9KGP4PUShVwyMdBEmFfw59qNYtazROUTlCrqY60/64hy2r7QmehUOjScDi8LR6Pd2Kq1nZMUl5nzRNqNZNazRSg1jDg8r/sySlAz6laDWW35lqWVXamu7X3iF19PB6nqKiIaDTa+eGwj/7FYjGYCvRzZZfJqwrYiFpNo1YzBaw1vhGKAHf+ZU9OJUDMIjDPaZB+fgPTeojGa9LWt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh4jVaha1mimArVHAnX/Zk1PTS2AC9JyqNbj05gkRERERQzha2J133nmEQqFTbrNnz3Z7PnesAVa3sr0a+xOjP0roNN5Sq1pTWVA6IVitwPvAbUAW9hVyMoFJnPSGK1ME6XlVa1K2Orqk2Pbt2zlx4kTT1zt37uSqq65i+vTprg0mIiLmmIb94qBVwBDgALABOOznUCIGcrSw69+/f7OvH374Yc4//3zGjh3rylAiImKOGmArsAlo/K/EYOASn+YRMVmnX2NXX19PUVERhYWFhEIhN2YSERGD9G64FQPH/B1FxHiOjtidrLi4mJqaGmbNmuXCOB7aDSxosc3Uz75Rq5mC0hqUTghMaxqwErgVWI79DtyxwAxgpH9jeScgzyug1iRs7fTC7oknniAvL49IJMnfa5wNTG6xbT+NnwtjFrWqNZUFpRMC1ToNO3UrUAasAxYDPwdm+TeWNwL0vKqVpGvt1MLurbfe4sUXX+SZZ5KsqjVdgb4tth31Y5AEUKuZgtIalE4IVivQA7iq4RYDbgHuwcCFXZCeV7UmnU69xm7FihUMGDCAyZNbLmFFRETObDhQ5/cQIoZxfMQuHo+zYsUKCgoKSEvr9BldEREx1GFgOlCI/Zq6dKAc+1TsFB/nEjGR4xXZiy++yL59+ygsLHRzHhERMUxvYAzwKLAXOA6ci/1mivk+ziViIscLu4kTJ2JZSfh2kNZcd5rt2difGG0Stao1lQWlEwLV2h1Y2HAzXoCeV7WSlK26VqyIiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQHXrzRElJCZWVla48cGlpqf2XKuCQK7tMXvsa/lSrWdRqpgC2lgDu/MuenEob/xKg51SthjnS/ruG2vPO1lAodGk4H
N4Wj8c7MVVrOyYpr7PmCbWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkM0XpO2vq27duhUbDQaJScnx9lQLTSd0u0HRFzZZfJqPESsVrOo1UxqNU9QOkGtojdPiIiIiJjC0cLuxIkTxGIxsrOz6dmzJ+effz4PPPBA8l6JYg2wupXt1difGP1RQqfxllrVmsqC0glqBbWmOrUmZaujS4otWrSIZcuWsWrVKkaMGEF5eTlf+9rX6NOnD3fccYfbM4qIiIhIOzha2L3yyitMmTKFyZMnA3DeeeexevVqXnvtNVeHExEREZH2c3Qq9rLLLmPDhg3s3r0bgD/+8Y+8/PLL5OXluTqciIiIiLSfoyN2P/jBDzh69CjDhg2jS5cunDhxggULFjBz5ky353PPbmBBi21J+pLATlOrmYLSGpROUKtaU59ak46jhd2vf/1rfvWrX/Hkk08yYsQIKioqmDNnDpFIhIKCArdndEc2MLnFtv00fi6MWdSq1lQWlE5Qq1pTn1qTrtXRwu6uu+7iBz/4ATNmzADgwgsv5K233mLhwoXJu7DrCvRtse2oH4MkgFrNFJTWoHSCWtWa+tSadBy9xu7DDz8kHG7+f9qlSxdcv+SYiIiIiLSboyN2+fn5LFiwgKysLEaMGMEf/vAHHnnkEQoLC92eT0RERETaydHC7ic/+QmxWIxvfetbHDx4kEgkwje+8Q3uvvtut+cTERERkXZytLBLT09nyZIlLFmyxOVxPHLdabZnY39itEnUqtZUFpROUCuoNdWpNSlbda1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lL7L1XAIVd2mbz2NfypVrOo1UxqNU9QOkGtpjrS/ruGLKvtC52FQqFLw+HwNtc/gDhEUl5nzRNqNZNazaRW8wSlEwLVGgaCcmmEhtZcy7LKznS/9h6xq4/H4xQVFRGNRjs/HfbRv1gsBlOBfq7sMnlVARtRq2nUaia1miconRC41vhGKALcWZkkr0rgRvuv9W3dt0OnYqPRKDk5OY6GaqnplG4/IOLKLpNX4yFitZpFrWZSq3mC0gmBbI0C7qxMzKA3T4iIiIgYwvHC7h//+Adz5sxh8ODB9OzZk8suu4zt27e7OZt71gCrW9lejf2J0R8ldBpvqVWtqSwonaBWUGuqC1Ir8D5wG5AFdAcygUlAqZ9DtcLRJcUAbrnlFnbu3Mkvf/lLIpEIRUVFTJgwgT//+c986lOfcnNGEREREV9Nw36B2ypgCHAA2AAc9nOoVjg6YvfRRx/x29/+lsWLF/PFL36RCy64gHvvvZcLLriAZcuWuT2jiIiIiG9qgK3AImA8MBi4BJgHXOPfWK1ytLD75z//yYkTJ+jRo0ez7T179uTll192ZTARERGRZNC74VYMHPN3lDY5OhWbnp5Obm4uDzzwANFolIEDB7J69Wq2bdvGBRdc4PaM7tgNLGixzdTP+VGrmYLSGpROUKtaU19AWtOAlcCtwHLsd+GOBWYAI/0bq1WOX2P3y1/+ksLCQj71qU/RpUsXcnJyuOGGG9ixY4eb87knG5jcYtt+4BkfZvGaWtWayoLSCWpVa+oLUOs07NStQBmwDlgM/ByY5d9Yp3C8sDv//PPZvHkzdXV1HD16lEGDBvGVr3yFIUOGuDmfe7oCfVtsO+rHIAmgVjMFpTUonaBWtaa+ILUCPYCrGm4x4BbgHpJrYdfpz7Hr1asXgwYN4siRI6xfv54pU6a4MZeIiIhIUhsO1Pk9RAuOj9itX78ey7IYOnQoe/bs4a677mLYsGF87Wtfc3M+EREREV8dBqYDhdivqUsHyrFPxSbb4SzHC7sPPviAefPm8fbbb5ORkcG0adNYsGABXbt2dXM+EREREV/1BsYAjwJ7gePAudhvppjv41ytcbywu/7667n++
uvdnMU7151mezb2p2ObRK1qTWVB6QS1glpTXYBauwMLG27JTteKFRERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDdOhdsSUlJVRWVrrywKWlpfZfqoBDruwyee1r+FOtZlGrmdRqnqB0QiBbSwB3VibJq7oD9w1ZVttX6w2FQpeGw+Ft8Xjc+VSt7hgjLxbcKrWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkPAMwDUt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh8PVaha1mkmt5glKJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLiZt+3LIu7776bQYMG0bNnTyZMmEBVVVVn53VmDbC6le3V2Jc9+Sih03hLrWpNZUHpBLWCWlOdWpOy1fHCrq6ujlGjRrF06dJWv7948WJ+/OMfs3z5cl599VV69erFpEmT+Pjjjx0PKyIiIiKn16HX2J0sLy+PvLy8Vr9nWRZLlizhP/7jP5gyZQoAv/jFLxg4cCDFxcXMmDHD6cOKiIiIyGl48hq76upq3nvvPSZMmNC0rU+fPowZM4Zt27Z58ZAiIiIigef4iN2ZvPfeewAMHDiw2faBAwc2fS/hdgMLWmwz9bNv1GqmoLQGpRPUqtbUp9ak48nCLillA5NbbNtP4+fCmEWtak1lQekEtao19ak16Vo9WdhlZmYCcODAAQYNGtS0/cCBA3zuc5/z4iHb1hXo22LbUT8GSQC1mikorUHpBLWqNfWpNel48hq77OxsMjMz2bBhQ9O2o0eP8uqrr5Kbm+vFQ4qIiIgEnuMjdrW1tezZs6fp6+rqaioqKsjIyCArK4s5c+bw4IMP8ulPf5rs7GxisRiRSIRrr73WjblFREREpAXHC7vy8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPP06NHj85PLSIiIiKncLywGzduHJZ1+reDhEIh7r//fu6//36nD+Ge606zPRv7E6NNola1prKgdIJaQa2pTq1J2aprxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2n+pAg65ssvkta/hT7WaRa1mUqt5gtIJajXVkfbfNXSmd7Y23SkUujQcDm+Lx+OdmKq1HZOU11nzhFrNpFYzqdU8QekEtZrKbs21LKvsTHdr7xG7+ng8TlFREdFotPPDYR/9i8ViMBXo58ouk1cVsBG1mkatZlKreYLSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFppO6fYDIq7sMnk1HiJWq1nUaia1miconaBW0ZsnREREREzheGG3ZcsW8vPziUQihEIhiouLm33/mWeeYeLEifTt25dQKERFRUUnR+2ENcDqVrZXY39i9EcJncZbalVrKgtKJ6gV1Jrq1JqUrY4XdnV1dYwaNYqlS5ee9vtXXHEFixYtcjyciIiIiLSf42vF5uXlkZeXd9rv33TTTQD89a9/dfoQIiIiItIBeo2diIiIiCEcH7FLObuBBS22mfrZN2o1U1Bag9IJalVr6lNr0gnOwi4bmNxi234aPxfGLGpVayoLSieoVa2pT61J1xqchV1XoG+LbUf9GCQB1GqmoLQGpRPUqtbUp9ako9fYiYiIiBjC8RG72tpa9uzZ0/R1dXU1FRUVZGRkkJWVxd///nf27dvHO++8A8CuXbsAyMzMJDMzs5Nji4iIiEhLjo/YlZeXM3r0aEaPHg3A3LlzGT16NHfffTcAa9euZfTo0UyebJ+QnjFjBqNHj2b58uUujC0iIiIiLTk+Yjdu3Dgs6/RvB5k1axazZs1yunt3XXea7dnYnxhtErWqNZUFpRPUCmpNdWpNyla9xk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkh
MrKSlceuLS01P5LFXDIlV0mr30Nf6rVLGo1k1rNE5ROUKupjrT/rqEzvbO16U6h0KXhcHhbPB7vxFSt7ZikvM6aJwLUGgZc/klJWmo1VIB+XwPTGpROUKup7NZcy7LKznS39h6xq4/H4xQVFRGNRjs/HPbRv1gsBlOBfq7sMnlVARsJTGt8IxQB7vykJK8SIIZaTVMCxCwC8/saiH+bgtIJajXVIRqvSVvf1l07dCo2Go2Sk5PjbKgWmk7p9gMiruwyeTUeIg5QaxRw5ycleTW+KEGtZml6sUmAfl+Nbw1KJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLipu8dP36c73//+1x44YX06tWLSCTCV7/61abrxibcGmB1K9ursT8x+qOETuOtILUC7wO3AVlAdyATmASU+jmUR9RqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl57yvQ8//JDXX3+dWCzG66+/zjPPPMOuXbu45pprOjWsSEvTgD8Aq4DdwFpgHHDYx5m8olYzW0VE3OT4WrF5eXnk5eW1+r0+ffrwwgsvNNv205/+lEsuuYR9+/aRlZXl9GFFmtQAW4FNwNiGbYOBS3yax0s1qNXEVhERtyXsNXYffPABoVCIs88+O1EPKYbr3XArBo75O4rn1CoiIu3h+IhdR3z88cd8//vf54YbbuCss85KxEOeajewoMU2Uz/7JiCtacBK4FZgOfa7NccCM4CR/o3lCbWa2RqU31VArWpNfSnS6vnC7vjx41x//fVYlsWyZcu8frjTywYmt9i2n8bPhTFLgFqnYaduBcqAdcBi4OfALP/G8oRaDWwN0O+qWlFrqkuRVk8Xdo2LurfeeouXXnrJv6N1AF2Bvi22HfVjkAQIUivQA7iq4RYDbgHuwbAFQAO1GtYapN9VtZpJrUnHs9fYNS7qqqqqePHFF+nbt+X/N0S8MRyo83uIBFGriIiczPERu9raWvbs2dP0dXV1NRUVFWRkZDBo0CD+9V//lddff53nnnuOEydO8N577wGQkZFBt27dOj+5BN5hYDpQiP3aq3SgHPuU3RQf5/KCWs1sFRFxm+OFXXl5OePHj2/6eu7cuQAUFBRw7733snbtWgA+97nPNfu/27hxI+PGjXP6sCJNegNjgEeBvcBx4FzsF93P93EuL6jVzFYREbc5XtiNGzcOyzr920HO9L2Eu+4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ+1592ooFLo0HA5vi8fjnZiqtR2TlNdZ80SAWsOAyz8pSUuthgrQ72tgWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxWAq0M+VXSavKmAjgWmNb4QiwJ2flORVgn2pK7WapQSIWQTm9zUQ/zYFpRPUaqpDNF6Ttr6tu3boVGw0GiUnJ8fZUC00ndLtB0Rc2WXyajxEHKDWKODOT0ryajwlqVazNJ1qDtDvq/GtQekEtYrePCEiIiJiCi3sRERERAzheGG3ZcsW8vPziUQihEIhiouLm33/3nvvZdiwYfTq1YtPfvKTTJgwgVdffbWz8zqzBljdyvZq7EuBfJTQabwVpFbgfeA2IAv7UlSZwCROemejQdRqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl7b6/c985jP89Kc/5c033+Tll1/mvPPOY+LEibz//vuOhxVpaRrwB2AVsBtYC4wDDvs4k1fUamariIibOvTmiZPl5eWRl5d32u//27/9W7OvH3nkEZ544gneeOMNrrzySqcPK9KkBtgKbALGNmwbDFzi0zxeqkGtJraKiLgtIa+xq6+v57HHHqNPnz6MGjUqEQ8pAdC74VYMHPN3FM+pV
URE2sPxEbv2eO6555gxYwYffvghgwYN4oUXXqBfP58+bGY3sKDFNlM/1DAgrWnASuBWYDn2x3CMBWYAI/0byxNqNbM1KL+rgFrVmvpSpNXThd348eOpqKjg0KFDPP7441x//fW8+uqrDBgwwMuHbV02MLnFtv00fuCfWQLUOg07dStQBqwDFgM/B2b5N5Yn1Gpga4B+V9WKWlNdirR6eiq2V69eXHDBBVx66aU88cQTpKWl8cQTT3j5kKfXFejb4pbuzyieC1Ir0AO4CvvKCK9g/4f/Hj8H8pBaDROk31W1mkmtSSehn2MXj8c5dkyvmhFvDQfq/B4iQdQqIiInc3wqtra2lj179jR9XV1dTUVFBRkZGfTt25cFCxZwzTXXMGjQIA4dOsTSpUvZv38/06dPd2VwkcPAdKAQ+7VX6UA59im7KT7O5QW1mtkqIuI2xwu78vJyxo8f3/T13LlzASgoKGD58uX87//+L6tWreLQoUP07duXiy++mK1btzJixIjOTy2C/c7JMcCjwF7gOHAu9ovu5/s4lxfUamariIjbHC/sxo0bh2Wd/u0gzzyTRK8mvO4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ2d6Z2vTnUKhS8Ph8LZ4PN6JqU4VBtzdY/JSq5nUaqgQSXkNSE8EpTUonaBWU9mtuZZllZ3pbu09Ylcfj8cpKioiGo12fjjso3+xWIwiwJ09Jq8S7MsiqdUsajVTCRCzgKlAP5+H8VoVsBHzW4PSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFhpP6UYBd/aYvBpPXanVLGo1U9Op5n5AxMdBEqHx9JXprUHpBLWK3jwhIiIiYgrHC7stW7aQn59PJBIhFApRXFx82vt+85vfJBQKsWTJEqcP12nvA7cBWdifbJ8JTOKkF0obRK1qTXWBaF0DrG5lezX2J9l/lNBpvKVWtaa6FGp1fEmxuro6Ro0aRWFhIVOnTj3t/dasWUNZWRmRiL/HSadhn5heBQwBDgAbsC84bhq1qjXVBalVRMRNjhd2eXl55OXlnfE++/fv59vf/jbr169n8uTJTh+q02qArcAmYGzDtsHAJT7N46Ua1KrW1FZDcFpFRNzm2Wvs4vE4N910E3fddRcjRozw6mHapXfDrRg45usk3lOrmdQqIiLt4fiIXVsWLVpEWload9xxh1cP0W5pwErgVmA59rv6xgIzgJH+jeUJtao11QWpld3AghbbTP1MLrWaSa1Jx5OF3Y4dO/jRj37E66+/TigU8uIhOmwaMBn7FE8ZsA5YDPwcmOXfWJ5Qq1pTXWBas7FDT7afxs+rMota1ZrqUqTVk1OxW7du5eDBg2RlZZGWlkZaWhpvvfUW//7v/855553nxUO2Sw/gKuwPWn0F+z8Q9/g2jbfUaia1GqYr0LfFLd3XibyjVjOpNel4srC76aabeOONN6ioqGi6RSIR7rrrLtavX+/FQzoyHKjze4gEUauZ1CoiIidzfCq2traWPXv2NH1dXV1NRUUFGRkZZGVl0bdv32b379q1K5mZmQwdOtT5tA4dBqYDhdiv0UkHyrFP7UxJ+DTeUqtaU12QWkVE3OZ4YVdeXs748eObvp47dy4ABQUFrFy5stODuak3MAZ4FNgLHAfOxX5x9nwf5/KCWtWa6oLUKiLiNscLu3HjxmFZ7X87yF//+lenD9Vp3YGFDTfTqdVMajXQdafZno39SfYmUataU10KtepasSIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUN06M0TJSUlVFZWuvLApaWl9j4Bd/aYvEob/lSrWdRqpsZWqoBDPg6SCPsa/jS9NSidoFZTHWn/XUPteWdrKBS6NBwOb
4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcXNvj9r1ixCoVCz29VXX93ZeR17H7gNyML+ZPtMYBInvVDaIGpVa6oLROsaYHUr26uxP8n+o4RO4y21qjXVpVCr40uK1dXVMWrUKAoLC5k6dWqr97n66qtZsWJF09fdu3d3+nCdNg37xPQqYAhwANiAfcFx06hVrakuSK0iIm5yvLDLy8sjLy/vjPfp3r07mZmZTh/CNTXAVmATMLZh22DgEp/m8VINalVraqshOK0iIm7z9DV2mzZtYsCAAQwdOpTbbruNw4f9+d/bvRtuxcAxXyZIHLWaSa0iItIejo/YteXqq69m6tSpZGdns3fvXubPn09eXh7btm2jS5cuXj1sq9KAlcCtwHLsd/WNBWYAIxM6iffUqtZUF6RWdgMLWmwz9TO51GomtSYdzxZ2M2bMaPr7hRdeyMiRIzn//PPZtGkTV155pVcPe1rTgMnYp3jKgHXAYuDnwKyET+Mttao11QWmNRs79GT7afy8KrOoVa2pLkVaE/ZxJ0OGDKFfv37s2bMnUQ95ih7AVdgftPoK9n8g7vFtGm+p1UxqNUxXoG+LW7qvE3lHrWZSa9JJ2MLu7bff5vDhwwwaNChRD9mm4UCd30MkiFrNpFYRETmZ41OxtbW1zY6+VVdXU1FRQUZGBhkZGdx3331MmzaNzMxM9u7dy/e+9z0uuOACJk2a5MrgHXEYmA4UYr9GJx0oxz61MyXh03hLrWpNdUFqFRFxm+OFXXl5OePHj2/6eu7cuQAUFBSwbNky3njjDVatWkVNTQ2RSISJEyfywAMP+PJZdr2BMcCjwF7gOHAu9ouz5yd8Gm+pVa2pLkitIiJuc7ywGzduHJZ1+reDrF+/3umuXdcdWNhwM51azaRWA113mu3Z2J9kbxK1qjXVpVCrrhUrIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExRIfePFFSUkJlZaUrD1xaWmrvE3Bnj8mrtOFPtZpFrWZqbKUKOOTjIImwr+FP01uD0glqNdWR9t81dKZ3tjbdKRS6NBwOb4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcWn3KeyspJrrrmGPn360KtXLy6++GL27dt36s4S4H3gNiAL+5PtM4FJnPRCaYOoVa2pLhCta4DVrWyvxv4k+48SOo231KrWVJdCrY4vKVZXV8eoUaMoLCxk6tSpp3x/7969XHHFFdx8883cd999nHXWWfzpT3+iR48enRrYqWnYJ6ZXAUOAA8AG7AuOm0atak11QWoVEXGT44VdXl4eeXl5p/3+//t//49/+Zd/YfHixU3bzj//fKcP1yk1wFZgEzC2Ydtg4BJfpvFWDWpVa2qrITitIiJu8+Q1dvF4nN///vd85jOfYdKkSQwYMIAxY8a0ero2EXo33IqBY75MkDhqNZNaRUSkPRwfsTuTgwcPUltby8MPP8yDDz7IokWLeP7555k6dSobN25k7Nixbe/ERWnASuBWYDn2u/rGAjOAkQmdxHtqVWuqC1Iru4EFLbaZ+plcajWTWpOOJwu7xg8ynjJlC
nfeeScAn/vc53jllVdYvnx5whd2YL9mZzL2KZ4yYB2wGPg5MCvh03hLrWpNdYFpzcYOPdl+Gj+vyixqVWuqS5FWT07F9uvXj7S0NIYPH95sezQa9e1dsQA9gKuwP2j1Fez/QNzj2zTeUquZ1GqYrkDfFrd0XyfyjlrNpNak48nCrlu3blx88cXs2rWr2fbdu3czePBgLx7SkeFAnd9DJIhazaRWERE5meNTsbW1tezZs6fp6+rqaioqKsjIyCArK4u77rqLr3zlK3zxi19k/PjxPP/88/zud79j06ZNbszdIYeB6UAh9mt00oFy7FM7UxI+jbfUqtZUF6RWERG3OV7YlZeXM378+Kav586dC0BBQQErV67kuuuuY/ny5SxcuJA77riDoUOH8tvf/pYrrrii81N3UG9gDPAosBc4DpyL/eLs+QmfxltqVWuqC1KriIjbHC/sxo0bh2Wd+e0ghYWFFBYWOn0I13QHFjbcTKdWM6nVQNedZns29ifZm0Stak11KdSqa8WKiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCE69K7YkpISKisrXXng0tJSe5+AO3tMXqUNf6rVLGo1U2MrVcAhHwdJhMYLAZneGpROUKupjrT/rqG2PrIEIBQKXRoOh7c1XgPWLWHA3T0mL7WaSa1mUqt5gtIJwWolBLS9jDGD3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFKALc2WPyKsG+3qVazaJWM6nVPEHphAC2WsBUoJ/Pw3jtEPAMAPVt3bVDp2Kj0Sg5OTnOhmqh8ZRuFHBnj8mr8dSVWs2iVjOp1TxB6YRgttIPiPg4SJLRmydEREREDOF4Ybdlyxby8/OJRCKEQiGKi4ubfT8UCrV6+8///M/OzuzI+8BtQBb2JYsygUmc9EJpg6hVralOrea1BqUT1Gpk6xpgdSvbq7EvKfZRQqc5I8fXiq2rq2PUqFEUFhYyderUU77/7rvvNvt63bp13HzzzUybNs3pQ3bKNOwT06uAIcABYANw2JdpvKVWtaY6tZrXGpROUKupranC8cIuLy+PvLy8034/MzOz2dfPPvss48ePZ8iQIU4f0rEaYCuwCRjbsG0wcEnCJ/FeDWpVa2qrQa2mtdYQjE5Qq6mtqSQhr7E7cOAAv//977n55psT8XCn6N1wKwaO+TJB4qjVTGo1U1Bag9IJahX/JWRht2rVKtLT01s9ZZsIacBK7EPFZwOXA/OBN3yZxltqVWuqU6t5rUHpBLWa2grAbmBBi9uvfJ2oVQlZ2P33f/83M2fOpEePHol4uFZNA94B1gJXYx86zsH+oTSNWtWa6tRqXmtQOkGtpraSDXyzxe0aXydqlecLu61bt7Jr1y5uueUWrx+qTT2Aq7A/vPEVYBZwj58DeUitZlKrmYLSGpROUKuRrV2Bvi1u6b5O1CrPF3ZPPPEEn//85xk1apTXD9Vhw4E6v4dIELWaSa1mCkprUDpBrZI4jt8VW1tby549e5q+rq6upqKigoyMDLKysgA4evQoTz/9ND/84Q87P2knHAamA4XASOwFdjmwGJji41xeUKtaU51azWsNSieo1dTWVOJ4YVdeXs748eObvp47dy4ABQUFrFy5EoCnnnoKy7K44YYbOjdlJ/UGxgCPAnuB48C5wK3YL/Q0iVrVmurUal5rUDpBraa2phLHC7tx48ZhWdYZ7/P1r3+dr3/9604fwjXdgYUNN9Op1UxqNVNQWoPSCWo11nWn2Z6NfeWJJKJrxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2vsE3Nlj8ipt+FOtZlGrmdRqnqB0QjBbqQIO+ThIIhxp/11Dbb2zFSAUCl0aDoe3xePxTkx1qjDg7h6Tl1rNpFYzqdU8QekEtZqqoTXXsqyyM92vvUfs6uPxOEVFRUSj0c5Ph330LxaLUQS4s8fkVYJ9qRW1m
kWtZlKreYLSCWo1VSVwo/3X+rbu26FTsdFolJycHEdDtdR4SjeKfcFgkzUeDlerWdRqJrWaJyidoFbRmydEREREjOF4Ybdlyxby8/OJRCKEQiGKi4ubfb+2tpbbb7+dc845h549ezJ8+HCWL1/e2Xkdex+4DcjC/rTsTGASJ7340iBqVWuqU6t5rUHpBLWq1V+OLylWV1fHqFGjKCwsZOrUqad8f+7cubz00ksUFRVx3nnn8T//8z9861vfIhKJcM0113RqaCemYZ+YXgUMAQ4AG7AvYmwatao11anVvNagdIJa1eovxwu7vLw88vLyTvv9V155hYKCAsaNGwfY14392c9+xmuvvZbwhV0NsBXYBIxt2DYYuCShUyRGDWpVa2qrQa2mtdYQjE5Qq1r959lr7C677DLWrl3L/v37sSyLjRs3snv3biZOnOjVQ55W74ZbMXAs4Y+eWGo1k1rNFJTWoHSCWk2VSq2eLex+8pOfMHz4cM455xy6devG1VdfzdKlS/niF7/o1UOeVhqwEvvw6dnA5cB84I2ET+I9tao11anVvNagdIJa1eo/Txd2ZWVlrF27lh07dvDDH/6Q2bNn8+KLL3r1kGc0DXgHWAtcjX04NQf7iTKNWtWa6tRqXmtQOkGtavWZZVlt3rBnt3bs2GG1BrDWrFnT9PWHH35ode3a1Xruueea3e/mm2+2Jk2aZFmWZRUVFVmAtQMsy6fbzWBlJeBxikCtalWrWtUa0E61qrWztx0NrUBOW2s2T47YHT9+nOPHjxMON999ly5dcPuyZJ0xHKjze4gEUauZ1GqmoLQGpRPUaqpkbHX8rtja2lr27NnT9HV1dTUVFRVkZGSQlZXF2LFjueuuu+jZsyeDBw9m8+bN/OIXv+CRRx5xZfCOOAxMBwqBkUA6UA4sBqYkfBpvqVWtqU6t5rUGpRPUqlb/OV7YlZeXM378+Kav586dC0BBQQErV67kqaeeYt68ecycOZO///3vDB48mAULFvDNb36z81N3UG9gDPAosBc4DpwL3Ir94keTqFWtqU6t5rUGpRPUqlb/OV7YjRs3jobX37UqMzOTFStWON29q7oDCxtuplOrmdRqpqC0BqUT1GqqVGrVtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNvypVrOo1UxqNU9QOkGtpqruwH1DZ3pna9OdQqFLw+HwNrc/XDgMJM/HFXtLrWZSq5nUap6gdIJaTdXQmmtZVtmZ7tfeI3b18XicoqIiotFo56fDPvoXi8UoAtzZY/IqAWKgVsOo1UxqNU9QOkGtpqoEbrT/Wt/WfTt0KjYajZKTk+NoqJYaT+lGsS9Ea7LGQ8RqNYtazaRW8wSlE9QqevOEiIiIiDEcL+y2bNlCfn4+kUiEUChEcXFxs+8fOHCAWbNmEYlE+MQnPsHVV19NVVVVZ+d17H3gNiAL+xOkM4FJ/N+LL02iVrWmOrWa1xqUTlCrWv3l+JJidXV1jBo1isLCQqZOndrse5Zlce2119K1a1eeffZZzjrrLB555BEmTJjAn//8Z3r16tXpwTtqGvaJ6VXAEOAAsAH7wr6mUataU51azWsNSieoVa0+syyrzRv26Wtrx44dVmsAa82aNU1f79q1ywKsnTt3Nm07ceKE1b9/f+vxxx+3LMuyioqKLMDaAZbl8e0IWIC1KQGP1dqtqOHx1apWtao1iK1B6VSrWr267Wh4fCCnrTWbJ6+xO3bsGAA9evRo2hYOh+nevTsvv/yyFw95Rr0bbsXAsYQ/emKp1UxqNVNQWoPSCWo1VSq1erKwGzZsGFlZWcybN48jR45QX1/PokWLePvtt3n33Xe9eMgzSgNWYh8+PRu4HJgPvJHwSbynVrWmOrWa1xqUTlCrWv3nycKua9euPPPMM+zevZuMjAw+8YlPsHHjRvLy8giH/Xkj7jTgHWAtcDWwCfv88kpfpvGWW
tWa6tRqXmtQOkGtavWZF6+xO1lNTY118OBBy7Is65JLLrG+9a1vJfw1dqe73QxWlmGvA1CrWtWq1lRpDUqnWtXa2Zvvr7E7WZ8+fejfvz9VVVWUl5czZcoUrx+y3YYDdX4PkSBqNZNazRSU1qB0glpNlYytjj/upLa2lj179jR9XV1dTUVFBRkZGWRlZfH000/Tv39/srKyePPNN/nOd77Dtddey8SJE10ZvCMOA9OBQmAkkA6UA4uB5FlmukOtak11ajWvNSidoFa1+s/xwq68vJzx48c3fT137lwACgoKWLlyJe+++y5z587lwIEDDBo0iK9+9avEYrHOT+xAb2AM8CiwFzgOnAvciv3iR5OoVa2pTq3mtQalE9Sq1iTgxmvsnEiG19gl6pYMrwNQq1rVqtagtgalU63m3pLqNXYiIiIikhha2ImIiIgYQgs7EREREUNoYSciIiJiiA69K7akpITKykpXHri0tNTeJ+DOHpNXacOfajWLWs2kVvMEpRPUaqrqDtw31PCu1zPfKRS6NBwOb4vH486nakUYcHePyUutZlKrmdRqnqB0glpN1dCaa1lW2Znu194jdvXxeJyioiKi0Wjnp8M++heLxSgC3Nlj8ioBYqBWw6jVTGo1T1A6Qa2mqgRutP9a39Z9O3QqNhqNkpOT42iolhpP6UaxPyTPZI2HiNVqFrWaSa3mCUonqFX05gkRERERY2hhJyIiImIIRwu7hQsXcvHFF5Oens6AAQO49tpr2bVrV7P7fPzxx8yePZu+ffvSu3dvpk2bxoEDB1wZ2on3gduALKA7kAlM4v/eVWMStao11anVvNagdIJa1eqvDr3GrtHmzZuZPXs2F198Mf/85z+ZP38+EydO5M9//jO9evUC4M477+T3v/89Tz/9NH369OH2229n6tSpTR9zkmjTsF9xuAoYAhwANgCHfZnGW2pVa6pTq3mtQekEtarVZ21dTLbh41ByAGvHjh1Waw4ePGgB1ubNmy3Lsqyamhqra9eu1tNPP910n8rKSguwtm3bZlmWZRUVFSXs4r1HGi6euykAFypWq1rVqtZkaw1Kp1rV6tVtR8PjAzltrdlceY3dBx98AEBGRgYAO3bs4Pjx40yYMKHpPsOGDSMrK4tt27a58ZAd0rvhVgwcS/ijJ5ZazaRWMwWlNSidoFZTpVJrpxd28XicOXPmcPnll/PZz34WgPfee49u3bpx9tlnN7vvwIEDee+99zr7kB2WBqzEPnx6NnA5MB94I+GTeE+tak11ajWvNSidoFa1+q/TC7vZs2ezc+dOnnrqKTfm8cw04B1gLXA1sAn7/PJK/0byjFrVmurUal5rUDpBrWr1WWdeYzd79mzrnHPOsf7yl780275hwwYLsI4cOdJse1ZWlvXII48k/DV2p7vdDFaWYa8DUKta1arWVGkNSqda1drZm+evsbMsi9tvv501a9bw0ksvkZ2d3ez7n//85+natSsbNmxo2rZr1y727dtHbm6uk4f0xHCgzu8hEkStZlKrmYLSGpROUKupkrHV0cedzJ49myeffJJnn32W9PT0ptfN9enTh549e9KnTx9uvvlm5s6dS0ZGBmeddRbf/va3yc3N5dJLL3U1oD0OA9OBQmAkkA6UA4uBKQmfxltqVWuqU6t5rUHpBLWq1X+OFnbLli0DYNy4cc22r1ixglmzZgHw6KOPEg6HmTZtGseOHWPSpEn813/9V6eGdao3MAZ4FNgLHAfOBW7FfvGjSdSq1lSnVvNag9IJalWr/xwt7Bped3dGPXr0YOnSpSxdutTJQ7iqO7Cw4WY6tZpJrWYKSmtQOkGtpkqlVl0rVkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYogOvXmisrLStQeurq629+naHpNXdcOfajWLWs2kVvMEpRPUaqqONIba8w7XUCiUFQ6Hd8Xj8R7OxzpVGIi7ucMkplYzqdVMajVPUDpBraYKw8dxGGpZ1r4z3a9dC
zuwF3dAPzeGO0k3oN7lfSYrtZpJrWZSq3mC0glqNdWhthZ10IGFnYiIiIgkN715QkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMcT/B73X1paF7bdVAAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Test Best Policy on New Episodes\n", + "\n", + "env = BlackjackEnv()\n", + "NUM_EPISODES = 100000\n", + "\n", + "for _ in range(NUM_EPISODES):\n", + " state = env.reset()\n", + " while env.done == False:\n", + " if state[0] == 19:\n", + " # Player was dealt with a Blackjack\n", + " next_state, reward, env.done, info = env.step(1)\n", + " total_rewards += reward\n", + " else:\n", + " Q_index = get_Q_state_index(state)\n", + " action = new_Q_binary[Q_index]\n", + " new_state, reward, done, desc = env.step(action)\n", + " state = new_state\n", + " total_rewards += reward\n", + "avg_reward = total_rewards / NUM_EPISODES\n", + "print(f'Average Reward: {avg_reward}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7sTNqbeZga_3", + "outputId": "97878c73-7419-41d4-d600-68bfd314b8f4" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Reward: -4.677\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "WL-NmCiphQiq" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 
a40374ea29c8af6dcbe0b7d1f6a762874676cde4 Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:27:11 +0530 Subject: [PATCH 2/8] Rename 121_C2_RL_EXPT_3_SEM_VII.ipynb to Blackjack.ipynb --- .../{121_C2_RL_EXPT_3_SEM_VII.ipynb => Blackjack.ipynb} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename chapter_05_monte_carlo_methods/{121_C2_RL_EXPT_3_SEM_VII.ipynb => Blackjack.ipynb} (99%) diff --git a/chapter_05_monte_carlo_methods/121_C2_RL_EXPT_3_SEM_VII.ipynb b/chapter_05_monte_carlo_methods/Blackjack.ipynb similarity index 99% rename from chapter_05_monte_carlo_methods/121_C2_RL_EXPT_3_SEM_VII.ipynb rename to chapter_05_monte_carlo_methods/Blackjack.ipynb index 24d109a..b6b8840 100644 --- a/chapter_05_monte_carlo_methods/121_C2_RL_EXPT_3_SEM_VII.ipynb +++ b/chapter_05_monte_carlo_methods/Blackjack.ipynb @@ -881,4 +881,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From c25841ce5734175a20fdd4d6f45f8bf7040816bd Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:42:35 +0530 Subject: [PATCH 3/8] Created using Colab --- Monte_Carlo_Blackjack_Demonstration.ipynb | 1081 +++++++++++++++++++++ 1 file changed, 1081 insertions(+) create mode 100644 Monte_Carlo_Blackjack_Demonstration.ipynb diff --git a/Monte_Carlo_Blackjack_Demonstration.ipynb b/Monte_Carlo_Blackjack_Demonstration.ipynb new file mode 100644 index 0000000..30277a0 --- /dev/null +++ b/Monte_Carlo_Blackjack_Demonstration.ipynb @@ -0,0 +1,1081 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Installing Dependencies" + ], + "metadata": { + "id": "ptQKksAbExoX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JuDWA5loOsFM", + "outputId": "b9cdd35b-2703-4828-ad14-494bafe11476" + }, + "outputs": [ + 
{ + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting gymnasium\n", + " Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)\n", + "Collecting farama-notifications>=0.0.1 (from gymnasium)\n", + " Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)\n", + "Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/953.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m501.8/953.9 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n", + "Installing collected packages: farama-notifications, gymnasium\n", + "Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1\n" + ] + } + ], + "source": [ + "!pip install gymnasium" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Importing Dependencies" + ], + "metadata": { + "id": "q6ml-ZqBE1DP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cbt-NEiMO1bA" + }, + "outputs": [], + "source": [ + "import random\n", + "import numpy as np\n", + "import gymnasium as gym\n", + "from gym import error, spaces, utils\n", + "from gym.utils import seeding\n", + "import 
enum\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Set up the game" + ], + "metadata": { + "id": "RSlZaAJ3E4YL" + } + }, + { + "cell_type": "code", + "source": [ + "# Define the rank and suit of a card\n", + "\n", + "ranks = {\n", + " \"two\": 2,\n", + " \"three\": 3,\n", + " \"four\": 4,\n", + " \"five\": 5,\n", + " \"six\": 6,\n", + " \"seven\": 7,\n", + " \"eight\": 8,\n", + " \"nine\": 9,\n", + " \"ten\": 10,\n", + " \"jack\": 10,\n", + " \"queen\": 10,\n", + " \"king\": 10,\n", + " \"ace\": (1, 11),\n", + "}\n", + "\n", + "class Suit(enum.Enum):\n", + " spades = \"spades\"\n", + " clubs = \"clubs\"\n", + " diamonds = \"diamonds\"\n", + " hearts = \"hearts\"\n", + "\n", + "# Now to define the card and deck\n", + "# Implement the shuffle, peek and deal functions for the deck\n", + "\n", + "class Card:\n", + " def __init__(self, suit, rank, value):\n", + " self.suit = suit\n", + " self.rank = rank\n", + " self.value = value\n", + "\n", + " def __str__(self):\n", + " return self.rank + \" of \" + self.suit.value\n", + "\n", + "class Deck:\n", + " def __init__(self, num=1):\n", + " self.cards = []\n", + " for i in range(num):\n", + " for suit in Suit:\n", + " for rank, value in ranks.items():\n", + " self.cards.append(Card(suit, rank, value))\n", + "\n", + " def shuffle(self):\n", + " random.shuffle(self.cards)\n", + "\n", + " def deal(self):\n", + " return self.cards.pop(0)\n", + "\n", + " def peek(self):\n", + " if len(self.cards) > 0:\n", + " return self.cards[0]\n", + "\n", + " def add_to_bottom(self, card):\n", + " self.cards.append(card)\n", + "\n", + " def __str__(self):\n", + " result = \"\"\n", + " for card in self.cards:\n", + " result += str(card) + \"\\n\"\n", + " return result\n", + "\n", + " def __len__(self):\n", + " return len(self.cards)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D3df2N-JrKiE", + 
"outputId": "a36dc6fc-51b2-4fb0-bfb5-380e143cc4b4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", + " and should_run_async(code)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Define the logic for evaluating the value of the player's hand" + ], + "metadata": { + "id": "jLdAUSMgFPIi" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "An ace is used as 11 whenever possible. It is 1 only if the other option would make the player bust.\n", + "'''\n", + "\n", + "def player_eval(player_hand):\n", + " num_ace = 0\n", + " # use_one means that every ace in the hand is counted as one\n", + " use_one = 0\n", + " for card in player_hand:\n", + " if card.rank == \"ace\":\n", + " num_ace += 1\n", + " use_one += card.value[0] # use 1 for Ace\n", + " else:\n", + " use_one += card.value\n", + "\n", + " if num_ace > 0:\n", + " # Define player policy for Aces:\n", + " # Make Aces 11 if they get you to the range [18, 21]\n", + " # Otherwise, use one.\n", + "\n", + " ace_counter = 0\n", + " while ace_counter < num_ace:\n", + " # Only add by 10 because 1 is already added before\n", + " use_eleven = use_one + 10\n", + " if use_eleven > 21:\n", + " return use_one\n", + " elif use_eleven >= 18 and use_eleven <= 21:\n", + " return use_eleven\n", + " else:\n", + " # This allows for some Aces to be 11 and others to be 1.\n", + " use_one = use_eleven\n", + " ace_counter += 1\n", + " return use_one\n", + " else:\n", + " return use_one" + ], + "metadata": { + "id": "SWrENBc-vEUL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + 
"source": [ + "# Define the logic for evaluating the value of the dealer's hand" + ], + "metadata": { + "id": "T9tDhTf0FWUV" + } + }, + { + "cell_type": "code", + "source": [ + "# This follows the same, official rules every time.\n", + "# Still there is a need to figure out what happens if there are multiple Aces.\n", + "def dealer_eval(player_hand):\n", + " num_ace = 0\n", + " use_one = 0\n", + " for card in player_hand:\n", + " if card.rank == \"ace\":\n", + " num_ace += 1\n", + " use_one += card.value[0] # use 1 for Ace\n", + " else:\n", + " use_one += card.value\n", + "\n", + " if num_ace > 0:\n", + " # See if using 11 instead of 1 for the Ace gets the dealer's hand value closer to the [17, 21] range.\n", + " # The dealer will follow Hard 17 rules. This means that the dealer will not hit again if the Ace yields a 17.\n", + " # This also means that Aces initially declared as 11 can be changed to 1 as new cards come.\n", + "\n", + " ace_counter = 0\n", + " while ace_counter < num_ace:\n", + " # Only add 10 because 1 is already added before\n", + " use_eleven = use_one + 10\n", + "\n", + " if use_eleven > 21:\n", + " return use_one\n", + " elif use_eleven >= 17 and use_eleven <= 21:\n", + " return use_eleven\n", + " else:\n", + " # The case where even using Ace as eleven is less than 17.\n", + " use_one = use_eleven\n", + " ace_counter += 1\n", + " return use_one\n", + " else:\n", + " return use_one" + ], + "metadata": { + "id": "YzDYaCCVx3AH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Define logic for the dealer's turn" + ], + "metadata": { + "id": "ips4ASjUFaEg" + } + }, + { + "cell_type": "code", + "source": [ + "def dealer_turn(dealer_hand, deck):\n", + " # Calculate the dealer hand's value.\n", + " dealer_value = dealer_eval(dealer_hand)\n", + "\n", + " # Define dealer policy (which is fixed to the official rules)\n", + " # The dealer keeps hitting until their total is 17 or more\n", + " while 
dealer_value < 17:\n", + " dealer_hand.append(deck.deal())\n", + " dealer_value = dealer_eval(dealer_hand)\n", + "\n", + " return dealer_value, dealer_hand, deck" + ], + "metadata": { + "id": "YetM4Xsixlrf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Define the OpenAI Gym Environment for Blackjack" + ], + "metadata": { + "id": "jUQRNbyAFd9-" + } + }, + { + "cell_type": "code", + "source": [ + "INITIAL_BALANCE = 1000\n", + "NUM_DECKS = 6\n", + "\n", + "class BlackjackEnv(gym.Env):\n", + " metadata = {'render.modes': ['human']}\n", + "\n", + " def __init__(self):\n", + " super(BlackjackEnv, self).__init__()\n", + "\n", + " # Initialize the blackjack deck\n", + " self.bj_deck = Deck(NUM_DECKS)\n", + "\n", + " self.player_hand = []\n", + " self.dealer_hand = []\n", + "\n", + " self.reward_options = {\"lose\": -100, \"tie\": 0, \"win\": 100}\n", + "\n", + " self.action_space = spaces.Discrete(2)\n", + "\n", + " '''\n", + " First element of tuple is the range of possible hand values for the player (3 through 20). This is the possible range of values that the player will actually have to make a decision for. Any player hand value 21 or above already has automatic valuations, and needs no input from an AI agent.\n", + " However, we also need to add all the hand values that the agent could possibly end up in when they bust. Maybe the agent can glean some correlations based on what hand value they bust at, so this should be in the observation space. 
Also, the layout of OpenAI Gym environment class makes us have to include the bust-value in the step() function because we need to return that done is true alongside the final obs, which is the bust-value.\n", + " '''\n", + " # Second element of the tuple is the range of possible values for the dealer's upcard (2 through 11)\n", + " self.observation_space = spaces.Tuple((spaces.Discrete(18), spaces.Discrete(10)))\n", + " self.done = False\n", + "\n", + " def _take_action(self, action):\n", + " if action == 0: #hit\n", + " self.player_hand.append(self.bj_deck.deal())\n", + " # recalculate the value of the player's hand after any changes to the hand\n", + " self.player_value = player_eval(self.player_hand)\n", + "\n", + " def step(self, action):\n", + " self._take_action(action)\n", + "\n", + " # End the episode/game if the player stands or has a hand value >= 21.\n", + " self.done = action == 1 or self.player_value >= 21\n", + "\n", + " # rewards are 0 when the player hits and is still below 21, and they keep playing\n", + " rewards = 0\n", + "\n", + " if self.done:\n", + " # Calculate rewards\n", + " if self.player_value > 21:\n", + " rewards = self.reward_options['lose']\n", + " elif self.player_value == 21:\n", + " rewards = self.reward_options['win']\n", + " else:\n", + " # begin the dealer turn phase\n", + " dealer_value, self.dealer_hand, self.bj_deck = dealer_turn(self.dealer_hand, self.bj_deck)\n", + " # End the dealer turn phase\n", + "\n", + " # Final comparison\n", + " if dealer_value > 21:\n", + " rewards = self.reward_options['win']\n", + " elif dealer_value == 21:\n", + " rewards = self.reward_options['lose']\n", + " else:\n", + " # both dealer and player value is less than 21\n", + " if self.player_value > dealer_value:\n", + " rewards = self.reward_options['win']\n", + " elif self.player_value < dealer_value:\n", + " rewards = self.reward_options['lose']\n", + " else:\n", + " rewards = self.reward_options['tie']\n", + " self.balance += rewards\n", + 
" # Subtract by 2 to fit into the possible observation range.\n", + " # This makes the possible range of 3 through 20 into 1 through 18.\n", + " player_value_obs = self.player_value - 2\n", + " # Get the value of the dealer's upcard; this value is what the agent sees.\n", + " # Subtract by 1 to fit into the possible observation range of 1 to 10\n", + " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", + " # The state is represented as a player hand-value + dealer upcard pair.\n", + " obs = np.array([player_value_obs, upcard_value_obs])\n", + " return obs, rewards, self.done, {}\n", + "\n", + " def reset(self):\n", + " # reset the game to an initial state\n", + " # add the player and dealer cards back into the deck\n", + " self.bj_deck.cards += self.player_hand + self.dealer_hand\n", + "\n", + " # shuffle before beginning. Only shuffle once before the start of each game.\n", + " self.bj_deck.shuffle()\n", + " self.balance = INITIAL_BALANCE\n", + " self.done = False\n", + "\n", + " # returns the start stage for the agent\n", + " # deal 2 cards to the agent and the dealer\n", + " self.player_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", + " self.dealer_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", + " self.dealer_upcard = self.dealer_hand[0]\n", + "\n", + " # calculate the value of the agent's hand\n", + " self.player_value = player_eval(self.player_hand)\n", + "\n", + " # subtract by 2 to fit into the possible observation range. This makes the possible range of 3 through 20 into 1 through 18\n", + " player_value_obs = self.player_value - 2\n", + "\n", + " # get the value of the dealer's upcard; this value is what the agent sees.
Subtract by 1 to fit the possible observation range of 1 to 10.\n", + " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", + "\n", + " # the state is represented as a player hand-value + dealer upcard pair.\n", + " obs = np.array([player_value_obs, upcard_value_obs])\n", + " return obs\n", + "\n", + " def render(self, mode='human', close=False):\n", + " # convert the player hand into a format that is easy to read and understand.\n", + " hand_list = []\n", + " for card in self.player_hand:\n", + " hand_list.append(card.rank)\n", + "\n", + " # recalculate the value of the dealer upcard.\n", + " upcard_value = dealer_eval([self.dealer_upcard])\n", + "\n", + " print(f'Balance: {self.balance}')\n", + " print(f'Player Hand: {hand_list}')\n", + " print(f'Dealer Upcard: {upcard_value}')\n", + " print(f'Done: {self.done}')" + ], + "metadata": { + "id": "am91D0LgqNnf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Test the OpenAI Gym Blackjack Environment" + ], + "metadata": { + "id": "BHiry8g3FiEL" + } + }, + { + "cell_type": "code", + "source": [ + "env = BlackjackEnv()\n", + "\n", + "total_rewards = 0\n", + "NUM_EPISODES = 1000\n", + "\n", + "for _ in range(NUM_EPISODES):\n", + " env.reset()\n", + " episode_reward = 0\n", + " while env.done == False:\n", + " action = env.action_space.sample()\n", + " new_state, reward, done, desc = env.step(action)\n", + " episode_reward += reward\n", + " total_rewards += episode_reward\n", + "\n", + "avg_reward = total_rewards / NUM_EPISODES\n", + "print(f'Average Reward: {avg_reward}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ttzs7UQb2qCP", + "outputId": "94c0576f-abdf-49ff-eead-38312653e263" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Reward: -32.2\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# A function to map a state from 
the Blackjack environment to the proper index in the Q and prob tables" + ], + "metadata": { + "id": "f3Oo6Kz4FldW" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "Given a state, derive the corresponding index in the Q-table. This state is a player hand value and dealer upcard pair, so the hashing formula must be used to allocate the indices of the Q-table properly.\n", + "'''\n", + "def get_Q_state_index(state):\n", + " '''\n", + " The player value is already subtracted by 2 in the env when it returns the state. Subtract by 1 again to fit with the array indexing that starts at 0.\n", + " '''\n", + " initial_player_value = state[0] - 1\n", + " '''\n", + " The upcard value is already subtracted by 1 in the env when it returns the state. dealer_upcard will be subtracted by 1 to fit with the array indexing that starts at 0.\n", + " '''\n", + " dealer_upcard = state[1] - 1\n", + " return(env.observation_space[1].n * (initial_player_value)) + (dealer_upcard)" + ], + "metadata": { + "id": "etl-JD8lHlJB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Functions to get and update the probability of taking the best action" + ], + "metadata": { + "id": "TjRTbwhdFo5x" + } + }, + { + "cell_type": "code", + "source": [ + "# for a given state\n", + "def get_prob_of_best_action(env, state, Q, prob):\n", + " # Use the mapping function to figure out which index of Q corresponds to the player hand value and dealer upcard value that defines each state.\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " # Use this index in the Q 2-D array to get a 2-element array that yields the current Q-values for hitting (index 0) and standing (index 1) in this state.
Use np.argmax() function to find the index of the action that yields the maximum rewards, i.e., the best action we are looking for.\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " # Retrieve the probability of the best action using the state/action pair as indices of the 'prob' array, which stores the probability of taking an action (hit or stand) for a given state/action pair.\n", + " return prob[Q_state_index][best_action]\n", + "\n", + "def update_prob_of_best_action(env, state, Q, prob, epsilon):\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " '''\n", + " Slightly alter the probability of this best action being taken by using epsilon. Epsilon starts at 1.0, and slowly decays over time. Therefore, as per the equation below, the AI agent will use the probability listed for the best action in the 'prob' table during the beginning of the algorithm. As time goes on, the likelihood that the best action is taken is increased from what is listed in the 'prob' table.\n", + " This allows for the exploration of other moves in the beginning of the algorithm, but exploitation later for a greater reward.\n", + " '''\n", + " prob[Q_state_index][best_action] = min(1, prob[Q_state_index][best_action] + 1 - epsilon)\n", + "\n", + " other_action = 1 if best_action == 0 else 0\n", + " prob[Q_state_index][other_action] = 1 - prob[Q_state_index][best_action]\n", + "\n", + " return prob" + ], + "metadata": { + "id": "qazyxoCbYLhz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# play_game() function" + ], + "metadata": { + "id": "D89gCgidFruP" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "Simulates one round of Blackjack.\n", + "\n", + "First, use env.reset() to set up a new round of Blackjack.\n", + "\n", + "If the player is already dealt a blackjack by luck, then this episode is just scrapped. 
There is nothing the AI agent can learn from this episode since it did not have to make any decisions.\n", + "\n", + "Otherwise, the AI agent finds the best action available given the current state of the game and its knowledge so far. This knowledge is stored in the Q-table defined in the run_mc() function. Then, the AI agent retrieves the probability that it should take this action from the prob table. The AI agent applies this probability and chooses its action given this state. The action is sent to the environment, and a reward is returned. This state-action-reward sequence that just occurred is stored in the episode variable as a tuple. This process is repeated until the current episode is over. Each episode will yield roughly 1-3 tuples in the episode variable since Blackjack rounds are usually resolved after 1-3 decisions by the player.\n", + "\n", + "The Q-values in the Q-table associated with each state-action pair that was seen in this episode will be updated after this episode based on the state-action-reward tuples returned by this function. Then, the corresponding probabilities in the prob variable are also modified to reflect this change in Q-values.\n", + "'''\n", + "\n", + "def play_game(env, Q, prob):\n", + " '''\n", + " Can contain numerous state->action->reward tuples because a round of Blackjack is not always resolved in one turn.
However, there will be no state that has a player hand value that exceeds 20, since only initial states BEFORE actions are made are used when storing state->action->reward tuples.\n", + " '''\n", + " episode = []\n", + " state = env.reset()\n", + " while env.done == False:\n", + " if state[0] == 19:\n", + " # Player was dealt blackjack right from the beginning; player_value already subtracted by 2 to get state[0]\n", + " # No analysis done for this useless episode\n", + " next_state, reward, env.done, info = env.step(1)\n", + " else:\n", + " # Get the index in Q that corresponds to the current state\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " # Use the index to get the possible actions, and use np.argmax() to get the index of the action that has the highest current Q value. Index 0 is hit, 1 is stand.\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " # Go to the prob table to retrieve the probability of this action. This uses the same Q_state_index used for finding the state index of the Q-table.\n", + " prob_of_best_action = get_prob_of_best_action(env, state, Q, prob)\n", + "\n", + " action_to_take = None\n", + "\n", + " if random.uniform(0, 1) < prob_of_best_action:\n", + " # Take the best action\n", + " action_to_take = best_action\n", + " else:\n", + " # Take the other action\n", + " action_to_take = 1 if best_action == 0 else 0\n", + "\n", + " # The action is performed by the agent, and the next state, rewards and done information is returned.\n", + " next_state, reward, env.done, info = env.step(action_to_take)\n", + "\n", + " # Log the state->action->reward sequence\n", + " episode.append((state, action_to_take, reward))\n", + "\n", + " # Update the state for the next decision made by the agent.\n", + " state = next_state\n", + "\n", + " return episode" + ], + "metadata": { + "id": "HmFb1gcWFFcm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# update_Q() function" + 
], + "metadata": { + "id": "MoKz3QUBFuZl" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "'''\n", + "This function iterates through the state-action-reward tuples in episode and updates the Q-values of the corresponding state-action pairs in Q.\n", + "'''\n", + "\n", + "def update_Q(env, episode, Q, alpha, gamma):\n", + " '''\n", + " This is where the algorithm hinges on being first visit or every visit. First visit will be used because if you want first-visit, you need to use the cumulative reward of the entire episode when updating a Q-value for ALL of the state/action pairs in the episode, even the first state/action pair. In this algorithm, an episode is a round of Blackjack. Although the bulk of the reward may come from the 2nd or 3rd decision, deciding to hit on the 1st decision is what enabled the future situations to even occur, so it is important to include the entire cumulative reward. We can reduce the impact of the rewards of the future decisions by lowering gamma, which will lower the G value for our early state/action pair in which we hit and did not get any immediate rewards. 
This will make our agent consider future rewards, and not just look at each state in isolation despite having hit previously.\n", + " If you want Every-Visit MC, do not use the cumulative rewards when updating Q-values and just use the immediate reward in this episode for each state/action pair.\n", + " '''\n", + " step = 0\n", + " for state, action, reward in episode:\n", + " # calculate the cumulative reward of taking this action in this state.\n", + " # Start from the immediate rewards, and use all the rewards from the subsequent states.\n", + " # Do not use rewards from previous states\n", + "\n", + " total_reward = 0\n", + " gamma_exp = 0\n", + " for curr_step in range(step, len(episode)):\n", + " curr_reward = episode[curr_step][2]\n", + " total_reward += (gamma ** gamma_exp) * curr_reward\n", + " gamma_exp += 1\n", + "\n", + " # Update the Q-value\n", + " Q_state_index = get_Q_state_index(state)\n", + " curr_Q_value = Q[Q_state_index][action]\n", + " Q[Q_state_index][action] = curr_Q_value + alpha * (total_reward - curr_Q_value)\n", + " # update step to start further down the episode the next time.\n", + " step += 1\n", + " return Q" + ], + "metadata": { + "id": "R08jdGkVbQD0" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# update_prob() function" + ], + "metadata": { + "id": "BcUOuFW2Fwm4" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "def update_prob(env, episode, Q, prob, epsilon):\n", + " for state, action, reward in episode:\n", + " '''\n", + " Update the probabilities of the actions that can be taken given the current state. 
The goal is that the new update in Q has changed what the best action is, and epsilon will be used to create a small increase in the probability that the new, better action is chosen.\n", + " '''\n", + " prob = update_prob_of_best_action(env, state, Q, prob, epsilon)\n", + " return prob" + ], + "metadata": { + "id": "4UA7u4nWdRFL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# First-Visit Monte Carlo Algorithm" + ], + "metadata": { + "id": "rR5AeirSFy5h" + } + }, + { + "cell_type": "code", + "source": [ + "# run_mc() to run the First Visit Monte Carlo Algorithm\n", + "'''\n", + "This function initializes the key variables for the algorithm and runs the learning algorithm for the AI agent for num_episodes episodes. An episode is a simulation of one game of Blackjack using the OpenAI Gym environment defined above.\n", + "Q, or the Q-table, is a 2D list in which the rows are the different possible states and the columns are the different possible actions for each state. The values stored in each cell refer to the value, or Q-value, of selecting some action (the column index) given some state (the row index). These Q-values start at 0 for every state-action pair, and are updated by the update_Q() function after each episode to reflect the rewards received in an episode where the state-action pair occurred. All Q-values initially start at 0.\n", + "\n", + "prob, or the prob table, has the same structure as Q, but the cell values refer to the probability of selecting some action (the column index) given some state (the row index). These probabilities are updated by the update_prob() function after the Q-values are updated after each episode. All action probabilities start at 0.5 (or 50%).\n", + "\n", + "alpha defines the weight given to each new change in Q-value within the update_Q() function.
A smaller alpha means that a new reward logged in an episode for some state-action pair has less impact on the current Q-value for that state-action pair. The converse is also true. Therefore, alpha essentially defines how fast the AI agent learns (learning rate).\n", + "\n", + "epsilon defines the weight given to each new change in the action probabilities within the update_prob() function. A larger epsilon reduces the percent amount by which an action probability is changed after some change in Q-values. The converse is also true. An epsilon of 1 means that no change will occur in the action probability, regardless of the magnitude of changes in Q-values.\n", + "\n", + "epsilon is decayed by the decay value after every episode. The lowest value epsilon can reach is epsilon_min.\n", + "\n", + "gamma is the rate used to discount future rewards yielded by a certain state-action pair in the episode. Since a round (or episode) of Blackjack can have more than 1 decision made, there can be numerous state-action pairs that are seen in one episode. However, only the final decision (or state-action pair) yields an immediate reward from the environment. All previous state-action pairs had no rewards. So, the final reward must be used to modify the Q-values of the earlier state-action pairs. Since the final reward was only partially made possible by the earlier state-action pairs, the final reward is discounted using gamma to account for this.\n", + "\n", + "Once these important variables are defined, this function runs the First-Visit Monte Carlo algorithm for the Blackjack environment.\n", + "\n", + "The function runs num_episodes episodes.\n", + "\n", + "In each episode, epsilon is first decayed by the decay rate.\n", + "\n", + "Then, the game of Blackjack is played out through the play_game() function. This function returns a list of the state-action-reward tuples that occurred during the game.
These tuples represent the actions the AI agent had to take given some state, and the rewards that resulted.\n", + "\n", + "These tuples are used to modify the Q-values in Q through the update_Q() function.\n", + "\n", + "Then, the tuples are used to modify the probability distributions in prob of the two actions (hit or stand) for any states that were encountered in the episode. This is done through the update_prob() function.\n", + "\n", + "After this process is done for each episode, the function returns the modified Q and prob tables. These tables are an imprint of the learning that has taken place by the AI agent through the First-Visit Monte Carlo algorithm.\n", + "'''\n", + "\n", + "def run_mc(env, num_episodes):\n", + " '''\n", + " observation_space[0] is the 18 possible player values. (3-20)\n", + " observation_space[1] is the 10 possible dealer upcards. (2-11)\n", + " Combining these together yields all possible states.\n", + " Multiplying this with hit/stand yields all possible state-action pairs.\n", + " This is the Q map.\n", + " '''\n", + " Q = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16)\n", + "\n", + " # This map contains the probability distributions for each action (hit or stand) given a state. The state (combination of player hand value and dealer upcard value) index in this array yields a 2-element array.
The 0th index of this 2-element array refers to the probability of hit and the 1st index is the probability of stand.\n", + " prob = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16) + 0.5\n", + "\n", + " # The learning rate; very small to avoid making quick, large changes in our policy.\n", + " alpha = 0.001\n", + "\n", + " epsilon = 1\n", + "\n", + " # The rate by which epsilon will decay over time.\n", + " # Since the probability that decides which option has the highest Q-value is 1 - epsilon + probability, this decay will make sure that the better option is taken more often in the long run.\n", + " # This allows the algorithm to explore in the early stages and exploit in the later stages.\n", + " decay = 0.9999\n", + "\n", + " # The lowest value that epsilon can go to. Although the decay seems slow, it grows exponentially, and this is magnified when running thousands of episodes.\n", + " epsilon_min = 0.9\n", + "\n", + " gamma = 0.8\n", + "\n", + " for _ in range(num_episodes):\n", + " episode = play_game(env, Q, prob)\n", + "\n", + " epsilon = max(epsilon * decay, epsilon_min)\n", + "\n", + " Q = update_Q(env, episode, Q, alpha, gamma)\n", + "\n", + " prob = update_prob(env, episode, Q, prob, epsilon)\n", + "\n", + " return Q, prob" + ], + "metadata": { + "id": "PnRTa7FZ3RnJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run First-Visit Monte Carlo Reinforcement Learning Algorithm" + ], + "metadata": { + "id": "jdfLT2OvF1sV" + } + }, + { + "cell_type": "code", + "source": [ + "env = BlackjackEnv()\n", + "\n", + "start_time = time.time()\n", + "new_Q, new_prob = run_mc(env, 1000000)\n", + "end_time = time.time()\n", + "\n", + "print(f'Total time for learning: {end_time - start_time} s.')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UKx3psLBd3Yl", + "outputId": "2181c312-d684-4525-bb48-ed723de63333" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Total time for learning: 230.13713669776917 s.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# best_policy() function" + ], + "metadata": { + "id": "qICTdADBF4Eu" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "This function takes the new Q-table returned and keeps only the action that yields the highest value for a given state. This is represented in three different ways: binary, string and colors.\n", + "0 is hit and 1 is stand in the binary representation of the results.\n", + "H is hit and S is stand in the string representation of the results.\n", + "Green is hit and Red is stand in the color representation of the results.\n", + "'''\n", + "def best_policy(Q):\n", + " best_policy_binary = []\n", + " best_policy_string = []\n", + " best_policy_colors = []\n", + " for i in range(len(Q)):\n", + " best_policy_binary.append(np.argmax(Q[i]))\n", + " best_policy_string.append('H' if np.argmax(Q[i]) == 0 else 'S')\n", + " best_policy_colors.append('g' if np.argmax(Q[i]) == 0 else 'r')\n", + "\n", + " return best_policy_binary, best_policy_string, best_policy_colors" + ], + "metadata": { + "id": "MNbxnlFKeRgQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Create DataFrame with Player Value as Rows and Dealer Upcard as Columns" + ], + "metadata": { + "id": "89PKdgPiF6Qx" + } + }, + { + "cell_type": "code", + "source": [ + "new_Q_binary, new_Q_string, new_Q_colors = best_policy(new_Q)\n", + "\n", + "df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "color_df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "for s in range(3, 21):\n", + " # possible player values in the range 3 to 20\n", + " start = env.observation_space[1].n * (s - 3)\n", + " end = start + 10\n", + " df.loc[s] = (new_Q_string[start:end])\n", + " color_df.loc[s] = (new_Q_colors[start:end])" + ], + 
"metadata": { + "id": "lZIV2WxWfR9z" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Create Chart Graphic for the Results" + ], + "metadata": { + "id": "Lh14aJ8jF8s-" + } + }, + { + "cell_type": "code", + "source": [ + "fig, ax = plt.subplots()\n", + "\n", + "# hide axes\n", + "fig.patch.set_visible(False)\n", + "ax.set_axis_off()\n", + "ax.axis('tight')\n", + "\n", + "ax.table(cellText=df.values, cellColours=color_df.values, cellLoc='center', rowLabels=df.index, colLabels=df.columns, loc='center')\n", + "\n", + "fig.tight_layout()\n", + "\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "wR8JVHEef5Cf", + "outputId": "02c77629-9a81-4150-9561-c0255ad152ea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABM9UlEQVR4nO3de3xU9Z3/8dcM4VaI2HALUw0GbWGgQklVjNoCFcEsjSgsFhdtaNS2FmuRrW3ht+MdEfZRpRcWqnWBNhUftZVIbZBV5GYkSrCp0mYh0FQqKggl0kQllDm/P06SJSGQ5OScOTPf834+HvOAnMye+bx2Evz2nJk5IcuyEBEREZHUF/Z7ABERERFxhxZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIdL8fPBQKJQF9PNzhgTqBtT7PUSCqNVMajVTUFqD0glqNdUhy7L2tXUn3xZ2oVAoKxwO74rH4z38miGRwuEw8Xjc7zESQq1mUquZgtIalE5Qq6nC4fDHoVBoaFuLOz+P2PWLx+M9ioqKiEajPo7hvZKSEmKxGGo1i1rNpFbzBKUT1GqqyspKbrzxxh7YZzmTdmEHQDQaJScnx+8xPFVZWQmo1TRqNZNazROUTlCr6M0TIiIiIsYIxMJu4cKFXHzxxaSnpzNgwACuvfZadu3a5fdYnli2bBkjR47krLPO4qyzziI3N5d169b5PVZCPPzww4RCIebMmeP3KK679957CYVCzW7Dhg3zeyzP7N+/nxtvvJG+ffvSs2dPLrzwQsrLy/0ey3XnnXfeKc9rKBRi9uzZfo/muhMnThCLxcjOzqZnz56cf/75PPDAA1iW5fdorvvHP/7BnDlzGDx4MD179uSyyy5j+/btfo/VaVu2bCE/P59IJEIoFKK4uLjZ9y3L4u6772bQoEH07NmTCRMmUFVV5c+wndRW6zPPPMPEiRPp27cvoVCIiooKX+ZsTSAWdps3b2b27NmUlZXxwgsvcPz4cSZOnEhdXZ3fo7nunHPO4eGHH2bHjh2Ul5fzpS99iSlTpvCnP/3J79E8tX37dn72s58xcuRIv0fxzIgRI3j33Xebbi+//LLfI3niyJEjXH755XTt2pV169bx5z//mR/+8Id88pOf9Hs0123fvr3Zc/rCCy8AMH36dJ8nc9+iRYtYtmwZP/3pT6msrGTRokUsXryYn/zkJ36P5rpbbrmFF154gV/+8pe8+eabTJw4kQkTJrB//36/R+uUuro6Ro0axdKlS1v9/uLFi/nxj3/M8uXLefXVV+nVqxeTJk3i448/TvCknddWa11dHVdccQWLFi1K8GRt8/01donw/PPPN/t65cqVDBgwgB07dvDFL37Rp6m8kZ+f3+zrBQsWsGzZMsrKyhgxYoRPU3mrtraWmTNn8vjjj/Pggw/6PY5n0tLSyMzM9HsMzy1atIhzzz2XFStWNG3Lzs72cSLv9O/fv9nXDz/8MOeffz5jx471aSLvvPLKK0yZMoXJkycD9tHK1atX89prr/k8mbs++ugjfvvb3/Lss882/ffl3nvv5Xe/+x3Lli1L6X+j8vLyyMvLa/V7lmWxZMkS/uM//oMpU6YA8Itf/IKBAwdSXFzMjBkzE
jlqp52pFeCmm24C4K9//WuCJmq/QByxa+mDDz4AICMjw+dJvHXixAmeeuop6urqyM3N9Xscz8yePZvJkyczYcIEv0fxVFVVFZFIhCFDhjBz5kz27Wvz44xS0tq1a7nooouYPn06AwYMYPTo0Tz++ON+j+W5+vp6ioqKKCwsJBQK+T2O6y677DI2bNjA7t27AfjjH//Iyy+/fMb/eKaif/7zn5w4cYIePZp/klfPnj2NPcoOUF1dzXvvvdfs3+E+ffowZswYtm3b5uNkwROII3Yni8fjzJkzh8svv5zPfvazfo/jiTfffJPc3Fw+/vhjevfuzZo1axg+fLjfY3niqaee4vXXXzfi9StnMmbMGFauXMnQoUN59913ue+++/jCF77Azp07SU9P93s8V/3lL39h2bJlzJ07l/nz57N9+3buuOMOunXrRkFBgd/jeaa4uJiamhpmzZrl9yie+MEPfsDRo0cZNmwYXbp04cSJEyxYsICZM2f6PZqr0tPTyc3N5YEHHiAajTJw4EBWr17Ntm3buOCCC/wezzPvvfceAAMHDmy2feDAgU3fk8QI3MJu9uzZ7Ny50+j/5TR06FAqKir44IMP+M1vfkNBQQGbN282bnH3t7/9je985zu88MILp/yvY9OcfFRj5MiRjBkzhsGDB/PrX/+am2++2cfJ3BePx7nooot46KGHABg9ejQ7d+5k+fLlRi/snnjiCfLy8ohEIn6P4olf//rX/OpXv+LJJ59kxIgRVFRUMGfOHCKRiHHP6y9/+UsKCwv51Kc+RZcuXcjJyeGGG25gx44dfo8mARCoU7G33347zz33HBs3buScc87xexzPdOvWjQsuuIDPf/7zLFy4kFGjRvGjH/3I77Fct2PHDg4ePEhOTg5paWmkpaWxefNmfvzjH5OWlsaJEyf8HtEzZ599Np/5zGfYs2eP36O4btCgQaf8j5BoNGrsqWeAt956ixdffJFbbrnF71E8c9ddd/GDH/yAGTNmcOGFF3LTTTdx5513snDhQr9Hc93555/P5s2bqa2t5W9/+xuvvfYax48fZ8iQIX6P5pnG1/8eOHCg2fYDBw4E4rXBySQQCzvLsrj99ttZs2YNL730krEvxD6deDzOsWPH/B7DdVdeeSVvvvkmFRUVTbeLLrqImTNnUlFRQZcuXfwe0TO1tbXs3buXQYMG+T2K6y6//PJTPo5o9+7dDB482KeJvLdixQoGDBjQ9MYCE3344YeEw83/k9OlSxejLwfVq1cvBg0axJEjR1i/fn3TmwpMlJ2dTWZmJhs2bGjadvToUV599VWjX+OdjAJxKnb27Nk8+eSTPPvss6Snpzed7+/Tpw89e/b0eTp3zZs3j7y8PLKysvjHP/7Bk08+yaZNm1i/fr3fo7kuPT39lNdJ9urVi759+xr3+snvfve75OfnM3jwYN555x3uueceunTpwg033OD3aK678847ueyyy3jooYe4/vrree2113jsscd47LHH/B7NE/F4nBUrVlBQUEBamrn/JOfn57NgwQKysrIYMWIEf/jDH3jkkUcoLCz0ezTXrV+/HsuyGDp0KHv27OGuu+5i2LBhfO1rX/N7tE6pra1tdpagurqaiooKMjIyyMrKYs6cOTz44IN8+tOfJjs7m1gsRiQS4dprr/VvaIfaav373//Ovn37eOeddwCa/sdoZmam70cozf1X5CTLli0DYNy4cc22r1ixwrgXKh88eJCvfvWrvPvuu/Tp04eRI0eyfv16rrrqKr9Hk054++23ueGGGzh8+DD9+/fniiuuoKys7JSPyzDBxRdfzJo1a5g3bx73338/2dnZLFmyxLgX2Td68cUX2bdvn5ELnJP95Cc/IRaL8a1vfYuDBw8SiUT4xje+wd133+33aK774IMPmDdvHm+//TYZGRlMmzaNBQsW0LVrV79H65Ty8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPPp+Rro
NtqXbt2bbOFeuPHudxzzz3ce++9CZ21pUAs7Ez8ZPPTeeKJJ/wewVebNm3yewRPPPXUU36PkFBf/vKX+fKXv+z3GAkxceLEQPwblZ6ezpIlS1iyZInfo3ju+uuv5/rrr/d7DNeNGzfujD+roVCI+++/n/vvvz+BU3mjrdZZs2Yl7YGhQLzGTkRERCQItLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYot3vig2FQllAPxcfexhASUkJlZWVLu42+ZSWlgJqNY1azaRW8wSlE9Rqqurq6nbfN9Set9mHQqGscDi8Kx6Pu/thNCHA/Hf529RqJrWaSa3mCUonqNVUdmuuZVllZ7pbe4/Y9YvH4z2KioqIRqOdHw57hR2LxWAq7h4HTEZVwEbUahq1mkmt5glKJ6jVVIeAZwCob+uuHfqA4mg0Sk5OjrOhWmg6bNoPiLiyy+R1qOFPtZpFrWZSq3mC0glqFb15QkRERMQUjhZ2y5YtY+TIkZx11lmcddZZ5Obmsm7dOrdnc88aYHUr26uBe4GPEjqNt9Sq1lQWlE5QK6g11ak1KVsdLezOOeccHn74YXbs2EF5eTlf+tKXmDJlCn/605/cnk9ERERE2qlDr7FrlJ+f3+zrBQsWsGzZMsrKyhgxYoQrg4mIiIhIxzha2J3sxIkTPP3009TV1ZGbm+vGTCIiIiLigOOF3Ztvvklubi4ff/wxvXv3Zs2aNQwfPtzN2dy1G1jQYpupn32jVjMFpTUonaBWtaY+tSYdxwu7oUOHUlFRwQcffMBvfvMbCgoK2Lx5c/Iu7rKByS227afxc2HMola1prKgdIJa1Zr61Jp0rY4Xdt26deOCCy4A4POf/zzbt2/nRz/6ET/72c9cG85VXYG+LbYd9WOQBFCrmYLSGpROUKtaU59ak45rn2MXj8c5duyYW7sTERERkQ5ydMRu3rx55OXlkZWVxT/+8Q+efPJJNm3axPr1692eT0RERETaydHC7uDBg3z1q1/l3XffpU+fPowcOZL169dz1VVXuT2fiIiIiLSTo4XdE0884fYc3rruNNuzsT8x2iRqVWsqC0onqBXUmurUmpStulasiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEB1680RJSQmVlZWuPHBpaan9lyrgkCu7TF77Gv5Uq1nUaia1miconaBWUx1p/11DltX2hc5CodCl4XB4Wzwe78RUpwoD7u4xiYVIymvKeUKtZgpQq/5tMlBQOiFQrUH6XW1ozbUsq+xM92vvEbv6eDxOUVER0Wi089NhH/2LxWIUAe7sMXmVADELmAr083kYr1UBG1GraQLWGt+I/m0yScB+foPUGpTf1UrgRvuv9W3dt0OnYqPRKDk5OY6GaqnxlG4UcGePyavp5HU/IOLjIInQeDhcrWYJYKv+bTJIAH9+g9QahN/VjtCbJ0REREQM4crC7uGHHyYUCjFnzhw3dueJ94HbgCygO5AJTAJK/RzKC2uA1a1sr8b+dOyPEjqNt9RqXmtQOk8SiH+bgvS8qtXMVlLnd9XRJcVOtn37dn72s58xcuRIN+bxzDTsE9OrgCHAAWADcNjPoUQk8PRvk0hqSJXf1U4t7Gpra5k5cyaPP/44Dz74oFszua4G2ApsAsY2bBsMXOLTPCIioH+bRFJFDanzu9qpU7GzZ89m8uTJTJgwwa15PNG74VYMHPN3FBGRJvq3SSQ1pNLvquMjdk899RSvv/4627dvd3MeT6QBK4FbgeXY754ZC8wAkvsEskO7gQUttpn6mUZqNU9QOgnYv00Bel7V6scg3kql31VHC7u//e1vfOc73+GFF16gR48ebs/kiWnAZOxDqWXAOmAx8HNgln9jeSMbO/Zk+4FnfJjFa2o1rzUonQ0C829TkJ5XtRrZmiq/q44Wdjt27ODgwYPNPtPuxIkTbNmyhZ/+9KccO3aMLl26uDakW3oAVzXcYsAtwD0k1xPiiq5A3xbbjvoxSAKo1
TxB6TxJIP5tCtLzqlZjpcLvqqOF3ZVXXsmbb77ZbNvXvvY1hg0bxve///2kXNS1Zjj2+XIRkWSif5tEUkMy/q46Wtilp6fz2c9+ttm2Xr160bdv31O2J4PDwHSgEPtceDpQjn0IdYqPc4lIsOnfJpHUkEq/q53+HLtU0BsYAzwK7AWOA+divwhyvo9ziUiw6d8mkdSQSr+rri3sNm3a5NauXNcdWNhwM951p9mejf1J4CZRq3mtQelsEJh/m4L0vKrVyNZU+l3VtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNv6lCjjk4yCJsK/hT7WaJYCt+rfJIAH8+Q1SaxB+V6s7cN+QZbV9UbdQKHRpOBzeFo/HnU/VijDg7h6TWAgjr5/XGj2vhgpQq36GzaPn1ExBel4bWnMtyyo70/3ae8SuPh6PU1RURDQa7fx02Ef/YrEYRYA7e0xeJUDMAqYC/XwexmtVEN+InlfTVAEbCUyrfoYNo+fUTAF6XiuBG+2/1rd13w6dio1Go82uD9sZjad0o4A7e0xeTYeI+wERHwdJhIZD/3peDdN4SidArfoZNoieUzMF6HntCL15QkRERMQQjhZ29957L6FQqNlt2LBhbs/mqveB24As7E+QzgQmcdKLh02xBljdyvZq7E8C/yih03hOzytmPa9B6TxJIH6GA/a8BuI5BT2vJOfz6viSYiNGjODFF1/8vx2lJfdlZ6dhn5heBQwBDgAbsC/sK6lLz6ukOv0Mm0fPqZlS5Xl1vBpLS0sjMzPTzVk8UwNsBTYBYxu2DQYu8WkecUcNel4ltdWgn2HT1KDn1EQ1pM7z6vg1dlVVVUQiEYYMGcLMmTPZt29f2/9HPundcCsGjvk7irhIz6ukOv0Mm0fPqZlS6Xl1dMRuzJgxrFy5kqFDh/Luu+9y33338YUvfIGdO3eSnp7u9oydlgasBG4FlmO/e2YsMAMY6d9Y3tkNLGixzcDPNNLzipHPa2A6CdjPcECe10A9p6DnleR7Xh0t7PLy8pr+PnLkSMaMGcPgwYP59a9/zc033+zacG6aBkzGPpRaBqwDFgM/B2b5N5Y3srFjT7YfeMaHWTym5xXzntegdDYIzM9wgJ7XwDynoOeV5HteXfm4k7PPPpvPfOYz7Nmzx43deaYHcBUQA17BfiLu8XMgr3QF+ra4Jd+BVNfoeTVMUDpPEoif4YA9r4F4TkHPK8n3vLqysKutrWXv3r0MGjTIjd0lzHCgzu8hxHV6XiXV6WfYPHpOzZSMz6ujU7Hf/e53yc/PZ/Dgwbzzzjvcc889dOnShRtuuMHt+VxxGJgOFGKfC08HyrEPoU7xcS7pHD2vkur0M2wePadmSqXn1dHC7u233+aGG27g8OHD9O/fnyuuuIKysjL69+/v9nyu6A2MAR4F9gLHgXOxXwQ538e5pHP0vEqq08+wefScmimVnldHC7unnnrK7Tk81R1Y2HAz3nWn2Z6N/UngBtHzinnPa1A6GwTmZzhAz2tgnlPQ85qkdK1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lJ7n4A7e0xepY1/qQIO+ThIIjRcMljPq2EaLwUdoFb9DBtEz6mZAvS8VnfgviHLavuibqFQ6NJwOLwtHo87n6oVYcDdPSavILUSwshrBbZKrWZSq3mC0glqNZXdmmtZVtmZ7tbeI3b18XicoqIiotFo54fDPvoXi8UoAtzZY/Iqwb78SGBaLWAq0M/nYbxWBWxEraZRq3mC0glqNdUhGq+/W9/WXTt0KjYajZKTk+NsqBYaT+lGAXf2mLwaDxEHqZV+QMTHQRKh8TSHWs2iVvMEpRPUKnrzhIiIiIgptLATERERMYTjhd3+/fu58cYb6du3Lz179uTCCy+kvLzczdlc9T5wG5CFfWmQTGASJ72DyCCBaV0DrG5lezX25
Ww+Sug03gpKa1A6Qa2g1lSn1qRsdXSt2CNHjnD55Zczfvx41q1bR//+/amqquKTn/yk2/O5Zhr2Kw5XAUOAA8AG4LCfQ3kkSK0iIiLyfxwt7BYtWsS5557LihUrmrZlZ2e7NpTbaoCtwCZgbMO2wcAlPs3jpRqC0yoiIiLNOToVu3btWi666CKmT5/OgAEDGD16NI8//rjbs7mmd8OtGDjm7yieC1KriIiINOfoiN1f/vIXli1bxty5c5k/fz7bt2/njjvuoFu3bhQUFLg9Y6elASuBW4Hl2B85MhaYAYz0byxPBKkVgN3AghbbTP2wyqC0BqUT1KrW1KfWpONoYRePx7nooot46KGHABg9ejQ7d+5k+fLlSbmwA/t1Z5OxT1OWAeuAxcDPgVn+jeWJILWSjR17sv00fpCjWYLSGpROUKtaU59ak67V0cJu0KBBDB8+vNm2aDTKb3/7W1eG8koP4KqGWwy4BbgHAxc7BKi1K9C3xbajfgySAEFpDUonqFWtqU+tScfRa+wuv/xydu3a1Wzb7t27GTx4sCtDJcpwoM7vIRIkSK0iIiJB5eiI3Z133slll13GQw89xPXXX89rr73GY489xmOPPeb2fK44DEwHCrFfZ5YOlGOfnpzi41xeCFKriIiINOdoYXfxxRezZs0a5s2bx/333092djZLlixh5syZbs/nit7AGOBRYC9wHDgX+w0G832cywtBahUREZHmHC3sAL785S/z5S9/2c1ZPNMdWNhwM12QWrnuNNuzsT8J3CRBaQ1KJ6gV1Jrq1JqUrbpWrIiIiIghtLATERERMYQWdiIiIiKG0MJORERExBAdevNESUkJlZWVrjxwaWmpvU/AnT0mr9KGP4PUShVwyMdBEmFfw59qNYtazROUTlCrqY60/64hy2r7QmehUOjScDi8LR6Pd2Kq1nZMUl5nzRNqNZNazRSg1jDg8r/sySlAz6laDWW35lqWVXamu7X3iF19PB6nqKiIaDTa+eGwj/7FYjGYCvRzZZfJqwrYiFpNo1YzBaw1vhGKAHf+ZU9OJUDMIjDPaZB+fgPTeojGa9LWt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh4jVaha1mimArVHAnX/Zk1PTS2AC9JyqNbj05gkRERERQzha2J133nmEQqFTbrNnz3Z7PnesAVa3sr0a+xOjP0roNN5Sq1pTWVA6IVitwPvAbUAW9hVyMoFJnPSGK1ME6XlVa1K2Orqk2Pbt2zlx4kTT1zt37uSqq65i+vTprg0mIiLmmIb94qBVwBDgALABOOznUCIGcrSw69+/f7OvH374Yc4//3zGjh3rylAiImKOGmArsAlo/K/EYOASn+YRMVmnX2NXX19PUVERhYWFhEIhN2YSERGD9G64FQPH/B1FxHiOjtidrLi4mJqaGmbNmuXCOB7aDSxosc3Uz75Rq5mC0hqUTghMaxqwErgVWI79DtyxwAxgpH9jeScgzyug1iRs7fTC7oknniAvL49IJMnfa5wNTG6xbT+NnwtjFrWqNZUFpRMC1ToNO3UrUAasAxYDPwdm+TeWNwL0vKqVpGvt1MLurbfe4sUXX+SZZ5KsqjVdgb4tth31Y5AEUKuZgtIalE4IVivQA7iq4RYDbgHuwcCFXZCeV7UmnU69xm7FihUMGDCAyZNbLmFFRETObDhQ5/cQIoZxfMQuHo+zYsUKCgoKSEvr9BldEREx1GFgOlCI/Zq6dKAc+1TsFB/nEjGR4xXZiy++yL59+ygsLHRzHhERMUxvYAzwKLAXOA6ci/1mivk+ziViIscLu4kTJ2JZSfh2kNZcd5rt2difGG0Stao1lQWlEwLV2h1Y2HAzXoCeV7WSlK26VqyIiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQHXrzRElJCZWVla48cGlpqf2XKuCQK7tMXvsa/lSrWdRqpgC2lgDu/MuenEob/xKg51SthjnS/ruG2vPO1lAodGk4H
N4Wj8c7MVVrOyYpr7PmCbWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkM0XpO2vq27duhUbDQaJScnx9lQLTSd0u0HRFzZZfJqPESsVrOo1UxqNU9QOkGtojdPiIiIiJjC0cLuxIkTxGIxsrOz6dmzJ+effz4PPPBA8l6JYg2wupXt1difGP1RQqfxllrVmsqC0glqBbWmOrUmZaujS4otWrSIZcuWsWrVKkaMGEF5eTlf+9rX6NOnD3fccYfbM4qIiIhIOzha2L3yyitMmTKFyZMnA3DeeeexevVqXnvtNVeHExEREZH2c3Qq9rLLLmPDhg3s3r0bgD/+8Y+8/PLL5OXluTqciIiIiLSfoyN2P/jBDzh69CjDhg2jS5cunDhxggULFjBz5ky353PPbmBBi21J+pLATlOrmYLSGpROUKtaU59ak46jhd2vf/1rfvWrX/Hkk08yYsQIKioqmDNnDpFIhIKCArdndEc2MLnFtv00fi6MWdSq1lQWlE5Qq1pTn1qTrtXRwu6uu+7iBz/4ATNmzADgwgsv5K233mLhwoXJu7DrCvRtse2oH4MkgFrNFJTWoHSCWtWa+tSadBy9xu7DDz8kHG7+f9qlSxdcv+SYiIiIiLSboyN2+fn5LFiwgKysLEaMGMEf/vAHHnnkEQoLC92eT0RERETaydHC7ic/+QmxWIxvfetbHDx4kEgkwje+8Q3uvvtut+cTERERkXZytLBLT09nyZIlLFmyxOVxPHLdabZnY39itEnUqtZUFpROUCuoNdWpNSlbda1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lL7L1XAIVd2mbz2NfypVrOo1UxqNU9QOkGtpjrS/ruGLKvtC52FQqFLw+HwNtc/gDhEUl5nzRNqNZNazaRW8wSlEwLVGgaCcmmEhtZcy7LKznS/9h6xq4/H4xQVFRGNRjs/HfbRv1gsBlOBfq7sMnlVARtRq2nUaia1miconRC41vhGKALcWZkkr0rgRvuv9W3dt0OnYqPRKDk5OY6GaqnplG4/IOLKLpNX4yFitZpFrWZSq3mC0gmBbI0C7qxMzKA3T4iIiIgYwvHC7h//+Adz5sxh8ODB9OzZk8suu4zt27e7OZt71gCrW9lejf2J0R8ldBpvqVWtqSwonaBWUGuqC1Ir8D5wG5AFdAcygUlAqZ9DtcLRJcUAbrnlFnbu3Mkvf/lLIpEIRUVFTJgwgT//+c986lOfcnNGEREREV9Nw36B2ypgCHAA2AAc9nOoVjg6YvfRRx/x29/+lsWLF/PFL36RCy64gHvvvZcLLriAZcuWuT2jiIiIiG9qgK3AImA8MBi4BJgHXOPfWK1ytLD75z//yYkTJ+jRo0ez7T179uTll192ZTARERGRZNC74VYMHPN3lDY5OhWbnp5Obm4uDzzwANFolIEDB7J69Wq2bdvGBRdc4PaM7tgNLGixzdTP+VGrmYLSGpROUKtaU19AWtOAlcCtwHLsd+GOBWYAI/0bq1WOX2P3y1/+ksLCQj71qU/RpUsXcnJyuOGGG9ixY4eb87knG5jcYtt+4BkfZvGaWtWayoLSCWpVa+oLUOs07NStQBmwDlgM/ByY5d9Yp3C8sDv//PPZvHkzdXV1HD16lEGDBvGVr3yFIUOGuDmfe7oCfVtsO+rHIAmgVjMFpTUonaBWtaa+ILUCPYCrGm4x4BbgHpJrYdfpz7Hr1asXgwYN4siRI6xfv54pU6a4MZeIiIhIUhsO1Pk9RAuOj9itX78ey7IYOnQoe/bs4a677mLYsGF87Wtfc3M+EREREV8dBqYDhdivqUsHyrFPxSbb4SzHC7sPPviAefPm8fbbb5ORkcG0adNYsGABXbt2dXM+EREREV/1BsYAjwJ7gePAudhvppjv41ytcbywu/7667n++
uvdnMU7151mezb2p2ObRK1qTWVB6QS1glpTXYBauwMLG27JTteKFRERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDdOhdsSUlJVRWVrrywKWlpfZfqoBDruwyee1r+FOtZlGrmdRqnqB0QiBbSwB3VibJq7oD9w1ZVttX6w2FQpeGw+Ft8Xjc+VSt7hgjLxbcKrWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkPAMwDUt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh8PVaha1mkmt5glKJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLiZt+3LIu7776bQYMG0bNnTyZMmEBVVVVn53VmDbC6le3V2Jc9+Sih03hLrWpNZUHpBLWCWlOdWpOy1fHCrq6ujlGjRrF06dJWv7948WJ+/OMfs3z5cl599VV69erFpEmT+Pjjjx0PKyIiIiKn16HX2J0sLy+PvLy8Vr9nWRZLlizhP/7jP5gyZQoAv/jFLxg4cCDFxcXMmDHD6cOKiIiIyGl48hq76upq3nvvPSZMmNC0rU+fPowZM4Zt27Z58ZAiIiIigef4iN2ZvPfeewAMHDiw2faBAwc2fS/hdgMLWmwz9bNv1GqmoLQGpRPUqtbUp9ak48nCLillA5NbbNtP4+fCmEWtak1lQekEtao19ak16Vo9WdhlZmYCcODAAQYNGtS0/cCBA3zuc5/z4iHb1hXo22LbUT8GSQC1mikorUHpBLWqNfWpNel48hq77OxsMjMz2bBhQ9O2o0eP8uqrr5Kbm+vFQ4qIiIgEnuMjdrW1tezZs6fp6+rqaioqKsjIyCArK4s5c+bw4IMP8ulPf5rs7GxisRiRSIRrr73WjblFREREpAXHC7vy8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPP06NHj85PLSIiIiKncLywGzduHJZ1+reDhEIh7r//fu6//36nD+Ge606zPRv7E6NNola1prKgdIJaQa2pTq1J2aprxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2n+pAg65ssvkta/hT7WaRa1mUqt5gtIJajXVkfbfNXSmd7Y23SkUujQcDm+Lx+OdmKq1HZOU11nzhFrNpFYzqdU8QekEtZrKbs21LKvsTHdr7xG7+ng8TlFREdFotPPDYR/9i8ViMBXo58ouk1cVsBG1mkatZlKreYLSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFppO6fYDIq7sMnk1HiJWq1nUaia1miconaBW0ZsnREREREzheGG3ZcsW8vPziUQihEIhiouLm33/mWeeYeLEifTt25dQKERFRUUnR+2ENcDqVrZXY39i9EcJncZbalVrKgtKJ6gV1Jrq1JqUrY4XdnV1dYwaNYqlS5ee9vtXXHEFixYtcjyciIiIiLSf42vF5uXlkZeXd9rv33TTTQD89a9/dfoQIiIiItIBeo2diIiIiCEcH7FLObuBBS22mfrZN2o1U1Bag9IJalVr6lNr0gnOwi4bmNxi234aPxfGLGpVayoLSieoVa2pT61J1xqchV1XoG+LbUf9GCQB1GqmoLQGpRPUqtbUp9ako9fYiYiIiBjC8RG72tpa9uzZ0/R1dXU1FRUVZGRkkJWVxd///nf27dvHO++8A8CuXbsAyMzMJDMzs5Nji4iIiEhLjo/YlZeXM3r0aEaPHg3A3LlzGT16NHfffTcAa9euZfTo0UyebJ+QnjFjBqNHj2b58uUujC0iIiIiLTk+Yjdu3Dgs6/RvB5k1axazZs1yunt3XXea7dnYnxhtErWqNZUFpRPUCmpNdWpNyla9xk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkh
MrKSlceuLS01P5LFXDIlV0mr30Nf6rVLGo1k1rNE5ROUKupjrT/rqEzvbO16U6h0KXhcHhbPB7vxFSt7ZikvM6aJwLUGgZc/klJWmo1VIB+XwPTGpROUKup7NZcy7LKznS39h6xq4/H4xQVFRGNRjs/HPbRv1gsBlOBfq7sMnlVARsJTGt8IxQB7vykJK8SIIZaTVMCxCwC8/saiH+bgtIJajXVIRqvSVvf1l07dCo2Go2Sk5PjbKgWmk7p9gMiruwyeTUeIg5QaxRw5ycleTW+KEGtZml6sUmAfl+Nbw1KJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLipu8dP36c73//+1x44YX06tWLSCTCV7/61abrxibcGmB1K9ursT8x+qOETuOtILUC7wO3AVlAdyATmASU+jmUR9RqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl57yvQ8//JDXX3+dWCzG66+/zjPPPMOuXbu45pprOjWsSEvTgD8Aq4DdwFpgHHDYx5m8olYzW0VE3OT4WrF5eXnk5eW1+r0+ffrwwgsvNNv205/+lEsuuYR9+/aRlZXl9GFFmtQAW4FNwNiGbYOBS3yax0s1qNXEVhERtyXsNXYffPABoVCIs88+O1EPKYbr3XArBo75O4rn1CoiIu3h+IhdR3z88cd8//vf54YbbuCss85KxEOeajewoMU2Uz/7JiCtacBK4FZgOfa7NccCM4CR/o3lCbWa2RqU31VArWpNfSnS6vnC7vjx41x//fVYlsWyZcu8frjTywYmt9i2n8bPhTFLgFqnYaduBcqAdcBi4OfALP/G8oRaDWwN0O+qWlFrqkuRVk8Xdo2LurfeeouXXnrJv6N1AF2Bvi22HfVjkAQIUivQA7iq4RYDbgHuwbAFQAO1GtYapN9VtZpJrUnHs9fYNS7qqqqqePHFF+nbt+X/N0S8MRyo83uIBFGriIiczPERu9raWvbs2dP0dXV1NRUVFWRkZDBo0CD+9V//lddff53nnnuOEydO8N577wGQkZFBt27dOj+5BN5hYDpQiP3aq3SgHPuU3RQf5/KCWs1sFRFxm+OFXXl5OePHj2/6eu7cuQAUFBRw7733snbtWgA+97nPNfu/27hxI+PGjXP6sCJNegNjgEeBvcBx4FzsF93P93EuL6jVzFYREbc5XtiNGzcOyzr920HO9L2Eu+4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ+1592ooFLo0HA5vi8fjnZiqtR2TlNdZ80SAWsOAyz8pSUuthgrQ72tgWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxWAq0M+VXSavKmAjgWmNb4QiwJ2flORVgn2pK7WapQSIWQTm9zUQ/zYFpRPUaqpDNF6Ttr6tu3boVGw0GiUnJ8fZUC00ndLtB0Rc2WXyajxEHKDWKODOT0ryajwlqVazNJ1qDtDvq/GtQekEtYrePCEiIiJiCi3sRERERAzheGG3ZcsW8vPziUQihEIhiouLm33/3nvvZdiwYfTq1YtPfvKTTJgwgVdffbWz8zqzBljdyvZq7EuBfJTQabwVpFbgfeA2IAv7UlSZwCROemejQdRqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl7b6/c985jP89Kc/5c033+Tll1/mvPPOY+LEibz//vuOhxVpaRrwB2AVsBtYC4wDDvs4k1fUamariIibOvTmiZPl5eWRl5d32u//27/9W7OvH3nkEZ544gneeOMNrrzySqcPK9KkBtgKbALGNmwbDFzi0zxeqkGtJraKiLgtIa+xq6+v57HHHqNPnz6MGjUqEQ8pAdC74VYMHPN3FM+pV
URE2sPxEbv2eO6555gxYwYffvghgwYN4oUXXqBfP58+bGY3sKDFNlM/1DAgrWnASuBWYDn2x3CMBWYAI/0byxNqNbM1KL+rgFrVmvpSpNXThd348eOpqKjg0KFDPP7441x//fW8+uqrDBgwwMuHbV02MLnFtv00fuCfWQLUOg07dStQBqwDFgM/B2b5N5Yn1Gpga4B+V9WKWlNdirR6eiq2V69eXHDBBVx66aU88cQTpKWl8cQTT3j5kKfXFejb4pbuzyieC1Ir0AO4CvvKCK9g/4f/Hj8H8pBaDROk31W1mkmtSSehn2MXj8c5dkyvmhFvDQfq/B4iQdQqIiInc3wqtra2lj179jR9XV1dTUVFBRkZGfTt25cFCxZwzTXXMGjQIA4dOsTSpUvZv38/06dPd2VwkcPAdKAQ+7VX6UA59im7KT7O5QW1mtkqIuI2xwu78vJyxo8f3/T13LlzASgoKGD58uX87//+L6tWreLQoUP07duXiy++mK1btzJixIjOTy2C/c7JMcCjwF7gOHAu9ovu5/s4lxfUamariIjbHC/sxo0bh2Wd/u0gzzyTRK8mvO4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ2d6Z2vTnUKhS8Ph8LZ4PN6JqU4VBtzdY/JSq5nUaqgQSXkNSE8EpTUonaBWU9mtuZZllZ3pbu09Ylcfj8cpKioiGo12fjjso3+xWIwiwJ09Jq8S7MsiqdUsajVTCRCzgKlAP5+H8VoVsBHzW4PSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFhpP6UYBd/aYvBpPXanVLGo1U9Op5n5AxMdBEqHx9JXprUHpBLWK3jwhIiIiYgrHC7stW7aQn59PJBIhFApRXFx82vt+85vfJBQKsWTJEqcP12nvA7cBWdifbJ8JTOKkF0obRK1qTXWBaF0DrG5lezX2J9l/lNBpvKVWtaa6FGp1fEmxuro6Ro0aRWFhIVOnTj3t/dasWUNZWRmRiL/HSadhn5heBQwBDgAbsC84bhq1qjXVBalVRMRNjhd2eXl55OXlnfE++/fv59vf/jbr169n8uTJTh+q02qArcAmYGzDtsHAJT7N46Ua1KrW1FZDcFpFRNzm2Wvs4vE4N910E3fddRcjRozw6mHapXfDrRg45usk3lOrmdQqIiLt4fiIXVsWLVpEWload9xxh1cP0W5pwErgVmA59rv6xgIzgJH+jeUJtao11QWpld3AghbbTP1MLrWaSa1Jx5OF3Y4dO/jRj37E66+/TigU8uIhOmwaMBn7FE8ZsA5YDPwcmOXfWJ5Qq1pTXWBas7FDT7afxs+rMota1ZrqUqTVk1OxW7du5eDBg2RlZZGWlkZaWhpvvfUW//7v/855553nxUO2Sw/gKuwPWn0F+z8Q9/g2jbfUaia1GqYr0LfFLd3XibyjVjOpNel4srC76aabeOONN6ioqGi6RSIR7rrrLtavX+/FQzoyHKjze4gEUauZ1CoiIidzfCq2traWPXv2NH1dXV1NRUUFGRkZZGVl0bdv32b379q1K5mZmQwdOtT5tA4dBqYDhdiv0UkHyrFP7UxJ+DTeUqtaU12QWkVE3OZ4YVdeXs748eObvp47dy4ABQUFrFy5stODuak3MAZ4FNgLHAfOxX5x9nwf5/KCWtWa6oLUKiLiNscLu3HjxmFZ7X87yF//+lenD9Vp3YGFDTfTqdVMajXQdafZno39SfYmUataU10KtepasSIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUN06M0TJSUlVFZWuvLApaWl9j4Bd/aYvEob/lSrWdRqpsZWqoBDPg6SCPsa/jS9NSidoFZTHWn/XUPteWdrKBS6NBwOb
4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcXNvj9r1ixCoVCz29VXX93ZeR17H7gNyML+ZPtMYBInvVDaIGpVa6oLROsaYHUr26uxP8n+o4RO4y21qjXVpVCr40uK1dXVMWrUKAoLC5k6dWqr97n66qtZsWJF09fdu3d3+nCdNg37xPQqYAhwANiAfcFx06hVrakuSK0iIm5yvLDLy8sjLy/vjPfp3r07mZmZTh/CNTXAVmATMLZh22DgEp/m8VINalVraqshOK0iIm7z9DV2mzZtYsCAAQwdOpTbbruNw4f9+d/bvRtuxcAxXyZIHLWaSa0iItIejo/YteXqq69m6tSpZGdns3fvXubPn09eXh7btm2jS5cuXj1sq9KAlcCtwHLsd/WNBWYAIxM6iffUqtZUF6RWdgMLWmwz9TO51GomtSYdzxZ2M2bMaPr7hRdeyMiRIzn//PPZtGkTV155pVcPe1rTgMnYp3jKgHXAYuDnwKyET+Mttao11QWmNRs79GT7afy8KrOoVa2pLkVaE/ZxJ0OGDKFfv37s2bMnUQ95ih7AVdgftPoK9n8g7vFtGm+p1UxqNUxXoG+LW7qvE3lHrWZSa9JJ2MLu7bff5vDhwwwaNChRD9mm4UCd30MkiFrNpFYRETmZ41OxtbW1zY6+VVdXU1FRQUZGBhkZGdx3331MmzaNzMxM9u7dy/e+9z0uuOACJk2a5MrgHXEYmA4UYr9GJx0oxz61MyXh03hLrWpNdUFqFRFxm+OFXXl5OePHj2/6eu7cuQAUFBSwbNky3njjDVatWkVNTQ2RSISJEyfywAMP+PJZdr2BMcCjwF7gOHAu9ouz5yd8Gm+pVa2pLkitIiJuc7ywGzduHJZ1+reDrF+/3umuXdcdWNhwM51azaRWA113mu3Z2J9kbxK1qjXVpVCrrhUrIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExRIfePFFSUkJlZaUrD1xaWmrvE3Bnj8mrtOFPtZpFrWZqbKUKOOTjIImwr+FP01uD0glqNdWR9t81dKZ3tjbdKRS6NBwOb4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcWn3KeyspJrrrmGPn360KtXLy6++GL27dt36s4S4H3gNiAL+5PtM4FJnPRCaYOoVa2pLhCta4DVrWyvxv4k+48SOo231KrWVJdCrY4vKVZXV8eoUaMoLCxk6tSpp3x/7969XHHFFdx8883cd999nHXWWfzpT3+iR48enRrYqWnYJ6ZXAUOAA8AG7AuOm0atak11QWoVEXGT44VdXl4eeXl5p/3+//t//49/+Zd/YfHixU3bzj//fKcP1yk1wFZgEzC2Ydtg4BJfpvFWDWpVa2qrITitIiJu8+Q1dvF4nN///vd85jOfYdKkSQwYMIAxY8a0ero2EXo33IqBY75MkDhqNZNaRUSkPRwfsTuTgwcPUltby8MPP8yDDz7IokWLeP7555k6dSobN25k7Nixbe/ERWnASuBWYDn2u/rGAjOAkQmdxHtqVWuqC1Iru4EFLbaZ+plcajWTWpOOJwu7xg8ynjJlC
nfeeScAn/vc53jllVdYvnx5whd2YL9mZzL2KZ4yYB2wGPg5MCvh03hLrWpNdYFpzcYOPdl+Gj+vyixqVWuqS5FWT07F9uvXj7S0NIYPH95sezQa9e1dsQA9gKuwP2j1Fez/QNzj2zTeUquZ1GqYrkDfFrd0XyfyjlrNpNak48nCrlu3blx88cXs2rWr2fbdu3czePBgLx7SkeFAnd9DJIhazaRWERE5meNTsbW1tezZs6fp6+rqaioqKsjIyCArK4u77rqLr3zlK3zxi19k/PjxPP/88/zud79j06ZNbszdIYeB6UAh9mt00oFy7FM7UxI+jbfUqtZUF6RWERG3OV7YlZeXM378+Kav586dC0BBQQErV67kuuuuY/ny5SxcuJA77riDoUOH8tvf/pYrrrii81N3UG9gDPAosBc4DpyL/eLs+QmfxltqVWuqC1KriIjbHC/sxo0bh2Wd+e0ghYWFFBYWOn0I13QHFjbcTKdWM6nVQNedZns29ifZm0Stak11KdSqa8WKiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCE69K7YkpISKisrXXng0tJSe5+AO3tMXqUNf6rVLGo1U2MrVcAhHwdJhMYLAZneGpROUKupjrT/rqG2PrIEIBQKXRoOh7c1XgPWLWHA3T0mL7WaSa1mUqt5gtIJwWolBLS9jDGD3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFKALc2WPyKsG+3qVazaJWM6nVPEHphAC2WsBUoJ/Pw3jtEPAMAPVt3bVDp2Kj0Sg5OTnOhmqh8ZRuFHBnj8mr8dSVWs2iVjOp1TxB6YRgttIPiPg4SJLRmydEREREDOF4Ybdlyxby8/OJRCKEQiGKi4ubfT8UCrV6+8///M/OzuzI+8BtQBb2JYsygUmc9EJpg6hVralOrea1BqUT1Gpk6xpgdSvbq7EvKfZRQqc5I8fXiq2rq2PUqFEUFhYyderUU77/7rvvNvt63bp13HzzzUybNs3pQ3bKNOwT06uAIcABYANw2JdpvKVWtaY6tZrXGpROUKupranC8cIuLy+PvLy8034/MzOz2dfPPvss48ePZ8iQIU4f0rEaYCuwCRjbsG0wcEnCJ/FeDWpVa2qrQa2mtdYQjE5Qq6mtqSQhr7E7cOAAv//977n55psT8XCn6N1wKwaO+TJB4qjVTGo1U1Bag9IJahX/JWRht2rVKtLT01s9ZZsIacBK7EPFZwOXA/OBN3yZxltqVWuqU6t5rUHpBLWa2grAbmBBi9uvfJ2oVQlZ2P33f/83M2fOpEePHol4uFZNA94B1gJXYx86zsH+oTSNWtWa6tRqXmtQOkGtpraSDXyzxe0aXydqlecLu61bt7Jr1y5uueUWrx+qTT2Aq7A/vPEVYBZwj58DeUitZlKrmYLSGpROUKuRrV2Bvi1u6b5O1CrPF3ZPPPEEn//85xk1apTXD9Vhw4E6v4dIELWaSa1mCkprUDpBrZI4jt8VW1tby549e5q+rq6upqKigoyMDLKysgA4evQoTz/9ND/84Q87P2knHAamA4XASOwFdjmwGJji41xeUKtaU51azWsNSieo1dTWVOJ4YVdeXs748eObvp47dy4ABQUFrFy5EoCnnnoKy7K44YYbOjdlJ/UGxgCPAnuB48C5wK3YL/Q0iVrVmurUal5rUDpBraa2phLHC7tx48ZhWdYZ7/P1r3+dr3/9604fwjXdgYUNN9Op1UxqNVNQWoPSCWo11nWn2Z6NfeWJJKJrxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2vsE3Nlj8ipt+FOtZlGrmdRqnqB0QjBbqQIO+ThIIhxp/11Dbb2zFSAUCl0aDoe3xePxTkx1qjDg7h6Tl1rNpFYzqdU8QekEtZqqoTXXsqyyM92vvUfs6uPxOEVFRUSj0c5Ph330LxaLUQS4s8fkVYJ9qRW1m
kWtZlKreYLSCWo1VSVwo/3X+rbu26FTsdFolJycHEdDtdR4SjeKfcFgkzUeDlerWdRqJrWaJyidoFbRmydEREREjOF4Ybdlyxby8/OJRCKEQiGKi4ubfb+2tpbbb7+dc845h549ezJ8+HCWL1/e2Xkdex+4DcjC/rTsTGASJ7340iBqVWuqU6t5rUHpBLWq1V+OLylWV1fHqFGjKCwsZOrUqad8f+7cubz00ksUFRVx3nnn8T//8z9861vfIhKJcM0113RqaCemYZ+YXgUMAQ4AG7AvYmwatao11anVvNagdIJa1eovxwu7vLw88vLyTvv9V155hYKCAsaNGwfY14392c9+xmuvvZbwhV0NsBXYBIxt2DYYuCShUyRGDWpVa2qrQa2mtdYQjE5Qq1r959lr7C677DLWrl3L/v37sSyLjRs3snv3biZOnOjVQ55W74ZbMXAs4Y+eWGo1k1rNFJTWoHSCWk2VSq2eLex+8pOfMHz4cM455xy6devG1VdfzdKlS/niF7/o1UOeVhqwEvvw6dnA5cB84I2ET+I9tao11anVvNagdIJa1eo/Txd2ZWVlrF27lh07dvDDH/6Q2bNn8+KLL3r1kGc0DXgHWAtcjX04NQf7iTKNWtWa6tRqXmtQOkGtavWZZVlt3rBnt3bs2GG1BrDWrFnT9PWHH35ode3a1Xruueea3e/mm2+2Jk2aZFmWZRUVFVmAtQMsy6fbzWBlJeBxikCtalWrWtUa0E61qrWztx0NrUBOW2s2T47YHT9+nOPHjxMON999ly5dcPuyZJ0xHKjze4gEUauZ1GqmoLQGpRPUaqpkbHX8rtja2lr27NnT9HV1dTUVFRVkZGSQlZXF2LFjueuuu+jZsyeDBw9m8+bN/OIXv+CRRx5xZfCOOAxMBwqBkUA6UA4sBqYkfBpvqVWtqU6t5rUGpRPUqlb/OV7YlZeXM378+Kav586dC0BBQQErV67kqaeeYt68ecycOZO///3vDB48mAULFvDNb36z81N3UG9gDPAosBc4DpwL3Ir94keTqFWtqU6t5rUGpRPUqlb/OV7YjRs3jobX37UqMzOTFStWON29q7oDCxtuplOrmdRqpqC0BqUT1GqqVGrVtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNvypVrOo1UxqNU9QOkGtpqruwH1DZ3pna9OdQqFLw+HwNrc/XDgMJM/HFXtLrWZSq5nUap6gdIJaTdXQmmtZVtmZ7tfeI3b18XicoqIiotFo56fDPvoXi8UoAtzZY/IqAWKgVsOo1UxqNU9QOkGtpqoEbrT/Wt/WfTt0KjYajZKTk+NoqJYaT+lGsS9Ea7LGQ8RqNYtazaRW8wSlE9QqevOEiIiIiDEcL+y2bNlCfn4+kUiEUChEcXFxs+8fOHCAWbNmEYlE+MQnPsHVV19NVVVVZ+d17H3gNiAL+xOkM4FJ/N+LL02iVrWmOrWa1xqUTlCrWv3l+JJidXV1jBo1isLCQqZOndrse5Zlce2119K1a1eeffZZzjrrLB555BEmTJjAn//8Z3r16tXpwTtqGvaJ6VXAEOAAsAH7wr6mUataU51azWsNSieoVa0+syyrzRv26Wtrx44dVmsAa82aNU1f79q1ywKsnTt3Nm07ceKE1b9/f+vxxx+3LMuyioqKLMDaAZbl8e0IWIC1KQGP1dqtqOHx1apWtao1iK1B6VSrWr267Wh4fCCnrTWbJ6+xO3bsGAA9evRo2hYOh+nevTsvv/yyFw95Rr0bbsXAsYQ/emKp1UxqNVNQWoPSCWo1VSq1erKwGzZsGFlZWcybN48jR45QX1/PokWLePvtt3n33Xe9eMgzSgNWYh8+PRu4HJgPvJHwSbynVrWmOrWa1xqUTlCrWv3nycKua9euPPPMM+zevZuMjAw+8YlPsHHjRvLy8giH/Xkj7jTgHWAtcDWwCfv88kpfpvGWW
tWa6tRqXmtQOkGtavWZF6+xO1lNTY118OBBy7Is65JLLrG+9a1vJfw1dqe73QxWlmGvA1CrWtWq1lRpDUqnWtXa2Zvvr7E7WZ8+fejfvz9VVVWUl5czZcoUrx+y3YYDdX4PkSBqNZNazRSU1qB0glpNlYytjj/upLa2lj179jR9XV1dTUVFBRkZGWRlZfH000/Tv39/srKyePPNN/nOd77Dtddey8SJE10ZvCMOA9OBQmAkkA6UA4uB5FlmukOtak11ajWvNSidoFa1+s/xwq68vJzx48c3fT137lwACgoKWLlyJe+++y5z587lwIEDDBo0iK9+9avEYrHOT+xAb2AM8CiwFzgOnAvciv3iR5OoVa2pTq3mtQalE9Sq1iTgxmvsnEiG19gl6pYMrwNQq1rVqtagtgalU63m3pLqNXYiIiIikhha2ImIiIgYQgs7EREREUNoYSciIiJiiA69K7akpITKykpXHri0tNTeJ+DOHpNXacOfajWLWs2kVvMEpRPUaqrqDtw31PCu1zPfKRS6NBwOb4vH486nakUYcHePyUutZlKrmdRqnqB0glpN1dCaa1lW2Znu194jdvXxeJyioiKi0Wjnp8M++heLxSgC3Nlj8ioBYqBWw6jVTGo1T1A6Qa2mqgRutP9a39Z9O3QqNhqNkpOT42iolhpP6UaxPyTPZI2HiNVqFrWaSa3mCUonqFX05gkRERERY2hhJyIiImIIRwu7hQsXcvHFF5Oens6AAQO49tpr2bVrV7P7fPzxx8yePZu+ffvSu3dvpk2bxoEDB1wZ2on3gduALKA7kAlM4v/eVWMStao11anVvNagdIJa1eqvDr3GrtHmzZuZPXs2F198Mf/85z+ZP38+EydO5M9//jO9evUC4M477+T3v/89Tz/9NH369OH2229n6tSpTR9zkmjTsF9xuAoYAhwANgCHfZnGW2pVa6pTq3mtQekEtarVZ21dTLbh41ByAGvHjh1Waw4ePGgB1ubNmy3Lsqyamhqra9eu1tNPP910n8rKSguwtm3bZlmWZRUVFSXs4r1HGi6euykAFypWq1rVqtZkaw1Kp1rV6tVtR8PjAzltrdlceY3dBx98AEBGRgYAO3bs4Pjx40yYMKHpPsOGDSMrK4tt27a58ZAd0rvhVgwcS/ijJ5ZazaRWMwWlNSidoFZTpVJrpxd28XicOXPmcPnll/PZz34WgPfee49u3bpx9tlnN7vvwIEDee+99zr7kB2WBqzEPnx6NnA5MB94I+GTeE+tak11ajWvNSidoFa1+q/TC7vZs2ezc+dOnnrqKTfm8cw04B1gLXA1sAn7/PJK/0byjFrVmurUal5rUDpBrWr1WWdeYzd79mzrnHPOsf7yl780275hwwYLsI4cOdJse1ZWlvXII48k/DV2p7vdDFaWYa8DUKta1arWVGkNSqda1drZm+evsbMsi9tvv501a9bw0ksvkZ2d3ez7n//85+natSsbNmxo2rZr1y727dtHbm6uk4f0xHCgzu8hEkStZlKrmYLSGpROUKupkrHV0cedzJ49myeffJJnn32W9PT0ptfN9enTh549e9KnTx9uvvlm5s6dS0ZGBmeddRbf/va3yc3N5dJLL3U1oD0OA9OBQmAkkA6UA4uBKQmfxltqVWuqU6t5rUHpBLWq1X+OFnbLli0DYNy4cc22r1ixglmzZgHw6KOPEg6HmTZtGseOHWPSpEn813/9V6eGdao3MAZ4FNgLHAfOBW7FfvGjSdSq1lSnVvNag9IJalWr/xwt7Bped3dGPXr0YOnSpSxdutTJQ7iqO7Cw4WY6tZpJrWYKSmtQOkGtpkqlVl0rVkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYogOvXmisrLStQeurq629+naHpNXdcOfajWLWs2kVvMEpRPUaqqONIba8w7XUCiUFQ6Hd8Xj8R7OxzpVGIi7ucMkplYzqdVMajVPUDpBraYKw8dxGGpZ1r4z3a9dC
zuwF3dAPzeGO0k3oN7lfSYrtZpJrWZSq3mC0glqNdWhthZ10IGFnYiIiIgkN715QkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMcT/B73X1paF7bdVAAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Test Best Policy on New Episodes" + ], + "metadata": { + "id": "NPbcI9qNF_F9" + } + }, + { + "cell_type": "code", + "source": [ + "env = BlackjackEnv()\n", + "NUM_EPISODES = 100000\n", + "\n", + "for _ in range(NUM_EPISODES):\n", + " state = env.reset()\n", + " while env.done == False:\n", + " if state[0] == 19:\n", + " # Player was dealt with a Blackjack\n", + " next_state, reward, env.done, info = env.step(1)\n", + " total_rewards += reward\n", + " else:\n", + " Q_index = get_Q_state_index(state)\n", + " action = new_Q_binary[Q_index]\n", + " new_state, reward, done, desc = env.step(action)\n", + " state = new_state\n", + " total_rewards += reward\n", + "avg_reward = total_rewards / NUM_EPISODES\n", + "print(f'Average Reward: {avg_reward}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7sTNqbeZga_3", + "outputId": "97878c73-7419-41d4-d600-68bfd314b8f4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Reward: -4.677\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "WL-NmCiphQiq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Credits" + ], + "metadata": { + "id": "73y12ontGQRk" + } + }, + { + "cell_type": "markdown", + "source": [ + "The code is work of Mr. Adithya Solai.\n", + "
\n", + "The conversion from Python to Python Notebook is work of Mr. Sushant Nair.\n", + "
\n", + "The inspiration for making this Python Notebook as part of the RL Series is Mr. Terrence Ou\n", + "
\n", + "The code is further explained in the following Medium articles by Mr. Adithya Solai. GitHub Repo link is also provided.\n", + "
\n", + "1.\thttps://towardsdatascience.com/cracking-blackjack-part-1-31da28aeb4bb\n", + "2.\thttps://towardsdatascience.com/cracking-blackjack-part-2-75e32363e38\n", + "3.\thttps://towardsdatascience.com/cracking-blackjack-part-3-8fd3a5870efd\n", + "4.\thttps://towardsdatascience.com/cracking-blackjack-part-4-8b4a9caa38eb\n", + "5.\thttps://towardsdatascience.com/cracking-blackjack-part-5-70bd2f726133\n", + "\n", + "https://github.com/adithyasolai/Monte-Carlo-Blackjack/blob/master/MC_Blackjack_Full.ipynb\n" + ], + "metadata": { + "id": "BiGQjzloGSIX" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "btiWK3iZHFi5" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 031726d9884937f507d126fe567aeea283f5e3fe Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:43:00 +0530 Subject: [PATCH 4/8] Delete Monte_Carlo_Blackjack_Demonstration.ipynb --- Monte_Carlo_Blackjack_Demonstration.ipynb | 1081 --------------------- 1 file changed, 1081 deletions(-) delete mode 100644 Monte_Carlo_Blackjack_Demonstration.ipynb diff --git a/Monte_Carlo_Blackjack_Demonstration.ipynb b/Monte_Carlo_Blackjack_Demonstration.ipynb deleted file mode 100644 index 30277a0..0000000 --- a/Monte_Carlo_Blackjack_Demonstration.ipynb +++ /dev/null @@ -1,1081 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Installing Dependencies" - ], - "metadata": { - "id": "ptQKksAbExoX" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": 
"https://localhost:8080/" - }, - "id": "JuDWA5loOsFM", - "outputId": "b9cdd35b-2703-4828-ad14-494bafe11476" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting gymnasium\n", - " Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)\n", - "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)\n", - "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", - "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)\n", - "Collecting farama-notifications>=0.0.1 (from gymnasium)\n", - " Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)\n", - "Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/953.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m501.8/953.9 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n", - "Installing collected packages: farama-notifications, gymnasium\n", - "Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1\n" - ] - } - ], - "source": [ - "!pip install gymnasium" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Importing Dependencies" - ], - "metadata": { - "id": "q6ml-ZqBE1DP" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cbt-NEiMO1bA" - }, - "outputs": [], - "source": [ - "import random\n", - "import numpy as 
np\n", - "import gymnasium as gym\n", - "from gym import error, spaces, utils\n", - "from gym.utils import seeding\n", - "import enum\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import time" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Set up the game" - ], - "metadata": { - "id": "RSlZaAJ3E4YL" - } - }, - { - "cell_type": "code", - "source": [ - "# Define the rank and suit of a card\n", - "\n", - "ranks = {\n", - " \"two\": 2,\n", - " \"three\": 3,\n", - " \"four\": 4,\n", - " \"five\": 5,\n", - " \"six\": 6,\n", - " \"seven\": 7,\n", - " \"eight\": 8,\n", - " \"nine\": 9,\n", - " \"ten\": 10,\n", - " \"jack\": 10,\n", - " \"queen\": 10,\n", - " \"king\": 10,\n", - " \"ace\": (1, 11),\n", - "}\n", - "\n", - "class Suit(enum.Enum):\n", - " spades = \"spades\"\n", - " clubs = \"clubs\"\n", - " diamonds = \"diamonds\"\n", - " hearts = \"hearts\"\n", - "\n", - "# Now to define the card and deck\n", - "# Implement the shuffle, peek and deal functions for the deck\n", - "\n", - "class Card:\n", - " def __init__(self, suit, rank, value):\n", - " self.suit = suit\n", - " self.rank = rank\n", - " self.value = value\n", - "\n", - " def __str__(self):\n", - " return self.rank + \" of \" + self.suit.value\n", - "\n", - "class Deck:\n", - " def __init__(self, num=1):\n", - " self.cards = []\n", - " for i in range(num):\n", - " for suit in Suit:\n", - " for rank, value in ranks.items():\n", - " self.cards.append(Card(suit, rank, value))\n", - "\n", - " def shuffle(self):\n", - " random.shuffle(self.cards)\n", - "\n", - " def deal(self):\n", - " return self.cards.pop(0)\n", - "\n", - " def peek(self):\n", - " if len(self.cards) > 0:\n", - " return self.cards[0]\n", - "\n", - " def add_to_bottom(self, card):\n", - " self.cards.append(card)\n", - "\n", - " def __str__(self):\n", - " result = \"\"\n", - " for card in self.cards:\n", - " result += str(card) + \"\\n\"\n", - " return result\n", - "\n", - " def __len__(self):\n", - " return 
len(self.cards)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "D3df2N-JrKiE", - "outputId": "a36dc6fc-51b2-4fb0-bfb5-380e143cc4b4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", - " and should_run_async(code)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Define the logic for evaluating the value of the player's hand" - ], - "metadata": { - "id": "jLdAUSMgFPIi" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "An ace is used as 11 whenever possible. It is 1 only if the other option would make the player bust.\n", - "'''\n", - "\n", - "def player_eval(player_hand):\n", - " num_ace = 0\n", - " # use_one means that every ace in the hand is counted as one\n", - " use_one = 0\n", - " for card in player_hand:\n", - " if card.rank == \"ace\":\n", - " num_ace += 1\n", - " use_one += card.value[0] # use 1 for Ace\n", - " else:\n", - " use_one += card.value\n", - "\n", - " if num_ace > 0:\n", - " # Define player policy for Aces:\n", - " # Make Aces 11 if they get you to the range [18, 21]\n", - " # Otherwise, use one.\n", - "\n", - " ace_counter = 0\n", - " while ace_counter < num_ace:\n", - " # Only add by 10 because 1 is already added before\n", - " use_eleven = use_one + 10\n", - " if use_eleven > 21:\n", - " return use_one\n", - " elif use_eleven >= 18 and use_eleven <= 21:\n", - " return use_eleven\n", - " else:\n", - " # This allows for some Aces to be 11 and others to be 1.\n", - " use_one = use_eleven\n", - " ace_counter += 1\n", - " return use_one\n", - " else:\n", - " return use_one" - ], - 
"metadata": { - "id": "SWrENBc-vEUL" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Define the logic for evaluating the value of the dealer's hand" - ], - "metadata": { - "id": "T9tDhTf0FWUV" - } - }, - { - "cell_type": "code", - "source": [ - "# This follows the same, official rules every time.\n", - "# Still there is a need to figure out what happens if there are multiple Aces.\n", - "def dealer_eval(player_hand):\n", - " num_ace = 0\n", - " use_one = 0\n", - " for card in player_hand:\n", - " if card.rank == \"ace\":\n", - " num_ace += 1\n", - " use_one += card.value[0] # use 1 for Ace\n", - " else:\n", - " use_one += card.value\n", - "\n", - " if num_ace > 0:\n", - " # See if using 11 instead of 1 for the Ace gets the dealer's hand value closer to the [17, 21] range.\n", - " # The dealer will follow Hard 17 rules. This means that the dealer will not hit again if the Ace yields a 17.\n", - " # This also means that Aces initially declared as 11 can be changed to 1 as new cards come.\n", - "\n", - " ace_counter = 0\n", - " while ace_counter < num_ace:\n", - " # Only add 10 because 1 is already added before\n", - " use_eleven = use_one + 10\n", - "\n", - " if use_eleven > 21:\n", - " return use_one\n", - " elif use_eleven >= 17 and use_eleven <= 21:\n", - " return use_eleven\n", - " else:\n", - " # The case where even using Ace as eleven is less than 17.\n", - " use_one = use_eleven\n", - " ace_counter += 1\n", - " return use_one\n", - " else:\n", - " return use_one" - ], - "metadata": { - "id": "YzDYaCCVx3AH" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Define logic for the dealer's turn" - ], - "metadata": { - "id": "ips4ASjUFaEg" - } - }, - { - "cell_type": "code", - "source": [ - "def dealer_turn(dealer_hand, deck):\n", - " # Calculate the dealer hand's value.\n", - " dealer_value = dealer_eval(dealer_hand)\n", - "\n", - " # Define dealer policy 
(which is fixed to the official rules)\n", - " # The dealer keeps hitting until their total is 17 or more\n", - " while dealer_value < 17:\n", - " dealer_hand.append(deck.deal())\n", - " dealer_value = dealer_eval(dealer_hand)\n", - "\n", - " return dealer_value, dealer_hand, deck" - ], - "metadata": { - "id": "YetM4Xsixlrf" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Define the OpenAI Gym Environment for Blackjack" - ], - "metadata": { - "id": "jUQRNbyAFd9-" - } - }, - { - "cell_type": "code", - "source": [ - "INITIAL_BALANCE = 1000\n", - "NUM_DECKS = 6\n", - "\n", - "class BlackjackEnv(gym.Env):\n", - " metadata = {'render.modes': ['human']}\n", - "\n", - " def __init__(self):\n", - " super(BlackjackEnv, self).__init__()\n", - "\n", - " # Initialize the blackjack deck\n", - " self.bj_deck = Deck(NUM_DECKS)\n", - "\n", - " self.player_hand = []\n", - " self.dealer_hand = []\n", - "\n", - " self.reward_options = {\"lose\": -100, \"tie\": 0, \"win\": 100}\n", - "\n", - " self.action_space = spaces.Discrete(2)\n", - "\n", - " '''\n", - " First element of tuple is the range of possible hand values for the player (3 through 20). This is the possible range of values that the player will actually have to make a decision for. Any player hand value 21 or above already has automatic valuations, and needs no input from an AI agent.\n", - " However, we also need to add all the hand values that the agent could possibly end up in when they bust. Maybe the agent can glean some correlations based on what hand value they bust at, so this should be in the observation space. 
Also, the layout of OpenAI Gym environment class makes us have to include the bust-value in the step() function because we need to return that done is true alongside the final obs, which is the bust-value.\n", - " '''\n", - " # Second element of the tuple is the range of possible values for the dealer's upcard (2 through 11)\n", - " self.observation_space = spaces.Tuple((spaces.Discrete(18), spaces.Discrete(10)))\n", - " self.done = False\n", - "\n", - " def _take_action(self, action):\n", - " if action == 0: #hit\n", - " self.player_hand.append(self.bj_deck.deal())\n", - " # recalculate the value of the player's hand after any changes to the hand\n", - " self.player_value = player_eval(self.player_hand)\n", - "\n", - " def step(self, action):\n", - " self._take_action(action)\n", - "\n", - " # End the episode/game if the player stands or has a hand value >= 21.\n", - " self.done = action == 1 or self.player_value >= 21\n", - "\n", - " # rewards are 0 when the player hits and is still below 21, and they keep playing\n", - " rewards = 0\n", - "\n", - " if self.done:\n", - " # Calculate rewards\n", - " if self.player_value > 21:\n", - " rewards = self.reward_options['lose']\n", - " elif self.player_value == 21:\n", - " rewards = self.reward_options['win']\n", - " else:\n", - " # begin the dealer turn phase\n", - " dealer_value, self.dealer_hand, self.bj_deck = dealer_turn(self.dealer_hand, self.bj_deck)\n", - " # End the dealer turn phase\n", - "\n", - " # Final comparison\n", - " if dealer_value > 21:\n", - " rewards = self.reward_options['win']\n", - " elif dealer_value == 21:\n", - " rewards = self.reward_options['lose']\n", - " else:\n", - " # both dealer and player value is less than 21\n", - " if self.player_value > dealer_value:\n", - " rewards = self.reward_options['win']\n", - " elif self.player_value < dealer_value:\n", - " rewards = self.reward_options['lose']\n", - " else:\n", - " rewards = self.reward_options['tie']\n", - " self.balance += rewards\n", - 
" # Subtract by 1 to fit into the possible observation range.\n", - " # This makes the possible range of 3 through 20 into 1 through 18.\n", - " player_value_obs = self.player_value - 2\n", - " # Get the value of the dealer's upcard; this value is what the agent sees.\n", - " # Subtract by 1 to fit into the possible observation range of 1 to 10\n", - " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", - " # The state is represented as a player hand-value + dealer upcard pair.\n", - " obs = np.array([player_value_obs, upcard_value_obs])\n", - " return obs, rewards, self.done, {}\n", - "\n", - " def reset(self):\n", - " # reset the game to an initial state\n", - " # add the player and dealer cards back into the deck\n", - " self.bj_deck.cards += self.player_hand + self.dealer_hand\n", - "\n", - " # shuffle before beginning. Only shuffle once before the start of each game.\n", - " self.bj_deck.shuffle()\n", - " self.balance = INITIAL_BALANCE\n", - " self.done = False\n", - "\n", - " # returns the start stage for the agent\n", - " # deal 2 cards to the agent and the dealer\n", - " self.player_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", - " self.dealer_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", - " self.dealer_upcard = self.dealer_hand[0]\n", - "\n", - " # calculate the value of the agent's hand\n", - " self.player_value = player_eval(self.player_hand)\n", - "\n", - " # subtract by 2 to fit into the possible observation range. This makes the possible range of 3 through 20 into 1 through 18\n", - " player_value_obs = self.player_value - 2\n", - "\n", - " # get the value of the dealer's upcard; this value is what the agent sees. 
Subtract by 1 to fit the possible observation range of 1 to 10.\n", - " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", - "\n", - " # the state is represented as a player hand-value + dealer upcard pair.\n", - " obs = np.array([player_value_obs, upcard_value_obs])\n", - " return obs\n", - "\n", - " def render(self, mode='human', close=False):\n", - " # convert the player hand into a format that is easy to read and understand.\n", - " hand_list = []\n", - " for card in self.player_hand:\n", - " hand_list.append(card.rank)\n", - "\n", - " # recalculate the value of the dealer upcard.\n", - " upcard_value = dealer_eval([self.dealer_upcard])\n", - "\n", - " print(f'Balance: {self.balance}')\n", - " print(f'Player Hand: {hand_list}')\n", - " print(f'Dealer Upcard: {upcard_value}')\n", - " print(f'Done: {self.done}')" - ], - "metadata": { - "id": "am91D0LgqNnf" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Test the OpenAI Gym Blackjack Environment" - ], - "metadata": { - "id": "BHiry8g3FiEL" - } - }, - { - "cell_type": "code", - "source": [ - "env = BlackjackEnv()\n", - "\n", - "total_rewards = 0\n", - "NUM_EPISODES = 1000\n", - "\n", - "for _ in range(NUM_EPISODES):\n", - " env.reset()\n", - " episode_reward = 0\n", - " while env.done == False:\n", - " action = env.action_space.sample()\n", - " new_state, reward, done, desc = env.step(action)\n", - " episode_reward += reward\n", - " total_rewards += episode_reward\n", - "\n", - "avg_reward = total_rewards / NUM_EPISODES\n", - "print(f'Average Reward: {avg_reward}')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ttzs7UQb2qCP", - "outputId": "94c0576f-abdf-49ff-eead-38312653e263" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Average Reward: -32.2\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# A function to map a state from 
the Blackjack environment to the proper index i Q and prob tables" - ], - "metadata": { - "id": "f3Oo6Kz4FldW" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "Given a state, derive the corresponding index in the Q-table. This state is a player hand value and dealer upcard pair, so the hashing formula must be used to allocate the indices of the Q-table properly.\n", - "'''\n", - "def get_Q_state_index(state):\n", - " '''\n", - " The player value is already subtracted by 1 in the env when it returns the state. Subtract by 1 again to fir with the array indexing that starts at 0.\n", - " '''\n", - " initial_player_value = state[0] - 1\n", - " '''\n", - " The upcard value is already subtracted by 1 in the env when it returns the state. dealer_upcard will be subtracted by 1 to fit with the array indexing that starts at 0.\n", - " '''\n", - " dealer_upcard = state[1] - 1\n", - " return(env.observation_space[1].n * (initial_player_value)) + (dealer_upcard)" - ], - "metadata": { - "id": "etl-JD8lHlJB" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Functions to get and update the probability of taking the best action" - ], - "metadata": { - "id": "TjRTbwhdFo5x" - } - }, - { - "cell_type": "code", - "source": [ - "# for a given state\n", - "def get_prob_of_best_action(env, state, Q, prob):\n", - " # Use the mappig function to figure out which index of Q corresponds to the player hand value and dealer upcard value that defines each state.\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " # Use this index in the Q 2-D array to get a 2-element array that yield the current Q-values for hitting (index 0) and standing (index 1) in this state. 
Use np.argmax() function to find the index of the action that yields the maximum rewards, i.e., the best action we are looking for.\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " # Retrieve the probability of the best action using the state/action pair as indices of the 'prob' array, which stores the probability of taking an action (hit or stand) for a given state/action pair.\n", - " return prob[Q_state_index][best_action]\n", - "\n", - "def update_prob_of_best_action(env, state, Q, prob, epsilon):\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " '''\n", - " Slightly alter the probability of this best action being taken by using epsilon. Epsilon starts at 1.0, and slowly decays over time. Therefore, as per the equation below, the AI agent will use the probability listed for the best action in the 'prob' table during the beginning of the algorithm. As time goes on, the likelihood that the best action is taken is increased from what is listed in the 'prob' table.\n", - " This allows for the exploration of other moves in the beginning of the algorithm, but exploitation later for a greater reward.\n", - " '''\n", - " prob[Q_state_index][best_action] = min(1, prob[Q_state_index][best_action] + 1 - epsilon)\n", - "\n", - " other_action = 1 if best_action == 0 else 0\n", - " prob[Q_state_index][other_action] = 1 - prob[Q_state_index][best_action]\n", - "\n", - " return prob" - ], - "metadata": { - "id": "qazyxoCbYLhz" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# play_game() function" - ], - "metadata": { - "id": "D89gCgidFruP" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "Simulates one round of Blackjack.\n", - "\n", - "First, use env.reset() to set up a new round of Blackjack.\n", - "\n", - "If the player is already dealt a blackjack by luck, then this episode is just scrapped. 
There is nothing the AI agent can learn from this episode since it did not have to make any decisions.\n", - "\n", - "Otherwise, the AI agent finds the best action available given the current state of the game and its knowledge so far. This knowledge is stored in the Q-table defined in the run_mc() function. Then, the AI agent retrieves the probability that it should take this action from the prob table. The AI agent applies this probability and chooses its action given this state. The action is sent to the environment, and a reward is returned. This state-action-reward sequence that just occured is stored in episode variable as a tuple. This process is repeated until the current episode is over. Each episode will roughly rield 1-3 tuples in episode variable since Blackjack rounds are usually resolved after 1-3 decisions by the player.\n", - "\n", - "The Q-values in the Q-table associated with each state-action pair that was seen in this episode will be updated after this episode based on the state-action-reward tuples returned by this function. Then, the corresponding probabilities in prob variable are also modified to reflect this change in Q-values.\n", - "'''\n", - "\n", - "def play_game(env, Q, prob):\n", - " '''\n", - " Can contain numerous state->action->reward tuples bacause a round of Blackjack is not always resolved in one turn. 
However, there will be no state that has a player hand value that exceeds 20, since only initial states BEFORE actions are made are used when storing state->action->reward tuples.\n", - " '''\n", - " episode = []\n", - " state = env.reset()\n", - " while env.done == False:\n", - " if state[0] == 19:\n", - " # Player was dealt blackjack right from the beginning; player_value already subtracted by 2 to get state[0]\n", - " # No analysis done for this useless episode\n", - " next_state, reward, env.done, info = env.step(1)\n", - " else:\n", - " # Get the index in Q that corresponds to the current state\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " # Use the index to get the possible actions, and use np.argmax() to get the index of the action that has the highest current Q value. Index 0 is hit, 1 is stand.\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " # Go to the prob table to retrieve the probability of this action. This uses the same Q_state_index used for finding the state index of the Q-table.\n", - " prob_of_best_action = get_prob_of_best_action(env, state, Q, prob)\n", - "\n", - " action_to_take = None\n", - "\n", - " if random.uniform(0, 1) < prob_of_best_action:\n", - " # Take the best action\n", - " action_to_take = best_action\n", - " else:\n", - " # Take the other action\n", - " action_to_take = 1 if best_action == 0 else 0\n", - "\n", - " # The action is performed by the agent, and the next state, rewards and done information is returned.\n", - " next_state, reward, env.done, info = env.step(action_to_take)\n", - "\n", - " # Log the state->action->reward sequence\n", - " episode.append((state, action_to_take, reward))\n", - "\n", - " # Update the state for the next decision made by the agent.\n", - " state = next_state\n", - "\n", - " return episode" - ], - "metadata": { - "id": "HmFb1gcWFFcm" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# update_Q() function" - 
], - "metadata": { - "id": "MoKz3QUBFuZl" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "'''\n", - "This function iterates through the state-action-reward tuples in episode and updates the Q-values of the corresponding state-action pairs in Q.\n", - "'''\n", - "\n", - "def update_Q(env, episode, Q, alpha, gamma):\n", - " '''\n", - " This is where the algorithm hinges on being first visit or every visit. First visit will be used because if you want first-visit, you need to use the cumulative reward of the entire episode when updating a Q-value for ALL of the state/action pairs in the episode, even the first state/action pair. In this algorithm, an episode is a round of Blackjack. Although the bulk of the reward may come from the 2nd or 3rd decision, deciding to hit on the 1st decision is what enabled the future situations to even occur, so it is important to include the entire cumulative reward. We can reduce the impact of the rewards of the future decisions by lowering gamma, which will lower the G value for our early state/action pair in which we hit and did not get any immediate rewards. 
This will make our agent consider future rewards, and not just look at each state in isolation despite having hit previously.\n", - " If you want Every-Visit MC, do not use the cumulative rewards when updating Q-values and just use the immediate reward in this episode for each state/action pair.\n", - " '''\n", - " step = 0\n", - " for state, action, reward in episode:\n", - " # calculate the cumulative reward of taking this action in this state.\n", - " # Start from the immediate rewards, and use all the rewards from the subsequent states.\n", - " # Do not use rewards from previous states\n", - "\n", - " total_reward = 0\n", - " gamma_exp = 0\n", - " for curr_step in range(step, len(episode)):\n", - " curr_reward = episode[curr_step][2]\n", - " total_reward += (gamma ** gamma_exp) * curr_reward\n", - " gamma_exp += 1\n", - "\n", - " # Update the Q-value\n", - " Q_state_index = get_Q_state_index(state)\n", - " curr_Q_value = Q[Q_state_index][action]\n", - " Q[Q_state_index][action] = curr_Q_value + alpha * (total_reward - curr_Q_value)\n", - " # update step to start further down the episode the next time.\n", - " step += 1\n", - " return Q" - ], - "metadata": { - "id": "R08jdGkVbQD0" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# update_prob() function" - ], - "metadata": { - "id": "BcUOuFW2Fwm4" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "def update_prob(env, episode, Q, prob, epsilon):\n", - " for state, action, reward in episode:\n", - " '''\n", - " Update the probabilities of the actions that can be taken given the current state. 
The goal is that the new update in Q has changed what the best action is, and epsilon will be used to create a small increase in the probability that the new, better action is chosen.\n", - " '''\n", - " prob = update_prob_of_best_action(env, state, Q, prob, epsilon)\n", - " return prob" - ], - "metadata": { - "id": "4UA7u4nWdRFL" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# First-Visit Monte Carlo Algorithm" - ], - "metadata": { - "id": "rR5AeirSFy5h" - } - }, - { - "cell_type": "code", - "source": [ - "# run_mc() to run the First Visit Monte Carlo Algorithm\n", - "'''\n", - "This function initializes the key variables for the algorithm and runs the learning algorithm for the AI agent for num_episodes episodes. An episode is a simulation of one game of Blackjack using the OpenAI Gym environment defined above.\n", - "Q, or the Q-table, is a 2D list in which the rows are the different possible states and the columns are the different possible actions for each state. The values stored in each cell refer to the value, or Q-value, of selecting some action (the column index) given some state (the row index). These Q-values start at 0 for every state-action pair, and are updated by the update_Q() function after each episode to reflect the rewards received in an episode where the state-action pair occured. All Q-values initially start at 0.\n", - "\n", - "prob, or the prob table, has the same structure as Q, but the cell values refer to the probability of selecting some action (the column index) given some state (the row index). These probabilites are updated by the update_prob() function after the Q-values are updated after each episode. All action probabilities start at 0.5 (or 50%).\n", - "\n", - "alpha defines the weight given to each new change in Q-value within the update-Q() function. 
A smaller alpha means that a new reward logged in an episode for some state-action pair has less impact on the current Q-value for that state-action pair. The converse is also true. Therefore, alpha essentially defines how fast the AI agent learns (learning rate).\n", - "\n", - "epsilon defines the weight given to each new change in the action probabilities within the update_prob() function. A larger epsilon reduces the precent amount by which an action probability is changed after some change in Q-values. The converse is also true. An epsilon of 1 means that no change will occur in the action probability, regardless of the magnitude of changes in Q-values.\n", - "\n", - "epsilon is decayed by the decay value after every episode. The lowest value epsilon can reach is epsilon_min.\n", - "\n", - "gamma is the rate used to discount future rewards yielded by a certain state-action pair in the episode. Since a round (or episode) of Blackjack can have more than 1 decision made, there can be numerous state-action pairs that are seen in one episode. However, only the final decision (or state-action pair) yields an immediate reward from the environment. All previous state-action pairs had no rewards. So, the final reward must be used to modify the Q-values of the earlier state-action pairs. Since the final reward was only partially made possible by the eariler state-action pairs, the final reward is discounted using gamma to account for this.\n", - "\n", - "Once these important variables are defines, this function runs the First-Visit Monte Carlo algorithm for the Blackjack environment.\n", - "\n", - "The function runs num_episodes episodes.\n", - "\n", - "In each episode, epsilon is first decayed by the decay rate.\n", - "\n", - "Then, the game of Blackjack is played out throught the play_game() function. This function returns a list of the state-action-reward tuples that occured during the game. 
These tuples represent the actions the AI agent had to take given some state, and the rewards that resulted.\n", - "\n", - "These tuples are used to modify the Q-values in Q through the update_Q() function.\n", - "\n", - "Then, the tuples are used to modify the probability distributions in prob of the two actions (hit or stand) for any states that were encountered in the episode. This is done through the update_prob() function.\n", - "\n", - "After this process is done for each episode, the function returns the modified Q and prob tables. These tables are an imprint of the learning that has taken place by the AI agent through the First-Visit Monte Carlo algorithm.\n", - "'''\n", - "\n", - "def run_mc(env, num_episodes):\n", - " '''\n", - " observation_space[0] is the 18 possible player values. (3-20)\n", - " observation_space[1] is the 10 possible dealer upcards. (2-11)\n", - " Combining these togenther yields all possible states.\n", - " Multiplying this with hit/stand yields all possible state-action pairs.\n", - " This is the Q map.\n", - " '''\n", - " Q = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16)\n", - "\n", - " # This map contains the probability distributions for each action (hit or stand) given a state. The state (combination of player hand value and dealer upcard value) index in this array yields a 2-element array. 
The 0th index of this 2-element array refers to the probability of hit and the 1st index is the probability of stand.\n", - " prob = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16) + 0.5\n", - "\n", - " # The learning rate; very small to avoid making quick, large changes in our policy.\n", - " alpha = 0.001\n", - "\n", - " epsilon = 1\n", - "\n", - " # The rate by which epsilon will decay over time.\n", - " # Since the probability that decides which option has the highest Q-value is 1 - epsilon + probability, this decay will make sure that the better option is taken more often in the long run.\n", - " # This allows the algorithm to explore in the early stages and exploit in the later stages.\n", - " decay = 0.9999\n", - "\n", - " # The lowest value that epsilon can go to. Although the decay seems slow, it grows exponentially, and this is magnified when running thousands of episodes.\n", - " epsilon_min = 0.9\n", - "\n", - " gamma = 0.8\n", - "\n", - " for _ in range(num_episodes):\n", - " episode = play_game(env, Q, prob)\n", - "\n", - " epsilon = max(epsilon * decay, epsilon_min)\n", - "\n", - " Q = update_Q(env, episode, Q, alpha, gamma)\n", - "\n", - " prob = update_prob(env, episode, Q, prob, epsilon)\n", - "\n", - " return Q, prob" - ], - "metadata": { - "id": "PnRTa7FZ3RnJ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Run First-Visit Monte Carlo Reinforcement Learning Algorithm" - ], - "metadata": { - "id": "jdfLT2OvF1sV" - } - }, - { - "cell_type": "code", - "source": [ - "env = BlackjackEnv()\n", - "\n", - "start_time = time.time()\n", - "new_Q, new_prob = run_mc(env, 1000000)\n", - "end_time = time.time()\n", - "\n", - "print(f'Total time for learning: {end_time - start_time} s.')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UKx3psLBd3Yl", - "outputId": "2181c312-d684-4525-bb48-ed723de63333" - }, - 
"execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Total time for learning: 230.13713669776917 s.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# best_policy() function" - ], - "metadata": { - "id": "qICTdADBF4Eu" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "This function takes the new Q-table returned and keeps only the action that yields the highest value for a given state. This is represented in three different ways: binary, string and colors.\n", - "0 is hit and 1 is stand in the binary representation of the results.\n", - "H is hit and S is stand in the string representation of the results.\n", - "Green is hit and Red is stand in the color representation of the results.\n", - "'''\n", - "def best_policy(Q):\n", - " best_policy_binary = []\n", - " best_policy_string = []\n", - " best_policy_colors = []\n", - " for i in range(len(Q)):\n", - " best_policy_binary.append(np.argmax(Q[i]))\n", - " best_policy_string.append('H' if np.argmax(Q[i]) == 0 else 'S')\n", - " best_policy_colors.append('g' if np.argmax(Q[i]) == 0 else 'r')\n", - "\n", - " return best_policy_binary, best_policy_string, best_policy_colors" - ], - "metadata": { - "id": "MNbxnlFKeRgQ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Create DataFrame with Player Value as Rows and Dealer Upcard as Columns" - ], - "metadata": { - "id": "89PKdgPiF6Qx" - } - }, - { - "cell_type": "code", - "source": [ - "new_Q_binary, new_Q_string, new_Q_colors = best_policy(new_Q)\n", - "\n", - "df = pd.DataFrame(columns = range(2, 12))\n", - "\n", - "color_df = pd.DataFrame(columns = range(2, 12))\n", - "\n", - "for s in range(3, 21):\n", - " # possible player values in the range 3 to 20\n", - " start = env.observation_space[1].n * (s - 3)\n", - " end = start + 10\n", - " df.loc[s] = (new_Q_string[start:end])\n", - " color_df.loc[s] = (new_Q_colors[start:end])" - ], - 
"metadata": { - "id": "lZIV2WxWfR9z" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Create Chart Graphic for the Results" - ], - "metadata": { - "id": "Lh14aJ8jF8s-" - } - }, - { - "cell_type": "code", - "source": [ - "fig, ax = plt.subplots()\n", - "\n", - "# hide axes\n", - "fig.patch.set_visible(False)\n", - "ax.set_axis_off()\n", - "ax.axis('tight')\n", - "\n", - "ax.table(cellText=df.values, cellColours=color_df.values, cellLoc='center', rowLabels=df.index, colLabels=df.columns, loc='center')\n", - "\n", - "fig.tight_layout()\n", - "\n", - "plt.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "wR8JVHEef5Cf", - "outputId": "02c77629-9a81-4150-9561-c0255ad152ea" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABM9UlEQVR4nO3de3xU9Z3/8dcM4VaI2HALUw0GbWGgQklVjNoCFcEsjSgsFhdtaNS2FmuRrW3ht+MdEfZRpRcWqnWBNhUftZVIbZBV5GYkSrCp0mYh0FQqKggl0kQllDm/P06SJSGQ5OScOTPf834+HvOAnMye+bx2Evz2nJk5IcuyEBEREZHUF/Z7ABERERFxhxZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIdL8fPBQKJQF9PNzhgTqBtT7PUSCqNVMajVTUFqD0glqNdUhy7L2tXUn3xZ2oVAoKxwO74rH4z38miGRwuEw8Xjc7zESQq1mUquZgtIalE5Qq6nC4fDHoVBoaFuLOz+P2PWLx+M9ioqKiEajPo7hvZKSEmKxGGo1i1rNpFbzBKUT1GqqyspKbrzxxh7YZzmTdmEHQDQaJScnx+8xPFVZWQmo1TRqNZNazROUTlCr6M0TIiIiIsYIxMJu4cKFXHzxxaSnpzNgwACuvfZadu3a5fdYnli2bBkjR47krLPO4qyzziI3N5d169b5PVZCPPzww4RCIebMmeP3KK679957CYVCzW7Dhg3zeyzP7N+/nxtvvJG+ffvSs2dPLrzwQsrLy/0ey3XnnXfeKc9rKBRi9uzZfo/muhMnThCLxcjOzqZnz56cf/75PPDAA1iW5fdorvvHP/7BnDlzGDx4MD179uSyyy5j+/btfo/VaVu2bCE/P59IJEIoFKK4uLjZ9y3L4u6772bQoEH07NmTCRMmUFVV5c+wndRW6zPPPMPEiRPp27cvoVCIiooKX+ZsTSAWdps3b2b27NmUlZXxwgsvcPz4cSZOnEhdXZ3fo7nunHPO4eGHH2bHjh2Ul5fzpS99iSlTpvCnP/3J79E8tX37dn72s58xcuRIv0fxzIgRI3j33Xebbi+//LLfI3niyJEjXH755XTt2pV169bx5z//mR/+8Id88pOf9Hs0123fvr3Zc/rCCy8AMH36dJ8nc9+iRYtYtmwZP/3pT6msrGTRokUsXryYn/zkJ36P5rpbbrmFF154gV/+8pe8+eabTJw4kQkTJrB//36/R+uUuro6Ro0axdKlS1v9/uLFi/nxj3/M8uXLefXVV+nVqxeTJk3i448/TvCknddWa11dHVdccQWLFi1K8GRt8/01donw/PPPN/t65cqVDBgwgB07dvDFL37Rp6m8kZ+f3+zrBQsWsGzZMsrKyhgxYoRPU3mrtraWmTNn8vjjj/Pggw/6PY5n0tLSyMzM9HsMzy1atIhzzz2XFStWNG3Lzs72cSLv9O/fv9nXDz/8MOeffz5jx471aSLvvPLKK0yZMoXJkycD9tHK1atX89prr/k8mbs++ugjfvvb3/Lss882/ffl3nvv5Xe/+x3Lli1L6X+j8vLyyMvLa/V7lmWxZMkS/uM//oMpU6YA8Itf/IKBAwdSXFzMjBkzE
jlqp52pFeCmm24C4K9//WuCJmq/QByxa+mDDz4AICMjw+dJvHXixAmeeuop6urqyM3N9Xscz8yePZvJkyczYcIEv0fxVFVVFZFIhCFDhjBz5kz27Wvz44xS0tq1a7nooouYPn06AwYMYPTo0Tz++ON+j+W5+vp6ioqKKCwsJBQK+T2O6y677DI2bNjA7t27AfjjH//Iyy+/fMb/eKaif/7zn5w4cYIePZp/klfPnj2NPcoOUF1dzXvvvdfs3+E+ffowZswYtm3b5uNkwROII3Yni8fjzJkzh8svv5zPfvazfo/jiTfffJPc3Fw+/vhjevfuzZo1axg+fLjfY3niqaee4vXXXzfi9StnMmbMGFauXMnQoUN59913ue+++/jCF77Azp07SU9P93s8V/3lL39h2bJlzJ07l/nz57N9+3buuOMOunXrRkFBgd/jeaa4uJiamhpmzZrl9yie+MEPfsDRo0cZNmwYXbp04cSJEyxYsICZM2f6PZqr0tPTyc3N5YEHHiAajTJw4EBWr17Ntm3buOCCC/wezzPvvfceAAMHDmy2feDAgU3fk8QI3MJu9uzZ7Ny50+j/5TR06FAqKir44IMP+M1vfkNBQQGbN282bnH3t7/9je985zu88MILp/yvY9OcfFRj5MiRjBkzhsGDB/PrX/+am2++2cfJ3BePx7nooot46KGHABg9ejQ7d+5k+fLlRi/snnjiCfLy8ohEIn6P4olf//rX/OpXv+LJJ59kxIgRVFRUMGfOHCKRiHHP6y9/+UsKCwv51Kc+RZcuXcjJyeGGG25gx44dfo8mARCoU7G33347zz33HBs3buScc87xexzPdOvWjQsuuIDPf/7zLFy4kFGjRvGjH/3I77Fct2PHDg4ePEhOTg5paWmkpaWxefNmfvzjH5OWlsaJEyf8HtEzZ599Np/5zGfYs2eP36O4btCgQaf8j5BoNGrsqWeAt956ixdffJFbbrnF71E8c9ddd/GDH/yAGTNmcOGFF3LTTTdx5513snDhQr9Hc93555/P5s2bqa2t5W9/+xuvvfYax48fZ8iQIX6P5pnG1/8eOHCg2fYDBw4E4rXBySQQCzvLsrj99ttZs2YNL730krEvxD6deDzOsWPH/B7DdVdeeSVvvvkmFRUVTbeLLrqImTNnUlFRQZcuXfwe0TO1tbXs3buXQYMG+T2K6y6//PJTPo5o9+7dDB482KeJvLdixQoGDBjQ9MYCE3344YeEw83/k9OlSxejLwfVq1cvBg0axJEjR1i/fn3TmwpMlJ2dTWZmJhs2bGjadvToUV599VWjX+OdjAJxKnb27Nk8+eSTPPvss6Snpzed7+/Tpw89e/b0eTp3zZs3j7y8PLKysvjHP/7Bk08+yaZNm1i/fr3fo7kuPT39lNdJ9urVi759+xr3+snvfve75OfnM3jwYN555x3uueceunTpwg033OD3aK678847ueyyy3jooYe4/vrree2113jsscd47LHH/B7NE/F4nBUrVlBQUEBamrn/JOfn57NgwQKysrIYMWIEf/jDH3jkkUcoLCz0ezTXrV+/HsuyGDp0KHv27OGuu+5i2LBhfO1rX/N7tE6pra1tdpagurqaiooKMjIyyMrKYs6cOTz44IN8+tOfJjs7m1gsRiQS4dprr/VvaIfaav373//Ovn37eOeddwCa/sdoZmam70cozf1X5CTLli0DYNy4cc22r1ixwrgXKh88eJCvfvWrvPvuu/Tp04eRI0eyfv16rrrqKr9Hk054++23ueGGGzh8+DD9+/fniiuuoKys7JSPyzDBxRdfzJo1a5g3bx73338/2dnZLFmyxLgX2Td68cUX2bdvn5ELnJP95Cc/IRaL8a1vfYuDBw8SiUT4xje+wd133+33aK774IMPmDdvHm+//TYZGRlMmzaNBQsW0LVrV79H65Ty8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPPp+Rro
NtqXbt2bbOFeuPHudxzzz3ce++9CZ21pUAs7Ez8ZPPTeeKJJ/wewVebNm3yewRPPPXUU36PkFBf/vKX+fKXv+z3GAkxceLEQPwblZ6ezpIlS1iyZInfo3ju+uuv5/rrr/d7DNeNGzfujD+roVCI+++/n/vvvz+BU3mjrdZZs2Yl7YGhQLzGTkRERCQItLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYot3vig2FQllAPxcfexhASUkJlZWVLu42+ZSWlgJqNY1azaRW8wSlE9Rqqurq6nbfN9Set9mHQqGscDi8Kx6Pu/thNCHA/Hf529RqJrWaSa3mCUonqNVUdmuuZVllZ7pbe4/Y9YvH4z2KioqIRqOdHw57hR2LxWAq7h4HTEZVwEbUahq1mkmt5glKJ6jVVIeAZwCob+uuHfqA4mg0Sk5OjrOhWmg6bNoPiLiyy+R1qOFPtZpFrWZSq3mC0glqFb15QkRERMQUjhZ2y5YtY+TIkZx11lmcddZZ5Obmsm7dOrdnc88aYHUr26uBe4GPEjqNt9Sq1lQWlE5QK6g11ak1KVsdLezOOeccHn74YXbs2EF5eTlf+tKXmDJlCn/605/cnk9ERERE2qlDr7FrlJ+f3+zrBQsWsGzZMsrKyhgxYoQrg4mIiIhIxzha2J3sxIkTPP3009TV1ZGbm+vGTCIiIiLigOOF3Ztvvklubi4ff/wxvXv3Zs2aNQwfPtzN2dy1G1jQYpupn32jVjMFpTUonaBWtaY+tSYdxwu7oUOHUlFRwQcffMBvfvMbCgoK2Lx5c/Iu7rKByS227afxc2HMola1prKgdIJa1Zr61Jp0rY4Xdt26deOCCy4A4POf/zzbt2/nRz/6ET/72c9cG85VXYG+LbYd9WOQBFCrmYLSGpROUKtaU59ak45rn2MXj8c5duyYW7sTERERkQ5ydMRu3rx55OXlkZWVxT/+8Q+efPJJNm3axPr1692eT0RERETaydHC7uDBg3z1q1/l3XffpU+fPowcOZL169dz1VVXuT2fiIiIiLSTo4XdE0884fYc3rruNNuzsT8x2iRqVWsqC0onqBXUmurUmpStulasiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEB1680RJSQmVlZWuPHBpaan9lyrgkCu7TF77Gv5Uq1nUaia1miconaBWUx1p/11DltX2hc5CodCl4XB4Wzwe78RUpwoD7u4xiYVIymvKeUKtZgpQq/5tMlBQOiFQrUH6XW1ozbUsq+xM92vvEbv6eDxOUVER0Wi089NhH/2LxWIUAe7sMXmVADELmAr083kYr1UBG1GraQLWGt+I/m0yScB+foPUGpTf1UrgRvuv9W3dt0OnYqPRKDk5OY6GaqnxlG4UcGePyavp5HU/IOLjIInQeDhcrWYJYKv+bTJIAH9+g9QahN/VjtCbJ0REREQM4crC7uGHHyYUCjFnzhw3dueJ94HbgCygO5AJTAJK/RzKC2uA1a1sr8b+dOyPEjqNt9RqXmtQOk8SiH+bgvS8qtXMVlLnd9XRJcVOtn37dn72s58xcuRIN+bxzDTsE9OrgCHAAWADcNjPoUQk8PRvk0hqSJXf1U4t7Gpra5k5cyaPP/44Dz74oFszua4G2ApsAsY2bBsMXOLTPCIioH+bRFJFDanzu9qpU7GzZ89m8uTJTJgwwa15PNG74VYMHPN3FBGRJvq3SSQ1pNLvquMjdk899RSvv/4627dvd3MeT6QBK4FbgeXY754ZC8wAkvsEskO7gQUttpn6mUZqNU9QOgnYv00Bel7V6scg3kql31VHC7u//e1vfOc73+GFF16gR48ebs/kiWnAZOxDqWXAOmAx8HNgln9jeSMbO/Zk+4FnfJjFa2o1rzUonQ0C829TkJ5XtRrZmiq/q44Wdjt27ODgwYPNPtPuxIkTbNmyhZ/+9KccO3aMLl26uDakW3oAVzXcYsAtwD0k1xPiiq5A3xbbjvoxSAKo1
TxB6TxJIP5tCtLzqlZjpcLvqqOF3ZVXXsmbb77ZbNvXvvY1hg0bxve///2kXNS1Zjj2+XIRkWSif5tEUkMy/q46Wtilp6fz2c9+ttm2Xr160bdv31O2J4PDwHSgEPtceDpQjn0IdYqPc4lIsOnfJpHUkEq/q53+HLtU0BsYAzwK7AWOA+divwhyvo9ziUiw6d8mkdSQSr+rri3sNm3a5NauXNcdWNhwM951p9mejf1J4CZRq3mtQelsEJh/m4L0vKrVyNZU+l3VtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNv6lCjjk4yCJsK/hT7WaJYCt+rfJIAH8+Q1SaxB+V6s7cN+QZbV9UbdQKHRpOBzeFo/HnU/VijDg7h6TWAgjr5/XGj2vhgpQq36GzaPn1ExBel4bWnMtyyo70/3ae8SuPh6PU1RURDQa7fx02Ef/YrEYRYA7e0xeJUDMAqYC/XwexmtVEN+InlfTVAEbCUyrfoYNo+fUTAF6XiuBG+2/1rd13w6dio1Go82uD9sZjad0o4A7e0xeTYeI+wERHwdJhIZD/3peDdN4SidArfoZNoieUzMF6HntCL15QkRERMQQjhZ29957L6FQqNlt2LBhbs/mqveB24As7E+QzgQmcdKLh02xBljdyvZq7E8C/yih03hOzytmPa9B6TxJIH6GA/a8BuI5BT2vJOfz6viSYiNGjODFF1/8vx2lJfdlZ6dhn5heBQwBDgAbsC/sK6lLz6ukOv0Mm0fPqZlS5Xl1vBpLS0sjMzPTzVk8UwNsBTYBYxu2DQYu8WkecUcNel4ltdWgn2HT1KDn1EQ1pM7z6vg1dlVVVUQiEYYMGcLMmTPZt29f2/9HPundcCsGjvk7irhIz6ukOv0Mm0fPqZlS6Xl1dMRuzJgxrFy5kqFDh/Luu+9y33338YUvfIGdO3eSnp7u9oydlgasBG4FlmO/e2YsMAMY6d9Y3tkNLGixzcDPNNLzipHPa2A6CdjPcECe10A9p6DnleR7Xh0t7PLy8pr+PnLkSMaMGcPgwYP59a9/zc033+zacG6aBkzGPpRaBqwDFgM/B2b5N5Y3srFjT7YfeMaHWTym5xXzntegdDYIzM9wgJ7XwDynoOeV5HteXfm4k7PPPpvPfOYz7Nmzx43deaYHcBUQA17BfiLu8XMgr3QF+ra4Jd+BVNfoeTVMUDpPEoif4YA9r4F4TkHPK8n3vLqysKutrWXv3r0MGjTIjd0lzHCgzu8hxHV6XiXV6WfYPHpOzZSMz6ujU7Hf/e53yc/PZ/Dgwbzzzjvcc889dOnShRtuuMHt+VxxGJgOFGKfC08HyrEPoU7xcS7pHD2vkur0M2wePadmSqXn1dHC7u233+aGG27g8OHD9O/fnyuuuIKysjL69+/v9nyu6A2MAR4F9gLHgXOxXwQ538e5pHP0vEqq08+wefScmimVnldHC7unnnrK7Tk81R1Y2HAz3nWn2Z6N/UngBtHzinnPa1A6GwTmZzhAz2tgnlPQ85qkdK1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lJ7n4A7e0xepY1/qQIO+ThIIjRcMljPq2EaLwUdoFb9DBtEz6mZAvS8VnfgviHLavuibqFQ6NJwOLwtHo87n6oVYcDdPSavILUSwshrBbZKrWZSq3mC0glqNZXdmmtZVtmZ7tbeI3b18XicoqIiotFo54fDPvoXi8UoAtzZY/Iqwb78SGBaLWAq0M/nYbxWBWxEraZRq3mC0glqNdUhGq+/W9/WXTt0KjYajZKTk+NsqBYaT+lGAXf2mLwaDxEHqZV+QMTHQRKh8TSHWs2iVvMEpRPUKnrzhIiIiIgptLATERERMYTjhd3+/fu58cYb6du3Lz179uTCCy+kvLzczdlc9T5wG5CFfWmQTGASJ72DyCCBaV0DrG5lezX25
Ww+Sug03gpKa1A6Qa2g1lSn1qRsdXSt2CNHjnD55Zczfvx41q1bR//+/amqquKTn/yk2/O5Zhr2Kw5XAUOAA8AG4LCfQ3kkSK0iIiLyfxwt7BYtWsS5557LihUrmrZlZ2e7NpTbaoCtwCZgbMO2wcAlPs3jpRqC0yoiIiLNOToVu3btWi666CKmT5/OgAEDGD16NI8//rjbs7mmd8OtGDjm7yieC1KriIiINOfoiN1f/vIXli1bxty5c5k/fz7bt2/njjvuoFu3bhQUFLg9Y6elASuBW4Hl2B85MhaYAYz0byxPBKkVgN3AghbbTP2wyqC0BqUT1KrW1KfWpONoYRePx7nooot46KGHABg9ejQ7d+5k+fLlSbmwA/t1Z5OxT1OWAeuAxcDPgVn+jeWJILWSjR17sv00fpCjWYLSGpROUKtaU59ak67V0cJu0KBBDB8+vNm2aDTKb3/7W1eG8koP4KqGWwy4BbgHAxc7BKi1K9C3xbajfgySAEFpDUonqFWtqU+tScfRa+wuv/xydu3a1Wzb7t27GTx4sCtDJcpwoM7vIRIkSK0iIiJB5eiI3Z133slll13GQw89xPXXX89rr73GY489xmOPPeb2fK44DEwHCrFfZ5YOlGOfnpzi41xeCFKriIiINOdoYXfxxRezZs0a5s2bx/333092djZLlixh5syZbs/nit7AGOBRYC9wHDgX+w0G832cywtBahUREZHmHC3sAL785S/z5S9/2c1ZPNMdWNhwM12QWrnuNNuzsT8J3CRBaQ1KJ6gV1Jrq1JqUrbpWrIiIiIghtLATERERMYQWdiIiIiKG0MJORERExBAdevNESUkJlZWVrjxwaWmpvU/AnT0mr9KGP4PUShVwyMdBEmFfw59qNYtazROUTlCrqY60/64hy2r7QmehUOjScDi8LR6Pd2Kq1nZMUl5nzRNqNZNazRSg1jDg8r/sySlAz6laDWW35lqWVXamu7X3iF19PB6nqKiIaDTa+eGwj/7FYjGYCvRzZZfJqwrYiFpNo1YzBaw1vhGKAHf+ZU9OJUDMIjDPaZB+fgPTeojGa9LWt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh4jVaha1mimArVHAnX/Zk1PTS2AC9JyqNbj05gkRERERQzha2J133nmEQqFTbrNnz3Z7PnesAVa3sr0a+xOjP0roNN5Sq1pTWVA6IVitwPvAbUAW9hVyMoFJnPSGK1ME6XlVa1K2Orqk2Pbt2zlx4kTT1zt37uSqq65i+vTprg0mIiLmmIb94qBVwBDgALABOOznUCIGcrSw69+/f7OvH374Yc4//3zGjh3rylAiImKOGmArsAlo/K/EYOASn+YRMVmnX2NXX19PUVERhYWFhEIhN2YSERGD9G64FQPH/B1FxHiOjtidrLi4mJqaGmbNmuXCOB7aDSxosc3Uz75Rq5mC0hqUTghMaxqwErgVWI79DtyxwAxgpH9jeScgzyug1iRs7fTC7oknniAvL49IJMnfa5wNTG6xbT+NnwtjFrWqNZUFpRMC1ToNO3UrUAasAxYDPwdm+TeWNwL0vKqVpGvt1MLurbfe4sUXX+SZZ5KsqjVdgb4tth31Y5AEUKuZgtIalE4IVivQA7iq4RYDbgHuwcCFXZCeV7UmnU69xm7FihUMGDCAyZNbLmFFRETObDhQ5/cQIoZxfMQuHo+zYsUKCgoKSEvr9BldEREx1GFgOlCI/Zq6dKAc+1TsFB/nEjGR4xXZiy++yL59+ygsLHRzHhERMUxvYAzwKLAXOA6ci/1mivk+ziViIscLu4kTJ2JZSfh2kNZcd5rt2difGG0Stao1lQWlEwLV2h1Y2HAzXoCeV7WSlK26VqyIiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQHXrzRElJCZWVla48cGlpqf2XKuCQK7tMXvsa/lSrWdRqpgC2lgDu/MuenEob/xKg51SthjnS/ruG2vPO1lAodGk4H
N4Wj8c7MVVrOyYpr7PmCbWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkM0XpO2vq27duhUbDQaJScnx9lQLTSd0u0HRFzZZfJqPESsVrOo1UxqNU9QOkGtojdPiIiIiJjC0cLuxIkTxGIxsrOz6dmzJ+effz4PPPBA8l6JYg2wupXt1difGP1RQqfxllrVmsqC0glqBbWmOrUmZaujS4otWrSIZcuWsWrVKkaMGEF5eTlf+9rX6NOnD3fccYfbM4qIiIhIOzha2L3yyitMmTKFyZMnA3DeeeexevVqXnvtNVeHExEREZH2c3Qq9rLLLmPDhg3s3r0bgD/+8Y+8/PLL5OXluTqciIiIiLSfoyN2P/jBDzh69CjDhg2jS5cunDhxggULFjBz5ky353PPbmBBi21J+pLATlOrmYLSGpROUKtaU59ak46jhd2vf/1rfvWrX/Hkk08yYsQIKioqmDNnDpFIhIKCArdndEc2MLnFtv00fi6MWdSq1lQWlE5Qq1pTn1qTrtXRwu6uu+7iBz/4ATNmzADgwgsv5K233mLhwoXJu7DrCvRtse2oH4MkgFrNFJTWoHSCWtWa+tSadBy9xu7DDz8kHG7+f9qlSxdcv+SYiIiIiLSboyN2+fn5LFiwgKysLEaMGMEf/vAHHnnkEQoLC92eT0RERETaydHC7ic/+QmxWIxvfetbHDx4kEgkwje+8Q3uvvtut+cTERERkXZytLBLT09nyZIlLFmyxOVxPHLdabZnY39itEnUqtZUFpROUCuoNdWpNSlbda1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lL7L1XAIVd2mbz2NfypVrOo1UxqNU9QOkGtpjrS/ruGLKvtC52FQqFLw+HwNtc/gDhEUl5nzRNqNZNazaRW8wSlEwLVGgaCcmmEhtZcy7LKznS/9h6xq4/H4xQVFRGNRjs/HfbRv1gsBlOBfq7sMnlVARtRq2nUaia1miconRC41vhGKALcWZkkr0rgRvuv9W3dt0OnYqPRKDk5OY6GaqnplG4/IOLKLpNX4yFitZpFrWZSq3mC0gmBbI0C7qxMzKA3T4iIiIgYwvHC7h//+Adz5sxh8ODB9OzZk8suu4zt27e7OZt71gCrW9lejf2J0R8ldBpvqVWtqSwonaBWUGuqC1Ir8D5wG5AFdAcygUlAqZ9DtcLRJcUAbrnlFnbu3Mkvf/lLIpEIRUVFTJgwgT//+c986lOfcnNGEREREV9Nw36B2ypgCHAA2AAc9nOoVjg6YvfRRx/x29/+lsWLF/PFL36RCy64gHvvvZcLLriAZcuWuT2jiIiIiG9qgK3AImA8MBi4BJgHXOPfWK1ytLD75z//yYkTJ+jRo0ez7T179uTll192ZTARERGRZNC74VYMHPN3lDY5OhWbnp5Obm4uDzzwANFolIEDB7J69Wq2bdvGBRdc4PaM7tgNLGixzdTP+VGrmYLSGpROUKtaU19AWtOAlcCtwHLsd+GOBWYAI/0bq1WOX2P3y1/+ksLCQj71qU/RpUsXcnJyuOGGG9ixY4eb87knG5jcYtt+4BkfZvGaWtWayoLSCWpVa+oLUOs07NStQBmwDlgM/ByY5d9Yp3C8sDv//PPZvHkzdXV1HD16lEGDBvGVr3yFIUOGuDmfe7oCfVtsO+rHIAmgVjMFpTUonaBWtaa+ILUCPYCrGm4x4BbgHpJrYdfpz7Hr1asXgwYN4siRI6xfv54pU6a4MZeIiIhIUhsO1Pk9RAuOj9itX78ey7IYOnQoe/bs4a677mLYsGF87Wtfc3M+EREREV8dBqYDhdivqUsHyrFPxSbb4SzHC7sPPviAefPm8fbbb5ORkcG0adNYsGABXbt2dXM+EREREV/1BsYAjwJ7gePAudhvppjv41ytcbywu/7667n++
uvdnMU7151mezb2p2ObRK1qTWVB6QS1glpTXYBauwMLG27JTteKFRERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDdOhdsSUlJVRWVrrywKWlpfZfqoBDruwyee1r+FOtZlGrmdRqnqB0QiBbSwB3VibJq7oD9w1ZVttX6w2FQpeGw+Ft8Xjc+VSt7hgjLxbcKrWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkPAMwDUt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh8PVaha1mkmt5glKJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLiZt+3LIu7776bQYMG0bNnTyZMmEBVVVVn53VmDbC6le3V2Jc9+Sih03hLrWpNZUHpBLWCWlOdWpOy1fHCrq6ujlGjRrF06dJWv7948WJ+/OMfs3z5cl599VV69erFpEmT+Pjjjx0PKyIiIiKn16HX2J0sLy+PvLy8Vr9nWRZLlizhP/7jP5gyZQoAv/jFLxg4cCDFxcXMmDHD6cOKiIiIyGl48hq76upq3nvvPSZMmNC0rU+fPowZM4Zt27Z58ZAiIiIigef4iN2ZvPfeewAMHDiw2faBAwc2fS/hdgMLWmwz9bNv1GqmoLQGpRPUqtbUp9ak48nCLillA5NbbNtP4+fCmEWtak1lQekEtao19ak16Vo9WdhlZmYCcODAAQYNGtS0/cCBA3zuc5/z4iHb1hXo22LbUT8GSQC1mikorUHpBLWqNfWpNel48hq77OxsMjMz2bBhQ9O2o0eP8uqrr5Kbm+vFQ4qIiIgEnuMjdrW1tezZs6fp6+rqaioqKsjIyCArK4s5c+bw4IMP8ulPf5rs7GxisRiRSIRrr73WjblFREREpAXHC7vy8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPP06NHj85PLSIiIiKncLywGzduHJZ1+reDhEIh7r//fu6//36nD+Ge606zPRv7E6NNola1prKgdIJaQa2pTq1J2aprxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2n+pAg65ssvkta/hT7WaRa1mUqt5gtIJajXVkfbfNXSmd7Y23SkUujQcDm+Lx+OdmKq1HZOU11nzhFrNpFYzqdU8QekEtZrKbs21LKvsTHdr7xG7+ng8TlFREdFotPPDYR/9i8ViMBXo58ouk1cVsBG1mkatZlKreYLSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFppO6fYDIq7sMnk1HiJWq1nUaia1miconaBW0ZsnREREREzheGG3ZcsW8vPziUQihEIhiouLm33/mWeeYeLEifTt25dQKERFRUUnR+2ENcDqVrZXY39i9EcJncZbalVrKgtKJ6gV1Jrq1JqUrY4XdnV1dYwaNYqlS5ee9vtXXHEFixYtcjyciIiIiLSf42vF5uXlkZeXd9rv33TTTQD89a9/dfoQIiIiItIBeo2diIiIiCEcH7FLObuBBS22mfrZN2o1U1Bag9IJalVr6lNr0gnOwi4bmNxi234aPxfGLGpVayoLSieoVa2pT61J1xqchV1XoG+LbUf9GCQB1GqmoLQGpRPUqtbUp9ako9fYiYiIiBjC8RG72tpa9uzZ0/R1dXU1FRUVZGRkkJWVxd///nf27dvHO++8A8CuXbsAyMzMJDMzs5Nji4iIiEhLjo/YlZeXM3r0aEaPHg3A3LlzGT16NHfffTcAa9euZfTo0UyebJ+QnjFjBqNHj2b58uUujC0iIiIiLTk+Yjdu3Dgs6/RvB5k1axazZs1yunt3XXea7dnYnxhtErWqNZUFpRPUCmpNdWpNyla9xk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkh
MrKSlceuLS01P5LFXDIlV0mr30Nf6rVLGo1k1rNE5ROUKupjrT/rqEzvbO16U6h0KXhcHhbPB7vxFSt7ZikvM6aJwLUGgZc/klJWmo1VIB+XwPTGpROUKup7NZcy7LKznS39h6xq4/H4xQVFRGNRjs/HPbRv1gsBlOBfq7sMnlVARsJTGt8IxQB7vykJK8SIIZaTVMCxCwC8/saiH+bgtIJajXVIRqvSVvf1l07dCo2Go2Sk5PjbKgWmk7p9gMiruwyeTUeIg5QaxRw5ycleTW+KEGtZml6sUmAfl+Nbw1KJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLipu8dP36c73//+1x44YX06tWLSCTCV7/61abrxibcGmB1K9ursT8x+qOETuOtILUC7wO3AVlAdyATmASU+jmUR9RqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl57yvQ8//JDXX3+dWCzG66+/zjPPPMOuXbu45pprOjWsSEvTgD8Aq4DdwFpgHHDYx5m8olYzW0VE3OT4WrF5eXnk5eW1+r0+ffrwwgsvNNv205/+lEsuuYR9+/aRlZXl9GFFmtQAW4FNwNiGbYOBS3yax0s1qNXEVhERtyXsNXYffPABoVCIs88+O1EPKYbr3XArBo75O4rn1CoiIu3h+IhdR3z88cd8//vf54YbbuCss85KxEOeajewoMU2Uz/7JiCtacBK4FZgOfa7NccCM4CR/o3lCbWa2RqU31VArWpNfSnS6vnC7vjx41x//fVYlsWyZcu8frjTywYmt9i2n8bPhTFLgFqnYaduBcqAdcBi4OfALP/G8oRaDWwN0O+qWlFrqkuRVk8Xdo2LurfeeouXXnrJv6N1AF2Bvi22HfVjkAQIUivQA7iq4RYDbgHuwbAFQAO1GtYapN9VtZpJrUnHs9fYNS7qqqqqePHFF+nbt+X/N0S8MRyo83uIBFGriIiczPERu9raWvbs2dP0dXV1NRUVFWRkZDBo0CD+9V//lddff53nnnuOEydO8N577wGQkZFBt27dOj+5BN5hYDpQiP3aq3SgHPuU3RQf5/KCWs1sFRFxm+OFXXl5OePHj2/6eu7cuQAUFBRw7733snbtWgA+97nPNfu/27hxI+PGjXP6sCJNegNjgEeBvcBx4FzsF93P93EuL6jVzFYREbc5XtiNGzcOyzr920HO9L2Eu+4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ+1592ooFLo0HA5vi8fjnZiqtR2TlNdZ80SAWsOAyz8pSUuthgrQ72tgWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxWAq0M+VXSavKmAjgWmNb4QiwJ2flORVgn2pK7WapQSIWQTm9zUQ/zYFpRPUaqpDNF6Ttr6tu3boVGw0GiUnJ8fZUC00ndLtB0Rc2WXyajxEHKDWKODOT0ryajwlqVazNJ1qDtDvq/GtQekEtYrePCEiIiJiCi3sRERERAzheGG3ZcsW8vPziUQihEIhiouLm33/3nvvZdiwYfTq1YtPfvKTTJgwgVdffbWz8zqzBljdyvZq7EuBfJTQabwVpFbgfeA2IAv7UlSZwCROemejQdRqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl7b6/c985jP89Kc/5c033+Tll1/mvPPOY+LEibz//vuOhxVpaRrwB2AVsBtYC4wDDvs4k1fUamariIibOvTmiZPl5eWRl5d32u//27/9W7OvH3nkEZ544gneeOMNrrzySqcPK9KkBtgKbALGNmwbDFzi0zxeqkGtJraKiLgtIa+xq6+v57HHHqNPnz6MGjUqEQ8pAdC74VYMHPN3FM+pV
URE2sPxEbv2eO6555gxYwYffvghgwYN4oUXXqBfP58+bGY3sKDFNlM/1DAgrWnASuBWYDn2x3CMBWYAI/0byxNqNbM1KL+rgFrVmvpSpNXThd348eOpqKjg0KFDPP7441x//fW8+uqrDBgwwMuHbV02MLnFtv00fuCfWQLUOg07dStQBqwDFgM/B2b5N5Yn1Gpga4B+V9WKWlNdirR6eiq2V69eXHDBBVx66aU88cQTpKWl8cQTT3j5kKfXFejb4pbuzyieC1Ir0AO4CvvKCK9g/4f/Hj8H8pBaDROk31W1mkmtSSehn2MXj8c5dkyvmhFvDQfq/B4iQdQqIiInc3wqtra2lj179jR9XV1dTUVFBRkZGfTt25cFCxZwzTXXMGjQIA4dOsTSpUvZv38/06dPd2VwkcPAdKAQ+7VX6UA59im7KT7O5QW1mtkqIuI2xwu78vJyxo8f3/T13LlzASgoKGD58uX87//+L6tWreLQoUP07duXiy++mK1btzJixIjOTy2C/c7JMcCjwF7gOHAu9ovu5/s4lxfUamariIjbHC/sxo0bh2Wd/u0gzzyTRK8mvO4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ2d6Z2vTnUKhS8Ph8LZ4PN6JqU4VBtzdY/JSq5nUaqgQSXkNSE8EpTUonaBWU9mtuZZllZ3pbu09Ylcfj8cpKioiGo12fjjso3+xWIwiwJ09Jq8S7MsiqdUsajVTCRCzgKlAP5+H8VoVsBHzW4PSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFhpP6UYBd/aYvBpPXanVLGo1U9Op5n5AxMdBEqHx9JXprUHpBLWK3jwhIiIiYgrHC7stW7aQn59PJBIhFApRXFx82vt+85vfJBQKsWTJEqcP12nvA7cBWdifbJ8JTOKkF0obRK1qTXWBaF0DrG5lezX2J9l/lNBpvKVWtaa6FGp1fEmxuro6Ro0aRWFhIVOnTj3t/dasWUNZWRmRiL/HSadhn5heBQwBDgAbsC84bhq1qjXVBalVRMRNjhd2eXl55OXlnfE++/fv59vf/jbr169n8uTJTh+q02qArcAmYGzDtsHAJT7N46Ua1KrW1FZDcFpFRNzm2Wvs4vE4N910E3fddRcjRozw6mHapXfDrRg45usk3lOrmdQqIiLt4fiIXVsWLVpEWload9xxh1cP0W5pwErgVmA59rv6xgIzgJH+jeUJtao11QWpld3AghbbTP1MLrWaSa1Jx5OF3Y4dO/jRj37E66+/TigU8uIhOmwaMBn7FE8ZsA5YDPwcmOXfWJ5Qq1pTXWBas7FDT7afxs+rMota1ZrqUqTVk1OxW7du5eDBg2RlZZGWlkZaWhpvvfUW//7v/855553nxUO2Sw/gKuwPWn0F+z8Q9/g2jbfUaia1GqYr0LfFLd3XibyjVjOpNel4srC76aabeOONN6ioqGi6RSIR7rrrLtavX+/FQzoyHKjze4gEUauZ1CoiIidzfCq2traWPXv2NH1dXV1NRUUFGRkZZGVl0bdv32b379q1K5mZmQwdOtT5tA4dBqYDhdiv0UkHyrFP7UxJ+DTeUqtaU12QWkVE3OZ4YVdeXs748eObvp47dy4ABQUFrFy5stODuak3MAZ4FNgLHAfOxX5x9nwf5/KCWtWa6oLUKiLiNscLu3HjxmFZ7X87yF//+lenD9Vp3YGFDTfTqdVMajXQdafZno39SfYmUataU10KtepasSIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUN06M0TJSUlVFZWuvLApaWl9j4Bd/aYvEob/lSrWdRqpsZWqoBDPg6SCPsa/jS9NSidoFZTHWn/XUPteWdrKBS6NBwOb
4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcXNvj9r1ixCoVCz29VXX93ZeR17H7gNyML+ZPtMYBInvVDaIGpVa6oLROsaYHUr26uxP8n+o4RO4y21qjXVpVCr40uK1dXVMWrUKAoLC5k6dWqr97n66qtZsWJF09fdu3d3+nCdNg37xPQqYAhwANiAfcFx06hVrakuSK0iIm5yvLDLy8sjLy/vjPfp3r07mZmZTh/CNTXAVmATMLZh22DgEp/m8VINalVraqshOK0iIm7z9DV2mzZtYsCAAQwdOpTbbruNw4f9+d/bvRtuxcAxXyZIHLWaSa0iItIejo/YteXqq69m6tSpZGdns3fvXubPn09eXh7btm2jS5cuXj1sq9KAlcCtwHLsd/WNBWYAIxM6iffUqtZUF6RWdgMLWmwz9TO51GomtSYdzxZ2M2bMaPr7hRdeyMiRIzn//PPZtGkTV155pVcPe1rTgMnYp3jKgHXAYuDnwKyET+Mttao11QWmNRs79GT7afy8KrOoVa2pLkVaE/ZxJ0OGDKFfv37s2bMnUQ95ih7AVdgftPoK9n8g7vFtGm+p1UxqNUxXoG+LW7qvE3lHrWZSa9JJ2MLu7bff5vDhwwwaNChRD9mm4UCd30MkiFrNpFYRETmZ41OxtbW1zY6+VVdXU1FRQUZGBhkZGdx3331MmzaNzMxM9u7dy/e+9z0uuOACJk2a5MrgHXEYmA4UYr9GJx0oxz61MyXh03hLrWpNdUFqFRFxm+OFXXl5OePHj2/6eu7cuQAUFBSwbNky3njjDVatWkVNTQ2RSISJEyfywAMP+PJZdr2BMcCjwF7gOHAu9ouz5yd8Gm+pVa2pLkitIiJuc7ywGzduHJZ1+reDrF+/3umuXdcdWNhwM51azaRWA113mu3Z2J9kbxK1qjXVpVCrrhUrIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExRIfePFFSUkJlZaUrD1xaWmrvE3Bnj8mrtOFPtZpFrWZqbKUKOOTjIImwr+FP01uD0glqNdWR9t81dKZ3tjbdKRS6NBwOb4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcWn3KeyspJrrrmGPn360KtXLy6++GL27dt36s4S4H3gNiAL+5PtM4FJnPRCaYOoVa2pLhCta4DVrWyvxv4k+48SOo231KrWVJdCrY4vKVZXV8eoUaMoLCxk6tSpp3x/7969XHHFFdx8883cd999nHXWWfzpT3+iR48enRrYqWnYJ6ZXAUOAA8AG7AuOm0atak11QWoVEXGT44VdXl4eeXl5p/3+//t//49/+Zd/YfHixU3bzj//fKcP1yk1wFZgEzC2Ydtg4BJfpvFWDWpVa2qrITitIiJu8+Q1dvF4nN///vd85jOfYdKkSQwYMIAxY8a0ero2EXo33IqBY75MkDhqNZNaRUSkPRwfsTuTgwcPUltby8MPP8yDDz7IokWLeP7555k6dSobN25k7Nixbe/ERWnASuBWYDn2u/rGAjOAkQmdxHtqVWuqC1Iru4EFLbaZ+plcajWTWpOOJwu7xg8ynjJlC
nfeeScAn/vc53jllVdYvnx5whd2YL9mZzL2KZ4yYB2wGPg5MCvh03hLrWpNdYFpzcYOPdl+Gj+vyixqVWuqS5FWT07F9uvXj7S0NIYPH95sezQa9e1dsQA9gKuwP2j1Fez/QNzj2zTeUquZ1GqYrkDfFrd0XyfyjlrNpNak48nCrlu3blx88cXs2rWr2fbdu3czePBgLx7SkeFAnd9DJIhazaRWERE5meNTsbW1tezZs6fp6+rqaioqKsjIyCArK4u77rqLr3zlK3zxi19k/PjxPP/88/zud79j06ZNbszdIYeB6UAh9mt00oFy7FM7UxI+jbfUqtZUF6RWERG3OV7YlZeXM378+Kav586dC0BBQQErV67kuuuuY/ny5SxcuJA77riDoUOH8tvf/pYrrrii81N3UG9gDPAosBc4DpyL/eLs+QmfxltqVWuqC1KriIjbHC/sxo0bh2Wd+e0ghYWFFBYWOn0I13QHFjbcTKdWM6nVQNedZns29ifZm0Stak11KdSqa8WKiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCE69K7YkpISKisrXXng0tJSe5+AO3tMXqUNf6rVLGo1U2MrVcAhHwdJhMYLAZneGpROUKupjrT/rqG2PrIEIBQKXRoOh7c1XgPWLWHA3T0mL7WaSa1mUqt5gtIJwWolBLS9jDGD3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFKALc2WPyKsG+3qVazaJWM6nVPEHphAC2WsBUoJ/Pw3jtEPAMAPVt3bVDp2Kj0Sg5OTnOhmqh8ZRuFHBnj8mr8dSVWs2iVjOp1TxB6YRgttIPiPg4SJLRmydEREREDOF4Ybdlyxby8/OJRCKEQiGKi4ubfT8UCrV6+8///M/OzuzI+8BtQBb2JYsygUmc9EJpg6hVralOrea1BqUT1Gpk6xpgdSvbq7EvKfZRQqc5I8fXiq2rq2PUqFEUFhYyderUU77/7rvvNvt63bp13HzzzUybNs3pQ3bKNOwT06uAIcABYANw2JdpvKVWtaY6tZrXGpROUKupranC8cIuLy+PvLy8034/MzOz2dfPPvss48ePZ8iQIU4f0rEaYCuwCRjbsG0wcEnCJ/FeDWpVa2qrQa2mtdYQjE5Qq6mtqSQhr7E7cOAAv//977n55psT8XCn6N1wKwaO+TJB4qjVTGo1U1Bag9IJahX/JWRht2rVKtLT01s9ZZsIacBK7EPFZwOXA/OBN3yZxltqVWuqU6t5rUHpBLWa2grAbmBBi9uvfJ2oVQlZ2P33f/83M2fOpEePHol4uFZNA94B1gJXYx86zsH+oTSNWtWa6tRqXmtQOkGtpraSDXyzxe0aXydqlecLu61bt7Jr1y5uueUWrx+qTT2Aq7A/vPEVYBZwj58DeUitZlKrmYLSGpROUKuRrV2Bvi1u6b5O1CrPF3ZPPPEEn//85xk1apTXD9Vhw4E6v4dIELWaSa1mCkprUDpBrZI4jt8VW1tby549e5q+rq6upqKigoyMDLKysgA4evQoTz/9ND/84Q87P2knHAamA4XASOwFdjmwGJji41xeUKtaU51azWsNSieo1dTWVOJ4YVdeXs748eObvp47dy4ABQUFrFy5EoCnnnoKy7K44YYbOjdlJ/UGxgCPAnuB48C5wK3YL/Q0iVrVmurUal5rUDpBraa2phLHC7tx48ZhWdYZ7/P1r3+dr3/9604fwjXdgYUNN9Op1UxqNVNQWoPSCWo11nWn2Z6NfeWJJKJrxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2vsE3Nlj8ipt+FOtZlGrmdRqnqB0QjBbqQIO+ThIIhxp/11Dbb2zFSAUCl0aDoe3xePxTkx1qjDg7h6Tl1rNpFYzqdU8QekEtZqqoTXXsqyyM92vvUfs6uPxOEVFRUSj0c5Ph330LxaLUQS4s8fkVYJ9qRW1m
kWtZlKreYLSCWo1VSVwo/3X+rbu26FTsdFolJycHEdDtdR4SjeKfcFgkzUeDlerWdRqJrWaJyidoFbRmydEREREjOF4Ybdlyxby8/OJRCKEQiGKi4ubfb+2tpbbb7+dc845h549ezJ8+HCWL1/e2Xkdex+4DcjC/rTsTGASJ7340iBqVWuqU6t5rUHpBLWq1V+OLylWV1fHqFGjKCwsZOrUqad8f+7cubz00ksUFRVx3nnn8T//8z9861vfIhKJcM0113RqaCemYZ+YXgUMAQ4AG7AvYmwatao11anVvNagdIJa1eovxwu7vLw88vLyTvv9V155hYKCAsaNGwfY14392c9+xmuvvZbwhV0NsBXYBIxt2DYYuCShUyRGDWpVa2qrQa2mtdYQjE5Qq1r959lr7C677DLWrl3L/v37sSyLjRs3snv3biZOnOjVQ55W74ZbMXAs4Y+eWGo1k1rNFJTWoHSCWk2VSq2eLex+8pOfMHz4cM455xy6devG1VdfzdKlS/niF7/o1UOeVhqwEvvw6dnA5cB84I2ET+I9tao11anVvNagdIJa1eo/Txd2ZWVlrF27lh07dvDDH/6Q2bNn8+KLL3r1kGc0DXgHWAtcjX04NQf7iTKNWtWa6tRqXmtQOkGtavWZZVlt3rBnt3bs2GG1BrDWrFnT9PWHH35ode3a1Xruueea3e/mm2+2Jk2aZFmWZRUVFVmAtQMsy6fbzWBlJeBxikCtalWrWtUa0E61qrWztx0NrUBOW2s2T47YHT9+nOPHjxMON999ly5dcPuyZJ0xHKjze4gEUauZ1GqmoLQGpRPUaqpkbHX8rtja2lr27NnT9HV1dTUVFRVkZGSQlZXF2LFjueuuu+jZsyeDBw9m8+bN/OIXv+CRRx5xZfCOOAxMBwqBkUA6UA4sBqYkfBpvqVWtqU6t5rUGpRPUqlb/OV7YlZeXM378+Kav586dC0BBQQErV67kqaeeYt68ecycOZO///3vDB48mAULFvDNb36z81N3UG9gDPAosBc4DpwL3Ir94keTqFWtqU6t5rUGpRPUqlb/OV7YjRs3jobX37UqMzOTFStWON29q7oDCxtuplOrmdRqpqC0BqUT1GqqVGrVtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNvypVrOo1UxqNU9QOkGtpqruwH1DZ3pna9OdQqFLw+HwNrc/XDgMJM/HFXtLrWZSq5nUap6gdIJaTdXQmmtZVtmZ7tfeI3b18XicoqIiotFo56fDPvoXi8UoAtzZY/IqAWKgVsOo1UxqNU9QOkGtpqoEbrT/Wt/WfTt0KjYajZKTk+NoqJYaT+lGsS9Ea7LGQ8RqNYtazaRW8wSlE9QqevOEiIiIiDEcL+y2bNlCfn4+kUiEUChEcXFxs+8fOHCAWbNmEYlE+MQnPsHVV19NVVVVZ+d17H3gNiAL+xOkM4FJ/N+LL02iVrWmOrWa1xqUTlCrWv3l+JJidXV1jBo1isLCQqZOndrse5Zlce2119K1a1eeffZZzjrrLB555BEmTJjAn//8Z3r16tXpwTtqGvaJ6VXAEOAAsAH7wr6mUataU51azWsNSieoVa0+syyrzRv26Wtrx44dVmsAa82aNU1f79q1ywKsnTt3Nm07ceKE1b9/f+vxxx+3LMuyioqKLMDaAZbl8e0IWIC1KQGP1dqtqOHx1apWtao1iK1B6VSrWr267Wh4fCCnrTWbJ6+xO3bsGAA9evRo2hYOh+nevTsvv/yyFw95Rr0bbsXAsYQ/emKp1UxqNVNQWoPSCWo1VSq1erKwGzZsGFlZWcybN48jR45QX1/PokWLePvtt3n33Xe9eMgzSgNWYh8+PRu4HJgPvJHwSbynVrWmOrWa1xqUTlCrWv3nycKua9euPPPMM+zevZuMjAw+8YlPsHHjRvLy8giH/Xkj7jTgHWAtcDWwCfv88kpfpvGWW
tWa6tRqXmtQOkGtavWZF6+xO1lNTY118OBBy7Is65JLLrG+9a1vJfw1dqe73QxWlmGvA1CrWtWq1lRpDUqnWtXa2Zvvr7E7WZ8+fejfvz9VVVWUl5czZcoUrx+y3YYDdX4PkSBqNZNazRSU1qB0glpNlYytjj/upLa2lj179jR9XV1dTUVFBRkZGWRlZfH000/Tv39/srKyePPNN/nOd77Dtddey8SJE10ZvCMOA9OBQmAkkA6UA4uB5FlmukOtak11ajWvNSidoFa1+s/xwq68vJzx48c3fT137lwACgoKWLlyJe+++y5z587lwIEDDBo0iK9+9avEYrHOT+xAb2AM8CiwFzgOnAvciv3iR5OoVa2pTq3mtQalE9Sq1iTgxmvsnEiG19gl6pYMrwNQq1rVqtagtgalU63m3pLqNXYiIiIikhha2ImIiIgYQgs7EREREUNoYSciIiJiiA69K7akpITKykpXHri0tNTeJ+DOHpNXacOfajWLWs2kVvMEpRPUaqrqDtw31PCu1zPfKRS6NBwOb4vH486nakUYcHePyUutZlKrmdRqnqB0glpN1dCaa1lW2Znu194jdvXxeJyioiKi0Wjnp8M++heLxSgC3Nlj8ioBYqBWw6jVTGo1T1A6Qa2mqgRutP9a39Z9O3QqNhqNkpOT42iolhpP6UaxPyTPZI2HiNVqFrWaSa3mCUonqFX05gkRERERY2hhJyIiImIIRwu7hQsXcvHFF5Oens6AAQO49tpr2bVrV7P7fPzxx8yePZu+ffvSu3dvpk2bxoEDB1wZ2on3gduALKA7kAlM4v/eVWMStao11anVvNagdIJa1eqvDr3GrtHmzZuZPXs2F198Mf/85z+ZP38+EydO5M9//jO9evUC4M477+T3v/89Tz/9NH369OH2229n6tSpTR9zkmjTsF9xuAoYAhwANgCHfZnGW2pVa6pTq3mtQekEtarVZ21dTLbh41ByAGvHjh1Waw4ePGgB1ubNmy3Lsqyamhqra9eu1tNPP910n8rKSguwtm3bZlmWZRUVFSXs4r1HGi6euykAFypWq1rVqtZkaw1Kp1rV6tVtR8PjAzltrdlceY3dBx98AEBGRgYAO3bs4Pjx40yYMKHpPsOGDSMrK4tt27a58ZAd0rvhVgwcS/ijJ5ZazaRWMwWlNSidoFZTpVJrpxd28XicOXPmcPnll/PZz34WgPfee49u3bpx9tlnN7vvwIEDee+99zr7kB2WBqzEPnx6NnA5MB94I+GTeE+tak11ajWvNSidoFa1+q/TC7vZs2ezc+dOnnrqKTfm8cw04B1gLXA1sAn7/PJK/0byjFrVmurUal5rUDpBrWr1WWdeYzd79mzrnHPOsf7yl780275hwwYLsI4cOdJse1ZWlvXII48k/DV2p7vdDFaWYa8DUKta1arWVGkNSqda1drZm+evsbMsi9tvv501a9bw0ksvkZ2d3ez7n//85+natSsbNmxo2rZr1y727dtHbm6uk4f0xHCgzu8hEkStZlKrmYLSGpROUKupkrHV0cedzJ49myeffJJnn32W9PT0ptfN9enTh549e9KnTx9uvvlm5s6dS0ZGBmeddRbf/va3yc3N5dJLL3U1oD0OA9OBQmAkkA6UA4uBKQmfxltqVWuqU6t5rUHpBLWq1X+OFnbLli0DYNy4cc22r1ixglmzZgHw6KOPEg6HmTZtGseOHWPSpEn813/9V6eGdao3MAZ4FNgLHAfOBW7FfvGjSdSq1lSnVvNag9IJalWr/xwt7Bped3dGPXr0YOnSpSxdutTJQ7iqO7Cw4WY6tZpJrWYKSmtQOkGtpkqlVl0rVkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYogOvXmisrLStQeurq629+naHpNXdcOfajWLWs2kVvMEpRPUaqqONIba8w7XUCiUFQ6Hd8Xj8R7OxzpVGIi7ucMkplYzqdVMajVPUDpBraYKw8dxGGpZ1r4z3a9dC
zuwF3dAPzeGO0k3oN7lfSYrtZpJrWZSq3mC0glqNdWhthZ10IGFnYiIiIgkN715QkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMcT/B73X1paF7bdVAAAAAElFTkSuQmCC\n" - }, - "metadata": {} - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Test Best Policy on New Episodes" - ], - "metadata": { - "id": "NPbcI9qNF_F9" - } - }, - { - "cell_type": "code", - "source": [ - "env = BlackjackEnv()\n", - "NUM_EPISODES = 100000\n", - "\n", - "for _ in range(NUM_EPISODES):\n", - " state = env.reset()\n", - " while env.done == False:\n", - " if state[0] == 19:\n", - " # Player was dealt with a Blackjack\n", - " next_state, reward, env.done, info = env.step(1)\n", - " total_rewards += reward\n", - " else:\n", - " Q_index = get_Q_state_index(state)\n", - " action = new_Q_binary[Q_index]\n", - " new_state, reward, done, desc = env.step(action)\n", - " state = new_state\n", - " total_rewards += reward\n", - "avg_reward = total_rewards / NUM_EPISODES\n", - "print(f'Average Reward: {avg_reward}')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7sTNqbeZga_3", - "outputId": "97878c73-7419-41d4-d600-68bfd314b8f4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Average Reward: -4.677\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "WL-NmCiphQiq" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Credits" - ], - "metadata": { - "id": "73y12ontGQRk" - } - }, - { - "cell_type": "markdown", - "source": [ - "The code is work of Mr. Adithya Solai.\n", - "
\n", - "The conversion from Python to Python Notebook is work of Mr. Sushant Nair.\n", - "
\n", - "The inspiration for making this Python Notebook as part of the RL Series is Mr. Terrence Ou\n", - "
\n", - "The code is further explained in the following Medium articles by Mr. Adithya Solai. GitHub Repo link is also provided.\n", - "
\n", - "1.\thttps://towardsdatascience.com/cracking-blackjack-part-1-31da28aeb4bb\n", - "2.\thttps://towardsdatascience.com/cracking-blackjack-part-2-75e32363e38\n", - "3.\thttps://towardsdatascience.com/cracking-blackjack-part-3-8fd3a5870efd\n", - "4.\thttps://towardsdatascience.com/cracking-blackjack-part-4-8b4a9caa38eb\n", - "5.\thttps://towardsdatascience.com/cracking-blackjack-part-5-70bd2f726133\n", - "\n", - "https://github.com/adithyasolai/Monte-Carlo-Blackjack/blob/master/MC_Blackjack_Full.ipynb\n" - ], - "metadata": { - "id": "BiGQjzloGSIX" - } - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "btiWK3iZHFi5" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4", - "include_colab_link": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file From eb1085673a7322d4c389584404081a824505bce6 Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:43:22 +0530 Subject: [PATCH 5/8] Delete chapter_05_monte_carlo_methods/Blackjack.ipynb --- .../Blackjack.ipynb | 884 ------------------ 1 file changed, 884 deletions(-) delete mode 100644 chapter_05_monte_carlo_methods/Blackjack.ipynb diff --git a/chapter_05_monte_carlo_methods/Blackjack.ipynb b/chapter_05_monte_carlo_methods/Blackjack.ipynb deleted file mode 100644 index b6b8840..0000000 --- a/chapter_05_monte_carlo_methods/Blackjack.ipynb +++ /dev/null @@ -1,884 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JuDWA5loOsFM", - "outputId": "b9cdd35b-2703-4828-ad14-494bafe11476" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting gymnasium\n", - " Downloading 
gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)\n", - "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)\n", - "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", - "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)\n", - "Collecting farama-notifications>=0.0.1 (from gymnasium)\n", - " Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)\n", - "Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/953.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m501.8/953.9 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n", - "Installing collected packages: farama-notifications, gymnasium\n", - "Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1\n" - ] - } - ], - "source": [ - "# @title Installing Dependencies\n", - "\n", - "!pip install gymnasium" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "cbt-NEiMO1bA" - }, - "outputs": [], - "source": [ - "# @title Importing Dependencies\n", - "\n", - "import random\n", - "import numpy as np\n", - "import gymnasium as gym\n", - "from gym import error, spaces, utils\n", - "from gym.utils import seeding" - ] - }, - { - "cell_type": "code", - "source": [ - "# @title Set up the game\n", - "# Define the rank and suit of a card\n", - "\n", - "import enum\n", - 
"\n", - "ranks = {\n", - " \"two\": 2,\n", - " \"three\": 3,\n", - " \"four\": 4,\n", - " \"five\": 5,\n", - " \"six\": 6,\n", - " \"seven\": 7,\n", - " \"eight\": 8,\n", - " \"nine\": 9,\n", - " \"ten\": 10,\n", - " \"jack\": 10,\n", - " \"queen\": 10,\n", - " \"king\": 10,\n", - " \"ace\": (1, 11),\n", - "}\n", - "\n", - "class Suit(enum.Enum):\n", - " spades = \"spades\"\n", - " clubs = \"clubs\"\n", - " diamonds = \"diamonds\"\n", - " hearts = \"hearts\"\n", - "\n", - "# Now to define the card and deck\n", - "# Implement the shuffle, peek and deal functions for the deck\n", - "\n", - "class Card:\n", - " def __init__(self, suit, rank, value):\n", - " self.suit = suit\n", - " self.rank = rank\n", - " self.value = value\n", - "\n", - " def __str__(self):\n", - " return self.rank + \" of \" + self.suit.value\n", - "\n", - "class Deck:\n", - " def __init__(self, num=1):\n", - " self.cards = []\n", - " for i in range(num):\n", - " for suit in Suit:\n", - " for rank, value in ranks.items():\n", - " self.cards.append(Card(suit, rank, value))\n", - "\n", - " def shuffle(self):\n", - " random.shuffle(self.cards)\n", - "\n", - " def deal(self):\n", - " return self.cards.pop(0)\n", - "\n", - " def peek(self):\n", - " if len(self.cards) > 0:\n", - " return self.cards[0]\n", - "\n", - " def add_to_bottom(self, card):\n", - " self.cards.append(card)\n", - "\n", - " def __str__(self):\n", - " result = \"\"\n", - " for card in self.cards:\n", - " result += str(card) + \"\\n\"\n", - " return result\n", - "\n", - " def __len__(self):\n", - " return len(self.cards)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "D3df2N-JrKiE", - "outputId": "a36dc6fc-51b2-4fb0-bfb5-380e143cc4b4" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in 
the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", - " and should_run_async(code)\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# @title Define the logic for evaluating the value of the player's hand\n", - "'''\n", - "An ace is used as 11 whenever possible. It is 1 only if the other option would make the player bust.\n", - "'''\n", - "\n", - "def player_eval(player_hand):\n", - " num_ace = 0\n", - " # use_one means that every ace in the hand is counted as one\n", - " use_one = 0\n", - " for card in player_hand:\n", - " if card.rank == \"ace\":\n", - " num_ace += 1\n", - " use_one += card.value[0] # use 1 for Ace\n", - " else:\n", - " use_one += card.value\n", - "\n", - " if num_ace > 0:\n", - " # Define player policy for Aces:\n", - " # Make Aces 11 if they get you to the range [18, 21]\n", - " # Otherwise, use one.\n", - "\n", - " ace_counter = 0\n", - " while ace_counter < num_ace:\n", - " # Only add by 10 because 1 is already added before\n", - " use_eleven = use_one + 10\n", - " if use_eleven > 21:\n", - " return use_one\n", - " elif use_eleven >= 18 and use_eleven <= 21:\n", - " return use_eleven\n", - " else:\n", - " # This allows for some Aces to be 11 and others to be 1.\n", - " use_one = use_eleven\n", - " ace_counter += 1\n", - " return use_one\n", - " else:\n", - " return use_one" - ], - "metadata": { - "id": "SWrENBc-vEUL" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Define the logic for evaluating the value of the dealer's hand\n", - "\n", - "# This follows the same, official rules every time.\n", - "# Still there is a need to figure out what happens if there are multiple Aces.\n", - "def dealer_eval(player_hand):\n", - " num_ace = 0\n", - " use_one = 0\n", - " for card in player_hand:\n", - " if card.rank == \"ace\":\n", - " num_ace += 1\n", - " use_one 
+= card.value[0] # use 1 for Ace\n", - " else:\n", - " use_one += card.value\n", - "\n", - " if num_ace > 0:\n", - " # See if using 11 instead of 1 for the Ace gets the dealer's hand value closer to the [17, 21] range.\n", - " # The dealer will follow Hard 17 rules. This means that the dealer will not hit again if the Ace yields a 17.\n", - " # This also means that Aces initially declared as 11 can be changed to 1 as new cards come.\n", - "\n", - " ace_counter = 0\n", - " while ace_counter < num_ace:\n", - " # Only add 10 because 1 is already added before\n", - " use_eleven = use_one + 10\n", - "\n", - " if use_eleven > 21:\n", - " return use_one\n", - " elif use_eleven >= 17 and use_eleven <= 21:\n", - " return use_eleven\n", - " else:\n", - " # The case where even using Ace as eleven is less than 17.\n", - " use_one = use_eleven\n", - " ace_counter += 1\n", - " return use_one\n", - " else:\n", - " return use_one" - ], - "metadata": { - "id": "YzDYaCCVx3AH" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Define logic for the dealer's turn\n", - "\n", - "def dealer_turn(dealer_hand, deck):\n", - " # Calculate the dealer hand's value.\n", - " dealer_value = dealer_eval(dealer_hand)\n", - "\n", - " # Define dealer policy (which is fixed to the official rules)\n", - " # The dealer keeps hitting until their total is 17 or more\n", - " while dealer_value < 17:\n", - " dealer_hand.append(deck.deal())\n", - " dealer_value = dealer_eval(dealer_hand)\n", - "\n", - " return dealer_value, dealer_hand, deck" - ], - "metadata": { - "id": "YetM4Xsixlrf" - }, - "execution_count": 6, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Define the OpenAI Gym Environment for Blackjack\n", - "\n", - "INITIAL_BALANCE = 1000\n", - "NUM_DECKS = 6\n", - "\n", - "class BlackjackEnv(gym.Env):\n", - " metadata = {'render.modes': ['human']}\n", - "\n", - " def __init__(self):\n", - " super(BlackjackEnv, 
self).__init__()\n", - "\n", - " # Initialize the blackjack deck\n", - " self.bj_deck = Deck(NUM_DECKS)\n", - "\n", - " self.player_hand = []\n", - " self.dealer_hand = []\n", - "\n", - " self.reward_options = {\"lose\": -100, \"tie\": 0, \"win\": 100}\n", - "\n", - " self.action_space = spaces.Discrete(2)\n", - "\n", - " '''\n", - " First element of tuple is the range of possible hand values for the player (3 through 20). This is the possible range of values that the player will actually have to make a decision for. Any player hand value 21 or above already has automatic valuations, and needs no input from an AI agent.\n", - " However, we also need to add all the hand values that the agent could possibly end up in when they bust. Maybe the agent can glean some correlations based on what hand value they bust at, so this should be in the observation space. Also, the layout of OpenAI Gym environment class makes us have to include the bust-value in the step() function because we need to return that done is true alongside the final obs, which is the bust-value.\n", - " '''\n", - " # Second element of the tuple is the range of possible values for the dealer's upcard (2 through 11)\n", - " self.observation_space = spaces.Tuple((spaces.Discrete(18), spaces.Discrete(10)))\n", - " self.done = False\n", - "\n", - " def _take_action(self, action):\n", - " if action == 0: #hit\n", - " self.player_hand.append(self.bj_deck.deal())\n", - " # recalculate the value of the player's hand after any changes to the hand\n", - " self.player_value = player_eval(self.player_hand)\n", - "\n", - " def step(self, action):\n", - " self._take_action(action)\n", - "\n", - " # End the episode/game if the player stands or has a hand value >= 21.\n", - " self.done = action == 1 or self.player_value >= 21\n", - "\n", - " # rewards are 0 when the player hits and is still below 21, and they keep playing\n", - " rewards = 0\n", - "\n", - " if self.done:\n", - " # Calculate rewards\n", - " if 
self.player_value > 21:\n", - " rewards = self.reward_options['lose']\n", - " elif self.player_value == 21:\n", - " rewards = self.reward_options['win']\n", - " else:\n", - " # begin the dealer turn phase\n", - " dealer_value, self.dealer_hand, self.bj_deck = dealer_turn(self.dealer_hand, self.bj_deck)\n", - " # End the dealer turn phase\n", - "\n", - " # Final comparison\n", - " if dealer_value > 21:\n", - " rewards = self.reward_options['win']\n", - " elif dealer_value == 21:\n", - " rewards = self.reward_options['lose']\n", - " else:\n", - " # both dealer and player value is less than 21\n", - " if self.player_value > dealer_value:\n", - " rewards = self.reward_options['win']\n", - " elif self.player_value < dealer_value:\n", - " rewards = self.reward_options['lose']\n", - " else:\n", - " rewards = self.reward_options['tie']\n", - " self.balance += rewards\n", - " # Subtract by 1 to fit into the possible observation range.\n", - " # This makes the possible range of 3 through 20 into 1 through 18.\n", - " player_value_obs = self.player_value - 2\n", - " # Get the value of the dealer's upcard; this value is what the agent sees.\n", - " # Subtract by 1 to fit into the possible observation range of 1 to 10\n", - " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", - " # The state is represented as a player hand-value + dealer upcard pair.\n", - " obs = np.array([player_value_obs, upcard_value_obs])\n", - " return obs, rewards, self.done, {}\n", - "\n", - " def reset(self):\n", - " # reset the game to an initial state\n", - " # add the player and dealer cards back into the deck\n", - " self.bj_deck.cards += self.player_hand + self.dealer_hand\n", - "\n", - " # shuffle before beginning. 
Only shuffle once before the start of each game.\n", - " self.bj_deck.shuffle()\n", - " self.balance = INITIAL_BALANCE\n", - " self.done = False\n", - "\n", - " # returns the start stage for the agent\n", - " # deal 2 cards to the agent and the dealer\n", - " self.player_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", - " self.dealer_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", - " self.dealer_upcard = self.dealer_hand[0]\n", - "\n", - " # calculate the value of the agent's hand\n", - " self.player_value = player_eval(self.player_hand)\n", - "\n", - " # subtract by 2 to fit into the possible observation range. This makes the possible range of 3 through 20 into 1 through 18\n", - " player_value_obs = self.player_value - 2\n", - "\n", - " # get the value of the dealer's upcard; this value is what the agent sees. Subtract by 1 to fit the possible observation range of 1 to 10.\n", - " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", - "\n", - " # the state is represented as a player hand-value + dealer upcard pair.\n", - " obs = np.array([player_value_obs, upcard_value_obs])\n", - " return obs\n", - "\n", - " def render(self, mode='human', close=False):\n", - " # convert the player hand into a format that is easy to read and understand.\n", - " hand_list = []\n", - " for card in self.player_hand:\n", - " hand_list.append(card.rank)\n", - "\n", - " # recalculate the value of the dealer upcard.\n", - " upcard_value = dealer_eval([self.dealer_upcard])\n", - "\n", - " print(f'Balance: {self.balance}')\n", - " print(f'Player Hand: {hand_list}')\n", - " print(f'Dealer Upcard: {upcard_value}')\n", - " print(f'Done: {self.done}')" - ], - "metadata": { - "id": "am91D0LgqNnf" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Test the OpenAI Gym Blackjack Environment\n", - "\n", - "env = BlackjackEnv()\n", - "\n", - "total_rewards = 0\n", - "NUM_EPISODES = 1000\n", - "\n", - "for _ in 
range(NUM_EPISODES):\n", - " env.reset()\n", - " episode_reward = 0\n", - " while env.done == False:\n", - " action = env.action_space.sample()\n", - " new_state, reward, done, desc = env.step(action)\n", - " episode_reward += reward\n", - " total_rewards += episode_reward\n", - "\n", - "avg_reward = total_rewards / NUM_EPISODES\n", - "print(f'Average Reward: {avg_reward}')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ttzs7UQb2qCP", - "outputId": "94c0576f-abdf-49ff-eead-38312653e263" - }, - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Average Reward: -32.2\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# @title A function to map a state from the Blackjack environment to the proper index i Q and prob tables\n", - "\n", - "'''\n", - "Given a state, derive the corresponding index in the Q-table. This state is a player hand value and dealer upcard pair, so the hashing formula must be used to allocate the indices of the Q-table properly.\n", - "'''\n", - "def get_Q_state_index(state):\n", - " '''\n", - " The player value is already subtracted by 1 in the env when it returns the state. Subtract by 1 again to fir with the array indexing that starts at 0.\n", - " '''\n", - " initial_player_value = state[0] - 1\n", - " '''\n", - " The upcard value is already subtracted by 1 in the env when it returns the state. 
dealer_upcard will be subtracted by 1 to fit with the array indexing that starts at 0.\n", - " '''\n", - " dealer_upcard = state[1] - 1\n", - " return(env.observation_space[1].n * (initial_player_value)) + (dealer_upcard)" - ], - "metadata": { - "id": "etl-JD8lHlJB" - }, - "execution_count": 9, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Functions to get and update the probability of taking the best action for a given state\n", - "def get_prob_of_best_action(env, state, Q, prob):\n", - " # Use the mappig function to figure out which index of Q corresponds to the player hand value and dealer upcard value that defines each state.\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " # Use this index in the Q 2-D array to get a 2-element array that yield the current Q-values for hitting (index 0) and standing (index 1) in this state. Use np.argmax() function to find the index of the action that yields the maximum rewards, i.e., the best action we are looking for.\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " # Retrieve the probability of the best action using the state/action pair as indices of the 'prob' array, which stores the probability of taking an action (hit or stand) for a given state/action pair.\n", - " return prob[Q_state_index][best_action]\n", - "\n", - "def update_prob_of_best_action(env, state, Q, prob, epsilon):\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " '''\n", - " Slightly alter the probability of this best action being taken by using epsilon. Epsilon starts at 1.0, and slowly decays over time. Therefore, as per the equation below, the AI agent will use the probability listed for the best action in the 'prob' table during the beginning of the algorithm. 
As time goes on, the likelihood that the best action is taken is increased from what is listed in the 'prob' table.\n", - " This allows for the exploration of other moves in the beginning of the algorithm, but exploitation later for a greater reward.\n", - " '''\n", - " prob[Q_state_index][best_action] = min(1, prob[Q_state_index][best_action] + 1 - epsilon)\n", - "\n", - " other_action = 1 if best_action == 0 else 0\n", - " prob[Q_state_index][other_action] = 1 - prob[Q_state_index][best_action]\n", - "\n", - " return prob" - ], - "metadata": { - "id": "qazyxoCbYLhz" - }, - "execution_count": 10, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title play_game() function\n", - "'''\n", - "Simulates one round of Blackjack.\n", - "\n", - "First, use env.reset() to set up a new round of Blackjack.\n", - "\n", - "If the player is already dealt a blackjack by luck, then this episode is just scrapped. There is nothing the AI agent can learn from this episode since it did not have to make any decisions.\n", - "\n", - "Otherwise, the AI agent finds the best action available given the current state of the game and its knowledge so far. This knowledge is stored in the Q-table defined in the run_mc() function. Then, the AI agent retrieves the probability that it should take this action from the prob table. The AI agent applies this probability and chooses its action given this state. The action is sent to the environment, and a reward is returned. This state-action-reward sequence that just occured is stored in episode variable as a tuple. This process is repeated until the current episode is over. Each episode will roughly rield 1-3 tuples in episode variable since Blackjack rounds are usually resolved after 1-3 decisions by the player.\n", - "\n", - "The Q-values in the Q-table associated with each state-action pair that was seen in this episode will be updated after this episode based on the state-action-reward tuples returned by this function. 
Then, the corresponding probabilities in prob variable are also modified to reflect this change in Q-values.\n", - "'''\n", - "\n", - "def play_game(env, Q, prob):\n", - " '''\n", - " Can contain numerous state->action->reward tuples bacause a round of Blackjack is not always resolved in one turn. However, there will be no state that has a player hand value that exceeds 20, since only initial states BEFORE actions are made are used when storing state->action->reward tuples.\n", - " '''\n", - " episode = []\n", - " state = env.reset()\n", - " while env.done == False:\n", - " if state[0] == 19:\n", - " # Player was dealt blackjack right from the beginning; player_value already subtracted by 2 to get state[0]\n", - " # No analysis done for this useless episode\n", - " next_state, reward, env.done, info = env.step(1)\n", - " else:\n", - " # Get the index in Q that corresponds to the current state\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " # Use the index to get the possible actions, and use np.argmax() to get the index of the action that has the highest current Q value. Index 0 is hit, 1 is stand.\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " # Go to the prob table to retrieve the probability of this action. 
This uses the same Q_state_index used for finding the state index of the Q-table.\n", - " prob_of_best_action = get_prob_of_best_action(env, state, Q, prob)\n", - "\n", - " action_to_take = None\n", - "\n", - " if random.uniform(0, 1) < prob_of_best_action:\n", - " # Take the best action\n", - " action_to_take = best_action\n", - " else:\n", - " # Take the other action\n", - " action_to_take = 1 if best_action == 0 else 0\n", - "\n", - " # The action is performed by the agent, and the next state, rewards and done information is returned.\n", - " next_state, reward, env.done, info = env.step(action_to_take)\n", - "\n", - " # Log the state->action->reward sequence\n", - " episode.append((state, action_to_take, reward))\n", - "\n", - " # Update the state for the next decision made by the agent.\n", - " state = next_state\n", - "\n", - " return episode" - ], - "metadata": { - "id": "HmFb1gcWFFcm" - }, - "execution_count": 11, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title update_Q() function\n", - "'''\n", - "This function iterates through the state-action-reward tuples in episode and updates the Q-values of the corresponding state-action pairs in Q.\n", - "'''\n", - "\n", - "def update_Q(env, episode, Q, alpha, gamma):\n", - " '''\n", - " This is where the algorithm hinges on being first visit or every visit. First visit will be used because if you want first-visit, you need to use the cumulative reward of the entire episode when updating a Q-value for ALL of the state/action pairs in the episode, even the first state/action pair. In this algorithm, an episode is a round of Blackjack. Although the bulk of the reward may come from the 2nd or 3rd decision, deciding to hit on the 1st decision is what enabled the future situations to even occur, so it is important to include the entire cumulative reward. 
We can reduce the impact of the rewards of the future decisions by lowering gamma, which will lower the G value for our early state/action pair in which we hit and did not get any immediate rewards. This will make our agent consider future rewards, and not just look at each state in isolation despite having hit previously.\n", - " If you want Every-Visit MC, do not use the cumulative rewards when updating Q-values and just use the immediate reward in this episode for each state/action pair.\n", - " '''\n", - " step = 0\n", - " for state, action, reward in episode:\n", - " # calculate the cumulative reward of taking this action in this state.\n", - " # Start from the immediate rewards, and use all the rewards from the subsequent states.\n", - " # Do not use rewards from previous states\n", - "\n", - " total_reward = 0\n", - " gamma_exp = 0\n", - " for curr_step in range(step, len(episode)):\n", - " curr_reward = episode[curr_step][2]\n", - " total_reward += (gamma ** gamma_exp) * curr_reward\n", - " gamma_exp += 1\n", - "\n", - " # Update the Q-value\n", - " Q_state_index = get_Q_state_index(state)\n", - " curr_Q_value = Q[Q_state_index][action]\n", - " Q[Q_state_index][action] = curr_Q_value + alpha * (total_reward - curr_Q_value)\n", - " # update step to start further down the episode the next time.\n", - " step += 1\n", - " return Q" - ], - "metadata": { - "id": "R08jdGkVbQD0" - }, - "execution_count": 12, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title update_prob() function\n", - "def update_prob(env, episode, Q, prob, epsilon):\n", - " for state, action, reward in episode:\n", - " '''\n", - " Update the probabilities of the actions that can be taken given the current state. 
The goal is that the new update in Q has changed what the best action is, and epsilon will be used to create a small increase in the probability that the new, better action is chosen.\n", - " '''\n", - " prob = update_prob_of_best_action(env, state, Q, prob, epsilon)\n", - " return prob" - ], - "metadata": { - "id": "4UA7u4nWdRFL" - }, - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title First-Visit Monte Carlo Algorithm\n", - "# run_mc() to run the First Visit Monte Carlo Algorithm\n", - "'''\n", - "This function initializes the key variables for the algorithm and runs the learning algorithm for the AI agent for num_episodes episodes. An episode is a simulation of one game of Blackjack using the OpenAI Gym environment defined above.\n", - "Q, or the Q-table, is a 2D list in which the rows are the different possible states and the columns are the different possible actions for each state. The values stored in each cell refer to the value, or Q-value, of selecting some action (the column index) given some state (the row index). These Q-values start at 0 for every state-action pair, and are updated by the update_Q() function after each episode to reflect the rewards received in an episode where the state-action pair occured. All Q-values initially start at 0.\n", - "\n", - "prob, or the prob table, has the same structure as Q, but the cell values refer to the probability of selecting some action (the column index) given some state (the row index). These probabilites are updated by the update_prob() function after the Q-values are updated after each episode. All action probabilities start at 0.5 (or 50%).\n", - "\n", - "alpha defines the weight given to each new change in Q-value within the update-Q() function. A smaller alpha means that a new reward logged in an episode for some state-action pair has less impact on the current Q-value for that state-action pair. The converse is also true. 
Therefore, alpha essentially defines how fast the AI agent learns (learning rate).\n", - "\n", - "epsilon defines the weight given to each new change in the action probabilities within the update_prob() function. A larger epsilon reduces the precent amount by which an action probability is changed after some change in Q-values. The converse is also true. An epsilon of 1 means that no change will occur in the action probability, regardless of the magnitude of changes in Q-values.\n", - "\n", - "epsilon is decayed by the decay value after every episode. The lowest value epsilon can reach is epsilon_min.\n", - "\n", - "gamma is the rate used to discount future rewards yielded by a certain state-action pair in the episode. Since a round (or episode) of Blackjack can have more than 1 decision made, there can be numerous state-action pairs that are seen in one episode. However, only the final decision (or state-action pair) yields an immediate reward from the environment. All previous state-action pairs had no rewards. So, the final reward must be used to modify the Q-values of the earlier state-action pairs. Since the final reward was only partially made possible by the eariler state-action pairs, the final reward is discounted using gamma to account for this.\n", - "\n", - "Once these important variables are defines, this function runs the First-Visit Monte Carlo algorithm for the Blackjack environment.\n", - "\n", - "The function runs num_episodes episodes.\n", - "\n", - "In each episode, epsilon is first decayed by the decay rate.\n", - "\n", - "Then, the game of Blackjack is played out throught the play_game() function. This function returns a list of the state-action-reward tuples that occured during the game. 
These tuples represent the actions the AI agent had to take given some state, and the rewards that resulted.\n", - "\n", - "These tuples are used to modify the Q-values in Q through the update_Q() function.\n", - "\n", - "Then, the tuples are used to modify the probability distributions in prob of the two actions (hit or stand) for any states that were encountered in the episode. This is done through the update_prob() function.\n", - "\n", - "After this process is done for each episode, the function returns the modified Q and prob tables. These tables are an imprint of the learning that has taken place by the AI agent through the First-Visit Monte Carlo algorithm.\n", - "'''\n", - "\n", - "def run_mc(env, num_episodes):\n", - " '''\n", - " observation_space[0] is the 18 possible player values. (3-20)\n", - " observation_space[1] is the 10 possible dealer upcards. (2-11)\n", - " Combining these togenther yields all possible states.\n", - " Multiplying this with hit/stand yields all possible state-action pairs.\n", - " This is the Q map.\n", - " '''\n", - " Q = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16)\n", - "\n", - " # This map contains the probability distributions for each action (hit or stand) given a state. The state (combination of player hand value and dealer upcard value) index in this array yields a 2-element array. 
The 0th index of this 2-element array refers to the probability of hit and the 1st index is the probability of stand.\n", - " prob = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16) + 0.5\n", - "\n", - " # The learning rate; very small to avoid making quick, large changes in our policy.\n", - " alpha = 0.001\n", - "\n", - " epsilon = 1\n", - "\n", - " # The rate by which epsilon will decay over time.\n", - " # Since the probability that decides which option has the highest Q-value is 1 - epsilon + probability, this decay will make sure that the better option is taken more often in the long run.\n", - " # This allows the algorithm to explore in the early stages and exploit in the later stages.\n", - " decay = 0.9999\n", - "\n", - " # The lowest value that epsilon can go to. Although the decay seems slow, it grows exponentially, and this is magnified when running thousands of episodes.\n", - " epsilon_min = 0.9\n", - "\n", - " gamma = 0.8\n", - "\n", - " for _ in range(num_episodes):\n", - " episode = play_game(env, Q, prob)\n", - "\n", - " epsilon = max(epsilon * decay, epsilon_min)\n", - "\n", - " Q = update_Q(env, episode, Q, alpha, gamma)\n", - "\n", - " prob = update_prob(env, episode, Q, prob, epsilon)\n", - "\n", - " return Q, prob" - ], - "metadata": { - "id": "PnRTa7FZ3RnJ" - }, - "execution_count": 14, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Run First-Visit Monte Carlo Reinforcement Learning Algorithm\n", - "import time\n", - "env = BlackjackEnv()\n", - "\n", - "start_time = time.time()\n", - "new_Q, new_prob = run_mc(env, 1000000)\n", - "end_time = time.time()\n", - "\n", - "print(f'Total time for learning: {end_time - start_time} s.')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UKx3psLBd3Yl", - "outputId": "2181c312-d684-4525-bb48-ed723de63333" - }, - "execution_count": 15, - "outputs": [ - { - "output_type": "stream", - 
"name": "stdout", - "text": [ - "Total time for learning: 230.13713669776917 s.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# @title best_policy() function\n", - "'''\n", - "This function takes the new Q-table returned and keeps only the action that yields the highest value for a given state. This is represented in three different ways: binary, string and colors.\n", - "0 is hit and 1 is stand in the binary representation of the results.\n", - "H is hit and S is stand in the string representation of the results.\n", - "Green is hit and Red is stand in the color representation of the results.\n", - "'''\n", - "def best_policy(Q):\n", - " best_policy_binary = []\n", - " best_policy_string = []\n", - " best_policy_colors = []\n", - " for i in range(len(Q)):\n", - " best_policy_binary.append(np.argmax(Q[i]))\n", - " best_policy_string.append('H' if np.argmax(Q[i]) == 0 else 'S')\n", - " best_policy_colors.append('g' if np.argmax(Q[i]) == 0 else 'r')\n", - "\n", - " return best_policy_binary, best_policy_string, best_policy_colors" - ], - "metadata": { - "id": "MNbxnlFKeRgQ" - }, - "execution_count": 20, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Create DataFrame with Player Value as Rows and Dealer Upcard as Columns\n", - "import pandas as pd\n", - "new_Q_binary, new_Q_string, new_Q_colors = best_policy(new_Q)\n", - "\n", - "df = pd.DataFrame(columns = range(2, 12))\n", - "\n", - "color_df = pd.DataFrame(columns = range(2, 12))\n", - "\n", - "for s in range(3, 21):\n", - " # possible player values in the range 3 to 20\n", - " start = env.observation_space[1].n * (s - 3)\n", - " end = start + 10\n", - " df.loc[s] = (new_Q_string[start:end])\n", - " color_df.loc[s] = (new_Q_colors[start:end])" - ], - "metadata": { - "id": "lZIV2WxWfR9z" - }, - "execution_count": 21, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title Create Chart Graphic for the Results\n", - "import matplotlib.pyplot as plt\n", - 
"\n", - "fig, ax = plt.subplots()\n", - "\n", - "# hide axes\n", - "fig.patch.set_visible(False)\n", - "ax.set_axis_off()\n", - "ax.axis('tight')\n", - "\n", - "ax.table(cellText=df.values, cellColours=color_df.values, cellLoc='center', rowLabels=df.index, colLabels=df.columns, loc='center')\n", - "\n", - "fig.tight_layout()\n", - "\n", - "plt.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "wR8JVHEef5Cf", - "outputId": "02c77629-9a81-4150-9561-c0255ad152ea" - }, - "execution_count": 23, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABM9UlEQVR4nO3de3xU9Z3/8dcM4VaI2HALUw0GbWGgQklVjNoCFcEsjSgsFhdtaNS2FmuRrW3ht+MdEfZRpRcWqnWBNhUftZVIbZBV5GYkSrCp0mYh0FQqKggl0kQllDm/P06SJSGQ5OScOTPf834+HvOAnMye+bx2Evz2nJk5IcuyEBEREZHUF/Z7ABERERFxhxZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIdL8fPBQKJQF9PNzhgTqBtT7PUSCqNVMajVTUFqD0glqNdUhy7L2tXUn3xZ2oVAoKxwO74rH4z38miGRwuEw8Xjc7zESQq1mUquZgtIalE5Qq6nC4fDHoVBoaFuLOz+P2PWLx+M9ioqKiEajPo7hvZKSEmKxGGo1i1rNpFbzBKUT1GqqyspKbrzxxh7YZzmTdmEHQDQaJScnx+8xPFVZWQmo1TRqNZNazROUTlCr6M0TIiIiIsYIxMJu4cKFXHzxxaSnpzNgwACuvfZadu3a5fdYnli2bBkjR47krLPO4qyzziI3N5d169b5PVZCPPzww4RCIebMmeP3KK679957CYVCzW7Dhg3zeyzP7N+/nxtvvJG+ffvSs2dPLrzwQsrLy/0ey3XnnXfeKc9rKBRi9uzZfo/muhMnThCLxcjOzqZnz56cf/75PPDAA1iW5fdorvvHP/7BnDlzGDx4MD179uSyyy5j+/btfo/VaVu2bCE/P59IJEIoFKK4uLjZ9y3L4u6772bQoEH07NmTCRMmUFVV5c+wndRW6zPPPMPEiRPp27cvoVCIiooKX+ZsTSAWdps3b2b27NmUlZXxwgsvcPz4cSZOnEhdXZ3fo7nunHPO4eGHH2bHjh2Ul5fzpS99iSlTpvCnP/3J79E8tX37dn72s58xcuRIv0fxzIgRI3j33Xebbi+//LLfI3niyJEjXH755XTt2pV169bx5z//mR/+8Id88pOf9Hs0123fvr3Zc/rCCy8AMH36dJ8nc9+iRYtYtmwZP/3pT6msrGTRokUsXryYn/zkJ36P5rpbbrmFF154gV/+8pe8+eabTJw4kQkTJrB//36/R+uUuro6Ro0axdKlS1v9/uLFi/nxj3/M8uXLefXVV+nVqxeTJk3i448/TvCknddWa11dHVdccQWLFi1K8GRt8/01donw/PPPN/t65cqVDBgwgB07dvDFL37Rp6m8kZ+f3+zrBQsWsGzZMsrKyhgxYoRPU3mrtraWmTNn8vjjj/Pggw/6PY5n0tLSyMzM9HsMzy1atIhzzz2XFStWNG3Lzs72cSLv9O/fv9nXDz/8MOeffz5jx471aSLvvPLKK0yZMoXJkycD9tHK1atX89prr/k8mbs++ugjfvvb3/Lss882/ffl3nvv5Xe/+x3Lli1L6X+j8vLyyMvLa/V7lmWxZMkS/uM//oMpU6YA8Itf/IKBAwdSXFzMjBkzE
jlqp52pFeCmm24C4K9//WuCJmq/QByxa+mDDz4AICMjw+dJvHXixAmeeuop6urqyM3N9Xscz8yePZvJkyczYcIEv0fxVFVVFZFIhCFDhjBz5kz27Wvz44xS0tq1a7nooouYPn06AwYMYPTo0Tz++ON+j+W5+vp6ioqKKCwsJBQK+T2O6y677DI2bNjA7t27AfjjH//Iyy+/fMb/eKaif/7zn5w4cYIePZp/klfPnj2NPcoOUF1dzXvvvdfs3+E+ffowZswYtm3b5uNkwROII3Yni8fjzJkzh8svv5zPfvazfo/jiTfffJPc3Fw+/vhjevfuzZo1axg+fLjfY3niqaee4vXXXzfi9StnMmbMGFauXMnQoUN59913ue+++/jCF77Azp07SU9P93s8V/3lL39h2bJlzJ07l/nz57N9+3buuOMOunXrRkFBgd/jeaa4uJiamhpmzZrl9yie+MEPfsDRo0cZNmwYXbp04cSJEyxYsICZM2f6PZqr0tPTyc3N5YEHHiAajTJw4EBWr17Ntm3buOCCC/wezzPvvfceAAMHDmy2feDAgU3fk8QI3MJu9uzZ7Ny50+j/5TR06FAqKir44IMP+M1vfkNBQQGbN282bnH3t7/9je985zu88MILp/yvY9OcfFRj5MiRjBkzhsGDB/PrX/+am2++2cfJ3BePx7nooot46KGHABg9ejQ7d+5k+fLlRi/snnjiCfLy8ohEIn6P4olf//rX/OpXv+LJJ59kxIgRVFRUMGfOHCKRiHHP6y9/+UsKCwv51Kc+RZcuXcjJyeGGG25gx44dfo8mARCoU7G33347zz33HBs3buScc87xexzPdOvWjQsuuIDPf/7zLFy4kFGjRvGjH/3I77Fct2PHDg4ePEhOTg5paWmkpaWxefNmfvzjH5OWlsaJEyf8HtEzZ599Np/5zGfYs2eP36O4btCgQaf8j5BoNGrsqWeAt956ixdffJFbbrnF71E8c9ddd/GDH/yAGTNmcOGFF3LTTTdx5513snDhQr9Hc93555/P5s2bqa2t5W9/+xuvvfYax48fZ8iQIX6P5pnG1/8eOHCg2fYDBw4E4rXBySQQCzvLsrj99ttZs2YNL730krEvxD6deDzOsWPH/B7DdVdeeSVvvvkmFRUVTbeLLrqImTNnUlFRQZcuXfwe0TO1tbXs3buXQYMG+T2K6y6//PJTPo5o9+7dDB482KeJvLdixQoGDBjQ9MYCE3344YeEw83/k9OlSxejLwfVq1cvBg0axJEjR1i/fn3TmwpMlJ2dTWZmJhs2bGjadvToUV599VWjX+OdjAJxKnb27Nk8+eSTPPvss6Snpzed7+/Tpw89e/b0eTp3zZs3j7y8PLKysvjHP/7Bk08+yaZNm1i/fr3fo7kuPT39lNdJ9urVi759+xr3+snvfve75OfnM3jwYN555x3uueceunTpwg033OD3aK678847ueyyy3jooYe4/vrree2113jsscd47LHH/B7NE/F4nBUrVlBQUEBamrn/JOfn57NgwQKysrIYMWIEf/jDH3jkkUcoLCz0ezTXrV+/HsuyGDp0KHv27OGuu+5i2LBhfO1rX/N7tE6pra1tdpagurqaiooKMjIyyMrKYs6cOTz44IN8+tOfJjs7m1gsRiQS4dprr/VvaIfaav373//Ovn37eOeddwCa/sdoZmam70cozf1X5CTLli0DYNy4cc22r1ixwrgXKh88eJCvfvWrvPvuu/Tp04eRI0eyfv16rrrqKr9Hk054++23ueGGGzh8+DD9+/fniiuuoKys7JSPyzDBxRdfzJo1a5g3bx73338/2dnZLFmyxLgX2Td68cUX2bdvn5ELnJP95Cc/IRaL8a1vfYuDBw8SiUT4xje+wd133+33aK774IMPmDdvHm+//TYZGRlMmzaNBQsW0LVrV79H65Ty8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPPp+Rro
NtqXbt2bbOFeuPHudxzzz3ce++9CZ21pUAs7Ez8ZPPTeeKJJ/wewVebNm3yewRPPPXUU36PkFBf/vKX+fKXv+z3GAkxceLEQPwblZ6ezpIlS1iyZInfo3ju+uuv5/rrr/d7DNeNGzfujD+roVCI+++/n/vvvz+BU3mjrdZZs2Yl7YGhQLzGTkRERCQItLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYot3vig2FQllAPxcfexhASUkJlZWVLu42+ZSWlgJqNY1azaRW8wSlE9Rqqurq6nbfN9Set9mHQqGscDi8Kx6Pu/thNCHA/Hf529RqJrWaSa3mCUonqNVUdmuuZVllZ7pbe4/Y9YvH4z2KioqIRqOdHw57hR2LxWAq7h4HTEZVwEbUahq1mkmt5glKJ6jVVIeAZwCob+uuHfqA4mg0Sk5OjrOhWmg6bNoPiLiyy+R1qOFPtZpFrWZSq3mC0glqFb15QkRERMQUjhZ2y5YtY+TIkZx11lmcddZZ5Obmsm7dOrdnc88aYHUr26uBe4GPEjqNt9Sq1lQWlE5QK6g11ak1KVsdLezOOeccHn74YXbs2EF5eTlf+tKXmDJlCn/605/cnk9ERERE2qlDr7FrlJ+f3+zrBQsWsGzZMsrKyhgxYoQrg4mIiIhIxzha2J3sxIkTPP3009TV1ZGbm+vGTCIiIiLigOOF3Ztvvklubi4ff/wxvXv3Zs2aNQwfPtzN2dy1G1jQYpupn32jVjMFpTUonaBWtaY+tSYdxwu7oUOHUlFRwQcffMBvfvMbCgoK2Lx5c/Iu7rKByS227afxc2HMola1prKgdIJa1Zr61Jp0rY4Xdt26deOCCy4A4POf/zzbt2/nRz/6ET/72c9cG85VXYG+LbYd9WOQBFCrmYLSGpROUKtaU59ak45rn2MXj8c5duyYW7sTERERkQ5ydMRu3rx55OXlkZWVxT/+8Q+efPJJNm3axPr1692eT0RERETaydHC7uDBg3z1q1/l3XffpU+fPowcOZL169dz1VVXuT2fiIiIiLSTo4XdE0884fYc3rruNNuzsT8x2iRqVWsqC0onqBXUmurUmpStulasiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEB1680RJSQmVlZWuPHBpaan9lyrgkCu7TF77Gv5Uq1nUaia1miconaBWUx1p/11DltX2hc5CodCl4XB4Wzwe78RUpwoD7u4xiYVIymvKeUKtZgpQq/5tMlBQOiFQrUH6XW1ozbUsq+xM92vvEbv6eDxOUVER0Wi089NhH/2LxWIUAe7sMXmVADELmAr083kYr1UBG1GraQLWGt+I/m0yScB+foPUGpTf1UrgRvuv9W3dt0OnYqPRKDk5OY6GaqnxlG4UcGePyavp5HU/IOLjIInQeDhcrWYJYKv+bTJIAH9+g9QahN/VjtCbJ0REREQM4crC7uGHHyYUCjFnzhw3dueJ94HbgCygO5AJTAJK/RzKC2uA1a1sr8b+dOyPEjqNt9RqXmtQOk8SiH+bgvS8qtXMVlLnd9XRJcVOtn37dn72s58xcuRIN+bxzDTsE9OrgCHAAWADcNjPoUQk8PRvk0hqSJXf1U4t7Gpra5k5cyaPP/44Dz74oFszua4G2ApsAsY2bBsMXOLTPCIioH+bRFJFDanzu9qpU7GzZ89m8uTJTJgwwa15PNG74VYMHPN3FBGRJvq3SSQ1pNLvquMjdk899RSvv/4627dvd3MeT6QBK4FbgeXY754ZC8wAkvsEskO7gQUttpn6mUZqNU9QOgnYv00Bel7V6scg3kql31VHC7u//e1vfOc73+GFF16gR48ebs/kiWnAZOxDqWXAOmAx8HNgln9jeSMbO/Zk+4FnfJjFa2o1rzUonQ0C829TkJ5XtRrZmiq/q44Wdjt27ODgwYPNPtPuxIkTbNmyhZ/+9KccO3aMLl26uDakW3oAVzXcYsAtwD0k1xPiiq5A3xbbjvoxSAKo1
TxB6TxJIP5tCtLzqlZjpcLvqqOF3ZVXXsmbb77ZbNvXvvY1hg0bxve///2kXNS1Zjj2+XIRkWSif5tEUkMy/q46Wtilp6fz2c9+ttm2Xr160bdv31O2J4PDwHSgEPtceDpQjn0IdYqPc4lIsOnfJpHUkEq/q53+HLtU0BsYAzwK7AWOA+divwhyvo9ziUiw6d8mkdSQSr+rri3sNm3a5NauXNcdWNhwM951p9mejf1J4CZRq3mtQelsEJh/m4L0vKrVyNZU+l3VtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNv6lCjjk4yCJsK/hT7WaJYCt+rfJIAH8+Q1SaxB+V6s7cN+QZbV9UbdQKHRpOBzeFo/HnU/VijDg7h6TWAgjr5/XGj2vhgpQq36GzaPn1ExBel4bWnMtyyo70/3ae8SuPh6PU1RURDQa7fx02Ef/YrEYRYA7e0xeJUDMAqYC/XwexmtVEN+InlfTVAEbCUyrfoYNo+fUTAF6XiuBG+2/1rd13w6dio1Go82uD9sZjad0o4A7e0xeTYeI+wERHwdJhIZD/3peDdN4SidArfoZNoieUzMF6HntCL15QkRERMQQjhZ29957L6FQqNlt2LBhbs/mqveB24As7E+QzgQmcdKLh02xBljdyvZq7E8C/yih03hOzytmPa9B6TxJIH6GA/a8BuI5BT2vJOfz6viSYiNGjODFF1/8vx2lJfdlZ6dhn5heBQwBDgAbsC/sK6lLz6ukOv0Mm0fPqZlS5Xl1vBpLS0sjMzPTzVk8UwNsBTYBYxu2DQYu8WkecUcNel4ltdWgn2HT1KDn1EQ1pM7z6vg1dlVVVUQiEYYMGcLMmTPZt29f2/9HPundcCsGjvk7irhIz6ukOv0Mm0fPqZlS6Xl1dMRuzJgxrFy5kqFDh/Luu+9y33338YUvfIGdO3eSnp7u9oydlgasBG4FlmO/e2YsMAMY6d9Y3tkNLGixzcDPNNLzipHPa2A6CdjPcECe10A9p6DnleR7Xh0t7PLy8pr+PnLkSMaMGcPgwYP59a9/zc033+zacG6aBkzGPpRaBqwDFgM/B2b5N5Y3srFjT7YfeMaHWTym5xXzntegdDYIzM9wgJ7XwDynoOeV5HteXfm4k7PPPpvPfOYz7Nmzx43deaYHcBUQA17BfiLu8XMgr3QF+ra4Jd+BVNfoeTVMUDpPEoif4YA9r4F4TkHPK8n3vLqysKutrWXv3r0MGjTIjd0lzHCgzu8hxHV6XiXV6WfYPHpOzZSMz6ujU7Hf/e53yc/PZ/Dgwbzzzjvcc889dOnShRtuuMHt+VxxGJgOFGKfC08HyrEPoU7xcS7pHD2vkur0M2wePadmSqXn1dHC7u233+aGG27g8OHD9O/fnyuuuIKysjL69+/v9nyu6A2MAR4F9gLHgXOxXwQ538e5pHP0vEqq08+wefScmimVnldHC7unnnrK7Tk81R1Y2HAz3nWn2Z6N/UngBtHzinnPa1A6GwTmZzhAz2tgnlPQ85qkdK1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lJ7n4A7e0xepY1/qQIO+ThIIjRcMljPq2EaLwUdoFb9DBtEz6mZAvS8VnfgviHLavuibqFQ6NJwOLwtHo87n6oVYcDdPSavILUSwshrBbZKrWZSq3mC0glqNZXdmmtZVtmZ7tbeI3b18XicoqIiotFo54fDPvoXi8UoAtzZY/Iqwb78SGBaLWAq0M/nYbxWBWxEraZRq3mC0glqNdUhGq+/W9/WXTt0KjYajZKTk+NsqBYaT+lGAXf2mLwaDxEHqZV+QMTHQRKh8TSHWs2iVvMEpRPUKnrzhIiIiIgptLATERERMYTjhd3+/fu58cYb6du3Lz179uTCCy+kvLzczdlc9T5wG5CFfWmQTGASJ72DyCCBaV0DrG5lezX25
Ww+Sug03gpKa1A6Qa2g1lSn1qRsdXSt2CNHjnD55Zczfvx41q1bR//+/amqquKTn/yk2/O5Zhr2Kw5XAUOAA8AG4LCfQ3kkSK0iIiLyfxwt7BYtWsS5557LihUrmrZlZ2e7NpTbaoCtwCZgbMO2wcAlPs3jpRqC0yoiIiLNOToVu3btWi666CKmT5/OgAEDGD16NI8//rjbs7mmd8OtGDjm7yieC1KriIiINOfoiN1f/vIXli1bxty5c5k/fz7bt2/njjvuoFu3bhQUFLg9Y6elASuBW4Hl2B85MhaYAYz0byxPBKkVgN3AghbbTP2wyqC0BqUT1KrW1KfWpONoYRePx7nooot46KGHABg9ejQ7d+5k+fLlSbmwA/t1Z5OxT1OWAeuAxcDPgVn+jeWJILWSjR17sv00fpCjWYLSGpROUKtaU59ak67V0cJu0KBBDB8+vNm2aDTKb3/7W1eG8koP4KqGWwy4BbgHAxc7BKi1K9C3xbajfgySAEFpDUonqFWtqU+tScfRa+wuv/xydu3a1Wzb7t27GTx4sCtDJcpwoM7vIRIkSK0iIiJB5eiI3Z133slll13GQw89xPXXX89rr73GY489xmOPPeb2fK44DEwHCrFfZ5YOlGOfnpzi41xeCFKriIiINOdoYXfxxRezZs0a5s2bx/333092djZLlixh5syZbs/nit7AGOBRYC9wHDgX+w0G832cywtBahUREZHmHC3sAL785S/z5S9/2c1ZPNMdWNhwM12QWrnuNNuzsT8J3CRBaQ1KJ6gV1Jrq1JqUrbpWrIiIiIghtLATERERMYQWdiIiIiKG0MJORERExBAdevNESUkJlZWVrjxwaWmpvU/AnT0mr9KGP4PUShVwyMdBEmFfw59qNYtazROUTlCrqY60/64hy2r7QmehUOjScDi8LR6Pd2Kq1nZMUl5nzRNqNZNazRSg1jDg8r/sySlAz6laDWW35lqWVXamu7X3iF19PB6nqKiIaDTa+eGwj/7FYjGYCvRzZZfJqwrYiFpNo1YzBaw1vhGKAHf+ZU9OJUDMIjDPaZB+fgPTeojGa9LWt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh4jVaha1mimArVHAnX/Zk1PTS2AC9JyqNbj05gkRERERQzha2J133nmEQqFTbrNnz3Z7PnesAVa3sr0a+xOjP0roNN5Sq1pTWVA6IVitwPvAbUAW9hVyMoFJnPSGK1ME6XlVa1K2Orqk2Pbt2zlx4kTT1zt37uSqq65i+vTprg0mIiLmmIb94qBVwBDgALABOOznUCIGcrSw69+/f7OvH374Yc4//3zGjh3rylAiImKOGmArsAlo/K/EYOASn+YRMVmnX2NXX19PUVERhYWFhEIhN2YSERGD9G64FQPH/B1FxHiOjtidrLi4mJqaGmbNmuXCOB7aDSxosc3Uz75Rq5mC0hqUTghMaxqwErgVWI79DtyxwAxgpH9jeScgzyug1iRs7fTC7oknniAvL49IJMnfa5wNTG6xbT+NnwtjFrWqNZUFpRMC1ToNO3UrUAasAxYDPwdm+TeWNwL0vKqVpGvt1MLurbfe4sUXX+SZZ5KsqjVdgb4tth31Y5AEUKuZgtIalE4IVivQA7iq4RYDbgHuwcCFXZCeV7UmnU69xm7FihUMGDCAyZNbLmFFRETObDhQ5/cQIoZxfMQuHo+zYsUKCgoKSEvr9BldEREx1GFgOlCI/Zq6dKAc+1TsFB/nEjGR4xXZiy++yL59+ygsLHRzHhERMUxvYAzwKLAXOA6ci/1mivk+ziViIscLu4kTJ2JZSfh2kNZcd5rt2difGG0Stao1lQWlEwLV2h1Y2HAzXoCeV7WSlK26VqyIiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQHXrzRElJCZWVla48cGlpqf2XKuCQK7tMXvsa/lSrWdRqpgC2lgDu/MuenEob/xKg51SthjnS/ruG2vPO1lAodGk4H
N4Wj8c7MVVrOyYpr7PmCbWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkM0XpO2vq27duhUbDQaJScnx9lQLTSd0u0HRFzZZfJqPESsVrOo1UxqNU9QOkGtojdPiIiIiJjC0cLuxIkTxGIxsrOz6dmzJ+effz4PPPBA8l6JYg2wupXt1difGP1RQqfxllrVmsqC0glqBbWmOrUmZaujS4otWrSIZcuWsWrVKkaMGEF5eTlf+9rX6NOnD3fccYfbM4qIiIhIOzha2L3yyitMmTKFyZMnA3DeeeexevVqXnvtNVeHExEREZH2c3Qq9rLLLmPDhg3s3r0bgD/+8Y+8/PLL5OXluTqciIiIiLSfoyN2P/jBDzh69CjDhg2jS5cunDhxggULFjBz5ky353PPbmBBi21J+pLATlOrmYLSGpROUKtaU59ak46jhd2vf/1rfvWrX/Hkk08yYsQIKioqmDNnDpFIhIKCArdndEc2MLnFtv00fi6MWdSq1lQWlE5Qq1pTn1qTrtXRwu6uu+7iBz/4ATNmzADgwgsv5K233mLhwoXJu7DrCvRtse2oH4MkgFrNFJTWoHSCWtWa+tSadBy9xu7DDz8kHG7+f9qlSxdcv+SYiIiIiLSboyN2+fn5LFiwgKysLEaMGMEf/vAHHnnkEQoLC92eT0RERETaydHC7ic/+QmxWIxvfetbHDx4kEgkwje+8Q3uvvtut+cTERERkXZytLBLT09nyZIlLFmyxOVxPHLdabZnY39itEnUqtZUFpROUCuoNdWpNSlbda1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lL7L1XAIVd2mbz2NfypVrOo1UxqNU9QOkGtpjrS/ruGLKvtC52FQqFLw+HwNtc/gDhEUl5nzRNqNZNazaRW8wSlEwLVGgaCcmmEhtZcy7LKznS/9h6xq4/H4xQVFRGNRjs/HfbRv1gsBlOBfq7sMnlVARtRq2nUaia1miconRC41vhGKALcWZkkr0rgRvuv9W3dt0OnYqPRKDk5OY6GaqnplG4/IOLKLpNX4yFitZpFrWZSq3mC0gmBbI0C7qxMzKA3T4iIiIgYwvHC7h//+Adz5sxh8ODB9OzZk8suu4zt27e7OZt71gCrW9lejf2J0R8ldBpvqVWtqSwonaBWUGuqC1Ir8D5wG5AFdAcygUlAqZ9DtcLRJcUAbrnlFnbu3Mkvf/lLIpEIRUVFTJgwgT//+c986lOfcnNGEREREV9Nw36B2ypgCHAA2AAc9nOoVjg6YvfRRx/x29/+lsWLF/PFL36RCy64gHvvvZcLLriAZcuWuT2jiIiIiG9qgK3AImA8MBi4BJgHXOPfWK1ytLD75z//yYkTJ+jRo0ez7T179uTll192ZTARERGRZNC74VYMHPN3lDY5OhWbnp5Obm4uDzzwANFolIEDB7J69Wq2bdvGBRdc4PaM7tgNLGixzdTP+VGrmYLSGpROUKtaU19AWtOAlcCtwHLsd+GOBWYAI/0bq1WOX2P3y1/+ksLCQj71qU/RpUsXcnJyuOGGG9ixY4eb87knG5jcYtt+4BkfZvGaWtWayoLSCWpVa+oLUOs07NStQBmwDlgM/ByY5d9Yp3C8sDv//PPZvHkzdXV1HD16lEGDBvGVr3yFIUOGuDmfe7oCfVtsO+rHIAmgVjMFpTUonaBWtaa+ILUCPYCrGm4x4BbgHpJrYdfpz7Hr1asXgwYN4siRI6xfv54pU6a4MZeIiIhIUhsO1Pk9RAuOj9itX78ey7IYOnQoe/bs4a677mLYsGF87Wtfc3M+EREREV8dBqYDhdivqUsHyrFPxSbb4SzHC7sPPviAefPm8fbbb5ORkcG0adNYsGABXbt2dXM+EREREV/1BsYAjwJ7gePAudhvppjv41ytcbywu/7667n++
uvdnMU7151mezb2p2ObRK1qTWVB6QS1glpTXYBauwMLG27JTteKFRERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDdOhdsSUlJVRWVrrywKWlpfZfqoBDruwyee1r+FOtZlGrmdRqnqB0QiBbSwB3VibJq7oD9w1ZVttX6w2FQpeGw+Ft8Xjc+VSt7hgjLxbcKrWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkPAMwDUt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh8PVaha1mkmt5glKJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLiZt+3LIu7776bQYMG0bNnTyZMmEBVVVVn53VmDbC6le3V2Jc9+Sih03hLrWpNZUHpBLWCWlOdWpOy1fHCrq6ujlGjRrF06dJWv7948WJ+/OMfs3z5cl599VV69erFpEmT+Pjjjx0PKyIiIiKn16HX2J0sLy+PvLy8Vr9nWRZLlizhP/7jP5gyZQoAv/jFLxg4cCDFxcXMmDHD6cOKiIiIyGl48hq76upq3nvvPSZMmNC0rU+fPowZM4Zt27Z58ZAiIiIigef4iN2ZvPfeewAMHDiw2faBAwc2fS/hdgMLWmwz9bNv1GqmoLQGpRPUqtbUp9ak48nCLillA5NbbNtP4+fCmEWtak1lQekEtao19ak16Vo9WdhlZmYCcODAAQYNGtS0/cCBA3zuc5/z4iHb1hXo22LbUT8GSQC1mikorUHpBLWqNfWpNel48hq77OxsMjMz2bBhQ9O2o0eP8uqrr5Kbm+vFQ4qIiIgEnuMjdrW1tezZs6fp6+rqaioqKsjIyCArK4s5c+bw4IMP8ulPf5rs7GxisRiRSIRrr73WjblFREREpAXHC7vy8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPP06NHj85PLSIiIiKncLywGzduHJZ1+reDhEIh7r//fu6//36nD+Ge606zPRv7E6NNola1prKgdIJaQa2pTq1J2aprxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2n+pAg65ssvkta/hT7WaRa1mUqt5gtIJajXVkfbfNXSmd7Y23SkUujQcDm+Lx+OdmKq1HZOU11nzhFrNpFYzqdU8QekEtZrKbs21LKvsTHdr7xG7+ng8TlFREdFotPPDYR/9i8ViMBXo58ouk1cVsBG1mkatZlKreYLSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFppO6fYDIq7sMnk1HiJWq1nUaia1miconaBW0ZsnREREREzheGG3ZcsW8vPziUQihEIhiouLm33/mWeeYeLEifTt25dQKERFRUUnR+2ENcDqVrZXY39i9EcJncZbalVrKgtKJ6gV1Jrq1JqUrY4XdnV1dYwaNYqlS5ee9vtXXHEFixYtcjyciIiIiLSf42vF5uXlkZeXd9rv33TTTQD89a9/dfoQIiIiItIBeo2diIiIiCEcH7FLObuBBS22mfrZN2o1U1Bag9IJalVr6lNr0gnOwi4bmNxi234aPxfGLGpVayoLSieoVa2pT61J1xqchV1XoG+LbUf9GCQB1GqmoLQGpRPUqtbUp9ako9fYiYiIiBjC8RG72tpa9uzZ0/R1dXU1FRUVZGRkkJWVxd///nf27dvHO++8A8CuXbsAyMzMJDMzs5Nji4iIiEhLjo/YlZeXM3r0aEaPHg3A3LlzGT16NHfffTcAa9euZfTo0UyebJ+QnjFjBqNHj2b58uUujC0iIiIiLTk+Yjdu3Dgs6/RvB5k1axazZs1yunt3XXea7dnYnxhtErWqNZUFpRPUCmpNdWpNyla9xk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkh
MrKSlceuLS01P5LFXDIlV0mr30Nf6rVLGo1k1rNE5ROUKupjrT/rqEzvbO16U6h0KXhcHhbPB7vxFSt7ZikvM6aJwLUGgZc/klJWmo1VIB+XwPTGpROUKup7NZcy7LKznS39h6xq4/H4xQVFRGNRjs/HPbRv1gsBlOBfq7sMnlVARsJTGt8IxQB7vykJK8SIIZaTVMCxCwC8/saiH+bgtIJajXVIRqvSVvf1l07dCo2Go2Sk5PjbKgWmk7p9gMiruwyeTUeIg5QaxRw5ycleTW+KEGtZml6sUmAfl+Nbw1KJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLipu8dP36c73//+1x44YX06tWLSCTCV7/61abrxibcGmB1K9ursT8x+qOETuOtILUC7wO3AVlAdyATmASU+jmUR9RqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl57yvQ8//JDXX3+dWCzG66+/zjPPPMOuXbu45pprOjWsSEvTgD8Aq4DdwFpgHHDYx5m8olYzW0VE3OT4WrF5eXnk5eW1+r0+ffrwwgsvNNv205/+lEsuuYR9+/aRlZXl9GFFmtQAW4FNwNiGbYOBS3yax0s1qNXEVhERtyXsNXYffPABoVCIs88+O1EPKYbr3XArBo75O4rn1CoiIu3h+IhdR3z88cd8//vf54YbbuCss85KxEOeajewoMU2Uz/7JiCtacBK4FZgOfa7NccCM4CR/o3lCbWa2RqU31VArWpNfSnS6vnC7vjx41x//fVYlsWyZcu8frjTywYmt9i2n8bPhTFLgFqnYaduBcqAdcBi4OfALP/G8oRaDWwN0O+qWlFrqkuRVk8Xdo2LurfeeouXXnrJv6N1AF2Bvi22HfVjkAQIUivQA7iq4RYDbgHuwbAFQAO1GtYapN9VtZpJrUnHs9fYNS7qqqqqePHFF+nbt+X/N0S8MRyo83uIBFGriIiczPERu9raWvbs2dP0dXV1NRUVFWRkZDBo0CD+9V//lddff53nnnuOEydO8N577wGQkZFBt27dOj+5BN5hYDpQiP3aq3SgHPuU3RQf5/KCWs1sFRFxm+OFXXl5OePHj2/6eu7cuQAUFBRw7733snbtWgA+97nPNfu/27hxI+PGjXP6sCJNegNjgEeBvcBx4FzsF93P93EuL6jVzFYREbc5XtiNGzcOyzr920HO9L2Eu+4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ+1592ooFLo0HA5vi8fjnZiqtR2TlNdZ80SAWsOAyz8pSUuthgrQ72tgWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxWAq0M+VXSavKmAjgWmNb4QiwJ2flORVgn2pK7WapQSIWQTm9zUQ/zYFpRPUaqpDNF6Ttr6tu3boVGw0GiUnJ8fZUC00ndLtB0Rc2WXyajxEHKDWKODOT0ryajwlqVazNJ1qDtDvq/GtQekEtYrePCEiIiJiCi3sRERERAzheGG3ZcsW8vPziUQihEIhiouLm33/3nvvZdiwYfTq1YtPfvKTTJgwgVdffbWz8zqzBljdyvZq7EuBfJTQabwVpFbgfeA2IAv7UlSZwCROemejQdRqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl7b6/c985jP89Kc/5c033+Tll1/mvPPOY+LEibz//vuOhxVpaRrwB2AVsBtYC4wDDvs4k1fUamariIibOvTmiZPl5eWRl5d32u//27/9W7OvH3nkEZ544gneeOMNrrzySqcPK9KkBtgKbALGNmwbDFzi0zxeqkGtJraKiLgtIa+xq6+v57HHHqNPnz6MGjUqEQ8pAdC74VYMHPN3FM+pV
URE2sPxEbv2eO6555gxYwYffvghgwYN4oUXXqBfP58+bGY3sKDFNlM/1DAgrWnASuBWYDn2x3CMBWYAI/0byxNqNbM1KL+rgFrVmvpSpNXThd348eOpqKjg0KFDPP7441x//fW8+uqrDBgwwMuHbV02MLnFtv00fuCfWQLUOg07dStQBqwDFgM/B2b5N5Yn1Gpga4B+V9WKWlNdirR6eiq2V69eXHDBBVx66aU88cQTpKWl8cQTT3j5kKfXFejb4pbuzyieC1Ir0AO4CvvKCK9g/4f/Hj8H8pBaDROk31W1mkmtSSehn2MXj8c5dkyvmhFvDQfq/B4iQdQqIiInc3wqtra2lj179jR9XV1dTUVFBRkZGfTt25cFCxZwzTXXMGjQIA4dOsTSpUvZv38/06dPd2VwkcPAdKAQ+7VX6UA59im7KT7O5QW1mtkqIuI2xwu78vJyxo8f3/T13LlzASgoKGD58uX87//+L6tWreLQoUP07duXiy++mK1btzJixIjOTy2C/c7JMcCjwF7gOHAu9ovu5/s4lxfUamariIjbHC/sxo0bh2Wd/u0gzzyTRK8mvO4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ2d6Z2vTnUKhS8Ph8LZ4PN6JqU4VBtzdY/JSq5nUaqgQSXkNSE8EpTUonaBWU9mtuZZllZ3pbu09Ylcfj8cpKioiGo12fjjso3+xWIwiwJ09Jq8S7MsiqdUsajVTCRCzgKlAP5+H8VoVsBHzW4PSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFhpP6UYBd/aYvBpPXanVLGo1U9Op5n5AxMdBEqHx9JXprUHpBLWK3jwhIiIiYgrHC7stW7aQn59PJBIhFApRXFx82vt+85vfJBQKsWTJEqcP12nvA7cBWdifbJ8JTOKkF0obRK1qTXWBaF0DrG5lezX2J9l/lNBpvKVWtaa6FGp1fEmxuro6Ro0aRWFhIVOnTj3t/dasWUNZWRmRiL/HSadhn5heBQwBDgAbsC84bhq1qjXVBalVRMRNjhd2eXl55OXlnfE++/fv59vf/jbr169n8uTJTh+q02qArcAmYGzDtsHAJT7N46Ua1KrW1FZDcFpFRNzm2Wvs4vE4N910E3fddRcjRozw6mHapXfDrRg45usk3lOrmdQqIiLt4fiIXVsWLVpEWload9xxh1cP0W5pwErgVmA59rv6xgIzgJH+jeUJtao11QWpld3AghbbTP1MLrWaSa1Jx5OF3Y4dO/jRj37E66+/TigU8uIhOmwaMBn7FE8ZsA5YDPwcmOXfWJ5Qq1pTXWBas7FDT7afxs+rMota1ZrqUqTVk1OxW7du5eDBg2RlZZGWlkZaWhpvvfUW//7v/855553nxUO2Sw/gKuwPWn0F+z8Q9/g2jbfUaia1GqYr0LfFLd3XibyjVjOpNel4srC76aabeOONN6ioqGi6RSIR7rrrLtavX+/FQzoyHKjze4gEUauZ1CoiIidzfCq2traWPXv2NH1dXV1NRUUFGRkZZGVl0bdv32b379q1K5mZmQwdOtT5tA4dBqYDhdiv0UkHyrFP7UxJ+DTeUqtaU12QWkVE3OZ4YVdeXs748eObvp47dy4ABQUFrFy5stODuak3MAZ4FNgLHAfOxX5x9nwf5/KCWtWa6oLUKiLiNscLu3HjxmFZ7X87yF//+lenD9Vp3YGFDTfTqdVMajXQdafZno39SfYmUataU10KtepasSIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUN06M0TJSUlVFZWuvLApaWl9j4Bd/aYvEob/lSrWdRqpsZWqoBDPg6SCPsa/jS9NSidoFZTHWn/XUPteWdrKBS6NBwOb
4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcXNvj9r1ixCoVCz29VXX93ZeR17H7gNyML+ZPtMYBInvVDaIGpVa6oLROsaYHUr26uxP8n+o4RO4y21qjXVpVCr40uK1dXVMWrUKAoLC5k6dWqr97n66qtZsWJF09fdu3d3+nCdNg37xPQqYAhwANiAfcFx06hVrakuSK0iIm5yvLDLy8sjLy/vjPfp3r07mZmZTh/CNTXAVmATMLZh22DgEp/m8VINalVraqshOK0iIm7z9DV2mzZtYsCAAQwdOpTbbruNw4f9+d/bvRtuxcAxXyZIHLWaSa0iItIejo/YteXqq69m6tSpZGdns3fvXubPn09eXh7btm2jS5cuXj1sq9KAlcCtwHLsd/WNBWYAIxM6iffUqtZUF6RWdgMLWmwz9TO51GomtSYdzxZ2M2bMaPr7hRdeyMiRIzn//PPZtGkTV155pVcPe1rTgMnYp3jKgHXAYuDnwKyET+Mttao11QWmNRs79GT7afy8KrOoVa2pLkVaE/ZxJ0OGDKFfv37s2bMnUQ95ih7AVdgftPoK9n8g7vFtGm+p1UxqNUxXoG+LW7qvE3lHrWZSa9JJ2MLu7bff5vDhwwwaNChRD9mm4UCd30MkiFrNpFYRETmZ41OxtbW1zY6+VVdXU1FRQUZGBhkZGdx3331MmzaNzMxM9u7dy/e+9z0uuOACJk2a5MrgHXEYmA4UYr9GJx0oxz61MyXh03hLrWpNdUFqFRFxm+OFXXl5OePHj2/6eu7cuQAUFBSwbNky3njjDVatWkVNTQ2RSISJEyfywAMP+PJZdr2BMcCjwF7gOHAu9ouz5yd8Gm+pVa2pLkitIiJuc7ywGzduHJZ1+reDrF+/3umuXdcdWNhwM51azaRWA113mu3Z2J9kbxK1qjXVpVCrrhUrIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExRIfePFFSUkJlZaUrD1xaWmrvE3Bnj8mrtOFPtZpFrWZqbKUKOOTjIImwr+FP01uD0glqNdWR9t81dKZ3tjbdKRS6NBwOb4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcWn3KeyspJrrrmGPn360KtXLy6++GL27dt36s4S4H3gNiAL+5PtM4FJnPRCaYOoVa2pLhCta4DVrWyvxv4k+48SOo231KrWVJdCrY4vKVZXV8eoUaMoLCxk6tSpp3x/7969XHHFFdx8883cd999nHXWWfzpT3+iR48enRrYqWnYJ6ZXAUOAA8AG7AuOm0atak11QWoVEXGT44VdXl4eeXl5p/3+//t//49/+Zd/YfHixU3bzj//fKcP1yk1wFZgEzC2Ydtg4BJfpvFWDWpVa2qrITitIiJu8+Q1dvF4nN///vd85jOfYdKkSQwYMIAxY8a0ero2EXo33IqBY75MkDhqNZNaRUSkPRwfsTuTgwcPUltby8MPP8yDDz7IokWLeP7555k6dSobN25k7Nixbe/ERWnASuBWYDn2u/rGAjOAkQmdxHtqVWuqC1Iru4EFLbaZ+plcajWTWpOOJwu7xg8ynjJlC
nfeeScAn/vc53jllVdYvnx5whd2YL9mZzL2KZ4yYB2wGPg5MCvh03hLrWpNdYFpzcYOPdl+Gj+vyixqVWuqS5FWT07F9uvXj7S0NIYPH95sezQa9e1dsQA9gKuwP2j1Fez/QNzj2zTeUquZ1GqYrkDfFrd0XyfyjlrNpNak48nCrlu3blx88cXs2rWr2fbdu3czePBgLx7SkeFAnd9DJIhazaRWERE5meNTsbW1tezZs6fp6+rqaioqKsjIyCArK4u77rqLr3zlK3zxi19k/PjxPP/88/zud79j06ZNbszdIYeB6UAh9mt00oFy7FM7UxI+jbfUqtZUF6RWERG3OV7YlZeXM378+Kav586dC0BBQQErV67kuuuuY/ny5SxcuJA77riDoUOH8tvf/pYrrrii81N3UG9gDPAosBc4DpyL/eLs+QmfxltqVWuqC1KriIjbHC/sxo0bh2Wd+e0ghYWFFBYWOn0I13QHFjbcTKdWM6nVQNedZns29ifZm0Stak11KdSqa8WKiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCE69K7YkpISKisrXXng0tJSe5+AO3tMXqUNf6rVLGo1U2MrVcAhHwdJhMYLAZneGpROUKupjrT/rqG2PrIEIBQKXRoOh7c1XgPWLWHA3T0mL7WaSa1mUqt5gtIJwWolBLS9jDGD3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFKALc2WPyKsG+3qVazaJWM6nVPEHphAC2WsBUoJ/Pw3jtEPAMAPVt3bVDp2Kj0Sg5OTnOhmqh8ZRuFHBnj8mr8dSVWs2iVjOp1TxB6YRgttIPiPg4SJLRmydEREREDOF4Ybdlyxby8/OJRCKEQiGKi4ubfT8UCrV6+8///M/OzuzI+8BtQBb2JYsygUmc9EJpg6hVralOrea1BqUT1Gpk6xpgdSvbq7EvKfZRQqc5I8fXiq2rq2PUqFEUFhYyderUU77/7rvvNvt63bp13HzzzUybNs3pQ3bKNOwT06uAIcABYANw2JdpvKVWtaY6tZrXGpROUKupranC8cIuLy+PvLy8034/MzOz2dfPPvss48ePZ8iQIU4f0rEaYCuwCRjbsG0wcEnCJ/FeDWpVa2qrQa2mtdYQjE5Qq6mtqSQhr7E7cOAAv//977n55psT8XCn6N1wKwaO+TJB4qjVTGo1U1Bag9IJahX/JWRht2rVKtLT01s9ZZsIacBK7EPFZwOXA/OBN3yZxltqVWuqU6t5rUHpBLWa2grAbmBBi9uvfJ2oVQlZ2P33f/83M2fOpEePHol4uFZNA94B1gJXYx86zsH+oTSNWtWa6tRqXmtQOkGtpraSDXyzxe0aXydqlecLu61bt7Jr1y5uueUWrx+qTT2Aq7A/vPEVYBZwj58DeUitZlKrmYLSGpROUKuRrV2Bvi1u6b5O1CrPF3ZPPPEEn//85xk1apTXD9Vhw4E6v4dIELWaSa1mCkprUDpBrZI4jt8VW1tby549e5q+rq6upqKigoyMDLKysgA4evQoTz/9ND/84Q87P2knHAamA4XASOwFdjmwGJji41xeUKtaU51azWsNSieo1dTWVOJ4YVdeXs748eObvp47dy4ABQUFrFy5EoCnnnoKy7K44YYbOjdlJ/UGxgCPAnuB48C5wK3YL/Q0iVrVmurUal5rUDpBraa2phLHC7tx48ZhWdYZ7/P1r3+dr3/9604fwjXdgYUNN9Op1UxqNVNQWoPSCWo11nWn2Z6NfeWJJKJrxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2vsE3Nlj8ipt+FOtZlGrmdRqnqB0QjBbqQIO+ThIIhxp/11Dbb2zFSAUCl0aDoe3xePxTkx1qjDg7h6Tl1rNpFYzqdU8QekEtZqqoTXXsqyyM92vvUfs6uPxOEVFRUSj0c5Ph330LxaLUQS4s8fkVYJ9qRW1m
kWtZlKreYLSCWo1VSVwo/3X+rbu26FTsdFolJycHEdDtdR4SjeKfcFgkzUeDlerWdRqJrWaJyidoFbRmydEREREjOF4Ybdlyxby8/OJRCKEQiGKi4ubfb+2tpbbb7+dc845h549ezJ8+HCWL1/e2Xkdex+4DcjC/rTsTGASJ7340iBqVWuqU6t5rUHpBLWq1V+OLylWV1fHqFGjKCwsZOrUqad8f+7cubz00ksUFRVx3nnn8T//8z9861vfIhKJcM0113RqaCemYZ+YXgUMAQ4AG7AvYmwatao11anVvNagdIJa1eovxwu7vLw88vLyTvv9V155hYKCAsaNGwfY14392c9+xmuvvZbwhV0NsBXYBIxt2DYYuCShUyRGDWpVa2qrQa2mtdYQjE5Qq1r959lr7C677DLWrl3L/v37sSyLjRs3snv3biZOnOjVQ55W74ZbMXAs4Y+eWGo1k1rNFJTWoHSCWk2VSq2eLex+8pOfMHz4cM455xy6devG1VdfzdKlS/niF7/o1UOeVhqwEvvw6dnA5cB84I2ET+I9tao11anVvNagdIJa1eo/Txd2ZWVlrF27lh07dvDDH/6Q2bNn8+KLL3r1kGc0DXgHWAtcjX04NQf7iTKNWtWa6tRqXmtQOkGtavWZZVlt3rBnt3bs2GG1BrDWrFnT9PWHH35ode3a1Xruueea3e/mm2+2Jk2aZFmWZRUVFVmAtQMsy6fbzWBlJeBxikCtalWrWtUa0E61qrWztx0NrUBOW2s2T47YHT9+nOPHjxMON999ly5dcPuyZJ0xHKjze4gEUauZ1GqmoLQGpRPUaqpkbHX8rtja2lr27NnT9HV1dTUVFRVkZGSQlZXF2LFjueuuu+jZsyeDBw9m8+bN/OIXv+CRRx5xZfCOOAxMBwqBkUA6UA4sBqYkfBpvqVWtqU6t5rUGpRPUqlb/OV7YlZeXM378+Kav586dC0BBQQErV67kqaeeYt68ecycOZO///3vDB48mAULFvDNb36z81N3UG9gDPAosBc4DpwL3Ir94keTqFWtqU6t5rUGpRPUqlb/OV7YjRs3jobX37UqMzOTFStWON29q7oDCxtuplOrmdRqpqC0BqUT1GqqVGrVtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNvypVrOo1UxqNU9QOkGtpqruwH1DZ3pna9OdQqFLw+HwNrc/XDgMJM/HFXtLrWZSq5nUap6gdIJaTdXQmmtZVtmZ7tfeI3b18XicoqIiotFo56fDPvoXi8UoAtzZY/IqAWKgVsOo1UxqNU9QOkGtpqoEbrT/Wt/WfTt0KjYajZKTk+NoqJYaT+lGsS9Ea7LGQ8RqNYtazaRW8wSlE9QqevOEiIiIiDEcL+y2bNlCfn4+kUiEUChEcXFxs+8fOHCAWbNmEYlE+MQnPsHVV19NVVVVZ+d17H3gNiAL+xOkM4FJ/N+LL02iVrWmOrWa1xqUTlCrWv3l+JJidXV1jBo1isLCQqZOndrse5Zlce2119K1a1eeffZZzjrrLB555BEmTJjAn//8Z3r16tXpwTtqGvaJ6VXAEOAAsAH7wr6mUataU51azWsNSieoVa0+syyrzRv26Wtrx44dVmsAa82aNU1f79q1ywKsnTt3Nm07ceKE1b9/f+vxxx+3LMuyioqKLMDaAZbl8e0IWIC1KQGP1dqtqOHx1apWtao1iK1B6VSrWr267Wh4fCCnrTWbJ6+xO3bsGAA9evRo2hYOh+nevTsvv/yyFw95Rr0bbsXAsYQ/emKp1UxqNVNQWoPSCWo1VSq1erKwGzZsGFlZWcybN48jR45QX1/PokWLePvtt3n33Xe9eMgzSgNWYh8+PRu4HJgPvJHwSbynVrWmOrWa1xqUTlCrWv3nycKua9euPPPMM+zevZuMjAw+8YlPsHHjRvLy8giH/Xkj7jTgHWAtcDWwCfv88kpfpvGWW
tWa6tRqXmtQOkGtavWZF6+xO1lNTY118OBBy7Is65JLLrG+9a1vJfw1dqe73QxWlmGvA1CrWtWq1lRpDUqnWtXa2Zvvr7E7WZ8+fejfvz9VVVWUl5czZcoUrx+y3YYDdX4PkSBqNZNazRSU1qB0glpNlYytjj/upLa2lj179jR9XV1dTUVFBRkZGWRlZfH000/Tv39/srKyePPNN/nOd77Dtddey8SJE10ZvCMOA9OBQmAkkA6UA4uB5FlmukOtak11ajWvNSidoFa1+s/xwq68vJzx48c3fT137lwACgoKWLlyJe+++y5z587lwIEDDBo0iK9+9avEYrHOT+xAb2AM8CiwFzgOnAvciv3iR5OoVa2pTq3mtQalE9Sq1iTgxmvsnEiG19gl6pYMrwNQq1rVqtagtgalU63m3pLqNXYiIiIikhha2ImIiIgYQgs7EREREUNoYSciIiJiiA69K7akpITKykpXHri0tNTeJ+DOHpNXacOfajWLWs2kVvMEpRPUaqrqDtw31PCu1zPfKRS6NBwOb4vH486nakUYcHePyUutZlKrmdRqnqB0glpN1dCaa1lW2Znu194jdvXxeJyioiKi0Wjnp8M++heLxSgC3Nlj8ioBYqBWw6jVTGo1T1A6Qa2mqgRutP9a39Z9O3QqNhqNkpOT42iolhpP6UaxPyTPZI2HiNVqFrWaSa3mCUonqFX05gkRERERY2hhJyIiImIIRwu7hQsXcvHFF5Oens6AAQO49tpr2bVrV7P7fPzxx8yePZu+ffvSu3dvpk2bxoEDB1wZ2on3gduALKA7kAlM4v/eVWMStao11anVvNagdIJa1eqvDr3GrtHmzZuZPXs2F198Mf/85z+ZP38+EydO5M9//jO9evUC4M477+T3v/89Tz/9NH369OH2229n6tSpTR9zkmjTsF9xuAoYAhwANgCHfZnGW2pVa6pTq3mtQekEtarVZ21dTLbh41ByAGvHjh1Waw4ePGgB1ubNmy3Lsqyamhqra9eu1tNPP910n8rKSguwtm3bZlmWZRUVFSXs4r1HGi6euykAFypWq1rVqtZkaw1Kp1rV6tVtR8PjAzltrdlceY3dBx98AEBGRgYAO3bs4Pjx40yYMKHpPsOGDSMrK4tt27a58ZAd0rvhVgwcS/ijJ5ZazaRWMwWlNSidoFZTpVJrpxd28XicOXPmcPnll/PZz34WgPfee49u3bpx9tlnN7vvwIEDee+99zr7kB2WBqzEPnx6NnA5MB94I+GTeE+tak11ajWvNSidoFa1+q/TC7vZs2ezc+dOnnrqKTfm8cw04B1gLXA1sAn7/PJK/0byjFrVmurUal5rUDpBrWr1WWdeYzd79mzrnHPOsf7yl780275hwwYLsI4cOdJse1ZWlvXII48k/DV2p7vdDFaWYa8DUKta1arWVGkNSqda1drZm+evsbMsi9tvv501a9bw0ksvkZ2d3ez7n//85+natSsbNmxo2rZr1y727dtHbm6uk4f0xHCgzu8hEkStZlKrmYLSGpROUKupkrHV0cedzJ49myeffJJnn32W9PT0ptfN9enTh549e9KnTx9uvvlm5s6dS0ZGBmeddRbf/va3yc3N5dJLL3U1oD0OA9OBQmAkkA6UA4uBKQmfxltqVWuqU6t5rUHpBLWq1X+OFnbLli0DYNy4cc22r1ixglmzZgHw6KOPEg6HmTZtGseOHWPSpEn813/9V6eGdao3MAZ4FNgLHAfOBW7FfvGjSdSq1lSnVvNag9IJalWr/xwt7Bped3dGPXr0YOnSpSxdutTJQ7iqO7Cw4WY6tZpJrWYKSmtQOkGtpkqlVl0rVkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYogOvXmisrLStQeurq629+naHpNXdcOfajWLWs2kVvMEpRPUaqqONIba8w7XUCiUFQ6Hd8Xj8R7OxzpVGIi7ucMkplYzqdVMajVPUDpBraYKw8dxGGpZ1r4z3a9dC
zuwF3dAPzeGO0k3oN7lfSYrtZpJrWZSq3mC0glqNdWhthZ10IGFnYiIiIgkN715QkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMcT/B73X1paF7bdVAAAAAElFTkSuQmCC\n" - }, - "metadata": {} - } - ] - }, - { - "cell_type": "code", - "source": [ - "# @title Test Best Policy on New Episodes\n", - "\n", - "env = BlackjackEnv()\n", - "NUM_EPISODES = 100000\n", - "\n", - "for _ in range(NUM_EPISODES):\n", - " state = env.reset()\n", - " while env.done == False:\n", - " if state[0] == 19:\n", - " # Player was dealt with a Blackjack\n", - " next_state, reward, env.done, info = env.step(1)\n", - " total_rewards += reward\n", - " else:\n", - " Q_index = get_Q_state_index(state)\n", - " action = new_Q_binary[Q_index]\n", - " new_state, reward, done, desc = env.step(action)\n", - " state = new_state\n", - " total_rewards += reward\n", - "avg_reward = total_rewards / NUM_EPISODES\n", - "print(f'Average Reward: {avg_reward}')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7sTNqbeZga_3", - "outputId": "97878c73-7419-41d4-d600-68bfd314b8f4" - }, - "execution_count": 24, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Average Reward: -4.677\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "WL-NmCiphQiq" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4" - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, - "nbformat": 4, - "nbformat_minor": 0 -} From 
f83df9bf0204003b5aa3bd204efff207ff5d4046 Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:46:21 +0530 Subject: [PATCH 6/8] Created using Colab --- .../Monte_Carlo_Blackjack_Demonstration.ipynb | 1081 +++++++++++++++++ 1 file changed, 1081 insertions(+) create mode 100644 chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb diff --git a/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb b/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb new file mode 100644 index 0000000..8c4a24e --- /dev/null +++ b/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb @@ -0,0 +1,1081 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Installing Dependencies" + ], + "metadata": { + "id": "ptQKksAbExoX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JuDWA5loOsFM", + "outputId": "b9cdd35b-2703-4828-ad14-494bafe11476" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting gymnasium\n", + " Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)\n", + "Collecting farama-notifications>=0.0.1 (from gymnasium)\n", + " Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)\n", + "Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n", + "\u001b[?25l 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/953.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m501.8/953.9 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n", + "Installing collected packages: farama-notifications, gymnasium\n", + "Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1\n" + ] + } + ], + "source": [ + "!pip install gymnasium" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Importing Dependencies" + ], + "metadata": { + "id": "q6ml-ZqBE1DP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cbt-NEiMO1bA" + }, + "outputs": [], + "source": [ + "import random\n", + "import numpy as np\n", + "import gymnasium as gym\n", + "from gym import error, spaces, utils\n", + "from gym.utils import seeding\n", + "import enum\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Set up the game" + ], + "metadata": { + "id": "RSlZaAJ3E4YL" + } + }, + { + "cell_type": "code", + "source": [ + "# Define the rank and suit of a card\n", + "\n", + "ranks = {\n", + " \"two\": 2,\n", + " \"three\": 3,\n", + " \"four\": 4,\n", + " \"five\": 5,\n", + " \"six\": 6,\n", + " \"seven\": 7,\n", + " \"eight\": 8,\n", + " \"nine\": 9,\n", + " \"ten\": 10,\n", + " \"jack\": 10,\n", + " \"queen\": 10,\n", + " \"king\": 10,\n", + " \"ace\": (1, 11),\n", + "}\n", + "\n", + "class Suit(enum.Enum):\n", + " spades = \"spades\"\n", + " clubs = \"clubs\"\n", + " diamonds = \"diamonds\"\n", + " hearts = 
\"hearts\"\n", + "\n", + "# Now to define the card and deck\n", + "# Implement the shuffle, peek and deal functions for the deck\n", + "\n", + "class Card:\n", + " def __init__(self, suit, rank, value):\n", + " self.suit = suit\n", + " self.rank = rank\n", + " self.value = value\n", + "\n", + " def __str__(self):\n", + " return self.rank + \" of \" + self.suit.value\n", + "\n", + "class Deck:\n", + " def __init__(self, num=1):\n", + " self.cards = []\n", + " for i in range(num):\n", + " for suit in Suit:\n", + " for rank, value in ranks.items():\n", + " self.cards.append(Card(suit, rank, value))\n", + "\n", + " def shuffle(self):\n", + " random.shuffle(self.cards)\n", + "\n", + " def deal(self):\n", + " return self.cards.pop(0)\n", + "\n", + " def peek(self):\n", + " if len(self.cards) > 0:\n", + " return self.cards[0]\n", + "\n", + " def add_to_bottom(self, card):\n", + " self.cards.append(card)\n", + "\n", + " def __str__(self):\n", + " result = \"\"\n", + " for card in self.cards:\n", + " result += str(card) + \"\\n\"\n", + " return result\n", + "\n", + " def __len__(self):\n", + " return len(self.cards)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D3df2N-JrKiE", + "outputId": "a36dc6fc-51b2-4fb0-bfb5-380e143cc4b4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
def _best_hand_total(hand):
    """Return the Blackjack total of ``hand``, counting one ace as 11 if safe.

    Every ace is first counted as 1 (``card.value[0]``). If the hand holds at
    least one ace and adding 10 does not exceed 21, exactly one ace is
    promoted to 11. A second ace can never also be 11: two aces alone would
    already total 22.
    """
    hard_total = 0
    has_ace = False
    for card in hand:
        if card.rank == "ace":
            has_ace = True
            hard_total += card.value[0]  # ace counted as 1 for now
        else:
            hard_total += card.value

    soft_total = hard_total + 10
    if has_ace and soft_total <= 21:
        return soft_total
    return hard_total


def player_eval(player_hand):
    """Return the value of the player's hand.

    An ace is used as 11 whenever possible. It is 1 only if the other option
    would make the player bust. An empty hand evaluates to 0.
    """
    return _best_hand_total(player_hand)


def dealer_eval(player_hand):
    """Return the value of the dealer's hand (passed as ``player_hand``).

    Uses the same ace rule as ``player_eval``. Because an ace that yields 17
    is reported as 17, a dealer who stops at 17 also stands on a soft 17.
    """
    return _best_hand_total(player_hand)


def dealer_turn(dealer_hand, deck):
    """Play out the dealer's fixed policy: hit until the total is 17 or more.

    ``dealer_hand`` is extended in place with cards from ``deck.deal()``.

    Returns:
        The tuple ``(dealer_value, dealer_hand, deck)``.
    """
    dealer_value = dealer_eval(dealer_hand)
    while dealer_value < 17:
        dealer_hand.append(deck.deal())
        dealer_value = dealer_eval(dealer_hand)

    return dealer_value, dealer_hand, deck
INITIAL_BALANCE = 1000
NUM_DECKS = 6


class BlackjackEnv(gym.Env):
    """Single-player Blackjack against a fixed-policy dealer.

    Actions: ``0`` = hit, ``1`` = stand.

    Observations are a 2-element array ``[player_total - 2, upcard_value - 1]``
    where the upcard value comes from ``dealer_eval`` (so an ace reads as 11).
    ``get_Q_state_index`` subtracts one more from each component, so a player
    total of 3 and an upcard of 2 both land on index 0.

    ``observation_space`` is ``Tuple(Discrete(18), Discrete(10))``: 18 player
    totals that need a decision and 10 upcard values. The learning code uses
    these sizes for its tables. Terminal observations (21 or a bust) are
    still returned by ``step`` and fall outside that range.

    Rewards: ``-100`` for a loss, ``0`` for a tie or a non-terminal hit,
    ``+100`` for a win.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self):
        super().__init__()

        # Initialize the blackjack deck
        self.bj_deck = Deck(NUM_DECKS)

        self.player_hand = []
        self.dealer_hand = []

        # Round state; reset() fills these in. Defining them here means the
        # attributes always exist, even before the first reset().
        self.dealer_upcard = None
        self.player_value = 0
        self.balance = INITIAL_BALANCE

        self.reward_options = {"lose": -100, "tie": 0, "win": 100}

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple((spaces.Discrete(18), spaces.Discrete(10)))
        self.done = False

    def _get_obs(self):
        """Build the observation for the current hands (see class docstring)."""
        player_value_obs = self.player_value - 2
        upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1
        return np.array([player_value_obs, upcard_value_obs])

    def _final_reward(self):
        """Resolve a finished round and return its reward.

        A bust loses and a player total of exactly 21 wins, both without a
        dealer turn. Otherwise the dealer plays out their hand and the totals
        are compared.
        """
        if self.player_value > 21:
            return self.reward_options['lose']
        if self.player_value == 21:
            return self.reward_options['win']

        dealer_value, self.dealer_hand, self.bj_deck = dealer_turn(self.dealer_hand, self.bj_deck)

        # The player holds less than 21 here, so a dealer total of exactly 21
        # is covered by the plain "player < dealer" comparison.
        if dealer_value > 21 or self.player_value > dealer_value:
            return self.reward_options['win']
        if self.player_value < dealer_value:
            return self.reward_options['lose']
        return self.reward_options['tie']

    def _take_action(self, action):
        """Apply ``action`` (0 = hit draws a card) and refresh ``player_value``."""
        if action == 0:  # hit
            self.player_hand.append(self.bj_deck.deal())
        # recalculate the value of the player's hand after any changes to the hand
        self.player_value = player_eval(self.player_hand)

    def step(self, action):
        """Advance the round by one player decision.

        Returns:
            ``(obs, reward, done, info)``, where ``info`` is an empty dict.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self.dealer_upcard is None:
            raise RuntimeError("reset() must be called before step()")

        self._take_action(action)

        # End the episode/game if the player stands or has a hand value >= 21.
        self.done = action == 1 or self.player_value >= 21

        # The reward stays 0 while the player hits and is still below 21.
        rewards = self._final_reward() if self.done else 0
        self.balance += rewards

        return self._get_obs(), rewards, self.done, {}

    def reset(self):
        """Start a new round and return the initial observation.

        Cards from the previous round go back to the bottom of the shoe, the
        shoe is shuffled once, and the balance returns to ``INITIAL_BALANCE``.
        """
        # add the player and dealer cards back into the deck
        self.bj_deck.cards += self.player_hand + self.dealer_hand

        # Only shuffle once before the start of each game.
        self.bj_deck.shuffle()
        self.balance = INITIAL_BALANCE
        self.done = False

        # deal 2 cards to the agent and the dealer
        self.player_hand = [self.bj_deck.deal(), self.bj_deck.deal()]
        self.dealer_hand = [self.bj_deck.deal(), self.bj_deck.deal()]
        self.dealer_upcard = self.dealer_hand[0]

        # calculate the value of the agent's hand
        self.player_value = player_eval(self.player_hand)

        return self._get_obs()

    def render(self, mode='human', close=False):
        """Print the balance, the player's ranks, the upcard value and ``done``."""
        hand_list = [card.rank for card in self.player_hand]

        # No upcard exists before the first reset().
        if self.dealer_upcard is None:
            upcard_value = None
        else:
            upcard_value = dealer_eval([self.dealer_upcard])

        print(f'Balance: {self.balance}')
        print(f'Player Hand: {hand_list}')
        print(f'Dealer Upcard: {upcard_value}')
        print(f'Done: {self.done}')
def get_Q_state_index(state, num_upcards=10):
    """Map an environment state to its row in the Q and prob tables.

    ``state`` is the ``[player_total - 2, upcard_value - 1]`` pair returned by
    the environment. One more is subtracted from each component to get
    zero-based coordinates (player total 3 -> 0, upcard 2 -> 0). These are
    flattened row-major with ``num_upcards`` columns per player total.

    Args:
        state: indexable pair ``(player_obs, upcard_obs)``.
        num_upcards: number of distinct dealer-upcard values. The default of
            10 matches the ``Discrete(10)`` upcard space of ``BlackjackEnv``.
            It is a parameter so the function does not depend on a global
            ``env`` object.
    """
    player_row = state[0] - 1
    upcard_col = state[1] - 1
    return num_upcards * player_row + upcard_col


def get_prob_of_best_action(env, state, Q, prob):
    """Return the stored probability of the currently best action in ``state``.

    The best action is the ``np.argmax`` over the state's row in ``Q``
    (index 0 = hit, 1 = stand; ties resolve to index 0). ``env`` is accepted
    for signature compatibility and is not used.
    """
    Q_state_index = get_Q_state_index(state)
    best_action = np.argmax(Q[Q_state_index])
    return prob[Q_state_index][best_action]


def update_prob_of_best_action(env, state, Q, prob, epsilon):
    """Shift probability mass towards the best action of ``state``, in place.

    The best action's probability grows by ``1 - epsilon`` and is capped at 1.
    The other action receives the complement. With ``epsilon == 1`` nothing
    changes; as epsilon decays the policy becomes greedier.

    ``env`` is accepted for signature compatibility and is not used.

    Returns:
        The same ``prob`` object that was passed in.
    """
    Q_state_index = get_Q_state_index(state)
    best_action = np.argmax(Q[Q_state_index])

    row = prob[Q_state_index]
    row[best_action] = min(1, row[best_action] + 1 - epsilon)

    # Only two actions exist, so the other one is simply the complement.
    other_action = 1 - best_action
    row[other_action] = 1 - row[best_action]

    return prob
def play_game(env, Q, prob):
    """Simulate one round of Blackjack and log the agent's decisions.

    The round starts with ``env.reset()``. While it is not over, the agent
    looks up the best action for the current state in ``Q``, takes it with
    the probability stored in ``prob`` (otherwise takes the other action),
    and records a ``(state, action, reward)`` tuple.

    If the player is dealt 21 outright, the agent has no decision to make.
    The round is resolved by standing and nothing is recorded, so the
    returned list is empty.

    Returns:
        The list of ``(state, action, reward)`` tuples, in order. Only states
        observed before a decision are logged, so none of them has a player
        total above 20.
    """
    # The env reports the player total minus 2, so 19 means a total of 21.
    natural_21_obs = 19

    episode = []
    state = env.reset()
    done = env.done
    while not done:
        if state[0] == natural_21_obs:
            # Nothing to learn from this round; stand and let the env score it.
            next_state, reward, done, info = env.step(1)
        else:
            # Row of Q that corresponds to the current state.
            Q_state_index = get_Q_state_index(state)

            # Index 0 is hit, 1 is stand.
            best_action = np.argmax(Q[Q_state_index])
            prob_of_best_action = get_prob_of_best_action(env, state, Q, prob)

            if random.uniform(0, 1) < prob_of_best_action:
                action_to_take = best_action
            else:
                # Take the other action.
                action_to_take = 1 if best_action == 0 else 0

            next_state, reward, done, info = env.step(action_to_take)

            # Log the state->action->reward sequence.
            episode.append((state, action_to_take, reward))

        # Update the state for the next decision made by the agent.
        state = next_state

    return episode
def update_Q(env, episode, Q, alpha, gamma):
    """Move Q-values towards the discounted returns observed in ``episode``.

    For each ``(state, action, reward)`` tuple at step ``t``, the return is
    ``G_t = r_t + gamma * r_{t+1} + gamma**2 * r_{t+2} + ...``. This uses the
    rewards from step ``t`` onward and never rewards from earlier steps.
    The update is ``Q[s][a] += alpha * (G_t - Q[s][a])``.

    Every tuple in the episode triggers an update. If a state-action pair
    occurs more than once in one episode, it is updated once per occurrence.
    Lowering ``gamma`` reduces how much a late reward credits early decisions.

    ``env`` is accepted for signature compatibility and is not used.

    Returns:
        The same ``Q`` object, updated in place.
    """
    # Accumulate returns backwards in one pass: G_t = r_t + gamma * G_{t+1}.
    returns = [0.0] * len(episode)
    running_return = 0.0
    for t in range(len(episode) - 1, -1, -1):
        running_return = episode[t][2] + gamma * running_return
        returns[t] = running_return

    # Apply the updates in the order the decisions were made.
    for (state, action, _reward), total_reward in zip(episode, returns):
        Q_state_index = get_Q_state_index(state)
        curr_Q_value = Q[Q_state_index][action]
        Q[Q_state_index][action] = curr_Q_value + alpha * (total_reward - curr_Q_value)
    return Q


def update_prob(env, episode, Q, prob, epsilon):
    """Refresh action probabilities for every state visited in ``episode``.

    Call this after ``update_Q``. The new Q-values may have changed which
    action is best. Each visited state has the probability of its current
    best action nudged up via ``update_prob_of_best_action``.

    Returns:
        The updated ``prob`` table.
    """
    for state, _action, _reward in episode:
        prob = update_prob_of_best_action(env, state, Q, prob, epsilon)
    return prob
A smaller alpha means that a new reward logged in an episode for some state-action pair has less impact on the current Q-value for that state-action pair. The converse is also true. Therefore, alpha essentially defines how fast the AI agent learns (learning rate).\n", + "\n", + "epsilon defines the weight given to each new change in the action probabilities within the update_prob() function. A larger epsilon reduces the percent amount by which an action probability is changed after some change in Q-values. The converse is also true. An epsilon of 1 means that no change will occur in the action probability, regardless of the magnitude of changes in Q-values.\n", + "\n", + "epsilon is decayed by the decay value after every episode. The lowest value epsilon can reach is epsilon_min.\n", + "\n", + "gamma is the rate used to discount future rewards yielded by a certain state-action pair in the episode. Since a round (or episode) of Blackjack can have more than 1 decision made, there can be numerous state-action pairs that are seen in one episode. However, only the final decision (or state-action pair) yields an immediate reward from the environment. All previous state-action pairs had no rewards. So, the final reward must be used to modify the Q-values of the earlier state-action pairs. Since the final reward was only partially made possible by the earlier state-action pairs, the final reward is discounted using gamma to account for this.\n", + "\n", + "Once these important variables are defined, this function runs the First-Visit Monte Carlo algorithm for the Blackjack environment.\n", + "\n", + "The function runs num_episodes episodes.\n", + "\n", + "In each episode, epsilon is first decayed by the decay rate.\n", + "\n", + "Then, the game of Blackjack is played out through the play_game() function. This function returns a list of the state-action-reward tuples that occurred during the game. 
def run_mc(env, num_episodes, *, alpha=0.001, epsilon=1, decay=0.9999,
           epsilon_min=0.9, gamma=0.8):
    """Train the agent with Monte Carlo control for ``num_episodes`` rounds.

    Each iteration plays one round with ``play_game``, decays ``epsilon``,
    updates the Q-values from the logged episode with ``update_Q``, and then
    updates the action probabilities with ``update_prob``.

    Args:
        env: environment exposing ``observation_space`` (a pair of discrete
            spaces: player totals, dealer upcards) and ``action_space``.
        num_episodes: number of rounds to play.
        alpha: learning rate. It is kept very small to avoid quick, large
            policy changes.
        epsilon: starting value. ``1 - epsilon`` is added to the best
            action's probability on each update, so 1 means "no change".
        decay: multiplicative decay applied to ``epsilon`` once per episode.
        epsilon_min: floor for ``epsilon``.
        gamma: discount applied to later rewards when crediting earlier
            decisions of the same round.

    Returns:
        ``(Q, prob)``: two arrays of shape ``(num_states, num_actions)``.
        Row order follows ``get_Q_state_index``. Column 0 is hit and
        column 1 is stand.
    """
    num_states = env.observation_space[0].n * env.observation_space[1].n
    num_actions = env.action_space.n

    # float64 rather than float16: updates are scaled by a small alpha, and
    # half precision rounds small increments away once values are moderately
    # large.
    Q = np.zeros((num_states, num_actions), dtype=np.float64)

    # Both actions start equally likely in every state.
    prob = np.full((num_states, num_actions), 0.5, dtype=np.float64)

    for _ in range(num_episodes):
        episode = play_game(env, Q, prob)

        epsilon = max(epsilon * decay, epsilon_min)

        Q = update_Q(env, episode, Q, alpha, gamma)

        prob = update_prob(env, episode, Q, prob, epsilon)

    return Q, prob
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Total time for learning: 230.13713669776917 s.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# best_policy() function" + ], + "metadata": { + "id": "qICTdADBF4Eu" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "This function takes the new Q-table returned and keeps only the action that yields the highest value for a given state. This is represented in three different ways: binary, string and colors.\n", + "0 is hit and 1 is stand in the binary representation of the results.\n", + "H is hit and S is stand in the string representation of the results.\n", + "Green is hit and Red is stand in the color representation of the results.\n", + "'''\n", + "def best_policy(Q):\n", + " best_policy_binary = []\n", + " best_policy_string = []\n", + " best_policy_colors = []\n", + " for i in range(len(Q)):\n", + " best_policy_binary.append(np.argmax(Q[i]))\n", + " best_policy_string.append('H' if np.argmax(Q[i]) == 0 else 'S')\n", + " best_policy_colors.append('g' if np.argmax(Q[i]) == 0 else 'r')\n", + "\n", + " return best_policy_binary, best_policy_string, best_policy_colors" + ], + "metadata": { + "id": "MNbxnlFKeRgQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Create DataFrame with Player Value as Rows and Dealer Upcard as Columns" + ], + "metadata": { + "id": "89PKdgPiF6Qx" + } + }, + { + "cell_type": "code", + "source": [ + "new_Q_binary, new_Q_string, new_Q_colors = best_policy(new_Q)\n", + "\n", + "df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "color_df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "for s in range(3, 21):\n", + " # possible player values in the range 3 to 20\n", + " start = env.observation_space[1].n * (s - 3)\n", + " end = start + 10\n", + " df.loc[s] = (new_Q_string[start:end])\n", + " color_df.loc[s] = (new_Q_colors[start:end])" + ], + 
"metadata": { + "id": "lZIV2WxWfR9z" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Create Chart Graphic for the Results" + ], + "metadata": { + "id": "Lh14aJ8jF8s-" + } + }, + { + "cell_type": "code", + "source": [ + "fig, ax = plt.subplots()\n", + "\n", + "# hide axes\n", + "fig.patch.set_visible(False)\n", + "ax.set_axis_off()\n", + "ax.axis('tight')\n", + "\n", + "ax.table(cellText=df.values, cellColours=color_df.values, cellLoc='center', rowLabels=df.index, colLabels=df.columns, loc='center')\n", + "\n", + "fig.tight_layout()\n", + "\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "wR8JVHEef5Cf", + "outputId": "02c77629-9a81-4150-9561-c0255ad152ea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABM9UlEQVR4nO3de3xU9Z3/8dcM4VaI2HALUw0GbWGgQklVjNoCFcEsjSgsFhdtaNS2FmuRrW3ht+MdEfZRpRcWqnWBNhUftZVIbZBV5GYkSrCp0mYh0FQqKggl0kQllDm/P06SJSGQ5OScOTPf834+HvOAnMye+bx2Evz2nJk5IcuyEBEREZHUF/Z7ABERERFxhxZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIdL8fPBQKJQF9PNzhgTqBtT7PUSCqNVMajVTUFqD0glqNdUhy7L2tXUn3xZ2oVAoKxwO74rH4z38miGRwuEw8Xjc7zESQq1mUquZgtIalE5Qq6nC4fDHoVBoaFuLOz+P2PWLx+M9ioqKiEajPo7hvZKSEmKxGGo1i1rNpFbzBKUT1GqqyspKbrzxxh7YZzmTdmEHQDQaJScnx+8xPFVZWQmo1TRqNZNazROUTlCr6M0TIiIiIsYIxMJu4cKFXHzxxaSnpzNgwACuvfZadu3a5fdYnli2bBkjR47krLPO4qyzziI3N5d169b5PVZCPPzww4RCIebMmeP3KK679957CYVCzW7Dhg3zeyzP7N+/nxtvvJG+ffvSs2dPLrzwQsrLy/0ey3XnnXfeKc9rKBRi9uzZfo/muhMnThCLxcjOzqZnz56cf/75PPDAA1iW5fdorvvHP/7BnDlzGDx4MD179uSyyy5j+/btfo/VaVu2bCE/P59IJEIoFKK4uLjZ9y3L4u6772bQoEH07NmTCRMmUFVV5c+wndRW6zPPPMPEiRPp27cvoVCIiooKX+ZsTSAWdps3b2b27NmUlZXxwgsvcPz4cSZOnEhdXZ3fo7nunHPO4eGHH2bHjh2Ul5fzpS99iSlTpvCnP/3J79E8tX37dn72s58xcuRIv0fxzIgRI3j33Xebbi+//LLfI3niyJEjXH755XTt2pV169bx5z//mR/+8Id88pOf9Hs0123fvr3Zc/rCCy8AMH36dJ8nc9+iRYtYtmwZP/3pT6msrGTRokUsXryYn/zkJ36P5rpbbrmFF154gV/+8pe8+eabTJw4kQkTJrB//36/R+uUuro6Ro0axdKlS1v9/uLFi/nxj3/M8uXLefXVV+nVqxeTJk3i448/TvCknddWa11dHVdccQWLFi1K8GRt8/01donw/PPPN/t65cqVDBgwgB07dvDFL37Rp6m8kZ+f3+zrBQsWsGzZMsrKyhgxYoRPU3mrtraWmTNn8vjjj/Pggw/6PY5n0tLSyMzM9HsMzy1atIhzzz2XFStWNG3Lzs72cSLv9O/fv9nXDz/8MOeffz5jx471aSLvvPLKK0yZMoXJkycD9tHK1atX89prr/k8mbs++ugjfvvb3/Lss882/ffl3nvv5Xe/+x3Lli1L6X+j8vLyyMvLa/V7lmWxZMkS/uM//oMpU6YA8Itf/IKBAwdSXFzMjBkzE
jlqp52pFeCmm24C4K9//WuCJmq/QByxa+mDDz4AICMjw+dJvHXixAmeeuop6urqyM3N9Xscz8yePZvJkyczYcIEv0fxVFVVFZFIhCFDhjBz5kz27Wvz44xS0tq1a7nooouYPn06AwYMYPTo0Tz++ON+j+W5+vp6ioqKKCwsJBQK+T2O6y677DI2bNjA7t27AfjjH//Iyy+/fMb/eKaif/7zn5w4cYIePZp/klfPnj2NPcoOUF1dzXvvvdfs3+E+ffowZswYtm3b5uNkwROII3Yni8fjzJkzh8svv5zPfvazfo/jiTfffJPc3Fw+/vhjevfuzZo1axg+fLjfY3niqaee4vXXXzfi9StnMmbMGFauXMnQoUN59913ue+++/jCF77Azp07SU9P93s8V/3lL39h2bJlzJ07l/nz57N9+3buuOMOunXrRkFBgd/jeaa4uJiamhpmzZrl9yie+MEPfsDRo0cZNmwYXbp04cSJEyxYsICZM2f6PZqr0tPTyc3N5YEHHiAajTJw4EBWr17Ntm3buOCCC/wezzPvvfceAAMHDmy2feDAgU3fk8QI3MJu9uzZ7Ny50+j/5TR06FAqKir44IMP+M1vfkNBQQGbN282bnH3t7/9je985zu88MILp/yvY9OcfFRj5MiRjBkzhsGDB/PrX/+am2++2cfJ3BePx7nooot46KGHABg9ejQ7d+5k+fLlRi/snnjiCfLy8ohEIn6P4olf//rX/OpXv+LJJ59kxIgRVFRUMGfOHCKRiHHP6y9/+UsKCwv51Kc+RZcuXcjJyeGGG25gx44dfo8mARCoU7G33347zz33HBs3buScc87xexzPdOvWjQsuuIDPf/7zLFy4kFGjRvGjH/3I77Fct2PHDg4ePEhOTg5paWmkpaWxefNmfvzjH5OWlsaJEyf8HtEzZ599Np/5zGfYs2eP36O4btCgQaf8j5BoNGrsqWeAt956ixdffJFbbrnF71E8c9ddd/GDH/yAGTNmcOGFF3LTTTdx5513snDhQr9Hc93555/P5s2bqa2t5W9/+xuvvfYax48fZ8iQIX6P5pnG1/8eOHCg2fYDBw4E4rXBySQQCzvLsrj99ttZs2YNL730krEvxD6deDzOsWPH/B7DdVdeeSVvvvkmFRUVTbeLLrqImTNnUlFRQZcuXfwe0TO1tbXs3buXQYMG+T2K6y6//PJTPo5o9+7dDB482KeJvLdixQoGDBjQ9MYCE3344YeEw83/k9OlSxejLwfVq1cvBg0axJEjR1i/fn3TmwpMlJ2dTWZmJhs2bGjadvToUV599VWjX+OdjAJxKnb27Nk8+eSTPPvss6Snpzed7+/Tpw89e/b0eTp3zZs3j7y8PLKysvjHP/7Bk08+yaZNm1i/fr3fo7kuPT39lNdJ9urVi759+xr3+snvfve75OfnM3jwYN555x3uueceunTpwg033OD3aK678847ueyyy3jooYe4/vrree2113jsscd47LHH/B7NE/F4nBUrVlBQUEBamrn/JOfn57NgwQKysrIYMWIEf/jDH3jkkUcoLCz0ezTXrV+/HsuyGDp0KHv27OGuu+5i2LBhfO1rX/N7tE6pra1tdpagurqaiooKMjIyyMrKYs6cOTz44IN8+tOfJjs7m1gsRiQS4dprr/VvaIfaav373//Ovn37eOeddwCa/sdoZmam70cozf1X5CTLli0DYNy4cc22r1ixwrgXKh88eJCvfvWrvPvuu/Tp04eRI0eyfv16rrrqKr9Hk054++23ueGGGzh8+DD9+/fniiuuoKys7JSPyzDBxRdfzJo1a5g3bx73338/2dnZLFmyxLgX2Td68cUX2bdvn5ELnJP95Cc/IRaL8a1vfYuDBw8SiUT4xje+wd133+33aK774IMPmDdvHm+//TYZGRlMmzaNBQsW0LVrV79H65Ty8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPPp+Rro
NtqXbt2bbOFeuPHudxzzz3ce++9CZ21pUAs7Ez8ZPPTeeKJJ/wewVebNm3yewRPPPXUU36PkFBf/vKX+fKXv+z3GAkxceLEQPwblZ6ezpIlS1iyZInfo3ju+uuv5/rrr/d7DNeNGzfujD+roVCI+++/n/vvvz+BU3mjrdZZs2Yl7YGhQLzGTkRERCQItLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYot3vig2FQllAPxcfexhASUkJlZWVLu42+ZSWlgJqNY1azaRW8wSlE9Rqqurq6nbfN9Set9mHQqGscDi8Kx6Pu/thNCHA/Hf529RqJrWaSa3mCUonqNVUdmuuZVllZ7pbe4/Y9YvH4z2KioqIRqOdHw57hR2LxWAq7h4HTEZVwEbUahq1mkmt5glKJ6jVVIeAZwCob+uuHfqA4mg0Sk5OjrOhWmg6bNoPiLiyy+R1qOFPtZpFrWZSq3mC0glqFb15QkRERMQUjhZ2y5YtY+TIkZx11lmcddZZ5Obmsm7dOrdnc88aYHUr26uBe4GPEjqNt9Sq1lQWlE5QK6g11ak1KVsdLezOOeccHn74YXbs2EF5eTlf+tKXmDJlCn/605/cnk9ERERE2qlDr7FrlJ+f3+zrBQsWsGzZMsrKyhgxYoQrg4mIiIhIxzha2J3sxIkTPP3009TV1ZGbm+vGTCIiIiLigOOF3Ztvvklubi4ff/wxvXv3Zs2aNQwfPtzN2dy1G1jQYpupn32jVjMFpTUonaBWtaY+tSYdxwu7oUOHUlFRwQcffMBvfvMbCgoK2Lx5c/Iu7rKByS227afxc2HMola1prKgdIJa1Zr61Jp0rY4Xdt26deOCCy4A4POf/zzbt2/nRz/6ET/72c9cG85VXYG+LbYd9WOQBFCrmYLSGpROUKtaU59ak45rn2MXj8c5duyYW7sTERERkQ5ydMRu3rx55OXlkZWVxT/+8Q+efPJJNm3axPr1692eT0RERETaydHC7uDBg3z1q1/l3XffpU+fPowcOZL169dz1VVXuT2fiIiIiLSTo4XdE0884fYc3rruNNuzsT8x2iRqVWsqC0onqBXUmurUmpStulasiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEB1680RJSQmVlZWuPHBpaan9lyrgkCu7TF77Gv5Uq1nUaia1miconaBWUx1p/11DltX2hc5CodCl4XB4Wzwe78RUpwoD7u4xiYVIymvKeUKtZgpQq/5tMlBQOiFQrUH6XW1ozbUsq+xM92vvEbv6eDxOUVER0Wi089NhH/2LxWIUAe7sMXmVADELmAr083kYr1UBG1GraQLWGt+I/m0yScB+foPUGpTf1UrgRvuv9W3dt0OnYqPRKDk5OY6GaqnxlG4UcGePyavp5HU/IOLjIInQeDhcrWYJYKv+bTJIAH9+g9QahN/VjtCbJ0REREQM4crC7uGHHyYUCjFnzhw3dueJ94HbgCygO5AJTAJK/RzKC2uA1a1sr8b+dOyPEjqNt9RqXmtQOk8SiH+bgvS8qtXMVlLnd9XRJcVOtn37dn72s58xcuRIN+bxzDTsE9OrgCHAAWADcNjPoUQk8PRvk0hqSJXf1U4t7Gpra5k5cyaPP/44Dz74oFszua4G2ApsAsY2bBsMXOLTPCIioH+bRFJFDanzu9qpU7GzZ89m8uTJTJgwwa15PNG74VYMHPN3FBGRJvq3SSQ1pNLvquMjdk899RSvv/4627dvd3MeT6QBK4FbgeXY754ZC8wAkvsEskO7gQUttpn6mUZqNU9QOgnYv00Bel7V6scg3kql31VHC7u//e1vfOc73+GFF16gR48ebs/kiWnAZOxDqWXAOmAx8HNgln9jeSMbO/Zk+4FnfJjFa2o1rzUonQ0C829TkJ5XtRrZmiq/q44Wdjt27ODgwYPNPtPuxIkTbNmyhZ/+9KccO3aMLl26uDakW3oAVzXcYsAtwD0k1xPiiq5A3xbbjvoxSAKo1
TxB6TxJIP5tCtLzqlZjpcLvqqOF3ZVXXsmbb77ZbNvXvvY1hg0bxve///2kXNS1Zjj2+XIRkWSif5tEUkMy/q46Wtilp6fz2c9+ttm2Xr160bdv31O2J4PDwHSgEPtceDpQjn0IdYqPc4lIsOnfJpHUkEq/q53+HLtU0BsYAzwK7AWOA+divwhyvo9ziUiw6d8mkdSQSr+rri3sNm3a5NauXNcdWNhwM951p9mejf1J4CZRq3mtQelsEJh/m4L0vKrVyNZU+l3VtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNv6lCjjk4yCJsK/hT7WaJYCt+rfJIAH8+Q1SaxB+V6s7cN+QZbV9UbdQKHRpOBzeFo/HnU/VijDg7h6TWAgjr5/XGj2vhgpQq36GzaPn1ExBel4bWnMtyyo70/3ae8SuPh6PU1RURDQa7fx02Ef/YrEYRYA7e0xeJUDMAqYC/XwexmtVEN+InlfTVAEbCUyrfoYNo+fUTAF6XiuBG+2/1rd13w6dio1Go82uD9sZjad0o4A7e0xeTYeI+wERHwdJhIZD/3peDdN4SidArfoZNoieUzMF6HntCL15QkRERMQQjhZ29957L6FQqNlt2LBhbs/mqveB24As7E+QzgQmcdKLh02xBljdyvZq7E8C/yih03hOzytmPa9B6TxJIH6GA/a8BuI5BT2vJOfz6viSYiNGjODFF1/8vx2lJfdlZ6dhn5heBQwBDgAbsC/sK6lLz6ukOv0Mm0fPqZlS5Xl1vBpLS0sjMzPTzVk8UwNsBTYBYxu2DQYu8WkecUcNel4ltdWgn2HT1KDn1EQ1pM7z6vg1dlVVVUQiEYYMGcLMmTPZt29f2/9HPundcCsGjvk7irhIz6ukOv0Mm0fPqZlS6Xl1dMRuzJgxrFy5kqFDh/Luu+9y33338YUvfIGdO3eSnp7u9oydlgasBG4FlmO/e2YsMAMY6d9Y3tkNLGixzcDPNNLzipHPa2A6CdjPcECe10A9p6DnleR7Xh0t7PLy8pr+PnLkSMaMGcPgwYP59a9/zc033+zacG6aBkzGPpRaBqwDFgM/B2b5N5Y3srFjT7YfeMaHWTym5xXzntegdDYIzM9wgJ7XwDynoOeV5HteXfm4k7PPPpvPfOYz7Nmzx43deaYHcBUQA17BfiLu8XMgr3QF+ra4Jd+BVNfoeTVMUDpPEoif4YA9r4F4TkHPK8n3vLqysKutrWXv3r0MGjTIjd0lzHCgzu8hxHV6XiXV6WfYPHpOzZSMz6ujU7Hf/e53yc/PZ/Dgwbzzzjvcc889dOnShRtuuMHt+VxxGJgOFGKfC08HyrEPoU7xcS7pHD2vkur0M2wePadmSqXn1dHC7u233+aGG27g8OHD9O/fnyuuuIKysjL69+/v9nyu6A2MAR4F9gLHgXOxXwQ538e5pHP0vEqq08+wefScmimVnldHC7unnnrK7Tk81R1Y2HAz3nWn2Z6N/UngBtHzinnPa1A6GwTmZzhAz2tgnlPQ85qkdK1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lJ7n4A7e0xepY1/qQIO+ThIIjRcMljPq2EaLwUdoFb9DBtEz6mZAvS8VnfgviHLavuibqFQ6NJwOLwtHo87n6oVYcDdPSavILUSwshrBbZKrWZSq3mC0glqNZXdmmtZVtmZ7tbeI3b18XicoqIiotFo54fDPvoXi8UoAtzZY/Iqwb78SGBaLWAq0M/nYbxWBWxEraZRq3mC0glqNdUhGq+/W9/WXTt0KjYajZKTk+NsqBYaT+lGAXf2mLwaDxEHqZV+QMTHQRKh8TSHWs2iVvMEpRPUKnrzhIiIiIgptLATERERMYTjhd3+/fu58cYb6du3Lz179uTCCy+kvLzczdlc9T5wG5CFfWmQTGASJ72DyCCBaV0DrG5lezX25
Ww+Sug03gpKa1A6Qa2g1lSn1qRsdXSt2CNHjnD55Zczfvx41q1bR//+/amqquKTn/yk2/O5Zhr2Kw5XAUOAA8AG4LCfQ3kkSK0iIiLyfxwt7BYtWsS5557LihUrmrZlZ2e7NpTbaoCtwCZgbMO2wcAlPs3jpRqC0yoiIiLNOToVu3btWi666CKmT5/OgAEDGD16NI8//rjbs7mmd8OtGDjm7yieC1KriIiINOfoiN1f/vIXli1bxty5c5k/fz7bt2/njjvuoFu3bhQUFLg9Y6elASuBW4Hl2B85MhaYAYz0byxPBKkVgN3AghbbTP2wyqC0BqUT1KrW1KfWpONoYRePx7nooot46KGHABg9ejQ7d+5k+fLlSbmwA/t1Z5OxT1OWAeuAxcDPgVn+jeWJILWSjR17sv00fpCjWYLSGpROUKtaU59ak67V0cJu0KBBDB8+vNm2aDTKb3/7W1eG8koP4KqGWwy4BbgHAxc7BKi1K9C3xbajfgySAEFpDUonqFWtqU+tScfRa+wuv/xydu3a1Wzb7t27GTx4sCtDJcpwoM7vIRIkSK0iIiJB5eiI3Z133slll13GQw89xPXXX89rr73GY489xmOPPeb2fK44DEwHCrFfZ5YOlGOfnpzi41xeCFKriIiINOdoYXfxxRezZs0a5s2bx/333092djZLlixh5syZbs/nit7AGOBRYC9wHDgX+w0G832cywtBahUREZHmHC3sAL785S/z5S9/2c1ZPNMdWNhwM12QWrnuNNuzsT8J3CRBaQ1KJ6gV1Jrq1JqUrbpWrIiIiIghtLATERERMYQWdiIiIiKG0MJORERExBAdevNESUkJlZWVrjxwaWmpvU/AnT0mr9KGP4PUShVwyMdBEmFfw59qNYtazROUTlCrqY60/64hy2r7QmehUOjScDi8LR6Pd2Kq1nZMUl5nzRNqNZNazRSg1jDg8r/sySlAz6laDWW35lqWVXamu7X3iF19PB6nqKiIaDTa+eGwj/7FYjGYCvRzZZfJqwrYiFpNo1YzBaw1vhGKAHf+ZU9OJUDMIjDPaZB+fgPTeojGa9LWt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh4jVaha1mimArVHAnX/Zk1PTS2AC9JyqNbj05gkRERERQzha2J133nmEQqFTbrNnz3Z7PnesAVa3sr0a+xOjP0roNN5Sq1pTWVA6IVitwPvAbUAW9hVyMoFJnPSGK1ME6XlVa1K2Orqk2Pbt2zlx4kTT1zt37uSqq65i+vTprg0mIiLmmIb94qBVwBDgALABOOznUCIGcrSw69+/f7OvH374Yc4//3zGjh3rylAiImKOGmArsAlo/K/EYOASn+YRMVmnX2NXX19PUVERhYWFhEIhN2YSERGD9G64FQPH/B1FxHiOjtidrLi4mJqaGmbNmuXCOB7aDSxosc3Uz75Rq5mC0hqUTghMaxqwErgVWI79DtyxwAxgpH9jeScgzyug1iRs7fTC7oknniAvL49IJMnfa5wNTG6xbT+NnwtjFrWqNZUFpRMC1ToNO3UrUAasAxYDPwdm+TeWNwL0vKqVpGvt1MLurbfe4sUXX+SZZ5KsqjVdgb4tth31Y5AEUKuZgtIalE4IVivQA7iq4RYDbgHuwcCFXZCeV7UmnU69xm7FihUMGDCAyZNbLmFFRETObDhQ5/cQIoZxfMQuHo+zYsUKCgoKSEvr9BldEREx1GFgOlCI/Zq6dKAc+1TsFB/nEjGR4xXZiy++yL59+ygsLHRzHhERMUxvYAzwKLAXOA6ci/1mivk+ziViIscLu4kTJ2JZSfh2kNZcd5rt2difGG0Stao1lQWlEwLV2h1Y2HAzXoCeV7WSlK26VqyIiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQHXrzRElJCZWVla48cGlpqf2XKuCQK7tMXvsa/lSrWdRqpgC2lgDu/MuenEob/xKg51SthjnS/ruG2vPO1lAodGk4H
N4Wj8c7MVVrOyYpr7PmCbWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkM0XpO2vq27duhUbDQaJScnx9lQLTSd0u0HRFzZZfJqPESsVrOo1UxqNU9QOkGtojdPiIiIiJjC0cLuxIkTxGIxsrOz6dmzJ+effz4PPPBA8l6JYg2wupXt1difGP1RQqfxllrVmsqC0glqBbWmOrUmZaujS4otWrSIZcuWsWrVKkaMGEF5eTlf+9rX6NOnD3fccYfbM4qIiIhIOzha2L3yyitMmTKFyZMnA3DeeeexevVqXnvtNVeHExEREZH2c3Qq9rLLLmPDhg3s3r0bgD/+8Y+8/PLL5OXluTqciIiIiLSfoyN2P/jBDzh69CjDhg2jS5cunDhxggULFjBz5ky353PPbmBBi21J+pLATlOrmYLSGpROUKtaU59ak46jhd2vf/1rfvWrX/Hkk08yYsQIKioqmDNnDpFIhIKCArdndEc2MLnFtv00fi6MWdSq1lQWlE5Qq1pTn1qTrtXRwu6uu+7iBz/4ATNmzADgwgsv5K233mLhwoXJu7DrCvRtse2oH4MkgFrNFJTWoHSCWtWa+tSadBy9xu7DDz8kHG7+f9qlSxdcv+SYiIiIiLSboyN2+fn5LFiwgKysLEaMGMEf/vAHHnnkEQoLC92eT0RERETaydHC7ic/+QmxWIxvfetbHDx4kEgkwje+8Q3uvvtut+cTERERkXZytLBLT09nyZIlLFmyxOVxPHLdabZnY39itEnUqtZUFpROUCuoNdWpNSlbda1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lL7L1XAIVd2mbz2NfypVrOo1UxqNU9QOkGtpjrS/ruGLKvtC52FQqFLw+HwNtc/gDhEUl5nzRNqNZNazaRW8wSlEwLVGgaCcmmEhtZcy7LKznS/9h6xq4/H4xQVFRGNRjs/HfbRv1gsBlOBfq7sMnlVARtRq2nUaia1miconRC41vhGKALcWZkkr0rgRvuv9W3dt0OnYqPRKDk5OY6GaqnplG4/IOLKLpNX4yFitZpFrWZSq3mC0gmBbI0C7qxMzKA3T4iIiIgYwvHC7h//+Adz5sxh8ODB9OzZk8suu4zt27e7OZt71gCrW9lejf2J0R8ldBpvqVWtqSwonaBWUGuqC1Ir8D5wG5AFdAcygUlAqZ9DtcLRJcUAbrnlFnbu3Mkvf/lLIpEIRUVFTJgwgT//+c986lOfcnNGEREREV9Nw36B2ypgCHAA2AAc9nOoVjg6YvfRRx/x29/+lsWLF/PFL36RCy64gHvvvZcLLriAZcuWuT2jiIiIiG9qgK3AImA8MBi4BJgHXOPfWK1ytLD75z//yYkTJ+jRo0ez7T179uTll192ZTARERGRZNC74VYMHPN3lDY5OhWbnp5Obm4uDzzwANFolIEDB7J69Wq2bdvGBRdc4PaM7tgNLGixzdTP+VGrmYLSGpROUKtaU19AWtOAlcCtwHLsd+GOBWYAI/0bq1WOX2P3y1/+ksLCQj71qU/RpUsXcnJyuOGGG9ixY4eb87knG5jcYtt+4BkfZvGaWtWayoLSCWpVa+oLUOs07NStQBmwDlgM/ByY5d9Yp3C8sDv//PPZvHkzdXV1HD16lEGDBvGVr3yFIUOGuDmfe7oCfVtsO+rHIAmgVjMFpTUonaBWtaa+ILUCPYCrGm4x4BbgHpJrYdfpz7Hr1asXgwYN4siRI6xfv54pU6a4MZeIiIhIUhsO1Pk9RAuOj9itX78ey7IYOnQoe/bs4a677mLYsGF87Wtfc3M+EREREV8dBqYDhdivqUsHyrFPxSbb4SzHC7sPPviAefPm8fbbb5ORkcG0adNYsGABXbt2dXM+EREREV/1BsYAjwJ7gePAudhvppjv41ytcbywu/7667n++
uvdnMU7151mezb2p2ObRK1qTWVB6QS1glpTXYBauwMLG27JTteKFRERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDdOhdsSUlJVRWVrrywKWlpfZfqoBDruwyee1r+FOtZlGrmdRqnqB0QiBbSwB3VibJq7oD9w1ZVttX6w2FQpeGw+Ft8Xjc+VSt7hgjLxbcKrWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkPAMwDUt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh8PVaha1mkmt5glKJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLiZt+3LIu7776bQYMG0bNnTyZMmEBVVVVn53VmDbC6le3V2Jc9+Sih03hLrWpNZUHpBLWCWlOdWpOy1fHCrq6ujlGjRrF06dJWv7948WJ+/OMfs3z5cl599VV69erFpEmT+Pjjjx0PKyIiIiKn16HX2J0sLy+PvLy8Vr9nWRZLlizhP/7jP5gyZQoAv/jFLxg4cCDFxcXMmDHD6cOKiIiIyGl48hq76upq3nvvPSZMmNC0rU+fPowZM4Zt27Z58ZAiIiIigef4iN2ZvPfeewAMHDiw2faBAwc2fS/hdgMLWmwz9bNv1GqmoLQGpRPUqtbUp9ak48nCLillA5NbbNtP4+fCmEWtak1lQekEtao19ak16Vo9WdhlZmYCcODAAQYNGtS0/cCBA3zuc5/z4iHb1hXo22LbUT8GSQC1mikorUHpBLWqNfWpNel48hq77OxsMjMz2bBhQ9O2o0eP8uqrr5Kbm+vFQ4qIiIgEnuMjdrW1tezZs6fp6+rqaioqKsjIyCArK4s5c+bw4IMP8ulPf5rs7GxisRiRSIRrr73WjblFREREpAXHC7vy8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPP06NHj85PLSIiIiKncLywGzduHJZ1+reDhEIh7r//fu6//36nD+Ge606zPRv7E6NNola1prKgdIJaQa2pTq1J2aprxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2n+pAg65ssvkta/hT7WaRa1mUqt5gtIJajXVkfbfNXSmd7Y23SkUujQcDm+Lx+OdmKq1HZOU11nzhFrNpFYzqdU8QekEtZrKbs21LKvsTHdr7xG7+ng8TlFREdFotPPDYR/9i8ViMBXo58ouk1cVsBG1mkatZlKreYLSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFppO6fYDIq7sMnk1HiJWq1nUaia1miconaBW0ZsnREREREzheGG3ZcsW8vPziUQihEIhiouLm33/mWeeYeLEifTt25dQKERFRUUnR+2ENcDqVrZXY39i9EcJncZbalVrKgtKJ6gV1Jrq1JqUrY4XdnV1dYwaNYqlS5ee9vtXXHEFixYtcjyciIiIiLSf42vF5uXlkZeXd9rv33TTTQD89a9/dfoQIiIiItIBeo2diIiIiCEcH7FLObuBBS22mfrZN2o1U1Bag9IJalVr6lNr0gnOwi4bmNxi234aPxfGLGpVayoLSieoVa2pT61J1xqchV1XoG+LbUf9GCQB1GqmoLQGpRPUqtbUp9ako9fYiYiIiBjC8RG72tpa9uzZ0/R1dXU1FRUVZGRkkJWVxd///nf27dvHO++8A8CuXbsAyMzMJDMzs5Nji4iIiEhLjo/YlZeXM3r0aEaPHg3A3LlzGT16NHfffTcAa9euZfTo0UyebJ+QnjFjBqNHj2b58uUujC0iIiIiLTk+Yjdu3Dgs6/RvB5k1axazZs1yunt3XXea7dnYnxhtErWqNZUFpRPUCmpNdWpNyla9xk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkh
MrKSlceuLS01P5LFXDIlV0mr30Nf6rVLGo1k1rNE5ROUKupjrT/rqEzvbO16U6h0KXhcHhbPB7vxFSt7ZikvM6aJwLUGgZc/klJWmo1VIB+XwPTGpROUKup7NZcy7LKznS39h6xq4/H4xQVFRGNRjs/HPbRv1gsBlOBfq7sMnlVARsJTGt8IxQB7vykJK8SIIZaTVMCxCwC8/saiH+bgtIJajXVIRqvSVvf1l07dCo2Go2Sk5PjbKgWmk7p9gMiruwyeTUeIg5QaxRw5ycleTW+KEGtZml6sUmAfl+Nbw1KJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLipu8dP36c73//+1x44YX06tWLSCTCV7/61abrxibcGmB1K9ursT8x+qOETuOtILUC7wO3AVlAdyATmASU+jmUR9RqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl57yvQ8//JDXX3+dWCzG66+/zjPPPMOuXbu45pprOjWsSEvTgD8Aq4DdwFpgHHDYx5m8olYzW0VE3OT4WrF5eXnk5eW1+r0+ffrwwgsvNNv205/+lEsuuYR9+/aRlZXl9GFFmtQAW4FNwNiGbYOBS3yax0s1qNXEVhERtyXsNXYffPABoVCIs88+O1EPKYbr3XArBo75O4rn1CoiIu3h+IhdR3z88cd8//vf54YbbuCss85KxEOeajewoMU2Uz/7JiCtacBK4FZgOfa7NccCM4CR/o3lCbWa2RqU31VArWpNfSnS6vnC7vjx41x//fVYlsWyZcu8frjTywYmt9i2n8bPhTFLgFqnYaduBcqAdcBi4OfALP/G8oRaDWwN0O+qWlFrqkuRVk8Xdo2LurfeeouXXnrJv6N1AF2Bvi22HfVjkAQIUivQA7iq4RYDbgHuwbAFQAO1GtYapN9VtZpJrUnHs9fYNS7qqqqqePHFF+nbt+X/N0S8MRyo83uIBFGriIiczPERu9raWvbs2dP0dXV1NRUVFWRkZDBo0CD+9V//lddff53nnnuOEydO8N577wGQkZFBt27dOj+5BN5hYDpQiP3aq3SgHPuU3RQf5/KCWs1sFRFxm+OFXXl5OePHj2/6eu7cuQAUFBRw7733snbtWgA+97nPNfu/27hxI+PGjXP6sCJNegNjgEeBvcBx4FzsF93P93EuL6jVzFYREbc5XtiNGzcOyzr920HO9L2Eu+4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ+1592ooFLo0HA5vi8fjnZiqtR2TlNdZ80SAWsOAyz8pSUuthgrQ72tgWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxWAq0M+VXSavKmAjgWmNb4QiwJ2flORVgn2pK7WapQSIWQTm9zUQ/zYFpRPUaqpDNF6Ttr6tu3boVGw0GiUnJ8fZUC00ndLtB0Rc2WXyajxEHKDWKODOT0ryajwlqVazNJ1qDtDvq/GtQekEtYrePCEiIiJiCi3sRERERAzheGG3ZcsW8vPziUQihEIhiouLm33/3nvvZdiwYfTq1YtPfvKTTJgwgVdffbWz8zqzBljdyvZq7EuBfJTQabwVpFbgfeA2IAv7UlSZwCROemejQdRqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl7b6/c985jP89Kc/5c033+Tll1/mvPPOY+LEibz//vuOhxVpaRrwB2AVsBtYC4wDDvs4k1fUamariIibOvTmiZPl5eWRl5d32u//27/9W7OvH3nkEZ544gneeOMNrrzySqcPK9KkBtgKbALGNmwbDFzi0zxeqkGtJraKiLgtIa+xq6+v57HHHqNPnz6MGjUqEQ8pAdC74VYMHPN3FM+pV
URE2sPxEbv2eO6555gxYwYffvghgwYN4oUXXqBfP58+bGY3sKDFNlM/1DAgrWnASuBWYDn2x3CMBWYAI/0byxNqNbM1KL+rgFrVmvpSpNXThd348eOpqKjg0KFDPP7441x//fW8+uqrDBgwwMuHbV02MLnFtv00fuCfWQLUOg07dStQBqwDFgM/B2b5N5Yn1Gpga4B+V9WKWlNdirR6eiq2V69eXHDBBVx66aU88cQTpKWl8cQTT3j5kKfXFejb4pbuzyieC1Ir0AO4CvvKCK9g/4f/Hj8H8pBaDROk31W1mkmtSSehn2MXj8c5dkyvmhFvDQfq/B4iQdQqIiInc3wqtra2lj179jR9XV1dTUVFBRkZGfTt25cFCxZwzTXXMGjQIA4dOsTSpUvZv38/06dPd2VwkcPAdKAQ+7VX6UA59im7KT7O5QW1mtkqIuI2xwu78vJyxo8f3/T13LlzASgoKGD58uX87//+L6tWreLQoUP07duXiy++mK1btzJixIjOTy2C/c7JMcCjwF7gOHAu9ovu5/s4lxfUamariIjbHC/sxo0bh2Wd/u0gzzyTRK8mvO4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ2d6Z2vTnUKhS8Ph8LZ4PN6JqU4VBtzdY/JSq5nUaqgQSXkNSE8EpTUonaBWU9mtuZZllZ3pbu09Ylcfj8cpKioiGo12fjjso3+xWIwiwJ09Jq8S7MsiqdUsajVTCRCzgKlAP5+H8VoVsBHzW4PSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFhpP6UYBd/aYvBpPXanVLGo1U9Op5n5AxMdBEqHx9JXprUHpBLWK3jwhIiIiYgrHC7stW7aQn59PJBIhFApRXFx82vt+85vfJBQKsWTJEqcP12nvA7cBWdifbJ8JTOKkF0obRK1qTXWBaF0DrG5lezX2J9l/lNBpvKVWtaa6FGp1fEmxuro6Ro0aRWFhIVOnTj3t/dasWUNZWRmRiL/HSadhn5heBQwBDgAbsC84bhq1qjXVBalVRMRNjhd2eXl55OXlnfE++/fv59vf/jbr169n8uTJTh+q02qArcAmYGzDtsHAJT7N46Ua1KrW1FZDcFpFRNzm2Wvs4vE4N910E3fddRcjRozw6mHapXfDrRg45usk3lOrmdQqIiLt4fiIXVsWLVpEWload9xxh1cP0W5pwErgVmA59rv6xgIzgJH+jeUJtao11QWpld3AghbbTP1MLrWaSa1Jx5OF3Y4dO/jRj37E66+/TigU8uIhOmwaMBn7FE8ZsA5YDPwcmOXfWJ5Qq1pTXWBas7FDT7afxs+rMota1ZrqUqTVk1OxW7du5eDBg2RlZZGWlkZaWhpvvfUW//7v/855553nxUO2Sw/gKuwPWn0F+z8Q9/g2jbfUaia1GqYr0LfFLd3XibyjVjOpNel4srC76aabeOONN6ioqGi6RSIR7rrrLtavX+/FQzoyHKjze4gEUauZ1CoiIidzfCq2traWPXv2NH1dXV1NRUUFGRkZZGVl0bdv32b379q1K5mZmQwdOtT5tA4dBqYDhdiv0UkHyrFP7UxJ+DTeUqtaU12QWkVE3OZ4YVdeXs748eObvp47dy4ABQUFrFy5stODuak3MAZ4FNgLHAfOxX5x9nwf5/KCWtWa6oLUKiLiNscLu3HjxmFZ7X87yF//+lenD9Vp3YGFDTfTqdVMajXQdafZno39SfYmUataU10KtepasSIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUN06M0TJSUlVFZWuvLApaWl9j4Bd/aYvEob/lSrWdRqpsZWqoBDPg6SCPsa/jS9NSidoFZTHWn/XUPteWdrKBS6NBwOb
4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcXNvj9r1ixCoVCz29VXX93ZeR17H7gNyML+ZPtMYBInvVDaIGpVa6oLROsaYHUr26uxP8n+o4RO4y21qjXVpVCr40uK1dXVMWrUKAoLC5k6dWqr97n66qtZsWJF09fdu3d3+nCdNg37xPQqYAhwANiAfcFx06hVrakuSK0iIm5yvLDLy8sjLy/vjPfp3r07mZmZTh/CNTXAVmATMLZh22DgEp/m8VINalVraqshOK0iIm7z9DV2mzZtYsCAAQwdOpTbbruNw4f9+d/bvRtuxcAxXyZIHLWaSa0iItIejo/YteXqq69m6tSpZGdns3fvXubPn09eXh7btm2jS5cuXj1sq9KAlcCtwHLsd/WNBWYAIxM6iffUqtZUF6RWdgMLWmwz9TO51GomtSYdzxZ2M2bMaPr7hRdeyMiRIzn//PPZtGkTV155pVcPe1rTgMnYp3jKgHXAYuDnwKyET+Mttao11QWmNRs79GT7afy8KrOoVa2pLkVaE/ZxJ0OGDKFfv37s2bMnUQ95ih7AVdgftPoK9n8g7vFtGm+p1UxqNUxXoG+LW7qvE3lHrWZSa9JJ2MLu7bff5vDhwwwaNChRD9mm4UCd30MkiFrNpFYRETmZ41OxtbW1zY6+VVdXU1FRQUZGBhkZGdx3331MmzaNzMxM9u7dy/e+9z0uuOACJk2a5MrgHXEYmA4UYr9GJx0oxz61MyXh03hLrWpNdUFqFRFxm+OFXXl5OePHj2/6eu7cuQAUFBSwbNky3njjDVatWkVNTQ2RSISJEyfywAMP+PJZdr2BMcCjwF7gOHAu9ouz5yd8Gm+pVa2pLkitIiJuc7ywGzduHJZ1+reDrF+/3umuXdcdWNhwM51azaRWA113mu3Z2J9kbxK1qjXVpVCrrhUrIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExRIfePFFSUkJlZaUrD1xaWmrvE3Bnj8mrtOFPtZpFrWZqbKUKOOTjIImwr+FP01uD0glqNdWR9t81dKZ3tjbdKRS6NBwOb4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcWn3KeyspJrrrmGPn360KtXLy6++GL27dt36s4S4H3gNiAL+5PtM4FJnPRCaYOoVa2pLhCta4DVrWyvxv4k+48SOo231KrWVJdCrY4vKVZXV8eoUaMoLCxk6tSpp3x/7969XHHFFdx8883cd999nHXWWfzpT3+iR48enRrYqWnYJ6ZXAUOAA8AG7AuOm0atak11QWoVEXGT44VdXl4eeXl5p/3+//t//49/+Zd/YfHixU3bzj//fKcP1yk1wFZgEzC2Ydtg4BJfpvFWDWpVa2qrITitIiJu8+Q1dvF4nN///vd85jOfYdKkSQwYMIAxY8a0ero2EXo33IqBY75MkDhqNZNaRUSkPRwfsTuTgwcPUltby8MPP8yDDz7IokWLeP7555k6dSobN25k7Nixbe/ERWnASuBWYDn2u/rGAjOAkQmdxHtqVWuqC1Iru4EFLbaZ+plcajWTWpOOJwu7xg8ynjJlC
nfeeScAn/vc53jllVdYvnx5whd2YL9mZzL2KZ4yYB2wGPg5MCvh03hLrWpNdYFpzcYOPdl+Gj+vyixqVWuqS5FWT07F9uvXj7S0NIYPH95sezQa9e1dsQA9gKuwP2j1Fez/QNzj2zTeUquZ1GqYrkDfFrd0XyfyjlrNpNak48nCrlu3blx88cXs2rWr2fbdu3czePBgLx7SkeFAnd9DJIhazaRWERE5meNTsbW1tezZs6fp6+rqaioqKsjIyCArK4u77rqLr3zlK3zxi19k/PjxPP/88/zud79j06ZNbszdIYeB6UAh9mt00oFy7FM7UxI+jbfUqtZUF6RWERG3OV7YlZeXM378+Kav586dC0BBQQErV67kuuuuY/ny5SxcuJA77riDoUOH8tvf/pYrrrii81N3UG9gDPAosBc4DpyL/eLs+QmfxltqVWuqC1KriIjbHC/sxo0bh2Wd+e0ghYWFFBYWOn0I13QHFjbcTKdWM6nVQNedZns29ifZm0Stak11KdSqa8WKiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCE69K7YkpISKisrXXng0tJSe5+AO3tMXqUNf6rVLGo1U2MrVcAhHwdJhMYLAZneGpROUKupjrT/rqG2PrIEIBQKXRoOh7c1XgPWLWHA3T0mL7WaSa1mUqt5gtIJwWolBLS9jDGD3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFKALc2WPyKsG+3qVazaJWM6nVPEHphAC2WsBUoJ/Pw3jtEPAMAPVt3bVDp2Kj0Sg5OTnOhmqh8ZRuFHBnj8mr8dSVWs2iVjOp1TxB6YRgttIPiPg4SJLRmydEREREDOF4Ybdlyxby8/OJRCKEQiGKi4ubfT8UCrV6+8///M/OzuzI+8BtQBb2JYsygUmc9EJpg6hVralOrea1BqUT1Gpk6xpgdSvbq7EvKfZRQqc5I8fXiq2rq2PUqFEUFhYyderUU77/7rvvNvt63bp13HzzzUybNs3pQ3bKNOwT06uAIcABYANw2JdpvKVWtaY6tZrXGpROUKupranC8cIuLy+PvLy8034/MzOz2dfPPvss48ePZ8iQIU4f0rEaYCuwCRjbsG0wcEnCJ/FeDWpVa2qrQa2mtdYQjE5Qq6mtqSQhr7E7cOAAv//977n55psT8XCn6N1wKwaO+TJB4qjVTGo1U1Bag9IJahX/JWRht2rVKtLT01s9ZZsIacBK7EPFZwOXA/OBN3yZxltqVWuqU6t5rUHpBLWa2grAbmBBi9uvfJ2oVQlZ2P33f/83M2fOpEePHol4uFZNA94B1gJXYx86zsH+oTSNWtWa6tRqXmtQOkGtpraSDXyzxe0aXydqlecLu61bt7Jr1y5uueUWrx+qTT2Aq7A/vPEVYBZwj58DeUitZlKrmYLSGpROUKuRrV2Bvi1u6b5O1CrPF3ZPPPEEn//85xk1apTXD9Vhw4E6v4dIELWaSa1mCkprUDpBrZI4jt8VW1tby549e5q+rq6upqKigoyMDLKysgA4evQoTz/9ND/84Q87P2knHAamA4XASOwFdjmwGJji41xeUKtaU51azWsNSieo1dTWVOJ4YVdeXs748eObvp47dy4ABQUFrFy5EoCnnnoKy7K44YYbOjdlJ/UGxgCPAnuB48C5wK3YL/Q0iVrVmurUal5rUDpBraa2phLHC7tx48ZhWdYZ7/P1r3+dr3/9604fwjXdgYUNN9Op1UxqNVNQWoPSCWo11nWn2Z6NfeWJJKJrxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2vsE3Nlj8ipt+FOtZlGrmdRqnqB0QjBbqQIO+ThIIhxp/11Dbb2zFSAUCl0aDoe3xePxTkx1qjDg7h6Tl1rNpFYzqdU8QekEtZqqoTXXsqyyM92vvUfs6uPxOEVFRUSj0c5Ph330LxaLUQS4s8fkVYJ9qRW1m
kWtZlKreYLSCWo1VSVwo/3X+rbu26FTsdFolJycHEdDtdR4SjeKfcFgkzUeDlerWdRqJrWaJyidoFbRmydEREREjOF4Ybdlyxby8/OJRCKEQiGKi4ubfb+2tpbbb7+dc845h549ezJ8+HCWL1/e2Xkdex+4DcjC/rTsTGASJ7340iBqVWuqU6t5rUHpBLWq1V+OLylWV1fHqFGjKCwsZOrUqad8f+7cubz00ksUFRVx3nnn8T//8z9861vfIhKJcM0113RqaCemYZ+YXgUMAQ4AG7AvYmwatao11anVvNagdIJa1eovxwu7vLw88vLyTvv9V155hYKCAsaNGwfY14392c9+xmuvvZbwhV0NsBXYBIxt2DYYuCShUyRGDWpVa2qrQa2mtdYQjE5Qq1r959lr7C677DLWrl3L/v37sSyLjRs3snv3biZOnOjVQ55W74ZbMXAs4Y+eWGo1k1rNFJTWoHSCWk2VSq2eLex+8pOfMHz4cM455xy6devG1VdfzdKlS/niF7/o1UOeVhqwEvvw6dnA5cB84I2ET+I9tao11anVvNagdIJa1eo/Txd2ZWVlrF27lh07dvDDH/6Q2bNn8+KLL3r1kGc0DXgHWAtcjX04NQf7iTKNWtWa6tRqXmtQOkGtavWZZVlt3rBnt3bs2GG1BrDWrFnT9PWHH35ode3a1Xruueea3e/mm2+2Jk2aZFmWZRUVFVmAtQMsy6fbzWBlJeBxikCtalWrWtUa0E61qrWztx0NrUBOW2s2T47YHT9+nOPHjxMON999ly5dcPuyZJ0xHKjze4gEUauZ1GqmoLQGpRPUaqpkbHX8rtja2lr27NnT9HV1dTUVFRVkZGSQlZXF2LFjueuuu+jZsyeDBw9m8+bN/OIXv+CRRx5xZfCOOAxMBwqBkUA6UA4sBqYkfBpvqVWtqU6t5rUGpRPUqlb/OV7YlZeXM378+Kav586dC0BBQQErV67kqaeeYt68ecycOZO///3vDB48mAULFvDNb36z81N3UG9gDPAosBc4DpwL3Ir94keTqFWtqU6t5rUGpRPUqlb/OV7YjRs3jobX37UqMzOTFStWON29q7oDCxtuplOrmdRqpqC0BqUT1GqqVGrVtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNvypVrOo1UxqNU9QOkGtpqruwH1DZ3pna9OdQqFLw+HwNrc/XDgMJM/HFXtLrWZSq5nUap6gdIJaTdXQmmtZVtmZ7tfeI3b18XicoqIiotFo56fDPvoXi8UoAtzZY/IqAWKgVsOo1UxqNU9QOkGtpqoEbrT/Wt/WfTt0KjYajZKTk+NoqJYaT+lGsS9Ea7LGQ8RqNYtazaRW8wSlE9QqevOEiIiIiDEcL+y2bNlCfn4+kUiEUChEcXFxs+8fOHCAWbNmEYlE+MQnPsHVV19NVVVVZ+d17H3gNiAL+xOkM4FJ/N+LL02iVrWmOrWa1xqUTlCrWv3l+JJidXV1jBo1isLCQqZOndrse5Zlce2119K1a1eeffZZzjrrLB555BEmTJjAn//8Z3r16tXpwTtqGvaJ6VXAEOAAsAH7wr6mUataU51azWsNSieoVa0+syyrzRv26Wtrx44dVmsAa82aNU1f79q1ywKsnTt3Nm07ceKE1b9/f+vxxx+3LMuyioqKLMDaAZbl8e0IWIC1KQGP1dqtqOHx1apWtao1iK1B6VSrWr267Wh4fCCnrTWbJ6+xO3bsGAA9evRo2hYOh+nevTsvv/yyFw95Rr0bbsXAsYQ/emKp1UxqNVNQWoPSCWo1VSq1erKwGzZsGFlZWcybN48jR45QX1/PokWLePvtt3n33Xe9eMgzSgNWYh8+PRu4HJgPvJHwSbynVrWmOrWa1xqUTlCrWv3nycKua9euPPPMM+zevZuMjAw+8YlPsHHjRvLy8giH/Xkj7jTgHWAtcDWwCfv88kpfpvGWW
tWa6tRqXmtQOkGtavWZF6+xO1lNTY118OBBy7Is65JLLrG+9a1vJfw1dqe73QxWlmGvA1CrWtWq1lRpDUqnWtXa2Zvvr7E7WZ8+fejfvz9VVVWUl5czZcoUrx+y3YYDdX4PkSBqNZNazRSU1qB0glpNlYytjj/upLa2lj179jR9XV1dTUVFBRkZGWRlZfH000/Tv39/srKyePPNN/nOd77Dtddey8SJE10ZvCMOA9OBQmAkkA6UA4uB5FlmukOtak11ajWvNSidoFa1+s/xwq68vJzx48c3fT137lwACgoKWLlyJe+++y5z587lwIEDDBo0iK9+9avEYrHOT+xAb2AM8CiwFzgOnAvciv3iR5OoVa2pTq3mtQalE9Sq1iTgxmvsnEiG19gl6pYMrwNQq1rVqtagtgalU63m3pLqNXYiIiIikhha2ImIiIgYQgs7EREREUNoYSciIiJiiA69K7akpITKykpXHri0tNTeJ+DOHpNXacOfajWLWs2kVvMEpRPUaqrqDtw31PCu1zPfKRS6NBwOb4vH486nakUYcHePyUutZlKrmdRqnqB0glpN1dCaa1lW2Znu194jdvXxeJyioiKi0Wjnp8M++heLxSgC3Nlj8ioBYqBWw6jVTGo1T1A6Qa2mqgRutP9a39Z9O3QqNhqNkpOT42iolhpP6UaxPyTPZI2HiNVqFrWaSa3mCUonqFX05gkRERERY2hhJyIiImIIRwu7hQsXcvHFF5Oens6AAQO49tpr2bVrV7P7fPzxx8yePZu+ffvSu3dvpk2bxoEDB1wZ2on3gduALKA7kAlM4v/eVWMStao11anVvNagdIJa1eqvDr3GrtHmzZuZPXs2F198Mf/85z+ZP38+EydO5M9//jO9evUC4M477+T3v/89Tz/9NH369OH2229n6tSpTR9zkmjTsF9xuAoYAhwANgCHfZnGW2pVa6pTq3mtQekEtarVZ21dTLbh41ByAGvHjh1Waw4ePGgB1ubNmy3Lsqyamhqra9eu1tNPP910n8rKSguwtm3bZlmWZRUVFSXs4r1HGi6euykAFypWq1rVqtZkaw1Kp1rV6tVtR8PjAzltrdlceY3dBx98AEBGRgYAO3bs4Pjx40yYMKHpPsOGDSMrK4tt27a58ZAd0rvhVgwcS/ijJ5ZazaRWMwWlNSidoFZTpVJrpxd28XicOXPmcPnll/PZz34WgPfee49u3bpx9tlnN7vvwIEDee+99zr7kB2WBqzEPnx6NnA5MB94I+GTeE+tak11ajWvNSidoFa1+q/TC7vZs2ezc+dOnnrqKTfm8cw04B1gLXA1sAn7/PJK/0byjFrVmurUal5rUDpBrWr1WWdeYzd79mzrnHPOsf7yl780275hwwYLsI4cOdJse1ZWlvXII48k/DV2p7vdDFaWYa8DUKta1arWVGkNSqda1drZm+evsbMsi9tvv501a9bw0ksvkZ2d3ez7n//85+natSsbNmxo2rZr1y727dtHbm6uk4f0xHCgzu8hEkStZlKrmYLSGpROUKupkrHV0cedzJ49myeffJJnn32W9PT0ptfN9enTh549e9KnTx9uvvlm5s6dS0ZGBmeddRbf/va3yc3N5dJLL3U1oD0OA9OBQmAkkA6UA4uBKQmfxltqVWuqU6t5rUHpBLWq1X+OFnbLli0DYNy4cc22r1ixglmzZgHw6KOPEg6HmTZtGseOHWPSpEn813/9V6eGdao3MAZ4FNgLHAfOBW7FfvGjSdSq1lSnVvNag9IJalWr/xwt7Bped3dGPXr0YOnSpSxdutTJQ7iqO7Cw4WY6tZpJrWYKSmtQOkGtpkqlVl0rVkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYogOvXmisrLStQeurq629+naHpNXdcOfajWLWs2kVvMEpRPUaqqONIba8w7XUCiUFQ6Hd8Xj8R7OxzpVGIi7ucMkplYzqdVMajVPUDpBraYKw8dxGGpZ1r4z3a9dC
zuwF3dAPzeGO0k3oN7lfSYrtZpJrWZSq3mC0glqNdWhthZ10IGFnYiIiIgkN715QkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMcT/B73X1paF7bdVAAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Test Best Policy on New Episodes" + ], + "metadata": { + "id": "NPbcI9qNF_F9" + } + }, + { + "cell_type": "code", + "source": [ + "env = BlackjackEnv()\n", + "NUM_EPISODES = 100000\n", + "\n", + "for _ in range(NUM_EPISODES):\n", + " state = env.reset()\n", + " while env.done == False:\n", + " if state[0] == 19:\n", + " # Player was dealt with a Blackjack\n", + " next_state, reward, env.done, info = env.step(1)\n", + " total_rewards += reward\n", + " else:\n", + " Q_index = get_Q_state_index(state)\n", + " action = new_Q_binary[Q_index]\n", + " new_state, reward, done, desc = env.step(action)\n", + " state = new_state\n", + " total_rewards += reward\n", + "avg_reward = total_rewards / NUM_EPISODES\n", + "print(f'Average Reward: {avg_reward}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7sTNqbeZga_3", + "outputId": "97878c73-7419-41d4-d600-68bfd314b8f4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Reward: -4.677\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "WL-NmCiphQiq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Credits" + ], + "metadata": { + "id": "73y12ontGQRk" + } + }, + { + "cell_type": "markdown", + "source": [ + "The code is work of Mr. Adithya Solai.\n", + "
\n", + "The conversion from Python to Python Notebook is work of Mr. Sushant Nair.\n", + "
\n", + "The inspiration for making this Python Notebook as part of the RL Series is Mr. Terrence Ou\n", + "
\n", + "The code is further explained in the following Medium articles by Mr. Adithya Solai. GitHub Repo link is also provided.\n", + "
\n", + "1.\thttps://towardsdatascience.com/cracking-blackjack-part-1-31da28aeb4bb\n", + "2.\thttps://towardsdatascience.com/cracking-blackjack-part-2-75e32363e38\n", + "3.\thttps://towardsdatascience.com/cracking-blackjack-part-3-8fd3a5870efd\n", + "4.\thttps://towardsdatascience.com/cracking-blackjack-part-4-8b4a9caa38eb\n", + "5.\thttps://towardsdatascience.com/cracking-blackjack-part-5-70bd2f726133\n", + "\n", + "https://github.com/adithyasolai/Monte-Carlo-Blackjack/blob/master/MC_Blackjack_Full.ipynb\n" + ], + "metadata": { + "id": "BiGQjzloGSIX" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "btiWK3iZHFi5" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 5fa5ef88336e6fba1a36f26a7ad44df9b1650be5 Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:47:15 +0530 Subject: [PATCH 7/8] Delete chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb --- .../Monte_Carlo_Blackjack_Demonstration.ipynb | 1081 ----------------- 1 file changed, 1081 deletions(-) delete mode 100644 chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb diff --git a/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb b/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb deleted file mode 100644 index 8c4a24e..0000000 --- a/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb +++ /dev/null @@ -1,1081 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Installing Dependencies" 
- ], - "metadata": { - "id": "ptQKksAbExoX" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JuDWA5loOsFM", - "outputId": "b9cdd35b-2703-4828-ad14-494bafe11476" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting gymnasium\n", - " Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)\n", - "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)\n", - "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", - "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)\n", - "Collecting farama-notifications>=0.0.1 (from gymnasium)\n", - " Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)\n", - "Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/953.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m501.8/953.9 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n", - "Installing collected packages: farama-notifications, gymnasium\n", - "Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1\n" - ] - } - ], - "source": [ - "!pip install gymnasium" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Importing Dependencies" - ], - "metadata": { - "id": "q6ml-ZqBE1DP" - } - }, - { - "cell_type": 
"code", - "execution_count": null, - "metadata": { - "id": "cbt-NEiMO1bA" - }, - "outputs": [], - "source": [ - "import random\n", - "import numpy as np\n", - "import gymnasium as gym\n", - "from gym import error, spaces, utils\n", - "from gym.utils import seeding\n", - "import enum\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import time" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Set up the game" - ], - "metadata": { - "id": "RSlZaAJ3E4YL" - } - }, - { - "cell_type": "code", - "source": [ - "# Define the rank and suit of a card\n", - "\n", - "ranks = {\n", - " \"two\": 2,\n", - " \"three\": 3,\n", - " \"four\": 4,\n", - " \"five\": 5,\n", - " \"six\": 6,\n", - " \"seven\": 7,\n", - " \"eight\": 8,\n", - " \"nine\": 9,\n", - " \"ten\": 10,\n", - " \"jack\": 10,\n", - " \"queen\": 10,\n", - " \"king\": 10,\n", - " \"ace\": (1, 11),\n", - "}\n", - "\n", - "class Suit(enum.Enum):\n", - " spades = \"spades\"\n", - " clubs = \"clubs\"\n", - " diamonds = \"diamonds\"\n", - " hearts = \"hearts\"\n", - "\n", - "# Now to define the card and deck\n", - "# Implement the shuffle, peek and deal functions for the deck\n", - "\n", - "class Card:\n", - " def __init__(self, suit, rank, value):\n", - " self.suit = suit\n", - " self.rank = rank\n", - " self.value = value\n", - "\n", - " def __str__(self):\n", - " return self.rank + \" of \" + self.suit.value\n", - "\n", - "class Deck:\n", - " def __init__(self, num=1):\n", - " self.cards = []\n", - " for i in range(num):\n", - " for suit in Suit:\n", - " for rank, value in ranks.items():\n", - " self.cards.append(Card(suit, rank, value))\n", - "\n", - " def shuffle(self):\n", - " random.shuffle(self.cards)\n", - "\n", - " def deal(self):\n", - " return self.cards.pop(0)\n", - "\n", - " def peek(self):\n", - " if len(self.cards) > 0:\n", - " return self.cards[0]\n", - "\n", - " def add_to_bottom(self, card):\n", - " self.cards.append(card)\n", - "\n", - " def __str__(self):\n", - " result = 
\"\"\n", - " for card in self.cards:\n", - " result += str(card) + \"\\n\"\n", - " return result\n", - "\n", - " def __len__(self):\n", - " return len(self.cards)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "D3df2N-JrKiE", - "outputId": "a36dc6fc-51b2-4fb0-bfb5-380e143cc4b4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", - " and should_run_async(code)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Define the logic for evaluating the value of the player's hand" - ], - "metadata": { - "id": "jLdAUSMgFPIi" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "An ace is used as 11 whenever possible. 
It is 1 only if the other option would make the player bust.\n", - "'''\n", - "\n", - "def player_eval(player_hand):\n", - " num_ace = 0\n", - " # use_one means that every ace in the hand is counted as one\n", - " use_one = 0\n", - " for card in player_hand:\n", - " if card.rank == \"ace\":\n", - " num_ace += 1\n", - " use_one += card.value[0] # use 1 for Ace\n", - " else:\n", - " use_one += card.value\n", - "\n", - " if num_ace > 0:\n", - " # Define player policy for Aces:\n", - " # Make Aces 11 if they get you to the range [18, 21]\n", - " # Otherwise, use one.\n", - "\n", - " ace_counter = 0\n", - " while ace_counter < num_ace:\n", - " # Only add by 10 because 1 is already added before\n", - " use_eleven = use_one + 10\n", - " if use_eleven > 21:\n", - " return use_one\n", - " elif use_eleven >= 18 and use_eleven <= 21:\n", - " return use_eleven\n", - " else:\n", - " # This allows for some Aces to be 11 and others to be 1.\n", - " use_one = use_eleven\n", - " ace_counter += 1\n", - " return use_one\n", - " else:\n", - " return use_one" - ], - "metadata": { - "id": "SWrENBc-vEUL" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Define the logic for evaluating the value of the dealer's hand" - ], - "metadata": { - "id": "T9tDhTf0FWUV" - } - }, - { - "cell_type": "code", - "source": [ - "# This follows the same, official rules every time.\n", - "# Still there is a need to figure out what happens if there are multiple Aces.\n", - "def dealer_eval(player_hand):\n", - " num_ace = 0\n", - " use_one = 0\n", - " for card in player_hand:\n", - " if card.rank == \"ace\":\n", - " num_ace += 1\n", - " use_one += card.value[0] # use 1 for Ace\n", - " else:\n", - " use_one += card.value\n", - "\n", - " if num_ace > 0:\n", - " # See if using 11 instead of 1 for the Ace gets the dealer's hand value closer to the [17, 21] range.\n", - " # The dealer will follow Hard 17 rules. 
This means that the dealer will not hit again if the Ace yields a 17.\n", - " # This also means that Aces initially declared as 11 can be changed to 1 as new cards come.\n", - "\n", - " ace_counter = 0\n", - " while ace_counter < num_ace:\n", - " # Only add 10 because 1 is already added before\n", - " use_eleven = use_one + 10\n", - "\n", - " if use_eleven > 21:\n", - " return use_one\n", - " elif use_eleven >= 17 and use_eleven <= 21:\n", - " return use_eleven\n", - " else:\n", - " # The case where even using Ace as eleven is less than 17.\n", - " use_one = use_eleven\n", - " ace_counter += 1\n", - " return use_one\n", - " else:\n", - " return use_one" - ], - "metadata": { - "id": "YzDYaCCVx3AH" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Define logic for the dealer's turn" - ], - "metadata": { - "id": "ips4ASjUFaEg" - } - }, - { - "cell_type": "code", - "source": [ - "def dealer_turn(dealer_hand, deck):\n", - " # Calculate the dealer hand's value.\n", - " dealer_value = dealer_eval(dealer_hand)\n", - "\n", - " # Define dealer policy (which is fixed to the official rules)\n", - " # The dealer keeps hitting until their total is 17 or more\n", - " while dealer_value < 17:\n", - " dealer_hand.append(deck.deal())\n", - " dealer_value = dealer_eval(dealer_hand)\n", - "\n", - " return dealer_value, dealer_hand, deck" - ], - "metadata": { - "id": "YetM4Xsixlrf" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Define the OpenAI Gym Environment for Blackjack" - ], - "metadata": { - "id": "jUQRNbyAFd9-" - } - }, - { - "cell_type": "code", - "source": [ - "INITIAL_BALANCE = 1000\n", - "NUM_DECKS = 6\n", - "\n", - "class BlackjackEnv(gym.Env):\n", - " metadata = {'render.modes': ['human']}\n", - "\n", - " def __init__(self):\n", - " super(BlackjackEnv, self).__init__()\n", - "\n", - " # Initialize the blackjack deck\n", - " self.bj_deck = Deck(NUM_DECKS)\n", - 
"\n", - " self.player_hand = []\n", - " self.dealer_hand = []\n", - "\n", - " self.reward_options = {\"lose\": -100, \"tie\": 0, \"win\": 100}\n", - "\n", - " self.action_space = spaces.Discrete(2)\n", - "\n", - " '''\n", - " First element of tuple is the range of possible hand values for the player (3 through 20). This is the possible range of values that the player will actually have to make a decision for. Any player hand value 21 or above already has automatic valuations, and needs no input from an AI agent.\n", - " However, we also need to add all the hand values that the agent could possibly end up in when they bust. Maybe the agent can glean some correlations based on what hand value they bust at, so this should be in the observation space. Also, the layout of OpenAI Gym environment class makes us have to include the bust-value in the step() function because we need to return that done is true alongside the final obs, which is the bust-value.\n", - " '''\n", - " # Second element of the tuple is the range of possible values for the dealer's upcard (2 through 11)\n", - " self.observation_space = spaces.Tuple((spaces.Discrete(18), spaces.Discrete(10)))\n", - " self.done = False\n", - "\n", - " def _take_action(self, action):\n", - " if action == 0: #hit\n", - " self.player_hand.append(self.bj_deck.deal())\n", - " # recalculate the value of the player's hand after any changes to the hand\n", - " self.player_value = player_eval(self.player_hand)\n", - "\n", - " def step(self, action):\n", - " self._take_action(action)\n", - "\n", - " # End the episode/game if the player stands or has a hand value >= 21.\n", - " self.done = action == 1 or self.player_value >= 21\n", - "\n", - " # rewards are 0 when the player hits and is still below 21, and they keep playing\n", - " rewards = 0\n", - "\n", - " if self.done:\n", - " # Calculate rewards\n", - " if self.player_value > 21:\n", - " rewards = self.reward_options['lose']\n", - " elif self.player_value == 21:\n", - " 
rewards = self.reward_options['win']\n", - " else:\n", - " # begin the dealer turn phase\n", - " dealer_value, self.dealer_hand, self.bj_deck = dealer_turn(self.dealer_hand, self.bj_deck)\n", - " # End the dealer turn phase\n", - "\n", - " # Final comparison\n", - " if dealer_value > 21:\n", - " rewards = self.reward_options['win']\n", - " elif dealer_value == 21:\n", - " rewards = self.reward_options['lose']\n", - " else:\n", - " # both dealer and player value is less than 21\n", - " if self.player_value > dealer_value:\n", - " rewards = self.reward_options['win']\n", - " elif self.player_value < dealer_value:\n", - " rewards = self.reward_options['lose']\n", - " else:\n", - " rewards = self.reward_options['tie']\n", - " self.balance += rewards\n", - " # Subtract by 1 to fit into the possible observation range.\n", - " # This makes the possible range of 3 through 20 into 1 through 18.\n", - " player_value_obs = self.player_value - 2\n", - " # Get the value of the dealer's upcard; this value is what the agent sees.\n", - " # Subtract by 1 to fit into the possible observation range of 1 to 10\n", - " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", - " # The state is represented as a player hand-value + dealer upcard pair.\n", - " obs = np.array([player_value_obs, upcard_value_obs])\n", - " return obs, rewards, self.done, {}\n", - "\n", - " def reset(self):\n", - " # reset the game to an initial state\n", - " # add the player and dealer cards back into the deck\n", - " self.bj_deck.cards += self.player_hand + self.dealer_hand\n", - "\n", - " # shuffle before beginning. 
Only shuffle once before the start of each game.\n", - " self.bj_deck.shuffle()\n", - " self.balance = INITIAL_BALANCE\n", - " self.done = False\n", - "\n", - " # returns the start stage for the agent\n", - " # deal 2 cards to the agent and the dealer\n", - " self.player_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", - " self.dealer_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", - " self.dealer_upcard = self.dealer_hand[0]\n", - "\n", - " # calculate the value of the agent's hand\n", - " self.player_value = player_eval(self.player_hand)\n", - "\n", - " # subtract by 2 to fit into the possible observation range. This makes the possible range of 3 through 20 into 1 through 18\n", - " player_value_obs = self.player_value - 2\n", - "\n", - " # get the value of the dealer's upcard; this value is what the agent sees. Subtract by 1 to fit the possible observation range of 1 to 10.\n", - " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", - "\n", - " # the state is represented as a player hand-value + dealer upcard pair.\n", - " obs = np.array([player_value_obs, upcard_value_obs])\n", - " return obs\n", - "\n", - " def render(self, mode='human', close=False):\n", - " # convert the player hand into a format that is easy to read and understand.\n", - " hand_list = []\n", - " for card in self.player_hand:\n", - " hand_list.append(card.rank)\n", - "\n", - " # recalculate the value of the dealer upcard.\n", - " upcard_value = dealer_eval([self.dealer_upcard])\n", - "\n", - " print(f'Balance: {self.balance}')\n", - " print(f'Player Hand: {hand_list}')\n", - " print(f'Dealer Upcard: {upcard_value}')\n", - " print(f'Done: {self.done}')" - ], - "metadata": { - "id": "am91D0LgqNnf" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Test the OpenAI Gym Blackjack Environment" - ], - "metadata": { - "id": "BHiry8g3FiEL" - } - }, - { - "cell_type": "code", - "source": [ - "env = BlackjackEnv()\n", - "\n", - 
"total_rewards = 0\n", - "NUM_EPISODES = 1000\n", - "\n", - "for _ in range(NUM_EPISODES):\n", - " env.reset()\n", - " episode_reward = 0\n", - " while env.done == False:\n", - " action = env.action_space.sample()\n", - " new_state, reward, done, desc = env.step(action)\n", - " episode_reward += reward\n", - " total_rewards += episode_reward\n", - "\n", - "avg_reward = total_rewards / NUM_EPISODES\n", - "print(f'Average Reward: {avg_reward}')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ttzs7UQb2qCP", - "outputId": "94c0576f-abdf-49ff-eead-38312653e263" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Average Reward: -32.2\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# A function to map a state from the Blackjack environment to the proper index i Q and prob tables" - ], - "metadata": { - "id": "f3Oo6Kz4FldW" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "Given a state, derive the corresponding index in the Q-table. This state is a player hand value and dealer upcard pair, so the hashing formula must be used to allocate the indices of the Q-table properly.\n", - "'''\n", - "def get_Q_state_index(state):\n", - " '''\n", - " The player value is already subtracted by 1 in the env when it returns the state. Subtract by 1 again to fir with the array indexing that starts at 0.\n", - " '''\n", - " initial_player_value = state[0] - 1\n", - " '''\n", - " The upcard value is already subtracted by 1 in the env when it returns the state. 
dealer_upcard will be subtracted by 1 to fit with the array indexing that starts at 0.\n", - " '''\n", - " dealer_upcard = state[1] - 1\n", - " return(env.observation_space[1].n * (initial_player_value)) + (dealer_upcard)" - ], - "metadata": { - "id": "etl-JD8lHlJB" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Functions to get and update the probability of taking the best action" - ], - "metadata": { - "id": "TjRTbwhdFo5x" - } - }, - { - "cell_type": "code", - "source": [ - "# for a given state\n", - "def get_prob_of_best_action(env, state, Q, prob):\n", - " # Use the mappig function to figure out which index of Q corresponds to the player hand value and dealer upcard value that defines each state.\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " # Use this index in the Q 2-D array to get a 2-element array that yield the current Q-values for hitting (index 0) and standing (index 1) in this state. Use np.argmax() function to find the index of the action that yields the maximum rewards, i.e., the best action we are looking for.\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " # Retrieve the probability of the best action using the state/action pair as indices of the 'prob' array, which stores the probability of taking an action (hit or stand) for a given state/action pair.\n", - " return prob[Q_state_index][best_action]\n", - "\n", - "def update_prob_of_best_action(env, state, Q, prob, epsilon):\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " '''\n", - " Slightly alter the probability of this best action being taken by using epsilon. Epsilon starts at 1.0, and slowly decays over time. Therefore, as per the equation below, the AI agent will use the probability listed for the best action in the 'prob' table during the beginning of the algorithm. 
As time goes on, the likelihood that the best action is taken is increased from what is listed in the 'prob' table.\n", - " This allows for the exploration of other moves in the beginning of the algorithm, but exploitation later for a greater reward.\n", - " '''\n", - " prob[Q_state_index][best_action] = min(1, prob[Q_state_index][best_action] + 1 - epsilon)\n", - "\n", - " other_action = 1 if best_action == 0 else 0\n", - " prob[Q_state_index][other_action] = 1 - prob[Q_state_index][best_action]\n", - "\n", - " return prob" - ], - "metadata": { - "id": "qazyxoCbYLhz" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# play_game() function" - ], - "metadata": { - "id": "D89gCgidFruP" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "Simulates one round of Blackjack.\n", - "\n", - "First, use env.reset() to set up a new round of Blackjack.\n", - "\n", - "If the player is already dealt a blackjack by luck, then this episode is just scrapped. There is nothing the AI agent can learn from this episode since it did not have to make any decisions.\n", - "\n", - "Otherwise, the AI agent finds the best action available given the current state of the game and its knowledge so far. This knowledge is stored in the Q-table defined in the run_mc() function. Then, the AI agent retrieves the probability that it should take this action from the prob table. The AI agent applies this probability and chooses its action given this state. The action is sent to the environment, and a reward is returned. This state-action-reward sequence that just occured is stored in episode variable as a tuple. This process is repeated until the current episode is over. 
Each episode will roughly rield 1-3 tuples in episode variable since Blackjack rounds are usually resolved after 1-3 decisions by the player.\n", - "\n", - "The Q-values in the Q-table associated with each state-action pair that was seen in this episode will be updated after this episode based on the state-action-reward tuples returned by this function. Then, the corresponding probabilities in prob variable are also modified to reflect this change in Q-values.\n", - "'''\n", - "\n", - "def play_game(env, Q, prob):\n", - " '''\n", - " Can contain numerous state->action->reward tuples bacause a round of Blackjack is not always resolved in one turn. However, there will be no state that has a player hand value that exceeds 20, since only initial states BEFORE actions are made are used when storing state->action->reward tuples.\n", - " '''\n", - " episode = []\n", - " state = env.reset()\n", - " while env.done == False:\n", - " if state[0] == 19:\n", - " # Player was dealt blackjack right from the beginning; player_value already subtracted by 2 to get state[0]\n", - " # No analysis done for this useless episode\n", - " next_state, reward, env.done, info = env.step(1)\n", - " else:\n", - " # Get the index in Q that corresponds to the current state\n", - " Q_state_index = get_Q_state_index(state)\n", - "\n", - " # Use the index to get the possible actions, and use np.argmax() to get the index of the action that has the highest current Q value. Index 0 is hit, 1 is stand.\n", - " best_action = np.argmax(Q[Q_state_index])\n", - "\n", - " # Go to the prob table to retrieve the probability of this action. 
This uses the same Q_state_index used for finding the state index of the Q-table.\n", - " prob_of_best_action = get_prob_of_best_action(env, state, Q, prob)\n", - "\n", - " action_to_take = None\n", - "\n", - " if random.uniform(0, 1) < prob_of_best_action:\n", - " # Take the best action\n", - " action_to_take = best_action\n", - " else:\n", - " # Take the other action\n", - " action_to_take = 1 if best_action == 0 else 0\n", - "\n", - " # The action is performed by the agent, and the next state, rewards and done information is returned.\n", - " next_state, reward, env.done, info = env.step(action_to_take)\n", - "\n", - " # Log the state->action->reward sequence\n", - " episode.append((state, action_to_take, reward))\n", - "\n", - " # Update the state for the next decision made by the agent.\n", - " state = next_state\n", - "\n", - " return episode" - ], - "metadata": { - "id": "HmFb1gcWFFcm" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# update_Q() function" - ], - "metadata": { - "id": "MoKz3QUBFuZl" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "'''\n", - "This function iterates through the state-action-reward tuples in episode and updates the Q-values of the corresponding state-action pairs in Q.\n", - "'''\n", - "\n", - "def update_Q(env, episode, Q, alpha, gamma):\n", - " '''\n", - " This is where the algorithm hinges on being first visit or every visit. First visit will be used because if you want first-visit, you need to use the cumulative reward of the entire episode when updating a Q-value for ALL of the state/action pairs in the episode, even the first state/action pair. In this algorithm, an episode is a round of Blackjack. Although the bulk of the reward may come from the 2nd or 3rd decision, deciding to hit on the 1st decision is what enabled the future situations to even occur, so it is important to include the entire cumulative reward. 
We can reduce the impact of the rewards of the future decisions by lowering gamma, which will lower the G value for our early state/action pair in which we hit and did not get any immediate rewards. This will make our agent consider future rewards, and not just look at each state in isolation despite having hit previously.\n", - " If you want Every-Visit MC, do not use the cumulative rewards when updating Q-values and just use the immediate reward in this episode for each state/action pair.\n", - " '''\n", - " step = 0\n", - " for state, action, reward in episode:\n", - " # calculate the cumulative reward of taking this action in this state.\n", - " # Start from the immediate rewards, and use all the rewards from the subsequent states.\n", - " # Do not use rewards from previous states\n", - "\n", - " total_reward = 0\n", - " gamma_exp = 0\n", - " for curr_step in range(step, len(episode)):\n", - " curr_reward = episode[curr_step][2]\n", - " total_reward += (gamma ** gamma_exp) * curr_reward\n", - " gamma_exp += 1\n", - "\n", - " # Update the Q-value\n", - " Q_state_index = get_Q_state_index(state)\n", - " curr_Q_value = Q[Q_state_index][action]\n", - " Q[Q_state_index][action] = curr_Q_value + alpha * (total_reward - curr_Q_value)\n", - " # update step to start further down the episode the next time.\n", - " step += 1\n", - " return Q" - ], - "metadata": { - "id": "R08jdGkVbQD0" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# update_prob() function" - ], - "metadata": { - "id": "BcUOuFW2Fwm4" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "def update_prob(env, episode, Q, prob, epsilon):\n", - " for state, action, reward in episode:\n", - " '''\n", - " Update the probabilities of the actions that can be taken given the current state. 
The goal is that the new update in Q has changed what the best action is, and epsilon will be used to create a small increase in the probability that the new, better action is chosen.\n", - " '''\n", - " prob = update_prob_of_best_action(env, state, Q, prob, epsilon)\n", - " return prob" - ], - "metadata": { - "id": "4UA7u4nWdRFL" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# First-Visit Monte Carlo Algorithm" - ], - "metadata": { - "id": "rR5AeirSFy5h" - } - }, - { - "cell_type": "code", - "source": [ - "# run_mc() to run the First Visit Monte Carlo Algorithm\n", - "'''\n", - "This function initializes the key variables for the algorithm and runs the learning algorithm for the AI agent for num_episodes episodes. An episode is a simulation of one game of Blackjack using the OpenAI Gym environment defined above.\n", - "Q, or the Q-table, is a 2D list in which the rows are the different possible states and the columns are the different possible actions for each state. The values stored in each cell refer to the value, or Q-value, of selecting some action (the column index) given some state (the row index). These Q-values start at 0 for every state-action pair, and are updated by the update_Q() function after each episode to reflect the rewards received in an episode where the state-action pair occured. All Q-values initially start at 0.\n", - "\n", - "prob, or the prob table, has the same structure as Q, but the cell values refer to the probability of selecting some action (the column index) given some state (the row index). These probabilites are updated by the update_prob() function after the Q-values are updated after each episode. All action probabilities start at 0.5 (or 50%).\n", - "\n", - "alpha defines the weight given to each new change in Q-value within the update-Q() function. 
A smaller alpha means that a new reward logged in an episode for some state-action pair has less impact on the current Q-value for that state-action pair. The converse is also true. Therefore, alpha essentially defines how fast the AI agent learns (learning rate).\n", - "\n", - "epsilon defines the weight given to each new change in the action probabilities within the update_prob() function. A larger epsilon reduces the precent amount by which an action probability is changed after some change in Q-values. The converse is also true. An epsilon of 1 means that no change will occur in the action probability, regardless of the magnitude of changes in Q-values.\n", - "\n", - "epsilon is decayed by the decay value after every episode. The lowest value epsilon can reach is epsilon_min.\n", - "\n", - "gamma is the rate used to discount future rewards yielded by a certain state-action pair in the episode. Since a round (or episode) of Blackjack can have more than 1 decision made, there can be numerous state-action pairs that are seen in one episode. However, only the final decision (or state-action pair) yields an immediate reward from the environment. All previous state-action pairs had no rewards. So, the final reward must be used to modify the Q-values of the earlier state-action pairs. Since the final reward was only partially made possible by the eariler state-action pairs, the final reward is discounted using gamma to account for this.\n", - "\n", - "Once these important variables are defines, this function runs the First-Visit Monte Carlo algorithm for the Blackjack environment.\n", - "\n", - "The function runs num_episodes episodes.\n", - "\n", - "In each episode, epsilon is first decayed by the decay rate.\n", - "\n", - "Then, the game of Blackjack is played out throught the play_game() function. This function returns a list of the state-action-reward tuples that occured during the game. 
These tuples represent the actions the AI agent had to take given some state, and the rewards that resulted.\n", - "\n", - "These tuples are used to modify the Q-values in Q through the update_Q() function.\n", - "\n", - "Then, the tuples are used to modify the probability distributions in prob of the two actions (hit or stand) for any states that were encountered in the episode. This is done through the update_prob() function.\n", - "\n", - "After this process is done for each episode, the function returns the modified Q and prob tables. These tables are an imprint of the learning that has taken place by the AI agent through the First-Visit Monte Carlo algorithm.\n", - "'''\n", - "\n", - "def run_mc(env, num_episodes):\n", - " '''\n", - " observation_space[0] is the 18 possible player values. (3-20)\n", - " observation_space[1] is the 10 possible dealer upcards. (2-11)\n", - " Combining these togenther yields all possible states.\n", - " Multiplying this with hit/stand yields all possible state-action pairs.\n", - " This is the Q map.\n", - " '''\n", - " Q = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16)\n", - "\n", - " # This map contains the probability distributions for each action (hit or stand) given a state. The state (combination of player hand value and dealer upcard value) index in this array yields a 2-element array. 
The 0th index of this 2-element array refers to the probability of hit and the 1st index is the probability of stand.\n", - " prob = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16) + 0.5\n", - "\n", - " # The learning rate; very small to avoid making quick, large changes in our policy.\n", - " alpha = 0.001\n", - "\n", - " epsilon = 1\n", - "\n", - " # The rate by which epsilon will decay over time.\n", - " # Since the probability that decides which option has the highest Q-value is 1 - epsilon + probability, this decay will make sure that the better option is taken more often in the long run.\n", - " # This allows the algorithm to explore in the early stages and exploit in the later stages.\n", - " decay = 0.9999\n", - "\n", - " # The lowest value that epsilon can go to. Although the decay seems slow, it grows exponentially, and this is magnified when running thousands of episodes.\n", - " epsilon_min = 0.9\n", - "\n", - " gamma = 0.8\n", - "\n", - " for _ in range(num_episodes):\n", - " episode = play_game(env, Q, prob)\n", - "\n", - " epsilon = max(epsilon * decay, epsilon_min)\n", - "\n", - " Q = update_Q(env, episode, Q, alpha, gamma)\n", - "\n", - " prob = update_prob(env, episode, Q, prob, epsilon)\n", - "\n", - " return Q, prob" - ], - "metadata": { - "id": "PnRTa7FZ3RnJ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Run First-Visit Monte Carlo Reinforcement Learning Algorithm" - ], - "metadata": { - "id": "jdfLT2OvF1sV" - } - }, - { - "cell_type": "code", - "source": [ - "env = BlackjackEnv()\n", - "\n", - "start_time = time.time()\n", - "new_Q, new_prob = run_mc(env, 1000000)\n", - "end_time = time.time()\n", - "\n", - "print(f'Total time for learning: {end_time - start_time} s.')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UKx3psLBd3Yl", - "outputId": "2181c312-d684-4525-bb48-ed723de63333" - }, - 
"execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Total time for learning: 230.13713669776917 s.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# best_policy() function" - ], - "metadata": { - "id": "qICTdADBF4Eu" - } - }, - { - "cell_type": "code", - "source": [ - "'''\n", - "This function takes the new Q-table returned and keeps only the action that yields the highest value for a given state. This is represented in three different ways: binary, string and colors.\n", - "0 is hit and 1 is stand in the binary representation of the results.\n", - "H is hit and S is stand in the string representation of the results.\n", - "Green is hit and Red is stand in the color representation of the results.\n", - "'''\n", - "def best_policy(Q):\n", - " best_policy_binary = []\n", - " best_policy_string = []\n", - " best_policy_colors = []\n", - " for i in range(len(Q)):\n", - " best_policy_binary.append(np.argmax(Q[i]))\n", - " best_policy_string.append('H' if np.argmax(Q[i]) == 0 else 'S')\n", - " best_policy_colors.append('g' if np.argmax(Q[i]) == 0 else 'r')\n", - "\n", - " return best_policy_binary, best_policy_string, best_policy_colors" - ], - "metadata": { - "id": "MNbxnlFKeRgQ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Create DataFrame with Player Value as Rows and Dealer Upcard as Columns" - ], - "metadata": { - "id": "89PKdgPiF6Qx" - } - }, - { - "cell_type": "code", - "source": [ - "new_Q_binary, new_Q_string, new_Q_colors = best_policy(new_Q)\n", - "\n", - "df = pd.DataFrame(columns = range(2, 12))\n", - "\n", - "color_df = pd.DataFrame(columns = range(2, 12))\n", - "\n", - "for s in range(3, 21):\n", - " # possible player values in the range 3 to 20\n", - " start = env.observation_space[1].n * (s - 3)\n", - " end = start + 10\n", - " df.loc[s] = (new_Q_string[start:end])\n", - " color_df.loc[s] = (new_Q_colors[start:end])" - ], - 
"metadata": { - "id": "lZIV2WxWfR9z" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Create Chart Graphic for the Results" - ], - "metadata": { - "id": "Lh14aJ8jF8s-" - } - }, - { - "cell_type": "code", - "source": [ - "fig, ax = plt.subplots()\n", - "\n", - "# hide axes\n", - "fig.patch.set_visible(False)\n", - "ax.set_axis_off()\n", - "ax.axis('tight')\n", - "\n", - "ax.table(cellText=df.values, cellColours=color_df.values, cellLoc='center', rowLabels=df.index, colLabels=df.columns, loc='center')\n", - "\n", - "fig.tight_layout()\n", - "\n", - "plt.show()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "wR8JVHEef5Cf", - "outputId": "02c77629-9a81-4150-9561-c0255ad152ea" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABM9UlEQVR4nO3de3xU9Z3/8dcM4VaI2HALUw0GbWGgQklVjNoCFcEsjSgsFhdtaNS2FmuRrW3ht+MdEfZRpRcWqnWBNhUftZVIbZBV5GYkSrCp0mYh0FQqKggl0kQllDm/P06SJSGQ5OScOTPf834+HvOAnMye+bx2Evz2nJk5IcuyEBEREZHUF/Z7ABERERFxhxZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIdL8fPBQKJQF9PNzhgTqBtT7PUSCqNVMajVTUFqD0glqNdUhy7L2tXUn3xZ2oVAoKxwO74rH4z38miGRwuEw8Xjc7zESQq1mUquZgtIalE5Qq6nC4fDHoVBoaFuLOz+P2PWLx+M9ioqKiEajPo7hvZKSEmKxGGo1i1rNpFbzBKUT1GqqyspKbrzxxh7YZzmTdmEHQDQaJScnx+8xPFVZWQmo1TRqNZNazROUTlCr6M0TIiIiIsYIxMJu4cKFXHzxxaSnpzNgwACuvfZadu3a5fdYnli2bBkjR47krLPO4qyzziI3N5d169b5PVZCPPzww4RCIebMmeP3KK679957CYVCzW7Dhg3zeyzP7N+/nxtvvJG+ffvSs2dPLrzwQsrLy/0ey3XnnXfeKc9rKBRi9uzZfo/muhMnThCLxcjOzqZnz56cf/75PPDAA1iW5fdorvvHP/7BnDlzGDx4MD179uSyyy5j+/btfo/VaVu2bCE/P59IJEIoFKK4uLjZ9y3L4u6772bQoEH07NmTCRMmUFVV5c+wndRW6zPPPMPEiRPp27cvoVCIiooKX+ZsTSAWdps3b2b27NmUlZXxwgsvcPz4cSZOnEhdXZ3fo7nunHPO4eGHH2bHjh2Ul5fzpS99iSlTpvCnP/3J79E8tX37dn72s58xcuRIv0fxzIgRI3j33Xebbi+//LLfI3niyJEjXH755XTt2pV169bx5z//mR/+8Id88pOf9Hs0123fvr3Zc/rCCy8AMH36dJ8nc9+iRYtYtmwZP/3pT6msrGTRokUsXryYn/zkJ36P5rpbbrmFF154gV/+8pe8+eabTJw4kQkTJrB//36/R+uUuro6Ro0axdKlS1v9/uLFi/nxj3/M8uXLefXVV+nVqxeTJk3i448/TvCknddWa11dHVdccQWLFi1K8GRt8/01donw/PPPN/t65cqVDBgwgB07dvDFL37Rp6m8kZ+f3+zrBQsWsGzZMsrKyhgxYoRPU3mrtraWmTNn8vjjj/Pggw/6PY5n0tLSyMzM9HsMzy1atIhzzz2XFStWNG3Lzs72cSLv9O/fv9nXDz/8MOeffz5jx471aSLvvPLKK0yZMoXJkycD9tHK1atX89prr/k8mbs++ugjfvvb3/Lss882/ffl3nvv5Xe/+x3Lli1L6X+j8vLyyMvLa/V7lmWxZMkS/uM//oMpU6YA8Itf/IKBAwdSXFzMjBkzE
jlqp52pFeCmm24C4K9//WuCJmq/QByxa+mDDz4AICMjw+dJvHXixAmeeuop6urqyM3N9Xscz8yePZvJkyczYcIEv0fxVFVVFZFIhCFDhjBz5kz27Wvz44xS0tq1a7nooouYPn06AwYMYPTo0Tz++ON+j+W5+vp6ioqKKCwsJBQK+T2O6y677DI2bNjA7t27AfjjH//Iyy+/fMb/eKaif/7zn5w4cYIePZp/klfPnj2NPcoOUF1dzXvvvdfs3+E+ffowZswYtm3b5uNkwROII3Yni8fjzJkzh8svv5zPfvazfo/jiTfffJPc3Fw+/vhjevfuzZo1axg+fLjfY3niqaee4vXXXzfi9StnMmbMGFauXMnQoUN59913ue+++/jCF77Azp07SU9P93s8V/3lL39h2bJlzJ07l/nz57N9+3buuOMOunXrRkFBgd/jeaa4uJiamhpmzZrl9yie+MEPfsDRo0cZNmwYXbp04cSJEyxYsICZM2f6PZqr0tPTyc3N5YEHHiAajTJw4EBWr17Ntm3buOCCC/wezzPvvfceAAMHDmy2feDAgU3fk8QI3MJu9uzZ7Ny50+j/5TR06FAqKir44IMP+M1vfkNBQQGbN282bnH3t7/9je985zu88MILp/yvY9OcfFRj5MiRjBkzhsGDB/PrX/+am2++2cfJ3BePx7nooot46KGHABg9ejQ7d+5k+fLlRi/snnjiCfLy8ohEIn6P4olf//rX/OpXv+LJJ59kxIgRVFRUMGfOHCKRiHHP6y9/+UsKCwv51Kc+RZcuXcjJyeGGG25gx44dfo8mARCoU7G33347zz33HBs3buScc87xexzPdOvWjQsuuIDPf/7zLFy4kFGjRvGjH/3I77Fct2PHDg4ePEhOTg5paWmkpaWxefNmfvzjH5OWlsaJEyf8HtEzZ599Np/5zGfYs2eP36O4btCgQaf8j5BoNGrsqWeAt956ixdffJFbbrnF71E8c9ddd/GDH/yAGTNmcOGFF3LTTTdx5513snDhQr9Hc93555/P5s2bqa2t5W9/+xuvvfYax48fZ8iQIX6P5pnG1/8eOHCg2fYDBw4E4rXBySQQCzvLsrj99ttZs2YNL730krEvxD6deDzOsWPH/B7DdVdeeSVvvvkmFRUVTbeLLrqImTNnUlFRQZcuXfwe0TO1tbXs3buXQYMG+T2K6y6//PJTPo5o9+7dDB482KeJvLdixQoGDBjQ9MYCE3344YeEw83/k9OlSxejLwfVq1cvBg0axJEjR1i/fn3TmwpMlJ2dTWZmJhs2bGjadvToUV599VWjX+OdjAJxKnb27Nk8+eSTPPvss6Snpzed7+/Tpw89e/b0eTp3zZs3j7y8PLKysvjHP/7Bk08+yaZNm1i/fr3fo7kuPT39lNdJ9urVi759+xr3+snvfve75OfnM3jwYN555x3uueceunTpwg033OD3aK678847ueyyy3jooYe4/vrree2113jsscd47LHH/B7NE/F4nBUrVlBQUEBamrn/JOfn57NgwQKysrIYMWIEf/jDH3jkkUcoLCz0ezTXrV+/HsuyGDp0KHv27OGuu+5i2LBhfO1rX/N7tE6pra1tdpagurqaiooKMjIyyMrKYs6cOTz44IN8+tOfJjs7m1gsRiQS4dprr/VvaIfaav373//Ovn37eOeddwCa/sdoZmam70cozf1X5CTLli0DYNy4cc22r1ixwrgXKh88eJCvfvWrvPvuu/Tp04eRI0eyfv16rrrqKr9Hk054++23ueGGGzh8+DD9+/fniiuuoKys7JSPyzDBxRdfzJo1a5g3bx73338/2dnZLFmyxLgX2Td68cUX2bdvn5ELnJP95Cc/IRaL8a1vfYuDBw8SiUT4xje+wd133+33aK774IMPmDdvHm+//TYZGRlMmzaNBQsW0LVrV79H65Ty8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPPp+Rro
NtqXbt2bbOFeuPHudxzzz3ce++9CZ21pUAs7Ez8ZPPTeeKJJ/wewVebNm3yewRPPPXUU36PkFBf/vKX+fKXv+z3GAkxceLEQPwblZ6ezpIlS1iyZInfo3ju+uuv5/rrr/d7DNeNGzfujD+roVCI+++/n/vvvz+BU3mjrdZZs2Yl7YGhQLzGTkRERCQItLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYot3vig2FQllAPxcfexhASUkJlZWVLu42+ZSWlgJqNY1azaRW8wSlE9Rqqurq6nbfN9Set9mHQqGscDi8Kx6Pu/thNCHA/Hf529RqJrWaSa3mCUonqNVUdmuuZVllZ7pbe4/Y9YvH4z2KioqIRqOdHw57hR2LxWAq7h4HTEZVwEbUahq1mkmt5glKJ6jVVIeAZwCob+uuHfqA4mg0Sk5OjrOhWmg6bNoPiLiyy+R1qOFPtZpFrWZSq3mC0glqFb15QkRERMQUjhZ2y5YtY+TIkZx11lmcddZZ5Obmsm7dOrdnc88aYHUr26uBe4GPEjqNt9Sq1lQWlE5QK6g11ak1KVsdLezOOeccHn74YXbs2EF5eTlf+tKXmDJlCn/605/cnk9ERERE2qlDr7FrlJ+f3+zrBQsWsGzZMsrKyhgxYoQrg4mIiIhIxzha2J3sxIkTPP3009TV1ZGbm+vGTCIiIiLigOOF3Ztvvklubi4ff/wxvXv3Zs2aNQwfPtzN2dy1G1jQYpupn32jVjMFpTUonaBWtaY+tSYdxwu7oUOHUlFRwQcffMBvfvMbCgoK2Lx5c/Iu7rKByS227afxc2HMola1prKgdIJa1Zr61Jp0rY4Xdt26deOCCy4A4POf/zzbt2/nRz/6ET/72c9cG85VXYG+LbYd9WOQBFCrmYLSGpROUKtaU59ak45rn2MXj8c5duyYW7sTERERkQ5ydMRu3rx55OXlkZWVxT/+8Q+efPJJNm3axPr1692eT0RERETaydHC7uDBg3z1q1/l3XffpU+fPowcOZL169dz1VVXuT2fiIiIiLSTo4XdE0884fYc3rruNNuzsT8x2iRqVWsqC0onqBXUmurUmpStulasiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEB1680RJSQmVlZWuPHBpaan9lyrgkCu7TF77Gv5Uq1nUaia1miconaBWUx1p/11DltX2hc5CodCl4XB4Wzwe78RUpwoD7u4xiYVIymvKeUKtZgpQq/5tMlBQOiFQrUH6XW1ozbUsq+xM92vvEbv6eDxOUVER0Wi089NhH/2LxWIUAe7sMXmVADELmAr083kYr1UBG1GraQLWGt+I/m0yScB+foPUGpTf1UrgRvuv9W3dt0OnYqPRKDk5OY6GaqnxlG4UcGePyavp5HU/IOLjIInQeDhcrWYJYKv+bTJIAH9+g9QahN/VjtCbJ0REREQM4crC7uGHHyYUCjFnzhw3dueJ94HbgCygO5AJTAJK/RzKC2uA1a1sr8b+dOyPEjqNt9RqXmtQOk8SiH+bgvS8qtXMVlLnd9XRJcVOtn37dn72s58xcuRIN+bxzDTsE9OrgCHAAWADcNjPoUQk8PRvk0hqSJXf1U4t7Gpra5k5cyaPP/44Dz74oFszua4G2ApsAsY2bBsMXOLTPCIioH+bRFJFDanzu9qpU7GzZ89m8uTJTJgwwa15PNG74VYMHPN3FBGRJvq3SSQ1pNLvquMjdk899RSvv/4627dvd3MeT6QBK4FbgeXY754ZC8wAkvsEskO7gQUttpn6mUZqNU9QOgnYv00Bel7V6scg3kql31VHC7u//e1vfOc73+GFF16gR48ebs/kiWnAZOxDqWXAOmAx8HNgln9jeSMbO/Zk+4FnfJjFa2o1rzUonQ0C829TkJ5XtRrZmiq/q44Wdjt27ODgwYPNPtPuxIkTbNmyhZ/+9KccO3aMLl26uDakW3oAVzXcYsAtwD0k1xPiiq5A3xbbjvoxSAKo1
TxB6TxJIP5tCtLzqlZjpcLvqqOF3ZVXXsmbb77ZbNvXvvY1hg0bxve///2kXNS1Zjj2+XIRkWSif5tEUkMy/q46Wtilp6fz2c9+ttm2Xr160bdv31O2J4PDwHSgEPtceDpQjn0IdYqPc4lIsOnfJpHUkEq/q53+HLtU0BsYAzwK7AWOA+divwhyvo9ziUiw6d8mkdSQSr+rri3sNm3a5NauXNcdWNhwM951p9mejf1J4CZRq3mtQelsEJh/m4L0vKrVyNZU+l3VtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNv6lCjjk4yCJsK/hT7WaJYCt+rfJIAH8+Q1SaxB+V6s7cN+QZbV9UbdQKHRpOBzeFo/HnU/VijDg7h6TWAgjr5/XGj2vhgpQq36GzaPn1ExBel4bWnMtyyo70/3ae8SuPh6PU1RURDQa7fx02Ef/YrEYRYA7e0xeJUDMAqYC/XwexmtVEN+InlfTVAEbCUyrfoYNo+fUTAF6XiuBG+2/1rd13w6dio1Go82uD9sZjad0o4A7e0xeTYeI+wERHwdJhIZD/3peDdN4SidArfoZNoieUzMF6HntCL15QkRERMQQjhZ29957L6FQqNlt2LBhbs/mqveB24As7E+QzgQmcdKLh02xBljdyvZq7E8C/yih03hOzytmPa9B6TxJIH6GA/a8BuI5BT2vJOfz6viSYiNGjODFF1/8vx2lJfdlZ6dhn5heBQwBDgAbsC/sK6lLz6ukOv0Mm0fPqZlS5Xl1vBpLS0sjMzPTzVk8UwNsBTYBYxu2DQYu8WkecUcNel4ltdWgn2HT1KDn1EQ1pM7z6vg1dlVVVUQiEYYMGcLMmTPZt29f2/9HPundcCsGjvk7irhIz6ukOv0Mm0fPqZlS6Xl1dMRuzJgxrFy5kqFDh/Luu+9y33338YUvfIGdO3eSnp7u9oydlgasBG4FlmO/e2YsMAMY6d9Y3tkNLGixzcDPNNLzipHPa2A6CdjPcECe10A9p6DnleR7Xh0t7PLy8pr+PnLkSMaMGcPgwYP59a9/zc033+zacG6aBkzGPpRaBqwDFgM/B2b5N5Y3srFjT7YfeMaHWTym5xXzntegdDYIzM9wgJ7XwDynoOeV5HteXfm4k7PPPpvPfOYz7Nmzx43deaYHcBUQA17BfiLu8XMgr3QF+ra4Jd+BVNfoeTVMUDpPEoif4YA9r4F4TkHPK8n3vLqysKutrWXv3r0MGjTIjd0lzHCgzu8hxHV6XiXV6WfYPHpOzZSMz6ujU7Hf/e53yc/PZ/Dgwbzzzjvcc889dOnShRtuuMHt+VxxGJgOFGKfC08HyrEPoU7xcS7pHD2vkur0M2wePadmSqXn1dHC7u233+aGG27g8OHD9O/fnyuuuIKysjL69+/v9nyu6A2MAR4F9gLHgXOxXwQ538e5pHP0vEqq08+wefScmimVnldHC7unnnrK7Tk81R1Y2HAz3nWn2Z6N/UngBtHzinnPa1A6GwTmZzhAz2tgnlPQ85qkdK1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lJ7n4A7e0xepY1/qQIO+ThIIjRcMljPq2EaLwUdoFb9DBtEz6mZAvS8VnfgviHLavuibqFQ6NJwOLwtHo87n6oVYcDdPSavILUSwshrBbZKrWZSq3mC0glqNZXdmmtZVtmZ7tbeI3b18XicoqIiotFo54fDPvoXi8UoAtzZY/Iqwb78SGBaLWAq0M/nYbxWBWxEraZRq3mC0glqNdUhGq+/W9/WXTt0KjYajZKTk+NsqBYaT+lGAXf2mLwaDxEHqZV+QMTHQRKh8TSHWs2iVvMEpRPUKnrzhIiIiIgptLATERERMYTjhd3+/fu58cYb6du3Lz179uTCCy+kvLzczdlc9T5wG5CFfWmQTGASJ72DyCCBaV0DrG5lezX25
Ww+Sug03gpKa1A6Qa2g1lSn1qRsdXSt2CNHjnD55Zczfvx41q1bR//+/amqquKTn/yk2/O5Zhr2Kw5XAUOAA8AG4LCfQ3kkSK0iIiLyfxwt7BYtWsS5557LihUrmrZlZ2e7NpTbaoCtwCZgbMO2wcAlPs3jpRqC0yoiIiLNOToVu3btWi666CKmT5/OgAEDGD16NI8//rjbs7mmd8OtGDjm7yieC1KriIiINOfoiN1f/vIXli1bxty5c5k/fz7bt2/njjvuoFu3bhQUFLg9Y6elASuBW4Hl2B85MhaYAYz0byxPBKkVgN3AghbbTP2wyqC0BqUT1KrW1KfWpONoYRePx7nooot46KGHABg9ejQ7d+5k+fLlSbmwA/t1Z5OxT1OWAeuAxcDPgVn+jeWJILWSjR17sv00fpCjWYLSGpROUKtaU59ak67V0cJu0KBBDB8+vNm2aDTKb3/7W1eG8koP4KqGWwy4BbgHAxc7BKi1K9C3xbajfgySAEFpDUonqFWtqU+tScfRa+wuv/xydu3a1Wzb7t27GTx4sCtDJcpwoM7vIRIkSK0iIiJB5eiI3Z133slll13GQw89xPXXX89rr73GY489xmOPPeb2fK44DEwHCrFfZ5YOlGOfnpzi41xeCFKriIiINOdoYXfxxRezZs0a5s2bx/333092djZLlixh5syZbs/nit7AGOBRYC9wHDgX+w0G832cywtBahUREZHmHC3sAL785S/z5S9/2c1ZPNMdWNhwM12QWrnuNNuzsT8J3CRBaQ1KJ6gV1Jrq1JqUrbpWrIiIiIghtLATERERMYQWdiIiIiKG0MJORERExBAdevNESUkJlZWVrjxwaWmpvU/AnT0mr9KGP4PUShVwyMdBEmFfw59qNYtazROUTlCrqY60/64hy2r7QmehUOjScDi8LR6Pd2Kq1nZMUl5nzRNqNZNazRSg1jDg8r/sySlAz6laDWW35lqWVXamu7X3iF19PB6nqKiIaDTa+eGwj/7FYjGYCvRzZZfJqwrYiFpNo1YzBaw1vhGKAHf+ZU9OJUDMIjDPaZB+fgPTeojGa9LWt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh4jVaha1mimArVHAnX/Zk1PTS2AC9JyqNbj05gkRERERQzha2J133nmEQqFTbrNnz3Z7PnesAVa3sr0a+xOjP0roNN5Sq1pTWVA6IVitwPvAbUAW9hVyMoFJnPSGK1ME6XlVa1K2Orqk2Pbt2zlx4kTT1zt37uSqq65i+vTprg0mIiLmmIb94qBVwBDgALABOOznUCIGcrSw69+/f7OvH374Yc4//3zGjh3rylAiImKOGmArsAlo/K/EYOASn+YRMVmnX2NXX19PUVERhYWFhEIhN2YSERGD9G64FQPH/B1FxHiOjtidrLi4mJqaGmbNmuXCOB7aDSxosc3Uz75Rq5mC0hqUTghMaxqwErgVWI79DtyxwAxgpH9jeScgzyug1iRs7fTC7oknniAvL49IJMnfa5wNTG6xbT+NnwtjFrWqNZUFpRMC1ToNO3UrUAasAxYDPwdm+TeWNwL0vKqVpGvt1MLurbfe4sUXX+SZZ5KsqjVdgb4tth31Y5AEUKuZgtIalE4IVivQA7iq4RYDbgHuwcCFXZCeV7UmnU69xm7FihUMGDCAyZNbLmFFRETObDhQ5/cQIoZxfMQuHo+zYsUKCgoKSEvr9BldEREx1GFgOlCI/Zq6dKAc+1TsFB/nEjGR4xXZiy++yL59+ygsLHRzHhERMUxvYAzwKLAXOA6ci/1mivk+ziViIscLu4kTJ2JZSfh2kNZcd5rt2difGG0Stao1lQWlEwLV2h1Y2HAzXoCeV7WSlK26VqyIiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQHXrzRElJCZWVla48cGlpqf2XKuCQK7tMXvsa/lSrWdRqpgC2lgDu/MuenEob/xKg51SthjnS/ruG2vPO1lAodGk4H
N4Wj8c7MVVrOyYpr7PmCbWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkM0XpO2vq27duhUbDQaJScnx9lQLTSd0u0HRFzZZfJqPESsVrOo1UxqNU9QOkGtojdPiIiIiJjC0cLuxIkTxGIxsrOz6dmzJ+effz4PPPBA8l6JYg2wupXt1difGP1RQqfxllrVmsqC0glqBbWmOrUmZaujS4otWrSIZcuWsWrVKkaMGEF5eTlf+9rX6NOnD3fccYfbM4qIiIhIOzha2L3yyitMmTKFyZMnA3DeeeexevVqXnvtNVeHExEREZH2c3Qq9rLLLmPDhg3s3r0bgD/+8Y+8/PLL5OXluTqciIiIiLSfoyN2P/jBDzh69CjDhg2jS5cunDhxggULFjBz5ky353PPbmBBi21J+pLATlOrmYLSGpROUKtaU59ak46jhd2vf/1rfvWrX/Hkk08yYsQIKioqmDNnDpFIhIKCArdndEc2MLnFtv00fi6MWdSq1lQWlE5Qq1pTn1qTrtXRwu6uu+7iBz/4ATNmzADgwgsv5K233mLhwoXJu7DrCvRtse2oH4MkgFrNFJTWoHSCWtWa+tSadBy9xu7DDz8kHG7+f9qlSxdcv+SYiIiIiLSboyN2+fn5LFiwgKysLEaMGMEf/vAHHnnkEQoLC92eT0RERETaydHC7ic/+QmxWIxvfetbHDx4kEgkwje+8Q3uvvtut+cTERERkXZytLBLT09nyZIlLFmyxOVxPHLdabZnY39itEnUqtZUFpROUCuoNdWpNSlbda1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lL7L1XAIVd2mbz2NfypVrOo1UxqNU9QOkGtpjrS/ruGLKvtC52FQqFLw+HwNtc/gDhEUl5nzRNqNZNazaRW8wSlEwLVGgaCcmmEhtZcy7LKznS/9h6xq4/H4xQVFRGNRjs/HfbRv1gsBlOBfq7sMnlVARtRq2nUaia1miconRC41vhGKALcWZkkr0rgRvuv9W3dt0OnYqPRKDk5OY6GaqnplG4/IOLKLpNX4yFitZpFrWZSq3mC0gmBbI0C7qxMzKA3T4iIiIgYwvHC7h//+Adz5sxh8ODB9OzZk8suu4zt27e7OZt71gCrW9lejf2J0R8ldBpvqVWtqSwonaBWUGuqC1Ir8D5wG5AFdAcygUlAqZ9DtcLRJcUAbrnlFnbu3Mkvf/lLIpEIRUVFTJgwgT//+c986lOfcnNGEREREV9Nw36B2ypgCHAA2AAc9nOoVjg6YvfRRx/x29/+lsWLF/PFL36RCy64gHvvvZcLLriAZcuWuT2jiIiIiG9qgK3AImA8MBi4BJgHXOPfWK1ytLD75z//yYkTJ+jRo0ez7T179uTll192ZTARERGRZNC74VYMHPN3lDY5OhWbnp5Obm4uDzzwANFolIEDB7J69Wq2bdvGBRdc4PaM7tgNLGixzdTP+VGrmYLSGpROUKtaU19AWtOAlcCtwHLsd+GOBWYAI/0bq1WOX2P3y1/+ksLCQj71qU/RpUsXcnJyuOGGG9ixY4eb87knG5jcYtt+4BkfZvGaWtWayoLSCWpVa+oLUOs07NStQBmwDlgM/ByY5d9Yp3C8sDv//PPZvHkzdXV1HD16lEGDBvGVr3yFIUOGuDmfe7oCfVtsO+rHIAmgVjMFpTUonaBWtaa+ILUCPYCrGm4x4BbgHpJrYdfpz7Hr1asXgwYN4siRI6xfv54pU6a4MZeIiIhIUhsO1Pk9RAuOj9itX78ey7IYOnQoe/bs4a677mLYsGF87Wtfc3M+EREREV8dBqYDhdivqUsHyrFPxSbb4SzHC7sPPviAefPm8fbbb5ORkcG0adNYsGABXbt2dXM+EREREV/1BsYAjwJ7gePAudhvppjv41ytcbywu/7667n++
uvdnMU7151mezb2p2ObRK1qTWVB6QS1glpTXYBauwMLG27JTteKFRERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDdOhdsSUlJVRWVrrywKWlpfZfqoBDruwyee1r+FOtZlGrmdRqnqB0QiBbSwB3VibJq7oD9w1ZVttX6w2FQpeGw+Ft8Xjc+VSt7hgjLxbcKrWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkPAMwDUt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh8PVaha1mkmt5glKJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLiZt+3LIu7776bQYMG0bNnTyZMmEBVVVVn53VmDbC6le3V2Jc9+Sih03hLrWpNZUHpBLWCWlOdWpOy1fHCrq6ujlGjRrF06dJWv7948WJ+/OMfs3z5cl599VV69erFpEmT+Pjjjx0PKyIiIiKn16HX2J0sLy+PvLy8Vr9nWRZLlizhP/7jP5gyZQoAv/jFLxg4cCDFxcXMmDHD6cOKiIiIyGl48hq76upq3nvvPSZMmNC0rU+fPowZM4Zt27Z58ZAiIiIigef4iN2ZvPfeewAMHDiw2faBAwc2fS/hdgMLWmwz9bNv1GqmoLQGpRPUqtbUp9ak48nCLillA5NbbNtP4+fCmEWtak1lQekEtao19ak16Vo9WdhlZmYCcODAAQYNGtS0/cCBA3zuc5/z4iHb1hXo22LbUT8GSQC1mikorUHpBLWqNfWpNel48hq77OxsMjMz2bBhQ9O2o0eP8uqrr5Kbm+vFQ4qIiIgEnuMjdrW1tezZs6fp6+rqaioqKsjIyCArK4s5c+bw4IMP8ulPf5rs7GxisRiRSIRrr73WjblFREREpAXHC7vy8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPP06NHj85PLSIiIiKncLywGzduHJZ1+reDhEIh7r//fu6//36nD+Ge606zPRv7E6NNola1prKgdIJaQa2pTq1J2aprxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2n+pAg65ssvkta/hT7WaRa1mUqt5gtIJajXVkfbfNXSmd7Y23SkUujQcDm+Lx+OdmKq1HZOU11nzhFrNpFYzqdU8QekEtZrKbs21LKvsTHdr7xG7+ng8TlFREdFotPPDYR/9i8ViMBXo58ouk1cVsBG1mkatZlKreYLSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFppO6fYDIq7sMnk1HiJWq1nUaia1miconaBW0ZsnREREREzheGG3ZcsW8vPziUQihEIhiouLm33/mWeeYeLEifTt25dQKERFRUUnR+2ENcDqVrZXY39i9EcJncZbalVrKgtKJ6gV1Jrq1JqUrY4XdnV1dYwaNYqlS5ee9vtXXHEFixYtcjyciIiIiLSf42vF5uXlkZeXd9rv33TTTQD89a9/dfoQIiIiItIBeo2diIiIiCEcH7FLObuBBS22mfrZN2o1U1Bag9IJalVr6lNr0gnOwi4bmNxi234aPxfGLGpVayoLSieoVa2pT61J1xqchV1XoG+LbUf9GCQB1GqmoLQGpRPUqtbUp9ako9fYiYiIiBjC8RG72tpa9uzZ0/R1dXU1FRUVZGRkkJWVxd///nf27dvHO++8A8CuXbsAyMzMJDMzs5Nji4iIiEhLjo/YlZeXM3r0aEaPHg3A3LlzGT16NHfffTcAa9euZfTo0UyebJ+QnjFjBqNHj2b58uUujC0iIiIiLTk+Yjdu3Dgs6/RvB5k1axazZs1yunt3XXea7dnYnxhtErWqNZUFpRPUCmpNdWpNyla9xk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkh
MrKSlceuLS01P5LFXDIlV0mr30Nf6rVLGo1k1rNE5ROUKupjrT/rqEzvbO16U6h0KXhcHhbPB7vxFSt7ZikvM6aJwLUGgZc/klJWmo1VIB+XwPTGpROUKup7NZcy7LKznS39h6xq4/H4xQVFRGNRjs/HPbRv1gsBlOBfq7sMnlVARsJTGt8IxQB7vykJK8SIIZaTVMCxCwC8/saiH+bgtIJajXVIRqvSVvf1l07dCo2Go2Sk5PjbKgWmk7p9gMiruwyeTUeIg5QaxRw5ycleTW+KEGtZml6sUmAfl+Nbw1KJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLipu8dP36c73//+1x44YX06tWLSCTCV7/61abrxibcGmB1K9ursT8x+qOETuOtILUC7wO3AVlAdyATmASU+jmUR9RqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl57yvQ8//JDXX3+dWCzG66+/zjPPPMOuXbu45pprOjWsSEvTgD8Aq4DdwFpgHHDYx5m8olYzW0VE3OT4WrF5eXnk5eW1+r0+ffrwwgsvNNv205/+lEsuuYR9+/aRlZXl9GFFmtQAW4FNwNiGbYOBS3yax0s1qNXEVhERtyXsNXYffPABoVCIs88+O1EPKYbr3XArBo75O4rn1CoiIu3h+IhdR3z88cd8//vf54YbbuCss85KxEOeajewoMU2Uz/7JiCtacBK4FZgOfa7NccCM4CR/o3lCbWa2RqU31VArWpNfSnS6vnC7vjx41x//fVYlsWyZcu8frjTywYmt9i2n8bPhTFLgFqnYaduBcqAdcBi4OfALP/G8oRaDWwN0O+qWlFrqkuRVk8Xdo2LurfeeouXXnrJv6N1AF2Bvi22HfVjkAQIUivQA7iq4RYDbgHuwbAFQAO1GtYapN9VtZpJrUnHs9fYNS7qqqqqePHFF+nbt+X/N0S8MRyo83uIBFGriIiczPERu9raWvbs2dP0dXV1NRUVFWRkZDBo0CD+9V//lddff53nnnuOEydO8N577wGQkZFBt27dOj+5BN5hYDpQiP3aq3SgHPuU3RQf5/KCWs1sFRFxm+OFXXl5OePHj2/6eu7cuQAUFBRw7733snbtWgA+97nPNfu/27hxI+PGjXP6sCJNegNjgEeBvcBx4FzsF93P93EuL6jVzFYREbc5XtiNGzcOyzr920HO9L2Eu+4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ+1592ooFLo0HA5vi8fjnZiqtR2TlNdZ80SAWsOAyz8pSUuthgrQ72tgWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxWAq0M+VXSavKmAjgWmNb4QiwJ2flORVgn2pK7WapQSIWQTm9zUQ/zYFpRPUaqpDNF6Ttr6tu3boVGw0GiUnJ8fZUC00ndLtB0Rc2WXyajxEHKDWKODOT0ryajwlqVazNJ1qDtDvq/GtQekEtYrePCEiIiJiCi3sRERERAzheGG3ZcsW8vPziUQihEIhiouLm33/3nvvZdiwYfTq1YtPfvKTTJgwgVdffbWz8zqzBljdyvZq7EuBfJTQabwVpFbgfeA2IAv7UlSZwCROemejQdRqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl7b6/c985jP89Kc/5c033+Tll1/mvPPOY+LEibz//vuOhxVpaRrwB2AVsBtYC4wDDvs4k1fUamariIibOvTmiZPl5eWRl5d32u//27/9W7OvH3nkEZ544gneeOMNrrzySqcPK9KkBtgKbALGNmwbDFzi0zxeqkGtJraKiLgtIa+xq6+v57HHHqNPnz6MGjUqEQ8pAdC74VYMHPN3FM+pV
URE2sPxEbv2eO6555gxYwYffvghgwYN4oUXXqBfP58+bGY3sKDFNlM/1DAgrWnASuBWYDn2x3CMBWYAI/0byxNqNbM1KL+rgFrVmvpSpNXThd348eOpqKjg0KFDPP7441x//fW8+uqrDBgwwMuHbV02MLnFtv00fuCfWQLUOg07dStQBqwDFgM/B2b5N5Yn1Gpga4B+V9WKWlNdirR6eiq2V69eXHDBBVx66aU88cQTpKWl8cQTT3j5kKfXFejb4pbuzyieC1Ir0AO4CvvKCK9g/4f/Hj8H8pBaDROk31W1mkmtSSehn2MXj8c5dkyvmhFvDQfq/B4iQdQqIiInc3wqtra2lj179jR9XV1dTUVFBRkZGfTt25cFCxZwzTXXMGjQIA4dOsTSpUvZv38/06dPd2VwkcPAdKAQ+7VX6UA59im7KT7O5QW1mtkqIuI2xwu78vJyxo8f3/T13LlzASgoKGD58uX87//+L6tWreLQoUP07duXiy++mK1btzJixIjOTy2C/c7JMcCjwF7gOHAu9ovu5/s4lxfUamariIjbHC/sxo0bh2Wd/u0gzzyTRK8mvO4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ2d6Z2vTnUKhS8Ph8LZ4PN6JqU4VBtzdY/JSq5nUaqgQSXkNSE8EpTUonaBWU9mtuZZllZ3pbu09Ylcfj8cpKioiGo12fjjso3+xWIwiwJ09Jq8S7MsiqdUsajVTCRCzgKlAP5+H8VoVsBHzW4PSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFhpP6UYBd/aYvBpPXanVLGo1U9Op5n5AxMdBEqHx9JXprUHpBLWK3jwhIiIiYgrHC7stW7aQn59PJBIhFApRXFx82vt+85vfJBQKsWTJEqcP12nvA7cBWdifbJ8JTOKkF0obRK1qTXWBaF0DrG5lezX2J9l/lNBpvKVWtaa6FGp1fEmxuro6Ro0aRWFhIVOnTj3t/dasWUNZWRmRiL/HSadhn5heBQwBDgAbsC84bhq1qjXVBalVRMRNjhd2eXl55OXlnfE++/fv59vf/jbr169n8uTJTh+q02qArcAmYGzDtsHAJT7N46Ua1KrW1FZDcFpFRNzm2Wvs4vE4N910E3fddRcjRozw6mHapXfDrRg45usk3lOrmdQqIiLt4fiIXVsWLVpEWload9xxh1cP0W5pwErgVmA59rv6xgIzgJH+jeUJtao11QWpld3AghbbTP1MLrWaSa1Jx5OF3Y4dO/jRj37E66+/TigU8uIhOmwaMBn7FE8ZsA5YDPwcmOXfWJ5Qq1pTXWBas7FDT7afxs+rMota1ZrqUqTVk1OxW7du5eDBg2RlZZGWlkZaWhpvvfUW//7v/855553nxUO2Sw/gKuwPWn0F+z8Q9/g2jbfUaia1GqYr0LfFLd3XibyjVjOpNel4srC76aabeOONN6ioqGi6RSIR7rrrLtavX+/FQzoyHKjze4gEUauZ1CoiIidzfCq2traWPXv2NH1dXV1NRUUFGRkZZGVl0bdv32b379q1K5mZmQwdOtT5tA4dBqYDhdiv0UkHyrFP7UxJ+DTeUqtaU12QWkVE3OZ4YVdeXs748eObvp47dy4ABQUFrFy5stODuak3MAZ4FNgLHAfOxX5x9nwf5/KCWtWa6oLUKiLiNscLu3HjxmFZ7X87yF//+lenD9Vp3YGFDTfTqdVMajXQdafZno39SfYmUataU10KtepasSIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUN06M0TJSUlVFZWuvLApaWl9j4Bd/aYvEob/lSrWdRqpsZWqoBDPg6SCPsa/jS9NSidoFZTHWn/XUPteWdrKBS6NBwOb
4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcXNvj9r1ixCoVCz29VXX93ZeR17H7gNyML+ZPtMYBInvVDaIGpVa6oLROsaYHUr26uxP8n+o4RO4y21qjXVpVCr40uK1dXVMWrUKAoLC5k6dWqr97n66qtZsWJF09fdu3d3+nCdNg37xPQqYAhwANiAfcFx06hVrakuSK0iIm5yvLDLy8sjLy/vjPfp3r07mZmZTh/CNTXAVmATMLZh22DgEp/m8VINalVraqshOK0iIm7z9DV2mzZtYsCAAQwdOpTbbruNw4f9+d/bvRtuxcAxXyZIHLWaSa0iItIejo/YteXqq69m6tSpZGdns3fvXubPn09eXh7btm2jS5cuXj1sq9KAlcCtwHLsd/WNBWYAIxM6iffUqtZUF6RWdgMLWmwz9TO51GomtSYdzxZ2M2bMaPr7hRdeyMiRIzn//PPZtGkTV155pVcPe1rTgMnYp3jKgHXAYuDnwKyET+Mttao11QWmNRs79GT7afy8KrOoVa2pLkVaE/ZxJ0OGDKFfv37s2bMnUQ95ih7AVdgftPoK9n8g7vFtGm+p1UxqNUxXoG+LW7qvE3lHrWZSa9JJ2MLu7bff5vDhwwwaNChRD9mm4UCd30MkiFrNpFYRETmZ41OxtbW1zY6+VVdXU1FRQUZGBhkZGdx3331MmzaNzMxM9u7dy/e+9z0uuOACJk2a5MrgHXEYmA4UYr9GJx0oxz61MyXh03hLrWpNdUFqFRFxm+OFXXl5OePHj2/6eu7cuQAUFBSwbNky3njjDVatWkVNTQ2RSISJEyfywAMP+PJZdr2BMcCjwF7gOHAu9ouz5yd8Gm+pVa2pLkitIiJuc7ywGzduHJZ1+reDrF+/3umuXdcdWNhwM51azaRWA113mu3Z2J9kbxK1qjXVpVCrrhUrIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExRIfePFFSUkJlZaUrD1xaWmrvE3Bnj8mrtOFPtZpFrWZqbKUKOOTjIImwr+FP01uD0glqNdWR9t81dKZ3tjbdKRS6NBwOb4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcWn3KeyspJrrrmGPn360KtXLy6++GL27dt36s4S4H3gNiAL+5PtM4FJnPRCaYOoVa2pLhCta4DVrWyvxv4k+48SOo231KrWVJdCrY4vKVZXV8eoUaMoLCxk6tSpp3x/7969XHHFFdx8883cd999nHXWWfzpT3+iR48enRrYqWnYJ6ZXAUOAA8AG7AuOm0atak11QWoVEXGT44VdXl4eeXl5p/3+//t//49/+Zd/YfHixU3bzj//fKcP1yk1wFZgEzC2Ydtg4BJfpvFWDWpVa2qrITitIiJu8+Q1dvF4nN///vd85jOfYdKkSQwYMIAxY8a0ero2EXo33IqBY75MkDhqNZNaRUSkPRwfsTuTgwcPUltby8MPP8yDDz7IokWLeP7555k6dSobN25k7Nixbe/ERWnASuBWYDn2u/rGAjOAkQmdxHtqVWuqC1Iru4EFLbaZ+plcajWTWpOOJwu7xg8ynjJlC
nfeeScAn/vc53jllVdYvnx5whd2YL9mZzL2KZ4yYB2wGPg5MCvh03hLrWpNdYFpzcYOPdl+Gj+vyixqVWuqS5FWT07F9uvXj7S0NIYPH95sezQa9e1dsQA9gKuwP2j1Fez/QNzj2zTeUquZ1GqYrkDfFrd0XyfyjlrNpNak48nCrlu3blx88cXs2rWr2fbdu3czePBgLx7SkeFAnd9DJIhazaRWERE5meNTsbW1tezZs6fp6+rqaioqKsjIyCArK4u77rqLr3zlK3zxi19k/PjxPP/88/zud79j06ZNbszdIYeB6UAh9mt00oFy7FM7UxI+jbfUqtZUF6RWERG3OV7YlZeXM378+Kav586dC0BBQQErV67kuuuuY/ny5SxcuJA77riDoUOH8tvf/pYrrrii81N3UG9gDPAosBc4DpyL/eLs+QmfxltqVWuqC1KriIjbHC/sxo0bh2Wd+e0ghYWFFBYWOn0I13QHFjbcTKdWM6nVQNedZns29ifZm0Stak11KdSqa8WKiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCE69K7YkpISKisrXXng0tJSe5+AO3tMXqUNf6rVLGo1U2MrVcAhHwdJhMYLAZneGpROUKupjrT/rqG2PrIEIBQKXRoOh7c1XgPWLWHA3T0mL7WaSa1mUqt5gtIJwWolBLS9jDGD3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFKALc2WPyKsG+3qVazaJWM6nVPEHphAC2WsBUoJ/Pw3jtEPAMAPVt3bVDp2Kj0Sg5OTnOhmqh8ZRuFHBnj8mr8dSVWs2iVjOp1TxB6YRgttIPiPg4SJLRmydEREREDOF4Ybdlyxby8/OJRCKEQiGKi4ubfT8UCrV6+8///M/OzuzI+8BtQBb2JYsygUmc9EJpg6hVralOrea1BqUT1Gpk6xpgdSvbq7EvKfZRQqc5I8fXiq2rq2PUqFEUFhYyderUU77/7rvvNvt63bp13HzzzUybNs3pQ3bKNOwT06uAIcABYANw2JdpvKVWtaY6tZrXGpROUKupranC8cIuLy+PvLy8034/MzOz2dfPPvss48ePZ8iQIU4f0rEaYCuwCRjbsG0wcEnCJ/FeDWpVa2qrQa2mtdYQjE5Qq6mtqSQhr7E7cOAAv//977n55psT8XCn6N1wKwaO+TJB4qjVTGo1U1Bag9IJahX/JWRht2rVKtLT01s9ZZsIacBK7EPFZwOXA/OBN3yZxltqVWuqU6t5rUHpBLWa2grAbmBBi9uvfJ2oVQlZ2P33f/83M2fOpEePHol4uFZNA94B1gJXYx86zsH+oTSNWtWa6tRqXmtQOkGtpraSDXyzxe0aXydqlecLu61bt7Jr1y5uueUWrx+qTT2Aq7A/vPEVYBZwj58DeUitZlKrmYLSGpROUKuRrV2Bvi1u6b5O1CrPF3ZPPPEEn//85xk1apTXD9Vhw4E6v4dIELWaSa1mCkprUDpBrZI4jt8VW1tby549e5q+rq6upqKigoyMDLKysgA4evQoTz/9ND/84Q87P2knHAamA4XASOwFdjmwGJji41xeUKtaU51azWsNSieo1dTWVOJ4YVdeXs748eObvp47dy4ABQUFrFy5EoCnnnoKy7K44YYbOjdlJ/UGxgCPAnuB48C5wK3YL/Q0iVrVmurUal5rUDpBraa2phLHC7tx48ZhWdYZ7/P1r3+dr3/9604fwjXdgYUNN9Op1UxqNVNQWoPSCWo11nWn2Z6NfeWJJKJrxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2vsE3Nlj8ipt+FOtZlGrmdRqnqB0QjBbqQIO+ThIIhxp/11Dbb2zFSAUCl0aDoe3xePxTkx1qjDg7h6Tl1rNpFYzqdU8QekEtZqqoTXXsqyyM92vvUfs6uPxOEVFRUSj0c5Ph330LxaLUQS4s8fkVYJ9qRW1m
kWtZlKreYLSCWo1VSVwo/3X+rbu26FTsdFolJycHEdDtdR4SjeKfcFgkzUeDlerWdRqJrWaJyidoFbRmydEREREjOF4Ybdlyxby8/OJRCKEQiGKi4ubfb+2tpbbb7+dc845h549ezJ8+HCWL1/e2Xkdex+4DcjC/rTsTGASJ7340iBqVWuqU6t5rUHpBLWq1V+OLylWV1fHqFGjKCwsZOrUqad8f+7cubz00ksUFRVx3nnn8T//8z9861vfIhKJcM0113RqaCemYZ+YXgUMAQ4AG7AvYmwatao11anVvNagdIJa1eovxwu7vLw88vLyTvv9V155hYKCAsaNGwfY14392c9+xmuvvZbwhV0NsBXYBIxt2DYYuCShUyRGDWpVa2qrQa2mtdYQjE5Qq1r959lr7C677DLWrl3L/v37sSyLjRs3snv3biZOnOjVQ55W74ZbMXAs4Y+eWGo1k1rNFJTWoHSCWk2VSq2eLex+8pOfMHz4cM455xy6devG1VdfzdKlS/niF7/o1UOeVhqwEvvw6dnA5cB84I2ET+I9tao11anVvNagdIJa1eo/Txd2ZWVlrF27lh07dvDDH/6Q2bNn8+KLL3r1kGc0DXgHWAtcjX04NQf7iTKNWtWa6tRqXmtQOkGtavWZZVlt3rBnt3bs2GG1BrDWrFnT9PWHH35ode3a1Xruueea3e/mm2+2Jk2aZFmWZRUVFVmAtQMsy6fbzWBlJeBxikCtalWrWtUa0E61qrWztx0NrUBOW2s2T47YHT9+nOPHjxMON999ly5dcPuyZJ0xHKjze4gEUauZ1GqmoLQGpRPUaqpkbHX8rtja2lr27NnT9HV1dTUVFRVkZGSQlZXF2LFjueuuu+jZsyeDBw9m8+bN/OIXv+CRRx5xZfCOOAxMBwqBkUA6UA4sBqYkfBpvqVWtqU6t5rUGpRPUqlb/OV7YlZeXM378+Kav586dC0BBQQErV67kqaeeYt68ecycOZO///3vDB48mAULFvDNb36z81N3UG9gDPAosBc4DpwL3Ir94keTqFWtqU6t5rUGpRPUqlb/OV7YjRs3jobX37UqMzOTFStWON29q7oDCxtuplOrmdRqpqC0BqUT1GqqVGrVtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNvypVrOo1UxqNU9QOkGtpqruwH1DZ3pna9OdQqFLw+HwNrc/XDgMJM/HFXtLrWZSq5nUap6gdIJaTdXQmmtZVtmZ7tfeI3b18XicoqIiotFo56fDPvoXi8UoAtzZY/IqAWKgVsOo1UxqNU9QOkGtpqoEbrT/Wt/WfTt0KjYajZKTk+NoqJYaT+lGsS9Ea7LGQ8RqNYtazaRW8wSlE9QqevOEiIiIiDEcL+y2bNlCfn4+kUiEUChEcXFxs+8fOHCAWbNmEYlE+MQnPsHVV19NVVVVZ+d17H3gNiAL+xOkM4FJ/N+LL02iVrWmOrWa1xqUTlCrWv3l+JJidXV1jBo1isLCQqZOndrse5Zlce2119K1a1eeffZZzjrrLB555BEmTJjAn//8Z3r16tXpwTtqGvaJ6VXAEOAAsAH7wr6mUataU51azWsNSieoVa0+syyrzRv26Wtrx44dVmsAa82aNU1f79q1ywKsnTt3Nm07ceKE1b9/f+vxxx+3LMuyioqKLMDaAZbl8e0IWIC1KQGP1dqtqOHx1apWtao1iK1B6VSrWr267Wh4fCCnrTWbJ6+xO3bsGAA9evRo2hYOh+nevTsvv/yyFw95Rr0bbsXAsYQ/emKp1UxqNVNQWoPSCWo1VSq1erKwGzZsGFlZWcybN48jR45QX1/PokWLePvtt3n33Xe9eMgzSgNWYh8+PRu4HJgPvJHwSbynVrWmOrWa1xqUTlCrWv3nycKua9euPPPMM+zevZuMjAw+8YlPsHHjRvLy8giH/Xkj7jTgHWAtcDWwCfv88kpfpvGWW
tWa6tRqXmtQOkGtavWZF6+xO1lNTY118OBBy7Is65JLLrG+9a1vJfw1dqe73QxWlmGvA1CrWtWq1lRpDUqnWtXa2Zvvr7E7WZ8+fejfvz9VVVWUl5czZcoUrx+y3YYDdX4PkSBqNZNazRSU1qB0glpNlYytjj/upLa2lj179jR9XV1dTUVFBRkZGWRlZfH000/Tv39/srKyePPNN/nOd77Dtddey8SJE10ZvCMOA9OBQmAkkA6UA4uB5FlmukOtak11ajWvNSidoFa1+s/xwq68vJzx48c3fT137lwACgoKWLlyJe+++y5z587lwIEDDBo0iK9+9avEYrHOT+xAb2AM8CiwFzgOnAvciv3iR5OoVa2pTq3mtQalE9Sq1iTgxmvsnEiG19gl6pYMrwNQq1rVqtagtgalU63m3pLqNXYiIiIikhha2ImIiIgYQgs7EREREUNoYSciIiJiiA69K7akpITKykpXHri0tNTeJ+DOHpNXacOfajWLWs2kVvMEpRPUaqrqDtw31PCu1zPfKRS6NBwOb4vH486nakUYcHePyUutZlKrmdRqnqB0glpN1dCaa1lW2Znu194jdvXxeJyioiKi0Wjnp8M++heLxSgC3Nlj8ioBYqBWw6jVTGo1T1A6Qa2mqgRutP9a39Z9O3QqNhqNkpOT42iolhpP6UaxPyTPZI2HiNVqFrWaSa3mCUonqFX05gkRERERY2hhJyIiImIIRwu7hQsXcvHFF5Oens6AAQO49tpr2bVrV7P7fPzxx8yePZu+ffvSu3dvpk2bxoEDB1wZ2on3gduALKA7kAlM4v/eVWMStao11anVvNagdIJa1eqvDr3GrtHmzZuZPXs2F198Mf/85z+ZP38+EydO5M9//jO9evUC4M477+T3v/89Tz/9NH369OH2229n6tSpTR9zkmjTsF9xuAoYAhwANgCHfZnGW2pVa6pTq3mtQekEtarVZ21dTLbh41ByAGvHjh1Waw4ePGgB1ubNmy3Lsqyamhqra9eu1tNPP910n8rKSguwtm3bZlmWZRUVFSXs4r1HGi6euykAFypWq1rVqtZkaw1Kp1rV6tVtR8PjAzltrdlceY3dBx98AEBGRgYAO3bs4Pjx40yYMKHpPsOGDSMrK4tt27a58ZAd0rvhVgwcS/ijJ5ZazaRWMwWlNSidoFZTpVJrpxd28XicOXPmcPnll/PZz34WgPfee49u3bpx9tlnN7vvwIEDee+99zr7kB2WBqzEPnx6NnA5MB94I+GTeE+tak11ajWvNSidoFa1+q/TC7vZs2ezc+dOnnrqKTfm8cw04B1gLXA1sAn7/PJK/0byjFrVmurUal5rUDpBrWr1WWdeYzd79mzrnHPOsf7yl780275hwwYLsI4cOdJse1ZWlvXII48k/DV2p7vdDFaWYa8DUKta1arWVGkNSqda1drZm+evsbMsi9tvv501a9bw0ksvkZ2d3ez7n//85+natSsbNmxo2rZr1y727dtHbm6uk4f0xHCgzu8hEkStZlKrmYLSGpROUKupkrHV0cedzJ49myeffJJnn32W9PT0ptfN9enTh549e9KnTx9uvvlm5s6dS0ZGBmeddRbf/va3yc3N5dJLL3U1oD0OA9OBQmAkkA6UA4uBKQmfxltqVWuqU6t5rUHpBLWq1X+OFnbLli0DYNy4cc22r1ixglmzZgHw6KOPEg6HmTZtGseOHWPSpEn813/9V6eGdao3MAZ4FNgLHAfOBW7FfvGjSdSq1lSnVvNag9IJalWr/xwt7Bped3dGPXr0YOnSpSxdutTJQ7iqO7Cw4WY6tZpJrWYKSmtQOkGtpkqlVl0rVkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYogOvXmisrLStQeurq629+naHpNXdcOfajWLWs2kVvMEpRPUaqqONIba8w7XUCiUFQ6Hd8Xj8R7OxzpVGIi7ucMkplYzqdVMajVPUDpBraYKw8dxGGpZ1r4z3a9dC
zuwF3dAPzeGO0k3oN7lfSYrtZpJrWZSq3mC0glqNdWhthZ10IGFnYiIiIgkN715QkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMcT/B73X1paF7bdVAAAAAElFTkSuQmCC\n" - }, - "metadata": {} - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Test Best Policy on New Episodes" - ], - "metadata": { - "id": "NPbcI9qNF_F9" - } - }, - { - "cell_type": "code", - "source": [ - "env = BlackjackEnv()\n", - "NUM_EPISODES = 100000\n", - "\n", - "for _ in range(NUM_EPISODES):\n", - " state = env.reset()\n", - " while env.done == False:\n", - " if state[0] == 19:\n", - " # Player was dealt with a Blackjack\n", - " next_state, reward, env.done, info = env.step(1)\n", - " total_rewards += reward\n", - " else:\n", - " Q_index = get_Q_state_index(state)\n", - " action = new_Q_binary[Q_index]\n", - " new_state, reward, done, desc = env.step(action)\n", - " state = new_state\n", - " total_rewards += reward\n", - "avg_reward = total_rewards / NUM_EPISODES\n", - "print(f'Average Reward: {avg_reward}')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7sTNqbeZga_3", - "outputId": "97878c73-7419-41d4-d600-68bfd314b8f4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Average Reward: -4.677\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "WL-NmCiphQiq" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Credits" - ], - "metadata": { - "id": "73y12ontGQRk" - } - }, - { - "cell_type": "markdown", - "source": [ - "The code is work of Mr. Adithya Solai.\n", - "
\n", - "The conversion from Python to Python Notebook is work of Mr. Sushant Nair.\n", - "
\n", - "The inspiration for making this Python Notebook as part of the RL Series is Mr. Terrence Ou\n", - "
\n", - "The code is further explained in the following Medium articles by Mr. Adithya Solai. GitHub Repo link is also provided.\n", - "
\n", - "1.\thttps://towardsdatascience.com/cracking-blackjack-part-1-31da28aeb4bb\n", - "2.\thttps://towardsdatascience.com/cracking-blackjack-part-2-75e32363e38\n", - "3.\thttps://towardsdatascience.com/cracking-blackjack-part-3-8fd3a5870efd\n", - "4.\thttps://towardsdatascience.com/cracking-blackjack-part-4-8b4a9caa38eb\n", - "5.\thttps://towardsdatascience.com/cracking-blackjack-part-5-70bd2f726133\n", - "\n", - "https://github.com/adithyasolai/Monte-Carlo-Blackjack/blob/master/MC_Blackjack_Full.ipynb\n" - ], - "metadata": { - "id": "BiGQjzloGSIX" - } - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "btiWK3iZHFi5" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4", - "include_colab_link": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file From 38aa4c3d7b9257c57cc3336cd2f7c8cb13b972b7 Mon Sep 17 00:00:00 2001 From: Sushant Nair Date: Tue, 28 Jan 2025 23:52:32 +0530 Subject: [PATCH 8/8] Create python notebook. 
--- .../Monte_Carlo_Blackjack_Demonstration.ipynb | 1081 +++++++++++++++++ 1 file changed, 1081 insertions(+) create mode 100644 chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb diff --git a/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb b/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb new file mode 100644 index 0000000..8c4a24e --- /dev/null +++ b/chapter_05_monte_carlo_methods/Monte_Carlo_Blackjack_Demonstration.ipynb @@ -0,0 +1,1081 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Installing Dependencies" + ], + "metadata": { + "id": "ptQKksAbExoX" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JuDWA5loOsFM", + "outputId": "b9cdd35b-2703-4828-ad14-494bafe11476" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting gymnasium\n", + " Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)\n", + "Collecting farama-notifications>=0.0.1 (from gymnasium)\n", + " Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)\n", + "Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/953.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K 
\u001b[91m━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m501.8/953.9 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n", + "Installing collected packages: farama-notifications, gymnasium\n", + "Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1\n" + ] + } + ], + "source": [ + "!pip install gymnasium" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Importing Dependencies" + ], + "metadata": { + "id": "q6ml-ZqBE1DP" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cbt-NEiMO1bA" + }, + "outputs": [], + "source": [ + "import random\n", + "import numpy as np\n", + "import gymnasium as gym\n", + "from gym import error, spaces, utils\n", + "from gym.utils import seeding\n", + "import enum\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Set up the game" + ], + "metadata": { + "id": "RSlZaAJ3E4YL" + } + }, + { + "cell_type": "code", + "source": [ + "# Define the rank and suit of a card\n", + "\n", + "ranks = {\n", + " \"two\": 2,\n", + " \"three\": 3,\n", + " \"four\": 4,\n", + " \"five\": 5,\n", + " \"six\": 6,\n", + " \"seven\": 7,\n", + " \"eight\": 8,\n", + " \"nine\": 9,\n", + " \"ten\": 10,\n", + " \"jack\": 10,\n", + " \"queen\": 10,\n", + " \"king\": 10,\n", + " \"ace\": (1, 11),\n", + "}\n", + "\n", + "class Suit(enum.Enum):\n", + " spades = \"spades\"\n", + " clubs = \"clubs\"\n", + " diamonds = \"diamonds\"\n", + " hearts = \"hearts\"\n", + "\n", + "# Now to define the card and deck\n", + "# Implement the shuffle, peek and deal functions for the deck\n", + "\n", + "class Card:\n", + " 
def __init__(self, suit, rank, value):\n", + " self.suit = suit\n", + " self.rank = rank\n", + " self.value = value\n", + "\n", + " def __str__(self):\n", + " return self.rank + \" of \" + self.suit.value\n", + "\n", + "class Deck:\n", + " def __init__(self, num=1):\n", + " self.cards = []\n", + " for i in range(num):\n", + " for suit in Suit:\n", + " for rank, value in ranks.items():\n", + " self.cards.append(Card(suit, rank, value))\n", + "\n", + " def shuffle(self):\n", + " random.shuffle(self.cards)\n", + "\n", + " def deal(self):\n", + " return self.cards.pop(0)\n", + "\n", + " def peek(self):\n", + " if len(self.cards) > 0:\n", + " return self.cards[0]\n", + "\n", + " def add_to_bottom(self, card):\n", + " self.cards.append(card)\n", + "\n", + " def __str__(self):\n", + " result = \"\"\n", + " for card in self.cards:\n", + " result += str(card) + \"\\n\"\n", + " return result\n", + "\n", + " def __len__(self):\n", + " return len(self.cards)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D3df2N-JrKiE", + "outputId": "a36dc6fc-51b2-4fb0-bfb5-380e143cc4b4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", + " and should_run_async(code)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Define the logic for evaluating the value of the player's hand" + ], + "metadata": { + "id": "jLdAUSMgFPIi" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "An ace is used as 11 whenever possible. 
It is 1 only if the other option would make the player bust.\n", + "'''\n", + "\n", + "def player_eval(player_hand):\n", + " num_ace = 0\n", + " # use_one means that every ace in the hand is counted as one\n", + " use_one = 0\n", + " for card in player_hand:\n", + " if card.rank == \"ace\":\n", + " num_ace += 1\n", + " use_one += card.value[0] # use 1 for Ace\n", + " else:\n", + " use_one += card.value\n", + "\n", + " if num_ace > 0:\n", + " # Define player policy for Aces:\n", + " # Make Aces 11 if they get you to the range [18, 21]\n", + " # Otherwise, use one.\n", + "\n", + " ace_counter = 0\n", + " while ace_counter < num_ace:\n", + " # Only add by 10 because 1 is already added before\n", + " use_eleven = use_one + 10\n", + " if use_eleven > 21:\n", + " return use_one\n", + " elif use_eleven >= 18 and use_eleven <= 21:\n", + " return use_eleven\n", + " else:\n", + " # This allows for some Aces to be 11 and others to be 1.\n", + " use_one = use_eleven\n", + " ace_counter += 1\n", + " return use_one\n", + " else:\n", + " return use_one" + ], + "metadata": { + "id": "SWrENBc-vEUL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Define the logic for evaluating the value of the dealer's hand" + ], + "metadata": { + "id": "T9tDhTf0FWUV" + } + }, + { + "cell_type": "code", + "source": [ + "# This follows the same, official rules every time.\n", + "# Still there is a need to figure out what happens if there are multiple Aces.\n", + "def dealer_eval(player_hand):\n", + " num_ace = 0\n", + " use_one = 0\n", + " for card in player_hand:\n", + " if card.rank == \"ace\":\n", + " num_ace += 1\n", + " use_one += card.value[0] # use 1 for Ace\n", + " else:\n", + " use_one += card.value\n", + "\n", + " if num_ace > 0:\n", + " # See if using 11 instead of 1 for the Ace gets the dealer's hand value closer to the [17, 21] range.\n", + " # The dealer will follow Hard 17 rules. 
This means that the dealer will not hit again if the Ace yields a 17.\n", + " # This also means that Aces initially declared as 11 can be changed to 1 as new cards come.\n", + "\n", + " ace_counter = 0\n", + " while ace_counter < num_ace:\n", + " # Only add 10 because 1 is already added before\n", + " use_eleven = use_one + 10\n", + "\n", + " if use_eleven > 21:\n", + " return use_one\n", + " elif use_eleven >= 17 and use_eleven <= 21:\n", + " return use_eleven\n", + " else:\n", + " # The case where even using Ace as eleven is less than 17.\n", + " use_one = use_eleven\n", + " ace_counter += 1\n", + " return use_one\n", + " else:\n", + " return use_one" + ], + "metadata": { + "id": "YzDYaCCVx3AH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Define logic for the dealer's turn" + ], + "metadata": { + "id": "ips4ASjUFaEg" + } + }, + { + "cell_type": "code", + "source": [ + "def dealer_turn(dealer_hand, deck):\n", + " # Calculate the dealer hand's value.\n", + " dealer_value = dealer_eval(dealer_hand)\n", + "\n", + " # Define dealer policy (which is fixed to the official rules)\n", + " # The dealer keeps hitting until their total is 17 or more\n", + " while dealer_value < 17:\n", + " dealer_hand.append(deck.deal())\n", + " dealer_value = dealer_eval(dealer_hand)\n", + "\n", + " return dealer_value, dealer_hand, deck" + ], + "metadata": { + "id": "YetM4Xsixlrf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Define the OpenAI Gym Environment for Blackjack" + ], + "metadata": { + "id": "jUQRNbyAFd9-" + } + }, + { + "cell_type": "code", + "source": [ + "INITIAL_BALANCE = 1000\n", + "NUM_DECKS = 6\n", + "\n", + "class BlackjackEnv(gym.Env):\n", + " metadata = {'render.modes': ['human']}\n", + "\n", + " def __init__(self):\n", + " super(BlackjackEnv, self).__init__()\n", + "\n", + " # Initialize the blackjack deck\n", + " self.bj_deck = Deck(NUM_DECKS)\n", + 
"\n", + " self.player_hand = []\n", + " self.dealer_hand = []\n", + "\n", + " self.reward_options = {\"lose\": -100, \"tie\": 0, \"win\": 100}\n", + "\n", + " self.action_space = spaces.Discrete(2)\n", + "\n", + " '''\n", + " First element of tuple is the range of possible hand values for the player (3 through 20). This is the possible range of values that the player will actually have to make a decision for. Any player hand value 21 or above already has automatic valuations, and needs no input from an AI agent.\n", + " However, we also need to add all the hand values that the agent could possibly end up in when they bust. Maybe the agent can glean some correlations based on what hand value they bust at, so this should be in the observation space. Also, the layout of OpenAI Gym environment class makes us have to include the bust-value in the step() function because we need to return that done is true alongside the final obs, which is the bust-value.\n", + " '''\n", + " # Second element of the tuple is the range of possible values for the dealer's upcard (2 through 11)\n", + " self.observation_space = spaces.Tuple((spaces.Discrete(18), spaces.Discrete(10)))\n", + " self.done = False\n", + "\n", + " def _take_action(self, action):\n", + " if action == 0: #hit\n", + " self.player_hand.append(self.bj_deck.deal())\n", + " # recalculate the value of the player's hand after any changes to the hand\n", + " self.player_value = player_eval(self.player_hand)\n", + "\n", + " def step(self, action):\n", + " self._take_action(action)\n", + "\n", + " # End the episode/game if the player stands or has a hand value >= 21.\n", + " self.done = action == 1 or self.player_value >= 21\n", + "\n", + " # rewards are 0 when the player hits and is still below 21, and they keep playing\n", + " rewards = 0\n", + "\n", + " if self.done:\n", + " # Calculate rewards\n", + " if self.player_value > 21:\n", + " rewards = self.reward_options['lose']\n", + " elif self.player_value == 21:\n", + " 
rewards = self.reward_options['win']\n", + " else:\n", + " # begin the dealer turn phase\n", + " dealer_value, self.dealer_hand, self.bj_deck = dealer_turn(self.dealer_hand, self.bj_deck)\n", + " # End the dealer turn phase\n", + "\n", + " # Final comparison\n", + " if dealer_value > 21:\n", + " rewards = self.reward_options['win']\n", + " elif dealer_value == 21:\n", + " rewards = self.reward_options['lose']\n", + " else:\n", + " # both dealer and player value is less than 21\n", + " if self.player_value > dealer_value:\n", + " rewards = self.reward_options['win']\n", + " elif self.player_value < dealer_value:\n", + " rewards = self.reward_options['lose']\n", + " else:\n", + " rewards = self.reward_options['tie']\n", + " self.balance += rewards\n", + " # Subtract by 2 to fit into the possible observation range.\n", + " # This makes the possible range of 3 through 20 into 1 through 18.\n", + " player_value_obs = self.player_value - 2\n", + " # Get the value of the dealer's upcard; this value is what the agent sees.\n", + " # Subtract by 1 to fit into the possible observation range of 1 to 10\n", + " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", + " # The state is represented as a player hand-value + dealer upcard pair.\n", + " obs = np.array([player_value_obs, upcard_value_obs])\n", + " return obs, rewards, self.done, {}\n", + "\n", + " def reset(self):\n", + " # reset the game to an initial state\n", + " # add the player and dealer cards back into the deck\n", + " self.bj_deck.cards += self.player_hand + self.dealer_hand\n", + "\n", + " # shuffle before beginning.
Only shuffle once before the start of each game.\n", + " self.bj_deck.shuffle()\n", + " self.balance = INITIAL_BALANCE\n", + " self.done = False\n", + "\n", + " # returns the start stage for the agent\n", + " # deal 2 cards to the agent and the dealer\n", + " self.player_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", + " self.dealer_hand = [self.bj_deck.deal(), self.bj_deck.deal()]\n", + " self.dealer_upcard = self.dealer_hand[0]\n", + "\n", + " # calculate the value of the agent's hand\n", + " self.player_value = player_eval(self.player_hand)\n", + "\n", + " # subtract by 2 to fit into the possible observation range. This makes the possible range of 3 through 20 into 1 through 18\n", + " player_value_obs = self.player_value - 2\n", + "\n", + " # get the value of the dealer's upcard; this value is what the agent sees. Subtract by 1 to fit the possible observation range of 1 to 10.\n", + " upcard_value_obs = dealer_eval([self.dealer_upcard]) - 1\n", + "\n", + " # the state is represented as a player hand-value + dealer upcard pair.\n", + " obs = np.array([player_value_obs, upcard_value_obs])\n", + " return obs\n", + "\n", + " def render(self, mode='human', close=False):\n", + " # convert the player hand into a format that is easy to read and understand.\n", + " hand_list = []\n", + " for card in self.player_hand:\n", + " hand_list.append(card.rank)\n", + "\n", + " # recalculate the value of the dealer upcard.\n", + " upcard_value = dealer_eval([self.dealer_upcard])\n", + "\n", + " print(f'Balance: {self.balance}')\n", + " print(f'Player Hand: {hand_list}')\n", + " print(f'Dealer Upcard: {upcard_value}')\n", + " print(f'Done: {self.done}')" + ], + "metadata": { + "id": "am91D0LgqNnf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Test the OpenAI Gym Blackjack Environment" + ], + "metadata": { + "id": "BHiry8g3FiEL" + } + }, + { + "cell_type": "code", + "source": [ + "env = BlackjackEnv()\n", + "\n", + 
"total_rewards = 0\n", + "NUM_EPISODES = 1000\n", + "\n", + "for _ in range(NUM_EPISODES):\n", + " env.reset()\n", + " episode_reward = 0\n", + " while env.done == False:\n", + " action = env.action_space.sample()\n", + " new_state, reward, done, desc = env.step(action)\n", + " episode_reward += reward\n", + " total_rewards += episode_reward\n", + "\n", + "avg_reward = total_rewards / NUM_EPISODES\n", + "print(f'Average Reward: {avg_reward}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ttzs7UQb2qCP", + "outputId": "94c0576f-abdf-49ff-eead-38312653e263" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Reward: -32.2\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# A function to map a state from the Blackjack environment to the proper index i Q and prob tables" + ], + "metadata": { + "id": "f3Oo6Kz4FldW" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "Given a state, derive the corresponding index in the Q-table. This state is a player hand value and dealer upcard pair, so the hashing formula must be used to allocate the indices of the Q-table properly.\n", + "'''\n", + "def get_Q_state_index(state):\n", + " '''\n", + " The player value is already subtracted by 1 in the env when it returns the state. Subtract by 1 again to fir with the array indexing that starts at 0.\n", + " '''\n", + " initial_player_value = state[0] - 1\n", + " '''\n", + " The upcard value is already subtracted by 1 in the env when it returns the state. 
dealer_upcard will be subtracted by 1 to fit with the array indexing that starts at 0.\n", + " '''\n", + " dealer_upcard = state[1] - 1\n", + " return(env.observation_space[1].n * (initial_player_value)) + (dealer_upcard)" + ], + "metadata": { + "id": "etl-JD8lHlJB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Functions to get and update the probability of taking the best action" + ], + "metadata": { + "id": "TjRTbwhdFo5x" + } + }, + { + "cell_type": "code", + "source": [ + "# for a given state\n", + "def get_prob_of_best_action(env, state, Q, prob):\n", + " # Use the mapping function to figure out which index of Q corresponds to the player hand value and dealer upcard value that defines each state.\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " # Use this index in the Q 2-D array to get a 2-element array that yields the current Q-values for hitting (index 0) and standing (index 1) in this state. Use np.argmax() function to find the index of the action that yields the maximum rewards, i.e., the best action we are looking for.\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " # Retrieve the probability of the best action using the state/action pair as indices of the 'prob' array, which stores the probability of taking an action (hit or stand) for a given state/action pair.\n", + " return prob[Q_state_index][best_action]\n", + "\n", + "def update_prob_of_best_action(env, state, Q, prob, epsilon):\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " '''\n", + " Slightly alter the probability of this best action being taken by using epsilon. Epsilon starts at 1.0, and slowly decays over time. Therefore, as per the equation below, the AI agent will use the probability listed for the best action in the 'prob' table during the beginning of the algorithm.
As time goes on, the likelihood that the best action is taken is increased from what is listed in the 'prob' table.\n", + " This allows for the exploration of other moves in the beginning of the algorithm, but exploitation later for a greater reward.\n", + " '''\n", + " prob[Q_state_index][best_action] = min(1, prob[Q_state_index][best_action] + 1 - epsilon)\n", + "\n", + " other_action = 1 if best_action == 0 else 0\n", + " prob[Q_state_index][other_action] = 1 - prob[Q_state_index][best_action]\n", + "\n", + " return prob" + ], + "metadata": { + "id": "qazyxoCbYLhz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# play_game() function" + ], + "metadata": { + "id": "D89gCgidFruP" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "Simulates one round of Blackjack.\n", + "\n", + "First, use env.reset() to set up a new round of Blackjack.\n", + "\n", + "If the player is already dealt a blackjack by luck, then this episode is just scrapped. There is nothing the AI agent can learn from this episode since it did not have to make any decisions.\n", + "\n", + "Otherwise, the AI agent finds the best action available given the current state of the game and its knowledge so far. This knowledge is stored in the Q-table defined in the run_mc() function. Then, the AI agent retrieves the probability that it should take this action from the prob table. The AI agent applies this probability and chooses its action given this state. The action is sent to the environment, and a reward is returned. This state-action-reward sequence that just occurred is stored in episode variable as a tuple. This process is repeated until the current episode is over.
Each episode will roughly yield 1-3 tuples in episode variable since Blackjack rounds are usually resolved after 1-3 decisions by the player.\n", + "\n", + "The Q-values in the Q-table associated with each state-action pair that was seen in this episode will be updated after this episode based on the state-action-reward tuples returned by this function. Then, the corresponding probabilities in prob variable are also modified to reflect this change in Q-values.\n", + "'''\n", + "\n", + "def play_game(env, Q, prob):\n", + " '''\n", + " Can contain numerous state->action->reward tuples because a round of Blackjack is not always resolved in one turn. However, there will be no state that has a player hand value that exceeds 20, since only initial states BEFORE actions are made are used when storing state->action->reward tuples.\n", + " '''\n", + " episode = []\n", + " state = env.reset()\n", + " while env.done == False:\n", + " if state[0] == 19:\n", + " # Player was dealt blackjack right from the beginning; player_value already subtracted by 2 to get state[0]\n", + " # No analysis done for this useless episode\n", + " next_state, reward, env.done, info = env.step(1)\n", + " else:\n", + " # Get the index in Q that corresponds to the current state\n", + " Q_state_index = get_Q_state_index(state)\n", + "\n", + " # Use the index to get the possible actions, and use np.argmax() to get the index of the action that has the highest current Q value. Index 0 is hit, 1 is stand.\n", + " best_action = np.argmax(Q[Q_state_index])\n", + "\n", + " # Go to the prob table to retrieve the probability of this action.
This uses the same Q_state_index used for finding the state index of the Q-table.\n", + " prob_of_best_action = get_prob_of_best_action(env, state, Q, prob)\n", + "\n", + " action_to_take = None\n", + "\n", + " if random.uniform(0, 1) < prob_of_best_action:\n", + " # Take the best action\n", + " action_to_take = best_action\n", + " else:\n", + " # Take the other action\n", + " action_to_take = 1 if best_action == 0 else 0\n", + "\n", + " # The action is performed by the agent, and the next state, rewards and done information is returned.\n", + " next_state, reward, env.done, info = env.step(action_to_take)\n", + "\n", + " # Log the state->action->reward sequence\n", + " episode.append((state, action_to_take, reward))\n", + "\n", + " # Update the state for the next decision made by the agent.\n", + " state = next_state\n", + "\n", + " return episode" + ], + "metadata": { + "id": "HmFb1gcWFFcm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# update_Q() function" + ], + "metadata": { + "id": "MoKz3QUBFuZl" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "'''\n", + "This function iterates through the state-action-reward tuples in episode and updates the Q-values of the corresponding state-action pairs in Q.\n", + "'''\n", + "\n", + "def update_Q(env, episode, Q, alpha, gamma):\n", + " '''\n", + " This is where the algorithm hinges on being first visit or every visit. First visit will be used because if you want first-visit, you need to use the cumulative reward of the entire episode when updating a Q-value for ALL of the state/action pairs in the episode, even the first state/action pair. In this algorithm, an episode is a round of Blackjack. Although the bulk of the reward may come from the 2nd or 3rd decision, deciding to hit on the 1st decision is what enabled the future situations to even occur, so it is important to include the entire cumulative reward. 
We can reduce the impact of the rewards of the future decisions by lowering gamma, which will lower the G value for our early state/action pair in which we hit and did not get any immediate rewards. This will make our agent consider future rewards, and not just look at each state in isolation despite having hit previously.\n", + " If you want Every-Visit MC, do not use the cumulative rewards when updating Q-values and just use the immediate reward in this episode for each state/action pair.\n", + " '''\n", + " step = 0\n", + " for state, action, reward in episode:\n", + " # calculate the cumulative reward of taking this action in this state.\n", + " # Start from the immediate rewards, and use all the rewards from the subsequent states.\n", + " # Do not use rewards from previous states\n", + "\n", + " total_reward = 0\n", + " gamma_exp = 0\n", + " for curr_step in range(step, len(episode)):\n", + " curr_reward = episode[curr_step][2]\n", + " total_reward += (gamma ** gamma_exp) * curr_reward\n", + " gamma_exp += 1\n", + "\n", + " # Update the Q-value\n", + " Q_state_index = get_Q_state_index(state)\n", + " curr_Q_value = Q[Q_state_index][action]\n", + " Q[Q_state_index][action] = curr_Q_value + alpha * (total_reward - curr_Q_value)\n", + " # update step to start further down the episode the next time.\n", + " step += 1\n", + " return Q" + ], + "metadata": { + "id": "R08jdGkVbQD0" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# update_prob() function" + ], + "metadata": { + "id": "BcUOuFW2Fwm4" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "def update_prob(env, episode, Q, prob, epsilon):\n", + " for state, action, reward in episode:\n", + " '''\n", + " Update the probabilities of the actions that can be taken given the current state. 
The goal is that the new update in Q has changed what the best action is, and epsilon will be used to create a small increase in the probability that the new, better action is chosen.\n", + " '''\n", + " prob = update_prob_of_best_action(env, state, Q, prob, epsilon)\n", + " return prob" + ], + "metadata": { + "id": "4UA7u4nWdRFL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# First-Visit Monte Carlo Algorithm" + ], + "metadata": { + "id": "rR5AeirSFy5h" + } + }, + { + "cell_type": "code", + "source": [ + "# run_mc() to run the First Visit Monte Carlo Algorithm\n", + "'''\n", + "This function initializes the key variables for the algorithm and runs the learning algorithm for the AI agent for num_episodes episodes. An episode is a simulation of one game of Blackjack using the OpenAI Gym environment defined above.\n", + "Q, or the Q-table, is a 2D list in which the rows are the different possible states and the columns are the different possible actions for each state. The values stored in each cell refer to the value, or Q-value, of selecting some action (the column index) given some state (the row index). These Q-values start at 0 for every state-action pair, and are updated by the update_Q() function after each episode to reflect the rewards received in an episode where the state-action pair occured. All Q-values initially start at 0.\n", + "\n", + "prob, or the prob table, has the same structure as Q, but the cell values refer to the probability of selecting some action (the column index) given some state (the row index). These probabilites are updated by the update_prob() function after the Q-values are updated after each episode. All action probabilities start at 0.5 (or 50%).\n", + "\n", + "alpha defines the weight given to each new change in Q-value within the update-Q() function. 
A smaller alpha means that a new reward logged in an episode for some state-action pair has less impact on the current Q-value for that state-action pair. The converse is also true. Therefore, alpha essentially defines how fast the AI agent learns (learning rate).\n", + "\n", + "epsilon defines the weight given to each new change in the action probabilities within the update_prob() function. A larger epsilon reduces the percent amount by which an action probability is changed after some change in Q-values. The converse is also true. An epsilon of 1 means that no change will occur in the action probability, regardless of the magnitude of changes in Q-values.\n", + "\n", + "epsilon is decayed by the decay value after every episode. The lowest value epsilon can reach is epsilon_min.\n", + "\n", + "gamma is the rate used to discount future rewards yielded by a certain state-action pair in the episode. Since a round (or episode) of Blackjack can have more than 1 decision made, there can be numerous state-action pairs that are seen in one episode. However, only the final decision (or state-action pair) yields an immediate reward from the environment. All previous state-action pairs had no rewards. So, the final reward must be used to modify the Q-values of the earlier state-action pairs. Since the final reward was only partially made possible by the earlier state-action pairs, the final reward is discounted using gamma to account for this.\n", + "\n", + "Once these important variables are defined, this function runs the First-Visit Monte Carlo algorithm for the Blackjack environment.\n", + "\n", + "The function runs num_episodes episodes.\n", + "\n", + "In each episode, epsilon is first decayed by the decay rate.\n", + "\n", + "Then, the game of Blackjack is played out through the play_game() function. This function returns a list of the state-action-reward tuples that occurred during the game.
These tuples represent the actions the AI agent had to take given some state, and the rewards that resulted.\n", + "\n", + "These tuples are used to modify the Q-values in Q through the update_Q() function.\n", + "\n", + "Then, the tuples are used to modify the probability distributions in prob of the two actions (hit or stand) for any states that were encountered in the episode. This is done through the update_prob() function.\n", + "\n", + "After this process is done for each episode, the function returns the modified Q and prob tables. These tables are an imprint of the learning that has taken place by the AI agent through the First-Visit Monte Carlo algorithm.\n", + "'''\n", + "\n", + "def run_mc(env, num_episodes):\n", + " '''\n", + " observation_space[0] is the 18 possible player values. (3-20)\n", + " observation_space[1] is the 10 possible dealer upcards. (2-11)\n", + " Combining these together yields all possible states.\n", + " Multiplying this with hit/stand yields all possible state-action pairs.\n", + " This is the Q map.\n", + " '''\n", + " Q = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16)\n", + "\n", + " # This map contains the probability distributions for each action (hit or stand) given a state. The state (combination of player hand value and dealer upcard value) index in this array yields a 2-element array.
The 0th index of this 2-element array refers to the probability of hit and the 1st index is the probability of stand.\n", + " prob = np.zeros([env.observation_space[0].n * env.observation_space[1].n, env.action_space.n], dtype=np.float16) + 0.5\n", + "\n", + " # The learning rate; very small to avoid making quick, large changes in our policy.\n", + " alpha = 0.001\n", + "\n", + " epsilon = 1\n", + "\n", + " # The rate by which epsilon will decay over time.\n", + " # Since the probability that decides which option has the highest Q-value is 1 - epsilon + probability, this decay will make sure that the better option is taken more often in the long run.\n", + " # This allows the algorithm to explore in the early stages and exploit in the later stages.\n", + " decay = 0.9999\n", + "\n", + " # The lowest value that epsilon can go to. Although the decay seems slow, it grows exponentially, and this is magnified when running thousands of episodes.\n", + " epsilon_min = 0.9\n", + "\n", + " gamma = 0.8\n", + "\n", + " for _ in range(num_episodes):\n", + " episode = play_game(env, Q, prob)\n", + "\n", + " epsilon = max(epsilon * decay, epsilon_min)\n", + "\n", + " Q = update_Q(env, episode, Q, alpha, gamma)\n", + "\n", + " prob = update_prob(env, episode, Q, prob, epsilon)\n", + "\n", + " return Q, prob" + ], + "metadata": { + "id": "PnRTa7FZ3RnJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run First-Visit Monte Carlo Reinforcement Learning Algorithm" + ], + "metadata": { + "id": "jdfLT2OvF1sV" + } + }, + { + "cell_type": "code", + "source": [ + "env = BlackjackEnv()\n", + "\n", + "start_time = time.time()\n", + "new_Q, new_prob = run_mc(env, 1000000)\n", + "end_time = time.time()\n", + "\n", + "print(f'Total time for learning: {end_time - start_time} s.')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UKx3psLBd3Yl", + "outputId": "2181c312-d684-4525-bb48-ed723de63333" + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Total time for learning: 230.13713669776917 s.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# best_policy() function" + ], + "metadata": { + "id": "qICTdADBF4Eu" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + "This function takes the new Q-table returned and keeps only the action that yields the highest value for a given state. This is represented in three different ways: binary, string and colors.\n", + "0 is hit and 1 is stand in the binary representation of the results.\n", + "H is hit and S is stand in the string representation of the results.\n", + "Green is hit and Red is stand in the color representation of the results.\n", + "'''\n", + "def best_policy(Q):\n", + " best_policy_binary = []\n", + " best_policy_string = []\n", + " best_policy_colors = []\n", + " for i in range(len(Q)):\n", + " best_policy_binary.append(np.argmax(Q[i]))\n", + " best_policy_string.append('H' if np.argmax(Q[i]) == 0 else 'S')\n", + " best_policy_colors.append('g' if np.argmax(Q[i]) == 0 else 'r')\n", + "\n", + " return best_policy_binary, best_policy_string, best_policy_colors" + ], + "metadata": { + "id": "MNbxnlFKeRgQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Create DataFrame with Player Value as Rows and Dealer Upcard as Columns" + ], + "metadata": { + "id": "89PKdgPiF6Qx" + } + }, + { + "cell_type": "code", + "source": [ + "new_Q_binary, new_Q_string, new_Q_colors = best_policy(new_Q)\n", + "\n", + "df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "color_df = pd.DataFrame(columns = range(2, 12))\n", + "\n", + "for s in range(3, 21):\n", + " # possible player values in the range 3 to 20\n", + " start = env.observation_space[1].n * (s - 3)\n", + " end = start + 10\n", + " df.loc[s] = (new_Q_string[start:end])\n", + " color_df.loc[s] = (new_Q_colors[start:end])" + ], + 
"metadata": { + "id": "lZIV2WxWfR9z" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Create Chart Graphic for the Results" + ], + "metadata": { + "id": "Lh14aJ8jF8s-" + } + }, + { + "cell_type": "code", + "source": [ + "fig, ax = plt.subplots()\n", + "\n", + "# hide axes\n", + "fig.patch.set_visible(False)\n", + "ax.set_axis_off()\n", + "ax.axis('tight')\n", + "\n", + "ax.table(cellText=df.values, cellColours=color_df.values, cellLoc='center', rowLabels=df.index, colLabels=df.columns, loc='center')\n", + "\n", + "fig.tight_layout()\n", + "\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "wR8JVHEef5Cf", + "outputId": "02c77629-9a81-4150-9561-c0255ad152ea" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABM9UlEQVR4nO3de3xU9Z3/8dcM4VaI2HALUw0GbWGgQklVjNoCFcEsjSgsFhdtaNS2FmuRrW3ht+MdEfZRpRcWqnWBNhUftZVIbZBV5GYkSrCp0mYh0FQqKggl0kQllDm/P06SJSGQ5OScOTPf834+HvOAnMye+bx2Evz2nJk5IcuyEBEREZHUF/Z7ABERERFxhxZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIdL8fPBQKJQF9PNzhgTqBtT7PUSCqNVMajVTUFqD0glqNdUhy7L2tXUn3xZ2oVAoKxwO74rH4z38miGRwuEw8Xjc7zESQq1mUquZgtIalE5Qq6nC4fDHoVBoaFuLOz+P2PWLx+M9ioqKiEajPo7hvZKSEmKxGGo1i1rNpFbzBKUT1GqqyspKbrzxxh7YZzmTdmEHQDQaJScnx+8xPFVZWQmo1TRqNZNazROUTlCr6M0TIiIiIsYIxMJu4cKFXHzxxaSnpzNgwACuvfZadu3a5fdYnli2bBkjR47krLPO4qyzziI3N5d169b5PVZCPPzww4RCIebMmeP3KK679957CYVCzW7Dhg3zeyzP7N+/nxtvvJG+ffvSs2dPLrzwQsrLy/0ey3XnnXfeKc9rKBRi9uzZfo/muhMnThCLxcjOzqZnz56cf/75PPDAA1iW5fdorvvHP/7BnDlzGDx4MD179uSyyy5j+/btfo/VaVu2bCE/P59IJEIoFKK4uLjZ9y3L4u6772bQoEH07NmTCRMmUFVV5c+wndRW6zPPPMPEiRPp27cvoVCIiooKX+ZsTSAWdps3b2b27NmUlZXxwgsvcPz4cSZOnEhdXZ3fo7nunHPO4eGHH2bHjh2Ul5fzpS99iSlTpvCnP/3J79E8tX37dn72s58xcuRIv0fxzIgRI3j33Xebbi+//LLfI3niyJEjXH755XTt2pV169bx5z//mR/+8Id88pOf9Hs0123fvr3Zc/rCCy8AMH36dJ8nc9+iRYtYtmwZP/3pT6msrGTRokUsXryYn/zkJ36P5rpbbrmFF154gV/+8pe8+eabTJw4kQkTJrB//36/R+uUuro6Ro0axdKlS1v9/uLFi/nxj3/M8uXLefXVV+nVqxeTJk3i448/TvCknddWa11dHVdccQWLFi1K8GRt8/01donw/PPPN/t65cqVDBgwgB07dvDFL37Rp6m8kZ+f3+zrBQsWsGzZMsrKyhgxYoRPU3mrtraWmTNn8vjjj/Pggw/6PY5n0tLSyMzM9HsMzy1atIhzzz2XFStWNG3Lzs72cSLv9O/fv9nXDz/8MOeffz5jx471aSLvvPLKK0yZMoXJkycD9tHK1atX89prr/k8mbs++ugjfvvb3/Lss882/ffl3nvv5Xe/+x3Lli1L6X+j8vLyyMvLa/V7lmWxZMkS/uM//oMpU6YA8Itf/IKBAwdSXFzMjBkzE
jlqp52pFeCmm24C4K9//WuCJmq/QByxa+mDDz4AICMjw+dJvHXixAmeeuop6urqyM3N9Xscz8yePZvJkyczYcIEv0fxVFVVFZFIhCFDhjBz5kz27Wvz44xS0tq1a7nooouYPn06AwYMYPTo0Tz++ON+j+W5+vp6ioqKKCwsJBQK+T2O6y677DI2bNjA7t27AfjjH//Iyy+/fMb/eKaif/7zn5w4cYIePZp/klfPnj2NPcoOUF1dzXvvvdfs3+E+ffowZswYtm3b5uNkwROII3Yni8fjzJkzh8svv5zPfvazfo/jiTfffJPc3Fw+/vhjevfuzZo1axg+fLjfY3niqaee4vXXXzfi9StnMmbMGFauXMnQoUN59913ue+++/jCF77Azp07SU9P93s8V/3lL39h2bJlzJ07l/nz57N9+3buuOMOunXrRkFBgd/jeaa4uJiamhpmzZrl9yie+MEPfsDRo0cZNmwYXbp04cSJEyxYsICZM2f6PZqr0tPTyc3N5YEHHiAajTJw4EBWr17Ntm3buOCCC/wezzPvvfceAAMHDmy2feDAgU3fk8QI3MJu9uzZ7Ny50+j/5TR06FAqKir44IMP+M1vfkNBQQGbN282bnH3t7/9je985zu88MILp/yvY9OcfFRj5MiRjBkzhsGDB/PrX/+am2++2cfJ3BePx7nooot46KGHABg9ejQ7d+5k+fLlRi/snnjiCfLy8ohEIn6P4olf//rX/OpXv+LJJ59kxIgRVFRUMGfOHCKRiHHP6y9/+UsKCwv51Kc+RZcuXcjJyeGGG25gx44dfo8mARCoU7G33347zz33HBs3buScc87xexzPdOvWjQsuuIDPf/7zLFy4kFGjRvGjH/3I77Fct2PHDg4ePEhOTg5paWmkpaWxefNmfvzjH5OWlsaJEyf8HtEzZ599Np/5zGfYs2eP36O4btCgQaf8j5BoNGrsqWeAt956ixdffJFbbrnF71E8c9ddd/GDH/yAGTNmcOGFF3LTTTdx5513snDhQr9Hc93555/P5s2bqa2t5W9/+xuvvfYax48fZ8iQIX6P5pnG1/8eOHCg2fYDBw4E4rXBySQQCzvLsrj99ttZs2YNL730krEvxD6deDzOsWPH/B7DdVdeeSVvvvkmFRUVTbeLLrqImTNnUlFRQZcuXfwe0TO1tbXs3buXQYMG+T2K6y6//PJTPo5o9+7dDB482KeJvLdixQoGDBjQ9MYCE3344YeEw83/k9OlSxejLwfVq1cvBg0axJEjR1i/fn3TmwpMlJ2dTWZmJhs2bGjadvToUV599VWjX+OdjAJxKnb27Nk8+eSTPPvss6Snpzed7+/Tpw89e/b0eTp3zZs3j7y8PLKysvjHP/7Bk08+yaZNm1i/fr3fo7kuPT39lNdJ9urVi759+xr3+snvfve75OfnM3jwYN555x3uueceunTpwg033OD3aK678847ueyyy3jooYe4/vrree2113jsscd47LHH/B7NE/F4nBUrVlBQUEBamrn/JOfn57NgwQKysrIYMWIEf/jDH3jkkUcoLCz0ezTXrV+/HsuyGDp0KHv27OGuu+5i2LBhfO1rX/N7tE6pra1tdpagurqaiooKMjIyyMrKYs6cOTz44IN8+tOfJjs7m1gsRiQS4dprr/VvaIfaav373//Ovn37eOeddwCa/sdoZmam70cozf1X5CTLli0DYNy4cc22r1ixwrgXKh88eJCvfvWrvPvuu/Tp04eRI0eyfv16rrrqKr9Hk054++23ueGGGzh8+DD9+/fniiuuoKys7JSPyzDBxRdfzJo1a5g3bx73338/2dnZLFmyxLgX2Td68cUX2bdvn5ELnJP95Cc/IRaL8a1vfYuDBw8SiUT4xje+wd133+33aK774IMPmDdvHm+//TYZGRlMmzaNBQsW0LVrV79H65Ty8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPPp+Rro
NtqXbt2bbOFeuPHudxzzz3ce++9CZ21pUAs7Ez8ZPPTeeKJJ/wewVebNm3yewRPPPXUU36PkFBf/vKX+fKXv+z3GAkxceLEQPwblZ6ezpIlS1iyZInfo3ju+uuv5/rrr/d7DNeNGzfujD+roVCI+++/n/vvvz+BU3mjrdZZs2Yl7YGhQLzGTkRERCQItLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYot3vig2FQllAPxcfexhASUkJlZWVLu42+ZSWlgJqNY1azaRW8wSlE9Rqqurq6nbfN9Set9mHQqGscDi8Kx6Pu/thNCHA/Hf529RqJrWaSa3mCUonqNVUdmuuZVllZ7pbe4/Y9YvH4z2KioqIRqOdHw57hR2LxWAq7h4HTEZVwEbUahq1mkmt5glKJ6jVVIeAZwCob+uuHfqA4mg0Sk5OjrOhWmg6bNoPiLiyy+R1qOFPtZpFrWZSq3mC0glqFb15QkRERMQUjhZ2y5YtY+TIkZx11lmcddZZ5Obmsm7dOrdnc88aYHUr26uBe4GPEjqNt9Sq1lQWlE5QK6g11ak1KVsdLezOOeccHn74YXbs2EF5eTlf+tKXmDJlCn/605/cnk9ERERE2qlDr7FrlJ+f3+zrBQsWsGzZMsrKyhgxYoQrg4mIiIhIxzha2J3sxIkTPP3009TV1ZGbm+vGTCIiIiLigOOF3Ztvvklubi4ff/wxvXv3Zs2aNQwfPtzN2dy1G1jQYpupn32jVjMFpTUonaBWtaY+tSYdxwu7oUOHUlFRwQcffMBvfvMbCgoK2Lx5c/Iu7rKByS227afxc2HMola1prKgdIJa1Zr61Jp0rY4Xdt26deOCCy4A4POf/zzbt2/nRz/6ET/72c9cG85VXYG+LbYd9WOQBFCrmYLSGpROUKtaU59ak45rn2MXj8c5duyYW7sTERERkQ5ydMRu3rx55OXlkZWVxT/+8Q+efPJJNm3axPr1692eT0RERETaydHC7uDBg3z1q1/l3XffpU+fPowcOZL169dz1VVXuT2fiIiIiLSTo4XdE0884fYc3rruNNuzsT8x2iRqVWsqC0onqBXUmurUmpStulasiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEB1680RJSQmVlZWuPHBpaan9lyrgkCu7TF77Gv5Uq1nUaia1miconaBWUx1p/11DltX2hc5CodCl4XB4Wzwe78RUpwoD7u4xiYVIymvKeUKtZgpQq/5tMlBQOiFQrUH6XW1ozbUsq+xM92vvEbv6eDxOUVER0Wi089NhH/2LxWIUAe7sMXmVADELmAr083kYr1UBG1GraQLWGt+I/m0yScB+foPUGpTf1UrgRvuv9W3dt0OnYqPRKDk5OY6GaqnxlG4UcGePyavp5HU/IOLjIInQeDhcrWYJYKv+bTJIAH9+g9QahN/VjtCbJ0REREQM4crC7uGHHyYUCjFnzhw3dueJ94HbgCygO5AJTAJK/RzKC2uA1a1sr8b+dOyPEjqNt9RqXmtQOk8SiH+bgvS8qtXMVlLnd9XRJcVOtn37dn72s58xcuRIN+bxzDTsE9OrgCHAAWADcNjPoUQk8PRvk0hqSJXf1U4t7Gpra5k5cyaPP/44Dz74oFszua4G2ApsAsY2bBsMXOLTPCIioH+bRFJFDanzu9qpU7GzZ89m8uTJTJgwwa15PNG74VYMHPN3FBGRJvq3SSQ1pNLvquMjdk899RSvv/4627dvd3MeT6QBK4FbgeXY754ZC8wAkvsEskO7gQUttpn6mUZqNU9QOgnYv00Bel7V6scg3kql31VHC7u//e1vfOc73+GFF16gR48ebs/kiWnAZOxDqWXAOmAx8HNgln9jeSMbO/Zk+4FnfJjFa2o1rzUonQ0C829TkJ5XtRrZmiq/q44Wdjt27ODgwYPNPtPuxIkTbNmyhZ/+9KccO3aMLl26uDakW3oAVzXcYsAtwD0k1xPiiq5A3xbbjvoxSAKo1
TxB6TxJIP5tCtLzqlZjpcLvqqOF3ZVXXsmbb77ZbNvXvvY1hg0bxve///2kXNS1Zjj2+XIRkWSif5tEUkMy/q46Wtilp6fz2c9+ttm2Xr160bdv31O2J4PDwHSgEPtceDpQjn0IdYqPc4lIsOnfJpHUkEq/q53+HLtU0BsYAzwK7AWOA+divwhyvo9ziUiw6d8mkdSQSr+rri3sNm3a5NauXNcdWNhwM951p9mejf1J4CZRq3mtQelsEJh/m4L0vKrVyNZU+l3VtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNv6lCjjk4yCJsK/hT7WaJYCt+rfJIAH8+Q1SaxB+V6s7cN+QZbV9UbdQKHRpOBzeFo/HnU/VijDg7h6TWAgjr5/XGj2vhgpQq36GzaPn1ExBel4bWnMtyyo70/3ae8SuPh6PU1RURDQa7fx02Ef/YrEYRYA7e0xeJUDMAqYC/XwexmtVEN+InlfTVAEbCUyrfoYNo+fUTAF6XiuBG+2/1rd13w6dio1Go82uD9sZjad0o4A7e0xeTYeI+wERHwdJhIZD/3peDdN4SidArfoZNoieUzMF6HntCL15QkRERMQQjhZ29957L6FQqNlt2LBhbs/mqveB24As7E+QzgQmcdKLh02xBljdyvZq7E8C/yih03hOzytmPa9B6TxJIH6GA/a8BuI5BT2vJOfz6viSYiNGjODFF1/8vx2lJfdlZ6dhn5heBQwBDgAbsC/sK6lLz6ukOv0Mm0fPqZlS5Xl1vBpLS0sjMzPTzVk8UwNsBTYBYxu2DQYu8WkecUcNel4ltdWgn2HT1KDn1EQ1pM7z6vg1dlVVVUQiEYYMGcLMmTPZt29f2/9HPundcCsGjvk7irhIz6ukOv0Mm0fPqZlS6Xl1dMRuzJgxrFy5kqFDh/Luu+9y33338YUvfIGdO3eSnp7u9oydlgasBG4FlmO/e2YsMAMY6d9Y3tkNLGixzcDPNNLzipHPa2A6CdjPcECe10A9p6DnleR7Xh0t7PLy8pr+PnLkSMaMGcPgwYP59a9/zc033+zacG6aBkzGPpRaBqwDFgM/B2b5N5Y3srFjT7YfeMaHWTym5xXzntegdDYIzM9wgJ7XwDynoOeV5HteXfm4k7PPPpvPfOYz7Nmzx43deaYHcBUQA17BfiLu8XMgr3QF+ra4Jd+BVNfoeTVMUDpPEoif4YA9r4F4TkHPK8n3vLqysKutrWXv3r0MGjTIjd0lzHCgzu8hxHV6XiXV6WfYPHpOzZSMz6ujU7Hf/e53yc/PZ/Dgwbzzzjvcc889dOnShRtuuMHt+VxxGJgOFGKfC08HyrEPoU7xcS7pHD2vkur0M2wePadmSqXn1dHC7u233+aGG27g8OHD9O/fnyuuuIKysjL69+/v9nyu6A2MAR4F9gLHgXOxXwQ538e5pHP0vEqq08+wefScmimVnldHC7unnnrK7Tk81R1Y2HAz3nWn2Z6N/UngBtHzinnPa1A6GwTmZzhAz2tgnlPQ85qkdK1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lJ7n4A7e0xepY1/qQIO+ThIIjRcMljPq2EaLwUdoFb9DBtEz6mZAvS8VnfgviHLavuibqFQ6NJwOLwtHo87n6oVYcDdPSavILUSwshrBbZKrWZSq3mC0glqNZXdmmtZVtmZ7tbeI3b18XicoqIiotFo54fDPvoXi8UoAtzZY/Iqwb78SGBaLWAq0M/nYbxWBWxEraZRq3mC0glqNdUhGq+/W9/WXTt0KjYajZKTk+NsqBYaT+lGAXf2mLwaDxEHqZV+QMTHQRKh8TSHWs2iVvMEpRPUKnrzhIiIiIgptLATERERMYTjhd3+/fu58cYb6du3Lz179uTCCy+kvLzczdlc9T5wG5CFfWmQTGASJ72DyCCBaV0DrG5lezX25
Ww+Sug03gpKa1A6Qa2g1lSn1qRsdXSt2CNHjnD55Zczfvx41q1bR//+/amqquKTn/yk2/O5Zhr2Kw5XAUOAA8AG4LCfQ3kkSK0iIiLyfxwt7BYtWsS5557LihUrmrZlZ2e7NpTbaoCtwCZgbMO2wcAlPs3jpRqC0yoiIiLNOToVu3btWi666CKmT5/OgAEDGD16NI8//rjbs7mmd8OtGDjm7yieC1KriIiINOfoiN1f/vIXli1bxty5c5k/fz7bt2/njjvuoFu3bhQUFLg9Y6elASuBW4Hl2B85MhaYAYz0byxPBKkVgN3AghbbTP2wyqC0BqUT1KrW1KfWpONoYRePx7nooot46KGHABg9ejQ7d+5k+fLlSbmwA/t1Z5OxT1OWAeuAxcDPgVn+jeWJILWSjR17sv00fpCjWYLSGpROUKtaU59ak67V0cJu0KBBDB8+vNm2aDTKb3/7W1eG8koP4KqGWwy4BbgHAxc7BKi1K9C3xbajfgySAEFpDUonqFWtqU+tScfRa+wuv/xydu3a1Wzb7t27GTx4sCtDJcpwoM7vIRIkSK0iIiJB5eiI3Z133slll13GQw89xPXXX89rr73GY489xmOPPeb2fK44DEwHCrFfZ5YOlGOfnpzi41xeCFKriIiINOdoYXfxxRezZs0a5s2bx/333092djZLlixh5syZbs/nit7AGOBRYC9wHDgX+w0G832cywtBahUREZHmHC3sAL785S/z5S9/2c1ZPNMdWNhwM12QWrnuNNuzsT8J3CRBaQ1KJ6gV1Jrq1JqUrbpWrIiIiIghtLATERERMYQWdiIiIiKG0MJORERExBAdevNESUkJlZWVrjxwaWmpvU/AnT0mr9KGP4PUShVwyMdBEmFfw59qNYtazROUTlCrqY60/64hy2r7QmehUOjScDi8LR6Pd2Kq1nZMUl5nzRNqNZNazRSg1jDg8r/sySlAz6laDWW35lqWVXamu7X3iF19PB6nqKiIaDTa+eGwj/7FYjGYCvRzZZfJqwrYiFpNo1YzBaw1vhGKAHf+ZU9OJUDMIjDPaZB+fgPTeojGa9LWt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh4jVaha1mimArVHAnX/Zk1PTS2AC9JyqNbj05gkRERERQzha2J133nmEQqFTbrNnz3Z7PnesAVa3sr0a+xOjP0roNN5Sq1pTWVA6IVitwPvAbUAW9hVyMoFJnPSGK1ME6XlVa1K2Orqk2Pbt2zlx4kTT1zt37uSqq65i+vTprg0mIiLmmIb94qBVwBDgALABOOznUCIGcrSw69+/f7OvH374Yc4//3zGjh3rylAiImKOGmArsAlo/K/EYOASn+YRMVmnX2NXX19PUVERhYWFhEIhN2YSERGD9G64FQPH/B1FxHiOjtidrLi4mJqaGmbNmuXCOB7aDSxosc3Uz75Rq5mC0hqUTghMaxqwErgVWI79DtyxwAxgpH9jeScgzyug1iRs7fTC7oknniAvL49IJMnfa5wNTG6xbT+NnwtjFrWqNZUFpRMC1ToNO3UrUAasAxYDPwdm+TeWNwL0vKqVpGvt1MLurbfe4sUXX+SZZ5KsqjVdgb4tth31Y5AEUKuZgtIalE4IVivQA7iq4RYDbgHuwcCFXZCeV7UmnU69xm7FihUMGDCAyZNbLmFFRETObDhQ5/cQIoZxfMQuHo+zYsUKCgoKSEvr9BldEREx1GFgOlCI/Zq6dKAc+1TsFB/nEjGR4xXZiy++yL59+ygsLHRzHhERMUxvYAzwKLAXOA6ci/1mivk+ziViIscLu4kTJ2JZSfh2kNZcd5rt2difGG0Stao1lQWlEwLV2h1Y2HAzXoCeV7WSlK26VqyIiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQHXrzRElJCZWVla48cGlpqf2XKuCQK7tMXvsa/lSrWdRqpgC2lgDu/MuenEob/xKg51SthjnS/ruG2vPO1lAodGk4H
N4Wj8c7MVVrOyYpr7PmCbWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkM0XpO2vq27duhUbDQaJScnx9lQLTSd0u0HRFzZZfJqPESsVrOo1UxqNU9QOkGtojdPiIiIiJjC0cLuxIkTxGIxsrOz6dmzJ+effz4PPPBA8l6JYg2wupXt1difGP1RQqfxllrVmsqC0glqBbWmOrUmZaujS4otWrSIZcuWsWrVKkaMGEF5eTlf+9rX6NOnD3fccYfbM4qIiIhIOzha2L3yyitMmTKFyZMnA3DeeeexevVqXnvtNVeHExEREZH2c3Qq9rLLLmPDhg3s3r0bgD/+8Y+8/PLL5OXluTqciIiIiLSfoyN2P/jBDzh69CjDhg2jS5cunDhxggULFjBz5ky353PPbmBBi21J+pLATlOrmYLSGpROUKtaU59ak46jhd2vf/1rfvWrX/Hkk08yYsQIKioqmDNnDpFIhIKCArdndEc2MLnFtv00fi6MWdSq1lQWlE5Qq1pTn1qTrtXRwu6uu+7iBz/4ATNmzADgwgsv5K233mLhwoXJu7DrCvRtse2oH4MkgFrNFJTWoHSCWtWa+tSadBy9xu7DDz8kHG7+f9qlSxdcv+SYiIiIiLSboyN2+fn5LFiwgKysLEaMGMEf/vAHHnnkEQoLC92eT0RERETaydHC7ic/+QmxWIxvfetbHDx4kEgkwje+8Q3uvvtut+cTERERkXZytLBLT09nyZIlLFmyxOVxPHLdabZnY39itEnUqtZUFpROUCuoNdWpNSlbda1YEREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIITr05omSkhIqKytdeeDS0lL7L1XAIVd2mbz2NfypVrOo1UxqNU9QOkGtpjrS/ruGLKvtC52FQqFLw+HwNtc/gDhEUl5nzRNqNZNazaRW8wSlEwLVGgaCcmmEhtZcy7LKznS/9h6xq4/H4xQVFRGNRjs/HfbRv1gsBlOBfq7sMnlVARtRq2nUaia1miconRC41vhGKALcWZkkr0rgRvuv9W3dt0OnYqPRKDk5OY6GaqnplG4/IOLKLpNX4yFitZpFrWZSq3mC0gmBbI0C7qxMzKA3T4iIiIgYwvHC7h//+Adz5sxh8ODB9OzZk8suu4zt27e7OZt71gCrW9lejf2J0R8ldBpvqVWtqSwonaBWUGuqC1Ir8D5wG5AFdAcygUlAqZ9DtcLRJcUAbrnlFnbu3Mkvf/lLIpEIRUVFTJgwgT//+c986lOfcnNGEREREV9Nw36B2ypgCHAA2AAc9nOoVjg6YvfRRx/x29/+lsWLF/PFL36RCy64gHvvvZcLLriAZcuWuT2jiIiIiG9qgK3AImA8MBi4BJgHXOPfWK1ytLD75z//yYkTJ+jRo0ez7T179uTll192ZTARERGRZNC74VYMHPN3lDY5OhWbnp5Obm4uDzzwANFolIEDB7J69Wq2bdvGBRdc4PaM7tgNLGixzdTP+VGrmYLSGpROUKtaU19AWtOAlcCtwHLsd+GOBWYAI/0bq1WOX2P3y1/+ksLCQj71qU/RpUsXcnJyuOGGG9ixY4eb87knG5jcYtt+4BkfZvGaWtWayoLSCWpVa+oLUOs07NStQBmwDlgM/ByY5d9Yp3C8sDv//PPZvHkzdXV1HD16lEGDBvGVr3yFIUOGuDmfe7oCfVtsO+rHIAmgVjMFpTUonaBWtaa+ILUCPYCrGm4x4BbgHpJrYdfpz7Hr1asXgwYN4siRI6xfv54pU6a4MZeIiIhIUhsO1Pk9RAuOj9itX78ey7IYOnQoe/bs4a677mLYsGF87Wtfc3M+EREREV8dBqYDhdivqUsHyrFPxSbb4SzHC7sPPviAefPm8fbbb5ORkcG0adNYsGABXbt2dXM+EREREV/1BsYAjwJ7gePAudhvppjv41ytcbywu/7667n++
uvdnMU7151mezb2p2ObRK1qTWVB6QS1glpTXYBauwMLG27JTteKFRERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDdOhdsSUlJVRWVrrywKWlpfZfqoBDruwyee1r+FOtZlGrmdRqnqB0QiBbSwB3VibJq7oD9w1ZVttX6w2FQpeGw+Ft8Xjc+VSt7hgjLxbcKrWaSa1mUqt5gtIJajWV3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFYCrQz5VdJq8qYCNqNY1azaRW8wSlE9RqqkPAMwDUt3XXDp2KjUaj5OTkOBuqhaZTuv2AiCu7TF6Nh8PVaha1mkmt5glKJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLiZt+3LIu7776bQYMG0bNnTyZMmEBVVVVn53VmDbC6le3V2Jc9+Sih03hLrWpNZUHpBLWCWlOdWpOy1fHCrq6ujlGjRrF06dJWv7948WJ+/OMfs3z5cl599VV69erFpEmT+Pjjjx0PKyIiIiKn16HX2J0sLy+PvLy8Vr9nWRZLlizhP/7jP5gyZQoAv/jFLxg4cCDFxcXMmDHD6cOKiIiIyGl48hq76upq3nvvPSZMmNC0rU+fPowZM4Zt27Z58ZAiIiIigef4iN2ZvPfeewAMHDiw2faBAwc2fS/hdgMLWmwz9bNv1GqmoLQGpRPUqtbUp9ak48nCLillA5NbbNtP4+fCmEWtak1lQekEtao19ak16Vo9WdhlZmYCcODAAQYNGtS0/cCBA3zuc5/z4iHb1hXo22LbUT8GSQC1mikorUHpBLWqNfWpNel48hq77OxsMjMz2bBhQ9O2o0eP8uqrr5Kbm+vFQ4qIiIgEnuMjdrW1tezZs6fp6+rqaioqKsjIyCArK4s5c+bw4IMP8ulPf5rs7GxisRiRSIRrr73WjblFREREpAXHC7vy8nLGjx/f9PXcuXMBKCgoYOXKlXzve9+jrq6Or3/969TU1HDFFVfw/PPP06NHj85PLSIiIiKncLywGzduHJZ1+reDhEIh7r//fu6//36nD+Ge606zPRv7E6NNola1prKgdIJaQa2pTq1J2aprxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2n+pAg65ssvkta/hT7WaRa1mUqt5gtIJajXVkfbfNXSmd7Y23SkUujQcDm+Lx+OdmKq1HZOU11nzhFrNpFYzqdU8QekEtZrKbs21LKvsTHdr7xG7+ng8TlFREdFotPPDYR/9i8ViMBXo58ouk1cVsBG1mkatZlKreYLSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFppO6fYDIq7sMnk1HiJWq1nUaia1miconaBW0ZsnREREREzheGG3ZcsW8vPziUQihEIhiouLm33/mWeeYeLEifTt25dQKERFRUUnR+2ENcDqVrZXY39i9EcJncZbalVrKgtKJ6gV1Jrq1JqUrY4XdnV1dYwaNYqlS5ee9vtXXHEFixYtcjyciIiIiLSf42vF5uXlkZeXd9rv33TTTQD89a9/dfoQIiIiItIBeo2diIiIiCEcH7FLObuBBS22mfrZN2o1U1Bag9IJalVr6lNr0gnOwi4bmNxi234aPxfGLGpVayoLSieoVa2pT61J1xqchV1XoG+LbUf9GCQB1GqmoLQGpRPUqtbUp9ako9fYiYiIiBjC8RG72tpa9uzZ0/R1dXU1FRUVZGRkkJWVxd///nf27dvHO++8A8CuXbsAyMzMJDMzs5Nji4iIiEhLjo/YlZeXM3r0aEaPHg3A3LlzGT16NHfffTcAa9euZfTo0UyebJ+QnjFjBqNHj2b58uUujC0iIiIiLTk+Yjdu3Dgs6/RvB5k1axazZs1yunt3XXea7dnYnxhtErWqNZUFpRPUCmpNdWpNyla9xk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkh
MrKSlceuLS01P5LFXDIlV0mr30Nf6rVLGo1k1rNE5ROUKupjrT/rqEzvbO16U6h0KXhcHhbPB7vxFSt7ZikvM6aJwLUGgZc/klJWmo1VIB+XwPTGpROUKup7NZcy7LKznS39h6xq4/H4xQVFRGNRjs/HPbRv1gsBlOBfq7sMnlVARsJTGt8IxQB7vykJK8SIIZaTVMCxCwC8/saiH+bgtIJajXVIRqvSVvf1l07dCo2Go2Sk5PjbKgWmk7p9gMiruwyeTUeIg5QaxRw5ycleTW+KEGtZml6sUmAfl+Nbw1KJ6hV9OYJEREREVM4Xtht2bKF/Px8IpEIoVCI4uLipu8dP36c73//+1x44YX06tWLSCTCV7/61abrxibcGmB1K9ursT8x+qOETuOtILUC7wO3AVlAdyATmASU+jmUR9RqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl57yvQ8//JDXX3+dWCzG66+/zjPPPMOuXbu45pprOjWsSEvTgD8Aq4DdwFpgHHDYx5m8olYzW0VE3OT4WrF5eXnk5eW1+r0+ffrwwgsvNNv205/+lEsuuYR9+/aRlZXl9GFFmtQAW4FNwNiGbYOBS3yax0s1qNXEVhERtyXsNXYffPABoVCIs88+O1EPKYbr3XArBo75O4rn1CoiIu3h+IhdR3z88cd8//vf54YbbuCss85KxEOeajewoMU2Uz/7JiCtacBK4FZgOfa7NccCM4CR/o3lCbWa2RqU31VArWpNfSnS6vnC7vjx41x//fVYlsWyZcu8frjTywYmt9i2n8bPhTFLgFqnYaduBcqAdcBi4OfALP/G8oRaDWwN0O+qWlFrqkuRVk8Xdo2LurfeeouXXnrJv6N1AF2Bvi22HfVjkAQIUivQA7iq4RYDbgHuwbAFQAO1GtYapN9VtZpJrUnHs9fYNS7qqqqqePHFF+nbt+X/N0S8MRyo83uIBFGriIiczPERu9raWvbs2dP0dXV1NRUVFWRkZDBo0CD+9V//lddff53nnnuOEydO8N577wGQkZFBt27dOj+5BN5hYDpQiP3aq3SgHPuU3RQf5/KCWs1sFRFxm+OFXXl5OePHj2/6eu7cuQAUFBRw7733snbtWgA+97nPNfu/27hxI+PGjXP6sCJNegNjgEeBvcBx4FzsF93P93EuL6jVzFYREbc5XtiNGzcOyzr920HO9L2Eu+4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ+1592ooFLo0HA5vi8fjnZiqtR2TlNdZ80SAWsOAyz8pSUuthgrQ72tgWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxWAq0M+VXSavKmAjgWmNb4QiwJ2flORVgn2pK7WapQSIWQTm9zUQ/zYFpRPUaqpDNF6Ttr6tu3boVGw0GiUnJ8fZUC00ndLtB0Rc2WXyajxEHKDWKODOT0ryajwlqVazNJ1qDtDvq/GtQekEtYrePCEiIiJiCi3sRERERAzheGG3ZcsW8vPziUQihEIhiouLm33/3nvvZdiwYfTq1YtPfvKTTJgwgVdffbWz8zqzBljdyvZq7EuBfJTQabwVpFbgfeA2IAv7UlSZwCROemejQdRqWGuQflfVqtZUl0Ktjhd2dXV1jBo1iqVLl7b6/c985jP89Kc/5c033+Tll1/mvPPOY+LEibz//vuOhxVpaRrwB2AVsBtYC4wDDvs4k1fUamariIibOvTmiZPl5eWRl5d32u//27/9W7OvH3nkEZ544gneeOMNrrzySqcPK9KkBtgKbALGNmwbDFzi0zxeqkGtJraKiLgtIa+xq6+v57HHHqNPnz6MGjUqEQ8pAdC74VYMHPN3FM+pV
URE2sPxEbv2eO6555gxYwYffvghgwYN4oUXXqBfP58+bGY3sKDFNlM/1DAgrWnASuBWYDn2x3CMBWYAI/0byxNqNbM1KL+rgFrVmvpSpNXThd348eOpqKjg0KFDPP7441x//fW8+uqrDBgwwMuHbV02MLnFtv00fuCfWQLUOg07dStQBqwDFgM/B2b5N5Yn1Gpga4B+V9WKWlNdirR6eiq2V69eXHDBBVx66aU88cQTpKWl8cQTT3j5kKfXFejb4pbuzyieC1Ir0AO4CvvKCK9g/4f/Hj8H8pBaDROk31W1mkmtSSehn2MXj8c5dkyvmhFvDQfq/B4iQdQqIiInc3wqtra2lj179jR9XV1dTUVFBRkZGfTt25cFCxZwzTXXMGjQIA4dOsTSpUvZv38/06dPd2VwkcPAdKAQ+7VX6UA59im7KT7O5QW1mtkqIuI2xwu78vJyxo8f3/T13LlzASgoKGD58uX87//+L6tWreLQoUP07duXiy++mK1btzJixIjOTy2C/c7JMcCjwF7gOHAu9ovu5/s4lxfUamariIjbHC/sxo0bh2Wd/u0gzzyTRK8mvO4027OxPzHaJAFq7Q4sbLiZTq0GCtDvqlpRa6pLoVZdK1ZERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImKIDr15oqSkhMrKSlceuLS01P5LFXDIlV0mr30NfwaotQRw5ycleTX8BKvVMI2tQfp9Nb41KJ2gVlMdaf9dQ2d6Z2vTnUKhS8Ph8LZ4PN6JqU4VBtzdY/JSq5nUaqgQSXkNSE8EpTUonaBWU9mtuZZllZ3pbu09Ylcfj8cpKioiGo12fjjso3+xWIwiwJ09Jq8S7MsiqdUsajVTCRCzgKlAP5+H8VoVsBHzW4PSCWo11SEar0lb39ZdO3QqNhqNkpOT42yoFhpP6UYBd/aYvBpPXanVLGo1U9Op5n5AxMdBEqHx9JXprUHpBLWK3jwhIiIiYgrHC7stW7aQn59PJBIhFApRXFx82vt+85vfJBQKsWTJEqcP12nvA7cBWdifbJ8JTOKkF0obRK1qTXWBaF0DrG5lezX2J9l/lNBpvKVWtaa6FGp1fEmxuro6Ro0aRWFhIVOnTj3t/dasWUNZWRmRiL/HSadhn5heBQwBDgAbsC84bhq1qjXVBalVRMRNjhd2eXl55OXlnfE++/fv59vf/jbr169n8uTJTh+q02qArcAmYGzDtsHAJT7N46Ua1KrW1FZDcFpFRNzm2Wvs4vE4N910E3fddRcjRozw6mHapXfDrRg45usk3lOrmdQqIiLt4fiIXVsWLVpEWload9xxh1cP0W5pwErgVmA59rv6xgIzgJH+jeUJtao11QWpld3AghbbTP1MLrWaSa1Jx5OF3Y4dO/jRj37E66+/TigU8uIhOmwaMBn7FE8ZsA5YDPwcmOXfWJ5Qq1pTXWBas7FDT7afxs+rMota1ZrqUqTVk1OxW7du5eDBg2RlZZGWlkZaWhpvvfUW//7v/855553nxUO2Sw/gKuwPWn0F+z8Q9/g2jbfUaia1GqYr0LfFLd3XibyjVjOpNel4srC76aabeOONN6ioqGi6RSIR7rrrLtavX+/FQzoyHKjze4gEUauZ1CoiIidzfCq2traWPXv2NH1dXV1NRUUFGRkZZGVl0bdv32b379q1K5mZmQwdOtT5tA4dBqYDhdiv0UkHyrFP7UxJ+DTeUqtaU12QWkVE3OZ4YVdeXs748eObvp47dy4ABQUFrFy5stODuak3MAZ4FNgLHAfOxX5x9nwf5/KCWtWa6oLUKiLiNscLu3HjxmFZ7X87yF//+lenD9Vp3YGFDTfTqdVMajXQdafZno39SfYmUataU10KtepasSIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUN06M0TJSUlVFZWuvLApaWl9j4Bd/aYvEob/lSrWdRqpsZWqoBDPg6SCPsa/jS9NSidoFZTHWn/XUPteWdrKBS6NBwOb
4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcXNvj9r1ixCoVCz29VXX93ZeR17H7gNyML+ZPtMYBInvVDaIGpVa6oLROsaYHUr26uxP8n+o4RO4y21qjXVpVCr40uK1dXVMWrUKAoLC5k6dWqr97n66qtZsWJF09fdu3d3+nCdNg37xPQqYAhwANiAfcFx06hVrakuSK0iIm5yvLDLy8sjLy/vjPfp3r07mZmZTh/CNTXAVmATMLZh22DgEp/m8VINalVraqshOK0iIm7z9DV2mzZtYsCAAQwdOpTbbruNw4f9+d/bvRtuxcAxXyZIHLWaSa0iItIejo/YteXqq69m6tSpZGdns3fvXubPn09eXh7btm2jS5cuXj1sq9KAlcCtwHLsd/WNBWYAIxM6iffUqtZUF6RWdgMLWmwz9TO51GomtSYdzxZ2M2bMaPr7hRdeyMiRIzn//PPZtGkTV155pVcPe1rTgMnYp3jKgHXAYuDnwKyET+Mttao11QWmNRs79GT7afy8KrOoVa2pLkVaE/ZxJ0OGDKFfv37s2bMnUQ95ih7AVdgftPoK9n8g7vFtGm+p1UxqNUxXoG+LW7qvE3lHrWZSa9JJ2MLu7bff5vDhwwwaNChRD9mm4UCd30MkiFrNpFYRETmZ41OxtbW1zY6+VVdXU1FRQUZGBhkZGdx3331MmzaNzMxM9u7dy/e+9z0uuOACJk2a5MrgHXEYmA4UYr9GJx0oxz61MyXh03hLrWpNdUFqFRFxm+OFXXl5OePHj2/6eu7cuQAUFBSwbNky3njjDVatWkVNTQ2RSISJEyfywAMP+PJZdr2BMcCjwF7gOHAu9ouz5yd8Gm+pVa2pLkitIiJuc7ywGzduHJZ1+reDrF+/3umuXdcdWNhwM51azaRWA113mu3Z2J9kbxK1qjXVpVCrrhUrIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExRIfePFFSUkJlZaUrD1xaWmrvE3Bnj8mrtOFPtZpFrWZqbKUKOOTjIImwr+FP01uD0glqNdWR9t81dKZ3tjbdKRS6NBwOb4vH452Y6lRhwN09Ji+1mkmthgqRlNeA9ERQWoPSCWo1ld2aa1lW2Znu1t4jdvXxeJyioiKi0Wjnh8M++heLxSgC3Nlj8irBviySWs2iVjOVADELmAr083kYr1UBGzG/NSidoFZTHaLxmrT1bd21Q6dio9EoOTk5zoZqofGUbhRwZ4/Jq/HUlVrNolYzNZ1q7gdEfBwkERpPX5neGpROUKvozRMiIiIipnC8sNuyZQv5+flEIhFCoRDFxcWn3KeyspJrrrmGPn360KtXLy6++GL27dt36s4S4H3gNiAL+5PtM4FJnPRCaYOoVa2pLhCta4DVrWyvxv4k+48SOo231KrWVJdCrY4vKVZXV8eoUaMoLCxk6tSpp3x/7969XHHFFdx8883cd999nHXWWfzpT3+iR48enRrYqWnYJ6ZXAUOAA8AG7AuOm0atak11QWoVEXGT44VdXl4eeXl5p/3+//t//49/+Zd/YfHixU3bzj//fKcP1yk1wFZgEzC2Ydtg4BJfpvFWDWpVa2qrITitIiJu8+Q1dvF4nN///vd85jOfYdKkSQwYMIAxY8a0ero2EXo33IqBY75MkDhqNZNaRUSkPRwfsTuTgwcPUltby8MPP8yDDz7IokWLeP7555k6dSobN25k7Nixbe/ERWnASuBWYDn2u/rGAjOAkQmdxHtqVWuqC1Iru4EFLbaZ+plcajWTWpOOJwu7xg8ynjJlC
nfeeScAn/vc53jllVdYvnx5whd2YL9mZzL2KZ4yYB2wGPg5MCvh03hLrWpNdYFpzcYOPdl+Gj+vyixqVWuqS5FWT07F9uvXj7S0NIYPH95sezQa9e1dsQA9gKuwP2j1Fez/QNzj2zTeUquZ1GqYrkDfFrd0XyfyjlrNpNak48nCrlu3blx88cXs2rWr2fbdu3czePBgLx7SkeFAnd9DJIhazaRWERE5meNTsbW1tezZs6fp6+rqaioqKsjIyCArK4u77rqLr3zlK3zxi19k/PjxPP/88/zud79j06ZNbszdIYeB6UAh9mt00oFy7FM7UxI+jbfUqtZUF6RWERG3OV7YlZeXM378+Kav586dC0BBQQErV67kuuuuY/ny5SxcuJA77riDoUOH8tvf/pYrrrii81N3UG9gDPAosBc4DpyL/eLs+QmfxltqVWuqC1KriIjbHC/sxo0bh2Wd+e0ghYWFFBYWOn0I13QHFjbcTKdWM6nVQNedZns29ifZm0Stak11KdSqa8WKiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCE69K7YkpISKisrXXng0tJSe5+AO3tMXqUNf6rVLGo1U2MrVcAhHwdJhMYLAZneGpROUKupjrT/rqG2PrIEIBQKXRoOh7c1XgPWLWHA3T0mL7WaSa1mUqt5gtIJwWolBLS9jDGD3ZprWVbZme7W3iN29fF4nKKiIqLRaOeHwz76F4vFKALc2WPyKsG+3qVazaJWM6nVPEHphAC2WsBUoJ/Pw3jtEPAMAPVt3bVDp2Kj0Sg5OTnOhmqh8ZRuFHBnj8mr8dSVWs2iVjOp1TxB6YRgttIPiPg4SJLRmydEREREDOF4Ybdlyxby8/OJRCKEQiGKi4ubfT8UCrV6+8///M/OzuzI+8BtQBb2JYsygUmc9EJpg6hVralOrea1BqUT1Gpk6xpgdSvbq7EvKfZRQqc5I8fXiq2rq2PUqFEUFhYyderUU77/7rvvNvt63bp13HzzzUybNs3pQ3bKNOwT06uAIcABYANw2JdpvKVWtaY6tZrXGpROUKupranC8cIuLy+PvLy8034/MzOz2dfPPvss48ePZ8iQIU4f0rEaYCuwCRjbsG0wcEnCJ/FeDWpVa2qrQa2mtdYQjE5Qq6mtqSQhr7E7cOAAv//977n55psT8XCn6N1wKwaO+TJB4qjVTGo1U1Bag9IJahX/JWRht2rVKtLT01s9ZZsIacBK7EPFZwOXA/OBN3yZxltqVWuqU6t5rUHpBLWa2grAbmBBi9uvfJ2oVQlZ2P33f/83M2fOpEePHol4uFZNA94B1gJXYx86zsH+oTSNWtWa6tRqXmtQOkGtpraSDXyzxe0aXydqlecLu61bt7Jr1y5uueUWrx+qTT2Aq7A/vPEVYBZwj58DeUitZlKrmYLSGpROUKuRrV2Bvi1u6b5O1CrPF3ZPPPEEn//85xk1apTXD9Vhw4E6v4dIELWaSa1mCkprUDpBrZI4jt8VW1tby549e5q+rq6upqKigoyMDLKysgA4evQoTz/9ND/84Q87P2knHAamA4XASOwFdjmwGJji41xeUKtaU51azWsNSieo1dTWVOJ4YVdeXs748eObvp47dy4ABQUFrFy5EoCnnnoKy7K44YYbOjdlJ/UGxgCPAnuB48C5wK3YL/Q0iVrVmurUal5rUDpBraa2phLHC7tx48ZhWdYZ7/P1r3+dr3/9604fwjXdgYUNN9Op1UxqNVNQWoPSCWo11nWn2Z6NfeWJJKJrxYqIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAzRoTdPlJSUUFlZ6coDl5aW2vsE3Nlj8ipt+FOtZlGrmdRqnqB0QjBbqQIO+ThIIhxp/11Dbb2zFSAUCl0aDoe3xePxTkx1qjDg7h6Tl1rNpFYzqdU8QekEtZqqoTXXsqyyM92vvUfs6uPxOEVFRUSj0c5Ph330LxaLUQS4s8fkVYJ9qRW1m
kWtZlKreYLSCWo1VSVwo/3X+rbu26FTsdFolJycHEdDtdR4SjeKfcFgkzUeDlerWdRqJrWaJyidoFbRmydEREREjOF4Ybdlyxby8/OJRCKEQiGKi4ubfb+2tpbbb7+dc845h549ezJ8+HCWL1/e2Xkdex+4DcjC/rTsTGASJ7340iBqVWuqU6t5rUHpBLWq1V+OLylWV1fHqFGjKCwsZOrUqad8f+7cubz00ksUFRVx3nnn8T//8z9861vfIhKJcM0113RqaCemYZ+YXgUMAQ4AG7AvYmwatao11anVvNagdIJa1eovxwu7vLw88vLyTvv9V155hYKCAsaNGwfY14392c9+xmuvvZbwhV0NsBXYBIxt2DYYuCShUyRGDWpVa2qrQa2mtdYQjE5Qq1r959lr7C677DLWrl3L/v37sSyLjRs3snv3biZOnOjVQ55W74ZbMXAs4Y+eWGo1k1rNFJTWoHSCWk2VSq2eLex+8pOfMHz4cM455xy6devG1VdfzdKlS/niF7/o1UOeVhqwEvvw6dnA5cB84I2ET+I9tao11anVvNagdIJa1eo/Txd2ZWVlrF27lh07dvDDH/6Q2bNn8+KLL3r1kGc0DXgHWAtcjX04NQf7iTKNWtWa6tRqXmtQOkGtavWZZVlt3rBnt3bs2GG1BrDWrFnT9PWHH35ode3a1Xruueea3e/mm2+2Jk2aZFmWZRUVFVmAtQMsy6fbzWBlJeBxikCtalWrWtUa0E61qrWztx0NrUBOW2s2T47YHT9+nOPHjxMON999ly5dcPuyZJ0xHKjze4gEUauZ1GqmoLQGpRPUaqpkbHX8rtja2lr27NnT9HV1dTUVFRVkZGSQlZXF2LFjueuuu+jZsyeDBw9m8+bN/OIXv+CRRx5xZfCOOAxMBwqBkUA6UA4sBqYkfBpvqVWtqU6t5rUGpRPUqlb/OV7YlZeXM378+Kav586dC0BBQQErV67kqaeeYt68ecycOZO///3vDB48mAULFvDNb36z81N3UG9gDPAosBc4DpwL3Ir94keTqFWtqU6t5rUGpRPUqlb/OV7YjRs3jobX37UqMzOTFStWON29q7oDCxtuplOrmdRqpqC0BqUT1GqqVGrVtWJFREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG6NCbJ0pKSqisrHTlgUtLS+19Au7sMXmVNvypVrOo1UxqNU9QOkGtpqruwH1DZ3pna9OdQqFLw+HwNrc/XDgMJM/HFXtLrWZSq5nUap6gdIJaTdXQmmtZVtmZ7tfeI3b18XicoqIiotFo56fDPvoXi8UoAtzZY/IqAWKgVsOo1UxqNU9QOkGtpqoEbrT/Wt/WfTt0KjYajZKTk+NoqJYaT+lGsS9Ea7LGQ8RqNYtazaRW8wSlE9QqevOEiIiIiDEcL+y2bNlCfn4+kUiEUChEcXFxs+8fOHCAWbNmEYlE+MQnPsHVV19NVVVVZ+d17H3gNiAL+xOkM4FJ/N+LL02iVrWmOrWa1xqUTlCrWv3l+JJidXV1jBo1isLCQqZOndrse5Zlce2119K1a1eeffZZzjrrLB555BEmTJjAn//8Z3r16tXpwTtqGvaJ6VXAEOAAsAH7wr6mUataU51azWsNSieoVa0+syyrzRv26Wtrx44dVmsAa82aNU1f79q1ywKsnTt3Nm07ceKE1b9/f+vxxx+3LMuyioqKLMDaAZbl8e0IWIC1KQGP1dqtqOHx1apWtao1iK1B6VSrWr267Wh4fCCnrTWbJ6+xO3bsGAA9evRo2hYOh+nevTsvv/yyFw95Rr0bbsXAsYQ/emKp1UxqNVNQWoPSCWo1VSq1erKwGzZsGFlZWcybN48jR45QX1/PokWLePvtt3n33Xe9eMgzSgNWYh8+PRu4HJgPvJHwSbynVrWmOrWa1xqUTlCrWv3nycKua9euPPPMM+zevZuMjAw+8YlPsHHjRvLy8giH/Xkj7jTgHWAtcDWwCfv88kpfpvGWW
tWa6tRqXmtQOkGtavWZF6+xO1lNTY118OBBy7Is65JLLrG+9a1vJfw1dqe73QxWlmGvA1CrWtWq1lRpDUqnWtXa2Zvvr7E7WZ8+fejfvz9VVVWUl5czZcoUrx+y3YYDdX4PkSBqNZNazRSU1qB0glpNlYytjj/upLa2lj179jR9XV1dTUVFBRkZGWRlZfH000/Tv39/srKyePPNN/nOd77Dtddey8SJE10ZvCMOA9OBQmAkkA6UA4uB5FlmukOtak11ajWvNSidoFa1+s/xwq68vJzx48c3fT137lwACgoKWLlyJe+++y5z587lwIEDDBo0iK9+9avEYrHOT+xAb2AM8CiwFzgOnAvciv3iR5OoVa2pTq3mtQalE9Sq1iTgxmvsnEiG19gl6pYMrwNQq1rVqtagtgalU63m3pLqNXYiIiIikhha2ImIiIgYQgs7EREREUNoYSciIiJiiA69K7akpITKykpXHri0tNTeJ+DOHpNXacOfajWLWs2kVvMEpRPUaqrqDtw31PCu1zPfKRS6NBwOb4vH486nakUYcHePyUutZlKrmdRqnqB0glpN1dCaa1lW2Znu194jdvXxeJyioiKi0Wjnp8M++heLxSgC3Nlj8ioBYqBWw6jVTGo1T1A6Qa2mqgRutP9a39Z9O3QqNhqNkpOT42iolhpP6UaxPyTPZI2HiNVqFrWaSa3mCUonqFX05gkRERERY2hhJyIiImIIRwu7hQsXcvHFF5Oens6AAQO49tpr2bVrV7P7fPzxx8yePZu+ffvSu3dvpk2bxoEDB1wZ2on3gduALKA7kAlM4v/eVWMStao11anVvNagdIJa1eqvDr3GrtHmzZuZPXs2F198Mf/85z+ZP38+EydO5M9//jO9evUC4M477+T3v/89Tz/9NH369OH2229n6tSpTR9zkmjTsF9xuAoYAhwANgCHfZnGW2pVa6pTq3mtQekEtarVZ21dTLbh41ByAGvHjh1Waw4ePGgB1ubNmy3Lsqyamhqra9eu1tNPP910n8rKSguwtm3bZlmWZRUVFSXs4r1HGi6euykAFypWq1rVqtZkaw1Kp1rV6tVtR8PjAzltrdlceY3dBx98AEBGRgYAO3bs4Pjx40yYMKHpPsOGDSMrK4tt27a58ZAd0rvhVgwcS/ijJ5ZazaRWMwWlNSidoFZTpVJrpxd28XicOXPmcPnll/PZz34WgPfee49u3bpx9tlnN7vvwIEDee+99zr7kB2WBqzEPnx6NnA5MB94I+GTeE+tak11ajWvNSidoFa1+q/TC7vZs2ezc+dOnnrqKTfm8cw04B1gLXA1sAn7/PJK/0byjFrVmurUal5rUDpBrWr1WWdeYzd79mzrnHPOsf7yl780275hwwYLsI4cOdJse1ZWlvXII48k/DV2p7vdDFaWYa8DUKta1arWVGkNSqda1drZm+evsbMsi9tvv501a9bw0ksvkZ2d3ez7n//85+natSsbNmxo2rZr1y727dtHbm6uk4f0xHCgzu8hEkStZlKrmYLSGpROUKupkrHV0cedzJ49myeffJJnn32W9PT0ptfN9enTh549e9KnTx9uvvlm5s6dS0ZGBmeddRbf/va3yc3N5dJLL3U1oD0OA9OBQmAkkA6UA4uBKQmfxltqVWuqU6t5rUHpBLWq1X+OFnbLli0DYNy4cc22r1ixglmzZgHw6KOPEg6HmTZtGseOHWPSpEn813/9V6eGdao3MAZ4FNgLHAfOBW7FfvGjSdSq1lSnVvNag9IJalWr/xwt7Bped3dGPXr0YOnSpSxdutTJQ7iqO7Cw4WY6tZpJrWYKSmtQOkGtpkqlVl0rVkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYogOvXmisrLStQeurq629+naHpNXdcOfajWLWs2kVvMEpRPUaqqONIba8w7XUCiUFQ6Hd8Xj8R7OxzpVGIi7ucMkplYzqdVMajVPUDpBraYKw8dxGGpZ1r4z3a9dC
zuwF3dAPzeGO0k3oN7lfSYrtZpJrWZSq3mC0glqNdWhthZ10IGFnYiIiIgkN715QkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMYQWdiIiIiKG0MJORERExBBa2ImIiIgYQgs7EREREUNoYSciIiJiCC3sRERERAyhhZ2IiIiIIbSwExERETGEFnYiIiIihtDCTkRERMQQWtiJiIiIGEILOxERERFDaGEnIiIiYggt7EREREQMoYWdiIiIiCG0sBMRERExhBZ2IiIiIobQwk5ERETEEFrYiYiIiBhCCzsRERERQ2hhJyIiImIILexEREREDKGFnYiIiIghtLATERERMcT/B73X1paF7bdVAAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Test Best Policy on New Episodes" + ], + "metadata": { + "id": "NPbcI9qNF_F9" + } + }, + { + "cell_type": "code", + "source": [ + "env = BlackjackEnv()\n", + "NUM_EPISODES = 100000\n", + "\n", + "for _ in range(NUM_EPISODES):\n", + " state = env.reset()\n", + " while env.done == False:\n", + " if state[0] == 19:\n", + " # Player was dealt with a Blackjack\n", + " next_state, reward, env.done, info = env.step(1)\n", + " total_rewards += reward\n", + " else:\n", + " Q_index = get_Q_state_index(state)\n", + " action = new_Q_binary[Q_index]\n", + " new_state, reward, done, desc = env.step(action)\n", + " state = new_state\n", + " total_rewards += reward\n", + "avg_reward = total_rewards / NUM_EPISODES\n", + "print(f'Average Reward: {avg_reward}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7sTNqbeZga_3", + "outputId": "97878c73-7419-41d4-d600-68bfd314b8f4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Reward: -4.677\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "WL-NmCiphQiq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Credits" + ], + "metadata": { + "id": "73y12ontGQRk" + } + }, + { + "cell_type": "markdown", + "source": [ + "The code is work of Mr. Adithya Solai.\n", + "
\n", + "The conversion from Python to a Python notebook is the work of Mr. Sushant Nair.\n", + "
\n", + "The inspiration for making this Python notebook part of the RL Series is Mr. Terrence Ou.\n", + "
\n", + "The code is explained further in the following Medium articles by Mr. Adithya Solai. A link to the GitHub repository is also provided.\n", + "
\n", + "1.\thttps://towardsdatascience.com/cracking-blackjack-part-1-31da28aeb4bb\n", + "2.\thttps://towardsdatascience.com/cracking-blackjack-part-2-75e32363e38\n", + "3.\thttps://towardsdatascience.com/cracking-blackjack-part-3-8fd3a5870efd\n", + "4.\thttps://towardsdatascience.com/cracking-blackjack-part-4-8b4a9caa38eb\n", + "5.\thttps://towardsdatascience.com/cracking-blackjack-part-5-70bd2f726133\n", + "\n", + "https://github.com/adithyasolai/Monte-Carlo-Blackjack/blob/master/MC_Blackjack_Full.ipynb\n" + ], + "metadata": { + "id": "BiGQjzloGSIX" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "btiWK3iZHFi5" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file