From c37fcd2638e6de4a9f75bcd5e164bdb763118ad1 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Thu, 31 Dec 2020 19:36:46 +0100 Subject: [PATCH 01/12] Improved example + improved expand method --- mcts.py | 4 ++ naughtsandcrosses.py | 101 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 84 insertions(+), 21 deletions(-) diff --git a/mcts.py b/mcts.py index 1db365a..936d474 100644 --- a/mcts.py +++ b/mcts.py @@ -75,6 +75,7 @@ def selectNode(self, node): def expand(self, node): actions = node.state.getPossibleActions() + random.shuffle(actions) for action in actions: if action not in node.children: newNode = treeNode(node.state.takeAction(action), node) @@ -108,3 +109,6 @@ def getAction(self, root, bestChild): for action, node in root.children.items(): if node is bestChild: return action + + def getTotalReward(self): + return self.root.totalReward diff --git a/naughtsandcrosses.py b/naughtsandcrosses.py index 5b4019a..bd1ed95 100644 --- a/naughtsandcrosses.py +++ b/naughtsandcrosses.py @@ -4,54 +4,71 @@ from mcts import mcts from functools import reduce import operator +import random class NaughtsAndCrossesState(): - def __init__(self): - self.board = [[0, 0, 0], [0, 0, 0], [0, 0, 0]] + def __init__(self, side=3): + self.side = side + self.board = [ [0 for _ in range(side)] for _ in range(side)] self.currentPlayer = 1 + self.possibleActions = None + + def show(self): + for row in self.board: + row_text = "" + for cell in row: + if cell == 1: + row_text += " O " + elif cell == -1: + row_text += " X " + else: + row_text += " . " + print(row_text) def getCurrentPlayer(self): return self.currentPlayer def getPossibleActions(self): - possibleActions = [] - for i in range(len(self.board)): - for j in range(len(self.board[i])): - if self.board[i][j] == 0: - possibleActions.append(Action(player=self.currentPlayer, x=i, y=j)) - return possibleActions + if self.possibleActions is None: + self.possibleActions = [] + for i in range(len(self.board)): + for j in range(len(self.board[i])): + if self.board[i][j] == 0: + self.possibleActions.append(Action(player=self.currentPlayer, x=i, y=j)) + return self.possibleActions def takeAction(self, action): newState = deepcopy(self) newState.board[action.x][action.y] = action.player newState.currentPlayer = self.currentPlayer * -1 + newState.possibleActions = None return newState def isTerminal(self): for row in self.board: - if abs(sum(row)) == 3: + if abs(sum(row)) == self.side: return True for column in list(map(list, zip(*self.board))): - if abs(sum(column)) == 3: + if abs(sum(column)) == self.side: return True for diagonal in [[self.board[i][i] for i in range(len(self.board))], [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: - if abs(sum(diagonal)) == 3: + if abs(sum(diagonal)) == self.side: return True return reduce(operator.mul, sum(self.board, []), 1) def getReward(self): for row in self.board: - if abs(sum(row)) == 3: - return sum(row) / 3 + if abs(sum(row)) == self.side: + return sum(row) / self.side for column in list(map(list, zip(*self.board))): - if abs(sum(column)) == 3: - return sum(column) / 3 + if abs(sum(column)) == self.side: + return sum(column) / self.side for diagonal in [[self.board[i][i] for i in range(len(self.board))], [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: - if abs(sum(diagonal)) == 3: - return sum(diagonal) / 3 + if abs(sum(diagonal)) == self.side: + return sum(diagonal) / self.side return False @@ -74,8 +91,50 @@ def __hash__(self): return hash((self.x, self.y, self.player)) -initialState = NaughtsAndCrossesState() -mcts = mcts(timeLimit=1000) -action = mcts.search(initialState=initialState) +# Example of a game between two searchers: MCTS versus random +mcts_player = random.choice((1, -1)) +player_name = {1:'O', -1:'X'} + +game_side = random.choice(list(range(3,11))) +currentState = NaughtsAndCrossesState(side=game_side) +currentState.show() +turn = 0 + +while not currentState.isTerminal(): + turn += 1 + player = currentState.getCurrentPlayer() + + action_count = len(currentState.getPossibleActions()) + + if player == mcts_player: + searcher = mcts(timeLimit=1_000) + searcher_name = "mcts-1s" + action = searcher.search(initialState=currentState) + totalReward = searcher.getTotalReward() + + else: + searcher_name = "random" + action =random.choice(currentState.getPossibleActions()) + totalReward = None + + currentState = currentState.takeAction(action) + + print(f"at turn {turn} player {player_name[player]}={player} ({searcher_name}) takes action {action}" + + f" amongst {action_count} possibilities") + + if totalReward is not None: + if totalReward*player > 0: + print(f"mcts: {totalReward} total reward; winning leaves found !!!") + else: + print(f"mcts: {totalReward} total reward; no winning leaf found ...") + + print('-'*90) + currentState.show() + +print('-'*90) +reward = currentState.getReward() -print(action) +if reward == 0: + print(f"game {game_side}x{game_side} terminates; nobody wins") +else: + print(f"game {game_side}x{game_side} terminates; player {player_name[player]}={player} wins") From 689d895a315fdc3e7b231be8fa03de1ca7cc6820 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Fri, 1 Jan 2021 21:27:13 +0100 Subject: [PATCH 02/12] Impove game play example with MCTS statistics on the selected action --- README.md | 16 ++++---- mcts.py | 18 ++++++--- naughtsandcrosses.py | 87 ++++++++++++++++++++++++-------------------- 3 files changed, 68 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 0d160e7..ce08699 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # MCTS -This package provides a simple way of using Monte Carlo Tree Search in any perfect information domain. +This package provides a simple way of using Monte Carlo Tree Search in any perfect information domain. -## Installation +## Installation With pip: `pip install mcts` @@ -10,25 +10,25 @@ Without pip: Download the zip/tar.gz file of the [latest release](https://github ## Quick Usage -In order to run MCTS, you must implement a `State` class which can fully describe the state of the world. It must also implement four methods: +In order to run MCTS, you must implement a `State` class which can fully describe the state of the world. It must also implement four methods: - `getCurrentPlayer()`: Returns 1 if it is the maximizer player's turn to choose an action, or -1 for the minimiser player - `getPossibleActions()`: Returns an iterable of all actions which can be taken from this state - `takeAction(action)`: Returns the state which results from taking action `action` - `isTerminal()`: Returns whether this state is a terminal state -- `getReward()`: Returns the reward for this state. Only needed for terminal states. +- `getReward()`: Returns the reward for this state. Only needed for terminal states. -You must also choose a hashable representation for an action as used in `getPossibleActions` and `takeAction`. Typically this would be a class with a custom `__hash__` method, but it could also simply be a tuple or a string. +You must also choose a hashable representation for an action as used in `getPossibleActions` and `takeAction`. Typically this would be a class with a custom `__hash__` method, but it could also simply be a tuple or a string. Once these have been implemented, running MCTS is as simple as initializing your starting state, then running: ```python from mcts import mcts -mcts = mcts(timeLimit=1000) -bestAction = mcts.search(initialState=initialState) +searcher = mcts(timeLimit=1000) +bestAction = searcher.search(initialState=currentState) ``` -See [naughtsandcrosses.py](https://github.com/pbsinclair42/MCTS/blob/master/naughtsandcrosses.py) for a simple example. +See [naughtsandcrosses.py](https://github.com/pbsinclair42/MCTS/blob/master/naughtsandcrosses.py) for a simple example. ## Slow Usage //TODO diff --git a/mcts.py b/mcts.py index 936d474..45f007b 100644 --- a/mcts.py +++ b/mcts.py @@ -27,7 +27,7 @@ def __init__(self, state, parent): class mcts(): - def __init__(self, timeLimit=None, iterationLimit=None, explorationConstant=1 / math.sqrt(2), + def __init__(self, timeLimit=None, iterationLimit=None, explorationConstant=math.sqrt(2), rolloutPolicy=randomPolicy): if timeLimit != None: if iterationLimit != None: @@ -96,8 +96,10 @@ def getBestChild(self, node, explorationValue): bestValue = float("-inf") bestNodes = [] for child in node.children.values(): - nodeValue = node.state.getCurrentPlayer() * child.totalReward / child.numVisits + explorationValue * math.sqrt( - 2 * math.log(node.numVisits) / child.numVisits) + nodeValue = node.state.getCurrentPlayer() * child.totalReward / child.numVisits + if explorationValue != 0: + nodeValue += explorationValue * math.sqrt(math.log(node.numVisits) / child.numVisits) + if nodeValue > bestValue: bestValue = nodeValue bestNodes = [child] @@ -110,5 +112,11 @@ def getAction(self, root, bestChild): if node is bestChild: return action - def getTotalReward(self): - return self.root.totalReward + def getStatistics(self, action=None): + statistics = {} + statistics['rootNumVisits'] = self.root.numVisits + statistics['rootTotalReward'] = self.root.totalReward + if action is not None: + statistics['actionNumVisits'] = self.root.children[action].numVisits + statistics['actionTotalReward'] = self.root.children[action].totalReward + return statistics diff --git a/naughtsandcrosses.py b/naughtsandcrosses.py index bd1ed95..cda57d9 100644 --- a/naughtsandcrosses.py +++ b/naughtsandcrosses.py @@ -8,9 +8,12 @@ class NaughtsAndCrossesState(): - def __init__(self, side=3): - self.side = side - self.board = [ [0 for _ in range(side)] for _ in range(side)] + + playerNames = {1:'O', -1:'X'} + + def __init__(self, gridSize=3): + self.gridSize = gridSize + self.board = [ [0 for _ in range(self.gridSize)] for _ in range(self.gridSize)] self.currentPlayer = 1 self.possibleActions = None @@ -18,10 +21,8 @@ def show(self): for row in self.board: row_text = "" for cell in row: - if cell == 1: - row_text += " O " - elif cell == -1: - row_text += " X " + if cell in self.playerNames: + row_text += f" {self.playerNames[cell]} " else: row_text += " . " print(row_text) @@ -47,28 +48,28 @@ def takeAction(self, action): def isTerminal(self): for row in self.board: - if abs(sum(row)) == self.side: + if abs(sum(row)) == self.gridSize: return True for column in list(map(list, zip(*self.board))): - if abs(sum(column)) == self.side: + if abs(sum(column)) == self.gridSize: return True for diagonal in [[self.board[i][i] for i in range(len(self.board))], [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: - if abs(sum(diagonal)) == self.side: + if abs(sum(diagonal)) == self.gridSize: return True return reduce(operator.mul, sum(self.board, []), 1) def getReward(self): for row in self.board: - if abs(sum(row)) == self.side: - return sum(row) / self.side + if abs(sum(row)) == self.gridSize: + return sum(row) / self.gridSize for column in list(map(list, zip(*self.board))): - if abs(sum(column)) == self.side: - return sum(column) / self.side + if abs(sum(column)) == self.gridSize: + return sum(column) / self.gridSize for diagonal in [[self.board[i][i] for i in range(len(self.board))], [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: - if abs(sum(diagonal)) == self.side: - return sum(diagonal) / self.side + if abs(sum(diagonal)) == self.gridSize: + return sum(diagonal) / self.gridSize return False @@ -91,14 +92,19 @@ def __hash__(self): return hash((self.x, self.y, self.player)) -# Example of a game between two searchers: MCTS versus random -mcts_player = random.choice((1, -1)) -player_name = {1:'O', -1:'X'} +# Example of a NaughtsAndCrossesState game play between an MCTS agent and a random agent. +# The standard 3x3 grid is randomly extended up to 10x10 in order to exercise the MCTS time ressource. +# One of the two player is randomly assigned to the MCTS agent for purpose of correctness checking. +# A basic statistics is provided at each MCTS turn. + +playerNames = NaughtsAndCrossesState.playerNames +mctsPlayer = random.choice(sorted(playerNames.keys())) + +gridSize = random.choice(list(range(3,11))) +currentState = NaughtsAndCrossesState(gridSize) -game_side = random.choice(list(range(3,11))) -currentState = NaughtsAndCrossesState(side=game_side) -currentState.show() turn = 0 +currentState.show() while not currentState.isTerminal(): turn += 1 @@ -106,35 +112,36 @@ def __hash__(self): action_count = len(currentState.getPossibleActions()) - if player == mcts_player: - searcher = mcts(timeLimit=1_000) - searcher_name = "mcts-1s" - action = searcher.search(initialState=currentState) - totalReward = searcher.getTotalReward() + if player == mctsPlayer: + agent = mcts(timeLimit=1_000) + agentName = "mcts-1-second" + action = agent.search(initialState=currentState) + statistics = agent.getStatistics(action) else: - searcher_name = "random" + agentName = "random" action =random.choice(currentState.getPossibleActions()) - totalReward = None + statistics = None currentState = currentState.takeAction(action) - print(f"at turn {turn} player {player_name[player]}={player} ({searcher_name}) takes action {action}" + - f" amongst {action_count} possibilities") + print(f"at turn {turn} player {playerNames[player]}={player} ({agentName})" + + f" takes action {action} amongst {action_count} possibilities") + + if statistics is not None: + print(f"mcts action statitics: {statistics['actionTotalReward']} total reward" + + f" over {statistics['actionNumVisits']} visits") - if totalReward is not None: - if totalReward*player > 0: - print(f"mcts: {totalReward} total reward; winning leaves found !!!") - else: - print(f"mcts: {totalReward} total reward; no winning leaf found ...") + print(f"mcts root statitics: {statistics['rootTotalReward']} total reward" + + f" over {statistics['rootNumVisits']} visits") print('-'*90) currentState.show() print('-'*90) -reward = currentState.getReward() -if reward == 0: - print(f"game {game_side}x{game_side} terminates; nobody wins") +if currentState.getReward() == 0: + print(f"game {gridSize}x{gridSize} terminates; nobody wins") else: - print(f"game {game_side}x{game_side} terminates; player {player_name[player]}={player} wins") + print(f"game {gridSize}x{gridSize} terminates" + + f"; player {playerNames[player]}={player} ({agentName}) wins") From 256af8712be67e0747672b0080ccf90bc79760d9 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Fri, 1 Jan 2021 21:31:37 +0100 Subject: [PATCH 03/12] Update naughtsandcrosses.py --- naughtsandcrosses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/naughtsandcrosses.py b/naughtsandcrosses.py index cda57d9..f4de9d9 100644 --- a/naughtsandcrosses.py +++ b/naughtsandcrosses.py @@ -120,7 +120,7 @@ def __hash__(self): else: agentName = "random" - action =random.choice(currentState.getPossibleActions()) + action = random.choice(currentState.getPossibleActions()) statistics = None currentState = currentState.takeAction(action) From 8c3d805ce32c482509ce9c395e5cc601fe9180ac Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Sat, 2 Jan 2021 11:51:31 +0100 Subject: [PATCH 04/12] Contribution prepared --- .gitignore | 2 + README.md | 23 ++++++++--- mcts.py | 6 +-- naughtsandcrosses.py | 91 +++++++++++++++++++++----------------------- 4 files changed, 65 insertions(+), 57 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a295864 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +__pycache__ diff --git a/README.md b/README.md index ce08699..2d057c7 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,13 @@ Without pip: Download the zip/tar.gz file of the [latest release](https://github ## Quick Usage -In order to run MCTS, you must implement a `State` class which can fully describe the state of the world. It must also implement four methods: +In order to run MCTS, you must implement a `State` class which can fully describe the state of the world. It must also implement the following methods: -- `getCurrentPlayer()`: Returns 1 if it is the maximizer player's turn to choose an action, or -1 for the minimiser player +- `getCurrentPlayer()`: Returns 1 if it is the maximizer player's turn to choose an action, or -1 for the minimizer player - `getPossibleActions()`: Returns an iterable of all actions which can be taken from this state - `takeAction(action)`: Returns the state which results from taking action `action` - `isTerminal()`: Returns whether this state is a terminal state -- `getReward()`: Returns the reward for this state. Only needed for terminal states. +- `getReward()`: Returns the reward for this state: 0 for a draw, positive for a win by maximizer player or negative for a win by the minimizer player. Only needed for terminal states. You must also choose a hashable representation for an action as used in `getPossibleActions` and `takeAction`. Typically this would be a class with a custom `__hash__` method, but it could also simply be a tuple or a string. @@ -25,13 +25,26 @@ Once these have been implemented, running MCTS is as simple as initializing your ```python from mcts import mcts +currentState = MyState() +... searcher = mcts(timeLimit=1000) bestAction = searcher.search(initialState=currentState) +currentState = currentState.takeAction(action) +... + ``` See [naughtsandcrosses.py](https://github.com/pbsinclair42/MCTS/blob/master/naughtsandcrosses.py) for a simple example. -## Slow Usage -//TODO +## Detailed usage +A few customizations are possible through the `mcts` constructor: + +- The number of MCTS search rounds can be limited by either a given time limit or a given iteration number. +- The exploration constant $c$, which appears in the UCT score $w_i/n_i + c\sqrt{{ln N_i}/n_i}$ with theoretical default setting $c=\sqrt 2$, can be adapted to your game. +- The default uniform random rollout/playout policy can be changed. + +A few statistics can be retrieved after each MCTS search call (see `naughtsandcrosses.py` example) + +More of MCTS theory could be found at https://en.wikipedia.org/wiki/Monte_Carlo_tree_search and cited references. ## Collaborating diff --git a/mcts.py b/mcts.py index 45f007b..c69bd8b 100644 --- a/mcts.py +++ b/mcts.py @@ -96,10 +96,8 @@ def getBestChild(self, node, explorationValue): bestValue = float("-inf") bestNodes = [] for child in node.children.values(): - nodeValue = node.state.getCurrentPlayer() * child.totalReward / child.numVisits - if explorationValue != 0: - nodeValue += explorationValue * math.sqrt(math.log(node.numVisits) / child.numVisits) - + nodeValue = (node.state.getCurrentPlayer() * child.totalReward / child.numVisits + + explorationValue * math.sqrt(math.log(node.numVisits) / child.numVisits)) if nodeValue > bestValue: bestValue = nodeValue bestNodes = [child] diff --git a/naughtsandcrosses.py b/naughtsandcrosses.py index f4de9d9..d013a50 100644 --- a/naughtsandcrosses.py +++ b/naughtsandcrosses.py @@ -92,56 +92,51 @@ def __hash__(self): return hash((self.x, self.y, self.player)) -# Example of a NaughtsAndCrossesState game play between an MCTS agent and a random agent. -# The standard 3x3 grid is randomly extended up to 10x10 in order to exercise the MCTS time ressource. -# One of the two player is randomly assigned to the MCTS agent for purpose of correctness checking. -# A basic statistics is provided at each MCTS turn. - -playerNames = NaughtsAndCrossesState.playerNames -mctsPlayer = random.choice(sorted(playerNames.keys())) - -gridSize = random.choice(list(range(3,11))) -currentState = NaughtsAndCrossesState(gridSize) - -turn = 0 -currentState.show() - -while not currentState.isTerminal(): - turn += 1 - player = currentState.getCurrentPlayer() - - action_count = len(currentState.getPossibleActions()) - - if player == mctsPlayer: - agent = mcts(timeLimit=1_000) - agentName = "mcts-1-second" - action = agent.search(initialState=currentState) - statistics = agent.getStatistics(action) +def main(): + """Example of a NaughtsAndCrossesState game play between MCTS and random searchers. + The standard 3x3 grid is randomly extended up to 10x10 in order to exercise the MCTS time ressource. + One of the two player is randomly assigned to the MCTS searcher for purpose of correctness checking. + A basic statistics is provided at each MCTS turn.""" + + playerNames = NaughtsAndCrossesState.playerNames + mctsPlayer = random.choice(sorted(playerNames.keys())) + gridSize = random.choice(list(range(3,11))) + + currentState = NaughtsAndCrossesState(gridSize) + turn = 0 + currentState.show() + while not currentState.isTerminal(): + turn += 1 + player = currentState.getCurrentPlayer() + action_count = len(currentState.getPossibleActions()) + + if player == mctsPlayer: + searcher = mcts(timeLimit=1_000) + searcherName = "mcts-1-second" + action = searcher.search(initialState=currentState) + statistics = searcher.getStatistics(action) + else: + searcherName = "random" + action = random.choice(currentState.getPossibleActions()) + statistics = None + + currentState = currentState.takeAction(action) + print(f"at turn {turn} player {playerNames[player]}={player} ({searcherName}) takes action {action} amongst {action_count} possibilities") + + if statistics is not None: + print(f"mcts statitics for the chosen action: {statistics['actionTotalReward']} total reward over {statistics['actionNumVisits']} visits") + print(f"mcts statitics for all explored actions: {statistics['rootTotalReward']} total reward over {statistics['rootNumVisits']} visits") + + print('-'*90) + currentState.show() + print('-'*90) + if currentState.getReward() == 0: + print(f"game {gridSize}x{gridSize} terminates; nobody wins") else: - agentName = "random" - action = random.choice(currentState.getPossibleActions()) - statistics = None - - currentState = currentState.takeAction(action) - - print(f"at turn {turn} player {playerNames[player]}={player} ({agentName})" + - f" takes action {action} amongst {action_count} possibilities") - - if statistics is not None: - print(f"mcts action statitics: {statistics['actionTotalReward']} total reward" + - f" over {statistics['actionNumVisits']} visits") + print(f"game {gridSize}x{gridSize} terminates; player {playerNames[player]}={player} ({searcherName}) wins") - print(f"mcts root statitics: {statistics['rootTotalReward']} total reward" + - f" over {statistics['rootNumVisits']} visits") - - print('-'*90) - currentState.show() -print('-'*90) +if __name__ == "__main__": + main() -if currentState.getReward() == 0: - print(f"game {gridSize}x{gridSize} terminates; nobody wins") -else: - print(f"game {gridSize}x{gridSize} terminates" + - f"; player {playerNames[player]}={player} ({agentName}) wins") From b297d2b3fc2771ccee412aa7804b2e16c8481548 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Sun, 10 Jan 2021 21:54:07 +0100 Subject: [PATCH 05/12] Create kinarow.py K-in-a-Row started ; missing diagonals --- kinarow.py | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 kinarow.py diff --git a/kinarow.py b/kinarow.py new file mode 100644 index 0000000..c464c42 --- /dev/null +++ b/kinarow.py @@ -0,0 +1,195 @@ +from __future__ import division + +from copy import deepcopy +from mcts import mcts +import random + +class KInARow: + + playerNames = {1:'O', -1:'X'} + + def __init__(self, k_connections=4, m_columns=7, n_rows=6): + self.k_connections = k_connections + self.m_columns = m_columns + self.n_rows = n_rows + self.board = [ [0 for _ in range(self.m_columns)] for _ in range(self.n_rows)] + self.currentPlayer = max(KInARow.playerNames.keys()) + self.isTerminated = None + self.reward = None + self.possibleActions = None + + def show(self): + for row in reversed(self.board): + row_text = "" + for cell in row: + if cell in self.playerNames: + row_text += f" {self.playerNames[cell]} " + else: + row_text += " . " + print(row_text) + + def getCurrentPlayer(self): + return self.currentPlayer + + def getPossibleActions(self): + if self.possibleActions is None: + self.possibleActions = [] + for column_index in range(self.m_columns): + for row_index in range(self.n_rows): + if self.board[row_index][column_index] == 0: + self.possibleActions.append(Action(player=self.currentPlayer, + row_index=row_index, + column_index=column_index)) + break + return self.possibleActions + + def takeAction(self, action): + newState = deepcopy(self) + newState.board[action.row_index][action.column_index] = action.player + newState.currentPlayer = self.currentPlayer * -1 + newState.isTerminated = None + newState.possibleActions = None + return newState + + def isTerminal(self): + + if self.isTerminated is None: + + self.isTerminated = False + for column_index in range(self.m_columns): + line = [] + for row_index in range(self.n_rows): + line.append(self.board[row_index][column_index]) + lineReward = self.getLineReward(line) + if lineReward != 0: + self.isTerminated = True + self.reward = lineReward + break + + if not self.isTerminated: + for row_index in range(self.n_rows): + line = [] + for column_index in range(self.m_columns): + line.append(self.board[row_index][column_index]) + lineReward = self.getLineReward(line) + if lineReward != 0: + self.isTerminated = True + self.reward = lineReward + break + + ##TODO: add diagonals + + if not self.isTerminated and len(self.getPossibleActions()) == 0: + self.isTerminated = True + self.reward = 0 + + return self.isTerminated + + def getReward(self): + assert self.isTerminal() + assert self.reward is not None + return self.reward + + def getLineReward(self, line): + lineReward = 0 + if len(line) >= self.k_connections: + for player in KInARow.playerNames.keys(): + line_player = [True if x == player else False for x in line] + k = 0 + for x in line_player: + if x: + k += 1 + if k == self.k_connections: + lineReward = player + break + else: + k = 0 + if lineReward != 0: + break + return lineReward + + +class Action(): + def __init__(self, player, row_index, column_index): + self.player = player + self.row_index = row_index + self.column_index = column_index + + def __str__(self): + return str((self.row_index, self.column_index)) + + def __repr__(self): + return str(self) + + def __eq__(self, other): + return self.__class__ == (other.__class__ and + self.player == other.player and + self.row_index == other.row_index and + self.column_index == other.column_index) + + def __hash__(self): + return hash((self.row_index, self.column_index, self.player)) + + +def main(): + """Example of a KInARow game play between MCTS and random searchers. + + The k_connections and m_columns x n_rows board are randomly chosen + in order to exercise the MCTS time ressource. + + One of the two player is randomly assigned to the MCTS searcher + for purpose of correctness checking. + + A basic statistics is provided at each MCTS turn.""" + + playerNames = KInARow.playerNames + mctsPlayer = random.choice(sorted(playerNames.keys())) + + (k, m, n) = random.choice([(4, 7, 6)]) + currentState = KInARow(k_connections=k, m_columns=m, n_rows=n) + + turn = 0 + currentState.show() + while not currentState.isTerminal(): + turn += 1 + player = currentState.getCurrentPlayer() + action_count = len(currentState.getPossibleActions()) + + if player == mctsPlayer: + searcher = mcts(timeLimit=1_000) + searcherName = "mcts-1-second" + action = searcher.search(initialState=currentState) + statistics = searcher.getStatistics(action) + else: + searcher = mcts(timeLimit=500) + searcherName = "mcts-0.5-second" + action = searcher.search(initialState=currentState) + statistics = searcher.getStatistics(action) + # searcherName = "random" + # action = random.choice(currentState.getPossibleActions()) + # statistics = None + + currentState = currentState.takeAction(action) + print(f"at turn {turn} player {playerNames[player]}={player} ({searcherName})" + + f" takes action {action} amongst {action_count} possibilities") + + if statistics is not None: + print(f"mcts statitics for the chosen action: {statistics['actionTotalReward']} total reward" + + f" over {statistics['actionNumVisits']} visits") + + print(f"mcts statitics for all explored actions: {statistics['rootTotalReward']} total reward" + + f" over {statistics['rootNumVisits']} visits") + + print('-'*90) + currentState.show() + + print('-'*90) + if currentState.getReward() == 0: + print(f"game k={k} mxn={m}x{n} terminates; nobody wins") + else: + print(f"game k={k} mxn={m}x{n} terminates; player {playerNames[player]}={player} ({searcherName}) wins") + + +if __name__ == "__main__": + main() + From cfb0753991810acc9fc8fd89811959997cf965d3 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Mon, 11 Jan 2021 19:11:05 +0100 Subject: [PATCH 06/12] Update kinarow.py Almost good --- kinarow.py | 163 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 97 insertions(+), 66 deletions(-) diff --git a/kinarow.py b/kinarow.py index c464c42..60e4f35 100644 --- a/kinarow.py +++ b/kinarow.py @@ -8,25 +8,26 @@ class KInARow: playerNames = {1:'O', -1:'X'} - def __init__(self, k_connections=4, m_columns=7, n_rows=6): - self.k_connections = k_connections - self.m_columns = m_columns - self.n_rows = n_rows - self.board = [ [0 for _ in range(self.m_columns)] for _ in range(self.n_rows)] + def __init__(self, kConnections=4, mColumns=7, nRows=6): + self.kConnections = kConnections + self.mColumns = mColumns + self.nRows = nRows + self.board = [ [0 for _ in range(self.mColumns)] for _ in range(self.nRows)] self.currentPlayer = max(KInARow.playerNames.keys()) self.isTerminated = None self.reward = None self.possibleActions = None + self.winMove = None def show(self): for row in reversed(self.board): - row_text = "" - for cell in row: - if cell in self.playerNames: - row_text += f" {self.playerNames[cell]} " + rowText = "" + for x in row: + if x in self.playerNames: + rowText += f" {self.playerNames[x]} " else: - row_text += " . " - print(row_text) + rowText += " . " + print(rowText) def getCurrentPlayer(self): return self.currentPlayer @@ -34,21 +35,22 @@ def getCurrentPlayer(self): def getPossibleActions(self): if self.possibleActions is None: self.possibleActions = [] - for column_index in range(self.m_columns): - for row_index in range(self.n_rows): - if self.board[row_index][column_index] == 0: + for columnIndex in range(self.mColumns): + for rowIndex in range(self.nRows): + if self.board[rowIndex][columnIndex] == 0: self.possibleActions.append(Action(player=self.currentPlayer, - row_index=row_index, - column_index=column_index)) + rowIndex=rowIndex, + columnIndex=columnIndex)) break return self.possibleActions def takeAction(self, action): newState = deepcopy(self) - newState.board[action.row_index][action.column_index] = action.player + newState.board[action.rowIndex][action.columnIndex] = action.player newState.currentPlayer = self.currentPlayer * -1 newState.isTerminated = None newState.possibleActions = None + newState.winMove = None return newState def isTerminal(self): @@ -56,28 +58,56 @@ def isTerminal(self): if self.isTerminated is None: self.isTerminated = False - for column_index in range(self.m_columns): - line = [] - for row_index in range(self.n_rows): - line.append(self.board[row_index][column_index]) + for rowIndex in range(self.nRows): + line = self.board[rowIndex] + lineReward = self.getLineReward(line) + if lineReward != 0: + self.isTerminated = True + self.reward = lineReward + self.winMove = "k-in-row" + break + + if not self.isTerminated: + for columnIndex in range(self.mColumns): + line = [] + for rowIndex in range(self.nRows): + line.append(self.board[rowIndex][columnIndex]) lineReward = self.getLineReward(line) if lineReward != 0: self.isTerminated = True self.reward = lineReward + self.winMove = "k-in-column" break if not self.isTerminated: - for row_index in range(self.n_rows): + # diagonal: rowIndex = columnIndex + parameter + for parameter in range(1 - self.mColumns, self.nRows): line = [] - for column_index in range(self.m_columns): - line.append(self.board[row_index][column_index]) - lineReward = self.getLineReward(line) - if lineReward != 0: - self.isTerminated = True - self.reward = lineReward - break + for columnIndex in range(self.mColumns): + rowIndex = columnIndex + parameter + if 0 <= rowIndex < self.nRows: + line.append(self.board[rowIndex][columnIndex]) + lineReward = self.getLineReward(line) + if lineReward != 0: + self.isTerminated = True + self.reward = lineReward + self.winMove = "k-in-diagonal" + break - ##TODO: add diagonals + if not self.isTerminated: + # antidiagonal: rowIndex = - columnIndex + parameter + for parameter in range(0, self.mColumns + self.nRows): + line = [] + for columnIndex in range(self.mColumns): + rowIndex = -columnIndex + parameter + if 0 <= rowIndex < self.nRows: + line.append(self.board[rowIndex][columnIndex]) + lineReward = self.getLineReward(line) + if lineReward != 0: + self.isTerminated = True + self.reward = lineReward + self.winMove = "k-in-antidiagonal" + break if not self.isTerminated and len(self.getPossibleActions()) == 0: self.isTerminated = True @@ -92,31 +122,31 @@ def getReward(self): def getLineReward(self, line): lineReward = 0 - if len(line) >= self.k_connections: + if len(line) >= self.kConnections: for player in KInARow.playerNames.keys(): - line_player = [True if x == player else False for x in line] - k = 0 - for x in line_player: + playerLine = [x == player for x in line] + playerConnections = 0 + for x in playerLine: if x: - k += 1 - if k == self.k_connections: + playerConnections += 1 + if playerConnections == self.kConnections: lineReward = player break else: - k = 0 + playerConnections = 0 if lineReward != 0: break return lineReward class Action(): - def __init__(self, player, row_index, column_index): + def __init__(self, player, rowIndex, columnIndex): self.player = player - self.row_index = row_index - self.column_index = column_index + self.rowIndex = rowIndex + self.columnIndex = columnIndex def __str__(self): - return str((self.row_index, self.column_index)) + return str((self.rowIndex, self.columnIndex)) def __repr__(self): return str(self) @@ -124,17 +154,17 @@ def __repr__(self): def __eq__(self, other): return self.__class__ == (other.__class__ and self.player == other.player and - self.row_index == other.row_index and - self.column_index == other.column_index) + self.rowIndex == other.rowIndex and + self.columnIndex == other.columnIndex) def __hash__(self): - return hash((self.row_index, self.column_index, self.player)) + return hash((self.rowIndex, self.columnIndex, self.player)) def main(): """Example of a KInARow game play between MCTS and random searchers. - The k_connections and m_columns x n_rows board are randomly chosen + The kConnections and mColumns x nRows board are randomly chosen in order to exercise the MCTS time ressource. One of the two player is randomly assigned to the MCTS searcher @@ -142,11 +172,18 @@ def main(): A basic statistics is provided at each MCTS turn.""" + searchers = {} + searchers["mcts-100i"] = mcts(iterationLimit=200) + searchers["mcts-50i"] = mcts(iterationLimit=100) + playerNames = KInARow.playerNames - mctsPlayer = random.choice(sorted(playerNames.keys())) + + playerSearcherNames = {} + for player in sorted(playerNames.keys()): + playerSearcherNames[player] = random.choice(sorted(searchers.keys())) (k, m, n) = random.choice([(4, 7, 6)]) - currentState = KInARow(k_connections=k, m_columns=m, n_rows=n) + currentState = KInARow(kConnections=k, mColumns=m, nRows=n) turn = 0 currentState.show() @@ -155,30 +192,22 @@ def main(): player = currentState.getCurrentPlayer() action_count = len(currentState.getPossibleActions()) - if player == mctsPlayer: - searcher = mcts(timeLimit=1_000) - searcherName = "mcts-1-second" - action = searcher.search(initialState=currentState) - statistics = searcher.getStatistics(action) - else: - searcher = mcts(timeLimit=500) - searcherName = "mcts-0.5-second" - action = searcher.search(initialState=currentState) - statistics = searcher.getStatistics(action) - # searcherName = "random" - # action = random.choice(currentState.getPossibleActions()) - # statistics = None + searcherName = playerSearcherNames[player] + searcher = searchers[searcherName] + + action = searcher.search(initialState=currentState) + statistics = searcher.getStatistics(action) currentState = currentState.takeAction(action) + print(f"at turn {turn} player {playerNames[player]}={player} ({searcherName})" + f" takes action {action} amongst {action_count} possibilities") - if statistics is not None: - print(f"mcts statitics for the chosen action: {statistics['actionTotalReward']} total reward" + - f" over {statistics['actionNumVisits']} visits") + print(f"mcts statitics for the chosen action: {statistics['actionTotalReward']} total reward" + + f" over {statistics['actionNumVisits']} visits") - print(f"mcts statitics for all explored actions: {statistics['rootTotalReward']} total reward" + - f" over {statistics['rootNumVisits']} visits") + print(f"mcts statitics for all explored actions: {statistics['rootTotalReward']} total reward" + + f" over {statistics['rootNumVisits']} visits") print('-'*90) currentState.show() @@ -187,7 +216,9 @@ def main(): if currentState.getReward() == 0: print(f"game k={k} mxn={m}x{n} terminates; nobody wins") else: - print(f"game k={k} mxn={m}x{n} terminates; player {playerNames[player]}={player} ({searcherName}) wins") + print(f"game k={k} mxn={m}x{n} terminates;" + + f" player {playerNames[player]}={player} ({searcherName}) wins" + + f" by {currentState.winMove}") if __name__ == "__main__": From 6a3483d8440121e26a12e7bfcb08588b53cc40d8 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Mon, 11 Jan 2021 22:46:26 +0100 Subject: [PATCH 07/12] Update kinarow.py Merge with master + README update to be done --- kinarow.py | 82 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/kinarow.py b/kinarow.py index 60e4f35..914ead5 100644 --- a/kinarow.py +++ b/kinarow.py @@ -4,20 +4,21 @@ from mcts import mcts import random -class KInARow: + +class KInARowState: playerNames = {1:'O', -1:'X'} - def __init__(self, kConnections=4, mColumns=7, nRows=6): - self.kConnections = kConnections + def __init__(self, mColumns=7, nRows=6, kConnections=4): self.mColumns = mColumns self.nRows = nRows + self.kConnections = kConnections self.board = [ [0 for _ in range(self.mColumns)] for _ in range(self.nRows)] - self.currentPlayer = max(KInARow.playerNames.keys()) + self.currentPlayer = max(KInARowState.playerNames.keys()) self.isTerminated = None self.reward = None self.possibleActions = None - self.winMove = None + self.winingPattern = None def show(self): for row in reversed(self.board): @@ -39,8 +40,8 @@ def getPossibleActions(self): for rowIndex in range(self.nRows): if self.board[rowIndex][columnIndex] == 0: self.possibleActions.append(Action(player=self.currentPlayer, - rowIndex=rowIndex, - columnIndex=columnIndex)) + columnIndex=columnIndex, + rowIndex=rowIndex)) break return self.possibleActions @@ -50,7 +51,7 @@ def takeAction(self, action): newState.currentPlayer = self.currentPlayer * -1 newState.isTerminated = None newState.possibleActions = None - newState.winMove = None + newState.winingPattern = None return newState def isTerminal(self): @@ -64,7 +65,7 @@ def isTerminal(self): if lineReward != 0: self.isTerminated = True self.reward = lineReward - self.winMove = "k-in-row" + self.winingPattern = "k-in-row" break if not self.isTerminated: @@ -76,7 +77,7 @@ def isTerminal(self): if lineReward != 0: self.isTerminated = True self.reward = lineReward - self.winMove = "k-in-column" + self.winingPattern = "k-in-column" break if not self.isTerminated: @@ -91,7 +92,7 @@ def isTerminal(self): if lineReward != 0: self.isTerminated = True self.reward = lineReward - self.winMove = "k-in-diagonal" + self.winingPattern = "k-in-diagonal" break if not self.isTerminated: @@ -106,7 +107,7 @@ def isTerminal(self): if lineReward != 0: self.isTerminated = True self.reward = lineReward - self.winMove = "k-in-antidiagonal" + self.winingPattern = "k-in-antidiagonal" break if not self.isTerminated and len(self.getPossibleActions()) == 0: @@ -123,7 +124,7 @@ def getReward(self): def getLineReward(self, line): lineReward = 0 if len(line) >= self.kConnections: - for player in KInARow.playerNames.keys(): + for player in KInARowState.playerNames.keys(): playerLine = [x == player for x in line] playerConnections = 0 for x in playerLine: @@ -140,13 +141,13 @@ def getLineReward(self, line): class Action(): - def __init__(self, player, rowIndex, columnIndex): + def __init__(self, player, columnIndex, rowIndex): self.player = player self.rowIndex = rowIndex self.columnIndex = columnIndex def __str__(self): - return str((self.rowIndex, self.columnIndex)) + return str((self.columnIndex, self.rowIndex)) def __repr__(self): return str(self) @@ -154,37 +155,47 @@ def __repr__(self): def __eq__(self, other): return self.__class__ == (other.__class__ and self.player == other.player and - self.rowIndex == other.rowIndex and - self.columnIndex == other.columnIndex) + self.columnIndex == other.columnIndex and + self.rowIndex == other.rowIndex) def __hash__(self): - return hash((self.rowIndex, self.columnIndex, self.player)) + return hash((self.columnIndex, self.rowIndex, self.player)) + + +def extractStatistics(searcher, action=None): + statistics = {} + statistics['rootNumVisits'] = searcher.root.numVisits + statistics['rootTotalReward'] = searcher.root.totalReward + if action is not None: + statistics['actionNumVisits'] = searcher.root.children[action].numVisits + statistics['actionTotalReward'] = searcher.root.children[action].totalReward + return statistics def main(): - """Example of a KInARow game play between MCTS and random searchers. + """Example of a "k-in-a-row" game with gravity like "Connect Four". - The kConnections and mColumns x nRows board are randomly chosen - in order to exercise the MCTS time ressource. + The match occurs between two MCTS searchers. - One of the two player is randomly assigned to the MCTS searcher - for purpose of correctness checking. + The kConnections and (mColumns, nRows) board are randomly chosen + in order to exercise the MCTS time ressource. - A basic statistics is provided at each MCTS turn.""" + Basic MCTS statistics is provided.""" searchers = {} - searchers["mcts-100i"] = mcts(iterationLimit=200) - searchers["mcts-50i"] = mcts(iterationLimit=100) + searchers["mcts-1500ms"] = mcts(timeLimit=1_500) + searchers["mcts-1000ms"] = mcts(timeLimit=1_000) + searchers["mcts-500ms"] = mcts(timeLimit=500) + searchers["mcts-250ms"] = mcts(timeLimit=250) - playerNames = KInARow.playerNames + playerNames = KInARowState.playerNames playerSearcherNames = {} for player in sorted(playerNames.keys()): playerSearcherNames[player] = random.choice(sorted(searchers.keys())) - (k, m, n) = random.choice([(4, 7, 6)]) - currentState = KInARow(kConnections=k, mColumns=m, nRows=n) - + (m, n, k) = random.choice([(7, 6, 4), (8, 7, 5), (9, 8, 6)]) + currentState = KInARowState(mColumns=m, nRows=n, kConnections=k) turn = 0 currentState.show() while not currentState.isTerminal(): @@ -196,12 +207,12 @@ def main(): searcher = searchers[searcherName] action = searcher.search(initialState=currentState) - statistics = searcher.getStatistics(action) + statistics = extractStatistics(searcher, action) currentState = currentState.takeAction(action) print(f"at turn {turn} player {playerNames[player]}={player} ({searcherName})" + - f" takes action {action} amongst {action_count} possibilities") + f" takes action (column, row)={action} amongst {action_count} possibilities") print(f"mcts statitics for the chosen action: {statistics['actionTotalReward']} total reward" + f" over {statistics['actionNumVisits']} visits") @@ -214,13 +225,12 @@ def main(): print('-'*90) if currentState.getReward() == 0: - print(f"game k={k} mxn={m}x{n} terminates; nobody wins") + print(f"game mxn={m}x{n} k={k} terminates; nobody wins") else: - print(f"game k={k} mxn={m}x{n} terminates;" + + print(f"game mxn={m}x{n} k={k} terminates;" + f" player {playerNames[player]}={player} ({searcherName}) wins" + - f" by {currentState.winMove}") + f" with pattern {currentState.winingPattern}") if __name__ == "__main__": main() - From e651677617611defcffe9f133b1ab2fcbf0df822 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Tue, 12 Jan 2021 15:32:57 +0100 Subject: [PATCH 08/12] Polishing new example renamed connectmnk --- kinarow.py => connectmnk.py | 107 +++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 44 deletions(-) rename kinarow.py => connectmnk.py (70%) diff --git a/kinarow.py b/connectmnk.py similarity index 70% rename from kinarow.py rename to connectmnk.py index 914ead5..cd8c017 100644 --- a/kinarow.py +++ b/connectmnk.py @@ -1,11 +1,21 @@ from __future__ import division -from copy import deepcopy +import copy from mcts import mcts import random -class KInARowState: +class ConnectMNKState: + """ConnectMNKState models a Connect(m,n,k,1,1) game that generalizes + the famous "Connect Four" itself equal to the Connect(7,6,4,1,1) game. + + Background from wikipedia: + Connect(m,n,k,p,q) games are another generalization of gomoku to a board + with m×n intersections, k in a row needed to win, p stones for each player + to place, and q stones for the first player to place for the first move + only. Each player may play only at the lowest unoccupied place in a column. + In particular, Connect(m,n,6,2,1) is called Connect6. + """ playerNames = {1:'O', -1:'X'} @@ -14,20 +24,26 @@ def __init__(self, mColumns=7, nRows=6, kConnections=4): self.nRows = nRows self.kConnections = kConnections self.board = [ [0 for _ in range(self.mColumns)] for _ in range(self.nRows)] - self.currentPlayer = max(KInARowState.playerNames.keys()) + self.currentPlayer = max(ConnectMNKState.playerNames.keys()) self.isTerminated = None self.reward = None self.possibleActions = None self.winingPattern = None def show(self): - for row in reversed(self.board): + rowText = "" + for columnIndex in range(self.mColumns): + rowText += f" {columnIndex % 10} " + print(rowText) + + for rowIndex in reversed(range(self.nRows)): rowText = "" - for x in row: + for x in self.board[rowIndex]: if x in self.playerNames: rowText += f" {self.playerNames[x]} " else: rowText += " . " + rowText += f" {rowIndex % 10} " print(rowText) def getCurrentPlayer(self): @@ -39,14 +55,16 @@ def getPossibleActions(self): for columnIndex in range(self.mColumns): for rowIndex in range(self.nRows): if self.board[rowIndex][columnIndex] == 0: - self.possibleActions.append(Action(player=self.currentPlayer, - columnIndex=columnIndex, - rowIndex=rowIndex)) + action = Action(player=self.currentPlayer, + columnIndex=columnIndex, + rowIndex=rowIndex) + self.possibleActions.append(action) break return self.possibleActions def takeAction(self, action): - newState = deepcopy(self) + newState = copy.copy(self) + newState.board = copy.deepcopy(newState.board) newState.board[action.rowIndex][action.columnIndex] = action.player newState.currentPlayer = self.currentPlayer * -1 newState.isTerminated = None @@ -55,13 +73,11 @@ def takeAction(self, action): return newState def isTerminal(self): - if self.isTerminated is None: - self.isTerminated = False for rowIndex in range(self.nRows): line = self.board[rowIndex] - lineReward = self.getLineReward(line) + lineReward = self.__getLineReward(line) if lineReward != 0: self.isTerminated = True self.reward = lineReward @@ -73,7 +89,7 @@ def isTerminal(self): line = [] for rowIndex in range(self.nRows): line.append(self.board[rowIndex][columnIndex]) - lineReward = self.getLineReward(line) + lineReward = self.__getLineReward(line) if lineReward != 0: self.isTerminated = True self.reward = lineReward @@ -88,7 +104,7 @@ def isTerminal(self): rowIndex = columnIndex + parameter if 0 <= rowIndex < self.nRows: line.append(self.board[rowIndex][columnIndex]) - lineReward = self.getLineReward(line) + lineReward = self.__getLineReward(line) if lineReward != 0: self.isTerminated = True self.reward = lineReward @@ -103,7 +119,7 @@ def isTerminal(self): rowIndex = -columnIndex + parameter if 0 <= rowIndex < self.nRows: line.append(self.board[rowIndex][columnIndex]) - lineReward = self.getLineReward(line) + lineReward = self.__getLineReward(line) if lineReward != 0: self.isTerminated = True self.reward = lineReward @@ -116,15 +132,10 @@ def isTerminal(self): return self.isTerminated - def getReward(self): - assert self.isTerminal() - assert self.reward is not None - return self.reward - - def getLineReward(self, line): + def __getLineReward(self, line): lineReward = 0 if len(line) >= self.kConnections: - for player in KInARowState.playerNames.keys(): + for player in ConnectMNKState.playerNames.keys(): playerLine = [x == player for x in line] playerConnections = 0 for x in playerLine: @@ -139,6 +150,11 @@ def getLineReward(self, line): break return lineReward + def getReward(self): + assert self.isTerminal() + assert self.reward is not None + return self.reward + class Action(): def __init__(self, player, columnIndex, rowIndex): @@ -162,25 +178,23 @@ def __hash__(self): return hash((self.columnIndex, self.rowIndex, self.player)) -def extractStatistics(searcher, action=None): +def extractStatistics(searcher, action): statistics = {} statistics['rootNumVisits'] = searcher.root.numVisits statistics['rootTotalReward'] = searcher.root.totalReward - if action is not None: - statistics['actionNumVisits'] = searcher.root.children[action].numVisits - statistics['actionTotalReward'] = searcher.root.children[action].totalReward + statistics['actionNumVisits'] = searcher.root.children[action].numVisits + statistics['actionTotalReward'] = searcher.root.children[action].totalReward return statistics def main(): - """Example of a "k-in-a-row" game with gravity like "Connect Four". + """Run a full match between two MCTS searchers, possibly with different + parametrization, playing a Connect(m,n,k) game. - The match occurs between two MCTS searchers. + Extraction of MCTS statistics is examplified. - The kConnections and (mColumns, nRows) board are randomly chosen - in order to exercise the MCTS time ressource. - - Basic MCTS statistics is provided.""" + The game parameters (m,n,k) are randomly chosen. + """ searchers = {} searchers["mcts-1500ms"] = mcts(timeLimit=1_500) @@ -188,14 +202,20 @@ def main(): searchers["mcts-500ms"] = mcts(timeLimit=500) searchers["mcts-250ms"] = mcts(timeLimit=250) - playerNames = KInARowState.playerNames + playerNames = ConnectMNKState.playerNames playerSearcherNames = {} for player in sorted(playerNames.keys()): playerSearcherNames[player] = random.choice(sorted(searchers.keys())) - (m, n, k) = random.choice([(7, 6, 4), (8, 7, 5), (9, 8, 6)]) - currentState = KInARowState(mColumns=m, nRows=n, kConnections=k) + runnableGames = list() + runnableGames.append((3, 3, 3)) + runnableGames.append((7, 6, 4)) + runnableGames.append((8, 7, 5)) + runnableGames.append((9, 8, 6)) + (m, n, k) = random.choice(runnableGames) + currentState = ConnectMNKState(mColumns=m, nRows=n, kConnections=k) + turn = 0 currentState.show() while not currentState.isTerminal(): @@ -208,26 +228,25 @@ def main(): action = searcher.search(initialState=currentState) statistics = extractStatistics(searcher, action) - currentState = currentState.takeAction(action) print(f"at turn {turn} player {playerNames[player]}={player} ({searcherName})" + f" takes action (column, row)={action} amongst {action_count} possibilities") - print(f"mcts statitics for the chosen action: {statistics['actionTotalReward']} total reward" + - f" over {statistics['actionNumVisits']} visits") - - print(f"mcts statitics for all explored actions: {statistics['rootTotalReward']} total reward" + + print("mcts statitics:" + + f" chosen action= {statistics['actionTotalReward']} total reward" + + f" over {statistics['actionNumVisits']} visits /" + f" all explored actions= {statistics['rootTotalReward']} total reward" + f" over {statistics['rootNumVisits']} visits") - print('-'*90) + print('-'*120) currentState.show() - print('-'*90) + print('-'*120) if currentState.getReward() == 0: - print(f"game mxn={m}x{n} k={k} terminates; nobody wins") + print(f"Connect(m={m},n={n},k={k}) game terminates; nobody wins") else: - print(f"game mxn={m}x{n} k={k} terminates;" + + print(f"Connect(m={m},n={n},k={k}) game terminates;" + f" player {playerNames[player]}={player} ({searcherName}) wins" + f" with pattern {currentState.winingPattern}") From f48311a1a942e7001e5ba9484ac43d1295efd2df Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Tue, 12 Jan 2021 16:11:37 +0100 Subject: [PATCH 09/12] Preparing pull request --- README.md | 25 ++++++----- mcts.py | 9 ---- naughtsandcrosses.py | 103 +++++++++---------------------------------- 3 files changed, 34 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index 2d057c7..8fe1fad 100644 --- a/README.md +++ b/README.md @@ -12,11 +12,11 @@ Without pip: Download the zip/tar.gz file of the [latest release](https://github In order to run MCTS, you must implement a `State` class which can fully describe the state of the world. It must also implement the following methods: -- `getCurrentPlayer()`: Returns 1 if it is the maximizer player's turn to choose an action, or -1 for the minimizer player +- `getCurrentPlayer()`: Returns 1 if it is the maximizer player's turn to choose an action, or -1 for the minimiser player - `getPossibleActions()`: Returns an iterable of all actions which can be taken from this state - `takeAction(action)`: Returns the state which results from taking action `action` - `isTerminal()`: Returns whether this state is a terminal state -- `getReward()`: Returns the reward for this state: 0 for a draw, positive for a win by maximizer player or negative for a win by the minimizer player. Only needed for terminal states. +- `getReward()`: Returns the reward for this state: 0 for a draw, positive for a win by maximizer player or negative for a win by the minimizer player. Only needed for terminal states. You must also choose a hashable representation for an action as used in `getPossibleActions` and `takeAction`. Typically this would be a class with a custom `__hash__` method, but it could also simply be a tuple or a string. @@ -29,22 +29,23 @@ currentState = MyState() ... searcher = mcts(timeLimit=1000) bestAction = searcher.search(initialState=currentState) -currentState = currentState.takeAction(action) ... - ``` -See [naughtsandcrosses.py](https://github.com/pbsinclair42/MCTS/blob/master/naughtsandcrosses.py) for a simple example. +See [naughtsandcrosses.py](./naughtsandcrosses.py) for a simple example. + +See [connectmnk.py](./connectmnk.py) for another example that runs a full *Connect(m,n,k,1,1)* game between two MCTS searchers. + +When initializing the MCTS searcher, there are a few optional parameters that can be used to optimize the search: -## Detailed usage -A few customizations are possible through the `mcts` constructor: +- `timeLimit`: the maximum duration of the search in milliseconds. Exactly one of `timeLimit` and `iterationLimit` must be set. +- `iterationLimit`: the maximum number of search iterations to be carried out. Exactly one of `timeLimit` and `iterationLimit` must be set. +- `explorationConstant`: a weight used when searching to help the algorithm prioritize between exploring unknown areas vs deeper exploring areas it currently believes to be valuable. The higher this constant, the more the algorithm will prioritize exploring unknown areas. Default value is √2. +- `rolloutPolicy`: the policy to be used in the roll-out phase when simulating one full play-out. Default is a random uniform policy -- The number of MCTS search rounds can be limited by either a given time limit or a given iteration number. -- The exploration constant $c$, which appears in the UCT score $w_i/n_i + c\sqrt{{ln N_i}/n_i}$ with theoretical default setting $c=\sqrt 2$, can be adapted to your game. -- The default uniform random rollout/playout policy can be changed. -A few statistics can be retrieved after each MCTS search call (see `naughtsandcrosses.py` example) -More of MCTS theory could be found at https://en.wikipedia.org/wiki/Monte_Carlo_tree_search and cited references. +## Detailed Information +//TODO ## Collaborating diff --git a/mcts.py b/mcts.py index c69bd8b..dd5a764 100644 --- a/mcts.py +++ b/mcts.py @@ -109,12 +109,3 @@ def getAction(self, root, bestChild): for action, node in root.children.items(): if node is bestChild: return action - - def getStatistics(self, action=None): - statistics = {} - statistics['rootNumVisits'] = self.root.numVisits - statistics['rootTotalReward'] = self.root.totalReward - if action is not None: - statistics['actionNumVisits'] = self.root.children[action].numVisits - statistics['actionTotalReward'] = self.root.children[action].totalReward - return statistics diff --git a/naughtsandcrosses.py b/naughtsandcrosses.py index d013a50..5b4019a 100644 --- a/naughtsandcrosses.py +++ b/naughtsandcrosses.py @@ -4,72 +4,54 @@ from mcts import mcts from functools import reduce import operator -import random class NaughtsAndCrossesState(): - - playerNames = {1:'O', -1:'X'} - - def __init__(self, gridSize=3): - self.gridSize = gridSize - self.board = [ [0 for _ in range(self.gridSize)] for _ in range(self.gridSize)] + def __init__(self): + self.board = [[0, 0, 0], [0, 0, 0], [0, 0, 0]] self.currentPlayer = 1 - self.possibleActions = None - - def show(self): - for row in self.board: - row_text = "" - for cell in row: - if cell in self.playerNames: - row_text += f" {self.playerNames[cell]} " - else: - row_text += " . " - print(row_text) def getCurrentPlayer(self): return self.currentPlayer def getPossibleActions(self): - if self.possibleActions is None: - self.possibleActions = [] - for i in range(len(self.board)): - for j in range(len(self.board[i])): - if self.board[i][j] == 0: - self.possibleActions.append(Action(player=self.currentPlayer, x=i, y=j)) - return self.possibleActions + possibleActions = [] + for i in range(len(self.board)): + for j in range(len(self.board[i])): + if self.board[i][j] == 0: + possibleActions.append(Action(player=self.currentPlayer, x=i, y=j)) + return possibleActions def takeAction(self, action): newState = deepcopy(self) newState.board[action.x][action.y] = action.player newState.currentPlayer = self.currentPlayer * -1 - newState.possibleActions = None return newState def isTerminal(self): for row in self.board: - if abs(sum(row)) == self.gridSize: + if abs(sum(row)) == 3: return True for column in list(map(list, zip(*self.board))): - if abs(sum(column)) == self.gridSize: + if abs(sum(column)) == 3: return True for diagonal in [[self.board[i][i] for i in range(len(self.board))], [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: - if abs(sum(diagonal)) == self.gridSize: + if abs(sum(diagonal)) == 3: return True return reduce(operator.mul, sum(self.board, []), 1) def getReward(self): for row in self.board: - if abs(sum(row)) == self.gridSize: - return sum(row) / self.gridSize + if abs(sum(row)) == 3: + return sum(row) / 3 for column in list(map(list, zip(*self.board))): - if abs(sum(column)) == self.gridSize: - return sum(column) / self.gridSize + if abs(sum(column)) == 3: + return sum(column) / 3 for diagonal in [[self.board[i][i] for i in range(len(self.board))], [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: - if abs(sum(diagonal)) == self.gridSize: - return sum(diagonal) / self.gridSize + if abs(sum(diagonal)) == 3: + return sum(diagonal) / 3 return False @@ -92,51 +74,8 @@ def __hash__(self): return hash((self.x, self.y, self.player)) -def main(): - """Example of a NaughtsAndCrossesState game play between MCTS and random searchers. - The standard 3x3 grid is randomly extended up to 10x10 in order to exercise the MCTS time ressource. - One of the two player is randomly assigned to the MCTS searcher for purpose of correctness checking. - A basic statistics is provided at each MCTS turn.""" - - playerNames = NaughtsAndCrossesState.playerNames - mctsPlayer = random.choice(sorted(playerNames.keys())) - gridSize = random.choice(list(range(3,11))) - - currentState = NaughtsAndCrossesState(gridSize) - turn = 0 - currentState.show() - while not currentState.isTerminal(): - turn += 1 - player = currentState.getCurrentPlayer() - action_count = len(currentState.getPossibleActions()) - - if player == mctsPlayer: - searcher = mcts(timeLimit=1_000) - searcherName = "mcts-1-second" - action = searcher.search(initialState=currentState) - statistics = searcher.getStatistics(action) - else: - searcherName = "random" - action = random.choice(currentState.getPossibleActions()) - statistics = None - - currentState = currentState.takeAction(action) - print(f"at turn {turn} player {playerNames[player]}={player} ({searcherName}) takes action {action} amongst {action_count} possibilities") - - if statistics is not None: - print(f"mcts statitics for the chosen action: {statistics['actionTotalReward']} total reward over {statistics['actionNumVisits']} visits") - print(f"mcts statitics for all explored actions: {statistics['rootTotalReward']} total reward over {statistics['rootNumVisits']} visits") - - print('-'*90) - currentState.show() - - print('-'*90) - if currentState.getReward() == 0: - print(f"game {gridSize}x{gridSize} terminates; nobody wins") - else: - print(f"game {gridSize}x{gridSize} terminates; player {playerNames[player]}={player} ({searcherName}) wins") - - -if __name__ == "__main__": - main() +initialState = NaughtsAndCrossesState() +mcts = mcts(timeLimit=1000) +action = mcts.search(initialState=initialState) +print(action) From 50bea15b233c60a034daa2c304e4a7578adaeb22 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Tue, 12 Jan 2021 16:14:39 +0100 Subject: [PATCH 10/12] Delete .gitignore --- .gitignore | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index a295864..0000000 --- a/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.pyc -__pycache__ From 7f8e1599a357b8abbe2a7f741dc1132bb140ebb9 Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Tue, 12 Jan 2021 20:33:52 +0100 Subject: [PATCH 11/12] Update naughtsandcrosses.py --- naughtsandcrosses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/naughtsandcrosses.py b/naughtsandcrosses.py index 5b4019a..ffaf0de 100644 --- a/naughtsandcrosses.py +++ b/naughtsandcrosses.py @@ -75,7 +75,7 @@ def __hash__(self): initialState = NaughtsAndCrossesState() -mcts = mcts(timeLimit=1000) -action = mcts.search(initialState=initialState) +searcher = mcts(timeLimit=1000) +action = searcher.search(initialState=initialState) print(action) From 4e62716afdc1691784e33998297067cc4413dc6f Mon Sep 17 00:00:00 2001 From: Lucas Borboleta Date: Wed, 13 Jan 2021 08:29:27 +0100 Subject: [PATCH 12/12] Update naughtsandcrosses.py getReward changed for returning 0 when the game terminates with a draw ; isTerminal changed for returning True or False when no winning case has been detected ; it does not change the way the code works, but it is cleaner regarding types ... and so it helps the reader, in my opinion. --- naughtsandcrosses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/naughtsandcrosses.py b/naughtsandcrosses.py index ffaf0de..4c528d1 100644 --- a/naughtsandcrosses.py +++ b/naughtsandcrosses.py @@ -39,7 +39,7 @@ def isTerminal(self): [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: if abs(sum(diagonal)) == 3: return True - return reduce(operator.mul, sum(self.board, []), 1) + return reduce(operator.mul, sum(self.board, []), 1) != 0 def getReward(self): for row in self.board: @@ -52,7 +52,7 @@ def getReward(self): [self.board[i][len(self.board) - i - 1] for i in range(len(self.board))]]: if abs(sum(diagonal)) == 3: return sum(diagonal) / 3 - return False + return 0 class Action():