cmput455-a3/simulate.py at master · heppelle/cmput455-a3 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#!/usr/local/bin/python3
#/usr/bin/python3
# Set the path to your python3 above

#from gtp_connection_go3 import GtpConnectionGo3
#from gtp_connection import point_to_coord, format_point
from board_util import GoBoardUtil
from pattern_util import PatternUtil
from simple_board import SimpleGoBoard
from ucb import findBest, bestArm, runUcb
import numpy as np
import argparse
import sys

# Point-state codes. extract_pattern_weights() in this file treats 2 as the
# white player and swaps 1 <-> 2 to flip colors; presumably these values
# mirror the codes used by the imported board modules -- TODO confirm.
EMPTY = 0
BLACK = 1
WHITE = 2
BORDER = 3
# Sentinel for a pass move; point_to_coord() and format_point() special-case it.
PASS = None
# Bound used by format_point() when validating row and column values.
MAXSIZE = 25

def byPercentage(pair):
    """
    Sort key: return the second element of a (point, percentage) pair.
    """
    percentage = pair[1]
    return percentage

def byCoord(pair):
    """
    Sort key: return the first element of a (point, percentage) pair.
    """
    coord = pair[0]
    return coord

def writeMoves(board, moves, count, numSimulations):
    """
    Turn per-move simulation counts into normalized probabilities.

    Each entry of `moves` that is not None is converted to a text
    coordinate with point_to_coord()/format_point(), and its count is
    divided by `numSimulations`. The resulting rates are then rescaled so
    they sum to (approximately) 1 and rounded to 3 decimals. None entries
    (the pass move) are skipped.

    Arguments:
        board: object exposing a `size` attribute (the board size).
        moves: sequence of board points, possibly containing None.
        count: sequence indexed like `moves`, holding a count per move.
        numSimulations: denominator applied to every count.

    Returns:
        (points, probs_out): two lists in the same order as `moves`
        (minus the skipped None entries) -- the formatted coordinates and
        the matching rounded probabilities. If every rate is zero, all
        probabilities are reported as 0.0 instead of dividing by zero.
    """
    points = []
    rates = []
    for i, move in enumerate(moves):
        if move is None:
            continue
        row, col = point_to_coord(move, board.size)
        points.append(format_point((row, col)))
        rates.append(float(count[i]) / float(numSimulations))

    # NOTE(review): the previous code called sorted(..., key=byCoord,
    # reverse=True) here and threw the result away, so the output has always
    # followed the order of `moves`. That order is kept on purpose so callers
    # see no change; confirm whether a sorted output was actually intended.

    total = sum(rates)
    if total == 0:
        # No wins recorded for any move (or no moves at all): normalizing is
        # impossible, so report zero for every move rather than crashing.
        return points, [0.0 for _ in rates]

    probs_out = [round(rate / total, 3) for rate in rates]
    return points, probs_out

def select_best_move(board, moves, moveWins):
    """
    Pick the move with the highest win count once the search is done.

    `moveWins` is indexed like `moves`; np.argmax returns the first index
    holding the maximum, so ties go to the earliest move. `board` is not
    used.
    """
    best_index = np.argmax(moveWins)
    best_move = moves[best_index]
    return best_move

def simulate(board, move, toplay):
    """
    Play `move` for `toplay` on a copy of `board`, then run one playout.

    The caller's board is left untouched; the playout starts with the
    opponent of `toplay` to move. Returns whatever PatternUtil.playGame
    returns.
    """
    playout_board = board.copy()
    playout_board.play_move(move, toplay)
    # TODO: the three policy switches below are hard-coded; implement a way
    # to change them accordingly.
    playout_options = dict(
        komi=0,
        limit=100,
        random_simulation=True,
        use_pattern=False,
        check_selfatari=False,
    )
    return PatternUtil.playGame(
        playout_board, GoBoardUtil.opponent(toplay), **playout_options
    )

def simulateMove(board, move, toplay, sim_num):
    """
    Run `sim_num` simulations of `move` and count those won by `toplay`.

    A simulation counts as a win when simulate() returns `toplay`.
    """
    return sum(
        1 for _ in range(sim_num) if simulate(board, move, toplay) == toplay
    )

def get_move(board, color, selection_policy, sim_num, get_best):
    """
    Run one-ply Monte Carlo simulations for `color`.

    Returns None when `color` has no legal point to play. Otherwise the
    pass move (None) is added to the candidates and:
      * with selection_policy "ucb", the result of runUcb() is returned;
      * with any other policy, every candidate is simulated `sim_num`
        times, and either the best move (get_best true) or the
        (points, probabilities) pair from writeMoves() is returned.
    """
    moves = [pt for pt in board.get_empty_points() if board.is_legal(pt, color)]
    if len(moves) == 0:
        return None
    # The pass move is only offered alongside at least one legal point.
    moves.append(None)

    if selection_policy != "ucb":
        moveWins = [
            simulateMove(board, candidate, color, sim_num) for candidate in moves
        ]
        if not get_best:
            return writeMoves(board, moves, moveWins, len(moves) * sim_num)
        return select_best_move(board, moves, moveWins)

    exploration = 0.4  # sqrt(2) is safe, this is more aggressive
    return runUcb(board, exploration, moves, color, sim_num, get_best)

def get_pattern_move(board, color, selection_policy, sim_num):
    """
    Compute a pattern-weight based probability for every legal move.

    Returns None when `color` has no legal point; otherwise a dict mapping
    each legal point to its pattern weight divided by the sum of all the
    moves' weights. `selection_policy` and `sim_num` are accepted but not
    used here.
    """
    legal_points = [
        pt for pt in board.get_empty_points() if board.is_legal(pt, color)
    ]
    if len(legal_points) == 0:
        return None

    weights, d, weight_total = extract_pattern_weights(board, legal_points, color)

    # weights[i] is the pattern code of legal_points[i]; d maps code -> weight.
    return {
        point: d[weights[i]] / weight_total
        for i, point in enumerate(legal_points)
    }

def extract_pattern_weights(board, moves, color):
    """
    Look up the pattern weight of the neighborhood around each move.

    The neighborhood of every move is fetched with get_small_boards() and
    encoded as an integer by get_weights(). When `color` is 2 (white) the
    stone codes 1 and 2 are swapped first so the same pattern table can be
    used for both players.

    The table is read from a file named "weights", opened relative to the
    current working directory. Only lines whose 0-based line number equals
    one of the pattern codes are parsed; each such line must contain an
    integer code and a float weight separated by whitespace.
    NOTE(review): this presumes line number == code on that line -- confirm
    against the weights file.

    Returns:
        (weights, d, total): the list of pattern codes (one per move, in
        `moves` order), a dict mapping code -> weight for the codes that
        were found, and the sum of the weights over all moves.

    Raises:
        KeyError: if a pattern code has no entry in the table.
        ValueError: if a selected line is not exactly "<int> <float>".
    """
    small_boards = get_small_boards(board, moves, color)

    if color == 2:
        # White to play: flip the colors so patterns are matched from the
        # point of view used by the table.
        for small_board in small_boards:
            for i, stone in enumerate(small_board):
                if stone == 1:
                    small_board[i] = 2
                elif stone == 2:
                    small_board[i] = 1

    weights = get_weights(small_boards)

    # A set makes the per-line membership test O(1) instead of scanning the
    # whole list of codes for every line of the table.
    wanted = set(weights)
    d = {}
    with open("weights") as fp:
        for i, line in enumerate(fp):
            if i in wanted:
                # split() ignores the line terminator, so a final line
                # without a trailing newline no longer loses its last digit.
                key, value = line.split()
                d[int(key)] = float(value)

    total = sum(d[weight] for weight in weights)

    return (weights, d, total)

def get_weights(boards):
    """
    Encode each neighborhood as a base-4 integer pattern code.

    Every element of `boards` is a sequence of cell values; the cell at
    position i contributes int(cell) * 4**(7 - i), so for an 8-cell
    neighborhood the first cell is the most significant digit and the last
    cell the least significant one.

    The code is computed arithmetically rather than by joining the cells
    into a string and re-parsing it character by character, which only
    worked when every cell printed as exactly one digit.

    Returns:
        A list with one integer code per element of `boards` (empty when
        `boards` is empty).
    """
    codes = []
    for cells in boards:
        code = 0
        for i, cell in enumerate(cells):
            code += int(cell) * (4 ** (7 - i))
        codes.append(code)
    return codes


def get_small_boards(board,moves,color):
    """
    Collect the neighborhood of every point in `moves`.

    For each point, board.board is indexed with the list of neighbor
    indices from get_neighbors(); the results are returned in `moves`
    order. `color` is accepted but not used.
    """
    return [board.board[get_neighbors(board, move)] for move in moves]


def get_neighbors(board, point):
    """
    Return the indices of the eight points surrounding `point`.

    Adapted from simple_board.py. board.NS is the index distance between
    vertically adjacent points. The order is: the row at point - NS
    (offsets +1, 0, -1), then point + 1 and point - 1, then the row at
    point + NS (offsets +1, 0, -1).
    """
    stride = board.NS
    row_before = point - stride
    row_after = point + stride
    return [
        row_before + 1, row_before, row_before - 1,
        point + 1, point - 1,
        row_after + 1, row_after, row_after - 1,
    ]

def point_to_coord(point, boardsize):
    """
    Convert a board array index into a (row, col) pair.

    The index is split with divmod using a row stride of boardsize + 1.
    PASS is handed back unchanged.
    """
    if point == PASS:
        return PASS
    stride = boardsize + 1
    row, col = divmod(point, stride)
    return (row, col)

def format_point(move):
    """
    Return move coordinates as a string such as 'a1', or 'pass'.

    `move` is either PASS or a (row, col) pair. Rows and columns are
    1-based: column 1 maps to 'a' and the letter 'i' is skipped, so the
    25 available letters cover columns 1..MAXSIZE.

    Raises:
        ValueError: if row or col lies outside 1..MAXSIZE. Zero used to be
            accepted and silently wrapped around to the last letter ('z'),
            while the valid value MAXSIZE was rejected.
    """
    # Lower-case column letters; 'i' is intentionally absent.
    column_letters = "abcdefghjklmnopqrstuvwxyz"
    if move is PASS:
        return "pass"
    row, col = move
    if not 1 <= row <= MAXSIZE or not 1 <= col <= MAXSIZE:
        raise ValueError(
            "row and col must be within 1..{}: got ({}, {})".format(
                MAXSIZE, row, col
            )
        )
    return column_letters[col - 1] + str(row)

"""
def run(sim, move_select, sim_rule, move_filter):
    #Start the gtp connection and wait for commands.
    board = SimpleGoBoard(7)
    con = GtpConnectionGo3(Go3(sim, move_select, sim_rule, move_filter), board)
    con.start_connection()

def parse_args():
    #Parse the arguments of the program.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--sim', type=int, default=10, help='number of simulations per move, so total playouts=sim*legal_moves')
    parser.add_argument('--moveselect', type=str, default='simple', help='type of move selection: simple or ucb')
    parser.add_argument('--simrule', type=str, default='random', help='type of simulation policy: random or rulebased')
    parser.add_argument('--movefilter', action='store_true', default=False, help='whether use move filter or not')

    args = parser.parse_args()
    sim = args.sim
    move_select = args.moveselect
    sim_rule = args.simrule
    move_filter = args.movefilter

    if move_select != "simple" and move_select != "ucb":
        print('moveselect must be simple or ucb')
        sys.exit(0)
    if sim_rule != "random" and sim_rule != "rulebased":
        print('simrule must be random or rulebased')
        sys.exit(0)

    return sim, move_select, sim_rule, move_filter

if __name__=='__main__':
    sim, move_select, sim_rule, move_filter = parse_args()
    run(sim, move_select, sim_rule, move_filter)
"""