-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathnode.py
More file actions
107 lines (96 loc) · 4.27 KB
/
node.py
File metadata and controls
107 lines (96 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from __future__ import division
import numpy as np
import math
class Node:
def __init__(self, value, children_values, parent=None, level=0, position=None, struct=None, v=0, w=0.0,
children=None, min_range=float(math.pow(10, 10)), max_range=0.0, adjust_val=1):
self.value = value
self.parent = parent
self.level = level
self.position = position
self.struct = struct
self.v = v
self.w = w
self.children = children
self.min_range = min_range
self.max_range = max_range
self.adjust_val = adjust_val
self.children_values = children_values
def has_children(self):
return self.children is not None
def has_all_children(self):
if self.has_children():
return len(self.children) == len(self.children_values)
else:
return False
def select(self, max_flag, ucb_mean):
if self.has_all_children():
#print(self.adjust_val)
c = 2#((math.sqrt(2)/2)*(self.max_range-self.min_range)) #*self.adjust_val
u_scores = {}
if max_flag:
for child in iter(self.children.values()):
if child is not None:
if ucb_mean:
u_scores[child.value] = ((child.w / child.v) + (c * (math.sqrt((2 * math.log(self.v)) /
child.v))))
else:
u_scores[child.value] = child.max_range + (c * (math.sqrt((2 * math.log(self.v)) / child.v)))
max_idxs = [i for i, x in iter(u_scores.items()) if x == max(iter(u_scores.values()))]
idx = np.random.choice(max_idxs)
else:
for child in iter(self.children.values()):
if child is not None:
if ucb_mean:
u_scores[child.value] = ((child.w / child.v) - (c * (math.sqrt((2 * math.log(self.v)) /
child.v))))
else:
u_scores[child.value] = child.min_range + (c * (math.sqrt((2 * math.log(self.v)) / child.v)))
min_idxs = [i for i, x in iter(u_scores.items()) if x == min(iter(u_scores.values()))]
idx = np.random.choice(min_idxs)
return self.children[idx].select(max_flag, ucb_mean)
else:
return self
def bck_prop(self, e):
self.v += 1
self.w += e
if e > self.max_range:
self.max_range = e
if e < self.min_range:
self.min_range = e
if self.parent is not None:
return self.parent.bck_prop(e)
def adjust_c(self, adjust_value):
self.adjust_val += adjust_value
if self.parent is not None:
return self.parent.adjust_c(adjust_value)
def expand(self, position, expand_children):
if self.children is None:
self.children = {}
expanded = []
avl_child_values = list(set(self.children_values) - set(self.children.keys()))
if expand_children > len(avl_child_values):
no_chosen_values = len(avl_child_values)
else:
no_chosen_values = expand_children
chosen_values = np.random.choice(avl_child_values, no_chosen_values, replace=False)
for child_value in chosen_values:
child_struct = self.struct[:]
child_struct[position] = child_value
self.children[child_value] = Node(value=child_value, children_values=self.children_values, parent=self,
level=self.level + 1, position=position, struct=child_struct)
expanded.append(self.children[child_value])
return expanded
def get_info(self):
nodes = 1
visits = self.v
depth = self.level
if self.has_children():
for child in iter(self.children.values()):
if child is not None:
x, y, z = child.get_info()
nodes +=x
visits += y
if z > depth:
depth = z
return nodes, visits, depth