-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathiterative_language_game.py
More file actions
181 lines (159 loc) · 7.92 KB
/
iterative_language_game.py
File metadata and controls
181 lines (159 loc) · 7.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# reinforcement learning
# more open-ended output space
# temporal component
# Ben comment:
# "
# Been thinking a little more about sequential tasks you could model with the unrolled RNN. It's definitely tricky
# coming up with something that the agents won't always be able to compute, but is still simple enough to model easily.
#
# Could consider something like this:
# 1. Each agent gets a series of random inputs (where each input is a one-hot)
# 2. The task is to determine if the total number of the most common input (across all agents) is even or odd.
#
# This is at least a task where there are two "types" of information the agents need to communicate: which symbol is
# most common in their local input (and maybe how much more common, to determine the globally common symbol), and how
# many of them are there (for even/odd purposes).
#
# This also creates a dynamic where the information they need to be focusing on changes over time if the most common
# symbol (locally or globally) changes over the course of the game as they get more inputs.
#
# Just spitballing here!
# "
# get signals, train transformer on sequence??
import torch
from torch.nn.functional import leaky_relu, one_hot, softmax, adaptive_avg_pool1d
from torch.nn import AdaptiveAvgPool1d
class IterativeGamePool(torch.nn.Module):
    """Two-agent iterated communication game with pooled signal history.

    At each timestep: agent 1 reads its environmental input plus the
    time-averaged history of agent 2's signals and emits a communication
    vector; agent 2 reads its own input plus agent 1's pooled signals and
    emits both a behavioural output and a reply signal; agent 1 then emits
    its behavioural output. Signal histories are averaged over time with
    adaptive average pooling, so each agent sees a running mean of the
    other's messages so far.
    """

    def __init__(self, com_size=5, input_size=5, layer_size=25, output_size=5):
        super().__init__()
        # Each agent maps [environmental input, communication input] ->
        #                 [environmental output, communication output]
        self.com_size = com_size
        self.input_size = input_size
        self.output_size = output_size
        # agent 1's two-layer MLP
        self.A1W1 = torch.nn.Linear(input_size + com_size, layer_size)
        self.A1W2 = torch.nn.Linear(layer_size, output_size + com_size)
        # agent 2's two-layer MLP
        self.A2W1 = torch.nn.Linear(input_size + com_size, layer_size)
        self.A2W2 = torch.nn.Linear(layer_size, output_size + com_size)
        self.optim = torch.optim.Adam(self.parameters())

    def forward(self, I1, I2):
        """Play the game over all timesteps.

        Args:
            I1, I2: float tensors of shape (steps, input_size); one row per
                timestep for agent 1 / agent 2 respectively.

        Returns:
            (out1, out2): per-timestep behavioural outputs of each agent,
            each of shape (steps, output_size).
        """
        T = I1.size()[0]
        com1 = torch.zeros((T, self.com_size))
        out2 = torch.zeros(T, self.output_size)
        com2 = torch.zeros((T, self.com_size))
        out1 = torch.zeros(T, self.output_size)
        for t in range(T):
            # BUG FIX: the original indexed I1[0]/I2[0] throughout this loop,
            # feeding every timestep the first input; the per-timestep design
            # clearly intends I1[t]/I2[t].
            # Agent 1: input plus agent 2's pooled signal history (all zeros
            # at t=0); the factor 35 sharpens the softmax toward a near
            # one-hot message.
            com1[t] = softmax(35 * leaky_relu(
                self.A1W2(leaky_relu(self.A1W1(
                    torch.hstack((I1[t], adaptive_avg_pool1d(com2[:t + 1].float().T, 1).T[0])).unsqueeze(0)))))[0,
                -self.com_size:], dim=0)
            # Agent 2: input plus agent 1's pooled signals; emits behavioural
            # output and a reply signal in one forward pass.
            fullout2 = leaky_relu(self.A2W2(
                leaky_relu(self.A2W1(torch.hstack((I2[t], adaptive_avg_pool1d(com1[:t + 1].float().T, 1).T[0]))))))
            out2[t] = fullout2[:-self.com_size]
            com2[t] = softmax(35 * fullout2[-self.com_size:], dim=0)
            # Agent 1 now also sees agent 2's reply for this step and emits
            # its own behavioural output (communication slice discarded).
            out1[t] = leaky_relu(self.A1W2(
                leaky_relu(self.A1W1(torch.hstack((I1[t], adaptive_avg_pool1d(com2[:t + 1].float().T, 1).T[0]))))))[
                :-self.com_size]
        return (out1, out2)
class IterativeNoCom(torch.nn.Module):
    """Communication-free baseline: each agent maps only its own input.

    Same per-timestep structure as the communicating models but with no
    signal channel, so agent performance bounds what is achievable without
    communication.
    """

    def __init__(self, input_size=5, layer_size=25, output_size=5):
        super().__init__()
        # Each agent maps environmental input -> environmental output only.
        self.input_size = input_size
        self.output_size = output_size
        # agent 1's two-layer MLP
        self.A1W1 = torch.nn.Linear(input_size, layer_size)
        self.A1W2 = torch.nn.Linear(layer_size, output_size)
        # agent 2's two-layer MLP
        self.A2W1 = torch.nn.Linear(input_size, layer_size)
        self.A2W2 = torch.nn.Linear(layer_size, output_size)
        self.optim = torch.optim.Adam(self.parameters())

    def forward(self, I1, I2):
        """Return per-timestep outputs for both agents.

        Args:
            I1, I2: float tensors of shape (steps, input_size).

        Returns:
            (out1, out2), each of shape (steps, output_size).
        """
        T = I1.size()[0]
        out1 = torch.zeros(T, self.output_size)
        out2 = torch.zeros(T, self.output_size)
        for t in range(T):
            # BUG FIX: the original used I1[0]/I2[0] and rebound out1/out2
            # wholesale every iteration, discarding the pre-allocated
            # per-timestep tensors; fill row t from input t instead.
            out1[t] = leaky_relu(self.A1W2(leaky_relu(self.A1W1(I1[t]))))
            out2[t] = leaky_relu(self.A2W2(leaky_relu(self.A2W1(I2[t]))))
        return (out1, out2)
class IterativeMask(torch.nn.Module):
    """Wrap a two-agent game model with a scalar read-out head.

    Runs the wrapped model, takes agent 1's output at the final timestep,
    and projects it down to a single value with a learned linear layer.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        # read-out head: wrapped model's output space -> one scalar
        self.W = torch.nn.Linear(model.output_size, 1)
        self.optim = torch.optim.Adam(self.parameters())

    def forward(self, I1, I2):
        """Return the scalar projection of agent 1's final-step output."""
        agent_outputs = self.model.forward(I1, I2)
        final_step = agent_outputs[0][-1]
        return self.W(final_step)
def simple_pretraining(model, steps, n):
    """Pretraining task: predict the most common input symbol.

    Generates `n` games of `steps` one-hot inputs per agent and trains
    `model` (via its own `model.optim`) so that agent 2's outputs, averaged
    over timesteps, match a one-hot of the symbol occurring most often
    across both agents' inputs.

    Args:
        model: module with forward(I1, I2) -> (out1, out2) and an `optim`
            optimizer over its own parameters.
        steps: timesteps per game.
        n: number of games / training iterations.

    Returns:
        Running loss accumulated since the last 2000-iteration report.
    """
    # communicate most common vector
    print('setting up training environment...')
    # one game per training iteration: (steps, 5) one-hot rows per agent
    ins1 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    ins2 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    # per-game symbol counts across both agents' inputs
    counts = torch.stack([
        torch.bincount(torch.cat((torch.argmax(ins1[i], dim=1), torch.argmax(ins2[i], dim=1))), minlength=5)
        for i in range(n)])
    # BUG FIX: one_hot without num_classes infers max(index)+1, so targets
    # could have fewer than 5 columns (shape mismatch in the loss) whenever
    # symbol 4 never wins; pin num_classes=5.
    actuals = one_hot(torch.argmax(counts, dim=1), num_classes=5)
    print('setup complete. training...')
    running_loss = 0
    for i in range(n):
        model.optim.zero_grad()
        # agent 2's per-timestep outputs ([-1] of the (out1, out2) tuple),
        # averaged over time, regressed onto the one-hot target
        predicted = torch.mean(model.forward(ins1[i].float(), ins2[i].float())[-1], dim=0)
        loss = torch.sum((predicted - actuals[i]) ** 2)
        loss.backward()
        model.optim.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{i + 1:5d}] loss: {running_loss / 2000:.3f}')
            print(predicted)
            print(actuals[i])
            running_loss = 0.0
    return running_loss
def iterative_training(model, steps, n):
    """Parity task: is the count of the most common symbol odd?

    Generates `n` games of `steps` one-hot inputs per agent and trains
    `model` (via its own `model.optim`) so that agent 1's final-ish output
    (mean of forward(...)[0][-1]) regresses onto 1 if the most common
    symbol's total count (across both agents) is odd, else 0.

    Args:
        model: module with forward(I1, I2) -> (out1, out2) and an `optim`
            optimizer over its own parameters.
        steps: timesteps per game.
        n: number of games / training iterations.

    Returns:
        Running loss accumulated since the last 2000-iteration report
        (added for consistency with simple_pretraining; the original
        implicitly returned None).
    """
    print('setting up training environment...')
    ins1 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    ins2 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    print('setup complete. training...')
    # 1 if the most common vector appears an odd number of times, 0 otherwise
    counts = torch.stack([
        torch.bincount(torch.cat((torch.argmax(ins1[i], dim=1), torch.argmax(ins2[i], dim=1))), minlength=5)
        for i in range(n)])
    actuals = torch.max(counts, dim=1).values % 2
    running_loss = 0
    for i in range(n):
        model.optim.zero_grad()
        # agent 1's output at the last timestep, averaged to a scalar
        predicted = torch.mean(model.forward(ins1[i].float(), ins2[i].float())[0][-1])
        loss = (predicted - actuals[i]) ** 2
        loss.backward()
        model.optim.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
    return running_loss
# train several models for 2000 rounds, pick best to keep training
# Baseline run: a communication-free model with a single scalar output head,
# trained on the even/odd parity task for 10000 games of 10 timesteps each.
# NOTE(review): runs unconditionally at import time — consider an
# `if __name__ == "__main__":` guard.
rnn = IterativeNoCom(output_size=1)
iterative_training(rnn, 10, 10000)
# .249
# .247
# .745
# .76
# .258 vs .264
# .781
# .264 vs .264
# .776
# .265 vs .263
# .759
# .262 vs .259
# masked
# .252