-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathiterative_language_game.py
More file actions
181 lines (159 loc) · 7.92 KB
/
iterative_language_game.py
File metadata and controls
181 lines (159 loc) · 7.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# reinforcement learning
# more open-ended output space
# temporal component
# Ben comment:
# "
# Been thinking a little more about sequential tasks you could model with the unrolled RNN. It's definitely tricky
# coming up with something that the agents won't always be able to compute, but is still simple enough to model easily.
#
# Could consider something like this:
# 1. Each agent gets a series of random inputs (where each input is a one-hot)
# 2. The task is to determine if the total number of the most common input (across all agents) is even or odd.
#
# This is at least a task where there are two "types" of information the agents need to communicate: which symbol is
# most common in their local input (and maybe how much more common, to determine the globally common symbol), and how
# many of them are there (for even/odd purposes).
#
# This also creates a dynamic where the information they need to be focusing on changes over time if the most common
# symbol (locally or globally) changes over the course of the game as they get more inputs.
#
# Just spitballing here!
# "
# get signals, train transformer on sequence??
import torch
from torch.nn.functional import leaky_relu, one_hot, softmax, adaptive_avg_pool1d
from torch.nn import AdaptiveAvgPool1d
class IterativeGamePool(torch.nn.Module):
    """Two-agent iterated communication game with pooled signal history.

    At each timestep: agent 1 reads its environmental input plus the
    time-averaged history of agent 2's signals and emits a communication
    vector; agent 2 reads its own input plus agent 1's pooled signals and
    emits both a behavioural output and a reply signal; agent 1 then emits
    its behavioural output. Signal histories are averaged over time with
    adaptive average pooling, so each agent sees a running mean of the
    other's messages so far.
    """

    def __init__(self, com_size=5, input_size=5, layer_size=25, output_size=5):
        super().__init__()
        # Each agent maps [environmental input, communication input] ->
        #                 [environmental output, communication output]
        self.com_size = com_size
        self.input_size = input_size
        self.output_size = output_size
        # agent 1's two-layer MLP
        self.A1W1 = torch.nn.Linear(input_size + com_size, layer_size)
        self.A1W2 = torch.nn.Linear(layer_size, output_size + com_size)
        # agent 2's two-layer MLP
        self.A2W1 = torch.nn.Linear(input_size + com_size, layer_size)
        self.A2W2 = torch.nn.Linear(layer_size, output_size + com_size)
        self.optim = torch.optim.Adam(self.parameters())

    def forward(self, I1, I2):
        """Play the game over all timesteps.

        Args:
            I1, I2: float tensors of shape (steps, input_size); one row per
                timestep for agent 1 / agent 2 respectively.

        Returns:
            (out1, out2): per-timestep behavioural outputs of each agent,
            each of shape (steps, output_size).
        """
        T = I1.size()[0]
        com1 = torch.zeros((T, self.com_size))
        out2 = torch.zeros(T, self.output_size)
        com2 = torch.zeros((T, self.com_size))
        out1 = torch.zeros(T, self.output_size)
        for t in range(T):
            # BUG FIX: the original indexed I1[0]/I2[0] throughout this loop,
            # feeding every timestep the first input; the per-timestep design
            # clearly intends I1[t]/I2[t].
            # Agent 1: input plus agent 2's pooled signal history (all zeros
            # at t=0); the factor 35 sharpens the softmax toward a near
            # one-hot message.
            com1[t] = softmax(35 * leaky_relu(
                self.A1W2(leaky_relu(self.A1W1(
                    torch.hstack((I1[t], adaptive_avg_pool1d(com2[:t + 1].float().T, 1).T[0])).unsqueeze(0)))))[0,
                -self.com_size:], dim=0)
            # Agent 2: input plus agent 1's pooled signals; emits behavioural
            # output and a reply signal in one forward pass.
            fullout2 = leaky_relu(self.A2W2(
                leaky_relu(self.A2W1(torch.hstack((I2[t], adaptive_avg_pool1d(com1[:t + 1].float().T, 1).T[0]))))))
            out2[t] = fullout2[:-self.com_size]
            com2[t] = softmax(35 * fullout2[-self.com_size:], dim=0)
            # Agent 1 now also sees agent 2's reply for this step and emits
            # its own behavioural output (communication slice discarded).
            out1[t] = leaky_relu(self.A1W2(
                leaky_relu(self.A1W1(torch.hstack((I1[t], adaptive_avg_pool1d(com2[:t + 1].float().T, 1).T[0]))))))[
                :-self.com_size]
        return (out1, out2)
class IterativeNoCom(torch.nn.Module):
    """Communication-free baseline: each agent maps only its own input.

    Same per-timestep structure as the communicating models but with no
    signal channel, so agent performance bounds what is achievable without
    communication.
    """

    def __init__(self, input_size=5, layer_size=25, output_size=5):
        super().__init__()
        # Each agent maps environmental input -> environmental output only.
        self.input_size = input_size
        self.output_size = output_size
        # agent 1's two-layer MLP
        self.A1W1 = torch.nn.Linear(input_size, layer_size)
        self.A1W2 = torch.nn.Linear(layer_size, output_size)
        # agent 2's two-layer MLP
        self.A2W1 = torch.nn.Linear(input_size, layer_size)
        self.A2W2 = torch.nn.Linear(layer_size, output_size)
        self.optim = torch.optim.Adam(self.parameters())

    def forward(self, I1, I2):
        """Return per-timestep outputs for both agents.

        Args:
            I1, I2: float tensors of shape (steps, input_size).

        Returns:
            (out1, out2), each of shape (steps, output_size).
        """
        T = I1.size()[0]
        out1 = torch.zeros(T, self.output_size)
        out2 = torch.zeros(T, self.output_size)
        for t in range(T):
            # BUG FIX: the original used I1[0]/I2[0] and rebound out1/out2
            # wholesale every iteration, discarding the pre-allocated
            # per-timestep tensors; fill row t from input t instead.
            out1[t] = leaky_relu(self.A1W2(leaky_relu(self.A1W1(I1[t]))))
            out2[t] = leaky_relu(self.A2W2(leaky_relu(self.A2W1(I2[t]))))
        return (out1, out2)
class IterativeMask(torch.nn.Module):
    """Wrap a two-agent game model with a scalar read-out head.

    Runs the wrapped model, takes agent 1's output at the final timestep,
    and projects it down to a single value with a learned linear layer.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        # read-out head: wrapped model's output space -> one scalar
        self.W = torch.nn.Linear(model.output_size, 1)
        self.optim = torch.optim.Adam(self.parameters())

    def forward(self, I1, I2):
        """Return the scalar projection of agent 1's final-step output."""
        agent_outputs = self.model.forward(I1, I2)
        final_step = agent_outputs[0][-1]
        return self.W(final_step)
def simple_pretraining(model, steps, n):
    """Pretraining task: predict the most common input symbol.

    Generates `n` games of `steps` one-hot inputs per agent and trains
    `model` (via its own `model.optim`) so that agent 2's outputs, averaged
    over timesteps, match a one-hot of the symbol occurring most often
    across both agents' inputs.

    Args:
        model: module with forward(I1, I2) -> (out1, out2) and an `optim`
            optimizer over its own parameters.
        steps: timesteps per game.
        n: number of games / training iterations.

    Returns:
        Running loss accumulated since the last 2000-iteration report.
    """
    # communicate most common vector
    print('setting up training environment...')
    # one game per training iteration: (steps, 5) one-hot rows per agent
    ins1 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    ins2 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    # per-game symbol counts across both agents' inputs
    counts = torch.stack([
        torch.bincount(torch.cat((torch.argmax(ins1[i], dim=1), torch.argmax(ins2[i], dim=1))), minlength=5)
        for i in range(n)])
    # BUG FIX: one_hot without num_classes infers max(index)+1, so targets
    # could have fewer than 5 columns (shape mismatch in the loss) whenever
    # symbol 4 never wins; pin num_classes=5.
    actuals = one_hot(torch.argmax(counts, dim=1), num_classes=5)
    print('setup complete. training...')
    running_loss = 0
    for i in range(n):
        model.optim.zero_grad()
        # agent 2's per-timestep outputs ([-1] of the (out1, out2) tuple),
        # averaged over time, regressed onto the one-hot target
        predicted = torch.mean(model.forward(ins1[i].float(), ins2[i].float())[-1], dim=0)
        loss = torch.sum((predicted - actuals[i]) ** 2)
        loss.backward()
        model.optim.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{i + 1:5d}] loss: {running_loss / 2000:.3f}')
            print(predicted)
            print(actuals[i])
            running_loss = 0.0
    return running_loss
def iterative_training(model, steps, n):
    """Parity task: is the count of the most common symbol odd?

    Generates `n` games of `steps` one-hot inputs per agent and trains
    `model` (via its own `model.optim`) so that agent 1's final-ish output
    (mean of forward(...)[0][-1]) regresses onto 1 if the most common
    symbol's total count (across both agents) is odd, else 0.

    Args:
        model: module with forward(I1, I2) -> (out1, out2) and an `optim`
            optimizer over its own parameters.
        steps: timesteps per game.
        n: number of games / training iterations.

    Returns:
        Running loss accumulated since the last 2000-iteration report
        (added for consistency with simple_pretraining; the original
        implicitly returned None).
    """
    print('setting up training environment...')
    ins1 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    ins2 = [one_hot(torch.randint(5, (steps,)), 5) for _ in range(n)]
    print('setup complete. training...')
    # 1 if the most common vector appears an odd number of times, 0 otherwise
    counts = torch.stack([
        torch.bincount(torch.cat((torch.argmax(ins1[i], dim=1), torch.argmax(ins2[i], dim=1))), minlength=5)
        for i in range(n)])
    actuals = torch.max(counts, dim=1).values % 2
    running_loss = 0
    for i in range(n):
        model.optim.zero_grad()
        # agent 1's output at the last timestep, averaged to a scalar
        predicted = torch.mean(model.forward(ins1[i].float(), ins2[i].float())[0][-1])
        loss = (predicted - actuals[i]) ** 2
        loss.backward()
        model.optim.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
    return running_loss
# train several models for 2000 rounds, pick best to keep training
# Baseline run: a communication-free model with a single scalar output head,
# trained on the even/odd parity task for 10000 games of 10 timesteps each.
# NOTE(review): runs unconditionally at import time — consider an
# `if __name__ == "__main__":` guard.
rnn = IterativeNoCom(output_size=1)
iterative_training(rnn, 10, 10000)
# .249
# .247
# .745
# .76
# .258 vs .264
# .781
# .264 vs .264
# .776
# .265 vs .263
# .759
# .262 vs .259
# masked
# .252