-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval.py
More file actions
114 lines (83 loc) · 3.31 KB
/
eval.py
File metadata and controls
114 lines (83 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import random
from collections import Counter
import numpy as np
from lstm import LSTM
import datahandling as dh
import embeddings as emb
def evaluate(model, data, embedding):
    """Run the model over a list of (story, answers) pairs and report accuracy.

    For every '?' token in a story the model's current answer vector is mapped
    to its nearest word in the embedding space and compared to the expected
    answer.  Prints a per-word breakdown of the ten most frequent predictions,
    then returns the overall success rate.

    Args:
        model: object with read(vector), answer() (whose .value() is a vector)
            and reset() — the trained LSTM.
        data: iterable of (story, answers) pairs; story is a list of word
            tokens, answers the ordered gold answers for each '?' in it.
        embedding: word-vector lookup supporting embedding[word] and
            embedding.wv.most_similar (gensim-style) — TODO confirm exact type.

    Returns:
        float: fraction of questions answered correctly (0.0 if there were
        no questions at all).
    """
    predictions = Counter()        # how often each word was predicted
    occurrences = Counter()        # how often each word was the gold answer
    correct_predictions = Counter()  # per-word count of correct predictions
    test_len = len(data)
    total = 0.0
    correct = 0.0
    for j, (story, answers) in enumerate(data, start=1):
        print('{}/{}'.format(j, test_len), end='\r')
        current_answer = 0
        for word in story:
            # Feed every token; a '?' marks a point where an answer is due.
            model.read(embedding[word])
            if word == '?':
                ans = answers[current_answer]
                occurrences[ans] += 1
                current_answer += 1
                prediction_vector = np.array(model.answer().value())
                # Nearest embedding-space neighbour is the predicted word.
                prediction = embedding.wv.most_similar(positive=[prediction_vector],
                                                       negative=[])[0][0]
                predictions[prediction] += 1
                total += 1.0
                if ans == prediction:
                    correct_predictions[prediction] += 1
                    correct += 1.0
        # Clear recurrent state between independent stories.
        model.reset()
    print('{: >15} {: >15} {: >15} {: >15}'.format('Prediction', 'Count', 'Actual',
                                                   'Accuracy'))
    for word, n in predictions.most_common(10):
        # Accuracy of the model *when it predicts this word* (precision).
        accuracy = round(100 * correct_predictions[word] / n, 2)
        print('{: >15} {: >15} {: >15} {: >15}'.format(word, n, occurrences[word], accuracy))
    # Guard against division by zero when the data contains no questions.
    if total == 0.0:
        return 0.0
    success_rate = correct / total
    return success_rate
def main():
    """Train the LSTM on bAbI-style task 1 (English) and evaluate it.

    Each epoch: shuffle the training stories, feed up to TRAIN_LEN of them
    token by token, train on every '?' position, then report train accuracy,
    epoch loss and validation success rate, and checkpoint the model.
    After all epochs the test-set success rate is printed.
    """
    # Hyperparameters.  NOTE(review): original comment claimed 1000 stories
    # per epoch but the code used 1500 — kept at 1500 to preserve behavior.
    train_len = 1500
    num_epochs = 1000
    model = LSTM(10)
    data = dh.TaskData(1, 'english')
    embedding = emb.load_embedding(1, 'english')
    for epoch in range(num_epochs):
        epoch_loss = []
        total = 0.0
        correct = 0.0
        # Randomly shuffle before each epoch so the train_len-sized sample
        # (and its order) differs every time.
        random.shuffle(data.train_data)
        for j, (story, answers) in enumerate(data.train_data[:train_len], start=1):
            print('{}/{}'.format(j, train_len), end='\r')
            current_answer = 0
            for word in story:
                model.read(embedding[word])
                if word == '?':
                    ans = answers[current_answer]
                    current_answer += 1
                    prediction_vector = np.array(model.answer().value())
                    # Track accuracy via nearest-neighbour decoding.
                    prediction = embedding.wv.most_similar(positive=[prediction_vector],
                                                           negative=[])[0][0]
                    epoch_loss.append(model.train(embedding[ans]))
                    total += 1.0
                    if ans == prediction:
                        correct += 1.0
            # Clear recurrent state between independent stories.
            model.reset()
        # Guard against a sample that happened to contain no questions.
        train_acc = correct / total if total else 0.0
        loss = sum(epoch_loss) / len(epoch_loss) if epoch_loss else 0.0
        print('Epoch: {} Loss: {} Train Accuracy: {}'.format(epoch, loss, train_acc))
        validation_rate = evaluate(model, data.valid_data, embedding)
        print('Validation Success Rate: {}'.format(validation_rate))
        # Checkpoint after every epoch so the best model can be recovered.
        model.save('saved_models/rnn/epoch-{}'.format(epoch))
    test_rate = evaluate(model, data.test_data, embedding)
    print('Test Success Rate: {}'.format(test_rate))


if __name__ == '__main__':
    main()