models.py
import os
from keras.layers.embeddings import Embedding
from keras.layers import Input, concatenate
from keras.layers.normalization import BatchNormalization
from keras.layers.recurrent import LSTM
from keras.layers.core import Dropout, Dense
from keras.models import Model
import numpy as np
import io

def get_embeddings_index(glove_dir):
    # Load the pre-trained 100-dimensional GloVe vectors into a dict
    # mapping each word to its embedding vector.
    embeddings_index = {}
    with io.open(os.path.join(glove_dir, 'glove.6B.100d.txt'), mode='r', encoding='utf8') as embedding:
        for line in embedding:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    return embeddings_index

def get_embedding_matrix(word_index, embedding_dim, embeddings_index):
    # Build the weight matrix for the Embedding layers: row i holds the
    # GloVe vector of the word whose tokenizer index is i.
    embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # Words not found in the embedding index stay all-zeros.
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
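
A minimal sketch of how these two helpers could be wired together, assuming a Keras Tokenizer supplies the word_index and the GloVe files sit in a local directory (the directory path and toy corpus below are placeholders, not part of this repository):

from keras.preprocessing.text import Tokenizer

texts = ['police find mass graves', 'claim about mass graves is unverified']  # placeholder corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

embeddings_index = get_embeddings_index('data/glove')  # hypothetical directory holding glove.6B.100d.txt
embedding_matrix = get_embedding_matrix(tokenizer.word_index, 100, embeddings_index)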

def lstm_model(headline_length, body_length, embedding_dim, word_index, embedding_matrix, activation, numb_layers, drop_out, cells):
    # Two-branch model: headline and body are embedded with frozen GloVe
    # weights, each encoded by its own LSTM, and the two encodings are
    # concatenated before a 4-way softmax over the stance classes.
    headline_embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
                                         input_length=headline_length, trainable=False)
    bodies_embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
                                       input_length=body_length, trainable=False)

    headline_input = Input(shape=(headline_length,), dtype='int32')
    headline_embedding = headline_embedding_layer(headline_input)
    headline_lstm = LSTM(cells)(headline_embedding)

    body_input = Input(shape=(body_length,), dtype='int32')
    body_embedding = bodies_embedding_layer(body_input)
    body_lstm = LSTM(cells)(body_embedding)

    concat = concatenate([headline_lstm, body_lstm])
    preds = Dense(4, activation='softmax')(concat)

    fake_nn = Model([headline_input, body_input], outputs=preds)
    fake_nn.summary()
    fake_nn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return fake_nn
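
A hedged usage sketch for lstm_model, using random inputs and a random embedding matrix in place of the real tokenized corpus and GloVe weights (all sizes below are illustrative; note that activation, numb_layers and drop_out are accepted by this function but not used inside it):

vocab = {'word%d' % i: i for i in range(1, 51)}            # toy word_index with 50 entries
toy_matrix = np.random.rand(len(vocab) + 1, 100).astype('float32')
model = lstm_model(20, 200, 100, vocab, toy_matrix,
                   activation='relu', numb_layers=1, drop_out=0.2, cells=64)

headlines = np.random.randint(1, len(vocab) + 1, size=(8, 20))
bodies = np.random.randint(1, len(vocab) + 1, size=(8, 200))
labels = np.eye(4)[np.random.randint(0, 4, size=8)]        # one-hot stance labels
model.fit([headlines, bodies], labels, epochs=1, batch_size=4)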

def lstm_model_with_featrues(globel_vectors, headline_length, body_length, embedding_dim, word_index, embedding_matrix,
                             activation, numb_layers, drop_out, cells):
    # Same two-branch architecture as lstm_model, plus a third input of
    # hand-crafted global feature vectors that is concatenated with the
    # two LSTM encodings before the softmax layer.
    headline_embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
                                         input_length=headline_length, trainable=False)
    bodies_embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
                                       input_length=body_length, trainable=False)

    headline_input = Input(shape=(headline_length,), dtype='int32')
    headline_embedding = headline_embedding_layer(headline_input)
    headline_lstm = LSTM(cells)(headline_embedding)

    body_input = Input(shape=(body_length,), dtype='int32')
    body_embedding = bodies_embedding_layer(body_input)
    body_lstm = LSTM(cells)(body_embedding)

    global_vector_input = Input(shape=(globel_vectors,), dtype='float32')
    concat = concatenate([headline_lstm, body_lstm, global_vector_input])
    preds = Dense(4, activation='softmax')(concat)

    fake_nn = Model([headline_input, body_input, global_vector_input], outputs=preds)
    fake_nn.summary()
    fake_nn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return fake_nn

def lstm_with_combine_headline_body(headline_length, body_length, embedding_dim, word_index, embedding_matrix, activation, numb_layers, drop_out, cells):
    # Single-branch variant: headline and body are concatenated into one
    # token sequence, embedded, and encoded by a single LSTM.
    embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
                                input_length=headline_length + body_length, trainable=False)
    input = Input(shape=(headline_length + body_length,), dtype='int32')
    embedding = embedding_layer(input)
    lstm = LSTM(cells)(embedding)
    preds = Dense(4, activation='softmax')(lstm)

    fake_nn = Model(input, outputs=preds)
    fake_nn.summary()
    fake_nn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return fake_nn

def feed_forward_network(input_vector, activation, numb_layers, drop_out):
    # Feed-forward baseline on a flat feature vector: one dense hidden
    # layer (numb_layers is used as its width here) with dropout and
    # batch normalization, followed by the 4-way softmax.
    input = Input(shape=(input_vector,), dtype='float32')
    dense = Dense(numb_layers, activation=activation)(input)
    dropout = Dropout(drop_out)(dense)
    normalize = BatchNormalization()(dropout)
    preds = Dense(4, activation='softmax')(normalize)

    fake_nn = Model(input, outputs=preds)
    fake_nn.summary()
    fake_nn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
    return fake_nn
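
A quick hedged sketch of training the feed-forward baseline on placeholder feature vectors (the feature dimension, batch size, and hyperparameters below are made up for illustration):

features = np.random.rand(16, 44).astype('float32')    # placeholder hand-crafted feature vectors
stances = np.eye(4)[np.random.randint(0, 4, size=16)]  # one-hot stance labels
ff_model = feed_forward_network(44, activation='relu', numb_layers=128, drop_out=0.3)
ff_model.fit(features, stances, epochs=1, batch_size=8)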