Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

An automatic essay scoring system based on convolutional and recurrent neural networks, including GRU and LSTM.

## Environment

* Keras 2.0.8
* Theano 0.9.0
* Numpy 1.13.3
* Scipy 1.0.0
* Python 2.7.9

### Set Up ###

* Install Keras (with Theano backend)
Expand Down
8 changes: 4 additions & 4 deletions nea/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def create_model(args, initial_mean_value, overal_maxlen, vocab):
model.add(Dense(num_outputs))
if not args.skip_init_bias:
bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
model.layers[-1].b.set_value(bias_value)
model.layers[-1].bias = bias_value
model.add(Activation('sigmoid'))
model.emb_index = 0

Expand All @@ -70,10 +70,10 @@ def create_model(args, initial_mean_value, overal_maxlen, vocab):
model.add(Dense(num_outputs))
if not args.skip_init_bias:
bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
model.layers[-1].b.set_value(bias_value)
model.layers[-1].bias = bias_value
model.add(Activation('sigmoid'))
model.emb_index = 0

elif args.model_type == 'breg':
logger.info('Building a BIDIRECTIONAL REGRESSION model')
from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
Expand Down Expand Up @@ -130,7 +130,7 @@ def create_model(args, initial_mean_value, overal_maxlen, vocab):
from w2vEmbReader import W2VEmbReader as EmbReader
logger.info('Initializing lookup table')
emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
model.layers[model.emb_index].set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].get_weights()))
logger.info(' Done')

return model
7 changes: 4 additions & 3 deletions nea/my_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,12 @@ def __init__(self, mask_zero=True, **kwargs):

def call(self, x, mask=None):
if self.mask_zero:
return K.cast(x.sum(axis=1) / mask.sum(axis=1, keepdims=True), K.floatx())
mask = K.cast(mask, K.floatx())
return K.cast(K.sum(x, axis=1) / K.sum(mask, axis=1, keepdims= True), K.floatx())
else:
return K.mean(x, axis=1)

def get_output_shape_for(self, input_shape):
def compute_output_shape(self, input_shape):
return (input_shape[0], input_shape[2])

def compute_mask(self, x, mask):
Expand All @@ -73,6 +74,6 @@ class Conv1DWithMasking(Convolution1D):
def __init__(self, **kwargs):
self.supports_masking = True
super(Conv1DWithMasking, self).__init__(**kwargs)

def compute_mask(self, x, mask):
return mask
15 changes: 8 additions & 7 deletions nea/w2vEmbReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ def __init__(self, emb_path, emb_dim=None):
counter = 0
for line in emb_file:
tokens = line.split()
assert len(tokens) == self.emb_dim + 1, 'The number of dimensions does not match the header info'
word = tokens[0]
vec = tokens[1:]
vec = tokens[1].split(',')
assert len(vec) == self.emb_dim, 'The number of dimensions does not match the header info'
self.embeddings[word] = vec
counter += 1
assert counter == self.vocab_size, 'Vocab size does not match the header info'
Expand All @@ -41,13 +41,14 @@ def __init__(self, emb_path, emb_dim=None):
self.embeddings = {}
for line in emb_file:
tokens = line.split()
word = tokens[0]
vec = tokens[1].split(',')
if self.emb_dim == -1:
self.emb_dim = len(tokens) - 1
self.emb_dim = len(vec)
assert self.emb_dim == emb_dim, 'The embeddings dimension does not match with the requested dimension'
else:
assert len(tokens) == self.emb_dim + 1, 'The number of dimensions does not match the header info'
word = tokens[0]
vec = tokens[1:]
assert len(vec) == self.emb_dim, 'The number of dimensions does not match the header info'

self.embeddings[word] = vec
self.vocab_size += 1

Expand All @@ -63,7 +64,7 @@ def get_emb_matrix_given_vocab(self, vocab, emb_matrix):
counter = 0.
for word, index in vocab.iteritems():
try:
emb_matrix[index] = self.embeddings[word]
emb_matrix[0][index] = self.embeddings[word]
counter += 1
except KeyError:
pass
Expand Down
10 changes: 7 additions & 3 deletions train_nea.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python

import os
import argparse
import logging
import numpy as np
Expand All @@ -9,6 +10,8 @@
import nea.utils as U
import pickle as pk


os.environ['KERAS_BACKEND']='theano'
logger = logging.getLogger(__name__)

###############################################################################################################################
Expand Down Expand Up @@ -164,9 +167,9 @@
## Plotting model
#

from keras.utils.visualize_util import plot
from keras.utils.vis_utils import plot_model

plot(model, to_file = out_dir + '/model.png')
plot_model(model, to_file = out_dir + '/model.png')

###############################################################################################################################
## Save model architecture
Expand Down Expand Up @@ -194,10 +197,11 @@
total_train_time = 0
total_eval_time = 0


for ii in range(args.epochs):
# Training
t0 = time()
train_history = model.fit(train_x, train_y, batch_size=args.batch_size, nb_epoch=1, verbose=0)
train_history = model.fit(train_x, train_y, batch_size=args.batch_size, epochs=1, verbose=0)
tr_time = time() - t0
total_train_time += tr_time

Expand Down