diff --git a/README.md b/README.md
index f5b84b1..f5c98c0 100644
--- a/README.md
+++ b/README.md
@@ -9,17 +9,18 @@ System requirements:
 To simplify obtaining the dataset, use the get_training_data script.
 ```
 pip3 install python-mnist numpy
-python3 get_training_data.py
+python get_training_data.py
 ```
 ## Getting started
 neural_network.py implements the NeuralNetwork class and related functionality.
 Run example:
 ```
-python3 neural_network.py train-images/ 10 0.05 0.01 100
+python neural_network.py train-images/ 30 0.1 300 10 60
 ```
 Inputs:
- - folder with training data (in .gz format)
- - stop criterion by the number of eras
- - stop criterion for minimizing cross-entropy
- - learning Speed
- - the number of neurons in the hidden layer
+ - [data folder] folder with training data (in .gz format)
+ - [epochs] number of training epochs (stop criterion)
+ - [learn rate] learning rate for gradient updates
+ - [hidden size] the number of neurons in the hidden layer
+ - [output size] the number of neurons in the output layer
+ - [batch_size] mini-batch size
\ No newline at end of file
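The neural_network.py patch below replaces the index-permutation helper `unison_shuffled_copies` with `mix`, which shuffles both arrays in place using the same RNG state. A minimal standalone sketch of that trick (the sample arrays are illustrative):
```
import numpy as np

def mix(x, y):
    # Save the RNG state, shuffle x, restore the state, shuffle y:
    # both arrays receive the same permutation, so pairs stay aligned.
    random_state = np.random.get_state()
    np.random.shuffle(x)
    np.random.set_state(random_state)
    np.random.shuffle(y)
    return x, y

images = np.arange(10.0).reshape(10, 1)   # stand-in for image rows
labels = np.arange(10.0) * 2              # stand-in for matching labels
images, labels = mix(images, labels)
assert np.array_equal(labels, images[:, 0] * 2)  # still paired after shuffling
```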
diff --git a/neural_network.py b/neural_network.py
index b30b88a..fdb4337 100644
--- a/neural_network.py
+++ b/neural_network.py
@@ -2,130 +2,118 @@
 from datetime import datetime
 import numpy as np
 from mnist import MNIST
-INPUT_SIZE = 784
-OUTPUT_SIZE = 10
-def unison_shuffled_copies(a, b):
-    p = np.random.permutation(len(a))
-    return a[p], b[p]
-def logistic(arg):
-    return 1 / (1 + np.exp(-arg))
+
+def mix(x, y):
+    # Shuffle x and y in unison by replaying the same RNG state.
+    random_state = np.random.get_state()
+    np.random.shuffle(x)
+    np.random.set_state(random_state)
+    np.random.shuffle(y)
+    return x, y
+
+def relu(arg):
+    # Rectified linear unit; deriv() below is its derivative.
+    return np.maximum(arg, 0)
+
 def softmax(arg):
-    res = np.zeros(arg.shape)
-    sumres = 0
-    for i, row in enumerate(arg):
-        res[i] = np.exp(row)
-        sumres += res[i].sum()
-    return res/sumres
+    # Column-wise softmax, shifted by the max to avoid overflow in exp.
+    exps = np.exp(arg - np.max(arg, axis=0))
+    return exps / np.sum(exps, axis=0)
+
 def deriv(func, arg):
-    return func(arg) * (1 - func(arg))
+    # func is unused but kept for signature compatibility; ReLU derivative.
+    return (arg > 0).astype(float)
+
 class NeuralNetwork:
-    weights_layers = [[], []]
-    hidden_layer = np.array([])
-    input_layer = np.array([])
-    output_layer = np.array([])
-    output_layer_expected = np.array([])
-    epochs = 100
-    cross_entropy_min = 0.05
-    learn_rate = 0.01
-    hidden_size = 300
-    def __init__(self, epochs, cross_entropy, learn_rate, hidden_size):
-        self.epochs = epochs
-        self.cross_entropy_min = cross_entropy
+    def __init__(self, hidden_nodes=40, output_nodes=10, learn_rate=0.1):
+        self.input_nodes = 0
+        self.hidden_nodes = hidden_nodes
+        self.output_nodes = output_nodes
+        self.w1 = np.array([])
+        self.w2 = np.array([])
         self.learn_rate = learn_rate
-        self.hidden_size = hidden_size
-        self.hidden_layer = np.zeros(hidden_size)
-    def reset_weights(self):
-        self.weights_layers[0] = 2*np.random.rand(INPUT_SIZE, self.hidden_size) -1
-        self.weights_layers[1] = 2*np.random.rand(self.hidden_size, OUTPUT_SIZE) -1
-    def __calc_hidden(self):
-        self.hidden_layer = logistic(np.dot(self.input_layer, self.weights_layers[0]))
-    def __calc_output(self):
-        self.__calc_hidden()
-        self.output_layer = softmax(np.dot(self.hidden_layer, self.weights_layers[1]))
-    def __correct_weights(self):
-        gradient_weights = [
-            np.zeros((INPUT_SIZE, self.hidden_size)),
-            np.zeros((self.hidden_size, OUTPUT_SIZE))
-        ]
-        delta1 = np.zeros(self.hidden_size)
-        delta2 = np.zeros(OUTPUT_SIZE)
-        for i in range(self.hidden_size):
-            delta2 = self.output_layer - self.output_layer_expected
-            gradient_weights[1][i] = np.dot(delta2, self.hidden_layer[i])
-        for i in range(self.hidden_size):
-            delta1[i] += np.dot(delta2, self.weights_layers[1][i]) * deriv(logistic, self.hidden_layer[i])
-        for i in range(INPUT_SIZE):
-            gradient_weights[0][i] = np.dot(delta1, self.input_layer[i])
-        #correct weights
-        for layer in range(1):
-            self.weights_layers[layer] -= self.learn_rate * gradient_weights[layer]
-    def __set_input(self, input_layer, label):
-        self.input_layer = input_layer
-        self.output_layer_expected = label
-    def train(self, data, labels):
-        for epoch in range(self.epochs):
-            correct = 0
-            data, labels = unison_shuffled_copies(data, labels)
-            for i in range(len(data)):
-                #if i % 1000 == 1:
-                #    print(i, self.output_layer.max(), self.output_layer.argmax(), self.output_layer_expected.argmax())
-                self.__set_input(data[i], labels[i])
-                self.__calc_output()
-                if self.output_layer.argmax() == self.output_layer_expected.argmax():
-                    correct += 1
-                self.__correct_weights()
-            precision = correct / len(data)
-            #calc cross entropy
-            cross_entropy = 0
-            for i in range(len(data)):
-                self.__set_input(data[i], labels[i])
-                index = self.output_layer_expected.argmax()
-                self.__calc_output()
-                cross_entropy -= np.log(self.output_layer[index])
-            cross_entropy = cross_entropy / len(data)
-            print(str(datetime.now()), 'Epoch:', epoch, 'Cross entropy:', cross_entropy, 'Precision:', precision)
-            if cross_entropy < self.cross_entropy_min:
-                break
+        self.batch_size = 0
+
+    def __initialize_weights(self):
+        self.w1 = 2 * np.random.rand(self.hidden_nodes, self.input_nodes) - 1
+        self.w2 = 2 * np.random.rand(self.output_nodes, self.hidden_nodes) - 1
+
+    def __calc_hidden(self, batch):
+        # Cache pre-activations and ReLU activations for the backward pass.
+        self.w1_dot = np.dot(self.w1, batch.transpose())
+        self.w1_updated = relu(self.w1_dot)
+
+    def __calc_output(self, batch):
+        self.__calc_hidden(batch)
+        self.w2_dot = np.dot(self.w2, self.w1_updated)
+        self.w2_updated = softmax(self.w2_dot)
+
+    def __back(self, input_batch, output_layer_expected):
+        # Output error for softmax + cross-entropy is expected - predicted.
+        delta2 = output_layer_expected.transpose() - self.w2_updated
+        # Use the actual slice width so a short final batch is scaled correctly.
+        batch_size = delta2.shape[1]
+        dws = np.dot(delta2, self.w1_updated.transpose()) / batch_size
+
+        delta1 = np.dot(self.w2.transpose(), delta2) * deriv(relu, self.w1_dot)
+        dwh = np.dot(delta1, input_batch) / batch_size
+
+        # The deltas already point down the loss gradient, so add the step.
+        self.w2 = self.w2 + self.learn_rate * dws
+        self.w1 = self.w1 + self.learn_rate * dwh
+
+    def train(self, data, labels, batch_size, epochs):
+        self.batch_size = batch_size
+        self.input_nodes = data.shape[1]
+        self.__initialize_weights()
+
+        for epoch in range(epochs):
+            data, labels = mix(data, labels)
+            for i in range(0, data.shape[0], self.batch_size):
+                self.__calc_output(data[i:i + self.batch_size])
+                self.__back(data[i:i + self.batch_size], labels[i:i + self.batch_size])
+
     def test(self, data, labels):
-        correct = 0
-        for i in range(len(data)):
-            self.__set_input(data[i], labels[i])
-            self.__calc_output()
-            if self.output_layer_expected[self.output_layer.argmax()] == 1:
-                correct += 1
-        return correct / len(data)
-def read_mnist_data(data_folder):
+        self.__calc_output(data)
+        crossentropy = -np.sum(labels * np.log(self.w2_updated.transpose())) / data.shape[0]
+
+        result_net = np.argmax(self.w2_updated, axis=0)
+        result_real = np.argmax(labels, axis=1)
+        accuracy = (result_net == result_real).mean()
+
+        return crossentropy, accuracy
+
+def read_mnist_data(data_folder, output_nodes):
     mndata = MNIST(data_folder)
     mndata.gz = True
     train_images, train_labels = mndata.load_training()
     test_images, test_labels = mndata.load_testing()
-    np_train_labels = np.zeros((len(train_labels), OUTPUT_SIZE))
+    np_train_labels = np.zeros((len(train_labels), output_nodes), dtype='float32')
     for i in range(len(train_labels)):
         np_train_labels[i][train_labels[i]] = 1
-    np_test_labels = np.zeros((len(test_labels), OUTPUT_SIZE))
+    np_test_labels = np.zeros((len(test_labels), output_nodes), dtype='float32')
     for i in range(len(test_labels)):
         np_test_labels[i][test_labels[i]] = 1
     return np.array(train_images)/255, np_train_labels, np.array(test_images)/255, np_test_labels
+
 def main(argv):
-    if len(argv) != 6:
+    if len(argv) != 7:
         print("""Usage:
-python neural_network.py [data folder] [epochs] [max error] [learn rate] [hidden size]""")
+python neural_network.py [data folder] [epochs] [learn rate] [hidden size] [output size] [batch_size]""")
         sys.exit()
     else:
         data_folder = argv[1]
         epochs = int(argv[2])
-        cross_entropy = float(argv[3])
-        learn_rate = float(argv[4])
-        hidden_size = int(argv[5])
-        #print(data_folder, epochs, cross_entropy, learn_rate, hidden_size)
+        learn_rate = float(argv[3])
+        hidden_nodes = int(argv[4])
+        output_nodes = int(argv[5])
+        batch_size = int(argv[6])
     print('Loading data from', data_folder)
-    train_images, train_labels, test_images, test_labels = read_mnist_data(data_folder)
+    train_images, train_labels, test_images, test_labels = read_mnist_data(data_folder, output_nodes)
     print('Found', len(train_images), 'training images')
     print('Found', len(test_images), 'testing images')
-    network = NeuralNetwork(epochs, cross_entropy, learn_rate, hidden_size)
-    network.reset_weights()
-    print(str(datetime.now()), 'Initialization successful, training network...')
-    network.train(train_images, train_labels)
+    network = NeuralNetwork(hidden_nodes, output_nodes, learn_rate)
+    network.train(train_images, train_labels, batch_size, epochs)
     print(str(datetime.now()), 'Training ended')
     train_result = network.test(train_images, train_labels)
     print(str(datetime.now()), 'Training data result:', train_result)
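For reference, `__back` above performs one mini-batch step of gradient descent on the softmax cross-entropy loss, where the output-layer error reduces to `expected - predicted`. A minimal self-contained sketch of the same forward/backward step (batch size, layer sizes, and variable names here are illustrative), using the column-major layout of the code above:
```
import numpy as np

B, I, H, O = 4, 784, 30, 10                 # batch, input, hidden, output sizes
x = np.random.rand(B, I)                    # input batch, one row per example
t = np.eye(O)[np.random.randint(0, O, B)]   # one-hot targets
w1 = 2 * np.random.rand(H, I) - 1
w2 = 2 * np.random.rand(O, H) - 1
learn_rate = 0.1

# Forward pass (columns are examples, as in __calc_hidden/__calc_output).
z1 = np.dot(w1, x.T)                        # (H, B) hidden pre-activations
a1 = np.maximum(z1, 0)                      # ReLU
z2 = np.dot(w2, a1)                         # (O, B) output pre-activations
e = np.exp(z2 - z2.max(axis=0))
y = e / e.sum(axis=0)                       # column-wise softmax

# Backward pass: delta2 is the negative gradient of cross-entropy w.r.t. z2.
delta2 = t.T - y                            # (O, B)
dw2 = np.dot(delta2, a1.T) / B
delta1 = np.dot(w2.T, delta2) * (z1 > 0)    # backprop through the ReLU
dw1 = np.dot(delta1, x) / B

w2 += learn_rate * dw2                      # add: deltas already point downhill
w1 += learn_rate * dw1
```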