from __future__ import division
import numpy as np
import random


def relu_func(i):
    return np.maximum(i, i / 100)  # this is actually leaky ReLU: slope 1/100 for negative inputs


def relu_der(i):
    # derivative of the leaky ReLU above: 1/100 where the input is negative, 1 elsewhere
    try:
        if i < 0:
            return 1 / 100
        else:
            return 1
    except ValueError:  # 'if' on a numpy array is ambiguous, so handle column vectors element-wise
        der = []
        for j in i:
            if j[0] < 0:
                der.append([1 / 100])
            else:
                der.append([1])
        return np.array(der)


def sig_func(i):
    return 1 / (1 + np.exp(-1 * i))


def sig_der(i):
    # expects the *output* of sig_func, since sigmoid'(x) = s(x) * (1 - s(x))
    return i * (1 - i)
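
# A quick illustration (an editorial addition, not in the original) of the
# convention above: sig_der takes the activation's output, while relu_der
# takes the raw input.
#   sig_der(sig_func(0.0))               -> 0.25, the sigmoid's maximum slope
#   relu_der(np.array([[-1.0], [2.0]]))  -> array([[0.01], [1.0]])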


class ActivationFunction:
    def __init__(self, function, derivative):
        self.compute = function
        self.derivative = derivative


class Neuron:
    relu = ActivationFunction(relu_func, relu_der)
    sigmoid = ActivationFunction(sig_func, sig_der)

    def __init__(self, inputs, dataset_size, weights=False, layer="h"):
        self.dataset_size = dataset_size
        self.inputs = inputs
        self.bias = random.random()
        try:
            len(weights)  # raises TypeError while weights is still False
            self.weights = weights
        except TypeError:
            self.weights = np.array([[random.random() / 10.0 for i in range(inputs.shape[1])]])
        if layer == "h":
            self.act_func = Neuron.relu
        elif layer == "o":
            self.act_func = Neuron.sigmoid

    def __calc_input_to_act_func(self, inputs=False):  # falls back to self.inputs when no explicit inputs are passed
        try:
            try:
                inputs.transpose()  # this tests whether 'inputs' is still "False"
                return self.weights.dot(inputs.transpose()).reshape((1, self.dataset_size))
            except AttributeError:
                return self.weights.dot(self.inputs.transpose())
        except (TypeError, ValueError):  # this identifies that the inputs are Neurons rather than actual numbers
            try:
                inputs.transpose()  # this tests whether 'inputs' is still "False"
                new_inputs = np.array([[neuron.calc_output() for neuron in inputs[0]]])
            except AttributeError:
                new_inputs = np.array([[neuron.calc_output() for neuron in self.inputs[0]]])
            return self.__calc_input_to_act_func(inputs=new_inputs)

    def calc_output(self):
        return self.act_func.compute(self.bias + self.__calc_input_to_act_func())
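
# A minimal sketch (an editorial addition) of one Neuron in isolation: given a
# 2x3 input matrix it produces one leaky-ReLU activation per training example.
#   X = np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
#   n = Neuron(inputs=X, dataset_size=X.shape[0])
#   n.calc_output().shape  -> (1, 2)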


class NeuralNet:
    def __init__(self, X, y, network_structure, lmda=10):
        """network_structure should be a list whose length is the number of hidden
        layers and whose ith entry is the desired number of neurons in the ith
        hidden layer, e.g. [2, 2] builds two hidden layers of two neurons each.
        y should be a column vector; lmda is the learning rate."""
        if X.shape[0] != y.shape[0]:
            raise RuntimeError("dataset flawed: y does not match X")
        self.y = y
        self.lmda = lmda
        self.network_structure = network_structure
        self.network = [[Neuron(inputs=X, dataset_size=X.shape[0]) for i in range(network_structure[0])]]
        for layer in network_structure[1:]:
            self.network.append([Neuron(inputs=np.array([self.network[-1]]),
                                        dataset_size=X.shape[0]) for i in range(layer)])
        self.network.append([Neuron(inputs=np.array([self.network[-1]]), dataset_size=X.shape[0], layer="o")])

    def make_prediction(self):
        return self.network[-1][0].calc_output().reshape(self.y.shape)

    def calc_error(self):
        # mean squared error: E = sum((y - y')**2) / (2m)
        return sum((self.y - self.make_prediction()) ** 2) / (2.0 * len(self.y))
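
    # A worked example (an editorial addition): for the XOR net B below, the
    # untrained sigmoid output sits near 0.5 for every example, so the initial
    # error is roughly sum((y - 0.5)**2) / (2 * 4) = (4 * 0.25) / 8 = 0.125.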

    def __backprop(self):
        base_derivatives = [["tbd" for j in range(self.network_structure[i])]
                            for i in range(len(self.network_structure))] + [["tbd"]]
        y_prime = self.make_prediction()
        dE_dout = sum(y_prime - self.y)
        dout_din = np.mean(self.network[-1][0].act_func.derivative(y_prime))
        base_derivatives[-1][0] = dE_dout * dout_din
        # naming convention: 'i' and 'o' abbreviate the in and out of the neuron
        # in question, while 'in' and 'out' refer to neurons in the next layer
        for i in range(len(base_derivatives) - 2, -1, -1):
            for j in range(len(base_derivatives[i])):
                dE_dout = 0
                for neuron in range(len(base_derivatives[i + 1])):
                    din_di = self.network[i + 1][neuron].weights[0][j]
                    dE_dout += din_di * base_derivatives[i + 1][neuron]
                do_di = np.mean(self.network[i][j].act_func.derivative(self.network[i][j].calc_output()))
                base_derivatives[i][j] = dE_dout * do_di
                # weights can be updated in place during this backward sweep because
                # each layer's base_derivatives are stored before its weights change
                for weight in range(self.network[i][j].weights.shape[1]):
                    if i != 0:
                        self.network[i][j].weights[0][weight] -= self.lmda * \
                            np.mean(self.network[i - 1][weight].calc_output()) * base_derivatives[i][j]
                    else:
                        self.network[i][j].weights[0][weight] -= self.lmda * \
                            np.mean(self.network[i][j].inputs.transpose()[weight]) * base_derivatives[i][j]
                self.network[i][j].bias -= self.lmda * base_derivatives[i][j]

    def train(self, iterations=300):
        for i in range(iterations):
            self.__backprop()
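
# The update rule implemented by __backprop, spelled out (an editorial note):
# for a weight w feeding a neuron with pre-activation 'in' and activation 'out',
#   dE/dw = dE/d(out) * d(out)/d(in) * d(in)/dw
# where d(in)/dw is the upstream activation (or the raw input for layer 0);
# base_derivatives caches dE/d(out) * d(out)/d(in) for each neuron.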


A = NeuralNet(np.array([[1, 2, 3], [2, 3, 4]]), np.array([[1], [2]]), [2, 2])
B = NeuralNet(np.array([[1, 1], [1, 0], [0, 1], [0, 0]]), np.array([[0], [1], [1], [0]]), [2, 2])
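
# A usage sketch (an editorial addition): B is wired up as the XOR problem, so
# training should drive the error down, though with the default learning rate
# lmda = 10 and random initialisation convergence isn't guaranteed on any run.
print(B.calc_error())
B.train(iterations=300)
print(B.calc_error())
print(B.make_prediction())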