diff --git a/activations.py b/activations.py
index b9bee4f..f4b8791 100644
--- a/activations.py
+++ b/activations.py
@@ -6,4 +6,4 @@
-def sigmoid(x: Union[float, np.ndarray]):
+def sigmoid(x: np.ndarray):
     """The sigmoid activation function.
 
     It is a monotonic function (entirely non-decreasing or non-increasing)
@@ -29,7 +29,13 @@ def sigmoid(x: Union[float, np.ndarray]):
         raise ValueError("Sigmoid input must be a float or a 1D array of float values.")
 
-def relu(x: Union[float, np.ndarray]):
+def sigmoid_delta(x: np.ndarray):
+    """Sigmoid derivative.
+    """
+    return sigmoid(x) * (1 - sigmoid(x))
+
+
+def relu(x: np.ndarray):
     """Relu activation function.
 
     Defined as,
     f(x) =
@@ -42,6 +48,16 @@
         raise ValueError("ReLU input must be a float or a 1D array of float values")
 
 
+def relu_delta(x: np.ndarray):
+    """ReLU Derivative.
+
+    For x < 0, f'(x) = 0 and f'(x) = 1 otherwise.
+    relu isn't differentiable at x=0 but for practical reasons,
+    f'(x=0) = 0.
+    """
+    return np.where(x > 0, 1, 0)
+
+
 def tanh(x: np.ndarray):
     """Hyperbolic tangent activation function.
 
diff --git a/losses.py b/losses.py
index f269d60..39ed405 100644
--- a/losses.py
+++ b/losses.py
@@ -5,10 +5,10 @@
 import numpy as np
 
 
-def mean_squared_error(
-    true_values: List[Union[float, int]],
-    predicted_values: List[Union[float, int]]
-    ):
+def mse(
+    true_values: List[float],
+    predicted_values: List[float]
+):
     """Mean squared error loss function.
 
     Defined as the average of the sum of the square differences between
@@ -28,2 +28,24 @@
     else: # list
         return sum([(t - p) ** 2 for t, p in zip(true_values, predicted_values)]) / len(true_values)
+
+
+def mse_delta(
+    true_values: np.ndarray,
+    predicted_values: np.ndarray
+):
+    """Derivative of the mean squared error function.
+    The derivative is taken with respect to the predicted value.
+    Therefore, if t and p are the true and predicted values,
+    differentiating (t - p)^2 by chain rule yields 2*(-1)*(t - p)
+
+    Args:
+        Numpy arrays of the true and predicted values.
+    """
+    return np.nanmean(-2 * (true_values - predicted_values))
+    # or return np.nanmean(2 * (predicted_values - true_values))
+
+
+def bce_loss():
+    """Binary Cross Entropy Loss function.
+    """
+    pass
diff --git a/mlp.py b/mlp.py
index 3bde384..5d2422f 100644
--- a/mlp.py
+++ b/mlp.py
@@ -8,3 +8,3 @@
 def perceptron_np(inputs: np.ndarray, weights: np.ndarray, bias: np.ndarray):
-    """Perceptron in numpy."""
+    """Perceptron in numpy (Logically the same as that in perceptron.py)."""
     return np.dot(inputs, weights) + bias
@@ -43,6 +43,6 @@
 if __name__=="__main__":
     or_circuit = np.array([[1, 1], [0, 1], [1, 0], [0, 0]])
     in_dim = or_circuit.shape[1]
-    model = MLP(in_dim, 10, 1)
+    model = MLP(in_dim, 10, 1) # hidden dim can be different from input dim
     out = model(or_circuit)
     print(f"Output probabilities: \n{out}")
diff --git a/mlp_backprop.py b/mlp_backprop.py
new file mode 100644
index 0000000..6237df2
--- /dev/null
+++ b/mlp_backprop.py
@@ -0,0 +1,38 @@
+"""
+Backpropagation implementation.
+A neural network learns by means of updates to its weights for each data processed
+in order to minimize a given objective function.
+The mechanism of updating network weights is what is termed as backpropagation.
+"""
+import numpy as np
+from activations import relu, sigmoid
+from mlp import perceptron_np
+from losses import mse, mse_delta
+
+
+class MLP:
+    """Multi Layer Perceptron."""
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        out_dim: int,
+    ) -> None:
+        self.input_dim = input_dim
+        self.out_dim = out_dim
+        self.hidden_dim = hidden_dim
+        self.w1 = np.random.uniform(low=0.0, high=0.5, size=(input_dim, hidden_dim))
+        self.w2 = np.random.uniform(low=0.0, high=0.5, size=(hidden_dim, out_dim))
+        # Each layer needs its own bias, sized to that layer's output width.
+        self.b1 = np.random.random(size=hidden_dim)
+        self.b2 = np.random.random(size=out_dim)
+
+    def forward(self, x: np.ndarray) -> np.ndarray:
+        """Forward pass: sigmoid(relu(x @ w1 + b1) @ w2 + b2)."""
+        layer1 = perceptron_np(x, self.w1, self.b1)
+        x1 = relu(layer1)
+        # Feed the *activated* hidden output into the second layer.
+        x2 = perceptron_np(x1, self.w2, self.b2)
+        return sigmoid(x2)
+
+    def __call__(self, x):
+        return self.forward(x)