diff --git a/activations.py b/activations.py
index b9bee4f..f4b8791 100644
--- a/activations.py
+++ b/activations.py
@@ -6,4 +6,4 @@
-def sigmoid(x: Union[float, np.ndarray]):
+def sigmoid(x: np.ndarray):
     """The sigmoid activation function.
 
     It is a monotonic function (entirely non-decreasing or non-increasing)
@@ -29,7 +29,13 @@ def sigmoid(x: Union[float, np.ndarray]):
         raise ValueError("Sigmoid input must be a float or a 1D array of float values.")
 
-def relu(x: Union[float, np.ndarray]):
+def sigmoid_delta(x: np.ndarray):
+    """Sigmoid derivative.
+    """
+    return sigmoid(x) * (1 - sigmoid(x))
+
+
+def relu(x: np.ndarray):
     """Relu activation function.
 
     Defined as,
     f(x) =
@@ -42,6 +48,16 @@
         raise ValueError("ReLU input must be a float or a 1D array of float values")
 
 
+def relu_delta(x: np.ndarray):
+    """ReLU Derivative.
+
+    For x < 0, f'(x) = 0 and f'(x) = 1 otherwise.
+    relu isn't differentiable at x=0 but for practical reasons,
+    f'(x=0) = 0.
+    """
+    return np.where(x > 0, 1, 0)
+
+
 def tanh(x: np.ndarray):
     """Hyperbolic tangent activation function.
 
diff --git a/losses.py b/losses.py
index f269d60..39ed405 100644
--- a/losses.py
+++ b/losses.py
@@ -5,10 +5,10 @@
 import numpy as np
 
 
-def mean_squared_error(
-    true_values: List[Union[float, int]],
-    predicted_values: List[Union[float, int]]
-    ):
+def mse(
+    true_values: List[float],
+    predicted_values: List[float]
+):
     """Mean squared error loss function.
 
     Defined as the average of the sum of the square differences between
@@ -28,2 +28,24 @@
     else: # list
         return sum([(t - p) ** 2 for t, p in zip(true_values, predicted_values)]) / len(true_values)
+
+
+def mse_delta(
+    true_values: np.ndarray,
+    predicted_values: np.ndarray
+):
+    """Derivative of the mean squared error function.
+    The derivative is taken with respect to the predicted value.
+    Therefore, if t and p are the true and predicted values,
+    differentiating (t - p)^2 by chain rule yields 2*(-1)*(t - p)
+
+    Args:
+        Numpy arrays of the true and predicted values.
+    """
+    return np.nanmean(-2 * (true_values - predicted_values))
+    # or return np.nanmean(2 * (predicted_values - true_values))
+
+
+def bce_loss():
+    """Binary Cross Entropy Loss function.
+    """
+    pass
diff --git a/mlp.py b/mlp.py
index 3bde384..5d2422f 100644
--- a/mlp.py
+++ b/mlp.py
@@ -8,3 +8,3 @@
 def perceptron_np(inputs: np.ndarray, weights: np.ndarray, bias: np.ndarray):
-    """Perceptron in numpy."""
+    """Perceptron in numpy (Logically the same as that in perceptron.py)."""
     return np.dot(inputs, weights) + bias
@@ -43,6 +43,6 @@
 if __name__=="__main__":
     or_circuit = np.array([[1, 1], [0, 1], [1, 0], [0, 0]])
     in_dim = or_circuit.shape[1]
-    model = MLP(in_dim, 10, 1)
+    model = MLP(in_dim, 10, 1) # hidden dim can be different from input dim
     out = model(or_circuit)
     print(f"Output probabilities: \n{out}")
diff --git a/mlp_backprop.py b/mlp_backprop.py
new file mode 100644
index 0000000..6237df2
--- /dev/null
+++ b/mlp_backprop.py
@@ -0,0 +1,38 @@
+"""
+Backpropagation implementation.
+A neural network learns by means of updates to its weights for each data processed
+in order to minimize a given objective function.
+The mechanism of updating network weights is what is termed as backpropagation.
+"""
+import numpy as np
+from activations import relu, sigmoid
+from mlp import perceptron_np
+from losses import mse, mse_delta
+
+
+class MLP:
+    """Multi Layer Perceptron."""
+    def __init__(
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        out_dim: int,
+    ) -> None:
+        self.input_dim = input_dim
+        self.out_dim = out_dim
+        self.hidden_dim = hidden_dim
+        self.w1 = np.random.uniform(low=0.0, high=0.5, size=(input_dim, hidden_dim))
+        self.w2 = np.random.uniform(low=0.0, high=0.5, size=(hidden_dim, out_dim))
+        # Each layer needs its own bias, sized to that layer's output width.
+        self.b1 = np.random.random(size=hidden_dim)
+        self.b2 = np.random.random(size=out_dim)
+
+    def forward(self, x: np.ndarray) -> np.ndarray:
+        """Forward pass: sigmoid(relu(x @ w1 + b1) @ w2 + b2)."""
+        layer1 = perceptron_np(x, self.w1, self.b1)
+        x1 = relu(layer1)
+        # Feed the *activated* hidden output into the second layer.
+        x2 = perceptron_np(x1, self.w2, self.b2)
+        return sigmoid(x2)
+
+    def __call__(self, x):
+        return self.forward(x)