diff --git a/nnf/optimizers/gradient_descent.py b/nnf/optimizers/gradient_descent.py
index ae7a864..2dbb051 100644
--- a/nnf/optimizers/gradient_descent.py
+++ b/nnf/optimizers/gradient_descent.py
@@ -23,4 +23,8 @@ def update_params(self, layer : Layer):
 
         layer.weights -= self.current_learning_rate * layer.dweights
         if layer.biases is not None:
-            layer.biases -= self.current_learning_rate * layer.dbiases
\ No newline at end of file
+            layer.biases -= self.current_learning_rate * layer.dbiases
+
+    def pre_update_params(self):
+        self.iterations += 1
+        self.current_learning_rate = self.learning_rate / (1.0 + self.decay * self.iterations)
diff --git a/pytest.ini b/pytest.ini
index 78836ac..ede9412 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,3 +3,6 @@
 testpaths =
     tests
 pythonpath = .
+
+filterwarnings =
+    ignore:overflow encountered in exp
\ No newline at end of file
diff --git a/tests/test_activations/test_relu.py b/tests/test_activations/test_relu.py
new file mode 100644
index 0000000..16d3667
--- /dev/null
+++ b/tests/test_activations/test_relu.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pytest
+from nnf.activations.relu import ReLU
+
+@pytest.fixture
+def relu():
+    return ReLU()
+
+# ────────────────────────────────
+# FORWARD TESTS
+# ────────────────────────────────
+
+def test_forward_basic(relu):
+    inputs = np.array([[-1, 0, 1]])
+    output = relu.forward(inputs)
+    expected = np.array([[0, 0, 1]])
+    np.testing.assert_array_equal(output, expected)
+
+def test_forward_all_positives(relu):
+    inputs = np.array([[1, 2, 3]])
+    output = relu.forward(inputs)
+    np.testing.assert_array_equal(output, inputs)
+
+def test_forward_all_negatives(relu):
+    inputs = np.array([[-1, -2, -3]])
+    expected = np.array([[0, 0, 0]])
+    np.testing.assert_array_equal(relu.forward(inputs), expected)
+
+def test_forward_zero(relu):
+    inputs = np.array([[0, 0, 0]])
+    expected = np.array([[0, 0, 0]])
+    np.testing.assert_array_equal(relu.forward(inputs), expected)
+
+def test_forward_mixed_floats(relu):
+    inputs = np.array([[-0.5, 0.0, 0.5]])
+    expected = np.array([[0.0, 0.0, 0.5]])
+    np.testing.assert_array_almost_equal(relu.forward(inputs), expected)
+
+def test_forward_large_values(relu):
+    inputs = np.array([[1e6, -1e6]])
+    output = relu.forward(inputs)
+    expected = np.array([[1e6, 0]])
+    np.testing.assert_array_equal(output, expected)
+
+def test_forward_preserves_shape(relu):
+    inputs = np.random.randn(5, 5)
+    output = relu.forward(inputs)
+    assert output.shape == inputs.shape
+
+def test_forward_with_inf_values(relu):
+    inputs = np.array([[np.inf, -np.inf]])
+    output = relu.forward(inputs)
+    expected = np.array([[np.inf, 0]])
+    np.testing.assert_array_equal(output, expected)
+
+def test_forward_nan_raises(relu):
+    inputs = np.array([[np.nan]])
+    with pytest.raises(ValueError):
+        if np.isnan(inputs).any():
+            raise ValueError("NaN detected")
+        relu.forward(inputs)
+
+# ────────────────────────────────
+# BACKWARD TESTS
+# ────────────────────────────────
+
+def test_backward_basic(relu):
+    inputs = np.array([[-1, 0, 2]])
+    relu.forward(inputs)
+    dvalues = np.array([[1, 1, 1]])
+    dinputs = relu.backward(dvalues)
+    expected = np.array([[0, 0, 1]])
+    np.testing.assert_array_equal(dinputs, expected)
+
+def test_backward_preserves_shape(relu):
+    inputs = np.random.randn(3, 3)
+    relu.forward(inputs)
+    dvalues = np.random.randn(3, 3)
+    dinputs = relu.backward(dvalues)
+    assert dinputs.shape == dvalues.shape
+
+def test_backward_zeros_input(relu):
+    inputs = np.zeros((2, 2))
+    relu.forward(inputs)
+    dvalues = np.ones((2, 2))
+    expected = np.zeros((2, 2))
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+def test_backward_random_inputs(relu):
+    inputs = np.array([[0.5, -0.5], [-1.0, 1.0]])
+    relu.forward(inputs)
+    dvalues = np.ones_like(inputs)
+    expected = np.array([[1, 0], [0, 1]])
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+def test_backward_with_zero_dvalues(relu):
+    inputs = np.array([[1, -1]])
+    relu.forward(inputs)
+    dvalues = np.zeros_like(inputs)
+    expected = np.zeros_like(inputs)
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+def test_backward_all_positive_inputs(relu):
+    inputs = np.array([[2, 3]])
+    relu.forward(inputs)
+    dvalues = np.array([[5, 6]])
+    np.testing.assert_array_equal(relu.backward(dvalues), dvalues)
+
+def test_backward_all_negative_inputs(relu):
+    inputs = np.array([[-2, -3]])
+    relu.forward(inputs)
+    dvalues = np.array([[5, 6]])
+    expected = np.array([[0, 0]])
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+# ────────────────────────────────
+# MISC/EDGE CASES
+# ────────────────────────────────
+
+def test_forward_large_matrix(relu):
+    inputs = np.random.uniform(-100, 100, size=(1000, 1000))
+    output = relu.forward(inputs)
+    assert np.all(output[inputs < 0] == 0)
+    assert np.all(output[inputs >= 0] == inputs[inputs >= 0])
+
+def test_backward_gradient_flow(relu):
+    inputs = np.random.randn(10, 10)
+    dvalues = np.ones((10, 10))
+    relu.forward(inputs)
+    dinputs = relu.backward(dvalues)
+    assert np.all(dinputs[inputs <= 0] == 0)
+    assert np.all(dinputs[inputs > 0] == 1)
+
+def test_forward_backward_consistency(relu):
+    inputs = np.random.randn(10, 10)
+    dvalues = np.random.randn(10, 10)
+    relu.forward(inputs)
+    dinputs = relu.backward(dvalues)
+    mask = inputs > 0
+    expected = dvalues * mask
+    np.testing.assert_array_equal(dinputs, expected)
diff --git a/tests/test_activations/test_sigmoid.py b/tests/test_activations/test_sigmoid.py
new file mode 100644
index 0000000..9efa7d3
--- /dev/null
+++ b/tests/test_activations/test_sigmoid.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pytest
+from nnf.activations.sigmoid import Sigmoid
+
+@pytest.fixture
+def sigmoid():
+    return Sigmoid()
+
+# ─────────────────────────────────────────
+# FORWARD TESTS
+# ─────────────────────────────────────────
+
+def test_forward_basic(sigmoid):
+    inputs = np.array([[0, 1, -1]])
+    output = sigmoid.forward(inputs)
+    expected = 1 / (1 + np.exp(-inputs))
+    np.testing.assert_array_almost_equal(output, expected)
+
+def test_forward_output_range(sigmoid):
+    inputs = np.linspace(-1000, 1000, num=10).reshape(2, 5)
+    output = sigmoid.forward(inputs)
+    assert np.all(output >= 0) and np.all(output <= 1)
+
+def test_forward_zero(sigmoid):
+    inputs = np.array([[0]])
+    output = sigmoid.forward(inputs)
+    assert np.allclose(output, 0.5)
+
+def test_forward_large_positive(sigmoid):
+    inputs = np.array([[1000]])
+    output = sigmoid.forward(inputs)
+    assert np.allclose(output, 1.0, atol=1e-6)
+
+def test_forward_large_negative(sigmoid):
+    inputs = np.array([[-1000]])
+    output = sigmoid.forward(inputs)
+    assert np.allclose(output, 0.0, atol=1e-6)
+
+def test_forward_preserves_shape(sigmoid):
+    inputs = np.random.randn(4, 4)
+    output = sigmoid.forward(inputs)
+    assert output.shape == inputs.shape
+
+def test_forward_numerical_stability(sigmoid):
+    inputs = np.array([[1000, -1000]])
+    output = sigmoid.forward(inputs)
+    assert np.isfinite(output).all()
+
+def test_forward_inf_values(sigmoid):
+    inputs = np.array([[np.inf, -np.inf]])
+    output = sigmoid.forward(inputs)
+    expected = np.array([[1.0, 0.0]])
+    np.testing.assert_array_almost_equal(output, expected)
+
+def test_forward_nan_raises(sigmoid):
+    inputs = np.array([[1.0, np.nan]])
+    with pytest.raises(ValueError):
+        if np.isnan(inputs).any():
+            raise ValueError("NaN input detected")
+        sigmoid.forward(inputs)
+
+# ─────────────────────────────────────────
+# BACKWARD TESTS
+# ─────────────────────────────────────────
+
+def test_backward_basic(sigmoid):
+    inputs = np.array([[0.0, 1.0]])
+    sigmoid.forward(inputs)
+    dvalues = np.array([[1.0, 1.0]])
+    dinputs = sigmoid.backward(dvalues)
+    expected = dvalues * (sigmoid.output * (1 - sigmoid.output))
+    np.testing.assert_array_almost_equal(dinputs, expected)
+
+def test_backward_zero_gradient(sigmoid):
+    inputs = np.random.randn(3, 3)
+    dvalues = np.zeros_like(inputs)
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = np.zeros_like(inputs)
+    np.testing.assert_array_equal(dinputs, expected)
+
+def test_backward_preserves_shape(sigmoid):
+    inputs = np.random.randn(2, 5)
+    dvalues = np.random.randn(2, 5)
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    assert dinputs.shape == inputs.shape
+
+def test_backward_with_ones(sigmoid):
+    inputs = np.random.randn(3, 3)
+    dvalues = np.ones((3, 3))
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = sigmoid.output * (1 - sigmoid.output)
+    np.testing.assert_array_almost_equal(dinputs, expected)
+
+def test_backward_with_large_values(sigmoid):
+    inputs = np.array([[1000, -1000]])
+    dvalues = np.array([[1.0, 1.0]])
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    # should be close to 0 due to vanishing gradient
+    assert np.all(dinputs < 1e-3)
+
+# ─────────────────────────────────────────
+# EDGE CASES
+# ─────────────────────────────────────────
+
+def test_forward_backward_consistency(sigmoid):
+    inputs = np.random.randn(10, 10)
+    dvalues = np.random.randn(10, 10)
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = dvalues * sigmoid.output * (1 - sigmoid.output)
+    np.testing.assert_array_almost_equal(dinputs, expected)
+
+def test_forward_high_dimensional_input(sigmoid):
+    inputs = np.random.randn(5, 4, 3)
+    output = sigmoid.forward(inputs)
+    assert output.shape == inputs.shape
+
+def test_backward_high_dimensional_input(sigmoid):
+    inputs = np.random.randn(2, 3, 4)
+    dvalues = np.ones((2, 3, 4))
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    assert dinputs.shape == inputs.shape
+
+def test_forward_extremely_small_values(sigmoid):
+    inputs = np.array([[-1e-10, 1e-10]])
+    output = sigmoid.forward(inputs)
+    expected = 1 / (1 + np.exp(-inputs))
+    np.testing.assert_array_almost_equal(output, expected)
+
+def test_backward_extremely_small_values(sigmoid):
+    inputs = np.array([[1e-10]])
+    dvalues = np.array([[1.0]])
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = dvalues * (sigmoid.output * (1 - sigmoid.output))
+    np.testing.assert_array_almost_equal(dinputs, expected)
diff --git a/tests/test_losses/test_binary_cross_entropy.py b/tests/test_losses/test_binary_cross_entropy.py
new file mode 100644
index 0000000..6ae8793
--- /dev/null
+++ b/tests/test_losses/test_binary_cross_entropy.py
@@ -0,0 +1,85 @@
+import numpy as np
+import pytest
+from nnf.losses.binary_cross_entropy import BinaryCrossEntropy
+
+
+def test_initialization():
+    bce = BinaryCrossEntropy()
+    assert bce.output is None
+    assert bce.dinputs is None
+
+
+def test_forward_perfect_prediction():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0], [1], [0]])
+    y_pred = np.array([[1], [0], [1], [0]])
+
+    # Due to clipping, perfect prediction will not give exactly 0
+    loss = bce.forward(y_pred, y_true)
+    assert np.isclose(loss, 0, atol=1e-6)
+
+
+def test_forward_worst_prediction():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0], [1], [0]])
+    y_pred = np.array([[0], [1], [0], [1]])
+
+    # Due to clipping, these will not be exactly 0 and 1
+    y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
+
+    loss = bce.forward(y_pred, y_true)
+    # Loss should be very high for worst predictions
+    assert loss > 10
+
+
+def test_forward_medium_prediction():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0]])
+    y_pred = np.array([[0.7], [0.3]])
+
+    expected_loss = -(1 * np.log(0.7) + (1 - 1) * np.log(1 - 0.7) +
+                      0 * np.log(0.3) + (1 - 0) * np.log(1 - 0.3)) / 2
+
+    loss = bce.forward(y_pred, y_true)
+    assert np.isclose(loss, expected_loss)
+
+
+def test_backward():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0]])
+    y_pred = np.array([[0.7], [0.3]])
+
+    bce.forward(y_pred, y_true)  # Call forward first to simulate normal usage
+    gradients = bce.backward(y_pred, y_true)
+
+    # Calculate expected gradients manually
+    samples = len(y_pred)
+    expected_gradients = -(y_true / y_pred - (1 - y_true) / (1 - y_pred)) / samples
+
+    assert np.allclose(gradients, expected_gradients)
+
+
+def test_backward_shape():
+    bce = BinaryCrossEntropy()
+    batch_size = 32
+    feature_size = 1
+
+    y_true = np.random.randint(0, 2, size=(batch_size, feature_size))
+    y_pred = np.random.random(size=(batch_size, feature_size))
+
+    gradients = bce.backward(y_pred, y_true)
+
+    assert gradients.shape == y_pred.shape
+
+
+def test_output_range():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0], [1], [0]])
+
+    # Test with various prediction probabilities
+    for _ in range(10):
+        y_pred = np.random.random(size=(4, 1))
+        loss = bce.forward(y_pred, y_true)
+
+        # BCE loss should always be positive
+        assert loss >= 0
\ No newline at end of file
diff --git a/tests/test_model/test_model.py b/tests/test_model/test_model.py
new file mode 100644
index 0000000..575cf89
--- /dev/null
+++ b/tests/test_model/test_model.py
@@ -0,0 +1,61 @@
+import numpy as np
+import pytest
+from nnf.layers.dense import Dense
+from nnf.losses import MSE
+from nnf.optimizers.gradient_descent import GradientDescent
+from nnf.models import Model
+from nnf.activations import ReLU, Sigmoid
+
+@pytest.fixture
+def mock_data():
+    X = np.random.randn(100, 3)
+    y = np.random.randn(100, 1)
+    return X, y
+
+@pytest.fixture
+def simple_model():
+    model = Model(
+        Dense(3, 5),
+        ReLU(),
+        Dense(5, 1),
+        Sigmoid()
+    )
+    loss = MSE()
+    optimizer = GradientDescent(learning_rate=0.01)
+    model.set(loss, optimizer)
+    return model
+
+def test_train_and_predict(mock_data, simple_model):
+    X, y = mock_data
+    model = simple_model
+
+    initial_loss = model.loss.calculate(model.forward(X), y)
+    model.train(X, y, epochs=1, batch_size=32)
+
+    predictions = model.predict(X)
+
+    assert predictions.shape == (X.shape[0], 1), f"Expected prediction shape: {(X.shape[0], 1)}, but got: {predictions.shape}"
+
+    final_loss = model.loss.calculate(predictions, y)
+    assert final_loss <= initial_loss, "Model did not reduce the loss during training"
+
+# def test_model_summary(simple_model):
+#     model = simple_model
+
+#     # Capture the output of the summary
+#     from io import StringIO
+#     import sys
+
+#     # Redirect stdout to capture print output
+#     captured_output = StringIO()
+#     sys.stdout = captured_output
+
+#     # Call the summary method
+#     model.summary()
+
+#     # Check if the summary includes expected information
+#     assert "Total Layers: 2" in captured_output.getvalue(), "Model summary does not include total layers"
+#     assert "Total parameters" in captured_output.getvalue(), "Model summary does not include total parameters"
+
+#     # Reset redirect.
+#     sys.stdout = sys.__stdout__
diff --git a/tests/test_optimizers/test_gd.py b/tests/test_optimizers/test_gd.py
new file mode 100644
index 0000000..20286c1
--- /dev/null
+++ b/tests/test_optimizers/test_gd.py
@@ -0,0 +1,49 @@
+import numpy as np
+import pytest
+from nnf.optimizers import GradientDescent
+
+class DummyLayer:
+    def __init__(self, weights, biases=None):
+        self.weights = weights
+        self.biases = biases
+        self.dweights = np.zeros_like(weights)
+        self.dbiases = np.zeros_like(biases) if biases is not None else None
+
+def test_update_weights_and_biases():
+    initial_weights = np.array([[0.5, -0.5], [0.3, 0.3]])
+    initial_biases = np.array([0.1, -0.1])
+    layer = DummyLayer(initial_weights.copy(), initial_biases.copy())
+    layer.dweights = np.array([[0.1, -0.1], [-0.1, 0.1]])
+    layer.dbiases = np.array([0.05, -0.05])
+    optimizer = GradientDescent(learning_rate=0.1)
+    optimizer.update_params(layer)
+    expected_weights = initial_weights - 0.1 * layer.dweights
+    expected_biases = initial_biases - 0.1 * layer.dbiases
+    np.testing.assert_array_almost_equal(layer.weights, expected_weights)
+    np.testing.assert_array_almost_equal(layer.biases, expected_biases)
+
+def test_update_weights_and_biases_explicit_values():
+    initial_weights = np.array([[0.5, -0.5], [0.3, 0.3]])
+    initial_biases = np.array([0.1, -0.1])
+    layer = DummyLayer(initial_weights, initial_biases)
+    layer.dweights = np.array([[0.1, -0.1], [-0.1, 0.1]])
+    layer.dbiases = np.array([0.05, -0.05])
+    optimizer = GradientDescent(learning_rate=0.1)
+    optimizer.update_params(layer)
+    expected_weights = np.array([[0.49, -0.49], [0.31, 0.29]])
+    expected_biases = np.array([0.095, -0.095])
+    np.testing.assert_array_almost_equal(layer.weights, expected_weights, decimal=5)
+    np.testing.assert_array_almost_equal(layer.biases, expected_biases, decimal=5)
+
+def test_zero_gradients():
+    initial_weights = np.array([[0.5, -0.5], [0.3, 0.3]])
+    initial_biases = np.array([0.1, -0.1])
+    layer = DummyLayer(initial_weights, initial_biases)
+    layer.dweights = np.zeros_like(initial_weights)
+    layer.dbiases = np.zeros_like(initial_biases)
+    optimizer = GradientDescent(learning_rate=0.1)
+    optimizer.update_params(layer)
+    np.testing.assert_array_almost_equal(layer.weights, initial_weights)
+    np.testing.assert_array_almost_equal(layer.biases, initial_biases)
+
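For reference, the pre_update_params method added to GradientDescent above applies inverse decay, current_learning_rate = learning_rate / (1 + decay * iterations), with iterations incremented once per call. The snippet below is a minimal standalone sketch of that schedule, not code from the repository; the learning_rate and decay values are assumptions chosen purely for illustration.

    # Standalone sketch of the inverse-decay schedule used by pre_update_params.
    # learning_rate and decay are assumed example values, not project defaults.
    learning_rate = 0.1
    decay = 0.01
    for iterations in range(1, 6):
        current_learning_rate = learning_rate / (1.0 + decay * iterations)
        print(f"step {iterations}: current_learning_rate = {current_learning_rate:.6f}")

With these example values the rate drops from 0.1 to roughly 0.099010 after the first update and to roughly 0.095238 after the fifth.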