Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion nnf/optimizers/gradient_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,8 @@ def update_params(self, layer : Layer):
layer.weights -= self.current_learning_rate * layer.dweights

if layer.biases is not None:
layer.biases -= self.current_learning_rate * layer.dbiases
layer.biases -= self.current_learning_rate * layer.dbiases

def pre_update_params(self):
    """Advance the step counter and recompute the decayed learning rate.

    Uses 1/t decay: lr = base_lr / (1 + decay * iterations).
    NOTE(review): the counter is incremented *before* the rate is
    recomputed, so the very first update already runs at a decayed rate
    (iterations == 1, never 0) — confirm this ordering is intended.
    """
    self.iterations += 1
    self.current_learning_rate = self.learning_rate / (1.0 + self.decay * self.iterations)
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
testpaths = tests

pythonpath = .

filterwarnings =
ignore:overflow encountered in exp
141 changes: 141 additions & 0 deletions tests/test_activations/test_relu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import numpy as np
import pytest
from nnf.activations.relu import ReLU

@pytest.fixture
def relu():
    """Provide a fresh ReLU activation instance for each test."""
    return ReLU()

# ────────────────────────────────
# FORWARD TESTS
# ────────────────────────────────

def test_forward_basic(relu):
    """Negatives clamp to zero, positives pass through unchanged."""
    result = relu.forward(np.array([[-1, 0, 1]]))
    np.testing.assert_array_equal(result, np.array([[0, 0, 1]]))

def test_forward_all_positives(relu):
    """A strictly positive batch is the identity under ReLU."""
    batch = np.array([[1, 2, 3]])
    np.testing.assert_array_equal(relu.forward(batch), batch)

def test_forward_all_negatives(relu):
    """A strictly negative batch collapses to all zeros."""
    result = relu.forward(np.array([[-1, -2, -3]]))
    np.testing.assert_array_equal(result, np.array([[0, 0, 0]]))

def test_forward_zero(relu):
    """Zero is a fixed point of ReLU."""
    zeros = np.array([[0, 0, 0]])
    np.testing.assert_array_equal(relu.forward(zeros), np.array([[0, 0, 0]]))

def test_forward_mixed_floats(relu):
    """Float inputs: only the positive entry survives."""
    result = relu.forward(np.array([[-0.5, 0.0, 0.5]]))
    np.testing.assert_array_almost_equal(result, np.array([[0.0, 0.0, 0.5]]))

def test_forward_large_values(relu):
    """Large magnitudes are handled without overflow or clipping."""
    result = relu.forward(np.array([[1e6, -1e6]]))
    np.testing.assert_array_equal(result, np.array([[1e6, 0]]))

def test_forward_preserves_shape(relu):
    """Output shape always mirrors the input shape."""
    batch = np.random.randn(5, 5)
    assert relu.forward(batch).shape == batch.shape

def test_forward_with_inf_values(relu):
    """+inf passes through; -inf clamps to zero."""
    result = relu.forward(np.array([[np.inf, -np.inf]]))
    np.testing.assert_array_equal(result, np.array([[np.inf, 0]]))

def test_forward_nan_raises(relu):
    """NaN inputs must not be silently turned into valid activations.

    BUG FIX: the original body raised ValueError itself inside the
    ``pytest.raises`` block, so ``relu.forward`` was never executed and the
    test passed vacuously no matter what ReLU did.  ReLU performs no input
    validation, so the meaningful contract to pin is that NaN *propagates*
    through the activation while the other entries are still computed.
    (Name kept for backward compatibility with existing test selection.)
    """
    inputs = np.array([[np.nan, 1.0, -1.0]])
    output = relu.forward(inputs)
    # NaN propagates — it is neither clamped to 0 nor replaced.
    assert np.isnan(output[0, 0])
    # Remaining entries follow the normal ReLU rule.
    np.testing.assert_array_equal(output[0, 1:], np.array([1.0, 0.0]))

# ────────────────────────────────
# BACKWARD TESTS
# ────────────────────────────────

def test_backward_basic(relu):
    """Gradient flows only where the forward input was positive."""
    relu.forward(np.array([[-1, 0, 2]]))
    grads = relu.backward(np.array([[1, 1, 1]]))
    np.testing.assert_array_equal(grads, np.array([[0, 0, 1]]))

def test_backward_preserves_shape(relu):
    """Downstream gradient shape mirrors the upstream gradient shape."""
    relu.forward(np.random.randn(3, 3))
    upstream = np.random.randn(3, 3)
    assert relu.backward(upstream).shape == upstream.shape

def test_backward_zeros_input(relu):
    """An all-zero forward input blocks every gradient component."""
    relu.forward(np.zeros((2, 2)))
    grads = relu.backward(np.ones((2, 2)))
    np.testing.assert_array_equal(grads, np.zeros((2, 2)))

def test_backward_random_inputs(relu):
    """Mixed-sign inputs gate the gradient elementwise."""
    batch = np.array([[0.5, -0.5], [-1.0, 1.0]])
    relu.forward(batch)
    grads = relu.backward(np.ones_like(batch))
    np.testing.assert_array_equal(grads, np.array([[1, 0], [0, 1]]))

def test_backward_with_zero_dvalues(relu):
    """A zero upstream gradient propagates as exactly zero."""
    batch = np.array([[1, -1]])
    relu.forward(batch)
    np.testing.assert_array_equal(
        relu.backward(np.zeros_like(batch)), np.zeros_like(batch)
    )

def test_backward_all_positive_inputs(relu):
    """All-positive inputs let the upstream gradient through untouched."""
    relu.forward(np.array([[2, 3]]))
    upstream = np.array([[5, 6]])
    np.testing.assert_array_equal(relu.backward(upstream), upstream)

def test_backward_all_negative_inputs(relu):
    """All-negative inputs zero out the entire gradient."""
    relu.forward(np.array([[-2, -3]]))
    grads = relu.backward(np.array([[5, 6]]))
    np.testing.assert_array_equal(grads, np.array([[0, 0]]))

# ────────────────────────────────
# MISC/EDGE CASES
# ────────────────────────────────

def test_forward_large_matrix(relu):
    """Verify the elementwise rule holds across a large random matrix."""
    batch = np.random.uniform(-100, 100, size=(1000, 1000))
    out = relu.forward(batch)
    negative = batch < 0
    assert np.all(out[negative] == 0)
    assert np.all(out[~negative] == batch[~negative])

def test_backward_gradient_flow(relu):
    """With a ones upstream gradient, backward reduces to the 0/1 mask."""
    batch = np.random.randn(10, 10)
    relu.forward(batch)
    grads = relu.backward(np.ones((10, 10)))
    assert np.all(grads[batch <= 0] == 0)
    assert np.all(grads[batch > 0] == 1)

def test_forward_backward_consistency(relu):
    """backward(d) must equal d masked by (input > 0)."""
    batch = np.random.randn(10, 10)
    upstream = np.random.randn(10, 10)
    relu.forward(batch)
    np.testing.assert_array_equal(relu.backward(upstream), upstream * (batch > 0))
141 changes: 141 additions & 0 deletions tests/test_activations/test_sigmoid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import numpy as np
import pytest
from nnf.activations.sigmoid import Sigmoid

@pytest.fixture
def sigmoid():
    """Provide a fresh Sigmoid activation instance for each test."""
    return Sigmoid()

# ─────────────────────────────────────────
# FORWARD TESTS
# ─────────────────────────────────────────

def test_forward_basic(sigmoid):
    """Output matches the closed-form logistic function."""
    batch = np.array([[0, 1, -1]])
    np.testing.assert_array_almost_equal(
        sigmoid.forward(batch), 1 / (1 + np.exp(-batch))
    )

def test_forward_output_range(sigmoid):
    """Every output lies in the closed interval [0, 1]."""
    batch = np.linspace(-1000, 1000, num=10).reshape(2, 5)
    out = sigmoid.forward(batch)
    assert np.all((out >= 0) & (out <= 1))

def test_forward_zero(sigmoid):
    """sigmoid(0) is exactly 0.5."""
    assert np.allclose(sigmoid.forward(np.array([[0]])), 0.5)

def test_forward_large_positive(sigmoid):
    """Saturates to 1 for large positive inputs."""
    assert np.allclose(sigmoid.forward(np.array([[1000]])), 1.0, atol=1e-6)

def test_forward_large_negative(sigmoid):
    """Saturates to 0 for large negative inputs."""
    assert np.allclose(sigmoid.forward(np.array([[-1000]])), 0.0, atol=1e-6)

def test_forward_preserves_shape(sigmoid):
    """Output shape mirrors the input shape."""
    batch = np.random.randn(4, 4)
    assert sigmoid.forward(batch).shape == batch.shape

def test_forward_numerical_stability(sigmoid):
    """Extreme inputs must not produce inf or nan."""
    out = sigmoid.forward(np.array([[1000, -1000]]))
    assert np.isfinite(out).all()

def test_forward_inf_values(sigmoid):
    """Infinities map to the exact saturation limits 1 and 0."""
    out = sigmoid.forward(np.array([[np.inf, -np.inf]]))
    np.testing.assert_array_almost_equal(out, np.array([[1.0, 0.0]]))

def test_forward_nan_raises(sigmoid):
    """NaN inputs must not be silently mapped into (0, 1).

    BUG FIX: the original body raised ValueError itself inside the
    ``pytest.raises`` block, so ``sigmoid.forward`` was never executed and
    the test passed vacuously no matter what Sigmoid did.  Sigmoid performs
    no input validation, so the meaningful contract to pin is that NaN
    *propagates* while valid entries are still computed.
    (Name kept for backward compatibility with existing test selection.)
    """
    inputs = np.array([[1.0, np.nan]])
    output = sigmoid.forward(inputs)
    # The valid entry is computed normally.
    np.testing.assert_allclose(output[0, 0], 1 / (1 + np.exp(-1.0)))
    # NaN propagates through exp() — it is not clamped into (0, 1).
    assert np.isnan(output[0, 1])

# ─────────────────────────────────────────
# BACKWARD TESTS
# ─────────────────────────────────────────

def test_backward_basic(sigmoid):
    """Gradient equals upstream * s * (1 - s) for the cached output s."""
    sigmoid.forward(np.array([[0.0, 1.0]]))
    upstream = np.array([[1.0, 1.0]])
    grads = sigmoid.backward(upstream)
    np.testing.assert_array_almost_equal(
        grads, upstream * (sigmoid.output * (1 - sigmoid.output))
    )

def test_backward_zero_gradient(sigmoid):
    """Zero upstream gradient propagates as exactly zero."""
    batch = np.random.randn(3, 3)
    sigmoid.forward(batch)
    np.testing.assert_array_equal(
        sigmoid.backward(np.zeros_like(batch)), np.zeros_like(batch)
    )

def test_backward_preserves_shape(sigmoid):
    """Downstream gradient shape mirrors the input shape."""
    batch = np.random.randn(2, 5)
    sigmoid.forward(batch)
    assert sigmoid.backward(np.random.randn(2, 5)).shape == batch.shape

def test_backward_with_ones(sigmoid):
    """With a ones upstream gradient, backward is the derivative itself."""
    sigmoid.forward(np.random.randn(3, 3))
    grads = sigmoid.backward(np.ones((3, 3)))
    np.testing.assert_array_almost_equal(
        grads, sigmoid.output * (1 - sigmoid.output)
    )

def test_backward_with_large_values(sigmoid):
    """Saturated regions exhibit a vanishing gradient."""
    sigmoid.forward(np.array([[1000, -1000]]))
    grads = sigmoid.backward(np.array([[1.0, 1.0]]))
    # Both entries sit deep in saturation, so the gradient is ~0.
    assert np.all(grads < 1e-3)

# ─────────────────────────────────────────
# EDGE CASES
# ─────────────────────────────────────────

def test_forward_backward_consistency(sigmoid):
    """backward must agree with the analytic derivative on random data."""
    batch = np.random.randn(10, 10)
    upstream = np.random.randn(10, 10)
    sigmoid.forward(batch)
    np.testing.assert_array_almost_equal(
        sigmoid.backward(upstream),
        upstream * sigmoid.output * (1 - sigmoid.output),
    )

def test_forward_high_dimensional_input(sigmoid):
    """Forward handles rank-3 tensors without reshaping."""
    batch = np.random.randn(5, 4, 3)
    assert sigmoid.forward(batch).shape == batch.shape

def test_backward_high_dimensional_input(sigmoid):
    """Backward handles rank-3 tensors without reshaping."""
    batch = np.random.randn(2, 3, 4)
    sigmoid.forward(batch)
    assert sigmoid.backward(np.ones((2, 3, 4))).shape == batch.shape

def test_forward_extremely_small_values(sigmoid):
    """Near-zero inputs match the closed form (both outputs ~0.5)."""
    batch = np.array([[-1e-10, 1e-10]])
    np.testing.assert_array_almost_equal(
        sigmoid.forward(batch), 1 / (1 + np.exp(-batch))
    )

def test_backward_extremely_small_values(sigmoid):
    """Derivative at a near-zero input is ~0.25."""
    sigmoid.forward(np.array([[1e-10]]))
    upstream = np.array([[1.0]])
    grads = sigmoid.backward(upstream)
    np.testing.assert_array_almost_equal(
        grads, upstream * (sigmoid.output * (1 - sigmoid.output))
    )
85 changes: 85 additions & 0 deletions tests/test_losses/test_binary_cross_entropy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import numpy as np
import pytest
from nnf.losses.binary_cross_entropy import BinaryCrossEntropy


def test_initialization():
    """A fresh loss object starts with no cached output or gradients."""
    loss_fn = BinaryCrossEntropy()
    assert loss_fn.output is None
    assert loss_fn.dinputs is None


def test_forward_perfect_prediction():
    """A perfect prediction yields a near-zero (not exactly zero) loss.

    The loss clips probabilities away from 0 and 1 internally, so even a
    perfect prediction produces a tiny positive value.
    """
    loss_fn = BinaryCrossEntropy()
    labels = np.array([[1], [0], [1], [0]])
    predictions = np.array([[1], [0], [1], [0]])
    assert np.isclose(loss_fn.forward(predictions, labels), 0, atol=1e-6)


def test_forward_worst_prediction():
    """Exactly-wrong predictions drive the loss very high."""
    loss_fn = BinaryCrossEntropy()
    labels = np.array([[1], [0], [1], [0]])
    # Clip away from exact 0/1 so log() stays finite, mirroring the loss.
    predictions = np.clip(np.array([[0], [1], [0], [1]]), 1e-7, 1 - 1e-7)
    assert loss_fn.forward(predictions, labels) > 10


def test_forward_medium_prediction():
    """Loss matches the hand-computed mean binary cross-entropy."""
    loss_fn = BinaryCrossEntropy()
    labels = np.array([[1], [0]])
    predictions = np.array([[0.7], [0.3]])
    # Per-sample BCE: sample 1 (y=1, p=0.7) and sample 2 (y=0, p=0.3),
    # averaged over the batch of two.
    manual = -(np.log(0.7) + np.log(1 - 0.3)) / 2
    assert np.isclose(loss_fn.forward(predictions, labels), manual)


def test_backward():
    """Gradient matches the analytic BCE derivative, averaged per sample."""
    loss_fn = BinaryCrossEntropy()
    labels = np.array([[1], [0]])
    predictions = np.array([[0.7], [0.3]])
    loss_fn.forward(predictions, labels)  # mirror normal forward-then-backward usage
    grads = loss_fn.backward(predictions, labels)
    batch = len(predictions)
    manual = -(labels / predictions - (1 - labels) / (1 - predictions)) / batch
    assert np.allclose(grads, manual)


def test_backward_shape():
    """Gradient array shape mirrors the prediction array shape."""
    loss_fn = BinaryCrossEntropy()
    labels = np.random.randint(0, 2, size=(32, 1))
    predictions = np.random.random(size=(32, 1))
    assert loss_fn.backward(predictions, labels).shape == predictions.shape


def test_output_range():
    """BCE loss is non-negative for any probability vector."""
    loss_fn = BinaryCrossEntropy()
    labels = np.array([[1], [0], [1], [0]])
    for _ in range(10):
        predictions = np.random.random(size=(4, 1))
        assert loss_fn.forward(predictions, labels) >= 0
Loading