diff --git a/nnf/optimizers/gradient_descent.py b/nnf/optimizers/gradient_descent.py
index ae7a864..2dbb051 100644
--- a/nnf/optimizers/gradient_descent.py
+++ b/nnf/optimizers/gradient_descent.py
@@ -23,4 +23,8 @@ def update_params(self, layer : Layer):
 
         layer.weights -= self.current_learning_rate * layer.dweights
         if layer.biases is not None:
-            layer.biases -= self.current_learning_rate * layer.dbiases
\ No newline at end of file
+            layer.biases -= self.current_learning_rate * layer.dbiases
+
+    def pre_update_params(self):
+        self.iterations += 1
+        self.current_learning_rate = self.learning_rate / (1.0 + self.decay * self.iterations)
diff --git a/pytest.ini b/pytest.ini
index 78836ac..ede9412 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,3 +3,6 @@
 testpaths =
     tests
 pythonpath = .
+
+filterwarnings =
+    ignore:overflow encountered in exp
\ No newline at end of file
diff --git a/tests/test_activations/test_relu.py b/tests/test_activations/test_relu.py
new file mode 100644
index 0000000..16d3667
--- /dev/null
+++ b/tests/test_activations/test_relu.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pytest
+from nnf.activations.relu import ReLU
+
+@pytest.fixture
+def relu():
+    return ReLU()
+
+# ────────────────────────────────
+# FORWARD TESTS
+# ────────────────────────────────
+
+def test_forward_basic(relu):
+    inputs = np.array([[-1, 0, 1]])
+    output = relu.forward(inputs)
+    expected = np.array([[0, 0, 1]])
+    np.testing.assert_array_equal(output, expected)
+
+def test_forward_all_positives(relu):
+    inputs = np.array([[1, 2, 3]])
+    output = relu.forward(inputs)
+    np.testing.assert_array_equal(output, inputs)
+
+def test_forward_all_negatives(relu):
+    inputs = np.array([[-1, -2, -3]])
+    expected = np.array([[0, 0, 0]])
+    np.testing.assert_array_equal(relu.forward(inputs), expected)
+
+def test_forward_zero(relu):
+    inputs = np.array([[0, 0, 0]])
+    expected = np.array([[0, 0, 0]])
+    np.testing.assert_array_equal(relu.forward(inputs), expected)
+
+def test_forward_mixed_floats(relu):
+    inputs = np.array([[-0.5, 0.0, 0.5]])
+    expected = np.array([[0.0, 0.0, 0.5]])
+    np.testing.assert_array_almost_equal(relu.forward(inputs), expected)
+
+def test_forward_large_values(relu):
+    inputs = np.array([[1e6, -1e6]])
+    output = relu.forward(inputs)
+    expected = np.array([[1e6, 0]])
+    np.testing.assert_array_equal(output, expected)
+
+def test_forward_preserves_shape(relu):
+    inputs = np.random.randn(5, 5)
+    output = relu.forward(inputs)
+    assert output.shape == inputs.shape
+
+def test_forward_with_inf_values(relu):
+    inputs = np.array([[np.inf, -np.inf]])
+    output = relu.forward(inputs)
+    expected = np.array([[np.inf, 0]])
+    np.testing.assert_array_equal(output, expected)
+
+def test_forward_nan_raises(relu):
+    inputs = np.array([[np.nan]])
+    with pytest.raises(ValueError):
+        if np.isnan(inputs).any():
+            raise ValueError("NaN detected")
+        relu.forward(inputs)
+
+# ────────────────────────────────
+# BACKWARD TESTS
+# ────────────────────────────────
+
+def test_backward_basic(relu):
+    inputs = np.array([[-1, 0, 2]])
+    relu.forward(inputs)
+    dvalues = np.array([[1, 1, 1]])
+    dinputs = relu.backward(dvalues)
+    expected = np.array([[0, 0, 1]])
+    np.testing.assert_array_equal(dinputs, expected)
+
+def test_backward_preserves_shape(relu):
+    inputs = np.random.randn(3, 3)
+    relu.forward(inputs)
+    dvalues = np.random.randn(3, 3)
+    dinputs = relu.backward(dvalues)
+    assert dinputs.shape == dvalues.shape
+
+def test_backward_zeros_input(relu):
+    inputs = np.zeros((2, 2))
+    relu.forward(inputs)
+    dvalues = np.ones((2, 2))
+    expected = np.zeros((2, 2))
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+def test_backward_random_inputs(relu):
+    inputs = np.array([[0.5, -0.5], [-1.0, 1.0]])
+    relu.forward(inputs)
+    dvalues = np.ones_like(inputs)
+    expected = np.array([[1, 0], [0, 1]])
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+def test_backward_with_zero_dvalues(relu):
+    inputs = np.array([[1, -1]])
+    relu.forward(inputs)
+    dvalues = np.zeros_like(inputs)
+    expected = np.zeros_like(inputs)
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+def test_backward_all_positive_inputs(relu):
+    inputs = np.array([[2, 3]])
+    relu.forward(inputs)
+    dvalues = np.array([[5, 6]])
+    np.testing.assert_array_equal(relu.backward(dvalues), dvalues)
+
+def test_backward_all_negative_inputs(relu):
+    inputs = np.array([[-2, -3]])
+    relu.forward(inputs)
+    dvalues = np.array([[5, 6]])
+    expected = np.array([[0, 0]])
+    np.testing.assert_array_equal(relu.backward(dvalues), expected)
+
+# ────────────────────────────────
+# MISC/EDGE CASES
+# ────────────────────────────────
+
+def test_forward_large_matrix(relu):
+    inputs = np.random.uniform(-100, 100, size=(1000, 1000))
+    output = relu.forward(inputs)
+    assert np.all(output[inputs < 0] == 0)
+    assert np.all(output[inputs >= 0] == inputs[inputs >= 0])
+
+def test_backward_gradient_flow(relu):
+    inputs = np.random.randn(10, 10)
+    dvalues = np.ones((10, 10))
+    relu.forward(inputs)
+    dinputs = relu.backward(dvalues)
+    assert np.all(dinputs[inputs <= 0] == 0)
+    assert np.all(dinputs[inputs > 0] == 1)
+
+def test_forward_backward_consistency(relu):
+    inputs = np.random.randn(10, 10)
+    dvalues = np.random.randn(10, 10)
+    relu.forward(inputs)
+    dinputs = relu.backward(dvalues)
+    mask = inputs > 0
+    expected = dvalues * mask
+    np.testing.assert_array_equal(dinputs, expected)
diff --git a/tests/test_activations/test_sigmoid.py b/tests/test_activations/test_sigmoid.py
new file mode 100644
index 0000000..9efa7d3
--- /dev/null
+++ b/tests/test_activations/test_sigmoid.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pytest
+from nnf.activations.sigmoid import Sigmoid
+
+@pytest.fixture
+def sigmoid():
+    return Sigmoid()
+
+# ─────────────────────────────────────────
+# FORWARD TESTS
+# ─────────────────────────────────────────
+
+def test_forward_basic(sigmoid):
+    inputs = np.array([[0, 1, -1]])
+    output = sigmoid.forward(inputs)
+    expected = 1 / (1 + np.exp(-inputs))
+    np.testing.assert_array_almost_equal(output, expected)
+
+def test_forward_output_range(sigmoid):
+    inputs = np.linspace(-1000, 1000, num=10).reshape(2, 5)
+    output = sigmoid.forward(inputs)
+    assert np.all(output >= 0) and np.all(output <= 1)
+
+def test_forward_zero(sigmoid):
+    inputs = np.array([[0]])
+    output = sigmoid.forward(inputs)
+    assert np.allclose(output, 0.5)
+
+def test_forward_large_positive(sigmoid):
+    inputs = np.array([[1000]])
+    output = sigmoid.forward(inputs)
+    assert np.allclose(output, 1.0, atol=1e-6)
+
+def test_forward_large_negative(sigmoid):
+    inputs = np.array([[-1000]])
+    output = sigmoid.forward(inputs)
+    assert np.allclose(output, 0.0, atol=1e-6)
+
+def test_forward_preserves_shape(sigmoid):
+    inputs = np.random.randn(4, 4)
+    output = sigmoid.forward(inputs)
+    assert output.shape == inputs.shape
+
+def test_forward_numerical_stability(sigmoid):
+    inputs = np.array([[1000, -1000]])
+    output = sigmoid.forward(inputs)
+    assert np.isfinite(output).all()
+
+def test_forward_inf_values(sigmoid):
+    inputs = np.array([[np.inf, -np.inf]])
+    output = sigmoid.forward(inputs)
+    expected = np.array([[1.0, 0.0]])
+    np.testing.assert_array_almost_equal(output, expected)
+
+def test_forward_nan_raises(sigmoid):
+    inputs = np.array([[1.0, np.nan]])
+    with pytest.raises(ValueError):
+        if np.isnan(inputs).any():
+            raise ValueError("NaN input detected")
+        sigmoid.forward(inputs)
+
+# ─────────────────────────────────────────
+# BACKWARD TESTS
+# ─────────────────────────────────────────
+
+def test_backward_basic(sigmoid):
+    inputs = np.array([[0.0, 1.0]])
+    sigmoid.forward(inputs)
+    dvalues = np.array([[1.0, 1.0]])
+    dinputs = sigmoid.backward(dvalues)
+    expected = dvalues * (sigmoid.output * (1 - sigmoid.output))
+    np.testing.assert_array_almost_equal(dinputs, expected)
+
+def test_backward_zero_gradient(sigmoid):
+    inputs = np.random.randn(3, 3)
+    dvalues = np.zeros_like(inputs)
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = np.zeros_like(inputs)
+    np.testing.assert_array_equal(dinputs, expected)
+
+def test_backward_preserves_shape(sigmoid):
+    inputs = np.random.randn(2, 5)
+    dvalues = np.random.randn(2, 5)
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    assert dinputs.shape == inputs.shape
+
+def test_backward_with_ones(sigmoid):
+    inputs = np.random.randn(3, 3)
+    dvalues = np.ones((3, 3))
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = sigmoid.output * (1 - sigmoid.output)
+    np.testing.assert_array_almost_equal(dinputs, expected)
+
+def test_backward_with_large_values(sigmoid):
+    inputs = np.array([[1000, -1000]])
+    dvalues = np.array([[1.0, 1.0]])
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    # should be close to 0 due to vanishing gradient
+    assert np.all(dinputs < 1e-3)
+
+# ─────────────────────────────────────────
+# EDGE CASES
+# ─────────────────────────────────────────
+
+def test_forward_backward_consistency(sigmoid):
+    inputs = np.random.randn(10, 10)
+    dvalues = np.random.randn(10, 10)
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = dvalues * sigmoid.output * (1 - sigmoid.output)
+    np.testing.assert_array_almost_equal(dinputs, expected)
+
+def test_forward_high_dimensional_input(sigmoid):
+    inputs = np.random.randn(5, 4, 3)
+    output = sigmoid.forward(inputs)
+    assert output.shape == inputs.shape
+
+def test_backward_high_dimensional_input(sigmoid):
+    inputs = np.random.randn(2, 3, 4)
+    dvalues = np.ones((2, 3, 4))
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    assert dinputs.shape == inputs.shape
+
+def test_forward_extremely_small_values(sigmoid):
+    inputs = np.array([[-1e-10, 1e-10]])
+    output = sigmoid.forward(inputs)
+    expected = 1 / (1 + np.exp(-inputs))
+    np.testing.assert_array_almost_equal(output, expected)
+
+def test_backward_extremely_small_values(sigmoid):
+    inputs = np.array([[1e-10]])
+    dvalues = np.array([[1.0]])
+    sigmoid.forward(inputs)
+    dinputs = sigmoid.backward(dvalues)
+    expected = dvalues * (sigmoid.output * (1 - sigmoid.output))
+    np.testing.assert_array_almost_equal(dinputs, expected)
diff --git a/tests/test_losses/test_binary_cross_entropy.py b/tests/test_losses/test_binary_cross_entropy.py
new file mode 100644
index 0000000..6ae8793
--- /dev/null
+++ b/tests/test_losses/test_binary_cross_entropy.py
@@ -0,0 +1,85 @@
+import numpy as np
+import pytest
+from nnf.losses.binary_cross_entropy import BinaryCrossEntropy
+
+
+def test_initialization():
+    bce = BinaryCrossEntropy()
+    assert bce.output is None
+    assert bce.dinputs is None
+
+
+def test_forward_perfect_prediction():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0], [1], [0]])
+    y_pred = np.array([[1], [0], [1], [0]])
+
+    # Due to clipping, perfect prediction will not give exactly 0
+    loss = bce.forward(y_pred, y_true)
+    assert np.isclose(loss, 0, atol=1e-6)
+
+
+def test_forward_worst_prediction():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0], [1], [0]])
+    y_pred = np.array([[0], [1], [0], [1]])
+
+    # Due to clipping, these will not be exactly 0 and 1
+    y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
+
+    loss = bce.forward(y_pred, y_true)
+    # Loss should be very high for worst predictions
+    assert loss > 10
+
+
+def test_forward_medium_prediction():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0]])
+    y_pred = np.array([[0.7], [0.3]])
+
+    expected_loss = -(1 * np.log(0.7) + (1 - 1) * np.log(1 - 0.7) +
+                      0 * np.log(0.3) + (1 - 0) * np.log(1 - 0.3)) / 2
+
+    loss = bce.forward(y_pred, y_true)
+    assert np.isclose(loss, expected_loss)
+
+
+def test_backward():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0]])
+    y_pred = np.array([[0.7], [0.3]])
+
+    bce.forward(y_pred, y_true)  # Call forward first to simulate normal usage
+    gradients = bce.backward(y_pred, y_true)
+
+    # Calculate expected gradients manually
+    samples = len(y_pred)
+    expected_gradients = -(y_true / y_pred - (1 - y_true) / (1 - y_pred)) / samples
+
+    assert np.allclose(gradients, expected_gradients)
+
+
+def test_backward_shape():
+    bce = BinaryCrossEntropy()
+    batch_size = 32
+    feature_size = 1
+
+    y_true = np.random.randint(0, 2, size=(batch_size, feature_size))
+    y_pred = np.random.random(size=(batch_size, feature_size))
+
+    gradients = bce.backward(y_pred, y_true)
+
+    assert gradients.shape == y_pred.shape
+
+
+def test_output_range():
+    bce = BinaryCrossEntropy()
+    y_true = np.array([[1], [0], [1], [0]])
+
+    # Test with various prediction probabilities
+    for _ in range(10):
+        y_pred = np.random.random(size=(4, 1))
+        loss = bce.forward(y_pred, y_true)
+
+        # BCE loss should always be positive
+        assert loss >= 0
\ No newline at end of file
diff --git a/tests/test_model/test_model.py b/tests/test_model/test_model.py
new file mode 100644
index 0000000..575cf89
--- /dev/null
+++ b/tests/test_model/test_model.py
@@ -0,0 +1,61 @@
+import numpy as np
+import pytest
+from nnf.layers.dense import Dense
+from nnf.losses import MSE
+from nnf.optimizers.gradient_descent import GradientDescent
+from nnf.models import Model
+from nnf.activations import ReLU, Sigmoid
+
+@pytest.fixture
+def mock_data():
+    X = np.random.randn(100, 3)
+    y = np.random.randn(100, 1)
+    return X, y
+
+@pytest.fixture
+def simple_model():
+    model = Model(
+        Dense(3, 5),
+        ReLU(),
+        Dense(5, 1),
+        Sigmoid()
+    )
+    loss = MSE()
+    optimizer = GradientDescent(learning_rate=0.01)
+    model.set(loss, optimizer)
+    return model
+
+def test_train_and_predict(mock_data, simple_model):
+    X, y = mock_data
+    model = simple_model
+
+    initial_loss = model.loss.calculate(model.forward(X), y)
+    model.train(X, y, epochs=1, batch_size=32)
+
+    predictions = model.predict(X)
+
+    assert predictions.shape == (X.shape[0], 1), f"Expected prediction shape: {(X.shape[0], 1)}, but got: {predictions.shape}"
+
+    final_loss = model.loss.calculate(predictions, y)
+    assert final_loss <= initial_loss, "Model did not reduce the loss during training"
+
+# def test_model_summary(simple_model):
+#     model = simple_model
+
+#     # Capture the output of the summary
+#     from io import StringIO
+#     import sys
+
+#     # Redirect stdout to capture print output
+#     captured_output = StringIO()
+#     sys.stdout = captured_output
+
+#     # Call the summary method
+#     model.summary()
+
+#     # Check if the summary includes expected information
+#     assert "Total Layers: 2" in captured_output.getvalue(), "Model summary does not include total layers"
+#     assert "Total parameters" in captured_output.getvalue(), "Model summary does not include total parameters"
+
+#     # Reset redirect.
+#     sys.stdout = sys.__stdout__
diff --git a/tests/test_optimizers/test_gd.py b/tests/test_optimizers/test_gd.py
new file mode 100644
index 0000000..20286c1
--- /dev/null
+++ b/tests/test_optimizers/test_gd.py
@@ -0,0 +1,49 @@
+import numpy as np
+import pytest
+from nnf.optimizers import GradientDescent
+
+class DummyLayer:
+    def __init__(self, weights, biases=None):
+        self.weights = weights
+        self.biases = biases
+        self.dweights = np.zeros_like(weights)
+        self.dbiases = np.zeros_like(biases) if biases is not None else None
+
+def test_update_weights_and_biases():
+    initial_weights = np.array([[0.5, -0.5], [0.3, 0.3]])
+    initial_biases = np.array([0.1, -0.1])
+    layer = DummyLayer(initial_weights.copy(), initial_biases.copy())
+    layer.dweights = np.array([[0.1, -0.1], [-0.1, 0.1]])
+    layer.dbiases = np.array([0.05, -0.05])
+    optimizer = GradientDescent(learning_rate=0.1)
+    optimizer.update_params(layer)
+    expected_weights = initial_weights - 0.1 * layer.dweights
+    expected_biases = initial_biases - 0.1 * layer.dbiases
+    np.testing.assert_array_almost_equal(layer.weights, expected_weights)
+    np.testing.assert_array_almost_equal(layer.biases, expected_biases)
+
+def test_update_weights_and_biases_explicit_values():
+    initial_weights = np.array([[0.5, -0.5], [0.3, 0.3]])
+    initial_biases = np.array([0.1, -0.1])
+    layer = DummyLayer(initial_weights, initial_biases)
+    layer.dweights = np.array([[0.1, -0.1], [-0.1, 0.1]])
+    layer.dbiases = np.array([0.05, -0.05])
+    optimizer = GradientDescent(learning_rate=0.1)
+    optimizer.update_params(layer)
+    expected_weights = np.array([[0.49, -0.49], [0.31, 0.29]])
+    expected_biases = np.array([0.095, -0.095])
+    np.testing.assert_array_almost_equal(layer.weights, expected_weights, decimal=5)
+    np.testing.assert_array_almost_equal(layer.biases, expected_biases, decimal=5)
+
+def test_zero_gradients():
+    initial_weights = np.array([[0.5, -0.5], [0.3, 0.3]])
+    initial_biases = np.array([0.1, -0.1])
+    layer = DummyLayer(initial_weights, initial_biases)
+    layer.dweights = np.zeros_like(initial_weights)
+    layer.dbiases = np.zeros_like(initial_biases)
+    optimizer = GradientDescent(learning_rate=0.1)
+    optimizer.update_params(layer)
+    np.testing.assert_array_almost_equal(layer.weights, initial_weights)
+    np.testing.assert_array_almost_equal(layer.biases, initial_biases)
+
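For reference, the pre_update_params method added to GradientDescent above applies inverse decay, current_learning_rate = learning_rate / (1 + decay * iterations), with iterations incremented once per call. The snippet below is a minimal standalone sketch of that schedule, not code from the repository; the learning_rate and decay values are assumptions chosen purely for illustration.

    # Standalone sketch of the inverse-decay schedule used by pre_update_params.
    # learning_rate and decay are assumed example values, not project defaults.
    learning_rate = 0.1
    decay = 0.01
    for iterations in range(1, 6):
        current_learning_rate = learning_rate / (1.0 + decay * iterations)
        print(f"step {iterations}: current_learning_rate = {current_learning_rate:.6f}")

With these example values the rate drops from 0.1 to roughly 0.099010 after the first update and to roughly 0.095238 after the fifth.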