diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9b4105d7..a389d25e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,14 @@ Changelog ========= +3.2.3 - 2026-03-18 +------------------ + +**Bug fix:** + +- Fixed incorrect call in :meth:`~glum.InverseGaussianDistribution.log_likelihood`. The previous implementation always returned NaN. + + 3.2.2 - 2026-03-17 ------------------ @@ -14,6 +22,7 @@ Changelog - Fixed incorrect formula in :meth:`~glum.CloglogLink.inverse_derivative2`. This affected observed information matrix computation and robust/clustered standard errors for models using the complementary log-log link. + 3.2.1 - 2026-03-16 ------------------ diff --git a/src/glum/_distribution.py b/src/glum/_distribution.py index 8f968629..bb6f8a09 100644 --- a/src/glum/_distribution.py +++ b/src/glum/_distribution.py @@ -1145,7 +1145,7 @@ def deviance(self, y, mu, sample_weight=None) -> float: # noqa D y, mu, sample_weight = _as_float_arrays(y, mu, sample_weight) sample_weight = np.ones_like(y) if sample_weight is None else sample_weight - return tweedie_deviance(y, sample_weight, mu, p=3.0) + return inv_gaussian_deviance(y, sample_weight, mu, dispersion=1.0) def unit_deviance(self, y, mu): # noqa D return numexpr.evaluate("y / (mu**2) + 1 / y - 2 / mu") @@ -1205,7 +1205,7 @@ def log_likelihood(self, y, mu, sample_weight=None, dispersion=None) -> float: if dispersion is None: dispersion = self.dispersion(y, mu, sample_weight) - return tweedie_log_likelihood(y, sample_weight, mu, 3.0, float(dispersion)) + return inv_gaussian_log_likelihood(y, sample_weight, mu, float(dispersion)) def dispersion( # noqa D self, y, mu, sample_weight=None, ddof=1, method="pearson" diff --git a/src/glum/_functions.pyx b/src/glum/_functions.pyx index ea70effb..deab5aeb 100644 --- a/src/glum/_functions.pyx +++ b/src/glum/_functions.pyx @@ -69,10 +69,10 @@ def normal_log_likelihood( floating dispersion, ): cdef int i # loop counter - cdef floating sum_weights # helper cdef int n = y.shape[0] # loop length cdef floating ll = 0.0 # output + cdef floating sum_weights = 0.0 # helper for i in prange(n, nogil=True): ll -= weights[i] * (y[i] - mu[i]) ** 2 @@ -203,13 +203,15 @@ def gamma_log_likelihood( floating dispersion, ): cdef int i # loop counter - cdef floating ln_y, sum_weights # helpers + cdef floating ln_y # helper - cdef int n = y.shape[0] # loop length - cdef floating ll = 0.0 # output cdef floating inv_dispersion = 1 / dispersion cdef floating normalization = log(dispersion) * inv_dispersion + lgamma(inv_dispersion) + cdef int n = y.shape[0] # loop length + cdef floating ll = 0.0 # output + cdef floating sum_weights = 0.0 # helper + for i in prange(n, nogil=True): ln_y = log(y[i]) ll += weights[i] * (inv_dispersion * (ln_y - log(mu[i]) - y[i] / mu[i]) - ln_y) @@ -287,22 +289,23 @@ def inv_gaussian_log_likelihood( const_floating1d mu, floating dispersion, ): - cdef int n = y.shape[0] # loop length cdef int i # loop counter - cdef floating sum_weights # helper - cdef floating ll = 0.0 # output + cdef floating scale = 1 / (2 * dispersion) cdef floating sq_err # helper - cdef floating inv_dispersion = 1 / (2 * dispersion) # helper + + cdef int n = y.shape[0] # loop length + cdef floating sum_weights = 0.0 # helper + cdef floating ll = 0.0 # output for i in prange(n, nogil=True): sq_err = (y[i] / mu[i] - 1) ** 2 - ll -= weights[i] * (inv_dispersion * sq_err / y[i] + log(y[i]) * 3 / 2) - sum_weights -= weights[i] + ll -= weights[i] * (scale * sq_err / y[i] + log(y[i]) * 3 / 2) + sum_weights += weights[i] - return ll + sum_weights * log(inv_dispersion / M_PI) + return ll + sum_weights * log(scale / M_PI) / 2 def inv_gaussian_deviance( const_floating1d y, diff --git a/src/glum/_link.py b/src/glum/_link.py index 7fc3e962..34f36cf8 100644 --- a/src/glum/_link.py +++ b/src/glum/_link.py @@ -110,7 +110,7 @@ def __init__(self, power): def __eq__(self, other): # noqa D return isinstance(other, self.__class__) and (self.power == other.power) - def __tweedie__repr__(self): # noqa D + def __tweedie_repr__(self): # noqa D return self.__class__(self.power) @property diff --git a/tests/glm/test_distribution.py b/tests/glm/test_distribution.py index 4e5fdc50..a3d64155 100644 --- a/tests/glm/test_distribution.py +++ b/tests/glm/test_distribution.py @@ -1,6 +1,7 @@ import numpy as np import pytest import scipy as sp +import scipy.stats import tabmat as tm from glum._distribution import ( @@ -540,6 +541,34 @@ def test_binomial_deviance_dispersion_loglihood(weighted): np.testing.assert_approx_equal(ll, -3.365058) +@pytest.mark.parametrize("dispersion", [1, 2]) +def test_inv_gaussian_deviance_loglihood(dispersion): + + n = 1_000 + rng = np.random.default_rng(42) + + mu = rng.uniform(1, 2, size=n) + + y = rng.wald(mu, 1 / dispersion, size=n) + + candidate = InverseGaussianDistribution().log_likelihood(y, mu, None, dispersion) + + target = scipy.stats.invgauss.logpdf( + y, mu * dispersion, scale=(1 / dispersion) + ).sum() + + np.testing.assert_approx_equal(candidate, target) + + candidate = InverseGaussianDistribution().deviance(y, mu) + + target = 2 * ( + scipy.stats.invgauss.logpdf(y, y).sum() + - scipy.stats.invgauss.logpdf(y, mu).sum() + ) + + np.testing.assert_approx_equal(candidate, target) + + @pytest.mark.parametrize("weighted", [False, True]) def test_negative_binomial_deviance_dispersion_loglihood(weighted): # y <- c(0, 1, 0, 1, 0)