diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 283b08ed..115d8a81 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,10 @@ Changelog - :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance. +**Bug fix:** + +- Fixed numerical instability in the ``closed-form`` solver for unregularized models (``alpha=0``). The solver now uses weighted least squares directly on the square-root–weighted data matrix instead of forming the normal equations, matching the approach of ``sklearn.linear_model.LinearRegression`` and ensuring that fitting with explicit sample weights gives the same result as fitting with repeated rows. + 3.2.3 - 2026-03-18 ------------------ diff --git a/src/glum/_linalg.py b/src/glum/_linalg.py index 9ccb4ea5..3d466e6a 100644 --- a/src/glum/_linalg.py +++ b/src/glum/_linalg.py @@ -123,45 +123,70 @@ def _solve_least_squares_tikhonov( Learning*, 2nd ed., Springer, Section 3.4.1, Eq. 3.44. """ y_minus_offset = y if offset is None else (y - offset) - weighted_y = sample_weight * y_minus_offset is_sparse_p2 = sparse.issparse(P2) if is_sparse_p2: has_l2_penalty = np.any(P2.data != 0) - # Detect "diagonal sparse" and handle without densifying. - is_diag_p2 = sparse.isspmatrix_dia(P2) or ( - sparse.isspmatrix(P2) - and (P2.nnz <= P2.shape[0]) - and np.all(P2.tocoo().row == P2.tocoo().col) - ) else: has_l2_penalty = np.any(P2 != 0) - is_diag_p2 = getattr(P2, "ndim", 0) == 1 + + if not has_l2_penalty: + # Use direct WLS via the sqrt-weighted data matrix (avoids squaring the + # condition number of the normal equations). We set cond explicitly to + # n_cols * eps (matching sklearn's LinearRegression) rather than using + # the default (eps), which sits right at the boundary of the near-zero + # singular values. The default threshold is fragile: different LAPACK + # implementations for differently-shaped matrices (e.g. 15×31 weighted + # vs 27×31 repeated-rows) can compute near-zero singular values + # slightly differently, leading to inconsistent rank detection across + # platforms. n_cols * eps comfortably zeros out all null-space + # directions regardless of platform. + sw_sqrt = np.sqrt(sample_weight) + X_arr = X.toarray() + A = X_arr * sw_sqrt[:, None] + b = sw_sqrt * (y_minus_offset) + if fit_intercept: + # Prepend a column of sqrt(w) for the unpenalised intercept. + A = np.column_stack([sw_sqrt, A]) + cond = A.shape[1] * np.finfo(A.dtype).eps + return linalg.lstsq(A, b, cond=cond)[0] + + # ------------------------------------------------------------------ + # P2 != 0: the L2 penalty makes the normal equations well-conditioned + # so the Hessian approach remains efficient and stable. + # ------------------------------------------------------------------ + weighted_y = sample_weight * y_minus_offset + is_diag_p2 = (not is_sparse_p2 and getattr(P2, "ndim", 0) == 1) or ( + is_sparse_p2 + and ( + sparse.isspmatrix_dia(P2) + or (P2.nnz <= P2.shape[0] and np.all(P2.tocoo().row == P2.tocoo().col)) + ) + ) if fit_intercept: hessian = _safe_sandwich_dot(X, sample_weight, intercept=True) rhs = np.empty(X.shape[1] + 1, dtype=X.dtype) rhs[0] = weighted_y.sum() rhs[1:] = X.transpose_matvec(weighted_y) - if has_l2_penalty and is_diag_p2: + if is_diag_p2: diag_idx = np.arange(1, hessian.shape[0]) diag = P2 if not is_sparse_p2 else P2.diagonal() hessian[(diag_idx, diag_idx)] += diag - elif has_l2_penalty: + else: hessian[1:, 1:] += safe_toarray(P2) else: hessian = _safe_sandwich_dot(X, sample_weight) rhs = X.transpose_matvec(weighted_y) - if has_l2_penalty and is_diag_p2: + if is_diag_p2: diag = P2 if not is_sparse_p2 else P2.diagonal() hessian[np.diag_indices_from(hessian)] += diag - elif has_l2_penalty: + else: hessian += safe_toarray(P2) # With nonzero L2 penalty this system is often SPD, so we try the faster # Cholesky-oriented path (assume_a="pos"); fallback handles failures. - assume_a = "pos" if has_l2_penalty else "sym" try: - coef = linalg.solve(hessian, rhs, assume_a=assume_a) + coef = linalg.solve(hessian, rhs, assume_a="pos") except linalg.LinAlgError: # OLS can be singular / rank-deficient (e.g. collinearity). Use # minimum-norm least squares solution as a robust closed-form fallback. diff --git a/tests/glm/test_glm_base.py b/tests/glm/test_glm_base.py index ebf4dee4..d3a8ef0b 100644 --- a/tests/glm/test_glm_base.py +++ b/tests/glm/test_glm_base.py @@ -25,6 +25,8 @@ GLM_SOLVERS = ["irls-ls", "lbfgs", "irls-cd", "trust-constr", "closed-form"] estimators = [ + (GeneralizedLinearRegressor, {}), + (GeneralizedLinearRegressorCV, {}), (GeneralizedLinearRegressor, {"alpha": 1.0}), (GeneralizedLinearRegressorCV, {"n_alphas": 2}), ]