From 65464643b8ab06ce98ea1c74b0cf4c6d3bf0cee7 Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 08:46:45 +0100 Subject: [PATCH 1/4] Test scikit-learn compatibility with default parameters. --- tests/glm/test_glm_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/glm/test_glm_base.py b/tests/glm/test_glm_base.py index ebf4dee4..d3a8ef0b 100644 --- a/tests/glm/test_glm_base.py +++ b/tests/glm/test_glm_base.py @@ -25,6 +25,8 @@ GLM_SOLVERS = ["irls-ls", "lbfgs", "irls-cd", "trust-constr", "closed-form"] estimators = [ + (GeneralizedLinearRegressor, {}), + (GeneralizedLinearRegressorCV, {}), (GeneralizedLinearRegressor, {"alpha": 1.0}), (GeneralizedLinearRegressorCV, {"n_alphas": 2}), ] From 299d9a1b502e3871b2655948aa19523cda34e899 Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 09:13:12 +0100 Subject: [PATCH 2/4] Fix numerical instability in unregularized closed-form solver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When alpha=0 (no L2 penalty), the closed-form solver previously built normal equations (H = X'WX) and solved them with scipy.linalg.solve. Forming normal equations squares the condition number; for underdetermined systems this caused scipy to return an inaccurate result that differed depending on whether sample weights were supplied as explicit weights or as repeated rows – violating sklearn's check_sample_weight_equivalence contract. Fix: when P2 = 0, solve the equivalent WLS problem directly via lstsq on the sqrt-weighted data matrix (sqrt(W)·X, sqrt(W)·y). This matches the approach used by sklearn.linear_model.LinearRegression, avoids squaring the condition number, and gives a unique minimum-norm solution that is consistent across the two representations. When P2 != 0 the L2 penalty already makes the normal equations well-conditioned, so that path is retained unchanged. 
Co-Authored-By: Claude Sonnet 4.6 --- src/glum/_linalg.py | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/glum/_linalg.py b/src/glum/_linalg.py index 9ccb4ea5..94f51c7c 100644 --- a/src/glum/_linalg.py +++ b/src/glum/_linalg.py @@ -123,45 +123,61 @@ def _solve_least_squares_tikhonov( Learning*, 2nd ed., Springer, Section 3.4.1, Eq. 3.44. """ y_minus_offset = y if offset is None else (y - offset) - weighted_y = sample_weight * y_minus_offset is_sparse_p2 = sparse.issparse(P2) if is_sparse_p2: has_l2_penalty = np.any(P2.data != 0) - # Detect "diagonal sparse" and handle without densifying. - is_diag_p2 = sparse.isspmatrix_dia(P2) or ( - sparse.isspmatrix(P2) - and (P2.nnz <= P2.shape[0]) - and np.all(P2.tocoo().row == P2.tocoo().col) - ) else: has_l2_penalty = np.any(P2 != 0) - is_diag_p2 = getattr(P2, "ndim", 0) == 1 + + if not has_l2_penalty: + # Use direct WLS via sqrt-weighted data (avoids squaring the condition + # number of the normal equations) for maximum numerical stability. + sw_sqrt = np.sqrt(sample_weight) + X_arr = X.toarray() + A = X_arr * sw_sqrt[:, None] + b = sw_sqrt * y_minus_offset + if fit_intercept: + # Prepend a column of sqrt(w) for the unpenalised intercept. + A = np.column_stack([sw_sqrt, A]) + return linalg.lstsq(A, b)[0] + + # ------------------------------------------------------------------ + # P2 != 0: the L2 penalty makes the normal equations well-conditioned + # so the Hessian approach remains efficient and stable. 
+ # ------------------------------------------------------------------ + weighted_y = sample_weight * y_minus_offset + is_diag_p2 = (not is_sparse_p2 and getattr(P2, "ndim", 0) == 1) or ( + is_sparse_p2 + and ( + sparse.isspmatrix_dia(P2) + or (P2.nnz <= P2.shape[0] and np.all(P2.tocoo().row == P2.tocoo().col)) + ) + ) if fit_intercept: hessian = _safe_sandwich_dot(X, sample_weight, intercept=True) rhs = np.empty(X.shape[1] + 1, dtype=X.dtype) rhs[0] = weighted_y.sum() rhs[1:] = X.transpose_matvec(weighted_y) - if has_l2_penalty and is_diag_p2: + if is_diag_p2: diag_idx = np.arange(1, hessian.shape[0]) diag = P2 if not is_sparse_p2 else P2.diagonal() hessian[(diag_idx, diag_idx)] += diag - elif has_l2_penalty: + else: hessian[1:, 1:] += safe_toarray(P2) else: hessian = _safe_sandwich_dot(X, sample_weight) rhs = X.transpose_matvec(weighted_y) - if has_l2_penalty and is_diag_p2: + if is_diag_p2: diag = P2 if not is_sparse_p2 else P2.diagonal() hessian[np.diag_indices_from(hessian)] += diag - elif has_l2_penalty: + else: hessian += safe_toarray(P2) # With nonzero L2 penalty this system is often SPD, so we try the faster # Cholesky-oriented path (assume_a="pos"); fallback handles failures. - assume_a = "pos" if has_l2_penalty else "sym" try: - coef = linalg.solve(hessian, rhs, assume_a=assume_a) + coef = linalg.solve(hessian, rhs, assume_a="pos") except linalg.LinAlgError: # OLS can be singular / rank-deficient (e.g. collinearity). Use # minimum-norm least squares solution as a robust closed-form fallback. 
From 9c5a52091e52f25bd7167ebe7e1be27191efa1fb Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 09:24:56 +0100 Subject: [PATCH 3/4] Add changelog entry for closed-form solver fix --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 283b08ed..115d8a81 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,10 @@ Changelog - :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance. +**Bug fix:** + +- Fixed numerical instability in the ``closed-form`` solver for unregularized models (``alpha=0``). The solver now uses weighted least squares directly on the square-root–weighted data matrix instead of forming the normal equations, matching the approach of ``sklearn.linear_model.LinearRegression`` and ensuring that fitting with explicit sample weights gives the same result as fitting with repeated rows. + 3.2.3 - 2026-03-18 ------------------ From 0cd07ed33ade4dbe47f32b14ca18bc2f84e2e658 Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 11:09:08 +0100 Subject: [PATCH 4/4] Fix closed-form solver to use explicit cond for platform-consistent rank detection --- src/glum/_linalg.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/glum/_linalg.py b/src/glum/_linalg.py index 94f51c7c..3d466e6a 100644 --- a/src/glum/_linalg.py +++ b/src/glum/_linalg.py @@ -130,16 +130,25 @@ def _solve_least_squares_tikhonov( has_l2_penalty = np.any(P2 != 0) if not has_l2_penalty: - # Use direct WLS via sqrt-weighted data (avoids squaring the condition - # number of the normal equations) for maximum numerical stability. + # Use direct WLS via the sqrt-weighted data matrix (avoids squaring the + # condition number of the normal equations). 
We set cond explicitly to + # n_cols * eps (matching sklearn's LinearRegression) rather than using + # the default (eps), which sits right at the boundary of the near-zero + # singular values. The default threshold is fragile: different LAPACK + # implementations for differently-shaped matrices (e.g. 15×31 weighted + # vs 27×31 repeated-rows) can compute near-zero singular values + # slightly differently, leading to inconsistent rank detection across + # platforms. n_cols * eps comfortably zeros out all null-space + # directions regardless of platform. sw_sqrt = np.sqrt(sample_weight) X_arr = X.toarray() A = X_arr * sw_sqrt[:, None] - b = sw_sqrt * y_minus_offset + b = sw_sqrt * (y_minus_offset) if fit_intercept: # Prepend a column of sqrt(w) for the unpenalised intercept. A = np.column_stack([sw_sqrt, A]) - return linalg.lstsq(A, b)[0] + cond = A.shape[1] * np.finfo(A.dtype).eps + return linalg.lstsq(A, b, cond=cond)[0] # ------------------------------------------------------------------ # P2 != 0: the L2 penalty makes the normal equations well-conditioned