From 65464643b8ab06ce98ea1c74b0cf4c6d3bf0cee7 Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 08:46:45 +0100 Subject: [PATCH 1/4] Test scikit-learn compatibility with default parameters. --- tests/glm/test_glm_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/glm/test_glm_base.py b/tests/glm/test_glm_base.py index ebf4dee4..d3a8ef0b 100644 --- a/tests/glm/test_glm_base.py +++ b/tests/glm/test_glm_base.py @@ -25,6 +25,8 @@ GLM_SOLVERS = ["irls-ls", "lbfgs", "irls-cd", "trust-constr", "closed-form"] estimators = [ + (GeneralizedLinearRegressor, {}), + (GeneralizedLinearRegressorCV, {}), (GeneralizedLinearRegressor, {"alpha": 1.0}), (GeneralizedLinearRegressorCV, {"n_alphas": 2}), ] From 299d9a1b502e3871b2655948aa19523cda34e899 Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 09:13:12 +0100 Subject: [PATCH 2/4] Fix numerical instability in unregularized closed-form solver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When alpha=0 (no L2 penalty), the closed-form solver previously built normal equations (H = X'WX) and solved them with scipy.linalg.solve. Forming normal equations squares the condition number; for underdetermined systems this caused scipy to return an inaccurate result that differed depending on whether sample weights were supplied as explicit weights or as repeated rows – violating sklearn's check_sample_weight_equivalence contract. Fix: when P2 = 0, solve the equivalent WLS problem directly via lstsq on the sqrt-weighted data matrix (sqrt(W)·X, sqrt(W)·y). This matches the approach used by sklearn.linear_model.LinearRegression, avoids squaring the condition number, and gives a unique minimum-norm solution that is consistent across the two representations. When P2 != 0 the L2 penalty already makes the normal equations well-conditioned, so that path is retained unchanged. 
Co-Authored-By: Claude Sonnet 4.6 --- src/glum/_linalg.py | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/glum/_linalg.py b/src/glum/_linalg.py index 9ccb4ea5..94f51c7c 100644 --- a/src/glum/_linalg.py +++ b/src/glum/_linalg.py @@ -123,45 +123,61 @@ def _solve_least_squares_tikhonov( Learning*, 2nd ed., Springer, Section 3.4.1, Eq. 3.44. """ y_minus_offset = y if offset is None else (y - offset) - weighted_y = sample_weight * y_minus_offset is_sparse_p2 = sparse.issparse(P2) if is_sparse_p2: has_l2_penalty = np.any(P2.data != 0) - # Detect "diagonal sparse" and handle without densifying. - is_diag_p2 = sparse.isspmatrix_dia(P2) or ( - sparse.isspmatrix(P2) - and (P2.nnz <= P2.shape[0]) - and np.all(P2.tocoo().row == P2.tocoo().col) - ) else: has_l2_penalty = np.any(P2 != 0) - is_diag_p2 = getattr(P2, "ndim", 0) == 1 + + if not has_l2_penalty: + # Use direct WLS via sqrt-weighted data (avoids squaring the condition + # number of the normal equations) for maximum numerical stability. + sw_sqrt = np.sqrt(sample_weight) + X_arr = X.toarray() + A = X_arr * sw_sqrt[:, None] + b = sw_sqrt * y_minus_offset + if fit_intercept: + # Prepend a column of sqrt(w) for the unpenalised intercept. + A = np.column_stack([sw_sqrt, A]) + return linalg.lstsq(A, b)[0] + + # ------------------------------------------------------------------ + # P2 != 0: the L2 penalty makes the normal equations well-conditioned + # so the Hessian approach remains efficient and stable. 
+ # ------------------------------------------------------------------ + weighted_y = sample_weight * y_minus_offset + is_diag_p2 = (not is_sparse_p2 and getattr(P2, "ndim", 0) == 1) or ( + is_sparse_p2 + and ( + sparse.isspmatrix_dia(P2) + or (P2.nnz <= P2.shape[0] and np.all(P2.tocoo().row == P2.tocoo().col)) + ) + ) if fit_intercept: hessian = _safe_sandwich_dot(X, sample_weight, intercept=True) rhs = np.empty(X.shape[1] + 1, dtype=X.dtype) rhs[0] = weighted_y.sum() rhs[1:] = X.transpose_matvec(weighted_y) - if has_l2_penalty and is_diag_p2: + if is_diag_p2: diag_idx = np.arange(1, hessian.shape[0]) diag = P2 if not is_sparse_p2 else P2.diagonal() hessian[(diag_idx, diag_idx)] += diag - elif has_l2_penalty: + else: hessian[1:, 1:] += safe_toarray(P2) else: hessian = _safe_sandwich_dot(X, sample_weight) rhs = X.transpose_matvec(weighted_y) - if has_l2_penalty and is_diag_p2: + if is_diag_p2: diag = P2 if not is_sparse_p2 else P2.diagonal() hessian[np.diag_indices_from(hessian)] += diag - elif has_l2_penalty: + else: hessian += safe_toarray(P2) # With nonzero L2 penalty this system is often SPD, so we try the faster # Cholesky-oriented path (assume_a="pos"); fallback handles failures. - assume_a = "pos" if has_l2_penalty else "sym" try: - coef = linalg.solve(hessian, rhs, assume_a=assume_a) + coef = linalg.solve(hessian, rhs, assume_a="pos") except linalg.LinAlgError: # OLS can be singular / rank-deficient (e.g. collinearity). Use # minimum-norm least squares solution as a robust closed-form fallback. 
From 9c5a52091e52f25bd7167ebe7e1be27191efa1fb Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 09:24:56 +0100 Subject: [PATCH 3/4] Add changelog entry for closed-form solver fix --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 283b08ed..115d8a81 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,10 @@ Changelog - :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance. +**Bug fix:** + +- Fixed numerical instability in the ``closed-form`` solver for unregularized models (``alpha=0``). The solver now uses weighted least squares directly on the square-root–weighted data matrix instead of forming the normal equations, matching the approach of ``sklearn.linear_model.LinearRegression`` and ensuring that fitting with explicit sample weights gives the same result as fitting with repeated rows. + 3.2.3 - 2026-03-18 ------------------ From 0cd07ed33ade4dbe47f32b14ca18bc2f84e2e658 Mon Sep 17 00:00:00 2001 From: Jan Tilly Date: Wed, 25 Mar 2026 11:09:08 +0100 Subject: [PATCH 4/4] Fix closed-form solver to use explicit cond for platform-consistent rank detection --- src/glum/_linalg.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/glum/_linalg.py b/src/glum/_linalg.py index 94f51c7c..3d466e6a 100644 --- a/src/glum/_linalg.py +++ b/src/glum/_linalg.py @@ -130,16 +130,25 @@ def _solve_least_squares_tikhonov( has_l2_penalty = np.any(P2 != 0) if not has_l2_penalty: - # Use direct WLS via sqrt-weighted data (avoids squaring the condition - # number of the normal equations) for maximum numerical stability. + # Use direct WLS via the sqrt-weighted data matrix (avoids squaring the + # condition number of the normal equations). 
We set cond explicitly to + # n_cols * eps (matching sklearn's LinearRegression) rather than using + # the default (eps), which sits right at the boundary of the near-zero + # singular values. The default threshold is fragile: different LAPACK + # implementations for differently-shaped matrices (e.g. 15×31 weighted + # vs 27×31 repeated-rows) can compute near-zero singular values + # slightly differently, leading to inconsistent rank detection across + # platforms. n_cols * eps comfortably zeros out all null-space + # directions regardless of platform. sw_sqrt = np.sqrt(sample_weight) X_arr = X.toarray() A = X_arr * sw_sqrt[:, None] - b = sw_sqrt * y_minus_offset + b = sw_sqrt * (y_minus_offset) if fit_intercept: # Prepend a column of sqrt(w) for the unpenalised intercept. A = np.column_stack([sw_sqrt, A]) - return linalg.lstsq(A, b)[0] + cond = A.shape[1] * np.finfo(A.dtype).eps + return linalg.lstsq(A, b, cond=cond)[0] # ------------------------------------------------------------------ # P2 != 0: the L2 penalty makes the normal equations well-conditioned