Quantco · Jan Tilly (jtilly) · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
@@ -15,6 +15,10 @@ Changelog
 
 - :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance.
 
+**Bug fix:**
+
+- Fixed numerical instability in the ``closed-form`` solver for unregularized models (``alpha=0``). The solver now uses weighted least squares directly on the square-root–weighted data matrix instead of forming the normal equations, matching the approach of ``sklearn.linear_model.LinearRegression`` and ensuring that fitting with explicit sample weights gives the same result as fitting with repeated rows.
+
 
 3.2.3 - 2026-03-18
 ------------------

@@ -123,45 +123,70 @@ def _solve_least_squares_tikhonov(
     Learning*, 2nd ed., Springer, Section 3.4.1, Eq. 3.44.
     """
     y_minus_offset = y if offset is None else (y - offset)
-    weighted_y = sample_weight * y_minus_offset
     is_sparse_p2 = sparse.issparse(P2)
     if is_sparse_p2:
         has_l2_penalty = np.any(P2.data != 0)
-        # Detect "diagonal sparse" and handle without densifying.
-        is_diag_p2 = sparse.isspmatrix_dia(P2) or (
-            sparse.isspmatrix(P2)
-            and (P2.nnz <= P2.shape[0])
-            and np.all(P2.tocoo().row == P2.tocoo().col)
-        )
     else:
         has_l2_penalty = np.any(P2 != 0)
-        is_diag_p2 = getattr(P2, "ndim", 0) == 1
+
+    if not has_l2_penalty:
+        # Use direct WLS via the sqrt-weighted data matrix (avoids squaring the
+        # condition number of the normal equations). We set cond explicitly to
+        # n_cols * eps (matching sklearn's LinearRegression) rather than using
+        # the default (eps), which sits right at the boundary of the near-zero
+        # singular values. The default threshold is fragile: different LAPACK
+        # implementations for differently-shaped matrices (e.g. 15×31 weighted
+        # vs 27×31 repeated-rows) can compute near-zero singular values
+        # slightly differently, leading to inconsistent rank detection across
+        # platforms.  n_cols * eps comfortably zeros out all null-space
+        # directions regardless of platform.
+        sw_sqrt = np.sqrt(sample_weight)
+        X_arr = X.toarray()
+        A = X_arr * sw_sqrt[:, None]
+        b = sw_sqrt * (y_minus_offset)
+        if fit_intercept:
+            # Prepend a column of sqrt(w) for the unpenalised intercept.
+            A = np.column_stack([sw_sqrt, A])
+        cond = A.shape[1] * np.finfo(A.dtype).eps
+        return linalg.lstsq(A, b, cond=cond)[0]
+
+    # ------------------------------------------------------------------
+    # P2 != 0: the L2 penalty makes the normal equations well-conditioned
+    # so the Hessian approach remains efficient and stable.
+    # ------------------------------------------------------------------
+    weighted_y = sample_weight * y_minus_offset
+    is_diag_p2 = (not is_sparse_p2 and getattr(P2, "ndim", 0) == 1) or (
+        is_sparse_p2
+        and (
+            sparse.isspmatrix_dia(P2)
+            or (P2.nnz <= P2.shape[0] and np.all(P2.tocoo().row == P2.tocoo().col))
+        )
+    )
 
     if fit_intercept:
         hessian = _safe_sandwich_dot(X, sample_weight, intercept=True)
         rhs = np.empty(X.shape[1] + 1, dtype=X.dtype)
         rhs[0] = weighted_y.sum()
         rhs[1:] = X.transpose_matvec(weighted_y)
-        if has_l2_penalty and is_diag_p2:
+        if is_diag_p2:
             diag_idx = np.arange(1, hessian.shape[0])
             diag = P2 if not is_sparse_p2 else P2.diagonal()
             hessian[(diag_idx, diag_idx)] += diag
-        elif has_l2_penalty:
+        else:
             hessian[1:, 1:] += safe_toarray(P2)
     else:
         hessian = _safe_sandwich_dot(X, sample_weight)
         rhs = X.transpose_matvec(weighted_y)
-        if has_l2_penalty and is_diag_p2:
+        if is_diag_p2:
             diag = P2 if not is_sparse_p2 else P2.diagonal()
             hessian[np.diag_indices_from(hessian)] += diag
-        elif has_l2_penalty:
+        else:
             hessian += safe_toarray(P2)
 
     # With nonzero L2 penalty this system is often SPD, so we try the faster
     # Cholesky-oriented path (assume_a="pos"); fallback handles failures.
-    assume_a = "pos" if has_l2_penalty else "sym"
     try:
-        coef = linalg.solve(hessian, rhs, assume_a=assume_a)
+        coef = linalg.solve(hessian, rhs, assume_a="pos")
     except linalg.LinAlgError:
         # OLS can be singular / rank-deficient (e.g. collinearity). Use
         # minimum-norm least squares solution as a robust closed-form fallback.

@@ -25,6 +25,8 @@
 GLM_SOLVERS = ["irls-ls", "lbfgs", "irls-cd", "trust-constr", "closed-form"]
 
 estimators = [
+    (GeneralizedLinearRegressor, {}),
+    (GeneralizedLinearRegressorCV, {}),
     (GeneralizedLinearRegressor, {"alpha": 1.0}),
     (GeneralizedLinearRegressorCV, {"n_alphas": 2}),
 ]