From 4ab88d2c614112785ca58ef7b362757b422752cb Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Tue, 10 Mar 2026 17:51:18 +0100
Subject: [PATCH 01/11] Add `train_deviance_path_` to
 `GeneralizedLinearRegressorCV`

Expose training-set deviance alongside validation deviance so users can
diagnose over-/under-fitting as regularization changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CHANGELOG.rst            |  8 ++++++++
 src/glum/_glm_cv.py      | 36 +++++++++++++++++++++++++++++-------
 tests/glm/test_glm_cv.py | 26 ++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 150d9b90..14d10fb4 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,14 @@
 Changelog
 =========
 
+3.3.0 - unreleased
+------------------
+
+**New features:**
+
+- :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance for each fold and alpha. Comparing it with ``deviance_path_`` (validation deviance) helps diagnose over-/under-fitting as regularization changes.
+
+
 3.2.0 - 2026-03-10
 ------------------
 
diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py
index 745232eb..a7032f02 100644
--- a/src/glum/_glm_cv.py
+++ b/src/glum/_glm_cv.py
@@ -298,9 +298,12 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
         Estimated intercepts at every point along the regularization path,
         per fold and l1_ratio.
 
-    deviance_path_: array, shape(n_folds, n_alphas)
+    deviance_path_: array, shape(n_folds, n_l1_ratios, n_alphas)
         Deviance for the test set on each fold, varying alpha.
 
+    train_deviance_path_: array, shape(n_folds, n_l1_ratios, n_alphas)
+        Deviance for the training set on each fold, varying alpha.
+
     robust : bool, optional (default = False)
         If true, then robust standard errors are computed by default.
 
@@ -668,12 +671,22 @@ def _fit_path(
             else:
                 offset_train, offset_test = None, None
 
+            x_train_raw = x_train
+
             def _get_deviance(coef):
                 mu = self._link_instance.inverse(
                     _safe_lin_pred(x_test, coef, offset_test)
                 )
                 return self._family_instance.deviance(y_test, mu, sample_weight=w_test)
 
+            def _get_train_deviance(coef):
+                mu = self._link_instance.inverse(
+                    _safe_lin_pred(x_train_raw, coef, offset_train)
+                )
+                return self._family_instance.deviance(
+                    y_train, mu, sample_weight=w_train
+                )
+
             if (
                 hasattr(self._family_instance, "_power")
                 and self._family_instance._power == 1.5
@@ -744,11 +757,12 @@ def _get_deviance(coef):
                     self.col_means_, self.col_stds_, coef[:, 0], coef[:, 1:]
                 )
                 assert isinstance(intercept_path_, np.ndarray)  # make mypy happy
-                deviance_path_ = [
-                    _get_deviance(_coef)
-                    for _coef in np.concatenate(
-                        [intercept_path_[:, np.newaxis], coef_path_], axis=1
-                    )
+                full_coef_path = np.concatenate(
+                    [intercept_path_[:, np.newaxis], coef_path_], axis=1
+                )
+                deviance_path_ = [_get_deviance(_coef) for _coef in full_coef_path]
+                train_deviance_path_ = [
+                    _get_train_deviance(_coef) for _coef in full_coef_path
                 ]
             else:
                 # set intercept to zero as the other linear models do
@@ -756,8 +770,11 @@ def _get_deviance(coef):
                     self.col_means_, self.col_stds_, np.zeros(coef.shape[0]), coef
                 )
                 deviance_path_ = [_get_deviance(_coef) for _coef in coef_path_]
+                train_deviance_path_ = [
+                    _get_train_deviance(_coef) for _coef in coef_path_
+                ]
 
-            return intercept_path_, coef_path_, deviance_path_
+            return intercept_path_, coef_path_, deviance_path_, train_deviance_path_
 
         jobs = (
             joblib.delayed(_fit_path)(
@@ -796,6 +813,11 @@ def _get_deviance(coef):
             (cv.get_n_splits(), len(l1_ratio), len(alphas[0])),
         )
 
+        self.train_deviance_path_ = np.reshape(
+            [elmt[3] for elmt in paths_data],
+            (cv.get_n_splits(), len(l1_ratio), len(alphas[0])),
+        )
+
         avg_deviance = self.deviance_path_.mean(axis=0)  # type: ignore
 
         best_l1, best_alpha = np.unravel_index(
diff --git a/tests/glm/test_glm_cv.py b/tests/glm/test_glm_cv.py
index 10a8ba6b..2458633d 100644
--- a/tests/glm/test_glm_cv.py
+++ b/tests/glm/test_glm_cv.py
@@ -185,6 +185,7 @@ def _assert_all_close(x, y):
     _assert_all_close(est_2.l1_ratio_, est_ref.l1_ratio_)
     _assert_all_close(est_2.coef_path_, est_ref.coef_path_)
     _assert_all_close(est_2.deviance_path_, est_ref.deviance_path_)
+    _assert_all_close(est_2.train_deviance_path_, est_ref.train_deviance_path_)
     _assert_all_close(est_2.intercept_, est_ref.intercept_)
     _assert_all_close(est_2.coef_, est_ref.coef_)
     _assert_all_close(
@@ -272,6 +273,31 @@ def test_cv_predict_with_alpha_index(l1_ratio):
     np.testing.assert_allclose(pred_alpha, pred_default)
 
 
+@pytest.mark.parametrize("fit_intercept", [False, True])
+def test_train_deviance_path(fit_intercept):
+    """train_deviance_path_ should have the correct shape and train deviance
+    should generally be lower than test deviance."""
+    np.random.seed(42)
+    n_samples, n_features = 200, 5
+    n_alphas = 5
+    X = np.random.randn(n_samples, n_features)
+    y = X @ np.array([1, 0.5, -0.5, 0, 0]) + np.random.randn(n_samples) * 0.1
+
+    model = GeneralizedLinearRegressorCV(
+        l1_ratio=0.5,
+        n_alphas=n_alphas,
+        min_alpha_ratio=1e-2,
+        fit_intercept=fit_intercept,
+    ).fit(X, y)
+
+    assert hasattr(model, "train_deviance_path_")
+    assert model.train_deviance_path_.shape == model.deviance_path_.shape
+
+    avg_train = model.train_deviance_path_.mean(axis=0)
+    avg_test = model.deviance_path_.mean(axis=0)
+    assert np.all(avg_train <= avg_test)
+
+
 @pytest.mark.parametrize("scale_factor", [1.0, 1000.0])
 @pytest.mark.parametrize("l1_ratio", [0.0, 0.5, 1.0])
 def test_match_with_base_class(l1_ratio, scale_factor):

From 8a5e0d4d52923986afb287aa59e322cd331fca80 Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Tue, 10 Mar 2026 18:05:09 +0100
Subject: [PATCH 02/11] Relax train deviance assertion to avoid flaky test

Compare overall means instead of per-alpha to avoid failures from
unlucky CV splits or high-regularization alphas.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/glm/test_glm_cv.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/glm/test_glm_cv.py b/tests/glm/test_glm_cv.py
index 2458633d..aefcc226 100644
--- a/tests/glm/test_glm_cv.py
+++ b/tests/glm/test_glm_cv.py
@@ -293,9 +293,8 @@ def test_train_deviance_path(fit_intercept):
     assert hasattr(model, "train_deviance_path_")
     assert model.train_deviance_path_.shape == model.deviance_path_.shape
 
-    avg_train = model.train_deviance_path_.mean(axis=0)
-    avg_test = model.deviance_path_.mean(axis=0)
-    assert np.all(avg_train <= avg_test)
+    # On average, train deviance should be lower than test deviance.
+    assert model.train_deviance_path_.mean() < model.deviance_path_.mean()
 
 
 @pytest.mark.parametrize("scale_factor", [1.0, 1000.0])

From 6a974a24fedcb6476859e44c72beff1d97e6951c Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Tue, 10 Mar 2026 18:06:54 +0100
Subject: [PATCH 03/11] small changes to changelog

---
 CHANGELOG.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 14d10fb4..0455d1a1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -10,9 +10,9 @@ Changelog
 3.3.0 - unreleased
 ------------------
 
-**New features:**
+**New feature:**
 
-- :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance for each fold and alpha. Comparing it with ``deviance_path_`` (validation deviance) helps diagnose over-/under-fitting as regularization changes.
+- :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance for each fold and alpha.
 
 
 3.2.0 - 2026-03-10

From 38a08cc13f999397c57851f2ab1e1eacadcd3efe Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Thu, 12 Mar 2026 17:18:06 +0100
Subject: [PATCH 04/11] Compute train deviance with standardized x_train

---
 src/glum/_glm_cv.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py
index a7032f02..57a142cf 100644
--- a/src/glum/_glm_cv.py
+++ b/src/glum/_glm_cv.py
@@ -671,22 +671,12 @@ def _fit_path(
             else:
                 offset_train, offset_test = None, None
 
-            x_train_raw = x_train
-
             def _get_deviance(coef):
                 mu = self._link_instance.inverse(
                     _safe_lin_pred(x_test, coef, offset_test)
                 )
                 return self._family_instance.deviance(y_test, mu, sample_weight=w_test)
 
-            def _get_train_deviance(coef):
-                mu = self._link_instance.inverse(
-                    _safe_lin_pred(x_train_raw, coef, offset_train)
-                )
-                return self._family_instance.deviance(
-                    y_train, mu, sample_weight=w_train
-                )
-
             if (
                 hasattr(self._family_instance, "_power")
                 and self._family_instance._power == 1.5
@@ -717,6 +707,14 @@ def _get_train_deviance(coef):
                 P2_no_alpha,
             )
 
+            def _get_train_deviance(coef):
+                mu = self._link_instance.inverse(
+                    _safe_lin_pred(x_train, coef, offset_train)
+                )
+                return self._family_instance.deviance(
+                    y_train, mu, sample_weight=w_train
+                )
+
             coef = self._get_start_coef(
                 x_train,
                 y_train,
@@ -752,6 +750,11 @@ def _get_train_deviance(coef):
                 b_ineq=b_ineq,
             )
 
+            # Compute train deviance with standardized x_train + raw coef
+            # (before unstandardize). StandardizedMatrix ensures the linear
+            # predictor is identical to using unstandardized data.
+            train_deviance_path_ = [_get_train_deviance(_coef) for _coef in coef]
+
             if self.fit_intercept:
                 intercept_path_, coef_path_ = unstandardize(
                     self.col_means_, self.col_stds_, coef[:, 0], coef[:, 1:]
@@ -761,18 +764,12 @@ def _get_train_deviance(coef):
                     [intercept_path_[:, np.newaxis], coef_path_], axis=1
                 )
                 deviance_path_ = [_get_deviance(_coef) for _coef in full_coef_path]
-                train_deviance_path_ = [
-                    _get_train_deviance(_coef) for _coef in full_coef_path
-                ]
             else:
                 # set intercept to zero as the other linear models do
                 intercept_path_, coef_path_ = unstandardize(
                     self.col_means_, self.col_stds_, np.zeros(coef.shape[0]), coef
                 )
                 deviance_path_ = [_get_deviance(_coef) for _coef in coef_path_]
-                train_deviance_path_ = [
-                    _get_train_deviance(_coef) for _coef in coef_path_
-                ]
 
             return intercept_path_, coef_path_, deviance_path_, train_deviance_path_
 

From d3c2ccecd9215e34959fdfd69bccfdccb5b5deae Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Thu, 12 Mar 2026 17:19:49 +0100
Subject: [PATCH 05/11] remove comment

---
 src/glum/_glm_cv.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py
index 57a142cf..62a56801 100644
--- a/src/glum/_glm_cv.py
+++ b/src/glum/_glm_cv.py
@@ -750,9 +750,6 @@ def _get_train_deviance(coef):
                 b_ineq=b_ineq,
             )
 
-            # Compute train deviance with standardized x_train + raw coef
-            # (before unstandardize). StandardizedMatrix ensures the linear
-            # predictor is identical to using unstandardized data.
             train_deviance_path_ = [_get_train_deviance(_coef) for _coef in coef]
 
             if self.fit_intercept:

From 5d41656c12b994ea212b84984193b9621d1b0c9e Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Thu, 12 Mar 2026 17:36:16 +0100
Subject: [PATCH 06/11] test that train deviance is at correct scale; set to
 severe overfitting

---
 tests/glm/test_glm_cv.py | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/tests/glm/test_glm_cv.py b/tests/glm/test_glm_cv.py
index aefcc226..7aae0032 100644
--- a/tests/glm/test_glm_cv.py
+++ b/tests/glm/test_glm_cv.py
@@ -274,26 +274,49 @@ def test_cv_predict_with_alpha_index(l1_ratio):
 
 
 @pytest.mark.parametrize("fit_intercept", [False, True])
-def test_train_deviance_path(fit_intercept):
-    """train_deviance_path_ should have the correct shape and train deviance
-    should generally be lower than test deviance."""
+@pytest.mark.parametrize("scale", [1.0, 1e4])
+def test_train_deviance_path(fit_intercept, scale):
+    """train_deviance_path_ should match manually computed train deviance. 
+    Severe overfitting should be visible by comparing train and test deviance.
+    """
     np.random.seed(42)
-    n_samples, n_features = 200, 5
+    n_samples, n_features = 10, 5
     n_alphas = 5
-    X = np.random.randn(n_samples, n_features)
-    y = X @ np.array([1, 0.5, -0.5, 0, 0]) + np.random.randn(n_samples) * 0.1
+    X = np.random.randn(n_samples, n_features) * scale
+    y = np.random.randn(n_samples)
 
+    cv = skl.model_selection.KFold(n_splits=3)
     model = GeneralizedLinearRegressorCV(
         l1_ratio=0.5,
         n_alphas=n_alphas,
         min_alpha_ratio=1e-2,
         fit_intercept=fit_intercept,
+        cv=cv,
     ).fit(X, y)
 
-    assert hasattr(model, "train_deviance_path_")
     assert model.train_deviance_path_.shape == model.deviance_path_.shape
 
-    # On average, train deviance should be lower than test deviance.
+    # Manually recompute train deviance from coef_path_ / intercept_path_
+    family = model._family_instance
+    link = model._link_instance
+    for fold_idx, (train_idx, _) in enumerate(cv.split(X)):
+        X_train = X[train_idx]
+        y_train = y[train_idx]
+        w_train = np.ones(len(train_idx)) / len(train_idx)
+        for alpha_idx in range(n_alphas):
+            coef = model.coef_path_[fold_idx, 0, alpha_idx]
+            intercept = model.intercept_path_[fold_idx, 0, alpha_idx]
+            lin_pred = X_train @ coef + intercept
+            mu = link.inverse(lin_pred)
+            expected = family.deviance(y_train, mu, sample_weight=w_train)
+            np.testing.assert_allclose(
+                model.train_deviance_path_[fold_idx, 0, alpha_idx],
+                expected,
+                rtol=1e-5,
+            )
+
+    # In this severely overfitted example, average train deviance should be lower than 
+    # average test deviance.
     assert model.train_deviance_path_.mean() < model.deviance_path_.mean()
 
 

From 2759ef122f0ca1ab711f1007f74ef5ca0376265f Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Fri, 13 Mar 2026 09:04:41 +0100
Subject: [PATCH 07/11] remove scaling test because it is too obvious

---
 tests/glm/test_glm_cv.py | 36 ++++--------------------------------
 1 file changed, 4 insertions(+), 32 deletions(-)

diff --git a/tests/glm/test_glm_cv.py b/tests/glm/test_glm_cv.py
index 7aae0032..f10c36a7 100644
--- a/tests/glm/test_glm_cv.py
+++ b/tests/glm/test_glm_cv.py
@@ -273,50 +273,22 @@ def test_cv_predict_with_alpha_index(l1_ratio):
     np.testing.assert_allclose(pred_alpha, pred_default)
 
 
-@pytest.mark.parametrize("fit_intercept", [False, True])
-@pytest.mark.parametrize("scale", [1.0, 1e4])
-def test_train_deviance_path(fit_intercept, scale):
-    """train_deviance_path_ should match manually computed train deviance. 
-    Severe overfitting should be visible by comparing train and test deviance.
-    """
+def test_train_deviance_path():
+    """train_deviance_path_ should have correct shape and train deviance
+    should be lower than test deviance in a severely overfitted example."""
     np.random.seed(42)
     n_samples, n_features = 10, 5
     n_alphas = 5
-    X = np.random.randn(n_samples, n_features) * scale
+    X = np.random.randn(n_samples, n_features) * 1e4
     y = np.random.randn(n_samples)
 
-    cv = skl.model_selection.KFold(n_splits=3)
     model = GeneralizedLinearRegressorCV(
         l1_ratio=0.5,
         n_alphas=n_alphas,
         min_alpha_ratio=1e-2,
-        fit_intercept=fit_intercept,
-        cv=cv,
     ).fit(X, y)
 
     assert model.train_deviance_path_.shape == model.deviance_path_.shape
-
-    # Manually recompute train deviance from coef_path_ / intercept_path_
-    family = model._family_instance
-    link = model._link_instance
-    for fold_idx, (train_idx, _) in enumerate(cv.split(X)):
-        X_train = X[train_idx]
-        y_train = y[train_idx]
-        w_train = np.ones(len(train_idx)) / len(train_idx)
-        for alpha_idx in range(n_alphas):
-            coef = model.coef_path_[fold_idx, 0, alpha_idx]
-            intercept = model.intercept_path_[fold_idx, 0, alpha_idx]
-            lin_pred = X_train @ coef + intercept
-            mu = link.inverse(lin_pred)
-            expected = family.deviance(y_train, mu, sample_weight=w_train)
-            np.testing.assert_allclose(
-                model.train_deviance_path_[fold_idx, 0, alpha_idx],
-                expected,
-                rtol=1e-5,
-            )
-
-    # In this severely overfitted example, average train deviance should be lower than 
-    # average test deviance.
     assert model.train_deviance_path_.mean() < model.deviance_path_.mean()
 
 

From 701bde79272639cda1a26366c41edcfaad8baa50 Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Fri, 13 Mar 2026 09:20:36 +0100
Subject: [PATCH 08/11] simplify slightly

---
 tests/glm/test_glm_cv.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/glm/test_glm_cv.py b/tests/glm/test_glm_cv.py
index f10c36a7..0bc3282f 100644
--- a/tests/glm/test_glm_cv.py
+++ b/tests/glm/test_glm_cv.py
@@ -279,11 +279,10 @@ def test_train_deviance_path():
     np.random.seed(42)
     n_samples, n_features = 10, 5
     n_alphas = 5
-    X = np.random.randn(n_samples, n_features) * 1e4
+    X = np.random.randn(n_samples, n_features)
     y = np.random.randn(n_samples)
 
     model = GeneralizedLinearRegressorCV(
-        l1_ratio=0.5,
         n_alphas=n_alphas,
         min_alpha_ratio=1e-2,
     ).fit(X, y)

From a8a0f21e493bacd62912dcc599181e5bca9e0a97 Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Fri, 13 Mar 2026 09:53:48 +0100
Subject: [PATCH 09/11] Resolve leftover merge conflict markers in CHANGELOG.rst

---
 CHANGELOG.rst | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c4c2a899..c61bb112 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,21 +7,17 @@
 Changelog
 =========
 
-<<<<<<< HEAD
+
 3.3.0 - unreleased
 ------------------
 
 **New feature:**
 
 - :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance for each fold and alpha.
-=======
-3.2.1 - unreleased
-------------------
 
 **Other changes:**
 
 - Downgraded log messages in ``align_df_categories`` and ``add_missing_categories`` from INFO to DEBUG, and deduplicated them so they are emitted only once per column per fitted model.
->>>>>>> main
 
 
 3.2.0 - 2026-03-10

From 056f54ac45798fb2383679b3f8d6ddef3f8d5225 Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Tue, 17 Mar 2026 18:31:50 +0100
Subject: [PATCH 10/11] small comment why test deviance requires branching by
 fit_intercept

---
 src/glum/_glm_cv.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py
index 55324ee5..7ec5b880 100644
--- a/src/glum/_glm_cv.py
+++ b/src/glum/_glm_cv.py
@@ -753,6 +753,8 @@ def _get_train_deviance(coef):
 
             train_deviance_path_ = [_get_train_deviance(_coef) for _coef in coef]
 
+            # Unlike train deviance, test deviance is computed on unstandardized
+            # x, so coefficients are rescaled first; this depends on fit_intercept.
             if self.fit_intercept:
                 intercept_path_, coef_path_ = unstandardize(
                     self.col_means_, self.col_stds_, coef[:, 0], coef[:, 1:]

From fcb677b50091b80a8fb0bc10829f0f93250fbbfc Mon Sep 17 00:00:00 2001
From: Matthias Schmidtblaicher <matthias.schmidtblaicher@quantco.com>
Date: Thu, 19 Mar 2026 18:42:09 +0100
Subject: [PATCH 11/11] Shorten README dataframe-backends bullet (leftover from #990)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ca28a444..8934e66a 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ We believe that for GLM development, broad support for distributions, regulariza
 * Built-in formula-based model specification using `formulaic`
 * Classical statistical inference for unregularized models
 * Box constraints, linear inequality constraints, sample weights, offsets
-* Support for multiple dataframe backends (pandas, polars, and more) via `narwhals`
+* Multiple dataframe backends (pandas, polars, and more) via `narwhals`
 
 Performance also matters, so we conducted extensive benchmarks against other modern libraries. Although performance depends on the specific problem, we find that when N >> K (there are more observations than predictors), `glum` is consistently much faster for a wide range of problems. This repo includes the benchmarking tools in the `glum_benchmarks` module. For details, [see here](glum_benchmarks/README.md).