diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a389d25e..283b08ed 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,15 @@ Changelog ========= + +3.3.0 - unreleased +------------------ + +**New feature:** + +- :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance. + + 3.2.3 - 2026-03-18 ------------------ diff --git a/README.md b/README.md index ca28a444..8934e66a 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ We believe that for GLM development, broad support for distributions, regulariza * Built-in formula-based model specification using `formulaic` * Classical statistical inference for unregularized models * Box constraints, linear inequality constraints, sample weights, offsets -* Support for multiple dataframe backends (pandas, polars, and more) via `narwhals` +* Multiple dataframe backends (pandas, polars, and more) via `narwhals` Performance also matters, so we conducted extensive benchmarks against other modern libraries. Although performance depends on the specific problem, we find that when N >> K (there are more observations than predictors), `glum` is consistently much faster for a wide range of problems. This repo includes the benchmarking tools in the `glum_benchmarks` module. For details, [see here](glum_benchmarks/README.md). diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py index 41aa459d..7ec5b880 100644 --- a/src/glum/_glm_cv.py +++ b/src/glum/_glm_cv.py @@ -298,9 +298,12 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase): Estimated intercepts at every point along the regularization path, per fold and l1_ratio. - deviance_path_: array, shape(n_folds, n_alphas) + deviance_path_: array, shape(n_folds, n_l1_ratios, n_alphas) Deviance for the test set on each fold, varying alpha. 
+ train_deviance_path_: array, shape(n_folds, n_l1_ratios, n_alphas) + Deviance for the training set on each fold, varying alpha. + robust : bool, optional (default = False) If true, then robust standard errors are computed by default. @@ -705,6 +708,14 @@ def _get_deviance(coef): P2_no_alpha, ) + def _get_train_deviance(coef): + mu = self._link_instance.inverse( + _safe_lin_pred(x_train, coef, offset_train) + ) + return self._family_instance.deviance( + y_train, mu, sample_weight=w_train + ) + coef = self._get_start_coef( x_train, y_train, @@ -740,17 +751,19 @@ def _get_deviance(coef): b_ineq=b_ineq, ) + train_deviance_path_ = [_get_train_deviance(_coef) for _coef in coef] + + # Unlike the train deviance, the test deviance is computed on unstandardized + # x, and the coefficient rescaling depends on self.fit_intercept. if self.fit_intercept: intercept_path_, coef_path_ = unstandardize( self.col_means_, self.col_stds_, coef[:, 0], coef[:, 1:] ) assert isinstance(intercept_path_, np.ndarray) # make mypy happy - deviance_path_ = [ - _get_deviance(_coef) - for _coef in np.concatenate( - [intercept_path_[:, np.newaxis], coef_path_], axis=1 - ) - ] + full_coef_path = np.concatenate( + [intercept_path_[:, np.newaxis], coef_path_], axis=1 + ) + deviance_path_ = [_get_deviance(_coef) for _coef in full_coef_path] else: # set intercept to zero as the other linear models do intercept_path_, coef_path_ = unstandardize( @@ -758,7 +771,7 @@ def _get_deviance(coef): ) deviance_path_ = [_get_deviance(_coef) for _coef in coef_path_] - return intercept_path_, coef_path_, deviance_path_ + return intercept_path_, coef_path_, deviance_path_, train_deviance_path_ jobs = ( joblib.delayed(_fit_path)( @@ -797,6 +810,11 @@ def _get_deviance(coef): (cv.get_n_splits(), len(l1_ratio), len(alphas[0])), ) + self.train_deviance_path_ = np.reshape( + [elmt[3] for elmt in paths_data], + (cv.get_n_splits(), len(l1_ratio), len(alphas[0])), + ) + avg_deviance = self.deviance_path_.mean(axis=0) # type: ignore 
best_l1, best_alpha = np.unravel_index( diff --git a/tests/glm/test_glm_cv.py b/tests/glm/test_glm_cv.py index 10a8ba6b..0bc3282f 100644 --- a/tests/glm/test_glm_cv.py +++ b/tests/glm/test_glm_cv.py @@ -185,6 +185,7 @@ def _assert_all_close(x, y): _assert_all_close(est_2.l1_ratio_, est_ref.l1_ratio_) _assert_all_close(est_2.coef_path_, est_ref.coef_path_) _assert_all_close(est_2.deviance_path_, est_ref.deviance_path_) + _assert_all_close(est_2.train_deviance_path_, est_ref.train_deviance_path_) _assert_all_close(est_2.intercept_, est_ref.intercept_) _assert_all_close(est_2.coef_, est_ref.coef_) _assert_all_close( @@ -272,6 +273,24 @@ def test_cv_predict_with_alpha_index(l1_ratio): np.testing.assert_allclose(pred_alpha, pred_default) +def test_train_deviance_path(): + """train_deviance_path_ should have correct shape and train deviance + should be lower than test deviance in a severely overfitted example.""" + np.random.seed(42) + n_samples, n_features = 10, 5 + n_alphas = 5 + X = np.random.randn(n_samples, n_features) + y = np.random.randn(n_samples) + + model = GeneralizedLinearRegressorCV( + n_alphas=n_alphas, + min_alpha_ratio=1e-2, + ).fit(X, y) + + assert model.train_deviance_path_.shape == model.deviance_path_.shape + assert model.train_deviance_path_.mean() < model.deviance_path_.mean() + + @pytest.mark.parametrize("scale_factor", [1.0, 1000.0]) @pytest.mark.parametrize("l1_ratio", [0.0, 0.5, 1.0]) def test_match_with_base_class(l1_ratio, scale_factor):