diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a389d25e..283b08ed 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,15 @@ Changelog ========= + +3.3.0 - unreleased +------------------ + +**New feature:** + +- :class:`~glum.GeneralizedLinearRegressorCV` now exposes ``train_deviance_path_``, an array of shape ``(n_folds, n_l1_ratios, n_alphas)`` with the training-set deviance. + + 3.2.3 - 2026-03-18 ------------------ diff --git a/README.md b/README.md index ca28a444..8934e66a 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ We believe that for GLM development, broad support for distributions, regulariza * Built-in formula-based model specification using `formulaic` * Classical statistical inference for unregularized models * Box constraints, linear inequality constraints, sample weights, offsets -* Support for multiple dataframe backends (pandas, polars, and more) via `narwhals` +* Multiple dataframe backends (pandas, polars, and more) via `narwhals` Performance also matters, so we conducted extensive benchmarks against other modern libraries. Although performance depends on the specific problem, we find that when N >> K (there are more observations than predictors), `glum` is consistently much faster for a wide range of problems. This repo includes the benchmarking tools in the `glum_benchmarks` module. For details, [see here](glum_benchmarks/README.md). diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py index 41aa459d..7ec5b880 100644 --- a/src/glum/_glm_cv.py +++ b/src/glum/_glm_cv.py @@ -298,9 +298,12 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase): Estimated intercepts at every point along the regularization path, per fold and l1_ratio. - deviance_path_: array, shape(n_folds, n_alphas) + deviance_path_: array, shape(n_folds, n_l1_ratios, n_alphas) Deviance for the test set on each fold, varying alpha. 
+ train_deviance_path_: array, shape(n_folds, n_l1_ratios, n_alphas) + Deviance for the training set on each fold, varying alpha. + robust : bool, optional (default = False) If true, then robust standard errors are computed by default. @@ -705,6 +708,14 @@ def _get_deviance(coef): P2_no_alpha, ) + def _get_train_deviance(coef): + mu = self._link_instance.inverse( + _safe_lin_pred(x_train, coef, offset_train) + ) + return self._family_instance.deviance( + y_train, mu, sample_weight=w_train + ) + coef = self._get_start_coef( x_train, y_train, @@ -740,17 +751,19 @@ def _get_deviance(coef): b_ineq=b_ineq, ) + train_deviance_path_ = [_get_train_deviance(_coef) for _coef in coef] + + # Unlike the train deviance, the test deviance is computed on unstandardized + # x, and the coefficient rescaling depends on self.fit_intercept. if self.fit_intercept: intercept_path_, coef_path_ = unstandardize( self.col_means_, self.col_stds_, coef[:, 0], coef[:, 1:] ) assert isinstance(intercept_path_, np.ndarray) # make mypy happy - deviance_path_ = [ - _get_deviance(_coef) - for _coef in np.concatenate( - [intercept_path_[:, np.newaxis], coef_path_], axis=1 - ) - ] + full_coef_path = np.concatenate( + [intercept_path_[:, np.newaxis], coef_path_], axis=1 + ) + deviance_path_ = [_get_deviance(_coef) for _coef in full_coef_path] else: # set intercept to zero as the other linear models do intercept_path_, coef_path_ = unstandardize( @@ -758,7 +771,7 @@ def _get_deviance(coef): ) deviance_path_ = [_get_deviance(_coef) for _coef in coef_path_] - return intercept_path_, coef_path_, deviance_path_ + return intercept_path_, coef_path_, deviance_path_, train_deviance_path_ jobs = ( joblib.delayed(_fit_path)( @@ -797,6 +810,11 @@ def _get_deviance(coef): (cv.get_n_splits(), len(l1_ratio), len(alphas[0])), ) + self.train_deviance_path_ = np.reshape( + [elmt[3] for elmt in paths_data], + (cv.get_n_splits(), len(l1_ratio), len(alphas[0])), + ) + avg_deviance = self.deviance_path_.mean(axis=0) # type: ignore 
best_l1, best_alpha = np.unravel_index( diff --git a/tests/glm/test_glm_cv.py b/tests/glm/test_glm_cv.py index 10a8ba6b..0bc3282f 100644 --- a/tests/glm/test_glm_cv.py +++ b/tests/glm/test_glm_cv.py @@ -185,6 +185,7 @@ def _assert_all_close(x, y): _assert_all_close(est_2.l1_ratio_, est_ref.l1_ratio_) _assert_all_close(est_2.coef_path_, est_ref.coef_path_) _assert_all_close(est_2.deviance_path_, est_ref.deviance_path_) + _assert_all_close(est_2.train_deviance_path_, est_ref.train_deviance_path_) _assert_all_close(est_2.intercept_, est_ref.intercept_) _assert_all_close(est_2.coef_, est_ref.coef_) _assert_all_close( @@ -272,6 +273,24 @@ def test_cv_predict_with_alpha_index(l1_ratio): np.testing.assert_allclose(pred_alpha, pred_default) +def test_train_deviance_path(): + """train_deviance_path_ should have correct shape and train deviance + should be lower than test deviance in a severely overfitted example.""" + np.random.seed(42) + n_samples, n_features = 10, 5 + n_alphas = 5 + X = np.random.randn(n_samples, n_features) + y = np.random.randn(n_samples) + + model = GeneralizedLinearRegressorCV( + n_alphas=n_alphas, + min_alpha_ratio=1e-2, + ).fit(X, y) + + assert model.train_deviance_path_.shape == model.deviance_path_.shape + assert model.train_deviance_path_.mean() < model.deviance_path_.mean() + + @pytest.mark.parametrize("scale_factor", [1.0, 1000.0]) @pytest.mark.parametrize("l1_ratio", [0.0, 0.5, 1.0]) def test_match_with_base_class(l1_ratio, scale_factor):