From 8bd37038c2b52f9d81524ceec266f043dbe8db4c Mon Sep 17 00:00:00 2001 From: lbittarello Date: Mon, 16 Mar 2026 09:37:41 +0000 Subject: [PATCH] Partially reverse 975 --- CHANGELOG.rst | 6 +++++- src/glum/_glm.py | 28 +++++++++++++++------------- tests/glm/test_glm_regressor.py | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0de61e4f..eadf0ecd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,9 +7,13 @@ Changelog ========= -3.2.1 - unreleased +3.2.1 - 2026-03-16 ------------------ +**Bug fix:** + +- Fixed an error when predicting at a specific ``alpha`` with categorical features. + **Other changes:** - Downgraded log messages in ``align_df_categories`` and ``add_missing_categories`` from INFO to DEBUG, and deduplicated them so they are emitted only once per column per fitted model. diff --git a/src/glum/_glm.py b/src/glum/_glm.py index 6cf03366..8c9e73e9 100644 --- a/src/glum/_glm.py +++ b/src/glum/_glm.py @@ -889,20 +889,22 @@ def _compute_linear_predictor( ) if alpha_index is None: - coef = coef_path - intercept = intercept_path + xb = X @ coef_path + intercept_path + if offset is not None: + xb += offset + elif np.isscalar(alpha_index): # `None` doesn't qualify + xb = X @ coef_path[alpha_index] + intercept_path[alpha_index] # type: ignore + if offset is not None: + xb += offset else: - scalar = np.isscalar(alpha_index) - alpha_index = np.atleast_1d(alpha_index) # type: ignore[assignment] - coef = coef_path[alpha_index] # type: ignore - intercept = intercept_path[alpha_index] # type: ignore - - xb = X @ coef.T + intercept - if offset is not None: - offset = np.asanyarray(offset) - xb += offset if xb.ndim == 1 else offset[:, np.newaxis] # type: ignore[call-overload] - - return xb.squeeze() if alpha_index is None or scalar else xb + _xb = [] + for idx in alpha_index: # type: ignore + _xb.append(X @ coef_path[idx] + intercept_path[idx]) # type: ignore + xb = np.stack(_xb, axis=1) + if offset is not None: + xb += np.asanyarray(offset)[:, np.newaxis] + + return xb def predict( self, diff --git a/tests/glm/test_glm_regressor.py b/tests/glm/test_glm_regressor.py index d37a539a..351d1077 100644 --- a/tests/glm/test_glm_regressor.py +++ b/tests/glm/test_glm_regressor.py @@ -1202,6 +1202,24 @@ def test_predict_list(regression_data, alpha, alpha_index): np.testing.assert_allclose(candidate, target + 1) +def test_predict_list_categorical(): + + letters = ["a", "b", "c", "d", "e", "f"] + rng = np.random.default_rng(42) + + df = pd.DataFrame({"x": rng.choice(letters, size=100)}) + + df["x"] = df["x"].astype("category") + df["y"] = df["x"].map({v: k + 1 for k, v in enumerate(letters)}) + + regressor = GeneralizedLinearRegressor(alpha=[0, 2], alpha_search=True) + regressor = regressor.fit(df[["x"]], df["y"]) + + candidate = regressor.predict(df[["x"]], alpha=0) + + np.testing.assert_allclose(candidate, df["y"]) + + def test_predict_error(regression_data): X, y = regression_data