From 84b58c491ab87f1bfe94d73484b1a679117f499c Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 21 Dec 2025 10:34:37 +0100 Subject: [PATCH 1/5] fix WLS hetero bug in #367 --- pyfixest/estimation/feols_.py | 6 ++ tests/test_wls_types.py | 121 +++++++++++++++++++++++----------- 2 files changed, 88 insertions(+), 39 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index c172d879b..cd870ec0c 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -824,11 +824,17 @@ def _vcov_hetero(self): transformed_scores = self._scores elif self._vcov_type_detail in ["HC2", "HC3"]: leverage = np.sum(self._X * (self._X @ np.linalg.inv(self._tZX)), axis=1) + if self._weights_type == "fweights": + leverage = leverage / self._weights.flatten() transformed_scores = ( self._scores / np.sqrt(1 - leverage)[:, None] if self._vcov_type_detail == "HC2" else self._scores / (1 - leverage)[:, None] ) + + # for fweights, need to divide by sqrt(weights) + if self._weights_type == "fweights": + transformed_scores = transformed_scores / np.sqrt(self._weights) Omega = transformed_scores.T @ transformed_scores diff --git a/tests/test_wls_types.py b/tests/test_wls_types.py index 9887d18dc..f0b0f67c9 100644 --- a/tests/test_wls_types.py +++ b/tests/test_wls_types.py @@ -4,55 +4,98 @@ import pyfixest as pf -# @pytest.mark.skip(reason="Bug for fweights and heteroskedastic errors.") -def test_fweights_ols(): - "Test that the fweights are correctly implemented for OLS models." - # Fepois model for discrete Y - data = pf.get_data(model="Fepois") - data2_w = ( - data[["Y", "X1"]] - .groupby(["Y", "X1"]) - .size() - .reset_index() - .rename(columns={0: "count"}) +def _assert_fit_equal(fit1, fit2, vcov_types, rtol=1e-5): + """Assert that two fits have identical coefficients, vcov, and SEs.""" + assert fit1._N == fit2._N, "Number of observations is not the same." + + np.testing.assert_allclose( + fit1.coef().values, fit2.coef().values, rtol=rtol, err_msg="Coefficients differ" ) - data3_w = ( - data[["Y", "X1", "f1"]] - .groupby(["Y", "X1", "f1"]) - .size() - .reset_index() - .rename(columns={0: "count"}) + + for vcov_type in vcov_types: + fit1_vcov = fit1.vcov(vcov_type) + fit2_vcov = fit2.vcov(vcov_type) + + np.testing.assert_allclose( + fit1_vcov._vcov, + fit2_vcov._vcov, + rtol=rtol, + err_msg=f"Vcov differs for {vcov_type}", + ) + np.testing.assert_allclose( + fit1_vcov.se().values, + fit2_vcov.se().values, + rtol=rtol, + err_msg=f"SEs differ for {vcov_type}", + ) + + +@pytest.mark.parametrize( + "fml,cols,vcov_types", + [ + # Without fixed effects - test hetero vcov types + ("Y ~ X1", ["Y", "X1"], ["iid", "HC1", "HC2", "HC3"]), + # Without fixed effects - test CRV (need to include cluster var in aggregation) + ("Y ~ X1", ["Y", "X1", "f1"], [{"CRV1": "f1"}, {"CRV3": "f1"}]), + # With fixed effects - HC2/HC3 not supported + ("Y ~ X1 | f1", ["Y", "X1", "f1"], ["iid", "HC1", {"CRV1": "f1"}, {"CRV3": "f1"}]), + ], +) +def test_fweights_ols(fml, cols, vcov_types): + """Test that fweights are correctly implemented for OLS models.""" + data = pf.get_data(model="Fepois") + + # Drop rows with NaN in columns used for aggregation to ensure same N + data = data.dropna(subset=cols) + + data_agg = ( + data[cols].groupby(cols).size().reset_index().rename(columns={0: "count"}) ) - fit1 = pf.feols("Y ~ X1", data=data, ssc=pf.ssc(k_adj=False, G_adj=False)) - fit2 = pf.feols( - "Y ~ X1", - data=data2_w, + fit_raw = pf.feols(fml, data=data, vcov="iid") + fit_agg = pf.feols( + fml, + data=data_agg, weights="count", weights_type="fweights", - ssc=pf.ssc(k_adj=False, G_adj=False), + vcov="iid", ) - assert fit1._N == fit2._N, "Number of observations is not the same." + _assert_fit_equal(fit_raw, fit_agg, vcov_types) - if False: - np.testing.assert_allclose(fit1.tidy().values, fit2.tidy().values) - np.testing.assert_allclose(fit1.vcov("HC1")._vcov, fit2.vcov("HC1")._vcov) - np.testing.assert_allclose(fit1.vcov("HC2")._vcov, fit2.vcov("HC2")._vcov) - np.testing.assert_allclose(fit1.vcov("HC3")._vcov, fit2.vcov("HC3")._vcov) +@pytest.mark.skip(reason="Poisson fweights has a separate bug - see issue #367") +@pytest.mark.parametrize( + "fml,fe_col", + [ + ("Y ~ X1", None), + ("Y ~ X1 | f1", "f1"), + ], +) +def test_fweights_poisson(fml, fe_col): + """Test that fweights are correctly implemented for Poisson models.""" + data = pf.get_data(model="Fepois") - fit3 = pf.feols("Y ~ X1 | f1", data=data) - fit4 = pf.feols( - "Y ~ X1 | f1", data=data3_w, weights="count", weights_type="fweights" - ) - np.testing.assert_allclose(fit3.tidy().values, fit4.tidy().values) - np.testing.assert_allclose( - fit3.vcov({"CRV3": "f1"})._vcov, fit4.vcov({"CRV3": "f1"})._vcov - ) - np.testing.assert_allclose(fit1.vcov("HC1")._vcov, fit2.vcov("HC1")._vcov) - np.testing.assert_allclose(fit1.vcov("HC2")._vcov, fit2.vcov("HC2")._vcov) - np.testing.assert_allclose(fit1.vcov("HC3")._vcov, fit2.vcov("HC3")._vcov) + cols = ["Y", "X1"] + if fe_col: + cols.append(fe_col) + + data_agg = ( + data[cols].groupby(cols).size().reset_index().rename(columns={0: "count"}) + ) + + fit_raw = pf.fepois(fml, data=data, vcov="iid") + fit_agg = pf.fepois( + fml, + data=data_agg, + weights="count", + weights_type="fweights", + vcov="iid", + ) + + # Poisson only supports HC1 for hetero-robust SEs + vcov_types = ["iid", "hetero"] + _assert_fit_equal(fit_raw, fit_agg, vcov_types) @pytest.mark.skip(reason="Not implemented yet.") From 60c8bdfe24f4cbd2c882c89a752614da0e46a629 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 21 Dec 2025 10:35:46 +0100 Subject: [PATCH 2/5] run linter --- pyfixest/estimation/feols_.py | 2 +- tests/test_wls_types.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index cd870ec0c..493bdb333 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -831,7 +831,7 @@ def _vcov_hetero(self): if self._vcov_type_detail == "HC2" else self._scores / (1 - leverage)[:, None] ) - + # for fweights, need to divide by sqrt(weights) if self._weights_type == "fweights": transformed_scores = transformed_scores / np.sqrt(self._weights) diff --git a/tests/test_wls_types.py b/tests/test_wls_types.py index f0b0f67c9..7453250b6 100644 --- a/tests/test_wls_types.py +++ b/tests/test_wls_types.py @@ -38,7 +38,11 @@ def _assert_fit_equal(fit1, fit2, vcov_types, rtol=1e-5): # Without fixed effects - test CRV (need to include cluster var in aggregation) ("Y ~ X1", ["Y", "X1", "f1"], [{"CRV1": "f1"}, {"CRV3": "f1"}]), # With fixed effects - HC2/HC3 not supported - ("Y ~ X1 | f1", ["Y", "X1", "f1"], ["iid", "HC1", {"CRV1": "f1"}, {"CRV3": "f1"}]), + ( + "Y ~ X1 | f1", + ["Y", "X1", "f1"], + ["iid", "HC1", {"CRV1": "f1"}, {"CRV3": "f1"}], + ), ], ) def test_fweights_ols(fml, cols, vcov_types): From ae10de97f0b9731891d9632b445426ff4aa9af96 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 21 Dec 2025 10:36:35 +0100 Subject: [PATCH 3/5] minor adjustment --- tests/test_wls_types.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_wls_types.py b/tests/test_wls_types.py index 7453250b6..fb8c14f72 100644 --- a/tests/test_wls_types.py +++ b/tests/test_wls_types.py @@ -5,7 +5,8 @@ def _assert_fit_equal(fit1, fit2, vcov_types, rtol=1e-5): - """Assert that two fits have identical coefficients, vcov, and SEs.""" + "Assert that two regression fits have identical coefficients, vcov, and SEs." + assert fit1._N == fit2._N, "Number of observations is not the same." np.testing.assert_allclose( From 673c205283a92dd4af713f063c7d196b9da55693 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 21 Dec 2025 10:46:00 +0100 Subject: [PATCH 4/5] WLS for poisson --- pyfixest/estimation/feols_.py | 8 +++- pyfixest/estimation/fepois_.py | 6 ++- tests/test_wls_types.py | 74 +++++++++++++--------------------- 3 files changed, 38 insertions(+), 50 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 493bdb333..e24abfc70 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -832,9 +832,13 @@ def _vcov_hetero(self): else self._scores / (1 - leverage)[:, None] ) - # for fweights, need to divide by sqrt(weights) + # For fweights, the scores are X_w * u_w = (sqrt(w)*x) * (sqrt(w)*u) = w*x*u. + # The meat matrix should be sum_i(w_i * u_i^2 * x_i @ x_i'), not + # sum_i(w_i^2 * u_i^2 * x_i @ x_i'). We correct by dividing by sqrt(w). + # For Poisson, use _user_weights (original weights before IRLS adjustment). if self._weights_type == "fweights": - transformed_scores = transformed_scores / np.sqrt(self._weights) + fweights = getattr(self, "_user_weights", self._weights) + transformed_scores = transformed_scores / np.sqrt(fweights) Omega = transformed_scores.T @ transformed_scores diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 631b785a7..4ad254238 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -202,7 +202,7 @@ def to_array(self): if self._fe is not None: self._fe = self._fe.to_numpy() if self._fe.ndim == 1: - self._fe = self._fe.reshape((self._N, 1)) + self._fe = self._fe.reshape((-1, 1)) def _compute_deviance( self, Y: np.ndarray, mu: np.ndarray, weights: Optional[np.ndarray] = None @@ -313,7 +313,7 @@ def get_fit(self) -> None: else: ZX_resid = ZX - Z_resid = ZX_resid[:, 0].reshape((self._N, 1)) # z_resid + Z_resid = ZX_resid[:, 0].reshape((-1, 1)) # z_resid X_resid = ZX_resid[:, 1:] # x_resid WX = np.sqrt(combined_weights) * X_resid @@ -346,7 +346,9 @@ def get_fit(self) -> None: # needed for the calculation of the vcov # update for inference + # Save original user weights for fweights correction in _vcov_hetero user_weights = self._weights.flatten() + self._user_weights = self._weights.copy() self._irls_weights = combined_weights.flatten() self._u_hat = (WZ - WX @ delta_new).flatten() diff --git a/tests/test_wls_types.py b/tests/test_wls_types.py index fb8c14f72..7174025f5 100644 --- a/tests/test_wls_types.py +++ b/tests/test_wls_types.py @@ -32,65 +32,49 @@ def _assert_fit_equal(fit1, fit2, vcov_types, rtol=1e-5): @pytest.mark.parametrize( - "fml,cols,vcov_types", + "model,fml,cols,vcov_types,rtol", [ - # Without fixed effects - test hetero vcov types - ("Y ~ X1", ["Y", "X1"], ["iid", "HC1", "HC2", "HC3"]), - # Without fixed effects - test CRV (need to include cluster var in aggregation) - ("Y ~ X1", ["Y", "X1", "f1"], [{"CRV1": "f1"}, {"CRV3": "f1"}]), - # With fixed effects - HC2/HC3 not supported + # OLS without fixed effects - test hetero vcov types + ("ols", "Y ~ X1", ["Y", "X1"], ["iid", "HC1", "HC2", "HC3"], 1e-5), + # OLS without fixed effects - test CRV + ("ols", "Y ~ X1", ["Y", "X1", "f1"], [{"CRV1": "f1"}, {"CRV3": "f1"}], 1e-5), + # OLS with fixed effects - HC2/HC3 not supported ( + "ols", "Y ~ X1 | f1", ["Y", "X1", "f1"], ["iid", "HC1", {"CRV1": "f1"}, {"CRV3": "f1"}], + 1e-5, + ), + # Poisson without fixed effects + ("poisson", "Y ~ X1", ["Y", "X1"], ["iid", "hetero"], 1e-4), + # Poisson without fixed effects - test CRV + ("poisson", "Y ~ X1", ["Y", "X1", "f1"], [{"CRV1": "f1"}, {"CRV3": "f1"}], 1e-4), + # Poisson with fixed effects + ("poisson", "Y ~ X1 | f1", ["Y", "X1", "f1"], ["iid", "hetero"], 1e-4), + # Poisson with fixed effects - test CRV + ( + "poisson", + "Y ~ X1 | f1", + ["Y", "X1", "f1"], + [{"CRV1": "f1"}, {"CRV3": "f1"}], + 1e-4, ), ], ) -def test_fweights_ols(fml, cols, vcov_types): - """Test that fweights are correctly implemented for OLS models.""" +def test_fweights(model, fml, cols, vcov_types, rtol): + """Test that fweights are correctly implemented for OLS and Poisson models.""" data = pf.get_data(model="Fepois") - - # Drop rows with NaN in columns used for aggregation to ensure same N data = data.dropna(subset=cols) data_agg = ( data[cols].groupby(cols).size().reset_index().rename(columns={0: "count"}) ) - fit_raw = pf.feols(fml, data=data, vcov="iid") - fit_agg = pf.feols( - fml, - data=data_agg, - weights="count", - weights_type="fweights", - vcov="iid", - ) - - _assert_fit_equal(fit_raw, fit_agg, vcov_types) - - -@pytest.mark.skip(reason="Poisson fweights has a separate bug - see issue #367") -@pytest.mark.parametrize( - "fml,fe_col", - [ - ("Y ~ X1", None), - ("Y ~ X1 | f1", "f1"), - ], -) -def test_fweights_poisson(fml, fe_col): - """Test that fweights are correctly implemented for Poisson models.""" - data = pf.get_data(model="Fepois") - - cols = ["Y", "X1"] - if fe_col: - cols.append(fe_col) - - data_agg = ( - data[cols].groupby(cols).size().reset_index().rename(columns={0: "count"}) - ) + fit_func = pf.feols if model == "ols" else pf.fepois - fit_raw = pf.fepois(fml, data=data, vcov="iid") - fit_agg = pf.fepois( + fit_raw = fit_func(fml, data=data, vcov="iid") + fit_agg = fit_func( fml, data=data_agg, weights="count", @@ -98,9 +82,7 @@ def test_fweights_poisson(fml, fe_col): vcov="iid", ) - # Poisson only supports HC1 for hetero-robust SEs - vcov_types = ["iid", "hetero"] - _assert_fit_equal(fit_raw, fit_agg, vcov_types) + _assert_fit_equal(fit_raw, fit_agg, vcov_types, rtol=rtol) @pytest.mark.skip(reason="Not implemented yet.") From 0f5bc233b04a728693bccd1d301176eb17c52f5d Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 21 Dec 2025 10:51:01 +0100 Subject: [PATCH 5/5] update changelog --- docs/changelog.qmd | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/docs/changelog.qmd b/docs/changelog.qmd index 4375724ff..a37c98200 100644 --- a/docs/changelog.qmd +++ b/docs/changelog.qmd @@ -15,7 +15,45 @@ fit3 = pf.feols("Y ~ X1 + X2 | f1", data = df) - Adds the following statistics to the `Fepois` class: `_loglik`, `_loglik_null`, `_pseudo_r2` for regression without weights. - Set the default parameters for the MAP algorithm to a tolerance of `1e-06` and maximum number of iterations of `10_000`. -- Adds support for weights for poisson regression via `feopis()`. +- Adds support for weights for Poisson regression via `fepois()`, including frequency weights (`fweights`). +- Fixes a bug in heteroskedastic-robust standard errors (HC1/HC2/HC3) when using frequency weights with both `feols()` and `fepois()`. Previously, the meat matrix was incorrectly scaled by `w²` instead of `w`. See [issue #367](https://github.com/py-econometrics/pyfixest/issues/367). + +### Frequency Weights Example + +Frequency weights allow you to work with aggregated data where each row represents multiple identical observations: + +```{python} +import pyfixest as pf + +data = pf.get_data(model="Fepois") + +# aggregate data by (Y, X1) and count occurrences +data_agg = ( + data[["Y", "X1"]] + .groupby(["Y", "X1"]) + .size() + .reset_index() + .rename(columns={0: "count"}) +) + +# fit poisson without on long data +fit = pf.fepois( + "Y ~ X1", + data=data, + vcov="hetero" +) + +# fit poisson on compressed data +fit_wls = pf.fepois( + "Y ~ X1", + data=data_agg, + weights="count", + weights_type="fweights", + vcov="hetero" +) + +pf.etable([fit, fit_wls]) +``` ## PyFixest 0.40.1