Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion docs/changelog.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,45 @@ fit3 = pf.feols("Y ~ X1 + X2 | f1", data = df)

- Adds the following statistics to the `Fepois` class: `_loglik`, `_loglik_null`, `_pseudo_r2` for regression without weights.
- Set the default parameters for the MAP algorithm to a tolerance of `1e-06` and maximum number of iterations of `10_000`.
- Adds support for weights for poisson regression via `feopis()`.
- Adds support for weights for Poisson regression via `fepois()`, including frequency weights (`fweights`).
- Fixes a bug in heteroskedastic-robust standard errors (HC1/HC2/HC3) when using frequency weights with both `feols()` and `fepois()`. Previously, the meat matrix was incorrectly scaled by `w²` instead of `w`. See [issue #367](https://github.com/py-econometrics/pyfixest/issues/367).

### Frequency Weights Example

Frequency weights allow you to work with aggregated data where each row represents multiple identical observations:

```{python}
import pyfixest as pf

data = pf.get_data(model="Fepois")

# aggregate data by (Y, X1) and count occurrences
data_agg = (
data[["Y", "X1"]]
.groupby(["Y", "X1"])
.size()
.reset_index()
.rename(columns={0: "count"})
)

# fit poisson without on long data
fit = pf.fepois(
"Y ~ X1",
data=data,
vcov="hetero"
)

# fit poisson on compressed data
fit_wls = pf.fepois(
"Y ~ X1",
data=data_agg,
weights="count",
weights_type="fweights",
vcov="hetero"
)

pf.etable([fit, fit_wls])
```

## PyFixest 0.40.1

Expand Down
10 changes: 10 additions & 0 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,12 +824,22 @@ def _vcov_hetero(self):
transformed_scores = self._scores
elif self._vcov_type_detail in ["HC2", "HC3"]:
leverage = np.sum(self._X * (self._X @ np.linalg.inv(self._tZX)), axis=1)
if self._weights_type == "fweights":
leverage = leverage / self._weights.flatten()
transformed_scores = (
self._scores / np.sqrt(1 - leverage)[:, None]
if self._vcov_type_detail == "HC2"
else self._scores / (1 - leverage)[:, None]
)

# For fweights, the scores are X_w * u_w = (sqrt(w)*x) * (sqrt(w)*u) = w*x*u.
# The meat matrix should be sum_i(w_i * u_i^2 * x_i @ x_i'), not
# sum_i(w_i^2 * u_i^2 * x_i @ x_i'). We correct by dividing by sqrt(w).
# For Poisson, use _user_weights (original weights before IRLS adjustment).
if self._weights_type == "fweights":
fweights = getattr(self, "_user_weights", self._weights)
transformed_scores = transformed_scores / np.sqrt(fweights)

Omega = transformed_scores.T @ transformed_scores

meat = (
Expand Down
6 changes: 4 additions & 2 deletions pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def to_array(self):
if self._fe is not None:
self._fe = self._fe.to_numpy()
if self._fe.ndim == 1:
self._fe = self._fe.reshape((self._N, 1))
self._fe = self._fe.reshape((-1, 1))

def _compute_deviance(
self, Y: np.ndarray, mu: np.ndarray, weights: Optional[np.ndarray] = None
Expand Down Expand Up @@ -313,7 +313,7 @@ def get_fit(self) -> None:
else:
ZX_resid = ZX

Z_resid = ZX_resid[:, 0].reshape((self._N, 1)) # z_resid
Z_resid = ZX_resid[:, 0].reshape((-1, 1)) # z_resid
X_resid = ZX_resid[:, 1:] # x_resid

WX = np.sqrt(combined_weights) * X_resid
Expand Down Expand Up @@ -346,7 +346,9 @@ def get_fit(self) -> None:
# needed for the calculation of the vcov

# update for inference
# Save original user weights for fweights correction in _vcov_hetero
user_weights = self._weights.flatten()
self._user_weights = self._weights.copy()
self._irls_weights = combined_weights.flatten()

self._u_hat = (WZ - WX @ delta_new).flatten()
Expand Down
112 changes: 71 additions & 41 deletions tests/test_wls_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,55 +4,85 @@
import pyfixest as pf


# @pytest.mark.skip(reason="Bug for fweights and heteroskedastic errors.")
def test_fweights_ols():
"Test that the fweights are correctly implemented for OLS models."
# Fepois model for discrete Y
data = pf.get_data(model="Fepois")
data2_w = (
data[["Y", "X1"]]
.groupby(["Y", "X1"])
.size()
.reset_index()
.rename(columns={0: "count"})
)
data3_w = (
data[["Y", "X1", "f1"]]
.groupby(["Y", "X1", "f1"])
.size()
.reset_index()
.rename(columns={0: "count"})
)

fit1 = pf.feols("Y ~ X1", data=data, ssc=pf.ssc(k_adj=False, G_adj=False))
fit2 = pf.feols(
"Y ~ X1",
data=data2_w,
weights="count",
weights_type="fweights",
ssc=pf.ssc(k_adj=False, G_adj=False),
)
def _assert_fit_equal(fit1, fit2, vcov_types, rtol=1e-5):
"Assert that two regression fits have identical coefficients, vcov, and SEs."

assert fit1._N == fit2._N, "Number of observations is not the same."

if False:
np.testing.assert_allclose(fit1.tidy().values, fit2.tidy().values)
np.testing.assert_allclose(
fit1.coef().values, fit2.coef().values, rtol=rtol, err_msg="Coefficients differ"
)

np.testing.assert_allclose(fit1.vcov("HC1")._vcov, fit2.vcov("HC1")._vcov)
np.testing.assert_allclose(fit1.vcov("HC2")._vcov, fit2.vcov("HC2")._vcov)
np.testing.assert_allclose(fit1.vcov("HC3")._vcov, fit2.vcov("HC3")._vcov)
for vcov_type in vcov_types:
fit1_vcov = fit1.vcov(vcov_type)
fit2_vcov = fit2.vcov(vcov_type)

fit3 = pf.feols("Y ~ X1 | f1", data=data)
fit4 = pf.feols(
"Y ~ X1 | f1", data=data3_w, weights="count", weights_type="fweights"
np.testing.assert_allclose(
fit1_vcov._vcov,
fit2_vcov._vcov,
rtol=rtol,
err_msg=f"Vcov differs for {vcov_type}",
)
np.testing.assert_allclose(fit3.tidy().values, fit4.tidy().values)
np.testing.assert_allclose(
fit3.vcov({"CRV3": "f1"})._vcov, fit4.vcov({"CRV3": "f1"})._vcov
fit1_vcov.se().values,
fit2_vcov.se().values,
rtol=rtol,
err_msg=f"SEs differ for {vcov_type}",
)
np.testing.assert_allclose(fit1.vcov("HC1")._vcov, fit2.vcov("HC1")._vcov)
np.testing.assert_allclose(fit1.vcov("HC2")._vcov, fit2.vcov("HC2")._vcov)
np.testing.assert_allclose(fit1.vcov("HC3")._vcov, fit2.vcov("HC3")._vcov)


@pytest.mark.parametrize(
"model,fml,cols,vcov_types,rtol",
[
# OLS without fixed effects - test hetero vcov types
("ols", "Y ~ X1", ["Y", "X1"], ["iid", "HC1", "HC2", "HC3"], 1e-5),
# OLS without fixed effects - test CRV
("ols", "Y ~ X1", ["Y", "X1", "f1"], [{"CRV1": "f1"}, {"CRV3": "f1"}], 1e-5),
# OLS with fixed effects - HC2/HC3 not supported
(
"ols",
"Y ~ X1 | f1",
["Y", "X1", "f1"],
["iid", "HC1", {"CRV1": "f1"}, {"CRV3": "f1"}],
1e-5,
),
# Poisson without fixed effects
("poisson", "Y ~ X1", ["Y", "X1"], ["iid", "hetero"], 1e-4),
# Poisson without fixed effects - test CRV
("poisson", "Y ~ X1", ["Y", "X1", "f1"], [{"CRV1": "f1"}, {"CRV3": "f1"}], 1e-4),
# Poisson with fixed effects
("poisson", "Y ~ X1 | f1", ["Y", "X1", "f1"], ["iid", "hetero"], 1e-4),
# Poisson with fixed effects - test CRV
(
"poisson",
"Y ~ X1 | f1",
["Y", "X1", "f1"],
[{"CRV1": "f1"}, {"CRV3": "f1"}],
1e-4,
),
],
)
def test_fweights(model, fml, cols, vcov_types, rtol):
"""Test that fweights are correctly implemented for OLS and Poisson models."""
data = pf.get_data(model="Fepois")
data = data.dropna(subset=cols)

data_agg = (
data[cols].groupby(cols).size().reset_index().rename(columns={0: "count"})
)

fit_func = pf.feols if model == "ols" else pf.fepois

fit_raw = fit_func(fml, data=data, vcov="iid")
fit_agg = fit_func(
fml,
data=data_agg,
weights="count",
weights_type="fweights",
vcov="iid",
)

_assert_fit_equal(fit_raw, fit_agg, vcov_types, rtol=rtol)


@pytest.mark.skip(reason="Not implemented yet.")
Expand Down
Loading