py-econometrics · bradhackinen · Feb 4, 2025 · Feb 4, 2025 · Mar 24, 2026 · Mar 24, 2026
diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
@@ -165,6 +165,7 @@ def _prepare_estimation(
         quantile: float | None = None,
         quantile_tol: float = 1e-06,
         quantile_maxiter: int | None = None,
+        offset: str | None = None,
     ) -> None:
         """
         Prepare model for estimation.
@@ -189,6 +190,9 @@ def _prepare_estimation(
         weights : Union[None, np.ndarray], optional
             An array of weights.
             Either None or a 1D array of length N. Default is None.
+        offset : Union[None, str], optional
+            Name of the column in `data` that holds the offset for Poisson regression
+            (passed on to the model only for `fepois`). Default is None: no offset.
         ssc : dict[str, str], optional
             A dictionary specifying the type of standard errors to use for inference.
             See `feols()` or `fepois()`.
@@ -220,6 +224,7 @@ def _prepare_estimation(
         self._drop_singletons = False
         self._is_multiple_estimation = False
         self._weights = weights
+        self._offset = offset
         self._has_weights = False
         if weights is not None:
             self._has_weights = True
@@ -380,6 +385,13 @@ def _estimate_all_models(
                             }
                         )
 
+                    if self._method == "fepois":
+                        model_kwargs.update(
+                            {
+                                "offset": self._offset,
+                            }
+                        )
+
                     if self._method in {
                         "feglm-logit",
                         "feglm-probit",

diff --git a/pyfixest/estimation/api/fepois.py b/pyfixest/estimation/api/fepois.py
@@ -24,6 +24,7 @@ def fepois(
     vcov_kwargs: dict[str, str | int] | None = None,
     weights: None | str = None,
     weights_type: WeightsTypeOptions = "aweights",
+    offset: str | None = None,
     ssc: dict[str, str | bool] | None = None,
     fixef_rm: FixedRmOptions = "singleton",
     fixef_tol: float = 1e-06,
@@ -88,6 +89,13 @@ def fepois(
         are useful for compressed count data where identical observations are aggregated.
         For details see this blog post: https://notstatschat.rbind.io/2020/08/04/weights-in-statistics/.
 
+    offset : str, optional
+        Default is None. The name of a column in `data` to use as an offset in the
+        Poisson regression. An offset is added to the linear predictor, which is
+        equivalent to constraining its coefficient to 1. This is useful for modeling
+        rates when the exposure variable differs across observations (e.g.
+        `offset = "log_population"`).
+
     ssc : str
         A ssc object specifying the small sample correction for inference.
 
@@ -257,6 +265,7 @@ def fepois(
         ssc=ssc,
         fixef_rm=fixef_rm,
         drop_intercept=drop_intercept,
+        offset=offset,
     )
     if fixest._is_iv:
         raise NotImplementedError(

diff --git a/pyfixest/estimation/deprecated/model_matrix_fixest_.py b/pyfixest/estimation/deprecated/model_matrix_fixest_.py
@@ -17,6 +17,7 @@ def model_matrix_fixest(
     data: pd.DataFrame,
     drop_singletons: bool = False,
     weights: str | None = None,
+    offset: str | None = None,
     drop_intercept=False,
     context: int | Mapping[str, Any] = 0,
 ) -> dict:
@@ -143,14 +144,15 @@ def model_matrix_fixest(
         **({"fml_first_stage": fml_first_stage} if _is_iv else {}),
         **({"fe": wrap_factorize(fval)} if fval is not None else {}),
         **({"weights": weights} if weights is not None else {}),
+        **({"offset": offset} if offset is not None else {}),
     }
 
     FML = Formula(**fml_kwargs)
     _context = capture_context(context)
     mm = FML.get_model_matrix(
         data, output="pandas", context={"factorize": factorize, **_context}
     )
-    endogvar = Z = weights_df = fe = None
+    endogvar = Z = weights_df = offset_df = fe = None
 
     model_spec = mm.model_spec
 
@@ -164,6 +166,8 @@ def model_matrix_fixest(
         fe = mm["fe"]
     if weights is not None:
         weights_df = mm["weights"]
+    if offset is not None:
+        offset_df = mm["offset"]
 
     # drop infinite values
     inf_idx_list = []
@@ -192,7 +196,7 @@ def model_matrix_fixest(
             weights_df=weights_df,
         )
 
-    for df in [Y, X, Z, endogvar, weights_df]:
+    for df in [Y, X, Z, endogvar, weights_df, offset_df]:
         if df is not None:
             cols_to_convert = df.select_dtypes(exclude=["int64", "float64"]).columns
             if cols_to_convert.size > 0:
@@ -245,6 +249,8 @@ def model_matrix_fixest(
                 endogvar=endogvar,
                 weights_df=weights_df,
             )
+            if offset_df is not None:
+                offset_df = offset_df[keep_idx]
 
     na_index = _get_na_index(data.shape[0], Y.index)
     na_index_str = ",".join(str(x) for x in na_index)
@@ -262,6 +268,7 @@ def model_matrix_fixest(
         "endogvar": endogvar,
         "Z": Z,
         "weights_df": weights_df,
+        "offset_df": offset_df,
         "na_index": na_index,
         "na_index_str": na_index_str,
         "icovars": _icovars,

diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py
@@ -106,6 +106,7 @@ def __init__(
         sample_split_var: str | None = None,
         sample_split_value: str | int | None = None,
         separation_check: list[str] | None = None,
+        offset: str | None = None,
     ):
         super().__init__(
             FixestFormula=FixestFormula,
@@ -141,6 +142,7 @@ def __init__(
         self._method = "fepois"
         self.convergence = False
         self.separation_check = separation_check
+        self._offset_name = offset
 
         self._support_crv3_inference = True
         self._support_iid_inference = True
@@ -196,6 +198,16 @@ def prepare_model_matrix(self):
             self._k_fe = self._fe.nunique(axis=0) if self._has_fixef else None
             self._n_fe = np.sum(self._k_fe > 1) if self._has_fixef else 0
 
+        # Extract offset after all drops (singleton + separation) so indices are aligned
+        if self._offset_name is not None:
+            if self._offset_name not in self._data.columns:
+                raise ValueError(
+                    f"Offset variable '{self._offset_name}' not found in data."
+                )
+            self._offset = self._data[self._offset_name].to_numpy().reshape((-1, 1))
+        else:
+            self._offset = np.zeros((self._N, 1))
+
     def to_array(self):
         "Turn estimation DataFrames to np arrays."
         self._Y, self._X, self._Z = (
@@ -288,12 +300,12 @@ def get_fit(self) -> None:
                 _mean = np.mean(self._Y)
                 mu = (self._Y + _mean) / 2
                 eta = np.log(mu)
-                Z = eta + self._Y / mu - 1
+                Z = eta - self._offset + self._Y / mu - 1
                 reg_Z = Z.copy()
                 last = self._compute_deviance(self._Y, mu)
             else:
                 # update w and Z
-                Z = eta + self._Y / mu - 1  # eq (8)
+                Z = eta - self._offset + self._Y / mu - 1  # eq (8)
                 reg_Z = Z.copy()  # eq (9)
 
             # tighten HDFE tolerance - currently not possible with PyHDFE
@@ -349,7 +361,7 @@ def get_fit(self) -> None:
             resid = Z_resid - X_resid @ delta_new
 
             # more updating
-            eta = Z - resid
+            eta = Z - resid + self._offset
             mu = np.exp(eta)
 
             # same criterion as fixest

diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py
@@ -520,8 +520,9 @@ def test_single_fit_feols_empty(
 @pytest.mark.parametrize("k_adj", [True])
 @pytest.mark.parametrize("G_adj", [True])
 @pytest.mark.parametrize("weights", [None, "weights"])
+@pytest.mark.parametrize("offset", [False, True])
 def test_single_fit_fepois(
-    data_fepois, dropna, inference, f3_type, fml, k_adj, G_adj, weights
+    data_fepois, dropna, inference, f3_type, fml, k_adj, G_adj, weights, offset
 ):
     global test_counter_fepois
     test_counter_fepois += 1
@@ -532,6 +533,11 @@ def test_single_fit_fepois(
     ssc_ = ssc(k_adj=k_adj, G_adj=G_adj)
 
     data_fepois = data_fepois.copy()
+    if offset:
+        data_fepois["offset_var"] = np.log(np.ones(data_fepois.shape[0]) * 2)
+        offset_var = "offset_var"
+    else:
+        offset_var = None
     if dropna:
         data_fepois.dropna(inplace=True)
     # long story, but categories need to be strings to be converted to R factors,
@@ -552,27 +558,22 @@ def test_single_fit_fepois(
         iwls_tol=1e-10,
         iwls_maxiter=100,
         weights=weights,
+        offset=offset_var if offset else None,
     )
 
+    r_kwargs = {
+        "vcov": r_inference,
+        "data": data_r,
+        "ssc": fixest.ssc(k_adj, "nonnested", False, G_adj, "min", "min"),
+        "glm_tol": 1e-10,
+        "glm_maxiter": 100,
+    }
     if weights is not None:
-        r_fixest = fixest.fepois(
-            ro.Formula(r_fml),
-            vcov=r_inference,
-            data=data_r,
-            ssc=fixest.ssc(k_adj, "nonnested", False, G_adj, "min", "min"),
-            glm_tol=1e-10,
-            glm_maxiter=100,
-            weights=ro.Formula("~" + weights),
-        )
-    else:
-        r_fixest = fixest.fepois(
-            ro.Formula(r_fml),
-            vcov=r_inference,
-            data=data_r,
-            ssc=fixest.ssc(k_adj, "nonnested", False, G_adj, "min", "min"),
-            glm_tol=1e-10,
-            glm_maxiter=100,
-        )
+        r_kwargs["weights"] = ro.Formula("~" + weights)
+    if offset:
+        r_kwargs["offset"] = ro.Formula("~" + offset_var)
+
+    r_fixest = fixest.fepois(ro.Formula(r_fml), **r_kwargs)
 
     py_coef = mod.coef().xs("X1")
     py_se = mod.se().xs("X1")
@@ -638,8 +639,9 @@ def test_single_fit_fepois(
         py_tstat, r_tstat, 1e-06 if weights is None else 1e-05, "py_tstat != r_tstat"
     )
     check_absolute_diff(py_confint, r_confint, 1e-06, "py_confint != r_confint")
-    check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance")
-    check_absolute_diff(py_loglik, r_loglik, 1e-08, "py_ll != r_loglik")
+    _dev_tol = 1e-07 if offset else 1e-08
+    check_absolute_diff(py_deviance, r_deviance, _dev_tol, "py_deviance != r_deviance")
+    check_absolute_diff(py_loglik, r_loglik, _dev_tol, "py_ll != r_loglik")
 
     # cant match fixest yet
     if weights is None: