From e9dc8f436c50c37b4a7b8adaa08257bded8ea900 Mon Sep 17 00:00:00 2001
From: Brad Hackinen <bradhackinen@users.noreply.github.com>
Date: Mon, 3 Feb 2025 16:03:03 -0800
Subject: [PATCH 1/5] Added offset option for fepois

---
 pyfixest/estimation/FixestMulti_.py                  | 12 ++++++++++++
 .../estimation/deprecated/model_matrix_fixest_.py    | 11 +++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
index 1522ceea8..933c8ffd6 100644
--- a/pyfixest/estimation/FixestMulti_.py
+++ b/pyfixest/estimation/FixestMulti_.py
@@ -165,6 +165,7 @@ def _prepare_estimation(
         quantile: float | None = None,
         quantile_tol: float = 1e-06,
         quantile_maxiter: int | None = None,
+        offset: Optional[Union[None, str]] = None,
     ) -> None:
         """
         Prepare model for estimation.
@@ -189,6 +190,9 @@ def _prepare_estimation(
         weights : Union[None, np.ndarray], optional
             An array of weights.
             Either None or a 1D array of length N. Default is None.
+        offset : Union[None, str], optional
+            Default is None. Offset variable for Poisson regression. If None, no offset.
+            If a string, the name of the column in `data` that contains the offset.
         ssc : dict[str, str], optional
             A dictionary specifying the type of standard errors to use for inference.
             See `feols()` or `fepois()`.
@@ -220,6 +224,7 @@ def _prepare_estimation(
         self._drop_singletons = False
         self._is_multiple_estimation = False
         self._weights = weights
+        self._offset = offset
         self._has_weights = False
         if weights is not None:
             self._has_weights = True
@@ -380,6 +385,13 @@ def _estimate_all_models(
                             }
                         )
 
+                    if self._method == "fepois":
+                        model_kwargs.update(
+                            {
+                                "offset": self._offset,
+                            }
+                        )
+
                     if self._method in {
                         "feglm-logit",
                         "feglm-probit",
diff --git a/pyfixest/estimation/deprecated/model_matrix_fixest_.py b/pyfixest/estimation/deprecated/model_matrix_fixest_.py
index aef0a387b..1c58f936b 100644
--- a/pyfixest/estimation/deprecated/model_matrix_fixest_.py
+++ b/pyfixest/estimation/deprecated/model_matrix_fixest_.py
@@ -17,6 +17,7 @@ def model_matrix_fixest(
     data: pd.DataFrame,
     drop_singletons: bool = False,
     weights: str | None = None,
+    offset: str | None = None,
     drop_intercept=False,
     context: int | Mapping[str, Any] = 0,
 ) -> dict:
@@ -143,6 +144,7 @@ def model_matrix_fixest(
         **({"fml_first_stage": fml_first_stage} if _is_iv else {}),
         **({"fe": wrap_factorize(fval)} if fval is not None else {}),
         **({"weights": weights} if weights is not None else {}),
+        **({"offset": offset} if offset is not None else {}),
     }
 
     FML = Formula(**fml_kwargs)
@@ -150,7 +152,7 @@ def model_matrix_fixest(
     mm = FML.get_model_matrix(
         data, output="pandas", context={"factorize": factorize, **_context}
     )
-    endogvar = Z = weights_df = fe = None
+    endogvar = Z = weights_df = offset_df = fe = None
 
     model_spec = mm.model_spec
 
@@ -164,6 +166,8 @@ def model_matrix_fixest(
         fe = mm["fe"]
     if weights is not None:
         weights_df = mm["weights"]
+    if offset is not None:
+        offset_df = mm["offset"]
 
     # drop infinite values
     inf_idx_list = []
@@ -192,7 +196,7 @@ def model_matrix_fixest(
             weights_df=weights_df,
         )
 
-    for df in [Y, X, Z, endogvar, weights_df]:
+    for df in [Y, X, Z, endogvar, weights_df, offset_df]:
         if df is not None:
             cols_to_convert = df.select_dtypes(exclude=["int64", "float64"]).columns
             if cols_to_convert.size > 0:
@@ -245,6 +249,8 @@ def model_matrix_fixest(
                 endogvar=endogvar,
                 weights_df=weights_df,
             )
+            if offset is not None:
+                offset_df = offset_df[keep_idx]
 
     na_index = _get_na_index(data.shape[0], Y.index)
     na_index_str = ",".join(str(x) for x in na_index)
@@ -262,6 +268,7 @@ def model_matrix_fixest(
         "endogvar": endogvar,
         "Z": Z,
         "weights_df": weights_df,
+        "offset_df": offset_df,
         "na_index": na_index,
         "na_index_str": na_index_str,
         "icovars": _icovars,

From 91020adb6a720c72f9bc703006726bfacceaab88 Mon Sep 17 00:00:00 2001
From: Alexander Fischer <alexander-fischer1801@t-online.de>
Date: Tue, 4 Feb 2025 21:36:49 +0100
Subject: [PATCH 2/5] add tests

---
 tests/test_vs_fixest.py | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py
index 57eb79f1f..3f4522b60 100644
--- a/tests/test_vs_fixest.py
+++ b/tests/test_vs_fixest.py
@@ -520,8 +520,9 @@ def test_single_fit_feols_empty(
 @pytest.mark.parametrize("k_adj", [True])
 @pytest.mark.parametrize("G_adj", [True])
 @pytest.mark.parametrize("weights", [None, "weights"])
+@pytest.mark.parametrize("offset", [False, True])
 def test_single_fit_fepois(
-    data_fepois, dropna, inference, f3_type, fml, k_adj, G_adj, weights
+    data_fepois, dropna, inference, f3_type, fml, k_adj, G_adj, weights, offset
 ):
     global test_counter_fepois
     test_counter_fepois += 1
@@ -531,6 +532,13 @@ def test_single_fit_fepois(
 
     ssc_ = ssc(k_adj=k_adj, G_adj=G_adj)
 
+    data = data_fepois
+    if offset:
+        data["offset_var"] = np.ones(data.shape[0]) * 5
+        offset_var = "offset_var"
+    else:
+        offset_var = None
+
     data_fepois = data_fepois.copy()
     if dropna:
         data_fepois.dropna(inplace=True)
@@ -552,27 +560,22 @@ def test_single_fit_fepois(
         iwls_tol=1e-10,
         iwls_maxiter=100,
         weights=weights,
+        offset=offset_var if offset else None,
     )
 
+    r_kwargs = {
+        "vcov": r_inference,
+        "data": data_r,
+        "ssc": fixest.ssc(k_adj, "nonnested", False, G_adj, "min", "min"),
+        "glm_tol": 1e-10,
+        "glm_maxiter": 100,
+    }
     if weights is not None:
-        r_fixest = fixest.fepois(
-            ro.Formula(r_fml),
-            vcov=r_inference,
-            data=data_r,
-            ssc=fixest.ssc(k_adj, "nonnested", False, G_adj, "min", "min"),
-            glm_tol=1e-10,
-            glm_maxiter=100,
-            weights=ro.Formula("~" + weights),
-        )
-    else:
-        r_fixest = fixest.fepois(
-            ro.Formula(r_fml),
-            vcov=r_inference,
-            data=data_r,
-            ssc=fixest.ssc(k_adj, "nonnested", False, G_adj, "min", "min"),
-            glm_tol=1e-10,
-            glm_maxiter=100,
-        )
+        r_kwargs["weights"] = ro.Formula("~" + weights)
+    if offset:
+        r_kwargs["offset"] = ro.Formula("~" + offset_var)
+
+    r_fixest = fixest.fepois(ro.Formula(r_fml), **r_kwargs)
 
     py_coef = mod.coef().xs("X1")
     py_se = mod.se().xs("X1")

From 6981c8210aa21a05cf048fa381bc2b5e22312eda Mon Sep 17 00:00:00 2001
From: Alexander Fischer <alexander-fischer1801@t-online.de>
Date: Tue, 24 Mar 2026 23:15:20 +0100
Subject: [PATCH 3/5] bring back offsets

---
 pyfixest/estimation/FixestMulti_.py           |  2 +-
 pyfixest/estimation/api/fepois.py             |  9 ++++++++
 .../deprecated/model_matrix_fixest_.py        |  2 +-
 pyfixest/estimation/models/fepois_.py         | 23 ++++++++++++++++---
 4 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
index 933c8ffd6..55459a4ed 100644
--- a/pyfixest/estimation/FixestMulti_.py
+++ b/pyfixest/estimation/FixestMulti_.py
@@ -165,7 +165,7 @@ def _prepare_estimation(
         quantile: float | None = None,
         quantile_tol: float = 1e-06,
         quantile_maxiter: int | None = None,
-        offset: Optional[Union[None, str]] = None,
+        offset: str | None = None,
     ) -> None:
         """
         Prepare model for estimation.
diff --git a/pyfixest/estimation/api/fepois.py b/pyfixest/estimation/api/fepois.py
index f005376d5..f78040f3f 100644
--- a/pyfixest/estimation/api/fepois.py
+++ b/pyfixest/estimation/api/fepois.py
@@ -24,6 +24,7 @@ def fepois(
     vcov_kwargs: dict[str, str | int] | None = None,
     weights: None | str = None,
     weights_type: WeightsTypeOptions = "aweights",
+    offset: str | None = None,
     ssc: dict[str, str | bool] | None = None,
     fixef_rm: FixedRmOptions = "singleton",
     fixef_tol: float = 1e-06,
@@ -88,6 +89,13 @@ def fepois(
         are useful for compressed count data where identical observations are aggregated.
         For details see this blog post: https://notstatschat.rbind.io/2020/08/04/weights-in-statistics/.
 
+    offset : str, optional
+        Default is None. The name of a column in `data` to use as an offset in the
+        Poisson regression. An offset is added to the linear predictor, which is
+        equivalent to constraining its coefficient to 1. This is useful for modeling
+        rates when the exposure variable differs across observations (e.g.
+        `offset = "log_population"`).
+
     ssc : str
         A ssc object specifying the small sample correction for inference.
 
@@ -257,6 +265,7 @@ def fepois(
         ssc=ssc,
         fixef_rm=fixef_rm,
         drop_intercept=drop_intercept,
+        offset=offset,
     )
     if fixest._is_iv:
         raise NotImplementedError(
diff --git a/pyfixest/estimation/deprecated/model_matrix_fixest_.py b/pyfixest/estimation/deprecated/model_matrix_fixest_.py
index 1c58f936b..cd46c298a 100644
--- a/pyfixest/estimation/deprecated/model_matrix_fixest_.py
+++ b/pyfixest/estimation/deprecated/model_matrix_fixest_.py
@@ -249,7 +249,7 @@ def model_matrix_fixest(
                 endogvar=endogvar,
                 weights_df=weights_df,
             )
-            if offset is not None:
+            if offset_df is not None:
                 offset_df = offset_df[keep_idx]
 
     na_index = _get_na_index(data.shape[0], Y.index)
diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py
index be6f1545d..de5ee9361 100644
--- a/pyfixest/estimation/models/fepois_.py
+++ b/pyfixest/estimation/models/fepois_.py
@@ -106,6 +106,7 @@ def __init__(
         sample_split_var: str | None = None,
         sample_split_value: str | int | None = None,
         separation_check: list[str] | None = None,
+        offset: str | None = None,
     ):
         super().__init__(
             FixestFormula=FixestFormula,
@@ -141,6 +142,7 @@ def __init__(
         self._method = "fepois"
         self.convergence = False
         self.separation_check = separation_check
+        self._offset_name = offset
 
         self._support_crv3_inference = True
         self._support_iid_inference = True
@@ -155,6 +157,20 @@ def prepare_model_matrix(self):
         "Prepare model inputs for estimation."
         super().prepare_model_matrix()
 
+        # Extract offset from data or default to zeros
+        if self._offset_name is not None:
+            if self._offset_name not in self._data.columns:
+                raise ValueError(
+                    f"Offset variable '{self._offset_name}' not found in data."
+                )
+            self._offset = (
+                self._data.loc[self._Y.index, self._offset_name]
+                .to_numpy()
+                .reshape((-1, 1))
+            )
+        else:
+            self._offset = np.zeros((self._N, 1))
+
         # check that self._Y is a pandas Series or DataFrame
         self._Y = _check_series_or_dataframe(self._Y)
 
@@ -185,6 +201,7 @@ def prepare_model_matrix(self):
             self._data.drop(na_separation, axis=0, inplace=True)
             if self._weights_df is not None:
                 self._weights_df.drop(na_separation, axis=0, inplace=True)
+            self._offset = np.delete(self._offset, na_separation, axis=0)
             self._N = self._Y.shape[0]
             self._N_rows = self._N
             # Re-set weights after dropping rows (handles both weighted and unweighted)
@@ -288,12 +305,12 @@ def get_fit(self) -> None:
                 _mean = np.mean(self._Y)
                 mu = (self._Y + _mean) / 2
                 eta = np.log(mu)
-                Z = eta + self._Y / mu - 1
+                Z = eta - self._offset + self._Y / mu - 1
                 reg_Z = Z.copy()
                 last = self._compute_deviance(self._Y, mu)
             else:
                 # update w and Z
-                Z = eta + self._Y / mu - 1  # eq (8)
+                Z = eta - self._offset + self._Y / mu - 1  # eq (8)
                 reg_Z = Z.copy()  # eq (9)
 
             # tighten HDFE tolerance - currently not possible with PyHDFE
@@ -349,7 +366,7 @@ def get_fit(self) -> None:
             resid = Z_resid - X_resid @ delta_new
 
             # more updating
-            eta = Z - resid
+            eta = Z - resid + self._offset
             mu = np.exp(eta)
 
             # same criterion as fixest

From 3b0953d8b482ae01408c6f695e1072f3fcf2a603 Mon Sep 17 00:00:00 2001
From: Alexander Fischer <alexander-fischer1801@t-online.de>
Date: Tue, 24 Mar 2026 23:35:36 +0100
Subject: [PATCH 4/5] adjust tests

---
 tests/test_vs_fixest.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py
index 3f4522b60..791ba57ae 100644
--- a/tests/test_vs_fixest.py
+++ b/tests/test_vs_fixest.py
@@ -532,14 +532,12 @@ def test_single_fit_fepois(
 
     ssc_ = ssc(k_adj=k_adj, G_adj=G_adj)
 
-    data = data_fepois
+    data_fepois = data_fepois.copy()
     if offset:
-        data["offset_var"] = np.ones(data.shape[0]) * 5
+        data_fepois["offset_var"] = np.log(np.ones(data_fepois.shape[0]) * 2)
         offset_var = "offset_var"
     else:
         offset_var = None
-
-    data_fepois = data_fepois.copy()
     if dropna:
         data_fepois.dropna(inplace=True)
     # long story, but categories need to be strings to be converted to R factors,
@@ -641,8 +639,9 @@ def test_single_fit_fepois(
         py_tstat, r_tstat, 1e-06 if weights is None else 1e-05, "py_tstat != r_tstat"
     )
     check_absolute_diff(py_confint, r_confint, 1e-06, "py_confint != r_confint")
-    check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance")
-    check_absolute_diff(py_loglik, r_loglik, 1e-08, "py_ll != r_loglik")
+    _dev_tol = 1e-07 if offset else 1e-08
+    check_absolute_diff(py_deviance, r_deviance, _dev_tol, "py_deviance != r_deviance")
+    check_absolute_diff(py_loglik, r_loglik, _dev_tol, "py_ll != r_loglik")
 
     # cant match fixest yet
     if weights is None:

From 85a8cef99011caea01eff3007b839d8e1058019e Mon Sep 17 00:00:00 2001
From: Alexander Fischer <alexander-fischer1801@t-online.de>
Date: Wed, 25 Mar 2026 00:01:12 +0100
Subject: [PATCH 5/5] move offset creation after separation check to fix test
 error

---
 pyfixest/estimation/models/fepois_.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py
index de5ee9361..7c9ca3cc4 100644
--- a/pyfixest/estimation/models/fepois_.py
+++ b/pyfixest/estimation/models/fepois_.py
@@ -157,20 +157,6 @@ def prepare_model_matrix(self):
         "Prepare model inputs for estimation."
         super().prepare_model_matrix()
 
-        # Extract offset from data or default to zeros
-        if self._offset_name is not None:
-            if self._offset_name not in self._data.columns:
-                raise ValueError(
-                    f"Offset variable '{self._offset_name}' not found in data."
-                )
-            self._offset = (
-                self._data.loc[self._Y.index, self._offset_name]
-                .to_numpy()
-                .reshape((-1, 1))
-            )
-        else:
-            self._offset = np.zeros((self._N, 1))
-
         # check that self._Y is a pandas Series or DataFrame
         self._Y = _check_series_or_dataframe(self._Y)
 
@@ -201,7 +187,6 @@ def prepare_model_matrix(self):
             self._data.drop(na_separation, axis=0, inplace=True)
             if self._weights_df is not None:
                 self._weights_df.drop(na_separation, axis=0, inplace=True)
-            self._offset = np.delete(self._offset, na_separation, axis=0)
             self._N = self._Y.shape[0]
             self._N_rows = self._N
             # Re-set weights after dropping rows (handles both weighted and unweighted)
@@ -213,6 +198,16 @@ def prepare_model_matrix(self):
             self._k_fe = self._fe.nunique(axis=0) if self._has_fixef else None
             self._n_fe = np.sum(self._k_fe > 1) if self._has_fixef else 0
 
+        # Extract offset after all drops (singleton + separation) so indices are aligned
+        if self._offset_name is not None:
+            if self._offset_name not in self._data.columns:
+                raise ValueError(
+                    f"Offset variable '{self._offset_name}' not found in data."
+                )
+            self._offset = self._data[self._offset_name].to_numpy().reshape((-1, 1))
+        else:
+            self._offset = np.zeros((self._N, 1))
+
     def to_array(self):
         "Turn estimation DataFrames to np arrays."
         self._Y, self._X, self._Z = (