py-econometrics · leostimpfle · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -94,17 +94,17 @@ website:
           file: explanation/compare-fixest-pyfixest.qmd
       - text: "Textbook Replications"
         menu:
-        - text: "All Textbook Replications"
-          file: textbook-replications/index.qmd
-        - text: "---"
-        - text: "Causal Inference for the Brave and True"
-          file: textbook-replications/brave_true.qmd
-        - text: "Causal Inference: The Mixtape"
-          file: textbook-replications/mixtape.qmd
-        - text: "The Effect"
-          file: textbook-replications/replicating-the-effect.qmd
-        - text: "Other Resources"
-          file: textbook-replications/resources.qmd
+          - text: "All Textbook Replications"
+            file: textbook-replications/index.qmd
+          - text: "---"
+          - text: "Causal Inference for the Brave and True"
+            file: textbook-replications/brave_true.qmd
+          - text: "Causal Inference: The Mixtape"
+            file: textbook-replications/mixtape.qmd
+          - text: "The Effect"
+            file: textbook-replications/replicating-the-effect.qmd
+          - text: "Other Resources"
+            file: textbook-replications/resources.qmd
       - text: "Changelog"
         file: changelog.qmd
       - text: "Contributing"
@@ -170,7 +170,7 @@ quartodoc:
       contents:
         - estimation.formula.parse.Formula
         - estimation.formula.model_matrix.ModelMatrix
-        - estimation.formula.factor_interaction.factor_interaction
+        - estimation.formula.transforms.factor_interaction.factor_interaction
     - title: Misc / Utilities
       desc: |
         PyFixest internals and utilities

diff --git a/docs/_sidebar.yml b/docs/_sidebar.yml
@@ -37,7 +37,7 @@ website:
     - contents:
       - reference/estimation.formula.parse.Formula.qmd
       - reference/estimation.formula.model_matrix.ModelMatrix.qmd
-      - reference/estimation.formula.factor_interaction.factor_interaction.qmd
+      - reference/estimation.formula.transforms.factor_interaction.factor_interaction.qmd
       section: Formula Parsing & Model Matrix
     - contents:
       - reference/estimation.internals.demean_.demean.qmd

diff --git a/pyfixest/did/did2s.py b/pyfixest/did/did2s.py
@@ -1,5 +1,6 @@
 from typing import cast
 
+import formulaic
 import numpy as np
 import pandas as pd
 from scipy.sparse import csr_matrix
@@ -317,14 +318,16 @@ def _did2s_vcov(
     # fixed-effect levels). Removing `- 1` would cause formulaic to drop
     # reference levels, changing the GMM vcov standard errors.
     FML1 = Formula(
-        _second_stage=f"{yname} ~ {first_stage_fml.replace('~', '').strip()} - 1",
+        _formula=formulaic.Formula(
+            f"{yname} ~ {first_stage_fml.replace('~', '').strip()} - 1"
+        )
     )
     # Second stage: do NOT use `- 1`. Formulaic needs the intercept present
     # for full-rank encoding (dropping a reference level for factors like
     # i(treat)). The intercept column is then removed by drop_intercept=True
     # below, matching what feols does in _did2s_estimate.
     FML2 = Formula(
-        _second_stage=f"{yname} ~ {second_stage.replace('~', '').strip()}",
+        _formula=formulaic.Formula(f"{yname} ~ {second_stage.replace('~', '').strip()}")
     )
 
     mm_first_stage = model_matrix.create_model_matrix(

diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
@@ -238,7 +238,7 @@ def _prepare_estimation(
         self.FixestFormulaDict = formula_dictionary
         self._method = estimation
         self._is_iv = any(
-            formula.first_stage is not None
+            formula.is_instrumental_variable
             for _, formulas in formula_dictionary.items()
             for formula in formulas
         )

diff --git a/pyfixest/estimation/formula/__init__.py b/pyfixest/estimation/formula/__init__.py
@@ -1,7 +1,20 @@
+from collections.abc import Callable
 from typing import Final
 
 from formulaic.parser import DefaultFormulaParser
 
+from pyfixest.estimation.formula.transforms.factor_interaction import factor_interaction
+from pyfixest.estimation.formula.transforms.fixed_effects_encoding import (
+    encode_fixed_effects,
+)
+from pyfixest.estimation.formula.transforms.misc import log
+
 FORMULAIC_FEATURE_FLAG: Final[DefaultFormulaParser.FeatureFlags] = (
-    DefaultFormulaParser.FeatureFlags.DEFAULT
+    DefaultFormulaParser.FeatureFlags.ALL
 )
+
+FORMULAIC_TRANSFORMS: Final[dict[str, Callable]] = {
+    "i": factor_interaction,  # fixest::i()-style syntax
+    "__fixed_effect__": encode_fixed_effects,
+    "log": log,  # custom log that sets infinite values to NaN
+}
diff --git a/pyfixest/estimation/formula/model_matrix.py b/pyfixest/estimation/formula/model_matrix.py
@@ -9,15 +9,9 @@
 from formulaic.parser import DefaultFormulaParser
 
 from pyfixest.core.detect_singletons import detect_singletons
-from pyfixest.estimation.formula import FORMULAIC_FEATURE_FLAG
-from pyfixest.estimation.formula.factor_interaction import factor_interaction
+from pyfixest.estimation.formula import FORMULAIC_FEATURE_FLAG, FORMULAIC_TRANSFORMS
 from pyfixest.estimation.formula.parse import Formula
-from pyfixest.estimation.formula.utils import (
-    _encode_fixed_effects,
-    _factorize,
-    _get_weights,
-    log,
-)
+from pyfixest.estimation.formula.utils import _get_weights
 from pyfixest.utils.utils import capture_context
 
 
@@ -327,12 +321,7 @@ def create_model_matrix(
         ensure_full_rank=ensure_full_rank,
         na_action="drop",
         output="pandas",
-        context={
-            "log": log,  # custom log settings infinite to nan
-            "i": factor_interaction,  # fixest::i()-style syntax
-            "__fixed_effect__": _factorize,
-        }
-        | {**capture_context(context)},
+        context=FORMULAIC_TRANSFORMS | {**capture_context(context)},
     )
     drop_rows: set[int] = set(range(n_observations)).difference(
         model_matrix[_ModelMatrixKey.main]["lhs"].index
@@ -352,12 +341,11 @@ def _get_formulaic_formula(
 ) -> formulaic.Formula:
     # Collate kwargs to be passed to formulaic.Formula
     formula_kwargs: dict[str, str] = {_ModelMatrixKey.main: formula.second_stage}
-    if formula.fixed_effects is not None:
-        fixed_effects_formula = _encode_fixed_effects(
-            fixed_effects=formula.fixed_effects, data=data
+    if formula.is_fixed_effects:
+        formula_kwargs.update(
+            {_ModelMatrixKey.fixed_effects: f"{formula.fixed_effects_wrapped} - 1"}
         )
-        formula_kwargs.update({_ModelMatrixKey.fixed_effects: fixed_effects_formula})
-    if formula.first_stage is not None:
+    if formula.is_instrumental_variable:
         formula_kwargs.update(
             {_ModelMatrixKey.instrumental_variable: formula.first_stage}
         )
@@ -366,6 +354,15 @@ def _get_formulaic_formula(
         formula_kwargs.update({_ModelMatrixKey.weights: f"{weights}-1"})
     formula_formulaic = formulaic.Formula(
         formula_kwargs,
-        _parser=DefaultFormulaParser(feature_flags=FORMULAIC_FEATURE_FLAG),
+        _parser=DefaultFormulaParser(
+            feature_flags=FORMULAIC_FEATURE_FLAG,
+            # When FEs are present, include_intercept=True so that spans_intercept=True
+            # terms (like i()) receive reduced_rank=True from formulaic, causing them to
+            # drop the first level (matching R/fixest). The intercept column is removed
+            # afterwards in ModelMatrix._process(). Without this, i() would receive
+            # reduced_rank=False and generate all levels; the post-hoc collinearity check
+            # would then drop the last level instead of the first, mismatching R.
+            include_intercept=formula.is_fixed_effects,
+        ),
     )
     return formula_formulaic