py-econometrics · s3alfisc · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026
diff --git a/docs/acknowledgements.md b/docs/acknowledgements.md
@@ -109,6 +109,7 @@ standard errors, t-statistics, p-values, confidence intervals, etc for OLS, IV,
 |---|---|---|
 | [**lfe**](https://cran.r-project.org/web/packages/lfe/vignettes/lfehow.pdf) | R | We based our first implementation of the MAP algorithm on the description in the "how lfe works" vignette. |
 | [**pyhdfe**](https://github.com/jeffgortmaker/pyhdfe) | Python | PyFixest's demeaning results are tested against Jeff Gortmaker's `pyhdfe`. `pyfixest`'s first MVP was built using `pyhdfe` it ran its demeaning algorithm via `pyhdfe` MAP algo. |
+| [**FixedEffects.jl**](https://github.com/FixedEffects/FixedEffects.jl) | Julia | Matthieu Gomez's Julia package for high-dimensional fixed effects. PyFixest's variance-ratio collinearity check for LSMR backends (`collin_tol_var`) and the default LSMR convergence tolerance (`1e-6`) are informed by the defaults and approach in FixedEffects.jl. |
 
 ---
 

diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
@@ -247,6 +247,7 @@ def _estimate_all_models(
         vcov_kwargs: Optional[dict[str, Any]] = None,
         demeaner_backend: DemeanerBackendOptions = "numba",
         collin_tol: float = 1e-6,
+        collin_tol_var: Optional[float] = None,
         iwls_maxiter: int = 25,
         iwls_tol: float = 1e-08,
         separation_check: Optional[list[str]] = None,
@@ -272,6 +273,9 @@ def _estimate_all_models(
             Defaults to "numba".
         collin_tol : float, optional
             The tolerance level for the multicollinearity check. Default is 1e-6.
+        collin_tol_var : float, optional
+            Tolerance for the variance ratio collinearity check. Default is None
+            (auto-enable for LSMR backends with threshold 1e-6).
         iwls_maxiter : int, optional
             The maximum number of iterations for the IWLS algorithm. Default is 25.
             Only relevant for non-linear estimation strategies.
@@ -350,6 +354,7 @@ def _estimate_all_models(
                         model_kwargs.update(
                             {
                                 "demeaner_backend": demeaner_backend,
+                                "collin_tol_var": collin_tol_var,
                             }
                         )
 

diff --git a/pyfixest/estimation/api/feglm.py b/pyfixest/estimation/api/feglm.py
@@ -25,10 +25,11 @@ def feglm(
     ssc: Optional[dict[str, Union[str, bool]]] = None,
     fixef_rm: FixedRmOptions = "singleton",
     fixef_tol: float = 1e-06,
-    fixef_maxiter: int = 100_000,
+    fixef_maxiter: int = 10_000,
     iwls_tol: float = 1e-08,
     iwls_maxiter: int = 25,
     collin_tol: float = 1e-09,
+    collin_tol_var: Optional[float] = None,
     separation_check: Optional[list[str]] = None,
     solver: SolverOptions = "scipy.linalg.solve",
     demeaner_backend: DemeanerBackendOptions = "numba",
@@ -107,7 +108,8 @@ def feglm(
 
     fixef_tol: float, optional
         Tolerance for the fixed effects demeaning algorithm. Defaults to 1e-06.
-        Currently does not do anything, as fixed effects are not supported for GLMs.
+        For LSMR-based backends (cupy, cupy32, cupy64, scipy), the tolerance is
+        passed directly as LSMR's atol and btol parameters.
 
     fixef_maxiter: int, optional
          Maximum iterations for the demeaning algorithm.
@@ -122,6 +124,12 @@ def feglm(
     collin_tol : float, optional
         Tolerance for collinearity check, by default 1e-10.
 
+    collin_tol_var : float, optional
+        Tolerance for the variance ratio collinearity check.
+        Default is None: auto-enabled with threshold 1e-6 for LSMR
+        backends (cupy, cupy32, cupy64, scipy), disabled for
+        MAP backends (numba, rust, jax). Set to 0 to disable explicitly.
+
     separation_check: list[str], optional
         Methods to identify and drop separated observations.
         Either "fe" or "ir". Executes "fe" by default (when None).
@@ -323,6 +331,7 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult
         separation_check=separation_check,
         demeaner_backend=demeaner_backend,
         accelerate=accelerate,
+        collin_tol_var=collin_tol_var,
     )
 
     if fixest._is_multiple_estimation:

diff --git a/pyfixest/estimation/api/feols.py b/pyfixest/estimation/api/feols.py
@@ -27,6 +27,7 @@ def feols(
     fixef_tol=1e-06,
     fixef_maxiter: int = 10_000,
     collin_tol: float = 1e-09,
+    collin_tol_var: Optional[float] = None,
     drop_intercept: bool = False,
     copy_data: bool = True,
     store_data: bool = True,
@@ -89,8 +90,16 @@ def feols(
     collin_tol : float, optional
         Tolerance for collinearity check, by default 1e-10.
 
+    collin_tol_var : float, optional
+        Tolerance for the variance ratio collinearity check.
+        Default is None: auto-enabled with threshold 1e-6 for LSMR
+        backends (cupy, cupy32, cupy64, scipy), disabled for
+        MAP backends (numba, rust, jax). Set to 0 to disable explicitly.
+
     fixef_tol: float, optional
         Tolerance for the fixed effects demeaning algorithm. Defaults to 1e-06.
+        For LSMR-based backends (cupy, cupy32, cupy64, scipy), the tolerance is
+        passed directly as LSMR's atol and btol parameters.
 
     fixef_maxiter: int, optional
          Maximum number of iterations for the demeaning algorithm. Defaults to 100,000.
@@ -514,6 +523,7 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array:
         vcov_kwargs=vcov_kwargs,
         collin_tol=collin_tol,
         demeaner_backend=demeaner_backend,
+        collin_tol_var=collin_tol_var,
     )
 
     if fixest._is_multiple_estimation:

diff --git a/pyfixest/estimation/api/fepois.py b/pyfixest/estimation/api/fepois.py
@@ -31,6 +31,7 @@ def fepois(
     iwls_tol: float = 1e-08,
     iwls_maxiter: int = 25,
     collin_tol: float = 1e-09,
+    collin_tol_var: Optional[float] = None,
     separation_check: Optional[list[str]] = None,
     solver: SolverOptions = "scipy.linalg.solve",
     demeaner_backend: DemeanerBackendOptions = "numba",
@@ -99,6 +100,8 @@ def fepois(
 
     fixef_tol: float, optional
         Tolerance for the fixed effects demeaning algorithm. Defaults to 1e-06.
+        For LSMR-based backends (cupy, cupy32, cupy64, scipy), the tolerance is
+        passed directly as LSMR's atol and btol parameters.
 
     fixef_maxiter: int, optional
          Maximum number of iterations for the demeaning algorithm. Defaults to 100,000.
@@ -112,6 +115,12 @@ def fepois(
     collin_tol : float, optional
         Tolerance for collinearity check, by default 1e-10.
 
+    collin_tol_var : float, optional
+        Tolerance for the variance ratio collinearity check.
+        Default is None: auto-enabled with threshold 1e-6 for LSMR
+        backends (cupy, cupy32, cupy64, scipy), disabled for
+        MAP backends (numba, rust, jax). Set to 0 to disable explicitly.
+
     separation_check: list[str], optional
         Methods to identify and drop separated observations.
         Either "fe" or "ir". Executes "fe" by default (when None).
@@ -267,6 +276,7 @@ def fepois(
         collin_tol=collin_tol,
         separation_check=separation_check,
         demeaner_backend=demeaner_backend,
+        collin_tol_var=collin_tol_var,
     )
 
     if fixest._is_multiple_estimation:

diff --git a/pyfixest/estimation/api/quantreg.py b/pyfixest/estimation/api/quantreg.py
@@ -196,7 +196,7 @@ def quantreg(
 
     fixef_rm = "none"
     fixef_tol = 1e-06
-    fixef_maxiter = 100_000
+    fixef_maxiter = 10_000
     iwls_tol = 1e-08
     iwls_maxiter = 25
 

diff --git a/pyfixest/estimation/collinearity.py b/pyfixest/estimation/collinearity.py
@@ -0,0 +1,216 @@
+"""Multicollinearity detection utilities."""
+
+import warnings
+from typing import Callable, Optional
+
+import numpy as np
+
+
+def _drop_multicollinear_variables_chol(
+    X_demeaned: np.ndarray,
+    coefnames: list[str],
+    collin_tol: float,
+    backend_func: Callable,
+) -> tuple[np.ndarray, list[str], list[str], list[int]]:
+    """
+    Drop the columns of X that a Cholesky-type backend flags as collinear.
+
+    The Gram matrix ``X_demeaned.T @ X_demeaned`` is passed to ``backend_func``;
+    the columns it reports are removed and a warning listing them is issued.
+
+    Parameters
+    ----------
+    X_demeaned : numpy.ndarray
+        A demeaned matrix.
+    coefnames : list[str]
+        The names of the coefficients, one per column of ``X_demeaned``.
+    collin_tol : float
+        The tolerance level for the multicollinearity check.
+    backend_func : Callable
+        Called as ``backend_func(tXX, collin_tol)``; must return the tuple
+        ``(id_excl, n_excl, all_removed)``.
+
+    Returns
+    -------
+    X_demeaned : numpy.ndarray
+        X_demeaned excluding multicollinear variables.
+    coefnames : list[str]
+        Names of the retained coefficients, as built-in ``str`` objects.
+    collin_vars : list[str]
+        The collinear variables identified during the check.
+    collin_index : list[int]
+        Column positions of the dropped variables (empty if none).
+
+    Raises
+    ------
+    ValueError
+        If the backend reports, or the deletion leaves, no usable column.
+    """
+    # TODO: avoid doing this computation twice, e.g. compute tXXinv here as fixest does
+    tXX = X_demeaned.T @ X_demeaned
+    id_excl, n_excl, all_removed = backend_func(tXX, collin_tol)
+    if all_removed:
+        raise ValueError(
+            """
+            All variables are collinear. Maybe your model specification introduces multicollinearity? If not, please reach out to the package authors!.
+            """
+        )
+
+    names_array = np.array(coefnames)
+    collin_vars: list[str] = []
+    collin_index: list[int] = []
+    if n_excl > 0:
+        collin_vars = names_array[id_excl].tolist()
+        if len(collin_vars) > 5:
+            shown = "".join(f"\n    {v}" for v in collin_vars[:5]) + "\n    ..."
+        else:
+            shown = str(collin_vars)
+        warnings.warn(
+            f"""
+            {len(collin_vars)} variables dropped due to multicollinearity.
+            The following variables are dropped: {shown}.
+            """
+        )
+        X_demeaned = np.delete(X_demeaned, id_excl, axis=1)
+        if X_demeaned.ndim == 2 and X_demeaned.shape[1] == 0:
+            raise ValueError(
+                """
+                All variables are collinear. Please check your model specification.
+                """
+            )
+        names_array = np.delete(names_array, id_excl)
+        collin_index = id_excl.tolist()
+    # tolist() yields built-in str; list(names_array) would leak numpy.str_ items.
+    return X_demeaned, names_array.tolist(), collin_vars, collin_index
+
+
+def _drop_multicollinear_variables_var(
+    X_demeaned: np.ndarray,
+    coefnames: list[str],
+    X_raw_sumsq: Optional[np.ndarray],
+    collin_tol_var: float,
+) -> tuple[np.ndarray, list[str], list[str], list[int]]:
+    """
+    Detect variables absorbed by fixed effects via variance ratio.
+
+    Computes rho_i = ||x_tilde_i||^2 / ||x_i||^2 for each column.
+    Columns with rho_i < collin_tol_var are flagged as absorbed. The test is
+    evaluated as ||x_tilde_i||^2 < collin_tol_var * ||x_i||^2, so a column
+    with zero raw norm is kept without triggering a division warning.
+
+    Parameters
+    ----------
+    X_demeaned : numpy.ndarray
+        The demeaned design matrix.
+    coefnames : list[str]
+        The names of the coefficients.
+    X_raw_sumsq : numpy.ndarray or None
+        Squared column norms of X before demeaning. None skips the check.
+    collin_tol_var : float
+        Tolerance for the variance ratio check.
+
+    Returns
+    -------
+    X_demeaned : numpy.ndarray
+        The design matrix after removing absorbed variables.
+    coefnames : list[str]
+        Coefficient names after removing absorbed variables.
+    collin_vars : list[str]
+        Names of absorbed variables.
+    collin_index : list[int]
+        Indices of absorbed variables.
+    """
+    if X_raw_sumsq is None or X_demeaned.shape[1] == 0:
+        return X_demeaned, coefnames, [], []
+
+    demeaned_norms = (X_demeaned**2).sum(axis=0)
+    # Cross-multiplied form of ratio < tol: no 0/0 -> NaN for all-zero columns.
+    absorbed_mask = demeaned_norms < collin_tol_var * np.asarray(X_raw_sumsq)
+    if not absorbed_mask.any():
+        return X_demeaned, coefnames, [], []
+
+    collin_index = np.where(absorbed_mask)[0]
+    names_array = np.array(coefnames)
+    collin_vars = names_array[collin_index].tolist()
+    warnings.warn(
+        f"""
+        {len(collin_vars)} variables dropped (absorbed by fixed effects).
+        The following variables are dropped: {collin_vars}.
+        """
+    )
+
+    X_demeaned = np.delete(X_demeaned, collin_index, axis=1)
+    coefnames = np.delete(names_array, collin_index).tolist()
+    return X_demeaned, coefnames, collin_vars, collin_index.tolist()
+
+
+def drop_multicollinear_variables(
+    X_demeaned: np.ndarray,
+    coefnames: list[str],
+    collin_tol: float,
+    backend_func: Callable,
+    X_raw_sumsq: Optional[np.ndarray],
+    collin_tol_var: float,
+    has_fixef: bool,
+) -> tuple[np.ndarray, list[str], list[str], list[int]]:
+    """
+    Remove collinear columns: Cholesky check first, then variance ratio check.
+
+    The variance ratio check only sees columns that survived the Cholesky check.
+
+    Parameters
+    ----------
+    X_demeaned : numpy.ndarray
+        Demeaned design matrix.
+    coefnames : list[str]
+        Coefficient names.
+    collin_tol : float
+        Tolerance passed to the Cholesky multicollinearity check.
+    backend_func : Callable
+        Backend routine used by the Cholesky check.
+    X_raw_sumsq : numpy.ndarray or None
+        Squared column norms of X prior to demeaning.
+    collin_tol_var : float
+        Tolerance of the variance ratio check.
+    has_fixef : bool
+        True if the model contains fixed effects.
+
+    Returns
+    -------
+    X_demeaned : numpy.ndarray
+        Design matrix without the removed columns.
+    coefnames : list[str]
+        Names of the coefficients that were kept.
+    collin_vars : list[str]
+        Names of every removed variable, Cholesky drops first.
+    collin_index : list[int]
+        Positions of the removed variables in the original input columns.
+    """
+    n_cols = X_demeaned.shape[1]
+    dropped_names: list[str] = []
+    dropped_positions: list[int] = []
+    chol_idx: list[int] = []
+    if n_cols > 0:
+        chol_out = _drop_multicollinear_variables_chol(
+            X_demeaned, coefnames, collin_tol, backend_func
+        )
+        X_demeaned, coefnames, chol_vars, chol_idx = chol_out
+        dropped_names += chol_vars
+        dropped_positions += chol_idx
+
+    var_check_on = has_fixef and collin_tol_var > 0 and X_raw_sumsq is not None
+    if not var_check_on or X_demeaned.shape[1] == 0:
+        return X_demeaned, coefnames, dropped_names, dropped_positions
+
+    # Align the raw norms with the columns that survived the Cholesky step.
+    if chol_idx:
+        X_raw_sumsq = np.delete(X_raw_sumsq, chol_idx)
+    X_demeaned, coefnames, var_vars, var_idx = _drop_multicollinear_variables_var(
+        X_demeaned, coefnames, X_raw_sumsq, collin_tol_var
+    )
+    dropped_names += var_vars
+    if var_idx:
+        # Map positions among surviving columns back to original positions.
+        survivors = np.delete(np.arange(n_cols), chol_idx)
+        dropped_positions += survivors[var_idx].tolist()
+    return X_demeaned, coefnames, dropped_names, dropped_positions