Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit. Hold shift + click to select a range
aa82abf
Add accelerated demeaning with Irons-Tuck algorithm
schroedk Aug 19, 2025
006ad5f
Optimize demean_accelerated performance
schroedk Dec 26, 2025
2ab945d
Refactor demean_accelerated into modular trait-based architecture
schroedk Dec 27, 2025
f7f8bed
Add Rust singleton detection and Python-side optimizations
schroedk Jan 2, 2026
1ed8d09
Add tests and improve buffer management
schroedk Jan 3, 2026
0ffdaea
Simplify accelerator architecture
schroedk Jan 3, 2026
c3ca143
Wire Rust accelerated backend to Python
schroedk Jan 3, 2026
81e2aa1
Remove old demean implementation, use accelerated version
schroedk Jan 4, 2026
420d7fc
Add FE reordering by size for faster convergence
schroedk Jan 5, 2026
a39ab4b
Minor grammar and typo fixes
schroedk Jan 5, 2026
9cc4f59
Reuse coefficient sum buffers to reduce allocations
schroedk Jan 5, 2026
903ae07
Add manual loop unrolling for gather operations
schroedk Jan 5, 2026
28eaf83
documentation clarifications in types.rs
s3alfisc Jan 6, 2026
1610a70
document ssc = 0 convergence reason
s3alfisc Jan 7, 2026
06ef560
Rename coef to omega in Irons-Tuck accelerate for clarity
s3alfisc Jan 7, 2026
de60290
DemeanResult struct does not contain coefficients (though it would be…
s3alfisc Jan 7, 2026
35ba573
Always reorder FEs by size (remove reorder_fe config option)
schroedk Jan 9, 2026
c277282
Change convergence_len to convergence_range for generic Projector
schroedk Jan 9, 2026
7a5089b
Add FE coefficient tracking with original order restoration
schroedk Jan 9, 2026
940ffaf
Add Python tests for FE coefficient extraction
schroedk Jan 9, 2026
593664c
Make weights optional in demean with fast unweighted path
schroedk Jan 9, 2026
1e11f97
Refactor Gauss-Seidel sweeper and cache FE slices
schroedk Jan 8, 2026
f25a9ac
Merge pull request #1 from schroedk/refactor/demean_accelerated
schroedk Jan 10, 2026
369e24a
Simplify SSR computation loops in projection.rs
schroedk Jan 11, 2026
dc413f8
Add configurable FE reordering via reorder_fe parameter
schroedk Jan 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ ndarray = { version = "0.16.1", features = ["rayon"] }
rayon = "1.11.0"
numpy = "0.26.0"
thiserror = "2.0.16"
smallvec = "1.13"

[profile.release]
opt-level = 3 # Maximize performance
lto = "fat" # Full link-time optimization
codegen-units = 1 # Whole-program optimization
panic = "abort" # Smaller binary, no unwind support
strip = true # Remove symbol table
debug = false # No debug info in release
2 changes: 2 additions & 0 deletions pyfixest/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .collinear import find_collinear_variables
from .crv1 import crv1_meat_loop
from .demean import demean
from .detect_singletons import detect_singletons
from .nested_fixed_effects import count_fixef_fully_nested_all

__all__ = [
"count_fixef_fully_nested_all",
"crv1_meat_loop",
"demean",
"detect_singletons",
"find_collinear_variables",
]
15 changes: 13 additions & 2 deletions pyfixest/core/_core_impl.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from typing import TypedDict

import numpy as np
from numpy.typing import NDArray

class DemeanResult(TypedDict):
    """Result from the Rust demeaning function (`_demean_rs`).

    Callers typically unpack ``demeaned`` and ``success``; see
    ``pyfixest.core.demean.demean`` for the Python-side wrapper.
    """

    # Array of demeaned values returned by the Rust backend.
    demeaned: NDArray[np.float64]
    # Fixed-effect coefficients recovered during demeaning.
    # NOTE(review): presumably restored to the original FE order even when
    # the backend reorders FEs by size — confirm against the Rust side.
    fe_coefficients: NDArray[np.float64]
    # True if the alternating-projections iteration converged within maxiter.
    success: bool
def _find_collinear_variables_rs(x: NDArray[np.float64], tol: float = 1e-10): ...
def _crv1_meat_loop_rs(
scores: NDArray[np.float64],
Expand All @@ -10,13 +19,15 @@ def _crv1_meat_loop_rs(
def _demean_rs(
x: NDArray[np.float64],
flist: NDArray[np.uint64],
weights: NDArray[np.float64],
weights: NDArray[np.float64] | None = None,
tol: float = 1e-08,
maxiter: int = 100_000,
) -> tuple[np.ndarray, bool]: ...
reorder_fe: bool = False,
) -> DemeanResult: ...
def _count_fixef_fully_nested_all_rs(
all_fixef_array: NDArray,
cluster_colnames: NDArray,
cluster_data: NDArray[np.uint64],
fe_data: NDArray[np.uint64],
) -> tuple[np.ndarray, int]: ...
def _detect_singletons_rs(ids: NDArray[np.uint32]) -> NDArray[np.bool_]: ...
14 changes: 12 additions & 2 deletions pyfixest/core/demean.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def demean(
weights: NDArray[np.float64],
tol: float = 1e-08,
maxiter: int = 100_000,
reorder_fe: bool = False,
) -> tuple[NDArray, bool]:
"""
Demean an array.
Expand All @@ -30,6 +31,9 @@ def demean(
Tolerance criterion for convergence. Defaults to 1e-08.
maxiter : int, optional
Maximum number of iterations. Defaults to 100_000.
reorder_fe : bool, optional
Whether to reorder fixed effects by size (largest first) before demeaning.
This can improve convergence for some datasets. Defaults to False.

Returns
-------
Expand Down Expand Up @@ -70,10 +74,16 @@ def demean(
print(pf.feols(fml, data).coef())
```
"""
return _demean_rs(
# Check if weights are uniform (all equal) - use fast unweighted path
weights_f64 = weights.astype(np.float64, copy=False)
is_uniform = np.allclose(weights_f64, weights_f64.flat[0], atol=1e-10, rtol=0)

result = _demean_rs(
x.astype(np.float64, copy=False),
flist.astype(np.uint64, copy=False),
weights.astype(np.float64, copy=False),
None if is_uniform else weights_f64,
tol,
maxiter,
reorder_fe,
)
return result["demeaned"], result["success"]
48 changes: 48 additions & 0 deletions pyfixest/core/detect_singletons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import numpy as np
from numpy.typing import NDArray

from pyfixest.core._core_impl import _detect_singletons_rs


def detect_singletons(ids: NDArray[np.integer]) -> NDArray[np.bool_]:
    """
    Identify singleton fixed-effect observations.

    Scans the columns of a 2D array of fixed-effect identifiers and flags
    every observation that is the only member of its group. Because removing
    a singleton in one column can create new singletons in later columns, the
    underlying routine re-examines the surviving rows after each column is
    processed.

    Parameters
    ----------
    ids : np.ndarray
        Integer array of shape (n_samples, n_features) whose entries are
        non-negative fixed-effect identifiers.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape (n_samples,); True marks observations that
        carry a singleton fixed effect.

    Notes
    -----
    Column-major (Fortran-ordered) input is fastest for the backend, so the
    identifiers are staged into an F-contiguous uint32 buffer before the
    call; operating directly on a row-major array would be markedly slower.
    """
    if not np.issubdtype(ids.dtype, np.integer):
        raise TypeError("Fixed effects must be integers")

    # Stage into a uint32, F-contiguous buffer (mirrors the numba backend).
    # Transposing an empty (n_features, n_samples) C-order array yields an
    # (n_samples, n_features) view with column-major layout to copy into.
    n_samples, n_features = ids.shape
    fe_codes: NDArray[np.uint32] = np.empty((n_features, n_samples), dtype=np.uint32).T
    fe_codes[:] = ids
    return _detect_singletons_rs(fe_codes)
6 changes: 3 additions & 3 deletions pyfixest/estimation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from pyfixest.core.detect_singletons import (
detect_singletons,
)
from pyfixest.estimation import literals
from pyfixest.estimation.api import (
feglm,
Expand All @@ -8,9 +11,6 @@
from pyfixest.estimation.demean_ import (
demean,
)
from pyfixest.estimation.detect_singletons_ import (
detect_singletons,
)
from pyfixest.estimation.fegaussian_ import Fegaussian
from pyfixest.estimation.feiv_ import (
Feiv,
Expand Down
4 changes: 2 additions & 2 deletions pyfixest/estimation/backends.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pyfixest.core.collinear import find_collinear_variables
from pyfixest.core.crv1 import crv1_meat_loop
from pyfixest.core.demean import demean
from pyfixest.core.demean import demean as demean_rust
from pyfixest.core.nested_fixed_effects import count_fixef_fully_nested_all
from pyfixest.estimation.demean_ import demean as demean_nb
from pyfixest.estimation.numba.find_collinear_variables_nb import (
Expand Down Expand Up @@ -53,7 +53,7 @@
"nonnested": count_fixef_fully_nested_all_nb,
},
"rust": {
"demean": demean,
"demean": demean_rust,
"collinear": find_collinear_variables,
"crv1_meat": crv1_meat_loop,
"nonnested": count_fixef_fully_nested_all,
Expand Down
4 changes: 2 additions & 2 deletions pyfixest/estimation/demean_.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,9 @@ def _set_demeaner_backend(
If the demeaning backend is not supported.
"""
if demeaner_backend == "rust":
from pyfixest.core.demean import demean as demean_rs
from pyfixest.core.demean import demean as demean_rust

return demean_rs
return demean_rust
elif demeaner_backend == "numba":
return demean
elif demeaner_backend == "jax":
Expand Down
4 changes: 2 additions & 2 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import functools
import gc
import re
import warnings
from collections.abc import Mapping
Expand Down Expand Up @@ -1140,7 +1139,8 @@ def _clear_attributes(self):
for attr in attributes:
if hasattr(self, attr):
delattr(self, attr)
gc.collect()
# Note: gc.collect() was removed here as it added ~50ms overhead per call
# and Python's automatic GC is sufficient for most use cases

def wald_test(self, R=None, q=None, distribution="F"):
"""
Expand Down
30 changes: 16 additions & 14 deletions pyfixest/estimation/model_matrix_fixest_.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from formulaic import Formula

from pyfixest.estimation.detect_singletons_ import detect_singletons
from pyfixest.core.detect_singletons import detect_singletons
from pyfixest.estimation.FormulaParser import FixestFormula
from pyfixest.utils.utils import capture_context

Expand Down Expand Up @@ -153,14 +153,16 @@ def model_matrix_fixest(
if weights is not None:
weights_df = mm["weights"]

# drop infinite values
inf_idx_list = []
# drop infinite values - use numpy for speed (df.isin is very slow)
inf_mask = np.zeros(Y.shape[0], dtype=bool)
for df in [Y, X, Z, endogvar, weights_df]:
if df is not None:
inf_idx = np.where(df.isin([np.inf, -np.inf]).any(axis=1))[0].tolist()
inf_idx_list.extend(inf_idx)
arr = df.to_numpy()
# Check for inf values: ~np.isfinite catches both inf and nan,
# but we only want inf, so use explicit check
inf_mask |= np.isinf(arr).any(axis=1)

inf_idx = list(set(inf_idx_list))
inf_idx = np.where(inf_mask)[0]
if len(inf_idx) > 0:
warnings.warn(
f"{len(inf_idx)} rows with infinite values detected. These rows are dropped from the model."
Expand Down Expand Up @@ -560,24 +562,24 @@ def _is_finite_positive(x: Union[pd.DataFrame, pd.Series, np.ndarray]) -> bool:
return bool((x[~np.isnan(x)] > 0).all())


def factorize(fe: pd.Series) -> pd.Series:
    """
    Factorize / Convert fixed effects into integers.

    Parameters
    ----------
    fe : pd.Series
        A single fixed-effect column.

    Returns
    -------
    pd.Series
        A float series aligned to ``fe``'s index in which each distinct
        value of ``fe`` is replaced by an integer label. Missing values —
        encoded by ``pd.factorize`` with the sentinel -1 — are mapped back
        to NaN rather than being dropped.
    """
    labels, _ = pd.factorize(fe)
    # -1 is pd.factorize's NaN sentinel; np.where promotes the integer
    # labels to float64 so the sentinel can be swapped for a true NaN.
    values = np.where(labels == -1, np.nan, labels)
    return pd.Series(values, index=fe.index)


def wrap_factorize(pattern: str) -> str:
Expand Down
Loading
Loading