From fe12c0ac834bdd79eb619ac911e0bde62429d329 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sat, 28 Mar 2026 11:09:47 +0100 Subject: [PATCH 01/10] cache within proposal --- .../execute-results/html.json | 16 ++ docs/_quarto.yml | 2 + docs/how-to/fixed-effects-solvers.qmd | 237 ++++++++++++++++++ docs/how-to/index.qmd | 1 + 4 files changed, 256 insertions(+) create mode 100644 docs/_freeze/how-to/fixed-effects-solvers/execute-results/html.json create mode 100644 docs/how-to/fixed-effects-solvers.qmd diff --git a/docs/_freeze/how-to/fixed-effects-solvers/execute-results/html.json b/docs/_freeze/how-to/fixed-effects-solvers/execute-results/html.json new file mode 100644 index 000000000..4845c7fca --- /dev/null +++ b/docs/_freeze/how-to/fixed-effects-solvers/execute-results/html.json @@ -0,0 +1,16 @@ +{ + "hash": "2e454435a33d9f6487d50ede3e986926", + "result": { + "engine": "jupyter", + "markdown": "---\ntitle: \"Choosing and Reusing Fixed-Effects Solvers\"\ndescription: \"How to configure fixed-effects demeaners, reuse within solvers and preconditioners, and persist reusable preconditioners across sessions.\"\ncategories: [Fixed Effects, Performance]\n---\n\n::: {#661c1809 .cell execution_count=1}\n``` {.python .cell-code}\nimport pickle\nimport tempfile\nfrom pathlib import Path\n\nimport numpy as np\nimport pandas as pd\n\nimport pyfixest as pf\n```\n:::\n\n\n`PyFixest` now accepts object-based demeaner configurations. 
This makes it easier to choose a backend, tune solver settings, and pass reusable solver objects between regressions.\n\nIf you are deciding whether a fixed-effects problem is likely to be easy or hard for alternating-projections methods, see [When Are Fixed Effects Problems Difficult?](../explanation/difficult-fixed-effects.md).\n\n## Choosing a Demeaner\n\nThe main fixed-effects options are:\n\n| Demeaner | Main idea | Typical use |\n|---|---|---|\n| `pf.NumbaDemeaner(...)` | CPU alternating projections | good default on CPU |\n| `pf.WithinDemeaner(...)` | Krylov solver with Schwarz preconditioning via `within` | hard FE problems and reuse workflows |\n| `pf.ScipyDemeaner(...)` | SciPy sparse iterative solver | CPU sparse linear algebra workflows |\n| `pf.CupyDemeaner(...)` / `pf.JaxDemeaner(...)` | GPU-based demeaning | large GPU workloads |\n\nThe `within` backend is the only one that currently exposes reusable `solver` and `preconditioner` objects.\n\n## Example Data\n\n::: {#08c7114a .cell execution_count=2}\n``` {.python .cell-code}\npanel = pf.get_worker_panel(N_workers=300, N_firms=40, N_years=8, seed=42)\n\nrng = np.random.default_rng(123)\npanel[\"log_bonus\"] = (\n 0.4 * panel[\"worker_fe\"]\n - 0.2 * panel[\"firm_fe\"]\n + 0.01 * panel[\"experience\"]\n + 0.02 * panel[\"tenure\"]\n + rng.normal(0, 0.2, size=len(panel))\n)\n\nfml_y1 = \"log_wage ~ experience + tenure | worker_id + firm_id\"\nfml_y2 = \"log_bonus ~ experience + tenure | worker_id + firm_id\"\npanel.head()\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
worker_idfirm_idyearfemaleexperiencetenurelog_wageworker_fefirm_felog_bonus
001620000110.0289210.1523590.019964-0.110874
11342000101-0.526312-0.5199920.009835-0.263521
222020001310.5156410.3752260.2347050.410734
331320001010.5287490.4702820.1316980.220568
44312000031-0.337822-0.9755180.191260-0.194413
\n
\n```\n:::\n:::\n\n\n## Pass Demeaners as Dataclasses\n\nInstead of string shorthands, you can pass a configured demeaner directly.\n\n::: {#97c68471 .cell execution_count=3}\n``` {.python .cell-code}\nfit_numba = pf.feols(\n fml_y1,\n data=panel,\n demeaner=pf.NumbaDemeaner(fixef_maxiter=10_000),\n)\n\nfit_within = pf.feols(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(\n krylov_method=\"gmres\",\n gmres_restart=20,\n preconditioner_type=\"multiplicative\",\n ),\n)\n\npd.concat(\n {\n \"numba\": fit_numba.coef(),\n \"within\": fit_within.coef(),\n },\n axis=1,\n)\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n
\n \n \n```\n:::\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n
\n \n \n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
numbawithin
Coefficient
experience0.0184000.018400
tenure0.0165170.016517
\n
\n```\n:::\n:::\n\n\n## Reuse a Solver Within the Same Session\n\nFor in-process reuse, prefer a `solver`. This is the fast path: the expensive setup is done once, and later solves borrow the stored solver directly.\n\nYou can reuse a solver exposed by a fitted model:\n\n::: {#9d0f1071 .cell execution_count=4}\n``` {.python .cell-code}\nfit1 = pf.feols(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(),\n)\n\nfit2 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(solver=fit1.solver_),\n)\n\nfit2.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nYou can also build a reusable solver explicitly:\n\n::: {#6e60c279 .cell execution_count=5}\n``` {.python .cell-code}\nsolver = pf.get_solver(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner_type=\"additive\"),\n)\n\nfit3 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(solver=solver),\n)\n\nfit3.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nUse a solver when the sample, weights, and fixed-effects encoding are the same. 
Solver reuse is intentionally strict.\n\n## Reuse a Preconditioner Across Regressions\n\nIf you want a more portable object, extract a `preconditioner` from a solver and pass that into a later regression:\n\n::: {#8a613a85 .cell execution_count=6}\n``` {.python .cell-code}\npreconditioner_from_fit = fit1.solver_.to_preconditioner()\n\nfit4 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner=preconditioner_from_fit),\n)\n\nfit4.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nCompared with a solver, a preconditioner is a lower-level object. It only affects convergence speed, not the target problem itself.\n\n## Build a Preconditioner Explicitly\n\nYou can also construct a reusable preconditioner directly:\n\n::: {#39988af0 .cell execution_count=7}\n``` {.python .cell-code}\npreconditioner = pf.get_preconditioner(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner_type=\"additive\"),\n)\n\nfit5 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner=preconditioner),\n)\n\nfit5.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nThis is the preferred API when you want a reusable object for persistence or for advanced reuse workflows.\n\n## Persist a Preconditioner Across Sessions\n\nPreconditioners are pickleable, so you can save them and reload them in a later session:\n\n::: {#e1356e5c .cell execution_count=8}\n``` {.python .cell-code}\nwith tempfile.TemporaryDirectory() as tmpdir:\n path = Path(tmpdir) / \"within_preconditioner.pkl\"\n\n with path.open(\"wb\") as file:\n pickle.dump(preconditioner, file)\n\n with path.open(\"rb\") as file:\n restored_preconditioner = pickle.load(file)\n\n print(path)\n\nfit6 = pf.feols(\n fml_y2,\n 
data=panel,\n demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner),\n)\n\nfit6.coef()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n/var/folders/98/c353q4p95v5_fz62gr5c9td80000gn/T/tmpa3kwwj68/within_preconditioner.pkl\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=8}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nIn practice, replace the temporary file with a durable path.\n\n## Which Reuse Object Should I Use?\n\n- Use `solver` for repeated estimation in the same Python session.\n- Use `preconditioner` when you want a portable, pickleable object.\n- Use `fit.solver_.to_preconditioner()` when you want to turn a fitted model's solver into a persistence artifact.\n- Use `pf.get_preconditioner()` when you want to build the persistence artifact explicitly.\n\nReusable solvers are not supported for `feglm()` and `fepois()`, because those estimators update observation weights across iterations. 
Reusable preconditioners can still be used there.\n\n", + "supporting": [ + "fixed-effects-solvers_files" + ], + "filters": [], + "includes": { + "include-in-header": [ + "\n\n\n" + ] + } + } +} diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 3fb06f98d..fc4cb0720 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -72,6 +72,8 @@ website: - text: "All Guides" file: how-to/index.qmd - text: "---" + - text: "Choosing and Reusing Fixed-Effects Solvers" + file: how-to/fixed-effects-solvers.qmd - text: "Marginal Effects & Hypothesis Testing" file: how-to/marginaleffects.qmd - text: "Regression Decomposition" diff --git a/docs/how-to/fixed-effects-solvers.qmd b/docs/how-to/fixed-effects-solvers.qmd new file mode 100644 index 000000000..cec4a55a8 --- /dev/null +++ b/docs/how-to/fixed-effects-solvers.qmd @@ -0,0 +1,237 @@ +--- +title: "Choosing and Reusing Fixed-Effects Solvers" +description: "How to configure fixed-effects demeaners, reuse within solvers and preconditioners, and persist reusable preconditioners across sessions." +categories: [Fixed Effects, Performance] +--- + +```{python} +import pickle +import tempfile +from pathlib import Path + +import numpy as np +import pandas as pd + +import pyfixest as pf +``` + +`PyFixest` now accepts object-based demeaner configurations. This makes it easier to choose a backend, tune solver settings, and pass reusable solver objects between regressions. + +If you are deciding whether a fixed-effects problem is likely to be easy or hard for alternating-projections methods, see [When Are Fixed Effects Problems Difficult?](../explanation/difficult-fixed-effects.md). 
+ +## Choosing a Demeaner + +The main fixed-effects options are: + +| Demeaner | Main idea | Typical use | +|---|---|---| +| `pf.NumbaDemeaner(...)` | CPU alternating projections | good default on CPU | +| `pf.WithinDemeaner(...)` | Krylov solver with Schwarz preconditioning via `within` | hard FE problems and reuse workflows | +| `pf.ScipyDemeaner(...)` | SciPy sparse iterative solver | CPU sparse linear algebra workflows | +| `pf.CupyDemeaner(...)` / `pf.JaxDemeaner(...)` | GPU-based demeaning | large GPU workloads | + +The `within` backend is the only one that currently exposes reusable `solver` and `preconditioner` objects. + +## Solver vs Preconditioner + +The two reusable objects serve different purposes: + +| Object | Stores | Reuse rule | Main benefit | Main risk | +|---|---|---|---|---| +| `solver` | the full demeaning problem | reuse only on the same sample, with the same weights, fixed-effects encoding, and solver settings | skips the expensive setup entirely | exact-match only | +| `preconditioner` | only the acceleration object | can be reused more loosely and may remain useful even when slightly stale | avoids rebuilding the preconditioner from scratch | stale reuse may need more iterations | + +In other words, a `solver` is tied to one specific demeaning problem. A `preconditioner` is only an accelerator for the current problem. That is why a stale preconditioner can still be useful on non-identical, but related problems, while a stale solver should not be reused. + +The expensive part of the `within` backend is the initial setup. Before the iterative solve even starts, `within` has to build a high-quality preconditioner for the fixed-effects problem. That setup cost is exactly what solver and preconditioner reuse are designed to avoid. 
+ +## Example Data + +```{python} +panel = pf.get_worker_panel(N_workers=300, N_firms=40, N_years=8, seed=42) + +rng = np.random.default_rng(123) +panel["log_bonus"] = ( + 0.4 * panel["worker_fe"] + - 0.2 * panel["firm_fe"] + + 0.01 * panel["experience"] + + 0.02 * panel["tenure"] + + rng.normal(0, 0.2, size=len(panel)) +) + +fml_y1 = "log_wage ~ experience + tenure | worker_id + firm_id" +fml_y2 = "log_bonus ~ experience + tenure | worker_id + firm_id" +panel.head() +``` + +## Pass Demeaners as Dataclasses + +Instead of string shorthands, you can pass a configured demeaner directly. + +```{python} +fit_numba = pf.feols( + fml_y1, + data=panel, + demeaner=pf.NumbaDemeaner(fixef_maxiter=10_000), +) + +fit_within = pf.feols( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner( + krylov_method="gmres", + gmres_restart=20, + preconditioner_type="multiplicative", + ), +) + +pd.concat( + { + "numba": fit_numba.coef(), + "within": fit_within.coef(), + }, + axis=1, +) +``` + +## Reuse a Solver Within the Same Session + +For in-process reuse, prefer a `solver`. This is the fast path: the expensive setup is done once, and later solves borrow the stored solver directly. + +Why is the first run expensive? Because `within` has to build the preconditioner for the current worker-firm problem. Once that has been done, repeated solves on the exact same problem are much cheaper. 
+ +You can reuse a solver exposed by a fitted model: + +```{python} +fit1 = pf.feols( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner(), +) + +fit2 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(solver=fit1.solver_), +) + +fit2.coef() +``` + +You can also build a reusable solver explicitly: + +```{python} +solver = pf.get_solver( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner_type="additive"), +) + +fit3 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(solver=solver), +) + +fit3.coef() +``` + +Use a solver when the sample, weights, and fixed-effects encoding are the same. Solver reuse is intentionally strict. + +More concretely, solver reuse is appropriate only when: + +- the same rows are used +- the same observation weights are used +- the same fixed-effects structure and encoding are used +- the same solver configuration is used + +This strictness is what makes the solver path fast: `PyFixest` can borrow the stored solver directly instead of rebuilding anything. No fresh solver object needs to be constructed, and no new preconditioner needs to be built. + +## Reuse a Preconditioner Across Regressions + +If you want a more portable object, extract a `preconditioner` from a solver and pass that into a later regression: + +```{python} +preconditioner_from_fit = fit1.solver_.to_preconditioner() + +fit4 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner=preconditioner_from_fit), +) + +fit4.coef() +``` + +Compared with a solver, a preconditioner is a lower-level object. It only affects convergence speed, not the target problem itself. + +This is the key tradeoff: + +- a solver must match the current problem exactly +- a preconditioner can be stale and still be useful + +In the preconditioner case, `PyFixest` builds a fresh solver for the current regression and uses the stored preconditioner only to accelerate convergence. 
So preconditioner reuse still has some setup cost, but it avoids the most expensive part: building a new preconditioner from scratch. + +If the sample or weights changed a bit, the preconditioner may still help, but it will usually be less effective and the Krylov solver may need more iterations. + +So if you expect to rerun the exact same demeaning problem, use a `solver`. If you expect slightly different samples or want something portable across sessions, use a `preconditioner`. + +## Build a Preconditioner Explicitly + +You can also construct a reusable preconditioner directly: + +```{python} +preconditioner = pf.get_preconditioner( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner_type="additive"), +) + +fit5 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner=preconditioner), +) + +fit5.coef() +``` + +This is the preferred API when you want a reusable object for persistence or for advanced reuse workflows. + +## Persist a Preconditioner Across Sessions + +Preconditioners are pickleable, so you can save them and reload them in a later session: + +```{python} +with tempfile.TemporaryDirectory() as tmpdir: + path = Path(tmpdir) / "within_preconditioner.pkl" + + with path.open("wb") as file: + pickle.dump(preconditioner, file) + + with path.open("rb") as file: + restored_preconditioner = pickle.load(file) + + print(path) + +fit6 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner), +) + +fit6.coef() +``` + +In practice, replace the temporary file with a durable path. + +## Which Reuse Object Should I Use? + +- Use `solver` for repeated estimation in the same Python session. +- Use `preconditioner` when you want a portable, pickleable object. +- Use `solver` only for exact-match reuse. +- Use `preconditioner` when reuse may be slightly stale, for example after small sample changes. 
+- Use `fit.solver_.to_preconditioner()` when you want to turn a fitted model's solver into a persistence artifact. +- Use `pf.get_preconditioner()` when you want to build the persistence artifact explicitly. + +Reusable solvers are not supported for `feglm()` and `fepois()`, because those estimators update observation weights across iterations. Reusable preconditioners can still be used there. diff --git a/docs/how-to/index.qmd b/docs/how-to/index.qmd index b6b43d0e2..a53b49724 100644 --- a/docs/how-to/index.qmd +++ b/docs/how-to/index.qmd @@ -6,6 +6,7 @@ aliases: listing: type: grid contents: + - fixed-effects-solvers.qmd - marginaleffects.qmd - regression_decomposition.qmd - pyfixest_gpu.qmd From aa5e0b3736d24f8b2bbcea0cee73af362755a015 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sat, 28 Mar 2026 19:55:10 +0100 Subject: [PATCH 02/10] preliminaries, no need to review this, please start with next commit --- pyfixest/__init__.py | 12 +- pyfixest/demeaners.py | 166 ++++++++++++++++++ .../estimation/internals/demeaner_options.py | 123 +++++++++++++ pyfixest/estimation/internals/literals.py | 10 +- 4 files changed, 309 insertions(+), 2 deletions(-) create mode 100644 pyfixest/demeaners.py create mode 100644 pyfixest/estimation/internals/demeaner_options.py diff --git a/pyfixest/__init__.py b/pyfixest/__init__.py index b1cc65a8b..e4e5e42bd 100644 --- a/pyfixest/__init__.py +++ b/pyfixest/__init__.py @@ -8,9 +8,14 @@ __version__ = "unknown" __all__ = [ + "BaseDemeaner", + "LsmrDemeaner", + "MapDemeaner", "SaturatedEventStudy", + "WithinDemeaner", "bonferroni", "coefplot", + "demeaners", "did", "did2s", "dtable", @@ -40,7 +45,7 @@ ] # Submodules loaded lazily -_submodules = ["did", "errors", "estimation", "report", "utils"] +_submodules = ["demeaners", "did", "errors", "estimation", "report", "utils"] # Map function/class names to their module prefix # For direct module imports: import_module(f"{prefix}.{name}") @@ -50,6 +55,11 @@ "fepois": 
"pyfixest.estimation.api", "feglm": "pyfixest.estimation.api", "quantreg": "pyfixest.estimation.api", + # demeaners + "BaseDemeaner": "pyfixest.demeaners", + "LsmrDemeaner": "pyfixest.demeaners", + "MapDemeaner": "pyfixest.demeaners", + "WithinDemeaner": "pyfixest.demeaners", # estimation - other functions (still use parent module + getattr) "bonferroni": "pyfixest.estimation", "rwolf": "pyfixest.estimation", diff --git a/pyfixest/demeaners.py b/pyfixest/demeaners.py new file mode 100644 index 000000000..ddba25cbb --- /dev/null +++ b/pyfixest/demeaners.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from dataclasses import dataclass, replace +from numbers import Real +from typing import ClassVar, Literal, Self + + +def _validate_positive_float(value: float | None, name: str) -> float | None: + if value is None: + return None + if not isinstance(value, Real): + raise TypeError(f"`{name}` must be a real number or None.") + value = float(value) + if value <= 0: + raise ValueError(f"`{name}` must be strictly positive.") + return value + + +def _validate_positive_int(value: int | None, name: str) -> int | None: + if value is None: + return None + if not isinstance(value, int): + raise TypeError(f"`{name}` must be an int or None.") + if value <= 0: + raise ValueError(f"`{name}` must be strictly positive.") + return value + + +@dataclass(frozen=True, slots=True) +class BaseDemeaner: + """Base configuration shared by all fixed-effects demeaners.""" + + fixef_tol: float | None = None + fixef_maxiter: int | None = None + kind: ClassVar[str] + + def __post_init__(self) -> None: + object.__setattr__( + self, + "fixef_tol", + _validate_positive_float(self.fixef_tol, "fixef_tol"), + ) + object.__setattr__( + self, + "fixef_maxiter", + _validate_positive_int(self.fixef_maxiter, "fixef_maxiter"), + ) + + def resolved(self, fixef_tol: float, fixef_maxiter: int) -> Self: + """Fill in unspecified fixed-effects controls from top-level defaults.""" + return replace( + self, 
+ fixef_tol=fixef_tol if self.fixef_tol is None else self.fixef_tol, + fixef_maxiter=( + fixef_maxiter if self.fixef_maxiter is None else self.fixef_maxiter + ), + ) + + +@dataclass(frozen=True, slots=True) +class MapDemeaner(BaseDemeaner): + """Alternating-projections demeaner with selectable implementation backend.""" + + backend: Literal["numba", "rust", "jax"] = "numba" + kind: ClassVar[str] = "map" + + def __post_init__(self) -> None: + BaseDemeaner.__post_init__(self) + if not isinstance(self.backend, str): + raise TypeError("`backend` must be a string.") + backend = self.backend.lower() + if backend not in {"numba", "rust", "jax"}: + raise ValueError("`backend` must be one of 'numba', 'rust', or 'jax'.") + object.__setattr__(self, "backend", backend) + + +@dataclass(frozen=True, slots=True) +class WithinDemeaner(BaseDemeaner): + """Krylov-based demeaner configuration for the Rust `within` backend.""" + + krylov_method: str = "cg" + gmres_restart: int = 30 + preconditioner_type: str = "additive" + kind: ClassVar[str] = "within" + + def __post_init__(self) -> None: + BaseDemeaner.__post_init__(self) + if not isinstance(self.krylov_method, str): + raise TypeError("`krylov_method` must be a string.") + krylov_method = self.krylov_method.lower() + if krylov_method not in {"cg", "gmres"}: + raise ValueError("`krylov_method` must be either 'cg' or 'gmres'.") + object.__setattr__(self, "krylov_method", krylov_method) + + gmres_restart = _validate_positive_int(self.gmres_restart, "gmres_restart") + assert gmres_restart is not None + object.__setattr__(self, "gmres_restart", gmres_restart) + + if not isinstance(self.preconditioner_type, str): + raise TypeError("`preconditioner_type` must be a string.") + preconditioner_type = self.preconditioner_type.lower() + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." 
+ ) + if preconditioner_type == "multiplicative" and krylov_method != "gmres": + raise ValueError("Multiplicative Schwarz requires `krylov_method='gmres'`.") + object.__setattr__(self, "preconditioner_type", preconditioner_type) + + +@dataclass(frozen=True, slots=True) +class LsmrDemeaner(BaseDemeaner): + """Sparse LSMR demeaner for CPU and GPU backends.""" + + precision: str = "float64" + use_gpu: bool | None = None + solver_atol: float | None = None + solver_btol: float | None = None + solver_maxiter: int | None = None + warn_on_cpu_fallback: bool = True + use_preconditioner: bool = True + kind: ClassVar[str] = "lsmr" + + def __post_init__(self) -> None: + BaseDemeaner.__post_init__(self) + if not isinstance(self.precision, str): + raise TypeError("`precision` must be a string.") + precision = self.precision.lower() + if precision not in {"float32", "float64"}: + raise ValueError("`precision` must be either 'float32' or 'float64'.") + object.__setattr__(self, "precision", precision) + + if self.use_gpu is not None and not isinstance(self.use_gpu, bool): + raise TypeError("`use_gpu` must be a bool or None.") + + object.__setattr__( + self, + "solver_atol", + _validate_positive_float(self.solver_atol, "solver_atol"), + ) + object.__setattr__( + self, + "solver_btol", + _validate_positive_float(self.solver_btol, "solver_btol"), + ) + object.__setattr__( + self, + "solver_maxiter", + _validate_positive_int(self.solver_maxiter, "solver_maxiter"), + ) + + if not isinstance(self.warn_on_cpu_fallback, bool): + raise TypeError("`warn_on_cpu_fallback` must be a bool.") + if not isinstance(self.use_preconditioner, bool): + raise TypeError("`use_preconditioner` must be a bool.") + + +AnyDemeaner = MapDemeaner | WithinDemeaner | LsmrDemeaner + +__all__ = [ + "AnyDemeaner", + "BaseDemeaner", + "LsmrDemeaner", + "MapDemeaner", + "WithinDemeaner", +] diff --git a/pyfixest/estimation/internals/demeaner_options.py b/pyfixest/estimation/internals/demeaner_options.py new file mode 
100644 index 000000000..434cf01a9 --- /dev/null +++ b/pyfixest/estimation/internals/demeaner_options.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +from typing import Literal, TypeAlias, cast + +from pyfixest.demeaners import ( + AnyDemeaner, + BaseDemeaner, + LsmrDemeaner, + MapDemeaner, + WithinDemeaner, +) +from pyfixest.estimation.internals.literals import DemeanerBackendOptions + +ResolvedDemeaner: TypeAlias = AnyDemeaner + +_DEMEANER_TYPES = ( + MapDemeaner, + WithinDemeaner, + LsmrDemeaner, +) + + +def normalize_demeaner_backend( + demeaner_backend: DemeanerBackendOptions, +) -> DemeanerBackendOptions: + """Map legacy backend aliases to their canonical demeaner backend.""" + if demeaner_backend == "rust-cg": + return "within" + if demeaner_backend == "cupy64": + return "cupy" + return demeaner_backend + + +def resolve_demeaner( + demeaner: BaseDemeaner | None, + demeaner_backend: DemeanerBackendOptions, + fixef_tol: float, + fixef_maxiter: int, +) -> ResolvedDemeaner: + """Resolve user input into a fully specified demeaner object.""" + backend = normalize_demeaner_backend(demeaner_backend) + + if demeaner is not None: + if not isinstance(demeaner, _DEMEANER_TYPES): + raise TypeError("`demeaner` must be a supported pyfixest demeaner.") + if backend != "numba": + raise ValueError( + "When `demeaner` is provided, `demeaner_backend` must be left at " + "its default value `'numba'`." 
+ ) + return cast( + ResolvedDemeaner, + demeaner.resolved(fixef_tol=fixef_tol, fixef_maxiter=fixef_maxiter), + ) + + shorthand = _from_backend_shorthand( + demeaner_backend=backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + return cast( + ResolvedDemeaner, + shorthand.resolved(fixef_tol=fixef_tol, fixef_maxiter=fixef_maxiter), + ) + + +def get_demeaner_backend(demeaner: ResolvedDemeaner) -> DemeanerBackendOptions: + """Return the backend key associated with a resolved demeaner.""" + if isinstance(demeaner, MapDemeaner): + return cast( + Literal["numba", "rust", "jax"], + demeaner.backend, + ) + if isinstance(demeaner, LsmrDemeaner): + if demeaner.use_gpu is False: + return "scipy" + return "cupy32" if demeaner.precision == "float32" else "cupy" + return "within" + + +def _from_backend_shorthand( + demeaner_backend: DemeanerBackendOptions, + fixef_tol: float, + fixef_maxiter: int, +) -> ResolvedDemeaner: + """Construct a demeaner object from a legacy backend shorthand.""" + if demeaner_backend in {"numba", "rust", "jax"}: + return MapDemeaner( + backend=cast(Literal["numba", "rust", "jax"], demeaner_backend), + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + if demeaner_backend == "within": + return WithinDemeaner(fixef_tol=fixef_tol, fixef_maxiter=fixef_maxiter) + if demeaner_backend == "cupy": + return LsmrDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + precision="float64", + use_gpu=True, + ) + if demeaner_backend == "cupy32": + return LsmrDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + precision="float32", + use_gpu=True, + ) + if demeaner_backend == "scipy": + return LsmrDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + use_gpu=False, + ) + raise ValueError(f"Unknown demeaner backend {demeaner_backend!r}.") + + +__all__ = [ + "ResolvedDemeaner", + "get_demeaner_backend", + "normalize_demeaner_backend", + "resolve_demeaner", +] diff --git a/pyfixest/estimation/internals/literals.py 
b/pyfixest/estimation/internals/literals.py index ab8e7e067..5d27a209e 100644 --- a/pyfixest/estimation/internals/literals.py +++ b/pyfixest/estimation/internals/literals.py @@ -12,7 +12,15 @@ "jax", ] DemeanerBackendOptions = Literal[ - "numba", "jax", "rust", "rust-cg", "cupy", "cupy32", "cupy64", "scipy" + "numba", + "jax", + "rust", + "rust-cg", + "within", + "cupy", + "cupy32", + "cupy64", + "scipy", ] PredictionErrorOptions = Literal["prediction"] QuantregMethodOptions = Literal["fn", "pfn"] From 2e46243c902b70c9ef6aad99a5ed10d1576f3775 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sat, 28 Mar 2026 20:29:24 +0100 Subject: [PATCH 03/10] wire in new demeaner API --- docs/how-to/fixed-effects-solvers.qmd | 3 + docs/how-to/panel_variance_reduction.qmd | 3 + docs/how-to/pyfixest_gpu.qmd | 10 ++- docs/textbook-replications/brave_true.qmd | 1 - pyfixest/demeaners.py | 64 ++++++++----------- pyfixest/estimation/FixestMulti_.py | 12 ++-- pyfixest/estimation/api/feglm.py | 30 +++++++-- pyfixest/estimation/api/feols.py | 30 +++++++-- pyfixest/estimation/api/fepois.py | 30 +++++++-- .../estimation/internals/demeaner_options.py | 22 +++---- pyfixest/estimation/models/fegaussian_.py | 6 +- pyfixest/estimation/models/feglm_.py | 14 ++-- pyfixest/estimation/models/feiv_.py | 12 ++-- pyfixest/estimation/models/felogit_.py | 6 +- pyfixest/estimation/models/feols_.py | 24 +++++-- .../estimation/models/feols_compressed_.py | 10 ++- pyfixest/estimation/models/fepois_.py | 11 ++-- pyfixest/estimation/models/feprobit_.py | 6 +- pyfixest/estimation/quantreg/quantreg_.py | 9 ++- 19 files changed, 189 insertions(+), 114 deletions(-) diff --git a/docs/how-to/fixed-effects-solvers.qmd b/docs/how-to/fixed-effects-solvers.qmd index cec4a55a8..cad7e2a77 100644 --- a/docs/how-to/fixed-effects-solvers.qmd +++ b/docs/how-to/fixed-effects-solvers.qmd @@ -67,6 +67,9 @@ panel.head() ## Pass Demeaners as Dataclasses Instead of string shorthands, you can pass a configured demeaner 
directly. +When a typed `demeaner=` is provided, it is authoritative: its backend, +`fixef_tol`, and `fixef_maxiter` take precedence over the top-level +`demeaner_backend`, `fixef_tol`, and `fixef_maxiter` arguments. ```{python} fit_numba = pf.feols( diff --git a/docs/how-to/panel_variance_reduction.qmd b/docs/how-to/panel_variance_reduction.qmd index 247623ee2..18c69b100 100644 --- a/docs/how-to/panel_variance_reduction.qmd +++ b/docs/how-to/panel_variance_reduction.qmd @@ -229,6 +229,9 @@ fit_panel = pf.feols( ) ``` +This uses the legacy backend shorthand. The typed equivalent is +`demeaner=pf.MapDemeaner(backend="rust")`. + We can compare all results: ```{python} diff --git a/docs/how-to/pyfixest_gpu.qmd b/docs/how-to/pyfixest_gpu.qmd index efb097573..f20df4b76 100644 --- a/docs/how-to/pyfixest_gpu.qmd +++ b/docs/how-to/pyfixest_gpu.qmd @@ -6,8 +6,14 @@ jupyter: python3 --- -`PyFixest` allows to run the fixed effects demeaning on the GPU via the `demeaner_backend` argument. -To do so, you will have to install `jax` and `jaxblib`, for example by typing `pip install pyfixest[jax]`. +`PyFixest` allows fixed-effects demeaning on the GPU either via the legacy +`demeaner_backend` shorthand or via a typed `demeaner=` configuration. For +example, `demeaner_backend="jax"` and `demeaner=pf.MapDemeaner(backend="jax")` +select the same JAX backend. If a typed `demeaner` is provided, it takes +precedence over `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. + +To do so, you will have to install `jax` and `jaxlib`, for example by typing +`pip install pyfixest[jax]`. We test two back-ends for the iterative alternating-projections component of the fixed-effects regression on an Nvidia A100 GPU with 40 GB VRAM (a GPU that one typically wouldn't have installed to play graphics-intensive videogames on consumer hardware). `numba` benchmarks are run on a 12-core xeon CPU. 
diff --git a/docs/textbook-replications/brave_true.qmd b/docs/textbook-replications/brave_true.qmd index 72321e43d..4f8c01081 100644 --- a/docs/textbook-replications/brave_true.qmd +++ b/docs/textbook-replications/brave_true.qmd @@ -33,7 +33,6 @@ panel_fit = pf.feols( fml="lwage ~ married + expersq + union + hours | nr + year", data=data_df, vcov={"CRV1": "nr + year"}, - demeaner_backend="rust", ) ``` diff --git a/pyfixest/demeaners.py b/pyfixest/demeaners.py index ddba25cbb..e793e5277 100644 --- a/pyfixest/demeaners.py +++ b/pyfixest/demeaners.py @@ -1,26 +1,22 @@ from __future__ import annotations -from dataclasses import dataclass, replace +from dataclasses import dataclass from numbers import Real -from typing import ClassVar, Literal, Self +from typing import ClassVar, Literal -def _validate_positive_float(value: float | None, name: str) -> float | None: - if value is None: - return None +def _validate_positive_float(value: float, name: str) -> float: if not isinstance(value, Real): - raise TypeError(f"`{name}` must be a real number or None.") + raise TypeError(f"`{name}` must be a real number.") value = float(value) if value <= 0: raise ValueError(f"`{name}` must be strictly positive.") return value -def _validate_positive_int(value: int | None, name: str) -> int | None: - if value is None: - return None +def _validate_positive_int(value: int, name: str) -> int: if not isinstance(value, int): - raise TypeError(f"`{name}` must be an int or None.") + raise TypeError(f"`{name}` must be an int.") if value <= 0: raise ValueError(f"`{name}` must be strictly positive.") return value @@ -30,8 +26,8 @@ def _validate_positive_int(value: int | None, name: str) -> int | None: class BaseDemeaner: """Base configuration shared by all fixed-effects demeaners.""" - fixef_tol: float | None = None - fixef_maxiter: int | None = None + fixef_tol: float = 1e-06 + fixef_maxiter: int = 10_000 kind: ClassVar[str] def __post_init__(self) -> None: @@ -46,16 +42,6 @@ def 
__post_init__(self) -> None: _validate_positive_int(self.fixef_maxiter, "fixef_maxiter"), ) - def resolved(self, fixef_tol: float, fixef_maxiter: int) -> Self: - """Fill in unspecified fixed-effects controls from top-level defaults.""" - return replace( - self, - fixef_tol=fixef_tol if self.fixef_tol is None else self.fixef_tol, - fixef_maxiter=( - fixef_maxiter if self.fixef_maxiter is None else self.fixef_maxiter - ), - ) - @dataclass(frozen=True, slots=True) class MapDemeaner(BaseDemeaner): @@ -93,7 +79,6 @@ def __post_init__(self) -> None: object.__setattr__(self, "krylov_method", krylov_method) gmres_restart = _validate_positive_int(self.gmres_restart, "gmres_restart") - assert gmres_restart is not None object.__setattr__(self, "gmres_restart", gmres_restart) if not isinstance(self.preconditioner_type, str): @@ -133,21 +118,24 @@ def __post_init__(self) -> None: if self.use_gpu is not None and not isinstance(self.use_gpu, bool): raise TypeError("`use_gpu` must be a bool or None.") - object.__setattr__( - self, - "solver_atol", - _validate_positive_float(self.solver_atol, "solver_atol"), - ) - object.__setattr__( - self, - "solver_btol", - _validate_positive_float(self.solver_btol, "solver_btol"), - ) - object.__setattr__( - self, - "solver_maxiter", - _validate_positive_int(self.solver_maxiter, "solver_maxiter"), - ) + if self.solver_atol is not None: + object.__setattr__( + self, + "solver_atol", + _validate_positive_float(self.solver_atol, "solver_atol"), + ) + if self.solver_btol is not None: + object.__setattr__( + self, + "solver_btol", + _validate_positive_float(self.solver_btol, "solver_btol"), + ) + if self.solver_maxiter is not None: + object.__setattr__( + self, + "solver_maxiter", + _validate_positive_int(self.solver_maxiter, "solver_maxiter"), + ) if not isinstance(self.warn_on_cpu_fallback, bool): raise TypeError("`warn_on_cpu_fallback` must be a bool.") diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py index 
1522ceea8..371868034 100644 --- a/pyfixest/estimation/FixestMulti_.py +++ b/pyfixest/estimation/FixestMulti_.py @@ -7,8 +7,8 @@ from pyfixest.estimation.api.utils import _ALL_SAMPLE, _AllSampleSentinel from pyfixest.estimation.formula.parse import Formula +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, QuantregMethodOptions, QuantregMultiOptions, SolverOptions, @@ -252,7 +252,7 @@ def _estimate_all_models( vcov: str | dict[str, str] | None, solver: SolverOptions, vcov_kwargs: dict[str, Any] | None = None, - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, collin_tol: float = 1e-6, iwls_maxiter: int = 25, iwls_tol: float = 1e-08, @@ -274,9 +274,9 @@ def _estimate_all_models( Additional keyword arguments for the variance-covariance matrix. Defaults to None. solver: SolverOptions Solver to use for the estimation. - demeaner_backend: DemeanerBackendOptions, optional - The backend to use for demeaning. Can be either "numba" or "jax". - Defaults to "numba". + demeaner: Optional[ResolvedDemeaner] + The demeaning strategy to use for the estimation. Not relevant for the "compression" estimator + and quantile regression. collin_tol : float, optional The tolerance level for the multicollinearity check. Default is 1e-6. 
iwls_maxiter : int, optional @@ -362,7 +362,7 @@ def _estimate_all_models( }: model_kwargs.update( { - "demeaner_backend": demeaner_backend, + "demeaner": demeaner, } ) diff --git a/pyfixest/estimation/api/feglm.py b/pyfixest/estimation/api/feglm.py index 25ce271ac..78d6edeb8 100644 --- a/pyfixest/estimation/api/feglm.py +++ b/pyfixest/estimation/api/feglm.py @@ -1,8 +1,10 @@ from collections.abc import Mapping from typing import Any +from pyfixest.demeaners import BaseDemeaner from pyfixest.estimation.api.utils import _estimation_input_checks from pyfixest.estimation.FixestMulti_ import FixestMulti +from pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( DemeanerBackendOptions, FixedRmOptions, @@ -32,6 +34,7 @@ def feglm( separation_check: list[str] | None = None, solver: SolverOptions = "scipy.linalg.solve", demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: BaseDemeaner | None = None, drop_intercept: bool = False, copy_data: bool = True, store_data: bool = True, @@ -147,7 +150,14 @@ def feglm( matrices (requires cupy & GPU, defaults to scipy/CPU and float64 if no GPU available) - "scipy": Direct application of the Frisch-Waugh-Lovell Theorem on sparse matrice. Forces to use a scipy-sparse backend even when cupy is installed and GPU is available. - Defaults to "numba". + Defaults to "numba". This argument is treated as a shorthand and is only + used when `demeaner` is not provided. + + demeaner : BaseDemeaner | None, optional + Typed demeaner configuration. If provided, it takes precedence over + `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. Backend-specific + settings and fixed-effects iteration controls are taken entirely from + this object. drop_intercept : bool, optional Whether to drop the intercept from the model, by default False. 
@@ -262,6 +272,14 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult weights_type = "aweights" context = {} if context is None else capture_context(context) + resolved_demeaner = resolve_demeaner( + demeaner=demeaner, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + resolved_fixef_tol = resolved_demeaner.fixef_tol + resolved_fixef_maxiter = resolved_demeaner.fixef_maxiter _estimation_input_checks( fml=fml, @@ -275,8 +293,8 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -291,8 +309,8 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -326,7 +344,7 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult iwls_maxiter=iwls_maxiter, collin_tol=collin_tol, separation_check=separation_check, - demeaner_backend=demeaner_backend, + demeaner=resolved_demeaner, accelerate=accelerate, ) diff --git a/pyfixest/estimation/api/feols.py b/pyfixest/estimation/api/feols.py index 6e04d0a55..99aeab749 100644 --- a/pyfixest/estimation/api/feols.py +++ b/pyfixest/estimation/api/feols.py @@ -1,8 +1,10 @@ from collections.abc import Mapping from typing import Any +from pyfixest.demeaners import BaseDemeaner from pyfixest.estimation.api.utils import _estimation_input_checks from pyfixest.estimation.FixestMulti_ import FixestMulti +from pyfixest.estimation.internals.demeaner_options import resolve_demeaner from 
pyfixest.estimation.internals.literals import ( DemeanerBackendOptions, FixedRmOptions, @@ -34,6 +36,7 @@ def feols( weights_type: WeightsTypeOptions = "aweights", solver: SolverOptions = "scipy.linalg.solve", demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: BaseDemeaner | None = None, use_compression: bool = False, reps: int = 100, context: int | Mapping[str, Any] | None = None, @@ -151,7 +154,14 @@ def feols( matrices (requires cupy & GPU, defaults to scipy/CPU and float64 if no GPU available) - "scipy": Direct application of the Frisch-Waugh-Lovell Theorem on sparse matrice. Forces to use a scipy-sparse backend even when cupy is installed and GPU is available. - Defaults to "numba". + Defaults to "numba". This argument is treated as a shorthand and is only + used when `demeaner` is not provided. + + demeaner : BaseDemeaner | None, optional + Typed demeaner configuration. If provided, it takes precedence over + `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. Backend-specific + settings and fixed-effects iteration controls are taken entirely from + this object. 
use_compression: bool Whether to use sufficient statistics to losslessly fit the regression model @@ -492,6 +502,14 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: if ssc is None: ssc = ssc_func() context = {} if context is None else capture_context(context) + resolved_demeaner = resolve_demeaner( + demeaner=demeaner, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + resolved_fixef_tol = resolved_demeaner.fixef_tol + resolved_fixef_maxiter = resolved_demeaner.fixef_maxiter _estimation_input_checks( fml=fml, @@ -505,8 +523,8 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=use_compression, reps=reps, @@ -520,8 +538,8 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=use_compression, reps=reps, @@ -550,7 +568,7 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: solver=solver, vcov_kwargs=vcov_kwargs, collin_tol=collin_tol, - demeaner_backend=demeaner_backend, + demeaner=resolved_demeaner, ) if fixest._is_multiple_estimation: diff --git a/pyfixest/estimation/api/fepois.py b/pyfixest/estimation/api/fepois.py index f005376d5..c9cc65d58 100644 --- a/pyfixest/estimation/api/fepois.py +++ b/pyfixest/estimation/api/fepois.py @@ -1,8 +1,10 @@ from collections.abc import Mapping from typing import Any +from pyfixest.demeaners import BaseDemeaner from pyfixest.estimation.api.utils import _estimation_input_checks from pyfixest.estimation.FixestMulti_ import FixestMulti +from 
pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( DemeanerBackendOptions, FixedRmOptions, @@ -34,6 +36,7 @@ def fepois( separation_check: list[str] | None = None, solver: SolverOptions = "scipy.linalg.solve", demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: BaseDemeaner | None = None, drop_intercept: bool = False, copy_data: bool = True, store_data: bool = True, @@ -137,7 +140,14 @@ def fepois( matrices (requires cupy & GPU, defaults to scipy/CPU and float64 if no GPU available) - "scipy": Direct application of the Frisch-Waugh-Lovell Theorem on sparse matrice. Forces to use a scipy-sparse backend even when cupy is installed and GPU is available. - Defaults to "numba". + Defaults to "numba". This argument is treated as a shorthand and is only + used when `demeaner` is not provided. + + demeaner : BaseDemeaner | None, optional + Typed demeaner configuration. If provided, it takes precedence over + `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. Backend-specific + settings and fixed-effects iteration controls are taken entirely from + this object. drop_intercept : bool, optional Whether to drop the intercept from the model, by default False. 
@@ -208,6 +218,14 @@ def fepois( if ssc is None: ssc = ssc_func() context = {} if context is None else capture_context(context) + resolved_demeaner = resolve_demeaner( + demeaner=demeaner, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + resolved_fixef_tol = resolved_demeaner.fixef_tol + resolved_fixef_maxiter = resolved_demeaner.fixef_maxiter _estimation_input_checks( fml=fml, @@ -221,8 +239,8 @@ def fepois( copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -237,8 +255,8 @@ def fepois( copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -271,7 +289,7 @@ def fepois( iwls_maxiter=iwls_maxiter, collin_tol=collin_tol, separation_check=separation_check, - demeaner_backend=demeaner_backend, + demeaner=resolved_demeaner, ) if fixest._is_multiple_estimation: diff --git a/pyfixest/estimation/internals/demeaner_options.py b/pyfixest/estimation/internals/demeaner_options.py index 434cf01a9..d4656c231 100644 --- a/pyfixest/estimation/internals/demeaner_options.py +++ b/pyfixest/estimation/internals/demeaner_options.py @@ -37,31 +37,25 @@ def resolve_demeaner( fixef_tol: float, fixef_maxiter: int, ) -> ResolvedDemeaner: - """Resolve user input into a fully specified demeaner object.""" + """Resolve user input into a fully specified demeaner object. + + If a typed ``demeaner`` is supplied, it takes precedence over the legacy + ``demeaner_backend`` shorthand. The backend argument is only used when no + typed demeaner configuration is provided. 
+ """ backend = normalize_demeaner_backend(demeaner_backend) if demeaner is not None: if not isinstance(demeaner, _DEMEANER_TYPES): raise TypeError("`demeaner` must be a supported pyfixest demeaner.") - if backend != "numba": - raise ValueError( - "When `demeaner` is provided, `demeaner_backend` must be left at " - "its default value `'numba'`." - ) - return cast( - ResolvedDemeaner, - demeaner.resolved(fixef_tol=fixef_tol, fixef_maxiter=fixef_maxiter), - ) + return cast(ResolvedDemeaner, demeaner) shorthand = _from_backend_shorthand( demeaner_backend=backend, fixef_tol=fixef_tol, fixef_maxiter=fixef_maxiter, ) - return cast( - ResolvedDemeaner, - shorthand.resolved(fixef_tol=fixef_tol, fixef_maxiter=fixef_maxiter), - ) + return cast(ResolvedDemeaner, shorthand) def get_demeaner_backend(demeaner: ResolvedDemeaner) -> DemeanerBackendOptions: diff --git a/pyfixest/estimation/models/fegaussian_.py b/pyfixest/estimation/models/fegaussian_.py index 35b9054de..bd5ae7200 100644 --- a/pyfixest/estimation/models/fegaussian_.py +++ b/pyfixest/estimation/models/fegaussian_.py @@ -5,7 +5,7 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -34,6 +34,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -41,7 +42,6 @@ def __init__( sample_split_value: str | int | None = None, separation_check: list[str] | None = None, context: int | Mapping[str, Any] = 0, - demeaner_backend: DemeanerBackendOptions = "numba", accelerate: bool = True, ): super().__init__( @@ -59,6 +59,7 @@ def __init__( tol=tol, maxiter=maxiter, solver=solver, + demeaner=demeaner, store_data=store_data, copy_data=copy_data, lean=lean, @@ -66,7 +67,6 
@@ def __init__( sample_split_value=sample_split_value, separation_check=separation_check, context=context, - demeaner_backend=demeaner_backend, accelerate=accelerate, ) diff --git a/pyfixest/estimation/models/feglm_.py b/pyfixest/estimation/models/feglm_.py index 49d52c102..4e567457b 100644 --- a/pyfixest/estimation/models/feglm_.py +++ b/pyfixest/estimation/models/feglm_.py @@ -10,7 +10,10 @@ ) from pyfixest.estimation.formula.parse import Formula as FixestFormula from pyfixest.estimation.internals.backends import BACKENDS -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demeaner_options import ( + ResolvedDemeaner, + get_demeaner_backend, +) from pyfixest.estimation.internals.solvers import solve_ols from pyfixest.estimation.models.feols_ import ( Feols, @@ -47,7 +50,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -76,6 +79,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, + demeaner=demeaner, ) _glm_input_checks( @@ -90,11 +94,11 @@ def __init__( self.separation_check = separation_check self._accelerate = accelerate - self._demeaner_backend = demeaner_backend + backend_key = get_demeaner_backend(self._demeaner) try: - impl = BACKENDS[demeaner_backend] + impl = BACKENDS[backend_key] except KeyError: - raise ValueError(f"Unknown demeaner backend {demeaner_backend!r}") + raise ValueError(f"Unknown demeaner backend {backend_key!r}") self._demean_func = impl["demean"] self._support_crv3_inference = True diff --git a/pyfixest/estimation/models/feiv_.py b/pyfixest/estimation/models/feiv_.py index 475494a38..dde7e0371 100644 --- a/pyfixest/estimation/models/feiv_.py +++ b/pyfixest/estimation/models/feiv_.py @@ -8,7 +8,7 @@ from pyfixest.estimation.formula.parse import 
Formula as FixestFormula from pyfixest.estimation.internals.demean_ import demean_model -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.solvers import solve_ols from pyfixest.estimation.models.feols_ import Feols, _drop_multicollinear_variables @@ -44,9 +44,9 @@ class Feiv(Feols): solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"], default is "scipy.linalg.solve". Solver to use for the estimation. - demeaner_backend: DemeanerBackendOptions, optional - The backend to use for demeaning. Can be either "numba", "jax", or "rust". - Defaults to "numba". + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. weights_name : Optional[str] Name of the weights variable. weights_type : Optional[str] @@ -154,7 +154,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ] = "scipy.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -181,7 +181,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, - demeaner_backend=demeaner_backend, + demeaner=demeaner, ) self._is_iv = True diff --git a/pyfixest/estimation/models/felogit_.py b/pyfixest/estimation/models/felogit_.py index 5f443db44..6ef55928f 100644 --- a/pyfixest/estimation/models/felogit_.py +++ b/pyfixest/estimation/models/felogit_.py @@ -5,7 +5,7 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from 
pyfixest.estimation.models.feglm_ import Feglm @@ -34,7 +34,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -59,7 +59,7 @@ def __init__( tol=tol, maxiter=maxiter, solver=solver, - demeaner_backend=demeaner_backend, + demeaner=demeaner, store_data=store_data, copy_data=copy_data, lean=lean, diff --git a/pyfixest/estimation/models/feols_.py b/pyfixest/estimation/models/feols_.py index 82532a7ed..0b399582c 100644 --- a/pyfixest/estimation/models/feols_.py +++ b/pyfixest/estimation/models/feols_.py @@ -11,14 +11,18 @@ from scipy.sparse.linalg import lsqr from scipy.stats import chi2, f, t +from pyfixest.demeaners import MapDemeaner from pyfixest.errors import VcovTypeNotSupportedError from pyfixest.estimation.api.utils import _ALL_SAMPLE, _AllSampleSentinel from pyfixest.estimation.formula import model_matrix as model_matrix_fixest from pyfixest.estimation.formula.parse import Formula as FixestFormula from pyfixest.estimation.internals.backends import BACKENDS from pyfixest.estimation.internals.demean_ import demean_model +from pyfixest.estimation.internals.demeaner_options import ( + ResolvedDemeaner, + get_demeaner_backend, +) from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, PredictionErrorOptions, PredictionType, SolverOptions, @@ -98,6 +102,9 @@ class Feols(ResultAccessorMixin): The solver to use for the regression. Can be "np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax". Defaults to "scipy.linalg.solve". + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. 
context : int or Mapping[str, Any] A dictionary containing additional context variables to be used by formulaic during the creation of the model matrix. This can include @@ -214,7 +221,6 @@ class Feols(ResultAccessorMixin): _solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"], default is "scipy.linalg.solve". Solver to use for the estimation. - _demeaner_backend: DemeanerBackendOptions _data: pd.DataFrame The data frame used in the estimation. None if arguments `lean = True` or `store_data = False`. @@ -256,7 +262,7 @@ def __init__( fixef_maxiter: int, lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], solver: SolverOptions = "np.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -296,7 +302,12 @@ def __init__( self._fixef_tol = fixef_tol self._fixef_maxiter = fixef_maxiter self._solver = solver - self._demeaner_backend = demeaner_backend + if demeaner is None: + demeaner = MapDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + self._demeaner = demeaner self._lookup_demeaned_data = lookup_demeaned_data self._store_data = store_data self._copy_data = copy_data @@ -322,10 +333,11 @@ def __init__( # self._coefnames = None self._icovars = None + backend_key = get_demeaner_backend(demeaner) try: - impl = BACKENDS[demeaner_backend] + impl = BACKENDS[backend_key] except KeyError: - raise ValueError(f"Unknown backend {demeaner_backend!r}") + raise ValueError(f"Unknown backend {backend_key!r}") self._demean_func = impl["demean"] self._find_collinear_variables_func = impl["collinear"] diff --git a/pyfixest/estimation/models/feols_compressed_.py b/pyfixest/estimation/models/feols_compressed_.py index 845088ac8..b744b1c7c 100644 --- a/pyfixest/estimation/models/feols_compressed_.py +++ b/pyfixest/estimation/models/feols_compressed_.py @@ -9,8 +9,8 @@ from tqdm 
import tqdm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, SolverOptions, ) from pyfixest.estimation.models.feols_ import ( @@ -64,6 +64,10 @@ class FeolsCompressed(Feols): The lookup table for demeaned data. solver : SolverOptions The solver to use. + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. Not + relevant for the "compression" estimator. store_data : bool Whether to store the data. copy_data : bool @@ -97,7 +101,7 @@ def __init__( fixef_maxiter: int, lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], solver: SolverOptions = "np.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -120,7 +124,7 @@ def __init__( fixef_maxiter, lookup_demeaned_data, solver, - demeaner_backend, + demeaner, store_data, copy_data, lean, diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py index be6f1545d..1d8244e5c 100644 --- a/pyfixest/estimation/models/fepois_.py +++ b/pyfixest/estimation/models/fepois_.py @@ -12,8 +12,8 @@ NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, SolverOptions, ) from pyfixest.estimation.internals.solvers import solve_ols @@ -64,8 +64,9 @@ class Fepois(Feols): The solver to use for the regression. Can be "np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax". Defaults to "scipy.linalg.solve". 
- demeaner_backend: DemeanerBackendOptions. - The backend used for demeaning. + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. fixef_tol: float, default = 1e-06. Tolerance level for the convergence of the demeaning algorithm. context : int or Mapping[str, Any] @@ -98,7 +99,7 @@ def __init__( tol: float, maxiter: int, solver: SolverOptions = "np.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, context: int | Mapping[str, Any] = 0, store_data: bool = True, copy_data: bool = True, @@ -126,7 +127,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, - demeaner_backend=demeaner_backend, + demeaner=demeaner, ) # input checks diff --git a/pyfixest/estimation/models/feprobit_.py b/pyfixest/estimation/models/feprobit_.py index 50bb7df7a..8054fbed1 100644 --- a/pyfixest/estimation/models/feprobit_.py +++ b/pyfixest/estimation/models/feprobit_.py @@ -7,7 +7,7 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -36,7 +36,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -61,7 +61,7 @@ def __init__( tol=tol, maxiter=maxiter, solver=solver, - demeaner_backend=demeaner_backend, + demeaner=demeaner, store_data=store_data, copy_data=copy_data, lean=lean, diff --git a/pyfixest/estimation/quantreg/quantreg_.py b/pyfixest/estimation/quantreg/quantreg_.py index 67cb25e7d..4f6466367 100644 --- 
a/pyfixest/estimation/quantreg/quantreg_.py +++ b/pyfixest/estimation/quantreg/quantreg_.py @@ -10,6 +10,7 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( QuantregMethodOptions, SolverOptions, @@ -52,6 +53,12 @@ def __init__( quantile_maxiter: int | None = None, seed: int | None = None, ) -> None: + demeaner = resolve_demeaner( + demeaner=None, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) super().__init__( FixestFormula=FixestFormula, data=data, @@ -71,7 +78,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, - demeaner_backend=demeaner_backend, + demeaner=demeaner, ) warnings.warn( From 119ba94508e5648890945277909b95266354efe5 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 29 Mar 2026 11:20:54 +0200 Subject: [PATCH 04/10] Route typed demeaner options to backend execution --- pyfixest/core/_core_impl.pyi | 3 + pyfixest/core/demean.py | 96 ++++++---------- pyfixest/estimation/cupy/demean_cupy_.py | 69 +++++++++++- pyfixest/estimation/internals/backends.py | 6 + pyfixest/estimation/internals/demean_.py | 130 +++++++++++++++++++--- pyfixest/estimation/models/feglm_.py | 13 ++- pyfixest/estimation/models/feiv_.py | 1 + pyfixest/estimation/models/feols_.py | 2 +- pyfixest/estimation/models/fepois_.py | 11 +- src/demean_within.rs | 104 ++++++++++++++--- 10 files changed, 327 insertions(+), 108 deletions(-) diff --git a/pyfixest/core/_core_impl.pyi b/pyfixest/core/_core_impl.pyi index 6d79f8bf0..6dd6a0589 100644 --- a/pyfixest/core/_core_impl.pyi +++ b/pyfixest/core/_core_impl.pyi @@ -20,6 +20,9 @@ def _demean_within_rs( weights: NDArray[np.float64], tol: float = 1e-06, maxiter: int = 1_000, + krylov_method: str = "cg", + gmres_restart: int = 30, + preconditioner_type: 
str = "additive", ) -> tuple[np.ndarray, bool]: ... def _count_fixef_fully_nested_all_rs( all_fixef_array: NDArray, diff --git a/pyfixest/core/demean.py b/pyfixest/core/demean.py index 6fe66673e..b73e5c299 100644 --- a/pyfixest/core/demean.py +++ b/pyfixest/core/demean.py @@ -4,6 +4,27 @@ from ._core_impl import _demean_rs, _demean_within_rs +def _sanitize_krylov_and_preconditioner( + krylov_method: str, + preconditioner_type: str, +) -> tuple[str, str]: + krylov_method = krylov_method.lower() + preconditioner_type = preconditioner_type.lower() + + if krylov_method not in {"cg", "gmres"}: + raise ValueError("`krylov_method` must be either 'cg' or 'gmres'.") + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." + ) + if preconditioner_type == "multiplicative" and krylov_method != "gmres": + raise ValueError("Multiplicative Schwarz requires `krylov_method='gmres'`.") + + return krylov_method, preconditioner_type + + +# Legacy: used by BACKENDS dict and _set_demeaner_backend. +# Remove once all callers use the typed demeaner= API. def demean( x: NDArray[np.float64], flist: NDArray[np.uint64], @@ -36,39 +57,6 @@ def demean( tuple[numpy.ndarray, bool] A tuple containing the demeaned array of shape (n_samples, n_features) and a boolean indicating whether the algorithm converged successfully. 
- - Examples - -------- - ```{python} - import numpy as np - import pyfixest as pf - from pyfixest.utils.dgps import get_blw - from pyfixest.estimation.internals.demean_ import demean - from formulaic import model_matrix - - fml = "y ~ treat | state + year" - - data = get_blw() - data.head() - - Y, rhs = model_matrix(fml, data) - X = rhs[0].drop(columns="Intercept") - fe = rhs[1].drop(columns="Intercept") - YX = np.concatenate([Y, X], axis=1) - - # to numpy - Y = Y.to_numpy() - X = X.to_numpy() - YX = np.concatenate([Y, X], axis=1) - fe = fe.to_numpy().astype(int) # demean requires fixed effects as ints! - - YX_demeaned, success = demean(YX, fe, weights = np.ones(YX.shape[0])) - Y_demeaned = YX_demeaned[:, 0] - X_demeaned = YX_demeaned[:, 1:] - - print(np.linalg.lstsq(X_demeaned, Y_demeaned, rcond=None)[0]) - print(pf.feols(fml, data).coef()) - ``` """ return _demean_rs( x.astype(np.float64, copy=False), @@ -79,42 +67,24 @@ def demean( ) +# Legacy: used by BACKENDS dict and _set_demeaner_backend. +# Remove once all callers use the typed demeaner= API. def demean_within( x: NDArray[np.float64], flist: NDArray[np.uint32], weights: NDArray[np.float64], tol: float = 1e-06, maxiter: int = 1_000, + krylov_method: str = "cg", + gmres_restart: int = 30, + preconditioner_type: str = "additive", ) -> tuple[NDArray, bool]: - """ - Demean an array using preconditioned conjugate gradient via the `within` crate. - - Uses one-level Schwarz preconditioning with approximate Cholesky local - solvers. Converges faster than alternating projections on weakly-connected - or block-diagonal fixed-effect structures. - - For single fixed effects, falls back to alternating projections (``_demean_rs``) - because the CG/Schwarz preconditioner is designed for multi-way FE problems. - - Parameters - ---------- - x : numpy.ndarray - Input array of shape (n_samples, n_features). - flist : numpy.ndarray - Array of shape (n_samples, n_factors) specifying the fixed effects - (integer-encoded). 
- weights : numpy.ndarray - Array of shape (n_samples,) specifying the weights. - tol : float, optional - Convergence tolerance. Defaults to 1e-06. - maxiter : int, optional - Maximum number of CG iterations. Defaults to 1_000. + """Demean an array using the configurable `within` backend.""" + krylov_method, preconditioner_type = _sanitize_krylov_and_preconditioner( + krylov_method, + preconditioner_type, + ) - Returns - ------- - tuple[numpy.ndarray, bool] - Demeaned array and convergence flag. - """ if flist.ndim == 1 or flist.shape[1] == 1: return _demean_rs( x.astype(np.float64, copy=False), @@ -123,10 +93,14 @@ def demean_within( tol, maxiter, ) + return _demean_within_rs( x.astype(np.float64, copy=False), np.asfortranarray(flist, dtype=np.uint32), weights.astype(np.float64, copy=False).reshape(-1), tol, maxiter, + krylov_method, + gmres_restart, + preconditioner_type, ) diff --git a/pyfixest/estimation/cupy/demean_cupy_.py b/pyfixest/estimation/cupy/demean_cupy_.py index 0ce87aff5..ba99d49bb 100644 --- a/pyfixest/estimation/cupy/demean_cupy_.py +++ b/pyfixest/estimation/cupy/demean_cupy_.py @@ -278,6 +278,31 @@ def demean_cupy( dtype : type, default=np.float64 Data type for GPU computations (np.float32 or np.float64). 
""" + return demean_cupy_configured( + x=x, + flist=flist, + weights=weights, + solver_atol=tol, + solver_btol=tol, + solver_maxiter=maxiter, + dtype=dtype, + ) + + +def demean_cupy_configured( + x: NDArray[np.float64], + flist: NDArray[np.uint64] | None = None, + weights: NDArray[np.float64] | None = None, + *, + use_gpu: bool | None = None, + solver_atol: float = 1e-8, + solver_btol: float = 1e-8, + solver_maxiter: int = 100_000, + warn_on_cpu_fallback: bool = True, + dtype: type = np.float64, + use_preconditioner: bool = True, +) -> tuple[NDArray[np.float64], bool]: + """Demean arrays with the configurable CuPy/Scipy sparse FWL backend.""" if weights is None: weights = np.ones(x.shape[0] if x.ndim > 1 else len(x)) @@ -288,9 +313,15 @@ def demean_cupy( fe_df = pd.DataFrame(flist, columns=[f"f{i + 1}" for i in range(n_fe)], copy=False) fe_sparse_matrix = create_fe_sparse_matrix(fe_df) - return CupyFWLDemeaner(dtype=dtype).demean( - x, flist, weights, tol, maxiter, fe_sparse_matrix=fe_sparse_matrix - ) + return CupyFWLDemeaner( + use_gpu=use_gpu, + solver_atol=solver_atol, + solver_btol=solver_btol, + solver_maxiter=solver_maxiter, + warn_on_cpu_fallback=warn_on_cpu_fallback, + dtype=dtype, + use_preconditioner=use_preconditioner, + ).demean(x, flist, weights, fe_sparse_matrix=fe_sparse_matrix) def demean_cupy32( @@ -332,6 +363,27 @@ def demean_scipy( maxiter: int = 100_000, ) -> tuple[NDArray[np.float64], bool]: "Scipy demeaner using float64 precision (CPU-only, no GPU)." 
+ return demean_scipy_configured( + x=x, + flist=flist, + weights=weights, + solver_atol=tol, + solver_btol=tol, + solver_maxiter=maxiter, + ) + + +def demean_scipy_configured( + x: NDArray[np.float64], + flist: NDArray[np.uint64] | None = None, + weights: NDArray[np.float64] | None = None, + *, + solver_atol: float = 1e-8, + solver_btol: float = 1e-8, + solver_maxiter: int = 100_000, + use_preconditioner: bool = True, +) -> tuple[NDArray[np.float64], bool]: + """Demean arrays with the CPU-only sparse FWL backend.""" if weights is None: weights = np.ones(x.shape[0] if x.ndim > 1 else len(x)) @@ -342,7 +394,12 @@ def demean_scipy( fe_df = pd.DataFrame(flist, columns=[f"f{i + 1}" for i in range(n_fe)], copy=False) fe_sparse_matrix = create_fe_sparse_matrix(fe_df) - # Force CPU usage (use_gpu=False) and disable warnings return CupyFWLDemeaner( - use_gpu=False, warn_on_cpu_fallback=False, dtype=np.float64 - ).demean(x, flist, weights, tol, maxiter, fe_sparse_matrix=fe_sparse_matrix) + use_gpu=False, + solver_atol=solver_atol, + solver_btol=solver_btol, + solver_maxiter=solver_maxiter, + warn_on_cpu_fallback=False, + dtype=np.float64, + use_preconditioner=use_preconditioner, + ).demean(x, flist, weights, fe_sparse_matrix=fe_sparse_matrix) diff --git a/pyfixest/estimation/internals/backends.py b/pyfixest/estimation/internals/backends.py index 2f6b35bde..5fa43b623 100644 --- a/pyfixest/estimation/internals/backends.py +++ b/pyfixest/estimation/internals/backends.py @@ -66,6 +66,12 @@ "crv1_meat": crv1_meat_loop, "nonnested": count_fixef_fully_nested_all, }, + "within": { + "demean": demean_within, + "collinear": find_collinear_variables, + "crv1_meat": crv1_meat_loop, + "nonnested": count_fixef_fully_nested_all, + }, "jax": { "demean": demean_jax_fn, "collinear": find_collinear_variables_jax, diff --git a/pyfixest/estimation/internals/demean_.py b/pyfixest/estimation/internals/demean_.py index c141ce595..920682ca1 100644 --- a/pyfixest/estimation/internals/demean_.py +++ 
b/pyfixest/estimation/internals/demean_.py @@ -1,10 +1,14 @@ from collections.abc import Callable -from typing import Any +from importlib import import_module +from typing import Any, cast import numba as nb import numpy as np import pandas as pd +from pyfixest.core.demean import demean_within +from pyfixest.demeaners import LsmrDemeaner, WithinDemeaner +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import DemeanerBackendOptions @@ -18,6 +22,7 @@ def demean_model( fixef_tol: float, fixef_maxiter: int, demean_func: Callable, + demeaner: ResolvedDemeaner | None = None, # demeaner_backend: Literal["numba", "jax", "rust"] = "numba", ) -> tuple[pd.DataFrame, pd.DataFrame]: """ @@ -50,9 +55,9 @@ def demean_model( The tolerance for the demeaning algorithm. fixef_maxiter: int The maximum number of iterations for the demeaning algorithm. - demeaner_backend: DemeanerBackendOptions, optional - The backend to use for demeaning. Can be either "numba", "jax", or "rust". - Defaults to "numba". + demeaner : ResolvedDemeaner | None, optional + Resolved typed demeaner configuration. If provided, backend-specific + runtime options are taken from this object. 
Returns @@ -74,10 +79,15 @@ def demean_model( if YX_array.dtype != np.dtype("float64"): YX_array = YX_array.astype(np.float64) - if weights is not None and weights.ndim > 1: - weights = weights.flatten() + if weights is None: + weights_array = np.ones(YX_array.shape[0], dtype=np.float64) + elif weights.ndim > 1: + weights_array = weights.flatten() + else: + weights_array = weights if fe is not None: + YX_demeaned: pd.DataFrame fe_array = fe.to_numpy() # check if looked dict has data for na_index if lookup_demeaned_data.get(na_index) is not None: @@ -105,12 +115,18 @@ def demean_model( if var_diff.ndim == 1: var_diff = var_diff.reshape(len(var_diff), 1) - YX_demean_new, success = demean_func( + YX_demean_new, success = dispatch_demean( x=var_diff, - flist=fe_array.astype(np.uintp), - weights=weights, - tol=fixef_tol, - maxiter=fixef_maxiter, + flist=fe_array, + weights=weights_array, + # TODO:fixef_tol, fixef_maxiter, demean_func are passed via + # the demeaner_backend= string API and the _set_demeaner_backend function. + # Remove passing fixef_tol, fixef_maxiter once all callers use the + # typed demeaner= API. 
+ fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + demean_func=demean_func, + demeaner=demeaner, ) if success is False: raise ValueError( @@ -134,17 +150,19 @@ def demean_model( YX_demeaned = YX_demeaned_old[yx_names] else: - YX_demeaned, success = demean_func( + YX_demeaned_array, success = dispatch_demean( x=YX_array, - flist=fe_array.astype(np.uintp), - weights=weights, - tol=fixef_tol, - maxiter=fixef_maxiter, + flist=fe_array, + weights=weights_array, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + demean_func=demean_func, + demeaner=demeaner, ) if success is False: raise ValueError(f"Demeaning failed after {fixef_maxiter} iterations.") - YX_demeaned = pd.DataFrame(YX_demeaned) + YX_demeaned = pd.DataFrame(YX_demeaned_array) YX_demeaned.columns = yx_names lookup_demeaned_data[na_index] = [None, YX_demeaned] @@ -163,6 +181,82 @@ def demean_model( return Yd, Xd +def dispatch_demean( + x: np.ndarray, + flist: np.ndarray, + weights: np.ndarray, + fixef_tol: float, + fixef_maxiter: int, + demean_func: Callable, + demeaner: ResolvedDemeaner | None = None, +) -> tuple[np.ndarray, bool]: + """Demean an array using the configured backend for the resolved demeaner.""" + if isinstance(demeaner, WithinDemeaner): + return demean_within( + x=x, + flist=flist.astype(np.uintp, copy=False), + weights=weights, + tol=fixef_tol, + maxiter=fixef_maxiter, + krylov_method=demeaner.krylov_method, + gmres_restart=demeaner.gmres_restart, + preconditioner_type=demeaner.preconditioner_type, + ) + + if isinstance(demeaner, LsmrDemeaner): + solver_atol = demeaner.solver_atol if demeaner.solver_atol is not None else 1e-8 + solver_btol = demeaner.solver_btol if demeaner.solver_btol is not None else 1e-8 + solver_maxiter = ( + demeaner.solver_maxiter + if demeaner.solver_maxiter is not None + else fixef_maxiter + ) + + if demeaner.use_gpu is False: + demean_scipy_configured = cast( + Callable[..., tuple[np.ndarray, bool]], + import_module( + 
"pyfixest.estimation.cupy.demean_cupy_" + ).demean_scipy_configured, + ) + return demean_scipy_configured( + x=x, + flist=flist.astype(np.uintp, copy=False), + weights=weights, + solver_atol=solver_atol, + solver_btol=solver_btol, + solver_maxiter=solver_maxiter, + use_preconditioner=demeaner.use_preconditioner, + ) + + demean_cupy_configured = cast( + Callable[..., tuple[np.ndarray, bool]], + import_module( + "pyfixest.estimation.cupy.demean_cupy_" + ).demean_cupy_configured, + ) + return demean_cupy_configured( + x=x, + flist=flist.astype(np.uintp, copy=False), + weights=weights, + use_gpu=demeaner.use_gpu, + solver_atol=solver_atol, + solver_btol=solver_btol, + solver_maxiter=solver_maxiter, + warn_on_cpu_fallback=demeaner.warn_on_cpu_fallback, + dtype=np.float32 if demeaner.precision == "float32" else np.float64, + use_preconditioner=demeaner.use_preconditioner, + ) + + return demean_func( + x=x, + flist=flist.astype(np.uintp, copy=False), + weights=weights, + tol=fixef_tol, + maxiter=fixef_maxiter, + ) + + @nb.njit def _sad_converged(a: np.ndarray, b: np.ndarray, tol: float) -> bool: for i in range(a.size): @@ -322,6 +416,8 @@ def demean( return (res, success) +# Legacy: used by the old demeaner_backend= string API. +# Remove once all callers use the typed demeaner= API. 
def _set_demeaner_backend( demeaner_backend: DemeanerBackendOptions, ) -> Callable: diff --git a/pyfixest/estimation/models/feglm_.py b/pyfixest/estimation/models/feglm_.py index 4e567457b..8a71dd9e8 100644 --- a/pyfixest/estimation/models/feglm_.py +++ b/pyfixest/estimation/models/feglm_.py @@ -10,6 +10,7 @@ ) from pyfixest.estimation.formula.parse import Formula as FixestFormula from pyfixest.estimation.internals.backends import BACKENDS +from pyfixest.estimation.internals.demean_ import dispatch_demean from pyfixest.estimation.internals.demeaner_options import ( ResolvedDemeaner, get_demeaner_backend, @@ -412,19 +413,21 @@ def residualize( X: np.ndarray, flist: np.ndarray, weights: np.ndarray, - tol: np.ndarray, + tol: float, maxiter: int, ) -> tuple[np.ndarray, np.ndarray]: "Residualize v and X by flist using weights." if flist is None: return v, X else: - vX_tilde, success = self._demean_func( + vX_tilde, success = dispatch_demean( x=np.c_[v, X], - flist=flist.astype(np.uintp), + flist=flist, weights=weights, - tol=tol, - maxiter=maxiter, + fixef_tol=tol, + fixef_maxiter=maxiter, + demean_func=self._demean_func, + demeaner=self._demeaner, ) if success is False: raise ValueError(f"Demeaning failed after {maxiter} iterations.") diff --git a/pyfixest/estimation/models/feiv_.py b/pyfixest/estimation/models/feiv_.py index dde7e0371..4dfe9de4a 100644 --- a/pyfixest/estimation/models/feiv_.py +++ b/pyfixest/estimation/models/feiv_.py @@ -218,6 +218,7 @@ def demean(self) -> None: self._fixef_tol, self._fixef_maxiter, self._demean_func, + self._demeaner, ) else: self._endogvard = self._endogvar diff --git a/pyfixest/estimation/models/feols_.py b/pyfixest/estimation/models/feols_.py index 0b399582c..73cf0d052 100644 --- a/pyfixest/estimation/models/feols_.py +++ b/pyfixest/estimation/models/feols_.py @@ -512,7 +512,7 @@ def demean(self): self._fixef_tol, self._fixef_maxiter, self._demean_func, - # self._demeaner_backend, + self._demeaner, ) else: self._Yd, self._Xd = 
self._Y, self._X diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py index 1d8244e5c..0c154a0ef 100644 --- a/pyfixest/estimation/models/fepois_.py +++ b/pyfixest/estimation/models/fepois_.py @@ -12,6 +12,7 @@ NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import dispatch_demean from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( SolverOptions, @@ -309,12 +310,14 @@ def get_fit(self) -> None: if self._fe is None: ZX_resid = ZX else: - ZX_resid, success = self._demean_func( + ZX_resid, success = dispatch_demean( x=ZX, - flist=self._fe.astype(np.uintp), + flist=self._fe, weights=combined_weights.flatten(), - tol=self._fixef_tol, - maxiter=self._fixef_maxiter, + fixef_tol=self._fixef_tol, + fixef_maxiter=self._fixef_maxiter, + demean_func=self._demean_func, + demeaner=self._demeaner, ) if success is False: raise ValueError("Demeaning failed after 100_000 iterations.") diff --git a/src/demean_within.rs b/src/demean_within.rs index adba07b94..2c526289d 100644 --- a/src/demean_within.rs +++ b/src/demean_within.rs @@ -1,5 +1,6 @@ use ndarray::{Array2, ArrayView1, ArrayView2, ShapeBuilder}; use numpy::{PyArray2, PyReadonlyArray1, PyReadonlyArray2}; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; fn extract_columns(x: &ArrayView2) -> Vec> { @@ -8,13 +9,71 @@ fn extract_columns(x: &ArrayView2) -> Vec> { .collect() } +fn build_solver_params( + tol: f64, + maxiter: usize, + krylov_method: &str, + gmres_restart: usize, +) -> PyResult { + let krylov = match krylov_method { + "cg" => within::KrylovMethod::Cg, + "gmres" => within::KrylovMethod::Gmres { + restart: gmres_restart, + }, + _ => { + return Err(PyValueError::new_err( + "krylov_method must be either 'cg' or 'gmres'.", + )); + } + }; + + Ok(within::SolverParams { + tol, + maxiter, + krylov, + 
..within::SolverParams::default() + }) +} + +fn build_preconditioner_config(preconditioner_type: &str) -> PyResult { + match preconditioner_type { + "additive" => Ok(within::Preconditioner::Additive( + within::LocalSolverConfig::solver_default(), + within::ReductionStrategy::Auto, + )), + "multiplicative" => Ok(within::Preconditioner::Multiplicative( + within::LocalSolverConfig::default(), + )), + _ => Err(PyValueError::new_err( + "preconditioner_type must be either 'additive' or 'multiplicative'.", + )), + } +} + +fn validate_solver_preconditioner_combo( + params: &within::SolverParams, + preconditioner_type: &str, +) -> PyResult<()> { + if matches!(params.krylov, within::KrylovMethod::Cg) + && preconditioner_type == "multiplicative" + { + return Err(PyValueError::new_err( + "Multiplicative Schwarz requires `krylov_method='gmres'`.", + )); + } + Ok(()) +} + fn demean_within_impl( x: &ArrayView2, flist: &ArrayView2, weights: &ArrayView1, tol: f64, maxiter: usize, -) -> Result<(Array2, bool), within::WithinError> { + krylov_method: &str, + gmres_restart: usize, + preconditioner_type: &str, +) -> PyResult<(Array2, bool)> { let n_obs = x.nrows(); let n_rhs = x.ncols(); @@ -22,15 +81,9 @@ fn demean_within_impl( let x_slices: Vec<&[f64]> = x_columns.iter().map(|col| col.as_slice()).collect(); let weights_vec: Vec = weights.iter().copied().collect(); - let params = within::SolverParams { - tol, - maxiter, - ..within::SolverParams::default() - }; - let preconditioner = within::Preconditioner::Additive( - within::LocalSolverConfig::solver_default(), - within::ReductionStrategy::Auto, - ); + let params = build_solver_params(tol, maxiter, krylov_method, gmres_restart)?; + validate_solver_preconditioner_combo(¶ms, preconditioner_type)?; + let preconditioner = build_preconditioner_config(preconditioner_type)?; let result = within::solve_batch( flist.view(), @@ -38,7 +91,8 @@ fn demean_within_impl( Some(&weights_vec), ¶ms, Some(&preconditioner), - )?; + ) + .map_err(|e| 
pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))?; let all_converged = result.converged().iter().all(|&c| c); let out = Array2::from_shape_vec((n_obs, n_rhs).f(), result.demeaned_all().to_vec()) @@ -47,7 +101,16 @@ fn demean_within_impl( } #[pyfunction] -#[pyo3(signature = (x, flist, weights, tol=1e-6, maxiter=1_000))] +#[pyo3(signature = ( + x, + flist, + weights, + tol=1e-6, + maxiter=1_000, + krylov_method="cg", + gmres_restart=30, + preconditioner_type="additive" +))] pub fn _demean_within_rs( py: Python<'_>, x: PyReadonlyArray2, @@ -55,14 +118,27 @@ pub fn _demean_within_rs( weights: PyReadonlyArray1, tol: f64, maxiter: usize, + krylov_method: &str, + gmres_restart: usize, + preconditioner_type: &str, ) -> PyResult<(Py>, bool)> { let x_arr = x.as_array(); let flist_arr = flist.as_array(); let weights_arr = weights.as_array(); let (out, success) = py - .detach(|| demean_within_impl(&x_arr, &flist_arr, &weights_arr, tol, maxiter)) - .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))?; + .detach(|| { + demean_within_impl( + &x_arr, + &flist_arr, + &weights_arr, + tol, + maxiter, + krylov_method, + gmres_restart, + preconditioner_type, + ) + })?; let pyarray = PyArray2::from_owned_array(py, out); Ok((pyarray.into(), success)) From 70dc5d9456853d833ae87f828a666d5973ae286c Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 29 Mar 2026 12:02:45 +0200 Subject: [PATCH 05/10] Remove legacy callable plumbing; hardwire Rust for collinear/crv1/nonnested --- pyfixest/demeaners.py | 39 ++++---- pyfixest/estimation/internals/backends.py | 105 ------------------- pyfixest/estimation/internals/demean_.py | 110 +++++++++----------- pyfixest/estimation/models/feglm_.py | 17 +--- pyfixest/estimation/models/feiv_.py | 4 - pyfixest/estimation/models/feols_.py | 37 ++----- pyfixest/estimation/models/fepois_.py | 4 - tests/test_demean.py | 117 ++++++++++++---------- 8 files changed, 143 insertions(+), 290 deletions(-) delete mode 100644 
pyfixest/estimation/internals/backends.py diff --git a/pyfixest/demeaners.py b/pyfixest/demeaners.py index e793e5277..1d70faaa0 100644 --- a/pyfixest/demeaners.py +++ b/pyfixest/demeaners.py @@ -99,9 +99,9 @@ class LsmrDemeaner(BaseDemeaner): precision: str = "float64" use_gpu: bool | None = None - solver_atol: float | None = None - solver_btol: float | None = None - solver_maxiter: int | None = None + solver_atol: float = 1e-8 + solver_btol: float = 1e-8 + solver_maxiter: int = 100_000 warn_on_cpu_fallback: bool = True use_preconditioner: bool = True kind: ClassVar[str] = "lsmr" @@ -118,24 +118,21 @@ def __post_init__(self) -> None: if self.use_gpu is not None and not isinstance(self.use_gpu, bool): raise TypeError("`use_gpu` must be a bool or None.") - if self.solver_atol is not None: - object.__setattr__( - self, - "solver_atol", - _validate_positive_float(self.solver_atol, "solver_atol"), - ) - if self.solver_btol is not None: - object.__setattr__( - self, - "solver_btol", - _validate_positive_float(self.solver_btol, "solver_btol"), - ) - if self.solver_maxiter is not None: - object.__setattr__( - self, - "solver_maxiter", - _validate_positive_int(self.solver_maxiter, "solver_maxiter"), - ) + object.__setattr__( + self, + "solver_atol", + _validate_positive_float(self.solver_atol, "solver_atol"), + ) + object.__setattr__( + self, + "solver_btol", + _validate_positive_float(self.solver_btol, "solver_btol"), + ) + object.__setattr__( + self, + "solver_maxiter", + _validate_positive_int(self.solver_maxiter, "solver_maxiter"), + ) if not isinstance(self.warn_on_cpu_fallback, bool): raise TypeError("`warn_on_cpu_fallback` must be a bool.") diff --git a/pyfixest/estimation/internals/backends.py b/pyfixest/estimation/internals/backends.py deleted file mode 100644 index 5fa43b623..000000000 --- a/pyfixest/estimation/internals/backends.py +++ /dev/null @@ -1,105 +0,0 @@ -from pyfixest.core.collinear import find_collinear_variables -from pyfixest.core.crv1 import 
crv1_meat_loop -from pyfixest.core.demean import demean, demean_within -from pyfixest.core.nested_fixed_effects import count_fixef_fully_nested_all -from pyfixest.estimation.internals.demean_ import demean as demean_nb -from pyfixest.estimation.internals.vcov_utils import ( - _crv1_meat_loop as crv1_meat_loop_nb, -) -from pyfixest.estimation.numba.find_collinear_variables_nb import ( - _find_collinear_variables_nb as find_collinear_variables_nb, -) -from pyfixest.estimation.numba.nested_fixef_nb import ( - _count_fixef_fully_nested_all as count_fixef_fully_nested_all_nb, -) - -# Try to import JAX functions, fall back to numba if not available -try: - from pyfixest.estimation.jax.demean_jax_ import demean_jax as demean_jax_fn - - JAX_AVAILABLE = True -except ImportError: - # Fall back to numba implementation if JAX is not available - demean_jax_fn = demean_nb - JAX_AVAILABLE = False - -find_collinear_variables_jax = find_collinear_variables_nb -crv1_meat_loop_jax = crv1_meat_loop_nb -count_fixef_fully_nested_all_jax = count_fixef_fully_nested_all_nb - -# Try to import CuPy functions, fall back to numba if not available -try: - from pyfixest.estimation.cupy.demean_cupy_ import ( - demean_cupy32, - demean_cupy64, - demean_scipy, - ) - - CUPY_AVAILABLE = True -except ImportError: - # Fall back to numba implementation if CuPy is not available - demean_cupy32 = demean_nb - demean_cupy64 = demean_nb - demean_scipy = demean_nb - CUPY_AVAILABLE = False - -find_collinear_variables_cupy = find_collinear_variables_nb -crv1_meat_loop_cupy = crv1_meat_loop_nb -count_fixef_fully_nested_all_cupy = count_fixef_fully_nested_all_nb - -BACKENDS = { - "numba": { - "demean": demean_nb, - "collinear": find_collinear_variables_nb, - "crv1_meat": crv1_meat_loop_nb, - "nonnested": count_fixef_fully_nested_all_nb, - }, - "rust": { - "demean": demean, - "collinear": find_collinear_variables, - "crv1_meat": crv1_meat_loop, - "nonnested": count_fixef_fully_nested_all, - }, - "rust-cg": { - 
"demean": demean_within, - "collinear": find_collinear_variables, - "crv1_meat": crv1_meat_loop, - "nonnested": count_fixef_fully_nested_all, - }, - "within": { - "demean": demean_within, - "collinear": find_collinear_variables, - "crv1_meat": crv1_meat_loop, - "nonnested": count_fixef_fully_nested_all, - }, - "jax": { - "demean": demean_jax_fn, - "collinear": find_collinear_variables_jax, - "crv1_meat": crv1_meat_loop_jax, - "nonnested": count_fixef_fully_nested_all_jax, - }, - "cupy": { - "demean": demean_cupy64, - "collinear": find_collinear_variables_cupy, - "crv1_meat": crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, - "cupy32": { - "demean": demean_cupy32, - "collinear": find_collinear_variables_cupy, - "crv1_meat": crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, - "cupy64": { - "demean": demean_cupy64, - "collinear": find_collinear_variables_cupy, - "crv1_meat": crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, - "scipy": { - "demean": demean_scipy, - "collinear": find_collinear_variables_cupy, - "crv1_meat": crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, -} diff --git a/pyfixest/estimation/internals/demean_.py b/pyfixest/estimation/internals/demean_.py index 920682ca1..a40a0930e 100644 --- a/pyfixest/estimation/internals/demean_.py +++ b/pyfixest/estimation/internals/demean_.py @@ -7,7 +7,7 @@ import pandas as pd from pyfixest.core.demean import demean_within -from pyfixest.demeaners import LsmrDemeaner, WithinDemeaner +from pyfixest.demeaners import LsmrDemeaner, MapDemeaner, WithinDemeaner from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import DemeanerBackendOptions @@ -19,11 +19,7 @@ def demean_model( weights: np.ndarray | None, lookup_demeaned_data: dict[frozenset[int], Any], na_index: frozenset[int], - fixef_tol: float, - fixef_maxiter: int, - demean_func: Callable, - 
demeaner: ResolvedDemeaner | None = None, - # demeaner_backend: Literal["numba", "jax", "rust"] = "numba", + demeaner: ResolvedDemeaner, ) -> tuple[pd.DataFrame, pd.DataFrame]: """ Demean a regression model. @@ -51,25 +47,18 @@ def demean_model( na_index : frozenset[int] A frozenset of indices of dropped rows. Used as a hashable cache key for demeaned variables. - fixef_tol: float - The tolerance for the demeaning algorithm. - fixef_maxiter: int - The maximum number of iterations for the demeaning algorithm. - demeaner : ResolvedDemeaner | None, optional - Resolved typed demeaner configuration. If provided, backend-specific - runtime options are taken from this object. - + demeaner : ResolvedDemeaner + Resolved typed demeaner configuration. Backend-specific runtime options + are taken from this object. Returns ------- - tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] + tuple[pd.DataFrame, pd.DataFrame] A tuple of the following elements: - Yd : pd.DataFrame A DataFrame of the demeaned dependent variable. - Xd : pd.DataFrame A DataFrame of the demeaned covariates. - - Id : pd.DataFrame or None - A DataFrame of the demeaned Instruments. None if no IV. """ YX = pd.concat([Y, X], axis=1) @@ -106,11 +95,8 @@ def demean_model( # if some variables still need to be demeaned if var_diff_names: - # var_diff_names = var_diff_names - yx_names_list = list(yx_names) var_diff_index = [yx_names_list.index(item) for item in var_diff_names] - # var_diff_index = list(yx_names).index(var_diff_names) var_diff = YX_array[:, var_diff_index] if var_diff.ndim == 1: var_diff = var_diff.reshape(len(var_diff), 1) @@ -119,18 +105,11 @@ def demean_model( x=var_diff, flist=fe_array, weights=weights_array, - # TODO:fixef_tol, fixef_maxiter, demean_func are passed via - # the demeaner_backend= string API and the _set_demeaner_backend function. - # Remove passing fixef_tol, fixef_maxiter once all callers use the - # typed demeaner= API. 
- fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, - demean_func=demean_func, demeaner=demeaner, ) if success is False: raise ValueError( - f"Demeaning failed after {fixef_maxiter} iterations." + f"Demeaning failed after {demeaner.fixef_maxiter} iterations." ) YX_demeaned = pd.DataFrame( @@ -154,13 +133,12 @@ def demean_model( x=YX_array, flist=fe_array, weights=weights_array, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, - demean_func=demean_func, demeaner=demeaner, ) if success is False: - raise ValueError(f"Demeaning failed after {fixef_maxiter} iterations.") + raise ValueError( + f"Demeaning failed after {demeaner.fixef_maxiter} iterations." + ) YX_demeaned = pd.DataFrame(YX_demeaned_array) YX_demeaned.columns = yx_names @@ -185,33 +163,24 @@ def dispatch_demean( x: np.ndarray, flist: np.ndarray, weights: np.ndarray, - fixef_tol: float, - fixef_maxiter: int, - demean_func: Callable, - demeaner: ResolvedDemeaner | None = None, + demeaner: ResolvedDemeaner, ) -> tuple[np.ndarray, bool]: """Demean an array using the configured backend for the resolved demeaner.""" + flist_uint = flist.astype(np.uintp, copy=False) + if isinstance(demeaner, WithinDemeaner): return demean_within( x=x, - flist=flist.astype(np.uintp, copy=False), + flist=flist_uint.astype(np.uint32, copy=False), weights=weights, - tol=fixef_tol, - maxiter=fixef_maxiter, + tol=demeaner.fixef_tol, + maxiter=demeaner.fixef_maxiter, krylov_method=demeaner.krylov_method, gmres_restart=demeaner.gmres_restart, preconditioner_type=demeaner.preconditioner_type, ) if isinstance(demeaner, LsmrDemeaner): - solver_atol = demeaner.solver_atol if demeaner.solver_atol is not None else 1e-8 - solver_btol = demeaner.solver_btol if demeaner.solver_btol is not None else 1e-8 - solver_maxiter = ( - demeaner.solver_maxiter - if demeaner.solver_maxiter is not None - else fixef_maxiter - ) - if demeaner.use_gpu is False: demean_scipy_configured = cast( Callable[..., tuple[np.ndarray, bool]], @@ -221,11 +190,11 @@ 
def dispatch_demean( ) return demean_scipy_configured( x=x, - flist=flist.astype(np.uintp, copy=False), + flist=flist_uint, weights=weights, - solver_atol=solver_atol, - solver_btol=solver_btol, - solver_maxiter=solver_maxiter, + solver_atol=demeaner.solver_atol, + solver_btol=demeaner.solver_btol, + solver_maxiter=demeaner.solver_maxiter, use_preconditioner=demeaner.use_preconditioner, ) @@ -237,24 +206,41 @@ def dispatch_demean( ) return demean_cupy_configured( x=x, - flist=flist.astype(np.uintp, copy=False), + flist=flist_uint, weights=weights, use_gpu=demeaner.use_gpu, - solver_atol=solver_atol, - solver_btol=solver_btol, - solver_maxiter=solver_maxiter, + solver_atol=demeaner.solver_atol, + solver_btol=demeaner.solver_btol, + solver_maxiter=demeaner.solver_maxiter, warn_on_cpu_fallback=demeaner.warn_on_cpu_fallback, dtype=np.float32 if demeaner.precision == "float32" else np.float64, use_preconditioner=demeaner.use_preconditioner, ) - return demean_func( - x=x, - flist=flist.astype(np.uintp, copy=False), - weights=weights, - tol=fixef_tol, - maxiter=fixef_maxiter, - ) + if isinstance(demeaner, MapDemeaner): + backend = demeaner.backend + if backend == "numba": + demean_func = demean + elif backend == "rust": + from pyfixest.core.demean import demean as demean_rs + + demean_func = demean_rs + elif backend == "jax": + from pyfixest.estimation.jax.demean_jax_ import demean_jax + + demean_func = demean_jax + else: + raise ValueError(f"Unknown MapDemeaner backend: {backend!r}") + + return demean_func( + x=x, + flist=flist_uint, + weights=weights, + tol=demeaner.fixef_tol, + maxiter=demeaner.fixef_maxiter, + ) + + raise TypeError(f"Unsupported demeaner type: {type(demeaner)!r}") @nb.njit diff --git a/pyfixest/estimation/models/feglm_.py b/pyfixest/estimation/models/feglm_.py index 8a71dd9e8..fcb02da69 100644 --- a/pyfixest/estimation/models/feglm_.py +++ b/pyfixest/estimation/models/feglm_.py @@ -9,12 +9,8 @@ NonConvergenceError, ) from 
pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.backends import BACKENDS from pyfixest.estimation.internals.demean_ import dispatch_demean -from pyfixest.estimation.internals.demeaner_options import ( - ResolvedDemeaner, - get_demeaner_backend, -) +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.solvers import solve_ols from pyfixest.estimation.models.feols_ import ( Feols, @@ -95,13 +91,6 @@ def __init__( self.separation_check = separation_check self._accelerate = accelerate - backend_key = get_demeaner_backend(self._demeaner) - try: - impl = BACKENDS[backend_key] - except KeyError: - raise ValueError(f"Unknown demeaner backend {backend_key!r}") - self._demean_func = impl["demean"] - self._support_crv3_inference = True self._support_iid_inference = True self._support_hac_inference = True @@ -234,7 +223,6 @@ def get_fit(self): X_tilde, self._coefnames, self._collin_tol, - backend_func=self._find_collinear_variables_func, ) ) if self._collin_index: @@ -424,9 +412,6 @@ def residualize( x=np.c_[v, X], flist=flist, weights=weights, - fixef_tol=tol, - fixef_maxiter=maxiter, - demean_func=self._demean_func, demeaner=self._demeaner, ) if success is False: diff --git a/pyfixest/estimation/models/feiv_.py b/pyfixest/estimation/models/feiv_.py index 4dfe9de4a..63d8b457e 100644 --- a/pyfixest/estimation/models/feiv_.py +++ b/pyfixest/estimation/models/feiv_.py @@ -215,9 +215,6 @@ def demean(self) -> None: self._weights.flatten(), self._lookup_demeaned_data, self._na_index, - self._fixef_tol, - self._fixef_maxiter, - self._demean_func, self._demeaner, ) else: @@ -236,7 +233,6 @@ def drop_multicol_vars(self) -> None: self._Z, self._coefnames_z, self._collin_tol, - self._find_collinear_variables_func, ) def get_fit(self) -> None: diff --git a/pyfixest/estimation/models/feols_.py b/pyfixest/estimation/models/feols_.py index 73cf0d052..60cb9aab4 100644 --- 
a/pyfixest/estimation/models/feols_.py +++ b/pyfixest/estimation/models/feols_.py @@ -1,6 +1,6 @@ import re import warnings -from collections.abc import Callable, Mapping +from collections.abc import Mapping from importlib import import_module from typing import Any, Literal, cast @@ -11,17 +11,16 @@ from scipy.sparse.linalg import lsqr from scipy.stats import chi2, f, t +from pyfixest.core.collinear import find_collinear_variables +from pyfixest.core.crv1 import crv1_meat_loop +from pyfixest.core.nested_fixed_effects import count_fixef_fully_nested_all from pyfixest.demeaners import MapDemeaner from pyfixest.errors import VcovTypeNotSupportedError from pyfixest.estimation.api.utils import _ALL_SAMPLE, _AllSampleSentinel from pyfixest.estimation.formula import model_matrix as model_matrix_fixest from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.backends import BACKENDS from pyfixest.estimation.internals.demean_ import demean_model -from pyfixest.estimation.internals.demeaner_options import ( - ResolvedDemeaner, - get_demeaner_backend, -) +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( PredictionErrorOptions, PredictionType, @@ -333,16 +332,9 @@ def __init__( # self._coefnames = None self._icovars = None - backend_key = get_demeaner_backend(demeaner) - try: - impl = BACKENDS[backend_key] - except KeyError: - raise ValueError(f"Unknown backend {backend_key!r}") - - self._demean_func = impl["demean"] - self._find_collinear_variables_func = impl["collinear"] - self._crv1_meat_func = impl["crv1_meat"] - self._count_nested_fixef_func = impl["nonnested"] + # Removed: BACKENDS dict lookup. Collinearity, CRV1, and nested FE + # detection always use the Rust implementations. Demeaning backend + # is resolved inside dispatch_demean from the typed demeaner object. 
# set in get_fit() self._tZX = np.array([]) @@ -509,9 +501,6 @@ def demean(self): self._weights.flatten(), self._lookup_demeaned_data, self._na_index, - self._fixef_tol, - self._fixef_maxiter, - self._demean_func, self._demeaner, ) else: @@ -544,7 +533,6 @@ def drop_multicol_vars(self): self._X, self._coefnames, self._collin_tol, - backend_func=self._find_collinear_variables_func, ) # update X_is_empty self._X_is_empty = self._X.shape[1] == 0 @@ -761,7 +749,7 @@ def vcov( k_fe_nested = 0 n_fe_fully_nested = 0 if self._fixef is not None and self._ssc_dict["k_fixef"] == "nonnested": - k_fe_nested_flag, n_fe_fully_nested = self._count_nested_fixef_func( + k_fe_nested_flag, n_fe_fully_nested = count_fixef_fully_nested_all( all_fixef_array=np.array( self._fixef.replace("^", "_").split("+"), dtype=str ), @@ -954,7 +942,7 @@ def _vcov_crv1(self, clustid: np.ndarray, cluster_col: np.ndarray): k = self._scores.shape[1] meat = np.zeros((k, k)) - meat = self._crv1_meat_func( + meat = crv1_meat_loop( scores=self._scores.astype(np.float64), clustid=clustid.astype(np.uintp), cluster_col=cluster_col.astype(np.uintp), @@ -2420,7 +2408,6 @@ def _drop_multicollinear_variables( X: np.ndarray, names: list[str], collin_tol: float, - backend_func: Callable, ) -> tuple[np.ndarray, list[str], list[str], list[int]]: """ Check for multicollinearity in the design matrices X and Z. @@ -2433,8 +2420,6 @@ def _drop_multicollinear_variables( The names of the coefficients. collin_tol : float The tolerance level for the multicollinearity check. - backend_func: Callable - Which backend function to use for the multicollinearity check. Returns ------- @@ -2450,7 +2435,7 @@ def _drop_multicollinear_variables( # TODO: avoid doing this computation twice, e.g. 
compute tXXinv here as fixest does tXX = X.T @ X - id_excl, n_excl, all_removed = backend_func(tXX, collin_tol) + id_excl, n_excl, all_removed = find_collinear_variables(tXX, collin_tol) collin_vars = [] collin_index = [] diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py index 0c154a0ef..109a20ec5 100644 --- a/pyfixest/estimation/models/fepois_.py +++ b/pyfixest/estimation/models/fepois_.py @@ -314,9 +314,6 @@ def get_fit(self) -> None: x=ZX, flist=self._fe, weights=combined_weights.flatten(), - fixef_tol=self._fixef_tol, - fixef_maxiter=self._fixef_maxiter, - demean_func=self._demean_func, demeaner=self._demeaner, ) if success is False: @@ -333,7 +330,6 @@ def get_fit(self) -> None: X_resid, self._coefnames, self._collin_tol, - backend_func=self._find_collinear_variables_func, ) ) if self._collin_index: diff --git a/tests/test_demean.py b/tests/test_demean.py index 0d161a5c6..130ea0f58 100644 --- a/tests/test_demean.py +++ b/tests/test_demean.py @@ -5,6 +5,7 @@ from pyfixest.core import demean as demean_rs from pyfixest.core.demean import demean_within +from pyfixest.demeaners import LsmrDemeaner, MapDemeaner from pyfixest.estimation.cupy.demean_cupy_ import demean_cupy32, demean_cupy64 from pyfixest.estimation.internals.demean_ import ( _set_demeaner_backend, @@ -72,11 +73,16 @@ def test_set_demeaner_backend(): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_no_fixed_effects(benchmark, demean_func): +def test_demean_model_no_fixed_effects(benchmark, demeaner): """Test demean_model when there are no fixed effects.""" # Create 
sample data N = 1000 @@ -94,9 +100,7 @@ def test_demean_model_no_fixed_effects(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # When no fixed effects, output should equal input @@ -107,11 +111,16 @@ def test_demean_model_no_fixed_effects(benchmark, demean_func): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_with_fixed_effects(benchmark, demean_func): +def test_demean_model_with_fixed_effects(benchmark, demeaner): """Test demean_model with fixed effects.""" # Create sample data N = 1000 @@ -132,9 +141,7 @@ def test_demean_model_with_fixed_effects(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Verify results are different from input (since we're demeaning) @@ -153,11 +160,16 @@ def test_demean_model_with_fixed_effects(benchmark, demean_func): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_with_weights(benchmark, demean_func): +def test_demean_model_with_weights(benchmark, demeaner): """Test 
demean_model with weights.""" N = 1000 rng = np.random.default_rng(42) @@ -177,9 +189,7 @@ def test_demean_model_with_weights(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Run without weights for comparison (fresh lookup dict to avoid cache hit) @@ -190,9 +200,7 @@ def test_demean_model_with_weights(benchmark, demean_func): weights=np.ones(N), lookup_demeaned_data={}, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Results should be different with weights vs without @@ -201,11 +209,16 @@ def test_demean_model_with_weights(benchmark, demean_func): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_caching(benchmark, demean_func): +def test_demean_model_caching(benchmark, demeaner): """Test the caching behavior of demean_model.""" N = 1000 rng = np.random.default_rng(42) @@ -224,9 +237,7 @@ def test_demean_model_caching(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Second run - should use cache @@ -238,9 +249,7 @@ def test_demean_model_caching(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Results should be identical @@ -258,9 +267,7 @@ def test_demean_model_caching(benchmark, 
demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Original columns should match previous results @@ -270,11 +277,16 @@ def test_demean_model_caching(benchmark, demean_func): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba", fixef_maxiter=1), + MapDemeaner(backend="jax", fixef_maxiter=1), + MapDemeaner(backend="rust", fixef_maxiter=1), + LsmrDemeaner(use_gpu=False, solver_maxiter=1), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_maxiter_convergence_failure(demean_func): +def test_demean_model_maxiter_convergence_failure(demeaner): """Test that demean_model fails when maxiter is too small.""" N = 100 rng = np.random.default_rng(42) @@ -289,7 +301,7 @@ def test_demean_model_maxiter_convergence_failure(demean_func): lookup_dict = {} # Should fail with very small maxiter - with pytest.raises(ValueError, match="Demeaning failed after 1 iterations"): + with pytest.raises(ValueError, match="Demeaning failed after"): demean_model( Y=Y, X=X, @@ -297,18 +309,21 @@ def test_demean_model_maxiter_convergence_failure(demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=1, # Very small limit - demean_func=demean_func, + demeaner=demeaner, ) @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba", fixef_maxiter=5000), + MapDemeaner(backend="jax", fixef_maxiter=5000), + MapDemeaner(backend="rust", 
fixef_maxiter=5000), + LsmrDemeaner(use_gpu=False, solver_maxiter=5000), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_custom_maxiter_success(demean_func): +def test_demean_model_custom_maxiter_success(demeaner): """Test that demean_model succeeds with reasonable maxiter.""" N = 1000 rng = np.random.default_rng(42) @@ -327,9 +342,7 @@ def test_demean_model_custom_maxiter_success(demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=5000, # Custom limit - demean_func=demean_func, + demeaner=demeaner, ) # Just verify it returns valid results From 8f3e64fa6b1f993c25a5b586ca5bdc9e41162f6a Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 29 Mar 2026 15:53:14 +0200 Subject: [PATCH 06/10] Add within preconditioner reuse and align core bindings --- pyfixest/core/_core_impl.pyi | 8 ++ pyfixest/core/collinear.py | 17 ++- pyfixest/core/demean.py | 173 +++++++++++++++++++++-- pyfixest/demeaners.py | 25 +++- pyfixest/estimation/internals/demean_.py | 1 + src/demean_within.rs | 172 ++++++++++++++++------ src/lib.rs | 4 + tests/test_demean.py | 46 +++++- 8 files changed, 382 insertions(+), 64 deletions(-) diff --git a/pyfixest/core/_core_impl.pyi b/pyfixest/core/_core_impl.pyi index 6dd6a0589..fc78a7fce 100644 --- a/pyfixest/core/_core_impl.pyi +++ b/pyfixest/core/_core_impl.pyi @@ -1,6 +1,8 @@ import numpy as np from numpy.typing import NDArray +class _WithinPreconditionerHandle: ... + def _find_collinear_variables_rs(x: NDArray[np.float64], tol: float = 1e-10): ... def _crv1_meat_loop_rs( scores: NDArray[np.float64], @@ -23,7 +25,13 @@ def _demean_within_rs( krylov_method: str = "cg", gmres_restart: int = 30, preconditioner_type: str = "additive", + preconditioner_handle: _WithinPreconditionerHandle | None = None, ) -> tuple[np.ndarray, bool]: ... 
+def _build_within_preconditioner_rs( + flist: NDArray[np.uint32], + weights: NDArray[np.float64], + preconditioner_type: str = "additive", +) -> _WithinPreconditionerHandle: ... def _count_fixef_fully_nested_all_rs( all_fixef_array: NDArray, cluster_colnames: NDArray, diff --git a/pyfixest/core/collinear.py b/pyfixest/core/collinear.py index 0f3238a0b..a6592e7d4 100644 --- a/pyfixest/core/collinear.py +++ b/pyfixest/core/collinear.py @@ -1,3 +1,14 @@ -from ._core_impl import ( - _find_collinear_variables_rs as find_collinear_variables, # noqa: F401 -) +import numpy as np +from numpy.typing import NDArray + +from ._core_impl import _find_collinear_variables_rs + + +def find_collinear_variables( + x: NDArray[np.float64], tol: float = 1e-10 +) -> tuple[np.ndarray, int, bool]: + """Coerce the Gram matrix into a NumPy float64 array before calling Rust.""" + x_arr = np.asarray(x, dtype=np.float64) + if x_arr.ndim != 2: + raise ValueError("`x` must be a 2D array.") + return _find_collinear_variables_rs(x_arr, tol) diff --git a/pyfixest/core/demean.py b/pyfixest/core/demean.py index b73e5c299..9957f7d96 100644 --- a/pyfixest/core/demean.py +++ b/pyfixest/core/demean.py @@ -1,7 +1,33 @@ +from __future__ import annotations + +import warnings +from dataclasses import dataclass, field +from typing import Any + import numpy as np from numpy.typing import NDArray -from ._core_impl import _demean_rs, _demean_within_rs +from ._core_impl import ( + _build_within_preconditioner_rs, + _demean_rs, + _demean_within_rs, +) + + +def _prepare_within_flist(flist: NDArray[np.uint32]) -> NDArray[np.uint32]: + flist_arr = np.asfortranarray(flist, dtype=np.uint32) + if flist_arr.ndim == 1: + flist_arr = flist_arr.reshape((-1, 1), order="F") + return flist_arr + + +def _prepare_weights(weights: NDArray[np.float64]) -> NDArray[np.float64]: + return np.asarray(weights, dtype=np.float64).reshape(-1) + + +def _compute_factor_cardinalities(flist: NDArray[np.uint32]) -> tuple[int, ...]: + # FE 
columns are dense 0..K-1 codes from `pd.factorize`, so cardinality is max + 1. + return tuple((flist.max(axis=0) + 1).tolist()) def _sanitize_krylov_and_preconditioner( @@ -23,8 +49,118 @@ def _sanitize_krylov_and_preconditioner( return krylov_method, preconditioner_type -# Legacy: used by BACKENDS dict and _set_demeaner_backend. -# Remove once all callers use the typed demeaner= API. +@dataclass(frozen=True, slots=True) +class WithinPreconditioner: + """Thin in-process wrapper around a built `within::FePreconditioner`.""" + + n_obs: int + n_factors: int + factor_cardinalities: tuple[int, ...] + preconditioner_type: str + _preconditioner_handle: Any = field(repr=False) + + +def build_within_preconditioner( + flist: NDArray[np.uint32], + weights: NDArray[np.float64], + preconditioner_type: str = "additive", +) -> WithinPreconditioner: + """ + Build a reusable preconditioner for the `within` demeaner backend. + + Parameters + ---------- + flist : numpy.ndarray + Fixed-effects array of shape `(n_obs, n_factors)`. + weights : numpy.ndarray + Observation weights. + preconditioner_type : {"additive", "multiplicative"} + Schwarz preconditioner variant. + + Returns + ------- + WithinPreconditioner + In-process reusable preconditioner handle. + + Notes + ----- + This is a pyfixest convenience wrapper around the upstream `within` solver + flow: it constructs a temporary solver with upstream-compatible defaults and + extracts its built `FePreconditioner` for reuse. + """ + preconditioner_type = preconditioner_type.lower() + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." + ) + + flist_arr = _prepare_within_flist(flist) + if flist_arr.shape[1] == 1: + raise ValueError( + "A reusable `within` preconditioner requires at least two fixed effects." 
+ ) + + weights_arr = _prepare_weights(weights) + handle = _build_within_preconditioner_rs( + flist_arr, + weights_arr, + preconditioner_type, + ) + return WithinPreconditioner( + n_obs=flist_arr.shape[0], + n_factors=flist_arr.shape[1], + factor_cardinalities=_compute_factor_cardinalities(flist_arr), + preconditioner_type=preconditioner_type, + _preconditioner_handle=handle, + ) + + +def validate_within_preconditioner( + preconditioner: WithinPreconditioner, + flist: NDArray[np.uint32], + *, + preconditioner_type: str | None = None, +) -> None: + """Validate compatibility and emit warnings for a reusable preconditioner. + + Raises ``ValueError`` for hard incompatibilities (number of factors, + cardinalities, preconditioner type). Emits ``UserWarning`` for a differing + observation count. + """ + flist_arr = _prepare_within_flist(flist) + + if preconditioner.n_factors != flist_arr.shape[1]: + raise ValueError( + "The supplied within preconditioner is incompatible with the current " + "fixed-effects structure: the number of fixed effects does not match." + ) + if ( + preconditioner_type is not None + and preconditioner.preconditioner_type != preconditioner_type + ): + raise ValueError( + "The supplied within preconditioner uses " + f"`preconditioner_type='{preconditioner.preconditioner_type}'`, but " + f"`preconditioner_type='{preconditioner_type}'` was requested." + ) + + factor_cardinalities = _compute_factor_cardinalities(flist_arr) + if preconditioner.factor_cardinalities != factor_cardinalities: + raise ValueError( + "The supplied within preconditioner is incompatible with the current " + "fixed-effects structure: the fixed-effect cardinalities do not match." + ) + + # Soft warnings — reuse is allowed but may degrade effectiveness. + if preconditioner.n_obs != flist_arr.shape[0]: + warnings.warn( + "The supplied within preconditioner was built on a different number " + "of observations. 
Reuse is allowed, but effectiveness may degrade.", + UserWarning, + stacklevel=3, + ) + + def demean( x: NDArray[np.float64], flist: NDArray[np.uint64], @@ -67,8 +203,6 @@ def demean( ) -# Legacy: used by BACKENDS dict and _set_demeaner_backend. -# Remove once all callers use the typed demeaner= API. def demean_within( x: NDArray[np.float64], flist: NDArray[np.uint32], @@ -78,6 +212,7 @@ def demean_within( krylov_method: str = "cg", gmres_restart: int = 30, preconditioner_type: str = "additive", + preconditioner: WithinPreconditioner | None = None, ) -> tuple[NDArray, bool]: """Demean an array using the configurable `within` backend.""" krylov_method, preconditioner_type = _sanitize_krylov_and_preconditioner( @@ -85,22 +220,38 @@ def demean_within( preconditioner_type, ) - if flist.ndim == 1 or flist.shape[1] == 1: + flist_arr = _prepare_within_flist(flist) + if flist_arr.shape[1] == 1: return _demean_rs( x.astype(np.float64, copy=False), - flist.astype(np.uint64, copy=False), - weights.astype(np.float64, copy=False), + flist_arr.astype(np.uint64, copy=False), + _prepare_weights(weights), tol, maxiter, ) + weights_arr = _prepare_weights(weights) + x_arr = np.asarray(x, dtype=np.float64) + if x_arr.ndim == 1: + x_arr = x_arr.reshape((-1, 1)) + + preconditioner_handle = None + if preconditioner is not None: + validate_within_preconditioner( + preconditioner, + flist_arr, + preconditioner_type=preconditioner_type, + ) + preconditioner_handle = preconditioner._preconditioner_handle + return _demean_within_rs( - x.astype(np.float64, copy=False), - np.asfortranarray(flist, dtype=np.uint32), - weights.astype(np.float64, copy=False).reshape(-1), + x_arr, + flist_arr, + weights_arr, tol, maxiter, krylov_method, gmres_restart, preconditioner_type, + preconditioner_handle, ) diff --git a/pyfixest/demeaners.py b/pyfixest/demeaners.py index 1d70faaa0..592098e44 100644 --- a/pyfixest/demeaners.py +++ b/pyfixest/demeaners.py @@ -2,7 +2,10 @@ from dataclasses import dataclass from 
numbers import Real -from typing import ClassVar, Literal +from typing import TYPE_CHECKING, ClassVar, Literal + +if TYPE_CHECKING: + from pyfixest.core.demean import WithinPreconditioner def _validate_positive_float(value: float, name: str) -> float: @@ -67,6 +70,7 @@ class WithinDemeaner(BaseDemeaner): krylov_method: str = "cg" gmres_restart: int = 30 preconditioner_type: str = "additive" + preconditioner: WithinPreconditioner | None = None kind: ClassVar[str] = "within" def __post_init__(self) -> None: @@ -88,10 +92,25 @@ def __post_init__(self) -> None: raise ValueError( "`preconditioner_type` must be either 'additive' or 'multiplicative'." ) - if preconditioner_type == "multiplicative" and krylov_method != "gmres": - raise ValueError("Multiplicative Schwarz requires `krylov_method='gmres'`.") object.__setattr__(self, "preconditioner_type", preconditioner_type) + if self.preconditioner is not None: + from pyfixest.core.demean import WithinPreconditioner + + if not isinstance(self.preconditioner, WithinPreconditioner): + raise TypeError("`preconditioner` must be a WithinPreconditioner.") + object.__setattr__( + self, + "preconditioner_type", + self.preconditioner.preconditioner_type, + ) + + if ( + self.preconditioner_type == "multiplicative" + and self.krylov_method != "gmres" + ): + raise ValueError("Multiplicative Schwarz requires `krylov_method='gmres'`.") + @dataclass(frozen=True, slots=True) class LsmrDemeaner(BaseDemeaner): diff --git a/pyfixest/estimation/internals/demean_.py b/pyfixest/estimation/internals/demean_.py index a40a0930e..0472d753d 100644 --- a/pyfixest/estimation/internals/demean_.py +++ b/pyfixest/estimation/internals/demean_.py @@ -178,6 +178,7 @@ def dispatch_demean( krylov_method=demeaner.krylov_method, gmres_restart=demeaner.gmres_restart, preconditioner_type=demeaner.preconditioner_type, + preconditioner=demeaner.preconditioner, ) if isinstance(demeaner, LsmrDemeaner): diff --git a/src/demean_within.rs b/src/demean_within.rs index 
2c526289d..cce01d4dd 100644 --- a/src/demean_within.rs +++ b/src/demean_within.rs @@ -1,8 +1,20 @@ use ndarray::{Array2, ArrayView1, ArrayView2, ShapeBuilder}; use numpy::{PyArray2, PyReadonlyArray1, PyReadonlyArray2}; -use pyo3::exceptions::PyValueError; +use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; +#[pyclass(module = "pyfixest.core._core_impl", name = "_WithinPreconditionerHandle")] +#[derive(Clone)] +pub struct WithinPreconditionerHandle { + inner: within::FePreconditioner, +} + +impl WithinPreconditionerHandle { + fn from_inner(inner: within::FePreconditioner) -> Self { + Self { inner } + } +} + fn extract_columns(x: &ArrayView2) -> Vec> { (0..x.ncols()) .map(|col| x.column(col).iter().copied().collect()) @@ -42,7 +54,7 @@ fn build_preconditioner_config(preconditioner_type: &str) -> PyResult Ok(within::Preconditioner::Multiplicative( - within::LocalSolverConfig::default(), + within::LocalSolverConfig::solver_default(), )), _ => Err(PyValueError::new_err( "preconditioner_type must be either 'additive' or 'multiplicative'.", @@ -50,21 +62,47 @@ fn build_preconditioner_config(preconditioner_type: &str) -> PyResult PyResult<()> { - if matches!(params.krylov, within::KrylovMethod::Cg) - && preconditioner_type == "multiplicative" - { - return Err(PyValueError::new_err( - "Multiplicative Schwarz requires `krylov_method='gmres'`.", - )); +fn build_preconditioner_solver_params(preconditioner_type: &str) -> within::SolverParams { + match preconditioner_type { + "multiplicative" => within::SolverParams { + krylov: within::KrylovMethod::Gmres { restart: 30 }, + ..within::SolverParams::default() + }, + _ => within::SolverParams::default(), } - Ok(()) } -fn demean_within_impl( +fn build_weighted_design( + flist: &ArrayView2, + weights: &ArrayView1, +) -> PyResult> { + let n_obs = flist.nrows(); + let n_factors = flist.ncols(); + let factor_levels: Vec> = (0..n_factors) + .map(|factor| flist.column(factor).iter().copied().collect()) + 
.collect(); + let weights_vec: Vec = weights.iter().copied().collect(); + let store = within::FactorMajorStore::new( + factor_levels, + within::ObservationWeights::Dense(weights_vec), + n_obs, + ) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + within::WeightedDesign::from_store(store).map_err(|e| PyRuntimeError::new_err(e.to_string())) +} + +fn solve_result_to_array( + result: within::BatchSolveResult, + n_obs: usize, + n_rhs: usize, +) -> (Array2, bool) { + let all_converged = result.converged().iter().all(|&c| c); + let out = Array2::from_shape_vec((n_obs, n_rhs).f(), result.demeaned_all().to_vec()) + .expect("within returns one demeaned column per RHS"); + (out, all_converged) +} + +fn solve_within_impl( x: &ArrayView2, flist: &ArrayView2, weights: &ArrayView1, @@ -73,31 +111,68 @@ fn demean_within_impl( krylov_method: &str, gmres_restart: usize, preconditioner_type: &str, + preconditioner_handle: Option<&WithinPreconditionerHandle>, ) -> PyResult<(Array2, bool)> { + let params = build_solver_params(tol, maxiter, krylov_method, gmres_restart)?; let n_obs = x.nrows(); let n_rhs = x.ncols(); - let x_columns = extract_columns(x); let x_slices: Vec<&[f64]> = x_columns.iter().map(|col| col.as_slice()).collect(); - let weights_vec: Vec = weights.iter().copied().collect(); - let params = build_solver_params(tol, maxiter, krylov_method, gmres_restart)?; - validate_solver_preconditioner_combo(¶ms, preconditioner_type)?; - let preconditioner = build_preconditioner_config(preconditioner_type)?; - - let result = within::solve_batch( - flist.view(), - &x_slices, - Some(&weights_vec), - ¶ms, - Some(&preconditioner), - ) - .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))?; + let result = if let Some(handle) = preconditioner_handle { + let design = build_weighted_design(flist, weights)?; + let solver = within::Solver::from_design_with_preconditioner( + design, + ¶ms, + handle.inner.clone(), + ) + .map_err(|e| 
PyRuntimeError::new_err(e.to_string()))?; + solver + .solve_batch(&x_slices) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))? + } else { + let preconditioner = build_preconditioner_config(preconditioner_type)?; + let design = build_weighted_design(flist, weights)?; + let solver = within::Solver::from_design(design, ¶ms, Some(&preconditioner)) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + solver + .solve_batch(&x_slices) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))? + }; - let all_converged = result.converged().iter().all(|&c| c); - let out = Array2::from_shape_vec((n_obs, n_rhs).f(), result.demeaned_all().to_vec()) - .expect("within returns one demeaned column per RHS"); - Ok((out, all_converged)) + Ok(solve_result_to_array(result, n_obs, n_rhs)) +} + +fn build_preconditioner_impl( + flist: &ArrayView2, + weights: &ArrayView1, + preconditioner_type: &str, +) -> PyResult { + let preconditioner_config = build_preconditioner_config(preconditioner_type)?; + let design = build_weighted_design(flist, weights)?; + let params = build_preconditioner_solver_params(preconditioner_type); + let solver = within::Solver::from_design(design, ¶ms, Some(&preconditioner_config)) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let built = solver + .preconditioner() + .cloned() + .ok_or_else(|| PyRuntimeError::new_err("solver did not build a preconditioner"))?; + Ok(WithinPreconditionerHandle::from_inner(built)) +} + +#[pyfunction] +#[pyo3(signature = (flist, weights, preconditioner_type="additive"))] +pub fn _build_within_preconditioner_rs( + py: Python<'_>, + flist: PyReadonlyArray2, + weights: PyReadonlyArray1, + preconditioner_type: &str, +) -> PyResult> { + let flist_arr = flist.as_array(); + let weights_arr = weights.as_array(); + let handle = + py.detach(|| build_preconditioner_impl(&flist_arr, &weights_arr, preconditioner_type))?; + Py::new(py, handle) } #[pyfunction] @@ -109,7 +184,8 @@ fn demean_within_impl( maxiter=1_000, 
krylov_method="cg", gmres_restart=30, - preconditioner_type="additive" + preconditioner_type="additive", + preconditioner_handle=None ))] pub fn _demean_within_rs( py: Python<'_>, @@ -121,24 +197,28 @@ pub fn _demean_within_rs( krylov_method: &str, gmres_restart: usize, preconditioner_type: &str, + preconditioner_handle: Option>, ) -> PyResult<(Py>, bool)> { let x_arr = x.as_array(); let flist_arr = flist.as_array(); let weights_arr = weights.as_array(); + let preconditioner_handle = preconditioner_handle + .as_ref() + .map(|handle| handle.bind(py).borrow().clone()); - let (out, success) = py - .detach(|| { - demean_within_impl( - &x_arr, - &flist_arr, - &weights_arr, - tol, - maxiter, - krylov_method, - gmres_restart, - preconditioner_type, - ) - })?; + let (out, success) = py.detach(|| { + solve_within_impl( + &x_arr, + &flist_arr, + &weights_arr, + tol, + maxiter, + krylov_method, + gmres_restart, + preconditioner_type, + preconditioner_handle.as_ref(), + ) + })?; let pyarray = PyArray2::from_owned_array(py, out); Ok((pyarray.into(), success)) diff --git a/src/lib.rs b/src/lib.rs index 6f2d2e421..d5ee05a08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,10 +10,14 @@ mod nw; #[pymodule] fn _core_impl(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; m.add_wrapped(wrap_pyfunction!(collinear::_find_collinear_variables_rs))?; m.add_wrapped(wrap_pyfunction!(crv1::_crv1_meat_loop_rs))?; m.add_wrapped(wrap_pyfunction!(demean::_demean_rs))?; m.add_wrapped(wrap_pyfunction!(demean_within::_demean_within_rs))?; + m.add_wrapped(wrap_pyfunction!( + demean_within::_build_within_preconditioner_rs + ))?; m.add_wrapped(wrap_pyfunction!( nested_fixed_effects::_count_fixef_fully_nested_all_rs ))?; diff --git a/tests/test_demean.py b/tests/test_demean.py index 130ea0f58..8eadf57b6 100644 --- a/tests/test_demean.py +++ b/tests/test_demean.py @@ -1,10 +1,12 @@ +import re + import numpy as np import pandas as pd import pyhdfe import pytest from pyfixest.core import demean 
as demean_rs -from pyfixest.core.demean import demean_within +from pyfixest.core.demean import build_within_preconditioner, demean_within from pyfixest.demeaners import LsmrDemeaner, MapDemeaner from pyfixest.estimation.cupy.demean_cupy_ import demean_cupy32, demean_cupy64 from pyfixest.estimation.internals.demean_ import ( @@ -396,6 +398,48 @@ def test_feols_integration_maxiter(): assert model is not None +def test_demean_within_multiplicative_preconditioner_build_and_reuse(): + rng = np.random.default_rng(123) + n = 400 + + x = rng.normal(size=(n, 3)) + flist = np.column_stack([rng.integers(0, 40, n), rng.integers(0, 25, n)]).astype( + np.uint32 + ) + weights = rng.uniform(0.5, 1.5, n) + + preconditioner = build_within_preconditioner( + flist, + weights, + preconditioner_type="multiplicative", + ) + + x_demeaned, success = demean_within( + x, + flist, + weights, + krylov_method="gmres", + preconditioner_type="multiplicative", + preconditioner=preconditioner, + ) + + assert success + assert x_demeaned.shape == x.shape + + with pytest.raises( + ValueError, + match=re.escape("Multiplicative Schwarz requires `krylov_method='gmres'`."), + ): + demean_within( + x, + flist, + weights, + krylov_method="cg", + preconditioner_type="multiplicative", + preconditioner=preconditioner, + ) + + @pytest.mark.parametrize( argnames="demean_func", argvalues=[demean_rs, demean_within, demean_cupy32, demean_cupy64], From ee2abcd4116c62a3f736b4c17ee1da6c2827bd83 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 29 Mar 2026 16:28:44 +0200 Subject: [PATCH 07/10] cache preconditioner for multiple estimation and return preconditioner_ attribute for reuse across model fits --- pyfixest/estimation/FixestMulti_.py | 3 +- pyfixest/estimation/internals/demean_.py | 87 +++++++++++++++---- pyfixest/estimation/models/fegaussian_.py | 3 +- pyfixest/estimation/models/feglm_.py | 7 +- pyfixest/estimation/models/feiv_.py | 4 +- pyfixest/estimation/models/felogit_.py | 3 +- 
pyfixest/estimation/models/feols_.py | 11 ++- .../estimation/models/feols_compressed_.py | 5 +- pyfixest/estimation/models/fepois_.py | 7 +- pyfixest/estimation/models/feprobit_.py | 3 +- pyfixest/estimation/quantreg/QuantregMulti.py | 3 +- pyfixest/estimation/quantreg/quantreg_.py | 3 +- tests/test_api.py | 20 +++++ tests/test_demean.py | 2 +- 14 files changed, 129 insertions(+), 32 deletions(-) diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py index 371868034..ed6cf795b 100644 --- a/pyfixest/estimation/FixestMulti_.py +++ b/pyfixest/estimation/FixestMulti_.py @@ -7,6 +7,7 @@ from pyfixest.estimation.api.utils import _ALL_SAMPLE, _AllSampleSentinel from pyfixest.estimation.formula.parse import Formula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( QuantregMethodOptions, @@ -314,7 +315,7 @@ def _estimate_all_models( # dictionary to cache demeaned data keyed by na_index, # only relevant for `.feols()` - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame] = {} + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry] = {} for FixestFormula in fixef_key_models: # type: ignore # loop over both dictfe and dictfe_iv (if the latter is not None) diff --git a/pyfixest/estimation/internals/demean_.py b/pyfixest/estimation/internals/demean_.py index 0472d753d..0a80027c6 100644 --- a/pyfixest/estimation/internals/demean_.py +++ b/pyfixest/estimation/internals/demean_.py @@ -1,23 +1,36 @@ from collections.abc import Callable +from dataclasses import dataclass, replace from importlib import import_module -from typing import Any, cast +from typing import cast import numba as nb import numpy as np import pandas as pd -from pyfixest.core.demean import demean_within +from pyfixest.core.demean import ( + WithinPreconditioner, + build_within_preconditioner, + demean_within, +) 
from pyfixest.demeaners import LsmrDemeaner, MapDemeaner, WithinDemeaner from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import DemeanerBackendOptions +@dataclass(slots=True) +class DemeanedDataCacheEntry: + """Cached demeaned data and any reusable within preconditioner.""" + + demeaned: pd.DataFrame + preconditioner: WithinPreconditioner | None = None + + def demean_model( Y: pd.DataFrame, X: pd.DataFrame, fe: pd.DataFrame | None, weights: np.ndarray | None, - lookup_demeaned_data: dict[frozenset[int], Any], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], na_index: frozenset[int], demeaner: ResolvedDemeaner, ) -> tuple[pd.DataFrame, pd.DataFrame]: @@ -80,15 +93,8 @@ def demean_model( fe_array = fe.to_numpy() # check if looked dict has data for na_index if lookup_demeaned_data.get(na_index) is not None: - # get data out of lookup table: list of [algo, data] - value = lookup_demeaned_data.get(na_index) - if value is not None: - try: - _, YX_demeaned_old = value - except ValueError: - print("Error: Expected the value to be iterable with two elements.") - else: - pass + cache_entry = lookup_demeaned_data[na_index] + YX_demeaned_old = cache_entry.demeaned # get not yet demeaned covariates var_diff_names = list(set(yx_names) - set(YX_demeaned_old.columns)) @@ -101,11 +107,17 @@ def demean_model( if var_diff.ndim == 1: var_diff = var_diff.reshape(len(var_diff), 1) + effective_demeaner, _ = _prepare_within_cache_preconditioner( + flist=fe_array, + weights=weights_array, + demeaner=demeaner, + cache_preconditioner=cache_entry.preconditioner, + ) YX_demean_new, success = dispatch_demean( x=var_diff, flist=fe_array, weights=weights_array, - demeaner=demeaner, + demeaner=effective_demeaner, ) if success is False: raise ValueError( @@ -129,11 +141,16 @@ def demean_model( YX_demeaned = YX_demeaned_old[yx_names] else: + effective_demeaner, preconditioner = 
_prepare_within_cache_preconditioner( + flist=fe_array, + weights=weights_array, + demeaner=demeaner, + ) YX_demeaned_array, success = dispatch_demean( x=YX_array, flist=fe_array, weights=weights_array, - demeaner=demeaner, + demeaner=effective_demeaner, ) if success is False: raise ValueError( @@ -142,8 +159,15 @@ def demean_model( YX_demeaned = pd.DataFrame(YX_demeaned_array) YX_demeaned.columns = yx_names + lookup_demeaned_data[na_index] = DemeanedDataCacheEntry( + demeaned=YX_demeaned, + preconditioner=preconditioner, + ) - lookup_demeaned_data[na_index] = [None, YX_demeaned] + if na_index not in lookup_demeaned_data: + lookup_demeaned_data[na_index] = DemeanedDataCacheEntry( + demeaned=YX_demeaned + ) else: # nothing to demean here @@ -159,6 +183,39 @@ def demean_model( return Yd, Xd +def _prepare_within_cache_preconditioner( + flist: np.ndarray, + weights: np.ndarray, + demeaner: ResolvedDemeaner, + cache_preconditioner: WithinPreconditioner | None = None, +) -> tuple[ResolvedDemeaner, WithinPreconditioner | None]: + """ + Prepare the effective demeaner and reusable within cache preconditioner. + + Only `WithinDemeaner` participates in preconditioner reuse. Other demeaners + are returned unchanged with no cache preconditioner. 
+ """ + if not isinstance(demeaner, WithinDemeaner): + return demeaner, None + if demeaner.preconditioner is not None: + return demeaner, demeaner.preconditioner + if cache_preconditioner is not None: + return ( + replace(demeaner, preconditioner=cache_preconditioner), + cache_preconditioner, + ) + flist_uint32 = flist.astype(np.uint32, copy=False) + if flist_uint32.ndim == 1 or flist_uint32.shape[1] <= 1: + return demeaner, None + + preconditioner = build_within_preconditioner( + flist=flist_uint32, + weights=weights, + preconditioner_type=demeaner.preconditioner_type, + ) + return replace(demeaner, preconditioner=preconditioner), preconditioner + + def dispatch_demean( x: np.ndarray, flist: np.ndarray, diff --git a/pyfixest/estimation/models/fegaussian_.py b/pyfixest/estimation/models/fegaussian_.py index bd5ae7200..936edc07c 100644 --- a/pyfixest/estimation/models/fegaussian_.py +++ b/pyfixest/estimation/models/fegaussian_.py @@ -5,6 +5,7 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -24,7 +25,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ diff --git a/pyfixest/estimation/models/feglm_.py b/pyfixest/estimation/models/feglm_.py index fcb02da69..0525762b3 100644 --- a/pyfixest/estimation/models/feglm_.py +++ b/pyfixest/estimation/models/feglm_.py @@ -9,7 +9,10 @@ NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.demean_ import dispatch_demean +from pyfixest.estimation.internals.demean_ import ( + DemeanedDataCacheEntry, + dispatch_demean, 
+) from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.solvers import solve_ols from pyfixest.estimation.models.feols_ import ( @@ -37,7 +40,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ diff --git a/pyfixest/estimation/models/feiv_.py b/pyfixest/estimation/models/feiv_.py index 63d8b457e..c06f2a035 100644 --- a/pyfixest/estimation/models/feiv_.py +++ b/pyfixest/estimation/models/feiv_.py @@ -7,7 +7,7 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.demean_ import demean_model +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry, demean_model from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.solvers import solve_ols from pyfixest.estimation.models.feols_ import Feols, _drop_multicollinear_variables @@ -146,7 +146,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: Literal[ "np.linalg.lstsq", "np.linalg.solve", diff --git a/pyfixest/estimation/models/felogit_.py b/pyfixest/estimation/models/felogit_.py index 6ef55928f..4c4ade930 100644 --- a/pyfixest/estimation/models/felogit_.py +++ b/pyfixest/estimation/models/felogit_.py @@ -5,6 +5,7 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -24,7 +25,7 @@ def __init__( collin_tol: float, fixef_tol: 
float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ diff --git a/pyfixest/estimation/models/feols_.py b/pyfixest/estimation/models/feols_.py index 60cb9aab4..be12e9f3f 100644 --- a/pyfixest/estimation/models/feols_.py +++ b/pyfixest/estimation/models/feols_.py @@ -13,13 +13,14 @@ from pyfixest.core.collinear import find_collinear_variables from pyfixest.core.crv1 import crv1_meat_loop +from pyfixest.core.demean import WithinPreconditioner from pyfixest.core.nested_fixed_effects import count_fixef_fully_nested_all from pyfixest.demeaners import MapDemeaner from pyfixest.errors import VcovTypeNotSupportedError from pyfixest.estimation.api.utils import _ALL_SAMPLE, _AllSampleSentinel from pyfixest.estimation.formula import model_matrix as model_matrix_fixest from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.demean_ import demean_model +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry, demean_model from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( PredictionErrorOptions, @@ -259,7 +260,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", demeaner: ResolvedDemeaner | None = None, store_data: bool = True, @@ -308,6 +309,7 @@ def __init__( ) self._demeaner = demeaner self._lookup_demeaned_data = lookup_demeaned_data + self.preconditioner_: WithinPreconditioner | None = None self._store_data = store_data self._copy_data = copy_data self._lean = lean @@ -503,8 +505,13 @@ def demean(self): self._na_index, self._demeaner, ) + cache_entry = 
self._lookup_demeaned_data.get(self._na_index) + self.preconditioner_ = ( + cache_entry.preconditioner if cache_entry is not None else None + ) else: self._Yd, self._Xd = self._Y, self._X + self.preconditioner_ = None def to_array(self): "Convert estimation data frames to np arrays." diff --git a/pyfixest/estimation/models/feols_compressed_.py b/pyfixest/estimation/models/feols_compressed_.py index b744b1c7c..fdc674483 100644 --- a/pyfixest/estimation/models/feols_compressed_.py +++ b/pyfixest/estimation/models/feols_compressed_.py @@ -9,6 +9,7 @@ from tqdm import tqdm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( SolverOptions, @@ -60,7 +61,7 @@ class FeolsCompressed(Feols): The tolerance level for the fixed effects. fixef_maxiter : int The maximum iterations for the demeaning algorithm. - lookup_demeaned_data : dict[str, pd.DataFrame] + lookup_demeaned_data : dict[str, DemeanedDataCacheEntry] The lookup table for demeaned data. solver : SolverOptions The solver to use. 
@@ -99,7 +100,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", demeaner: ResolvedDemeaner | None = None, store_data: bool = True, diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py index 109a20ec5..6266ab11e 100644 --- a/pyfixest/estimation/models/fepois_.py +++ b/pyfixest/estimation/models/fepois_.py @@ -12,7 +12,10 @@ NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.demean_ import dispatch_demean +from pyfixest.estimation.internals.demean_ import ( + DemeanedDataCacheEntry, + dispatch_demean, +) from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( SolverOptions, @@ -96,7 +99,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: SolverOptions = "np.linalg.solve", diff --git a/pyfixest/estimation/models/feprobit_.py b/pyfixest/estimation/models/feprobit_.py index 8054fbed1..334a90edb 100644 --- a/pyfixest/estimation/models/feprobit_.py +++ b/pyfixest/estimation/models/feprobit_.py @@ -7,6 +7,7 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -26,7 +27,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], 
DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ diff --git a/pyfixest/estimation/quantreg/QuantregMulti.py b/pyfixest/estimation/quantreg/QuantregMulti.py index 35081c60e..e06466a29 100644 --- a/pyfixest/estimation/quantreg/QuantregMulti.py +++ b/pyfixest/estimation/quantreg/QuantregMulti.py @@ -8,6 +8,7 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry from pyfixest.estimation.internals.literals import ( QuantregMethodOptions, QuantregMultiOptions, @@ -34,7 +35,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", demeaner_backend: Literal["numba", "jax"] = "numba", store_data: bool = True, diff --git a/pyfixest/estimation/quantreg/quantreg_.py b/pyfixest/estimation/quantreg/quantreg_.py index 4f6466367..fa8ccab20 100644 --- a/pyfixest/estimation/quantreg/quantreg_.py +++ b/pyfixest/estimation/quantreg/quantreg_.py @@ -10,6 +10,7 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry from pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( QuantregMethodOptions, @@ -38,7 +39,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", demeaner_backend: Literal["numba", "jax"] = "numba", store_data: bool = True, diff --git a/tests/test_api.py b/tests/test_api.py index 9b3702856..e97db7207 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -92,6 
+92,26 @@ def test_fepois_args(): np.testing.assert_allclose(fit4.coef(), fit5.coef(), rtol=1e-12) +def test_feols_exposes_and_reuses_within_preconditioner(): + data = pf.get_data() + demeaner = pf.WithinDemeaner(krylov_method="gmres") + + fit1 = pf.feols("Y ~ X1 | f1 + f2", data=data, demeaner=demeaner) + + assert fit1.preconditioner_ is not None + + fit2 = pf.feols( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner( + krylov_method="gmres", + preconditioner=fit1.preconditioner_, + ), + ) + + np.testing.assert_allclose(fit1.coef(), fit2.coef(), rtol=1e-10) + + def test_lean(): data = pf.get_data() fit = pf.feols("Y ~ X1 + X2 | f1", data=data, lean=True) diff --git a/tests/test_demean.py b/tests/test_demean.py index 8eadf57b6..a2dfa50e1 100644 --- a/tests/test_demean.py +++ b/tests/test_demean.py @@ -156,7 +156,7 @@ def test_demean_model_with_fixed_effects(benchmark, demeaner): # Verify results are cached in lookup_dict assert frozenset() in lookup_dict - cached_data = lookup_dict[frozenset()][1] + cached_data = lookup_dict[frozenset()].demeaned assert np.allclose(cached_data[Y.columns].values, Yd.values) assert np.allclose(cached_data[X.columns].values, Xd.values) From 922a68f1c3beb690507aa75c0d48da1426abafd1 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 29 Mar 2026 16:42:34 +0200 Subject: [PATCH 08/10] adjust docs --- docs/how-to/fixed-effects-solvers.qmd | 178 +++++--------------------- 1 file changed, 35 insertions(+), 143 deletions(-) diff --git a/docs/how-to/fixed-effects-solvers.qmd b/docs/how-to/fixed-effects-solvers.qmd index cad7e2a77..e7a700626 100644 --- a/docs/how-to/fixed-effects-solvers.qmd +++ b/docs/how-to/fixed-effects-solvers.qmd @@ -1,23 +1,24 @@ --- title: "Choosing and Reusing Fixed-Effects Solvers" -description: "How to configure fixed-effects demeaners, reuse within solvers and preconditioners, and persist reusable preconditioners across sessions." 
+description: "How to configure fixed-effects demeaners and reuse within preconditioners in the current Python session." categories: [Fixed Effects, Performance] --- ```{python} -import pickle -import tempfile -from pathlib import Path - import numpy as np import pandas as pd import pyfixest as pf ``` -`PyFixest` now accepts object-based demeaner configurations. This makes it easier to choose a backend, tune solver settings, and pass reusable solver objects between regressions. +`PyFixest` now accepts object-based demeaner configurations. This makes it easier +to choose a backend, tune solver settings, and, for the `within` backend, +reuse a pre-built fixed-effects preconditioner across regressions. As a good chunk of +the computational cost of multi-fixed-effect problems is in the preconditioner setup, this can lead to a large performance increase when estimating several related regression models. -If you are deciding whether a fixed-effects problem is likely to be easy or hard for alternating-projections methods, see [When Are Fixed Effects Problems Difficult?](../explanation/difficult-fixed-effects.md). +If you are deciding whether a fixed-effects problem is likely to be easy or hard +for alternating-projections methods, see +[When Are Fixed Effects Problems Difficult?](../explanation/difficult-fixed-effects.md). ## Choosing a Demeaner @@ -25,25 +26,11 @@ The main fixed-effects options are: | Demeaner | Main idea | Typical use | |---|---|---| -| `pf.NumbaDemeaner(...)` | CPU alternating projections | good default on CPU | -| `pf.WithinDemeaner(...)` | Krylov solver with Schwarz preconditioning via `within` | hard FE problems and reuse workflows | -| `pf.ScipyDemeaner(...)` | SciPy sparse iterative solver | CPU sparse linear algebra workflows | -| `pf.CupyDemeaner(...)` / `pf.JaxDemeaner(...)` | GPU-based demeaning | large GPU workloads | - -The `within` backend is the only one that currently exposes reusable `solver` and `preconditioner` objects. 
- -## Solver vs Preconditioner - -The two reusable objects serve different purposes: +| `pf.MapDemeaner(...)` | alternating projections on CPU or JAX | good for "dense" fixed effect networks | +| `pf.WithinDemeaner(...)` | Krylov solver with Schwarz preconditioning via `within` | tailored to sparse FE problems and reuse workflows | +| `pf.LsmrDemeaner(...)` | sparse LSMR-based demeaning on CPU or GPU | tba | -| Object | Stores | Reuse rule | Main benefit | Main risk | -|---|---|---|---|---| -| `solver` | the full demeaning problem | reuse only on the same sample, with the same weights, fixed-effects encoding, and solver settings | skips the expensive setup entirely | exact-match only | -| `preconditioner` | only the acceleration object | can be reused more loosely and may remain useful even when slightly stale | avoids rebuilding the preconditioner from scratch | stale reuse may need more iterations | - -In other words, a `solver` is tied to one specific demeaning problem. A `preconditioner` is only an accelerator for the current problem. That is why a stale preconditioner can still be useful on non-identical, but related problems, while a stale solver should not be reused. - -The expensive part of the `within` backend is the initial setup. Before the iterative solve even starts, `within` has to build a high-quality preconditioner for the fixed-effects problem. That setup cost is exactly what solver and preconditioner reuse are designed to avoid. +Only `pf.WithinDemeaner(...)` currently supports reusable preconditioners. ## Example Data @@ -72,10 +59,10 @@ When a typed `demeaner=` is provided, it is authoritative: its backend, `demeaner_backend`, `fixef_tol`, and `fixef_maxiter` arguments. 
```{python} -fit_numba = pf.feols( +fit_map = pf.feols( fml_y1, data=panel, - demeaner=pf.NumbaDemeaner(fixef_maxiter=10_000), + demeaner=pf.MapDemeaner(backend="rust", fixef_maxiter=10_000), ) fit_within = pf.feols( @@ -90,20 +77,20 @@ fit_within = pf.feols( pd.concat( { - "numba": fit_numba.coef(), + "map": fit_map.coef(), "within": fit_within.coef(), }, axis=1, ) ``` -## Reuse a Solver Within the Same Session +## Reuse a Within Preconditioner -For in-process reuse, prefer a `solver`. This is the fast path: the expensive setup is done once, and later solves borrow the stored solver directly. +The expensive part of the `within` backend is the preconditioner setup. For +multi-fixed-effect problems, `PyFixest` caches that object during demeaning and +exposes it on `feols` fits as `fit.preconditioner_`. -Why is the first run expensive? Because `within` has to build the preconditioner for the current worker-firm problem. Once that has been done, repeated solves on the exact same problem are much cheaper. - -You can reuse a solver exposed by a fitted model: +You can then pass that preconditioner into a later `WithinDemeaner(...)`: ```{python} fit1 = pf.feols( @@ -112,129 +99,34 @@ fit1 = pf.feols( demeaner=pf.WithinDemeaner(), ) -fit2 = pf.feols( - fml_y2, - data=panel, - demeaner=pf.WithinDemeaner(solver=fit1.solver_), -) - -fit2.coef() -``` - -You can also build a reusable solver explicitly: - -```{python} -solver = pf.get_solver( - fml_y1, - data=panel, - demeaner=pf.WithinDemeaner(preconditioner_type="additive"), -) - -fit3 = pf.feols( - fml_y2, - data=panel, - demeaner=pf.WithinDemeaner(solver=solver), -) - -fit3.coef() +fit1.preconditioner_ ``` -Use a solver when the sample, weights, and fixed-effects encoding are the same. Solver reuse is intentionally strict. 
- -More concretely, solver reuse is appropriate only when: - -- the same rows are used -- the same observation weights are used -- the same fixed-effects structure and encoding are used -- the same solver configuration is used - -This strictness is what makes the solver path fast: `PyFixest` can borrow the stored solver directly instead of rebuilding anything. No fresh solver object needs to be constructed, and no new preconditioner needs to be built. - -## Reuse a Preconditioner Across Regressions - -If you want a more portable object, extract a `preconditioner` from a solver and pass that into a later regression: - ```{python} -preconditioner_from_fit = fit1.solver_.to_preconditioner() - -fit4 = pf.feols( - fml_y2, - data=panel, - demeaner=pf.WithinDemeaner(preconditioner=preconditioner_from_fit), -) - -fit4.coef() -``` - -Compared with a solver, a preconditioner is a lower-level object. It only affects convergence speed, not the target problem itself. - -This is the key tradeoff: - -- a solver must match the current problem exactly -- a preconditioner can be stale and still be useful - -In the preconditioner case, `PyFixest` builds a fresh solver for the current regression and uses the stored preconditioner only to accelerate convergence. So preconditioner reuse still has some setup cost, but it avoids the most expensive part: building a new preconditioner from scratch. - -If the sample or weights changed a bit, the preconditioner may still help, but it will usually be less effective and the Krylov solver may need more iterations. - -So if you expect to rerun the exact same demeaning problem, use a `solver`. If you expect slightly different samples or want something portable across sessions, use a `preconditioner`. 
- -## Build a Preconditioner Explicitly - -You can also construct a reusable preconditioner directly: - -```{python} -preconditioner = pf.get_preconditioner( - fml_y1, - data=panel, - demeaner=pf.WithinDemeaner(preconditioner_type="additive"), -) - -fit5 = pf.feols( +fit2 = pf.feols( fml_y2, data=panel, - demeaner=pf.WithinDemeaner(preconditioner=preconditioner), + demeaner=pf.WithinDemeaner(preconditioner=fit1.preconditioner_), ) -fit5.coef() +fit2.coef() ``` -This is the preferred API when you want a reusable object for persistence or for advanced reuse workflows. - -## Persist a Preconditioner Across Sessions +## When Does `preconditioner_` Exist? -Preconditioners are pickleable, so you can save them and reload them in a later session: - -```{python} -with tempfile.TemporaryDirectory() as tmpdir: - path = Path(tmpdir) / "within_preconditioner.pkl" +`fit.preconditioner_` is currently exposed on `feols()` fits that: - with path.open("wb") as file: - pickle.dump(preconditioner, file) +- use `pf.WithinDemeaner(...)` +- have at least two fixed-effects dimensions +- successfully complete demeaning - with path.open("rb") as file: - restored_preconditioner = pickle.load(file) - - print(path) - -fit6 = pf.feols( - fml_y2, - data=panel, - demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner), -) - -fit6.coef() -``` +In simpler cases, such as no fixed effects, a single fixed effect, or a +non-`within` demeaner, `fit.preconditioner_` is `None`. -In practice, replace the temporary file with a durable path. +## Using stale Preconditioners -## Which Reuse Object Should I Use? +A preconditioner is defined for a given sample, fixed effects structure, and weights. For regression with identical structure in these three dimensions, the cached preconditioner will be exactly valid - if you were to recompute the preconditioner instead of using the cached one, you would get exactly the same object. -- Use `solver` for repeated estimation in the same Python session. 
-- Use `preconditioner` when you want a portable, pickleable object.
-- Use `solver` only for exact-match reuse.
-- Use `preconditioner` when reuse may be slightly stale, for example after small sample changes.
-- Use `fit.solver_.to_preconditioner()` when you want to turn a fitted model's solver into a persistence artifact.
-- Use `pf.get_preconditioner()` when you want to build the persistence artifact explicitly.
+If you change the sample, fixed effects structure, or weights, the cached preconditioner will turn "stale". In some cases, a stale preconditioner may still be a good approximation. Even with a stale preconditioner, the Krylov solver will converge - but the more the new regression deviates from the original problem, the less the preconditioner will help.

-Reusable solvers are not supported for `feglm()` and `fepois()`, because those estimators update observation weights across iterations. Reusable preconditioners can still be used there.
+For example, suppose that you fit a regression on a slightly different sample, but with the same fixed effects and weights. The cached preconditioner may still be a good approximation to the new problem, and using it may lead to faster convergence than starting from scratch. If the sample changes too drastically, or if the fixed effects structure changes, or your new regression even drops levels from the fixed effects, the cached preconditioner may be a poor approximation and lead to slower convergence than creating a new preconditioner.

From 014e37748c6337ed57da983b6197daf0361fb3d3 Mon Sep 17 00:00:00 2001
From: Alexander Fischer
Date: Sun, 29 Mar 2026 17:49:35 +0200
Subject: [PATCH 09/10] compute preconditioner twice for IWLS: once in the
 beginning, then when inner tolerance is tightened.
in addition, add inner tolerance tighening to fepois --- pyfixest/estimation/internals/demean_.py | 43 +++++++++----- pyfixest/estimation/models/feglm_.py | 45 ++++++++++---- pyfixest/estimation/models/fepois_.py | 38 ++++++++++-- tests/test_api.py | 68 +++++++++++++++++++++ tests/test_demean.py | 75 +++++++++++++++++++++++- 5 files changed, 234 insertions(+), 35 deletions(-) diff --git a/pyfixest/estimation/internals/demean_.py b/pyfixest/estimation/internals/demean_.py index 0a80027c6..5cae3e046 100644 --- a/pyfixest/estimation/internals/demean_.py +++ b/pyfixest/estimation/internals/demean_.py @@ -107,11 +107,11 @@ def demean_model( if var_diff.ndim == 1: var_diff = var_diff.reshape(len(var_diff), 1) - effective_demeaner, _ = _prepare_within_cache_preconditioner( + effective_demeaner, _ = _prepare_within_preconditioner( flist=fe_array, weights=weights_array, demeaner=demeaner, - cache_preconditioner=cache_entry.preconditioner, + preconditioner=cache_entry.preconditioner, ) YX_demean_new, success = dispatch_demean( x=var_diff, @@ -141,7 +141,7 @@ def demean_model( YX_demeaned = YX_demeaned_old[yx_names] else: - effective_demeaner, preconditioner = _prepare_within_cache_preconditioner( + effective_demeaner, preconditioner = _prepare_within_preconditioner( flist=fe_array, weights=weights_array, demeaner=demeaner, @@ -183,37 +183,48 @@ def demean_model( return Yd, Xd -def _prepare_within_cache_preconditioner( +def _prepare_within_preconditioner( flist: np.ndarray, weights: np.ndarray, demeaner: ResolvedDemeaner, - cache_preconditioner: WithinPreconditioner | None = None, + preconditioner: WithinPreconditioner | None = None, + *, + refresh_preconditioner: bool = False, ) -> tuple[ResolvedDemeaner, WithinPreconditioner | None]: """ - Prepare the effective demeaner and reusable within cache preconditioner. + Prepare the effective demeaner and reusable within preconditioner. Only `WithinDemeaner` participates in preconditioner reuse. 
Other demeaners - are returned unchanged with no cache preconditioner. + are returned unchanged with no reusable preconditioner. """ if not isinstance(demeaner, WithinDemeaner): return demeaner, None - if demeaner.preconditioner is not None: - return demeaner, demeaner.preconditioner - if cache_preconditioner is not None: - return ( - replace(demeaner, preconditioner=cache_preconditioner), - cache_preconditioner, - ) flist_uint32 = flist.astype(np.uint32, copy=False) if flist_uint32.ndim == 1 or flist_uint32.shape[1] <= 1: return demeaner, None - preconditioner = build_within_preconditioner( + if demeaner.preconditioner is not None and not refresh_preconditioner: + return demeaner, demeaner.preconditioner + if preconditioner is not None and not refresh_preconditioner: + return replace(demeaner, preconditioner=preconditioner), preconditioner + + built_preconditioner = build_within_preconditioner( flist=flist_uint32, weights=weights, preconditioner_type=demeaner.preconditioner_type, ) - return replace(demeaner, preconditioner=preconditioner), preconditioner + return replace(demeaner, preconditioner=built_preconditioner), built_preconditioner + + +def _override_demeaner_tol( + demeaner: ResolvedDemeaner, + *, + tol: float | None = None, +) -> ResolvedDemeaner: + """Override FE tolerance on a typed demeaner when needed. 
Used for IWLS acceleration.""" + if tol is None or tol == demeaner.fixef_tol: + return demeaner + return replace(demeaner, fixef_tol=tol) def dispatch_demean( diff --git a/pyfixest/estimation/models/feglm_.py b/pyfixest/estimation/models/feglm_.py index 0525762b3..038e7d6ef 100644 --- a/pyfixest/estimation/models/feglm_.py +++ b/pyfixest/estimation/models/feglm_.py @@ -5,12 +5,15 @@ import numpy as np import pandas as pd +from pyfixest.core.demean import WithinPreconditioner from pyfixest.errors import ( NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula from pyfixest.estimation.internals.demean_ import ( DemeanedDataCacheEntry, + _override_demeaner_tol, + _prepare_within_preconditioner, dispatch_demean, ) from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner @@ -105,6 +108,8 @@ def __init__( self._Xbeta = np.empty(0) self._method = "feglm" + self._iwls_preconditioner: WithinPreconditioner | None = None + self._iwls_tightening_refresh_done = False def prepare_model_matrix(self): "Prepare model inputs for estimation." 
@@ -175,8 +180,11 @@ def get_fit(self): X_tilde_prev = None accelerate = self._accelerate and self._fe is not None inner_tol = self._fixef_tol + self._iwls_preconditioner = None + self._iwls_tightening_refresh_done = False for r in range(self.maxiter): + refresh_preconditioner = False if r > 0: rel_deviance_change = self._get_relative_deviance_change( deviance, deviance_old @@ -195,6 +203,9 @@ def get_fit(self): # Adaptive tolerance as in ppmlhdfe if accelerate and rel_deviance_change < 10 * inner_tol: inner_tol = inner_tol / 10 + if not self._iwls_tightening_refresh_done: + refresh_preconditioner = True + self._iwls_tightening_refresh_done = True gprime = self._get_gprime(mu=mu) W = self._update_W(mu=mu) @@ -214,6 +225,7 @@ def get_fit(self): X=X_input, flist=self._fe, weights=W.flatten(), + refresh_preconditioner=refresh_preconditioner, tol=inner_tol, maxiter=self._fixef_maxiter, ) @@ -404,23 +416,34 @@ def residualize( X: np.ndarray, flist: np.ndarray, weights: np.ndarray, + refresh_preconditioner: bool, tol: float, maxiter: int, ) -> tuple[np.ndarray, np.ndarray]: "Residualize v and X by flist using weights." 
if flist is None: return v, X - else: - vX_tilde, success = dispatch_demean( - x=np.c_[v, X], - flist=flist, - weights=weights, - demeaner=self._demeaner, - ) - if success is False: - raise ValueError(f"Demeaning failed after {maxiter} iterations.") - else: - return vX_tilde[:, 0], vX_tilde[:, 1:] + + iwls_weights = weights.flatten() + effective_demeaner, self._iwls_preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=iwls_weights, + demeaner=self._demeaner, + preconditioner=self._iwls_preconditioner, + refresh_preconditioner=refresh_preconditioner, + ) + effective_demeaner = _override_demeaner_tol(effective_demeaner, tol=tol) + + vX_tilde, success = dispatch_demean( + x=np.c_[v, X], + flist=flist, + weights=iwls_weights, + demeaner=effective_demeaner, + ) + + if success is False: + raise ValueError(f"Demeaning failed after {maxiter} iterations.") + return vX_tilde[:, 0], vX_tilde[:, 1:] def _check_convergence( self, diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py index 6266ab11e..1e37ebc41 100644 --- a/pyfixest/estimation/models/fepois_.py +++ b/pyfixest/estimation/models/fepois_.py @@ -8,12 +8,15 @@ import pandas as pd from scipy.special import gammaln +from pyfixest.core.demean import WithinPreconditioner from pyfixest.errors import ( NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula from pyfixest.estimation.internals.demean_ import ( DemeanedDataCacheEntry, + _override_demeaner_tol, + _prepare_within_preconditioner, dispatch_demean, ) from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner @@ -155,6 +158,7 @@ def __init__( self._Y_hat_response = np.array([]) self.deviance = None self._Xbeta = np.array([]) + self._iwls_preconditioner: WithinPreconditioner | None = None def prepare_model_matrix(self): "Prepare model inputs for estimation." @@ -273,11 +277,15 @@ def get_fit(self) -> None: algorithm). 
""" self.to_array() + self._iwls_preconditioner = None + inner_tol = self._fixef_tol + tightening_refresh_done = False stop_iterating = False crit = 1 for i in range(self.maxiter): + refresh_preconditioner = False if stop_iterating: self.convergence = True break @@ -301,26 +309,44 @@ def get_fit(self) -> None: Z = eta + self._Y / mu - 1 # eq (8) reg_Z = Z.copy() # eq (9) - # tighten HDFE tolerance - currently not possible with PyHDFE - # if crit < 10 * inner_tol: - # inner_tol = inner_tol / 10 + if crit < 10 * inner_tol: + inner_tol = inner_tol / 10 + if not tightening_refresh_done: + refresh_preconditioner = True + tightening_refresh_done = True # Step 1: weighted demeaning ZX = np.concatenate([reg_Z, self._X], axis=1) combined_weights = self._weights * mu + iwls_weights = combined_weights.flatten() if self._fe is None: ZX_resid = ZX else: + effective_demeaner, self._iwls_preconditioner = ( + _prepare_within_preconditioner( + flist=self._fe, + weights=iwls_weights, + demeaner=self._demeaner, + preconditioner=self._iwls_preconditioner, + refresh_preconditioner=refresh_preconditioner, + ) + ) + effective_demeaner = _override_demeaner_tol( + effective_demeaner, + tol=inner_tol, + ) ZX_resid, success = dispatch_demean( x=ZX, flist=self._fe, - weights=combined_weights.flatten(), - demeaner=self._demeaner, + weights=iwls_weights, + demeaner=effective_demeaner, ) if success is False: - raise ValueError("Demeaning failed after 100_000 iterations.") + raise ValueError( + f"Demeaning failed after {self._fixef_maxiter} iterations." 
+ ) Z_resid = ZX_resid[:, 0].reshape((self._N, 1)) # z_resid X_resid = ZX_resid[:, 1:] # x_resid diff --git a/tests/test_api.py b/tests/test_api.py index e97db7207..58ed29038 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -5,6 +5,7 @@ from formulaic.errors import FactorEvaluationError import pyfixest as pf +import pyfixest.estimation.models.fepois_ as fepois_model from pyfixest.utils.utils import get_data @@ -112,6 +113,73 @@ def test_feols_exposes_and_reuses_within_preconditioner(): np.testing.assert_allclose(fit1.coef(), fit2.coef(), rtol=1e-10) +def test_feglm_runs_with_within_demeaner(): + data = pf.get_data().copy() + data["Y"] = (data["Y"] > 0).astype(int) + + fit = pf.feglm( + "Y ~ X1 | f1 + f2", + data=data, + family="logit", + demeaner=pf.WithinDemeaner(), + ) + + assert np.isfinite(fit.coef().to_numpy()).all() + + +def test_fepois_runs_with_within_demeaner(): + data = pf.get_data(model="Fepois") + + fit = pf.fepois( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner(), + ) + + assert np.isfinite(fit.coef().to_numpy()).all() + + +def test_fepois_tightens_inner_tol_for_within_demeaner(monkeypatch): + data = pf.get_data(model="Fepois") + initial_inner_tol = 1e-2 + tol_history: list[float] = [] + refresh_history: list[bool] = [] + + original_prepare = fepois_model._prepare_within_preconditioner + original_override = fepois_model._override_demeaner_tol + + def wrapped_prepare(*args, **kwargs): + refresh_history.append(kwargs.get("refresh_preconditioner", False)) + return original_prepare(*args, **kwargs) + + def wrapped_override(demeaner, *, tol=None): + if tol is not None: + tol_history.append(tol) + return original_override(demeaner, tol=tol) + + monkeypatch.setattr( + fepois_model, + "_prepare_within_preconditioner", + wrapped_prepare, + ) + monkeypatch.setattr( + fepois_model, + "_override_demeaner_tol", + wrapped_override, + ) + + fit = pf.fepois( + "Y ~ X1 | f1 + f2", + data=data, + 
demeaner=pf.WithinDemeaner(fixef_tol=initial_inner_tol), + ) + + assert np.isfinite(fit.coef().to_numpy()).all() + assert tol_history + assert min(tol_history) < initial_inner_tol + assert any(refresh_history) + + def test_lean(): data = pf.get_data() fit = pf.feols("Y ~ X1 + X2 | f1", data=data, lean=True) diff --git a/tests/test_demean.py b/tests/test_demean.py index a2dfa50e1..f8bcc6638 100644 --- a/tests/test_demean.py +++ b/tests/test_demean.py @@ -6,10 +6,15 @@ import pytest from pyfixest.core import demean as demean_rs -from pyfixest.core.demean import build_within_preconditioner, demean_within -from pyfixest.demeaners import LsmrDemeaner, MapDemeaner +from pyfixest.core.demean import ( + WithinPreconditioner, + build_within_preconditioner, + demean_within, +) +from pyfixest.demeaners import LsmrDemeaner, MapDemeaner, WithinDemeaner from pyfixest.estimation.cupy.demean_cupy_ import demean_cupy32, demean_cupy64 from pyfixest.estimation.internals.demean_ import ( + _prepare_within_preconditioner, _set_demeaner_backend, demean, demean_model, @@ -440,6 +445,72 @@ def test_demean_within_multiplicative_preconditioner_build_and_reuse(): ) +def test_prepare_within_preconditioner_builds_once_and_refreshes_on_request( + monkeypatch, +): + rng = np.random.default_rng(321) + n = 200 + flist = np.column_stack([rng.integers(0, 30, n), rng.integers(0, 20, n)]).astype( + np.uint32 + ) + demeaner = WithinDemeaner(krylov_method="gmres") + + build_calls: list[np.ndarray] = [] + + def fake_build( + flist: np.ndarray, + weights: np.ndarray, + preconditioner_type: str, + ) -> WithinPreconditioner: + build_calls.append(weights.copy()) + return WithinPreconditioner( + n_obs=flist.shape[0], + n_factors=flist.shape[1], + factor_cardinalities=tuple((flist.max(axis=0) + 1).tolist()), + preconditioner_type=preconditioner_type, + _preconditioner_handle=object(), + ) + + monkeypatch.setattr( + "pyfixest.estimation.internals.demean_.build_within_preconditioner", + fake_build, + ) + + 
weights0 = np.ones(n) + effective_demeaner, preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=weights0, + demeaner=demeaner, + preconditioner=None, + ) + + assert len(build_calls) == 1 + assert effective_demeaner.preconditioner is preconditioner + + effective_demeaner, reused_preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=weights0 * 1.01, + demeaner=demeaner, + preconditioner=preconditioner, + ) + + assert len(build_calls) == 1 + assert effective_demeaner.preconditioner is reused_preconditioner + assert reused_preconditioner is preconditioner + + effective_demeaner, refreshed_preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=weights0 * 1.01, + demeaner=demeaner, + preconditioner=preconditioner, + refresh_preconditioner=True, + ) + + assert len(build_calls) == 2 + assert effective_demeaner.preconditioner is refreshed_preconditioner + assert refreshed_preconditioner is not preconditioner + + @pytest.mark.parametrize( argnames="demean_func", argvalues=[demean_rs, demean_within, demean_cupy32, demean_cupy64], From d73d03420c1171f2ea878c7fe582f7d016cd14e0 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 29 Mar 2026 18:10:16 +0200 Subject: [PATCH 10/10] allow to pickle preconditioners --- Cargo.lock | 1 + Cargo.toml | 1 + docs/how-to/fixed-effects-solvers.qmd | 30 +++++++++++++- pyfixest/core/_core_impl.pyi | 6 +++ pyfixest/core/demean.py | 56 +++++++++++++++++++++++++ src/demean_within.rs | 23 ++++++++++ src/lib.rs | 6 +++ tests/test_api.py | 21 ++++++++++ tests/test_demean.py | 60 +++++++++++++++++++++++++++ 9 files changed, 203 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index da6fd2c87..6a1129389 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1120,6 +1120,7 @@ version = "0.50.0" dependencies = [ "ndarray", "numpy", + "postcard", "pyo3", "rayon", "thiserror 2.0.17", diff --git a/Cargo.toml b/Cargo.toml index 2dfbc638a..f3fae2b11 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -15,6 +15,7 @@ rayon = "1.11.0" numpy = "0.26.0" thiserror = "2.0.16" within = "0.1.0" +postcard = { version = "1", features = ["use-std"] } [profile.release] opt-level = 3 # Maximize performance diff --git a/docs/how-to/fixed-effects-solvers.qmd b/docs/how-to/fixed-effects-solvers.qmd index e7a700626..ea06178d2 100644 --- a/docs/how-to/fixed-effects-solvers.qmd +++ b/docs/how-to/fixed-effects-solvers.qmd @@ -1,12 +1,13 @@ --- title: "Choosing and Reusing Fixed-Effects Solvers" -description: "How to configure fixed-effects demeaners and reuse within preconditioners in the current Python session." +description: "How to configure fixed-effects demeaners, reuse within preconditioners, and persist them across sessions." categories: [Fixed Effects, Performance] --- ```{python} import numpy as np import pandas as pd +import pickle import pyfixest as pf ``` @@ -123,6 +124,33 @@ fit2.coef() In simpler cases, such as no fixed effects, a single fixed effect, or a non-`within` demeaner, `fit.preconditioner_` is `None`. +## Persist a Preconditioner Across Sessions + +It is possible to persist a preconditioner across Python sessions, which can be useful if you have a costly preconditioner setup and want to reuse it later without recomputing. + +The easiest path is via standard `pickle`: + +```{python} +fit1 = pf.feols( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner(), +) + +payload = pickle.dumps(fit1.preconditioner_) +restored_preconditioner = pickle.loads(payload) + +fit2 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner), +) + +fit2.coef() +``` + +Please make sure to only load persisted preconditioners from trusted sources. Like any Python pickle-based workflow, untrusted payloads are unsafe to deserialize. + ## Using stale Preconditioners A preconditioner is defined for a given sample, fixed effects structure, and weights. 
For regression with identical structure in these three dimensions, the cached preconditioner will be exactly valid - if you were to recompute the preconditioner instead of using the cached one, you would get exactly the same object. diff --git a/pyfixest/core/_core_impl.pyi b/pyfixest/core/_core_impl.pyi index fc78a7fce..af0291009 100644 --- a/pyfixest/core/_core_impl.pyi +++ b/pyfixest/core/_core_impl.pyi @@ -32,6 +32,12 @@ def _build_within_preconditioner_rs( weights: NDArray[np.float64], preconditioner_type: str = "additive", ) -> _WithinPreconditionerHandle: ... +def _serialize_within_preconditioner_rs( + preconditioner_handle: _WithinPreconditionerHandle, +) -> bytes: ... +def _deserialize_within_preconditioner_rs( + data: bytes, +) -> _WithinPreconditionerHandle: ... def _count_fixef_fully_nested_all_rs( all_fixef_array: NDArray, cluster_colnames: NDArray, diff --git a/pyfixest/core/demean.py b/pyfixest/core/demean.py index 9957f7d96..50703fed6 100644 --- a/pyfixest/core/demean.py +++ b/pyfixest/core/demean.py @@ -11,6 +11,8 @@ _build_within_preconditioner_rs, _demean_rs, _demean_within_rs, + _deserialize_within_preconditioner_rs, + _serialize_within_preconditioner_rs, ) @@ -59,6 +61,60 @@ class WithinPreconditioner: preconditioner_type: str _preconditioner_handle: Any = field(repr=False) + def _to_pickle_bytes(self) -> bytes: + return _serialize_within_preconditioner_rs(self._preconditioner_handle) + + @classmethod + def _from_pickle( + cls, + data: bytes, + n_obs: int, + n_factors: int, + factor_cardinalities: tuple[int, ...], + preconditioner_type: str, + ) -> WithinPreconditioner: + if not isinstance(data, (bytes, bytearray, memoryview)): + raise TypeError("`data` must be bytes-like.") + if not isinstance(n_obs, int) or n_obs <= 0: + raise ValueError("`n_obs` must be a positive int.") + if not isinstance(n_factors, int) or n_factors <= 0: + raise ValueError("`n_factors` must be a positive int.") + factor_cardinalities = tuple(int(card) for card in 
factor_cardinalities) + if len(factor_cardinalities) != n_factors: + raise ValueError( + "`factor_cardinalities` must have one entry per fixed-effect dimension." + ) + if any(card <= 0 for card in factor_cardinalities): + raise ValueError("`factor_cardinalities` entries must be positive.") + if not isinstance(preconditioner_type, str): + raise TypeError("`preconditioner_type` must be a string.") + preconditioner_type = preconditioner_type.lower() + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." + ) + + handle = _deserialize_within_preconditioner_rs(bytes(data)) + return cls( + n_obs=n_obs, + n_factors=n_factors, + factor_cardinalities=factor_cardinalities, + preconditioner_type=preconditioner_type, + _preconditioner_handle=handle, + ) + + def __reduce__(self) -> tuple[Any, tuple[Any, ...]]: + return ( + type(self)._from_pickle, + ( + self._to_pickle_bytes(), + self.n_obs, + self.n_factors, + self.factor_cardinalities, + self.preconditioner_type, + ), + ) + def build_within_preconditioner( flist: NDArray[np.uint32], diff --git a/src/demean_within.rs b/src/demean_within.rs index cce01d4dd..c4376a154 100644 --- a/src/demean_within.rs +++ b/src/demean_within.rs @@ -2,6 +2,7 @@ use ndarray::{Array2, ArrayView1, ArrayView2, ShapeBuilder}; use numpy::{PyArray2, PyReadonlyArray1, PyReadonlyArray2}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; +use pyo3::types::PyBytes; #[pyclass(module = "pyfixest.core._core_impl", name = "_WithinPreconditionerHandle")] #[derive(Clone)] @@ -175,6 +176,28 @@ pub fn _build_within_preconditioner_rs( Py::new(py, handle) } +#[pyfunction] +pub fn _serialize_within_preconditioner_rs( + py: Python<'_>, + preconditioner_handle: Py, +) -> PyResult> { + let handle = preconditioner_handle.bind(py).borrow(); + let bytes = postcard::to_stdvec(&handle.inner) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + 
Ok(PyBytes::new(py, &bytes).unbind()) +} + +#[pyfunction] +pub fn _deserialize_within_preconditioner_rs( + py: Python<'_>, + data: &[u8], +) -> PyResult> { + let inner: within::FePreconditioner = postcard::from_bytes(data).map_err(|e| { + PyValueError::new_err(format!("failed to deserialize preconditioner: {}", e)) + })?; + Py::new(py, WithinPreconditionerHandle::from_inner(inner)) +} + #[pyfunction] #[pyo3(signature = ( x, diff --git a/src/lib.rs b/src/lib.rs index d5ee05a08..11169281e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,12 @@ fn _core_impl(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!( demean_within::_build_within_preconditioner_rs ))?; + m.add_wrapped(wrap_pyfunction!( + demean_within::_serialize_within_preconditioner_rs + ))?; + m.add_wrapped(wrap_pyfunction!( + demean_within::_deserialize_within_preconditioner_rs + ))?; m.add_wrapped(wrap_pyfunction!( nested_fixed_effects::_count_fixef_fully_nested_all_rs ))?; diff --git a/tests/test_api.py b/tests/test_api.py index 58ed29038..549fccda5 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,3 +1,5 @@ +import pickle + import duckdb import numpy as np import pandas as pd @@ -113,6 +115,25 @@ def test_feols_exposes_and_reuses_within_preconditioner(): np.testing.assert_allclose(fit1.coef(), fit2.coef(), rtol=1e-10) +def test_feols_reuses_pickled_within_preconditioner(): + data = pf.get_data() + fit1 = pf.feols( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner(), + ) + + restored_preconditioner = pickle.loads(pickle.dumps(fit1.preconditioner_)) + + fit2 = pf.feols( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner), + ) + + np.testing.assert_allclose(fit1.coef(), fit2.coef(), rtol=1e-10) + + def test_feglm_runs_with_within_demeaner(): data = pf.get_data().copy() data["Y"] = (data["Y"] > 0).astype(int) diff --git a/tests/test_demean.py b/tests/test_demean.py index f8bcc6638..e073e9058 100644 --- 
a/tests/test_demean.py +++ b/tests/test_demean.py @@ -1,3 +1,4 @@ +import pickle import re import numpy as np @@ -511,6 +512,65 @@ def fake_build( assert refreshed_preconditioner is not preconditioner +def test_within_preconditioner_roundtrips_via_pickle(): + rng = np.random.default_rng(777) + n = 250 + x = rng.normal(size=(n, 3)) + flist = np.column_stack([rng.integers(0, 30, n), rng.integers(0, 20, n)]).astype( + np.uint32 + ) + weights = rng.uniform(0.5, 1.5, n) + + preconditioner = build_within_preconditioner(flist, weights) + restored_from_pickle = pickle.loads(pickle.dumps(preconditioner)) + + x_demeaned_original, success_original = demean_within( + x, + flist, + weights, + preconditioner=preconditioner, + ) + x_demeaned_pickle, success_pickle = demean_within( + x, + flist, + weights, + preconditioner=restored_from_pickle, + ) + + assert success_original and success_pickle + np.testing.assert_allclose(x_demeaned_pickle, x_demeaned_original, rtol=1e-10) + assert ( + restored_from_pickle.factor_cardinalities == preconditioner.factor_cardinalities + ) + + +def test_restored_within_preconditioner_still_validates_structure(): + rng = np.random.default_rng(991) + n = 200 + x = rng.normal(size=(n, 2)) + flist = np.column_stack([rng.integers(0, 20, n), rng.integers(0, 15, n)]).astype( + np.uint32 + ) + weights = rng.uniform(0.5, 1.5, n) + preconditioner = build_within_preconditioner(flist, weights) + restored = pickle.loads(pickle.dumps(preconditioner)) + + incompatible_flist = np.column_stack( + [rng.integers(0, 25, n), rng.integers(0, 15, n)] + ).astype(np.uint32) + + with pytest.raises( + ValueError, + match="fixed-effect cardinalities do not match", + ): + demean_within( + x, + incompatible_flist, + weights, + preconditioner=restored, + ) + + @pytest.mark.parametrize( argnames="demean_func", argvalues=[demean_rs, demean_within, demean_cupy32, demean_cupy64],