diff --git a/Cargo.lock b/Cargo.lock index da6fd2c87..6a1129389 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1120,6 +1120,7 @@ version = "0.50.0" dependencies = [ "ndarray", "numpy", + "postcard", "pyo3", "rayon", "thiserror 2.0.17", diff --git a/Cargo.toml b/Cargo.toml index 2dfbc638a..f3fae2b11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ rayon = "1.11.0" numpy = "0.26.0" thiserror = "2.0.16" within = "0.1.0" +postcard = { version = "1", features = ["use-std"] } [profile.release] opt-level = 3 # Maximize performance diff --git a/docs/_freeze/how-to/fixed-effects-solvers/execute-results/html.json b/docs/_freeze/how-to/fixed-effects-solvers/execute-results/html.json new file mode 100644 index 000000000..4845c7fca --- /dev/null +++ b/docs/_freeze/how-to/fixed-effects-solvers/execute-results/html.json @@ -0,0 +1,16 @@ +{ + "hash": "2e454435a33d9f6487d50ede3e986926", + "result": { + "engine": "jupyter", + "markdown": "---\ntitle: \"Choosing and Reusing Fixed-Effects Solvers\"\ndescription: \"How to configure fixed-effects demeaners, reuse within solvers and preconditioners, and persist reusable preconditioners across sessions.\"\ncategories: [Fixed Effects, Performance]\n---\n\n::: {#661c1809 .cell execution_count=1}\n``` {.python .cell-code}\nimport pickle\nimport tempfile\nfrom pathlib import Path\n\nimport numpy as np\nimport pandas as pd\n\nimport pyfixest as pf\n```\n:::\n\n\n`PyFixest` now accepts object-based demeaner configurations. 
This makes it easier to choose a backend, tune solver settings, and pass reusable solver objects between regressions.\n\nIf you are deciding whether a fixed-effects problem is likely to be easy or hard for alternating-projections methods, see [When Are Fixed Effects Problems Difficult?](../explanation/difficult-fixed-effects.md).\n\n## Choosing a Demeaner\n\nThe main fixed-effects options are:\n\n| Demeaner | Main idea | Typical use |\n|---|---|---|\n| `pf.NumbaDemeaner(...)` | CPU alternating projections | good default on CPU |\n| `pf.WithinDemeaner(...)` | Krylov solver with Schwarz preconditioning via `within` | hard FE problems and reuse workflows |\n| `pf.ScipyDemeaner(...)` | SciPy sparse iterative solver | CPU sparse linear algebra workflows |\n| `pf.CupyDemeaner(...)` / `pf.JaxDemeaner(...)` | GPU-based demeaning | large GPU workloads |\n\nThe `within` backend is the only one that currently exposes reusable `solver` and `preconditioner` objects.\n\n## Example Data\n\n::: {#08c7114a .cell execution_count=2}\n``` {.python .cell-code}\npanel = pf.get_worker_panel(N_workers=300, N_firms=40, N_years=8, seed=42)\n\nrng = np.random.default_rng(123)\npanel[\"log_bonus\"] = (\n 0.4 * panel[\"worker_fe\"]\n - 0.2 * panel[\"firm_fe\"]\n + 0.01 * panel[\"experience\"]\n + 0.02 * panel[\"tenure\"]\n + rng.normal(0, 0.2, size=len(panel))\n)\n\nfml_y1 = \"log_wage ~ experience + tenure | worker_id + firm_id\"\nfml_y2 = \"log_bonus ~ experience + tenure | worker_id + firm_id\"\npanel.head()\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
worker_idfirm_idyearfemaleexperiencetenurelog_wageworker_fefirm_felog_bonus
001620000110.0289210.1523590.019964-0.110874
11342000101-0.526312-0.5199920.009835-0.263521
222020001310.5156410.3752260.2347050.410734
331320001010.5287490.4702820.1316980.220568
44312000031-0.337822-0.9755180.191260-0.194413
\n
\n```\n:::\n:::\n\n\n## Pass Demeaners as Dataclasses\n\nInstead of string shorthands, you can pass a configured demeaner directly.\n\n::: {#97c68471 .cell execution_count=3}\n``` {.python .cell-code}\nfit_numba = pf.feols(\n fml_y1,\n data=panel,\n demeaner=pf.NumbaDemeaner(fixef_maxiter=10_000),\n)\n\nfit_within = pf.feols(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(\n krylov_method=\"gmres\",\n gmres_restart=20,\n preconditioner_type=\"multiplicative\",\n ),\n)\n\npd.concat(\n {\n \"numba\": fit_numba.coef(),\n \"within\": fit_within.coef(),\n },\n axis=1,\n)\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n
\n \n \n```\n:::\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n
\n \n \n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
numbawithin
Coefficient
experience0.0184000.018400
tenure0.0165170.016517
\n
\n```\n:::\n:::\n\n\n## Reuse a Solver Within the Same Session\n\nFor in-process reuse, prefer a `solver`. This is the fast path: the expensive setup is done once, and later solves borrow the stored solver directly.\n\nYou can reuse a solver exposed by a fitted model:\n\n::: {#9d0f1071 .cell execution_count=4}\n``` {.python .cell-code}\nfit1 = pf.feols(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(),\n)\n\nfit2 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(solver=fit1.solver_),\n)\n\nfit2.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nYou can also build a reusable solver explicitly:\n\n::: {#6e60c279 .cell execution_count=5}\n``` {.python .cell-code}\nsolver = pf.get_solver(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner_type=\"additive\"),\n)\n\nfit3 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(solver=solver),\n)\n\nfit3.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nUse a solver when the sample, weights, and fixed-effects encoding are the same. 
Solver reuse is intentionally strict.\n\n## Reuse a Preconditioner Across Regressions\n\nIf you want a more portable object, extract a `preconditioner` from a solver and pass that into a later regression:\n\n::: {#8a613a85 .cell execution_count=6}\n``` {.python .cell-code}\npreconditioner_from_fit = fit1.solver_.to_preconditioner()\n\nfit4 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner=preconditioner_from_fit),\n)\n\nfit4.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nCompared with a solver, a preconditioner is a lower-level object. It only affects convergence speed, not the target problem itself.\n\n## Build a Preconditioner Explicitly\n\nYou can also construct a reusable preconditioner directly:\n\n::: {#39988af0 .cell execution_count=7}\n``` {.python .cell-code}\npreconditioner = pf.get_preconditioner(\n fml_y1,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner_type=\"additive\"),\n)\n\nfit5 = pf.feols(\n fml_y2,\n data=panel,\n demeaner=pf.WithinDemeaner(preconditioner=preconditioner),\n)\n\nfit5.coef()\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nThis is the preferred API when you want a reusable object for persistence or for advanced reuse workflows.\n\n## Persist a Preconditioner Across Sessions\n\nPreconditioners are pickleable, so you can save them and reload them in a later session:\n\n::: {#e1356e5c .cell execution_count=8}\n``` {.python .cell-code}\nwith tempfile.TemporaryDirectory() as tmpdir:\n path = Path(tmpdir) / \"within_preconditioner.pkl\"\n\n with path.open(\"wb\") as file:\n pickle.dump(preconditioner, file)\n\n with path.open(\"rb\") as file:\n restored_preconditioner = pickle.load(file)\n\n print(path)\n\nfit6 = pf.feols(\n fml_y2,\n 
data=panel,\n demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner),\n)\n\nfit6.coef()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n/var/folders/98/c353q4p95v5_fz62gr5c9td80000gn/T/tmpa3kwwj68/within_preconditioner.pkl\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=8}\n```\nCoefficient\nexperience 0.011953\ntenure 0.016365\nName: Estimate, dtype: float64\n```\n:::\n:::\n\n\nIn practice, replace the temporary file with a durable path.\n\n## Which Reuse Object Should I Use?\n\n- Use `solver` for repeated estimation in the same Python session.\n- Use `preconditioner` when you want a portable, pickleable object.\n- Use `fit.solver_.to_preconditioner()` when you want to turn a fitted model's solver into a persistence artifact.\n- Use `pf.get_preconditioner()` when you want to build the persistence artifact explicitly.\n\nReusable solvers are not supported for `feglm()` and `fepois()`, because those estimators update observation weights across iterations. 
Reusable preconditioners can still be used there.\n\n", + "supporting": [ + "fixed-effects-solvers_files" + ], + "filters": [], + "includes": { + "include-in-header": [ + "\n\n\n" + ] + } + } +} diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 3fb06f98d..fc4cb0720 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -72,6 +72,8 @@ website: - text: "All Guides" file: how-to/index.qmd - text: "---" + - text: "Choosing and Reusing Fixed-Effects Solvers" + file: how-to/fixed-effects-solvers.qmd - text: "Marginal Effects & Hypothesis Testing" file: how-to/marginaleffects.qmd - text: "Regression Decomposition" diff --git a/docs/how-to/fixed-effects-solvers.qmd b/docs/how-to/fixed-effects-solvers.qmd new file mode 100644 index 000000000..ea06178d2 --- /dev/null +++ b/docs/how-to/fixed-effects-solvers.qmd @@ -0,0 +1,160 @@ +--- +title: "Choosing and Reusing Fixed-Effects Solvers" +description: "How to configure fixed-effects demeaners, reuse within preconditioners, and persist them across sessions." +categories: [Fixed Effects, Performance] +--- + +```{python} +import numpy as np +import pandas as pd +import pickle + +import pyfixest as pf +``` + +`PyFixest` now accepts object-based demeaner configurations. This makes it easier +to choose a backend, tune solver settings, and, for the `within` backend, +reuse a pre-built fixed-effects preconditioner across regressions. As a good chunk of +the computational cost of multi-fixed-effect problems is in the preconditioner setup, this can lead to a large performance increase when estimating several related regression models. + +If you are deciding whether a fixed-effects problem is likely to be easy or hard +for alternating-projections methods, see +[When Are Fixed Effects Problems Difficult?](../explanation/difficult-fixed-effects.md). 
+ +## Choosing a Demeaner + +The main fixed-effects options are: + +| Demeaner | Main idea | Typical use | +|---|---|---| +| `pf.MapDemeaner(...)` | alternating projections on CPU or JAX | good for "dense" fixed effect networks | +| `pf.WithinDemeaner(...)` | Krylov solver with Schwarz preconditioning via `within` | tailored to sparse FE problems and reuse workflows | +| `pf.LsmrDemeaner(...)` | sparse LSMR-based demeaning on CPU or GPU | tba | + +Only `pf.WithinDemeaner(...)` currently supports reusable preconditioners. + +## Example Data + +```{python} +panel = pf.get_worker_panel(N_workers=300, N_firms=40, N_years=8, seed=42) + +rng = np.random.default_rng(123) +panel["log_bonus"] = ( + 0.4 * panel["worker_fe"] + - 0.2 * panel["firm_fe"] + + 0.01 * panel["experience"] + + 0.02 * panel["tenure"] + + rng.normal(0, 0.2, size=len(panel)) +) + +fml_y1 = "log_wage ~ experience + tenure | worker_id + firm_id" +fml_y2 = "log_bonus ~ experience + tenure | worker_id + firm_id" +panel.head() +``` + +## Pass Demeaners as Dataclasses + +Instead of string shorthands, you can pass a configured demeaner directly. +When a typed `demeaner=` is provided, it is authoritative: its backend, +`fixef_tol`, and `fixef_maxiter` take precedence over the top-level +`demeaner_backend`, `fixef_tol`, and `fixef_maxiter` arguments. + +```{python} +fit_map = pf.feols( + fml_y1, + data=panel, + demeaner=pf.MapDemeaner(backend="rust", fixef_maxiter=10_000), +) + +fit_within = pf.feols( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner( + krylov_method="gmres", + gmres_restart=20, + preconditioner_type="multiplicative", + ), +) + +pd.concat( + { + "map": fit_map.coef(), + "within": fit_within.coef(), + }, + axis=1, +) +``` + +## Reuse a Within Preconditioner + +The expensive part of the `within` backend is the preconditioner setup. For +multi-fixed-effect problems, `PyFixest` caches that object during demeaning and +exposes it on `feols` fits as `fit.preconditioner_`. 
+ +You can then pass that preconditioner into a later `WithinDemeaner(...)`: + +```{python} +fit1 = pf.feols( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner(), +) + +fit1.preconditioner_ +``` + +```{python} +fit2 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner=fit1.preconditioner_), +) + +fit2.coef() +``` + +## When Does `preconditioner_` Exist? + +`fit.preconditioner_` is currently exposed on `feols()` fits that: + +- use `pf.WithinDemeaner(...)` +- have at least two fixed-effects dimensions +- successfully complete demeaning + +In simpler cases, such as no fixed effects, a single fixed effect, or a +non-`within` demeaner, `fit.preconditioner_` is `None`. + +## Persist a Preconditioner Across Sessions + +It is possible to persist a preconditioner across Python sessions, which can be useful if you have a costly preconditioner setup and want to reuse it later without recomputing. + +The easiest path is via standard `pickle`: + +```{python} +fit1 = pf.feols( + fml_y1, + data=panel, + demeaner=pf.WithinDemeaner(), +) + +payload = pickle.dumps(fit1.preconditioner_) +restored_preconditioner = pickle.loads(payload) + +fit2 = pf.feols( + fml_y2, + data=panel, + demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner), +) + +fit2.coef() +``` + +Please make sure to only load persisted preconditioners from trusted sources. Like any Python pickle-based workflow, untrusted payloads are unsafe to deserialize. + +## Using stale Preconditioners + +A preconditioner is defined for a given sample, fixed effects structure, and weights. For regression with identical structure in these three dimensions, the cached preconditioner will be exactly valid - if you were to recompute the preconditioner instead of using the cached one, you would get exactly the same object. + +If you change the sample, fixed effects structure, or weights, the cached preconditioner will turn "stale". 
In some cases, a stale preconditioner may still be a good approximation. Even with a stale preconditioner, the Krylov solver will converge - but the more the new regression deviates from the original problem, the less the preconditioner will help. + +For example, suppose that you fit a regression on a slightly different sample, but with the same fixed effects and weights. The cached preconditioner may still be a good approximation to the new problem, and using it may lead to faster convergence than starting from scratch. If the sample changes too drastically, or if the fixed effects structure changes, or your new regression even drops levels from the fixed effects, the cached preconditioner may be a poor approximation and lead to slower convergence than creating a new preconditioner. diff --git a/docs/how-to/index.qmd b/docs/how-to/index.qmd index b6b43d0e2..a53b49724 100644 --- a/docs/how-to/index.qmd +++ b/docs/how-to/index.qmd @@ -6,6 +6,7 @@ aliases: listing: type: grid contents: + - fixed-effects-solvers.qmd - marginaleffects.qmd - regression_decomposition.qmd - pyfixest_gpu.qmd diff --git a/docs/how-to/panel_variance_reduction.qmd b/docs/how-to/panel_variance_reduction.qmd index 247623ee2..18c69b100 100644 --- a/docs/how-to/panel_variance_reduction.qmd +++ b/docs/how-to/panel_variance_reduction.qmd @@ -229,6 +229,9 @@ fit_panel = pf.feols( ) ``` +This uses the legacy backend shorthand. The typed equivalent is +`demeaner=pf.MapDemeaner(backend="rust")`. + We can compare all results: ```{python} diff --git a/docs/how-to/pyfixest_gpu.qmd b/docs/how-to/pyfixest_gpu.qmd index efb097573..f20df4b76 100644 --- a/docs/how-to/pyfixest_gpu.qmd +++ b/docs/how-to/pyfixest_gpu.qmd @@ -6,8 +6,14 @@ jupyter: python3 --- -`PyFixest` allows to run the fixed effects demeaning on the GPU via the `demeaner_backend` argument. -To do so, you will have to install `jax` and `jaxblib`, for example by typing `pip install pyfixest[jax]`. 
+`PyFixest` allows fixed-effects demeaning on the GPU either via the legacy +`demeaner_backend` shorthand or via a typed `demeaner=` configuration. For +example, `demeaner_backend="jax"` and `demeaner=pf.MapDemeaner(backend="jax")` +select the same JAX backend. If a typed `demeaner` is provided, it takes +precedence over `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. + +To do so, you will have to install `jax` and `jaxlib`, for example by typing +`pip install pyfixest[jax]`. We test two back-ends for the iterative alternating-projections component of the fixed-effects regression on an Nvidia A100 GPU with 40 GB VRAM (a GPU that one typically wouldn't have installed to play graphics-intensive videogames on consumer hardware). `numba` benchmarks are run on a 12-core xeon CPU. diff --git a/docs/textbook-replications/brave_true.qmd b/docs/textbook-replications/brave_true.qmd index 72321e43d..4f8c01081 100644 --- a/docs/textbook-replications/brave_true.qmd +++ b/docs/textbook-replications/brave_true.qmd @@ -33,7 +33,6 @@ panel_fit = pf.feols( fml="lwage ~ married + expersq + union + hours | nr + year", data=data_df, vcov={"CRV1": "nr + year"}, - demeaner_backend="rust", ) ``` diff --git a/pyfixest/__init__.py b/pyfixest/__init__.py index b1cc65a8b..e4e5e42bd 100644 --- a/pyfixest/__init__.py +++ b/pyfixest/__init__.py @@ -8,9 +8,14 @@ __version__ = "unknown" __all__ = [ + "BaseDemeaner", + "LsmrDemeaner", + "MapDemeaner", "SaturatedEventStudy", + "WithinDemeaner", "bonferroni", "coefplot", + "demeaners", "did", "did2s", "dtable", @@ -40,7 +45,7 @@ ] # Submodules loaded lazily -_submodules = ["did", "errors", "estimation", "report", "utils"] +_submodules = ["demeaners", "did", "errors", "estimation", "report", "utils"] # Map function/class names to their module prefix # For direct module imports: import_module(f"{prefix}.{name}") @@ -50,6 +55,11 @@ "fepois": "pyfixest.estimation.api", "feglm": "pyfixest.estimation.api", "quantreg": "pyfixest.estimation.api", + # 
demeaners + "BaseDemeaner": "pyfixest.demeaners", + "LsmrDemeaner": "pyfixest.demeaners", + "MapDemeaner": "pyfixest.demeaners", + "WithinDemeaner": "pyfixest.demeaners", # estimation - other functions (still use parent module + getattr) "bonferroni": "pyfixest.estimation", "rwolf": "pyfixest.estimation", diff --git a/pyfixest/core/_core_impl.pyi b/pyfixest/core/_core_impl.pyi index 6d79f8bf0..af0291009 100644 --- a/pyfixest/core/_core_impl.pyi +++ b/pyfixest/core/_core_impl.pyi @@ -1,6 +1,8 @@ import numpy as np from numpy.typing import NDArray +class _WithinPreconditionerHandle: ... + def _find_collinear_variables_rs(x: NDArray[np.float64], tol: float = 1e-10): ... def _crv1_meat_loop_rs( scores: NDArray[np.float64], @@ -20,7 +22,22 @@ def _demean_within_rs( weights: NDArray[np.float64], tol: float = 1e-06, maxiter: int = 1_000, + krylov_method: str = "cg", + gmres_restart: int = 30, + preconditioner_type: str = "additive", + preconditioner_handle: _WithinPreconditionerHandle | None = None, ) -> tuple[np.ndarray, bool]: ... +def _build_within_preconditioner_rs( + flist: NDArray[np.uint32], + weights: NDArray[np.float64], + preconditioner_type: str = "additive", +) -> _WithinPreconditionerHandle: ... +def _serialize_within_preconditioner_rs( + preconditioner_handle: _WithinPreconditionerHandle, +) -> bytes: ... +def _deserialize_within_preconditioner_rs( + data: bytes, +) -> _WithinPreconditionerHandle: ... 
def _count_fixef_fully_nested_all_rs( all_fixef_array: NDArray, cluster_colnames: NDArray, diff --git a/pyfixest/core/collinear.py b/pyfixest/core/collinear.py index 0f3238a0b..a6592e7d4 100644 --- a/pyfixest/core/collinear.py +++ b/pyfixest/core/collinear.py @@ -1,3 +1,14 @@ -from ._core_impl import ( - _find_collinear_variables_rs as find_collinear_variables, # noqa: F401 -) +import numpy as np +from numpy.typing import NDArray + +from ._core_impl import _find_collinear_variables_rs + + +def find_collinear_variables( + x: NDArray[np.float64], tol: float = 1e-10 +) -> tuple[np.ndarray, int, bool]: + """Coerce the Gram matrix into a NumPy float64 array before calling Rust.""" + x_arr = np.asarray(x, dtype=np.float64) + if x_arr.ndim != 2: + raise ValueError("`x` must be a 2D array.") + return _find_collinear_variables_rs(x_arr, tol) diff --git a/pyfixest/core/demean.py b/pyfixest/core/demean.py index 6fe66673e..50703fed6 100644 --- a/pyfixest/core/demean.py +++ b/pyfixest/core/demean.py @@ -1,7 +1,220 @@ +from __future__ import annotations + +import warnings +from dataclasses import dataclass, field +from typing import Any + import numpy as np from numpy.typing import NDArray -from ._core_impl import _demean_rs, _demean_within_rs +from ._core_impl import ( + _build_within_preconditioner_rs, + _demean_rs, + _demean_within_rs, + _deserialize_within_preconditioner_rs, + _serialize_within_preconditioner_rs, +) + + +def _prepare_within_flist(flist: NDArray[np.uint32]) -> NDArray[np.uint32]: + flist_arr = np.asfortranarray(flist, dtype=np.uint32) + if flist_arr.ndim == 1: + flist_arr = flist_arr.reshape((-1, 1), order="F") + return flist_arr + + +def _prepare_weights(weights: NDArray[np.float64]) -> NDArray[np.float64]: + return np.asarray(weights, dtype=np.float64).reshape(-1) + + +def _compute_factor_cardinalities(flist: NDArray[np.uint32]) -> tuple[int, ...]: + # FE columns are dense 0..K-1 codes from `pd.factorize`, so cardinality is max + 1. 
+ return tuple((flist.max(axis=0) + 1).tolist()) + + +def _sanitize_krylov_and_preconditioner( + krylov_method: str, + preconditioner_type: str, +) -> tuple[str, str]: + krylov_method = krylov_method.lower() + preconditioner_type = preconditioner_type.lower() + + if krylov_method not in {"cg", "gmres"}: + raise ValueError("`krylov_method` must be either 'cg' or 'gmres'.") + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." + ) + if preconditioner_type == "multiplicative" and krylov_method != "gmres": + raise ValueError("Multiplicative Schwarz requires `krylov_method='gmres'`.") + + return krylov_method, preconditioner_type + + +@dataclass(frozen=True, slots=True) +class WithinPreconditioner: + """Thin in-process wrapper around a built `within::FePreconditioner`.""" + + n_obs: int + n_factors: int + factor_cardinalities: tuple[int, ...] + preconditioner_type: str + _preconditioner_handle: Any = field(repr=False) + + def _to_pickle_bytes(self) -> bytes: + return _serialize_within_preconditioner_rs(self._preconditioner_handle) + + @classmethod + def _from_pickle( + cls, + data: bytes, + n_obs: int, + n_factors: int, + factor_cardinalities: tuple[int, ...], + preconditioner_type: str, + ) -> WithinPreconditioner: + if not isinstance(data, (bytes, bytearray, memoryview)): + raise TypeError("`data` must be bytes-like.") + if not isinstance(n_obs, int) or n_obs <= 0: + raise ValueError("`n_obs` must be a positive int.") + if not isinstance(n_factors, int) or n_factors <= 0: + raise ValueError("`n_factors` must be a positive int.") + factor_cardinalities = tuple(int(card) for card in factor_cardinalities) + if len(factor_cardinalities) != n_factors: + raise ValueError( + "`factor_cardinalities` must have one entry per fixed-effect dimension." 
+ ) + if any(card <= 0 for card in factor_cardinalities): + raise ValueError("`factor_cardinalities` entries must be positive.") + if not isinstance(preconditioner_type, str): + raise TypeError("`preconditioner_type` must be a string.") + preconditioner_type = preconditioner_type.lower() + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." + ) + + handle = _deserialize_within_preconditioner_rs(bytes(data)) + return cls( + n_obs=n_obs, + n_factors=n_factors, + factor_cardinalities=factor_cardinalities, + preconditioner_type=preconditioner_type, + _preconditioner_handle=handle, + ) + + def __reduce__(self) -> tuple[Any, tuple[Any, ...]]: + return ( + type(self)._from_pickle, + ( + self._to_pickle_bytes(), + self.n_obs, + self.n_factors, + self.factor_cardinalities, + self.preconditioner_type, + ), + ) + + +def build_within_preconditioner( + flist: NDArray[np.uint32], + weights: NDArray[np.float64], + preconditioner_type: str = "additive", +) -> WithinPreconditioner: + """ + Build a reusable preconditioner for the `within` demeaner backend. + + Parameters + ---------- + flist : numpy.ndarray + Fixed-effects array of shape `(n_obs, n_factors)`. + weights : numpy.ndarray + Observation weights. + preconditioner_type : {"additive", "multiplicative"} + Schwarz preconditioner variant. + + Returns + ------- + WithinPreconditioner + In-process reusable preconditioner handle. + + Notes + ----- + This is a pyfixest convenience wrapper around the upstream `within` solver + flow: it constructs a temporary solver with upstream-compatible defaults and + extracts its built `FePreconditioner` for reuse. + """ + preconditioner_type = preconditioner_type.lower() + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." 
+ ) + + flist_arr = _prepare_within_flist(flist) + if flist_arr.shape[1] == 1: + raise ValueError( + "A reusable `within` preconditioner requires at least two fixed effects." + ) + + weights_arr = _prepare_weights(weights) + handle = _build_within_preconditioner_rs( + flist_arr, + weights_arr, + preconditioner_type, + ) + return WithinPreconditioner( + n_obs=flist_arr.shape[0], + n_factors=flist_arr.shape[1], + factor_cardinalities=_compute_factor_cardinalities(flist_arr), + preconditioner_type=preconditioner_type, + _preconditioner_handle=handle, + ) + + +def validate_within_preconditioner( + preconditioner: WithinPreconditioner, + flist: NDArray[np.uint32], + *, + preconditioner_type: str | None = None, +) -> None: + """Validate compatibility and emit warnings for a reusable preconditioner. + + Raises ``ValueError`` for hard incompatibilities (number of factors, + cardinalities, preconditioner type). Emits ``UserWarning`` for a differing + observation count. + """ + flist_arr = _prepare_within_flist(flist) + + if preconditioner.n_factors != flist_arr.shape[1]: + raise ValueError( + "The supplied within preconditioner is incompatible with the current " + "fixed-effects structure: the number of fixed effects does not match." + ) + if ( + preconditioner_type is not None + and preconditioner.preconditioner_type != preconditioner_type + ): + raise ValueError( + "The supplied within preconditioner uses " + f"`preconditioner_type='{preconditioner.preconditioner_type}'`, but " + f"`preconditioner_type='{preconditioner_type}'` was requested." + ) + + factor_cardinalities = _compute_factor_cardinalities(flist_arr) + if preconditioner.factor_cardinalities != factor_cardinalities: + raise ValueError( + "The supplied within preconditioner is incompatible with the current " + "fixed-effects structure: the fixed-effect cardinalities do not match." + ) + + # Soft warnings — reuse is allowed but may degrade effectiveness. 
+ if preconditioner.n_obs != flist_arr.shape[0]: + warnings.warn( + "The supplied within preconditioner was built on a different number " + "of observations. Reuse is allowed, but effectiveness may degrade.", + UserWarning, + stacklevel=3, + ) def demean( @@ -36,39 +249,6 @@ def demean( tuple[numpy.ndarray, bool] A tuple containing the demeaned array of shape (n_samples, n_features) and a boolean indicating whether the algorithm converged successfully. - - Examples - -------- - ```{python} - import numpy as np - import pyfixest as pf - from pyfixest.utils.dgps import get_blw - from pyfixest.estimation.internals.demean_ import demean - from formulaic import model_matrix - - fml = "y ~ treat | state + year" - - data = get_blw() - data.head() - - Y, rhs = model_matrix(fml, data) - X = rhs[0].drop(columns="Intercept") - fe = rhs[1].drop(columns="Intercept") - YX = np.concatenate([Y, X], axis=1) - - # to numpy - Y = Y.to_numpy() - X = X.to_numpy() - YX = np.concatenate([Y, X], axis=1) - fe = fe.to_numpy().astype(int) # demean requires fixed effects as ints! - - YX_demeaned, success = demean(YX, fe, weights = np.ones(YX.shape[0])) - Y_demeaned = YX_demeaned[:, 0] - X_demeaned = YX_demeaned[:, 1:] - - print(np.linalg.lstsq(X_demeaned, Y_demeaned, rcond=None)[0]) - print(pf.feols(fml, data).coef()) - ``` """ return _demean_rs( x.astype(np.float64, copy=False), @@ -85,48 +265,49 @@ def demean_within( weights: NDArray[np.float64], tol: float = 1e-06, maxiter: int = 1_000, + krylov_method: str = "cg", + gmres_restart: int = 30, + preconditioner_type: str = "additive", + preconditioner: WithinPreconditioner | None = None, ) -> tuple[NDArray, bool]: - """ - Demean an array using preconditioned conjugate gradient via the `within` crate. - - Uses one-level Schwarz preconditioning with approximate Cholesky local - solvers. Converges faster than alternating projections on weakly-connected - or block-diagonal fixed-effect structures. 
- - For single fixed effects, falls back to alternating projections (``_demean_rs``) - because the CG/Schwarz preconditioner is designed for multi-way FE problems. - - Parameters - ---------- - x : numpy.ndarray - Input array of shape (n_samples, n_features). - flist : numpy.ndarray - Array of shape (n_samples, n_factors) specifying the fixed effects - (integer-encoded). - weights : numpy.ndarray - Array of shape (n_samples,) specifying the weights. - tol : float, optional - Convergence tolerance. Defaults to 1e-06. - maxiter : int, optional - Maximum number of CG iterations. Defaults to 1_000. + """Demean an array using the configurable `within` backend.""" + krylov_method, preconditioner_type = _sanitize_krylov_and_preconditioner( + krylov_method, + preconditioner_type, + ) - Returns - ------- - tuple[numpy.ndarray, bool] - Demeaned array and convergence flag. - """ - if flist.ndim == 1 or flist.shape[1] == 1: + flist_arr = _prepare_within_flist(flist) + if flist_arr.shape[1] == 1: return _demean_rs( x.astype(np.float64, copy=False), - flist.astype(np.uint64, copy=False), - weights.astype(np.float64, copy=False), + flist_arr.astype(np.uint64, copy=False), + _prepare_weights(weights), tol, maxiter, ) + + weights_arr = _prepare_weights(weights) + x_arr = np.asarray(x, dtype=np.float64) + if x_arr.ndim == 1: + x_arr = x_arr.reshape((-1, 1)) + + preconditioner_handle = None + if preconditioner is not None: + validate_within_preconditioner( + preconditioner, + flist_arr, + preconditioner_type=preconditioner_type, + ) + preconditioner_handle = preconditioner._preconditioner_handle + return _demean_within_rs( - x.astype(np.float64, copy=False), - np.asfortranarray(flist, dtype=np.uint32), - weights.astype(np.float64, copy=False).reshape(-1), + x_arr, + flist_arr, + weights_arr, tol, maxiter, + krylov_method, + gmres_restart, + preconditioner_type, + preconditioner_handle, ) diff --git a/pyfixest/demeaners.py b/pyfixest/demeaners.py new file mode 100644 index 
000000000..592098e44 --- /dev/null +++ b/pyfixest/demeaners.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +from dataclasses import dataclass +from numbers import Real +from typing import TYPE_CHECKING, ClassVar, Literal + +if TYPE_CHECKING: + from pyfixest.core.demean import WithinPreconditioner + + +def _validate_positive_float(value: float, name: str) -> float: + if not isinstance(value, Real): + raise TypeError(f"`{name}` must be a real number.") + value = float(value) + if value <= 0: + raise ValueError(f"`{name}` must be strictly positive.") + return value + + +def _validate_positive_int(value: int, name: str) -> int: + if not isinstance(value, int): + raise TypeError(f"`{name}` must be an int.") + if value <= 0: + raise ValueError(f"`{name}` must be strictly positive.") + return value + + +@dataclass(frozen=True, slots=True) +class BaseDemeaner: + """Base configuration shared by all fixed-effects demeaners.""" + + fixef_tol: float = 1e-06 + fixef_maxiter: int = 10_000 + kind: ClassVar[str] + + def __post_init__(self) -> None: + object.__setattr__( + self, + "fixef_tol", + _validate_positive_float(self.fixef_tol, "fixef_tol"), + ) + object.__setattr__( + self, + "fixef_maxiter", + _validate_positive_int(self.fixef_maxiter, "fixef_maxiter"), + ) + + +@dataclass(frozen=True, slots=True) +class MapDemeaner(BaseDemeaner): + """Alternating-projections demeaner with selectable implementation backend.""" + + backend: Literal["numba", "rust", "jax"] = "numba" + kind: ClassVar[str] = "map" + + def __post_init__(self) -> None: + BaseDemeaner.__post_init__(self) + if not isinstance(self.backend, str): + raise TypeError("`backend` must be a string.") + backend = self.backend.lower() + if backend not in {"numba", "rust", "jax"}: + raise ValueError("`backend` must be one of 'numba', 'rust', or 'jax'.") + object.__setattr__(self, "backend", backend) + + +@dataclass(frozen=True, slots=True) +class WithinDemeaner(BaseDemeaner): + """Krylov-based demeaner 
configuration for the Rust `within` backend.""" + + krylov_method: str = "cg" + gmres_restart: int = 30 + preconditioner_type: str = "additive" + preconditioner: WithinPreconditioner | None = None + kind: ClassVar[str] = "within" + + def __post_init__(self) -> None: + BaseDemeaner.__post_init__(self) + if not isinstance(self.krylov_method, str): + raise TypeError("`krylov_method` must be a string.") + krylov_method = self.krylov_method.lower() + if krylov_method not in {"cg", "gmres"}: + raise ValueError("`krylov_method` must be either 'cg' or 'gmres'.") + object.__setattr__(self, "krylov_method", krylov_method) + + gmres_restart = _validate_positive_int(self.gmres_restart, "gmres_restart") + object.__setattr__(self, "gmres_restart", gmres_restart) + + if not isinstance(self.preconditioner_type, str): + raise TypeError("`preconditioner_type` must be a string.") + preconditioner_type = self.preconditioner_type.lower() + if preconditioner_type not in {"additive", "multiplicative"}: + raise ValueError( + "`preconditioner_type` must be either 'additive' or 'multiplicative'." 
+ ) + object.__setattr__(self, "preconditioner_type", preconditioner_type) + + if self.preconditioner is not None: + from pyfixest.core.demean import WithinPreconditioner + + if not isinstance(self.preconditioner, WithinPreconditioner): + raise TypeError("`preconditioner` must be a WithinPreconditioner.") + object.__setattr__( + self, + "preconditioner_type", + self.preconditioner.preconditioner_type, + ) + + if ( + self.preconditioner_type == "multiplicative" + and self.krylov_method != "gmres" + ): + raise ValueError("Multiplicative Schwarz requires `krylov_method='gmres'`.") + + +@dataclass(frozen=True, slots=True) +class LsmrDemeaner(BaseDemeaner): + """Sparse LSMR demeaner for CPU and GPU backends.""" + + precision: str = "float64" + use_gpu: bool | None = None + solver_atol: float = 1e-8 + solver_btol: float = 1e-8 + solver_maxiter: int = 100_000 + warn_on_cpu_fallback: bool = True + use_preconditioner: bool = True + kind: ClassVar[str] = "lsmr" + + def __post_init__(self) -> None: + BaseDemeaner.__post_init__(self) + if not isinstance(self.precision, str): + raise TypeError("`precision` must be a string.") + precision = self.precision.lower() + if precision not in {"float32", "float64"}: + raise ValueError("`precision` must be either 'float32' or 'float64'.") + object.__setattr__(self, "precision", precision) + + if self.use_gpu is not None and not isinstance(self.use_gpu, bool): + raise TypeError("`use_gpu` must be a bool or None.") + + object.__setattr__( + self, + "solver_atol", + _validate_positive_float(self.solver_atol, "solver_atol"), + ) + object.__setattr__( + self, + "solver_btol", + _validate_positive_float(self.solver_btol, "solver_btol"), + ) + object.__setattr__( + self, + "solver_maxiter", + _validate_positive_int(self.solver_maxiter, "solver_maxiter"), + ) + + if not isinstance(self.warn_on_cpu_fallback, bool): + raise TypeError("`warn_on_cpu_fallback` must be a bool.") + if not isinstance(self.use_preconditioner, bool): + raise 
TypeError("`use_preconditioner` must be a bool.") + + +AnyDemeaner = MapDemeaner | WithinDemeaner | LsmrDemeaner + +__all__ = [ + "AnyDemeaner", + "BaseDemeaner", + "LsmrDemeaner", + "MapDemeaner", + "WithinDemeaner", +] diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py index 1522ceea8..ed6cf795b 100644 --- a/pyfixest/estimation/FixestMulti_.py +++ b/pyfixest/estimation/FixestMulti_.py @@ -7,8 +7,9 @@ from pyfixest.estimation.api.utils import _ALL_SAMPLE, _AllSampleSentinel from pyfixest.estimation.formula.parse import Formula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, QuantregMethodOptions, QuantregMultiOptions, SolverOptions, @@ -252,7 +253,7 @@ def _estimate_all_models( vcov: str | dict[str, str] | None, solver: SolverOptions, vcov_kwargs: dict[str, Any] | None = None, - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, collin_tol: float = 1e-6, iwls_maxiter: int = 25, iwls_tol: float = 1e-08, @@ -274,9 +275,9 @@ def _estimate_all_models( Additional keyword arguments for the variance-covariance matrix. Defaults to None. solver: SolverOptions Solver to use for the estimation. - demeaner_backend: DemeanerBackendOptions, optional - The backend to use for demeaning. Can be either "numba" or "jax". - Defaults to "numba". + demeaner: Optional[ResolvedDemeaner] + The demeaning strategy to use for the estimation. Not relevant for the "compression" estimator + and quantile regression. collin_tol : float, optional The tolerance level for the multicollinearity check. Default is 1e-6. 
iwls_maxiter : int, optional @@ -314,7 +315,7 @@ def _estimate_all_models( # dictionary to cache demeaned data keyed by na_index, # only relevant for `.feols()` - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame] = {} + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry] = {} for FixestFormula in fixef_key_models: # type: ignore # loop over both dictfe and dictfe_iv (if the latter is not None) @@ -362,7 +363,7 @@ def _estimate_all_models( }: model_kwargs.update( { - "demeaner_backend": demeaner_backend, + "demeaner": demeaner, } ) diff --git a/pyfixest/estimation/api/feglm.py b/pyfixest/estimation/api/feglm.py index 25ce271ac..78d6edeb8 100644 --- a/pyfixest/estimation/api/feglm.py +++ b/pyfixest/estimation/api/feglm.py @@ -1,8 +1,10 @@ from collections.abc import Mapping from typing import Any +from pyfixest.demeaners import BaseDemeaner from pyfixest.estimation.api.utils import _estimation_input_checks from pyfixest.estimation.FixestMulti_ import FixestMulti +from pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( DemeanerBackendOptions, FixedRmOptions, @@ -32,6 +34,7 @@ def feglm( separation_check: list[str] | None = None, solver: SolverOptions = "scipy.linalg.solve", demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: BaseDemeaner | None = None, drop_intercept: bool = False, copy_data: bool = True, store_data: bool = True, @@ -147,7 +150,14 @@ def feglm( matrices (requires cupy & GPU, defaults to scipy/CPU and float64 if no GPU available) - "scipy": Direct application of the Frisch-Waugh-Lovell Theorem on sparse matrice. Forces to use a scipy-sparse backend even when cupy is installed and GPU is available. - Defaults to "numba". + Defaults to "numba". This argument is treated as a shorthand and is only + used when `demeaner` is not provided. + + demeaner : BaseDemeaner | None, optional + Typed demeaner configuration. 
If provided, it takes precedence over + `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. Backend-specific + settings and fixed-effects iteration controls are taken entirely from + this object. drop_intercept : bool, optional Whether to drop the intercept from the model, by default False. @@ -262,6 +272,14 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult weights_type = "aweights" context = {} if context is None else capture_context(context) + resolved_demeaner = resolve_demeaner( + demeaner=demeaner, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + resolved_fixef_tol = resolved_demeaner.fixef_tol + resolved_fixef_maxiter = resolved_demeaner.fixef_maxiter _estimation_input_checks( fml=fml, @@ -275,8 +293,8 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -291,8 +309,8 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -326,7 +344,7 @@ class [FixestMulti](/reference/estimation.FixestMulti_.FixestMulti.qmd) for mult iwls_maxiter=iwls_maxiter, collin_tol=collin_tol, separation_check=separation_check, - demeaner_backend=demeaner_backend, + demeaner=resolved_demeaner, accelerate=accelerate, ) diff --git a/pyfixest/estimation/api/feols.py b/pyfixest/estimation/api/feols.py index 6e04d0a55..99aeab749 100644 --- a/pyfixest/estimation/api/feols.py +++ b/pyfixest/estimation/api/feols.py @@ -1,8 +1,10 @@ from collections.abc import Mapping from 
typing import Any +from pyfixest.demeaners import BaseDemeaner from pyfixest.estimation.api.utils import _estimation_input_checks from pyfixest.estimation.FixestMulti_ import FixestMulti +from pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( DemeanerBackendOptions, FixedRmOptions, @@ -34,6 +36,7 @@ def feols( weights_type: WeightsTypeOptions = "aweights", solver: SolverOptions = "scipy.linalg.solve", demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: BaseDemeaner | None = None, use_compression: bool = False, reps: int = 100, context: int | Mapping[str, Any] | None = None, @@ -151,7 +154,14 @@ def feols( matrices (requires cupy & GPU, defaults to scipy/CPU and float64 if no GPU available) - "scipy": Direct application of the Frisch-Waugh-Lovell Theorem on sparse matrice. Forces to use a scipy-sparse backend even when cupy is installed and GPU is available. - Defaults to "numba". + Defaults to "numba". This argument is treated as a shorthand and is only + used when `demeaner` is not provided. + + demeaner : BaseDemeaner | None, optional + Typed demeaner configuration. If provided, it takes precedence over + `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. Backend-specific + settings and fixed-effects iteration controls are taken entirely from + this object. 
use_compression: bool Whether to use sufficient statistics to losslessly fit the regression model @@ -492,6 +502,14 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: if ssc is None: ssc = ssc_func() context = {} if context is None else capture_context(context) + resolved_demeaner = resolve_demeaner( + demeaner=demeaner, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + resolved_fixef_tol = resolved_demeaner.fixef_tol + resolved_fixef_maxiter = resolved_demeaner.fixef_maxiter _estimation_input_checks( fml=fml, @@ -505,8 +523,8 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=use_compression, reps=reps, @@ -520,8 +538,8 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=use_compression, reps=reps, @@ -550,7 +568,7 @@ def _lspline(series: pd.Series, knots: list[float]) -> np.array: solver=solver, vcov_kwargs=vcov_kwargs, collin_tol=collin_tol, - demeaner_backend=demeaner_backend, + demeaner=resolved_demeaner, ) if fixest._is_multiple_estimation: diff --git a/pyfixest/estimation/api/fepois.py b/pyfixest/estimation/api/fepois.py index f005376d5..c9cc65d58 100644 --- a/pyfixest/estimation/api/fepois.py +++ b/pyfixest/estimation/api/fepois.py @@ -1,8 +1,10 @@ from collections.abc import Mapping from typing import Any +from pyfixest.demeaners import BaseDemeaner from pyfixest.estimation.api.utils import _estimation_input_checks from pyfixest.estimation.FixestMulti_ import FixestMulti +from 
pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( DemeanerBackendOptions, FixedRmOptions, @@ -34,6 +36,7 @@ def fepois( separation_check: list[str] | None = None, solver: SolverOptions = "scipy.linalg.solve", demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: BaseDemeaner | None = None, drop_intercept: bool = False, copy_data: bool = True, store_data: bool = True, @@ -137,7 +140,14 @@ def fepois( matrices (requires cupy & GPU, defaults to scipy/CPU and float64 if no GPU available) - "scipy": Direct application of the Frisch-Waugh-Lovell Theorem on sparse matrice. Forces to use a scipy-sparse backend even when cupy is installed and GPU is available. - Defaults to "numba". + Defaults to "numba". This argument is treated as a shorthand and is only + used when `demeaner` is not provided. + + demeaner : BaseDemeaner | None, optional + Typed demeaner configuration. If provided, it takes precedence over + `demeaner_backend`, `fixef_tol`, and `fixef_maxiter`. Backend-specific + settings and fixed-effects iteration controls are taken entirely from + this object. drop_intercept : bool, optional Whether to drop the intercept from the model, by default False. 
@@ -208,6 +218,14 @@ def fepois( if ssc is None: ssc = ssc_func() context = {} if context is None else capture_context(context) + resolved_demeaner = resolve_demeaner( + demeaner=demeaner, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + resolved_fixef_tol = resolved_demeaner.fixef_tol + resolved_fixef_maxiter = resolved_demeaner.fixef_maxiter _estimation_input_checks( fml=fml, @@ -221,8 +239,8 @@ def fepois( copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -237,8 +255,8 @@ def fepois( copy_data=copy_data, store_data=store_data, lean=lean, - fixef_tol=fixef_tol, - fixef_maxiter=fixef_maxiter, + fixef_tol=resolved_fixef_tol, + fixef_maxiter=resolved_fixef_maxiter, weights_type=weights_type, use_compression=False, reps=None, @@ -271,7 +289,7 @@ def fepois( iwls_maxiter=iwls_maxiter, collin_tol=collin_tol, separation_check=separation_check, - demeaner_backend=demeaner_backend, + demeaner=resolved_demeaner, ) if fixest._is_multiple_estimation: diff --git a/pyfixest/estimation/cupy/demean_cupy_.py b/pyfixest/estimation/cupy/demean_cupy_.py index 0ce87aff5..ba99d49bb 100644 --- a/pyfixest/estimation/cupy/demean_cupy_.py +++ b/pyfixest/estimation/cupy/demean_cupy_.py @@ -278,6 +278,31 @@ def demean_cupy( dtype : type, default=np.float64 Data type for GPU computations (np.float32 or np.float64). 
""" + return demean_cupy_configured( + x=x, + flist=flist, + weights=weights, + solver_atol=tol, + solver_btol=tol, + solver_maxiter=maxiter, + dtype=dtype, + ) + + +def demean_cupy_configured( + x: NDArray[np.float64], + flist: NDArray[np.uint64] | None = None, + weights: NDArray[np.float64] | None = None, + *, + use_gpu: bool | None = None, + solver_atol: float = 1e-8, + solver_btol: float = 1e-8, + solver_maxiter: int = 100_000, + warn_on_cpu_fallback: bool = True, + dtype: type = np.float64, + use_preconditioner: bool = True, +) -> tuple[NDArray[np.float64], bool]: + """Demean arrays with the configurable CuPy/Scipy sparse FWL backend.""" if weights is None: weights = np.ones(x.shape[0] if x.ndim > 1 else len(x)) @@ -288,9 +313,15 @@ def demean_cupy( fe_df = pd.DataFrame(flist, columns=[f"f{i + 1}" for i in range(n_fe)], copy=False) fe_sparse_matrix = create_fe_sparse_matrix(fe_df) - return CupyFWLDemeaner(dtype=dtype).demean( - x, flist, weights, tol, maxiter, fe_sparse_matrix=fe_sparse_matrix - ) + return CupyFWLDemeaner( + use_gpu=use_gpu, + solver_atol=solver_atol, + solver_btol=solver_btol, + solver_maxiter=solver_maxiter, + warn_on_cpu_fallback=warn_on_cpu_fallback, + dtype=dtype, + use_preconditioner=use_preconditioner, + ).demean(x, flist, weights, fe_sparse_matrix=fe_sparse_matrix) def demean_cupy32( @@ -332,6 +363,27 @@ def demean_scipy( maxiter: int = 100_000, ) -> tuple[NDArray[np.float64], bool]: "Scipy demeaner using float64 precision (CPU-only, no GPU)." 
+ return demean_scipy_configured( + x=x, + flist=flist, + weights=weights, + solver_atol=tol, + solver_btol=tol, + solver_maxiter=maxiter, + ) + + +def demean_scipy_configured( + x: NDArray[np.float64], + flist: NDArray[np.uint64] | None = None, + weights: NDArray[np.float64] | None = None, + *, + solver_atol: float = 1e-8, + solver_btol: float = 1e-8, + solver_maxiter: int = 100_000, + use_preconditioner: bool = True, +) -> tuple[NDArray[np.float64], bool]: + """Demean arrays with the CPU-only sparse FWL backend.""" if weights is None: weights = np.ones(x.shape[0] if x.ndim > 1 else len(x)) @@ -342,7 +394,12 @@ def demean_scipy( fe_df = pd.DataFrame(flist, columns=[f"f{i + 1}" for i in range(n_fe)], copy=False) fe_sparse_matrix = create_fe_sparse_matrix(fe_df) - # Force CPU usage (use_gpu=False) and disable warnings return CupyFWLDemeaner( - use_gpu=False, warn_on_cpu_fallback=False, dtype=np.float64 - ).demean(x, flist, weights, tol, maxiter, fe_sparse_matrix=fe_sparse_matrix) + use_gpu=False, + solver_atol=solver_atol, + solver_btol=solver_btol, + solver_maxiter=solver_maxiter, + warn_on_cpu_fallback=False, + dtype=np.float64, + use_preconditioner=use_preconditioner, + ).demean(x, flist, weights, fe_sparse_matrix=fe_sparse_matrix) diff --git a/pyfixest/estimation/internals/backends.py b/pyfixest/estimation/internals/backends.py deleted file mode 100644 index 2f6b35bde..000000000 --- a/pyfixest/estimation/internals/backends.py +++ /dev/null @@ -1,99 +0,0 @@ -from pyfixest.core.collinear import find_collinear_variables -from pyfixest.core.crv1 import crv1_meat_loop -from pyfixest.core.demean import demean, demean_within -from pyfixest.core.nested_fixed_effects import count_fixef_fully_nested_all -from pyfixest.estimation.internals.demean_ import demean as demean_nb -from pyfixest.estimation.internals.vcov_utils import ( - _crv1_meat_loop as crv1_meat_loop_nb, -) -from pyfixest.estimation.numba.find_collinear_variables_nb import ( - _find_collinear_variables_nb as 
find_collinear_variables_nb, -) -from pyfixest.estimation.numba.nested_fixef_nb import ( - _count_fixef_fully_nested_all as count_fixef_fully_nested_all_nb, -) - -# Try to import JAX functions, fall back to numba if not available -try: - from pyfixest.estimation.jax.demean_jax_ import demean_jax as demean_jax_fn - - JAX_AVAILABLE = True -except ImportError: - # Fall back to numba implementation if JAX is not available - demean_jax_fn = demean_nb - JAX_AVAILABLE = False - -find_collinear_variables_jax = find_collinear_variables_nb -crv1_meat_loop_jax = crv1_meat_loop_nb -count_fixef_fully_nested_all_jax = count_fixef_fully_nested_all_nb - -# Try to import CuPy functions, fall back to numba if not available -try: - from pyfixest.estimation.cupy.demean_cupy_ import ( - demean_cupy32, - demean_cupy64, - demean_scipy, - ) - - CUPY_AVAILABLE = True -except ImportError: - # Fall back to numba implementation if CuPy is not available - demean_cupy32 = demean_nb - demean_cupy64 = demean_nb - demean_scipy = demean_nb - CUPY_AVAILABLE = False - -find_collinear_variables_cupy = find_collinear_variables_nb -crv1_meat_loop_cupy = crv1_meat_loop_nb -count_fixef_fully_nested_all_cupy = count_fixef_fully_nested_all_nb - -BACKENDS = { - "numba": { - "demean": demean_nb, - "collinear": find_collinear_variables_nb, - "crv1_meat": crv1_meat_loop_nb, - "nonnested": count_fixef_fully_nested_all_nb, - }, - "rust": { - "demean": demean, - "collinear": find_collinear_variables, - "crv1_meat": crv1_meat_loop, - "nonnested": count_fixef_fully_nested_all, - }, - "rust-cg": { - "demean": demean_within, - "collinear": find_collinear_variables, - "crv1_meat": crv1_meat_loop, - "nonnested": count_fixef_fully_nested_all, - }, - "jax": { - "demean": demean_jax_fn, - "collinear": find_collinear_variables_jax, - "crv1_meat": crv1_meat_loop_jax, - "nonnested": count_fixef_fully_nested_all_jax, - }, - "cupy": { - "demean": demean_cupy64, - "collinear": find_collinear_variables_cupy, - "crv1_meat": 
crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, - "cupy32": { - "demean": demean_cupy32, - "collinear": find_collinear_variables_cupy, - "crv1_meat": crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, - "cupy64": { - "demean": demean_cupy64, - "collinear": find_collinear_variables_cupy, - "crv1_meat": crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, - "scipy": { - "demean": demean_scipy, - "collinear": find_collinear_variables_cupy, - "crv1_meat": crv1_meat_loop_cupy, - "nonnested": count_fixef_fully_nested_all_cupy, - }, -} diff --git a/pyfixest/estimation/internals/demean_.py b/pyfixest/estimation/internals/demean_.py index c141ce595..5cae3e046 100644 --- a/pyfixest/estimation/internals/demean_.py +++ b/pyfixest/estimation/internals/demean_.py @@ -1,24 +1,38 @@ from collections.abc import Callable -from typing import Any +from dataclasses import dataclass, replace +from importlib import import_module +from typing import cast import numba as nb import numpy as np import pandas as pd +from pyfixest.core.demean import ( + WithinPreconditioner, + build_within_preconditioner, + demean_within, +) +from pyfixest.demeaners import LsmrDemeaner, MapDemeaner, WithinDemeaner +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import DemeanerBackendOptions +@dataclass(slots=True) +class DemeanedDataCacheEntry: + """Cached demeaned data and any reusable within preconditioner.""" + + demeaned: pd.DataFrame + preconditioner: WithinPreconditioner | None = None + + def demean_model( Y: pd.DataFrame, X: pd.DataFrame, fe: pd.DataFrame | None, weights: np.ndarray | None, - lookup_demeaned_data: dict[frozenset[int], Any], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], na_index: frozenset[int], - fixef_tol: float, - fixef_maxiter: int, - demean_func: Callable, - # demeaner_backend: Literal["numba", "jax", "rust"] = 
"numba", + demeaner: ResolvedDemeaner, ) -> tuple[pd.DataFrame, pd.DataFrame]: """ Demean a regression model. @@ -46,25 +60,18 @@ def demean_model( na_index : frozenset[int] A frozenset of indices of dropped rows. Used as a hashable cache key for demeaned variables. - fixef_tol: float - The tolerance for the demeaning algorithm. - fixef_maxiter: int - The maximum number of iterations for the demeaning algorithm. - demeaner_backend: DemeanerBackendOptions, optional - The backend to use for demeaning. Can be either "numba", "jax", or "rust". - Defaults to "numba". - + demeaner : ResolvedDemeaner + Resolved typed demeaner configuration. Backend-specific runtime options + are taken from this object. Returns ------- - tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] + tuple[pd.DataFrame, pd.DataFrame] A tuple of the following elements: - Yd : pd.DataFrame A DataFrame of the demeaned dependent variable. - Xd : pd.DataFrame A DataFrame of the demeaned covariates. - - Id : pd.DataFrame or None - A DataFrame of the demeaned Instruments. None if no IV. 
""" YX = pd.concat([Y, X], axis=1) @@ -74,47 +81,47 @@ def demean_model( if YX_array.dtype != np.dtype("float64"): YX_array = YX_array.astype(np.float64) - if weights is not None and weights.ndim > 1: - weights = weights.flatten() + if weights is None: + weights_array = np.ones(YX_array.shape[0], dtype=np.float64) + elif weights.ndim > 1: + weights_array = weights.flatten() + else: + weights_array = weights if fe is not None: + YX_demeaned: pd.DataFrame fe_array = fe.to_numpy() # check if looked dict has data for na_index if lookup_demeaned_data.get(na_index) is not None: - # get data out of lookup table: list of [algo, data] - value = lookup_demeaned_data.get(na_index) - if value is not None: - try: - _, YX_demeaned_old = value - except ValueError: - print("Error: Expected the value to be iterable with two elements.") - else: - pass + cache_entry = lookup_demeaned_data[na_index] + YX_demeaned_old = cache_entry.demeaned # get not yet demeaned covariates var_diff_names = list(set(yx_names) - set(YX_demeaned_old.columns)) # if some variables still need to be demeaned if var_diff_names: - # var_diff_names = var_diff_names - yx_names_list = list(yx_names) var_diff_index = [yx_names_list.index(item) for item in var_diff_names] - # var_diff_index = list(yx_names).index(var_diff_names) var_diff = YX_array[:, var_diff_index] if var_diff.ndim == 1: var_diff = var_diff.reshape(len(var_diff), 1) - YX_demean_new, success = demean_func( + effective_demeaner, _ = _prepare_within_preconditioner( + flist=fe_array, + weights=weights_array, + demeaner=demeaner, + preconditioner=cache_entry.preconditioner, + ) + YX_demean_new, success = dispatch_demean( x=var_diff, - flist=fe_array.astype(np.uintp), - weights=weights, - tol=fixef_tol, - maxiter=fixef_maxiter, + flist=fe_array, + weights=weights_array, + demeaner=effective_demeaner, ) if success is False: raise ValueError( - f"Demeaning failed after {fixef_maxiter} iterations." 
+ f"Demeaning failed after {demeaner.fixef_maxiter} iterations." ) YX_demeaned = pd.DataFrame( @@ -134,20 +141,33 @@ def demean_model( YX_demeaned = YX_demeaned_old[yx_names] else: - YX_demeaned, success = demean_func( + effective_demeaner, preconditioner = _prepare_within_preconditioner( + flist=fe_array, + weights=weights_array, + demeaner=demeaner, + ) + YX_demeaned_array, success = dispatch_demean( x=YX_array, - flist=fe_array.astype(np.uintp), - weights=weights, - tol=fixef_tol, - maxiter=fixef_maxiter, + flist=fe_array, + weights=weights_array, + demeaner=effective_demeaner, ) if success is False: - raise ValueError(f"Demeaning failed after {fixef_maxiter} iterations.") + raise ValueError( + f"Demeaning failed after {demeaner.fixef_maxiter} iterations." + ) - YX_demeaned = pd.DataFrame(YX_demeaned) + YX_demeaned = pd.DataFrame(YX_demeaned_array) YX_demeaned.columns = yx_names + lookup_demeaned_data[na_index] = DemeanedDataCacheEntry( + demeaned=YX_demeaned, + preconditioner=preconditioner, + ) - lookup_demeaned_data[na_index] = [None, YX_demeaned] + if na_index not in lookup_demeaned_data: + lookup_demeaned_data[na_index] = DemeanedDataCacheEntry( + demeaned=YX_demeaned + ) else: # nothing to demean here @@ -163,6 +183,135 @@ def demean_model( return Yd, Xd +def _prepare_within_preconditioner( + flist: np.ndarray, + weights: np.ndarray, + demeaner: ResolvedDemeaner, + preconditioner: WithinPreconditioner | None = None, + *, + refresh_preconditioner: bool = False, +) -> tuple[ResolvedDemeaner, WithinPreconditioner | None]: + """ + Prepare the effective demeaner and reusable within preconditioner. + + Only `WithinDemeaner` participates in preconditioner reuse. Other demeaners + are returned unchanged with no reusable preconditioner. 
+ """ + if not isinstance(demeaner, WithinDemeaner): + return demeaner, None + flist_uint32 = flist.astype(np.uint32, copy=False) + if flist_uint32.ndim == 1 or flist_uint32.shape[1] <= 1: + return demeaner, None + + if demeaner.preconditioner is not None and not refresh_preconditioner: + return demeaner, demeaner.preconditioner + if preconditioner is not None and not refresh_preconditioner: + return replace(demeaner, preconditioner=preconditioner), preconditioner + + built_preconditioner = build_within_preconditioner( + flist=flist_uint32, + weights=weights, + preconditioner_type=demeaner.preconditioner_type, + ) + return replace(demeaner, preconditioner=built_preconditioner), built_preconditioner + + +def _override_demeaner_tol( + demeaner: ResolvedDemeaner, + *, + tol: float | None = None, +) -> ResolvedDemeaner: + """Override FE tolerance on a typed demeaner when needed. Used for IWLS acceleration.""" + if tol is None or tol == demeaner.fixef_tol: + return demeaner + return replace(demeaner, fixef_tol=tol) + + +def dispatch_demean( + x: np.ndarray, + flist: np.ndarray, + weights: np.ndarray, + demeaner: ResolvedDemeaner, +) -> tuple[np.ndarray, bool]: + """Demean an array using the configured backend for the resolved demeaner.""" + flist_uint = flist.astype(np.uintp, copy=False) + + if isinstance(demeaner, WithinDemeaner): + return demean_within( + x=x, + flist=flist_uint.astype(np.uint32, copy=False), + weights=weights, + tol=demeaner.fixef_tol, + maxiter=demeaner.fixef_maxiter, + krylov_method=demeaner.krylov_method, + gmres_restart=demeaner.gmres_restart, + preconditioner_type=demeaner.preconditioner_type, + preconditioner=demeaner.preconditioner, + ) + + if isinstance(demeaner, LsmrDemeaner): + if demeaner.use_gpu is False: + demean_scipy_configured = cast( + Callable[..., tuple[np.ndarray, bool]], + import_module( + "pyfixest.estimation.cupy.demean_cupy_" + ).demean_scipy_configured, + ) + return demean_scipy_configured( + x=x, + flist=flist_uint, + 
weights=weights, + solver_atol=demeaner.solver_atol, + solver_btol=demeaner.solver_btol, + solver_maxiter=demeaner.solver_maxiter, + use_preconditioner=demeaner.use_preconditioner, + ) + + demean_cupy_configured = cast( + Callable[..., tuple[np.ndarray, bool]], + import_module( + "pyfixest.estimation.cupy.demean_cupy_" + ).demean_cupy_configured, + ) + return demean_cupy_configured( + x=x, + flist=flist_uint, + weights=weights, + use_gpu=demeaner.use_gpu, + solver_atol=demeaner.solver_atol, + solver_btol=demeaner.solver_btol, + solver_maxiter=demeaner.solver_maxiter, + warn_on_cpu_fallback=demeaner.warn_on_cpu_fallback, + dtype=np.float32 if demeaner.precision == "float32" else np.float64, + use_preconditioner=demeaner.use_preconditioner, + ) + + if isinstance(demeaner, MapDemeaner): + backend = demeaner.backend + if backend == "numba": + demean_func = demean + elif backend == "rust": + from pyfixest.core.demean import demean as demean_rs + + demean_func = demean_rs + elif backend == "jax": + from pyfixest.estimation.jax.demean_jax_ import demean_jax + + demean_func = demean_jax + else: + raise ValueError(f"Unknown MapDemeaner backend: {backend!r}") + + return demean_func( + x=x, + flist=flist_uint, + weights=weights, + tol=demeaner.fixef_tol, + maxiter=demeaner.fixef_maxiter, + ) + + raise TypeError(f"Unsupported demeaner type: {type(demeaner)!r}") + + @nb.njit def _sad_converged(a: np.ndarray, b: np.ndarray, tol: float) -> bool: for i in range(a.size): @@ -322,6 +471,8 @@ def demean( return (res, success) +# Legacy: used by the old demeaner_backend= string API. +# Remove once all callers use the typed demeaner= API. 
def _set_demeaner_backend( demeaner_backend: DemeanerBackendOptions, ) -> Callable: diff --git a/pyfixest/estimation/internals/demeaner_options.py b/pyfixest/estimation/internals/demeaner_options.py new file mode 100644 index 000000000..d4656c231 --- /dev/null +++ b/pyfixest/estimation/internals/demeaner_options.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +from typing import Literal, TypeAlias, cast + +from pyfixest.demeaners import ( + AnyDemeaner, + BaseDemeaner, + LsmrDemeaner, + MapDemeaner, + WithinDemeaner, +) +from pyfixest.estimation.internals.literals import DemeanerBackendOptions + +ResolvedDemeaner: TypeAlias = AnyDemeaner + +_DEMEANER_TYPES = ( + MapDemeaner, + WithinDemeaner, + LsmrDemeaner, +) + + +def normalize_demeaner_backend( + demeaner_backend: DemeanerBackendOptions, +) -> DemeanerBackendOptions: + """Map legacy backend aliases to their canonical demeaner backend.""" + if demeaner_backend == "rust-cg": + return "within" + if demeaner_backend == "cupy64": + return "cupy" + return demeaner_backend + + +def resolve_demeaner( + demeaner: BaseDemeaner | None, + demeaner_backend: DemeanerBackendOptions, + fixef_tol: float, + fixef_maxiter: int, +) -> ResolvedDemeaner: + """Resolve user input into a fully specified demeaner object. + + If a typed ``demeaner`` is supplied, it takes precedence over the legacy + ``demeaner_backend`` shorthand. The backend argument is only used when no + typed demeaner configuration is provided. 
+ """ + backend = normalize_demeaner_backend(demeaner_backend) + + if demeaner is not None: + if not isinstance(demeaner, _DEMEANER_TYPES): + raise TypeError("`demeaner` must be a supported pyfixest demeaner.") + return cast(ResolvedDemeaner, demeaner) + + shorthand = _from_backend_shorthand( + demeaner_backend=backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + return cast(ResolvedDemeaner, shorthand) + + +def get_demeaner_backend(demeaner: ResolvedDemeaner) -> DemeanerBackendOptions: + """Return the backend key associated with a resolved demeaner.""" + if isinstance(demeaner, MapDemeaner): + return cast( + Literal["numba", "rust", "jax"], + demeaner.backend, + ) + if isinstance(demeaner, LsmrDemeaner): + if demeaner.use_gpu is False: + return "scipy" + return "cupy32" if demeaner.precision == "float32" else "cupy" + return "within" + + +def _from_backend_shorthand( + demeaner_backend: DemeanerBackendOptions, + fixef_tol: float, + fixef_maxiter: int, +) -> ResolvedDemeaner: + """Construct a demeaner object from a legacy backend shorthand.""" + if demeaner_backend in {"numba", "rust", "jax"}: + return MapDemeaner( + backend=cast(Literal["numba", "rust", "jax"], demeaner_backend), + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + if demeaner_backend == "within": + return WithinDemeaner(fixef_tol=fixef_tol, fixef_maxiter=fixef_maxiter) + if demeaner_backend == "cupy": + return LsmrDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + precision="float64", + use_gpu=True, + ) + if demeaner_backend == "cupy32": + return LsmrDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + precision="float32", + use_gpu=True, + ) + if demeaner_backend == "scipy": + return LsmrDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + use_gpu=False, + ) + raise ValueError(f"Unknown demeaner backend {demeaner_backend!r}.") + + +__all__ = [ + "ResolvedDemeaner", + "get_demeaner_backend", + "normalize_demeaner_backend", + 
"resolve_demeaner", +] diff --git a/pyfixest/estimation/internals/literals.py b/pyfixest/estimation/internals/literals.py index ab8e7e067..5d27a209e 100644 --- a/pyfixest/estimation/internals/literals.py +++ b/pyfixest/estimation/internals/literals.py @@ -12,7 +12,15 @@ "jax", ] DemeanerBackendOptions = Literal[ - "numba", "jax", "rust", "rust-cg", "cupy", "cupy32", "cupy64", "scipy" + "numba", + "jax", + "rust", + "rust-cg", + "within", + "cupy", + "cupy32", + "cupy64", + "scipy", ] PredictionErrorOptions = Literal["prediction"] QuantregMethodOptions = Literal["fn", "pfn"] diff --git a/pyfixest/estimation/models/fegaussian_.py b/pyfixest/estimation/models/fegaussian_.py index 35b9054de..936edc07c 100644 --- a/pyfixest/estimation/models/fegaussian_.py +++ b/pyfixest/estimation/models/fegaussian_.py @@ -5,7 +5,8 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -24,7 +25,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ @@ -34,6 +35,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -41,7 +43,6 @@ def __init__( sample_split_value: str | int | None = None, separation_check: list[str] | None = None, context: int | Mapping[str, Any] = 0, - demeaner_backend: DemeanerBackendOptions = "numba", accelerate: bool = True, ): super().__init__( @@ -59,6 +60,7 @@ def __init__( tol=tol, maxiter=maxiter, solver=solver, + demeaner=demeaner, 
store_data=store_data, copy_data=copy_data, lean=lean, @@ -66,7 +68,6 @@ def __init__( sample_split_value=sample_split_value, separation_check=separation_check, context=context, - demeaner_backend=demeaner_backend, accelerate=accelerate, ) diff --git a/pyfixest/estimation/models/feglm_.py b/pyfixest/estimation/models/feglm_.py index 49d52c102..038e7d6ef 100644 --- a/pyfixest/estimation/models/feglm_.py +++ b/pyfixest/estimation/models/feglm_.py @@ -5,12 +5,18 @@ import numpy as np import pandas as pd +from pyfixest.core.demean import WithinPreconditioner from pyfixest.errors import ( NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.backends import BACKENDS -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demean_ import ( + DemeanedDataCacheEntry, + _override_demeaner_tol, + _prepare_within_preconditioner, + dispatch_demean, +) +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.solvers import solve_ols from pyfixest.estimation.models.feols_ import ( Feols, @@ -37,7 +43,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ @@ -47,7 +53,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -76,6 +82,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, + demeaner=demeaner, ) _glm_input_checks( @@ -90,13 +97,6 @@ def __init__( self.separation_check = separation_check self._accelerate = accelerate - self._demeaner_backend = demeaner_backend - try: - impl 
= BACKENDS[demeaner_backend] - except KeyError: - raise ValueError(f"Unknown demeaner backend {demeaner_backend!r}") - self._demean_func = impl["demean"] - self._support_crv3_inference = True self._support_iid_inference = True self._support_hac_inference = True @@ -108,6 +108,8 @@ def __init__( self._Xbeta = np.empty(0) self._method = "feglm" + self._iwls_preconditioner: WithinPreconditioner | None = None + self._iwls_tightening_refresh_done = False def prepare_model_matrix(self): "Prepare model inputs for estimation." @@ -178,8 +180,11 @@ def get_fit(self): X_tilde_prev = None accelerate = self._accelerate and self._fe is not None inner_tol = self._fixef_tol + self._iwls_preconditioner = None + self._iwls_tightening_refresh_done = False for r in range(self.maxiter): + refresh_preconditioner = False if r > 0: rel_deviance_change = self._get_relative_deviance_change( deviance, deviance_old @@ -198,6 +203,9 @@ def get_fit(self): # Adaptive tolerance as in ppmlhdfe if accelerate and rel_deviance_change < 10 * inner_tol: inner_tol = inner_tol / 10 + if not self._iwls_tightening_refresh_done: + refresh_preconditioner = True + self._iwls_tightening_refresh_done = True gprime = self._get_gprime(mu=mu) W = self._update_W(mu=mu) @@ -217,6 +225,7 @@ def get_fit(self): X=X_input, flist=self._fe, weights=W.flatten(), + refresh_preconditioner=refresh_preconditioner, tol=inner_tol, maxiter=self._fixef_maxiter, ) @@ -229,7 +238,6 @@ def get_fit(self): X_tilde, self._coefnames, self._collin_tol, - backend_func=self._find_collinear_variables_func, ) ) if self._collin_index: @@ -408,24 +416,34 @@ def residualize( X: np.ndarray, flist: np.ndarray, weights: np.ndarray, - tol: np.ndarray, + refresh_preconditioner: bool, + tol: float, maxiter: int, ) -> tuple[np.ndarray, np.ndarray]: "Residualize v and X by flist using weights." 
if flist is None: return v, X - else: - vX_tilde, success = self._demean_func( - x=np.c_[v, X], - flist=flist.astype(np.uintp), - weights=weights, - tol=tol, - maxiter=maxiter, - ) - if success is False: - raise ValueError(f"Demeaning failed after {maxiter} iterations.") - else: - return vX_tilde[:, 0], vX_tilde[:, 1:] + + iwls_weights = weights.flatten() + effective_demeaner, self._iwls_preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=iwls_weights, + demeaner=self._demeaner, + preconditioner=self._iwls_preconditioner, + refresh_preconditioner=refresh_preconditioner, + ) + effective_demeaner = _override_demeaner_tol(effective_demeaner, tol=tol) + + vX_tilde, success = dispatch_demean( + x=np.c_[v, X], + flist=flist, + weights=iwls_weights, + demeaner=effective_demeaner, + ) + + if success is False: + raise ValueError(f"Demeaning failed after {maxiter} iterations.") + return vX_tilde[:, 0], vX_tilde[:, 1:] def _check_convergence( self, diff --git a/pyfixest/estimation/models/feiv_.py b/pyfixest/estimation/models/feiv_.py index 475494a38..c06f2a035 100644 --- a/pyfixest/estimation/models/feiv_.py +++ b/pyfixest/estimation/models/feiv_.py @@ -7,8 +7,8 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.demean_ import demean_model -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry, demean_model +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.solvers import solve_ols from pyfixest.estimation.models.feols_ import Feols, _drop_multicollinear_variables @@ -44,9 +44,9 @@ class Feiv(Feols): solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"], default is "scipy.linalg.solve". Solver to use for the estimation. 
- demeaner_backend: DemeanerBackendOptions, optional - The backend to use for demeaning. Can be either "numba", "jax", or "rust". - Defaults to "numba". + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. weights_name : Optional[str] Name of the weights variable. weights_type : Optional[str] @@ -146,7 +146,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: Literal[ "np.linalg.lstsq", "np.linalg.solve", @@ -154,7 +154,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ] = "scipy.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -181,7 +181,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, - demeaner_backend=demeaner_backend, + demeaner=demeaner, ) self._is_iv = True @@ -215,9 +215,7 @@ def demean(self) -> None: self._weights.flatten(), self._lookup_demeaned_data, self._na_index, - self._fixef_tol, - self._fixef_maxiter, - self._demean_func, + self._demeaner, ) else: self._endogvard = self._endogvar @@ -235,7 +233,6 @@ def drop_multicol_vars(self) -> None: self._Z, self._coefnames_z, self._collin_tol, - self._find_collinear_variables_func, ) def get_fit(self) -> None: diff --git a/pyfixest/estimation/models/felogit_.py b/pyfixest/estimation/models/felogit_.py index 5f443db44..4c4ade930 100644 --- a/pyfixest/estimation/models/felogit_.py +++ b/pyfixest/estimation/models/felogit_.py @@ -5,7 +5,8 @@ import pandas as pd from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from 
pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -24,7 +25,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ @@ -34,7 +35,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -59,7 +60,7 @@ def __init__( tol=tol, maxiter=maxiter, solver=solver, - demeaner_backend=demeaner_backend, + demeaner=demeaner, store_data=store_data, copy_data=copy_data, lean=lean, diff --git a/pyfixest/estimation/models/feols_.py b/pyfixest/estimation/models/feols_.py index 82532a7ed..be12e9f3f 100644 --- a/pyfixest/estimation/models/feols_.py +++ b/pyfixest/estimation/models/feols_.py @@ -1,6 +1,6 @@ import re import warnings -from collections.abc import Callable, Mapping +from collections.abc import Mapping from importlib import import_module from typing import Any, Literal, cast @@ -11,14 +11,18 @@ from scipy.sparse.linalg import lsqr from scipy.stats import chi2, f, t +from pyfixest.core.collinear import find_collinear_variables +from pyfixest.core.crv1 import crv1_meat_loop +from pyfixest.core.demean import WithinPreconditioner +from pyfixest.core.nested_fixed_effects import count_fixef_fully_nested_all +from pyfixest.demeaners import MapDemeaner from pyfixest.errors import VcovTypeNotSupportedError from pyfixest.estimation.api.utils import _ALL_SAMPLE, _AllSampleSentinel from pyfixest.estimation.formula import model_matrix as model_matrix_fixest from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.backends import 
BACKENDS -from pyfixest.estimation.internals.demean_ import demean_model +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry, demean_model +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, PredictionErrorOptions, PredictionType, SolverOptions, @@ -98,6 +102,9 @@ class Feols(ResultAccessorMixin): The solver to use for the regression. Can be "np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax". Defaults to "scipy.linalg.solve". + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. context : int or Mapping[str, Any] A dictionary containing additional context variables to be used by formulaic during the creation of the model matrix. This can include @@ -214,7 +221,6 @@ class Feols(ResultAccessorMixin): _solver: Literal["np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr", "jax"], default is "scipy.linalg.solve". Solver to use for the estimation. - _demeaner_backend: DemeanerBackendOptions _data: pd.DataFrame The data frame used in the estimation. None if arguments `lean = True` or `store_data = False`. 
@@ -254,9 +260,9 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -296,8 +302,14 @@ def __init__( self._fixef_tol = fixef_tol self._fixef_maxiter = fixef_maxiter self._solver = solver - self._demeaner_backend = demeaner_backend + if demeaner is None: + demeaner = MapDemeaner( + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) + self._demeaner = demeaner self._lookup_demeaned_data = lookup_demeaned_data + self.preconditioner_: WithinPreconditioner | None = None self._store_data = store_data self._copy_data = copy_data self._lean = lean @@ -322,15 +334,9 @@ def __init__( # self._coefnames = None self._icovars = None - try: - impl = BACKENDS[demeaner_backend] - except KeyError: - raise ValueError(f"Unknown backend {demeaner_backend!r}") - - self._demean_func = impl["demean"] - self._find_collinear_variables_func = impl["collinear"] - self._crv1_meat_func = impl["crv1_meat"] - self._count_nested_fixef_func = impl["nonnested"] + # Removed: BACKENDS dict lookup. Collinearity, CRV1, and nested FE + # detection always use the Rust implementations. Demeaning backend + # is resolved inside dispatch_demean from the typed demeaner object. 
# set in get_fit() self._tZX = np.array([]) @@ -497,13 +503,15 @@ def demean(self): self._weights.flatten(), self._lookup_demeaned_data, self._na_index, - self._fixef_tol, - self._fixef_maxiter, - self._demean_func, - # self._demeaner_backend, + self._demeaner, + ) + cache_entry = self._lookup_demeaned_data.get(self._na_index) + self.preconditioner_ = ( + cache_entry.preconditioner if cache_entry is not None else None ) else: self._Yd, self._Xd = self._Y, self._X + self.preconditioner_ = None def to_array(self): "Convert estimation data frames to np arrays." @@ -532,7 +540,6 @@ def drop_multicol_vars(self): self._X, self._coefnames, self._collin_tol, - backend_func=self._find_collinear_variables_func, ) # update X_is_empty self._X_is_empty = self._X.shape[1] == 0 @@ -749,7 +756,7 @@ def vcov( k_fe_nested = 0 n_fe_fully_nested = 0 if self._fixef is not None and self._ssc_dict["k_fixef"] == "nonnested": - k_fe_nested_flag, n_fe_fully_nested = self._count_nested_fixef_func( + k_fe_nested_flag, n_fe_fully_nested = count_fixef_fully_nested_all( all_fixef_array=np.array( self._fixef.replace("^", "_").split("+"), dtype=str ), @@ -942,7 +949,7 @@ def _vcov_crv1(self, clustid: np.ndarray, cluster_col: np.ndarray): k = self._scores.shape[1] meat = np.zeros((k, k)) - meat = self._crv1_meat_func( + meat = crv1_meat_loop( scores=self._scores.astype(np.float64), clustid=clustid.astype(np.uintp), cluster_col=cluster_col.astype(np.uintp), @@ -2408,7 +2415,6 @@ def _drop_multicollinear_variables( X: np.ndarray, names: list[str], collin_tol: float, - backend_func: Callable, ) -> tuple[np.ndarray, list[str], list[str], list[int]]: """ Check for multicollinearity in the design matrices X and Z. @@ -2421,8 +2427,6 @@ def _drop_multicollinear_variables( The names of the coefficients. collin_tol : float The tolerance level for the multicollinearity check. - backend_func: Callable - Which backend function to use for the multicollinearity check. 
Returns ------- @@ -2438,7 +2442,7 @@ def _drop_multicollinear_variables( # TODO: avoid doing this computation twice, e.g. compute tXXinv here as fixest does tXX = X.T @ X - id_excl, n_excl, all_removed = backend_func(tXX, collin_tol) + id_excl, n_excl, all_removed = find_collinear_variables(tXX, collin_tol) collin_vars = [] collin_index = [] diff --git a/pyfixest/estimation/models/feols_compressed_.py b/pyfixest/estimation/models/feols_compressed_.py index 845088ac8..fdc674483 100644 --- a/pyfixest/estimation/models/feols_compressed_.py +++ b/pyfixest/estimation/models/feols_compressed_.py @@ -9,8 +9,9 @@ from tqdm import tqdm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, SolverOptions, ) from pyfixest.estimation.models.feols_ import ( @@ -60,10 +61,14 @@ class FeolsCompressed(Feols): The tolerance level for the fixed effects. fixef_maxiter : int The maximum iterations for the demeaning algorithm. - lookup_demeaned_data : dict[str, pd.DataFrame] + lookup_demeaned_data : dict[str, DemeanedDataCacheEntry] The lookup table for demeaned data. solver : SolverOptions The solver to use. + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. Not + relevant for the "compression" estimator. store_data : bool Whether to store the data. 
copy_data : bool @@ -95,9 +100,9 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -120,7 +125,7 @@ def __init__( fixef_maxiter, lookup_demeaned_data, solver, - demeaner_backend, + demeaner, store_data, copy_data, lean, diff --git a/pyfixest/estimation/models/fepois_.py b/pyfixest/estimation/models/fepois_.py index be6f1545d..1e37ebc41 100644 --- a/pyfixest/estimation/models/fepois_.py +++ b/pyfixest/estimation/models/fepois_.py @@ -8,12 +8,19 @@ import pandas as pd from scipy.special import gammaln +from pyfixest.core.demean import WithinPreconditioner from pyfixest.errors import ( NonConvergenceError, ) from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import ( + DemeanedDataCacheEntry, + _override_demeaner_tol, + _prepare_within_preconditioner, + dispatch_demean, +) +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.internals.literals import ( - DemeanerBackendOptions, SolverOptions, ) from pyfixest.estimation.internals.solvers import solve_ols @@ -64,8 +71,9 @@ class Fepois(Feols): The solver to use for the regression. Can be "np.linalg.lstsq", "np.linalg.solve", "scipy.linalg.solve", "scipy.sparse.linalg.lsqr" and "jax". Defaults to "scipy.linalg.solve". - demeaner_backend: DemeanerBackendOptions. - The backend used for demeaning. + demeaner : Optional[ResolvedDemeaner] + Resolved typed demeaner configuration. If provided, it determines the + backend and the fixed-effects iteration controls used by the model. fixef_tol: float, default = 1e-06. 
Tolerance level for the convergence of the demeaning algorithm. context : int or Mapping[str, Any] @@ -94,11 +102,11 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: SolverOptions = "np.linalg.solve", - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, context: int | Mapping[str, Any] = 0, store_data: bool = True, copy_data: bool = True, @@ -126,7 +134,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, - demeaner_backend=demeaner_backend, + demeaner=demeaner, ) # input checks @@ -150,6 +158,7 @@ def __init__( self._Y_hat_response = np.array([]) self.deviance = None self._Xbeta = np.array([]) + self._iwls_preconditioner: WithinPreconditioner | None = None def prepare_model_matrix(self): "Prepare model inputs for estimation." @@ -268,11 +277,15 @@ def get_fit(self) -> None: algorithm). 
""" self.to_array() + self._iwls_preconditioner = None + inner_tol = self._fixef_tol + tightening_refresh_done = False stop_iterating = False crit = 1 for i in range(self.maxiter): + refresh_preconditioner = False if stop_iterating: self.convergence = True break @@ -296,27 +309,44 @@ def get_fit(self) -> None: Z = eta + self._Y / mu - 1 # eq (8) reg_Z = Z.copy() # eq (9) - # tighten HDFE tolerance - currently not possible with PyHDFE - # if crit < 10 * inner_tol: - # inner_tol = inner_tol / 10 + if crit < 10 * inner_tol: + inner_tol = inner_tol / 10 + if not tightening_refresh_done: + refresh_preconditioner = True + tightening_refresh_done = True # Step 1: weighted demeaning ZX = np.concatenate([reg_Z, self._X], axis=1) combined_weights = self._weights * mu + iwls_weights = combined_weights.flatten() if self._fe is None: ZX_resid = ZX else: - ZX_resid, success = self._demean_func( + effective_demeaner, self._iwls_preconditioner = ( + _prepare_within_preconditioner( + flist=self._fe, + weights=iwls_weights, + demeaner=self._demeaner, + preconditioner=self._iwls_preconditioner, + refresh_preconditioner=refresh_preconditioner, + ) + ) + effective_demeaner = _override_demeaner_tol( + effective_demeaner, + tol=inner_tol, + ) + ZX_resid, success = dispatch_demean( x=ZX, - flist=self._fe.astype(np.uintp), - weights=combined_weights.flatten(), - tol=self._fixef_tol, - maxiter=self._fixef_maxiter, + flist=self._fe, + weights=iwls_weights, + demeaner=effective_demeaner, ) if success is False: - raise ValueError("Demeaning failed after 100_000 iterations.") + raise ValueError( + f"Demeaning failed after {self._fixef_maxiter} iterations." 
+ ) Z_resid = ZX_resid[:, 0].reshape((self._N, 1)) # z_resid X_resid = ZX_resid[:, 1:] # x_resid @@ -329,7 +359,6 @@ def get_fit(self) -> None: X_resid, self._coefnames, self._collin_tol, - backend_func=self._find_collinear_variables_func, ) ) if self._collin_index: diff --git a/pyfixest/estimation/models/feprobit_.py b/pyfixest/estimation/models/feprobit_.py index 50bb7df7a..334a90edb 100644 --- a/pyfixest/estimation/models/feprobit_.py +++ b/pyfixest/estimation/models/feprobit_.py @@ -7,7 +7,8 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula -from pyfixest.estimation.internals.literals import DemeanerBackendOptions +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry +from pyfixest.estimation.internals.demeaner_options import ResolvedDemeaner from pyfixest.estimation.models.feglm_ import Feglm @@ -26,7 +27,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], tol: float, maxiter: int, solver: Literal[ @@ -36,7 +37,7 @@ def __init__( "scipy.sparse.linalg.lsqr", "jax", ], - demeaner_backend: DemeanerBackendOptions = "numba", + demeaner: ResolvedDemeaner | None = None, store_data: bool = True, copy_data: bool = True, lean: bool = False, @@ -61,7 +62,7 @@ def __init__( tol=tol, maxiter=maxiter, solver=solver, - demeaner_backend=demeaner_backend, + demeaner=demeaner, store_data=store_data, copy_data=copy_data, lean=lean, diff --git a/pyfixest/estimation/quantreg/QuantregMulti.py b/pyfixest/estimation/quantreg/QuantregMulti.py index 35081c60e..e06466a29 100644 --- a/pyfixest/estimation/quantreg/QuantregMulti.py +++ b/pyfixest/estimation/quantreg/QuantregMulti.py @@ -8,6 +8,7 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import 
DemeanedDataCacheEntry from pyfixest.estimation.internals.literals import ( QuantregMethodOptions, QuantregMultiOptions, @@ -34,7 +35,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", demeaner_backend: Literal["numba", "jax"] = "numba", store_data: bool = True, diff --git a/pyfixest/estimation/quantreg/quantreg_.py b/pyfixest/estimation/quantreg/quantreg_.py index 67cb25e7d..fa8ccab20 100644 --- a/pyfixest/estimation/quantreg/quantreg_.py +++ b/pyfixest/estimation/quantreg/quantreg_.py @@ -10,6 +10,8 @@ from scipy.stats import norm from pyfixest.estimation.formula.parse import Formula as FixestFormula +from pyfixest.estimation.internals.demean_ import DemeanedDataCacheEntry +from pyfixest.estimation.internals.demeaner_options import resolve_demeaner from pyfixest.estimation.internals.literals import ( QuantregMethodOptions, SolverOptions, @@ -37,7 +39,7 @@ def __init__( collin_tol: float, fixef_tol: float, fixef_maxiter: int, - lookup_demeaned_data: dict[frozenset[int], pd.DataFrame], + lookup_demeaned_data: dict[frozenset[int], DemeanedDataCacheEntry], solver: SolverOptions = "np.linalg.solve", demeaner_backend: Literal["numba", "jax"] = "numba", store_data: bool = True, @@ -52,6 +54,12 @@ def __init__( quantile_maxiter: int | None = None, seed: int | None = None, ) -> None: + demeaner = resolve_demeaner( + demeaner=None, + demeaner_backend=demeaner_backend, + fixef_tol=fixef_tol, + fixef_maxiter=fixef_maxiter, + ) super().__init__( FixestFormula=FixestFormula, data=data, @@ -71,7 +79,7 @@ def __init__( sample_split_var=sample_split_var, sample_split_value=sample_split_value, context=context, - demeaner_backend=demeaner_backend, + demeaner=demeaner, ) warnings.warn( diff --git a/src/demean_within.rs b/src/demean_within.rs index adba07b94..c4376a154 100644 --- 
a/src/demean_within.rs +++ b/src/demean_within.rs @@ -1,6 +1,20 @@ use ndarray::{Array2, ArrayView1, ArrayView2, ShapeBuilder}; use numpy::{PyArray2, PyReadonlyArray1, PyReadonlyArray2}; +use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; +use pyo3::types::PyBytes; + +#[pyclass(module = "pyfixest.core._core_impl", name = "_WithinPreconditionerHandle")] +#[derive(Clone)] +pub struct WithinPreconditionerHandle { + inner: within::FePreconditioner, +} + +impl WithinPreconditionerHandle { + fn from_inner(inner: within::FePreconditioner) -> Self { + Self { inner } + } +} fn extract_columns(x: &ArrayView2) -> Vec> { (0..x.ncols()) @@ -8,46 +22,194 @@ fn extract_columns(x: &ArrayView2) -> Vec> { .collect() } -fn demean_within_impl( +fn build_solver_params( + tol: f64, + maxiter: usize, + krylov_method: &str, + gmres_restart: usize, +) -> PyResult { + let krylov = match krylov_method { + "cg" => within::KrylovMethod::Cg, + "gmres" => within::KrylovMethod::Gmres { + restart: gmres_restart, + }, + _ => { + return Err(PyValueError::new_err( + "krylov_method must be either 'cg' or 'gmres'.", + )); + } + }; + + Ok(within::SolverParams { + tol, + maxiter, + krylov, + ..within::SolverParams::default() + }) +} + +fn build_preconditioner_config(preconditioner_type: &str) -> PyResult { + match preconditioner_type { + "additive" => Ok(within::Preconditioner::Additive( + within::LocalSolverConfig::solver_default(), + within::ReductionStrategy::Auto, + )), + "multiplicative" => Ok(within::Preconditioner::Multiplicative( + within::LocalSolverConfig::solver_default(), + )), + _ => Err(PyValueError::new_err( + "preconditioner_type must be either 'additive' or 'multiplicative'.", + )), + } +} + +fn build_preconditioner_solver_params(preconditioner_type: &str) -> within::SolverParams { + match preconditioner_type { + "multiplicative" => within::SolverParams { + krylov: within::KrylovMethod::Gmres { restart: 30 }, + ..within::SolverParams::default() + }, + _ => 
within::SolverParams::default(), + } +} + +fn build_weighted_design( + flist: &ArrayView2, + weights: &ArrayView1, +) -> PyResult> { + let n_obs = flist.nrows(); + let n_factors = flist.ncols(); + let factor_levels: Vec> = (0..n_factors) + .map(|factor| flist.column(factor).iter().copied().collect()) + .collect(); + let weights_vec: Vec = weights.iter().copied().collect(); + let store = within::FactorMajorStore::new( + factor_levels, + within::ObservationWeights::Dense(weights_vec), + n_obs, + ) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + within::WeightedDesign::from_store(store).map_err(|e| PyRuntimeError::new_err(e.to_string())) +} + +fn solve_result_to_array( + result: within::BatchSolveResult, + n_obs: usize, + n_rhs: usize, +) -> (Array2, bool) { + let all_converged = result.converged().iter().all(|&c| c); + let out = Array2::from_shape_vec((n_obs, n_rhs).f(), result.demeaned_all().to_vec()) + .expect("within returns one demeaned column per RHS"); + (out, all_converged) +} + +fn solve_within_impl( x: &ArrayView2, flist: &ArrayView2, weights: &ArrayView1, tol: f64, maxiter: usize, -) -> Result<(Array2, bool), within::WithinError> { + krylov_method: &str, + gmres_restart: usize, + preconditioner_type: &str, + preconditioner_handle: Option<&WithinPreconditionerHandle>, +) -> PyResult<(Array2, bool)> { + let params = build_solver_params(tol, maxiter, krylov_method, gmres_restart)?; let n_obs = x.nrows(); let n_rhs = x.ncols(); - let x_columns = extract_columns(x); let x_slices: Vec<&[f64]> = x_columns.iter().map(|col| col.as_slice()).collect(); - let weights_vec: Vec = weights.iter().copied().collect(); - let params = within::SolverParams { - tol, - maxiter, - ..within::SolverParams::default() + let result = if let Some(handle) = preconditioner_handle { + let design = build_weighted_design(flist, weights)?; + let solver = within::Solver::from_design_with_preconditioner( + design, + ¶ms, + handle.inner.clone(), + ) + .map_err(|e| 
PyRuntimeError::new_err(e.to_string()))?; + solver + .solve_batch(&x_slices) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))? + } else { + let preconditioner = build_preconditioner_config(preconditioner_type)?; + let design = build_weighted_design(flist, weights)?; + let solver = within::Solver::from_design(design, ¶ms, Some(&preconditioner)) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + solver + .solve_batch(&x_slices) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))? }; - let preconditioner = within::Preconditioner::Additive( - within::LocalSolverConfig::solver_default(), - within::ReductionStrategy::Auto, - ); - - let result = within::solve_batch( - flist.view(), - &x_slices, - Some(&weights_vec), - ¶ms, - Some(&preconditioner), - )?; - let all_converged = result.converged().iter().all(|&c| c); - let out = Array2::from_shape_vec((n_obs, n_rhs).f(), result.demeaned_all().to_vec()) - .expect("within returns one demeaned column per RHS"); - Ok((out, all_converged)) + Ok(solve_result_to_array(result, n_obs, n_rhs)) +} + +fn build_preconditioner_impl( + flist: &ArrayView2, + weights: &ArrayView1, + preconditioner_type: &str, +) -> PyResult { + let preconditioner_config = build_preconditioner_config(preconditioner_type)?; + let design = build_weighted_design(flist, weights)?; + let params = build_preconditioner_solver_params(preconditioner_type); + let solver = within::Solver::from_design(design, ¶ms, Some(&preconditioner_config)) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + let built = solver + .preconditioner() + .cloned() + .ok_or_else(|| PyRuntimeError::new_err("solver did not build a preconditioner"))?; + Ok(WithinPreconditionerHandle::from_inner(built)) +} + +#[pyfunction] +#[pyo3(signature = (flist, weights, preconditioner_type="additive"))] +pub fn _build_within_preconditioner_rs( + py: Python<'_>, + flist: PyReadonlyArray2, + weights: PyReadonlyArray1, + preconditioner_type: &str, +) -> PyResult> { + let flist_arr = 
flist.as_array(); + let weights_arr = weights.as_array(); + let handle = + py.detach(|| build_preconditioner_impl(&flist_arr, &weights_arr, preconditioner_type))?; + Py::new(py, handle) +} + +#[pyfunction] +pub fn _serialize_within_preconditioner_rs( + py: Python<'_>, + preconditioner_handle: Py, +) -> PyResult> { + let handle = preconditioner_handle.bind(py).borrow(); + let bytes = postcard::to_stdvec(&handle.inner) + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; + Ok(PyBytes::new(py, &bytes).unbind()) +} + +#[pyfunction] +pub fn _deserialize_within_preconditioner_rs( + py: Python<'_>, + data: &[u8], +) -> PyResult> { + let inner: within::FePreconditioner = postcard::from_bytes(data).map_err(|e| { + PyValueError::new_err(format!("failed to deserialize preconditioner: {}", e)) + })?; + Py::new(py, WithinPreconditionerHandle::from_inner(inner)) } #[pyfunction] -#[pyo3(signature = (x, flist, weights, tol=1e-6, maxiter=1_000))] +#[pyo3(signature = ( + x, + flist, + weights, + tol=1e-6, + maxiter=1_000, + krylov_method="cg", + gmres_restart=30, + preconditioner_type="additive", + preconditioner_handle=None +))] pub fn _demean_within_rs( py: Python<'_>, x: PyReadonlyArray2, @@ -55,14 +217,31 @@ pub fn _demean_within_rs( weights: PyReadonlyArray1, tol: f64, maxiter: usize, + krylov_method: &str, + gmres_restart: usize, + preconditioner_type: &str, + preconditioner_handle: Option>, ) -> PyResult<(Py>, bool)> { let x_arr = x.as_array(); let flist_arr = flist.as_array(); let weights_arr = weights.as_array(); + let preconditioner_handle = preconditioner_handle + .as_ref() + .map(|handle| handle.bind(py).borrow().clone()); - let (out, success) = py - .detach(|| demean_within_impl(&x_arr, &flist_arr, &weights_arr, tol, maxiter)) - .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string()))?; + let (out, success) = py.detach(|| { + solve_within_impl( + &x_arr, + &flist_arr, + &weights_arr, + tol, + maxiter, + krylov_method, + gmres_restart, + 
preconditioner_type, + preconditioner_handle.as_ref(), + ) + })?; let pyarray = PyArray2::from_owned_array(py, out); Ok((pyarray.into(), success)) diff --git a/src/lib.rs b/src/lib.rs index 6f2d2e421..11169281e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,10 +10,20 @@ mod nw; #[pymodule] fn _core_impl(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; m.add_wrapped(wrap_pyfunction!(collinear::_find_collinear_variables_rs))?; m.add_wrapped(wrap_pyfunction!(crv1::_crv1_meat_loop_rs))?; m.add_wrapped(wrap_pyfunction!(demean::_demean_rs))?; m.add_wrapped(wrap_pyfunction!(demean_within::_demean_within_rs))?; + m.add_wrapped(wrap_pyfunction!( + demean_within::_build_within_preconditioner_rs + ))?; + m.add_wrapped(wrap_pyfunction!( + demean_within::_serialize_within_preconditioner_rs + ))?; + m.add_wrapped(wrap_pyfunction!( + demean_within::_deserialize_within_preconditioner_rs + ))?; m.add_wrapped(wrap_pyfunction!( nested_fixed_effects::_count_fixef_fully_nested_all_rs ))?; diff --git a/tests/test_api.py b/tests/test_api.py index 9b3702856..549fccda5 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,3 +1,5 @@ +import pickle + import duckdb import numpy as np import pandas as pd @@ -5,6 +7,7 @@ from formulaic.errors import FactorEvaluationError import pyfixest as pf +import pyfixest.estimation.models.fepois_ as fepois_model from pyfixest.utils.utils import get_data @@ -92,6 +95,112 @@ def test_fepois_args(): np.testing.assert_allclose(fit4.coef(), fit5.coef(), rtol=1e-12) +def test_feols_exposes_and_reuses_within_preconditioner(): + data = pf.get_data() + demeaner = pf.WithinDemeaner(krylov_method="gmres") + + fit1 = pf.feols("Y ~ X1 | f1 + f2", data=data, demeaner=demeaner) + + assert fit1.preconditioner_ is not None + + fit2 = pf.feols( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner( + krylov_method="gmres", + preconditioner=fit1.preconditioner_, + ), + ) + + np.testing.assert_allclose(fit1.coef(), fit2.coef(), rtol=1e-10) + + 
+def test_feols_reuses_pickled_within_preconditioner(): + data = pf.get_data() + fit1 = pf.feols( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner(), + ) + + restored_preconditioner = pickle.loads(pickle.dumps(fit1.preconditioner_)) + + fit2 = pf.feols( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner(preconditioner=restored_preconditioner), + ) + + np.testing.assert_allclose(fit1.coef(), fit2.coef(), rtol=1e-10) + + +def test_feglm_runs_with_within_demeaner(): + data = pf.get_data().copy() + data["Y"] = (data["Y"] > 0).astype(int) + + fit = pf.feglm( + "Y ~ X1 | f1 + f2", + data=data, + family="logit", + demeaner=pf.WithinDemeaner(), + ) + + assert np.isfinite(fit.coef().to_numpy()).all() + + +def test_fepois_runs_with_within_demeaner(): + data = pf.get_data(model="Fepois") + + fit = pf.fepois( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner(), + ) + + assert np.isfinite(fit.coef().to_numpy()).all() + + +def test_fepois_tightens_inner_tol_for_within_demeaner(monkeypatch): + data = pf.get_data(model="Fepois") + initial_inner_tol = 1e-2 + tol_history: list[float] = [] + refresh_history: list[bool] = [] + + original_prepare = fepois_model._prepare_within_preconditioner + original_override = fepois_model._override_demeaner_tol + + def wrapped_prepare(*args, **kwargs): + refresh_history.append(kwargs.get("refresh_preconditioner", False)) + return original_prepare(*args, **kwargs) + + def wrapped_override(demeaner, *, tol=None): + if tol is not None: + tol_history.append(tol) + return original_override(demeaner, tol=tol) + + monkeypatch.setattr( + fepois_model, + "_prepare_within_preconditioner", + wrapped_prepare, + ) + monkeypatch.setattr( + fepois_model, + "_override_demeaner_tol", + wrapped_override, + ) + + fit = pf.fepois( + "Y ~ X1 | f1 + f2", + data=data, + demeaner=pf.WithinDemeaner(fixef_tol=initial_inner_tol), + ) + + assert np.isfinite(fit.coef().to_numpy()).all() + assert tol_history + assert min(tol_history) < 
initial_inner_tol + assert any(refresh_history) + + def test_lean(): data = pf.get_data() fit = pf.feols("Y ~ X1 + X2 | f1", data=data, lean=True) diff --git a/tests/test_demean.py b/tests/test_demean.py index 0d161a5c6..e073e9058 100644 --- a/tests/test_demean.py +++ b/tests/test_demean.py @@ -1,12 +1,21 @@ +import pickle +import re + import numpy as np import pandas as pd import pyhdfe import pytest from pyfixest.core import demean as demean_rs -from pyfixest.core.demean import demean_within +from pyfixest.core.demean import ( + WithinPreconditioner, + build_within_preconditioner, + demean_within, +) +from pyfixest.demeaners import LsmrDemeaner, MapDemeaner, WithinDemeaner from pyfixest.estimation.cupy.demean_cupy_ import demean_cupy32, demean_cupy64 from pyfixest.estimation.internals.demean_ import ( + _prepare_within_preconditioner, _set_demeaner_backend, demean, demean_model, @@ -72,11 +81,16 @@ def test_set_demeaner_backend(): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_no_fixed_effects(benchmark, demean_func): +def test_demean_model_no_fixed_effects(benchmark, demeaner): """Test demean_model when there are no fixed effects.""" # Create sample data N = 1000 @@ -94,9 +108,7 @@ def test_demean_model_no_fixed_effects(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # When no fixed effects, output should equal input @@ -107,11 +119,16 @@ def test_demean_model_no_fixed_effects(benchmark, demean_func): @pytest.mark.parametrize( - 
argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_with_fixed_effects(benchmark, demean_func): +def test_demean_model_with_fixed_effects(benchmark, demeaner): """Test demean_model with fixed effects.""" # Create sample data N = 1000 @@ -132,9 +149,7 @@ def test_demean_model_with_fixed_effects(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Verify results are different from input (since we're demeaning) @@ -147,17 +162,22 @@ def test_demean_model_with_fixed_effects(benchmark, demean_func): # Verify results are cached in lookup_dict assert frozenset() in lookup_dict - cached_data = lookup_dict[frozenset()][1] + cached_data = lookup_dict[frozenset()].demeaned assert np.allclose(cached_data[Y.columns].values, Yd.values) assert np.allclose(cached_data[X.columns].values, Xd.values) @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_with_weights(benchmark, demean_func): +def test_demean_model_with_weights(benchmark, demeaner): """Test demean_model with weights.""" N = 1000 rng = np.random.default_rng(42) @@ -177,9 +197,7 @@ def test_demean_model_with_weights(benchmark, 
demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Run without weights for comparison (fresh lookup dict to avoid cache hit) @@ -190,9 +208,7 @@ def test_demean_model_with_weights(benchmark, demean_func): weights=np.ones(N), lookup_demeaned_data={}, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Results should be different with weights vs without @@ -201,11 +217,16 @@ def test_demean_model_with_weights(benchmark, demean_func): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba"), + MapDemeaner(backend="jax"), + MapDemeaner(backend="rust"), + LsmrDemeaner(use_gpu=False), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_caching(benchmark, demean_func): +def test_demean_model_caching(benchmark, demeaner): """Test the caching behavior of demean_model.""" N = 1000 rng = np.random.default_rng(42) @@ -224,9 +245,7 @@ def test_demean_model_caching(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Second run - should use cache @@ -238,9 +257,7 @@ def test_demean_model_caching(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - demean_func=demean_func, + demeaner=demeaner, ) # Results should be identical @@ -258,9 +275,7 @@ def test_demean_model_caching(benchmark, demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=10_000, - 
demean_func=demean_func, + demeaner=demeaner, ) # Original columns should match previous results @@ -270,11 +285,16 @@ def test_demean_model_caching(benchmark, demean_func): @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba", fixef_maxiter=1), + MapDemeaner(backend="jax", fixef_maxiter=1), + MapDemeaner(backend="rust", fixef_maxiter=1), + LsmrDemeaner(use_gpu=False, solver_maxiter=1), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def test_demean_model_maxiter_convergence_failure(demean_func): +def test_demean_model_maxiter_convergence_failure(demeaner): """Test that demean_model fails when maxiter is too small.""" N = 100 rng = np.random.default_rng(42) @@ -289,7 +309,7 @@ def test_demean_model_maxiter_convergence_failure(demean_func): lookup_dict = {} # Should fail with very small maxiter - with pytest.raises(ValueError, match="Demeaning failed after 1 iterations"): + with pytest.raises(ValueError, match="Demeaning failed after"): demean_model( Y=Y, X=X, @@ -297,18 +317,21 @@ def test_demean_model_maxiter_convergence_failure(demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=1, # Very small limit - demean_func=demean_func, + demeaner=demeaner, ) @pytest.mark.parametrize( - argnames="demean_func", - argvalues=[demean, demean_jax, demean_rs, demean_cupy32, demean_cupy64], - ids=["demean_numba", "demean_jax", "demean_rs", "demean_cupy32", "demean_cupy64"], + argnames="demeaner", + argvalues=[ + MapDemeaner(backend="numba", fixef_maxiter=5000), + MapDemeaner(backend="jax", fixef_maxiter=5000), + MapDemeaner(backend="rust", fixef_maxiter=5000), + LsmrDemeaner(use_gpu=False, solver_maxiter=5000), + ], + ids=["numba", "jax", "rust", "lsmr_scipy"], ) -def 
test_demean_model_custom_maxiter_success(demean_func): +def test_demean_model_custom_maxiter_success(demeaner): """Test that demean_model succeeds with reasonable maxiter.""" N = 1000 rng = np.random.default_rng(42) @@ -327,9 +350,7 @@ def test_demean_model_custom_maxiter_success(demean_func): weights=weights, lookup_demeaned_data=lookup_dict, na_index=frozenset(), - fixef_tol=1e-6, - fixef_maxiter=5000, # Custom limit - demean_func=demean_func, + demeaner=demeaner, ) # Just verify it returns valid results @@ -383,6 +404,173 @@ def test_feols_integration_maxiter(): assert model is not None +def test_demean_within_multiplicative_preconditioner_build_and_reuse(): + rng = np.random.default_rng(123) + n = 400 + + x = rng.normal(size=(n, 3)) + flist = np.column_stack([rng.integers(0, 40, n), rng.integers(0, 25, n)]).astype( + np.uint32 + ) + weights = rng.uniform(0.5, 1.5, n) + + preconditioner = build_within_preconditioner( + flist, + weights, + preconditioner_type="multiplicative", + ) + + x_demeaned, success = demean_within( + x, + flist, + weights, + krylov_method="gmres", + preconditioner_type="multiplicative", + preconditioner=preconditioner, + ) + + assert success + assert x_demeaned.shape == x.shape + + with pytest.raises( + ValueError, + match=re.escape("Multiplicative Schwarz requires `krylov_method='gmres'`."), + ): + demean_within( + x, + flist, + weights, + krylov_method="cg", + preconditioner_type="multiplicative", + preconditioner=preconditioner, + ) + + +def test_prepare_within_preconditioner_builds_once_and_refreshes_on_request( + monkeypatch, +): + rng = np.random.default_rng(321) + n = 200 + flist = np.column_stack([rng.integers(0, 30, n), rng.integers(0, 20, n)]).astype( + np.uint32 + ) + demeaner = WithinDemeaner(krylov_method="gmres") + + build_calls: list[np.ndarray] = [] + + def fake_build( + flist: np.ndarray, + weights: np.ndarray, + preconditioner_type: str, + ) -> WithinPreconditioner: + build_calls.append(weights.copy()) + return 
WithinPreconditioner( + n_obs=flist.shape[0], + n_factors=flist.shape[1], + factor_cardinalities=tuple((flist.max(axis=0) + 1).tolist()), + preconditioner_type=preconditioner_type, + _preconditioner_handle=object(), + ) + + monkeypatch.setattr( + "pyfixest.estimation.internals.demean_.build_within_preconditioner", + fake_build, + ) + + weights0 = np.ones(n) + effective_demeaner, preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=weights0, + demeaner=demeaner, + preconditioner=None, + ) + + assert len(build_calls) == 1 + assert effective_demeaner.preconditioner is preconditioner + + effective_demeaner, reused_preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=weights0 * 1.01, + demeaner=demeaner, + preconditioner=preconditioner, + ) + + assert len(build_calls) == 1 + assert effective_demeaner.preconditioner is reused_preconditioner + assert reused_preconditioner is preconditioner + + effective_demeaner, refreshed_preconditioner = _prepare_within_preconditioner( + flist=flist, + weights=weights0 * 1.01, + demeaner=demeaner, + preconditioner=preconditioner, + refresh_preconditioner=True, + ) + + assert len(build_calls) == 2 + assert effective_demeaner.preconditioner is refreshed_preconditioner + assert refreshed_preconditioner is not preconditioner + + +def test_within_preconditioner_roundtrips_via_pickle(): + rng = np.random.default_rng(777) + n = 250 + x = rng.normal(size=(n, 3)) + flist = np.column_stack([rng.integers(0, 30, n), rng.integers(0, 20, n)]).astype( + np.uint32 + ) + weights = rng.uniform(0.5, 1.5, n) + + preconditioner = build_within_preconditioner(flist, weights) + restored_from_pickle = pickle.loads(pickle.dumps(preconditioner)) + + x_demeaned_original, success_original = demean_within( + x, + flist, + weights, + preconditioner=preconditioner, + ) + x_demeaned_pickle, success_pickle = demean_within( + x, + flist, + weights, + preconditioner=restored_from_pickle, + ) + + assert success_original and 
success_pickle + np.testing.assert_allclose(x_demeaned_pickle, x_demeaned_original, rtol=1e-10) + assert ( + restored_from_pickle.factor_cardinalities == preconditioner.factor_cardinalities + ) + + +def test_restored_within_preconditioner_still_validates_structure(): + rng = np.random.default_rng(991) + n = 200 + x = rng.normal(size=(n, 2)) + flist = np.column_stack([rng.integers(0, 20, n), rng.integers(0, 15, n)]).astype( + np.uint32 + ) + weights = rng.uniform(0.5, 1.5, n) + preconditioner = build_within_preconditioner(flist, weights) + restored = pickle.loads(pickle.dumps(preconditioner)) + + incompatible_flist = np.column_stack( + [rng.integers(0, 25, n), rng.integers(0, 15, n)] + ).astype(np.uint32) + + with pytest.raises( + ValueError, + match="fixed-effect cardinalities do not match", + ): + demean_within( + x, + incompatible_flist, + weights, + preconditioner=restored, + ) + + @pytest.mark.parametrize( argnames="demean_func", argvalues=[demean_rs, demean_within, demean_cupy32, demean_cupy64],