diff --git a/docs/source/user_guide/benchmarks/nebs.rst b/docs/source/user_guide/benchmarks/nebs.rst
index e081babf1..9a741711e 100644
--- a/docs/source/user_guide/benchmarks/nebs.rst
+++ b/docs/source/user_guide/benchmarks/nebs.rst
@@ -46,3 +46,60 @@ Reference data:
 
 * Manually taken from https://doi.org/10.1149/1.1633511.
 * Meta-GGA (Perdew-Wang) exchange correlation functional
+
+
+Si defects
+==========
+
+Summary
+-------
+
+Performance in predicting DFT singlepoint energies and forces along fixed nudged-elastic-band
+(NEB) images for a silicon interstitial migration pathway.
+
+Metrics
+-------
+
+For each of the three NEB datasets (64 atoms, 216 atoms, and 216 atoms di-to-single), MLIPs are
+evaluated on the same ordered NEB images as the reference.
+
+1. Energy MAE
+
+Mean absolute error (MAE) of *relative* energies along the NEB, shifting image 0 to 0 eV
+for both the DFT reference and the MLIP predictions.
+
+2. Force MAE
+
+Mean absolute error (MAE) of forces across all atoms and images along the NEB.
+
+Computational cost
+------------------
+
+Medium: tests are likely to take several minutes to run on CPU.
+
+Data availability
+-----------------
+
+Input/reference data:
+
+* Reference extxyz trajectories (including per-image DFT energies and forces) are distributed as a
+  separate zip archive and downloaded on-demand from the ML-PEG data store.
+  The calculation script uses the public ML-PEG S3 bucket to retrieve these inputs.
+* The reference DFT energies/forces come from Quantum ESPRESSO (PWscf) single-point calculations
+  with:
+
+  - Code/version: Quantum ESPRESSO PWSCF v.7.0
+  - XC functional: ``input_dft='PBE'``
+  - Cutoffs: ``ecutwfc=30.0`` Ry, ``ecutrho=240.0`` Ry
+  - Smearing: ``occupations='smearing'``, ``smearing='mv'``, ``degauss=0.01`` Ry
+  - SCF convergence/mixing: ``conv_thr=1.0d-6``, ``electron_maxstep=250``, ``mixing_beta=0.2``,
+    ``mixing_mode='local-TF'``
+  - Diagonalization: ``diagonalization='david'``
+  - Symmetry: ``nosym=.false.``, ``noinv=.false.`` (symmetry enabled)
+  - Pseudopotential: ``Si.pbe-n-kjpaw_psl.1.0.0.UPF`` (PSLibrary)
+
+  K-points by case:
+
+  - 64 atoms: Γ-only (``K_POINTS automatic 1 1 1 0 0 0``)
+  - 216 atoms: Γ-only (``K_POINTS gamma``)
+  - 216 atoms di-to-single: Γ-only (``K_POINTS gamma``)
diff --git a/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py b/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py
new file mode 100644
index 000000000..43ea77029
--- /dev/null
+++ b/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py
@@ -0,0 +1,294 @@
+"""Analyse Si defects DFT singlepoints (energies + forces)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+from ase.atoms import Atoms
+from ase.io import read, write
+import numpy as np
+import pandas as pd
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_scatter
+from ml_peg.analysis.utils.utils import load_metrics_config, mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+
+BENCHMARK_DIR = "si_defects"
+CALC_PATH = CALCS_ROOT / "nebs" / BENCHMARK_DIR / "outputs"
+OUT_PATH = APP_ROOT / "data" / "nebs" / BENCHMARK_DIR
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+@dataclass(frozen=True)
+class _Case:
+    """Definition of a single Si defects NEB dataset."""
+
+    key: str
+    label: str
+
+
+CASES: tuple[_Case, ...] = (
+    _Case(key="64_atoms", label="64"),
+    _Case(key="216_atoms", label="216"),
+    _Case(key="216_atoms_di_to_single", label="216 di-to-single"),
+)
+
+
+@dataclass(frozen=True)
+class _Results:
+    """Computed errors and assets for one (case, model) pair."""
+
+    energy_mae: float
+    force_mae: float
+    x: list[float]
+    energy_error: list[float]
+    force_rms: list[float]
+    structs: list[Atoms]
+
+
+def _load_structs(case_key: str, model: str) -> list[Atoms] | None:
+    """
+    Load MLIP-evaluated NEB frames (with DFT reference stored).
+
+    Parameters
+    ----------
+    case_key
+        Case key.
+    model
+        Model name.
+
+    Returns
+    -------
+    list[ase.atoms.Atoms] | None
+        Frames if the calculation output exists, otherwise ``None``.
+    """
+    path = CALC_PATH / case_key / model / f"{BENCHMARK_DIR}.extxyz"
+    if not path.exists():
+        return None
+    structs = read(path, index=":")
+    if not isinstance(structs, list) or not structs:
+        raise ValueError(f"Unexpected output content: {path}")
+    return structs
+
+
+def _compute(structs: list[Atoms]) -> _Results:
+    """
+    Compute per-image errors and global MAEs.
+
+    Parameters
+    ----------
+    structs
+        Ordered NEB frames with ``ref_energy_ev``, ``ref_forces``, ``pred_energy_ev``,
+        and ``pred_forces``.
+
+    Returns
+    -------
+    _Results
+        Computed errors and frames for downstream plotting/app.
+    """
+    ref_e = np.asarray([float(s.info["ref_energy_ev"]) for s in structs])
+    pred_e = np.asarray([float(s.info["pred_energy_ev"]) for s in structs])
+    # Compare *relative* energies along the NEB (shift image 0 to 0 eV for both
+    # reference and predictions). This removes any constant energy offset.
+    ref_e_rel = ref_e - ref_e[0]
+    pred_e_rel = pred_e - pred_e[0]
+    energy_error = pred_e_rel - ref_e_rel
+
+    ref_f = np.asarray([s.arrays["ref_forces"] for s in structs], dtype=float)
+    pred_f = np.asarray([s.arrays["pred_forces"] for s in structs], dtype=float)
+    force_error = pred_f - ref_f
+
+    x = [float(s.info.get("image_index", idx)) for idx, s in enumerate(structs)]
+    return _Results(
+        energy_mae=mae(ref_e_rel, pred_e_rel),
+        force_mae=mae(ref_f.reshape(-1), pred_f.reshape(-1)),
+        x=x,
+        energy_error=[float(v) for v in energy_error],
+        force_rms=[float(v) for v in np.sqrt(np.mean(force_error**2, axis=(1, 2)))],
+        structs=structs,
+    )
+
+
+def _write_assets(case_key: str, model: str, results: _Results) -> None:
+    """
+    Write structure trajectory and per-image CSV for the app.
+
+    Parameters
+    ----------
+    case_key
+        Case key.
+    model
+        Model name.
+    results
+        Computed errors and frames.
+    """
+    out_dir = OUT_PATH / case_key / model
+    out_dir.mkdir(parents=True, exist_ok=True)
+    write(out_dir / f"{model}-neb-band.extxyz", results.structs)
+    pd.DataFrame(
+        {
+            "image": results.x,
+            "energy_error_ev": results.energy_error,
+            "force_rms_ev_per_ang": results.force_rms,
+        }
+    ).to_csv(out_dir / f"{model}-per_image_errors.csv", index=False)
+
+
+def _write_plots(case_key: str, case_label: str, model: str, results: _Results) -> None:
+    """
+    Write Dash/Plotly JSON plots for one (case, model) pair.
+
+    Parameters
+    ----------
+    case_key
+        Case key.
+    case_label
+        Human-friendly case label.
+    model
+        Model name.
+    results
+        Computed errors and frames.
+    """
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    @plot_scatter(
+        filename=OUT_PATH / f"figure_{model}_{case_key}_energy_error.json",
+        title=f"Energy error along NEB ({case_label})",
+        x_label="Image",
+        y_label="Energy error / eV",
+        show_line=True,
+    )
+    def plot_energy() -> dict[str, tuple[list[float], list[float]]]:
+        """
+        Plot per-image energy errors for a single model and case.
+
+        Returns
+        -------
+        dict[str, tuple[list[float], list[float]]]
+            Mapping of model name to x/y arrays.
+        """
+        return {model: [results.x, results.energy_error]}
+
+    @plot_scatter(
+        filename=OUT_PATH / f"figure_{model}_{case_key}_force_rms.json",
+        title=f"Force RMS error along NEB ({case_label})",
+        x_label="Image",
+        y_label="Force RMS error / (eV/Å)",
+        show_line=True,
+    )
+    def plot_forces() -> dict[str, tuple[list[float], list[float]]]:
+        """
+        Plot per-image force RMS errors for a single model and case.
+
+        Returns
+        -------
+        dict[str, tuple[list[float], list[float]]]
+            Mapping of model name to x/y arrays.
+        """
+        return {model: [results.x, results.force_rms]}
+
+    plot_energy()
+    plot_forces()
+
+
+@pytest.fixture
+def case_results() -> dict[tuple[str, str], _Results]:
+    """
+    Compute results for all available (case, model) combinations.
+
+    Returns
+    -------
+    dict[tuple[str, str], _Results]
+        Results indexed by ``(case_key, model_name)``.
+    """
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+    results: dict[tuple[str, str], _Results] = {}
+    for case in CASES:
+        for model in MODELS:
+            structs = _load_structs(case.key, model)
+            if structs is None:
+                continue
+            computed = _compute(structs)
+            _write_assets(case.key, model, computed)
+            _write_plots(case.key, case.label, model, computed)
+            results[(case.key, model)] = computed
+    return results
+
+
+@pytest.fixture
+def metrics_dict(
+    case_results: dict[tuple[str, str], _Results],
+) -> dict[str, dict[str, float]]:
+    """
+    Build raw metric dict for the benchmark table.
+
+    Parameters
+    ----------
+    case_results
+        Results indexed by ``(case_key, model_name)``.
+
+    Returns
+    -------
+    dict[str, dict[str, float]]
+        Metric values for all models.
+    """
+    metrics: dict[str, dict[str, float]] = {}
+    for case in CASES:
+        energy_key = f"Energy MAE ({case.label})"
+        force_key = f"Force MAE ({case.label})"
+        metrics[energy_key] = {}
+        metrics[force_key] = {}
+        for model in MODELS:
+            result = case_results.get((case.key, model))
+            if result is None:
+                continue
+            metrics[energy_key][model] = result.energy_mae
+            metrics[force_key][model] = result.force_mae
+    return metrics
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / f"{BENCHMARK_DIR}_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    weights=DEFAULT_WEIGHTS,
+)
+def metrics(metrics_dict: dict[str, dict[str, float]]) -> dict[str, dict]:
+    """
+    Build the benchmark table JSON.
+
+    Parameters
+    ----------
+    metrics_dict
+        Metric values for all models.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return metrics_dict
+
+
+def test_si_defects(metrics: dict[str, dict]) -> None:
+    """
+    Run analysis for Si defects DFT singlepoints.
+
+    Parameters
+    ----------
+    metrics
+        Benchmark metrics table.
+    """
+    return
diff --git a/ml_peg/analysis/nebs/si_defects/metrics.yml b/ml_peg/analysis/nebs/si_defects/metrics.yml
new file mode 100644
index 000000000..fdd5cb755
--- /dev/null
+++ b/ml_peg/analysis/nebs/si_defects/metrics.yml
@@ -0,0 +1,37 @@
+metrics:
+  Energy MAE (64):
+    good: 0.02
+    bad: 0.2
+    unit: eV
+    tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)."
+    level_of_theory: PBE
+  Force MAE (64):
+    good: 0.05
+    bad: 0.5
+    unit: eV/Å
+    tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB."
+    level_of_theory: PBE
+  Energy MAE (216):
+    good: 0.02
+    bad: 0.2
+    unit: eV
+    tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)."
+    level_of_theory: PBE
+  Force MAE (216):
+    good: 0.05
+    bad: 0.5
+    unit: eV/Å
+    tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB."
+    level_of_theory: PBE
+  Energy MAE (216 di-to-single):
+    good: 0.02
+    bad: 0.2
+    unit: eV
+    tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)."
+    level_of_theory: PBE
+  Force MAE (216 di-to-single):
+    good: 0.05
+    bad: 0.5
+    unit: eV/Å
+    tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB."
+    level_of_theory: PBE
diff --git a/ml_peg/app/nebs/si_defects/app_si_defects.py b/ml_peg/app/nebs/si_defects/app_si_defects.py
new file mode 100644
index 000000000..a15a6936c
--- /dev/null
+++ b/ml_peg/app/nebs/si_defects/app_si_defects.py
@@ -0,0 +1,123 @@
+"""Run app for Si defects benchmark."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+
+BENCHMARK_NAME = "Si defects"
+DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/nebs.html#si-defects"
+DATA_PATH = APP_ROOT / "data" / "nebs" / "si_defects"
+
+
+@dataclass(frozen=True)
+class _Case:
+    """Definition of a single Si defects NEB dataset."""
+
+    key: str
+    label: str
+
+
+CASES: tuple[_Case, ...] = (
+    _Case(key="64_atoms", label="64"),
+    _Case(key="216_atoms", label="216"),
+    _Case(key="216_atoms_di_to_single", label="216 di-to-single"),
+)
+
+
+class SiDefectNebSinglepointsApp(BaseApp):
+    """Si defects benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register interactive callbacks for plot and structure viewing."""
+        scatter_plots: dict[str, dict] = {}
+
+        for model in MODELS:
+            model_plots: dict[str, object] = {}
+            for case in CASES:
+                energy_plot_path = (
+                    DATA_PATH / f"figure_{model}_{case.key}_energy_error.json"
+                )
+                force_plot_path = (
+                    DATA_PATH / f"figure_{model}_{case.key}_force_rms.json"
+                )
+                if energy_plot_path.exists():
+                    model_plots[f"Energy MAE ({case.label})"] = read_plot(
+                        energy_plot_path,
+                        id=f"{BENCHMARK_NAME}-{model}-{case.key}-energy-figure",
+                    )
+                if force_plot_path.exists():
+                    model_plots[f"Force MAE ({case.label})"] = read_plot(
+                        force_plot_path,
+                        id=f"{BENCHMARK_NAME}-{model}-{case.key}-force-figure",
+                    )
+            if model_plots:
+                scatter_plots[model] = model_plots
+
+        plot_from_table_cell(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            cell_to_plot=scatter_plots,
+        )
+
+        for model in scatter_plots:
+            for case in CASES:
+                structs = (
+                    f"assets/nebs/si_defects/{case.key}/{model}/{model}-neb-band.extxyz"
+                )
+                struct_from_scatter(
+                    scatter_id=f"{BENCHMARK_NAME}-{model}-{case.key}-energy-figure",
+                    struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+                    structs=structs,
+                    mode="traj",
+                )
+                struct_from_scatter(
+                    scatter_id=f"{BENCHMARK_NAME}-{model}-{case.key}-force-figure",
+                    struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+                    structs=structs,
+                    mode="traj",
+                )
+
+
+def get_app() -> SiDefectNebSinglepointsApp:
+    """
+    Get Si defects app.
+
+    Returns
+    -------
+    SiDefectNebSinglepointsApp
+        App instance.
+    """
+    return SiDefectNebSinglepointsApp(
+        name=BENCHMARK_NAME,
+        description=(
+            "Energy/force MAE of MLIPs on fixed Si interstitial migration NEB images, "
+            "referenced to DFT singlepoints."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "si_defects_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    # Use APP_ROOT/data as assets root so `assets/nebs/...` resolves correctly.
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+    benchmark_app = get_app()
+    full_app.layout = benchmark_app.layout
+    benchmark_app.register_callbacks()
+    full_app.run(port=8060, debug=True)
diff --git a/ml_peg/calcs/nebs/si_defects/calc_si_defects.py b/ml_peg/calcs/nebs/si_defects/calc_si_defects.py
new file mode 100644
index 000000000..879087008
--- /dev/null
+++ b/ml_peg/calcs/nebs/si_defects/calc_si_defects.py
@@ -0,0 +1,157 @@
+"""Evaluate MLIPs on Si defects DFT singlepoints (energies + forces)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+import sys
+from typing import Any
+
+from ase.atoms import Atoms
+from ase.io import read, write
+import numpy as np
+import pytest
+from tqdm.auto import tqdm
+
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+DATA_PATH = Path(__file__).parent / "data"
+OUT_PATH = Path(__file__).parent / "outputs"
+S3_KEY = "inputs/nebs/si_defects/si_defects.zip"
+S3_FILENAME = "si_defects.zip"
+
+
+@dataclass(frozen=True)
+class _Case:
+    """Definition of a single Si defects NEB dataset."""
+
+    key: str
+    ref_file: str
+
+
+CASES: tuple[_Case, ...] = (
+    _Case(key="64_atoms", ref_file="64_atoms.extxyz"),
+    _Case(key="216_atoms", ref_file="216_atoms.extxyz"),
+    _Case(key="216_atoms_di_to_single", ref_file="216_atoms_di_to_single.extxyz"),
+)
+
+
+def _read_frames(path: Path) -> list[Atoms]:
+    """
+    Read all frames from an extxyz trajectory.
+
+    Parameters
+    ----------
+    path
+        Path to extxyz trajectory.
+
+    Returns
+    -------
+    list[ase.atoms.Atoms]
+        Frames as ASE atoms objects.
+    """
+    frames = read(path, index=":")
+    if not isinstance(frames, list) or not frames:
+        raise ValueError(f"No frames found in {path}")
+    return frames
+
+
+def _ref_energy_ev(atoms: Atoms) -> float:
+    """
+    Extract reference (DFT) energy in eV from a frame.
+
+    Parameters
+    ----------
+    atoms
+        Frame with ``ref_energy_ev`` in ``atoms.info``.
+
+    Returns
+    -------
+    float
+        Reference energy in eV.
+    """
+    if "ref_energy_ev" not in atoms.info:
+        raise KeyError("Missing ref_energy_ev in reference trajectory.")
+    return float(atoms.info["ref_energy_ev"])
+
+
+def _ref_forces(atoms: Atoms) -> np.ndarray:
+    """
+    Extract reference (DFT) forces in eV/Å from a frame.
+
+    Parameters
+    ----------
+    atoms
+        Frame with ``ref_forces`` in ``atoms.arrays``.
+
+    Returns
+    -------
+    numpy.ndarray
+        Reference forces array with shape ``(n_atoms, 3)``.
+    """
+    if "ref_forces" not in atoms.arrays:
+        raise KeyError("Missing ref_forces in reference trajectory.")
+    return np.asarray(atoms.arrays["ref_forces"], dtype=float)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_si_defects(mlip: tuple[str, Any]) -> None:
+    """
+    Compare MLIP energies/forces to DFT along fixed NEB images.
+
+    Outputs per-case trajectories containing:
+    - ref_energy_ev, ref_forces
+    - pred_energy_ev, pred_forces
+
+    Parameters
+    ----------
+    mlip
+        Tuple of ``(model_name, model)`` as provided by ``MODELS.items()``.
+    """
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+    local_files_present = all((DATA_PATH / case.ref_file).exists() for case in CASES)
+    if local_files_present:
+        data_dir = DATA_PATH
+    else:
+        data_dir = download_s3_data(key=S3_KEY, filename=S3_FILENAME) / "si_defects"
+
+    for case in CASES:
+        frames = _read_frames(data_dir / case.ref_file)
+        out_dir = OUT_PATH / case.key / model_name
+        out_dir.mkdir(parents=True, exist_ok=True)
+
+        results: list[Atoms] = []
+        it = frames
+        # Only show tqdm progress bars in an interactive terminal; otherwise the
+        # carriage-return updates tend to spam CI/log outputs.
+        if sys.stderr.isatty():
+            it = tqdm(
+                frames,
+                desc=f"{model_name} {case.key}",
+                unit="img",
+                leave=False,
+            )
+        for atoms in it:
+            ref_energy_ev = _ref_energy_ev(atoms)
+            ref_forces = _ref_forces(atoms)
+
+            atoms_pred = atoms.copy()
+            atoms_pred.calc = calc
+
+            out_atoms = atoms.copy()
+            out_atoms.info["ref_energy_ev"] = ref_energy_ev
+            out_atoms.arrays["ref_forces"] = ref_forces
+            out_atoms.info["pred_energy_ev"] = float(atoms_pred.get_potential_energy())
+            out_atoms.arrays["pred_forces"] = np.asarray(
+                atoms_pred.get_forces(), dtype=float
+            )
+            results.append(out_atoms)
+
+        write(out_dir / "si_defects.extxyz", results)