From fc06321f9083807a0e8ba2a77f7ace6608db3510 Mon Sep 17 00:00:00 2001
From: zhonganr <zhonganruo@gmail.com>
Date: Tue, 3 Feb 2026 19:17:08 +0100
Subject: [PATCH] Add new category containing two interstitial benchmarks

---
 .gitignore                                    |   3 +
 docs/source/user_guide/benchmarks/index.rst   |   1 +
 .../user_guide/benchmarks/interstitial.rst    |  92 ++++++++
 .../interstitial/FE1SIA/analyse_FE1SIA.py     | 184 ++++++++++++++++
 .../analysis/interstitial/FE1SIA/metrics.yml  |   7 +
 .../interstitial/Relastab/analyse_Relastab.py | 201 ++++++++++++++++++
 .../interstitial/Relastab/metrics.yml         |  13 ++
 ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py  |  93 ++++++++
 .../app/interstitial/Relastab/app_Relastab.py |  90 ++++++++
 ml_peg/app/interstitial/interstitial.yml      |   2 +
 .../calcs/interstitial/FE1SIA/calc_FE1SIA.py  | 104 +++++++++
 .../interstitial/Relastab/calc_Relastab.py    |  67 ++++++
 uv.lock                                       |  23 ++
 13 files changed, 880 insertions(+)
 create mode 100644 docs/source/user_guide/benchmarks/interstitial.rst
 create mode 100644 ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py
 create mode 100644 ml_peg/analysis/interstitial/FE1SIA/metrics.yml
 create mode 100644 ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py
 create mode 100644 ml_peg/analysis/interstitial/Relastab/metrics.yml
 create mode 100644 ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py
 create mode 100644 ml_peg/app/interstitial/Relastab/app_Relastab.py
 create mode 100644 ml_peg/app/interstitial/interstitial.yml
 create mode 100644 ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py
 create mode 100644 ml_peg/calcs/interstitial/Relastab/calc_Relastab.py

diff --git a/.gitignore b/.gitignore
index 6b68b80ca..bca42cdf8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,3 +41,6 @@ __pycache__/
 ml_peg/app/data/*
 !ml_peg/app/data/onboarding/
 certs/
+calculate_rmsd.py
+DB/
+DB.zip
diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst
index 9f339f4d2..00cda06b9 100644
--- a/docs/source/user_guide/benchmarks/index.rst
+++ b/docs/source/user_guide/benchmarks/index.rst
@@ -12,3 +12,4 @@ Benchmarks
     molecular_crystal
     molecular
     bulk_crystal
+    interstitial
diff --git a/docs/source/user_guide/benchmarks/interstitial.rst b/docs/source/user_guide/benchmarks/interstitial.rst
new file mode 100644
index 000000000..ed0e1514b
--- /dev/null
+++ b/docs/source/user_guide/benchmarks/interstitial.rst
@@ -0,0 +1,92 @@
+============
+Interstitial
+============
+
+FE1SIA
+======
+
+Summary
+-------
+
+This benchmark evaluates the formation energies of a single self-interstitial atom (SIA) in a host lattice. The test includes formation energies for distinct configurations within a supercell.
+
+Metrics
+-------
+
+1. RMSD
+
+Root Mean Square Deviation of formation energies compared to DFT data.
+
+The formation energy of a configuration :math:`E_f` is calculated as:
+
+.. math::
+    E_f = E_{config} - \frac{N_{config}}{N_{bulk}} E_{bulk}
+
+where :math:`E_{config}` is the total energy of the interstitial configuration containing :math:`N_{config}` atoms, and :math:`E_{bulk}` is the energy of the perfect bulk supercell consisting of :math:`N_{bulk}` atoms.
+
+The reference formation energies are derived from DFT calculations provided in the dataset.
+
+Computational cost
+------------------
+
+Low: The geometries are static, requiring only single-point energy calculations for the configurations and the bulk reference.
+
+Data availability
+-----------------
+
+Input structures:
+
+* Subset ``formation_energy`` of the DFT dataset.
+
+  * A. Allera, A. M. Goryaeva, P. Lafourcade, J.-B. Maillet, and M.-C. Marinica,
+    Neighbors map: An efficient atomic descriptor for structural analysis,
+    Computational Materials Science 231 (2024).
+
+Reference data:
+
+* Computed from the subset ``formation_energy`` of the DFT dataset as input structures.
+
+
+Relastab
+========
+
+Summary
+-------
+
+This benchmark evaluates the ability of models to correctly rank the stability of different interstitial configurations. It focuses on the relative energy ordering of distinct interstitial structures in the dataset.
+
+Metrics
+-------
+
+1. Kendall Tau
+
+Kendall rank correlation coefficient (:math:`\tau`): a measure of rank correlation that evaluates the similarity of the orderings of the data. It assesses the number of *concordant* and *discordant* pairs of observations. For every pair of configurations, it checks if the model agrees with the reference on which is more stable.
+A value of 1.0 indicates perfect agreement, 0.0 indicates no correlation, and -1.0 indicates perfect inversion.
+This metric is sensitive to **pairwise ordering** errors. It is particularly robust for small datasets and focuses strictly on whether the relative stability order is preserved.
+
+2. Spearman
+
+Spearman rank correlation coefficient (:math:`\rho`): a non-parametric measure of rank correlation.
+It is defined as the Pearson correlation coefficient between the *rank variables*. It converts raw energies to integer ranks and calculates the linear correlation between them.
+Like Kendall Tau, values range from -1 to 1. An absolute value of 1 indicates a perfect monotonic function.
+While both metrics evaluate ranking, Spearman assesses the general **monotonic relationship**, while Kendall Tau assesses the probability of correct pairwise discrimination.
+
+Computational cost
+------------------
+
+Low: Requires single-point energy calculations for each configuration in the dataset.
+
+Data availability
+-----------------
+
+Input structures:
+
+* Subset ``relative_stability`` of the DFT dataset.
+
+  * A. Allera, A. M. Goryaeva, P. Lafourcade, J.-B. Maillet, and M.-C. Marinica,
+    Neighbors map: An efficient atomic descriptor for structural analysis,
+    Computational Materials Science 231 (2024).
+
+Reference data:
+
+* Computed from the subset ``relative_stability`` of the DFT dataset as input structures.
diff --git a/ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py b/ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py
new file mode 100644
index 000000000..5d28ad0ee
--- /dev/null
+++ b/ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py
@@ -0,0 +1,184 @@
+"""Analyse FE1SIA benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase.io import read, write
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import load_metrics_config, rmse
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+# D3_MODEL_NAMES = build_d3_name_map(MODELS)
+D3_MODEL_NAMES = {m: m for m in MODELS}
+CALC_PATH = CALCS_ROOT / "interstitial" / "FE1SIA" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "interstitial" / "FE1SIA"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+def get_system_names() -> list[str]:
+    """
+    Get list of FE1SIA system names.
+
+    Returns
+    -------
+    list[str]
+        List of system names.
+    """
+    system_names = []
+    # Try to find names from one of the models
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if model_dir.exists():
+            xyz_files = sorted(model_dir.glob("*.xyz"))
+            for xyz in xyz_files:
+                if xyz.stem != "ref":
+                    system_names.append(xyz.stem)
+            if system_names:
+                break
+    return system_names
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_energy.json",
+    title="FE1SIA Formation Energies",
+    x_label="Predicted Formation Energy / eV",
+    y_label="Reference Formation Energy / eV",
+    hoverdata={
+        "System": get_system_names(),
+    },
+)
+def formation_energies() -> dict[str, list]:
+    """
+    Get formation energies for FE1SIA systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted formation energies.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if not model_dir.exists():
+            continue
+
+        # Load bulk (ref)
+        # Note: We rely on calc script having produced ref.xyz
+        bulk_path = model_dir / "ref.xyz"
+        if not bulk_path.exists():
+            # If bulk is missing, we can't compute formation energy properly
+            print(f"Warning: Bulk reference not found for {model_name}")
+            continue
+
+        bulk_atoms = read(bulk_path)
+        e_bulk = bulk_atoms.get_potential_energy()
+        n_bulk = len(bulk_atoms)
+
+        xyz_files = sorted(model_dir.glob("*.xyz"))
+
+        for xyz_file in xyz_files:
+            if xyz_file.name == "ref.xyz":
+                continue
+
+            atoms = read(xyz_file)
+            e_config = atoms.get_potential_energy()
+            n_config = len(atoms)
+
+            # Predicted formation energy
+            # E_f = E_config - (N_config / N_bulk) * E_bulk
+            pred_fe = e_config - (n_config / n_bulk) * e_bulk
+            # print(model_name, pred_fe)
+
+            results[model_name].append(pred_fe)
+
+            # Copy individual structure files to app data directory
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / xyz_file.name, atoms)
+
+            if not ref_stored:
+                results["ref"].append(atoms.info["ref"])
+
+        ref_stored = True
+
+    return results
+
+
+@pytest.fixture
+def fe_errors(formation_energies) -> dict[str, float]:
+    """
+    Get RMSE for formation energies.
+
+    Parameters
+    ----------
+    formation_energies
+        Dictionary of reference and predicted formation energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of RMSEs for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        if formation_energies.get(model_name):
+            results[model_name] = rmse(
+                formation_energies["ref"], formation_energies[model_name]
+            )
+            # print(f"FE1SIA RMSD for {model_name}: {results[model_name]:.6f} eV")
+            # print(formation_energies["ref"], formation_energies[model_name])
+        else:
+            results[model_name] = None
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "fe1sia_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=D3_MODEL_NAMES,
+)
+def metrics(fe_errors: dict[str, float]) -> dict[str, dict]:
+    """
+    Get all FE1SIA metrics.
+
+    Parameters
+    ----------
+    fe_errors
+        RMSE errors for all systems.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "RMSD": fe_errors,
+    }
+
+
+def test_fe1sia_analysis(metrics: dict[str, dict]) -> None:
+    """
+    Run FE1SIA analysis test.
+
+    Parameters
+    ----------
+    metrics
+        All FE1SIA metrics.
+    """
+    return
diff --git a/ml_peg/analysis/interstitial/FE1SIA/metrics.yml b/ml_peg/analysis/interstitial/FE1SIA/metrics.yml
new file mode 100644
index 000000000..5e59bd3af
--- /dev/null
+++ b/ml_peg/analysis/interstitial/FE1SIA/metrics.yml
@@ -0,0 +1,7 @@
+metrics:
+    RMSD:
+        good: 0.0
+        bad: 30.0
+        unit: eV
+        tooltip: "Root Mean Square Deviation of formation energies"
+        level_of_theory: DFT
diff --git a/ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py b/ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py
new file mode 100644
index 000000000..c7b9acd6b
--- /dev/null
+++ b/ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py
@@ -0,0 +1,201 @@
+"""Analyse Relastab benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase.io import read, write
+import pytest
+from scipy.stats import kendalltau, spearmanr
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import load_metrics_config
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+# D3_MODEL_NAMES = build_d3_name_map(MODELS)
+D3_MODEL_NAMES = {m: m for m in MODELS}
+CALC_PATH = CALCS_ROOT / "interstitial" / "Relastab" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "interstitial" / "Relastab"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+def get_system_names() -> list[str]:
+    """
+    Get list of system names.
+
+    Returns
+    -------
+    list[str]
+        List of system names.
+    """
+    system_names = []
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if model_dir.exists():
+            xyz_files = sorted(model_dir.glob("*.xyz"))
+            system_names = [xyz.stem for xyz in xyz_files]
+            if system_names:
+                break
+    return system_names
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_energy.json",
+    title="Relastab Energies (Shifted)",
+    x_label="Predicted Energy (Shifted) / eV",
+    y_label="Reference Energy (Shifted) / eV",
+    hoverdata={
+        "System": get_system_names(),
+    },
+)
+def stability_energies() -> dict[str, list]:
+    """
+    Get energies for Relastab systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted energies.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if not model_dir.exists():
+            continue
+
+        xyz_files = sorted(model_dir.glob("*.xyz"))
+
+        # Temporary lists to sort together
+        temp_data = []
+
+        for xyz_file in xyz_files:
+            atoms = read(xyz_file)
+            e_config = atoms.get_potential_energy()
+            ref_energy = atoms.info.get("ref", None)
+
+            if ref_energy is None:
+                continue
+
+            temp_data.append(
+                {
+                    "name": xyz_file.name,
+                    "atoms": atoms,
+                    "pred": e_config,
+                    "ref": ref_energy,
+                }
+            )
+
+            # Copy structure to output for App
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / xyz_file.name, atoms)
+
+        # To make the plot nicer, mean shifting is usually good for total energies.
+
+        preds = [d["pred"] for d in temp_data]
+        refs = [d["ref"] for d in temp_data]
+
+        if not preds or not refs:
+            continue
+
+        # Shift energies by mean to make them comparable on plot
+        mean_pred = sum(preds) / len(preds)
+        mean_ref = sum(refs) / len(refs)
+
+        results[model_name] = [p - mean_pred for p in preds]
+
+        if not ref_stored:
+            results["ref"] = [r - mean_ref for r in refs]
+            ref_stored = True
+
+    return results
+
+
+@pytest.fixture
+def ranking_metrics(stability_energies) -> dict[str, dict[str, float]]:
+    """
+    Compute ranking metrics (KendallTau and Spearman).
+
+    Parameters
+    ----------
+    stability_energies
+        Dictionary of reference and predicted energies.
+
+    Returns
+    -------
+    dict[str, dict[str, float]]
+        Dictionary of ranking metrics per model.
+    """
+    results = {}
+
+    ref_values = stability_energies.get("ref", [])
+    if not ref_values:
+        return {}
+
+    for model_name in MODELS:
+        pred_values = stability_energies.get(model_name, [])
+        if not pred_values or len(pred_values) != len(ref_values):
+            results[model_name] = {"KendallTau": None, "Spearman": None}
+            continue
+
+        tau, _ = kendalltau(ref_values, pred_values)
+        spearman, _ = spearmanr(ref_values, pred_values)
+
+        results[model_name] = {"KendallTau": tau, "Spearman": spearman}
+
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "relastab_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=D3_MODEL_NAMES,
+)
+def metrics(ranking_metrics: dict[str, dict[str, float]]) -> dict[str, dict]:
+    """
+    Get all metrics.
+
+    Parameters
+    ----------
+    ranking_metrics
+        Dictionary of ranking metrics per model.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    # Reshape for build_table: {MetricName: {ModelName: Value}}
+    reshaped = {"KendallTau": {}, "Spearman": {}}
+
+    for model, scores in ranking_metrics.items():
+        reshaped["KendallTau"][model] = scores.get("KendallTau")
+        reshaped["Spearman"][model] = scores.get("Spearman")
+
+    return reshaped
+
+
+def test_relastab_analysis(metrics: dict[str, dict]) -> None:
+    """
+    Run Relastab analysis test.
+
+    Parameters
+    ----------
+    metrics
+        All Relastab metrics.
+    """
+    return
diff --git a/ml_peg/analysis/interstitial/Relastab/metrics.yml b/ml_peg/analysis/interstitial/Relastab/metrics.yml
new file mode 100644
index 000000000..3277a17cc
--- /dev/null
+++ b/ml_peg/analysis/interstitial/Relastab/metrics.yml
@@ -0,0 +1,13 @@
+metrics:
+    KendallTau:
+        good: 1.0
+        bad: 0.0
+        unit: au
+        tooltip: "Kendall rank correlation coefficient (1.0 is perfect agreement)"
+        level_of_theory: DFT
+    Spearman:
+        good: 1.0
+        bad: 0.0
+        unit: au
+        tooltip: "Spearman rank correlation coefficient"
+        level_of_theory: DFT
diff --git a/ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py b/ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py
new file mode 100644
index 000000000..3f274d22b
--- /dev/null
+++ b/ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py
@@ -0,0 +1,93 @@
+"""Run FE1SIA app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+# Get all models
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "FE1SIA Formation Energies"
+# Update this URL when documentation is added
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/interstitial.html#fe1sia"
+)
+DATA_PATH = APP_ROOT / "data" / "interstitial" / "FE1SIA"
+
+
+class FE1SIAApp(BaseApp):
+    """FE1SIA benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "figure_energy.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        # Assets dir will be parent directory - individual files for each system
+        # Assuming the first model has all structures
+        structs_dir = DATA_PATH / MODELS[0]
+        # Sort files to match the order in the scatter plot (usually sorted by filename)
+        # Note: In analysis we sorted by glob which is lexicographical.
+        structs = [
+            f"assets/interstitial/FE1SIA/{MODELS[0]}/{struct_file.name}"
+            for struct_file in sorted(structs_dir.glob("*.xyz"))
+        ]
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"RMSD": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> FE1SIAApp:
+    """
+    Get FE1SIA benchmark app layout and callback registration.
+
+    Returns
+    -------
+    FE1SIAApp
+        Benchmark layout and callback registration.
+    """
+    return FE1SIAApp(
+        name=BENCHMARK_NAME,
+        description="Formation energies of single interstitial atoms.",
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "fe1sia_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    # Create Dash app
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+
+    # Construct layout and register callbacks
+    fe1sia_app = get_app()
+    full_app.layout = fe1sia_app.layout
+    fe1sia_app.register_callbacks()
+
+    # Run app
+    full_app.run(port=8055, debug=True)
diff --git a/ml_peg/app/interstitial/Relastab/app_Relastab.py b/ml_peg/app/interstitial/Relastab/app_Relastab.py
new file mode 100644
index 000000000..5d5a06b0f
--- /dev/null
+++ b/ml_peg/app/interstitial/Relastab/app_Relastab.py
@@ -0,0 +1,90 @@
+"""Run Relastab app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+# Get all models
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "Relastab Relative Stability"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/interstitial.html#relastab"
+)
+DATA_PATH = APP_ROOT / "data" / "interstitial" / "Relastab"
+
+
+class RelastabApp(BaseApp):
+    """Relastab benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "figure_energy.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        # Assuming first model has structure files
+        structs_dir = DATA_PATH / MODELS[0]
+        # Sort glob to match analysis order
+        structs = [
+            f"assets/interstitial/Relastab/{MODELS[0]}/{struct_file.name}"
+            for struct_file in sorted(structs_dir.glob("*.xyz"))
+        ]
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"KendallTau": scatter, "Spearman": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> RelastabApp:
+    """
+    Get Relastab benchmark app.
+
+    Returns
+    -------
+    RelastabApp
+        Benchmark layout and callback registration.
+    """
+    return RelastabApp(
+        name=BENCHMARK_NAME,
+        description="Relative stability ranking of interstitial Fe configurations.",
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "relastab_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    # Create Dash app
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+
+    # Construct layout and register callbacks
+    rel_app = get_app()
+    full_app.layout = rel_app.layout
+    rel_app.register_callbacks()
+
+    # Run app
+    full_app.run(port=8055, debug=True)
diff --git a/ml_peg/app/interstitial/interstitial.yml b/ml_peg/app/interstitial/interstitial.yml
new file mode 100644
index 000000000..5dda9652f
--- /dev/null
+++ b/ml_peg/app/interstitial/interstitial.yml
@@ -0,0 +1,2 @@
+    title: Interstitial
+    description: Formation energy of interstitials and relative stability.
diff --git a/ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py b/ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py
new file mode 100644
index 000000000..fbacf0948
--- /dev/null
+++ b/ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py
@@ -0,0 +1,104 @@
+"""Run calculations for FE1SIA (formation energy of 1 SIA) tests."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from ase.io import read, write
+import pytest
+
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+DATA_PATH = Path(__file__).parent / "data"
+OUT_PATH = Path(__file__).parent / "outputs"
+
+# Reference energy for bulk
+# E_BULK = -1054.46260251
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_fe1sia(mlip: tuple[str, Any]) -> None:
+    """
+    Run FE1SIA test.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+    data_path = download_s3_data(
+        key="inputs/interstitial/FE1SIA/DB.zip",
+        filename="DB.zip",
+    )
+    formation_energy_dir = data_path / "DB" / "formation_energy"
+
+    # Process all *.poscar files
+    poscar_files = sorted(formation_energy_dir.glob("*.poscar"))
+
+    with open(formation_energy_dir / "ref.poscar") as f:
+        line1 = f.readline()
+        tokens = line1.split()
+        try:
+            energy_bulk = float(tokens[4])
+        except (ValueError, IndexError):
+            print("Skipping ref.poscar: distinct energy value not found in header.")
+            energy_bulk = 0.0  # Fallback or break
+
+        # Read n_bulk from line 6. Skip lines 2-5 (4 lines)
+        for _ in range(4):
+            f.readline()
+        line6 = f.readline()
+        n_bulk = sum(float(x) for x in line6.split())
+
+    for poscar_path in poscar_files:
+        # Parse reference energy E from the first line of the POSCAR file
+        # Format: 111 1 Fe 26 E ...
+        with open(poscar_path) as f:
+            line1 = f.readline()
+            tokens = line1.split()
+            try:
+                energy_ref_raw = float(tokens[4])
+            except (ValueError, IndexError):
+                print(
+                    f"Skipping {poscar_path.name}: distinct energy value not found in "
+                    f"header."
+                )
+                continue
+
+            # Read n_config from line 6. Skip lines 2-5 (4 lines)
+            for _ in range(4):
+                f.readline()
+            line6 = f.readline()
+            n_config = sum(float(x) for x in line6.split())
+
+        # Calculate reference formation energy
+        # Formula: E - (N_config / N_bulk) * E_bulk
+        # For ref.poscar (bulk), formation energy is 0
+        if poscar_path.name == "ref.poscar":
+            ref_formation_energy = 0.0
+        else:
+            ref_formation_energy = energy_ref_raw - (n_config / n_bulk) * energy_bulk
+
+        # Read structure
+        atoms = read(poscar_path, format="vasp")
+
+        # Run calculation
+        atoms.calc = calc
+        atoms.get_potential_energy()
+
+        # Store reference info and system name
+        atoms.info["ref"] = ref_formation_energy
+        atoms.info["system"] = poscar_path.stem
+
+        # Write outputs
+        write_dir = OUT_PATH / model_name
+        write_dir.mkdir(parents=True, exist_ok=True)
+        write(write_dir / f"{poscar_path.stem}.xyz", atoms)
diff --git a/ml_peg/calcs/interstitial/Relastab/calc_Relastab.py b/ml_peg/calcs/interstitial/Relastab/calc_Relastab.py
new file mode 100644
index 000000000..63e9e6c23
--- /dev/null
+++ b/ml_peg/calcs/interstitial/Relastab/calc_Relastab.py
@@ -0,0 +1,67 @@
+"""Calculate Relastab benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from ase.io import read, write
+import pytest
+
+# from ml_peg.calcs import CALCS_ROOT
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+DATA_PATH = Path(__file__).parent / "data"
+OUT_PATH = Path(__file__).parent / "outputs"
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_relastab(mlip: tuple[str, Any]) -> None:
+    """
+    Run Relastab calculations.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+    data_path = download_s3_data(
+        key="inputs/interstitial/relative_stability/DB.zip",
+        filename="DB.zip",
+    )
+    relative_stability_dir = data_path / "DB" / "relative_stability"
+
+    # Process all *.poscar files
+    poscar_files = sorted(relative_stability_dir.glob("*.poscar"))
+
+    for poscar in poscar_files:
+        # Read structure
+        atoms = read(poscar, format="vasp")
+
+        # Extract ref energy
+        with open(poscar) as f:
+            header = f.readline().strip()
+            try:
+                ref_energy = float(header.split()[4])
+                atoms.info["ref"] = ref_energy
+            except (IndexError, ValueError) as e:
+                print(f"Warning: Could not extract energy from header '{header}': {e}")
+                atoms.info["ref"] = None
+
+        # Calculate
+        atoms.calc = calc
+
+        atoms.get_potential_energy()
+        atoms.info["system"] = poscar.stem
+
+        # Write outputs
+        write_dir = OUT_PATH / model_name
+        write_dir.mkdir(parents=True, exist_ok=True)
+        write(write_dir / f"{poscar.stem}.xyz", atoms)
diff --git a/uv.lock b/uv.lock
index f828ab629..b1665744d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2603,6 +2603,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/35/a8/365059bbcd4572cbc41de17fd5b682be5868b218c3c5479071865cab9078/entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f", size = 5294, upload-time = "2022-02-02T21:30:26.024Z" },
 ]
 
+[[package]]
+name = "et-xmlfile"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
+]
+
 [[package]]
 name = "eventlet"
 version = "0.40.4"
@@ -5998,6 +6007,7 @@ dependencies = [
     { name = "mdanalysis", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" },
     { name = "mdanalysis", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" },
     { name = "mlipx" },
+    { name = "openpyxl" },
     { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" },
     { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" },
     { name = "tqdm" },
@@ -6078,6 +6088,7 @@ requires-dist = [
     { name = "mattersim", marker = "extra == 'mattersim'", specifier = "==1.2.0" },
     { name = "mdanalysis" },
     { name = "mlipx", specifier = ">=0.1.5,<0.2" },
+    { name = "openpyxl" },
     { name = "orb-models", marker = "python_full_version < '3.13' and sys_platform != 'win32' and extra == 'orb'", specifier = "==0.5.5" },
     { name = "pet-mad", marker = "sys_platform != 'win32' and extra == 'pet-mad'", specifier = "==1.4.4" },
     { name = "scikit-learn", specifier = ">=1.7.1" },
@@ -7337,6 +7348,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" },
 ]
 
+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "et-xmlfile" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" },
+]
+
 [[package]]
 name = "opt-einsum"
 version = "3.4.0"