From 31d192151558cb5542ba81410f5f2268f0a7fd5b Mon Sep 17 00:00:00 2001
From: Domantas Kuryla <dk584@icepc16>
Date: Thu, 19 Feb 2026 14:56:55 +0000
Subject: [PATCH 1/8] 3dTMV calc

---
 ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py | 161 ++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py

diff --git a/ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py b/ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py
new file mode 100644
index 00000000..da903ec9
--- /dev/null
+++ b/ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py
@@ -0,0 +1,161 @@
+"""
+Compute the 3dTMV dataset for transition metal complex vertical ionization energies.
+
+Journal of Chemical Theory and Computation 2023 19 (18), 6208-6225
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from ase import units
+from ase.io import read, write
+import pytest
+from tqdm import tqdm
+
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+KCAL_TO_EV = units.kcal / units.mol
+
+OUT_PATH = Path(__file__).parent / "outputs"
+
+# Molecular data from Main Paper Table 1
+MOLECULAR_DATA = {
+    1: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    2: {"charge_ox": 1, "charge_in": 0, "mult_ox": 1, "mult_in": 2, "subset": "SR"},
+    3: {"charge_ox": 1, "charge_in": 0, "mult_ox": 4, "mult_in": 3, "subset": "SR"},
+    4: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    5: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    6: {"charge_ox": 2, "charge_in": 1, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    7: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    8: {"charge_ox": 2, "charge_in": 1, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    9: {"charge_ox": 2, "charge_in": 1, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    10: {"charge_ox": 2, "charge_in": 1, "mult_ox": 2, "mult_in": 1, "subset": "SR"},
+    11: {"charge_ox": 2, "charge_in": 1, "mult_ox": 1, "mult_in": 2, "subset": "SR"},
+    12: {"charge_ox": 2, "charge_in": 1, "mult_ox": 1, "mult_in": 2, "subset": "SR"},
+    13: {"charge_ox": 1, "charge_in": 0, "mult_ox": 1, "mult_in": 2, "subset": "SR/MR"},
+    14: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 3, "subset": "SR/MR"},
+    15: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR/MR"},
+    16: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR/MR"},
+    17: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR/MR"},
+    18: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR/MR"},
+    19: {"charge_ox": 2, "charge_in": 1, "mult_ox": 2, "mult_in": 1, "subset": "SR/MR"},
+    20: {"charge_ox": 1, "charge_in": 0, "mult_ox": 3, "mult_in": 2, "subset": "SR/MR"},
+    21: {"charge_ox": 1, "charge_in": 0, "mult_ox": 3, "mult_in": 2, "subset": "SR/MR"},
+    22: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "SR/MR"},
+    23: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "MR"},
+    24: {"charge_ox": 1, "charge_in": 0, "mult_ox": 3, "mult_in": 4, "subset": "MR"},
+    25: {"charge_ox": 1, "charge_in": 0, "mult_ox": 3, "mult_in": 6, "subset": "MR"},
+    26: {"charge_ox": 1, "charge_in": 0, "mult_ox": 2, "mult_in": 1, "subset": "MR"},
+    27: {"charge_ox": 0, "charge_in": -1, "mult_ox": 2, "mult_in": 3, "subset": "MR"},
+    28: {"charge_ox": 0, "charge_in": -1, "mult_ox": 1, "mult_in": 2, "subset": "MR"},
+}
+
+# ph-AFQMC reference IPs from SM Table S9 (kcal/mol)
+REFERENCE_IES = {
+    # SR subset (1-12)
+    1: 188.4,
+    2: 158.3,
+    3: 119.6,
+    4: 152.3,
+    5: 142.2,
+    6: 315.9,
+    7: 191.1,
+    8: 259.6,
+    9: 276.2,
+    10: 284.1,
+    11: 198.5,
+    12: 230.3,
+    # SR/MR subset (13-22)
+    13: 120.9,
+    14: 148.1,
+    15: 140.4,
+    16: 164.1,
+    17: 130.9,
+    18: 136.3,
+    19: 300.7,
+    20: 186.4,
+    21: 125.3,
+    22: 161.2,
+    # MR subset (23-28)
+    23: 198.9,
+    24: 166.0,
+    25: 215.8,
+    26: 192.9,
+    27: 68.6,
+    28: 43.6,
+}
+
+
+def get_atoms(data_path, complex_id: int):
+    """
+    Read the structure of a complex from its ``struc.xyz`` file.
+
+    Parameters
+    ----------
+    data_path
+        Directory containing one subdirectory per complex identifier.
+    complex_id
+        Identifier of the complex, from 1 to 28.
+
+    Returns
+    -------
+    Atoms
+        Atoms object of the system, as read from file.
+    """
+    return read(data_path / str(complex_id) / "struc.xyz")
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_3dtmv(mlip: tuple[str, Any]) -> None:
+    """
+    Run 3dTMV benchmark.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+    data_path = (
+        download_s3_data(
+            filename="3dTMV.zip",
+            key="inputs/tm_complexes/3dTMV/3dTMV.zip",
+        )
+        / "3dTMV"
+    )
+    # Read in data and attach calculator
+    calc = model.get_calculator()
+    # Add D3 calculator for this test
+    calc = model.add_d3_calculator(calc)
+
+    for complex_id in tqdm(range(1, 29)):
+        atoms = get_atoms(data_path, complex_id)
+        model_ion_energy = 0
+        # Get oxidized complex energy
+        atoms.info["charge"] = MOLECULAR_DATA[complex_id]["charge_ox"]
+        atoms.info["spin"] = MOLECULAR_DATA[complex_id]["mult_ox"]
+        atoms.calc = calc
+        model_ion_energy += atoms.get_potential_energy()
+        # Get initial complex energy
+        atoms.info["charge"] = MOLECULAR_DATA[complex_id]["charge_in"]
+        atoms.info["spin"] = MOLECULAR_DATA[complex_id]["mult_in"]
+        atoms.calc = calc
+        model_ion_energy -= atoms.get_potential_energy()
+
+        atoms.info.update(
+            {
+                "model_ionization_energy": model_ion_energy,
+                "ref_ionization_energy": REFERENCE_IES[complex_id] * KCAL_TO_EV,
+            }
+        )
+        write_dir = OUT_PATH / model_name
+        write_dir.mkdir(parents=True, exist_ok=True)
+        write(write_dir / f"{complex_id}.xyz", atoms)

From c1c007058ad1b59a12d9ee0bf9bfac7d02c22e4a Mon Sep 17 00:00:00 2001
From: Domantas Kuryla <dk584@icepc16>
Date: Thu, 19 Feb 2026 14:58:27 +0000
Subject: [PATCH 2/8] 3dTMV analysis

---
 .../tm_complexes/3dTMV/analyse_3dTMV.py       | 313 ++++++++++++++++++
 .../analysis/tm_complexes/3dTMV/metrics.yml   |  25 ++
 2 files changed, 338 insertions(+)
 create mode 100644 ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py
 create mode 100644 ml_peg/analysis/tm_complexes/3dTMV/metrics.yml

diff --git a/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py b/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py
new file mode 100644
index 00000000..e12602c9
--- /dev/null
+++ b/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py
@@ -0,0 +1,313 @@
+"""Analyse the 3dTMV benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase import units
+from ase.io import read, write
+import pytest
+
+from ml_peg.analysis.utils.decorators import (
+    build_table,
+    plot_parity,
+)
+from ml_peg.analysis.utils.utils import (
+    build_d3_name_map,
+    load_metrics_config,
+    mae,
+)
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+D3_MODEL_NAMES = build_d3_name_map(MODELS)
+
+EV_TO_KCAL = units.mol / units.kcal
+CALC_PATH = CALCS_ROOT / "tm_complexes" / "3dTMV" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "tm_complexes" / "3dTMV"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+SUBSETS = {
+    1: "SR",
+    2: "SR",
+    3: "SR",
+    4: "SR",
+    5: "SR",
+    6: "SR",
+    7: "SR",
+    8: "SR",
+    9: "SR",
+    10: "SR",
+    11: "SR",
+    12: "SR",
+    13: "SR/MR",
+    14: "SR/MR",
+    15: "SR/MR",
+    16: "SR/MR",
+    17: "SR/MR",
+    18: "SR/MR",
+    19: "SR/MR",
+    20: "SR/MR",
+    21: "SR/MR",
+    22: "SR/MR",
+    23: "MR",
+    24: "MR",
+    25: "MR",
+    26: "MR",
+    27: "MR",
+    28: "MR",
+}
+
+
+def labels():
+    """
+    Get complex ids.
+
+    Returns
+    -------
+    list
+        IDs of the complexes.
+    """
+    return list(range(1, 29))
+
+
+@pytest.fixture
+def interaction_energies() -> dict[str, list]:
+    """
+    Get interaction energies for all systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of all reference and predicted interaction energies.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+
+    ref_stored = False
+
+    for model_name in MODELS:
+        for label in range(1, 29):
+            atoms = read(CALC_PATH / model_name / f"{label}.xyz")
+            if not ref_stored:
+                results["ref"].append(atoms.info["ref_ionization_energy"] * EV_TO_KCAL)
+
+            results[model_name].append(
+                atoms.info["model_ionization_energy"] * EV_TO_KCAL
+            )
+
+            # Write structures for app
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / f"{label}.xyz", atoms)
+
+        ref_stored = True
+    return results
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_3dtmv.json",
+    title="Ionization energies",
+    x_label="Predicted ionization energy / kcal/mol",
+    y_label="Reference ionization energy / kcal/mol",
+    hoverdata={
+        "Labels": labels(),
+    },
+)
+def ionization_energies() -> dict[str, list]:
+    """
+    Get ionization energies for all systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of all reference and predicted energies.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        for complex_id in labels():
+            atoms = read(CALC_PATH / model_name / f"{complex_id}.xyz")
+            model_ion_energy = atoms.info["model_ionization_energy"]
+            ref_ion_energy = atoms.info["ref_ionization_energy"]
+            # Write structures for app
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / f"{complex_id}.xyz", atoms)
+            results[model_name].append(model_ion_energy * EV_TO_KCAL)
+            if not ref_stored:
+                results["ref"].append(ref_ion_energy * EV_TO_KCAL)
+        ref_stored = True
+    return results
+
+
+@pytest.fixture
+def sr_mae(interaction_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for SR subset.
+
+    Parameters
+    ----------
+    interaction_energies
+        Dictionary of reference and predicted energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted energy errors for all models.
+    """
+    results = {}
+
+    for model_name in MODELS:
+        subsampled_ref_e = [
+            interaction_energies["ref"][i] for i in labels() if SUBSETS[i] == "SR"
+        ]
+        subsampled_model_e = [
+            interaction_energies[model_name][i] for i in labels() if SUBSETS[i] == "SR"
+        ]
+        results[model_name] = mae(subsampled_ref_e, subsampled_model_e)
+    return results
+
+
+@pytest.fixture
+def mr_mae(interaction_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for MR subset.
+
+    Parameters
+    ----------
+    interaction_energies
+        Dictionary of reference and predicted energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted energy errors for all models.
+    """
+    results = {}
+
+    for model_name in MODELS:
+        subsampled_ref_e = [
+            interaction_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "MR"
+        ]
+        subsampled_model_e = [
+            interaction_energies[model_name][i - 1]
+            for i in labels()
+            if SUBSETS[i] == "MR"
+        ]
+        results[model_name] = mae(subsampled_ref_e, subsampled_model_e)
+    return results
+
+
+@pytest.fixture
+def sr_mr_mae(interaction_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for SR/MR subset.
+
+    Parameters
+    ----------
+    interaction_energies
+        Dictionary of reference and predicted energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted energy errors for all models.
+    """
+    results = {}
+
+    for model_name in MODELS:
+        subsampled_ref_e = [
+            interaction_energies["ref"][i] for i in labels() if SUBSETS[i] == "SR/MR"
+        ]
+        subsampled_model_e = [
+            interaction_energies[model_name][i]
+            for i in labels()
+            if SUBSETS[i] == "SR/MR"
+        ]
+        results[model_name] = mae(subsampled_ref_e, subsampled_model_e)
+    return results
+
+
+@pytest.fixture
+def total_mae(interaction_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for all conmplexes.
+
+    Parameters
+    ----------
+    interaction_energies
+        Dictionary of reference and predicted energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted energy errors for all models.
+    """
+    results = {}
+
+    for model_name in MODELS:
+        results[model_name] = mae(
+            interaction_energies["ref"], interaction_energies[model_name]
+        )
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "3dtmv_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=D3_MODEL_NAMES,
+)
+def metrics(
+    total_mae: dict[str, float],
+    sr_mae: dict[str, float],
+    mr_mae: dict[str, float],
+    sr_mr_mae: dict[str, float],
+) -> dict[str, dict]:
+    """
+    Get all metrics.
+
+    Parameters
+    ----------
+    total_mae
+        Mean absolute errors for all models, all complexes.
+    sr_mae
+        Mean absolute errors for all models, single-reference complexes.
+    mr_mae
+        Mean absolute errors for all models, multi-reference complexes.
+    sr_mr_mae
+        Mean absolute errors for all models, intermediate complexes.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "Overall MAE": total_mae,
+        "SR MAE": sr_mae,
+        "MR MAE": mr_mae,
+        "SR/MR MAE": sr_mr_mae,
+    }
+
+
+def test_3dtmv(metrics: dict[str, dict]) -> None:
+    """
+    Run 3dTMV test.
+
+    Parameters
+    ----------
+    metrics
+        All 3dTMV metric names and dictionary of values for each model.
+    """
+    return
diff --git a/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml b/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml
new file mode 100644
index 00000000..181f5fca
--- /dev/null
+++ b/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml
@@ -0,0 +1,25 @@
+metrics:
+  Overall MAE:
+    good: 0.0
+    bad: 50
+    unit: kcal/mol
+    tooltip: Mean Absolute Error for all systems
+    level_of_theory: ph-AFQMC
+  SR MAE:
+    good: 0.0
+    bad: 50
+    unit: kcal/mol
+    tooltip: Mean Absolute Error for all systems
+    level_of_theory: ph-AFQMC
+  MR MAE:
+    good: 0.0
+    bad: 50
+    unit: kcal/mol
+    tooltip: Mean Absolute Error for all systems
+    level_of_theory: ph-AFQMC
+  SR/MR MAE:
+    good: 0.0
+    bad: 50
+    unit: kcal/mol
+    tooltip: Mean Absolute Error for all systems
+    level_of_theory: ph-AFQMC

From af1aa3ae0e74bf414a637c748f4f632bbf63f100 Mon Sep 17 00:00:00 2001
From: Domantas Kuryla <dk584@icepc16>
Date: Thu, 19 Feb 2026 14:59:10 +0000
Subject: [PATCH 3/8] 3dTMV app

---
 ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py | 92 ++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py

diff --git a/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
new file mode 100644
index 00000000..a9c46b7a
--- /dev/null
+++ b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
@@ -0,0 +1,92 @@
+"""Run 3dTMV ionization energies app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "BH9"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/"
+    "molecular.html#3dTMV-tm-complexes"
+)
+DATA_PATH = APP_ROOT / "data" / "tm_complexes" / "3dTMV"
+
+
+class Benchmark3dTMVApp(BaseApp):
+    """3dTMV benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "figure_3dtmv.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        model_dir = DATA_PATH / MODELS[0]
+        if model_dir.exists():
+            # Note: sorting different to rxn_count order in calc
+            ts_files = sorted(model_dir.glob("*.xyz"))
+            structs = [
+                f"assets/tm_complexes/3dTMV/{MODELS[0]}/{ts_file.name}"
+                for ts_file in ts_files
+            ]
+        else:
+            structs = []
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> Benchmark3dTMVApp:
+    """
+    Get 3dTMV benchmark app layout and callback registration.
+
+    Returns
+    -------
+    Benchmark3dTMVApp
+        Benchmark layout and callback registration.
+    """
+    return Benchmark3dTMVApp(
+        name=BENCHMARK_NAME,
+        description=(
+            "Performance in predicting vertical ionization energies for the "
+            "3dTMV dataset of 28 transition metal complexes. "
+            "Reference data from ph-AFQMC calculations."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "3dtmv_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+    benchmark_app = get_app()
+    full_app.layout = benchmark_app.layout
+    benchmark_app.register_callbacks()
+    full_app.run(port=8071, debug=True)

From 502d1328682ccdece308186d90e63a2c82b946b7 Mon Sep 17 00:00:00 2001
From: Domantas Kuryla <dk584@icepc16>
Date: Thu, 19 Feb 2026 17:15:41 +0000
Subject: [PATCH 4/8] 3dTMV docs

---
 .../user_guide/benchmarks/tm_complexes.rst    | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 docs/source/user_guide/benchmarks/tm_complexes.rst

diff --git a/docs/source/user_guide/benchmarks/tm_complexes.rst b/docs/source/user_guide/benchmarks/tm_complexes.rst
new file mode 100644
index 00000000..e1e990ab
--- /dev/null
+++ b/docs/source/user_guide/benchmarks/tm_complexes.rst
@@ -0,0 +1,41 @@
+==========================
+Transition Metal Complexes
+==========================
+
+3dTMV
+=======
+
+Summary
+-------
+
+Performance in predicting vertical ionization energies for 28 transition metal
+complexes.
+
+Metrics
+-------
+
+1. Ionization energy error
+
+For each complex, the vertical ionization energy is calculated as the difference in
+energy between its oxidized state and its initial state, which differ by one electron
+and in spin multiplicity. This is compared to the ph-AFQMC reference ionization energy.
+
+Computational cost
+------------------
+
+Low: tests are likely to take minutes to run on CPU.
+
+Data availability
+-----------------
+
+Input structures:
+
+* Toward Benchmark-Quality Ab Initio Predictions for 3d Transition Metal Electrocatalysts: A Comparison of CCSD(T) and ph-AFQMC.
+  Hagen Neugebauer, Hung T. Vuong, John L. Weber, Richard A. Friesner, James Shee, and Andreas Hansen.
+  Journal of Chemical Theory and Computation 2023 19 (18), 6208-6225.
+  DOI: 10.1021/acs.jctc.3c00617
+
+Reference data:
+
+* Same as input data
+* ph-AFQMC level of theory: Auxiliary-Field Quantum Monte Carlo.

From c0d4c43b552f1b21fc67ed3855782f4774dc04e6 Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Thu, 26 Feb 2026 22:54:16 +0000
Subject: [PATCH 5/8] update app name from BH9 to 3dTMV

---
 ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
index a9c46b7a..ca1dd86e 100644
--- a/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
+++ b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
@@ -16,7 +16,7 @@
 from ml_peg.models.models import current_models
 
 MODELS = get_model_names(current_models)
-BENCHMARK_NAME = "BH9"
+BENCHMARK_NAME = "3dTMV"
 DOCS_URL = (
     "https://ddmms.github.io/ml-peg/user_guide/benchmarks/"
     "molecular.html#3dTMV-tm-complexes"

From 4275e3b581276b9559a14e9afaa64c86676cfdb1 Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Thu, 26 Feb 2026 22:59:02 +0000
Subject: [PATCH 6/8] update  metric tooltips

---
 ml_peg/analysis/tm_complexes/3dTMV/metrics.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml b/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml
index 181f5fca..c0fed546 100644
--- a/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml
+++ b/ml_peg/analysis/tm_complexes/3dTMV/metrics.yml
@@ -9,17 +9,17 @@ metrics:
     good: 0.0
     bad: 50
     unit: kcal/mol
-    tooltip: Mean Absolute Error for all systems
+    tooltip: Mean Absolute Error for the single reference (SR) subset
     level_of_theory: ph-AFQMC
   MR MAE:
     good: 0.0
     bad: 50
     unit: kcal/mol
-    tooltip: Mean Absolute Error for all systems
+    tooltip: Mean Absolute Error for the multireference (MR) subset
     level_of_theory: ph-AFQMC
   SR/MR MAE:
     good: 0.0
     bad: 50
     unit: kcal/mol
-    tooltip: Mean Absolute Error for all systems
+    tooltip: Mean Absolute Error for the SR/MR (intermediate category) subset
     level_of_theory: ph-AFQMC

From 2b29e8348268f7ce0d861f80550b9d2eae7a7509 Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Thu, 26 Feb 2026 23:06:58 +0000
Subject: [PATCH 7/8] fix zero energy issue

---
 ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py | 30 ++++++++-----------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py b/ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py
index da903ec9..9c7e4941 100644
--- a/ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py
+++ b/ml_peg/calcs/tm_complexes/3dTMV/calc_3dTMV.py
@@ -122,7 +122,6 @@ def test_3dtmv(mlip: tuple[str, Any]) -> None:
         Name of model use and model to get calculator.
     """
     model_name, model = mlip
-    calc = model.get_calculator()
 
     data_path = (
         download_s3_data(
@@ -131,24 +130,21 @@ def test_3dtmv(mlip: tuple[str, Any]) -> None:
         )
         / "3dTMV"
     )
-    # Read in data and attach calculator
-    calc = model.get_calculator()
-    # Add D3 calculator for this test
-    calc = model.add_d3_calculator(calc)
-
     for complex_id in tqdm(range(1, 29)):
         atoms = get_atoms(data_path, complex_id)
-        model_ion_energy = 0
-        # Get oxidized complex energy
-        atoms.info["charge"] = MOLECULAR_DATA[complex_id]["charge_ox"]
-        atoms.info["spin"] = MOLECULAR_DATA[complex_id]["mult_ox"]
-        atoms.calc = calc
-        model_ion_energy += atoms.get_potential_energy()
-        # Get initial complex energy
-        atoms.info["charge"] = MOLECULAR_DATA[complex_id]["charge_in"]
-        atoms.info["spin"] = MOLECULAR_DATA[complex_id]["mult_in"]
-        atoms.calc = calc
-        model_ion_energy -= atoms.get_potential_energy()
+        atoms_ox = atoms.copy()
+        atoms_ox.info["charge"] = MOLECULAR_DATA[complex_id]["charge_ox"]
+        atoms_ox.info["spin"] = MOLECULAR_DATA[complex_id]["mult_ox"]
+        atoms_ox.calc = model.add_d3_calculator(model.get_calculator())
+        oxidized_energy = atoms_ox.get_potential_energy()
+
+        atoms_in = atoms.copy()
+        atoms_in.info["charge"] = MOLECULAR_DATA[complex_id]["charge_in"]
+        atoms_in.info["spin"] = MOLECULAR_DATA[complex_id]["mult_in"]
+        atoms_in.calc = model.add_d3_calculator(model.get_calculator())
+        initial_energy = atoms_in.get_potential_energy()
+
+        model_ion_energy = oxidized_energy - initial_energy
 
         atoms.info.update(
             {

From 146514462ebedddf2035c47cf26be1e422c2062e Mon Sep 17 00:00:00 2001
From: joehart2001 <jh2536@cam.ac.uk>
Date: Thu, 26 Feb 2026 23:24:08 +0000
Subject: [PATCH 8/8] fix app and analysis

---
 .../tm_complexes/3dTMV/analyse_3dTMV.py       | 65 +++++--------------
 ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py    |  9 ++-
 2 files changed, 24 insertions(+), 50 deletions(-)

diff --git a/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py b/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py
index e12602c9..ddd6e2db 100644
--- a/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py
+++ b/ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py
@@ -78,39 +78,6 @@ def labels():
     return list(range(1, 29))
 
 
-@pytest.fixture
-def interaction_energies() -> dict[str, list]:
-    """
-    Get interaction energies for all systems.
-
-    Returns
-    -------
-    dict[str, list]
-        Dictionary of all reference and predicted interaction energies.
-    """
-    results = {"ref": []} | {mlip: [] for mlip in MODELS}
-
-    ref_stored = False
-
-    for model_name in MODELS:
-        for label in range(1, 29):
-            atoms = read(CALC_PATH / model_name / f"{label}.xyz")
-            if not ref_stored:
-                results["ref"].append(atoms.info["ref_ionization_energy"] * EV_TO_KCAL)
-
-            results[model_name].append(
-                atoms.info["model_ionization_energy"] * EV_TO_KCAL
-            )
-
-            # Write structures for app
-            structs_dir = OUT_PATH / model_name
-            structs_dir.mkdir(parents=True, exist_ok=True)
-            write(structs_dir / f"{label}.xyz", atoms)
-
-        ref_stored = True
-    return results
-
-
 @pytest.fixture
 @plot_parity(
     filename=OUT_PATH / "figure_3dtmv.json",
@@ -150,13 +117,13 @@ def ionization_energies() -> dict[str, list]:
 
 
 @pytest.fixture
-def sr_mae(interaction_energies) -> dict[str, float]:
+def sr_mae(ionization_energies) -> dict[str, float]:
     """
     Get mean absolute error for SR subset.
 
     Parameters
     ----------
-    interaction_energies
+    ionization_energies
         Dictionary of reference and predicted energies.
 
     Returns
@@ -168,23 +135,25 @@ def sr_mae(interaction_energies) -> dict[str, float]:
 
     for model_name in MODELS:
         subsampled_ref_e = [
-            interaction_energies["ref"][i] for i in labels() if SUBSETS[i] == "SR"
+            ionization_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "SR"
         ]
         subsampled_model_e = [
-            interaction_energies[model_name][i] for i in labels() if SUBSETS[i] == "SR"
+            ionization_energies[model_name][i - 1]
+            for i in labels()
+            if SUBSETS[i] == "SR"
         ]
         results[model_name] = mae(subsampled_ref_e, subsampled_model_e)
     return results
 
 
 @pytest.fixture
-def mr_mae(interaction_energies) -> dict[str, float]:
+def mr_mae(ionization_energies) -> dict[str, float]:
     """
     Get mean absolute error for MR subset.
 
     Parameters
     ----------
-    interaction_energies
+    ionization_energies
         Dictionary of reference and predicted energies.
 
     Returns
@@ -196,10 +165,10 @@ def mr_mae(interaction_energies) -> dict[str, float]:
 
     for model_name in MODELS:
         subsampled_ref_e = [
-            interaction_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "MR"
+            ionization_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "MR"
         ]
         subsampled_model_e = [
-            interaction_energies[model_name][i - 1]
+            ionization_energies[model_name][i - 1]
             for i in labels()
             if SUBSETS[i] == "MR"
         ]
@@ -208,13 +177,13 @@ def mr_mae(interaction_energies) -> dict[str, float]:
 
 
 @pytest.fixture
-def sr_mr_mae(interaction_energies) -> dict[str, float]:
+def sr_mr_mae(ionization_energies) -> dict[str, float]:
     """
     Get mean absolute error for SR/MR subset.
 
     Parameters
     ----------
-    interaction_energies
+    ionization_energies
         Dictionary of reference and predicted energies.
 
     Returns
@@ -226,10 +195,10 @@ def sr_mr_mae(interaction_energies) -> dict[str, float]:
 
     for model_name in MODELS:
         subsampled_ref_e = [
-            interaction_energies["ref"][i] for i in labels() if SUBSETS[i] == "SR/MR"
+            ionization_energies["ref"][i - 1] for i in labels() if SUBSETS[i] == "SR/MR"
         ]
         subsampled_model_e = [
-            interaction_energies[model_name][i]
+            ionization_energies[model_name][i - 1]
             for i in labels()
             if SUBSETS[i] == "SR/MR"
         ]
@@ -238,13 +207,13 @@ def sr_mr_mae(interaction_energies) -> dict[str, float]:
 
 
 @pytest.fixture
-def total_mae(interaction_energies) -> dict[str, float]:
+def total_mae(ionization_energies) -> dict[str, float]:
     """
     Get mean absolute error for all conmplexes.
 
     Parameters
     ----------
-    interaction_energies
+    ionization_energies
         Dictionary of reference and predicted energies.
 
     Returns
@@ -256,7 +225,7 @@ def total_mae(interaction_energies) -> dict[str, float]:
 
     for model_name in MODELS:
         results[model_name] = mae(
-            interaction_energies["ref"], interaction_energies[model_name]
+            ionization_energies["ref"], ionization_energies[model_name]
         )
     return results
 
diff --git a/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
index ca1dd86e..af3290b4 100644
--- a/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
+++ b/ml_peg/app/tm_complexes/3dTMV/app_3dTMV.py
@@ -37,7 +37,7 @@ def register_callbacks(self) -> None:
         model_dir = DATA_PATH / MODELS[0]
         if model_dir.exists():
             # Note: sorting different to rxn_count order in calc
-            ts_files = sorted(model_dir.glob("*.xyz"))
+            ts_files = sorted(model_dir.glob("*.xyz"), key=lambda path: int(path.stem))
             structs = [
                 f"assets/tm_complexes/3dTMV/{MODELS[0]}/{ts_file.name}"
                 for ts_file in ts_files
@@ -48,7 +48,12 @@ def register_callbacks(self) -> None:
         plot_from_table_column(
             table_id=self.table_id,
             plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
-            column_to_plot={"MAE": scatter},
+            column_to_plot={
+                "Overall MAE": scatter,
+                "SR MAE": scatter,
+                "MR MAE": scatter,
+                "SR/MR MAE": scatter,
+            },
         )
 
         struct_from_scatter(