Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions docs/source/user_guide/benchmarks/tm_complexes.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
==========================
Transition Metal Complexes
==========================

3dTMV
=======

Summary
-------

Performance in predicting vertical ionization energies for 28 transition metal
complexes.

Metrics
-------

1. Ionization energy error

For each complex, the vertical ionization energy is calculated as the energy difference
between the oxidized state and the initial state of the complex. The two states differ by
one electron and in their spin multiplicity. The predicted value is compared to the
reference ionization energy, which is calculated in the same way.

Computational cost
------------------

Low: tests are likely to take minutes to run on CPU.

Data availability
-----------------

Input structures:

* Toward Benchmark-Quality Ab Initio Predictions for 3d Transition Metal Electrocatalysts: A Comparison of CCSD(T) and ph-AFQMC
Hagen Neugebauer, Hung T. Vuong, John L. Weber, Richard A. Friesner, James Shee, and Andreas Hansen
Journal of Chemical Theory and Computation 2023 19 (18), 6208-6225
DOI: 10.1021/acs.jctc.3c00617

Reference data:

* Same as input data
* Level of theory: phaseless Auxiliary-Field Quantum Monte Carlo (ph-AFQMC).
282 changes: 282 additions & 0 deletions ml_peg/analysis/tm_complexes/3dTMV/analyse_3dTMV.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
"""Analyse the 3dTMV benchmark."""

from __future__ import annotations

from pathlib import Path

from ase import units
from ase.io import read, write
import pytest

from ml_peg.analysis.utils.decorators import (
build_table,
plot_parity,
)
from ml_peg.analysis.utils.utils import (
build_d3_name_map,
load_metrics_config,
mae,
)
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

# Models under test, and the name map handed to the metrics table
MODELS = load_models(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)

# Factor applied to energies read in eV before they are reported in kcal/mol
EV_TO_KCAL = units.mol / units.kcal

# Where the calculation outputs are read from, and where app data is written to
CALC_PATH = CALCS_ROOT.joinpath("tm_complexes", "3dTMV", "outputs")
OUT_PATH = APP_ROOT.joinpath("data", "tm_complexes", "3dTMV")

# Metric thresholds, tooltips and weights live in the YAML file next to this module
METRICS_CONFIG_PATH = Path(__file__).parent / "metrics.yml"
(
    DEFAULT_THRESHOLDS,
    DEFAULT_TOOLTIPS,
    DEFAULT_WEIGHTS,
) = load_metrics_config(METRICS_CONFIG_PATH)

# Subset label for each complex ID: 1-12 are "SR", 13-22 are "SR/MR", 23-28 are "MR"
SUBSETS = {
    **dict.fromkeys(range(1, 13), "SR"),
    **dict.fromkeys(range(13, 23), "SR/MR"),
    **dict.fromkeys(range(23, 29), "MR"),
}


def labels():
    """
    Get the IDs of all complexes in the benchmark.

    Returns
    -------
    list
        Consecutive integer IDs of the complexes, from 1 to 28 inclusive.
    """
    first_id, last_id = 1, 28
    return [*range(first_id, last_id + 1)]


@pytest.fixture
@plot_parity(
    filename=OUT_PATH / "figure_3dtmv.json",
    title="Ionization energies",
    x_label="Predicted ionization energy / kcal/mol",
    y_label="Reference ionization energy / kcal/mol",
    hoverdata={
        "Labels": labels(),
    },
)
def ionization_energies() -> dict[str, list]:
    """
    Collect reference and predicted ionization energies for every complex.

    Each structure that is read is also written to the app data directory of its
    model.

    Returns
    -------
    dict[str, list]
        Reference energies under ``"ref"`` and predicted energies under each model
        name, all scaled by ``EV_TO_KCAL``.
    """
    energies = {"ref": []}
    energies.update({mlip: [] for mlip in MODELS})

    for model_index, model_name in enumerate(MODELS):
        # NOTE(review): indentation was lost in the reviewed source; this assumes
        # the reference list is filled once, from the first model's files only.
        take_ref = model_index == 0
        model_dir = CALC_PATH / model_name
        app_dir = OUT_PATH / model_name

        for complex_id in labels():
            xyz_name = f"{complex_id}.xyz"
            structure = read(model_dir / xyz_name)
            predicted = structure.info["model_ionization_energy"]
            reference = structure.info["ref_ionization_energy"]

            # Write structures for app
            app_dir.mkdir(parents=True, exist_ok=True)
            write(app_dir / xyz_name, structure)

            energies[model_name].append(predicted * EV_TO_KCAL)
            if take_ref:
                energies["ref"].append(reference * EV_TO_KCAL)

    return energies


def _subset_mae(ionization_energies, subset):
    """
    Get mean absolute error over the complexes of a single subset.

    Parameters
    ----------
    ionization_energies
        Dictionary of reference and predicted energies.
    subset
        Subset label to select, matched against the values of ``SUBSETS``.

    Returns
    -------
    dict[str, float]
        Dictionary of predicted energy errors for all models.
    """
    # Complex IDs start at 1 while the energy lists are indexed from 0. The
    # selection is the same for every model, so it is built once up front.
    indices = [i - 1 for i in labels() if SUBSETS[i] == subset]

    results = {}
    for model_name in MODELS:
        subsampled_ref_e = [ionization_energies["ref"][i] for i in indices]
        subsampled_model_e = [ionization_energies[model_name][i] for i in indices]
        results[model_name] = mae(subsampled_ref_e, subsampled_model_e)
    return results


@pytest.fixture
def sr_mae(ionization_energies) -> dict[str, float]:
    """
    Get mean absolute error for SR subset.

    Parameters
    ----------
    ionization_energies
        Dictionary of reference and predicted energies.

    Returns
    -------
    dict[str, float]
        Dictionary of predicted energy errors for all models.
    """
    return _subset_mae(ionization_energies, "SR")


@pytest.fixture
def mr_mae(ionization_energies) -> dict[str, float]:
    """
    Get mean absolute error for MR subset.

    Parameters
    ----------
    ionization_energies
        Dictionary of reference and predicted energies.

    Returns
    -------
    dict[str, float]
        Dictionary of predicted energy errors for all models.
    """
    return _subset_mae(ionization_energies, "MR")


@pytest.fixture
def sr_mr_mae(ionization_energies) -> dict[str, float]:
    """
    Get mean absolute error for SR/MR subset.

    Parameters
    ----------
    ionization_energies
        Dictionary of reference and predicted energies.

    Returns
    -------
    dict[str, float]
        Dictionary of predicted energy errors for all models.
    """
    return _subset_mae(ionization_energies, "SR/MR")


@pytest.fixture
def total_mae(ionization_energies) -> dict[str, float]:
    """
    Get mean absolute error for all complexes.

    Parameters
    ----------
    ionization_energies
        Dictionary of reference and predicted energies.

    Returns
    -------
    dict[str, float]
        Dictionary of predicted energy errors for all models.
    """
    return {
        model_name: mae(ionization_energies["ref"], ionization_energies[model_name])
        for model_name in MODELS
    }


@pytest.fixture
@build_table(
    filename=OUT_PATH / "3dtmv_metrics_table.json",
    metric_tooltips=DEFAULT_TOOLTIPS,
    thresholds=DEFAULT_THRESHOLDS,
    mlip_name_map=D3_MODEL_NAMES,
)
def metrics(
    total_mae: dict[str, float],
    sr_mae: dict[str, float],
    mr_mae: dict[str, float],
    sr_mr_mae: dict[str, float],
) -> dict[str, dict]:
    """
    Gather every metric of the benchmark under its table column name.

    Parameters
    ----------
    total_mae
        Mean absolute errors for all models, all complexes.
    sr_mae
        Mean absolute errors for all models, single-reference complexes.
    mr_mae
        Mean absolute errors for all models, multi-reference complexes.
    sr_mr_mae
        Mean absolute errors for all models, intermediate complexes.

    Returns
    -------
    dict[str, dict]
        Metric names and values for all models.
    """
    column_names = ("Overall MAE", "SR MAE", "MR MAE", "SR/MR MAE")
    column_values = (total_mae, sr_mae, mr_mae, sr_mr_mae)
    return dict(zip(column_names, column_values))


def test_3dtmv(metrics: dict[str, dict]) -> None:
    """
    Run 3dTMV test.

    Requesting the ``metrics`` fixture is the whole test; the body does nothing.

    Parameters
    ----------
    metrics
        All new benchmark metric names and dictionary of values for each model.
    """
25 changes: 25 additions & 0 deletions ml_peg/analysis/tm_complexes/3dTMV/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
metrics:
Overall MAE:
good: 0.0
bad: 50
unit: kcal/mol
tooltip: Mean Absolute Error for all systems
level_of_theory: ph-AFQMC
SR MAE:
good: 0.0
bad: 50
unit: kcal/mol
tooltip: Mean Absolute Error for the single reference (SR) subset
level_of_theory: ph-AFQMC
MR MAE:
good: 0.0
bad: 50
unit: kcal/mol
tooltip: Mean Absolute Error for the multireference (MR) subset
level_of_theory: ph-AFQMC
SR/MR MAE:
good: 0.0
bad: 50
unit: kcal/mol
tooltip: Mean Absolute Error for the SR/MR (intermediate category) subset
level_of_theory: ph-AFQMC
Loading