Skip to content
Open
39 changes: 39 additions & 0 deletions docs/source/user_guide/benchmarks/surfaces.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,42 @@ Reference data:
* S. P. Ong, W. D. Richards, A. Jain, G. Hautier, M. Kocher, S. Cholia, D. Gunter, V. Chevrier, K. A. Persson, G. Ceder, "Python Materials Genomics (pymatgen): A Robust, Open-Source Python Library for Materials Analysis," Comput. Mater. Sci., 2013, 68, 314–319. https://doi.org/10.1016/j.commatsci.2012.10.028

* Tran et al. relaxed the slabs using spin-polarized PBE calculations performed in VASP, with a cutoff energy of 400 eV.


SBH17
=====

Summary
-------

Performance in predicting activation barriers to dissociative chemisorption
from the gas phase, for a set of 16 adsorbate-surface combinations.

Metrics
-------

Error in activation energy (barrier)

For each adsorbate-surface combination, two single points are performed.
One is of the clean surface with the adsorbate in the gas phase far from the surface,
the second is of the transition state structure with the adsorbate at the surface
(minimum barrier geometry to dissociation and chemisorption).


Computational cost
------------------

Very low: tests are likely to take less than a minute to run on CPU.

Data availability
-----------------

Input structures:

* Tchakoua, T., Gerrits, N., Smeets, E. W. F., & Kroes, G. J. (2022). SBH17: Benchmark database of barrier heights for dissociative chemisorption on transition metal surfaces. Journal of Chemical Theory and Computation, 19(1), 245-270.

Reference data:

* Taken from the SI of the publication above (as the main text of the publication discusses mixed levels of theory). Values from the "Medium algorithm" are used in order to be consistent with the structures.

* PBE without dispersion
167 changes: 167 additions & 0 deletions ml_peg/analysis/surfaces/SBH17/analyse_SBH17.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
"""Analyse SBH17 benchmark."""

from __future__ import annotations

from pathlib import Path

from ase.io import read, write
import pytest

from ml_peg.analysis.utils.decorators import build_table, plot_parity
from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

MODELS = get_model_names(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)
CALC_PATH = CALCS_ROOT / "surfaces" / "SBH17" / "outputs"
OUT_PATH = APP_ROOT / "data" / "surfaces" / "SBH17"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)


def get_system_names() -> list[str]:
"""
Get list of SBH17 system names.

Returns
-------
list[str]
List of system names from structure files.
"""
system_names = []
for model_name in MODELS:
model_dir = CALC_PATH / model_name
if model_dir.exists():
xyz_files = sorted(model_dir.glob("*.xyz"))
if xyz_files:
for xyz_file in xyz_files:
atoms = read(xyz_file)
system_names.append(atoms.info["system"])
break
return system_names


@pytest.fixture
@plot_parity(
filename=OUT_PATH / "figure_surface_barriers.json",
title="SBH17 dissociative chemisorption barriers",
x_label="Predicted barrier / eV",
y_label="Reference barrier / eV",
hoverdata={
"System": get_system_names(),
},
)
def surface_barriers() -> dict[str, list]:
"""
Get barriers for all SBH17 systems.

Returns
-------
dict[str, list]
Dictionary of reference and predicted surface barriers.
"""
results = {"ref": []} | {mlip: [] for mlip in MODELS}
ref_stored = False

for model_name in MODELS:
model_dir = CALC_PATH / model_name

if not model_dir.exists():
continue

xyz_files = sorted(model_dir.glob("*.xyz"))
if not xyz_files:
continue

for xyz_file in xyz_files:
structs = read(xyz_file, index=":")

gp_energy = structs[0].get_potential_energy()
system = structs[0].info["system"]
ts_energy = structs[1].get_potential_energy()

barrier = ts_energy - gp_energy
results[model_name].append(barrier)

# Copy individual structure files to app data directory
structs_dir = OUT_PATH / model_name
structs_dir.mkdir(parents=True, exist_ok=True)
write(structs_dir / f"{system}.xyz", structs)

# Store reference energies (only once)
if not ref_stored:
results["ref"].append(structs[0].info["ref"])

ref_stored = True

return results


@pytest.fixture
def sbh17_errors(surface_barriers) -> dict[str, float]:
"""
Get mean absolute error for surface barriers.

Parameters
----------
surface_barriers
Dictionary of reference and predicted surface barriers.

Returns
-------
dict[str, float]
Dictionary of predicted barrier errors for all models.
"""
results = {}
for model_name in MODELS:
if surface_barriers[model_name]:
results[model_name] = mae(
surface_barriers["ref"], surface_barriers[model_name]
)
else:
results[model_name] = None
return results


@pytest.fixture
@build_table(
filename=OUT_PATH / "SBH17_metrics_table.json",
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
# mlip_name_map=D3_MODEL_NAMES,
)
def metrics(sbh17_errors: dict[str, float]) -> dict[str, dict]:
"""
Get all SBH17 metrics.

Parameters
----------
sbh17_errors
Mean absolute errors for all systems.

Returns
-------
dict[str, dict]
Metric names and values for all models.
"""
return {
"MAE": sbh17_errors,
}


def test_sbh17(metrics: dict[str, dict]) -> None:
"""
Run SBH17 test.

Parameters
----------
metrics
All SBH17 metrics.
"""
return
7 changes: 7 additions & 0 deletions ml_peg/analysis/surfaces/SBH17/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metrics:
MAE:
good: 0.02
bad: 0.5
unit: eV
tooltip: "Mean Absolute Error"
level_of_theory: PBE
88 changes: 88 additions & 0 deletions ml_peg/app/surfaces/SBH17/app_SBH17.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Run SBH17 app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import (
plot_from_table_column,
struct_from_scatter,
)
from ml_peg.app.utils.load import read_plot
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

# Get all models
MODELS = get_model_names(current_models)
BENCHMARK_NAME = "SBH17 chemisorption barriers"
DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/surfaces.html#sbh17"
DATA_PATH = APP_ROOT / "data" / "surfaces" / "SBH17"


class SBH17App(BaseApp):
"""SBH17 benchmark app layout and callbacks."""

def register_callbacks(self) -> None:
"""Register callbacks to app."""
scatter = read_plot(
DATA_PATH / "figure_surface_barriers.json",
id=f"{BENCHMARK_NAME}-figure",
)

# Assets dir will be parent directory - individual files for each system
structs_dir = DATA_PATH / MODELS[0]
structs = [
f"assets/surfaces/SBH17/{MODELS[0]}/{struct_file.stem}.xyz"
for struct_file in sorted(structs_dir.glob("*.xyz"))
]

plot_from_table_column(
table_id=self.table_id,
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
column_to_plot={"MAE": scatter},
)

struct_from_scatter(
scatter_id=f"{BENCHMARK_NAME}-figure",
struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
structs=structs,
mode="struct",
)


def get_app() -> SBH17App:
"""
Get SBH17 benchmark app layout and callback registration.

Returns
-------
SBH17App
Benchmark layout and callback registration.
"""
return SBH17App(
name=BENCHMARK_NAME,
description="Barriers to dissociative chemisorption for 16 \
combinations of adsorbates and transition metal surfaces.",
docs_url=DOCS_URL,
table_path=DATA_PATH / "SBH17_metrics_table.json",
extra_components=[
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
],
)


if __name__ == "__main__":
# Create Dash app
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)

# Construct layout and register callbacks
SBH17_app = get_app()
full_app.layout = SBH17_app.layout
SBH17_app.register_callbacks()

# Run app
full_app.run(port=8055, debug=True)
77 changes: 77 additions & 0 deletions ml_peg/calcs/surfaces/SBH17/calc_SBH17.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Run calculations for SBH17 tests."""

from __future__ import annotations

from copy import copy
from pathlib import Path
from typing import Any

from ase.io import read, write
import numpy as np
import pytest
from tqdm import tqdm

from ml_peg.calcs.utils.utils import download_s3_data
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)

DATA_PATH = Path(__file__).parent / "data"
OUT_PATH = Path(__file__).parent / "outputs"


@pytest.mark.parametrize("mlip", MODELS.items())
def test_surface_barrier(mlip: tuple[str, Any]) -> None:
"""
Run SBH17 dissociative chemisorption barrier test.

Note the data directory currently excludes one data point
from the paper, H2Cu111, because the PBE value
is not available.

Parameters
----------
mlip
Name of model use and model to get calculator.
"""
model_name, model = mlip
# Do not want D3 as references here are dispersionless PBE
calc = model.get_calculator()

# Download SBH17 dataset
sbh17_dir = (
download_s3_data(
key="inputs/surfaces/SBH17/SBH17.zip",
filename="SBH17.zip",
)
/ "SBH17"
)

with open(sbh17_dir / "list") as f:
systems = f.read().splitlines()

for system in tqdm(systems, desc="Evaluating models on SBH17 structures"):
gp_path = sbh17_dir / system / "POSCAR-gp"
ts_path = sbh17_dir / system / "POSCAR-ts"
ref_path = sbh17_dir / system / "barrier_pbe"

gp = read(gp_path, index=0, format="vasp")
gp.calc = calc
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a default spin (multiplicity) and charge? See similar changes here: https://github.com/ddmms/ml-peg/pull/384/changes

I very recently discovered Orb's omol model always requires both to be set, annoyingly

gp.get_potential_energy()

ts = read(ts_path, index=0, format="vasp")
ts.calc = copy(calc)
ts.get_potential_energy()

ref = np.loadtxt(ref_path)

gp.info["ref"] = ref
gp.info["system"] = system
ts.info["ref"] = ref
ts.info["system"] = system

# Write output structures
write_dir = OUT_PATH / model_name
write_dir.mkdir(parents=True, exist_ok=True)
write(write_dir / f"{system}.xyz", [gp, ts])