diff --git a/docs/source/user_guide/benchmarks/nebs.rst b/docs/source/user_guide/benchmarks/nebs.rst index e081babf1..9a741711e 100644 --- a/docs/source/user_guide/benchmarks/nebs.rst +++ b/docs/source/user_guide/benchmarks/nebs.rst @@ -46,3 +46,60 @@ Reference data: * Manually taken from https://doi.org/10.1149/1.1633511. * Meta-GGA (Perdew-Wang) exchange correlation functional + + +Si defects +========== + +Summary +------- + +Performance in predicting DFT singlepoint energies and forces along fixed nudged-elastic-band +(NEB) images for a silicon interstitial migration pathway. + +Metrics +------- + +For each of the three NEB datasets (64 atoms, 216 atoms, and 216 atoms di-to-single), MLIPs are +evaluated on the same ordered NEB images as the reference. + +1. Energy MAE + +Mean absolute error (MAE) of *relative* energies along the NEB, shifting image 0 to 0 eV +for both the DFT reference and the MLIP predictions. + +2. Force MAE + +Mean absolute error (MAE) of forces across all atoms and images along the NEB. + +Computational cost +------------------ + +Medium: tests are likely to take several minutes to run on CPU. + +Data availability +----------------- + +Input/reference data: + +* Reference extxyz trajectories (including per-image DFT energies and forces) are distributed as a + separate zip archive and downloaded on-demand from the ML-PEG data store. + The calculation script uses the public ML-PEG S3 bucket to retrieve these inputs. +* The reference DFT energies/forces come from Quantum ESPRESSO (PWscf) single-point calculations + with: + + - Code/version: Quantum ESPRESSO PWSCF v.7.0 + - XC functional: ``input_dft='PBE'`` + - Cutoffs: ``ecutwfc=30.0`` Ry, ``ecutrho=240.0`` Ry + - Smearing: ``occupations='smearing'``, ``smearing='mv'``, ``degauss=0.01`` Ry + - SCF convergence/mixing: ``conv_thr=1.0d-6``, ``electron_maxstep=250``, ``mixing_beta=0.2``, + ``mixing_mode='local-TF'`` + - Diagonalization: ``diagonalization='david'`` + - Symmetry: ``nosym=.false.``, ``noinv=.false.`` (symmetry enabled) + - Pseudopotential: ``Si.pbe-n-kjpaw_psl.1.0.0.UPF`` (PSLibrary) + + K-points by case: + + - 64 atoms: Γ-only (``K_POINTS automatic 1 1 1 0 0 0``) + - 216 atoms: Γ-only (``K_POINTS gamma``) + - 216 atoms di-to-single: Γ-only (``K_POINTS gamma``) diff --git a/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py b/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py new file mode 100644 index 000000000..43ea77029 --- /dev/null +++ b/ml_peg/analysis/nebs/si_defects/analyse_si_defects.py @@ -0,0 +1,294 @@ +"""Analyse Si defects DFT singlepoints (energies + forces).""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from ase.atoms import Atoms +from ase.io import read, write +import numpy as np +import pandas as pd +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_scatter +from ml_peg.analysis.utils.utils import load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) + +BENCHMARK_DIR = "si_defects" +CALC_PATH = CALCS_ROOT / "nebs" / BENCHMARK_DIR / "outputs" +OUT_PATH = APP_ROOT / "data" / "nebs" / BENCHMARK_DIR + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +@dataclass(frozen=True) +class _Case: + """Definition of a single Si defects NEB dataset.""" + + key: str + label: str + + +CASES: tuple[_Case, ...] = ( + _Case(key="64_atoms", label="64"), + _Case(key="216_atoms", label="216"), + _Case(key="216_atoms_di_to_single", label="216 di-to-single"), +) + + +@dataclass(frozen=True) +class _Results: + """Computed errors and assets for one (case, model) pair.""" + + energy_mae: float + force_mae: float + x: list[float] + energy_error: list[float] + force_rms: list[float] + structs: list[Atoms] + + +def _load_structs(case_key: str, model: str) -> list[Atoms] | None: + """ + Load MLIP-evaluated NEB frames (with DFT reference stored). + + Parameters + ---------- + case_key + Case key. + model + Model name. + + Returns + ------- + list[ase.atoms.Atoms] | None + Frames if the calculation output exists, otherwise ``None``. + """ + path = CALC_PATH / case_key / model / f"{BENCHMARK_DIR}.extxyz" + if not path.exists(): + return None + structs = read(path, index=":") + if not isinstance(structs, list) or not structs: + raise ValueError(f"Unexpected output content: {path}") + return structs + + +def _compute(structs: list[Atoms]) -> _Results: + """ + Compute per-image errors and global MAEs. + + Parameters + ---------- + structs + Ordered NEB frames with ``ref_energy_ev``, ``ref_forces``, ``pred_energy_ev``, + and ``pred_forces``. + + Returns + ------- + _Results + Computed errors and frames for downstream plotting/app. + """ + ref_e = np.asarray([float(s.info["ref_energy_ev"]) for s in structs]) + pred_e = np.asarray([float(s.info["pred_energy_ev"]) for s in structs]) + # Compare *relative* energies along the NEB (shift image 0 to 0 eV for both + # reference and predictions). This removes any constant energy offset. + ref_e_rel = ref_e - ref_e[0] + pred_e_rel = pred_e - pred_e[0] + energy_error = pred_e_rel - ref_e_rel + + ref_f = np.asarray([s.arrays["ref_forces"] for s in structs], dtype=float) + pred_f = np.asarray([s.arrays["pred_forces"] for s in structs], dtype=float) + force_error = pred_f - ref_f + + x = [float(s.info.get("image_index", idx)) for idx, s in enumerate(structs)] + return _Results( + energy_mae=mae(ref_e_rel, pred_e_rel), + force_mae=mae(ref_f.reshape(-1), pred_f.reshape(-1)), + x=x, + energy_error=[float(v) for v in energy_error], + force_rms=[float(v) for v in np.sqrt(np.mean(force_error**2, axis=(1, 2)))], + structs=structs, + ) + + +def _write_assets(case_key: str, model: str, results: _Results) -> None: + """ + Write structure trajectory and per-image CSV for the app. + + Parameters + ---------- + case_key + Case key. + model + Model name. + results + Computed errors and frames. + """ + out_dir = OUT_PATH / case_key / model + out_dir.mkdir(parents=True, exist_ok=True) + write(out_dir / f"{model}-neb-band.extxyz", results.structs) + pd.DataFrame( + { + "image": results.x, + "energy_error_ev": results.energy_error, + "force_rms_ev_per_ang": results.force_rms, + } + ).to_csv(out_dir / f"{model}-per_image_errors.csv", index=False) + + +def _write_plots(case_key: str, case_label: str, model: str, results: _Results) -> None: + """ + Write Dash/Plotly JSON plots for one (case, model) pair. + + Parameters + ---------- + case_key + Case key. + case_label + Human-friendly case label. + model + Model name. + results + Computed errors and frames. + """ + OUT_PATH.mkdir(parents=True, exist_ok=True) + + @plot_scatter( + filename=OUT_PATH / f"figure_{model}_{case_key}_energy_error.json", + title=f"Energy error along NEB ({case_label})", + x_label="Image", + y_label="Energy error / eV", + show_line=True, + ) + def plot_energy() -> dict[str, tuple[list[float], list[float]]]: + """ + Plot per-image energy errors for a single model and case. + + Returns + ------- + dict[str, tuple[list[float], list[float]]] + Mapping of model name to x/y arrays. + """ + return {model: [results.x, results.energy_error]} + + @plot_scatter( + filename=OUT_PATH / f"figure_{model}_{case_key}_force_rms.json", + title=f"Force RMS error along NEB ({case_label})", + x_label="Image", + y_label="Force RMS error / (eV/Å)", + show_line=True, + ) + def plot_forces() -> dict[str, tuple[list[float], list[float]]]: + """ + Plot per-image force RMS errors for a single model and case. + + Returns + ------- + dict[str, tuple[list[float], list[float]]] + Mapping of model name to x/y arrays. + """ + return {model: [results.x, results.force_rms]} + + plot_energy() + plot_forces() + + +@pytest.fixture +def case_results() -> dict[tuple[str, str], _Results]: + """ + Compute results for all available (case, model) combinations. + + Returns + ------- + dict[tuple[str, str], _Results] + Results indexed by ``(case_key, model_name)``. + """ + OUT_PATH.mkdir(parents=True, exist_ok=True) + results: dict[tuple[str, str], _Results] = {} + for case in CASES: + for model in MODELS: + structs = _load_structs(case.key, model) + if structs is None: + continue + computed = _compute(structs) + _write_assets(case.key, model, computed) + _write_plots(case.key, case.label, model, computed) + results[(case.key, model)] = computed + return results + + +@pytest.fixture +def metrics_dict( + case_results: dict[tuple[str, str], _Results], +) -> dict[str, dict[str, float]]: + """ + Build raw metric dict for the benchmark table. + + Parameters + ---------- + case_results + Results indexed by ``(case_key, model_name)``. + + Returns + ------- + dict[str, dict[str, float]] + Metric values for all models. + """ + metrics: dict[str, dict[str, float]] = {} + for case in CASES: + energy_key = f"Energy MAE ({case.label})" + force_key = f"Force MAE ({case.label})" + metrics[energy_key] = {} + metrics[force_key] = {} + for model in MODELS: + result = case_results.get((case.key, model)) + if result is None: + continue + metrics[energy_key][model] = result.energy_mae + metrics[force_key][model] = result.force_mae + return metrics + + +@pytest.fixture +@build_table( + filename=OUT_PATH / f"{BENCHMARK_DIR}_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, +) +def metrics(metrics_dict: dict[str, dict[str, float]]) -> dict[str, dict]: + """ + Build the benchmark table JSON. + + Parameters + ---------- + metrics_dict + Metric values for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return metrics_dict + + +def test_si_defects(metrics: dict[str, dict]) -> None: + """ + Run analysis for Si defects DFT singlepoints. + + Parameters + ---------- + metrics + Benchmark metrics table. + """ + return diff --git a/ml_peg/analysis/nebs/si_defects/metrics.yml b/ml_peg/analysis/nebs/si_defects/metrics.yml new file mode 100644 index 000000000..fdd5cb755 --- /dev/null +++ b/ml_peg/analysis/nebs/si_defects/metrics.yml @@ -0,0 +1,37 @@ +metrics: + Energy MAE (64): + good: 0.02 + bad: 0.2 + unit: eV + tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)." + level_of_theory: PBE + Force MAE (64): + good: 0.05 + bad: 0.5 + unit: eV/Å + tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB." + level_of_theory: PBE + Energy MAE (216): + good: 0.02 + bad: 0.2 + unit: eV + tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)." + level_of_theory: PBE + Force MAE (216): + good: 0.05 + bad: 0.5 + unit: eV/Å + tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB." + level_of_theory: PBE + Energy MAE (216 di-to-single): + good: 0.02 + bad: 0.2 + unit: eV + tooltip: "MAE between MLIP and DFT (PBE) relative single-point energies along the NEB (image 0 shifted to 0 eV)." + level_of_theory: PBE + Force MAE (216 di-to-single): + good: 0.05 + bad: 0.5 + unit: eV/Å + tooltip: "MAE between MLIP and DFT (PBE) single-point forces across all atoms and images along the NEB." + level_of_theory: PBE diff --git a/ml_peg/app/nebs/si_defects/app_si_defects.py b/ml_peg/app/nebs/si_defects/app_si_defects.py new file mode 100644 index 000000000..a15a6936c --- /dev/null +++ b/ml_peg/app/nebs/si_defects/app_si_defects.py @@ -0,0 +1,123 @@ +"""Run app for Si defects benchmark.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_cell, struct_from_scatter +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) + +BENCHMARK_NAME = "Si defects" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/nebs.html#si-defects" +DATA_PATH = APP_ROOT / "data" / "nebs" / "si_defects" + + +@dataclass(frozen=True) +class _Case: + """Definition of a single Si defects NEB dataset.""" + + key: str + label: str + + +CASES: tuple[_Case, ...] = ( + _Case(key="64_atoms", label="64"), + _Case(key="216_atoms", label="216"), + _Case(key="216_atoms_di_to_single", label="216 di-to-single"), +) + + +class SiDefectNebSinglepointsApp(BaseApp): + """Si defects benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register interactive callbacks for plot and structure viewing.""" + scatter_plots: dict[str, dict] = {} + + for model in MODELS: + model_plots: dict[str, object] = {} + for case in CASES: + energy_plot_path = ( + DATA_PATH / f"figure_{model}_{case.key}_energy_error.json" + ) + force_plot_path = ( + DATA_PATH / f"figure_{model}_{case.key}_force_rms.json" + ) + if energy_plot_path.exists(): + model_plots[f"Energy MAE ({case.label})"] = read_plot( + energy_plot_path, + id=f"{BENCHMARK_NAME}-{model}-{case.key}-energy-figure", + ) + if force_plot_path.exists(): + model_plots[f"Force MAE ({case.label})"] = read_plot( + force_plot_path, + id=f"{BENCHMARK_NAME}-{model}-{case.key}-force-figure", + ) + if model_plots: + scatter_plots[model] = model_plots + + plot_from_table_cell( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + cell_to_plot=scatter_plots, + ) + + for model in scatter_plots: + for case in CASES: + structs = ( + f"assets/nebs/si_defects/{case.key}/{model}/{model}-neb-band.extxyz" + ) + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-{case.key}-energy-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="traj", + ) + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-{model}-{case.key}-force-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="traj", + ) + + +def get_app() -> SiDefectNebSinglepointsApp: + """ + Get Si defects app. + + Returns + ------- + SiDefectNebSinglepointsApp + App instance. + """ + return SiDefectNebSinglepointsApp( + name=BENCHMARK_NAME, + description=( + "Energy/force MAE of MLIPs on fixed Si interstitial migration NEB images, " + "referenced to DFT singlepoints." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "si_defects_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Use APP_ROOT/data as assets root so `assets/nebs/...` resolves correctly. + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8060, debug=True) diff --git a/ml_peg/calcs/nebs/si_defects/calc_si_defects.py b/ml_peg/calcs/nebs/si_defects/calc_si_defects.py new file mode 100644 index 000000000..879087008 --- /dev/null +++ b/ml_peg/calcs/nebs/si_defects/calc_si_defects.py @@ -0,0 +1,157 @@ +"""Evaluate MLIPs on Si defects DFT singlepoints (energies + forces).""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +import sys +from typing import Any + +from ase.atoms import Atoms +from ase.io import read, write +import numpy as np +import pytest +from tqdm.auto import tqdm + +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" +S3_KEY = "inputs/nebs/si_defects/si_defects.zip" +S3_FILENAME = "si_defects.zip" + + +@dataclass(frozen=True) +class _Case: + """Definition of a single Si defects NEB dataset.""" + + key: str + ref_file: str + + +CASES: tuple[_Case, ...] = ( + _Case(key="64_atoms", ref_file="64_atoms.extxyz"), + _Case(key="216_atoms", ref_file="216_atoms.extxyz"), + _Case(key="216_atoms_di_to_single", ref_file="216_atoms_di_to_single.extxyz"), +) + + +def _read_frames(path: Path) -> list[Atoms]: + """ + Read all frames from an extxyz trajectory. + + Parameters + ---------- + path + Path to extxyz trajectory. + + Returns + ------- + list[ase.atoms.Atoms] + Frames as ASE atoms objects. + """ + frames = read(path, index=":") + if not isinstance(frames, list) or not frames: + raise ValueError(f"No frames found in {path}") + return frames + + +def _ref_energy_ev(atoms: Atoms) -> float: + """ + Extract reference (DFT) energy in eV from a frame. + + Parameters + ---------- + atoms + Frame with ``ref_energy_ev`` in ``atoms.info``. + + Returns + ------- + float + Reference energy in eV. + """ + if "ref_energy_ev" not in atoms.info: + raise KeyError("Missing ref_energy_ev in reference trajectory.") + return float(atoms.info["ref_energy_ev"]) + + +def _ref_forces(atoms: Atoms) -> np.ndarray: + """ + Extract reference (DFT) forces in eV/Å from a frame. + + Parameters + ---------- + atoms + Frame with ``ref_forces`` in ``atoms.arrays``. + + Returns + ------- + numpy.ndarray + Reference forces array with shape ``(n_atoms, 3)``. + """ + if "ref_forces" not in atoms.arrays: + raise KeyError("Missing ref_forces in reference trajectory.") + return np.asarray(atoms.arrays["ref_forces"], dtype=float) + + +@pytest.mark.slow +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_si_defects(mlip: tuple[str, Any]) -> None: + """ + Compare MLIP energies/forces to DFT along fixed NEB images. + + Outputs per-case trajectories containing: + - ref_energy_ev, ref_forces + - pred_energy_ev, pred_forces + + Parameters + ---------- + mlip + Tuple of ``(model_name, model)`` as provided by ``MODELS.items()``. + """ + model_name, model = mlip + calc = model.get_calculator() + + local_files_present = all((DATA_PATH / case.ref_file).exists() for case in CASES) + if local_files_present: + data_dir = DATA_PATH + else: + data_dir = download_s3_data(key=S3_KEY, filename=S3_FILENAME) / "si_defects" + + for case in CASES: + frames = _read_frames(data_dir / case.ref_file) + out_dir = OUT_PATH / case.key / model_name + out_dir.mkdir(parents=True, exist_ok=True) + + results: list[Atoms] = [] + it = frames + # Only show tqdm progress bars in an interactive terminal; otherwise the + # carriage-return updates tend to spam CI/log outputs. + if sys.stderr.isatty(): + it = tqdm( + frames, + desc=f"{model_name} {case.key}", + unit="img", + leave=False, + ) + for atoms in it: + ref_energy_ev = _ref_energy_ev(atoms) + ref_forces = _ref_forces(atoms) + + atoms_pred = atoms.copy() + atoms_pred.calc = calc + + out_atoms = atoms.copy() + out_atoms.info["ref_energy_ev"] = ref_energy_ev + out_atoms.arrays["ref_forces"] = ref_forces + out_atoms.info["pred_energy_ev"] = float(atoms_pred.get_potential_energy()) + out_atoms.arrays["pred_forces"] = np.asarray( + atoms_pred.get_forces(), dtype=float + ) + results.append(out_atoms) + + write(out_dir / "si_defects.extxyz", results)