diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst index 575bbcc11..c7ef3a5b7 100644 --- a/docs/source/user_guide/benchmarks/bulk_crystal.rst +++ b/docs/source/user_guide/benchmarks/bulk_crystal.rst @@ -115,3 +115,62 @@ Reference data: * Same as input data * PBE + + +High-Pressure Relaxation +======================== + +Summary +------- + +Performance in relaxing bulk crystal structures under high-pressure conditions. +3000 structures from the Alexandria database are relaxed at 7 pressure conditions +(0, 25, 50, 75, 100, 125, 150 GPa) and compared to PBE reference calculations. + + +Metrics +------- + +For each pressure condition (0, 25, 50, 75, 100, 125, 150 GPa): + +(1) Volume MAE + +Mean absolute error of volume per atom compared to PBE reference. + +(2) Energy MAE + +Mean absolute error of enthalpy per atom compared to PBE reference. The enthalpy is +calculated as H = E + PV, where E is the potential energy, P is the applied pressure, +and V is the volume. + +(3) Convergence + +Percentage of structures that successfully converged during relaxation. + +Structures are relaxed using janus-core's GeomOpt with the ase `FixSymmetry` constraint +applied to preserve crystallographic symmetry analogously to DFT. Starting from P000 (0 GPa) structures, each structure is relaxed at the target pressure using the FrechetCellFilter with the +specified scalar pressure. Relaxation continues until the maximum force component is +below 0.0002 eV/Å or until 500 steps are reached. If not converged, relaxation is +repeated up from the last structure of the previous relaxation up to 3 times. + + +Computational cost +------------------ + +High: tests are likely to take hours-days to run on GPU, depending on the number of +structures and pressure conditions tested. + + +Data availability +----------------- + +Input structures: + +* Alexandria database pressure benchmark dataset +* URL: https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure +* 3000 structures randomly sampled from the full datasets at each pressure + +Reference data: + +* PBE calculations from the Alexandria database +* Loew et al 2026 J. Phys. Mater. 9 015010 https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8 diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py new file mode 100644 index 000000000..78a591f6f --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py @@ -0,0 +1,389 @@ +"""Analyse high-pressure crystal relaxation benchmark.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import pytest + +from ml_peg.analysis.utils.decorators import build_table +from ml_peg.analysis.utils.utils import load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +CALC_PATH = CALCS_ROOT / "bulk_crystal" / "high_pressure_relaxation" / "outputs" +OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" + +# Pressure conditions +PRESSURES = [0, 25, 50, 75, 100, 125, 150] +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] + +# Generate colors using viridis colorscale +PRESSURE_COLORS = px.colors.sample_colorscale("viridis", len(PRESSURES)) + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# Energy outlier thresholds (eV/atom) +ENERGY_OUTLIER_MIN = -25 +ENERGY_OUTLIER_MAX = 25 + + +def _filter_energy_outliers(df: pd.DataFrame) -> pd.DataFrame: + """ + Mark structures with extreme predicted energies as unconverged. + + Structures with predicted energy per atom outside [-25, 25] eV/atom + are considered outliers and marked as unconverged. + + Parameters + ---------- + df + Results dataframe with 'pred_energy_per_atom' and 'converged' columns. + + Returns + ------- + pd.DataFrame + Dataframe with outliers marked as unconverged. + """ + df.loc[ + (df["pred_energy_per_atom"] <= ENERGY_OUTLIER_MIN) + | (df["pred_energy_per_atom"] >= ENERGY_OUTLIER_MAX), + "converged", + ] = False + return df + + +def load_results_for_pressure(model_name: str, pressure_label: str) -> pd.DataFrame: + """ + Load results for a specific model and pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + pd.DataFrame + Results dataframe or empty dataframe if not found. + """ + csv_path = CALC_PATH / model_name / f"results_{pressure_label}.csv" + if csv_path.exists(): + return pd.read_csv(csv_path) + return pd.DataFrame() + + +def get_converged_data_for_pressure( + model_name: str, pressure_label: str +) -> tuple[list, list, list, list]: + """ + Get converged volume and energy data for a model at a specific pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + tuple[list, list, list, list] + Ref volumes, pred volumes, ref energies, pred energies for converged structures. + """ + df = load_results_for_pressure(model_name, pressure_label) + if df.empty: + return [], [], [], [] + + # Filter converged structures and remove NaN values + df = _filter_energy_outliers(df) + df_conv = df[df["converged"]].copy() + df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) + + return ( + df_conv["ref_volume_per_atom"].tolist(), + df_conv["pred_volume_per_atom"].tolist(), + df_conv["ref_energy_per_atom"].tolist(), + df_conv["pred_energy_per_atom"].tolist(), + ) + + +def get_convergence_rate_for_pressure( + model_name: str, pressure_label: str +) -> float | None: + """ + Get convergence rate for a model at a specific pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + float | None + Convergence rate (%) or None if no data. + """ + df = load_results_for_pressure(model_name, pressure_label) + if df.empty: + return None + df = _filter_energy_outliers(df) + return (df["converged"].sum() / len(df)) * 100 + + +def create_pressure_colored_parity_plot( + data_getter: str, + title: str, + x_label: str, + y_label: str, + filename: Path, +) -> None: + """ + Create a parity plot with different colors for each pressure. + + Parameters + ---------- + data_getter + Either "volume" or "energy" to select which data to plot. + title + Plot title. + x_label + X-axis label. + y_label + Y-axis label. + filename + Path to save the plot JSON. + """ + fig = go.Figure() + + # Add a trace for each pressure + for pressure, pressure_label, color in zip( + PRESSURES, PRESSURE_LABELS, PRESSURE_COLORS, strict=False + ): + ref_values = [] + pred_values = [] + + # Collect data from all models for this pressure + for model_name in MODELS: + if data_getter == "volume": + ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( + model_name, pressure_label + ) + ref_values.extend(ref_vol) + pred_values.extend(pred_vol) + else: # energy + _, _, ref_energy, pred_energy = get_converged_data_for_pressure( + model_name, pressure_label + ) + ref_values.extend(ref_energy) + pred_values.extend(pred_energy) + + if ref_values and pred_values: + fig.add_trace( + go.Scatter( + x=pred_values, + y=ref_values, + name=f"{pressure} GPa", + mode="markers", + marker={"color": color, "size": 6, "opacity": 0.7}, + hovertemplate=( + f"{pressure} GPa
" + "Pred: %{x:.4f}
" + "Ref: %{y:.4f}
" + "" + ), + ) + ) + + # Add diagonal line (y=x) + all_values = [] + for trace in fig.data: + all_values.extend(trace.x) + all_values.extend(trace.y) + + if all_values: + min_val = min(all_values) + max_val = max(all_values) + fig.add_trace( + go.Scatter( + x=[min_val, max_val], + y=[min_val, max_val], + mode="lines", + name="y=x", + line={"color": "gray", "dash": "dash"}, + showlegend=True, + ) + ) + + fig.update_layout( + title=title, + xaxis_title=x_label, + yaxis_title=y_label, + legend_title="Pressure", + hovermode="closest", + ) + + # Save to JSON + filename.parent.mkdir(parents=True, exist_ok=True) + with open(filename, "w") as f: + json.dump(fig.to_dict(), f) + + +@pytest.fixture +def volume_predictions() -> None: + """Create volume parity plot with different colors for each pressure.""" + create_pressure_colored_parity_plot( + data_getter="volume", + title="Volume per atom", + x_label="Predicted volume / ų/atom", + y_label="Reference volume / ų/atom", + filename=OUT_PATH / "figure_volume.json", + ) + + +@pytest.fixture +def energy_predictions() -> None: + """Create energy parity plot with different colors for each pressure.""" + create_pressure_colored_parity_plot( + data_getter="energy", + title="Energy per atom", + x_label="Predicted energy / eV/atom", + y_label="Reference energy / eV/atom", + filename=OUT_PATH / "figure_energy.json", + ) + + +@pytest.fixture +def volume_mae_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate MAE for volume predictions at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: mae_value}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Volume MAE ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( + model_name, pressure_label + ) + if ref_vol and pred_vol: + results[pressure_key][model_name] = mae(ref_vol, pred_vol) + return results + + +@pytest.fixture +def energy_mae_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate MAE for energy predictions at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: mae_value}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Energy MAE ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + _, _, ref_energy, pred_energy = get_converged_data_for_pressure( + model_name, pressure_label + ) + if ref_energy and pred_energy: + results[pressure_key][model_name] = mae(ref_energy, pred_energy) + return results + + +@pytest.fixture +def convergence_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate convergence rate at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: convergence_rate}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Convergence ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + conv_rate = get_convergence_rate_for_pressure(model_name, pressure_label) + if conv_rate is not None: + results[pressure_key][model_name] = conv_rate + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "high_pressure_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, +) +def metrics( + volume_mae_per_pressure: dict[str, dict[str, float]], + energy_mae_per_pressure: dict[str, dict[str, float]], + convergence_per_pressure: dict[str, dict[str, float]], +) -> dict[str, dict]: + """ + Get all high-pressure relaxation metrics separated by pressure. + + Parameters + ---------- + volume_mae_per_pressure + Volume MAE for all models at each pressure. + energy_mae_per_pressure + Energy MAE for all models at each pressure. + convergence_per_pressure + Convergence rate for all models at each pressure. + + Returns + ------- + dict[str, dict] + All metrics for all models. + """ + all_metrics = {} + all_metrics.update(volume_mae_per_pressure) + all_metrics.update(energy_mae_per_pressure) + all_metrics.update(convergence_per_pressure) + return all_metrics + + +def test_high_pressure_relaxation( + metrics: dict[str, dict], + volume_predictions: None, + energy_predictions: None, +) -> None: + """ + Run high-pressure relaxation analysis test. + + Parameters + ---------- + metrics + All high-pressure relaxation metrics. + volume_predictions + Triggers volume plot generation. + energy_predictions + Triggers energy plot generation. + """ + return diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml new file mode 100644 index 000000000..1e09408cd --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml @@ -0,0 +1,140 @@ +metrics: + # 0 GPa + Volume MAE (0 GPa): + good: 0.05 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (0 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 0 GPa compared to PBE" + level_of_theory: PBE + Convergence (0 GPa): + good: 100.0 + bad: 99.9 + unit: "%" + tooltip: "Percentage of structures that converged at 0 GPa" + level_of_theory: PBE + + # 25 GPa + Volume MAE (25 GPa): + good: 0.05 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 25 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (25 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 25 GPa compared to PBE" + level_of_theory: PBE + Convergence (25 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 25 GPa" + level_of_theory: PBE + + # 50 GPa + Volume MAE (50 GPa): + good: 0.05 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 50 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (50 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 50 GPa compared to PBE" + level_of_theory: PBE + Convergence (50 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 50 GPa" + level_of_theory: PBE + + # 75 GPa + Volume MAE (75 GPa): + good: 0.05 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 75 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (75 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 75 GPa compared to PBE" + level_of_theory: PBE + Convergence (75 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 75 GPa" + level_of_theory: PBE + + # 100 GPa + Volume MAE (100 GPa): + good: 0.05 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 100 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (100 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 100 GPa compared to PBE" + level_of_theory: PBE + Convergence (100 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 100 GPa" + level_of_theory: PBE + + # 125 GPa + Volume MAE (125 GPa): + good: 0.05 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 125 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (125 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 125 GPa compared to PBE" + level_of_theory: PBE + Convergence (125 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 125 GPa" + level_of_theory: PBE + + # 150 GPa + Volume MAE (150 GPa): + good: 0.05 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 150 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (150 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 150 GPa compared to PBE" + level_of_theory: PBE + Convergence (150 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 150 GPa" + level_of_theory: PBE diff --git a/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py b/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py new file mode 100644 index 000000000..b2746c9c7 --- /dev/null +++ b/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py @@ -0,0 +1,82 @@ +"""Run high-pressure crystal relaxation benchmark app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +# Get all models +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "High-pressure relaxation" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/bulk_crystal.html#high-pressure-relaxation" +DATA_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" + + +PRESSURES = [0, 25, 50, 75, 100, 125, 150] + + +class HighPressureRelaxationApp(BaseApp): + """High-pressure crystal relaxation benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter_volume = read_plot( + DATA_PATH / "figure_volume.json", id=f"{BENCHMARK_NAME}-figure" + ) + scatter_energy = read_plot( + DATA_PATH / "figure_energy.json", id=f"{BENCHMARK_NAME}-figure" + ) + + # Build column_to_plot mapping for all pressures + column_to_plot = {} + for pressure in PRESSURES: + column_to_plot[f"Volume MAE ({pressure} GPa)"] = scatter_volume + column_to_plot[f"Energy MAE ({pressure} GPa)"] = scatter_energy + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot=column_to_plot, + ) + + +def get_app() -> HighPressureRelaxationApp: + """ + Get high-pressure relaxation benchmark app layout and callback registration. + + Returns + ------- + HighPressureRelaxationApp + Benchmark layout and callback registration. + """ + return HighPressureRelaxationApp( + name=BENCHMARK_NAME, + description=( + "Performance when relaxing crystal structures under high pressure " + "(0-150 GPa). Evaluates volume, energy, and convergence percentage " + "against PBE reference calculations from the Alexandria database. " + "Please also reference Loew et al 2026 J. Phys. Mater. 9 015010" + " (https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8) " + "when using this benchmark." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "high_pressure_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + hp_app = get_app() + full_app.layout = hp_app.layout + hp_app.register_callbacks() + full_app.run(port=8055, debug=True) diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py new file mode 100644 index 000000000..c3a458b5a --- /dev/null +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -0,0 +1,272 @@ +"""Run calculations for high-pressure crystal relaxation benchmark.""" + +from __future__ import annotations + +import bz2 +from copy import copy +import json +from pathlib import Path +import random +from typing import Any +import urllib.request + +from ase import Atoms +from ase.constraints import FixSymmetry +from ase.units import GPa +from janus_core.calculations.geom_opt import GeomOpt +import pandas as pd +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.io.ase import AseAtomsAdaptor +import pytest +from tqdm import tqdm + +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +# URL for Alexandria database +ALEXANDRIA_BASE_URL = "https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure" + +# Local cache directory for downloaded data +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# Pressure conditions in GPa +PRESSURES = [0, 25, 50, 75, 100, 125, 150] +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] + + +# Relaxation parameters +FMAX = 0.0002 # eV/A - tight convergence +MAX_STEPS = 500 +RANDOM_SEED = 42 + +# Number of structures to use for testing +N_STRUCTURES = 3000 + + +def download_pressure_data(pressure_label: str) -> Path: + """ + Download pressure data from Alexandria database if not already cached. + + Parameters + ---------- + pressure_label + Pressure label (e.g., "P000", "P025"). + + Returns + ------- + Path + Path to the downloaded/cached file. + """ + DATA_PATH.mkdir(parents=True, exist_ok=True) + local_path = DATA_PATH / f"{pressure_label}.json.bz2" + + if not local_path.exists(): + url = f"{ALEXANDRIA_BASE_URL}/{pressure_label}.json.bz2" + print(f"Downloading {url}...") + urllib.request.urlretrieve(url, local_path) + print(f"Downloaded to {local_path}") + + return local_path + + +def load_structures( + pressure_label: str, n_structures: int = N_STRUCTURES, random_select: bool = True +) -> list[dict]: + """ + Load structures using P000 starting structures and pressure-specific references. + + Downloads data from Alexandria database if not already cached locally. + + Parameters + ---------- + pressure_label + Pressure label (e.g., "P000", "P025") used for reference values. + n_structures + Number of structures to load. Default is N_STRUCTURES. + random_select + Whether to randomly select structures from the available pool. + Default is True. Set to False for regression testing. + + Returns + ------- + list[dict] + List of structure dictionaries with atoms, mat_id, and reference values. + """ + start_json_path = download_pressure_data("P000") + ref_json_path = download_pressure_data(pressure_label) + + with bz2.open(start_json_path, "rt") as f: + start_data = json.load(f) + + with bz2.open(ref_json_path, "rt") as f: + ref_data = json.load(f) + + ref_map: dict[str, ComputedStructureEntry] = {} + for entry_dict in ref_data["entries"]: + entry = ComputedStructureEntry.from_dict(entry_dict) + ref_map[entry.data["mat_id"]] = entry + + adaptor = AseAtomsAdaptor() + structures = [] + + start_entries = start_data["entries"] + if n_structures > len(start_entries): + raise ValueError( + f"Requested {n_structures} structures but only" + f" {len(start_entries)} available" + ) + + rng = random.Random(RANDOM_SEED) + if random_select: + selected_indices = rng.sample(range(len(start_entries)), n_structures) + else: + selected_indices = list(range(n_structures)) + + for idx in selected_indices: + entry_dict = start_entries[idx] + entry = ComputedStructureEntry.from_dict(entry_dict) + mat_id = entry.data["mat_id"] + ref_entry = ref_map.get(mat_id) + if ref_entry is None: + raise ValueError( + f"Missing reference entry for {mat_id} at {pressure_label}" + ) + + atoms = adaptor.get_atoms(entry.structure) + n_atoms = len(ref_entry.structure) + + structures.append( + { + "mat_id": mat_id, + "atoms": atoms, + "ref_energy_per_atom": ref_entry.energy / n_atoms, + "ref_volume_per_atom": ref_entry.structure.volume / n_atoms, + } + ) + + return structures + + +def relax_with_pressure( + atoms: Atoms, + pressure_gpa: float, + fmax: float = FMAX, + max_steps: int = MAX_STEPS, +) -> tuple[Atoms | None, bool, float | None]: + """ + Relax structure under specified pressure using janus-core GeomOpt. + + Parameters + ---------- + atoms + ASE Atoms object with calculator attached. + pressure_gpa + Pressure in GPa. + fmax + Maximum force tolerance in eV/A. + max_steps + Maximum number of optimization steps. + + Returns + ------- + tuple[Atoms | None, bool, float | None] + Relaxed atoms (or None if failed), convergence status, enthalpy per atom. + """ + try: + converged = False + counter = 0 + # repeat up to 3 times if not converged to be consistent with reference + while counter < 3 and not converged: + atoms.set_constraint( + FixSymmetry( + atoms, + ) + ) + geom_opt = GeomOpt( + struct=atoms, + fmax=fmax, + steps=max_steps, + filter_kwargs={"scalar_pressure": pressure_gpa}, + calc_kwargs={"default_dtype": "float64"}, + ) + geom_opt.run() + relaxed = geom_opt.struct + # asses forces to determine convergence + max_force = max(relaxed.get_forces().flatten(), key=abs) + if max_force < fmax: + converged = True + counter += 1 + atoms = relaxed + except Exception as e: + print(f"Relaxation failed: {e}") + return None, False, None + if not converged: + return None, False, None + # Calculate enthalpy: H = E + PV + energy = relaxed.get_potential_energy() + volume = relaxed.get_volume() + enthalpy = energy + pressure_gpa * GPa * volume + n_atoms = len(relaxed) + + return relaxed, converged, enthalpy / n_atoms + + +@pytest.mark.parametrize("mlip", MODELS.items()) +@pytest.mark.parametrize("pressure_idx", range(len(PRESSURES))) +def test_high_pressure_relaxation(mlip: tuple[str, Any], pressure_idx: int) -> None: + """ + Run high-pressure relaxation benchmark. + + Parameters + ---------- + mlip + Tuple of model name and model object. + pressure_idx + Index into PRESSURES list. + """ + model_name, model = mlip + calc = model.get_calculator() + + pressure_gpa = PRESSURES[pressure_idx] + pressure_label = PRESSURE_LABELS[pressure_idx] + + # Load structures (downloads from Alexandria if not cached) + structures = load_structures(pressure_label) + + results = [] + for struct_data in tqdm( + structures, desc=f"{model_name} @ {pressure_gpa} GPa", leave=False + ): + atoms = struct_data["atoms"].copy() + atoms.calc = copy(calc) + mat_id = struct_data["mat_id"] + + relaxed_atoms, converged, enthalpy_per_atom = relax_with_pressure( + atoms, pressure_gpa + ) + + if relaxed_atoms is not None: + pred_volume = relaxed_atoms.get_volume() / len(relaxed_atoms) + else: + pred_volume = None + + results.append( + { + "mat_id": mat_id, + "pressure_gpa": pressure_gpa, + "ref_volume_per_atom": struct_data["ref_volume_per_atom"], + "ref_energy_per_atom": struct_data["ref_energy_per_atom"], + "pred_volume_per_atom": pred_volume, + "pred_energy_per_atom": enthalpy_per_atom, + "converged": converged, + } + ) + + # Save results + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + df = pd.DataFrame(results) + df.to_csv(out_dir / f"results_{pressure_label}.csv", index=False) diff --git a/ml_peg/models/models.yml b/ml_peg/models/models.yml index c230bbee5..3449e5f8a 100644 --- a/ml_peg/models/models.yml +++ b/ml_peg/models/models.yml @@ -72,6 +72,15 @@ orb-v3-consv-inf-omat: kwargs: name: "orb_v3_conservative_inf_omat" +orb-v3-consv-inf-mpa: + module: orb_models.forcefield + class_name: OrbCalc + device: "cpu" + default_dtype: float32-high + trained_on_d3: false + level_of_theory: PBE + kwargs: + name: "orb_v3_conservative_inf_mpa" # orb-v3-consv-omol: # module: orb_models.forcefield # class_name: OrbCalc diff --git a/tests/test_high_pressure_relaxation_regression.py b/tests/test_high_pressure_relaxation_regression.py new file mode 100644 index 000000000..05d344d25 --- /dev/null +++ b/tests/test_high_pressure_relaxation_regression.py @@ -0,0 +1,97 @@ +"""Regression checks for high-pressure relaxation outputs.""" + +from __future__ import annotations + +from copy import copy +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest + +from ml_peg.calcs.bulk_crystal.high_pressure_relaxation.calc_high_pressure_relaxation import ( # noqa: E501 + PRESSURE_LABELS, + PRESSURES, + load_structures, + relax_with_pressure, +) +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +N_TEST_STRUCTURES = 3 +VOLUME_ATOL = 0.001 +ENERGY_ATOL = 0.0001 + +TEST_DATA_DIR = Path(__file__).parent / "data" / "high_pressure_relaxation" +MODEL_NAME = "mace-mpa-0" +REFERENCE_FILE = TEST_DATA_DIR / f"{MODEL_NAME}_first{N_TEST_STRUCTURES}.csv" + + +@pytest.fixture(scope="module") +def regression_results() -> pd.DataFrame: + """Run relaxation on first N test structures for all pressures.""" + models = load_models(current_models) + model = models[MODEL_NAME] + calc = model.get_calculator() + + all_results = [] + for pressure_gpa, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + structures = load_structures( + pressure_label, n_structures=N_TEST_STRUCTURES, random_select=False + ) + + for struct_data in structures: + atoms = struct_data["atoms"].copy() + atoms.calc = copy(calc) + + relaxed, converged, enthalpy_per_atom = relax_with_pressure( + atoms, pressure_gpa + ) + + pred_volume = ( + relaxed.get_volume() / len(relaxed) if relaxed is not None else None + ) + + all_results.append( + { + "pressure_label": pressure_label, + "mat_id": struct_data["mat_id"], + "volume": pred_volume, + "energy_per_atom": enthalpy_per_atom, + } + ) + + return pd.DataFrame(all_results) + + +@pytest.mark.parametrize("pressure_label", PRESSURE_LABELS) +def test_high_pressure_relaxation_regression( + regression_results: pd.DataFrame, + pressure_label: str, +) -> None: + """Run calculations and check against stored regression baselines.""" + if not REFERENCE_FILE.exists(): + TEST_DATA_DIR.mkdir(parents=True, exist_ok=True) + regression_results.to_csv(REFERENCE_FILE, index=False) + pytest.skip(f"Generated baseline {REFERENCE_FILE.name}; re-run to validate") + + expected = pd.read_csv(REFERENCE_FILE) + expected_p = expected[expected["pressure_label"] == pressure_label] + results_p = regression_results[ + regression_results["pressure_label"] == pressure_label + ] + + assert len(expected_p) == N_TEST_STRUCTURES + assert expected_p["mat_id"].tolist() == results_p["mat_id"].tolist() + np.testing.assert_allclose( + results_p["volume"].to_numpy(), + expected_p["volume"].to_numpy(), + rtol=0.0, + atol=VOLUME_ATOL, + ) + np.testing.assert_allclose( + results_p["energy_per_atom"].to_numpy(), + expected_p["energy_per_atom"].to_numpy(), + rtol=0.0, + atol=ENERGY_ATOL, + )