From 18834215b15216e6cbe31de997906889aa4ae3d5 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 2 Feb 2026 22:25:59 +0100 Subject: [PATCH 1/8] add high pressure benchmark --- .../analyse_high_pressure_relaxation.py | 378 ++++++++++++++++++ .../high_pressure_relaxation/metrics.yml | 140 +++++++ .../app_high_pressure_relaxation.py | 82 ++++ .../calc_high_pressure_relaxation.py | 252 ++++++++++++ .../mace-mpa-0_first5.csv | 36 ++ ...est_high_pressure_relaxation_regression.py | 74 ++++ 6 files changed, 962 insertions(+) create mode 100644 ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py create mode 100644 ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml create mode 100644 ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py create mode 100644 ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py create mode 100644 tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv create mode 100644 tests/test_high_pressure_relaxation_regression.py diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py new file mode 100644 index 000000000..742a22c97 --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py @@ -0,0 +1,378 @@ +"""Analyse high-pressure crystal relaxation benchmark.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import pytest +from sklearn.metrics import mean_absolute_error + +from ml_peg.analysis.utils.decorators import build_table +from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +CALC_PATH = CALCS_ROOT / "bulk_crystal" / "high_pressure_relaxation" / "outputs" +OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" + +# Pressure conditions +PRESSURES = [0, 25, 50, 75, 100, 125, 150] +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] + +# Generate colors using viridis colorscale +PRESSURE_COLORS = px.colors.sample_colorscale("viridis", len(PRESSURES)) + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def mae(ref: list, pred: list) -> float: + """ + Calculate Mean Absolute Error. + + Parameters + ---------- + ref + Reference values. + pred + Predicted values. + + Returns + ------- + float + Mean absolute error. + """ + return mean_absolute_error(ref, pred) + + +def load_results_for_pressure(model_name: str, pressure_label: str) -> pd.DataFrame: + """ + Load results for a specific model and pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + pd.DataFrame + Results dataframe or empty dataframe if not found. + """ + csv_path = CALC_PATH / model_name / f"results_{pressure_label}.csv" + if csv_path.exists(): + return pd.read_csv(csv_path) + return pd.DataFrame() + + +def get_converged_data_for_pressure( + model_name: str, pressure_label: str +) -> tuple[list, list, list, list]: + """ + Get converged volume and energy data for a model at a specific pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + tuple[list, list, list, list] + Ref volumes, pred volumes, ref energies, pred energies for converged structures. + """ + df = load_results_for_pressure(model_name, pressure_label) + if df.empty: + return [], [], [], [] + + # Filter converged structures and remove NaN values + df_conv = df[df["converged"]].copy() + df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) + + return ( + df_conv["ref_volume_per_atom"].tolist(), + df_conv["pred_volume_per_atom"].tolist(), + df_conv["ref_energy_per_atom"].tolist(), + df_conv["pred_energy_per_atom"].tolist(), + ) + + +def get_convergence_rate_for_pressure( + model_name: str, pressure_label: str +) -> float | None: + """ + Get convergence rate for a model at a specific pressure. + + Parameters + ---------- + model_name + Name of the model. + pressure_label + Pressure label (e.g., "P000"). + + Returns + ------- + float | None + Convergence rate (%) or None if no data. + """ + df = load_results_for_pressure(model_name, pressure_label) + if df.empty: + return None + return (df["converged"].sum() / len(df)) * 100 + + +def create_pressure_colored_parity_plot( + data_getter: str, + title: str, + x_label: str, + y_label: str, + filename: Path, +) -> None: + """ + Create a parity plot with different colors for each pressure. + + Parameters + ---------- + data_getter + Either "volume" or "energy" to select which data to plot. + title + Plot title. + x_label + X-axis label. + y_label + Y-axis label. + filename + Path to save the plot JSON. + """ + fig = go.Figure() + + # Add a trace for each pressure + for pressure, pressure_label, color in zip( + PRESSURES, PRESSURE_LABELS, PRESSURE_COLORS, strict=False + ): + ref_values = [] + pred_values = [] + + # Collect data from all models for this pressure + for model_name in MODELS: + if data_getter == "volume": + ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( + model_name, pressure_label + ) + ref_values.extend(ref_vol) + pred_values.extend(pred_vol) + else: # energy + _, _, ref_energy, pred_energy = get_converged_data_for_pressure( + model_name, pressure_label + ) + ref_values.extend(ref_energy) + pred_values.extend(pred_energy) + + if ref_values and pred_values: + fig.add_trace( + go.Scatter( + x=pred_values, + y=ref_values, + name=f"{pressure} GPa", + mode="markers", + marker={"color": color, "size": 6, "opacity": 0.7}, + hovertemplate=( + f"{pressure} GPa
" + "Pred: %{x:.4f}
" + "Ref: %{y:.4f}
" + "" + ), + ) + ) + + # Add diagonal line (y=x) + all_values = [] + for trace in fig.data: + all_values.extend(trace.x) + all_values.extend(trace.y) + + if all_values: + min_val = min(all_values) + max_val = max(all_values) + fig.add_trace( + go.Scatter( + x=[min_val, max_val], + y=[min_val, max_val], + mode="lines", + name="y=x", + line={"color": "gray", "dash": "dash"}, + showlegend=True, + ) + ) + + fig.update_layout( + title=title, + xaxis_title=x_label, + yaxis_title=y_label, + legend_title="Pressure", + hovermode="closest", + ) + + # Save to JSON + filename.parent.mkdir(parents=True, exist_ok=True) + with open(filename, "w") as f: + json.dump(fig.to_dict(), f) + + +@pytest.fixture +def volume_predictions() -> None: + """Create volume parity plot with different colors for each pressure.""" + create_pressure_colored_parity_plot( + data_getter="volume", + title="Volume per atom", + x_label="Predicted volume / ų/atom", + y_label="Reference volume / ų/atom", + filename=OUT_PATH / "figure_volume.json", + ) + + +@pytest.fixture +def energy_predictions() -> None: + """Create energy parity plot with different colors for each pressure.""" + create_pressure_colored_parity_plot( + data_getter="energy", + title="Energy per atom", + x_label="Predicted energy / eV/atom", + y_label="Reference energy / eV/atom", + filename=OUT_PATH / "figure_energy.json", + ) + + +@pytest.fixture +def volume_mae_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate MAE for volume predictions at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: mae_value}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Volume MAE ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + ref_vol, pred_vol, _, _ = get_converged_data_for_pressure( + model_name, pressure_label + ) + if ref_vol and pred_vol: + results[pressure_key][model_name] = mae(ref_vol, pred_vol) + return results + + +@pytest.fixture +def energy_mae_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate MAE for energy predictions at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: mae_value}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Energy MAE ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + _, _, ref_energy, pred_energy = get_converged_data_for_pressure( + model_name, pressure_label + ) + if ref_energy and pred_energy: + results[pressure_key][model_name] = mae(ref_energy, pred_energy) + return results + + +@pytest.fixture +def convergence_per_pressure() -> dict[str, dict[str, float]]: + """ + Calculate convergence rate at each pressure. + + Returns + ------- + dict[str, dict[str, float]] + Nested dict: {pressure_gpa: {model_name: convergence_rate}}. + """ + results = {} + for pressure, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + pressure_key = f"Convergence ({pressure} GPa)" + results[pressure_key] = {} + for model_name in MODELS: + conv_rate = get_convergence_rate_for_pressure(model_name, pressure_label) + if conv_rate is not None: + results[pressure_key][model_name] = conv_rate + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "high_pressure_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, +) +def metrics( + volume_mae_per_pressure: dict[str, dict[str, float]], + energy_mae_per_pressure: dict[str, dict[str, float]], + convergence_per_pressure: dict[str, dict[str, float]], +) -> dict[str, dict]: + """ + Get all high-pressure relaxation metrics separated by pressure. + + Parameters + ---------- + volume_mae_per_pressure + Volume MAE for all models at each pressure. + energy_mae_per_pressure + Energy MAE for all models at each pressure. + convergence_per_pressure + Convergence rate for all models at each pressure. + + Returns + ------- + dict[str, dict] + All metrics for all models. + """ + all_metrics = {} + all_metrics.update(volume_mae_per_pressure) + all_metrics.update(energy_mae_per_pressure) + all_metrics.update(convergence_per_pressure) + return all_metrics + + +def test_high_pressure_relaxation( + metrics: dict[str, dict], + volume_predictions: None, + energy_predictions: None, +) -> None: + """ + Run high-pressure relaxation analysis test. + + Parameters + ---------- + metrics + All high-pressure relaxation metrics. + volume_predictions + Triggers volume plot generation. + energy_predictions + Triggers energy plot generation. + """ + return diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml new file mode 100644 index 000000000..fa5b810fa --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml @@ -0,0 +1,140 @@ +metrics: + # 0 GPa + Volume MAE (0 GPa): + good: 0.2 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (0 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 0 GPa compared to PBE" + level_of_theory: PBE + Convergence (0 GPa): + good: 100.0 + bad: 80.0 + unit: "%" + tooltip: "Percentage of structures that converged at 0 GPa" + level_of_theory: PBE + + # 25 GPa + Volume MAE (25 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 25 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (25 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 25 GPa compared to PBE" + level_of_theory: PBE + Convergence (25 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 25 GPa" + level_of_theory: PBE + + # 50 GPa + Volume MAE (50 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 50 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (50 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 50 GPa compared to PBE" + level_of_theory: PBE + Convergence (50 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 50 GPa" + level_of_theory: PBE + + # 75 GPa + Volume MAE (75 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 75 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (75 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 75 GPa compared to PBE" + level_of_theory: PBE + Convergence (75 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 75 GPa" + level_of_theory: PBE + + # 100 GPa + Volume MAE (100 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 100 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (100 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 100 GPa compared to PBE" + level_of_theory: PBE + Convergence (100 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 100 GPa" + level_of_theory: PBE + + # 125 GPa + Volume MAE (125 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 125 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (125 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 125 GPa compared to PBE" + level_of_theory: PBE + Convergence (125 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 125 GPa" + level_of_theory: PBE + + # 150 GPa + Volume MAE (150 GPa): + good: 0.02 + bad: 0.1 + unit: "ų/atom" + tooltip: "Mean Absolute Error of volume per atom at 150 GPa compared to PBE" + level_of_theory: PBE + Energy MAE (150 GPa): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom at 150 GPa compared to PBE" + level_of_theory: PBE + Convergence (150 GPa): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of structures that converged at 150 GPa" + level_of_theory: PBE diff --git a/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py b/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py new file mode 100644 index 000000000..b2746c9c7 --- /dev/null +++ b/ml_peg/app/bulk_crystal/high_pressure_relaxation/app_high_pressure_relaxation.py @@ -0,0 +1,82 @@ +"""Run high-pressure crystal relaxation benchmark app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +# Get all models +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "High-pressure relaxation" +DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/bulk_crystal.html#high-pressure-relaxation" +DATA_PATH = APP_ROOT / "data" / "bulk_crystal" / "high_pressure_relaxation" + + +PRESSURES = [0, 25, 50, 75, 100, 125, 150] + + +class HighPressureRelaxationApp(BaseApp): + """High-pressure crystal relaxation benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter_volume = read_plot( + DATA_PATH / "figure_volume.json", id=f"{BENCHMARK_NAME}-figure" + ) + scatter_energy = read_plot( + DATA_PATH / "figure_energy.json", id=f"{BENCHMARK_NAME}-figure" + ) + + # Build column_to_plot mapping for all pressures + column_to_plot = {} + for pressure in PRESSURES: + column_to_plot[f"Volume MAE ({pressure} GPa)"] = scatter_volume + column_to_plot[f"Energy MAE ({pressure} GPa)"] = scatter_energy + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot=column_to_plot, + ) + + +def get_app() -> HighPressureRelaxationApp: + """ + Get high-pressure relaxation benchmark app layout and callback registration. + + Returns + ------- + HighPressureRelaxationApp + Benchmark layout and callback registration. + """ + return HighPressureRelaxationApp( + name=BENCHMARK_NAME, + description=( + "Performance when relaxing crystal structures under high pressure " + "(0-150 GPa). Evaluates volume, energy, and convergence percentage " + "against PBE reference calculations from the Alexandria database. " + "Please also reference Loew et al 2026 J. Phys. Mater. 9 015010" + " (https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8) " + "when using this benchmark." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "high_pressure_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + hp_app = get_app() + full_app.layout = hp_app.layout + hp_app.register_callbacks() + full_app.run(port=8055, debug=True) diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py new file mode 100644 index 000000000..372f3342a --- /dev/null +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -0,0 +1,252 @@ +"""Run calculations for high-pressure crystal relaxation benchmark.""" + +from __future__ import annotations + +import bz2 +from copy import copy +import json +from pathlib import Path +from typing import Any +import urllib.request + +from ase import Atoms +from ase.constraints import FixSymmetry +from ase.units import GPa +from janus_core.calculations.geom_opt import GeomOpt +import pandas as pd +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.io.ase import AseAtomsAdaptor +import pytest +from tqdm import tqdm + +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +# URL for Alexandria database +ALEXANDRIA_BASE_URL = "https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure" + +# Local cache directory for downloaded data +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# Pressure conditions in GPa +PRESSURES = [0, 25, 50, 75, 100, 125, 150] +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] + + +# Relaxation parameters +FMAX = 0.0002 # eV/A - tight convergence +MAX_STEPS = 500 + +# Number of structures to use for testing +N_STRUCTURES = 3 + + +def download_pressure_data(pressure_label: str) -> Path: + """ + Download pressure data from Alexandria database if not already cached. + + Parameters + ---------- + pressure_label + Pressure label (e.g., "P000", "P025"). + + Returns + ------- + Path + Path to the downloaded/cached file. + """ + DATA_PATH.mkdir(parents=True, exist_ok=True) + local_path = DATA_PATH / f"{pressure_label}.json.bz2" + + if not local_path.exists(): + url = f"{ALEXANDRIA_BASE_URL}/{pressure_label}.json.bz2" + print(f"Downloading {url}...") + urllib.request.urlretrieve(url, local_path) + print(f"Downloaded to {local_path}") + + return local_path + + +def load_structures( + pressure_label: str, n_structures: int = N_STRUCTURES +) -> list[dict]: + """ + Load structures using P000 starting structures and pressure-specific references. + + Downloads data from Alexandria database if not already cached locally. + + Parameters + ---------- + pressure_label + Pressure label (e.g., "P000", "P025") used for reference values. + n_structures + Number of structures to load. Default is N_STRUCTURES. + + Returns + ------- + list[dict] + List of structure dictionaries with atoms, mat_id, and reference values. + """ + start_json_path = download_pressure_data("P000") + ref_json_path = download_pressure_data(pressure_label) + + with bz2.open(start_json_path, "rt") as f: + start_data = json.load(f) + + with bz2.open(ref_json_path, "rt") as f: + ref_data = json.load(f) + + ref_map: dict[str, ComputedStructureEntry] = {} + for entry_dict in ref_data["entries"]: + entry = ComputedStructureEntry.from_dict(entry_dict) + ref_map[entry.data["mat_id"]] = entry + + adaptor = AseAtomsAdaptor() + structures = [] + + for entry_dict in start_data["entries"][:n_structures]: + entry = ComputedStructureEntry.from_dict(entry_dict) + mat_id = entry.data["mat_id"] + ref_entry = ref_map.get(mat_id) + if ref_entry is None: + raise ValueError( + f"Missing reference entry for {mat_id} at {pressure_label}" + ) + + atoms = adaptor.get_atoms(entry.structure) + n_atoms = len(ref_entry.structure) + + structures.append( + { + "mat_id": mat_id, + "atoms": atoms, + "ref_energy_per_atom": ref_entry.energy / n_atoms, + "ref_volume_per_atom": ref_entry.structure.volume / n_atoms, + } + ) + + return structures + + +def relax_with_pressure( + atoms: Atoms, + pressure_gpa: float, + fmax: float = FMAX, + max_steps: int = MAX_STEPS, +) -> tuple[Atoms | None, bool, float | None]: + """ + Relax structure under specified pressure using janus-core GeomOpt. + + Parameters + ---------- + atoms + ASE Atoms object with calculator attached. + pressure_gpa + Pressure in GPa. + fmax + Maximum force tolerance in eV/A. + max_steps + Maximum number of optimization steps. + + Returns + ------- + tuple[Atoms | None, bool, float | None] + Relaxed atoms (or None if failed), convergence status, enthalpy per atom. + """ + try: + converged = False + counter = 0 + # repeat up to 3 times if not converged to be consistent with reference + while counter < 3 and not converged: + atoms.set_constraint( + FixSymmetry( + atoms, + ) + ) + geom_opt = GeomOpt( + struct=atoms, + fmax=fmax, + steps=max_steps, + filter_kwargs={"scalar_pressure": pressure_gpa}, + calc_kwargs={"default_dtype": "float64"}, + ) + geom_opt.run() + relaxed = geom_opt.struct + # asses forces to determine convergence + max_force = max(relaxed.get_forces().flatten(), key=abs) + if max_force < fmax: + converged = True + counter += 1 + atoms = relaxed + except Exception as e: + print(f"Relaxation failed: {e}") + return None, False, None + + # Calculate enthalpy: H = E + PV + energy = relaxed.get_potential_energy() + volume = relaxed.get_volume() + enthalpy = energy + pressure_gpa * GPa * volume + n_atoms = len(relaxed) + + return relaxed, converged, enthalpy / n_atoms + + +@pytest.mark.parametrize("mlip", MODELS.items()) +@pytest.mark.parametrize("pressure_idx", range(len(PRESSURES))) +def test_high_pressure_relaxation(mlip: tuple[str, Any], pressure_idx: int) -> None: + """ + Run high-pressure relaxation benchmark. + + Parameters + ---------- + mlip + Tuple of model name and model object. + pressure_idx + Index into PRESSURES list. + """ + model_name, model = mlip + calc = model.get_calculator() + + pressure_gpa = PRESSURES[pressure_idx] + pressure_label = PRESSURE_LABELS[pressure_idx] + + # Load structures (downloads from Alexandria if not cached) + structures = load_structures(pressure_label) + + results = [] + for struct_data in tqdm( + structures, desc=f"{model_name} @ {pressure_gpa} GPa", leave=False + ): + atoms = struct_data["atoms"].copy() + atoms.calc = copy(calc) + mat_id = struct_data["mat_id"] + + relaxed_atoms, converged, enthalpy_per_atom = relax_with_pressure( + atoms, pressure_gpa + ) + + if relaxed_atoms is not None: + pred_volume = relaxed_atoms.get_volume() / len(relaxed_atoms) + else: + pred_volume = None + + results.append( + { + "mat_id": mat_id, + "pressure_gpa": pressure_gpa, + "ref_volume_per_atom": struct_data["ref_volume_per_atom"], + "ref_energy_per_atom": struct_data["ref_energy_per_atom"], + "pred_volume_per_atom": pred_volume, + "pred_energy_per_atom": enthalpy_per_atom, + "converged": converged, + } + ) + + # Save results + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + df = pd.DataFrame(results) + df.to_csv(out_dir / f"results_{pressure_label}.csv", index=False) diff --git a/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv b/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv new file mode 100644 index 000000000..9cd448467 --- /dev/null +++ b/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv @@ -0,0 +1,36 @@ +pressure_label,mat_id,volume,energy_per_atom +P000,agm001000110,25.033782103611063,-5.936990103881239 +P000,agm001001040,15.204527828733536,-8.27554636941658 +P000,agm001001919,21.303183572813897,-8.435935053726139 +P000,agm001003503,24.690326355959755,-4.327203031854863 +P000,agm001004359,20.86226724052588,-7.829242292338797 +P025,agm001000110,19.71321916175995,-2.5294162683198125 +P025,agm001001040,13.487048183285616,-6.052898044522763 +P025,agm001001919,17.670764016174328,-5.438176257490443 +P025,agm001003503,19.12780621300846,-1.006088317132572 +P025,agm001004359,17.457486573286673,-4.870256961601473 +P050,agm001000110,17.715912861957246,0.3763531065086264 +P050,agm001001040,12.512987673468842,-4.029860765358488 +P050,agm001001919,15.46189689156924,-2.861038673041728 +P050,agm001003503,16.694829244071812,1.7723527015771132 +P050,agm001004359,15.13105554384206,-2.338786848103142 +P075,agm001000110,16.458372063400443,3.036751076640864 +P075,agm001001040,11.833602300101647,-2.13312140913839 +P075,agm001001919,14.04859528970246,-0.5622805372532179 +P075,agm001003503,15.23016215452774,4.255252390053997 +P075,agm001004359,13.686750914395638,-0.0937456687284251 +P100,agm001000110,15.506324317224594,5.527588395992431 +P100,agm001001040,11.324664691623278,-0.328009659077703 +P100,agm001001919,13.09661261512976,1.547445860475598 +P100,agm001003503,14.113463382167296,6.544697423965405 +P100,agm001004359,12.807027240161723,1.9611872723655293 +P125,agm001000110,14.768322896353382,7.887451277337899 +P125,agm001001040,10.9082879022004,1.4057552363067838 +P125,agm001001919,12.503731488724002,3.542297291380569 +P125,agm001003503,13.125761236229373,8.668450989536197 +P125,agm001004359,12.298360564455225,3.9181192151113904 +P150,agm001000110,14.163630823547033,10.143286868066651 +P150,agm001001040,10.539232357961302,3.078605889229442 +P150,agm001001919,12.045275482032784,5.456359868213672 +P150,agm001003503,12.192106272396378,10.643260044585606 +P150,agm001004359,11.883204516891276,5.803899385939424 diff --git a/tests/test_high_pressure_relaxation_regression.py b/tests/test_high_pressure_relaxation_regression.py new file mode 100644 index 000000000..dda8eada5 --- /dev/null +++ b/tests/test_high_pressure_relaxation_regression.py @@ -0,0 +1,74 @@ +"""Regression checks for high-pressure relaxation outputs.""" + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest + +PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] +FIRST_N = 5 +VOLUME_ATOL = 0.001 +ENERGY_ATOL = 0.0001 + +REPO_ROOT = Path(__file__).resolve().parents[1] +TEST_DATA_ROOT = REPO_ROOT / "tests" / "data" / "high_pressure_relaxation" +OUTPUT_ROOT = ( + REPO_ROOT + / "ml_peg" + / "calcs" + / "bulk_crystal" + / "high_pressure_relaxation" + / "outputs" +) + +TEST_CASES = [ + { + "name": "mace-mpa-0", + "output_model": "mace-mpa-0", + "data_file": TEST_DATA_ROOT / "mace-mpa-0_first5.csv", + }, +] + + +@pytest.mark.parametrize("case", TEST_CASES, ids=[case["name"] for case in TEST_CASES]) +@pytest.mark.parametrize("pressure_label", PRESSURE_LABELS) +def test_high_pressure_relaxation_regression( + case: dict[str, Path | str], + pressure_label: str, +) -> None: + """Check entries against stored regression baselines.""" + data_path = Path(case["data_file"]) + results_path = ( + OUTPUT_ROOT / str(case["output_model"]) / f"results_{pressure_label}.csv" + ) + + assert data_path.exists(), f"Missing test data file: {data_path}" + assert results_path.exists(), f"Missing results file: {results_path}" + + expected = pd.read_csv(data_path) + expected_pressure = expected[expected["pressure_label"] == pressure_label] + + assert len(expected_pressure) == FIRST_N + + results = pd.read_csv(results_path).head(FIRST_N) + if len(results) < FIRST_N: + pytest.skip( + f"{case['output_model']} only has {len(results)} rows for {pressure_label}." + ) + + assert expected_pressure["mat_id"].tolist() == results["mat_id"].tolist() + np.testing.assert_allclose( + results["pred_volume_per_atom"].to_numpy(), + expected_pressure["volume"].to_numpy(), + rtol=0.0, + atol=VOLUME_ATOL, + ) + np.testing.assert_allclose( + results["pred_energy_per_atom"].to_numpy(), + expected_pressure["energy_per_atom"].to_numpy(), + rtol=0.0, + atol=ENERGY_ATOL, + ) From cb5c0527dc4b152d282793e5a82d372b78c33a73 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Tue, 3 Feb 2026 00:36:01 +0100 Subject: [PATCH 2/8] add random subsampling --- .../calc_high_pressure_relaxation.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py index 372f3342a..1a9b4fda2 100644 --- a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -6,6 +6,7 @@ from copy import copy import json from pathlib import Path +import random from typing import Any import urllib.request @@ -39,9 +40,10 @@ # Relaxation parameters FMAX = 0.0002 # eV/A - tight convergence MAX_STEPS = 500 +RANDOM_SEED = 42 # Number of structures to use for testing -N_STRUCTURES = 3 +N_STRUCTURES = 100 def download_pressure_data(pressure_label: str) -> Path: @@ -107,7 +109,18 @@ def load_structures( adaptor = AseAtomsAdaptor() structures = [] - for entry_dict in start_data["entries"][:n_structures]: + start_entries = start_data["entries"] + if n_structures > len(start_entries): + raise ValueError( + f"Requested {n_structures} structures but only" + f" {len(start_entries)} available" + ) + + rng = random.Random(RANDOM_SEED) + selected_indices = sorted(rng.sample(range(len(start_entries)), k=n_structures)) + + for idx in selected_indices: + entry_dict = start_entries[idx] entry = ComputedStructureEntry.from_dict(entry_dict) mat_id = entry.data["mat_id"] ref_entry = ref_map.get(mat_id) @@ -184,7 +197,8 @@ def relax_with_pressure( except Exception as e: print(f"Relaxation failed: {e}") return None, False, None - + if not converged: + return None, False, None # Calculate enthalpy: H = E + PV energy = relaxed.get_potential_energy() volume = relaxed.get_volume() From 8ef30cc5c2e18fc19bcc372068a118cd01e20498 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Tue, 3 Feb 2026 11:20:58 +0100 Subject: [PATCH 3/8] remove outliers --- .../analyse_high_pressure_relaxation.py | 11 +++++++++++ .../calc_high_pressure_relaxation.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py index 742a22c97..88d2a35b5 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py @@ -99,6 +99,11 @@ def get_converged_data_for_pressure( return [], [], [], [] # Filter converged structures and remove NaN values + # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged + df.loc[ + (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), + "converged", + ] = False df_conv = df[df["converged"]].copy() df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) @@ -129,8 +134,14 @@ def get_convergence_rate_for_pressure( Convergence rate (%) or None if no data. """ df = load_results_for_pressure(model_name, pressure_label) + # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged if df.empty: return None + df.loc[ + (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), + "converged", + ] = False + # set energies of unconverged to NaN return (df["converged"].sum() / len(df)) * 100 diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py index 1a9b4fda2..bcf6c88fc 100644 --- a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -43,7 +43,7 @@ RANDOM_SEED = 42 # Number of structures to use for testing -N_STRUCTURES = 100 +N_STRUCTURES = 3000 def download_pressure_data(pressure_label: str) -> Path: From 5bbeba1872d8c557dcb8afa5c35c793e4ab3011f Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Tue, 3 Feb 2026 11:25:32 +0100 Subject: [PATCH 4/8] add orb-v3-cons-inf-mpa for comparison with original benchmark --- ml_peg/models/models.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ml_peg/models/models.yml b/ml_peg/models/models.yml index 77e54a16b..9276c9865 100644 --- a/ml_peg/models/models.yml +++ b/ml_peg/models/models.yml @@ -72,6 +72,16 @@ orb-v3-consv-inf-omat: kwargs: name: "orb_v3_conservative_inf_omat" +orb-v3-consv-inf-mpa: + module: orb_models.forcefield + class_name: OrbCalc + device: "cpu" + default_dtype: float32-high + trained_on_d3: false + level_of_theory: PBE + kwargs: + name: "orb_v3_conservative_inf_mpa" + # mattersim-5M: # module: mattersim.forcefield # class_name: MatterSimCalculator From 1d2fb2ee62d689bb831d8c386a3231d10e687f44 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 11:30:02 +0100 Subject: [PATCH 5/8] add docs for benchmark --- .../user_guide/benchmarks/bulk_crystal.rst | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst index 575bbcc11..c7ef3a5b7 100644 --- a/docs/source/user_guide/benchmarks/bulk_crystal.rst +++ b/docs/source/user_guide/benchmarks/bulk_crystal.rst @@ -115,3 +115,62 @@ Reference data: * Same as input data * PBE + + +High-Pressure Relaxation +======================== + +Summary +------- + +Performance in relaxing bulk crystal structures under high-pressure conditions. +3000 structures from the Alexandria database are relaxed at 7 pressure conditions +(0, 25, 50, 75, 100, 125, 150 GPa) and compared to PBE reference calculations. + + +Metrics +------- + +For each pressure condition (0, 25, 50, 75, 100, 125, 150 GPa): + +(1) Volume MAE + +Mean absolute error of volume per atom compared to PBE reference. + +(2) Energy MAE + +Mean absolute error of enthalpy per atom compared to PBE reference. The enthalpy is +calculated as H = E + PV, where E is the potential energy, P is the applied pressure, +and V is the volume. + +(3) Convergence + +Percentage of structures that successfully converged during relaxation. + +Structures are relaxed using janus-core's GeomOpt with the ase `FixSymmetry` constraint +applied to preserve crystallographic symmetry analogously to DFT. Starting from P000 (0 GPa) structures, each structure is relaxed at the target pressure using the FrechetCellFilter with the +specified scalar pressure. Relaxation continues until the maximum force component is +below 0.0002 eV/Šor until 500 steps are reached. If not converged, relaxation is +repeated up from the last structure of the previous relaxation up to 3 times. + + +Computational cost +------------------ + +High: tests are likely to take hours-days to run on GPU, depending on the number of +structures and pressure conditions tested. + + +Data availability +----------------- + +Input structures: + +* Alexandria database pressure benchmark dataset +* URL: https://alexandria.icams.rub.de/data/pbe/benchmarks/pressure +* 3000 structures randomly sampled from the full datasets at each pressure + +Reference data: + +* PBE calculations from the Alexandria database +* Loew et al 2026 J. Phys. Mater. 9 015010 https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8 From 3a06816d193575729877eac7623b84cb80c966fe Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 12:27:02 +0100 Subject: [PATCH 6/8] fix good/bad metrics, use internal mae function --- .../analyse_high_pressure_relaxation.py | 44 +++++++++---------- .../high_pressure_relaxation/metrics.yml | 6 +-- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py index 88d2a35b5..78a591f6f 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/analyse_high_pressure_relaxation.py @@ -9,10 +9,9 @@ import plotly.express as px import plotly.graph_objects as go import pytest -from sklearn.metrics import mean_absolute_error from ml_peg.analysis.utils.decorators import build_table -from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.analysis.utils.utils import load_metrics_config, mae from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import get_model_names @@ -34,24 +33,34 @@ METRICS_CONFIG_PATH ) +# Energy outlier thresholds (eV/atom) +ENERGY_OUTLIER_MIN = -25 +ENERGY_OUTLIER_MAX = 25 -def mae(ref: list, pred: list) -> float: + +def _filter_energy_outliers(df: pd.DataFrame) -> pd.DataFrame: """ - Calculate Mean Absolute Error. + Mark structures with extreme predicted energies as unconverged. + + Structures with predicted energy per atom outside [-25, 25] eV/atom + are considered outliers and marked as unconverged. Parameters ---------- - ref - Reference values. - pred - Predicted values. + df + Results dataframe with 'pred_energy_per_atom' and 'converged' columns. Returns ------- - float - Mean absolute error. + pd.DataFrame + Dataframe with outliers marked as unconverged. """ - return mean_absolute_error(ref, pred) + df.loc[ + (df["pred_energy_per_atom"] <= ENERGY_OUTLIER_MIN) + | (df["pred_energy_per_atom"] >= ENERGY_OUTLIER_MAX), + "converged", + ] = False + return df def load_results_for_pressure(model_name: str, pressure_label: str) -> pd.DataFrame: @@ -99,11 +108,7 @@ def get_converged_data_for_pressure( return [], [], [], [] # Filter converged structures and remove NaN values - # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged - df.loc[ - (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), - "converged", - ] = False + df = _filter_energy_outliers(df) df_conv = df[df["converged"]].copy() df_conv = df_conv.dropna(subset=["pred_volume_per_atom", "pred_energy_per_atom"]) @@ -134,14 +139,9 @@ def get_convergence_rate_for_pressure( Convergence rate (%) or None if no data. """ df = load_results_for_pressure(model_name, pressure_label) - # set outliers with less than -25 eV/atom or more than 25 eV/atom to unconverged if df.empty: return None - df.loc[ - (df["pred_energy_per_atom"] <= -25) | (df["pred_energy_per_atom"] >= 25), - "converged", - ] = False - # set energies of unconverged to NaN + df = _filter_energy_outliers(df) return (df["converged"].sum() / len(df)) * 100 diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml index fa5b810fa..5fe501cc1 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml @@ -1,8 +1,8 @@ metrics: # 0 GPa Volume MAE (0 GPa): - good: 0.2 - bad: 0.1 + good: 0.05 + bad: 0.15 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" level_of_theory: PBE @@ -14,7 +14,7 @@ metrics: level_of_theory: PBE Convergence (0 GPa): good: 100.0 - bad: 80.0 + bad: 99.9 unit: "%" tooltip: "Percentage of structures that converged at 0 GPa" level_of_theory: PBE From b0c5d56074bf36add7f0dea356e96b5451057c5e Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Wed, 4 Feb 2026 14:50:20 +0100 Subject: [PATCH 7/8] metrics.yml update --- .../high_pressure_relaxation/metrics.yml | 14 +++++------ uv.lock | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml index 5fe501cc1..1e09408cd 100644 --- a/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml +++ b/ml_peg/analysis/bulk_crystal/high_pressure_relaxation/metrics.yml @@ -2,7 +2,7 @@ metrics: # 0 GPa Volume MAE (0 GPa): good: 0.05 - bad: 0.15 + bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 0 GPa compared to PBE" level_of_theory: PBE @@ -21,7 +21,7 @@ metrics: # 25 GPa Volume MAE (25 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 25 GPa compared to PBE" @@ -41,7 +41,7 @@ metrics: # 50 GPa Volume MAE (50 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 50 GPa compared to PBE" @@ -61,7 +61,7 @@ metrics: # 75 GPa Volume MAE (75 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 75 GPa compared to PBE" @@ -81,7 +81,7 @@ metrics: # 100 GPa Volume MAE (100 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 100 GPa compared to PBE" @@ -101,7 +101,7 @@ metrics: # 125 GPa Volume MAE (125 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 125 GPa compared to PBE" @@ -121,7 +121,7 @@ metrics: # 150 GPa Volume MAE (150 GPa): - good: 0.02 + good: 0.05 bad: 0.1 unit: "ų/atom" tooltip: "Mean Absolute Error of volume per atom at 150 GPa compared to PBE" diff --git a/uv.lock b/uv.lock index f828ab629..b1665744d 100644 --- a/uv.lock +++ b/uv.lock @@ -2603,6 +2603,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/a8/365059bbcd4572cbc41de17fd5b682be5868b218c3c5479071865cab9078/entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f", size = 5294, upload-time = "2022-02-02T21:30:26.024Z" }, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, +] + [[package]] name = "eventlet" version = "0.40.4" @@ -5998,6 +6007,7 @@ dependencies = [ { name = "mdanalysis", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "mdanalysis", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "mlipx" }, + { name = "openpyxl" }, { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "tqdm" }, @@ -6078,6 +6088,7 @@ requires-dist = [ { name = "mattersim", marker = "extra == 'mattersim'", specifier = "==1.2.0" }, { name = "mdanalysis" }, { name = "mlipx", specifier = ">=0.1.5,<0.2" }, + { name = "openpyxl" }, { name = "orb-models", marker = "python_full_version < '3.13' and sys_platform != 'win32' and extra == 'orb'", specifier = "==0.5.5" }, { name = "pet-mad", marker = "sys_platform != 'win32' and extra == 'pet-mad'", specifier = "==1.4.4" }, { name = "scikit-learn", specifier = ">=1.7.1" }, @@ -7337,6 +7348,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "et-xmlfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + [[package]] name = "opt-einsum" version = "3.4.0" From 8d87f2a07feefd1ac010d33b3cca6de191721e01 Mon Sep 17 00:00:00 2001 From: JonathanSchmidt1 Date: Mon, 16 Feb 2026 11:51:30 +0100 Subject: [PATCH 8/8] fix regression test to rerun instead of using output data --- .../calc_high_pressure_relaxation.py | 10 +- .../mace-mpa-0_first5.csv | 36 ------ ...est_high_pressure_relaxation_regression.py | 111 +++++++++++------- 3 files changed, 75 insertions(+), 82 deletions(-) delete mode 100644 tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv diff --git a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py index bcf6c88fc..c3a458b5a 100644 --- a/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py +++ b/ml_peg/calcs/bulk_crystal/high_pressure_relaxation/calc_high_pressure_relaxation.py @@ -73,7 +73,7 @@ def download_pressure_data(pressure_label: str) -> Path: def load_structures( - pressure_label: str, n_structures: int = N_STRUCTURES + pressure_label: str, n_structures: int = N_STRUCTURES, random_select: bool = True ) -> list[dict]: """ Load structures using P000 starting structures and pressure-specific references. @@ -86,6 +86,9 @@ def load_structures( Pressure label (e.g., "P000", "P025") used for reference values. n_structures Number of structures to load. Default is N_STRUCTURES. + random_select + Whether to randomly select structures from the available pool. + Default is True. Set to False for regression testing. Returns ------- @@ -117,7 +120,10 @@ def load_structures( ) rng = random.Random(RANDOM_SEED) - selected_indices = sorted(rng.sample(range(len(start_entries)), k=n_structures)) + if random_select: + selected_indices = rng.sample(range(len(start_entries)), n_structures) + else: + selected_indices = list(range(n_structures)) for idx in selected_indices: entry_dict = start_entries[idx] diff --git a/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv b/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv deleted file mode 100644 index 9cd448467..000000000 --- a/tests/data/high_pressure_relaxation/mace-mpa-0_first5.csv +++ /dev/null @@ -1,36 +0,0 @@ -pressure_label,mat_id,volume,energy_per_atom -P000,agm001000110,25.033782103611063,-5.936990103881239 -P000,agm001001040,15.204527828733536,-8.27554636941658 -P000,agm001001919,21.303183572813897,-8.435935053726139 -P000,agm001003503,24.690326355959755,-4.327203031854863 -P000,agm001004359,20.86226724052588,-7.829242292338797 -P025,agm001000110,19.71321916175995,-2.5294162683198125 -P025,agm001001040,13.487048183285616,-6.052898044522763 -P025,agm001001919,17.670764016174328,-5.438176257490443 -P025,agm001003503,19.12780621300846,-1.006088317132572 -P025,agm001004359,17.457486573286673,-4.870256961601473 -P050,agm001000110,17.715912861957246,0.3763531065086264 -P050,agm001001040,12.512987673468842,-4.029860765358488 -P050,agm001001919,15.46189689156924,-2.861038673041728 -P050,agm001003503,16.694829244071812,1.7723527015771132 -P050,agm001004359,15.13105554384206,-2.338786848103142 -P075,agm001000110,16.458372063400443,3.036751076640864 -P075,agm001001040,11.833602300101647,-2.13312140913839 -P075,agm001001919,14.04859528970246,-0.5622805372532179 -P075,agm001003503,15.23016215452774,4.255252390053997 -P075,agm001004359,13.686750914395638,-0.0937456687284251 -P100,agm001000110,15.506324317224594,5.527588395992431 -P100,agm001001040,11.324664691623278,-0.328009659077703 -P100,agm001001919,13.09661261512976,1.547445860475598 -P100,agm001003503,14.113463382167296,6.544697423965405 -P100,agm001004359,12.807027240161723,1.9611872723655293 -P125,agm001000110,14.768322896353382,7.887451277337899 -P125,agm001001040,10.9082879022004,1.4057552363067838 -P125,agm001001919,12.503731488724002,3.542297291380569 -P125,agm001003503,13.125761236229373,8.668450989536197 -P125,agm001004359,12.298360564455225,3.9181192151113904 -P150,agm001000110,14.163630823547033,10.143286868066651 -P150,agm001001040,10.539232357961302,3.078605889229442 -P150,agm001001919,12.045275482032784,5.456359868213672 -P150,agm001003503,12.192106272396378,10.643260044585606 -P150,agm001004359,11.883204516891276,5.803899385939424 diff --git a/tests/test_high_pressure_relaxation_regression.py b/tests/test_high_pressure_relaxation_regression.py index dda8eada5..05d344d25 100644 --- a/tests/test_high_pressure_relaxation_regression.py +++ b/tests/test_high_pressure_relaxation_regression.py @@ -2,73 +2,96 @@ from __future__ import annotations +from copy import copy from pathlib import Path import numpy as np import pandas as pd import pytest -PRESSURE_LABELS = ["P000", "P025", "P050", "P075", "P100", "P125", "P150"] -FIRST_N = 5 +from ml_peg.calcs.bulk_crystal.high_pressure_relaxation.calc_high_pressure_relaxation import ( # noqa: E501 + PRESSURE_LABELS, + PRESSURES, + load_structures, + relax_with_pressure, +) +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +N_TEST_STRUCTURES = 3 VOLUME_ATOL = 0.001 ENERGY_ATOL = 0.0001 -REPO_ROOT = Path(__file__).resolve().parents[1] -TEST_DATA_ROOT = REPO_ROOT / "tests" / "data" / "high_pressure_relaxation" -OUTPUT_ROOT = ( - REPO_ROOT - / "ml_peg" - / "calcs" - / "bulk_crystal" - / "high_pressure_relaxation" - / "outputs" -) +TEST_DATA_DIR = Path(__file__).parent / "data" / "high_pressure_relaxation" +MODEL_NAME = "mace-mpa-0" +REFERENCE_FILE = TEST_DATA_DIR / f"{MODEL_NAME}_first{N_TEST_STRUCTURES}.csv" + + +@pytest.fixture(scope="module") +def regression_results() -> pd.DataFrame: + """Run relaxation on first N test structures for all pressures.""" + models = load_models(current_models) + model = models[MODEL_NAME] + calc = model.get_calculator() + + all_results = [] + for pressure_gpa, pressure_label in zip(PRESSURES, PRESSURE_LABELS, strict=False): + structures = load_structures( + pressure_label, n_structures=N_TEST_STRUCTURES, random_select=False + ) + + for struct_data in structures: + atoms = struct_data["atoms"].copy() + atoms.calc = copy(calc) -TEST_CASES = [ - { - "name": "mace-mpa-0", - "output_model": "mace-mpa-0", - "data_file": TEST_DATA_ROOT / "mace-mpa-0_first5.csv", - }, -] + relaxed, converged, enthalpy_per_atom = relax_with_pressure( + atoms, pressure_gpa + ) + + pred_volume = ( + relaxed.get_volume() / len(relaxed) if relaxed is not None else None + ) + + all_results.append( + { + "pressure_label": pressure_label, + "mat_id": struct_data["mat_id"], + "volume": pred_volume, + "energy_per_atom": enthalpy_per_atom, + } + ) + + return pd.DataFrame(all_results) -@pytest.mark.parametrize("case", TEST_CASES, ids=[case["name"] for case in TEST_CASES]) @pytest.mark.parametrize("pressure_label", PRESSURE_LABELS) def test_high_pressure_relaxation_regression( - case: dict[str, Path | str], + regression_results: pd.DataFrame, pressure_label: str, ) -> None: - """Check entries against stored regression baselines.""" - data_path = Path(case["data_file"]) - results_path = ( - OUTPUT_ROOT / str(case["output_model"]) / f"results_{pressure_label}.csv" - ) + """Run calculations and check against stored regression baselines.""" + if not REFERENCE_FILE.exists(): + TEST_DATA_DIR.mkdir(parents=True, exist_ok=True) + regression_results.to_csv(REFERENCE_FILE, index=False) + pytest.skip(f"Generated baseline {REFERENCE_FILE.name}; re-run to validate") - assert data_path.exists(), f"Missing test data file: {data_path}" - assert results_path.exists(), f"Missing results file: {results_path}" - - expected = pd.read_csv(data_path) - expected_pressure = expected[expected["pressure_label"] == pressure_label] - - assert len(expected_pressure) == FIRST_N - - results = pd.read_csv(results_path).head(FIRST_N) - if len(results) < FIRST_N: - pytest.skip( - f"{case['output_model']} only has {len(results)} rows for {pressure_label}." - ) + expected = pd.read_csv(REFERENCE_FILE) + expected_p = expected[expected["pressure_label"] == pressure_label] + results_p = regression_results[ + regression_results["pressure_label"] == pressure_label + ] - assert expected_pressure["mat_id"].tolist() == results["mat_id"].tolist() + assert len(expected_p) == N_TEST_STRUCTURES + assert expected_p["mat_id"].tolist() == results_p["mat_id"].tolist() np.testing.assert_allclose( - results["pred_volume_per_atom"].to_numpy(), - expected_pressure["volume"].to_numpy(), + results_p["volume"].to_numpy(), + expected_p["volume"].to_numpy(), rtol=0.0, atol=VOLUME_ATOL, ) np.testing.assert_allclose( - results["pred_energy_per_atom"].to_numpy(), - expected_pressure["energy_per_atom"].to_numpy(), + results_p["energy_per_atom"].to_numpy(), + expected_p["energy_per_atom"].to_numpy(), rtol=0.0, atol=ENERGY_ATOL, )