From 85fd67b41f7b629f50f00ff7776b3c0ebeca5e40 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 11:59:18 +0800 Subject: [PATCH 1/6] feat: support pressure task --- lambench/metrics/downstream_tasks_metrics.yml | 4 + lambench/metrics/post_process.py | 1 + lambench/metrics/results/README.md | 2 +- lambench/metrics/results/metadata.json | 16 ++++ lambench/models/ase_models.py | 7 ++ .../tasks/calculator/calculator_tasks.yml | 5 ++ .../tasks/calculator/pressure/pressure.py | 75 +++++++++++++++++++ 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 lambench/tasks/calculator/pressure/pressure.py diff --git a/lambench/metrics/downstream_tasks_metrics.yml b/lambench/metrics/downstream_tasks_metrics.yml index 0ded912..368a043 100644 --- a/lambench/metrics/downstream_tasks_metrics.yml +++ b/lambench/metrics/downstream_tasks_metrics.yml @@ -32,3 +32,7 @@ rxn_barrier: domain: Molecules metrics: [MAE] dummy: {"MAE": 20.975} +pressure: + domain: Inorganic Materials + metrics: [MAE] + dummy: {"MAE": 2.505} # Estimated from the MAE between DFT and avg(DFT) over 270 structures diff --git a/lambench/metrics/post_process.py b/lambench/metrics/post_process.py index 0f25d58..48e582c 100644 --- a/lambench/metrics/post_process.py +++ b/lambench/metrics/post_process.py @@ -120,6 +120,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel): "vacancy", "binding_energy", "rxn_barrier", + "pressure", ]: applicability_results[record.task_name] = record.metrics return applicability_results diff --git a/lambench/metrics/results/README.md b/lambench/metrics/results/README.md index 1f26f2e..19b3b3f 100644 --- a/lambench/metrics/results/README.md +++ b/lambench/metrics/results/README.md @@ -19,7 +19,7 @@ Large atomistic models (LAM), also known as machine learning interatomic potenti The following changes have been made compared to the previouly release version v0.3.1: - Added new models: MACE-MH-1, DPA-3.2-5M - Updated `Force Field Prediction` tasks, and for the domain of `Molecules`, two sets of labels were provided to support OMol25-trained models. -- Added new `Property Calculation` tasks: oxygen vacancy formation energy prediction, protein-ligand binding energy prediction, and reaction energy barrier prediction. +- Added new `Property Calculation` tasks: oxygen vacancy formation energy prediction, protein-ligand binding energy prediction, reaction energy barrier prediction, and volume prediction from materials under pressure. ⚠️ Note: To assess full LAM capacity, we use OMat24-trained task heads for *Force Field Prediction* in Inorganic Materials and Catalysis, and OMol25-trained task heads for Molecules, when available. As for *Property Calculation*, we follow a similar approach, but use OC20-trained task heads for Catalysis when available, as this tends to yield better performance. diff --git a/lambench/metrics/results/metadata.json b/lambench/metrics/results/metadata.json index ca25d73..020e00b 100644 --- a/lambench/metrics/results/metadata.json +++ b/lambench/metrics/results/metadata.json @@ -933,6 +933,22 @@ "DISPLAY_NAME": "Success Rate", "DESCRIPTION": "The success rate of reaction barrier calculations." } + }, + "pressure": { + "DISPLAY_NAME": "Pressurized Materials", + "DESCRIPTION": "Evaluation of the volume over 45 structures at elevated pressure from 25 GPa to 150 GPa. Structures are obtained from `Antoine Loew et al 2026 J. Phys. Mater. 9 015010. https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8.` ", + "MAE": { + "DISPLAY_NAME": "MAE (Å^3/atom)", + "DESCRIPTION": "The mean absolute error of the volume per atom across all configurations and all pressures." + }, + "RMSE": { + "DISPLAY_NAME": "RMSE (Å^3/atom)", + "DESCRIPTION": "The root mean squared error of the volume per atom across all configurations and all pressures." + }, + "success_rate":{ + "DISPLAY_NAME": "Success Rate", + "DESCRIPTION": "The success rate of volume calculations at elevated pressures." + } } }, "adaptability_results": { diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py index c7b3fde..e22d52b 100644 --- a/lambench/models/ase_models.py +++ b/lambench/models/ase_models.py @@ -308,6 +308,13 @@ def evaluate( assert task.test_data is not None return {"metrics": run_inference(self, task.test_data)} + elif task.task_name == "pressure": + from lambench.tasks.calculator.pressure.pressure import run_inference + + assert task.test_data is not None + fmax = task.calculator_params.get("fmax", 1e-3) + max_steps = task.calculator_params.get("max_steps", 500) + return {"metrics": run_inference(self, task.test_data, fmax, max_steps)} else: raise NotImplementedError(f"Task {task.task_name} is not implemented.") diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml index ef7896b..a518429 100644 --- a/lambench/tasks/calculator/calculator_tasks.yml +++ b/lambench/tasks/calculator/calculator_tasks.yml @@ -37,3 +37,8 @@ binding_energy: rxn_barrier: test_data: /bohr/lambench-BH876-uplk/v1/BH876 calculator_params: null +pressure: + test_data: /bohr/lambench-pressure-arjy/v1 + calculator_params: + fmax: 0.001 + max_steps: 500 \ No newline at end of file diff --git a/lambench/tasks/calculator/pressure/pressure.py b/lambench/tasks/calculator/pressure/pressure.py new file mode 100644 index 0000000..5b7cc29 --- /dev/null +++ b/lambench/tasks/calculator/pressure/pressure.py @@ -0,0 +1,75 @@ +# ruff: noqa: E402 +""" +The test data is obtained from the following paper: + +Antoine Loew et al 2026 J. Phys. Mater. 9 015010 Universal machine learning potentials under pressure +DOI 10.1088/2515-7639/ae2ba8 + +We downsampled the original test set to 45 structures at each pressure point (25, 50, 75, 100, 125, 150 GPa) +""" + +from ase.io import read +from ase import Atoms, Calculator +from ase.optimize import FIRE +from ase.filters import FrechetCellFilter +from pathlib import Path +from tqdm import tqdm +from sklearn.metrics import root_mean_squared_error, mean_absolute_error +from lambench.models.ase_models import ASEModel +import numpy as np +from collections import defaultdict + + +KBAR_2_EVA3 = 6.2415e-4 +GPA_2_KBAR = 10 + + +def optimize(structure: Atoms, target_p: float, fmax: float, steps: int) -> Atoms: + target_p = target_p * GPA_2_KBAR * KBAR_2_EVA3 # to eV/A3 + filter = FrechetCellFilter(structure,scalar_pressure=target_p) + opt = FIRE(filter) + opt.run(fmax=fmax,steps=steps) + return filter.atoms + + +def test_one(init: Atoms, final: Atoms, target_p: float, calc: Calculator, fmax: float, max_steps: int) -> tuple[float, float]: + init.calc = calc + optimized = optimize(init, int(target_p), fmax, max_steps) + natoms = len(init) + return final.get_volume()/natoms, optimized.get_volume()/natoms + + +def run_inference( + model: ASEModel, + test_data: Path, + fmax: float, + max_steps: int, +) -> dict[str, float]: + calc = model.calc + final_res = defaultdict(list) + num_samples = 0 + num_fails = 0 + + for pressure in tqdm(["025", "050", "075", "100", "125", "150"]): + init_traj = read(f"{test_data}/P{pressure}.traj",":") + final_traj = read(f"{test_data}/P{pressure}.traj",":") + for i in tqdm(range(len(init_traj))): + init = init_traj[i] + final = final_traj[i] + assert init.get_chemical_formula() == final.get_chemical_formula() + try: + dft, lam = test_one(init, final, int(pressure), calc, fmax, max_steps) + except: + dft, lam = None, None + if not dft: + num_fails += 1 + continue + num_samples += 1 + final_res[f"{pressure}_labels"].append(dft) + final_res[f"{pressure}_preds"].append(lam) + + return { + "MAE": mean_absolute_error(final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"]), # A3/atom + "RMSE": root_mean_squared_error(final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"]), # A3/atom + "success_rate": (num_samples-num_fails)/num_samples, + } \ No newline at end of file From 15236938a03ee975ee4f5b9bda5ade9e9ee9795b Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 12:05:55 +0800 Subject: [PATCH 2/6] chore: format --- .../tasks/calculator/calculator_tasks.yml | 4 +- .../tasks/calculator/pressure/pressure.py | 45 ++++++++++++------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml index a518429..99f7ec5 100644 --- a/lambench/tasks/calculator/calculator_tasks.yml +++ b/lambench/tasks/calculator/calculator_tasks.yml @@ -39,6 +39,6 @@ rxn_barrier: calculator_params: null pressure: test_data: /bohr/lambench-pressure-arjy/v1 - calculator_params: + calculator_params: fmax: 0.001 - max_steps: 500 \ No newline at end of file + max_steps: 500 diff --git a/lambench/tasks/calculator/pressure/pressure.py b/lambench/tasks/calculator/pressure/pressure.py index 5b7cc29..cb7e27b 100644 --- a/lambench/tasks/calculator/pressure/pressure.py +++ b/lambench/tasks/calculator/pressure/pressure.py @@ -15,28 +15,34 @@ from pathlib import Path from tqdm import tqdm from sklearn.metrics import root_mean_squared_error, mean_absolute_error -from lambench.models.ase_models import ASEModel -import numpy as np +from lambench.models.ase_models import ASEModel from collections import defaultdict - +import logging KBAR_2_EVA3 = 6.2415e-4 GPA_2_KBAR = 10 def optimize(structure: Atoms, target_p: float, fmax: float, steps: int) -> Atoms: - target_p = target_p * GPA_2_KBAR * KBAR_2_EVA3 # to eV/A3 - filter = FrechetCellFilter(structure,scalar_pressure=target_p) + target_p = target_p * GPA_2_KBAR * KBAR_2_EVA3 # to eV/A3 + filter = FrechetCellFilter(structure, scalar_pressure=target_p) opt = FIRE(filter) - opt.run(fmax=fmax,steps=steps) + opt.run(fmax=fmax, steps=steps) return filter.atoms - -def test_one(init: Atoms, final: Atoms, target_p: float, calc: Calculator, fmax: float, max_steps: int) -> tuple[float, float]: + +def test_one( + init: Atoms, + final: Atoms, + target_p: float, + calc: Calculator, + fmax: float, + max_steps: int, +) -> tuple[float, float]: init.calc = calc optimized = optimize(init, int(target_p), fmax, max_steps) natoms = len(init) - return final.get_volume()/natoms, optimized.get_volume()/natoms + return final.get_volume() / natoms, optimized.get_volume() / natoms def run_inference( @@ -51,15 +57,18 @@ def run_inference( num_fails = 0 for pressure in tqdm(["025", "050", "075", "100", "125", "150"]): - init_traj = read(f"{test_data}/P{pressure}.traj",":") - final_traj = read(f"{test_data}/P{pressure}.traj",":") + init_traj = read(f"{test_data}/P{pressure}.traj", ":") + final_traj = read(f"{test_data}/P{pressure}.traj", ":") for i in tqdm(range(len(init_traj))): init = init_traj[i] final = final_traj[i] assert init.get_chemical_formula() == final.get_chemical_formula() try: dft, lam = test_one(init, final, int(pressure), calc, fmax, max_steps) - except: + except Exception as e: + logging.error( + f"Error during test_one at pressure {pressure}, index {i}: {e}" + ) dft, lam = None, None if not dft: num_fails += 1 @@ -69,7 +78,11 @@ def run_inference( final_res[f"{pressure}_preds"].append(lam) return { - "MAE": mean_absolute_error(final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"]), # A3/atom - "RMSE": root_mean_squared_error(final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"]), # A3/atom - "success_rate": (num_samples-num_fails)/num_samples, - } \ No newline at end of file + "MAE": mean_absolute_error( + final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"] + ), # A3/atom + "RMSE": root_mean_squared_error( + final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"] + ), # A3/atom + "success_rate": (num_samples - num_fails) / num_samples, + } From 8d90d5af8572dbd7b16c917de72e50cf5b2834c9 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 12:44:02 +0800 Subject: [PATCH 3/6] Update lambench/tasks/calculator/pressure/pressure.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lambench/tasks/calculator/pressure/pressure.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lambench/tasks/calculator/pressure/pressure.py b/lambench/tasks/calculator/pressure/pressure.py index cb7e27b..0f5f526 100644 --- a/lambench/tasks/calculator/pressure/pressure.py +++ b/lambench/tasks/calculator/pressure/pressure.py @@ -9,7 +9,8 @@ """ from ase.io import read -from ase import Atoms, Calculator +from ase import Atoms +from ase.calculators.calculator import Calculator from ase.optimize import FIRE from ase.filters import FrechetCellFilter from pathlib import Path From 8e4ab9159db1b4b5c6f0bf8f83ab0d9f2f31af21 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 12:45:46 +0800 Subject: [PATCH 4/6] Update lambench/tasks/calculator/pressure/pressure.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lambench/tasks/calculator/pressure/pressure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lambench/tasks/calculator/pressure/pressure.py b/lambench/tasks/calculator/pressure/pressure.py index 0f5f526..cd477a7 100644 --- a/lambench/tasks/calculator/pressure/pressure.py +++ b/lambench/tasks/calculator/pressure/pressure.py @@ -71,7 +71,7 @@ def run_inference( f"Error during test_one at pressure {pressure}, index {i}: {e}" ) dft, lam = None, None - if not dft: + if dft is None or lam is None: num_fails += 1 continue num_samples += 1 From 4e5cd8a1b59ea4aa528ed4c8af62d7208f147827 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 12:46:17 +0800 Subject: [PATCH 5/6] Update lambench/tasks/calculator/pressure/pressure.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lambench/tasks/calculator/pressure/pressure.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lambench/tasks/calculator/pressure/pressure.py b/lambench/tasks/calculator/pressure/pressure.py index cd477a7..75ee200 100644 --- a/lambench/tasks/calculator/pressure/pressure.py +++ b/lambench/tasks/calculator/pressure/pressure.py @@ -26,10 +26,10 @@ def optimize(structure: Atoms, target_p: float, fmax: float, steps: int) -> Atoms: target_p = target_p * GPA_2_KBAR * KBAR_2_EVA3 # to eV/A3 - filter = FrechetCellFilter(structure, scalar_pressure=target_p) - opt = FIRE(filter) + cell_filter = FrechetCellFilter(structure, scalar_pressure=target_p) + opt = FIRE(cell_filter) opt.run(fmax=fmax, steps=steps) - return filter.atoms + return cell_filter.atoms def test_one( From 8d2088ab9460da78e81432deac174986eb322f82 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 12:51:36 +0800 Subject: [PATCH 6/6] fix: metrics calculation --- .../tasks/calculator/pressure/pressure.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/lambench/tasks/calculator/pressure/pressure.py b/lambench/tasks/calculator/pressure/pressure.py index 75ee200..5427d24 100644 --- a/lambench/tasks/calculator/pressure/pressure.py +++ b/lambench/tasks/calculator/pressure/pressure.py @@ -17,7 +17,6 @@ from tqdm import tqdm from sklearn.metrics import root_mean_squared_error, mean_absolute_error from lambench.models.ase_models import ASEModel -from collections import defaultdict import logging KBAR_2_EVA3 = 6.2415e-4 @@ -53,13 +52,14 @@ def run_inference( max_steps: int, ) -> dict[str, float]: calc = model.calc - final_res = defaultdict(list) + all_labels = [] + all_preds = [] num_samples = 0 num_fails = 0 for pressure in tqdm(["025", "050", "075", "100", "125", "150"]): - init_traj = read(f"{test_data}/P{pressure}.traj", ":") - final_traj = read(f"{test_data}/P{pressure}.traj", ":") + init_traj = read(test_data / f"P{pressure}.traj", ":") + final_traj = read(test_data / f"P{pressure}.traj", ":") for i in tqdm(range(len(init_traj))): init = init_traj[i] final = final_traj[i] @@ -75,15 +75,11 @@ def run_inference( num_fails += 1 continue num_samples += 1 - final_res[f"{pressure}_labels"].append(dft) - final_res[f"{pressure}_preds"].append(lam) + all_labels.append(dft) + all_preds.append(lam) return { - "MAE": mean_absolute_error( - final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"] - ), # A3/atom - "RMSE": root_mean_squared_error( - final_res[f"{pressure}_labels"], final_res[f"{pressure}_preds"] - ), # A3/atom + "MAE": mean_absolute_error(all_labels, all_preds), # A3/atom + "RMSE": root_mean_squared_error(all_labels, all_preds), # A3/atom "success_rate": (num_samples - num_fails) / num_samples, }