deepmodeling · anyangml · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026
diff --git a/lambench/metrics/downstream_tasks_metrics.yml b/lambench/metrics/downstream_tasks_metrics.yml
@@ -32,3 +32,7 @@ rxn_barrier:
   domain: Molecules
   metrics: [MAE]
   dummy: {"MAE": 20.975}
+pressure:
+  domain: Inorganic Materials
+  metrics: [MAE]
+  dummy: {"MAE": 2.505} # Estimated from the MAE between DFT and avg(DFT) over 270 structures
diff --git a/lambench/metrics/post_process.py b/lambench/metrics/post_process.py
@@ -120,6 +120,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel):
             "vacancy",
             "binding_energy",
             "rxn_barrier",
+            "pressure",
         ]:
             applicability_results[record.task_name] = record.metrics
     return applicability_results

diff --git a/lambench/metrics/results/README.md b/lambench/metrics/results/README.md
@@ -19,7 +19,7 @@ Large atomistic models (LAM), also known as machine learning interatomic potenti
 The following changes have been made compared to the previously released version v0.3.1:
 - Added new models: MACE-MH-1, DPA-3.2-5M
 - Updated `Force Field Prediction` tasks, and for the domain of `Molecules`, two sets of labels were provided to support OMol25-trained models.
-- Added new `Property Calculation` tasks: oxygen vacancy formation energy prediction, protein-ligand binding energy prediction, and reaction energy barrier prediction.
+- Added new `Property Calculation` tasks: oxygen vacancy formation energy prediction, protein-ligand binding energy prediction, reaction energy barrier prediction, and volume prediction for materials under pressure.
 
 <span style="color:red">⚠️ Note: To assess full LAM capacity, we use OMat24-trained task heads for *Force Field Prediction* in Inorganic Materials and Catalysis, and OMol25-trained task heads for Molecules, when available. As for *Property Calculation*, we follow a similar approach, but use OC20-trained task heads for Catalysis when available, as this tends to yield better performance.</span>
 

diff --git a/lambench/metrics/results/metadata.json b/lambench/metrics/results/metadata.json
@@ -933,6 +933,22 @@
         "DISPLAY_NAME": "Success Rate",
         "DESCRIPTION": "The success rate of reaction barrier calculations."
       }
+    },
+    "pressure": {
+      "DISPLAY_NAME": "Pressurized Materials",
+      "DESCRIPTION": "Evaluation of the volume of 45 structures at each of six elevated pressures from 25 GPa to 150 GPa. Structures are obtained from `Antoine Loew et al 2026 J. Phys. Mater. 9 015010. https://iopscience.iop.org/article/10.1088/2515-7639/ae2ba8`.",
+      "MAE": {
+        "DISPLAY_NAME": "MAE (Å^3/atom)",
+        "DESCRIPTION": "The mean absolute error of the volume per atom across all configurations and all pressures."
+      },
+      "RMSE": {
+        "DISPLAY_NAME": "RMSE (Å^3/atom)",
+        "DESCRIPTION": "The root mean squared error of the volume per atom across all configurations and all pressures."
+      },
+      "success_rate":{
+        "DISPLAY_NAME": "Success Rate",
+        "DESCRIPTION": "The success rate of volume calculations at elevated pressures."
+      }
     }
   },
   "adaptability_results": {

diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py
@@ -308,6 +308,13 @@ def evaluate(
 
                 assert task.test_data is not None
                 return {"metrics": run_inference(self, task.test_data)}
+            elif task.task_name == "pressure":
+                from lambench.tasks.calculator.pressure.pressure import run_inference
+
+                assert task.test_data is not None
+                fmax = task.calculator_params.get("fmax", 1e-3)
+                max_steps = task.calculator_params.get("max_steps", 500)
+                return {"metrics": run_inference(self, task.test_data, fmax, max_steps)}
             else:
                 raise NotImplementedError(f"Task {task.task_name} is not implemented.")
 

diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml
@@ -37,3 +37,8 @@ binding_energy:
 rxn_barrier:
   test_data: /bohr/lambench-BH876-uplk/v1/BH876
   calculator_params: null
+pressure:
+  test_data: /bohr/lambench-pressure-arjy/v1
+  calculator_params:
+    fmax: 0.001
+    max_steps: 500
diff --git a/lambench/tasks/calculator/pressure/pressure.py b/lambench/tasks/calculator/pressure/pressure.py
@@ -0,0 +1,85 @@
+# ruff: noqa: E402
+"""
+The test data is obtained from the following paper:
+
+Antoine Loew et al 2026 J. Phys. Mater. 9 015010 Universal machine learning potentials under pressure
+DOI 10.1088/2515-7639/ae2ba8
+
+We downsampled the original test set to 45 structures at each pressure point (25, 50, 75, 100, 125, 150 GPa)
+"""
+
+from ase.io import read
+from ase import Atoms
+from ase.calculators.calculator import Calculator
+from ase.optimize import FIRE
+from ase.filters import FrechetCellFilter
+from pathlib import Path
+from tqdm import tqdm
+from sklearn.metrics import root_mean_squared_error, mean_absolute_error
+from lambench.models.ase_models import ASEModel
+import logging
+
+KBAR_2_EVA3 = 6.2415e-4  # eV/A^3 per kbar (1 eV/A^3 is about 1602.18 kbar)
+GPA_2_KBAR = 10  # kbar per GPa
+
+
+def optimize(structure: Atoms, target_p: float, fmax: float, steps: int) -> Atoms:
+    target_p = target_p * GPA_2_KBAR * KBAR_2_EVA3  # to eV/A3
+    cell_filter = FrechetCellFilter(structure, scalar_pressure=target_p)
+    opt = FIRE(cell_filter)
+    opt.run(fmax=fmax, steps=steps)
+    return cell_filter.atoms
+
+
+def test_one(
+    init: Atoms,
+    final: Atoms,
+    target_p: float,
+    calc: Calculator,
+    fmax: float,
+    max_steps: int,
+) -> tuple[float, float]:
+    init.calc = calc
+    optimized = optimize(init, int(target_p), fmax, max_steps)
+    natoms = len(init)
+    return final.get_volume() / natoms, optimized.get_volume() / natoms
+
+
+def run_inference(
+    model: ASEModel,
+    test_data: Path,
+    fmax: float,
+    max_steps: int,
+) -> dict[str, float]:
+    calc = model.calc
+    all_labels = []
+    all_preds = []
+    num_samples = 0
+    num_fails = 0
+
+    for pressure in tqdm(["025", "050", "075", "100", "125", "150"]):
+        init_traj = read(test_data / f"P{pressure}.traj", ":")
+        final_traj = read(test_data / f"P{pressure}.traj", ":")
+        for i in tqdm(range(len(init_traj))):
+            init = init_traj[i]
+            final = final_traj[i]
+            assert init.get_chemical_formula() == final.get_chemical_formula()
+            try:
+                dft, lam = test_one(init, final, int(pressure), calc, fmax, max_steps)
+            except Exception as e:
+                logging.error(
+                    f"Error during test_one at pressure {pressure}, index {i}: {e}"
+                )
+                dft, lam = None, None
+            if dft is None or lam is None:
+                num_fails += 1
+                continue
+            num_samples += 1
+            all_labels.append(dft)
+            all_preds.append(lam)
+
+    return {
+        "MAE": mean_absolute_error(all_labels, all_preds),  # A3/atom
+        "RMSE": root_mean_squared_error(all_labels, all_preds),  # A3/atom
+        "success_rate": (num_samples - num_fails) / num_samples,
+    }