From 6062685c37afca725d6ae7a08c1c2bc2a61181bc Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:44:41 +0800 Subject: [PATCH 1/5] feat: add stacking fault task --- lambench/metrics/downstream_tasks_metrics.yml | 4 ++ lambench/metrics/post_process.py | 1 + lambench/metrics/results/metadata.json | 12 ++++ lambench/models/ase_models.py | 10 ++- .../tasks/calculator/calculator_tasks.yml | 3 + .../stacking_fault/stacking_fault.py | 72 +++++++++++++++++++ .../tasks/calculator/stacking_fault/utils.py | 44 ++++++++++++ 7 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 lambench/tasks/calculator/stacking_fault/stacking_fault.py create mode 100644 lambench/tasks/calculator/stacking_fault/utils.py diff --git a/lambench/metrics/downstream_tasks_metrics.yml b/lambench/metrics/downstream_tasks_metrics.yml index 368a043..41b48c2 100644 --- a/lambench/metrics/downstream_tasks_metrics.yml +++ b/lambench/metrics/downstream_tasks_metrics.yml @@ -36,3 +36,7 @@ pressure: domain: Inorganic Materials metrics: [MAE] dummy: {"MAE": 2.505} # Estimated from the MAE between DFT and avg(DFT) over 270 structures +stacking_fault: + domain: Inorganic Materials + metrics: [MAE_E, MAE_dE] + dummy: {'MAE_E': 1544.8257, 'MAE_dE': 2.0} diff --git a/lambench/metrics/post_process.py b/lambench/metrics/post_process.py index 48e582c..9294586 100644 --- a/lambench/metrics/post_process.py +++ b/lambench/metrics/post_process.py @@ -121,6 +121,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel): "binding_energy", "rxn_barrier", "pressure", + "stacking_fault", ]: applicability_results[record.task_name] = record.metrics return applicability_results diff --git a/lambench/metrics/results/metadata.json b/lambench/metrics/results/metadata.json index 020e00b..8a61e88 100644 --- a/lambench/metrics/results/metadata.json +++ b/lambench/metrics/results/metadata.json @@ -949,6 +949,18 @@ "DISPLAY_NAME": "Success Rate", "DESCRIPTION": "The success rate of volume calculations at elevated pressures." } + }, + "stacking_fault": { + "DISPLAY_NAME": "Stacking Fault Energy", + "DESCRIPTION": "Evaluation of the stacking fault energy over 46 structures. For each structure, 10 frames were taken from each sliding trajectory, and are labeled with VASP PBE-PAW. During metrics calculation, a smoothing spline is applied to the energy profile along the sliding path to reduce numerical noise. All datapoints from the smoothed energy profile are used to compute the metrics.", + "MAE_E": { + "DISPLAY_NAME": "MAE_E (mJ/m²)", + "DESCRIPTION": "The mean absolute error of the stacking fault energy." + }, + "MAE_dE":{ + "DISPLAY_NAME": "MAE_dE (mJ/m²/unit displacement)", + "DESCRIPTION": "The mean absolute error of the change in stacking fault energy along the sliding path." + } } }, "adaptability_results": { diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py index e22d52b..5d99131 100644 --- a/lambench/models/ase_models.py +++ b/lambench/models/ase_models.py @@ -121,7 +121,8 @@ def _init_mace_calculator(self) -> Calculator: if self.model_domain == "molecules": head = "omol" else: - head = "oc20_usemppbe" + # head = "oc20_usemppbe" + head = "omat_pbe" if self.model_name == "MACE-MH-1": model_config["head"] = head return mace_mp(**model_config) @@ -315,6 +316,13 @@ def evaluate( fmax = task.calculator_params.get("fmax", 1e-3) max_steps = task.calculator_params.get("max_steps", 500) return {"metrics": run_inference(self, task.test_data, fmax, max_steps)} + elif task.task_name == "stacking_fault": + from lambench.tasks.calculator.stacking_fault.stacking_fault import ( + run_inference, + ) + + assert task.test_data is not None + return {"metrics": run_inference(self, task.test_data)} else: raise NotImplementedError(f"Task {task.task_name} is not implemented.") diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml index 99f7ec5..ae87353 100644 --- a/lambench/tasks/calculator/calculator_tasks.yml +++ b/lambench/tasks/calculator/calculator_tasks.yml @@ -42,3 +42,6 @@ pressure: calculator_params: fmax: 0.001 max_steps: 500 +stacking_fault: + test_data: /bohr/lambench-stacking-fault-dtjt/v1 + calculator_params: null diff --git a/lambench/tasks/calculator/stacking_fault/stacking_fault.py b/lambench/tasks/calculator/stacking_fault/stacking_fault.py new file mode 100644 index 0000000..322181e --- /dev/null +++ b/lambench/tasks/calculator/stacking_fault/stacking_fault.py @@ -0,0 +1,72 @@ +from ase.io import read +from pathlib import Path +from tqdm import tqdm +import numpy as np +import pandas as pd +from sklearn.metrics import mean_absolute_error +from lambench.models.ase_models import ASEModel +from lambench.tasks.calculator.stacking_fault.utils import fit_pchip + + +EV_A2_TO_MJ_M2 = 16021.766208 +NUM_POINTS = 200 + + +def calc_one_traj(traj, label, calc): + atoms_list = read(traj, ":") + df = pd.read_csv(label, header=0) + + a_vector = atoms_list[0].cell[0] + b_vector = atoms_list[0].cell[1] + area = np.linalg.norm(np.cross(a_vector, b_vector)) + + d = df["Displacement"] + preds = [] + for atoms in atoms_list: + atoms.calc = calc + preds.append(atoms.get_potential_energy()) + res = pd.DataFrame({"Displacement": d.to_list(), "Energy": preds}) + res["Energy"] = (res["Energy"] - res["Energy"].min()) * EV_A2_TO_MJ_M2 / area + + _, y_smooth_label = fit_pchip( + df, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS + ) + _, y_smooth_pred = fit_pchip( + res, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS + ) + + derivative_label = ( + (y_smooth_label[1:] - y_smooth_label[:-1]) + * (NUM_POINTS - 1) + / max(y_smooth_label) + ) + derivative_pred = ( + (y_smooth_pred[1:] - y_smooth_pred[:-1]) * (NUM_POINTS - 1) / max(y_smooth_pred) + ) + + return np.round(mean_absolute_error(y_smooth_label, y_smooth_pred), 4), np.round( + mean_absolute_error(derivative_label, derivative_pred), 4 + ) + + +def run_inference(model: ASEModel, test_data: Path) -> dict: + calc = model.calc + + traj_files = sorted(list(test_data.glob("*.traj"))) + label_files = sorted(list(test_data.glob("*.csv"))) + + energy_maes = [] + derivative_maes = [] + for traj_file, label_file in tqdm( + zip(traj_files, label_files), + total=len(traj_files), + desc="Calculating Stacking Fault Energies", + ): + energy_mae, derivative_mae = calc_one_traj(traj_file, label_file, calc) + energy_maes.append(energy_mae) + derivative_maes.append(derivative_mae) + + return { + "MAE_E": np.round(np.mean(energy_maes), 4), # mJ/m² + "MAE_dE": np.round(np.mean(derivative_maes), 4), # mJ/m²/unit displacement + } diff --git a/lambench/tasks/calculator/stacking_fault/utils.py b/lambench/tasks/calculator/stacking_fault/utils.py new file mode 100644 index 0000000..9f54574 --- /dev/null +++ b/lambench/tasks/calculator/stacking_fault/utils.py @@ -0,0 +1,44 @@ +from scipy.interpolate import PchipInterpolator +import pandas as pd +import numpy as np + + +def fit_pchip( + df: pd.DataFrame, x_col: str, y_col: str, num_points: int +) -> tuple[np.ndarray, np.ndarray]: + """ + Fit a PCHIP (Piecewise Cubic Hermite Interpolating Polynomial) to a dataframe. + PCHIP preserves monotonicity and is shape-preserving. + + Parameters: + ----------- + df : pandas.DataFrame + Dataframe with x and y columns + x_col : str + Name of the x column (default: 'Displacement') + y_col : str + Name of the y column (default: 'Energy') + num_points : int + Number of points for smooth interpolation (default: 200) + + Returns: + -------- + x_smooth : numpy.ndarray + Smooth x values + y_smooth : numpy.ndarray + Smooth y values from PCHIP interpolation + """ + # Extract x and y values + x = df[x_col].values + y = df[y_col].values + + # Create PCHIP interpolator + pchip = PchipInterpolator(x, y) + + # Generate smooth x values + x_smooth = np.linspace(x.min(), x.max(), num_points) + + # Evaluate PCHIP at smooth x values + y_smooth = pchip(x_smooth) + + return x_smooth, y_smooth From 9c901ea4d193f11602ee64b99f92fd929eb45146 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:56:10 +0800 Subject: [PATCH 2/5] Update lambench/metrics/results/metadata.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lambench/metrics/results/metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lambench/metrics/results/metadata.json b/lambench/metrics/results/metadata.json index 8a61e88..40048e2 100644 --- a/lambench/metrics/results/metadata.json +++ b/lambench/metrics/results/metadata.json @@ -952,7 +952,7 @@ }, "stacking_fault": { "DISPLAY_NAME": "Stacking Fault Energy", - "DESCRIPTION": "Evaluation of the stacking fault energy over 46 structures. For each structure, 10 frames were taken from each sliding trajectory, and are labeled with VASP PBE-PAW. During metrics calculation, a smoothing spline is applied to the energy profile along the sliding path to reduce numerical noise. All datapoints from the smoothed energy profile are used to compute the metrics.", + "DESCRIPTION": "Evaluation of the stacking fault energy over 46 structures. For each structure, 10 frames were taken from each sliding trajectory, and are labeled with VASP PBE-PAW. During metrics calculation, a shape-preserving PCHIP interpolation is applied to the energy profile along the sliding path, and all datapoints from the interpolated energy profile are used to compute the metrics.", "MAE_E": { "DISPLAY_NAME": "MAE_E (mJ/m²)", "DESCRIPTION": "The mean absolute error of the stacking fault energy." From 28d8e0c47a68e92c7ec8ed2908c1d3aa2a2323b3 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:58:40 +0800 Subject: [PATCH 3/5] Update lambench/metrics/results/metadata.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lambench/metrics/results/metadata.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lambench/metrics/results/metadata.json b/lambench/metrics/results/metadata.json index 40048e2..dd60faf 100644 --- a/lambench/metrics/results/metadata.json +++ b/lambench/metrics/results/metadata.json @@ -958,8 +958,8 @@ "DESCRIPTION": "The mean absolute error of the stacking fault energy." }, "MAE_dE":{ - "DISPLAY_NAME": "MAE_dE (mJ/m²/unit displacement)", - "DESCRIPTION": "The mean absolute error of the change in stacking fault energy along the sliding path." + "DISPLAY_NAME": "MAE_dE (1/unit displacement)", + "DESCRIPTION": "The mean absolute error of the normalized change in stacking fault energy per unit displacement along the sliding path (derivative divided by the maximum stacking fault energy)." } } }, From b625f3649fa4d0980e5465709aff9bbfb679e9be Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:59:28 +0800 Subject: [PATCH 4/5] Update lambench/tasks/calculator/stacking_fault/utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- lambench/tasks/calculator/stacking_fault/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lambench/tasks/calculator/stacking_fault/utils.py b/lambench/tasks/calculator/stacking_fault/utils.py index 9f54574..1f3678e 100644 --- a/lambench/tasks/calculator/stacking_fault/utils.py +++ b/lambench/tasks/calculator/stacking_fault/utils.py @@ -15,11 +15,11 @@ def fit_pchip( df : pandas.DataFrame Dataframe with x and y columns x_col : str - Name of the x column (default: 'Displacement') + Name of the x column y_col : str - Name of the y column (default: 'Energy') + Name of the y column num_points : int - Number of points for smooth interpolation (default: 200) + Number of points for smooth interpolation Returns: -------- From 90740edff34ad810769f8b4892e7ff421d87b926 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 28 Jan 2026 16:03:21 +0800 Subject: [PATCH 5/5] chore: add dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5c30534..8dbe87f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "ase", "python-dotenv", "numpy", + "scipy", "pydantic", "pyyaml", "SQLAlchemy[pymysql]",