deepmodeling · anyangml · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026
diff --git a/lambench/metrics/downstream_tasks_metrics.yml b/lambench/metrics/downstream_tasks_metrics.yml
@@ -36,3 +36,7 @@ pressure:
   domain: Inorganic Materials
   metrics: [MAE]
   dummy: {"MAE": 2.505} # Estimated from the MAE between DFT and avg(DFT) over 270 structures
+stacking_fault:
+  domain: Inorganic Materials
+  metrics: [MAE_E, MAE_dE]
+  dummy: {'MAE_E': 1544.8257, 'MAE_dE': 2.0}
diff --git a/lambench/metrics/post_process.py b/lambench/metrics/post_process.py
@@ -121,6 +121,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel):
             "binding_energy",
             "rxn_barrier",
             "pressure",
+            "stacking_fault",
         ]:
             applicability_results[record.task_name] = record.metrics
     return applicability_results

diff --git a/lambench/metrics/results/metadata.json b/lambench/metrics/results/metadata.json
@@ -949,6 +949,18 @@
         "DISPLAY_NAME": "Success Rate",
         "DESCRIPTION": "The success rate of volume calculations at elevated pressures."
       }
+    },
+    "stacking_fault": {
+      "DISPLAY_NAME": "Stacking Fault Energy",
+      "DESCRIPTION": "Evaluation of the stacking fault energy over 46 structures. For each structure, 10 frames were taken from each sliding trajectory, and are labeled with VASP PBE-PAW. During metrics calculation, a shape-preserving PCHIP interpolation is applied to the energy profile along the sliding path, and all datapoints from the interpolated energy profile are used to compute the metrics.",
+      "MAE_E": {
+        "DISPLAY_NAME": "MAE_E (mJ/m²)",
+        "DESCRIPTION": "The mean absolute error of the stacking fault energy."
+      },
+      "MAE_dE":{
+        "DISPLAY_NAME": "MAE_dE (1/unit displacement)",
+        "DESCRIPTION": "The mean absolute error of the normalized change in stacking fault energy per unit displacement along the sliding path (derivative divided by the maximum stacking fault energy)."
+      }
     }
   },
   "adaptability_results": {

diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py
@@ -121,7 +121,8 @@ def _init_mace_calculator(self) -> Calculator:
         if self.model_domain == "molecules":
             head = "omol"
         else:
-            head = "oc20_usemppbe"
+            # head = "oc20_usemppbe"
+            head = "omat_pbe"
         if self.model_name == "MACE-MH-1":
             model_config["head"] = head
         return mace_mp(**model_config)
@@ -315,6 +316,13 @@ def evaluate(
                 fmax = task.calculator_params.get("fmax", 1e-3)
                 max_steps = task.calculator_params.get("max_steps", 500)
                 return {"metrics": run_inference(self, task.test_data, fmax, max_steps)}
+            elif task.task_name == "stacking_fault":
+                from lambench.tasks.calculator.stacking_fault.stacking_fault import (
+                    run_inference,
+                )
+
+                assert task.test_data is not None
+                return {"metrics": run_inference(self, task.test_data)}
             else:
                 raise NotImplementedError(f"Task {task.task_name} is not implemented.")
 

diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml
@@ -42,3 +42,6 @@ pressure:
   calculator_params:
     fmax: 0.001
     max_steps: 500
+stacking_fault:
+  test_data: /bohr/lambench-stacking-fault-dtjt/v1
+  calculator_params: null
diff --git a/lambench/tasks/calculator/stacking_fault/stacking_fault.py b/lambench/tasks/calculator/stacking_fault/stacking_fault.py
@@ -0,0 +1,72 @@
+from ase.io import read
+from pathlib import Path
+from tqdm import tqdm
+import numpy as np
+import pandas as pd
+from sklearn.metrics import mean_absolute_error
+from lambench.models.ase_models import ASEModel
+from lambench.tasks.calculator.stacking_fault.utils import fit_pchip
+
+
+EV_A2_TO_MJ_M2 = 16021.766208
+NUM_POINTS = 200
+
+
+def calc_one_traj(traj, label, calc):
+    atoms_list = read(traj, ":")
+    df = pd.read_csv(label, header=0)
+
+    a_vector = atoms_list[0].cell[0]
+    b_vector = atoms_list[0].cell[1]
+    area = np.linalg.norm(np.cross(a_vector, b_vector))
+
+    d = df["Displacement"]
+    preds = []
+    for atoms in atoms_list:
+        atoms.calc = calc
+        preds.append(atoms.get_potential_energy())
+    res = pd.DataFrame({"Displacement": d.to_list(), "Energy": preds})
+    res["Energy"] = (res["Energy"] - res["Energy"].min()) * EV_A2_TO_MJ_M2 / area
+
+    _, y_smooth_label = fit_pchip(
+        df, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS
+    )
+    _, y_smooth_pred = fit_pchip(
+        res, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS
+    )
+
+    derivative_label = (
+        (y_smooth_label[1:] - y_smooth_label[:-1])
+        * (NUM_POINTS - 1)
+        / max(y_smooth_label)
+    )
+    derivative_pred = (
+        (y_smooth_pred[1:] - y_smooth_pred[:-1]) * (NUM_POINTS - 1) / max(y_smooth_pred)
+    )
+
+    return np.round(mean_absolute_error(y_smooth_label, y_smooth_pred), 4), np.round(
+        mean_absolute_error(derivative_label, derivative_pred), 4
+    )
+
+
+def run_inference(model: ASEModel, test_data: Path) -> dict:
+    calc = model.calc
+
+    traj_files = sorted(list(test_data.glob("*.traj")))
+    label_files = sorted(list(test_data.glob("*.csv")))
+
+    energy_maes = []
+    derivative_maes = []
+    for traj_file, label_file in tqdm(
+        zip(traj_files, label_files),
+        total=len(traj_files),
+        desc="Calculating Stacking Fault Energies",
+    ):
+        energy_mae, derivative_mae = calc_one_traj(traj_file, label_file, calc)
+        energy_maes.append(energy_mae)
+        derivative_maes.append(derivative_mae)
+
+    return {
+        "MAE_E": np.round(np.mean(energy_maes), 4),  # mJ/m²
+        "MAE_dE": np.round(np.mean(derivative_maes), 4),  # mJ/m²/unit displacement
+    }
diff --git a/lambench/tasks/calculator/stacking_fault/utils.py b/lambench/tasks/calculator/stacking_fault/utils.py
@@ -0,0 +1,44 @@
+from scipy.interpolate import PchipInterpolator
+import pandas as pd
+import numpy as np
+
+
+def fit_pchip(
+    df: pd.DataFrame, x_col: str, y_col: str, num_points: int
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Fit a PCHIP (Piecewise Cubic Hermite Interpolating Polynomial) to a dataframe.
+    PCHIP preserves monotonicity and is shape-preserving.
+
+    Parameters:
+    -----------
+    df : pandas.DataFrame
+        Dataframe with x and y columns
+    x_col : str
+        Name of the x column
+    y_col : str
+        Name of the y column
+    num_points : int
+        Number of points for smooth interpolation
+
+    Returns:
+    --------
+    x_smooth : numpy.ndarray
+        Smooth x values
+    y_smooth : numpy.ndarray
+        Smooth y values from PCHIP interpolation
+    """
+    # Extract x and y values
+    x = df[x_col].values
+    y = df[y_col].values
+
+    # Create PCHIP interpolator
+    pchip = PchipInterpolator(x, y)
+
+    # Generate smooth x values
+    x_smooth = np.linspace(x.min(), x.max(), num_points)
+
+    # Evaluate PCHIP at smooth x values
+    y_smooth = pchip(x_smooth)
+
+    return x_smooth, y_smooth
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,6 +9,7 @@ dependencies = [
 	"ase",
 	"python-dotenv",
 	"numpy",
+	"scipy",
 	"pydantic",
 	"pyyaml",
 	"SQLAlchemy[pymysql]",