diff --git a/lambench/metrics/downstream_tasks_metrics.yml b/lambench/metrics/downstream_tasks_metrics.yml index 368a043..41b48c2 100644 --- a/lambench/metrics/downstream_tasks_metrics.yml +++ b/lambench/metrics/downstream_tasks_metrics.yml @@ -36,3 +36,7 @@ pressure: domain: Inorganic Materials metrics: [MAE] dummy: {"MAE": 2.505} # Estimated from the MAE between DFT and avg(DFT) over 270 structures +stacking_fault: + domain: Inorganic Materials + metrics: [MAE_E, MAE_dE] + dummy: {'MAE_E': 1544.8257, 'MAE_dE': 2.0} diff --git a/lambench/metrics/post_process.py b/lambench/metrics/post_process.py index 48e582c..9294586 100644 --- a/lambench/metrics/post_process.py +++ b/lambench/metrics/post_process.py @@ -121,6 +121,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel): "binding_energy", "rxn_barrier", "pressure", + "stacking_fault", ]: applicability_results[record.task_name] = record.metrics return applicability_results diff --git a/lambench/metrics/results/metadata.json b/lambench/metrics/results/metadata.json index 020e00b..dd60faf 100644 --- a/lambench/metrics/results/metadata.json +++ b/lambench/metrics/results/metadata.json @@ -949,6 +949,18 @@ "DISPLAY_NAME": "Success Rate", "DESCRIPTION": "The success rate of volume calculations at elevated pressures." } + }, + "stacking_fault": { + "DISPLAY_NAME": "Stacking Fault Energy", + "DESCRIPTION": "Evaluation of the stacking fault energy over 46 structures. For each structure, 10 frames were taken from each sliding trajectory, and are labeled with VASP PBE-PAW. During metrics calculation, a shape-preserving PCHIP interpolation is applied to the energy profile along the sliding path, and all datapoints from the interpolated energy profile are used to compute the metrics.", + "MAE_E": { + "DISPLAY_NAME": "MAE_E (mJ/m²)", + "DESCRIPTION": "The mean absolute error of the stacking fault energy." + }, + "MAE_dE":{ + "DISPLAY_NAME": "MAE_dE (1/unit displacement)", + "DESCRIPTION": "The mean absolute error of the normalized change in stacking fault energy per unit displacement along the sliding path (derivative divided by the maximum stacking fault energy)." + } } }, "adaptability_results": { diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py index e22d52b..5d99131 100644 --- a/lambench/models/ase_models.py +++ b/lambench/models/ase_models.py @@ -121,7 +121,8 @@ def _init_mace_calculator(self) -> Calculator: if self.model_domain == "molecules": head = "omol" else: - head = "oc20_usemppbe" + # head = "oc20_usemppbe" + head = "omat_pbe" if self.model_name == "MACE-MH-1": model_config["head"] = head return mace_mp(**model_config) @@ -315,6 +316,13 @@ def evaluate( fmax = task.calculator_params.get("fmax", 1e-3) max_steps = task.calculator_params.get("max_steps", 500) return {"metrics": run_inference(self, task.test_data, fmax, max_steps)} + elif task.task_name == "stacking_fault": + from lambench.tasks.calculator.stacking_fault.stacking_fault import ( + run_inference, + ) + + assert task.test_data is not None + return {"metrics": run_inference(self, task.test_data)} else: raise NotImplementedError(f"Task {task.task_name} is not implemented.") diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml index 99f7ec5..ae87353 100644 --- a/lambench/tasks/calculator/calculator_tasks.yml +++ b/lambench/tasks/calculator/calculator_tasks.yml @@ -42,3 +42,6 @@ pressure: calculator_params: fmax: 0.001 max_steps: 500 +stacking_fault: + test_data: /bohr/lambench-stacking-fault-dtjt/v1 + calculator_params: null diff --git a/lambench/tasks/calculator/stacking_fault/stacking_fault.py b/lambench/tasks/calculator/stacking_fault/stacking_fault.py new file mode 100644 index 0000000..322181e --- /dev/null +++ b/lambench/tasks/calculator/stacking_fault/stacking_fault.py @@ -0,0 +1,72 @@ +from ase.io import read +from pathlib import Path +from tqdm import tqdm +import numpy as np +import pandas as pd +from sklearn.metrics import mean_absolute_error +from lambench.models.ase_models import ASEModel +from lambench.tasks.calculator.stacking_fault.utils import fit_pchip + + +EV_A2_TO_MJ_M2 = 16021.766208 +NUM_POINTS = 200 + + +def calc_one_traj(traj, label, calc): + atoms_list = read(traj, ":") + df = pd.read_csv(label, header=0) + + a_vector = atoms_list[0].cell[0] + b_vector = atoms_list[0].cell[1] + area = np.linalg.norm(np.cross(a_vector, b_vector)) + + d = df["Displacement"] + preds = [] + for atoms in atoms_list: + atoms.calc = calc + preds.append(atoms.get_potential_energy()) + res = pd.DataFrame({"Displacement": d.to_list(), "Energy": preds}) + res["Energy"] = (res["Energy"] - res["Energy"].min()) * EV_A2_TO_MJ_M2 / area + + _, y_smooth_label = fit_pchip( + df, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS + ) + _, y_smooth_pred = fit_pchip( + res, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS + ) + + derivative_label = ( + (y_smooth_label[1:] - y_smooth_label[:-1]) + * (NUM_POINTS - 1) + / max(y_smooth_label) + ) + derivative_pred = ( + (y_smooth_pred[1:] - y_smooth_pred[:-1]) * (NUM_POINTS - 1) / max(y_smooth_pred) + ) + + return np.round(mean_absolute_error(y_smooth_label, y_smooth_pred), 4), np.round( + mean_absolute_error(derivative_label, derivative_pred), 4 + ) + + +def run_inference(model: ASEModel, test_data: Path) -> dict: + calc = model.calc + + traj_files = sorted(list(test_data.glob("*.traj"))) + label_files = sorted(list(test_data.glob("*.csv"))) + + energy_maes = [] + derivative_maes = [] + for traj_file, label_file in tqdm( + zip(traj_files, label_files), + total=len(traj_files), + desc="Calculating Stacking Fault Energies", + ): + energy_mae, derivative_mae = calc_one_traj(traj_file, label_file, calc) + energy_maes.append(energy_mae) + derivative_maes.append(derivative_mae) + + return { + "MAE_E": np.round(np.mean(energy_maes), 4), # mJ/m² + "MAE_dE": np.round(np.mean(derivative_maes), 4), # mJ/m²/unit displacement + } diff --git a/lambench/tasks/calculator/stacking_fault/utils.py b/lambench/tasks/calculator/stacking_fault/utils.py new file mode 100644 index 0000000..1f3678e --- /dev/null +++ b/lambench/tasks/calculator/stacking_fault/utils.py @@ -0,0 +1,44 @@ +from scipy.interpolate import PchipInterpolator +import pandas as pd +import numpy as np + + +def fit_pchip( + df: pd.DataFrame, x_col: str, y_col: str, num_points: int +) -> tuple[np.ndarray, np.ndarray]: + """ + Fit a PCHIP (Piecewise Cubic Hermite Interpolating Polynomial) to a dataframe. + PCHIP preserves monotonicity and is shape-preserving. + + Parameters: + ----------- + df : pandas.DataFrame + Dataframe with x and y columns + x_col : str + Name of the x column + y_col : str + Name of the y column + num_points : int + Number of points for smooth interpolation + + Returns: + -------- + x_smooth : numpy.ndarray + Smooth x values + y_smooth : numpy.ndarray + Smooth y values from PCHIP interpolation + """ + # Extract x and y values + x = df[x_col].values + y = df[y_col].values + + # Create PCHIP interpolator + pchip = PchipInterpolator(x, y) + + # Generate smooth x values + x_smooth = np.linspace(x.min(), x.max(), num_points) + + # Evaluate PCHIP at smooth x values + y_smooth = pchip(x_smooth) + + return x_smooth, y_smooth diff --git a/pyproject.toml b/pyproject.toml index 5c30534..8dbe87f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "ase", "python-dotenv", "numpy", + "scipy", "pydantic", "pyyaml", "SQLAlchemy[pymysql]",