Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lambench/metrics/downstream_tasks_metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,7 @@ pressure:
domain: Inorganic Materials
metrics: [MAE]
dummy: {"MAE": 2.505} # Estimated from the MAE between DFT and avg(DFT) over 270 structures
stacking_fault:
domain: Inorganic Materials
metrics: [MAE_E, MAE_dE]
dummy: {'MAE_E': 1544.8257, 'MAE_dE': 2.0}
1 change: 1 addition & 0 deletions lambench/metrics/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def process_domain_specific_for_one_model(model: BaseLargeAtomModel):
"binding_energy",
"rxn_barrier",
"pressure",
"stacking_fault",
]:
applicability_results[record.task_name] = record.metrics
return applicability_results
Expand Down
12 changes: 12 additions & 0 deletions lambench/metrics/results/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,18 @@
"DISPLAY_NAME": "Success Rate",
"DESCRIPTION": "The success rate of volume calculations at elevated pressures."
}
},
"stacking_fault": {
"DISPLAY_NAME": "Stacking Fault Energy",
"DESCRIPTION": "Evaluation of the stacking fault energy over 46 structures. For each structure, 10 frames were taken from each sliding trajectory, and are labeled with VASP PBE-PAW. During metrics calculation, a shape-preserving PCHIP interpolation is applied to the energy profile along the sliding path, and all datapoints from the interpolated energy profile are used to compute the metrics.",
"MAE_E": {
"DISPLAY_NAME": "MAE_E (mJ/m²)",
"DESCRIPTION": "The mean absolute error of the stacking fault energy."
},
"MAE_dE":{
"DISPLAY_NAME": "MAE_dE (1/unit displacement)",
"DESCRIPTION": "The mean absolute error of the normalized change in stacking fault energy per unit displacement along the sliding path (derivative divided by the maximum stacking fault energy)."
}
}
},
"adaptability_results": {
Expand Down
10 changes: 9 additions & 1 deletion lambench/models/ase_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ def _init_mace_calculator(self) -> Calculator:
if self.model_domain == "molecules":
head = "omol"
else:
head = "oc20_usemppbe"
# head = "oc20_usemppbe"
head = "omat_pbe"
if self.model_name == "MACE-MH-1":
model_config["head"] = head
return mace_mp(**model_config)
Expand Down Expand Up @@ -315,6 +316,13 @@ def evaluate(
fmax = task.calculator_params.get("fmax", 1e-3)
max_steps = task.calculator_params.get("max_steps", 500)
return {"metrics": run_inference(self, task.test_data, fmax, max_steps)}
elif task.task_name == "stacking_fault":
from lambench.tasks.calculator.stacking_fault.stacking_fault import (
run_inference,
)

assert task.test_data is not None
return {"metrics": run_inference(self, task.test_data)}
else:
raise NotImplementedError(f"Task {task.task_name} is not implemented.")

Expand Down
3 changes: 3 additions & 0 deletions lambench/tasks/calculator/calculator_tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ pressure:
calculator_params:
fmax: 0.001
max_steps: 500
stacking_fault:
test_data: /bohr/lambench-stacking-fault-dtjt/v1
calculator_params: null
72 changes: 72 additions & 0 deletions lambench/tasks/calculator/stacking_fault/stacking_fault.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from ase.io import read
from pathlib import Path
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from lambench.models.ase_models import ASEModel
from lambench.tasks.calculator.stacking_fault.utils import fit_pchip


EV_A2_TO_MJ_M2 = 16021.766208
NUM_POINTS = 200


def calc_one_traj(traj, label, calc):
atoms_list = read(traj, ":")
df = pd.read_csv(label, header=0)

a_vector = atoms_list[0].cell[0]
b_vector = atoms_list[0].cell[1]
area = np.linalg.norm(np.cross(a_vector, b_vector))

d = df["Displacement"]
preds = []
for atoms in atoms_list:
atoms.calc = calc
preds.append(atoms.get_potential_energy())
res = pd.DataFrame({"Displacement": d.to_list(), "Energy": preds})
res["Energy"] = (res["Energy"] - res["Energy"].min()) * EV_A2_TO_MJ_M2 / area

_, y_smooth_label = fit_pchip(
df, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS
)
_, y_smooth_pred = fit_pchip(
res, x_col="Displacement", y_col="Energy", num_points=NUM_POINTS
)

derivative_label = (
(y_smooth_label[1:] - y_smooth_label[:-1])
* (NUM_POINTS - 1)
/ max(y_smooth_label)
)
derivative_pred = (
(y_smooth_pred[1:] - y_smooth_pred[:-1]) * (NUM_POINTS - 1) / max(y_smooth_pred)
)

return np.round(mean_absolute_error(y_smooth_label, y_smooth_pred), 4), np.round(
mean_absolute_error(derivative_label, derivative_pred), 4
)


def run_inference(model: ASEModel, test_data: Path) -> dict:
calc = model.calc

traj_files = sorted(list(test_data.glob("*.traj")))
label_files = sorted(list(test_data.glob("*.csv")))

energy_maes = []
derivative_maes = []
for traj_file, label_file in tqdm(
zip(traj_files, label_files),
total=len(traj_files),
desc="Calculating Stacking Fault Energies",
):
energy_mae, derivative_mae = calc_one_traj(traj_file, label_file, calc)
energy_maes.append(energy_mae)
derivative_maes.append(derivative_mae)

return {
"MAE_E": np.round(np.mean(energy_maes), 4), # mJ/m²
"MAE_dE": np.round(np.mean(derivative_maes), 4), # mJ/m²/unit displacement
}
44 changes: 44 additions & 0 deletions lambench/tasks/calculator/stacking_fault/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from scipy.interpolate import PchipInterpolator
import pandas as pd
import numpy as np


def fit_pchip(
df: pd.DataFrame, x_col: str, y_col: str, num_points: int
) -> tuple[np.ndarray, np.ndarray]:
"""
Fit a PCHIP (Piecewise Cubic Hermite Interpolating Polynomial) to a dataframe.
PCHIP preserves monotonicity and is shape-preserving.

Parameters:
-----------
df : pandas.DataFrame
Dataframe with x and y columns
x_col : str
Name of the x column
y_col : str
Name of the y column
num_points : int
Number of points for smooth interpolation

Returns:
--------
x_smooth : numpy.ndarray
Smooth x values
y_smooth : numpy.ndarray
Smooth y values from PCHIP interpolation
"""
# Extract x and y values
x = df[x_col].values
y = df[y_col].values

# Create PCHIP interpolator
pchip = PchipInterpolator(x, y)

# Generate smooth x values
x_smooth = np.linspace(x.min(), x.max(), num_points)

# Evaluate PCHIP at smooth x values
y_smooth = pchip(x_smooth)

return x_smooth, y_smooth
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dependencies = [
"ase",
"python-dotenv",
"numpy",
"scipy",
"pydantic",
"pyyaml",
"SQLAlchemy[pymysql]",
Expand Down