diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst index 575bbcc11..a4757700b 100644 --- a/docs/source/user_guide/benchmarks/bulk_crystal.rst +++ b/docs/source/user_guide/benchmarks/bulk_crystal.rst @@ -115,3 +115,76 @@ Reference data: * Same as input data * PBE + + + +Low-Dimensional Relaxation +========================== + +Summary +------- + +Performance in relaxing low-dimensional (2D and 1D) crystal structures. +Structures from the Alexandria database are relaxed with cell masks to constrain +relaxation to the appropriate dimensions and compared to PBE reference calculations. + + +Metrics +------- + +**2D Structures:** + +(1) Area MAE (2D) + +Mean absolute error of area per atom compared to PBE reference. + +(2) Energy MAE (2D) + +Mean absolute error of energy per atom compared to PBE reference. + +(3) Convergence (2D) + +Percentage of 2D structures that successfully converged during relaxation. + +**1D Structures:** + +(4) Length MAE (1D) + +Mean absolute error of chain length per atom compared to PBE reference. + +(5) Energy MAE (1D) + +Mean absolute error of energy per atom compared to PBE reference. + +(6) Convergence (1D) + +Percentage of 1D structures that successfully converged during relaxation. + +Structures are relaxed using janus-core's GeomOpt after calling from `ase.spacegroup.symmetrize.refine_symmetry`. Cell relaxation is constrained using +cell masks: + +* 2D: Only in-plane cell components (a, b, and γ) are allowed to relax +* 1D: Only the chain direction (a) is allowed to relax + +Relaxation continues until the maximum force component is below 0.0002 eV/Å or until 500 steps are reached. If not converged, relaxation is repeated up to 3 times. + + +Computational cost +------------------ + +High: tests are likely to take hours-days to run on GPU, depending on the number of structures tested. + + +Data availability +----------------- + +Input structures: + +* Alexandria database 2D structures: https://alexandria.icams.rub.de/data/pbe_2d +* Alexandria database 1D structures: https://alexandria.icams.rub.de/data/pbe_1d +* 3000 structures randomly sampled from each dataset + +Reference data: + +* Hai-Chen Wang et al 2023 2D Mater. 10 035007 +* Jonathan Schmidt et al 2024, Mater. Tod. Phys, 48, 101560 diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py new file mode 100644 index 000000000..153c23ff3 --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/analyse_low_dimensional_relaxation.py @@ -0,0 +1,352 @@ +"""Analyse low-dimensional (2D/1D) crystal relaxation benchmark.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pandas as pd +import plotly.graph_objects as go +import pytest + +from ml_peg.analysis.utils.decorators import build_table +from ml_peg.analysis.utils.utils import load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +CALC_PATH = CALCS_ROOT / "bulk_crystal" / "low_dimensional_relaxation" / "outputs" +OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "low_dimensional_relaxation" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# Energy outlier thresholds (eV/atom) +ENERGY_OUTLIER_MIN = -40 +ENERGY_OUTLIER_MAX = 40 + +# Dimensionality-specific configuration +DIM_CONFIGS = { + "2D": {"geom_col": "area_per_atom", "geom_label": "Area", "geom_unit": "Ų/atom"}, + "1D": { + "geom_col": "length_per_atom", + "geom_label": "Length", + "geom_unit": "Å/atom", + }, +} + + +def _filter_energy_outliers(df: pd.DataFrame) -> pd.DataFrame: + """ + Mark structures with extreme predicted energies as unconverged. + + Structures with predicted energy per atom outside [-40, 40] eV/atom + are considered outliers and marked as unconverged. + + Parameters + ---------- + df + Results dataframe with 'pred_energy_per_atom' and 'converged' columns. + + Returns + ------- + pd.DataFrame + Dataframe with outliers marked as unconverged. + """ + df.loc[ + (df["pred_energy_per_atom"] <= ENERGY_OUTLIER_MIN) + | (df["pred_energy_per_atom"] >= ENERGY_OUTLIER_MAX), + "converged", + ] = False + return df + + +def load_results(model_name: str, dimensionality: str = "2D") -> pd.DataFrame: + """ + Load results for a specific model and dimensionality. + + Parameters + ---------- + model_name + Name of the model. + dimensionality + Either "2D" or "1D". + + Returns + ------- + pd.DataFrame + Results dataframe or empty dataframe if not found. + """ + csv_path = CALC_PATH / model_name / f"results_{dimensionality}.csv" + if csv_path.exists(): + return pd.read_csv(csv_path) + return pd.DataFrame() + + +def get_converged_data(model_name: str, dimensionality: str = "2D") -> dict[str, list]: + """ + Get converged geometric and energy data for a model. + + Parameters + ---------- + model_name + Name of the model. + dimensionality + Either "2D" or "1D". + + Returns + ------- + dict[str, list] + Dictionary with ref/pred values for geometric metric and energy. + """ + df = load_results(model_name, dimensionality) + if df.empty: + return {"ref_geom": [], "pred_geom": [], "ref_energy": [], "pred_energy": []} + + df = _filter_energy_outliers(df) + df_conv = df[df["converged"]].copy() + + geom_col = DIM_CONFIGS[dimensionality]["geom_col"] + df_conv = df_conv.dropna(subset=[f"pred_{geom_col}", "pred_energy_per_atom"]) + + return { + "ref_geom": df_conv[f"ref_{geom_col}"].tolist(), + "pred_geom": df_conv[f"pred_{geom_col}"].tolist(), + "ref_energy": df_conv["ref_energy_per_atom"].tolist(), + "pred_energy": df_conv["pred_energy_per_atom"].tolist(), + } + + +def get_convergence_rate(model_name: str, dimensionality: str = "2D") -> float | None: + """ + Get convergence rate for a model. + + Parameters + ---------- + model_name + Name of the model. + dimensionality + Either "2D" or "1D". + + Returns + ------- + float | None + Convergence rate (%) or None if no data. + """ + df = load_results(model_name, dimensionality) + if df.empty: + return None + df = _filter_energy_outliers(df) + return (df["converged"].sum() / len(df)) * 100 + + +def create_parity_plot( + data_type: str, + title: str, + x_label: str, + y_label: str, + filename: Path, + dimensionality: str = "2D", +) -> None: + """ + Create a parity plot for low-dimensional structures. + + Parameters + ---------- + data_type + Either "geom" (area for 2D, length for 1D) or "energy". + title + Plot title. + x_label + X-axis label. + y_label + Y-axis label. + filename + Path to save the plot JSON. + dimensionality + Either "2D" or "1D". + """ + fig = go.Figure() + + ref_values = [] + pred_values = [] + + for model_name in MODELS: + data = get_converged_data(model_name, dimensionality) + if data_type == "geom": + ref_values.extend(data["ref_geom"]) + pred_values.extend(data["pred_geom"]) + else: + ref_values.extend(data["ref_energy"]) + pred_values.extend(data["pred_energy"]) + + if ref_values and pred_values: + fig.add_trace( + go.Scatter( + x=pred_values, + y=ref_values, + name=dimensionality, + mode="markers", + marker={"size": 6, "opacity": 0.7}, + hovertemplate=( + f"{dimensionality}
" + "Pred: %{x:.4f}
" + "Ref: %{y:.4f}
" + "" + ), + ) + ) + + all_values = [] + for trace in fig.data: + all_values.extend(trace.x) + all_values.extend(trace.y) + + if all_values: + min_val = min(all_values) + max_val = max(all_values) + fig.add_trace( + go.Scatter( + x=[min_val, max_val], + y=[min_val, max_val], + mode="lines", + name="y=x", + line={"color": "gray", "dash": "dash"}, + showlegend=True, + ) + ) + + fig.update_layout( + title=title, + xaxis_title=x_label, + yaxis_title=y_label, + hovermode="closest", + ) + + filename.parent.mkdir(parents=True, exist_ok=True) + with open(filename, "w") as f: + json.dump(fig.to_dict(), f) + + +def _compute_mae(dimensionality: str, data_key: str) -> dict[str, float]: + """ + Compute MAE across all models for a given dimensionality and data key. + + Parameters + ---------- + dimensionality + Either "2D" or "1D". + data_key + Either "geom" or "energy". + + Returns + ------- + dict[str, float] + {model_name: mae_value}. + """ + results = {} + ref_key = f"ref_{data_key}" + pred_key = f"pred_{data_key}" + for model_name in MODELS: + data = get_converged_data(model_name, dimensionality) + if data[ref_key] and data[pred_key]: + results[model_name] = mae(data[ref_key], data[pred_key]) + return results + + +def _compute_convergence(dimensionality: str) -> dict[str, float]: + """ + Compute convergence rates across all models for a given dimensionality. + + Parameters + ---------- + dimensionality + Either "2D" or "1D". + + Returns + ------- + dict[str, float] + {model_name: convergence_rate}. + """ + results = {} + for model_name in MODELS: + conv_rate = get_convergence_rate(model_name, dimensionality) + if conv_rate is not None: + results[model_name] = conv_rate + return results + + +def _generate_all_plots() -> None: + """Generate all parity plots for each dimensionality.""" + for dim, cfg in DIM_CONFIGS.items(): + geom_label = cfg["geom_label"] + geom_unit = cfg["geom_unit"] + dim_lower = dim.lower() + + create_parity_plot( + data_type="geom", + title=f"{geom_label} per atom ({dim})", + x_label=f"Predicted {geom_label.lower()} / {geom_unit}", + y_label=f"Reference {geom_label.lower()} / {geom_unit}", + filename=OUT_PATH / f"figure_{geom_label.lower()}_{dim_lower}.json", + dimensionality=dim, + ) + create_parity_plot( + data_type="energy", + title=f"Energy per atom ({dim})", + x_label="Predicted energy / eV/atom", + y_label="Reference energy / eV/atom", + filename=OUT_PATH / f"figure_energy_{dim_lower}.json", + dimensionality=dim, + ) + + +@pytest.fixture +def parity_plots() -> None: + """Generate all parity plots for 2D and 1D structures.""" + _generate_all_plots() + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "low_dimensional_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, +) +def metrics() -> dict[str, dict]: + """ + Compute all low-dimensional relaxation metrics. + + Returns + ------- + dict[str, dict] + All metrics for all models. + """ + result = {} + for dim, cfg in DIM_CONFIGS.items(): + geom_label = cfg["geom_label"] + result[f"{geom_label} MAE ({dim})"] = _compute_mae(dim, "geom") + result[f"Energy MAE ({dim})"] = _compute_mae(dim, "energy") + result[f"Convergence ({dim})"] = _compute_convergence(dim) + return result + + +def test_low_dimensional_relaxation( + metrics: dict[str, dict], + parity_plots: None, +) -> None: + """ + Run low-dimensional relaxation analysis test. + + Parameters + ---------- + metrics + All low-dimensional relaxation metrics. + parity_plots + Triggers parity plot generation for all dimensionalities. + """ + return diff --git a/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml new file mode 100644 index 000000000..59b851b24 --- /dev/null +++ b/ml_peg/analysis/bulk_crystal/low_dimensional_relaxation/metrics.yml @@ -0,0 +1,44 @@ +metrics: + # 2D structures + Area MAE (2D): + good: 0.05 + bad: 0.15 + unit: "Ų/atom" + tooltip: "Mean Absolute Error of area per atom for 2D structures compared to PBE" + level_of_theory: PBE + + Energy MAE (2D): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom for 2D structures compared to PBE" + level_of_theory: PBE + + Convergence (2D): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of 2D structures that converged during relaxation" + level_of_theory: PBE + + # 1D structures + Length MAE (1D): + good: 0.01 + bad: 0.05 + unit: "Å/atom" + tooltip: "Mean Absolute Error of chain length per atom for 1D structures compared to PBE" + level_of_theory: PBE + + Energy MAE (1D): + good: 0.02 + bad: 0.03 + unit: "eV/atom" + tooltip: "Mean Absolute Error of energy per atom for 1D structures compared to PBE" + level_of_theory: PBE + + Convergence (1D): + good: 100.0 + bad: 99.0 + unit: "%" + tooltip: "Percentage of 1D structures that converged during relaxation" + level_of_theory: PBE diff --git a/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py new file mode 100644 index 000000000..15d246291 --- /dev/null +++ b/ml_peg/app/bulk_crystal/low_dimensional_relaxation/app_low_dimensional_relaxation.py @@ -0,0 +1,84 @@ +"""Low-dimensional (2D/1D) crystal relaxation benchmark app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +BENCHMARK_NAME = "Low-Dimensional Relaxation" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/bulk_crystal.html" + "#low-dimensional-relaxation" +) +DATA_PATH = APP_ROOT / "data" / "bulk_crystal" / "low_dimensional_relaxation" + + +class LowDimensionalRelaxationApp(BaseApp): + """Low-dimensional relaxation benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + # Define plot paths and their metric mappings + plot_configs = [ + ("figure_area_2d.json", "Area MAE (2D)", "area-2d"), + ("figure_energy_2d.json", "Energy MAE (2D)", "energy-2d"), + ("figure_length_1d.json", "Length MAE (1D)", "length-1d"), + ("figure_energy_1d.json", "Energy MAE (1D)", "energy-1d"), + ] + + # Build column-to-plot mapping + column_to_plot = {} + for filename, metric_name, plot_id_suffix in plot_configs: + plot_path = DATA_PATH / filename + if plot_path.exists(): + scatter = read_plot( + plot_path, + id=f"{BENCHMARK_NAME}-figure-{plot_id_suffix}", + ) + column_to_plot[metric_name] = scatter + + if column_to_plot: + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot=column_to_plot, + ) + + +def get_app() -> LowDimensionalRelaxationApp: + """ + Get low-dimensional relaxation benchmark app. + + Returns + ------- + LowDimensionalRelaxationApp + Benchmark layout and callback registration. + """ + return LowDimensionalRelaxationApp( + name=BENCHMARK_NAME, + description=( + "Performance in relaxing low-dimensional crystal structures. " + "2D structures are evaluated on area per atom, and 1D structures " + "on length per atom. Structures from the Alexandria database are " + "relaxed with cell masks to constrain relaxation to the appropriate " + "dimensions and compared to PBE reference calculations." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "low_dimensional_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + ld_app = get_app() + full_app.layout = ld_app.layout + ld_app.register_callbacks() + full_app.run(port=8064, debug=True) diff --git a/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py new file mode 100644 index 000000000..b1f253c1f --- /dev/null +++ b/ml_peg/calcs/bulk_crystal/low_dimensional_relaxation/calc_low_dimensional_relaxation.py @@ -0,0 +1,412 @@ +"""Run calculations for low-dimensional (2D/1D) crystal relaxation benchmark.""" + +from __future__ import annotations + +import bz2 +from copy import copy +import json +from pathlib import Path +import random +from typing import Any +import urllib.request + +from ase import Atoms +from ase.spacegroup.symmetrize import refine_symmetry +from janus_core.calculations.geom_opt import GeomOpt +import numpy as np +import pandas as pd +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.io.ase import AseAtomsAdaptor +import pytest +from tqdm import tqdm + +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +# URLs for Alexandria database +ALEXANDRIA_URLS = { + "2D": "https://alexandria.icams.rub.de/data/pbe_2d", + "1D": "https://alexandria.icams.rub.de/data/pbe_1d", +} + +# Default data files for each dimensionality (list of files) +DEFAULT_DATA_FILES: dict[str, list[str]] = { + "2D": ["alexandria_2d_001.json.bz2", "alexandria_2d_000.json.bz2"], + "1D": ["alexandria_1d_000.json.bz2"], +} + +# Local cache directory for downloaded data +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# Relaxation parameters +FMAX = 0.0002 # eV/A - tight convergence +MAX_STEPS = 500 +RANDOM_SEED = 42 + +# Number of structures to use for testing +N_STRUCTURES = 3000 + +# Vacuum padding for non-periodic directions (Angstrom) +VACUUM_PADDING = 100.0 + +# Dimensionality configurations +# For 2D: mask allows in-plane cell relaxation only (fix z) +# For 1D: mask allows along-chain relaxation only (fix y and z) +CELL_MASKS = { + "2D": np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=bool), + "1D": np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]], dtype=bool), +} + + +def download_data(dimensionality: str, filename: str) -> Path: + """ + Download a single structure data file from Alexandria database if not cached. + + Parameters + ---------- + dimensionality + Either "2D" or "1D". + filename + Name of the file to download. + + Returns + ------- + Path + Path to the downloaded/cached file. + """ + DATA_PATH.mkdir(parents=True, exist_ok=True) + local_path = DATA_PATH / filename + + if not local_path.exists(): + url = f"{ALEXANDRIA_URLS[dimensionality]}/{filename}" + print(f"Downloading {url}...") + urllib.request.urlretrieve(url, local_path) + print(f"Downloaded to {local_path}") + + return local_path + + +def download_all_data( + dimensionality: str, filenames: list[str] | None = None +) -> list[Path]: + """ + Download all structure data files for a dimensionality. + + Parameters + ---------- + dimensionality + Either "2D" or "1D". + filenames + List of filenames to download. If None, uses defaults for dimensionality. + + Returns + ------- + list[Path] + Paths to the downloaded/cached files. + """ + if filenames is None: + filenames = DEFAULT_DATA_FILES[dimensionality] + + return [download_data(dimensionality, f) for f in filenames] + + +def get_area_per_atom(atoms: Atoms) -> float: + """ + Calculate the in-plane area per atom for 2D structures. + + For 2D materials, the area is calculated as a x b x sin(gamma), + where a and b are the in-plane lattice vectors. + + Parameters + ---------- + atoms + ASE Atoms object. + + Returns + ------- + float + In-plane area per atom in Ų. + """ + cell = atoms.get_cell() + # Calculate cross product of first two lattice vectors (in-plane) + a = cell[0] + b = cell[1] + area = np.linalg.norm(np.cross(a, b)) + return area / len(atoms) + + +def get_length_per_atom(atoms: Atoms) -> float: + """ + Calculate the chain length per atom for 1D structures. + + For 1D materials, the length is the magnitude of the first lattice vector. + + Parameters + ---------- + atoms + ASE Atoms object. + + Returns + ------- + float + Chain length per atom in Å. + """ + cell = atoms.get_cell() + length = np.linalg.norm(cell[0]) + return length / len(atoms) + + +def set_vacuum_padding( + atoms: Atoms, dimensionality: str, vacuum: float = VACUUM_PADDING +) -> Atoms: + """ + Set lattice constants in non-periodic directions to specified vacuum padding. + + For 2D materials, sets the c lattice vector magnitude to the vacuum value. + For 1D materials, sets both b and c lattice vector magnitudes to the vacuum value. + The direction of each lattice vector is preserved. + + Parameters + ---------- + atoms + ASE Atoms object. + dimensionality + Either "2D" or "1D". + vacuum + Target length for non-periodic lattice vectors in Angstrom. + + Returns + ------- + Atoms + Modified atoms object with updated cell. + """ + cell = np.array(atoms.get_cell()) + + if dimensionality == "2D": + # Set c vector magnitude to vacuum while preserving direction + c_vec = cell[2] + c_norm = np.linalg.norm(c_vec) + if c_norm > 0: + cell[2] = c_vec / c_norm * vacuum + else: + # If c vector is zero, set it along z direction + cell[2] = np.array([0.0, 0.0, vacuum]) + else: # 1D + # Set b vector magnitude to vacuum while preserving direction + b_vec = cell[1] + b_norm = np.linalg.norm(b_vec) + if b_norm > 0: + cell[1] = b_vec / b_norm * vacuum + else: + cell[1] = np.array([0.0, vacuum, 0.0]) + + # Set c vector magnitude to vacuum while preserving direction + c_vec = cell[2] + c_norm = np.linalg.norm(c_vec) + if c_norm > 0: + cell[2] = c_vec / c_norm * vacuum + else: + cell[2] = np.array([0.0, 0.0, vacuum]) + + atoms.set_cell(cell, scale_atoms=False) + return atoms + + +def load_structures( + dimensionality: str = "2D", + filenames: list[str] | None = None, + n_structures: int = N_STRUCTURES, +) -> list[dict]: + """ + Load structures from Alexandria database. + + Downloads data if not already cached locally. When multiple files are provided, + structures are pooled together and randomly sampled from the combined set. + + Parameters + ---------- + dimensionality + Either "2D" or "1D". + filenames + List of data filenames to load. If None, uses defaults for dimensionality. + n_structures + Number of structures to load. Default is N_STRUCTURES. + + Returns + ------- + list[dict] + List of structure dictionaries with atoms, mat_id, and reference values. + """ + json_paths = download_all_data(dimensionality, filenames) + + # Load all entries from all files + all_entries = [] + for json_path in json_paths: + with bz2.open(json_path, "rt") as f: + data = json.load(f) + all_entries.extend(data["entries"]) + + if n_structures > len(all_entries): + n_structures = len(all_entries) + print(f"Only {len(all_entries)} structures available, using all of them") + + rng = random.Random(RANDOM_SEED) + selected_indices = sorted(rng.sample(range(len(all_entries)), k=n_structures)) + + adaptor = AseAtomsAdaptor() + structures = [] + + for idx in selected_indices: + entry_dict = all_entries[idx] + entry = ComputedStructureEntry.from_dict(entry_dict) + mat_id = entry.data.get("mat_id", f"struct_{idx}") + + atoms = adaptor.get_atoms(entry.structure) + n_atoms = len(entry.structure) + + struct_data = { + "mat_id": mat_id, + "atoms": atoms, + "ref_energy_per_atom": entry.energy / n_atoms, + } + + # Use appropriate geometric metric based on dimensionality + if dimensionality == "2D": + struct_data["ref_area_per_atom"] = get_area_per_atom(atoms) + else: # 1D + struct_data["ref_length_per_atom"] = get_length_per_atom(atoms) + + structures.append(struct_data) + + return structures + + +def relax_low_dimensional( + atoms: Atoms, + dimensionality: str = "2D", + fmax: float = FMAX, + max_steps: int = MAX_STEPS, +) -> tuple[Atoms | None, bool, float | None]: + """ + Relax low-dimensional structure with cell mask to constrain relaxation. + + Parameters + ---------- + atoms + ASE Atoms object with calculator attached. + dimensionality + Either "2D" or "1D". + fmax + Maximum force tolerance in eV/A. + max_steps + Maximum number of optimization steps. + + Returns + ------- + tuple[Atoms | None, bool, float | None] + Relaxed atoms (or None if failed), convergence status, energy per atom. + """ + try: + converged = False + counter = 0 + # repeat up to 3 times if not converged + while counter < 3 and not converged: + geom_opt = GeomOpt( + struct=atoms, + fmax=fmax, + steps=max_steps, + # Use cell mask to constrain relaxation to appropriate dimensions + filter_kwargs={"mask": CELL_MASKS[dimensionality]}, + calc_kwargs={"default_dtype": "float64"}, + ) + geom_opt.run() + relaxed = geom_opt.struct + # assess forces to determine convergence + max_force = max(relaxed.get_forces().flatten(), key=abs) + if max_force < fmax: + converged = True + counter += 1 + atoms = relaxed + except Exception as e: + print(f"Relaxation failed: {e}") + return None, False, None + + if not converged: + return None, False, None + + energy = relaxed.get_potential_energy() + n_atoms = len(relaxed) + + return relaxed, converged, energy / n_atoms + + +DIMENSIONALITIES = ["2D", "1D"] + + +@pytest.mark.parametrize("mlip", MODELS.items()) +@pytest.mark.parametrize("dimensionality", DIMENSIONALITIES) +def test_low_dimensional_relaxation(mlip: tuple[str, Any], dimensionality: str) -> None: + """ + Run low-dimensional relaxation benchmark. + + Parameters + ---------- + mlip + Tuple of model name and model object. + dimensionality + Either "2D" or "1D". + """ + model_name, model = mlip + calc = model.get_calculator() + + # Load structures (downloads from Alexandria if not cached) + structures = load_structures(dimensionality) + + results = [] + for struct_data in tqdm( + structures, desc=f"{model_name} {dimensionality}", leave=False + ): + atoms = struct_data["atoms"].copy() + # Symmetrize structure before relaxation + refine_symmetry(atoms) + # Set vacuum padding in non-periodic directions + atoms = set_vacuum_padding(atoms, dimensionality) + atoms.calc = copy(calc) + mat_id = struct_data["mat_id"] + + relaxed_atoms, converged, energy_per_atom = relax_low_dimensional( + atoms, dimensionality + ) + + result = { + "mat_id": mat_id, + "dimensionality": dimensionality, + "ref_energy_per_atom": struct_data["ref_energy_per_atom"], + "pred_energy_per_atom": energy_per_atom, + "converged": converged, + } + + # Add dimension-specific geometric metrics + if dimensionality == "2D": + result["ref_area_per_atom"] = struct_data["ref_area_per_atom"] + if relaxed_atoms is not None: + result["pred_area_per_atom"] = get_area_per_atom(relaxed_atoms) + else: + result["pred_area_per_atom"] = None + else: # 1D + result["ref_length_per_atom"] = struct_data["ref_length_per_atom"] + if relaxed_atoms is not None: + result["pred_length_per_atom"] = get_length_per_atom(relaxed_atoms) + else: + result["pred_length_per_atom"] = None + + results.append(result) + + # Save results + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + df = pd.DataFrame(results) + df.to_csv(out_dir / f"results_{dimensionality}.csv", index=False) diff --git a/tests/test_low_dimensional_relaxation.py b/tests/test_low_dimensional_relaxation.py new file mode 100644 index 000000000..17ec100cd --- /dev/null +++ b/tests/test_low_dimensional_relaxation.py @@ -0,0 +1,25 @@ +"""Unit tests for low-dimensional relaxation utilities.""" + +from __future__ import annotations + +from ase import Atoms +import pytest + +from ml_peg.calcs.bulk_crystal.low_dimensional_relaxation.calc_low_dimensional_relaxation import ( # noqa: E501 + get_area_per_atom, + get_length_per_atom, +) + + +def test_get_area_per_atom() -> None: + """Test get_area_per_atom for a simple 2D structure.""" + # 2-atom structure with 3x4 in-plane cell → area = 12, area/atom = 6 + atoms = Atoms("H2", positions=[[0, 0, 0], [1.5, 0, 0]], cell=[3, 4, 20], pbc=True) + assert get_area_per_atom(atoms) == pytest.approx(6.0) + + +def test_get_length_per_atom() -> None: + """Test get_length_per_atom for a simple 1D structure.""" + # 2-atom chain with a=5 Å → length/atom = 2.5 + atoms = Atoms("H2", positions=[[0, 0, 0], [2.5, 0, 0]], cell=[5, 20, 20], pbc=True) + assert get_length_per_atom(atoms) == pytest.approx(2.5)