From fc06321f9083807a0e8ba2a77f7ace6608db3510 Mon Sep 17 00:00:00 2001 From: zhonganr Date: Tue, 3 Feb 2026 19:17:08 +0100 Subject: [PATCH] Add new category containing two interstitial benchmarks --- .gitignore | 3 + docs/source/user_guide/benchmarks/index.rst | 1 + .../user_guide/benchmarks/interstitial.rst | 92 ++++++++ .../interstitial/FE1SIA/analyse_FE1SIA.py | 184 ++++++++++++++++ .../analysis/interstitial/FE1SIA/metrics.yml | 7 + .../interstitial/Relastab/analyse_Relastab.py | 201 ++++++++++++++++++ .../interstitial/Relastab/metrics.yml | 13 ++ ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py | 93 ++++++++ .../app/interstitial/Relastab/app_Relastab.py | 90 ++++++++ ml_peg/app/interstitial/interstitial.yml | 2 + .../calcs/interstitial/FE1SIA/calc_FE1SIA.py | 104 +++++++++ .../interstitial/Relastab/calc_Relastab.py | 67 ++++++ uv.lock | 23 ++ 13 files changed, 880 insertions(+) create mode 100644 docs/source/user_guide/benchmarks/interstitial.rst create mode 100644 ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py create mode 100644 ml_peg/analysis/interstitial/FE1SIA/metrics.yml create mode 100644 ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py create mode 100644 ml_peg/analysis/interstitial/Relastab/metrics.yml create mode 100644 ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py create mode 100644 ml_peg/app/interstitial/Relastab/app_Relastab.py create mode 100644 ml_peg/app/interstitial/interstitial.yml create mode 100644 ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py create mode 100644 ml_peg/calcs/interstitial/Relastab/calc_Relastab.py diff --git a/.gitignore b/.gitignore index 6b68b80ca..bca42cdf8 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,6 @@ __pycache__/ ml_peg/app/data/* !ml_peg/app/data/onboarding/ certs/ +calculate_rmsd.py +DB/ +DB.zip diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst index 9f339f4d2..00cda06b9 100644 --- a/docs/source/user_guide/benchmarks/index.rst +++ b/docs/source/user_guide/benchmarks/index.rst @@ -12,3 +12,4 @@ Benchmarks molecular_crystal molecular bulk_crystal + interstitial diff --git a/docs/source/user_guide/benchmarks/interstitial.rst b/docs/source/user_guide/benchmarks/interstitial.rst new file mode 100644 index 000000000..ed0e1514b --- /dev/null +++ b/docs/source/user_guide/benchmarks/interstitial.rst @@ -0,0 +1,92 @@ +============ +Interstitial +============ + +FE1SIA +====== + +Summary +------- + +This benchmark evaluates the formation energies of a single self-interstitial atom (SIA) in a host lattice. The test includes formation energies for distinct configurations within a supercell. + +Metrics +------- + +1. RMSD + +Root Mean Square Deviation of formation energies compared to DFT data. + +The formation energy of a configuration :math:`E_f` is calculated as: + +.. math:: + E_f = E_{config} - \frac{N_{config}}{N_{bulk}} E_{bulk} + +where :math:`E_{config}` is the total energy of the interstitial configuration containing :math:`N_{config}` atoms, and :math:`E_{bulk}` is the energy of the perfect bulk supercell consisting of :math:`N_{bulk}` atoms. + +The reference formation energies are derived from DFT calculations provided in the dataset. + +Computational cost +------------------ + +Low: The geometries are static, requiring only single-point energy calculations for the configurations and the bulk reference. + +Data availability +----------------- + +Input structures: + +* Subset ``formation_energy`` of the DFT dataset. + + * A. Allera, A. M. Goryaeva, P. Lafourcade, J.-B. Maillet, and M.-C. Marinica, + Neighbors map: An efficient atomic descriptor for structural analysis, + Computational Materials Science 231 (2024). + +Reference data: + +* Computed from the subset ``formation_energy`` of the DFT dataset as input structures. + + +Relastab +======== + +Summary +------- + +This benchmark evaluates the ability of models to correctly rank the stability of different interstitial configurations. It focuses on the relative energy ordering of distinct interstitial structures in the dataset. + +Metrics +------- + +1. Kendall Tau + +Kendall rank correlation coefficient (:math:`\tau`): a measure of rank correlation that evaluates the similarity of the orderings of the data. It assesses the number of *concordant* and *discordant* pairs of observations. For every pair of configurations, it checks if the model agrees with the reference on which is more stable. +A value of 1.0 indicates perfect agreement, 0.0 indicates no correlation, and -1.0 indicates perfect inversion. +This metric is sensitive to **pairwise ordering** errors. It is particularly robust for small datasets and focuses strictly on whether the relative stability order is preserved. + +2. Spearman + +Spearman rank correlation coefficient (:math:`\rho`): a non-parametric measure of rank correlation. +It is defined as the Pearson correlation coefficient between the *rank variables*. It converts raw energies to integer ranks and calculates the linear correlation between them. +Like Kendall Tau, values range from -1 to 1. An absolute value of 1 indicates a perfect monotonic function. +While both metrics evaluate ranking, Spearman assesses the general **monotonic relationship**, while Kendall Tau assesses the probability of correct pairwise discrimination. + +Computational cost +------------------ + +Low: Requires single-point energy calculations for each configuration in the dataset. + +Data availability +----------------- + +Input structures: + +* Subset ``relative_stability`` of the DFT dataset. + + * A. Allera, A. M. Goryaeva, P. Lafourcade, J.-B. Maillet, and M.-C. Marinica, + Neighbors map: An efficient atomic descriptor for structural analysis, + Computational Materials Science 231 (2024). + +Reference data: + +* Computed from the subset ``relative_stability`` of the DFT dataset as input structures. diff --git a/ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py b/ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py new file mode 100644 index 000000000..5d28ad0ee --- /dev/null +++ b/ml_peg/analysis/interstitial/FE1SIA/analyse_FE1SIA.py @@ -0,0 +1,184 @@ +"""Analyse FE1SIA benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import load_metrics_config, rmse +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +# D3_MODEL_NAMES = build_d3_name_map(MODELS) +D3_MODEL_NAMES = {m: m for m in MODELS} +CALC_PATH = CALCS_ROOT / "interstitial" / "FE1SIA" / "outputs" +OUT_PATH = APP_ROOT / "data" / "interstitial" / "FE1SIA" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def get_system_names() -> list[str]: + """ + Get list of FE1SIA system names. + + Returns + ------- + list[str] + List of system names. + """ + system_names = [] + # Try to find names from one of the models + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if model_dir.exists(): + xyz_files = sorted(model_dir.glob("*.xyz")) + for xyz in xyz_files: + if xyz.stem != "ref": + system_names.append(xyz.stem) + if system_names: + break + return system_names + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_energy.json", + title="FE1SIA Formation Energies", + x_label="Predicted Formation Energy / eV", + y_label="Reference Formation Energy / eV", + hoverdata={ + "System": get_system_names(), + }, +) +def formation_energies() -> dict[str, list]: + """ + Get formation energies for FE1SIA systems. + + Returns + ------- + dict[str, list] + Dictionary of reference and predicted formation energies. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if not model_dir.exists(): + continue + + # Load bulk (ref) + # Note: We rely on calc script having produced ref.xyz + bulk_path = model_dir / "ref.xyz" + if not bulk_path.exists(): + # If bulk is missing, we can't compute formation energy properly + print(f"Warning: Bulk reference not found for {model_name}") + continue + + bulk_atoms = read(bulk_path) + e_bulk = bulk_atoms.get_potential_energy() + n_bulk = len(bulk_atoms) + + xyz_files = sorted(model_dir.glob("*.xyz")) + + for xyz_file in xyz_files: + if xyz_file.name == "ref.xyz": + continue + + atoms = read(xyz_file) + e_config = atoms.get_potential_energy() + n_config = len(atoms) + + # Predicted formation energy + # E_f = E_config - (N_config / N_bulk) * E_bulk + pred_fe = e_config - (n_config / n_bulk) * e_bulk + # print(model_name, pred_fe) + + results[model_name].append(pred_fe) + + # Copy individual structure files to app data directory + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / xyz_file.name, atoms) + + if not ref_stored: + results["ref"].append(atoms.info["ref"]) + + ref_stored = True + + return results + + +@pytest.fixture +def fe_errors(formation_energies) -> dict[str, float]: + """ + Get RMSE for formation energies. + + Parameters + ---------- + formation_energies + Dictionary of reference and predicted formation energies. + + Returns + ------- + dict[str, float] + Dictionary of RMSEs for all models. + """ + results = {} + for model_name in MODELS: + if formation_energies.get(model_name): + results[model_name] = rmse( + formation_energies["ref"], formation_energies[model_name] + ) + # print(f"FE1SIA RMSD for {model_name}: {results[model_name]:.6f} eV") + # print(formation_energies["ref"], formation_energies[model_name]) + else: + results[model_name] = None + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "fe1sia_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(fe_errors: dict[str, float]) -> dict[str, dict]: + """ + Get all FE1SIA metrics. + + Parameters + ---------- + fe_errors + RMSE errors for all systems. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return { + "RMSD": fe_errors, + } + + +def test_fe1sia_analysis(metrics: dict[str, dict]) -> None: + """ + Run FE1SIA analysis test. + + Parameters + ---------- + metrics + All FE1SIA metrics. + """ + return diff --git a/ml_peg/analysis/interstitial/FE1SIA/metrics.yml b/ml_peg/analysis/interstitial/FE1SIA/metrics.yml new file mode 100644 index 000000000..5e59bd3af --- /dev/null +++ b/ml_peg/analysis/interstitial/FE1SIA/metrics.yml @@ -0,0 +1,7 @@ +metrics: + RMSD: + good: 0.0 + bad: 30.0 + unit: eV + tooltip: "Root Mean Square Deviation of formation energies" + level_of_theory: DFT diff --git a/ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py b/ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py new file mode 100644 index 000000000..c7b9acd6b --- /dev/null +++ b/ml_peg/analysis/interstitial/Relastab/analyse_Relastab.py @@ -0,0 +1,201 @@ +"""Analyse Relastab benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from ase.io import read, write +import pytest +from scipy.stats import kendalltau, spearmanr + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +# D3_MODEL_NAMES = build_d3_name_map(MODELS) +D3_MODEL_NAMES = {m: m for m in MODELS} +CALC_PATH = CALCS_ROOT / "interstitial" / "Relastab" / "outputs" +OUT_PATH = APP_ROOT / "data" / "interstitial" / "Relastab" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def get_system_names() -> list[str]: + """ + Get list of system names. + + Returns + ------- + list[str] + List of system names. + """ + system_names = [] + + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if model_dir.exists(): + xyz_files = sorted(model_dir.glob("*.xyz")) + system_names = [xyz.stem for xyz in xyz_files] + if system_names: + break + return system_names + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_energy.json", + title="Relastab Energies (Shifted)", + x_label="Predicted Energy (Shifted) / eV", + y_label="Reference Energy (Shifted) / eV", + hoverdata={ + "System": get_system_names(), + }, +) +def stability_energies() -> dict[str, list]: + """ + Get energies for Relastab systems. + + Returns + ------- + dict[str, list] + Dictionary of reference and predicted energies. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + model_dir = CALC_PATH / model_name + if not model_dir.exists(): + continue + + xyz_files = sorted(model_dir.glob("*.xyz")) + + # Temporary lists to sort together + temp_data = [] + + for xyz_file in xyz_files: + atoms = read(xyz_file) + e_config = atoms.get_potential_energy() + ref_energy = atoms.info.get("ref", None) + + if ref_energy is None: + continue + + temp_data.append( + { + "name": xyz_file.name, + "atoms": atoms, + "pred": e_config, + "ref": ref_energy, + } + ) + + # Copy structure to output for App + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / xyz_file.name, atoms) + + # To make the plot nicer, mean shifting is usually good for total energies. + + preds = [d["pred"] for d in temp_data] + refs = [d["ref"] for d in temp_data] + + if not preds or not refs: + continue + + # Shift energies by mean to make them comparable on plot + mean_pred = sum(preds) / len(preds) + mean_ref = sum(refs) / len(refs) + + results[model_name] = [p - mean_pred for p in preds] + + if not ref_stored: + results["ref"] = [r - mean_ref for r in refs] + ref_stored = True + + return results + + +@pytest.fixture +def ranking_metrics(stability_energies) -> dict[str, dict[str, float]]: + """ + Compute ranking metrics (KendallTau and Spearman). + + Parameters + ---------- + stability_energies + Dictionary of reference and predicted energies. + + Returns + ------- + dict[str, dict[str, float]] + Dictionary of ranking metrics per model. + """ + results = {} + + ref_values = stability_energies.get("ref", []) + if not ref_values: + return {} + + for model_name in MODELS: + pred_values = stability_energies.get(model_name, []) + if not pred_values or len(pred_values) != len(ref_values): + results[model_name] = {"KendallTau": None, "Spearman": None} + continue + + tau, _ = kendalltau(ref_values, pred_values) + spearman, _ = spearmanr(ref_values, pred_values) + + results[model_name] = {"KendallTau": tau, "Spearman": spearman} + + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "relastab_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(ranking_metrics: dict[str, dict[str, float]]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + ranking_metrics + Dictionary of ranking metrics per model. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + # Reshape for build_table: {MetricName: {ModelName: Value}} + reshaped = {"KendallTau": {}, "Spearman": {}} + + for model, scores in ranking_metrics.items(): + reshaped["KendallTau"][model] = scores.get("KendallTau") + reshaped["Spearman"][model] = scores.get("Spearman") + + return reshaped + + +def test_relastab_analysis(metrics: dict[str, dict]) -> None: + """ + Run Relastab analysis test. + + Parameters + ---------- + metrics + All Relastab metrics. + """ + return diff --git a/ml_peg/analysis/interstitial/Relastab/metrics.yml b/ml_peg/analysis/interstitial/Relastab/metrics.yml new file mode 100644 index 000000000..3277a17cc --- /dev/null +++ b/ml_peg/analysis/interstitial/Relastab/metrics.yml @@ -0,0 +1,13 @@ +metrics: + KendallTau: + good: 1.0 + bad: 0.0 + unit: au + tooltip: "Kendall rank correlation coefficient (1.0 is perfect agreement)" + level_of_theory: DFT + Spearman: + good: 1.0 + bad: 0.0 + unit: au + tooltip: "Spearman rank correlation coefficient" + level_of_theory: DFT diff --git a/ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py b/ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py new file mode 100644 index 000000000..3f274d22b --- /dev/null +++ b/ml_peg/app/interstitial/FE1SIA/app_FE1SIA.py @@ -0,0 +1,93 @@ +"""Run FE1SIA app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +# Get all models +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "FE1SIA Formation Energies" +# Update this URL when documentation is added +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/interstitial.html#fe1sia" +) +DATA_PATH = APP_ROOT / "data" / "interstitial" / "FE1SIA" + + +class FE1SIAApp(BaseApp): + """FE1SIA benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_energy.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + # Assets dir will be parent directory - individual files for each system + # Assuming the first model has all structures + structs_dir = DATA_PATH / MODELS[0] + # Sort files to match the order in the scatter plot (usually sorted by filename) + # Note: In analysis we sorted by glob which is lexicographical. + structs = [ + f"assets/interstitial/FE1SIA/{MODELS[0]}/{struct_file.name}" + for struct_file in sorted(structs_dir.glob("*.xyz")) + ] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"RMSD": scatter}, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> FE1SIAApp: + """ + Get FE1SIA benchmark app layout and callback registration. + + Returns + ------- + FE1SIAApp + Benchmark layout and callback registration. + """ + return FE1SIAApp( + name=BENCHMARK_NAME, + description="Formation energies of single interstitial atoms.", + docs_url=DOCS_URL, + table_path=DATA_PATH / "fe1sia_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + + # Construct layout and register callbacks + fe1sia_app = get_app() + full_app.layout = fe1sia_app.layout + fe1sia_app.register_callbacks() + + # Run app + full_app.run(port=8055, debug=True) diff --git a/ml_peg/app/interstitial/Relastab/app_Relastab.py b/ml_peg/app/interstitial/Relastab/app_Relastab.py new file mode 100644 index 000000000..5d5a06b0f --- /dev/null +++ b/ml_peg/app/interstitial/Relastab/app_Relastab.py @@ -0,0 +1,90 @@ +"""Run Relastab app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +# Get all models +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "Relastab Relative Stability" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/interstitial.html#relastab" +) +DATA_PATH = APP_ROOT / "data" / "interstitial" / "Relastab" + + +class RelastabApp(BaseApp): + """Relastab benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_energy.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + # Assuming first model has structure files + structs_dir = DATA_PATH / MODELS[0] + # Sort glob to match analysis order + structs = [ + f"assets/interstitial/Relastab/{MODELS[0]}/{struct_file.name}" + for struct_file in sorted(structs_dir.glob("*.xyz")) + ] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"KendallTau": scatter, "Spearman": scatter}, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> RelastabApp: + """ + Get Relastab benchmark app. + + Returns + ------- + RelastabApp + Benchmark layout and callback registration. + """ + return RelastabApp( + name=BENCHMARK_NAME, + description="Relative stability ranking of interstitial Fe configurations.", + docs_url=DOCS_URL, + table_path=DATA_PATH / "relastab_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + # Create Dash app + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + + # Construct layout and register callbacks + rel_app = get_app() + full_app.layout = rel_app.layout + rel_app.register_callbacks() + + # Run app + full_app.run(port=8055, debug=True) diff --git a/ml_peg/app/interstitial/interstitial.yml b/ml_peg/app/interstitial/interstitial.yml new file mode 100644 index 000000000..5dda9652f --- /dev/null +++ b/ml_peg/app/interstitial/interstitial.yml @@ -0,0 +1,2 @@ + title: Interstitial + description: Formation energy of interstitials and relative stability. diff --git a/ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py b/ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py new file mode 100644 index 000000000..fbacf0948 --- /dev/null +++ b/ml_peg/calcs/interstitial/FE1SIA/calc_FE1SIA.py @@ -0,0 +1,104 @@ +"""Run calculations for FE1SIA (formation energy of 1 SIA) tests.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from ase.io import read, write +import pytest + +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# Reference energy for bulk +# E_BULK = -1054.46260251 + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_fe1sia(mlip: tuple[str, Any]) -> None: + """ + Run FE1SIA test. + + Parameters + ---------- + mlip + Name of model use and model to get calculator. + """ + model_name, model = mlip + calc = model.get_calculator() + + data_path = download_s3_data( + key="inputs/interstitial/FE1SIA/DB.zip", + filename="DB.zip", + ) + formation_energy_dir = data_path / "DB" / "formation_energy" + + # Process all *.poscar files + poscar_files = sorted(formation_energy_dir.glob("*.poscar")) + + with open(formation_energy_dir / "ref.poscar") as f: + line1 = f.readline() + tokens = line1.split() + try: + energy_bulk = float(tokens[4]) + except (ValueError, IndexError): + print("Skipping ref.poscar: distinct energy value not found in header.") + energy_bulk = 0.0 # Fallback or break + + # Read n_bulk from line 6. Skip lines 2-5 (4 lines) + for _ in range(4): + f.readline() + line6 = f.readline() + n_bulk = sum(float(x) for x in line6.split()) + + for poscar_path in poscar_files: + # Parse reference energy E from the first line of the POSCAR file + # Format: 111 1 Fe 26 E ... + with open(poscar_path) as f: + line1 = f.readline() + tokens = line1.split() + try: + energy_ref_raw = float(tokens[4]) + except (ValueError, IndexError): + print( + f"Skipping {poscar_path.name}: distinct energy value not found in " + f"header." + ) + continue + + # Read n_config from line 6. Skip lines 2-5 (4 lines) + for _ in range(4): + f.readline() + line6 = f.readline() + n_config = sum(float(x) for x in line6.split()) + + # Calculate reference formation energy + # Formula: E - (N_config / N_bulk) * E_bulk + # For ref.poscar (bulk), formation energy is 0 + if poscar_path.name == "ref.poscar": + ref_formation_energy = 0.0 + else: + ref_formation_energy = energy_ref_raw - (n_config / n_bulk) * energy_bulk + + # Read structure + atoms = read(poscar_path, format="vasp") + + # Run calculation + atoms.calc = calc + atoms.get_potential_energy() + + # Store reference info and system name + atoms.info["ref"] = ref_formation_energy + atoms.info["system"] = poscar_path.stem + + # Write outputs + write_dir = OUT_PATH / model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{poscar_path.stem}.xyz", atoms) diff --git a/ml_peg/calcs/interstitial/Relastab/calc_Relastab.py b/ml_peg/calcs/interstitial/Relastab/calc_Relastab.py new file mode 100644 index 000000000..63e9e6c23 --- /dev/null +++ b/ml_peg/calcs/interstitial/Relastab/calc_Relastab.py @@ -0,0 +1,67 @@ +"""Calculate Relastab benchmark.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from ase.io import read, write +import pytest + +# from ml_peg.calcs import CALCS_ROOT +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_relastab(mlip: tuple[str, Any]) -> None: + """ + Run Relastab calculations. + + Parameters + ---------- + mlip + Name of model use and model to get calculator. + """ + model_name, model = mlip + calc = model.get_calculator() + + data_path = download_s3_data( + key="inputs/interstitial/relative_stability/DB.zip", + filename="DB.zip", + ) + relative_stability_dir = data_path / "DB" / "relative_stability" + + # Process all *.poscar files + poscar_files = sorted(relative_stability_dir.glob("*.poscar")) + + for poscar in poscar_files: + # Read structure + atoms = read(poscar, format="vasp") + + # Extract ref energy + with open(poscar) as f: + header = f.readline().strip() + try: + ref_energy = float(header.split()[4]) + atoms.info["ref"] = ref_energy + except (IndexError, ValueError) as e: + print(f"Warning: Could not extract energy from header '{header}': {e}") + atoms.info["ref"] = None + + # Calculate + atoms.calc = calc + + atoms.get_potential_energy() + atoms.info["system"] = poscar.stem + + # Write outputs + write_dir = OUT_PATH / model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{poscar.stem}.xyz", atoms) diff --git a/uv.lock b/uv.lock index f828ab629..b1665744d 100644 --- a/uv.lock +++ b/uv.lock @@ -2603,6 +2603,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/a8/365059bbcd4572cbc41de17fd5b682be5868b218c3c5479071865cab9078/entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f", size = 5294, upload-time = "2022-02-02T21:30:26.024Z" }, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" }, +] + [[package]] name = "eventlet" version = "0.40.4" @@ -5998,6 +6007,7 @@ dependencies = [ { name = "mdanalysis", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "mdanalysis", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "mlipx" }, + { name = "openpyxl" }, { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-grace' and extra == 'extra-6-ml-peg-uma') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-mattersim') or (extra == 'extra-6-ml-peg-mace' and extra == 'extra-6-ml-peg-uma')" }, { name = "tqdm" }, @@ -6078,6 +6088,7 @@ requires-dist = [ { name = "mattersim", marker = "extra == 'mattersim'", specifier = "==1.2.0" }, { name = "mdanalysis" }, { name = "mlipx", specifier = ">=0.1.5,<0.2" }, + { name = "openpyxl" }, { name = "orb-models", marker = "python_full_version < '3.13' and sys_platform != 'win32' and extra == 'orb'", specifier = "==0.5.5" }, { name = "pet-mad", marker = "sys_platform != 'win32' and extra == 'pet-mad'", specifier = "==1.4.4" }, { name = "scikit-learn", specifier = ">=1.7.1" }, @@ -7337,6 +7348,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "et-xmlfile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" }, +] + [[package]] name = "opt-einsum" version = "3.4.0"