diff --git a/docs/source/user_guide/benchmarks/surfaces.rst b/docs/source/user_guide/benchmarks/surfaces.rst
index 780118c7c..4bd81d9f1 100644
--- a/docs/source/user_guide/benchmarks/surfaces.rst
+++ b/docs/source/user_guide/benchmarks/surfaces.rst
@@ -145,3 +145,45 @@ Reference data:
   * S. P. Ong, W. D. Richards, A. Jain, G. Hautier, M. Kocher, S. Cholia, D. Gunter, V. Chevrier, K. A. Persson, G. Ceder, "Python Materials Genomics (pymatgen): A Robust, Open-Source Python Library for Materials Analysis," Comput. Mater. Sci., 2013, 68, 314–319. https://doi.org/10.1016/j.commatsci.2012.10.028
 
 * Tran et al. relaxed the slabs using spin-polarized PBE calculations performed in VASP, with a cutoff energy of 400 eV.
+
+
+Metal Surface Reconstructions
+================================
+
+Summary
+-------
+
+Performance in the predicting surface energy and representing the surface reconstruction. 
+
+Metrics
+-------
+
+For each slab, a geometry optimization (F_max = 0.05 eV/Å) is performed, where the bottom most layer is fixed. After that three metrics are evaluated:
+
+* Surface energy error (the surface energy is calculated with regard to the bulk and gas phase references)
+
+* Displacement with regard to the reference configuration
+
+* Energetic ranking (using the surface energy)
+
+Computational cost
+------------------
+
+Very low: tests are likely to take half an hour.
+
+Data availability
+-----------------
+
+Input data:
+
+* Stable and metastable Cu(111)O surface reconstructions.
+
+  * Zhu, B., Huang, Y., Lv, J., Huang, W., Lian, Z., Ouyang, R.,  Yang, F. "Dynamic Evolution and Transformation of Copper Oxides on Cu (111)." The Journal of Physical Chemistry C, 2025, 129(29), 13497-13504. https://doi.org/10.1021/acs.jpcc.5c03416
+
+* Stable and metastable Pd/Au reconstructions with C, O and N atoms.
+
+  * Vinogradova, O. V., Reuter, K., Bukas, V. J. "Trends of Pd3Au (111) alloy surface segregation in oxygen, carbon, and nitrogen environments." The Journal of Physical Chemistry C, 2023, 127(45), 22060-22066. https://doi.org/10.1021/acs.jpcc.3c05640
+
+Reference data:
+
+* PBE conjugate gradient geomerty optimizations are performed via VASP, with a cutoff energy of 500 eV.
diff --git a/ml_peg/analysis/surfaces/metal_surface_reconstructions/analyse_metal_surface_reconstructions.py b/ml_peg/analysis/surfaces/metal_surface_reconstructions/analyse_metal_surface_reconstructions.py
new file mode 100644
index 000000000..72404ce33
--- /dev/null
+++ b/ml_peg/analysis/surfaces/metal_surface_reconstructions/analyse_metal_surface_reconstructions.py
@@ -0,0 +1,311 @@
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase import units
+from ase.io import read, write
+import numpy as np
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+print(MODELS)
+#D3_MODEL_NAMES = build_d3_name_map(MODELS)
+CALC_PATH = CALCS_ROOT / "surfaces" / "metal_surfaces" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "surfaces" / "metal_surfaces"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+# Unit conversion
+EV_TO_KJ_PER_MOL = units.mol / units.kJ
+
+
+def get_system_names() -> list[str]:
+    """
+    Get list of metal surface system names.
+
+    Returns
+    -------
+    list[str]
+        List of system names from structure files.
+    """
+    system_names = []
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        print(model_dir)
+        if model_dir.exists():
+            print('yes')
+            xyz_files = sorted(model_dir.glob("*.xyz"))
+            if xyz_files:
+                for xyz_file in xyz_files:
+                    atoms = read(xyz_file)
+                    system_names.append(atoms.info["system"])
+                
+    return system_names
+
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "slab_energies.json",
+    title="Metal Slab Energies",
+    x_label="Predicted Surface Free Energy / meV/Å²",
+    y_label="Reference EneSurface Free Energy / meV/Å²",
+    hoverdata={
+        "System": get_system_names(),
+    },
+)
+def slab_energies() -> dict[str, list]:
+    """
+    Get energies for all slabs systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted lattice energies.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    ref_mu = {}
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+
+        if not model_dir.exists():
+            continue
+
+        xyz_files = sorted(model_dir.glob("*.xyz"))
+        if not xyz_files:
+            continue
+        
+        model_mu = {}
+        for xyz_file in xyz_files:
+            name = xyz_file.name
+            if name.startswith('bulk') or name.startswith('gas_phase'):
+                structs = read(xyz_file, index=":")[0]
+                model_mu[structs.get_chemical_symbols()[0]] = structs.get_potential_energy()/len(structs)
+                if not ref_stored:
+                    ref_mu[structs.get_chemical_symbols()[0]] = structs.info['DFT_energy']/len(structs)
+
+
+
+        for xyz_file in xyz_files:
+            name = xyz_file.name
+            if not (name.startswith('bulk') or name.startswith('gas_phase')):
+                structs = read(xyz_file, index=":")[0]
+                system = structs.info["system"]
+                symbols = structs.get_chemical_symbols()
+                cell = structs.cell
+                A = np.linalg.norm(np.cross(cell[0], cell[1]))
+
+                results[model_name].append((structs.get_potential_energy()-np.sum([model_mu[s] for s in symbols]))*1000/A)
+
+
+
+                # Copy individual structure files to app data directory
+                structs_dir = OUT_PATH / model_name
+                structs_dir.mkdir(parents=True, exist_ok=True)
+                write(structs_dir / f"{system}.xyz", structs)
+
+                # Store reference energies (only once)
+                if not ref_stored:
+                    results["ref"].append((structs.info["DFT_energy"]-np.sum([ref_mu[s] for s in symbols]))*1000/A)
+
+        ref_stored = True
+
+    return results
+
+
+
+
+
+
+
+@pytest.fixture
+def slab_positions() -> dict[str, list]:
+    """
+    Get energies for all slabs systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted lattice energies.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+
+        if not model_dir.exists():
+            continue
+
+        xyz_files = sorted(model_dir.glob("*.xyz"))
+        if not xyz_files:
+            continue
+
+        for xyz_file in xyz_files:
+            name = xyz_file.name
+            if not (name.startswith('bulk') or name.startswith('gas_phase')):
+                structs = read(xyz_file, index=":")[0]
+                z_min = np.min(structs.positions[:,2])
+                moving = structs.positions[:,2]>z_min+0.1
+                results[model_name].append(structs.positions[moving])
+
+
+
+                # Store reference energies (only once)
+                if not ref_stored:
+                    results["ref"].append(structs.arrays["DFT_positions"][moving])
+
+        ref_stored = True
+
+    return results
+
+
+@pytest.fixture
+def ranking_error(slab_energies) -> dict[str, float]:
+    """
+    Get ranking error across all triplets.
+
+    Parameters
+    ----------
+    relative_energies
+        Dictionary of reference and predicted relative energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted ranking errors for all models.
+    """
+    print(slab_energies.keys())
+    results = {}
+    ref_min = []
+    ref_max = []
+    for i in range(len(slab_energies["ref"]) // 3):
+        ref_energies = slab_energies["ref"][3 * i : 3 * i + 3]
+        ref_min.append(np.argmin(ref_energies))
+        ref_max.append(np.argmax(ref_energies))
+
+    for model_name in MODELS:
+        if slab_energies[model_name]:
+            pred_min = []
+            pred_max = []
+            for i in range(len(slab_energies[model_name]) // 3):
+                pred_energies = slab_energies[model_name][3 * i : 3 * i + 3]
+                pred_min.append(np.argmin(pred_energies))
+                pred_max.append(np.argmax(pred_energies))
+
+            results[model_name] = (
+                1
+                - 0.5 * np.mean(np.array(ref_min) == np.array(pred_min))
+                - 0.5 * np.mean(np.array(ref_max) == np.array(pred_max))
+            )
+        else:
+            results[model_name] = None
+
+    return results
+
+
+
+
+@pytest.fixture
+def metal_surfaces_errors(slab_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for lattice energies.
+
+    Parameters
+    ----------
+    lattice_energies
+        Dictionary of reference and predicted lattice energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted lattice energy errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        if slab_energies[model_name]:
+            results[model_name] = mae(
+                slab_energies["ref"], slab_energies[model_name]
+            )
+        else:
+            results[model_name] = None
+    return results
+
+
+
+@pytest.fixture
+def metal_position_errors(slab_positions) -> dict[str, float]:
+    """
+    Get mean absolute error for lattice energies.
+
+    Parameters
+    ----------
+    lattice_energies
+        Dictionary of reference and predicted lattice energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted lattice energy errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        if slab_positions[model_name]:
+            results[model_name] = (
+                np.mean(np.linalg.norm(np.concat(slab_positions["ref"])-np.concat(slab_positions[model_name]),axis=1))
+            )
+        else:
+            results[model_name] = None
+    return results
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "metal_surfaces_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=None,
+)
+def metrics(metal_surfaces_errors: dict[str, float], metal_position_errors: dict[str, float], ranking_error:  dict[str, float]) -> dict[str, dict]:
+    """
+    Get all metal surface metrics.
+
+    Parameters
+    ----------
+    metal_surfaces_errors
+        Mean absolute errors for all systems.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": metal_surfaces_errors,
+        "Displacement": metal_position_errors,
+        "Ranking Error": ranking_error,
+    }
+
+def test_metal_surfaces(metrics: dict[str, dict]) -> None:
+    """
+    Run metal surface test.
+
+    Parameters
+    ----------
+    metrics
+        All metal surface metrics.
+    """
+    return
diff --git a/ml_peg/analysis/surfaces/metal_surface_reconstructions/metrics.yml b/ml_peg/analysis/surfaces/metal_surface_reconstructions/metrics.yml
new file mode 100644
index 000000000..6bc9dc4ff
--- /dev/null
+++ b/ml_peg/analysis/surfaces/metal_surface_reconstructions/metrics.yml
@@ -0,0 +1,19 @@
+metrics:
+    MAE:
+        good: 0.0
+        bad: 20.0
+        unit: meV/Å²
+        tooltip: "Mean Absolute Error for all systems"
+        level_of_theory: PBE
+    Displacement:
+        good: 0.0
+        bad: 0.1
+        unit: Å
+        tooltip: "Mean Atom displacement"
+        level_of_theory: PBE
+    Ranking Error:
+        good: 0.0
+        bad: 1.0
+        unit: null
+        tooltip: "Error in ranking stability across neighboring triplets"
+        level_of_theory: PBE
\ No newline at end of file
diff --git a/ml_peg/app/surfaces/metal_surface_reconstructions/app_metal_surface_reconstructions.py b/ml_peg/app/surfaces/metal_surface_reconstructions/app_metal_surface_reconstructions.py
new file mode 100644
index 000000000..58162a487
--- /dev/null
+++ b/ml_peg/app/surfaces/metal_surface_reconstructions/app_metal_surface_reconstructions.py
@@ -0,0 +1,82 @@
+
+
+"""Run Metal surface reconstructions app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+# Get all models
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "Metal Surfaces"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/surfaces.html#metal_surfaces"
+)
+DATA_PATH = APP_ROOT / "data" / "surfaces" / "metal_surfaces"
+
+
+
+class Metal_surface_reconstructions(BaseApp):
+    """Metal surface reconstructions benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "slab_energies.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        # Assets dir will be parent directory - individual files for each system
+        structs_dir = DATA_PATH / MODELS[0]
+        structs = [
+            f"assets/surfaces/metal_surfaces/{MODELS[0]}/{struct_file.stem}.xyz"
+            for struct_file in sorted(structs_dir.glob("*.xyz"))
+        ]
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter, "Displacement": scatter},
+        )
+        
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+
+def get_app() -> Metal_surface_reconstructions:
+    """
+    Get Metal surface reconstructions benchmark app layout and callback registration.
+
+    Returns
+    -------
+    Metal surface reconstructions App
+        Benchmark layout and callback registration.
+    """
+    return Metal_surface_reconstructions(
+        name=BENCHMARK_NAME,
+        description="Energies for two surface reconstuctions.",
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "metal_surfaces_metrics_table.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
+
diff --git a/ml_peg/calcs/surfaces/metal_surface_reconstructions/calc_metal_surface_reconstructions.py b/ml_peg/calcs/surfaces/metal_surface_reconstructions/calc_metal_surface_reconstructions.py
new file mode 100644
index 000000000..b148515e3
--- /dev/null
+++ b/ml_peg/calcs/surfaces/metal_surface_reconstructions/calc_metal_surface_reconstructions.py
@@ -0,0 +1,80 @@
+
+"""Run calculations for metal surface tests."""
+
+from __future__ import annotations
+
+from copy import copy
+from pathlib import Path
+from typing import Any
+
+from ase import units
+from ase.io import read, write
+from ase.optimize import BFGS
+from ase.constraints import FixAtoms
+
+import numpy as np
+import pytest
+
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+DATA_PATH = Path(__file__).parent / "data"
+OUT_PATH = Path(__file__).parent / "outputs"
+
+# Unit conversion
+EV_TO_KJ_PER_MOL = units.mol / units.kJ
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_lattice_energy(mlip: tuple[str, Any]) -> None:
+    """
+    Run Metal surface reconstructions lattice energy test.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+
+    # Download metal_surface_reconstructions dataset
+    surface_configurations = (
+       download_s3_data(
+           key="inputs/surfaces/metal_surface_reconstructions/metal_surface_reconstructions.zip",
+           filename="metal_surface_reconstructions.zip",
+       )
+       / "metal_surface_reconstructions"
+    )
+
+
+
+    with open(surface_configurations / "list") as f:
+        systems = f.read().splitlines()
+
+    for system in systems:
+        slab = read(surface_configurations / f"{system}.xyz")
+        slab.info["system"] = system
+        slab.calc = calc
+        
+        if not (system.startswith('gas_phase') and system.startswith('bulk')):
+            z_min = np.min(slab.positions[:,2])
+            c = FixAtoms(indices=[at.index for at in slab if at.position[2] < (z_min+0.1)])
+            slab.set_constraint(c)
+
+        if system.startswith('gas_phase'):
+            opt = BFGS(slab)
+            opt.run(fmax=0.01)
+        elif not system.startswith('bulk'):
+            opt = BFGS(slab)
+            opt.run(fmax=0.05,steps=500)
+
+        slab.get_potential_energy()
+
+        # Write output structures
+        write_dir = OUT_PATH / model_name
+        write_dir.mkdir(parents=True, exist_ok=True)
+        write(write_dir / f"{system}.xyz", slab)