Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/material_hasher/benchmark/run_disordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
import yaml
from pymatgen.core import Structure

logger = logging.getLogger(__name__)

from material_hasher.benchmark.disordered import (
download_disordered_structures,
get_classification_results_dissimilar,
Expand All @@ -25,6 +23,8 @@
from material_hasher.similarity import SIMILARITY_MATCHERS
from material_hasher.types import StructureEquivalenceChecker

logger = logging.getLogger(__name__)

STRUCTURE_CHECKERS = {**HASHERS, **SIMILARITY_MATCHERS}


Expand Down
6 changes: 5 additions & 1 deletion src/material_hasher/benchmark/run_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,11 @@ def hasher_sensitivity(
else:
raise ValueError("Unknown structure checker")

return matching_hashes / len(transformed_structures) if len(transformed_structures) > 0 else 0
return (
matching_hashes / len(transformed_structures)
if len(transformed_structures) > 0
else 0
)


def mean_sensitivity(
Expand Down
13 changes: 6 additions & 7 deletions src/material_hasher/benchmark/transformations.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# Copyright 2025 Entalpic
import inspect
import random
from typing import Optional, Union
from typing import Optional

import numpy as np
from pymatgen.core import Structure, SymmOp
from pymatgen.core import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

ALL_TEST_CASES = [
Expand All @@ -16,10 +15,10 @@
]

PARAMETERS = {
"gaussian_noise": {"sigma": np.logspace(0.0001, 0.5,15, base=0.0000001)},
"isometric_strain": {"pct": [1,1.05,1.1,1.2,1.5]},
"strain": {"sigma": np.logspace(0.001, 0.5,10, base=0.0000001)},
"translation": {"sigma": np.logspace(0.0001, 0.5,15, base=0.0000001)},
"gaussian_noise": {"sigma": np.logspace(0.0001, 0.5, 15, base=0.0000001)},
"isometric_strain": {"pct": [1, 1.05, 1.1, 1.2, 1.5]},
"strain": {"sigma": np.logspace(0.001, 0.5, 10, base=0.0000001)},
"translation": {"sigma": np.logspace(0.0001, 0.5, 15, base=0.0000001)},
"symm_ops": {"structure_symmetries": ["all_symmetries_found"]},
}

Expand Down
37 changes: 35 additions & 2 deletions src/material_hasher/hasher/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,51 @@
from pymatgen.core import Structure

from material_hasher.types import StructureEquivalenceChecker
from material_hasher.utils import reduce_structure


class HasherBase(ABC, StructureEquivalenceChecker):
"""Abstract class for matching of the hashes between structures."""
"""Abstract class for matching of the hashes between structures.

Parameters
----------
primitive_reduction : bool, optional
Whether to reduce the structures to their primitive cells.
Defaults to False.
"""

def __init__(self, primitive_reduction: bool = False):
    """Store the primitive-reduction setting on the hasher.

    Parameters
    ----------
    primitive_reduction : bool, optional
        Kept as ``self.primitive_reduction``. When true,
        ``get_material_hash`` passes the structure through
        ``reduce_structure`` before hashing it. Defaults to False.
    """
    self.primitive_reduction = primitive_reduction

@abstractmethod
def get_material_hash(
    self,
    structure: Structure,
) -> str:
    """Hash a structure, optionally reducing it first.

    This is the public entry point: it applies the optional reduction
    step and then delegates the actual hashing to the subclass hook
    ``_get_material_hash``.

    Parameters
    ----------
    structure : Structure
        Structure to hash.

    Returns
    -------
    str
        Hash of the structure.
    """
    # Only call reduce_structure when the instance was configured for it;
    # otherwise the structure is handed to the hook untouched.
    to_hash = (
        reduce_structure(structure) if self.primitive_reduction else structure
    )
    return self._get_material_hash(to_hash)

@abstractmethod
def _get_material_hash(
self,
structure: Structure,
) -> str:
"""Get the material hash of the structure.

Should be implemented by the subclass.

Parameters
----------
structure : Structure
Expand Down
5 changes: 2 additions & 3 deletions src/material_hasher/hasher/bawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ def __init__(
primitive_reduction: bool = False,
shorten_hash: bool = False,
):
super().__init__(primitive_reduction=primitive_reduction)
self.graphing_algorithm = graphing_algorithm
self.bonding_algorithm = bonding_algorithm
self.bonding_kwargs = bonding_kwargs
self.include_composition = include_composition
self.symmetry_labeling = symmetry_labeling
self.primitive_reduction = primitive_reduction
self.shorten_hash = shorten_hash

def get_bawl_materials_data(
Expand Down Expand Up @@ -92,7 +92,6 @@ def get_bawl_materials_data(
structure,
bonding_kwargs=self.bonding_kwargs,
bonding_algorithm=self.bonding_algorithm,
primitive_reduction=self.primitive_reduction,
)
data["bonding_graph_hash"] = get_weisfeiler_lehman_hash(graph)
else:
Expand Down Expand Up @@ -121,7 +120,7 @@ def get_bawl_materials_data(
data["composition"] = structure.composition.reduced_formula.replace(" ", "")
return data

def get_material_hash(self, structure: Structure) -> str:
def _get_material_hash(self, structure: Structure) -> str:
"""Returns a hash of the structure.

Parameters
Expand Down
6 changes: 3 additions & 3 deletions src/material_hasher/hasher/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ class SimpleCompositionHasher(HasherBase):
This is just a demo.
"""

def __init__(self) -> None:
pass
def __init__(self, primitive_reduction: bool = False) -> None:
super().__init__(primitive_reduction=primitive_reduction)

def get_material_hash(self, structure: Structure) -> str:
def _get_material_hash(self, structure: Structure) -> str:
"""Returns a hash of the structure.

Parameters
Expand Down
9 changes: 4 additions & 5 deletions src/material_hasher/hasher/pdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@


class PointwiseDistanceDistributionHasher(HasherBase):
def __init__(self, cutoff: float = 100.0):
def __init__(self, cutoff: float = 100.0, primitive_reduction: bool = False):
"""
Initialize the PDD Generator.

Parameters:
cutoff (float): Cutoff distance for PDD calculation. Default is 100.
"""
super().__init__(primitive_reduction=primitive_reduction)
self.cutoff = int(cutoff) # Ensure cutoff is an integer

def periodicset_from_structure(self, structure: Structure) -> PeriodicSet:
Expand Down Expand Up @@ -60,7 +61,7 @@ def periodicset_from_structure(self, structure: Structure) -> PeriodicSet:
types=atomic_numbers,
)

def get_material_hash(self, structure: Structure) -> str:
def _get_material_hash(self, structure: Structure) -> str:
"""
Generate a hashed string for a single pymatgen structure based on its
Point-wise Distance Distribution (PDD).
Expand All @@ -77,9 +78,7 @@ def get_material_hash(self, structure: Structure) -> str:
"""
periodic_set = self.periodicset_from_structure(structure)

pdd = PDD(
periodic_set, int(self.cutoff), collapse=False
)
pdd = PDD(periodic_set, int(self.cutoff), collapse=False)

# Round the PDD values to 4 decimal places for numerical stability and consistency.
pdd = np.round(pdd, decimals=4)
Expand Down
6 changes: 3 additions & 3 deletions src/material_hasher/hasher/slices.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@


class SLICESHasher(HasherBase):
def __init__(self):
def __init__(self, primitive_reduction: bool = False):
"""
Initializes the SLICESHasher with the SLICES backend.
"""
super().__init__(primitive_reduction=primitive_reduction)
self.backend = SLICES()

def get_material_hash(self, structure: Structure) -> str:
def _get_material_hash(self, structure: Structure) -> str:
"""
Converts a pymatgen Structure to a SLICES string.

Expand All @@ -32,4 +33,3 @@ def get_material_hash(self, structure: Structure) -> str:
The SLICES string representation of the structure.
"""
return self.backend.structure2SLICES(structure)

15 changes: 2 additions & 13 deletions src/material_hasher/hasher/utils/graph_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,12 @@
from pymatgen.analysis.local_env import EconNN, NearNeighbors
from pymatgen.core import Structure
from networkx import Graph
from moyopy import MoyoDataset
from moyopy.interface import MoyoAdapter
import warnings


def get_structure_graph(
structure: Structure,
bonding_kwargs: dict = {},
bonding_algorithm: NearNeighbors = EconNN,
primitive_reduction: bool = False,
) -> Graph:
"""Method to build networkx graph object based on
bonding algorithm from Pymatgen Structure
Expand All @@ -27,18 +23,11 @@ class to build bonded structure. Defaults to EconNN.
Returns:
Graph: networkx Graph object
"""
assess_structure = (
MoyoAdapter.get_structure(
MoyoDataset(MoyoAdapter.from_structure(structure)).prim_std_cell
)
if primitive_reduction
else structure.copy()
)
structure_graph = StructureGraph.with_local_env_strategy(
structure=assess_structure,
structure=structure,
strategy=bonding_algorithm(**bonding_kwargs),
)
for n, site in zip(range(len(assess_structure)), assess_structure):
for n, site in zip(range(len(structure)), structure):
structure_graph.graph.nodes[n]["specie"] = site.specie.name
for edge in structure_graph.graph.edges:
structure_graph.graph.edges[edge]["voltage"] = structure_graph.graph.edges[
Expand Down
10 changes: 8 additions & 2 deletions src/material_hasher/hasher/utils/symmetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

logger = logging.getLogger(__name__)


class MoyoSymmetry:
"""
This is a wrapper around the functions of the Moyo library.
Expand All @@ -29,7 +30,10 @@ class MoyoSymmetry:
"""

def __init__(
self, symprec: float | None = None, angle_tolerance: float | None = None, setting: str | None = None
self,
symprec: float | None = None,
angle_tolerance: float | None = None,
setting: str | None = None,
):
self.symprec = symprec
self.angle_tolerance = angle_tolerance
Expand Down Expand Up @@ -127,7 +131,9 @@ def __init__(self, aflow_executable: str = None):
f"the binary to be specified via {self.aflow_executable=}.\n"
)

def get_symmetry_label(self, structure: Structure, tolerance: float = 0.1) -> str | None:
def get_symmetry_label(
self, structure: Structure, tolerance: float = 0.1
) -> str | None:
"""
Returns AFLOW label for a given structure
Args:
Expand Down
73 changes: 70 additions & 3 deletions src/material_hasher/similarity/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,22 @@
from pymatgen.core import Structure

from material_hasher.types import StructureEquivalenceChecker
from material_hasher.utils import reduce_structure


class SimilarityMatcherBase(ABC, StructureEquivalenceChecker):
"""Abstract class for similarity matching between structures."""
"""Abstract class for similarity matching between structures.

Parameters
----------
primitive_reduction : bool, optional
Whether to reduce the structures to their primitive cells.
Defaults to False.
"""

def __init__(self, primitive_reduction: bool = False):
    """Store the primitive-reduction setting on the matcher.

    Parameters
    ----------
    primitive_reduction : bool, optional
        Kept as ``self.primitive_reduction``. When true, the public
        comparison methods pass both structures through
        ``reduce_structure`` before delegating. Defaults to False.
    """
    self.primitive_reduction = primitive_reduction

@abstractmethod
def get_similarity_score(
self, structure1: Structure, structure2: Structure
) -> float:
Expand All @@ -28,9 +38,32 @@ def get_similarity_score(
float
Similarity score between the two structures.
"""
pass
if self.primitive_reduction:
structure1 = reduce_structure(structure1)
structure2 = reduce_structure(structure2)
return self._get_similarity_score(structure1, structure2)

@abstractmethod
def _get_similarity_score(
    self, structure1: Structure, structure2: Structure
) -> float:
    """Returns a similarity score between two structures.

    Should be implemented by the subclass. ``get_similarity_score``
    calls this hook after applying the optional primitive reduction
    to both structures.

    Parameters
    ----------
    structure1 : Structure
        First structure to compare.
    structure2 : Structure
        Second structure to compare.

    Returns
    -------
    float
        Similarity score between the two structures.
    """
    pass

def is_equivalent(
self,
structure1: Structure,
Expand All @@ -42,6 +75,40 @@ def is_equivalent(
Uses a threshold to determine equivalence if provided and the algorithm
does not have a built-in threshold.

Parameters
----------
structure1 : Structure
First structure to compare.
structure2 : Structure
Second structure to compare.
threshold : float, optional
Threshold to determine similarity, by default None and the
algorithm's default threshold is used if it exists.

Returns
-------
bool
True if the two structures are similar, False otherwise.
"""
if self.primitive_reduction:
structure1 = reduce_structure(structure1)
structure2 = reduce_structure(structure2)
return self._is_equivalent(structure1, structure2, threshold)

@abstractmethod
def _is_equivalent(
self,
structure1: Structure,
structure2: Structure,
threshold: Optional[float] = None,
) -> bool:
"""Returns True if the two structures are equivalent according to the
implemented algorithm.
Uses a threshold to determine equivalence if provided and the algorithm
does not have a built-in threshold.

Should be implemented by the subclass.

Parameters
----------
structure1 : Structure
Expand Down
Loading