diff --git a/src/material_hasher/benchmark/run_disordered.py b/src/material_hasher/benchmark/run_disordered.py index 151b5a8..8d8357c 100644 --- a/src/material_hasher/benchmark/run_disordered.py +++ b/src/material_hasher/benchmark/run_disordered.py @@ -13,8 +13,6 @@ import yaml from pymatgen.core import Structure -logger = logging.getLogger(__name__) - from material_hasher.benchmark.disordered import ( download_disordered_structures, get_classification_results_dissimilar, @@ -25,6 +23,8 @@ from material_hasher.similarity import SIMILARITY_MATCHERS from material_hasher.types import StructureEquivalenceChecker +logger = logging.getLogger(__name__) + STRUCTURE_CHECKERS = {**HASHERS, **SIMILARITY_MATCHERS} diff --git a/src/material_hasher/benchmark/run_transformations.py b/src/material_hasher/benchmark/run_transformations.py index d6c0805..ea5f81f 100644 --- a/src/material_hasher/benchmark/run_transformations.py +++ b/src/material_hasher/benchmark/run_transformations.py @@ -232,7 +232,11 @@ def hasher_sensitivity( else: raise ValueError("Unknown structure checker") - return matching_hashes / len(transformed_structures) if len(transformed_structures) > 0 else 0 + return ( + matching_hashes / len(transformed_structures) + if len(transformed_structures) > 0 + else 0 + ) def mean_sensitivity( diff --git a/src/material_hasher/benchmark/transformations.py b/src/material_hasher/benchmark/transformations.py index 40d1cb1..5b4ff61 100644 --- a/src/material_hasher/benchmark/transformations.py +++ b/src/material_hasher/benchmark/transformations.py @@ -1,10 +1,9 @@ # Copyright 2025 Entalpic import inspect -import random -from typing import Optional, Union +from typing import Optional import numpy as np -from pymatgen.core import Structure, SymmOp +from pymatgen.core import Structure from pymatgen.symmetry.analyzer import SpacegroupAnalyzer ALL_TEST_CASES = [ @@ -16,10 +15,10 @@ ] PARAMETERS = { - "gaussian_noise": {"sigma": np.logspace(0.0001, 0.5,15, base=0.0000001)}, - 
"isometric_strain": {"pct": [1,1.05,1.1,1.2,1.5]}, - "strain": {"sigma": np.logspace(0.001, 0.5,10, base=0.0000001)}, - "translation": {"sigma": np.logspace(0.0001, 0.5,15, base=0.0000001)}, + "gaussian_noise": {"sigma": np.logspace(0.0001, 0.5, 15, base=0.0000001)}, + "isometric_strain": {"pct": [1, 1.05, 1.1, 1.2, 1.5]}, + "strain": {"sigma": np.logspace(0.001, 0.5, 10, base=0.0000001)}, + "translation": {"sigma": np.logspace(0.0001, 0.5, 15, base=0.0000001)}, "symm_ops": {"structure_symmetries": ["all_symmetries_found"]}, } diff --git a/src/material_hasher/hasher/base.py b/src/material_hasher/hasher/base.py index 21fdebf..5fdddce 100644 --- a/src/material_hasher/hasher/base.py +++ b/src/material_hasher/hasher/base.py @@ -6,18 +6,51 @@ from pymatgen.core import Structure from material_hasher.types import StructureEquivalenceChecker +from material_hasher.utils import reduce_structure class HasherBase(ABC, StructureEquivalenceChecker): - """Abstract class for matching of the hashes between structures.""" + """Abstract class for matching of the hashes between structures. + + Parameters + ---------- + primitive_reduction : bool, optional + Whether to reduce the structures to their primitive cells. + Defaults to False. + """ + + def __init__(self, primitive_reduction: bool = False): + self.primitive_reduction = primitive_reduction - @abstractmethod def get_material_hash( self, structure: Structure, ) -> str: """Returns a hash of the structure. + Parameters + ---------- + structure : Structure + Structure to hash. + + Returns + ------- + str + Hash of the structure. + """ + if self.primitive_reduction: + structure = reduce_structure(structure) + return self._get_material_hash(structure) + + @abstractmethod + def _get_material_hash( + self, + structure: Structure, + ) -> str: + """Get the material hash of the structure. + + Should be implemented by the subclass. 
+ Parameters ---------- structure : Structure diff --git a/src/material_hasher/hasher/bawl.py b/src/material_hasher/hasher/bawl.py index 148c320..3b2f9ab 100644 --- a/src/material_hasher/hasher/bawl.py +++ b/src/material_hasher/hasher/bawl.py @@ -55,12 +55,12 @@ def __init__( primitive_reduction: bool = False, shorten_hash: bool = False, ): + super().__init__(primitive_reduction=primitive_reduction) self.graphing_algorithm = graphing_algorithm self.bonding_algorithm = bonding_algorithm self.bonding_kwargs = bonding_kwargs self.include_composition = include_composition self.symmetry_labeling = symmetry_labeling - self.primitive_reduction = primitive_reduction self.shorten_hash = shorten_hash def get_bawl_materials_data( @@ -92,7 +92,6 @@ def get_bawl_materials_data( structure, bonding_kwargs=self.bonding_kwargs, bonding_algorithm=self.bonding_algorithm, - primitive_reduction=self.primitive_reduction, ) data["bonding_graph_hash"] = get_weisfeiler_lehman_hash(graph) else: @@ -121,7 +120,7 @@ def get_bawl_materials_data( data["composition"] = structure.composition.reduced_formula.replace(" ", "") return data - def get_material_hash(self, structure: Structure) -> str: + def _get_material_hash(self, structure: Structure) -> str: """Returns a hash of the structure. Parameters diff --git a/src/material_hasher/hasher/example.py b/src/material_hasher/hasher/example.py index f11b433..709a5e5 100644 --- a/src/material_hasher/hasher/example.py +++ b/src/material_hasher/hasher/example.py @@ -10,10 +10,10 @@ class SimpleCompositionHasher(HasherBase): This is just a demo. """ - def __init__(self) -> None: - pass + def __init__(self, primitive_reduction: bool = False) -> None: + super().__init__(primitive_reduction=primitive_reduction) - def get_material_hash(self, structure: Structure) -> str: + def _get_material_hash(self, structure: Structure) -> str: """Returns a hash of the structure. 
Parameters diff --git a/src/material_hasher/hasher/pdd.py b/src/material_hasher/hasher/pdd.py index 555f3e6..9a6043d 100644 --- a/src/material_hasher/hasher/pdd.py +++ b/src/material_hasher/hasher/pdd.py @@ -9,13 +9,14 @@ class PointwiseDistanceDistributionHasher(HasherBase): - def __init__(self, cutoff: float = 100.0): + def __init__(self, cutoff: float = 100.0, primitive_reduction: bool = False): """ Initialize the PDD Generator. Parameters: cutoff (float): Cutoff distance for PDD calculation. Default is 100. """ + super().__init__(primitive_reduction=primitive_reduction) self.cutoff = int(cutoff) # Ensure cutoff is an integer def periodicset_from_structure(self, structure: Structure) -> PeriodicSet: @@ -60,7 +61,7 @@ def periodicset_from_structure(self, structure: Structure) -> PeriodicSet: types=atomic_numbers, ) - def get_material_hash(self, structure: Structure) -> str: + def _get_material_hash(self, structure: Structure) -> str: """ Generate a hashed string for a single pymatgen structure based on its Point-wise Distance Distribution (PDD). @@ -77,9 +78,7 @@ def get_material_hash(self, structure: Structure) -> str: """ periodic_set = self.periodicset_from_structure(structure) - pdd = PDD( - periodic_set, int(self.cutoff), collapse=False - ) + pdd = PDD(periodic_set, int(self.cutoff), collapse=False) # Round the PDD values to 4 decimal places for numerical stability and consistency. pdd = np.round(pdd, decimals=4) diff --git a/src/material_hasher/hasher/slices.py b/src/material_hasher/hasher/slices.py index 69ddc3c..4a5f808 100644 --- a/src/material_hasher/hasher/slices.py +++ b/src/material_hasher/hasher/slices.py @@ -11,13 +11,14 @@ class SLICESHasher(HasherBase): - def __init__(self): + def __init__(self, primitive_reduction: bool = False): """ Initializes the SLICESHasher with the SLICES backend. 
""" + super().__init__(primitive_reduction=primitive_reduction) self.backend = SLICES() - def get_material_hash(self, structure: Structure) -> str: + def _get_material_hash(self, structure: Structure) -> str: """ Converts a pymatgen Structure to a SLICES string. @@ -32,4 +33,3 @@ def get_material_hash(self, structure: Structure) -> str: The SLICES string representation of the structure. """ return self.backend.structure2SLICES(structure) - diff --git a/src/material_hasher/hasher/utils/graph_structure.py b/src/material_hasher/hasher/utils/graph_structure.py index 58cc648..349841d 100644 --- a/src/material_hasher/hasher/utils/graph_structure.py +++ b/src/material_hasher/hasher/utils/graph_structure.py @@ -3,16 +3,12 @@ from pymatgen.analysis.local_env import EconNN, NearNeighbors from pymatgen.core import Structure from networkx import Graph -from moyopy import MoyoDataset -from moyopy.interface import MoyoAdapter -import warnings def get_structure_graph( structure: Structure, bonding_kwargs: dict = {}, bonding_algorithm: NearNeighbors = EconNN, - primitive_reduction: bool = False, ) -> Graph: """Method to build networkx graph object based on bonding algorithm from Pymatgen Structure @@ -27,18 +23,11 @@ class to build bonded structure. Defaults to EconNN. 
Returns: Graph: networkx Graph object """ - assess_structure = ( - MoyoAdapter.get_structure( - MoyoDataset(MoyoAdapter.from_structure(structure)).prim_std_cell - ) - if primitive_reduction - else structure.copy() - ) structure_graph = StructureGraph.with_local_env_strategy( - structure=assess_structure, + structure=structure, strategy=bonding_algorithm(**bonding_kwargs), ) - for n, site in zip(range(len(assess_structure)), assess_structure): + for n, site in zip(range(len(structure)), structure): structure_graph.graph.nodes[n]["specie"] = site.specie.name for edge in structure_graph.graph.edges: structure_graph.graph.edges[edge]["voltage"] = structure_graph.graph.edges[ diff --git a/src/material_hasher/hasher/utils/symmetry.py b/src/material_hasher/hasher/utils/symmetry.py index 2083610..f28759e 100644 --- a/src/material_hasher/hasher/utils/symmetry.py +++ b/src/material_hasher/hasher/utils/symmetry.py @@ -10,6 +10,7 @@ logger = logging.getLogger(__name__) + class MoyoSymmetry: """ This is a wrapper around the functions of the Moyo library. 
@@ -29,7 +30,10 @@ class MoyoSymmetry: """ def __init__( - self, symprec: float | None = None, angle_tolerance: float | None = None, setting: str | None = None + self, + symprec: float | None = None, + angle_tolerance: float | None = None, + setting: str | None = None, ): self.symprec = symprec self.angle_tolerance = angle_tolerance @@ -127,7 +131,9 @@ def __init__(self, aflow_executable: str = None): f"the binary to be specified via {self.aflow_executable=}.\n" ) - def get_symmetry_label(self, structure: Structure, tolerance: float = 0.1) -> str | None: + def get_symmetry_label( + self, structure: Structure, tolerance: float = 0.1 + ) -> str | None: """ Returns AFLOW label for a given structure Args: diff --git a/src/material_hasher/similarity/base.py b/src/material_hasher/similarity/base.py index e9c0851..ba5a648 100644 --- a/src/material_hasher/similarity/base.py +++ b/src/material_hasher/similarity/base.py @@ -6,12 +6,22 @@ from pymatgen.core import Structure from material_hasher.types import StructureEquivalenceChecker +from material_hasher.utils import reduce_structure class SimilarityMatcherBase(ABC, StructureEquivalenceChecker): - """Abstract class for similarity matching between structures.""" + """Abstract class for similarity matching between structures. + + Parameters + ---------- + primitive_reduction : bool, optional + Whether to reduce the structures to their primitive cells. + Defaults to False. + """ + + def __init__(self, primitive_reduction: bool = False): + self.primitive_reduction = primitive_reduction - @abstractmethod def get_similarity_score( self, structure1: Structure, structure2: Structure ) -> float: @@ -28,9 +38,32 @@ def get_similarity_score( float Similarity score between the two structures. 
""" - pass + if self.primitive_reduction: + structure1 = reduce_structure(structure1) + structure2 = reduce_structure(structure2) + return self._get_similarity_score(structure1, structure2) @abstractmethod + def _get_similarity_score( + self, structure1: Structure, structure2: Structure + ) -> float: + """Returns a similarity score between two structures. + + Should be implemented by the subclass. + + Parameters + ---------- + structure1 : Structure + First structure to compare. + structure2 : Structure + + Returns + ------- + float + Similarity score between the two structures. + """ + pass + def is_equivalent( self, structure1: Structure, @@ -42,6 +75,40 @@ def is_equivalent( Uses a threshold to determine equivalence if provided and the algorithm does not have a built-in threshold. + Parameters + ---------- + structure1 : Structure + First structure to compare. + structure2 : Structure + Second structure to compare. + threshold : float, optional + Threshold to determine similarity, by default None and the + algorithm's default threshold is used if it exists. + + Returns + ------- + bool + True if the two structures are similar, False otherwise. + """ + if self.primitive_reduction: + structure1 = reduce_structure(structure1) + structure2 = reduce_structure(structure2) + return self._is_equivalent(structure1, structure2, threshold) + + @abstractmethod + def _is_equivalent( + self, + structure1: Structure, + structure2: Structure, + threshold: Optional[float] = None, + ) -> bool: + """Returns True if the two structures are equivalent according to the + implemented algorithm. + Uses a threshold to determine equivalence if provided and the algorithm + does not have a built-in threshold. + + Should be implemented by the subclass. 
+ Parameters ---------- structure1 : Structure diff --git a/src/material_hasher/similarity/eqv2.py b/src/material_hasher/similarity/eqv2.py index e3b31be..3248d77 100644 --- a/src/material_hasher/similarity/eqv2.py +++ b/src/material_hasher/similarity/eqv2.py @@ -14,6 +14,8 @@ from pymatgen.core import Structure from pymatgen.io.ase import AseAtomsAdaptor +from material_hasher.utils import reduce_structure + from material_hasher.similarity.base import SimilarityMatcherBase HF_MODEL_REPO_ID = os.getenv("HF_MODEL_REPO_ID", "fairchem/OMAT24") @@ -54,7 +56,9 @@ def __init__( model_path: Optional[Union[str, Path]] = None, load_from_hf: bool = True, agg_type: str = "sum", + primitive_reduction: bool = False, ): + super().__init__(primitive_reduction=primitive_reduction) self.model_path = model_path self.load_from_hf = load_from_hf @@ -161,6 +165,9 @@ def get_structure_embeddings(self, structure: Structure) -> np.ndarray: np.ndarray Embeddings of the structure. """ + if self.primitive_reduction: + structure = reduce_structure(structure) + atoms = AseAtomsAdaptor.get_atoms(structure) atoms = self.relax_atoms(atoms) @@ -195,7 +202,7 @@ def get_similarity_embeddings( return np.dot(embeddings1, embeddings2) / (embeddings1_norm * embeddings2_norm) - def get_similarity_score( + def _get_similarity_score( self, structure1: Structure, structure2: Structure ) -> float: """Get the similarity score between two structures. 
diff --git a/src/material_hasher/similarity/structure_matchers.py b/src/material_hasher/similarity/structure_matchers.py index e39036d..7106db3 100644 --- a/src/material_hasher/similarity/structure_matchers.py +++ b/src/material_hasher/similarity/structure_matchers.py @@ -11,7 +11,8 @@ class PymatgenStructureSimilarity(SimilarityMatcherBase): """Implementation of the StructureMatcherBase using pymatgen's StructureMatcher.""" - def __init__(self, tolerance=0.01): + def __init__(self, tolerance=0.01, primitive_reduction: bool = False): + super().__init__(primitive_reduction=primitive_reduction) self.tolerance = tolerance self.matcher = StructureMatcher(ltol=tolerance) self.structures: List[Structure] = [] @@ -41,7 +42,7 @@ def is_equivalent( """ return self.matcher.fit(structure1, structure2) - def get_similarity_score( + def _get_similarity_score( self, structure1: Structure, structure2: Structure ) -> float: """ diff --git a/src/material_hasher/similarity/utils/utils_experiments.py b/src/material_hasher/similarity/utils/utils_experiments.py index bff85a2..abfd9fc 100644 --- a/src/material_hasher/similarity/utils/utils_experiments.py +++ b/src/material_hasher/similarity/utils/utils_experiments.py @@ -684,14 +684,10 @@ def apply_noise_to_structures_and_compare( rmsd = PymatgenStructureSimilarity().get_similarity_score( initial_structure, noisy_structure ) # rmsd comparison - hash_comparison = BAWLHasher( - shorten_hash=True - ).is_equivalent( + hash_comparison = BAWLHasher(shorten_hash=True).is_equivalent( initial_structure, noisy_structure ) # short hash comparison - full_hash_comparison = BAWLHasher( - shorten_hash=False - ).is_equivalent( + full_hash_comparison = BAWLHasher(shorten_hash=False).is_equivalent( initial_structure, noisy_structure ) # full hash comparison diff --git a/src/material_hasher/utils.py b/src/material_hasher/utils.py new file mode 100644 index 0000000..b8a3bb5 --- /dev/null +++ b/src/material_hasher/utils.py @@ -0,0 +1,14 @@ +# Copyright 2025 
Entalpic
+from pymatgen.core import Structure
+
+from moyopy import MoyoDataset
+from moyopy.interface import MoyoAdapter
+
+
+def reduce_structure(structure: Structure) -> Structure:
+    """Reduce the structure to its primitive cell (moyopy ``prim_std_cell``).
+
+    Callers check their own ``primitive_reduction`` flag before calling this.
+    """
+    dataset = MoyoDataset(MoyoAdapter.from_structure(structure))
+    return MoyoAdapter.get_structure(dataset.prim_std_cell)