From 88a3e0f3c187cdd8128eb641e2428358e99e8bb5 Mon Sep 17 00:00:00 2001 From: janbridley Date: Sun, 7 Dec 2025 19:37:33 -0500 Subject: [PATCH 01/45] Update documentation for the relationship between CIF grammar and the parser backend --- parsnip/parsnip.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 1b599aa5..c497e2ff 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -113,7 +113,7 @@ class CifFile: - """Lightweight, performant parser for CIF files. + """Parser for CIF files. Example ------- @@ -1066,34 +1066,57 @@ def _strip_comments(self, line: str) -> str: def __repr__(self): n_pairs = len(self.pairs) n_tabs = len(self.loops) - return f"CifFile(file={self._fn}) : {n_pairs} data entries, {n_tabs} data loops" + return ( + f"CifFile(file='{self._fn}') : {n_pairs} data entries, {n_tabs} data loops" + ) + # PATTERNS dict is based on the [CIF grammar] specification, adapted to be formally + # regular for performance. Context-free parts of the grammar are handled by + # the `_accumulate_nonsimple_data` pattern, which builds balanced blocks of tokens. + # Context-sensitive components -- primarily relationships between + # column labels and columnar data -- are handled directly in the parser. + # + # [CIF grammar](https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#bnf) PATTERNS: ClassVar = { + # Matcher for syntactic units "key_value_general": rf"^(_{_CIF_KEY}+?)\s{_PROG_PLUS}({_ANY}+?)$", + # Matcher for the first token of syntactic units "loop_delimiter": rf"(loop_){_WHITESPACE}{_PROG_STAR}([^\n]{_PROG_STAR})", + # Matcher for syntactic units. Currently, these are ignored. 
"block_delimiter": rf"(data_){_WHITESPACE}{_PROG_STAR}([^\n]{_PROG_STAR})", - "key_list": rf"_{_CIF_KEY}+?(?=\s|$)", # Match space or endline-separated keys + # Matcher for the column labels of a syntactic unit + "key_list": rf"_{_CIF_KEY}+?(?=\s|$)", + # Matcher for syntactic units, which may span multiple lines. + # Note that this allows for backtracking, as and + # values may contain whitespace and quotation marks. "space_delimited_data": ( "(" r";[^;]*?;|" # Non-semicolon data bracketed by semicolons r"'(?:'\S|[^'])*'|" # Data with single quotes not followed by \s - # rf"\"[^\"]{_PROG_STAR}\"|" # Data with double quotes rf"[^';\"\s]{_PROG_STAR}" # Additional non-bracketed data ")" ), - "comment": "#.*?$", # A comment at the end of a line or string + # Matcher for syntactic units + "comment": "#.*?$", + # Matcher for CIF 2.0 -related tokens + # https://www.iucr.org/__data/assets/text_file/0009/112131/CIF2-ENBF.txt "bracket": r"(\[|\])", } """Regex patterns used when parsing files. This dictionary can be modified to change parsing behavior, although doing is not recommended. Changes to this variable are shared across all instances of the class. + + Please refer to the + [CIF grammar](https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#bnf) for + further details. """ _SYMOP_KEYS = ( "_symmetry_equiv?pos_as_xyz", "_space_group_symop?operation_xyz", ) + """Keys required to extract symmetry operations from CIF & mmCIF files.""" _WYCKOFF_KEYS = ( "_atom_site?fract_x", "_atom_site?fract_y", @@ -1101,4 +1124,9 @@ def __repr__(self): "_atom_site?Cartn_x", "_atom_site?Cartn_y", "_atom_site?Cartn_z", - ) # Only one set should be stored at a time + ) + """Keys required to extract Wyckoff site data from CIF & mmCIF files. + + Note that per the specification, only the *fract_? or *Cartn_? keys may be included + but not both. 
+ """ From 39a939694629fb98f16884d6d10fdbdc79290f87 Mon Sep 17 00:00:00 2001 From: janbridley Date: Sun, 7 Dec 2025 19:40:30 -0500 Subject: [PATCH 02/45] Fix string format --- parsnip/parsnip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index c497e2ff..f67a71d8 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -122,7 +122,7 @@ class CifFile: >>> from parsnip import CifFile >>> cif = CifFile("example_file.cif") >>> print(cif) - CifFile(file=example_file.cif) : 12 data entries, 2 data loops + CifFile(file='example_file.cif') : 12 data entries, 2 data loops Data entries are accessible via the :attr:`~.pairs` and :attr:`~.loops` attributes: From e283d3949c612c86d7ade5d50caf13f6e3bde721 Mon Sep 17 00:00:00 2001 From: janbridley Date: Sun, 7 Dec 2025 19:44:31 -0500 Subject: [PATCH 03/45] Unix-style wildcards doc --- parsnip/parsnip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index f67a71d8..9534e0ef 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -296,7 +296,7 @@ def get_from_pairs(self, index: str | Iterable[str]): .. tip:: - This method supports a few unix-style wildcards. Use ``*`` to match any + This method supports unix-style wildcards. Use ``*`` to match any number of any character, and ``?`` to match any single character. If a wildcard matches more than one key, a list is returned for that index. The ordering of array data resulting from wildcard queries matches the From aaf5ad59e15a08be22fe77d573e6d373ec604d76 Mon Sep 17 00:00:00 2001 From: janbridley Date: Sun, 7 Dec 2025 19:49:29 -0500 Subject: [PATCH 04/45] Remove exclamation points --- parsnip/parsnip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 9534e0ef..62d271c7 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -141,7 +141,7 @@ class CifFile: .. 
tip:: See the docs for :attr:`__getitem__` and :attr:`get_from_loops` to query - for data by key or column label! + for data by key or column label. Parameters ---------- From c498b9db180c28229259b7b89dc29371ae8ad58a Mon Sep 17 00:00:00 2001 From: janbridley Date: Sun, 7 Dec 2025 19:58:15 -0500 Subject: [PATCH 05/45] Clean up development guide --- doc/source/development.rst | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/doc/source/development.rst b/doc/source/development.rst index 92ce574c..f30f1386 100644 --- a/doc/source/development.rst +++ b/doc/source/development.rst @@ -5,9 +5,11 @@ Development Guide ================= -All contributions to **parsnip** are welcome! -Developers are invited to contribute to the framework by pull request to the package repository on `GitHub`_, and all users are welcome to provide contributions in the form of **user feedback** and **bug reports**. -We recommend discussing new features in form of a proposal on the issue tracker for the appropriate project prior to development. +All contributions to **parsnip** are welcome! Developers are invited to contribute to +the framework by pull request to the package repository on `GitHub`_, and all users are +welcome to provide contributions in the form of **user feedback** and **bug reports**. +We recommend discussing new features in form of a proposal on the issue tracker for the +appropriate project prior to development. .. _github: https://github.com/glotzerlab/parsnip @@ -16,10 +18,12 @@ General Guidelines All code contributed to **parsnip** must adhere to the following guidelines: -* Hard dependencies (those that end users must install to use **parsnip**) are *strongly* discouraged, and should be avoided where possible. Additional dependencies required by developers (those used to run tests or build docs) are allowed where necessary. 
+* Hard dependencies (those that end users must install to use **parsnip**) are *strongly* discouraged, and should be avoided where possible. Additional dependencies required by developers (those used to run tests or build docs) are allowed if necessary. * All code should adhere to the source code conventions and satisfy the documentation and testing requirements discussed below. -As portability is a primary feature of **parsnip**, tests are run run on Python versions 3.7 and later. However, first class support should only be expected for versions covered by `NEP 29`_. +As portability is a primary feature of **parsnip**, tests are run on Python versions +3.9 and later. However, first class support should only be expected for versions covered +by `NEP 29`_. .. _NEP 29: https://numpy.org/neps/nep-0029-deprecation_policy.html @@ -43,9 +47,10 @@ API documentation should be written as part of the docstrings of the package in Docstrings are automatically validated using `pydocstyle `_ whenever the ruff prek hooks are run. The `official documentation `_ is generated from the docstrings using `Sphinx `_. -In addition to API documentation, inline comments are strongly encouraged. -Code should be written as transparently as possible, so the primary goal of documentation should be explaining the algorithms or mathematical concepts underlying the code. -Multiline comments for regex strings may sometimes be necessary. +In addition to API documentation, inline comments are strongly encouraged. Code should +be written as transparently as possible, so the primary goal of documentation should +be explaining the algorithms or mathematical concepts underlying the code. + Building Documentation ^^^^^^^^^^^^^^^^^^^^^^ @@ -65,4 +70,7 @@ All code should include a set of tests which validate correct behavior. All tests should be placed in the ``tests`` folder at the root of the project. 
In general, most parts of parsnip primarily require `unit tests `_, but where appropriate `integration tests `_ are also welcome. Core functions should be tested against the sample CIF files included in ``tests/sample_data``. Tests in **parsnip** use the `pytest `__ testing framework. +Doctests are automatically integrated with ``pytest`` via +`pytest-doctestplus `_. + To run the tests, simply execute ``pytest`` at the root of the repository. From 6db5a2abf02fa4529d3166cb1ea7df7596a12df6 Mon Sep 17 00:00:00 2001 From: janbridley Date: Sun, 7 Dec 2025 20:08:45 -0500 Subject: [PATCH 06/45] Swap to RST link style for doc comment --- parsnip/parsnip.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 62d271c7..d4973bee 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -1104,12 +1104,15 @@ def __repr__(self): } """Regex patterns used when parsing files. - This dictionary can be modified to change parsing behavior, although doing is not - recommended. Changes to this variable are shared across all instances of the class. + .. note:: - Please refer to the - [CIF grammar](https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#bnf) for - further details. + This dictionary can be modified to change parsing behavior, although doing + is not recommended. Changes to this variable are shared across all + instances of the class. + + Please refer to the `CIF grammar`_ for further details. + + .. 
_`CIF grammar`: https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#bnf """ _SYMOP_KEYS = ( From f77c6f8e6b9cce8f921e113e3974196c8f0b10fb Mon Sep 17 00:00:00 2001 From: janbridley Date: Sun, 7 Dec 2025 20:51:02 -0500 Subject: [PATCH 07/45] Update type hints --- parsnip/parsnip.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index d4973bee..0246c4cb 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -745,7 +745,7 @@ def box(self): return _box_from_lengths_and_angles(*self.read_cell_params(degrees=False)) @property - def lattice_vectors(self): + def lattice_vectors(self) -> np.ndarray[(3, 3), np.float64]: r"""The lattice vectors of the unit cell, with :math:`\vec{a_1}\perp[100]`. .. important:: @@ -777,23 +777,25 @@ def lattice_vectors(self): The lattice vectors of the unit cell :math:`\vec{a_1}, \vec{a_2},\vec{a_3}`. """ lx, ly, lz, xy, xz, yz = self.box - return np.asarray([[lx, xy * ly, xz * lz], [0, ly, lz * yz], [0, 0, lz]]) + return np.asarray( + [[lx, xy * ly, xz * lz], [0, ly, lz * yz], [0, 0, lz]], dtype=np.float64 + ) @property - def loop_labels(self): + def loop_labels(self) -> list[tuple[str, ...]]: """A list of column labels for each data array. This property is equivalent to :code:`[arr.dtype.names for arr in self.loops]`. Returns ------- - list[list[str]]: + list[tuple[str, ...]]: Column labels for :attr:`~.loops`, stored as a nested list of strings. """ return [arr.dtype.names for arr in self.loops] @property - def symops(self): + def symops(self) -> np.ndarray | None: r"""Extract the symmetry operations in a `parsable algebraic form`_. Example @@ -807,13 +809,13 @@ def symops(self): Returns ------- :math:`(N,1)` numpy.ndarray[str]: - An array containing the symmetry operations. + An array containing the symmetry operations, or None if none are found. .. 
_`parsable algebraic form`: https://www.iucr.org/__data/iucr/cifdic_html/1/cif_core.dic/Ispace_group_symop_operation_xyz.html """ # Only one key is valid in each standard, so we only ever get one match. for key in self.__class__._SYMOP_KEYS: - symops = self.get_from_loops(key) + symops: np.ndarray | None = self.get_from_loops(key) if symops is not None: self._symops_key = self._wildcard_mapping[key] return symops From 5e82c5cdb4f27199a4aa28d43b2f29ae3a2330b7 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 08:46:24 -0500 Subject: [PATCH 08/45] Add type hints where they cannot be inferred --- parsnip/parsnip.py | 4 ++-- parsnip/patterns.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 0246c4cb..b0e504bd 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -75,7 +75,7 @@ from fnmatch import fnmatch from importlib.util import find_spec from pathlib import Path -from typing import ClassVar, TextIO +from typing import ClassVar, Literal, TextIO import numpy as np from more_itertools import flatten, peekable @@ -543,7 +543,7 @@ def build_unit_cell( self, n_decimal_places: int = 4, additional_columns: str | Iterable[str] | None = None, - parse_mode: str = "python_float", + parse_mode: Literal["python_float", "sympy"] = "python_float", verbose: bool = False, ): """Reconstruct fractional atomic positions from Wyckoff sites and symops. diff --git a/parsnip/patterns.py b/parsnip/patterns.py index 9b147786..5d945555 100644 --- a/parsnip/patterns.py +++ b/parsnip/patterns.py @@ -13,10 +13,13 @@ import re import sys +from typing import Literal, TypeVar import numpy as np from numpy.typing import ArrayLike +T = TypeVar("T") + ALLOWED_DELIMITERS = [";\n", "'''", '"""'] """Delimiters allowed for nonsimple (multi-line) data entries.""" @@ -46,7 +49,7 @@ def _contains_wildcard(s: str) -> bool: return "?" 
in s or "*" in s -def _flatten_or_none(ls: list): +def _flatten_or_none(ls: list[T]): """Return the sole element from a list of l=1, None if l=0, else l.""" return None if not ls else ls[0] if len(ls) == 1 else ls @@ -69,8 +72,9 @@ def _safe_eval( x: int | float, y: int | float, z: int | float, - parse_mode: str = "python_float", -): + *, + parse_mode: Literal["python_float", "sympy"] = "python_float", +) -> list[list[float]]: """Attempt to safely evaluate a string of symmetry equivalent positions. Python's ``eval`` is notoriously unsafe. While we could evaluate the entire list at @@ -162,7 +166,7 @@ def cast_array_to_float(arr: ArrayLike | None, dtype: type = np.float32): return np.char.partition(arr, "(")[..., 0].astype(dtype) -def _accumulate_nonsimple_data(data_iter, line=""): +def _accumulate_nonsimple_data(data_iter, line: str = ""): """Accumulate nonsimmple (multi-line) data entries into a single string.""" delimiter_count = 0 while _line_is_continued(data_iter.peek(None)): From 4d0ba4d31d2b5658530063f268a627c7165c276b Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 08:48:28 -0500 Subject: [PATCH 09/45] Standardize error handling for gemmi tests --- tests/test_unitcells.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_unitcells.py b/tests/test_unitcells.py index 3229b5e8..ed7f2d3c 100644 --- a/tests/test_unitcells.py +++ b/tests/test_unitcells.py @@ -24,8 +24,8 @@ def _gemmi_read_table(filename, keys): try: return np.array(cif.read_file(filename).sole_block().find(keys)) - except (RuntimeError, ValueError): - pytest.skip("Gemmi failed to read file!") + except (RuntimeError, ValueError) as e: + pytest.skip(f"Gemmi failed to read file: {e}") @all_files_mark From d03273009f4fa6d20bccc87e41dd2f91f8c7efbc Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 11:57:51 -0500 Subject: [PATCH 10/45] Clean up unused TODOs --- .github/workflows/pypi-test-and-publish.yaml | 1 - README.rst | 3 --- 
tests/conftest.py | 4 +--- tests/test_table_reader.py | 2 -- 4 files changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/pypi-test-and-publish.yaml b/.github/workflows/pypi-test-and-publish.yaml index d36d0306..f9d5a582 100644 --- a/.github/workflows/pypi-test-and-publish.yaml +++ b/.github/workflows/pypi-test-and-publish.yaml @@ -47,7 +47,6 @@ jobs: name: python-package-distributions path: dist/ - name: Publish to TestPyPI - # TODO: https://github.com/pypa/gh-action-pypi-publish/pull/378 uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: repository-url: https://test.pypi.org/legacy/ diff --git a/README.rst b/README.rst index 91b59913..50431f37 100644 --- a/README.rst +++ b/README.rst @@ -6,9 +6,6 @@ .. _header: -.. - TODO: set up Readthedocs, PyPI, and conda-forge - |ReadTheDocs| |PyPI| |conda-forge| diff --git a/tests/conftest.py b/tests/conftest.py index e5fe5e09..7af7c499 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,9 +32,7 @@ def pycifrw_or_skip(cif_data): def remove_invalid(s): - """Our parser strips newlines and carriage returns. - TODO: newlines should be retained - """ + """Our parser strips newlines and carriage returns.""" if s is None or s == "": return None return s.replace("\r", "") diff --git a/tests/test_table_reader.py b/tests/test_table_reader.py index 05b1823a..3ac276c1 100644 --- a/tests/test_table_reader.py +++ b/tests/test_table_reader.py @@ -18,8 +18,6 @@ Used to simplify processing of structured arrays. 
""" -# TODO: update to verify the number and shape of tables is correct - @all_files_mark def test_reads_all_keys(cif_data): From ea0e5155a0050fa04e80aaf914bcf12bb8925372 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 12:01:08 -0500 Subject: [PATCH 11/45] One straggler TODO --- tests/test_unitcells.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_unitcells.py b/tests/test_unitcells.py index ed7f2d3c..34697133 100644 --- a/tests/test_unitcells.py +++ b/tests/test_unitcells.py @@ -35,7 +35,7 @@ def test_read_symops(cif_data): np.testing.assert_array_equal(parsnip_symops, gemmi_symops) -@all_files_mark # TODO: test with conversions to numeric as well +@all_files_mark def test_read_wyckoff_positions(cif_data): parsnip_data = cif_data.file.wyckoff_positions gemmi_data = _gemmi_read_table(cif_data.filename, cif_data.file._wyckoff_site_keys) From 92ef1f3610ec00795a60d468dfc4a0468d826b2c Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 12:02:35 -0500 Subject: [PATCH 12/45] Note TODO --- parsnip/parsnip.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index b0e504bd..4f930c66 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -930,7 +930,8 @@ def _parse(self, data_iter: peekable): if line == "": continue - # TODO: could separate multi-block files in the future ===================== + # While we could separate multi-block files in the future, its actually + # beneficial for us to flatten the file into one block. 
# block = re.match(self._cpat["block_delimiter"], line.lower().lstrip()) # if block is not None: # continue From 3550eae0022013bb86665799ed68481dc2f9d026 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 12:03:50 -0500 Subject: [PATCH 13/45] Swap note -> attention admonition --- parsnip/parsnip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 4f930c66..5a338350 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -1107,7 +1107,7 @@ def __repr__(self): } """Regex patterns used when parsing files. - .. note:: + .. attention:: This dictionary can be modified to change parsing behavior, although doing is not recommended. Changes to this variable are shared across all From 8b0e7ff79bf0ec95bca3ea1038618510be2552aa Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 12:04:08 -0500 Subject: [PATCH 14/45] Swap to caution --- parsnip/parsnip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 5a338350..7d7c0122 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -1107,7 +1107,7 @@ def __repr__(self): } """Regex patterns used when parsing files. - .. attention:: + .. caution:: This dictionary can be modified to change parsing behavior, although doing is not recommended. Changes to this variable are shared across all From 81ef61a688f6321403ddc264df38db8969bca076 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 12:04:40 -0500 Subject: [PATCH 15/45] One more caution --- parsnip/parsnip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 7d7c0122..ea259536 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -873,7 +873,7 @@ def wyckoff_positions(self): def cast_values(self): """Bool : Whether to cast "number-like" values to ints & floats. - .. note:: + .. 
caution:: When set to `True` after construction, the values are modified in-place. This action cannot be reversed. From f9522b79ef18e7f4342883d2859c2876f9188d89 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 14:38:28 -0500 Subject: [PATCH 16/45] Add GSD requirement for testing --- .github/requirements-3.10.txt | 3 +++ .github/requirements-3.11.txt | 3 +++ .github/requirements-3.12.txt | 3 +++ .github/requirements-3.13.txt | 3 +++ .github/requirements-3.9.txt | 3 +++ tests/requirements.in | 1 + 6 files changed, 16 insertions(+) diff --git a/.github/requirements-3.10.txt b/.github/requirements-3.10.txt index 3bbcf922..1127df4d 100644 --- a/.github/requirements-3.10.txt +++ b/.github/requirements-3.10.txt @@ -14,6 +14,8 @@ fonttools==4.61.0 # via matplotlib gemmi==0.7.4 # via -r tests/requirements.in +gsd==4.0.0 + # via -r tests/requirements.in iniconfig==2.3.0 # via pytest kiwisolver==1.4.9 @@ -29,6 +31,7 @@ numpy==2.2.6 # parsnip-cif (pyproject.toml) # ase # contourpy + # gsd # matplotlib # pycifrw # scipy diff --git a/.github/requirements-3.11.txt b/.github/requirements-3.11.txt index 3d88b04a..6db328f5 100644 --- a/.github/requirements-3.11.txt +++ b/.github/requirements-3.11.txt @@ -12,6 +12,8 @@ fonttools==4.61.0 # via matplotlib gemmi==0.7.4 # via -r tests/requirements.in +gsd==4.2.0 + # via -r tests/requirements.in iniconfig==2.3.0 # via pytest kiwisolver==1.4.9 @@ -27,6 +29,7 @@ numpy==2.3.5 # parsnip-cif (pyproject.toml) # ase # contourpy + # gsd # matplotlib # pycifrw # scipy diff --git a/.github/requirements-3.12.txt b/.github/requirements-3.12.txt index 49206563..bf214391 100644 --- a/.github/requirements-3.12.txt +++ b/.github/requirements-3.12.txt @@ -12,6 +12,8 @@ fonttools==4.61.0 # via matplotlib gemmi==0.7.4 # via -r tests/requirements.in +gsd==4.2.0 + # via -r tests/requirements.in iniconfig==2.3.0 # via pytest kiwisolver==1.4.9 @@ -27,6 +29,7 @@ numpy==2.3.5 # parsnip-cif (pyproject.toml) # ase # contourpy + # gsd # matplotlib # 
pycifrw # scipy diff --git a/.github/requirements-3.13.txt b/.github/requirements-3.13.txt index 8b0c6978..c3a0374d 100644 --- a/.github/requirements-3.13.txt +++ b/.github/requirements-3.13.txt @@ -12,6 +12,8 @@ fonttools==4.61.0 # via matplotlib gemmi==0.7.4 # via -r tests/requirements.in +gsd==4.2.0 + # via -r tests/requirements.in iniconfig==2.3.0 # via pytest kiwisolver==1.4.9 @@ -27,6 +29,7 @@ numpy==2.3.5 # parsnip-cif (pyproject.toml) # ase # contourpy + # gsd # matplotlib # pycifrw # scipy diff --git a/.github/requirements-3.9.txt b/.github/requirements-3.9.txt index 7f496b4d..cdb23526 100644 --- a/.github/requirements-3.9.txt +++ b/.github/requirements-3.9.txt @@ -14,6 +14,8 @@ fonttools==4.60.1 # via matplotlib gemmi==0.7.4 # via -r tests/requirements.in +gsd==4.0.0 + # via -r tests/requirements.in importlib-resources==6.5.2 # via matplotlib iniconfig==2.1.0 @@ -31,6 +33,7 @@ numpy==2.0.2 # parsnip-cif (pyproject.toml) # ase # contourpy + # gsd # matplotlib # pycifrw # scipy diff --git a/tests/requirements.in b/tests/requirements.in index 84e1c73d..b1880a7d 100644 --- a/tests/requirements.in +++ b/tests/requirements.in @@ -1,5 +1,6 @@ ase gemmi +gsd pycifrw!=5.0.0 pytest pytest-doctestplus From c66d871673d88bb4309df19a5f5b4e9837a20a1d Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 14:38:42 -0500 Subject: [PATCH 17/45] Fix typos --- doc/source/quickstart.rst | 2 +- parsnip/_errors.py | 2 +- parsnip/parsnip.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/quickstart.rst b/doc/source/quickstart.rst index a7f2ec37..f2e4c790 100644 --- a/doc/source/quickstart.rst +++ b/doc/source/quickstart.rst @@ -130,7 +130,7 @@ Building Unit Cells CIF files are commonly used to reconstruct atomic positions for a particular crystal. 
While the example file shown throughout this tutorial corresponds to FCC copper, it only -contains a single atomic position, in constrast to the 4 expected for FCC's +contains a single atomic position, in contrast to the 4 expected for FCC's primitive cell. `parsnip` can reconstruct tilable unit cells from symmetry operations and symmetry-irreducible (Wyckoff) positions contained in the file. diff --git a/parsnip/_errors.py b/parsnip/_errors.py index e307f46b..5bb604bb 100644 --- a/parsnip/_errors.py +++ b/parsnip/_errors.py @@ -8,7 +8,7 @@ def _is_potentially_valid_path(file: str) -> bool: """Check whether a file string could possibly be intended as a path. - This method returns true if the provided string is a valid path, whther the suffix + This method returns true if the provided string is a valid path, whether the suffix ".cif" is contained in the path, if the path links to a file, or if the path's parent is a directory. """ diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index ea259536..4acf65ed 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -1022,7 +1022,7 @@ def _parse(self, data_iter: peekable): loop_data = np.array([*flatten(loop_data)]).reshape(-1, n_cols) if len(loop_data) == 0: - msg = "Loop data is empy, but n_cols > 0: check CIF file syntax." + msg = "Loop data is empty, but n_cols > 0: check CIF file syntax." 
_warn_or_err(msg, self._strict) continue dt = _dtype_from_int(max(len(s) for l in loop_data for s in l)) From 2fef24d05e9484866a7c2eef5dcb247dbe6db382 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 14:49:40 -0500 Subject: [PATCH 18/45] Add HOOMD-Blue example and examples toc section --- doc/source/example_simulation.rst | 88 +++++++++++++++++++++++++++++++ doc/source/examples.rst | 18 +++++++ doc/source/index.rst | 1 + 3 files changed, 107 insertions(+) create mode 100644 doc/source/example_simulation.rst create mode 100644 doc/source/examples.rst diff --git a/doc/source/example_simulation.rst b/doc/source/example_simulation.rst new file mode 100644 index 00000000..1fbb07e0 --- /dev/null +++ b/doc/source/example_simulation.rst @@ -0,0 +1,88 @@ +Initializing Molecular Simulations with ``parsnip`` +=================================================== + +When performing molecular simulations of solid materials, it is often useful to +initialize your system in a crystal structure. **parsnip** makes this extremely easy! + +HOOMD-Blue +^^^^^^^^^^ + +HOOMD-Blue can operate directly on array data, so we can move data directly from +**parsnip** to the simulation itself. + +.. testsetup:: + + >>> import os + >>> import numpy as np + >>> if "doc/source" not in os.getcwd(): os.chdir("doc/source") + >>> from parsnip import CifFile + >>> filename = "example_file.cif" + >>> cif = CifFile(filename) + >>> # Mock HOOMD import, as it is not available via pip install + >>> import gsd.hoomd as hoomd + >>> hoomd.Snapshot = hoomd.Frame + >>> snapshot = hoomd.Snapshot() + >>> # Pre-initialize the data arrays, as gsd does not support HOOMD's arr[:] + >>> # pattern for assignment. This data will be overwritten in the doctest. + >>> snapshot.particles.position = np.full((4, 3), -999.0) + >>> snapshot.particles.typeid = np.full((4,), -999) + +.. 
doctest:: + + >>> import hoomd # doctest: +SKIP + >>> from parsnip import CifFile + >>> filename = "example_file.cif" + >>> cif = CifFile(filename) + + >>> snapshot = hoomd.Snapshot() # doctest: +SKIP + >>> snapshot.particles.N = len(cif.build_unit_cell()) + >>> snapshot.particles.position[:] = cif.build_unit_cell() + >>> snapshot.configuration.box = cif.box + >>> snapshot.particles.types = ["Particle"] + + >>> snapshot.replicate(nx=2, ny=2, nz=3) # 2 x 2 x 3 supercell # doctest: +SKIP + >>> assert snapshot.particles.N == (2 * 2 * 3) * len(pos) # doctest: +SKIP + + +Once the snapshot is constructed, it can be attached to a simulation as follows: + +.. doctest-skip:: + + >>> import hoomd + >>> simulation = hoomd.Simulation(device=hoomd.device.CPU()) + >>> simulation.create_state_from_snapshot(snapshot) + +If we want to extract additional data for our simulation, there are a few extra steps. +In HOOMD-Blue, ``particle.types`` are unique string identifiers that get mapped to +individual particles via the ``particles.typeid`` array. The following code extracts +``_atom_site_type_label`` data and assigns the "Cu" atom type to all particles. For +structures with multiple atom sites, the ``particles.typeid`` array will have nonzero +entries that correspond with other type labels. + + +.. doctest:: + + >>> from collections import defaultdict + + >>> labels, pos = cif.build_unit_cell(additional_columns=["_atom_site_type_symbol"]) + >>> # ... initialize the snapshot's `N`, `box`, and `position` data as above + + >>> particle_type_map = defaultdict(list) + >>> for i, label in enumerate(labels.squeeze(axis=1)): + ... particle_type_map[label].append(i) + >>> particle_type_map["Cu"] # Atoms 1-4 have the type symbol "Cu" + [0, 1, 2, 3] + + >>> # Construct the TypeIDs that map our atomic symbol to the corresponding position + >>> typeid_array = np.ones(len(snapshot.particles.position), dtype=int) + >>> for typeid, label in enumerate(particle_type_map.keys()): + ... 
typeid_array[particle_type_map[label]] = typeid + >>> snapshot.particles.typeid[:] = typeid_array + >>> snapshot.particles.typeid + array([0, 0, 0, 0]) + + >>> snapshot.particles.types = [str(key) for key in particle_type_map.keys()] + >>> snapshot.particles.types + ['Cu'] + + >>> assert len(snapshot.particles.types) == len(cif["_atom_site_type_symbol"]) diff --git a/doc/source/examples.rst b/doc/source/examples.rst new file mode 100644 index 00000000..e30c2422 --- /dev/null +++ b/doc/source/examples.rst @@ -0,0 +1,18 @@ +.. _examples: + +======== +Examples +======== + +This tutorial provides a complete introduction to **parsnip**, including its place in +the broader simulation and data science ecosystems. We begin by illustrating how +**parsnip** aids in simulation initialization in two common libraries, HOOMD-Blue and +LAMMPS. We then highlight **parsnip**'s class-leading performance reconstructing noisy +experimental data. We conclude with a tutorial on using **parsnip** to generate new +structures from existing data. + + +.. toctree:: + :maxdepth: 2 + + example_simulation diff --git a/doc/source/index.rst b/doc/source/index.rst index 4bccb887..89768538 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -16,6 +16,7 @@ installation quickstart + examples .. toctree:: From e083212b41125b78cf401621d58f0f90b3dbf7f4 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 15:45:25 -0500 Subject: [PATCH 19/45] Add LAMMPS example --- doc/source/example_simulation.rst | 74 +++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/doc/source/example_simulation.rst b/doc/source/example_simulation.rst index 1fbb07e0..93cf775b 100644 --- a/doc/source/example_simulation.rst +++ b/doc/source/example_simulation.rst @@ -86,3 +86,77 @@ entries that correspond with other type labels. 
['Cu'] >>> assert len(snapshot.particles.types) == len(cif["_atom_site_type_symbol"]) + + +LAMMPS +^^^^^^ + +In contrast to HOOMD-Blue, LAMMPS typically requires us to write out structure data to +a `LAMMPS Data File`_ before simulations can begin. Although topology data is not +commonly stored in CIF files, **parsnip** makes it simple to reconstruct atomic crystals +in LAMMPS. + + +.. _`LAMMPS Data File`: https://docs.lammps.org/2001/data_format.html + +.. doctest-requires:: lammps + + >>> from collections import defaultdict + + >>> def write_lammps_data(cif: CifFile): + ... """Convert a CIF file into a LAMMPS data file.""" + ... data = "(LAMMPS Data File, written with `parsnip`)\n\n" + ... + ... atomic_positions = cif.build_unit_cell() + ... atom_types = cif["_atom_site_type_symbol"] + ... particle_type_map = defaultdict(list) + ... + ... # Write out the number of atoms and atom types + ... data += f"{len(atomic_positions)} atoms\n" + ... data += f"{len(atom_types)} atom types\n\n" + ... + ... # Write out the box, including the (zero, in this case) tilt factors + ... lx, ly, lz, xy, xz, yz = cif.box + ... data += f"0.0 {lx:.12f} xlo xhi\n" + ... data += f"0.0 {ly:.12f} ylo yhi\n" + ... data += f"0.0 {lz:.12f} zlo zhi\n" + ... data += f"{xy:.12f} {xz:.12f} {yz:.12f} xy xz yz\n\n" + ... + ... # Write out the atomic position data -- note the similarities with typeid! + ... data += f"Atoms # atomic\n" + ... + ... for i, label in enumerate(labels.squeeze(axis=1)): + ... particle_type_map[label].append(i) + ... + ... # Construct the TypeIDs that map our atomic symbol to an index + ... atom_type_array = np.ones(len(atomic_positions), dtype=int) + ... for typeid, label in enumerate(particle_type_map.keys()): + ... atom_type_array[particle_type_map[label]] = typeid + ... + ... for i, coordinate in enumerate(atomic_positions): + ... coord_str = " ".join([f"{xyz:.12f}" for xyz in coordinate]) + ... data += f" {i} {atom_type_array[i]} {coord_str}\n" + ... + ... 
return data + + >>> with open("fcc.data") as f: # doctest: +SKIP + ... f.write(write_lammps_data(cif)) # doctest: +SKIP + + >>> # Or, simply print the output: + >>> print(write_lammps_data(cif)) + (LAMMPS Data File, written with `parsnip`) + + 4 atoms + 1 atom types + + 0.0 3.600000000000 xlo xhi + 0.0 3.600000000000 ylo yhi + 0.0 3.600000000000 zlo zhi + 0.000000000000 0.000000000000 0.000000000000 xy xz yz + + Atoms # atomic + 0 0 0.000000000000 0.000000000000 0.000000000000 + 1 0 0.000000000000 0.500000000000 0.500000000000 + 2 0 0.500000000000 0.000000000000 0.500000000000 + 3 0 0.500000000000 0.500000000000 0.000000000000 + From fe34e8db9162ede8c0ad334e78acffac5ba03e68 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 16:06:46 -0500 Subject: [PATCH 20/45] Fix LAMMPS example --- doc/source/example_simulation.rst | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/doc/source/example_simulation.rst b/doc/source/example_simulation.rst index 93cf775b..aa8845e6 100644 --- a/doc/source/example_simulation.rst +++ b/doc/source/example_simulation.rst @@ -99,15 +99,17 @@ in LAMMPS. .. _`LAMMPS Data File`: https://docs.lammps.org/2001/data_format.html -.. doctest-requires:: lammps +.. doctest:: >>> from collections import defaultdict >>> def write_lammps_data(cif: CifFile): ... """Convert a CIF file into a LAMMPS data file.""" - ... data = "(LAMMPS Data File, written with `parsnip`)\n\n" + ... data = "(LAMMPS Data File, written with parsnip)\n\n" + ... + ... fractional_coordinates = cif.build_unit_cell() + ... atomic_positions = fractional_coordinates @ cif.lattice_vectors.T ... - ... atomic_positions = cif.build_unit_cell() ... atom_types = cif["_atom_site_type_symbol"] ... particle_type_map = defaultdict(list) ... @@ -123,7 +125,7 @@ in LAMMPS. ... data += f"{xy:.12f} {xz:.12f} {yz:.12f} xy xz yz\n\n" ... ... # Write out the atomic position data -- note the similarities with typeid! - ... 
data += f"Atoms # atomic\n" + ... data += f"Atoms # atomic\n\n" ... ... for i, label in enumerate(labels.squeeze(axis=1)): ... particle_type_map[label].append(i) @@ -139,12 +141,9 @@ in LAMMPS. ... ... return data - >>> with open("fcc.data") as f: # doctest: +SKIP - ... f.write(write_lammps_data(cif)) # doctest: +SKIP - >>> # Or, simply print the output: >>> print(write_lammps_data(cif)) - (LAMMPS Data File, written with `parsnip`) + (LAMMPS Data File, written with parsnip) 4 atoms 1 atom types @@ -156,7 +155,6 @@ in LAMMPS. Atoms # atomic 0 0 0.000000000000 0.000000000000 0.000000000000 - 1 0 0.000000000000 0.500000000000 0.500000000000 - 2 0 0.500000000000 0.000000000000 0.500000000000 - 3 0 0.500000000000 0.500000000000 0.000000000000 - + 1 0 0.000000000000 1.800000000000 1.800000000000 + 2 0 1.800000000000 0.000000000000 1.800000000000 + 3 0 1.800000000000 1.800000000000 0.000000000000 From 13b282303c40220047f1bc5745817cf0d191771d Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 16:07:06 -0500 Subject: [PATCH 21/45] Remove unused line --- doc/source/example_simulation.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/example_simulation.rst b/doc/source/example_simulation.rst index aa8845e6..0605354f 100644 --- a/doc/source/example_simulation.rst +++ b/doc/source/example_simulation.rst @@ -141,7 +141,6 @@ in LAMMPS. ... ... 
return data - >>> # Or, simply print the output: >>> print(write_lammps_data(cif)) (LAMMPS Data File, written with parsnip) From 9398394733798c2c82cd4541eba696f88d82bb8b Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 16:09:50 -0500 Subject: [PATCH 22/45] Add noisy data and fix headers --- doc/source/example_noisy.rst | 2 ++ doc/source/example_simulation.rst | 4 ++-- doc/source/examples.rst | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 doc/source/example_noisy.rst diff --git a/doc/source/example_noisy.rst b/doc/source/example_noisy.rst new file mode 100644 index 00000000..02f22fe7 --- /dev/null +++ b/doc/source/example_noisy.rst @@ -0,0 +1,2 @@ +Reconstrucing Noisy Unit-Cell Data +================================== diff --git a/doc/source/example_simulation.rst b/doc/source/example_simulation.rst index 0605354f..2ceba89a 100644 --- a/doc/source/example_simulation.rst +++ b/doc/source/example_simulation.rst @@ -1,5 +1,5 @@ -Initializing Molecular Simulations with ``parsnip`` -=================================================== +Initializing Molecular Simulations +================================== When performing molecular simulations of solid materials, it is often useful to initialize your system in a crystal structure. **parsnip** makes this extremely easy! diff --git a/doc/source/examples.rst b/doc/source/examples.rst index e30c2422..4a062683 100644 --- a/doc/source/examples.rst +++ b/doc/source/examples.rst @@ -16,3 +16,4 @@ structures from existing data. 
:maxdepth: 2 example_simulation + example_noisy From 63574597e913a84725e5e4f7ab2a4af759fb01a6 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 16:10:55 -0500 Subject: [PATCH 23/45] Final title --- doc/source/example_noisy.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/example_noisy.rst b/doc/source/example_noisy.rst index 02f22fe7..3e4cb179 100644 --- a/doc/source/example_noisy.rst +++ b/doc/source/example_noisy.rst @@ -1,2 +1,2 @@ -Reconstrucing Noisy Unit-Cell Data -================================== +Reconstrucing Noisy Unit Cells +============================== From a8d80f46eb939d99c3a270b266a001baa1d76530 Mon Sep 17 00:00:00 2001 From: janbridley Date: Mon, 8 Dec 2025 16:28:55 -0500 Subject: [PATCH 24/45] Doctest LAMMPS output --- doc/source/example_simulation.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/source/example_simulation.rst b/doc/source/example_simulation.rst index 2ceba89a..6a1a3d9f 100644 --- a/doc/source/example_simulation.rst +++ b/doc/source/example_simulation.rst @@ -157,3 +157,16 @@ in LAMMPS. 1 0 0.000000000000 1.800000000000 1.800000000000 2 0 1.800000000000 0.000000000000 1.800000000000 3 0 1.800000000000 1.800000000000 0.000000000000 + +.. Validate our output data is (1) valid LAMMPS data and (2) reconstructs our system. +.. 
testcleanup:: + + >>> from io import StringIO + >>> from ase.io import read + >>> atoms = read(StringIO(write_lammps_data(cif)), format='lammps-data') + + >>> fractional_coordinates = cif.build_unit_cell() + >>> atomic_positions = fractional_coordinates @ cif.lattice_vectors.T + >>> assert len(atomic_positions) == 4 + >>> np.testing.assert_array_equal(atoms.get_atomic_numbers(), [0,0,0,0]) + >>> np.testing.assert_array_equal(np.diag([3.6, 3.6, 3.6]), atoms.get_cell()) From 239dc7e2d4250e9962b5f08779700812453b6e55 Mon Sep 17 00:00:00 2001 From: janbridley Date: Tue, 9 Dec 2025 16:37:21 -0500 Subject: [PATCH 25/45] Add example on numerical precision --- doc/source/example_noisy.rst | 107 +++++++++++++++++++++++++ doc/source/hP3-four-decimal-places.cif | 33 ++++++++ doc/source/hP3.cif | 33 ++++++++ 3 files changed, 173 insertions(+) create mode 100644 doc/source/hP3-four-decimal-places.cif create mode 100644 doc/source/hP3.cif diff --git a/doc/source/example_noisy.rst b/doc/source/example_noisy.rst index 3e4cb179..4058fa22 100644 --- a/doc/source/example_noisy.rst +++ b/doc/source/example_noisy.rst @@ -1,2 +1,109 @@ Reconstrucing Noisy Unit Cells ============================== + +Diffraction experiments and other experimental techniques for quantifying structure +typically offer limited precision in the measurements that can be made. As a result, +the Wyckoff position data recorded in some CIF files -- particularly older ones -- may +make reproduction of the original structure challenging. In this example, we explore how +**parsnip**'s `build_unit_cell` method can be tuned to accurately reproduce structures +with complicated geometries, using alpha-Selenium as an example. + +.. testsetup:: + + >>> import os + >>> import numpy as np + >>> if "doc/source" not in os.getcwd(): os.chdir("doc/source") + + + + +.. 
literalinclude:: hP3.cif + +Note that the basis positions for alpha-Selenium are provided to five decimal +places of accuracy, while the symmetry operations are provided in a rational form. + +.. doctest:: + + >>> from parsnip import CifFile + >>> cif = CifFile("hP3.cif") + >>> # Let's make sure we reconstruct the unit cell's three atoms + >>> correct_uc = cif.build_unit_cell() + >>> correct_uc + array([[0.2254 , 0. , 0.33333 ], + [0. , 0.2254 , 0.66666333], + [0.7746 , 0.7746 , 0.99999667]]) + >>> site_multiplicity = int(cif["_atom_site_symmetry_multiplicity"].squeeze()) + >>> assert len(correct_uc) == site_multiplicity + +**parsnip**'s default settings are able to correctly reproduce the unit cell -- but +the mismatch between numerical data and the symmetry operation strings can cause issues. +If we truncate the Wyckoff position data, even by one decimal place, the reconstructed +crystal contains duplicate atoms: + +.. literalinclude:: hP3-four-decimal-places.cif + :diff: hP3.cif + +Rebuilding our crystal from the truncated data yields an incorrect unit cell: + +.. doctest:: + + >>> lower_precision_cif = CifFile("hP3-four-decimal-places.cif") + >>> uc = lower_precision_cif.build_unit_cell() + >>> uc # doctest: +SKIP + array([[0.2254 , 0. , 0.3333 ], # A + [0. , 0.2254 , 0.66663333], # B + [0.7746 , 0.7746 , 0.99996667], # C + [0.2254 , 0. , 0.33336667], # A + [0. , 0.2254 , 0.6667 ]]) # B + >>> uc.shape == correct_uc.shape # Our unit cell has duplicate atoms! + False + +By default, **parsnip** uses four decimal places of accuracy to reconstruct crystals. +This yields the best overall accuracy (tested with several thousand CIFs), but is not +always the best choice in general. A good rule of thumb is to use one fewer decimal +place than the CIF file contains. This ensures positions are rounded sufficiently to +detect duplicate atoms, and avoids issues in complex structures where Wyckoff positions +may be very close to one another. Making this change results in the correct structure +once again. 
+ + +.. doctest:: + + >>> cif = CifFile("hP3-four-decimal-places.cif") + >>> four_decimal_places = cif.build_unit_cell(n_decimal_places=3) + >>> four_decimal_places + array([[0.2254 , 0. , 0.3333 ], + [0. , 0.2254 , 0.66663333], + [0.7746 , 0.7746 , 0.99996667]]) + >>> assert four_decimal_places.shape == correct_uc.shape + +.. important:: + + Rounding of Wyckoff positions is an intermediate step in the unit cell + reconstruction, and does not negatively impact the accuracy of the returned data. + The unit cell is always returned in the full precision of the input CIF: + + .. doctest:: + + >>> cif = CifFile("hP3-four-decimal-places.cif") + >>> one_decimal_place = cif.build_unit_cell(n_decimal_places=1) + >>> np.testing.assert_array_equal(one_decimal_place, four_decimal_places) + + +In some cases, particularly in structures with many atoms, careful tuning of numerical +precision is not enough to accurately reproduce a crystal. **parsnip** includes a +specialized parser that uses rational arithmetic to correctly compare fractions that +only differ by a few units in last place. To enable this, install the `sympy`_ library +and set ``parse_mode="sympy"`` when building the unit cell. + +.. doctest:: + + >>> cif = CifFile("hP3.cif") + >>> symbolic = cif.build_unit_cell(n_decimal_places=4, parse_mode="sympy") + >>> symbolic + array([[0.2254 , 0. , 0.33333 ], + [0. , 0.2254 , 0.6666633], + [0.7746 , 0.7746 , 0.99999667]]) + >>> assert symbolic.shape == correct_uc.shape + +.. 
_sympy: https://www.sympy.org/en/index.html diff --git a/doc/source/hP3-four-decimal-places.cif b/doc/source/hP3-four-decimal-places.cif new file mode 100644 index 00000000..43a8156a --- /dev/null +++ b/doc/source/hP3-four-decimal-places.cif @@ -0,0 +1,33 @@ +# A header describing this portion of the file +data_cif_Se-hP3 + +_chemical_name_mineral 'alpha-Selenium' +_chemical_formula_sum 'Se' + +# Key-value pairs describing the unit cell (Å and °) +# Note the cell angles 90-90-120 indicate a hexagonal structure. +_cell_length_a 4.36620 +_cell_length_b 4.36620 +_cell_length_c 4.95360 +_cell_angle_alpha 90.00000 +_cell_angle_beta 90.00000 +_cell_angle_gamma 120.00000 + +loop_ +_space_group_symop_id +_space_group_symop_operation_xyz +1 x,y,z +2 -y,x-y,z+1/3 +3 -x+y,-x,z+2/3 +4 x-y,-y,-z+2/3 +5 y,x,-z +6 -x,-x+y,-z+1/3 + +loop_ +_atom_site_type_symbol +_atom_site_symmetry_multiplicity +_atom_site_Wyckoff_label +_atom_site_fract_x +_atom_site_fract_y +_atom_site_fract_z +Se 3 a 0.2254 0.0000 0.3333 diff --git a/doc/source/hP3.cif b/doc/source/hP3.cif new file mode 100644 index 00000000..5bc6f3d1 --- /dev/null +++ b/doc/source/hP3.cif @@ -0,0 +1,33 @@ +# A header describing this portion of the file +data_cif_Se-hP3 + +_chemical_name_mineral 'alpha-Selenium' +_chemical_formula_sum 'Se' + +# Key-value pairs describing the unit cell (Å and °) +# Note the cell angles 90-90-120 indicate a hexagonal structure. 
+_cell_length_a 4.36620 +_cell_length_b 4.36620 +_cell_length_c 4.95360 +_cell_angle_alpha 90.00000 +_cell_angle_beta 90.00000 +_cell_angle_gamma 120.00000 + +loop_ +_space_group_symop_id +_space_group_symop_operation_xyz +1 x,y,z +2 -y,x-y,z+1/3 +3 -x+y,-x,z+2/3 +4 x-y,-y,-z+2/3 +5 y,x,-z +6 -x,-x+y,-z+1/3 + +loop_ +_atom_site_type_symbol +_atom_site_symmetry_multiplicity +_atom_site_Wyckoff_label +_atom_site_fract_x +_atom_site_fract_y +_atom_site_fract_z +Se 3 a 0.22540 0.00000 0.33333 From 81b07715c006e4aba1bff4753b43c91e0a3bf71f Mon Sep 17 00:00:00 2001 From: janbridley Date: Tue, 9 Dec 2025 16:41:35 -0500 Subject: [PATCH 26/45] Update requirements file for py3.14 --- .github/requirements-3.14.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/requirements-3.14.txt b/.github/requirements-3.14.txt index 55366600..d7623429 100644 --- a/.github/requirements-3.14.txt +++ b/.github/requirements-3.14.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv --allow-python-downloads pip compile --python-version=3.14 pyproject.toml requirements-sympy.in tests/requirements.in +# uv --allow-python-downloads pip compile --python-version=3.14 pyproject.toml requirements-sympy.in tests/requirements.in --output-file=.github/requirements-3.14.txt ase==3.26.0 # via -r tests/requirements.in contourpy==1.3.3 @@ -12,6 +12,8 @@ fonttools==4.60.1 # via matplotlib gemmi==0.7.3 # via -r tests/requirements.in +gsd==4.2.0 + # via -r tests/requirements.in iniconfig==2.1.0 # via pytest kiwisolver==1.4.9 @@ -27,6 +29,7 @@ numpy==2.3.3 # parsnip-cif (pyproject.toml) # ase # contourpy + # gsd # matplotlib # pycifrw # scipy From 6ec6f293f857c3fb3329912b6fe3e3c508c8263f Mon Sep 17 00:00:00 2001 From: janbridley Date: Tue, 9 Dec 2025 16:45:36 -0500 Subject: [PATCH 27/45] Update changelog.rst --- changelog.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/changelog.rst b/changelog.rst index b475423e..46257b3e 100644 --- 
a/changelog.rst +++ b/changelog.rst @@ -10,6 +10,15 @@ v0.4.1 - 2025-10-08 Added ~~~~~ - Support for Python 3.14 +- Tutorial on loading CIF files in HOOMD-Blue +- Tutorial on loading CIF files in LAMMPS +- Tutorial on reconstructing CIF files with limited numerical precision +- Documentation for the ``CifFile.PATTERNS`` dict and its relation to the formal CIF + grammar + +Changed +~~~~~~~ +- ``CifFile.__repr__`` now includes a copy-pasteable section for reproducibility v0.4.0 - 2025-09-03 ------------------- From 1b5be04318ec4e6a9220a367bf1c29bcf1565330 Mon Sep 17 00:00:00 2001 From: janbridley Date: Tue, 9 Dec 2025 16:46:41 -0500 Subject: [PATCH 28/45] Fix label in CHANGELOG --- changelog.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/changelog.rst b/changelog.rst index 46257b3e..fe2e494a 100644 --- a/changelog.rst +++ b/changelog.rst @@ -4,12 +4,11 @@ Changelog The format is based on `Keep a Changelog `__. This project adheres to `Semantic Versioning `__. 
-v0.4.1 - 2025-10-08 +v0.X.X - 20XX-XX-XX ------------------- Added ~~~~~ -- Support for Python 3.14 - Tutorial on loading CIF files in HOOMD-Blue - Tutorial on loading CIF files in LAMMPS - Tutorial on reconstructing CIF files with limited numerical precision @@ -20,6 +19,13 @@ Changed ~~~~~~~ - ``CifFile.__repr__`` now includes a copy-pasteable section for reproducibility +v0.4.1 - 2025-10-08 +------------------- + +Added +~~~~~ +- Support for Python 3.14 + v0.4.0 - 2025-09-03 ------------------- From ab33ab2d00a226db98ed98ca18369fa4b1878041 Mon Sep 17 00:00:00 2001 From: janbridley Date: Tue, 9 Dec 2025 19:14:48 -0500 Subject: [PATCH 29/45] Pre-compile patterns for unit cell evaluation --- parsnip/patterns.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/parsnip/patterns.py b/parsnip/patterns.py index 5d945555..fc080053 100644 --- a/parsnip/patterns.py +++ b/parsnip/patterns.py @@ -44,6 +44,8 @@ See section 3.2 of dx.doi.org/10.1107/S1600576715021871 for clarification. """ +_SAFE_STRING_RE = re.compile(r"(\(\d+\))|[^\d\[\]\,\+\-\/\*\.]") + def _contains_wildcard(s: str) -> bool: return "?" in s or "*" in s @@ -105,22 +107,17 @@ def _safe_eval( :math:`(N,3)` list of fractional coordinates. """ - ordered_inputs = {"x": "{0}", "y": "{1}", "z": "{2}"} - # Replace any x, y, or z with the same character surrounded by curly braces. Then, - # perform substitutions to insert the actual values. + # Replace x, y, and z with positional format specifiers and then format in values substituted_string = ( - re.sub(r"([xyz])", r"{\1}", str_input).format(**ordered_inputs).format(x, y, z) + str_input.replace("x", "{0}") + .replace("y", "{1}") + .replace("z", "{2}") + .format(x, y, z) ) # Remove any unexpected characters from the string, including precision specifiers. 
- safe_string = re.sub(r"(\(\d+\))|[^\d\[\]\,\+\-\/\*\.]", "", substituted_string) + safe_string = _SAFE_STRING_RE.sub("", substituted_string) - # Double check to be sure: - assert all(char in ",.0123456789+-/*[]" for char in safe_string), ( - "Evaluation aborted. Check that symmetry operation string only contains " - "numerics or characters in { [],.+-/ } and adjust `regex_filter` param " - "accordingly." - ) if parse_mode == "sympy": return _sympy_evaluate_array(safe_string) if parse_mode == "python_float": From 4129943074d9a9db23bf2e68eb8d9ced389c4870 Mon Sep 17 00:00:00 2001 From: janbridley Date: Wed, 10 Dec 2025 14:51:01 -0500 Subject: [PATCH 30/45] Add example for setting Wyckoff sites --- doc/source/_static/perfect_imperfect_bmn.svg | 302 +++++++++++++++++++ doc/source/betamn.cif | 52 ++++ doc/source/example_new_structures.rst | 129 ++++++++ doc/source/examples.rst | 18 ++ doc/source/index.rst | 1 + parsnip/parsnip.py | 85 +++++- 6 files changed, 586 insertions(+), 1 deletion(-) create mode 100644 doc/source/_static/perfect_imperfect_bmn.svg create mode 100644 doc/source/betamn.cif create mode 100644 doc/source/example_new_structures.rst create mode 100644 doc/source/examples.rst diff --git a/doc/source/_static/perfect_imperfect_bmn.svg b/doc/source/_static/perfect_imperfect_bmn.svg new file mode 100644 index 00000000..7b944103 --- /dev/null +++ b/doc/source/_static/perfect_imperfect_bmn.svg @@ -0,0 +1,302 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/betamn.cif b/doc/source/betamn.cif new file mode 100644 index 00000000..ae374a25 --- /dev/null +++ b/doc/source/betamn.cif @@ -0,0 +1,52 @@ +data_beta_manganese + +_chemical_name_mineral "beta" +_chemical_formula_sum "Mn" + +_symmetry_Int_Tables_number 213 + +_cell_length_a 6.315000 +_cell_length_b 6.315000 +_cell_length_c 6.315000 +_cell_angle_alpha 90.00000 +_cell_angle_beta 90.00000 +_cell_angle_gamma 90.00000 + +loop_ +_space_group_symop_id +_space_group_symop_operation_xyz +1 x,y,z +2 x+1/2,-y+1/2,-z +3 -x,y+1/2,-z+1/2 +4 -x+1/2,-y,z+1/2 +5 y,z,x +6 y+1/2,-z+1/2,-x +7 -y,z+1/2,-x+1/2 +8 -y+1/2,-z,x+1/2 +9 z,x,y +10 z+1/2,-x+1/2,-y +11 -z,x+1/2,-y+1/2 +12 -z+1/2,-x,y+1/2 +13 -y+3/4,-x+3/4,-z+3/4 +14 -y+1/4,x+3/4,z+1/4 +15 y+1/4,-x+1/4,z+3/4 +16 y+3/4,x+1/4,-z+1/4 +17 -x+3/4,-z+3/4,-y+3/4 +18 -x+1/4,z+3/4,y+1/4 +19 x+1/4,-z+1/4,y+3/4 +20 x+3/4,z+1/4,-y+1/4 +21 -z+3/4,-y+3/4,-x+3/4 +22 -z+1/4,y+3/4,x+1/4 +23 z+1/4,-y+1/4,x+3/4 +24 z+3/4,y+1/4,-x+1/4 + +loop_ +_atom_site_label +_atom_site_type_symbol +_atom_site_symmetry_multiplicity +_atom_site_Wyckoff_label +_atom_site_fract_x +_atom_site_fract_y +_atom_site_fract_z +Mn1 Mn 8 c 0.06361 0.06361 0.06361 +Mn2 Mn 12 d 0.12500 0.20224 0.45224 diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst new file mode 100644 index 00000000..ae1939cb --- /dev/null +++ b/doc/source/example_new_structures.rst @@ -0,0 +1,129 @@ +Refining and Experimenting with Structures +========================================== + +**parsnip** allows users to set the Wyckoff positions of a crystal, enabling the +construction of modified -- or entirely new -- structures. In this example, we show +how an experimental beta-Manganese (cP20-Mn) structure can be refined into the +more uniform variant described by `O'Keefe and Andersson`_. + +.. 
_`O'Keefe and Andersson`: https://doi.org/10.1107/S0567739477002228 + +These are the Wyckoff positions for elemental Beta-Manganese: + +.. literalinclude:: betamn.cif + :lines: 50-52 + + +.. testsetup:: + + >>> import os + >>> import numpy as np + >>> if "doc/source" not in os.getcwd(): os.chdir("doc/source") + +Loading the file shows the twenty atoms we expect for β-Mn: + +.. doctest:: + + >>> from parsnip import CifFile + >>> filename = "betamn.cif" + >>> cif = CifFile(filename) + >>> uc = cif.build_unit_cell() + >>> assert uc.shape == (20, 3) + +Introducing Beta-Manganese +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Beta-Manganese is a `tetrahedrally close-packed`_ (TCP) structure, a class of complex +phases whose geometry minimizes the distance between atoms in a manner that prevents the +formation of octahedral interstitial sites. Intuitively, one can imagine the bond network +of TCP structures forming a space-filling collection of irregular tetrahedra, with some +required amount of distortion imposed by the requirement that the structure tiles space. + +It turns out that natural beta-Manganese actually has *more* variation in bond lengths +than is strictly required for this topology of structure. `O'Keefe and Andersson`_ +noticed that moving the ``Mn1`` and ``Mn2`` Wyckoff positions by just ``0.0011`` and +``0.0042`` fractional units results in a TCP structure composed of bonds whose maximum +relative distance is lower than experiments predicted. + +.. _`tetrahedrally close-packed`: https://www.chemie-biologie.uni-siegen.de/ac/hjd/lehre/ss08/vortraege/mehboob_tetrahedrally_close_packing_corr_.pdf + +Using **parsnip**, we can explore the differences between experimental and ideal +beta-Manganese, quantifying the distribution of bond lengths in the crystal: + +.. 
doctest:: + + >>> from parsnip import CifFile + >>> from math import sqrt + >>> filename = "betamn.cif" + >>> cif = CifFile(filename) + >>> atomic_uc = cif.build_unit_cell() + >>> assert atomic_uc.shape == (20, 3) + >>> # Values are drawn from O'Keefe and Andersson, linked above. + >>> x = 1 / (9 + sqrt(33)) + >>> mn1 = [x, x, x] # doctest: +FLOAT_CMP + >>> mn1 + [0.0678216, 0.0678216, 0.0678216] + >>> y = (9 - sqrt(33)) / 16 + >>> z = (13 - sqrt(33)) / 16 + >>> mn2 = [1 / 8, y, z] + >>> mn2 # doctest: +FLOAT_CMP + [0.1250000, 0.2034648, 0.4534648] + + >>> cif.set_wyckoff_positions([mn1, mn2]) + CifFile(file=betamn.cif) : 9 data entries, 2 data loops + >>> # We should still have the same number of atoms + >>> ideal_uc = cif.build_unit_cell(n_decimal_places=4) + >>> assert ideal_uc.shape == atomic_uc.shape + + +Analyzing our New Structure +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following plot shows a histogram of neighbor distances for experimental +beta-Manganese (top) and the ideal structure (bottom). Each bar corresponds with a +single neighbor bond length, with each particle's neighbors existing at one of the +specified distances. Interestingly, although the ideal structure has a more uniform +topology with fewer total distinct edges, the observed atomic structure more uniformly +distributes bonds to each particle. + + +.. image:: _static/perfect_imperfect_bmn.svg + :width: 100% + + +A Note on Symmetry +^^^^^^^^^^^^^^^^^^ + +Modifying the Wyckoff positions of a crystal (without changing the symmetry operations) +cannot reduce the symmetry of the structure -- however, some choices of sites can +result in *additional* symmetry operations that are not present in the input space +group. While the example provided above preserved the space group of our crystal, +choosing a fractional coordinate that lies on a high symmetry point (like the origin, +or the center of the cell) can result in differences. + + +.. 
doctest-requires:: spglib + + >>> import spglib + >>> box = cif.lattice_vectors + >>> # Verify that our initial and "ideal" beta-Manganese cells share a space group + >>> spglib.get_spacegroup((box, atomic_uc, [0] * 20)) + 'P4_132 (213)' + >>> spglib.get_spacegroup((box, ideal_uc, [0] * 20)) + 'P4_132 (213)' + >>> cif["_symmetry_Int_Tables_number"] # Data from the initial file. + '213' + + >>> cif = CifFile("betamn.cif").set_wyckoff_positions([[0.0, 0.0, 0.0]]) + >>> different_uc = cif.build_unit_cell() + >>> spglib.get_spacegroup((box, different_uc, [0] * len(different_uc))) + 'Fd-3m (227)' + +Takeaways +^^^^^^^^^ + +**parsnip** allows us to use existing structural data to generate new crystals, +including those that have not been observed in experiment. While the example shown here +is relatively simple, assigning alternative Wyckoff positions enables high-throughput +materials discovery research and offers a simple framework by which structural features +can be explored. diff --git a/doc/source/examples.rst b/doc/source/examples.rst new file mode 100644 index 00000000..56e351c5 --- /dev/null +++ b/doc/source/examples.rst @@ -0,0 +1,18 @@ +.. _examples: + +======== +Examples +======== + +This tutorial provides a complete introduction to **parsnip**, including its place in +the broader simulation and data science ecosystems. We begin by illustrating how +**parsnip** aids in simulation initialization in two common libraries, HOOMD-Blue and +LAMMPS. We then highlight **parsnip**'s class-leading performance reconstructing noisy +experimental data. We conclude with a tutorial on using **parsnip** to generate new +structures from existing data. + + +.. toctree:: + :maxdepth: 2 + + example_new_structures diff --git a/doc/source/index.rst b/doc/source/index.rst index 4bccb887..89768538 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -16,6 +16,7 @@ installation quickstart + examples .. 
toctree:: diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 1b599aa5..7ff39414 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -852,7 +852,10 @@ def _read_wyckoff_positions(self): for (k, v) in zip(self.__class__._WYCKOFF_KEYS, wyckoff_position_data) if v is not None ] - return np.hstack([x for x in wyckoff_position_data if x is not None] or [[]]) + data_to_stack = [x for x in wyckoff_position_data if x is not None] + if not data_to_stack: + return np.array([]) + return np.column_stack(data_to_stack) @property def wyckoff_positions(self): @@ -867,6 +870,86 @@ def wyckoff_positions(self): """ return cast_array_to_float(self._read_wyckoff_positions(), dtype=float) + def set_wyckoff_positions(self, wyckoff_sites: np.ndarray[(None, 3), np.float64]): + r"""Set the Wyckoff sites in the CIF file data. + + This method updates the values of the Wyckoff position coordinates + (e.g., ``_atom_site_fract_x``, ``_atom_site_fract_y``, ``_atom_site_fract_z``) + in the corresponding loop structure. The input is a NumPy array of floating + point values, which will be converted to strings for storage. + + If the provided array has a different number of rows than the existing + data, the loop will be resized. When adding new sites, placeholder + data ("?") will be used for non-coordinate columns. When removing sites, + rows are removed from the end of the loop. + + Parameters + ---------- + wyckoff_sites : np.ndarray[(None, 3), np.float64] + A NumPy array of shape (N, 3) containing the new Wyckoff sites. + + Raises + ------ + ValueError + If the Wyckoff position keys cannot be found in any loop, or if the + input array does not have 3 columns. 
+ """ + wyckoff_sites = np.asarray(wyckoff_sites) + if len(self._raw_wyckoff_keys) == 0: + self._read_wyckoff_positions() + + keys_to_set = self._wyckoff_site_keys + + # If we have both fractional and cartesian, only use the first three (fract) + if len(keys_to_set) > 3: + keys_to_set = keys_to_set[:3] + + if len(keys_to_set) != 3: + raise ValueError(f"Found {len(keys_to_set)} Wyckoff keys, expected 3.") + + target_loop_idx = -1 + for i, loop in enumerate(self._loops): + if all(key in loop.dtype.names for key in keys_to_set): + target_loop_idx = i + break + + if target_loop_idx == -1: + raise ValueError( + f"Could not find a loop containing all Wyckoff keys: {keys_to_set}" + ) + + if wyckoff_sites.ndim != 2 or wyckoff_sites.shape[1] != 3: + raise ValueError( + "Input `wyckoff_sites` must have shape (N, 3), but has shape" + f"{wyckoff_sites.shape}." + ) + + target_loop = self._loops[target_loop_idx] + n_current = len(target_loop) + n_new = len(wyckoff_sites) + + new_loop = np.empty(n_new, dtype=target_loop.dtype) + + # Copy over existing data for columns that are not being set + other_keys = [ + name for name in target_loop.dtype.names if name not in keys_to_set + ] + n_to_copy = min(n_current, n_new) + for key in other_keys: + new_loop[key][:n_to_copy] = target_loop[key][:n_to_copy].squeeze() + + # Set new coordinates + for i, key in enumerate(keys_to_set): + new_loop[key] = [f"{val:.8f}" for val in wyckoff_sites[:, i]] + + # Fill in default values for added rows + if n_new > n_current: + for key in other_keys: + new_loop[key][n_current:] = "?" + + self._loops[target_loop_idx] = new_loop + return self # Allow for chaining. + @property def cast_values(self): """Bool : Whether to cast "number-like" values to ints & floats. 
From fbf40fb3cf1c9b0e028a377c2c47f162a53fd11e Mon Sep 17 00:00:00 2001 From: janbridley Date: Wed, 10 Dec 2025 14:53:41 -0500 Subject: [PATCH 31/45] Fix doctest-requires --- doc/source/example_new_structures.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index ae1939cb..75e7b0e9 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -114,6 +114,11 @@ or the center of the cell) can result in differences. >>> cif["_symmetry_Int_Tables_number"] # Data from the initial file. '213' +Placing a Wyckoff position on a high-symmetry site results in a change in the space +group. + +.. doctest-requires:: spglib + >>> cif = CifFile("betamn.cif").set_wyckoff_positions([[0.0, 0.0, 0.0]]) >>> different_uc = cif.build_unit_cell() >>> spglib.get_spacegroup((box, different_uc, [0] * len(different_uc))) From 21a5c3f7a5a951d548e868c47494924cb9e06c55 Mon Sep 17 00:00:00 2001 From: janbridley Date: Wed, 10 Dec 2025 15:19:00 -0500 Subject: [PATCH 32/45] Add warning for setting structure --- doc/source/example_new_structures.rst | 2 ++ parsnip/parsnip.py | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index 75e7b0e9..c7aabd5b 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -1,3 +1,5 @@ +.. _setbasis: + Refining and Experimenting with Structures ========================================== diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 7ff39414..25b34159 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -874,7 +874,6 @@ def set_wyckoff_positions(self, wyckoff_sites: np.ndarray[(None, 3), np.float64] r"""Set the Wyckoff sites in the CIF file data. 
This method updates the values of the Wyckoff position coordinates - (e.g., ``_atom_site_fract_x``, ``_atom_site_fract_y``, ``_atom_site_fract_z``) in the corresponding loop structure. The input is a NumPy array of floating point values, which will be converted to strings for storage. @@ -883,6 +882,15 @@ def set_wyckoff_positions(self, wyckoff_sites: np.ndarray[(None, 3), np.float64] data ("?") will be used for non-coordinate columns. When removing sites, rows are removed from the end of the loop. + .. danger:: + + Changing the Wyckoff positions may invalidate other keys in the original + file, most commonly by changing the ``_chemical_formula_sum`` and space + group data. Correct structures will be built when using + :meth:`~.build_unit_cell`, but use of keys related to structural or + chemical data is discouraged once the basis has been modified. Refer to + :ref:`setbasis` for further details. + Parameters ---------- wyckoff_sites : np.ndarray[(None, 3), np.float64] From 228752688e9f40abe086d842730501826f5101d5 Mon Sep 17 00:00:00 2001 From: janbridley Date: Wed, 10 Dec 2025 15:22:02 -0500 Subject: [PATCH 33/45] Fix type annotation --- parsnip/parsnip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsnip/parsnip.py b/parsnip/parsnip.py index 25b34159..ed5fe8bd 100644 --- a/parsnip/parsnip.py +++ b/parsnip/parsnip.py @@ -893,8 +893,8 @@ def set_wyckoff_positions(self, wyckoff_sites: np.ndarray[(None, 3), np.float64] Parameters ---------- - wyckoff_sites : np.ndarray[(None, 3), np.float64] - A NumPy array of shape (N, 3) containing the new Wyckoff sites. + wyckoff_sites : :math:`(N, 3)` :class:`numpy.ndarray`: + The new Wyckoff site data.
Raises ------ From a624d3aae1701534cb3cd17a90e5cfdb63f44045 Mon Sep 17 00:00:00 2001 From: janbridley Date: Wed, 10 Dec 2025 15:25:56 -0500 Subject: [PATCH 34/45] Include only necessary data --- doc/source/example_new_structures.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index c7aabd5b..a1d06bbd 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -13,7 +13,7 @@ more uniform variant described by `O'Keefe and Andersson`_. These are the Wyckoff positions for elemental Beta-Manganese: .. literalinclude:: betamn.cif - :lines: 50-52 + :lines: 51-52 .. testsetup:: From 84c08e31709474cd0382bce6695dadcf4b57b915 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 12 Dec 2025 11:38:21 -0500 Subject: [PATCH 35/45] sphinx-inline-tabs --- doc/requirements.in | 1 + doc/requirements.txt | 2 ++ doc/source/conf.py | 1 + 3 files changed, 4 insertions(+) diff --git a/doc/requirements.in b/doc/requirements.in index d64ee66d..8e83a801 100644 --- a/doc/requirements.in +++ b/doc/requirements.in @@ -3,5 +3,6 @@ furo numpy>=1.26.4 sphinx>=7.3.7 sphinx-copybutton +sphinx-inline-tabs sphinx-notfound-page pytest-doctestplus diff --git a/doc/requirements.txt b/doc/requirements.txt index b3d35b82..cca46e12 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -71,6 +71,8 @@ sphinx-basic-ng==1.0.0b2 # via furo sphinx-copybutton==0.5.2 # via -r doc/requirements.in +sphinx-inline-tabs==2023.04.21 + # via -r doc/requirements.in sphinx-notfound-page==1.1.0 # via -r doc/requirements.in sphinxcontrib-applehelp==2.0.0 diff --git a/doc/source/conf.py b/doc/source/conf.py index 666e3b58..773379fd 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -26,6 +26,7 @@ "sphinx.ext.intersphinx", "sphinx.ext.napoleon", "sphinx_copybutton", + "sphinx_inline_tabs", "pytest_doctestplus.sphinx.doctestplus", "autodocsumm", "notfound.extension", From 
6ca3a23fbdf3139387e8a152ee0cfd600076bd76 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 12 Dec 2025 11:38:33 -0500 Subject: [PATCH 36/45] Description of Wyckoff postions --- doc/source/example_new_structures.rst | 303 ++++++++++++++++++++++++-- 1 file changed, 289 insertions(+), 14 deletions(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index a1d06bbd..a4649dbb 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -5,16 +5,54 @@ Refining and Experimenting with Structures **parsnip** allows users to set the Wyckoff positions of a crystal, enabling the construction of modified -- or entirely new -- structures. In this example, we show -how an experimental beta-Manganese (cP20-Mn) structure can be refined into the +how an experimental β-Manganese (cP20-Mn) structure can be refined into the more uniform variant described by `O'Keefe and Andersson`_. .. _`O'Keefe and Andersson`: https://doi.org/10.1107/S0567739477002228 -These are the Wyckoff positions for elemental Beta-Manganese: + +These are the Wyckoff positions for elemental β-Manganese, drawn directly from a CIF +file: .. literalinclude:: betamn.cif :lines: 51-52 +Formatted more nicely, we see the following: + +.. list-table:: Crystallographic data loop for β-Mn + :widths: 15 15 20 15 10 10 10 + :header-rows: 1 + + * - Site Label + - Type Symbol + - Symmetry Multiplicity + - Wyckoff letter + - x + - y + - z + * - Mn1 + - Mn + - 8 + - c + - 0.06361 + - 0.06361 + - 0.06361 + * - Mn2 + - Mn + - 12 + - d + - 0.12500 + - 0.20224 + - 0.45224 + +The key notes are the symmetry multiplicity (8 for Mn1 and 12 for Mn2), which indicates +how many atomic positions arise from each Wyckoff site, and the Wyckoff label. While +this tutorial will not delve too deeply into crystallography, it is sufficient to note +that this label provides a mapping to the International Tables for each space group. 
+For β-Manganese, we will use this mapping to identify one coordinate equation that +describes each site. For Mn1, this yields ``[x, x, x]`` and for Mn2, we select +``[1 / 8, y, y + 1 / 4]`` to match the CIF data from above. + .. testsetup:: @@ -32,16 +70,29 @@ Loading the file shows the twenty atoms we expect for β-Mn: >>> uc = cif.build_unit_cell() >>> assert uc.shape == (20, 3) -Introducing Beta-Manganese -^^^^^^^^^^^^^^^^^^^^^^^^^^ +And of course, the Wyckoff position data reflects the data tabulated above: -Beta-Manganese is a `tetrahedrally close-packed`_ (TCP) structure, a class of complex + >>> mn1, mn2 = cif.wyckoff_positions + >>> mn1 + array([0.06361, 0.06361, 0.06361]) + >>> mn2 + array([0.125 , 0.20224, 0.45224]) + >>> x = mn1[0] + >>> y = mn2[1] + >>> np.testing.assert_allclose(mn1, x) + >>> np.testing.assert_allclose(mn2[2], y + 1 / 4) + >>> np.testing.assert_allclose(mn2[0], 1 / 8) + +Introducing β-Manganese +^^^^^^^^^^^^^^^^^^^^^^^ + +β-Manganese is a `tetrahedrally close-packed`_ (TCP) structure, a class of complex phases whose geometry minimizes the distance between atoms in a manner that prevents the formation of octahedral interstitial sites. Intuitively, one can image the bond network of TCP structures forming a space-filling collection of irregular tetrahedra, with some required amount of distortion imposed by the requirement that the structure tiles space. -It turns out that natural beta-Manganese actually has *more* variation in bond lengths +It turns out that natural β-Manganese actually has *more* variation in bond lengths than is strictly required for this topology of structure. `O'Keefe and Andersson`_ noticed that moving the ``Mn1`` and ``Mn2`` Wyckoff positions by just ``0.0011`` and ``0.0042`` fractional units results in a TCP structure composed of bonds whose maximum @@ -50,7 +101,7 @@ relative distance is lower than experiments predicted. .. 
_`tetrahedrally close-packed`: https://www.chemie-biologie.uni-siegen.de/ac/hjd/lehre/ss08/vortraege/mehboob_tetrahedrally_close_packing_corr_.pdf Using **parsnip**, we can explore the differences between experimental and ideal -beta-Manganese, quantifying the distribution of bond lengths in the crystal: +β-Manganese, quantifying the distribution of bond lengths in the crystal: .. doctest:: @@ -61,18 +112,16 @@ beta-Manganese, quantifying the distribution of bond lengths in the crystal: >>> atomic_uc = cif.build_unit_cell() >>> assert atomic_uc.shape == (20, 3) >>> # Values are drawn from O'Keefe and Andersson, linked above. - >>> x = 1 / (9 + sqrt(33)) + >>> x = 1 / (9 + sqrt(33)) # Parameter for the 8c Wyckoff position >>> mn1 = [x, x, x] # doctest: +FLOAT_CMP >>> mn1 [0.0678216, 0.0678216, 0.0678216] >>> y = (9 - sqrt(33)) / 16 - >>> z = (13 - sqrt(33)) / 16 - >>> mn2 = [1 / 8, y, z] + >>> mn2 = [1 / 8, y, y + 1 / 4] # Parameter for the 12d Wyckoff position >>> mn2 # doctest: +FLOAT_CMP [0.1250000, 0.2034648, 0.4534648] - >>> cif.set_wyckoff_positions([mn1, mn2]) - CifFile(file=betamn.cif) : 9 data entries, 2 data loops + >>> _ = cif.set_wyckoff_positions([mn1, mn2]) >>> # We should still have the same number of atoms >>> ideal_uc = cif.build_unit_cell(n_decimal_places=4) >>> assert ideal_uc.shape == atomic_uc.shape @@ -82,7 +131,7 @@ Analyzing our New Structure ^^^^^^^^^^^^^^^^^^^^^^^^^^^ The following plot shows a histogram of neighbor distances for experimental -beta-Manganese (top) and the ideal structure (bottom). Each bar corresponds with a +β-Manganese (top) and the ideal structure (bottom). Each bar corresponds with a single neighbor bond length, with each particle's neighbors existing at one of the specified distances. Interestingly, althought the ideal structure has a more uniform topology with fewer total distinct edges, the observed atomic structure more uniformly @@ -108,7 +157,7 @@ or the center of the cell) can result in differences. 
>>> import spglib >>> box = cif.lattice_vectors - >>> # Verify that our initial and "ideal" beta-Manganese cells share a space group + >>> # Verify that our initial and "ideal" β-Manganese cells share a space group >>> spglib.get_spacegroup((box, atomic_uc, [0] * 20)) 'P4_132 (213)' >>> spglib.get_spacegroup((box, ideal_uc, [0] * 20)) @@ -116,6 +165,7 @@ or the center of the cell) can result in differences. >>> cif["_symmetry_Int_Tables_number"] # Data from the initial file. '213' + Placing a Wyckoff position on a high-symmetry site results in a change in the space group. @@ -126,9 +176,234 @@ group. >>> spglib.get_spacegroup((box, different_uc, [0] * len(different_uc))) 'Fd-3m (227)' +Design Rules for Crystal Construction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +While the global space group symmetry can only be increased by changing the Wyckoff +positions, the point group symmetry of sites can vary greatly. The example above +chose points that maintained the multiplicity of each site, but general choices do not +preserve this. First, let's confirm that the Wyckoff letters and site point groups are +the same in the atomic and ideal crystals: + +.. doctest-requires:: spglib + + >>> def get_particle_point_groups(box, basis): + ... spglib_cell = (box, basis, [0] * len(basis)) + ... dataset = spglib.get_symmetry_dataset(spglib_cell) + ... wycks = sorted({*dataset.wyckoffs}) + ... point_groups = sorted({*dataset.site_symmetry_symbols})[::-1] + ... return (wycks, point_groups) + >>> get_particle_point_groups(box, atomic_uc) + (['c', 'd'], ['.3.', '..2']) + >>> get_particle_point_groups(box, ideal_uc) + (['c', 'd'], ['.3.', '..2']) + + +A more general choice of the basis will often result in different point symmetry. +Referring to the `symmetry tables`_ for space group 213 shows the ``a`` and ``b`` +Wyckoff positions, which have higher symmetry and a lower multiplicity. 
Selecting any +value from the "coordinates" table for the 4a position yields the expected 4-particle +unit cell with a site symmetry of ``'.32'``. For convenience, we include the table for +space group #213 here. Each tab is titled by its multiplicity and Wyckoff letter, with +the coordinate used in these examples highlighted in bold. + +.. tab:: 4a + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 4 + * - **Site Symmetry** + - ``.32`` + * - **Coordinates** + - | **(3/8, 3/8, 3/8)** + | (1/8, 5/8, 7/8) + | (5/8, 7/8, 1/8) + | (7/8, 1/8, 5/8) + +.. tab:: 4b + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 4 + * - **Site Symmetry** + - ``.32`` + * - **Coordinates** + - | **(7/8, 7/8, 7/8)** + | (5/8, 1/8, 3/8) + | (1/8, 3/8, 5/8) + | (3/8, 5/8, 1/8) + +.. tab:: 8c + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 8 + * - **Site Symmetry** + - ``.3.`` + * - **Coordinates** + - | **(x, x, x)** + | (-x+1/2, -x, x+1/2) + | (-x, x+1/2, -x+1/2) + | (x+1/2, -x+1/2, -x) + | (x+3/4, x+1/4, -x+1/4) + | (-x+3/4, -x+3/4, -x+3/4) + | (x+1/4, -x+1/4, x+3/4) + | (-x+1/4, x+3/4, x+1/4) + +.. tab:: 12d + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 12 + * - **Site Symmetry** + - ``..2`` + * - **Coordinates** + - | **(1/8, y, y+1/4)** + | (3/8, -y, y+3/4) + | (7/8, y+1/2, -y+1/4) + | (5/8, -y+1/2, -y+3/4) + | (y+1/4, 1/8, y) + | (y+3/4, 3/8, -y) + | (-y+1/4, 7/8, y+1/2) + | (-y+3/4, 5/8, -y+1/2) + | (y, y+1/4, 1/8) + | (-y, y+3/4, 3/8) + | (y+1/2, -y+1/4, 7/8) + | (-y+1/2, -y+3/4, 5/8) + +.. tab:: 16e + + .. 
list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 24 + * - **Site Symmetry** + - ``1`` + * - **Coordinates** + - | (x, y, z) + | (-x+1/2, -y, z+1/2) + | (-x, y+1/2, -z+1/2) + | (x+1/2, -y+1/2, -z) + | (z, x, y) + | (z+1/2, -x+1/2, -y) + | (-z+1/2, -x, y+1/2) + | (-z, x+1/2, -y+1/2) + | (y, z, x) + | (-y, z+1/2, -x+1/2) + | (y+1/2, -z+1/2, -x) + | (-y+1/2, -z, x+1/2) + | (y+3/4, x+1/4, -z+1/4) + | (-y+3/4, -x+3/4, -z+3/4) + | (y+1/4, -x+1/4, z+3/4) + | (-y+1/4, x+3/4, z+1/4) + | (x+3/4, z+1/4, -y+1/4) + | (-x+1/4, z+3/4, y+1/4) + | (-x+3/4, -z+3/4, -y+3/4) + | (x+1/4, -z+1/4, y+3/4) + | (z+3/4, y+1/4, -x+1/4) + | (z+1/4, -y+1/4, x+3/4) + | (-z+1/4, y+3/4, x+1/4) + | (-z+3/4, -y+3/4, -x+3/4) + + +.. _`symmetry tables`: https://web.archive.org/web/20170430110556/http://www.cryst.ehu.es/cgi-bin/cryst/programs/nph-wp-list?gnum=213 + + +.. doctest-requires:: spglib + + >>> four_a = [[3/8, 3/8, 3/8]] + >>> four_a_cif = CifFile("betamn.cif").set_wyckoff_positions(four_a) + >>> four_a_uc = four_a_cif.build_unit_cell() + >>> get_particle_point_groups(box, four_a_uc) + (['a'], ['.32']) + >>> assert four_a_uc.shape == (4, 3) + >>> spglib.get_spacegroup((box, four_a_uc, [0] * 4)) + 'P4_132 (213)' + >>> four_a_uc + array([[0.375, 0.375, 0.375], + [0.875, 0.125, 0.625], + [0.625, 0.875, 0.125], + [0.125, 0.625, 0.875]]) + + +When working with systems where the same particle type lies on multiple Wyckoff +positions, care must be taken to ensure those sites do not satisfy a symmetry operation +in a *higher* space group than the target. The following example assigns the 4a and 4b +Wyckoff positions to a single atomic type. Even though the reconstructed crystal +contains the expected 8 particles, the sites are related by the symmetry element +``x+1/2, y+1/2, z+1/2`` of the next highest space group, #214. + + +.. 
doctest-requires:: spglib + + >>> four_a_four_b = [[3/8, 3/8, 3/8], [7/8, 7/8, 7/8]] + >>> four_a_four_b_cif = CifFile("betamn.cif").set_wyckoff_positions(four_a_four_b) + >>> four_a_four_b_uc = four_a_four_b_cif.build_unit_cell() + >>> assert four_a_four_b_uc.shape == (8, 3) + >>> # NOTE: these sites are equivalent under a *higher* space group! + >>> get_particle_point_groups(box, four_a_four_b_uc) + (['b'], ['.32']) + >>> spglib.get_spacegroup((box, four_a_four_b_uc, [0] * 8)) + 'I4_132 (214)' + >>> # If the sites are different elements, the space group is preserved + >>> spglib.get_spacegroup((box, four_a_four_b_uc, [0,0,0,0, 1,1,1,1])) + 'P4_132 (213)' + +A similar consideration must be made for Wyckoff positions whose coordinates contain +one or more degrees of freedom. In β-Manganese, the 8c and 12d Wyckoff sites each +have one degree of freedom -- the ``x`` and ``y`` variables assigned above. If we set +these degrees of freedom such that Wyckoff positions are no longer independent, we also +alter the space group of the structure. In this case, we solve the system of equations +that arises from setting the coordinates ``[x, x, x] = [1 / 8, y, y + 1 / 4]`` and +assign that value to both ``x`` and ``y``. The resulting points end up reconstructing +the 16d Wyckoff position in the space group #227! + +.. doctest-requires:: spglib + + >>> x = y = -1/8 + >>> wyckoff_c = [x, x, x] + >>> wyckoff_d = [1 / 8, y, y + 1 / 4] + >>> c_d_linked = [wyckoff_c, wyckoff_d] + >>> not_beta_manganese = CifFile("betamn.cif").set_wyckoff_positions(c_d_linked) + >>> not_beta_mn_uc = not_beta_manganese.build_unit_cell() + >>> not_beta_mn_uc.shape # NOTE: this is no longer 12+8 sites! + (16, 3) + >>> spglib.get_spacegroup((box, not_beta_mn_uc, [0] * 16)) + 'Fd-3m (227)' + >>> get_particle_point_groups(box, not_beta_mn_uc) + (['d'], ['.-3m']) + Takeaways ^^^^^^^^^ +The examples above give rise to a few design rules for structure refinement and +modification: + +1. 
For Wyckoff positions without degrees of freedom, care must be taken to ensure sites are not linked by symmetry operations present in a higher space group. + - In general, this can be verified by comparing against a list of operations from + the IUCR Crystal database, or a CIF file with space group #230. +2. For Wyckoff positions *with* degrees of freedom, the following must be ensured: + - Wyckoff positions with different labels must be linearly independent (i.e. their + coordinate equations must not be equal for the chosen degrees of freedom). + - Free variables must be chosen such that the points do not lie on high-symmetry + locations, particularly the origin and power-of-two fractions. This condition is + equivalent to that in point (1), and may be resolved in a similar manner. + + **parsnip** allows us to use existing structural data to generate new crystals, including those that have not been observed in experiment. While the example shown here is relatively simple, assigning alternative Wyckoff positions enables high-throughput From 02c774096c89fecb69fba9025703d80e3b79120b Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 12 Dec 2025 12:02:47 -0500 Subject: [PATCH 37/45] Move up table and fix formatting --- doc/source/example_new_structures.rst | 299 +++++++++++++++----------- 1 file changed, 170 insertions(+), 129 deletions(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index a4649dbb..a82dcd8d 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -45,14 +45,150 @@ Formatted more nicely, we see the following: - 0.20224 - 0.45224 -The key notes are the symmetry multiplicity (8 for Mn1 and 12 for Mn2), which indicates -how many atomic positions arise from each Wyckoff site, and the Wyckoff label.
While -this tutorial will not delve too deeply into crystallography, it is sufficient to note -that this label provides a mapping to the International Tables for each space group. -For β-Manganese, we will use this mapping to identify one coordinate equation that -describes each site. For Mn1, this yields ``[x, x, x]`` and for Mn2, we select -``[1 / 8, y, y + 1 / 4]`` to match the CIF data from above. +First, we note the symmetry multiplicity (8 for :math:`Mn_1` and 12 for +:math:`Mn_2`), which indicates how many atomic positions arise from each Wyckoff +site, and the Wyckoff label. Second, we can identify that each Wyckoff position is +labeled by a letter that differentiates it from other sites. While this tutorial will +not delve too deeply into crystallography, it is sufficient to note that this Wyckoff +letter provides a mapping to the `International Tables`_ for each space group. For +β-Manganese, we will use this mapping to identify one coordinate equation that describes +each site. The correct table for β-Mn is included in the tabs below, with the coordinate +equations that match the CIF data (:math:`(x, x, x)` and :math:`(1/8, y, y + 1/4)`) +highlighted in bold on their corresponding tabs. + +.. _`International Tables`: https://web.archive.org/web/20170430110556/http://www.cryst.ehu.es/cgi-bin/cryst/programs/nph-wp-list?gnum=213 +.. tab:: 4a + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 4 + * - **Site Symmetry** + - ``.32`` + * - **Coordinates** + - .. list-table:: + :header-rows: 0 + :widths: 50 50 + + * - :math:`(3/8, 3/8, 3/8)` + - :math:`(1/8, 5/8, 7/8)` + * - :math:`(5/8, 7/8, 1/8)` + - :math:`(7/8, 1/8, 5/8)` + +.. tab:: 4b + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 4 + * - **Site Symmetry** + - ``.32`` + * - **Coordinates** + - .. 
list-table:: + :header-rows: 0 + :widths: 50 50 + + * - :math:`(7/8, 7/8, 7/8)` + - :math:`(5/8, 1/8, 3/8)` + * - :math:`(1/8, 3/8, 5/8)` + - :math:`(3/8, 5/8, 1/8)` + +.. tab:: 8c + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 8 + * - **Site Symmetry** + - ``.3.`` + * - **Coordinates** + - .. list-table:: + :header-rows: 0 + :widths: 50 50 + + * - :math:`\mathbf{(x, x, x)}` + - :math:`(-x+1/2, -x, x+1/2)` + * - :math:`(-x, x+1/2, -x+1/2)` + - :math:`(x+1/2, -x+1/2, -x)` + * - :math:`(x+3/4, x+1/4, -x+1/4)` + - :math:`(-x+3/4, -x+3/4, -x+3/4)` + * - :math:`(x+1/4, -x+1/4, x+3/4)` + - :math:`(-x+1/4, x+3/4, x+1/4)` + +.. tab:: 12d + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 12 + * - **Site Symmetry** + - ``..2`` + * - **Coordinates** + - .. list-table:: + :header-rows: 0 + :widths: 50 50 + + * - :math:`\mathbf{(1/8, y, y+1/4)}` + - :math:`(3/8, -y, y+3/4)` + * - :math:`(7/8, y+1/2, -y+1/4)` + - :math:`(5/8, -y+1/2, -y+3/4)` + * - :math:`(y+1/4, 1/8, y)` + - :math:`(y+3/4, 3/8, -y)` + * - :math:`(-y+1/4, 7/8, y+1/2)` + - :math:`(-y+3/4, 5/8, -y+1/2)` + * - :math:`(y, y+1/4, 1/8)` + - :math:`(-y, y+3/4, 3/8)` + * - :math:`(y+1/2, -y+1/4, 7/8)` + - :math:`(-y+1/2, -y+3/4, 5/8)` + +.. tab:: 24e + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Multiplicity** + - 24 + * - **Site Symmetry** + - ``1`` + * - **Coordinates** + - .. 
list-table:: + :header-rows: 0 + :widths: 50 50 + + * - :math:`(x, y, z)` + - :math:`(-x+1/2, -y, z+1/2)` + * - :math:`(-x, y+1/2, -z+1/2)` + - :math:`(x+1/2, -y+1/2, -z)` + * - :math:`(z, x, y)` + - :math:`(z+1/2, -x+1/2, -y)` + * - :math:`(-z+1/2, -x, y+1/2)` + - :math:`(-z, x+1/2, -y+1/2)` + * - :math:`(y, z, x)` + - :math:`(-y, z+1/2, -x+1/2)` + * - :math:`(y+1/2, -z+1/2, -x)` + - :math:`(-y+1/2, -z, x+1/2)` + * - :math:`(y+3/4, x+1/4, -z+1/4)` + - :math:`(-y+3/4, -x+3/4, -z+3/4)` + * - :math:`(y+1/4, -x+1/4, z+3/4)` + - :math:`(-y+1/4, x+3/4, z+1/4)` + * - :math:`(x+3/4, z+1/4, -y+1/4)` + - :math:`(-x+1/4, z+3/4, y+1/4)` + * - :math:`(-x+3/4, -z+3/4, -y+3/4)` + - :math:`(x+1/4, -z+1/4, y+3/4)` + * - :math:`(z+3/4, y+1/4, -x+1/4)` + - :math:`(z+1/4, -y+1/4, x+3/4)` + * - :math:`(-z+1/4, y+3/4, x+1/4)` + - :math:`(-z+3/4, -y+3/4, -x+3/4)` .. testsetup:: @@ -83,8 +219,8 @@ And of course, the Wyckoff position data reflects the data tabulated above: >>> np.testing.assert_allclose(mn2[2], y + 1 / 4) >>> np.testing.assert_allclose(mn2[0], 1 / 8) -Introducing β-Manganese -^^^^^^^^^^^^^^^^^^^^^^^ +Exploring β-Manganese +^^^^^^^^^^^^^^^^^^^^^ β-Manganese is a `tetrahedrally close-packed`_ (TCP) structure, a class of complex phases whose geometry minimizes the distance between atoms in a manner that prevents the @@ -200,127 +336,10 @@ the same in the atomic and ideal crystals: A more general choice of the basis will often result in different point symmetry. -Referring to the `symmetry tables`_ for space group 213 shows the ``a`` and ``b`` +Referring to the `International Tables`_ for space group 213 shows the ``a`` and ``b`` Wyckoff positions, which have higher symmetry and a lower multiplicity. Selecting any value from the "coordinates" table for the 4a position yields the expected 4-particle -unit cell with a site symmetry of ``'.32'``. For convenience, we include the table for -space group #213 here. 
Each tab is titled by its multiplicity and Wyckoff letter, with -the coordinate used in these examples highlighted in bold. - -.. tab:: 4a - - .. list-table:: - :widths: 30 70 - :header-rows: 0 - - * - **Multiplicity** - - 4 - * - **Site Symmetry** - - ``.32`` - * - **Coordinates** - - | **(3/8, 3/8, 3/8)** - | (1/8, 5/8, 7/8) - | (5/8, 7/8, 1/8) - | (7/8, 1/8, 5/8) - -.. tab:: 4b - - .. list-table:: - :widths: 30 70 - :header-rows: 0 - - * - **Multiplicity** - - 4 - * - **Site Symmetry** - - ``.32`` - * - **Coordinates** - - | **(7/8, 7/8, 7/8)** - | (5/8, 1/8, 3/8) - | (1/8, 3/8, 5/8) - | (3/8, 5/8, 1/8) - -.. tab:: 8c - - .. list-table:: - :widths: 30 70 - :header-rows: 0 - - * - **Multiplicity** - - 8 - * - **Site Symmetry** - - ``.3.`` - * - **Coordinates** - - | **(x, x, x)** - | (-x+1/2, -x, x+1/2) - | (-x, x+1/2, -x+1/2) - | (x+1/2, -x+1/2, -x) - | (x+3/4, x+1/4, -x+1/4) - | (-x+3/4, -x+3/4, -x+3/4) - | (x+1/4, -x+1/4, x+3/4) - | (-x+1/4, x+3/4, x+1/4) - -.. tab:: 12d - - .. list-table:: - :widths: 30 70 - :header-rows: 0 - - * - **Multiplicity** - - 12 - * - **Site Symmetry** - - ``..2`` - * - **Coordinates** - - | **(1/8, y, y+1/4)** - | (3/8, -y, y+3/4) - | (7/8, y+1/2, -y+1/4) - | (5/8, -y+1/2, -y+3/4) - | (y+1/4, 1/8, y) - | (y+3/4, 3/8, -y) - | (-y+1/4, 7/8, y+1/2) - | (-y+3/4, 5/8, -y+1/2) - | (y, y+1/4, 1/8) - | (-y, y+3/4, 3/8) - | (y+1/2, -y+1/4, 7/8) - | (-y+1/2, -y+3/4, 5/8) - -.. tab:: 16e - - .. 
list-table:: - :widths: 30 70 - :header-rows: 0 - - * - **Multiplicity** - - 24 - * - **Site Symmetry** - - ``1`` - * - **Coordinates** - - | (x, y, z) - | (-x+1/2, -y, z+1/2) - | (-x, y+1/2, -z+1/2) - | (x+1/2, -y+1/2, -z) - | (z, x, y) - | (z+1/2, -x+1/2, -y) - | (-z+1/2, -x, y+1/2) - | (-z, x+1/2, -y+1/2) - | (y, z, x) - | (-y, z+1/2, -x+1/2) - | (y+1/2, -z+1/2, -x) - | (-y+1/2, -z, x+1/2) - | (y+3/4, x+1/4, -z+1/4) - | (-y+3/4, -x+3/4, -z+3/4) - | (y+1/4, -x+1/4, z+3/4) - | (-y+1/4, x+3/4, z+1/4) - | (x+3/4, z+1/4, -y+1/4) - | (-x+1/4, z+3/4, y+1/4) - | (-x+3/4, -z+3/4, -y+3/4) - | (x+1/4, -z+1/4, y+3/4) - | (z+3/4, y+1/4, -x+1/4) - | (z+1/4, -y+1/4, x+3/4) - | (-z+1/4, y+3/4, x+1/4) - | (-z+3/4, -y+3/4, -x+3/4) - - -.. _`symmetry tables`: https://web.archive.org/web/20170430110556/http://www.cryst.ehu.es/cgi-bin/cryst/programs/nph-wp-list?gnum=213 +unit cell with a site symmetry of ``'.32'``. .. doctest-requires:: spglib @@ -368,7 +387,7 @@ one or more degrees of freedom. In β-Manganese, the 8c and 12d Wyckoff sites ea have one degree of freedom -- the ``x`` and ``y`` variables assigned above. If we set these degrees of freedom such that Wyckoff positions are no longer independent, we also alter the space group of the structure. In this case, we solve the system of equations -that arises from setting the coordinates ``[x, x, x] = [1 / 8, y, y + 1 / 4]`` and +that arises from setting the coordinates :math:`[x, x, x] = [1 / 8, y, y + 1 / 4]` and assign that value to both ``x`` and ``y``. The resulting points end up reconstructing the 16d Wyckoff position in the space group #227! @@ -387,6 +406,28 @@ the 16d Wyckoff position in the space group #227! >>> get_particle_point_groups(box, not_beta_mn_uc) (['d'], ['.-3m']) +.. tab:: #227 16d + + .. list-table:: + :widths: 30 70 + :header-rows: 0 + + * - **Space Group** + - #227 + * - **Multiplicity** + - 16 + * - **Site Symmetry** + - ``.-3m`` + * - **Coordinates** + - .. 
list-table:: + :header-rows: 0 + :widths: 50 50 + + * - :math:`(5/8, 5/8, 5/8)` + - :math:`(3/8, 7/8, 1/8)` + * - :math:`(7/8, 1/8, 3/8)` + - :math:`(1/8, 3/8, 7/8)` + Takeaways ^^^^^^^^^ From ff427e757d53fcc67dbf2a10bd722c74d2f307f4 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 12 Dec 2025 13:27:40 -0500 Subject: [PATCH 38/45] Restore heading --- doc/source/example_noisy.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/example_noisy.rst b/doc/source/example_noisy.rst index 2cc99cfb..0c1b8303 100644 --- a/doc/source/example_noisy.rst +++ b/doc/source/example_noisy.rst @@ -87,6 +87,8 @@ once again. >>> one_decimal_place = cif.build_unit_cell(n_decimal_places=1) >>> np.testing.assert_array_equal(one_decimal_place, four_decimal_places) +Symbolic Parsing +^^^^^^^^^^^^^^^^ In some cases, particularly in structures with many atoms, careful tuning of numerical precision is not enough to accurately reproduce a crystal. **parsnip** includes a From e5b02b8baa202ac222049cc31e2fc5e143e5db9f Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 2 Jan 2026 12:02:11 -0500 Subject: [PATCH 39/45] Fix format --- doc/source/example_new_structures.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index a82dcd8d..c981eb49 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -4,7 +4,7 @@ Refining and Experimenting with Structures ========================================== **parsnip** allows users to set the Wyckoff positions of a crystal, enabling the -construction of modified -- or entirely new -- structures. In this example, we show +construction of modified (or entirely new) structures. In this example, we show how an experimental β-Manganese (cP20-Mn) structure can be refined into the more uniform variant described by `O'Keefe and Andersson`_. 
From a8c0031a78026ff69cd01c06c803439d64b48411 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 2 Jan 2026 12:04:27 -0500 Subject: [PATCH 40/45] Clean up text --- doc/source/example_new_structures.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index c981eb49..f5534fb1 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -47,14 +47,14 @@ Formatted more nicely, we see the following: First, we note the symmetry multiplicity (8 for :math:`Mn_1` and 12 for :math:`Mn_2`), which indicates how many atomic positions arise from each Wyckoff -site, and the Wyckoff label. Second, we can identify that each Wyckoff position is -labeled by a letter that differentiates it from other sites. While this tutorial will -not delve too deeply into crystallography, it is sufficient to note that this Wyckoff -letter provides a mapping to the `International Tables`_ for each space group. For -β-Manganese, we will use this mapping to identify one coordinate equation that describes -each site. The correct table for β-Mn is included in the tabs below, with the coordinate -equations that match the CIF data (:math:`(x, x, x)` and :math:`(1/8, y, y + 1/4)`) -highlighted in bold on their corresponding tabs. +site. Second, we can identify that each Wyckoff position is labeled by a letter that +differentiates it from other sites. While this tutorial will not delve too deeply into +crystallography, it is sufficient to note that this Wyckoff letter provides a mapping +to the `International Tables`_ for each space group. For β-Manganese, we will use this +mapping to identify one coordinate equation that describes each site. The correct table +for β-Mn is included in the tabs below, with the coordinate equations that match the +CIF data (:math:`(x, x, x)` and :math:`(1/8, y, y + 1/4)`) highlighted in bold on their +corresponding tabs. .. 
_`International Tables`: https://web.archive.org/web/20170430110556/http://www.cryst.ehu.es/cgi-bin/cryst/programs/nph-wp-list?gnum=213 From 388749910d55b6065ebc7024188bfe52e8ad33ac Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 2 Jan 2026 12:08:00 -0500 Subject: [PATCH 41/45] Clarify number of atoms --- doc/source/example_new_structures.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index f5534fb1..9cca72e4 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -204,7 +204,7 @@ Loading the file shows the twenty atoms we expect for β-Mn: >>> filename = "betamn.cif" >>> cif = CifFile(filename) >>> uc = cif.build_unit_cell() - >>> assert uc.shape == (20, 3) + >>> assert uc.shape == (8 + 12, 3) And of course, the Wyckoff position data reflects the data tabulated above: From 9b66854c87df45ad7264cd9fda5c92766eb401e5 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 2 Jan 2026 12:08:15 -0500 Subject: [PATCH 42/45] Clarify optimal structure --- doc/source/example_new_structures.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index 9cca72e4..8c32fd87 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -231,8 +231,8 @@ required amount of distortion imposed by the requirement that the structure tile It turns out that natural β-Manganese actually has *more* variation in bond lengths than is strictly required for this topology of structure. `O'Keefe and Andersson`_ noticed that moving the ``Mn1`` and ``Mn2`` Wyckoff positions by just ``0.0011`` and -``0.0042`` fractional units results in a TCP structure composed of bonds whose maximum -relative distance is lower than experiments predicted. 
+``0.0042`` fractional units results in a TCP structure composed of only five unique bond +lengths, rather than the seven observed in experimental β-Manganese. .. _`tetrahedrally close-packed`: https://www.chemie-biologie.uni-siegen.de/ac/hjd/lehre/ss08/vortraege/mehboob_tetrahedrally_close_packing_corr_.pdf From bcfa11e3c9fcc507f6779e40fb5edd7aa16e1164 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 2 Jan 2026 12:08:28 -0500 Subject: [PATCH 43/45] en dash -> em dash --- doc/source/example_new_structures.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index 8c32fd87..d74783aa 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -282,7 +282,7 @@ A Note on Symmetry ^^^^^^^^^^^^^^^^^^ Modifying the Wyckoff positions of a crystal (without changing the symmetry operations) -cannot reduce the symmetry of the structure -- however, some choices of sites can +cannot reduce the symmetry of the structure --- however, some choices of sites can result in *additional* symmetry operations that are not present in the input space group. 
While the example provided above preserved the space group of our crystal, choosing a fractional coordinate that lies on a high symmetry point (like the origin, From d48bcea3df4f7e0ee843fbe1a30c5662ce0dbb42 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 2 Jan 2026 12:12:47 -0500 Subject: [PATCH 44/45] Tighten up text --- doc/source/example_new_structures.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index d74783aa..3464dd2b 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -5,8 +5,8 @@ Refining and Experimenting with Structures **parsnip** allows users to set the Wyckoff positions of a crystal, enabling the construction of modified (or entirely new) structures. In this example, we show -how an experimental β-Manganese (cP20-Mn) structure can be refined into the -more uniform variant described by `O'Keefe and Andersson`_. +how an experimental β-Manganese structure can be refined into the more uniform variant + described by `O'Keefe and Andersson`_. .. _`O'Keefe and Andersson`: https://doi.org/10.1107/S0567739477002228 From 66c91193a0e024eeaa72e1f80d25fa31afe35329 Mon Sep 17 00:00:00 2001 From: janbridley Date: Fri, 2 Jan 2026 12:14:19 -0500 Subject: [PATCH 45/45] Fix indentation typo --- doc/source/example_new_structures.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/example_new_structures.rst b/doc/source/example_new_structures.rst index 3464dd2b..6001f004 100644 --- a/doc/source/example_new_structures.rst +++ b/doc/source/example_new_structures.rst @@ -6,7 +6,7 @@ Refining and Experimenting with Structures **parsnip** allows users to set the Wyckoff positions of a crystal, enabling the construction of modified (or entirely new) structures. 
In this example, we show how an experimental β-Manganese structure can be refined into the more uniform variant - described by `O'Keefe and Andersson`_. +described by `O'Keefe and Andersson`_. .. _`O'Keefe and Andersson`: https://doi.org/10.1107/S0567739477002228