From 91df96872260ce49c4116f881e440d5bf8b36b7f Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 10:49:43 +0100 Subject: [PATCH 1/6] Base untested implementation of TSC including CIF im-/export --- qcrboxtools/cif/file_converter/cartesian.py | 44 ++ qcrboxtools/cif/file_converter/tsc.py | 510 ++++++++++++++++++++ qcrboxtools/cif/read.py | 2 +- 3 files changed, 555 insertions(+), 1 deletion(-) create mode 100644 qcrboxtools/cif/file_converter/cartesian.py create mode 100644 qcrboxtools/cif/file_converter/tsc.py diff --git a/qcrboxtools/cif/file_converter/cartesian.py b/qcrboxtools/cif/file_converter/cartesian.py new file mode 100644 index 0000000..14c0386 --- /dev/null +++ b/qcrboxtools/cif/file_converter/cartesian.py @@ -0,0 +1,44 @@ +import numpy as np + +def cell_constants_to_matrix(a: float, b: float, c: float, alpha: float, beta: float, gamma: float) -> np.ndarray: + """ + Convert cell constants to a 3x3 cell matrix. + + Parameters + ---------- + a : float + Cell length a in Angstrom. + b : float + Cell length b in Angstrom. + c : float + Cell length c in Angstrom. + alpha : float + Cell angle alpha in degrees. + beta : float + Cell angle beta in degrees. + gamma : float + Cell angle gamma in degrees. + + Returns + ------- + np.ndarray + The 3x3 cell matrix. + """ + alpha_rad = np.radians(alpha) + beta_rad = np.radians(beta) + gamma_rad = np.radians(gamma) + + cos_alpha = np.cos(alpha_rad) + cos_beta = np.cos(beta_rad) + cos_gamma = np.cos(gamma_rad) + sin_gamma = np.sin(gamma_rad) + + matrix = np.zeros((3, 3)) + matrix[0, 0] = a + matrix[0, 1] = b * cos_gamma + matrix[0, 2] = c * cos_beta + matrix[1, 1] = b * sin_gamma + matrix[1, 2] = c * (cos_alpha - cos_beta * cos_gamma) / sin_gamma + matrix[2, 2] = c * np.sqrt(1 - cos_beta**2 - ((cos_alpha - cos_beta * cos_gamma) / sin_gamma) ** 2) + + return matrix \ No newline at end of file diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py new file mode 100644 index 0000000..4d09d52 --- /dev/null +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -0,0 +1,510 @@ +import struct +from abc import ABC, abstractmethod +from collections.abc import Iterable +from pathlib import Path +from textwrap import wrap + +from typing import Dict, List, Tuple, Union + +import numpy as np +from iotbx.cif.model import block, loop + +from .cartesian import cell_constants_to_matrix +from ..read import read_cif_as_unified, cifdata_str_or_index + +def read_tsc_file(path: Path): + """ + Reads a TSC or TSCB file and returns the corresponding object. + Parameters + ---------- + path : Path + The path to the TSC or TSCB file. + + Returns + ------- + TSCFile or TSCBFile + The TSCFile or TSCBFile object representing the file content. + + Raises + ------ + ValueError + If the file cannot be read as either TSC or TSCB format. 
+ """ + path = Path(path) + if path.suffix == ".tscb": + try: + return TSCBFile.from_file(path) + except Exception as exc: + try: + return TSCFile.from_file(path) + except Exception: + raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + elif path.suffix == ".tsc": + try: + return TSCFile.from_file(path) + except Exception as exc: + try: + return TSCBFile.from_file(path) + except Exception: + raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + + +def parse_header(header_str): + header = {} + header_split = iter(val.split(':') for val in header_str.strip().split('\n')) + + header_key = None + for line_split in header_split: + if len(line_split) == 2 and header_key is not None: + header[header_key] = header_entry + if len(line_split) == 2: + header_key, header_entry = line_split + else: + header_entry += '\n' + line_split[0] + header[header_key] = header_entry + return header + +def parse_tsc_data_line( + line: str + ) -> Tuple[Tuple[int, int, int], np.ndarray]: + """ + Parses a line of TSC data. + + Parameters + ---------- + line : str + The line of TSC data to parse. + + Returns + ------- + tuple + A tuple containing the indices h, k, l and the array of f0j values. + """ + h_str, k_str, l_str, *f0j_strs = line.split() + f0js = np.array([float(val.split(',')[0]) + 1j * float(val.split(',')[1]) for val in f0j_strs]) + return (int(h_str), int(k_str), int(l_str)), f0js + +class TSCBase(ABC): + def __init__(self): + self.header = { + 'TITLE': 'generic_tsc', + 'SYMM': 'expanded', + 'SCATTERERS' : '' + } + self.data = {} + + @property + def scatterers(self) -> List[str]: + """ + Retrieves scatterers from the TSC file as a list of strings generated + from the SCATTERERS header entry. + + Returns + ------- + list + A list of scatterer names. + """ + + return self.header['SCATTERERS'].strip().split() + + @scatterers.setter + def scatterers(self, scatterers: Iterable): + """ + Sets the scatterers in the TSC file. + + The input scatterers are converted to a space-separated string and + stored in the header under the key 'SCATTERERS'. + + Parameters + ---------- + scatterers : iterable + An iterable of scatterer names. + """ + self.header['SCATTERERS'] = ' '.join(str(val) for val in scatterers) + + def __getitem__( + self, + atom_site_label: Union[str, Iterable] + ) -> Dict[Tuple[int, int, int], np.ndarray]: + """ + Retrieves f0j values for a given atom site label. + + The function allows indexing the TSCFile object by atom site label or a + list of labels. If the given label is not found among the scatterers, + a ValueError is raised. + + Parameters + ---------- + atom_site_label : str or iterable + The atom site label or a list of labels to retrieve f0j values for. + + Returns + ------- + dict + A dictionary where each key is a tuple of indices (h, k, l) and the + corresponding value is a numpy array of f0j values for the given + label(s). + + Raises + ------ + ValueError + If an unknown atom site label is used for indexing. 
+ """ + try: + if isinstance(atom_site_label, str): + index = self.scatterers.index(atom_site_label) + return {hkl: f0js[index] for hkl, f0js in self.data.items()} + elif isinstance(atom_site_label, Iterable): + indexes = np.array([self.scatterers.index(label) for label in atom_site_label]) + return {hkl: f0js[indexes] for hkl, f0js in self.data.items()} + else: + index = self.scatterers.index(atom_site_label) + return {hkl: f0js[index] for hkl, f0js in self.data.items()} + except ValueError as exc: + if isinstance(atom_site_label, str): + unknown = [atom_site_label] + elif isinstance(atom_site_label, Iterable): + unknown = [label for label in atom_site_label if label not in self.scatterers] + else: + unknown = [atom_site_label] + raise ValueError(f'Unknown atom label(s) used for lookup from TSCFile: {" ".join(unknown)}') from exc + + @classmethod + @abstractmethod + def from_file(cls, filename: Path): + pass + + @abstractmethod + def to_file(self, filename: Path): + pass + + def _construct_moiety_loop(self, structure_cif_block: block): + """ + Constructs a CIF loop containing moiety information from a given CIF block. + + Parameters + ---------- + structure_cif_block : block + The CIF block containing the structure information. + + Returns + ------- + loop + A CIF loop containing the moiety information. + """ + cell_a = float(structure_cif_block["_cell.length_a"]) + cell_b = float(structure_cif_block["_cell.length_b"]) + cell_c = float(structure_cif_block["_cell.length_c"]) + alpha = float(structure_cif_block["_cell.angle_alpha"]) + beta = float(structure_cif_block["_cell.angle_beta"]) + gamma = float(structure_cif_block["_cell.angle_gamma"]) + cell_mat_m = cell_constants_to_matrix(cell_a, cell_b, cell_c, alpha, beta, gamma) + fract_x = np.array([float(val) for val in structure_cif_block["_atom_site.fract_x"]]) + fract_y = np.array([float(val) for val in structure_cif_block["_atom_site.fract_y"]]) + fract_z = np.array([float(val) for val in structure_cif_block["_atom_site.fract_z"]]) + + xyz_fract = np.stack((fract_x, fract_y, fract_z), axis=-1) + xyz_cart = np.einsum("xy, zy -> zx", cell_mat_m, xyz_fract) + cart_x, cart_y, cart_z = xyz_cart.T + + n_atoms = len(cart_x) + + # TODO revisit this as more sophisticated moiety handling is implemented + moiety_loop_data = { + '_wfn_moiety.id': np.full(n_atoms, 1), + '_wfn_moiety.atom_id': np.arange(1, n_atoms + 1), + '_wfn_moiety.asu_atom_site_label': structure_cif_block['_atom_site.label'], + '_wfn_moiety.atom_type_symbol': structure_cif_block['_atom_site.type_symbol'], + '_wfn_moiety.symm_code': ['1_555'] * n_atoms, + '_wfn_moiety.cartn_x': list(cart_x), + '_wfn_moiety.cartn_y': list(cart_y), + '_wfn_moiety.cartn_z': list(cart_z), + '_wfn_moiety.aff_index': [self.scatterers.index(name) + 1 for name in structure_cif_block['_atom_site.label']] + } + + return loop(data=moiety_loop_data) + + def _construct_aff_loop(self): + def create_aff_line_string(values): + converted = [f"{val: 3.8f}" for val in values] + single_line = " ".join(converted) + return "[" + "\n".join(wrap(single_line, width=2047)) + "]" + + mil_hkl = np.asarray(list(self.data.keys())) + aff_loop_data = { + '_aspheric_ff.index_h': mil_hkl[:, 0].copy(), + '_aspheric_ff.index_k': mil_hkl[:, 1].copy(), + '_aspheric_ff.index_l': mil_hkl[:, 2].copy(), + '_aspheric_ff.form_factor_real': list(create_aff_line_string(line) for line in np.real(all_affs)), + '_aspheric_ff.form_factor_imag': list(create_aff_line_string(line) for line in np.imag(all_affs)), + } + return loop(data=aff_loop_data) 
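# --- Illustrative sketch (editorial addition, not part of the patch above) ---
# Shows how one reflection's complex form factors map to the bracketed
# real/imaginary strings produced by _construct_aff_loop and parsed back by
# populate_from_cif_block further below. The names f0js and to_bracketed are
# invented for this illustration only.
import numpy as np
from textwrap import wrap

f0js = np.array([1.2345 + 0.5j, -0.25 - 1.0j])  # one hkl row, two scatterers

def to_bracketed(values):
    # mirrors create_aff_line_string: fixed-point values, space separated,
    # wrapped at 2047 characters, enclosed in square brackets
    return "[" + "\n".join(wrap(" ".join(f"{v: 3.8f}" for v in values), width=2047)) + "]"

real_str = to_bracketed(np.real(f0js))  # "[ 1.23450000 -0.25000000]"
imag_str = to_bracketed(np.imag(f0js))  # "[ 0.50000000 -1.00000000]"

# parsing back follows populate_from_cif_block: strip the brackets, split,
# convert to float and recombine real and imaginary parts
parsed = np.array([float(v) for v in real_str.strip("[]").split()]) + 1j * np.array(
    [float(v) for v in imag_str.strip("[]").split()]
)
assert np.allclose(parsed, f0js)
# --- end illustrative sketch ---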
+ + def to_cif( + self, + structure_cif_block: block, + partitioning_source: str, + partitioning_name: str, + partitioning_software: str + ) -> block: + """ + Converts the TSC data to a CIF block format. + + Parameters + ---------- + structure_cif_block : block + The CIF block containing the structure information. + partitioning_source : str + The source of the partitioning. + partitioning_name : str + The name of the partitioning scheme employed. + partitioning_software : str + The software used for the partitioning. + + Returns + ------- + block + A CIF block containing the TSC data. + """ + tsc_block = block() + tsc_block.add_data_item("_cell.length_a", structure_cif_block["_cell.length_a"]) + tsc_block.add_data_item("_cell.length_b", structure_cif_block["_cell.length_b"]) + tsc_block.add_data_item("_cell.length_c", structure_cif_block["_cell.length_c"]) + tsc_block.add_data_item("_cell.angle_alpha", structure_cif_block["_cell.angle_alpha"]) + tsc_block.add_data_item("_cell.angle_beta", structure_cif_block["_cell.angle_beta"]) + tsc_block.add_data_item("_cell.angle_gamma", structure_cif_block["_cell.angle_gamma"]) + tsc_block.add_loop(self._construct_moiety_loop(structure_cif_block)) + tsc_block.add_data_item("_aspheric_ffs.source", partitioning_source) + tsc_block.add_data_item("_aspheric_ffs_partitioning.name", partitioning_name) + tsc_block.add_data_item("_aspheric_ffs_partitioning.software", partitioning_software) + tsc_block.add_loop(self._construct_aff_loop()) + + return tsc_block + + def populate_from_cif_block(self, cif_block: block): + """ + Populates the TSCFile object from a CIF block created by the TSC to cif export function. + Parameters + ---------- + cif_block : block + The CIF block containing the TSC data. + Raises + ------ + ValueError + If the CIF block does not contain the required entries. + """ + if '_aspheric_ffs.source' not in cif_block or '_aspheric_ffs_partitioning.name' not in cif_block or '_aspheric_ffs_partitioning.software' not in cif_block: + raise ValueError("CIF block does not contain required TSC entries.") + self.scatterers = cif_block['_wfn_moiety.asu_atom_site_label'] + aff_loop = cif_block.get_loop('_aspheric_ff.index_h') + if aff_loop is None: + raise ValueError("CIF block does not contain required TSC entries.") + hkl_tuples = tuple((int(h), int(k), int(l)) for h, k, l in zip(aff_loop['_aspheric_ff.index_h'], aff_loop['_aspheric_ff.index_k'], aff_loop['_aspheric_ff.index_l'])) + real_lines = aff_loop['_aspheric_ff.form_factor_real'] + imag_lines = aff_loop['_aspheric_ff.form_factor_imag'] + real_vals = np.fromiter((float(val) for line in real_lines for val in line.strip('[]').split()), dtype=np.float64) + imag_vals = np.fromiter((float(val) for line in imag_lines for val in line.strip('[]').split()), dtype=np.float64) + all_affs = real_vals + 1j * imag_vals + n_atoms = len(self.scatterers) + if len(all_affs) % n_atoms != 0: + raise ValueError("Number of AFF values is not a multiple of number of scatterers.") + all_affs = all_affs.reshape((-1, n_atoms)) + self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs, strict=False)} + + + + + + +class TSCFile(TSCBase): + """ + A class representing a TSC file as defined in doi:10.48550/arXiv.1911.08847 + + A TSC file contains atomic form factors for a list of atoms and miller + indicees + + You can get data for atoms for example with tsc['C1'] or tsc[['C1', 'C2']] + currently setting is not implemented this way. 
All data is represented + in the data attribute + + Attributes + ---------- + header : dict + A dictionary holding the header information from the TSC file. + data : dict + A dictionary mapping tuples (h, k, l) to numpy arrays of f0j values, + where the ordering of the values is given by the content of the + scatterers property / the SCATTERERS entry in the header. + """ + + @classmethod + def from_file(cls, filename: Path) -> "TSCFile": + """ + Constructs a TSCFile object from a file. + + The function reads the TSC file, parses its header and data sections, + and constructs a TSCFile instance with these data. + + Parameters + ---------- + filename : Path + The name of the TSC file to read. + + Returns + ------- + TSCFile + A TSCFile instance with data loaded from the file. + """ + with open(filename, 'r') as fobj: + tsc_content = fobj.read() + header_str, data_str = tsc_content.split('DATA:\n') + + new_obj = cls() + + new_obj.header.update(parse_header(header_str)) + + parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split('\n')) + + new_obj.data = {hkl: f0js for hkl, f0js in parsed_iter} + + return new_obj + + def to_file(self, filename: Path) -> None: + """ + Writes the TSCFile object to a file. + + The function formats the header and data sections of the TSCFile object + and writes them to a file. Currently no safety checks are implemented + SCATTERERS and data need to match + + Parameters + ---------- + filename : Path + The name of the file to write. + """ + header_str = '\n'.join(f'{key}: {value}' for key, value in self.header.items()) + data_iter = iter(f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} {' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" for hkl, values in self.data.items()) + data_str = '\n'.join(data_iter) + + with open(filename, 'w') as fobj: + fobj.write(f'{header_str}\nDATA:\n{data_str}\n') + + @classmethod + def from_cif_file(cls, cif_path: Path) -> "TSCFile": + """ + Constructs a TSCFile object from a CIF file created by the TSC to cif export function. + + Parameters + ---------- + filename : Path + The name of the CIF file to read. + + Returns + ------- + TSCFile + A TSCFile instance with data loaded from the CIF file. + """ + cif_block = read_cif_as_unified(cif_path, 0) + new_obj = cls() + new_obj.populate_from_cif_block(cif_block) + return new_obj + + +class TSCBFile(TSCBase): + """ + A class representing a TSCB file used by for example NoSpherA2 + + A TSC file contains atomic form factors for a list of atoms and miller + indicees + + You can get data for atoms for example with tsc['C1'] or tsc[['C1', 'C2']] + currently setting is not implemented this way. All data is represented + in the data attribute + + Attributes + ---------- + header : dict + A dictionary holding the header information from the TSC file. + data : dict + A dictionary mapping tuples (h, k, l) to numpy arrays of f0j values, + where the ordering of the values is given by the content of the + scatterers property / the SCATTERERS entry in the header. + """ + + @classmethod + def from_file(cls, filename: Path) -> "TSCBFile": + """ + Constructs a TSCFile object from a file. + + The function reads the TSC file, parses its header and data sections, + and constructs a TSCFile instance with these data. + + Parameters + ---------- + filename : Path + The name of the TSC file to read. + + Returns + ------- + TSCFile + A TSCBFile instance with data loaded from the file. 
+ """ + new_obj = cls() + with open(filename, 'rb') as fobj: + additional_header_size, n_bytes_labels = struct.unpack('2i', fobj.read(8)) + if additional_header_size > 0: + header_str = fobj.read(additional_header_size).decode('ASCII') + + new_obj.header.update(parse_header(header_str)) + new_obj.header['SCATTERERS'] = fobj.read(n_bytes_labels).decode('ASCII') + + n_refln = struct.unpack('i', fobj.read(4))[0] + n_atoms = len(new_obj.header['SCATTERERS'].split()) + new_obj.data = { + tuple(np.frombuffer(fobj.read(12), dtype=np.int32)): np.frombuffer(fobj.read(n_atoms*16), dtype=np.complex128) for i in range(n_refln) + } + return new_obj + + def to_file(self, filename: Path) -> None: + """ + Writes the TSCBFile object to a file. + + The function formats the header and data sections of the TSCBFile object + and writes them to a file. Currently no safety checks are implemented + SCATTERERS and data need to match + + Parameters + ---------- + filename : str + The name of the file to write. + """ + if not next(iter(self.data.values())).dtype == np.complex128: + self.data = {key: value.astype(np.complex128) for key, value in self.data.items()} + omitted_header_entries = ('SCATTERERS', 'TITLE', 'SYMM') + header_string = '\n'.join(f'{name}: {entry}' for name, entry in self.header.items() if name not in omitted_header_entries) + with open(filename, 'wb') as fobj: + fobj.write(struct.pack('2i', len(header_string), len(self.header['SCATTERERS']))) + fobj.write(header_string.encode('ASCII')) + fobj.write(self.header['SCATTERERS'].encode('ASCII')) + fobj.write(struct.pack('i', len(self.data))) + fobj.write(bytes().join(struct.pack('3i', *hkl) + f0js.tobytes() for hkl, f0js in self.data.items())) + + @classmethod + def from_cif_file(cls, cif_path: Path) -> "TSCBFile": + """ + Constructs a TSCFile object from a CIF file created by the TSC to cif export function. + + Parameters + ---------- + filename : Path + The name of the CIF file to read. + + Returns + ------- + TSCBFile + A TSCBFile instance with data loaded from the CIF file. + """ + cif_block = read_cif_as_unified(cif_path, 0) + new_obj = cls() + new_obj.populate_from_cif_block(cif_block) + return new_obj \ No newline at end of file diff --git a/qcrboxtools/cif/read.py b/qcrboxtools/cif/read.py index 47ebf3f..a668205 100644 --- a/qcrboxtools/cif/read.py +++ b/qcrboxtools/cif/read.py @@ -64,7 +64,7 @@ def cifdata_str_or_index(model: cif.model.cif, dataset: Union[int, str]) -> Tupl def read_cif_as_unified( cif_path: Union[str, Path], - dataset: Optional[str] = None, + dataset: Optional[Union[str, int]] = None, convert_keywords: bool = True, custom_categories: Optional[List[str]] = None, split_sus: bool = True, From 9ef53c3ef98d575ae07ec55964140362bd103aa2 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 10:55:25 +0100 Subject: [PATCH 2/6] improve formatting, fix some bugs --- qcrboxtools/cif/file_converter/cartesian.py | 3 +- qcrboxtools/cif/file_converter/tsc.py | 177 +++++++++++--------- 2 files changed, 96 insertions(+), 84 deletions(-) diff --git a/qcrboxtools/cif/file_converter/cartesian.py b/qcrboxtools/cif/file_converter/cartesian.py index 14c0386..8790b6e 100644 --- a/qcrboxtools/cif/file_converter/cartesian.py +++ b/qcrboxtools/cif/file_converter/cartesian.py @@ -1,5 +1,6 @@ import numpy as np + def cell_constants_to_matrix(a: float, b: float, c: float, alpha: float, beta: float, gamma: float) -> np.ndarray: """ Convert cell constants to a 3x3 cell matrix. 
@@ -41,4 +42,4 @@ def cell_constants_to_matrix(a: float, b: float, c: float, alpha: float, beta: f matrix[1, 2] = c * (cos_alpha - cos_beta * cos_gamma) / sin_gamma matrix[2, 2] = c * np.sqrt(1 - cos_beta**2 - ((cos_alpha - cos_beta * cos_gamma) / sin_gamma) ** 2) - return matrix \ No newline at end of file + return matrix diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 4d09d52..fee98cd 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -3,14 +3,14 @@ from collections.abc import Iterable from pathlib import Path from textwrap import wrap - from typing import Dict, List, Tuple, Union import numpy as np from iotbx.cif.model import block, loop +from ..read import read_cif_as_unified from .cartesian import cell_constants_to_matrix -from ..read import read_cif_as_unified, cifdata_str_or_index + def read_tsc_file(path: Path): """ @@ -51,22 +51,22 @@ def read_tsc_file(path: Path): def parse_header(header_str): header = {} - header_split = iter(val.split(':') for val in header_str.strip().split('\n')) + header_split = iter(val.split(":") for val in header_str.strip().split("\n")) header_key = None + header_entry = "" for line_split in header_split: if len(line_split) == 2 and header_key is not None: header[header_key] = header_entry if len(line_split) == 2: header_key, header_entry = line_split else: - header_entry += '\n' + line_split[0] + header_entry += "\n" + line_split[0] header[header_key] = header_entry return header -def parse_tsc_data_line( - line: str - ) -> Tuple[Tuple[int, int, int], np.ndarray]: + +def parse_tsc_data_line(line: str) -> Tuple[Tuple[int, int, int], np.ndarray]: """ Parses a line of TSC data. @@ -80,17 +80,14 @@ def parse_tsc_data_line( tuple A tuple containing the indices h, k, l and the array of f0j values. """ - h_str, k_str, l_str, *f0j_strs = line.split() - f0js = np.array([float(val.split(',')[0]) + 1j * float(val.split(',')[1]) for val in f0j_strs]) + h_str, k_str, l_str, *f0j_strs = line.split() + f0js = np.array([float(val.split(",")[0]) + 1j * float(val.split(",")[1]) for val in f0j_strs]) return (int(h_str), int(k_str), int(l_str)), f0js + class TSCBase(ABC): def __init__(self): - self.header = { - 'TITLE': 'generic_tsc', - 'SYMM': 'expanded', - 'SCATTERERS' : '' - } + self.header = {"TITLE": "generic_tsc", "SYMM": "expanded", "SCATTERERS": ""} self.data = {} @property @@ -105,7 +102,7 @@ def scatterers(self) -> List[str]: A list of scatterer names. """ - return self.header['SCATTERERS'].strip().split() + return self.header["SCATTERERS"].strip().split() @scatterers.setter def scatterers(self, scatterers: Iterable): @@ -120,12 +117,9 @@ def scatterers(self, scatterers: Iterable): scatterers : iterable An iterable of scatterer names. """ - self.header['SCATTERERS'] = ' '.join(str(val) for val in scatterers) + self.header["SCATTERERS"] = " ".join(str(val) for val in scatterers) - def __getitem__( - self, - atom_site_label: Union[str, Iterable] - ) -> Dict[Tuple[int, int, int], np.ndarray]: + def __getitem__(self, atom_site_label: Union[str, Iterable]) -> Dict[Tuple[int, int, int], np.ndarray]: """ Retrieves f0j values for a given atom site label. 
@@ -167,7 +161,7 @@ def __getitem__( unknown = [label for label in atom_site_label if label not in self.scatterers] else: unknown = [atom_site_label] - raise ValueError(f'Unknown atom label(s) used for lookup from TSCFile: {" ".join(unknown)}') from exc + raise ValueError(f"Unknown atom label(s) used for lookup from TSCFile: {' '.join(unknown)}") from exc @classmethod @abstractmethod @@ -211,41 +205,40 @@ def _construct_moiety_loop(self, structure_cif_block: block): # TODO revisit this as more sophisticated moiety handling is implemented moiety_loop_data = { - '_wfn_moiety.id': np.full(n_atoms, 1), - '_wfn_moiety.atom_id': np.arange(1, n_atoms + 1), - '_wfn_moiety.asu_atom_site_label': structure_cif_block['_atom_site.label'], - '_wfn_moiety.atom_type_symbol': structure_cif_block['_atom_site.type_symbol'], - '_wfn_moiety.symm_code': ['1_555'] * n_atoms, - '_wfn_moiety.cartn_x': list(cart_x), - '_wfn_moiety.cartn_y': list(cart_y), - '_wfn_moiety.cartn_z': list(cart_z), - '_wfn_moiety.aff_index': [self.scatterers.index(name) + 1 for name in structure_cif_block['_atom_site.label']] + "_wfn_moiety.id": np.full(n_atoms, 1), + "_wfn_moiety.atom_id": np.arange(1, n_atoms + 1), + "_wfn_moiety.asu_atom_site_label": structure_cif_block["_atom_site.label"], + "_wfn_moiety.atom_type_symbol": structure_cif_block["_atom_site.type_symbol"], + "_wfn_moiety.symm_code": ["1_555"] * n_atoms, + "_wfn_moiety.cartn_x": list(cart_x), + "_wfn_moiety.cartn_y": list(cart_y), + "_wfn_moiety.cartn_z": list(cart_z), + "_wfn_moiety.aff_index": [ + self.scatterers.index(name) + 1 for name in structure_cif_block["_atom_site.label"] + ], } return loop(data=moiety_loop_data) - + def _construct_aff_loop(self): def create_aff_line_string(values): converted = [f"{val: 3.8f}" for val in values] single_line = " ".join(converted) return "[" + "\n".join(wrap(single_line, width=2047)) + "]" - + mil_hkl = np.asarray(list(self.data.keys())) + all_affs = np.array(list(self.data.values())) aff_loop_data = { - '_aspheric_ff.index_h': mil_hkl[:, 0].copy(), - '_aspheric_ff.index_k': mil_hkl[:, 1].copy(), - '_aspheric_ff.index_l': mil_hkl[:, 2].copy(), - '_aspheric_ff.form_factor_real': list(create_aff_line_string(line) for line in np.real(all_affs)), - '_aspheric_ff.form_factor_imag': list(create_aff_line_string(line) for line in np.imag(all_affs)), + "_aspheric_ff.index_h": mil_hkl[:, 0].copy(), + "_aspheric_ff.index_k": mil_hkl[:, 1].copy(), + "_aspheric_ff.index_l": mil_hkl[:, 2].copy(), + "_aspheric_ff.form_factor_real": list(create_aff_line_string(line) for line in np.real(all_affs)), + "_aspheric_ff.form_factor_imag": list(create_aff_line_string(line) for line in np.imag(all_affs)), } return loop(data=aff_loop_data) def to_cif( - self, - structure_cif_block: block, - partitioning_source: str, - partitioning_name: str, - partitioning_software: str + self, structure_cif_block: block, partitioning_source: str, partitioning_name: str, partitioning_software: str ) -> block: """ Converts the TSC data to a CIF block format. @@ -260,7 +253,7 @@ def to_cif( The name of the partitioning scheme employed. partitioning_software : str The software used for the partitioning. - + Returns ------- block @@ -280,7 +273,7 @@ def to_cif( tsc_block.add_loop(self._construct_aff_loop()) return tsc_block - + def populate_from_cif_block(self, cif_block: block): """ Populates the TSCFile object from a CIF block created by the TSC to cif export function. 
@@ -291,29 +284,36 @@ def populate_from_cif_block(self, cif_block: block): Raises ------ ValueError - If the CIF block does not contain the required entries. + If the CIF block does not contain the required entries. """ - if '_aspheric_ffs.source' not in cif_block or '_aspheric_ffs_partitioning.name' not in cif_block or '_aspheric_ffs_partitioning.software' not in cif_block: + if ( + "_aspheric_ffs.source" not in cif_block + or "_aspheric_ffs_partitioning.name" not in cif_block + or "_aspheric_ffs_partitioning.software" not in cif_block + ): raise ValueError("CIF block does not contain required TSC entries.") - self.scatterers = cif_block['_wfn_moiety.asu_atom_site_label'] - aff_loop = cif_block.get_loop('_aspheric_ff.index_h') + self.scatterers = cif_block["_wfn_moiety.asu_atom_site_label"] + aff_loop = cif_block.get_loop("_aspheric_ff.index_h") if aff_loop is None: raise ValueError("CIF block does not contain required TSC entries.") - hkl_tuples = tuple((int(h), int(k), int(l)) for h, k, l in zip(aff_loop['_aspheric_ff.index_h'], aff_loop['_aspheric_ff.index_k'], aff_loop['_aspheric_ff.index_l'])) - real_lines = aff_loop['_aspheric_ff.form_factor_real'] - imag_lines = aff_loop['_aspheric_ff.form_factor_imag'] - real_vals = np.fromiter((float(val) for line in real_lines for val in line.strip('[]').split()), dtype=np.float64) - imag_vals = np.fromiter((float(val) for line in imag_lines for val in line.strip('[]').split()), dtype=np.float64) + hkl_zip = zip( + aff_loop["_aspheric_ff.index_h"], aff_loop["_aspheric_ff.index_k"], aff_loop["_aspheric_ff.index_l"] + ) + hkl_tuples = tuple((int(mil_h), int(mil_k), int(mil_l)) for mil_h, mil_k, mil_l in hkl_zip) + real_lines = aff_loop["_aspheric_ff.form_factor_real"] + imag_lines = aff_loop["_aspheric_ff.form_factor_imag"] + real_vals = np.fromiter( + (float(val) for line in real_lines for val in line.strip("[]").split()), dtype=np.float64 + ) + imag_vals = np.fromiter( + (float(val) for line in imag_lines for val in line.strip("[]").split()), dtype=np.float64 + ) all_affs = real_vals + 1j * imag_vals n_atoms = len(self.scatterers) if len(all_affs) % n_atoms != 0: raise ValueError("Number of AFF values is not a multiple of number of scatterers.") all_affs = all_affs.reshape((-1, n_atoms)) self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs, strict=False)} - - - - class TSCFile(TSCBase): @@ -355,15 +355,15 @@ def from_file(cls, filename: Path) -> "TSCFile": TSCFile A TSCFile instance with data loaded from the file. """ - with open(filename, 'r') as fobj: + with open(filename, "r") as fobj: tsc_content = fobj.read() - header_str, data_str = tsc_content.split('DATA:\n') + header_str, data_str = tsc_content.split("DATA:\n") new_obj = cls() new_obj.header.update(parse_header(header_str)) - parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split('\n')) + parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split("\n")) new_obj.data = {hkl: f0js for hkl, f0js in parsed_iter} @@ -382,12 +382,18 @@ def to_file(self, filename: Path) -> None: filename : Path The name of the file to write. 
""" - header_str = '\n'.join(f'{key}: {value}' for key, value in self.header.items()) - data_iter = iter(f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} {' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" for hkl, values in self.data.items()) - data_str = '\n'.join(data_iter) - - with open(filename, 'w') as fobj: - fobj.write(f'{header_str}\nDATA:\n{data_str}\n') + header_str = "\n".join(f"{key}: {value}" for key, value in self.header.items()) + data_iter = iter( + ( + f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} " + + f"{' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" + ) + for hkl, values in self.data.items() + ) + data_str = "\n".join(data_iter) + + with open(filename, "w") as fobj: + fobj.write(f"{header_str}\nDATA:\n{data_str}\n") @classmethod def from_cif_file(cls, cif_path: Path) -> "TSCFile": @@ -409,7 +415,7 @@ def from_cif_file(cls, cif_path: Path) -> "TSCFile": new_obj.populate_from_cif_block(cif_block) return new_obj - + class TSCBFile(TSCBase): """ A class representing a TSCB file used by for example NoSpherA2 @@ -450,18 +456,21 @@ def from_file(cls, filename: Path) -> "TSCBFile": A TSCBFile instance with data loaded from the file. """ new_obj = cls() - with open(filename, 'rb') as fobj: - additional_header_size, n_bytes_labels = struct.unpack('2i', fobj.read(8)) + with open(filename, "rb") as fobj: + additional_header_size, n_bytes_labels = struct.unpack("2i", fobj.read(8)) if additional_header_size > 0: - header_str = fobj.read(additional_header_size).decode('ASCII') + header_str = fobj.read(additional_header_size).decode("ASCII") new_obj.header.update(parse_header(header_str)) - new_obj.header['SCATTERERS'] = fobj.read(n_bytes_labels).decode('ASCII') + new_obj.header["SCATTERERS"] = fobj.read(n_bytes_labels).decode("ASCII") - n_refln = struct.unpack('i', fobj.read(4))[0] - n_atoms = len(new_obj.header['SCATTERERS'].split()) + n_refln = struct.unpack("i", fobj.read(4))[0] + n_atoms = len(new_obj.header["SCATTERERS"].split()) new_obj.data = { - tuple(np.frombuffer(fobj.read(12), dtype=np.int32)): np.frombuffer(fobj.read(n_atoms*16), dtype=np.complex128) for i in range(n_refln) + tuple(np.frombuffer(fobj.read(12), dtype=np.int32)): np.frombuffer( + fobj.read(n_atoms * 16), dtype=np.complex128 + ) + for i in range(n_refln) } return new_obj @@ -480,14 +489,16 @@ def to_file(self, filename: Path) -> None: """ if not next(iter(self.data.values())).dtype == np.complex128: self.data = {key: value.astype(np.complex128) for key, value in self.data.items()} - omitted_header_entries = ('SCATTERERS', 'TITLE', 'SYMM') - header_string = '\n'.join(f'{name}: {entry}' for name, entry in self.header.items() if name not in omitted_header_entries) - with open(filename, 'wb') as fobj: - fobj.write(struct.pack('2i', len(header_string), len(self.header['SCATTERERS']))) - fobj.write(header_string.encode('ASCII')) - fobj.write(self.header['SCATTERERS'].encode('ASCII')) - fobj.write(struct.pack('i', len(self.data))) - fobj.write(bytes().join(struct.pack('3i', *hkl) + f0js.tobytes() for hkl, f0js in self.data.items())) + omitted_header_entries = ("SCATTERERS", "TITLE", "SYMM") + header_string = "\n".join( + f"{name}: {entry}" for name, entry in self.header.items() if name not in omitted_header_entries + ) + with open(filename, "wb") as fobj: + fobj.write(struct.pack("2i", len(header_string), len(self.header["SCATTERERS"]))) + fobj.write(header_string.encode("ASCII")) + fobj.write(self.header["SCATTERERS"].encode("ASCII")) + fobj.write(struct.pack("i", 
len(self.data))) + fobj.write(bytes().join(struct.pack("3i", *hkl) + f0js.tobytes() for hkl, f0js in self.data.items())) @classmethod def from_cif_file(cls, cif_path: Path) -> "TSCBFile": @@ -507,4 +518,4 @@ def from_cif_file(cls, cif_path: Path) -> "TSCBFile": cif_block = read_cif_as_unified(cif_path, 0) new_obj = cls() new_obj.populate_from_cif_block(cif_block) - return new_obj \ No newline at end of file + return new_obj From 167dbe9982fdc280a39180e6d70258c08f363489 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 11:48:12 +0100 Subject: [PATCH 3/6] Add tests for and improve the tsc handling --- qcrboxtools/cif/file_converter/tsc.py | 15 +- tests/cif/convert/test_tsc.py | 575 ++++++++++++++++++++++++++ 2 files changed, 580 insertions(+), 10 deletions(-) create mode 100644 tests/cif/convert/test_tsc.py diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index fee98cd..553b6e1 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -145,19 +145,14 @@ def __getitem__(self, atom_site_label: Union[str, Iterable]) -> Dict[Tuple[int, If an unknown atom site label is used for indexing. """ try: - if isinstance(atom_site_label, str): - index = self.scatterers.index(atom_site_label) - return {hkl: f0js[index] for hkl, f0js in self.data.items()} - elif isinstance(atom_site_label, Iterable): + if isinstance(atom_site_label, Iterable) and not isinstance(atom_site_label, str): indexes = np.array([self.scatterers.index(label) for label in atom_site_label]) return {hkl: f0js[indexes] for hkl, f0js in self.data.items()} else: index = self.scatterers.index(atom_site_label) return {hkl: f0js[index] for hkl, f0js in self.data.items()} except ValueError as exc: - if isinstance(atom_site_label, str): - unknown = [atom_site_label] - elif isinstance(atom_site_label, Iterable): + if isinstance(atom_site_label, Iterable) and not isinstance(atom_site_label, str): unknown = [label for label in atom_site_label if label not in self.scatterers] else: unknown = [atom_site_label] @@ -293,9 +288,9 @@ def populate_from_cif_block(self, cif_block: block): ): raise ValueError("CIF block does not contain required TSC entries.") self.scatterers = cif_block["_wfn_moiety.asu_atom_site_label"] - aff_loop = cif_block.get_loop("_aspheric_ff.index_h") + aff_loop = cif_block.get_loop("_aspheric_ff") if aff_loop is None: - raise ValueError("CIF block does not contain required TSC entries.") + raise ValueError("CIF block does not contain required TSC entries for the loop _aspheric_ff.") hkl_zip = zip( aff_loop["_aspheric_ff.index_h"], aff_loop["_aspheric_ff.index_k"], aff_loop["_aspheric_ff.index_l"] ) @@ -517,5 +512,5 @@ def from_cif_file(cls, cif_path: Path) -> "TSCBFile": """ cif_block = read_cif_as_unified(cif_path, 0) new_obj = cls() - new_obj.populate_from_cif_block(cif_block) + new_obj.populate_from_cif_block(cif_block) return new_obj diff --git a/tests/cif/convert/test_tsc.py b/tests/cif/convert/test_tsc.py new file mode 100644 index 0000000..be16321 --- /dev/null +++ b/tests/cif/convert/test_tsc.py @@ -0,0 +1,575 @@ +""" +Test module for `qcrboxtools.cif.file_converter.tsc`. + +This module contains unit tests for TSC and TSCB file reading, parsing, +and conversion functionality. 
+""" + +import struct +from pathlib import Path +from unittest.mock import patch + +import numpy as np +import pytest + +from qcrboxtools.cif.file_converter.tsc import ( + TSCBFile, + TSCFile, + parse_header, + parse_tsc_data_line, + read_tsc_file, +) + + +@pytest.fixture +def sample_tsc_content(): + """Sample TSC file content for testing.""" + return """TITLE: Test TSC File +SYMM: expanded +SCATTERERS: C1 C2 O1 +DATA: +1 0 0 1.23450000e+00,0.00000000e+00 2.34560000e+00,1.00000000e+00 3.45670000e+00,-1.00000000e+00 +0 1 0 4.56780000e+00,0.50000000e+00 5.67890000e+00,-0.50000000e+00 6.78900000e+00,0.25000000e+00 +""" + + +@pytest.fixture +def sample_header_string(): + """Sample header string for testing.""" + return """TITLE: Test Header +SYMM: expanded +SCATTERERS: C1 C2 O1 +MULTI_LINE: This is a +multi-line entry +that spans several lines""" + + +@pytest.fixture +def sample_tscb_file(tmp_path): + """Create a sample TSCB file for testing.""" + tscb_path = tmp_path / "test.tscb" + + # Create minimal TSCB file content + header_str = "TITLE: Test TSCB\nSYMM: expanded" + scatterers_str = "C1 C2 O1" + + with open(tscb_path, "wb") as f: + # Write header size and scatterers size + f.write(struct.pack("2i", len(header_str), len(scatterers_str))) + # Write header and scatterers + f.write(header_str.encode("ASCII")) + f.write(scatterers_str.encode("ASCII")) + # Write number of reflections + f.write(struct.pack("i", 2)) + + # Write reflection data: hkl + form factors for 3 atoms + # Reflection 1: (1,0,0) + f.write(struct.pack("3i", 1, 0, 0)) + f.write(np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=np.complex128).tobytes()) + + # Reflection 2: (0,1,0) + f.write(struct.pack("3i", 0, 1, 0)) + f.write(np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j], dtype=np.complex128).tobytes()) + + return tscb_path + + +def test_parse_header(sample_header_string): + """Test header parsing functionality.""" + header = parse_header(sample_header_string) + + assert header["TITLE"] == " Test Header" + assert header["SYMM"] == " expanded" + assert header["SCATTERERS"] == " C1 C2 O1" + assert "This is a\nmulti-line entry\nthat spans several lines" in header["MULTI_LINE"] + + +def test_parse_header_empty(): + """Test header parsing with empty string.""" + header = parse_header("") + assert header == {None: "\n"} + + +def test_parse_tsc_data_line(): + """Test parsing of TSC data lines.""" + line = "1 0 0 1.23450000e+00,0.00000000e+00 2.34560000e+00,1.00000000e+00" + hkl, f0js = parse_tsc_data_line(line) + + assert hkl == (1, 0, 0) + assert len(f0js) == 2 + assert np.isclose(f0js[0], 1.2345 + 0.0j) + assert np.isclose(f0js[1], 2.3456 + 1.0j) + + +def test_parse_tsc_data_line_negative_indices(): + """Test parsing TSC data line with negative Miller indices.""" + line = "-1 2 -3 1.00000000e+00,2.00000000e+00" + hkl, f0js = parse_tsc_data_line(line) + + assert hkl == (-1, 2, -3) + assert len(f0js) == 1 + assert np.isclose(f0js[0], 1.0 + 2.0j) + + +def test_read_tsc_file_tsc_extension(tmp_path, sample_tsc_content): + """Test reading a .tsc file.""" + tsc_path = tmp_path / "test.tsc" + tsc_path.write_text(sample_tsc_content) + + result = read_tsc_file(tsc_path) + + assert isinstance(result, TSCFile) + assert result.scatterers == ["C1", "C2", "O1"] + assert (1, 0, 0) in result.data + assert (0, 1, 0) in result.data + + +def test_read_tsc_file_tscb_extension(sample_tscb_file): + """Test reading a .tscb file.""" + result = read_tsc_file(sample_tscb_file) + + assert isinstance(result, TSCBFile) + assert result.scatterers == ["C1", "C2", "O1"] 
+ assert (1, 0, 0) in result.data + assert (0, 1, 0) in result.data + + +def test_read_tsc_file_invalid_tsc(tmp_path): + """Test reading invalid TSC file raises ValueError.""" + invalid_tsc = tmp_path / "invalid.tsc" + invalid_tsc.write_text("This is not a valid TSC file") + + with pytest.raises(ValueError, match="Cannot read AFF file"): + read_tsc_file(invalid_tsc) + + +def test_read_tsc_file_invalid_tscb(tmp_path): + """Test reading invalid TSCB file raises ValueError.""" + invalid_tscb = tmp_path / "invalid.tscb" + invalid_tscb.write_bytes(b"This is not a valid TSCB file") + + with pytest.raises(ValueError, match="Cannot read AFF file"): + read_tsc_file(invalid_tscb) + + +def test_tsc_file_scatterers_property(): + """Test TSCFile scatterers property.""" + tsc = TSCFile() + tsc.header["SCATTERERS"] = "C1 C2 O1 N1" + + assert tsc.scatterers == ["C1", "C2", "O1", "N1"] + + +def test_tsc_file_scatterers_setter(): + """Test TSCFile scatterers setter.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + + assert tsc.header["SCATTERERS"] == "C1 C2 O1" + + +def test_tsc_file_getitem_single_atom(): + """Test TSCFile indexing with single atom label.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + } + + result = tsc["C1"] + expected = { + (1, 0, 0): 1.0+0.0j, + (0, 1, 0): 4.0+0.5j + } + + assert len(result) == 2 + for hkl, value in expected.items(): + assert np.isclose(result[hkl], value) + + +def test_tsc_file_getitem_multiple_atoms(): + """Test TSCFile indexing with multiple atom labels.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + } + + result = tsc[["C1", "O1"]] + expected_values = np.array([1.0+0.0j, 3.0-1.0j]) + + assert len(result) == 1 + assert np.allclose(result[(1, 0, 0)], expected_values) + + +def test_tsc_file_getitem_unknown_atom(): + """Test TSCFile indexing with unknown atom raises ValueError.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2"] + + with pytest.raises(ValueError, match="Unknown atom label.*O1"): + tsc["O1"] + + +def test_tsc_file_from_file(tmp_path, sample_tsc_content): + """Test TSCFile.from_file method.""" + tsc_path = tmp_path / "test.tsc" + tsc_path.write_text(sample_tsc_content) + + tsc = TSCFile.from_file(tsc_path) + + assert tsc.header["TITLE"] == " Test TSC File" + assert tsc.scatterers == ["C1", "C2", "O1"] + assert len(tsc.data) == 2 + + +def test_tsc_file_to_file(tmp_path): + """Test TSCFile.to_file method.""" + tsc = TSCFile() + tsc.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} + tsc.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j])} + + output_path = tmp_path / "output.tsc" + tsc.to_file(output_path) + + # Verify file was written correctly + content = output_path.read_text() + assert "TITLE: Test" in content + assert "DATA:" in content + assert "1 0 0" in content + + +def test_tscb_file_from_file(sample_tscb_file): + """Test TSCBFile.from_file method.""" + tscb = TSCBFile.from_file(sample_tscb_file) + + assert tscb.scatterers == ["C1", "C2", "O1"] + assert len(tscb.data) == 2 + assert (1, 0, 0) in tscb.data + assert len(tscb.data[(1, 0, 0)]) == 3 + + +def test_tscb_file_to_file(tmp_path): + """Test TSCBFile.to_file method.""" + tscb = TSCBFile() + tscb.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} + tscb.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j], dtype=np.complex128)} + + 
output_path = tmp_path / "output.tscb" + tscb.to_file(output_path) + + # Verify file exists and has content + assert output_path.exists() + assert output_path.stat().st_size > 0 + + # Try to read it back + tscb_read = TSCBFile.from_file(output_path) + assert tscb_read.scatterers == ["C1", "C2"] + assert (1, 0, 0) in tscb_read.data + + +def test_tscb_file_empty_header(tmp_path): + """Test TSCBFile with empty additional header.""" + tscb_path = tmp_path / "test_empty_header.tscb" + scatterers_str = "C1" + + with open(tscb_path, "wb") as f: + # Write zero header size + f.write(struct.pack("2i", 0, len(scatterers_str))) + f.write(scatterers_str.encode("ASCII")) + f.write(struct.pack("i", 1)) # One reflection + f.write(struct.pack("3i", 1, 0, 0)) + f.write(np.array([1.0+0.0j], dtype=np.complex128).tobytes()) + + tscb = TSCBFile.from_file(tscb_path) + assert tscb.scatterers == ["C1"] + assert len(tscb.data) == 1 + +@pytest.fixture +def sample_structure_cif_content(): + """Sample structure CIF content for testing.""" + return """ +data_test +_cell.length_a 10.000 +_cell.length_b 12.000 +_cell.length_c 8.000 +_cell.angle_alpha 90.0 +_cell.angle_beta 95.0 +_cell.angle_gamma 90.0 + +loop_ +_atom_site.label +_atom_site.type_symbol +_atom_site.fract_x +_atom_site.fract_y +_atom_site.fract_z +C1 C 0.1 0.2 0.3 +C2 C 0.4 0.5 0.6 +O1 O 0.7 0.8 0.9 +""" + + +@pytest.fixture +def sample_tsc_cif_content(): + """Sample TSC-generated CIF content for testing.""" + return """ +data_test +_cell.length_a 10.000 +_cell.length_b 12.000 +_cell.length_c 8.000 +_cell.angle_alpha 90.0 +_cell.angle_beta 95.0 +_cell.angle_gamma 90.0 + +loop_ +_wfn_moiety.id +_wfn_moiety.atom_id +_wfn_moiety.asu_atom_site_label +_wfn_moiety.atom_type_symbol +_wfn_moiety.symm_code +_wfn_moiety.cartn_x +_wfn_moiety.cartn_y +_wfn_moiety.cartn_z +_wfn_moiety.aff_index +1 1 C1 C 1_555 1.0 2.4 2.4 1 +1 2 C2 C 1_555 4.0 6.0 4.8 2 +1 3 O1 O 1_555 7.0 9.6 7.2 3 + +_aspheric_ffs.source 'test_source' +_aspheric_ffs_partitioning.name 'test_partitioning' +_aspheric_ffs_partitioning.software 'test_software' + +loop_ +_aspheric_ff.index_h +_aspheric_ff.index_k +_aspheric_ff.index_l +_aspheric_ff.form_factor_real +_aspheric_ff.form_factor_imag +1 0 0 '[1.00000000 2.00000000 3.00000000]' '[0.00000000 1.00000000 -1.00000000]' +0 1 0 '[4.00000000 5.00000000 6.00000000]' '[0.50000000 -0.50000000 0.25000000]' +""" + + +@pytest.fixture +def structure_cif_block(tmp_path, sample_structure_cif_content): + """Create a structure CIF file and return the first block.""" + cif_path = tmp_path / "structure.cif" + cif_path.write_text(sample_structure_cif_content) + + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) + + +@pytest.fixture +def tsc_cif_block(tmp_path, sample_tsc_cif_content): + """Create a TSC CIF file and return the first block.""" + cif_path = tmp_path / "tsc.cif" + cif_path.write_text(sample_tsc_cif_content) + + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) + + +def test_tsc_to_cif_conversion(structure_cif_block): + """Test converting TSC data to CIF format.""" + # Create a TSC file with test data + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + } + + # Convert to CIF + cif_block = tsc.to_cif( + structure_cif_block, + partitioning_source="test_source", + partitioning_name="test_partitioning", + 
partitioning_software="test_software" + ) + + # Verify cell parameters are preserved + assert cif_block["_cell.length_a"] == "10.000" + assert cif_block["_cell.length_b"] == "12.000" + assert cif_block["_cell.angle_beta"] == "95.0" + + # Verify partitioning metadata + assert cif_block["_aspheric_ffs.source"] == "test_source" + assert cif_block["_aspheric_ffs_partitioning.name"] == "test_partitioning" + assert cif_block["_aspheric_ffs_partitioning.software"] == "test_software" + + # Verify moiety loop exists + moiety_loop = cif_block.get_loop("_wfn_moiety") + assert moiety_loop is not None + assert len(moiety_loop["_wfn_moiety.atom_id"]) == 3 + + # Verify AFF loop exists + aff_loop = cif_block.get_loop("_aspheric_ff") + assert aff_loop is not None + assert len(aff_loop["_aspheric_ff.index_h"]) == 2 + + +def test_tsc_populate_from_cif_block(tsc_cif_block): + """Test populating TSC from CIF block.""" + tsc = TSCFile() + tsc.populate_from_cif_block(tsc_cif_block) + + # Verify scatterers + assert tsc.scatterers == ["C1", "C2", "O1"] + + # Verify data was loaded correctly + assert len(tsc.data) == 2 + assert (1, 0, 0) in tsc.data + assert (0, 1, 0) in tsc.data + + # Verify form factor values + hkl_100_data = tsc.data[(1, 0, 0)] + assert np.isclose(hkl_100_data[0], 1.0+0.0j) + assert np.isclose(hkl_100_data[1], 2.0+1.0j) + assert np.isclose(hkl_100_data[2], 3.0-1.0j) + + +def test_tsc_from_cif_file(tmp_path, sample_tsc_cif_content): + """Test creating TSC from CIF file.""" + cif_path = tmp_path / "test.cif" + cif_path.write_text(sample_tsc_cif_content) + + tsc = TSCFile.from_cif_file(cif_path) + + assert tsc.scatterers == ["C1", "C2", "O1"] + assert len(tsc.data) == 2 + assert (1, 0, 0) in tsc.data + + +def test_tscb_from_cif_file(tmp_path, sample_tsc_cif_content): + """Test creating TSCB from CIF file.""" + cif_path = tmp_path / "test.cif" + cif_path.write_text(sample_tsc_cif_content) + + tscb = TSCBFile.from_cif_file(cif_path) + + assert tscb.scatterers == ["C1", "C2", "O1"] + assert len(tscb.data) == 2 + assert (1, 0, 0) in tscb.data + + +def test_populate_from_cif_block_missing_entries(): + """Test error handling when CIF block is missing required entries.""" + from iotbx.cif.model import block + + # Create incomplete block missing required entries + incomplete_block = block() + incomplete_block.add_data_item("_cell.length_a", "10.0") + + tsc = TSCFile() + + with pytest.raises(ValueError, match="CIF block does not contain required TSC entries"): + tsc.populate_from_cif_block(incomplete_block) + + +def test_populate_from_cif_block_missing_aff_loop(): + """Test error handling when AFF loop is missing.""" + from iotbx.cif.model import block + + # Create block with metadata but no AFF loop + incomplete_block = block() + incomplete_block.add_data_item("_aspheric_ffs.source", "test") + incomplete_block.add_data_item("_aspheric_ffs_partitioning.name", "test") + incomplete_block.add_data_item("_aspheric_ffs_partitioning.software", "test") + + tsc = TSCFile() + + with pytest.raises(KeyError): + tsc.populate_from_cif_block(incomplete_block) + + +def test_populate_from_cif_mismatched_atom_count(): + """Test error when AFF values don't match atom count.""" + from iotbx.cif.model import block, loop + + # Create block with mismatched data + test_block = block() + test_block.add_data_item("_aspheric_ffs.source", "test") + test_block.add_data_item("_aspheric_ffs_partitioning.name", "test") + test_block.add_data_item("_aspheric_ffs_partitioning.software", "test") + + # Moiety loop with 2 atoms + moiety_data = 
{ + "_wfn_moiety.asu_atom_site_label": ["C1", "C2"] + } + test_block.add_loop(loop(data=moiety_data)) + + # AFF loop with 3 values (mismatch) + aff_data = { + "_aspheric_ff.index_h": [1], + "_aspheric_ff.index_k": [0], + "_aspheric_ff.index_l": [0], + "_aspheric_ff.form_factor_real": ["[1.0 2.0 3.0]"], # 3 values + "_aspheric_ff.form_factor_imag": ["[0.0 1.0 -1.0]"] # 3 values + } + test_block.add_loop(loop(data=aff_data)) + + tsc = TSCFile() + + with pytest.raises(ValueError, match="Number of AFF values is not a multiple of number of scatterers"): + tsc.populate_from_cif_block(test_block) + + +def test_construct_aff_loop_formatting(): + """Test that AFF loop formats form factors correctly.""" + tsc = TSCFile() + tsc.data = { + (1, 0, 0): np.array([1.23456789+0.0j, 2.345+1.0j]), + (-1, 2, -3): np.array([4.567-0.5j, 6.789+0.25j]) + } + + aff_loop = tsc._construct_aff_loop() + + # Verify structure + assert len(aff_loop["_aspheric_ff.index_h"]) == 2 + assert aff_loop["_aspheric_ff.index_h"][0] == "1" + assert aff_loop["_aspheric_ff.index_k"][1] == "2" + assert aff_loop["_aspheric_ff.index_l"][1] == "-3" + + # Verify formatting (should be wrapped in brackets with proper precision) + real_line = aff_loop["_aspheric_ff.form_factor_real"][0] + assert real_line.startswith("[") + assert real_line.endswith("]") + assert "1.23456789" in real_line + + +def test_round_trip_tsc_cif_conversion(tmp_path, structure_cif_block): + """Test round-trip TSC -> CIF -> TSC conversion preserves data.""" + # Create original TSC + original_tsc = TSCFile() + original_tsc.scatterers = ["C1", "C2", "O1"] + original_tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]), + (-1, -2, 3): np.array([7.0+2.0j, 8.0-2.0j, 9.0+0.1j]) + } + + # Convert to CIF + cif_block = original_tsc.to_cif( + structure_cif_block, + partitioning_source="test", + partitioning_name="test", + partitioning_software="test" + ) + + # Convert back to TSC + reconstructed_tsc = TSCFile() + reconstructed_tsc.populate_from_cif_block(cif_block) + + # Verify data is preserved + assert reconstructed_tsc.scatterers == original_tsc.scatterers + assert len(reconstructed_tsc.data) == len(original_tsc.data) + + for hkl in original_tsc.data: + assert hkl in reconstructed_tsc.data + np.testing.assert_allclose( + reconstructed_tsc.data[hkl], + original_tsc.data[hkl], + rtol=1e-6 + ) \ No newline at end of file From 2c834dd1ba1084400f2031b0342362b829a22273 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 11:50:15 +0100 Subject: [PATCH 4/6] linting for tsc file handling and tests --- qcrboxtools/cif/file_converter/tsc.py | 2 +- tests/cif/convert/test_tsc.py | 195 ++++++++++++-------------- 2 files changed, 93 insertions(+), 104 deletions(-) diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 553b6e1..2cd8fc2 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -512,5 +512,5 @@ def from_cif_file(cls, cif_path: Path) -> "TSCBFile": """ cif_block = read_cif_as_unified(cif_path, 0) new_obj = cls() - new_obj.populate_from_cif_block(cif_block) + new_obj.populate_from_cif_block(cif_block) return new_obj diff --git a/tests/cif/convert/test_tsc.py b/tests/cif/convert/test_tsc.py index be16321..f404728 100644 --- a/tests/cif/convert/test_tsc.py +++ b/tests/cif/convert/test_tsc.py @@ -6,8 +6,6 @@ """ import struct -from pathlib import Path -from unittest.mock import patch import numpy as np import pytest @@ 
-48,11 +46,11 @@ def sample_header_string(): def sample_tscb_file(tmp_path): """Create a sample TSCB file for testing.""" tscb_path = tmp_path / "test.tscb" - + # Create minimal TSCB file content header_str = "TITLE: Test TSCB\nSYMM: expanded" scatterers_str = "C1 C2 O1" - + with open(tscb_path, "wb") as f: # Write header size and scatterers size f.write(struct.pack("2i", len(header_str), len(scatterers_str))) @@ -61,23 +59,23 @@ def sample_tscb_file(tmp_path): f.write(scatterers_str.encode("ASCII")) # Write number of reflections f.write(struct.pack("i", 2)) - + # Write reflection data: hkl + form factors for 3 atoms # Reflection 1: (1,0,0) f.write(struct.pack("3i", 1, 0, 0)) - f.write(np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=np.complex128).tobytes()) - - # Reflection 2: (0,1,0) + f.write(np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j], dtype=np.complex128).tobytes()) + + # Reflection 2: (0,1,0) f.write(struct.pack("3i", 0, 1, 0)) - f.write(np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j], dtype=np.complex128).tobytes()) - + f.write(np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j], dtype=np.complex128).tobytes()) + return tscb_path def test_parse_header(sample_header_string): """Test header parsing functionality.""" header = parse_header(sample_header_string) - + assert header["TITLE"] == " Test Header" assert header["SYMM"] == " expanded" assert header["SCATTERERS"] == " C1 C2 O1" @@ -94,7 +92,7 @@ def test_parse_tsc_data_line(): """Test parsing of TSC data lines.""" line = "1 0 0 1.23450000e+00,0.00000000e+00 2.34560000e+00,1.00000000e+00" hkl, f0js = parse_tsc_data_line(line) - + assert hkl == (1, 0, 0) assert len(f0js) == 2 assert np.isclose(f0js[0], 1.2345 + 0.0j) @@ -105,7 +103,7 @@ def test_parse_tsc_data_line_negative_indices(): """Test parsing TSC data line with negative Miller indices.""" line = "-1 2 -3 1.00000000e+00,2.00000000e+00" hkl, f0js = parse_tsc_data_line(line) - + assert hkl == (-1, 2, -3) assert len(f0js) == 1 assert np.isclose(f0js[0], 1.0 + 2.0j) @@ -115,9 +113,9 @@ def test_read_tsc_file_tsc_extension(tmp_path, sample_tsc_content): """Test reading a .tsc file.""" tsc_path = tmp_path / "test.tsc" tsc_path.write_text(sample_tsc_content) - + result = read_tsc_file(tsc_path) - + assert isinstance(result, TSCFile) assert result.scatterers == ["C1", "C2", "O1"] assert (1, 0, 0) in result.data @@ -127,7 +125,7 @@ def test_read_tsc_file_tsc_extension(tmp_path, sample_tsc_content): def test_read_tsc_file_tscb_extension(sample_tscb_file): """Test reading a .tscb file.""" result = read_tsc_file(sample_tscb_file) - + assert isinstance(result, TSCBFile) assert result.scatterers == ["C1", "C2", "O1"] assert (1, 0, 0) in result.data @@ -138,7 +136,7 @@ def test_read_tsc_file_invalid_tsc(tmp_path): """Test reading invalid TSC file raises ValueError.""" invalid_tsc = tmp_path / "invalid.tsc" invalid_tsc.write_text("This is not a valid TSC file") - + with pytest.raises(ValueError, match="Cannot read AFF file"): read_tsc_file(invalid_tsc) @@ -147,7 +145,7 @@ def test_read_tsc_file_invalid_tscb(tmp_path): """Test reading invalid TSCB file raises ValueError.""" invalid_tscb = tmp_path / "invalid.tscb" invalid_tscb.write_bytes(b"This is not a valid TSCB file") - + with pytest.raises(ValueError, match="Cannot read AFF file"): read_tsc_file(invalid_tscb) @@ -156,7 +154,7 @@ def test_tsc_file_scatterers_property(): """Test TSCFile scatterers property.""" tsc = TSCFile() tsc.header["SCATTERERS"] = "C1 C2 O1 N1" - + assert tsc.scatterers == ["C1", "C2", "O1", "N1"] @@ -164,7 +162,7 @@ def 
test_tsc_file_scatterers_setter(): """Test TSCFile scatterers setter.""" tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] - + assert tsc.header["SCATTERERS"] == "C1 C2 O1" @@ -173,16 +171,13 @@ def test_tsc_file_getitem_single_atom(): tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), - (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), + (0, 1, 0): np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j]), } - + result = tsc["C1"] - expected = { - (1, 0, 0): 1.0+0.0j, - (0, 1, 0): 4.0+0.5j - } - + expected = {(1, 0, 0): 1.0 + 0.0j, (0, 1, 0): 4.0 + 0.5j} + assert len(result) == 2 for hkl, value in expected.items(): assert np.isclose(result[hkl], value) @@ -193,12 +188,12 @@ def test_tsc_file_getitem_multiple_atoms(): tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), } - + result = tsc[["C1", "O1"]] - expected_values = np.array([1.0+0.0j, 3.0-1.0j]) - + expected_values = np.array([1.0 + 0.0j, 3.0 - 1.0j]) + assert len(result) == 1 assert np.allclose(result[(1, 0, 0)], expected_values) @@ -207,7 +202,7 @@ def test_tsc_file_getitem_unknown_atom(): """Test TSCFile indexing with unknown atom raises ValueError.""" tsc = TSCFile() tsc.scatterers = ["C1", "C2"] - + with pytest.raises(ValueError, match="Unknown atom label.*O1"): tsc["O1"] @@ -216,9 +211,9 @@ def test_tsc_file_from_file(tmp_path, sample_tsc_content): """Test TSCFile.from_file method.""" tsc_path = tmp_path / "test.tsc" tsc_path.write_text(sample_tsc_content) - + tsc = TSCFile.from_file(tsc_path) - + assert tsc.header["TITLE"] == " Test TSC File" assert tsc.scatterers == ["C1", "C2", "O1"] assert len(tsc.data) == 2 @@ -228,11 +223,11 @@ def test_tsc_file_to_file(tmp_path): """Test TSCFile.to_file method.""" tsc = TSCFile() tsc.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} - tsc.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j])} - + tsc.data = {(1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j])} + output_path = tmp_path / "output.tsc" tsc.to_file(output_path) - + # Verify file was written correctly content = output_path.read_text() assert "TITLE: Test" in content @@ -243,7 +238,7 @@ def test_tsc_file_to_file(tmp_path): def test_tscb_file_from_file(sample_tscb_file): """Test TSCBFile.from_file method.""" tscb = TSCBFile.from_file(sample_tscb_file) - + assert tscb.scatterers == ["C1", "C2", "O1"] assert len(tscb.data) == 2 assert (1, 0, 0) in tscb.data @@ -254,15 +249,15 @@ def test_tscb_file_to_file(tmp_path): """Test TSCBFile.to_file method.""" tscb = TSCBFile() tscb.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} - tscb.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j], dtype=np.complex128)} - + tscb.data = {(1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j], dtype=np.complex128)} + output_path = tmp_path / "output.tscb" tscb.to_file(output_path) - + # Verify file exists and has content assert output_path.exists() assert output_path.stat().st_size > 0 - + # Try to read it back tscb_read = TSCBFile.from_file(output_path) assert tscb_read.scatterers == ["C1", "C2"] @@ -273,19 +268,20 @@ def test_tscb_file_empty_header(tmp_path): """Test TSCBFile with empty additional header.""" tscb_path = tmp_path / "test_empty_header.tscb" scatterers_str = "C1" - + with open(tscb_path, "wb") as f: # Write zero header size f.write(struct.pack("2i", 0, 
len(scatterers_str))) f.write(scatterers_str.encode("ASCII")) f.write(struct.pack("i", 1)) # One reflection f.write(struct.pack("3i", 1, 0, 0)) - f.write(np.array([1.0+0.0j], dtype=np.complex128).tobytes()) - + f.write(np.array([1.0 + 0.0j], dtype=np.complex128).tobytes()) + tscb = TSCBFile.from_file(tscb_path) assert tscb.scatterers == ["C1"] assert len(tscb.data) == 1 + @pytest.fixture def sample_structure_cif_content(): """Sample structure CIF content for testing.""" @@ -356,8 +352,9 @@ def structure_cif_block(tmp_path, sample_structure_cif_content): """Create a structure CIF file and return the first block.""" cif_path = tmp_path / "structure.cif" cif_path.write_text(sample_structure_cif_content) - + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) @@ -366,8 +363,9 @@ def tsc_cif_block(tmp_path, sample_tsc_cif_content): """Create a TSC CIF file and return the first block.""" cif_path = tmp_path / "tsc.cif" cif_path.write_text(sample_tsc_cif_content) - + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) @@ -377,33 +375,33 @@ def test_tsc_to_cif_conversion(structure_cif_block): tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), - (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), + (0, 1, 0): np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j]), } - + # Convert to CIF cif_block = tsc.to_cif( structure_cif_block, partitioning_source="test_source", - partitioning_name="test_partitioning", - partitioning_software="test_software" + partitioning_name="test_partitioning", + partitioning_software="test_software", ) - + # Verify cell parameters are preserved assert cif_block["_cell.length_a"] == "10.000" assert cif_block["_cell.length_b"] == "12.000" assert cif_block["_cell.angle_beta"] == "95.0" - + # Verify partitioning metadata assert cif_block["_aspheric_ffs.source"] == "test_source" assert cif_block["_aspheric_ffs_partitioning.name"] == "test_partitioning" assert cif_block["_aspheric_ffs_partitioning.software"] == "test_software" - + # Verify moiety loop exists moiety_loop = cif_block.get_loop("_wfn_moiety") assert moiety_loop is not None assert len(moiety_loop["_wfn_moiety.atom_id"]) == 3 - + # Verify AFF loop exists aff_loop = cif_block.get_loop("_aspheric_ff") assert aff_loop is not None @@ -414,29 +412,29 @@ def test_tsc_populate_from_cif_block(tsc_cif_block): """Test populating TSC from CIF block.""" tsc = TSCFile() tsc.populate_from_cif_block(tsc_cif_block) - + # Verify scatterers assert tsc.scatterers == ["C1", "C2", "O1"] - + # Verify data was loaded correctly assert len(tsc.data) == 2 assert (1, 0, 0) in tsc.data assert (0, 1, 0) in tsc.data - + # Verify form factor values hkl_100_data = tsc.data[(1, 0, 0)] - assert np.isclose(hkl_100_data[0], 1.0+0.0j) - assert np.isclose(hkl_100_data[1], 2.0+1.0j) - assert np.isclose(hkl_100_data[2], 3.0-1.0j) + assert np.isclose(hkl_100_data[0], 1.0 + 0.0j) + assert np.isclose(hkl_100_data[1], 2.0 + 1.0j) + assert np.isclose(hkl_100_data[2], 3.0 - 1.0j) def test_tsc_from_cif_file(tmp_path, sample_tsc_cif_content): """Test creating TSC from CIF file.""" cif_path = tmp_path / "test.cif" cif_path.write_text(sample_tsc_cif_content) - + tsc = TSCFile.from_cif_file(cif_path) - + assert tsc.scatterers == ["C1", "C2", "O1"] assert len(tsc.data) == 2 assert (1, 0, 0) in tsc.data @@ -446,9 +444,9 @@ def test_tscb_from_cif_file(tmp_path, 
sample_tsc_cif_content): """Test creating TSCB from CIF file.""" cif_path = tmp_path / "test.cif" cif_path.write_text(sample_tsc_cif_content) - + tscb = TSCBFile.from_cif_file(cif_path) - + assert tscb.scatterers == ["C1", "C2", "O1"] assert len(tscb.data) == 2 assert (1, 0, 0) in tscb.data @@ -457,13 +455,13 @@ def test_tscb_from_cif_file(tmp_path, sample_tsc_cif_content): def test_populate_from_cif_block_missing_entries(): """Test error handling when CIF block is missing required entries.""" from iotbx.cif.model import block - + # Create incomplete block missing required entries incomplete_block = block() incomplete_block.add_data_item("_cell.length_a", "10.0") - + tsc = TSCFile() - + with pytest.raises(ValueError, match="CIF block does not contain required TSC entries"): tsc.populate_from_cif_block(incomplete_block) @@ -471,15 +469,15 @@ def test_populate_from_cif_block_missing_entries(): def test_populate_from_cif_block_missing_aff_loop(): """Test error handling when AFF loop is missing.""" from iotbx.cif.model import block - + # Create block with metadata but no AFF loop incomplete_block = block() incomplete_block.add_data_item("_aspheric_ffs.source", "test") incomplete_block.add_data_item("_aspheric_ffs_partitioning.name", "test") incomplete_block.add_data_item("_aspheric_ffs_partitioning.software", "test") - + tsc = TSCFile() - + with pytest.raises(KeyError): tsc.populate_from_cif_block(incomplete_block) @@ -487,31 +485,29 @@ def test_populate_from_cif_block_missing_aff_loop(): def test_populate_from_cif_mismatched_atom_count(): """Test error when AFF values don't match atom count.""" from iotbx.cif.model import block, loop - + # Create block with mismatched data test_block = block() test_block.add_data_item("_aspheric_ffs.source", "test") test_block.add_data_item("_aspheric_ffs_partitioning.name", "test") test_block.add_data_item("_aspheric_ffs_partitioning.software", "test") - + # Moiety loop with 2 atoms - moiety_data = { - "_wfn_moiety.asu_atom_site_label": ["C1", "C2"] - } + moiety_data = {"_wfn_moiety.asu_atom_site_label": ["C1", "C2"]} test_block.add_loop(loop(data=moiety_data)) - + # AFF loop with 3 values (mismatch) aff_data = { "_aspheric_ff.index_h": [1], - "_aspheric_ff.index_k": [0], + "_aspheric_ff.index_k": [0], "_aspheric_ff.index_l": [0], "_aspheric_ff.form_factor_real": ["[1.0 2.0 3.0]"], # 3 values - "_aspheric_ff.form_factor_imag": ["[0.0 1.0 -1.0]"] # 3 values + "_aspheric_ff.form_factor_imag": ["[0.0 1.0 -1.0]"], # 3 values } test_block.add_loop(loop(data=aff_data)) - + tsc = TSCFile() - + with pytest.raises(ValueError, match="Number of AFF values is not a multiple of number of scatterers"): tsc.populate_from_cif_block(test_block) @@ -520,12 +516,12 @@ def test_construct_aff_loop_formatting(): """Test that AFF loop formats form factors correctly.""" tsc = TSCFile() tsc.data = { - (1, 0, 0): np.array([1.23456789+0.0j, 2.345+1.0j]), - (-1, 2, -3): np.array([4.567-0.5j, 6.789+0.25j]) + (1, 0, 0): np.array([1.23456789 + 0.0j, 2.345 + 1.0j]), + (-1, 2, -3): np.array([4.567 - 0.5j, 6.789 + 0.25j]), } - + aff_loop = tsc._construct_aff_loop() - + # Verify structure assert len(aff_loop["_aspheric_ff.index_h"]) == 2 assert aff_loop["_aspheric_ff.index_h"][0] == "1" @@ -545,31 +541,24 @@ def test_round_trip_tsc_cif_conversion(tmp_path, structure_cif_block): original_tsc = TSCFile() original_tsc.scatterers = ["C1", "C2", "O1"] original_tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), - (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]), - (-1, -2, 3): 
np.array([7.0+2.0j, 8.0-2.0j, 9.0+0.1j]) + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), + (0, 1, 0): np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j]), + (-1, -2, 3): np.array([7.0 + 2.0j, 8.0 - 2.0j, 9.0 + 0.1j]), } - + # Convert to CIF cif_block = original_tsc.to_cif( - structure_cif_block, - partitioning_source="test", - partitioning_name="test", - partitioning_software="test" + structure_cif_block, partitioning_source="test", partitioning_name="test", partitioning_software="test" ) - + # Convert back to TSC reconstructed_tsc = TSCFile() reconstructed_tsc.populate_from_cif_block(cif_block) - + # Verify data is preserved assert reconstructed_tsc.scatterers == original_tsc.scatterers assert len(reconstructed_tsc.data) == len(original_tsc.data) - + for hkl in original_tsc.data: assert hkl in reconstructed_tsc.data - np.testing.assert_allclose( - reconstructed_tsc.data[hkl], - original_tsc.data[hkl], - rtol=1e-6 - ) \ No newline at end of file + np.testing.assert_allclose(reconstructed_tsc.data[hkl], original_tsc.data[hkl], rtol=1e-6) From 7862c0b9bea1f41770cec6d5d6187b41bde284bc Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 12:06:32 +0100 Subject: [PATCH 5/6] More improvements --- qcrboxtools/cif/file_converter/tsc.py | 31 ++++++++++++++++++++++----- tests/cif/convert/test_tsc.py | 6 +++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 2cd8fc2..5a857e2 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -38,7 +38,7 @@ def read_tsc_file(path: Path): try: return TSCFile.from_file(path) except Exception: - raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + raise ValueError(f"Cannot read TSCB file: {str(path)}") from exc elif path.suffix == ".tsc": try: return TSCFile.from_file(path) @@ -46,10 +46,25 @@ def read_tsc_file(path: Path): try: return TSCBFile.from_file(path) except Exception: - raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + raise ValueError(f"Cannot read TSC file: {str(path)}") from exc def parse_header(header_str): + """ + Parses the header section of a TSC file. + + Parameters + ---------- + header_str : str + The header section of the TSC file as a string. + + Returns + ------- + dict + A dictionary containing the parsed header information. + """ + if not header_str.strip(): + return {} header = {} header_split = iter(val.split(":") for val in header_str.strip().split("\n")) @@ -60,8 +75,10 @@ def parse_header(header_str): header[header_key] = header_entry if len(line_split) == 2: header_key, header_entry = line_split - else: + elif len(line_split) == 1 and header_key is not None: header_entry += "\n" + line_split[0] + else: + raise ValueError(f"Malformed header line: {':'.join(line_split)}") header[header_key] = header_entry return header @@ -80,8 +97,10 @@ def parse_tsc_data_line(line: str) -> Tuple[Tuple[int, int, int], np.ndarray]: tuple A tuple containing the indices h, k, l and the array of f0j values. 
""" + h_str, k_str, l_str, *f0j_strs = line.split() - f0js = np.array([float(val.split(",")[0]) + 1j * float(val.split(",")[1]) for val in f0j_strs]) + parts = (val.split(",") for val in f0j_strs) + f0js = np.array([float(real_val) + 1j * float(imag_val) for real_val, imag_val in parts]) return (int(h_str), int(k_str), int(l_str)), f0js @@ -308,7 +327,9 @@ def populate_from_cif_block(self, cif_block: block): if len(all_affs) % n_atoms != 0: raise ValueError("Number of AFF values is not a multiple of number of scatterers.") all_affs = all_affs.reshape((-1, n_atoms)) - self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs, strict=False)} + if len(hkl_tuples) != len(all_affs): + raise ValueError("Number of Miller indices does not match number of AFF value sets.") + self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs)} class TSCFile(TSCBase): diff --git a/tests/cif/convert/test_tsc.py b/tests/cif/convert/test_tsc.py index f404728..72ee345 100644 --- a/tests/cif/convert/test_tsc.py +++ b/tests/cif/convert/test_tsc.py @@ -85,7 +85,7 @@ def test_parse_header(sample_header_string): def test_parse_header_empty(): """Test header parsing with empty string.""" header = parse_header("") - assert header == {None: "\n"} + assert header == {} def test_parse_tsc_data_line(): @@ -137,7 +137,7 @@ def test_read_tsc_file_invalid_tsc(tmp_path): invalid_tsc = tmp_path / "invalid.tsc" invalid_tsc.write_text("This is not a valid TSC file") - with pytest.raises(ValueError, match="Cannot read AFF file"): + with pytest.raises(ValueError, match="Cannot read TSC file"): read_tsc_file(invalid_tsc) @@ -146,7 +146,7 @@ def test_read_tsc_file_invalid_tscb(tmp_path): invalid_tscb = tmp_path / "invalid.tscb" invalid_tscb.write_bytes(b"This is not a valid TSCB file") - with pytest.raises(ValueError, match="Cannot read AFF file"): + with pytest.raises(ValueError, match="Cannot read TSCB file"): read_tsc_file(invalid_tscb) From 6e3ae5f2b96cd76c98e2e1ed22b57acf0876a7b7 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 12:12:19 +0100 Subject: [PATCH 6/6] Further small improvements to tsc handling module --- qcrboxtools/cif/file_converter/tsc.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 5a857e2..3e9d221 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -97,7 +97,7 @@ def parse_tsc_data_line(line: str) -> Tuple[Tuple[int, int, int], np.ndarray]: tuple A tuple containing the indices h, k, l and the array of f0j values. """ - + h_str, k_str, l_str, *f0j_strs = line.split() parts = (val.split(",") for val in f0j_strs) f0js = np.array([float(real_val) + 1j * float(imag_val) for real_val, imag_val in parts]) @@ -379,9 +379,9 @@ def from_file(cls, filename: Path) -> "TSCFile": new_obj.header.update(parse_header(header_str)) - parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split("\n")) + parsed_data_lines = iter(parse_tsc_data_line(line) for line in data_str.strip().split("\n")) - new_obj.data = {hkl: f0js for hkl, f0js in parsed_iter} + new_obj.data = {hkl: f0js for hkl, f0js in parsed_data_lines} return new_obj @@ -399,14 +399,12 @@ def to_file(self, filename: Path) -> None: The name of the file to write. 
""" header_str = "\n".join(f"{key}: {value}" for key, value in self.header.items()) - data_iter = iter( - ( - f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} " - + f"{' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" - ) + formatted_data_lines = ( + f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} " + + f"{' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" for hkl, values in self.data.items() ) - data_str = "\n".join(data_iter) + data_str = "\n".join(formatted_data_lines) with open(filename, "w") as fobj: fobj.write(f"{header_str}\nDATA:\n{data_str}\n")