From 91df96872260ce49c4116f881e440d5bf8b36b7f Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 10:49:43 +0100 Subject: [PATCH 1/6] Base untested implementation of TSC including CIF im-/export --- qcrboxtools/cif/file_converter/cartesian.py | 44 ++ qcrboxtools/cif/file_converter/tsc.py | 510 ++++++++++++++++++++ qcrboxtools/cif/read.py | 2 +- 3 files changed, 555 insertions(+), 1 deletion(-) create mode 100644 qcrboxtools/cif/file_converter/cartesian.py create mode 100644 qcrboxtools/cif/file_converter/tsc.py diff --git a/qcrboxtools/cif/file_converter/cartesian.py b/qcrboxtools/cif/file_converter/cartesian.py new file mode 100644 index 0000000..14c0386 --- /dev/null +++ b/qcrboxtools/cif/file_converter/cartesian.py @@ -0,0 +1,44 @@ +import numpy as np + +def cell_constants_to_matrix(a: float, b: float, c: float, alpha: float, beta: float, gamma: float) -> np.ndarray: + """ + Convert cell constants to a 3x3 cell matrix. + + Parameters + ---------- + a : float + Cell length a in Angstrom. + b : float + Cell length b in Angstrom. + c : float + Cell length c in Angstrom. + alpha : float + Cell angle alpha in degrees. + beta : float + Cell angle beta in degrees. + gamma : float + Cell angle gamma in degrees. + + Returns + ------- + np.ndarray + The 3x3 cell matrix. + """ + alpha_rad = np.radians(alpha) + beta_rad = np.radians(beta) + gamma_rad = np.radians(gamma) + + cos_alpha = np.cos(alpha_rad) + cos_beta = np.cos(beta_rad) + cos_gamma = np.cos(gamma_rad) + sin_gamma = np.sin(gamma_rad) + + matrix = np.zeros((3, 3)) + matrix[0, 0] = a + matrix[0, 1] = b * cos_gamma + matrix[0, 2] = c * cos_beta + matrix[1, 1] = b * sin_gamma + matrix[1, 2] = c * (cos_alpha - cos_beta * cos_gamma) / sin_gamma + matrix[2, 2] = c * np.sqrt(1 - cos_beta**2 - ((cos_alpha - cos_beta * cos_gamma) / sin_gamma) ** 2) + + return matrix \ No newline at end of file diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py new file mode 100644 index 0000000..4d09d52 --- /dev/null +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -0,0 +1,510 @@ +import struct +from abc import ABC, abstractmethod +from collections.abc import Iterable +from pathlib import Path +from textwrap import wrap + +from typing import Dict, List, Tuple, Union + +import numpy as np +from iotbx.cif.model import block, loop + +from .cartesian import cell_constants_to_matrix +from ..read import read_cif_as_unified, cifdata_str_or_index + +def read_tsc_file(path: Path): + """ + Reads a TSC or TSCB file and returns the corresponding object. + Parameters + ---------- + path : Path + The path to the TSC or TSCB file. + + Returns + ------- + TSCFile or TSCBFile + The TSCFile or TSCBFile object representing the file content. + + Raises + ------ + ValueError + If the file cannot be read as either TSC or TSCB format. 
+ """ + path = Path(path) + if path.suffix == ".tscb": + try: + return TSCBFile.from_file(path) + except Exception as exc: + try: + return TSCFile.from_file(path) + except Exception: + raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + elif path.suffix == ".tsc": + try: + return TSCFile.from_file(path) + except Exception as exc: + try: + return TSCBFile.from_file(path) + except Exception: + raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + + +def parse_header(header_str): + header = {} + header_split = iter(val.split(':') for val in header_str.strip().split('\n')) + + header_key = None + for line_split in header_split: + if len(line_split) == 2 and header_key is not None: + header[header_key] = header_entry + if len(line_split) == 2: + header_key, header_entry = line_split + else: + header_entry += '\n' + line_split[0] + header[header_key] = header_entry + return header + +def parse_tsc_data_line( + line: str + ) -> Tuple[Tuple[int, int, int], np.ndarray]: + """ + Parses a line of TSC data. + + Parameters + ---------- + line : str + The line of TSC data to parse. + + Returns + ------- + tuple + A tuple containing the indices h, k, l and the array of f0j values. + """ + h_str, k_str, l_str, *f0j_strs = line.split() + f0js = np.array([float(val.split(',')[0]) + 1j * float(val.split(',')[1]) for val in f0j_strs]) + return (int(h_str), int(k_str), int(l_str)), f0js + +class TSCBase(ABC): + def __init__(self): + self.header = { + 'TITLE': 'generic_tsc', + 'SYMM': 'expanded', + 'SCATTERERS' : '' + } + self.data = {} + + @property + def scatterers(self) -> List[str]: + """ + Retrieves scatterers from the TSC file as a list of strings generated + from the SCATTERERS header entry. + + Returns + ------- + list + A list of scatterer names. + """ + + return self.header['SCATTERERS'].strip().split() + + @scatterers.setter + def scatterers(self, scatterers: Iterable): + """ + Sets the scatterers in the TSC file. + + The input scatterers are converted to a space-separated string and + stored in the header under the key 'SCATTERERS'. + + Parameters + ---------- + scatterers : iterable + An iterable of scatterer names. + """ + self.header['SCATTERERS'] = ' '.join(str(val) for val in scatterers) + + def __getitem__( + self, + atom_site_label: Union[str, Iterable] + ) -> Dict[Tuple[int, int, int], np.ndarray]: + """ + Retrieves f0j values for a given atom site label. + + The function allows indexing the TSCFile object by atom site label or a + list of labels. If the given label is not found among the scatterers, + a ValueError is raised. + + Parameters + ---------- + atom_site_label : str or iterable + The atom site label or a list of labels to retrieve f0j values for. + + Returns + ------- + dict + A dictionary where each key is a tuple of indices (h, k, l) and the + corresponding value is a numpy array of f0j values for the given + label(s). + + Raises + ------ + ValueError + If an unknown atom site label is used for indexing. 
+ """ + try: + if isinstance(atom_site_label, str): + index = self.scatterers.index(atom_site_label) + return {hkl: f0js[index] for hkl, f0js in self.data.items()} + elif isinstance(atom_site_label, Iterable): + indexes = np.array([self.scatterers.index(label) for label in atom_site_label]) + return {hkl: f0js[indexes] for hkl, f0js in self.data.items()} + else: + index = self.scatterers.index(atom_site_label) + return {hkl: f0js[index] for hkl, f0js in self.data.items()} + except ValueError as exc: + if isinstance(atom_site_label, str): + unknown = [atom_site_label] + elif isinstance(atom_site_label, Iterable): + unknown = [label for label in atom_site_label if label not in self.scatterers] + else: + unknown = [atom_site_label] + raise ValueError(f'Unknown atom label(s) used for lookup from TSCFile: {" ".join(unknown)}') from exc + + @classmethod + @abstractmethod + def from_file(cls, filename: Path): + pass + + @abstractmethod + def to_file(self, filename: Path): + pass + + def _construct_moiety_loop(self, structure_cif_block: block): + """ + Constructs a CIF loop containing moiety information from a given CIF block. + + Parameters + ---------- + structure_cif_block : block + The CIF block containing the structure information. + + Returns + ------- + loop + A CIF loop containing the moiety information. + """ + cell_a = float(structure_cif_block["_cell.length_a"]) + cell_b = float(structure_cif_block["_cell.length_b"]) + cell_c = float(structure_cif_block["_cell.length_c"]) + alpha = float(structure_cif_block["_cell.angle_alpha"]) + beta = float(structure_cif_block["_cell.angle_beta"]) + gamma = float(structure_cif_block["_cell.angle_gamma"]) + cell_mat_m = cell_constants_to_matrix(cell_a, cell_b, cell_c, alpha, beta, gamma) + fract_x = np.array([float(val) for val in structure_cif_block["_atom_site.fract_x"]]) + fract_y = np.array([float(val) for val in structure_cif_block["_atom_site.fract_y"]]) + fract_z = np.array([float(val) for val in structure_cif_block["_atom_site.fract_z"]]) + + xyz_fract = np.stack((fract_x, fract_y, fract_z), axis=-1) + xyz_cart = np.einsum("xy, zy -> zx", cell_mat_m, xyz_fract) + cart_x, cart_y, cart_z = xyz_cart.T + + n_atoms = len(cart_x) + + # TODO revisit this as more sophisticated moiety handling is implemented + moiety_loop_data = { + '_wfn_moiety.id': np.full(n_atoms, 1), + '_wfn_moiety.atom_id': np.arange(1, n_atoms + 1), + '_wfn_moiety.asu_atom_site_label': structure_cif_block['_atom_site.label'], + '_wfn_moiety.atom_type_symbol': structure_cif_block['_atom_site.type_symbol'], + '_wfn_moiety.symm_code': ['1_555'] * n_atoms, + '_wfn_moiety.cartn_x': list(cart_x), + '_wfn_moiety.cartn_y': list(cart_y), + '_wfn_moiety.cartn_z': list(cart_z), + '_wfn_moiety.aff_index': [self.scatterers.index(name) + 1 for name in structure_cif_block['_atom_site.label']] + } + + return loop(data=moiety_loop_data) + + def _construct_aff_loop(self): + def create_aff_line_string(values): + converted = [f"{val: 3.8f}" for val in values] + single_line = " ".join(converted) + return "[" + "\n".join(wrap(single_line, width=2047)) + "]" + + mil_hkl = np.asarray(list(self.data.keys())) + aff_loop_data = { + '_aspheric_ff.index_h': mil_hkl[:, 0].copy(), + '_aspheric_ff.index_k': mil_hkl[:, 1].copy(), + '_aspheric_ff.index_l': mil_hkl[:, 2].copy(), + '_aspheric_ff.form_factor_real': list(create_aff_line_string(line) for line in np.real(all_affs)), + '_aspheric_ff.form_factor_imag': list(create_aff_line_string(line) for line in np.imag(all_affs)), + } + return loop(data=aff_loop_data) 
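# --- Illustrative sketch (editorial addition, not part of the patch above) ---
# Shows how one reflection's complex form factors map to the bracketed
# real/imaginary strings produced by _construct_aff_loop and parsed back by
# populate_from_cif_block further below. The names f0js and to_bracketed are
# invented for this illustration only.
import numpy as np
from textwrap import wrap

f0js = np.array([1.2345 + 0.5j, -0.25 - 1.0j])  # one hkl row, two scatterers

def to_bracketed(values):
    # mirrors create_aff_line_string: fixed-point values, space separated,
    # wrapped at 2047 characters, enclosed in square brackets
    return "[" + "\n".join(wrap(" ".join(f"{v: 3.8f}" for v in values), width=2047)) + "]"

real_str = to_bracketed(np.real(f0js))  # "[ 1.23450000 -0.25000000]"
imag_str = to_bracketed(np.imag(f0js))  # "[ 0.50000000 -1.00000000]"

# parsing back follows populate_from_cif_block: strip the brackets, split,
# convert to float and recombine real and imaginary parts
parsed = np.array([float(v) for v in real_str.strip("[]").split()]) + 1j * np.array(
    [float(v) for v in imag_str.strip("[]").split()]
)
assert np.allclose(parsed, f0js)
# --- end illustrative sketch ---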
+ + def to_cif( + self, + structure_cif_block: block, + partitioning_source: str, + partitioning_name: str, + partitioning_software: str + ) -> block: + """ + Converts the TSC data to a CIF block format. + + Parameters + ---------- + structure_cif_block : block + The CIF block containing the structure information. + partitioning_source : str + The source of the partitioning. + partitioning_name : str + The name of the partitioning scheme employed. + partitioning_software : str + The software used for the partitioning. + + Returns + ------- + block + A CIF block containing the TSC data. + """ + tsc_block = block() + tsc_block.add_data_item("_cell.length_a", structure_cif_block["_cell.length_a"]) + tsc_block.add_data_item("_cell.length_b", structure_cif_block["_cell.length_b"]) + tsc_block.add_data_item("_cell.length_c", structure_cif_block["_cell.length_c"]) + tsc_block.add_data_item("_cell.angle_alpha", structure_cif_block["_cell.angle_alpha"]) + tsc_block.add_data_item("_cell.angle_beta", structure_cif_block["_cell.angle_beta"]) + tsc_block.add_data_item("_cell.angle_gamma", structure_cif_block["_cell.angle_gamma"]) + tsc_block.add_loop(self._construct_moiety_loop(structure_cif_block)) + tsc_block.add_data_item("_aspheric_ffs.source", partitioning_source) + tsc_block.add_data_item("_aspheric_ffs_partitioning.name", partitioning_name) + tsc_block.add_data_item("_aspheric_ffs_partitioning.software", partitioning_software) + tsc_block.add_loop(self._construct_aff_loop()) + + return tsc_block + + def populate_from_cif_block(self, cif_block: block): + """ + Populates the TSCFile object from a CIF block created by the TSC to cif export function. + Parameters + ---------- + cif_block : block + The CIF block containing the TSC data. + Raises + ------ + ValueError + If the CIF block does not contain the required entries. + """ + if '_aspheric_ffs.source' not in cif_block or '_aspheric_ffs_partitioning.name' not in cif_block or '_aspheric_ffs_partitioning.software' not in cif_block: + raise ValueError("CIF block does not contain required TSC entries.") + self.scatterers = cif_block['_wfn_moiety.asu_atom_site_label'] + aff_loop = cif_block.get_loop('_aspheric_ff.index_h') + if aff_loop is None: + raise ValueError("CIF block does not contain required TSC entries.") + hkl_tuples = tuple((int(h), int(k), int(l)) for h, k, l in zip(aff_loop['_aspheric_ff.index_h'], aff_loop['_aspheric_ff.index_k'], aff_loop['_aspheric_ff.index_l'])) + real_lines = aff_loop['_aspheric_ff.form_factor_real'] + imag_lines = aff_loop['_aspheric_ff.form_factor_imag'] + real_vals = np.fromiter((float(val) for line in real_lines for val in line.strip('[]').split()), dtype=np.float64) + imag_vals = np.fromiter((float(val) for line in imag_lines for val in line.strip('[]').split()), dtype=np.float64) + all_affs = real_vals + 1j * imag_vals + n_atoms = len(self.scatterers) + if len(all_affs) % n_atoms != 0: + raise ValueError("Number of AFF values is not a multiple of number of scatterers.") + all_affs = all_affs.reshape((-1, n_atoms)) + self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs, strict=False)} + + + + + + +class TSCFile(TSCBase): + """ + A class representing a TSC file as defined in doi:10.48550/arXiv.1911.08847 + + A TSC file contains atomic form factors for a list of atoms and miller + indicees + + You can get data for atoms for example with tsc['C1'] or tsc[['C1', 'C2']] + currently setting is not implemented this way. 
All data is represented + in the data attribute + + Attributes + ---------- + header : dict + A dictionary holding the header information from the TSC file. + data : dict + A dictionary mapping tuples (h, k, l) to numpy arrays of f0j values, + where the ordering of the values is given by the content of the + scatterers property / the SCATTERERS entry in the header. + """ + + @classmethod + def from_file(cls, filename: Path) -> "TSCFile": + """ + Constructs a TSCFile object from a file. + + The function reads the TSC file, parses its header and data sections, + and constructs a TSCFile instance with these data. + + Parameters + ---------- + filename : Path + The name of the TSC file to read. + + Returns + ------- + TSCFile + A TSCFile instance with data loaded from the file. + """ + with open(filename, 'r') as fobj: + tsc_content = fobj.read() + header_str, data_str = tsc_content.split('DATA:\n') + + new_obj = cls() + + new_obj.header.update(parse_header(header_str)) + + parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split('\n')) + + new_obj.data = {hkl: f0js for hkl, f0js in parsed_iter} + + return new_obj + + def to_file(self, filename: Path) -> None: + """ + Writes the TSCFile object to a file. + + The function formats the header and data sections of the TSCFile object + and writes them to a file. Currently no safety checks are implemented + SCATTERERS and data need to match + + Parameters + ---------- + filename : Path + The name of the file to write. + """ + header_str = '\n'.join(f'{key}: {value}' for key, value in self.header.items()) + data_iter = iter(f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} {' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" for hkl, values in self.data.items()) + data_str = '\n'.join(data_iter) + + with open(filename, 'w') as fobj: + fobj.write(f'{header_str}\nDATA:\n{data_str}\n') + + @classmethod + def from_cif_file(cls, cif_path: Path) -> "TSCFile": + """ + Constructs a TSCFile object from a CIF file created by the TSC to cif export function. + + Parameters + ---------- + filename : Path + The name of the CIF file to read. + + Returns + ------- + TSCFile + A TSCFile instance with data loaded from the CIF file. + """ + cif_block = read_cif_as_unified(cif_path, 0) + new_obj = cls() + new_obj.populate_from_cif_block(cif_block) + return new_obj + + +class TSCBFile(TSCBase): + """ + A class representing a TSCB file used by for example NoSpherA2 + + A TSC file contains atomic form factors for a list of atoms and miller + indicees + + You can get data for atoms for example with tsc['C1'] or tsc[['C1', 'C2']] + currently setting is not implemented this way. All data is represented + in the data attribute + + Attributes + ---------- + header : dict + A dictionary holding the header information from the TSC file. + data : dict + A dictionary mapping tuples (h, k, l) to numpy arrays of f0j values, + where the ordering of the values is given by the content of the + scatterers property / the SCATTERERS entry in the header. + """ + + @classmethod + def from_file(cls, filename: Path) -> "TSCBFile": + """ + Constructs a TSCFile object from a file. + + The function reads the TSC file, parses its header and data sections, + and constructs a TSCFile instance with these data. + + Parameters + ---------- + filename : Path + The name of the TSC file to read. + + Returns + ------- + TSCFile + A TSCBFile instance with data loaded from the file. 
+ """ + new_obj = cls() + with open(filename, 'rb') as fobj: + additional_header_size, n_bytes_labels = struct.unpack('2i', fobj.read(8)) + if additional_header_size > 0: + header_str = fobj.read(additional_header_size).decode('ASCII') + + new_obj.header.update(parse_header(header_str)) + new_obj.header['SCATTERERS'] = fobj.read(n_bytes_labels).decode('ASCII') + + n_refln = struct.unpack('i', fobj.read(4))[0] + n_atoms = len(new_obj.header['SCATTERERS'].split()) + new_obj.data = { + tuple(np.frombuffer(fobj.read(12), dtype=np.int32)): np.frombuffer(fobj.read(n_atoms*16), dtype=np.complex128) for i in range(n_refln) + } + return new_obj + + def to_file(self, filename: Path) -> None: + """ + Writes the TSCBFile object to a file. + + The function formats the header and data sections of the TSCBFile object + and writes them to a file. Currently no safety checks are implemented + SCATTERERS and data need to match + + Parameters + ---------- + filename : str + The name of the file to write. + """ + if not next(iter(self.data.values())).dtype == np.complex128: + self.data = {key: value.astype(np.complex128) for key, value in self.data.items()} + omitted_header_entries = ('SCATTERERS', 'TITLE', 'SYMM') + header_string = '\n'.join(f'{name}: {entry}' for name, entry in self.header.items() if name not in omitted_header_entries) + with open(filename, 'wb') as fobj: + fobj.write(struct.pack('2i', len(header_string), len(self.header['SCATTERERS']))) + fobj.write(header_string.encode('ASCII')) + fobj.write(self.header['SCATTERERS'].encode('ASCII')) + fobj.write(struct.pack('i', len(self.data))) + fobj.write(bytes().join(struct.pack('3i', *hkl) + f0js.tobytes() for hkl, f0js in self.data.items())) + + @classmethod + def from_cif_file(cls, cif_path: Path) -> "TSCBFile": + """ + Constructs a TSCFile object from a CIF file created by the TSC to cif export function. + + Parameters + ---------- + filename : Path + The name of the CIF file to read. + + Returns + ------- + TSCBFile + A TSCBFile instance with data loaded from the CIF file. + """ + cif_block = read_cif_as_unified(cif_path, 0) + new_obj = cls() + new_obj.populate_from_cif_block(cif_block) + return new_obj \ No newline at end of file diff --git a/qcrboxtools/cif/read.py b/qcrboxtools/cif/read.py index 47ebf3f..a668205 100644 --- a/qcrboxtools/cif/read.py +++ b/qcrboxtools/cif/read.py @@ -64,7 +64,7 @@ def cifdata_str_or_index(model: cif.model.cif, dataset: Union[int, str]) -> Tupl def read_cif_as_unified( cif_path: Union[str, Path], - dataset: Optional[str] = None, + dataset: Optional[Union[str, int]] = None, convert_keywords: bool = True, custom_categories: Optional[List[str]] = None, split_sus: bool = True, From 9ef53c3ef98d575ae07ec55964140362bd103aa2 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 10:55:25 +0100 Subject: [PATCH 2/6] improve formatting, fix some bugs --- qcrboxtools/cif/file_converter/cartesian.py | 3 +- qcrboxtools/cif/file_converter/tsc.py | 177 +++++++++++--------- 2 files changed, 96 insertions(+), 84 deletions(-) diff --git a/qcrboxtools/cif/file_converter/cartesian.py b/qcrboxtools/cif/file_converter/cartesian.py index 14c0386..8790b6e 100644 --- a/qcrboxtools/cif/file_converter/cartesian.py +++ b/qcrboxtools/cif/file_converter/cartesian.py @@ -1,5 +1,6 @@ import numpy as np + def cell_constants_to_matrix(a: float, b: float, c: float, alpha: float, beta: float, gamma: float) -> np.ndarray: """ Convert cell constants to a 3x3 cell matrix. 
@@ -41,4 +42,4 @@ def cell_constants_to_matrix(a: float, b: float, c: float, alpha: float, beta: f matrix[1, 2] = c * (cos_alpha - cos_beta * cos_gamma) / sin_gamma matrix[2, 2] = c * np.sqrt(1 - cos_beta**2 - ((cos_alpha - cos_beta * cos_gamma) / sin_gamma) ** 2) - return matrix \ No newline at end of file + return matrix diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 4d09d52..fee98cd 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -3,14 +3,14 @@ from collections.abc import Iterable from pathlib import Path from textwrap import wrap - from typing import Dict, List, Tuple, Union import numpy as np from iotbx.cif.model import block, loop +from ..read import read_cif_as_unified from .cartesian import cell_constants_to_matrix -from ..read import read_cif_as_unified, cifdata_str_or_index + def read_tsc_file(path: Path): """ @@ -51,22 +51,22 @@ def read_tsc_file(path: Path): def parse_header(header_str): header = {} - header_split = iter(val.split(':') for val in header_str.strip().split('\n')) + header_split = iter(val.split(":") for val in header_str.strip().split("\n")) header_key = None + header_entry = "" for line_split in header_split: if len(line_split) == 2 and header_key is not None: header[header_key] = header_entry if len(line_split) == 2: header_key, header_entry = line_split else: - header_entry += '\n' + line_split[0] + header_entry += "\n" + line_split[0] header[header_key] = header_entry return header -def parse_tsc_data_line( - line: str - ) -> Tuple[Tuple[int, int, int], np.ndarray]: + +def parse_tsc_data_line(line: str) -> Tuple[Tuple[int, int, int], np.ndarray]: """ Parses a line of TSC data. @@ -80,17 +80,14 @@ def parse_tsc_data_line( tuple A tuple containing the indices h, k, l and the array of f0j values. """ - h_str, k_str, l_str, *f0j_strs = line.split() - f0js = np.array([float(val.split(',')[0]) + 1j * float(val.split(',')[1]) for val in f0j_strs]) + h_str, k_str, l_str, *f0j_strs = line.split() + f0js = np.array([float(val.split(",")[0]) + 1j * float(val.split(",")[1]) for val in f0j_strs]) return (int(h_str), int(k_str), int(l_str)), f0js + class TSCBase(ABC): def __init__(self): - self.header = { - 'TITLE': 'generic_tsc', - 'SYMM': 'expanded', - 'SCATTERERS' : '' - } + self.header = {"TITLE": "generic_tsc", "SYMM": "expanded", "SCATTERERS": ""} self.data = {} @property @@ -105,7 +102,7 @@ def scatterers(self) -> List[str]: A list of scatterer names. """ - return self.header['SCATTERERS'].strip().split() + return self.header["SCATTERERS"].strip().split() @scatterers.setter def scatterers(self, scatterers: Iterable): @@ -120,12 +117,9 @@ def scatterers(self, scatterers: Iterable): scatterers : iterable An iterable of scatterer names. """ - self.header['SCATTERERS'] = ' '.join(str(val) for val in scatterers) + self.header["SCATTERERS"] = " ".join(str(val) for val in scatterers) - def __getitem__( - self, - atom_site_label: Union[str, Iterable] - ) -> Dict[Tuple[int, int, int], np.ndarray]: + def __getitem__(self, atom_site_label: Union[str, Iterable]) -> Dict[Tuple[int, int, int], np.ndarray]: """ Retrieves f0j values for a given atom site label. 
@@ -167,7 +161,7 @@ def __getitem__( unknown = [label for label in atom_site_label if label not in self.scatterers] else: unknown = [atom_site_label] - raise ValueError(f'Unknown atom label(s) used for lookup from TSCFile: {" ".join(unknown)}') from exc + raise ValueError(f"Unknown atom label(s) used for lookup from TSCFile: {' '.join(unknown)}") from exc @classmethod @abstractmethod @@ -211,41 +205,40 @@ def _construct_moiety_loop(self, structure_cif_block: block): # TODO revisit this as more sophisticated moiety handling is implemented moiety_loop_data = { - '_wfn_moiety.id': np.full(n_atoms, 1), - '_wfn_moiety.atom_id': np.arange(1, n_atoms + 1), - '_wfn_moiety.asu_atom_site_label': structure_cif_block['_atom_site.label'], - '_wfn_moiety.atom_type_symbol': structure_cif_block['_atom_site.type_symbol'], - '_wfn_moiety.symm_code': ['1_555'] * n_atoms, - '_wfn_moiety.cartn_x': list(cart_x), - '_wfn_moiety.cartn_y': list(cart_y), - '_wfn_moiety.cartn_z': list(cart_z), - '_wfn_moiety.aff_index': [self.scatterers.index(name) + 1 for name in structure_cif_block['_atom_site.label']] + "_wfn_moiety.id": np.full(n_atoms, 1), + "_wfn_moiety.atom_id": np.arange(1, n_atoms + 1), + "_wfn_moiety.asu_atom_site_label": structure_cif_block["_atom_site.label"], + "_wfn_moiety.atom_type_symbol": structure_cif_block["_atom_site.type_symbol"], + "_wfn_moiety.symm_code": ["1_555"] * n_atoms, + "_wfn_moiety.cartn_x": list(cart_x), + "_wfn_moiety.cartn_y": list(cart_y), + "_wfn_moiety.cartn_z": list(cart_z), + "_wfn_moiety.aff_index": [ + self.scatterers.index(name) + 1 for name in structure_cif_block["_atom_site.label"] + ], } return loop(data=moiety_loop_data) - + def _construct_aff_loop(self): def create_aff_line_string(values): converted = [f"{val: 3.8f}" for val in values] single_line = " ".join(converted) return "[" + "\n".join(wrap(single_line, width=2047)) + "]" - + mil_hkl = np.asarray(list(self.data.keys())) + all_affs = np.array(list(self.data.values())) aff_loop_data = { - '_aspheric_ff.index_h': mil_hkl[:, 0].copy(), - '_aspheric_ff.index_k': mil_hkl[:, 1].copy(), - '_aspheric_ff.index_l': mil_hkl[:, 2].copy(), - '_aspheric_ff.form_factor_real': list(create_aff_line_string(line) for line in np.real(all_affs)), - '_aspheric_ff.form_factor_imag': list(create_aff_line_string(line) for line in np.imag(all_affs)), + "_aspheric_ff.index_h": mil_hkl[:, 0].copy(), + "_aspheric_ff.index_k": mil_hkl[:, 1].copy(), + "_aspheric_ff.index_l": mil_hkl[:, 2].copy(), + "_aspheric_ff.form_factor_real": list(create_aff_line_string(line) for line in np.real(all_affs)), + "_aspheric_ff.form_factor_imag": list(create_aff_line_string(line) for line in np.imag(all_affs)), } return loop(data=aff_loop_data) def to_cif( - self, - structure_cif_block: block, - partitioning_source: str, - partitioning_name: str, - partitioning_software: str + self, structure_cif_block: block, partitioning_source: str, partitioning_name: str, partitioning_software: str ) -> block: """ Converts the TSC data to a CIF block format. @@ -260,7 +253,7 @@ def to_cif( The name of the partitioning scheme employed. partitioning_software : str The software used for the partitioning. - + Returns ------- block @@ -280,7 +273,7 @@ def to_cif( tsc_block.add_loop(self._construct_aff_loop()) return tsc_block - + def populate_from_cif_block(self, cif_block: block): """ Populates the TSCFile object from a CIF block created by the TSC to cif export function. 
@@ -291,29 +284,36 @@ def populate_from_cif_block(self, cif_block: block): Raises ------ ValueError - If the CIF block does not contain the required entries. + If the CIF block does not contain the required entries. """ - if '_aspheric_ffs.source' not in cif_block or '_aspheric_ffs_partitioning.name' not in cif_block or '_aspheric_ffs_partitioning.software' not in cif_block: + if ( + "_aspheric_ffs.source" not in cif_block + or "_aspheric_ffs_partitioning.name" not in cif_block + or "_aspheric_ffs_partitioning.software" not in cif_block + ): raise ValueError("CIF block does not contain required TSC entries.") - self.scatterers = cif_block['_wfn_moiety.asu_atom_site_label'] - aff_loop = cif_block.get_loop('_aspheric_ff.index_h') + self.scatterers = cif_block["_wfn_moiety.asu_atom_site_label"] + aff_loop = cif_block.get_loop("_aspheric_ff.index_h") if aff_loop is None: raise ValueError("CIF block does not contain required TSC entries.") - hkl_tuples = tuple((int(h), int(k), int(l)) for h, k, l in zip(aff_loop['_aspheric_ff.index_h'], aff_loop['_aspheric_ff.index_k'], aff_loop['_aspheric_ff.index_l'])) - real_lines = aff_loop['_aspheric_ff.form_factor_real'] - imag_lines = aff_loop['_aspheric_ff.form_factor_imag'] - real_vals = np.fromiter((float(val) for line in real_lines for val in line.strip('[]').split()), dtype=np.float64) - imag_vals = np.fromiter((float(val) for line in imag_lines for val in line.strip('[]').split()), dtype=np.float64) + hkl_zip = zip( + aff_loop["_aspheric_ff.index_h"], aff_loop["_aspheric_ff.index_k"], aff_loop["_aspheric_ff.index_l"] + ) + hkl_tuples = tuple((int(mil_h), int(mil_k), int(mil_l)) for mil_h, mil_k, mil_l in hkl_zip) + real_lines = aff_loop["_aspheric_ff.form_factor_real"] + imag_lines = aff_loop["_aspheric_ff.form_factor_imag"] + real_vals = np.fromiter( + (float(val) for line in real_lines for val in line.strip("[]").split()), dtype=np.float64 + ) + imag_vals = np.fromiter( + (float(val) for line in imag_lines for val in line.strip("[]").split()), dtype=np.float64 + ) all_affs = real_vals + 1j * imag_vals n_atoms = len(self.scatterers) if len(all_affs) % n_atoms != 0: raise ValueError("Number of AFF values is not a multiple of number of scatterers.") all_affs = all_affs.reshape((-1, n_atoms)) self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs, strict=False)} - - - - class TSCFile(TSCBase): @@ -355,15 +355,15 @@ def from_file(cls, filename: Path) -> "TSCFile": TSCFile A TSCFile instance with data loaded from the file. """ - with open(filename, 'r') as fobj: + with open(filename, "r") as fobj: tsc_content = fobj.read() - header_str, data_str = tsc_content.split('DATA:\n') + header_str, data_str = tsc_content.split("DATA:\n") new_obj = cls() new_obj.header.update(parse_header(header_str)) - parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split('\n')) + parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split("\n")) new_obj.data = {hkl: f0js for hkl, f0js in parsed_iter} @@ -382,12 +382,18 @@ def to_file(self, filename: Path) -> None: filename : Path The name of the file to write. 
""" - header_str = '\n'.join(f'{key}: {value}' for key, value in self.header.items()) - data_iter = iter(f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} {' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" for hkl, values in self.data.items()) - data_str = '\n'.join(data_iter) - - with open(filename, 'w') as fobj: - fobj.write(f'{header_str}\nDATA:\n{data_str}\n') + header_str = "\n".join(f"{key}: {value}" for key, value in self.header.items()) + data_iter = iter( + ( + f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} " + + f"{' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" + ) + for hkl, values in self.data.items() + ) + data_str = "\n".join(data_iter) + + with open(filename, "w") as fobj: + fobj.write(f"{header_str}\nDATA:\n{data_str}\n") @classmethod def from_cif_file(cls, cif_path: Path) -> "TSCFile": @@ -409,7 +415,7 @@ def from_cif_file(cls, cif_path: Path) -> "TSCFile": new_obj.populate_from_cif_block(cif_block) return new_obj - + class TSCBFile(TSCBase): """ A class representing a TSCB file used by for example NoSpherA2 @@ -450,18 +456,21 @@ def from_file(cls, filename: Path) -> "TSCBFile": A TSCBFile instance with data loaded from the file. """ new_obj = cls() - with open(filename, 'rb') as fobj: - additional_header_size, n_bytes_labels = struct.unpack('2i', fobj.read(8)) + with open(filename, "rb") as fobj: + additional_header_size, n_bytes_labels = struct.unpack("2i", fobj.read(8)) if additional_header_size > 0: - header_str = fobj.read(additional_header_size).decode('ASCII') + header_str = fobj.read(additional_header_size).decode("ASCII") new_obj.header.update(parse_header(header_str)) - new_obj.header['SCATTERERS'] = fobj.read(n_bytes_labels).decode('ASCII') + new_obj.header["SCATTERERS"] = fobj.read(n_bytes_labels).decode("ASCII") - n_refln = struct.unpack('i', fobj.read(4))[0] - n_atoms = len(new_obj.header['SCATTERERS'].split()) + n_refln = struct.unpack("i", fobj.read(4))[0] + n_atoms = len(new_obj.header["SCATTERERS"].split()) new_obj.data = { - tuple(np.frombuffer(fobj.read(12), dtype=np.int32)): np.frombuffer(fobj.read(n_atoms*16), dtype=np.complex128) for i in range(n_refln) + tuple(np.frombuffer(fobj.read(12), dtype=np.int32)): np.frombuffer( + fobj.read(n_atoms * 16), dtype=np.complex128 + ) + for i in range(n_refln) } return new_obj @@ -480,14 +489,16 @@ def to_file(self, filename: Path) -> None: """ if not next(iter(self.data.values())).dtype == np.complex128: self.data = {key: value.astype(np.complex128) for key, value in self.data.items()} - omitted_header_entries = ('SCATTERERS', 'TITLE', 'SYMM') - header_string = '\n'.join(f'{name}: {entry}' for name, entry in self.header.items() if name not in omitted_header_entries) - with open(filename, 'wb') as fobj: - fobj.write(struct.pack('2i', len(header_string), len(self.header['SCATTERERS']))) - fobj.write(header_string.encode('ASCII')) - fobj.write(self.header['SCATTERERS'].encode('ASCII')) - fobj.write(struct.pack('i', len(self.data))) - fobj.write(bytes().join(struct.pack('3i', *hkl) + f0js.tobytes() for hkl, f0js in self.data.items())) + omitted_header_entries = ("SCATTERERS", "TITLE", "SYMM") + header_string = "\n".join( + f"{name}: {entry}" for name, entry in self.header.items() if name not in omitted_header_entries + ) + with open(filename, "wb") as fobj: + fobj.write(struct.pack("2i", len(header_string), len(self.header["SCATTERERS"]))) + fobj.write(header_string.encode("ASCII")) + fobj.write(self.header["SCATTERERS"].encode("ASCII")) + fobj.write(struct.pack("i", 
len(self.data))) + fobj.write(bytes().join(struct.pack("3i", *hkl) + f0js.tobytes() for hkl, f0js in self.data.items())) @classmethod def from_cif_file(cls, cif_path: Path) -> "TSCBFile": @@ -507,4 +518,4 @@ def from_cif_file(cls, cif_path: Path) -> "TSCBFile": cif_block = read_cif_as_unified(cif_path, 0) new_obj = cls() new_obj.populate_from_cif_block(cif_block) - return new_obj \ No newline at end of file + return new_obj From 167dbe9982fdc280a39180e6d70258c08f363489 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 11:48:12 +0100 Subject: [PATCH 3/6] Add tests for and improve the tsc handling --- qcrboxtools/cif/file_converter/tsc.py | 15 +- tests/cif/convert/test_tsc.py | 575 ++++++++++++++++++++++++++ 2 files changed, 580 insertions(+), 10 deletions(-) create mode 100644 tests/cif/convert/test_tsc.py diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index fee98cd..553b6e1 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -145,19 +145,14 @@ def __getitem__(self, atom_site_label: Union[str, Iterable]) -> Dict[Tuple[int, If an unknown atom site label is used for indexing. """ try: - if isinstance(atom_site_label, str): - index = self.scatterers.index(atom_site_label) - return {hkl: f0js[index] for hkl, f0js in self.data.items()} - elif isinstance(atom_site_label, Iterable): + if isinstance(atom_site_label, Iterable) and not isinstance(atom_site_label, str): indexes = np.array([self.scatterers.index(label) for label in atom_site_label]) return {hkl: f0js[indexes] for hkl, f0js in self.data.items()} else: index = self.scatterers.index(atom_site_label) return {hkl: f0js[index] for hkl, f0js in self.data.items()} except ValueError as exc: - if isinstance(atom_site_label, str): - unknown = [atom_site_label] - elif isinstance(atom_site_label, Iterable): + if isinstance(atom_site_label, Iterable) and not isinstance(atom_site_label, str): unknown = [label for label in atom_site_label if label not in self.scatterers] else: unknown = [atom_site_label] @@ -293,9 +288,9 @@ def populate_from_cif_block(self, cif_block: block): ): raise ValueError("CIF block does not contain required TSC entries.") self.scatterers = cif_block["_wfn_moiety.asu_atom_site_label"] - aff_loop = cif_block.get_loop("_aspheric_ff.index_h") + aff_loop = cif_block.get_loop("_aspheric_ff") if aff_loop is None: - raise ValueError("CIF block does not contain required TSC entries.") + raise ValueError("CIF block does not contain required TSC entries for the loop _aspheric_ff.") hkl_zip = zip( aff_loop["_aspheric_ff.index_h"], aff_loop["_aspheric_ff.index_k"], aff_loop["_aspheric_ff.index_l"] ) @@ -517,5 +512,5 @@ def from_cif_file(cls, cif_path: Path) -> "TSCBFile": """ cif_block = read_cif_as_unified(cif_path, 0) new_obj = cls() - new_obj.populate_from_cif_block(cif_block) + new_obj.populate_from_cif_block(cif_block) return new_obj diff --git a/tests/cif/convert/test_tsc.py b/tests/cif/convert/test_tsc.py new file mode 100644 index 0000000..be16321 --- /dev/null +++ b/tests/cif/convert/test_tsc.py @@ -0,0 +1,575 @@ +""" +Test module for `qcrboxtools.cif.file_converter.tsc`. + +This module contains unit tests for TSC and TSCB file reading, parsing, +and conversion functionality. 
+""" + +import struct +from pathlib import Path +from unittest.mock import patch + +import numpy as np +import pytest + +from qcrboxtools.cif.file_converter.tsc import ( + TSCBFile, + TSCFile, + parse_header, + parse_tsc_data_line, + read_tsc_file, +) + + +@pytest.fixture +def sample_tsc_content(): + """Sample TSC file content for testing.""" + return """TITLE: Test TSC File +SYMM: expanded +SCATTERERS: C1 C2 O1 +DATA: +1 0 0 1.23450000e+00,0.00000000e+00 2.34560000e+00,1.00000000e+00 3.45670000e+00,-1.00000000e+00 +0 1 0 4.56780000e+00,0.50000000e+00 5.67890000e+00,-0.50000000e+00 6.78900000e+00,0.25000000e+00 +""" + + +@pytest.fixture +def sample_header_string(): + """Sample header string for testing.""" + return """TITLE: Test Header +SYMM: expanded +SCATTERERS: C1 C2 O1 +MULTI_LINE: This is a +multi-line entry +that spans several lines""" + + +@pytest.fixture +def sample_tscb_file(tmp_path): + """Create a sample TSCB file for testing.""" + tscb_path = tmp_path / "test.tscb" + + # Create minimal TSCB file content + header_str = "TITLE: Test TSCB\nSYMM: expanded" + scatterers_str = "C1 C2 O1" + + with open(tscb_path, "wb") as f: + # Write header size and scatterers size + f.write(struct.pack("2i", len(header_str), len(scatterers_str))) + # Write header and scatterers + f.write(header_str.encode("ASCII")) + f.write(scatterers_str.encode("ASCII")) + # Write number of reflections + f.write(struct.pack("i", 2)) + + # Write reflection data: hkl + form factors for 3 atoms + # Reflection 1: (1,0,0) + f.write(struct.pack("3i", 1, 0, 0)) + f.write(np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=np.complex128).tobytes()) + + # Reflection 2: (0,1,0) + f.write(struct.pack("3i", 0, 1, 0)) + f.write(np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j], dtype=np.complex128).tobytes()) + + return tscb_path + + +def test_parse_header(sample_header_string): + """Test header parsing functionality.""" + header = parse_header(sample_header_string) + + assert header["TITLE"] == " Test Header" + assert header["SYMM"] == " expanded" + assert header["SCATTERERS"] == " C1 C2 O1" + assert "This is a\nmulti-line entry\nthat spans several lines" in header["MULTI_LINE"] + + +def test_parse_header_empty(): + """Test header parsing with empty string.""" + header = parse_header("") + assert header == {None: "\n"} + + +def test_parse_tsc_data_line(): + """Test parsing of TSC data lines.""" + line = "1 0 0 1.23450000e+00,0.00000000e+00 2.34560000e+00,1.00000000e+00" + hkl, f0js = parse_tsc_data_line(line) + + assert hkl == (1, 0, 0) + assert len(f0js) == 2 + assert np.isclose(f0js[0], 1.2345 + 0.0j) + assert np.isclose(f0js[1], 2.3456 + 1.0j) + + +def test_parse_tsc_data_line_negative_indices(): + """Test parsing TSC data line with negative Miller indices.""" + line = "-1 2 -3 1.00000000e+00,2.00000000e+00" + hkl, f0js = parse_tsc_data_line(line) + + assert hkl == (-1, 2, -3) + assert len(f0js) == 1 + assert np.isclose(f0js[0], 1.0 + 2.0j) + + +def test_read_tsc_file_tsc_extension(tmp_path, sample_tsc_content): + """Test reading a .tsc file.""" + tsc_path = tmp_path / "test.tsc" + tsc_path.write_text(sample_tsc_content) + + result = read_tsc_file(tsc_path) + + assert isinstance(result, TSCFile) + assert result.scatterers == ["C1", "C2", "O1"] + assert (1, 0, 0) in result.data + assert (0, 1, 0) in result.data + + +def test_read_tsc_file_tscb_extension(sample_tscb_file): + """Test reading a .tscb file.""" + result = read_tsc_file(sample_tscb_file) + + assert isinstance(result, TSCBFile) + assert result.scatterers == ["C1", "C2", "O1"] 
+ assert (1, 0, 0) in result.data + assert (0, 1, 0) in result.data + + +def test_read_tsc_file_invalid_tsc(tmp_path): + """Test reading invalid TSC file raises ValueError.""" + invalid_tsc = tmp_path / "invalid.tsc" + invalid_tsc.write_text("This is not a valid TSC file") + + with pytest.raises(ValueError, match="Cannot read AFF file"): + read_tsc_file(invalid_tsc) + + +def test_read_tsc_file_invalid_tscb(tmp_path): + """Test reading invalid TSCB file raises ValueError.""" + invalid_tscb = tmp_path / "invalid.tscb" + invalid_tscb.write_bytes(b"This is not a valid TSCB file") + + with pytest.raises(ValueError, match="Cannot read AFF file"): + read_tsc_file(invalid_tscb) + + +def test_tsc_file_scatterers_property(): + """Test TSCFile scatterers property.""" + tsc = TSCFile() + tsc.header["SCATTERERS"] = "C1 C2 O1 N1" + + assert tsc.scatterers == ["C1", "C2", "O1", "N1"] + + +def test_tsc_file_scatterers_setter(): + """Test TSCFile scatterers setter.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + + assert tsc.header["SCATTERERS"] == "C1 C2 O1" + + +def test_tsc_file_getitem_single_atom(): + """Test TSCFile indexing with single atom label.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + } + + result = tsc["C1"] + expected = { + (1, 0, 0): 1.0+0.0j, + (0, 1, 0): 4.0+0.5j + } + + assert len(result) == 2 + for hkl, value in expected.items(): + assert np.isclose(result[hkl], value) + + +def test_tsc_file_getitem_multiple_atoms(): + """Test TSCFile indexing with multiple atom labels.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + } + + result = tsc[["C1", "O1"]] + expected_values = np.array([1.0+0.0j, 3.0-1.0j]) + + assert len(result) == 1 + assert np.allclose(result[(1, 0, 0)], expected_values) + + +def test_tsc_file_getitem_unknown_atom(): + """Test TSCFile indexing with unknown atom raises ValueError.""" + tsc = TSCFile() + tsc.scatterers = ["C1", "C2"] + + with pytest.raises(ValueError, match="Unknown atom label.*O1"): + tsc["O1"] + + +def test_tsc_file_from_file(tmp_path, sample_tsc_content): + """Test TSCFile.from_file method.""" + tsc_path = tmp_path / "test.tsc" + tsc_path.write_text(sample_tsc_content) + + tsc = TSCFile.from_file(tsc_path) + + assert tsc.header["TITLE"] == " Test TSC File" + assert tsc.scatterers == ["C1", "C2", "O1"] + assert len(tsc.data) == 2 + + +def test_tsc_file_to_file(tmp_path): + """Test TSCFile.to_file method.""" + tsc = TSCFile() + tsc.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} + tsc.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j])} + + output_path = tmp_path / "output.tsc" + tsc.to_file(output_path) + + # Verify file was written correctly + content = output_path.read_text() + assert "TITLE: Test" in content + assert "DATA:" in content + assert "1 0 0" in content + + +def test_tscb_file_from_file(sample_tscb_file): + """Test TSCBFile.from_file method.""" + tscb = TSCBFile.from_file(sample_tscb_file) + + assert tscb.scatterers == ["C1", "C2", "O1"] + assert len(tscb.data) == 2 + assert (1, 0, 0) in tscb.data + assert len(tscb.data[(1, 0, 0)]) == 3 + + +def test_tscb_file_to_file(tmp_path): + """Test TSCBFile.to_file method.""" + tscb = TSCBFile() + tscb.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} + tscb.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j], dtype=np.complex128)} + + 
output_path = tmp_path / "output.tscb" + tscb.to_file(output_path) + + # Verify file exists and has content + assert output_path.exists() + assert output_path.stat().st_size > 0 + + # Try to read it back + tscb_read = TSCBFile.from_file(output_path) + assert tscb_read.scatterers == ["C1", "C2"] + assert (1, 0, 0) in tscb_read.data + + +def test_tscb_file_empty_header(tmp_path): + """Test TSCBFile with empty additional header.""" + tscb_path = tmp_path / "test_empty_header.tscb" + scatterers_str = "C1" + + with open(tscb_path, "wb") as f: + # Write zero header size + f.write(struct.pack("2i", 0, len(scatterers_str))) + f.write(scatterers_str.encode("ASCII")) + f.write(struct.pack("i", 1)) # One reflection + f.write(struct.pack("3i", 1, 0, 0)) + f.write(np.array([1.0+0.0j], dtype=np.complex128).tobytes()) + + tscb = TSCBFile.from_file(tscb_path) + assert tscb.scatterers == ["C1"] + assert len(tscb.data) == 1 + +@pytest.fixture +def sample_structure_cif_content(): + """Sample structure CIF content for testing.""" + return """ +data_test +_cell.length_a 10.000 +_cell.length_b 12.000 +_cell.length_c 8.000 +_cell.angle_alpha 90.0 +_cell.angle_beta 95.0 +_cell.angle_gamma 90.0 + +loop_ +_atom_site.label +_atom_site.type_symbol +_atom_site.fract_x +_atom_site.fract_y +_atom_site.fract_z +C1 C 0.1 0.2 0.3 +C2 C 0.4 0.5 0.6 +O1 O 0.7 0.8 0.9 +""" + + +@pytest.fixture +def sample_tsc_cif_content(): + """Sample TSC-generated CIF content for testing.""" + return """ +data_test +_cell.length_a 10.000 +_cell.length_b 12.000 +_cell.length_c 8.000 +_cell.angle_alpha 90.0 +_cell.angle_beta 95.0 +_cell.angle_gamma 90.0 + +loop_ +_wfn_moiety.id +_wfn_moiety.atom_id +_wfn_moiety.asu_atom_site_label +_wfn_moiety.atom_type_symbol +_wfn_moiety.symm_code +_wfn_moiety.cartn_x +_wfn_moiety.cartn_y +_wfn_moiety.cartn_z +_wfn_moiety.aff_index +1 1 C1 C 1_555 1.0 2.4 2.4 1 +1 2 C2 C 1_555 4.0 6.0 4.8 2 +1 3 O1 O 1_555 7.0 9.6 7.2 3 + +_aspheric_ffs.source 'test_source' +_aspheric_ffs_partitioning.name 'test_partitioning' +_aspheric_ffs_partitioning.software 'test_software' + +loop_ +_aspheric_ff.index_h +_aspheric_ff.index_k +_aspheric_ff.index_l +_aspheric_ff.form_factor_real +_aspheric_ff.form_factor_imag +1 0 0 '[1.00000000 2.00000000 3.00000000]' '[0.00000000 1.00000000 -1.00000000]' +0 1 0 '[4.00000000 5.00000000 6.00000000]' '[0.50000000 -0.50000000 0.25000000]' +""" + + +@pytest.fixture +def structure_cif_block(tmp_path, sample_structure_cif_content): + """Create a structure CIF file and return the first block.""" + cif_path = tmp_path / "structure.cif" + cif_path.write_text(sample_structure_cif_content) + + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) + + +@pytest.fixture +def tsc_cif_block(tmp_path, sample_tsc_cif_content): + """Create a TSC CIF file and return the first block.""" + cif_path = tmp_path / "tsc.cif" + cif_path.write_text(sample_tsc_cif_content) + + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) + + +def test_tsc_to_cif_conversion(structure_cif_block): + """Test converting TSC data to CIF format.""" + # Create a TSC file with test data + tsc = TSCFile() + tsc.scatterers = ["C1", "C2", "O1"] + tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + } + + # Convert to CIF + cif_block = tsc.to_cif( + structure_cif_block, + partitioning_source="test_source", + partitioning_name="test_partitioning", + 
partitioning_software="test_software" + ) + + # Verify cell parameters are preserved + assert cif_block["_cell.length_a"] == "10.000" + assert cif_block["_cell.length_b"] == "12.000" + assert cif_block["_cell.angle_beta"] == "95.0" + + # Verify partitioning metadata + assert cif_block["_aspheric_ffs.source"] == "test_source" + assert cif_block["_aspheric_ffs_partitioning.name"] == "test_partitioning" + assert cif_block["_aspheric_ffs_partitioning.software"] == "test_software" + + # Verify moiety loop exists + moiety_loop = cif_block.get_loop("_wfn_moiety") + assert moiety_loop is not None + assert len(moiety_loop["_wfn_moiety.atom_id"]) == 3 + + # Verify AFF loop exists + aff_loop = cif_block.get_loop("_aspheric_ff") + assert aff_loop is not None + assert len(aff_loop["_aspheric_ff.index_h"]) == 2 + + +def test_tsc_populate_from_cif_block(tsc_cif_block): + """Test populating TSC from CIF block.""" + tsc = TSCFile() + tsc.populate_from_cif_block(tsc_cif_block) + + # Verify scatterers + assert tsc.scatterers == ["C1", "C2", "O1"] + + # Verify data was loaded correctly + assert len(tsc.data) == 2 + assert (1, 0, 0) in tsc.data + assert (0, 1, 0) in tsc.data + + # Verify form factor values + hkl_100_data = tsc.data[(1, 0, 0)] + assert np.isclose(hkl_100_data[0], 1.0+0.0j) + assert np.isclose(hkl_100_data[1], 2.0+1.0j) + assert np.isclose(hkl_100_data[2], 3.0-1.0j) + + +def test_tsc_from_cif_file(tmp_path, sample_tsc_cif_content): + """Test creating TSC from CIF file.""" + cif_path = tmp_path / "test.cif" + cif_path.write_text(sample_tsc_cif_content) + + tsc = TSCFile.from_cif_file(cif_path) + + assert tsc.scatterers == ["C1", "C2", "O1"] + assert len(tsc.data) == 2 + assert (1, 0, 0) in tsc.data + + +def test_tscb_from_cif_file(tmp_path, sample_tsc_cif_content): + """Test creating TSCB from CIF file.""" + cif_path = tmp_path / "test.cif" + cif_path.write_text(sample_tsc_cif_content) + + tscb = TSCBFile.from_cif_file(cif_path) + + assert tscb.scatterers == ["C1", "C2", "O1"] + assert len(tscb.data) == 2 + assert (1, 0, 0) in tscb.data + + +def test_populate_from_cif_block_missing_entries(): + """Test error handling when CIF block is missing required entries.""" + from iotbx.cif.model import block + + # Create incomplete block missing required entries + incomplete_block = block() + incomplete_block.add_data_item("_cell.length_a", "10.0") + + tsc = TSCFile() + + with pytest.raises(ValueError, match="CIF block does not contain required TSC entries"): + tsc.populate_from_cif_block(incomplete_block) + + +def test_populate_from_cif_block_missing_aff_loop(): + """Test error handling when AFF loop is missing.""" + from iotbx.cif.model import block + + # Create block with metadata but no AFF loop + incomplete_block = block() + incomplete_block.add_data_item("_aspheric_ffs.source", "test") + incomplete_block.add_data_item("_aspheric_ffs_partitioning.name", "test") + incomplete_block.add_data_item("_aspheric_ffs_partitioning.software", "test") + + tsc = TSCFile() + + with pytest.raises(KeyError): + tsc.populate_from_cif_block(incomplete_block) + + +def test_populate_from_cif_mismatched_atom_count(): + """Test error when AFF values don't match atom count.""" + from iotbx.cif.model import block, loop + + # Create block with mismatched data + test_block = block() + test_block.add_data_item("_aspheric_ffs.source", "test") + test_block.add_data_item("_aspheric_ffs_partitioning.name", "test") + test_block.add_data_item("_aspheric_ffs_partitioning.software", "test") + + # Moiety loop with 2 atoms + moiety_data = 
{ + "_wfn_moiety.asu_atom_site_label": ["C1", "C2"] + } + test_block.add_loop(loop(data=moiety_data)) + + # AFF loop with 3 values (mismatch) + aff_data = { + "_aspheric_ff.index_h": [1], + "_aspheric_ff.index_k": [0], + "_aspheric_ff.index_l": [0], + "_aspheric_ff.form_factor_real": ["[1.0 2.0 3.0]"], # 3 values + "_aspheric_ff.form_factor_imag": ["[0.0 1.0 -1.0]"] # 3 values + } + test_block.add_loop(loop(data=aff_data)) + + tsc = TSCFile() + + with pytest.raises(ValueError, match="Number of AFF values is not a multiple of number of scatterers"): + tsc.populate_from_cif_block(test_block) + + +def test_construct_aff_loop_formatting(): + """Test that AFF loop formats form factors correctly.""" + tsc = TSCFile() + tsc.data = { + (1, 0, 0): np.array([1.23456789+0.0j, 2.345+1.0j]), + (-1, 2, -3): np.array([4.567-0.5j, 6.789+0.25j]) + } + + aff_loop = tsc._construct_aff_loop() + + # Verify structure + assert len(aff_loop["_aspheric_ff.index_h"]) == 2 + assert aff_loop["_aspheric_ff.index_h"][0] == "1" + assert aff_loop["_aspheric_ff.index_k"][1] == "2" + assert aff_loop["_aspheric_ff.index_l"][1] == "-3" + + # Verify formatting (should be wrapped in brackets with proper precision) + real_line = aff_loop["_aspheric_ff.form_factor_real"][0] + assert real_line.startswith("[") + assert real_line.endswith("]") + assert "1.23456789" in real_line + + +def test_round_trip_tsc_cif_conversion(tmp_path, structure_cif_block): + """Test round-trip TSC -> CIF -> TSC conversion preserves data.""" + # Create original TSC + original_tsc = TSCFile() + original_tsc.scatterers = ["C1", "C2", "O1"] + original_tsc.data = { + (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]), + (-1, -2, 3): np.array([7.0+2.0j, 8.0-2.0j, 9.0+0.1j]) + } + + # Convert to CIF + cif_block = original_tsc.to_cif( + structure_cif_block, + partitioning_source="test", + partitioning_name="test", + partitioning_software="test" + ) + + # Convert back to TSC + reconstructed_tsc = TSCFile() + reconstructed_tsc.populate_from_cif_block(cif_block) + + # Verify data is preserved + assert reconstructed_tsc.scatterers == original_tsc.scatterers + assert len(reconstructed_tsc.data) == len(original_tsc.data) + + for hkl in original_tsc.data: + assert hkl in reconstructed_tsc.data + np.testing.assert_allclose( + reconstructed_tsc.data[hkl], + original_tsc.data[hkl], + rtol=1e-6 + ) \ No newline at end of file From 2c834dd1ba1084400f2031b0342362b829a22273 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 11:50:15 +0100 Subject: [PATCH 4/6] linting for tsc file handling and tests --- qcrboxtools/cif/file_converter/tsc.py | 2 +- tests/cif/convert/test_tsc.py | 195 ++++++++++++-------------- 2 files changed, 93 insertions(+), 104 deletions(-) diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 553b6e1..2cd8fc2 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -512,5 +512,5 @@ def from_cif_file(cls, cif_path: Path) -> "TSCBFile": """ cif_block = read_cif_as_unified(cif_path, 0) new_obj = cls() - new_obj.populate_from_cif_block(cif_block) + new_obj.populate_from_cif_block(cif_block) return new_obj diff --git a/tests/cif/convert/test_tsc.py b/tests/cif/convert/test_tsc.py index be16321..f404728 100644 --- a/tests/cif/convert/test_tsc.py +++ b/tests/cif/convert/test_tsc.py @@ -6,8 +6,6 @@ """ import struct -from pathlib import Path -from unittest.mock import patch import numpy as np import pytest @@ 
-48,11 +46,11 @@ def sample_header_string(): def sample_tscb_file(tmp_path): """Create a sample TSCB file for testing.""" tscb_path = tmp_path / "test.tscb" - + # Create minimal TSCB file content header_str = "TITLE: Test TSCB\nSYMM: expanded" scatterers_str = "C1 C2 O1" - + with open(tscb_path, "wb") as f: # Write header size and scatterers size f.write(struct.pack("2i", len(header_str), len(scatterers_str))) @@ -61,23 +59,23 @@ def sample_tscb_file(tmp_path): f.write(scatterers_str.encode("ASCII")) # Write number of reflections f.write(struct.pack("i", 2)) - + # Write reflection data: hkl + form factors for 3 atoms # Reflection 1: (1,0,0) f.write(struct.pack("3i", 1, 0, 0)) - f.write(np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j], dtype=np.complex128).tobytes()) - - # Reflection 2: (0,1,0) + f.write(np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j], dtype=np.complex128).tobytes()) + + # Reflection 2: (0,1,0) f.write(struct.pack("3i", 0, 1, 0)) - f.write(np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j], dtype=np.complex128).tobytes()) - + f.write(np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j], dtype=np.complex128).tobytes()) + return tscb_path def test_parse_header(sample_header_string): """Test header parsing functionality.""" header = parse_header(sample_header_string) - + assert header["TITLE"] == " Test Header" assert header["SYMM"] == " expanded" assert header["SCATTERERS"] == " C1 C2 O1" @@ -94,7 +92,7 @@ def test_parse_tsc_data_line(): """Test parsing of TSC data lines.""" line = "1 0 0 1.23450000e+00,0.00000000e+00 2.34560000e+00,1.00000000e+00" hkl, f0js = parse_tsc_data_line(line) - + assert hkl == (1, 0, 0) assert len(f0js) == 2 assert np.isclose(f0js[0], 1.2345 + 0.0j) @@ -105,7 +103,7 @@ def test_parse_tsc_data_line_negative_indices(): """Test parsing TSC data line with negative Miller indices.""" line = "-1 2 -3 1.00000000e+00,2.00000000e+00" hkl, f0js = parse_tsc_data_line(line) - + assert hkl == (-1, 2, -3) assert len(f0js) == 1 assert np.isclose(f0js[0], 1.0 + 2.0j) @@ -115,9 +113,9 @@ def test_read_tsc_file_tsc_extension(tmp_path, sample_tsc_content): """Test reading a .tsc file.""" tsc_path = tmp_path / "test.tsc" tsc_path.write_text(sample_tsc_content) - + result = read_tsc_file(tsc_path) - + assert isinstance(result, TSCFile) assert result.scatterers == ["C1", "C2", "O1"] assert (1, 0, 0) in result.data @@ -127,7 +125,7 @@ def test_read_tsc_file_tsc_extension(tmp_path, sample_tsc_content): def test_read_tsc_file_tscb_extension(sample_tscb_file): """Test reading a .tscb file.""" result = read_tsc_file(sample_tscb_file) - + assert isinstance(result, TSCBFile) assert result.scatterers == ["C1", "C2", "O1"] assert (1, 0, 0) in result.data @@ -138,7 +136,7 @@ def test_read_tsc_file_invalid_tsc(tmp_path): """Test reading invalid TSC file raises ValueError.""" invalid_tsc = tmp_path / "invalid.tsc" invalid_tsc.write_text("This is not a valid TSC file") - + with pytest.raises(ValueError, match="Cannot read AFF file"): read_tsc_file(invalid_tsc) @@ -147,7 +145,7 @@ def test_read_tsc_file_invalid_tscb(tmp_path): """Test reading invalid TSCB file raises ValueError.""" invalid_tscb = tmp_path / "invalid.tscb" invalid_tscb.write_bytes(b"This is not a valid TSCB file") - + with pytest.raises(ValueError, match="Cannot read AFF file"): read_tsc_file(invalid_tscb) @@ -156,7 +154,7 @@ def test_tsc_file_scatterers_property(): """Test TSCFile scatterers property.""" tsc = TSCFile() tsc.header["SCATTERERS"] = "C1 C2 O1 N1" - + assert tsc.scatterers == ["C1", "C2", "O1", "N1"] @@ -164,7 +162,7 @@ def 
test_tsc_file_scatterers_setter(): """Test TSCFile scatterers setter.""" tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] - + assert tsc.header["SCATTERERS"] == "C1 C2 O1" @@ -173,16 +171,13 @@ def test_tsc_file_getitem_single_atom(): tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), - (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), + (0, 1, 0): np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j]), } - + result = tsc["C1"] - expected = { - (1, 0, 0): 1.0+0.0j, - (0, 1, 0): 4.0+0.5j - } - + expected = {(1, 0, 0): 1.0 + 0.0j, (0, 1, 0): 4.0 + 0.5j} + assert len(result) == 2 for hkl, value in expected.items(): assert np.isclose(result[hkl], value) @@ -193,12 +188,12 @@ def test_tsc_file_getitem_multiple_atoms(): tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), } - + result = tsc[["C1", "O1"]] - expected_values = np.array([1.0+0.0j, 3.0-1.0j]) - + expected_values = np.array([1.0 + 0.0j, 3.0 - 1.0j]) + assert len(result) == 1 assert np.allclose(result[(1, 0, 0)], expected_values) @@ -207,7 +202,7 @@ def test_tsc_file_getitem_unknown_atom(): """Test TSCFile indexing with unknown atom raises ValueError.""" tsc = TSCFile() tsc.scatterers = ["C1", "C2"] - + with pytest.raises(ValueError, match="Unknown atom label.*O1"): tsc["O1"] @@ -216,9 +211,9 @@ def test_tsc_file_from_file(tmp_path, sample_tsc_content): """Test TSCFile.from_file method.""" tsc_path = tmp_path / "test.tsc" tsc_path.write_text(sample_tsc_content) - + tsc = TSCFile.from_file(tsc_path) - + assert tsc.header["TITLE"] == " Test TSC File" assert tsc.scatterers == ["C1", "C2", "O1"] assert len(tsc.data) == 2 @@ -228,11 +223,11 @@ def test_tsc_file_to_file(tmp_path): """Test TSCFile.to_file method.""" tsc = TSCFile() tsc.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} - tsc.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j])} - + tsc.data = {(1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j])} + output_path = tmp_path / "output.tsc" tsc.to_file(output_path) - + # Verify file was written correctly content = output_path.read_text() assert "TITLE: Test" in content @@ -243,7 +238,7 @@ def test_tsc_file_to_file(tmp_path): def test_tscb_file_from_file(sample_tscb_file): """Test TSCBFile.from_file method.""" tscb = TSCBFile.from_file(sample_tscb_file) - + assert tscb.scatterers == ["C1", "C2", "O1"] assert len(tscb.data) == 2 assert (1, 0, 0) in tscb.data @@ -254,15 +249,15 @@ def test_tscb_file_to_file(tmp_path): """Test TSCBFile.to_file method.""" tscb = TSCBFile() tscb.header = {"TITLE": "Test", "SYMM": "expanded", "SCATTERERS": "C1 C2"} - tscb.data = {(1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j], dtype=np.complex128)} - + tscb.data = {(1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j], dtype=np.complex128)} + output_path = tmp_path / "output.tscb" tscb.to_file(output_path) - + # Verify file exists and has content assert output_path.exists() assert output_path.stat().st_size > 0 - + # Try to read it back tscb_read = TSCBFile.from_file(output_path) assert tscb_read.scatterers == ["C1", "C2"] @@ -273,19 +268,20 @@ def test_tscb_file_empty_header(tmp_path): """Test TSCBFile with empty additional header.""" tscb_path = tmp_path / "test_empty_header.tscb" scatterers_str = "C1" - + with open(tscb_path, "wb") as f: # Write zero header size f.write(struct.pack("2i", 0, 
len(scatterers_str))) f.write(scatterers_str.encode("ASCII")) f.write(struct.pack("i", 1)) # One reflection f.write(struct.pack("3i", 1, 0, 0)) - f.write(np.array([1.0+0.0j], dtype=np.complex128).tobytes()) - + f.write(np.array([1.0 + 0.0j], dtype=np.complex128).tobytes()) + tscb = TSCBFile.from_file(tscb_path) assert tscb.scatterers == ["C1"] assert len(tscb.data) == 1 + @pytest.fixture def sample_structure_cif_content(): """Sample structure CIF content for testing.""" @@ -356,8 +352,9 @@ def structure_cif_block(tmp_path, sample_structure_cif_content): """Create a structure CIF file and return the first block.""" cif_path = tmp_path / "structure.cif" cif_path.write_text(sample_structure_cif_content) - + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) @@ -366,8 +363,9 @@ def tsc_cif_block(tmp_path, sample_tsc_cif_content): """Create a TSC CIF file and return the first block.""" cif_path = tmp_path / "tsc.cif" cif_path.write_text(sample_tsc_cif_content) - + from qcrboxtools.cif.read import read_cif_as_unified + return read_cif_as_unified(cif_path, 0) @@ -377,33 +375,33 @@ def test_tsc_to_cif_conversion(structure_cif_block): tsc = TSCFile() tsc.scatterers = ["C1", "C2", "O1"] tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), - (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]) + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), + (0, 1, 0): np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j]), } - + # Convert to CIF cif_block = tsc.to_cif( structure_cif_block, partitioning_source="test_source", - partitioning_name="test_partitioning", - partitioning_software="test_software" + partitioning_name="test_partitioning", + partitioning_software="test_software", ) - + # Verify cell parameters are preserved assert cif_block["_cell.length_a"] == "10.000" assert cif_block["_cell.length_b"] == "12.000" assert cif_block["_cell.angle_beta"] == "95.0" - + # Verify partitioning metadata assert cif_block["_aspheric_ffs.source"] == "test_source" assert cif_block["_aspheric_ffs_partitioning.name"] == "test_partitioning" assert cif_block["_aspheric_ffs_partitioning.software"] == "test_software" - + # Verify moiety loop exists moiety_loop = cif_block.get_loop("_wfn_moiety") assert moiety_loop is not None assert len(moiety_loop["_wfn_moiety.atom_id"]) == 3 - + # Verify AFF loop exists aff_loop = cif_block.get_loop("_aspheric_ff") assert aff_loop is not None @@ -414,29 +412,29 @@ def test_tsc_populate_from_cif_block(tsc_cif_block): """Test populating TSC from CIF block.""" tsc = TSCFile() tsc.populate_from_cif_block(tsc_cif_block) - + # Verify scatterers assert tsc.scatterers == ["C1", "C2", "O1"] - + # Verify data was loaded correctly assert len(tsc.data) == 2 assert (1, 0, 0) in tsc.data assert (0, 1, 0) in tsc.data - + # Verify form factor values hkl_100_data = tsc.data[(1, 0, 0)] - assert np.isclose(hkl_100_data[0], 1.0+0.0j) - assert np.isclose(hkl_100_data[1], 2.0+1.0j) - assert np.isclose(hkl_100_data[2], 3.0-1.0j) + assert np.isclose(hkl_100_data[0], 1.0 + 0.0j) + assert np.isclose(hkl_100_data[1], 2.0 + 1.0j) + assert np.isclose(hkl_100_data[2], 3.0 - 1.0j) def test_tsc_from_cif_file(tmp_path, sample_tsc_cif_content): """Test creating TSC from CIF file.""" cif_path = tmp_path / "test.cif" cif_path.write_text(sample_tsc_cif_content) - + tsc = TSCFile.from_cif_file(cif_path) - + assert tsc.scatterers == ["C1", "C2", "O1"] assert len(tsc.data) == 2 assert (1, 0, 0) in tsc.data @@ -446,9 +444,9 @@ def test_tscb_from_cif_file(tmp_path, 
sample_tsc_cif_content): """Test creating TSCB from CIF file.""" cif_path = tmp_path / "test.cif" cif_path.write_text(sample_tsc_cif_content) - + tscb = TSCBFile.from_cif_file(cif_path) - + assert tscb.scatterers == ["C1", "C2", "O1"] assert len(tscb.data) == 2 assert (1, 0, 0) in tscb.data @@ -457,13 +455,13 @@ def test_tscb_from_cif_file(tmp_path, sample_tsc_cif_content): def test_populate_from_cif_block_missing_entries(): """Test error handling when CIF block is missing required entries.""" from iotbx.cif.model import block - + # Create incomplete block missing required entries incomplete_block = block() incomplete_block.add_data_item("_cell.length_a", "10.0") - + tsc = TSCFile() - + with pytest.raises(ValueError, match="CIF block does not contain required TSC entries"): tsc.populate_from_cif_block(incomplete_block) @@ -471,15 +469,15 @@ def test_populate_from_cif_block_missing_entries(): def test_populate_from_cif_block_missing_aff_loop(): """Test error handling when AFF loop is missing.""" from iotbx.cif.model import block - + # Create block with metadata but no AFF loop incomplete_block = block() incomplete_block.add_data_item("_aspheric_ffs.source", "test") incomplete_block.add_data_item("_aspheric_ffs_partitioning.name", "test") incomplete_block.add_data_item("_aspheric_ffs_partitioning.software", "test") - + tsc = TSCFile() - + with pytest.raises(KeyError): tsc.populate_from_cif_block(incomplete_block) @@ -487,31 +485,29 @@ def test_populate_from_cif_block_missing_aff_loop(): def test_populate_from_cif_mismatched_atom_count(): """Test error when AFF values don't match atom count.""" from iotbx.cif.model import block, loop - + # Create block with mismatched data test_block = block() test_block.add_data_item("_aspheric_ffs.source", "test") test_block.add_data_item("_aspheric_ffs_partitioning.name", "test") test_block.add_data_item("_aspheric_ffs_partitioning.software", "test") - + # Moiety loop with 2 atoms - moiety_data = { - "_wfn_moiety.asu_atom_site_label": ["C1", "C2"] - } + moiety_data = {"_wfn_moiety.asu_atom_site_label": ["C1", "C2"]} test_block.add_loop(loop(data=moiety_data)) - + # AFF loop with 3 values (mismatch) aff_data = { "_aspheric_ff.index_h": [1], - "_aspheric_ff.index_k": [0], + "_aspheric_ff.index_k": [0], "_aspheric_ff.index_l": [0], "_aspheric_ff.form_factor_real": ["[1.0 2.0 3.0]"], # 3 values - "_aspheric_ff.form_factor_imag": ["[0.0 1.0 -1.0]"] # 3 values + "_aspheric_ff.form_factor_imag": ["[0.0 1.0 -1.0]"], # 3 values } test_block.add_loop(loop(data=aff_data)) - + tsc = TSCFile() - + with pytest.raises(ValueError, match="Number of AFF values is not a multiple of number of scatterers"): tsc.populate_from_cif_block(test_block) @@ -520,12 +516,12 @@ def test_construct_aff_loop_formatting(): """Test that AFF loop formats form factors correctly.""" tsc = TSCFile() tsc.data = { - (1, 0, 0): np.array([1.23456789+0.0j, 2.345+1.0j]), - (-1, 2, -3): np.array([4.567-0.5j, 6.789+0.25j]) + (1, 0, 0): np.array([1.23456789 + 0.0j, 2.345 + 1.0j]), + (-1, 2, -3): np.array([4.567 - 0.5j, 6.789 + 0.25j]), } - + aff_loop = tsc._construct_aff_loop() - + # Verify structure assert len(aff_loop["_aspheric_ff.index_h"]) == 2 assert aff_loop["_aspheric_ff.index_h"][0] == "1" @@ -545,31 +541,24 @@ def test_round_trip_tsc_cif_conversion(tmp_path, structure_cif_block): original_tsc = TSCFile() original_tsc.scatterers = ["C1", "C2", "O1"] original_tsc.data = { - (1, 0, 0): np.array([1.0+0.0j, 2.0+1.0j, 3.0-1.0j]), - (0, 1, 0): np.array([4.0+0.5j, 5.0-0.5j, 6.0+0.25j]), - (-1, -2, 3): 
np.array([7.0+2.0j, 8.0-2.0j, 9.0+0.1j]) + (1, 0, 0): np.array([1.0 + 0.0j, 2.0 + 1.0j, 3.0 - 1.0j]), + (0, 1, 0): np.array([4.0 + 0.5j, 5.0 - 0.5j, 6.0 + 0.25j]), + (-1, -2, 3): np.array([7.0 + 2.0j, 8.0 - 2.0j, 9.0 + 0.1j]), } - + # Convert to CIF cif_block = original_tsc.to_cif( - structure_cif_block, - partitioning_source="test", - partitioning_name="test", - partitioning_software="test" + structure_cif_block, partitioning_source="test", partitioning_name="test", partitioning_software="test" ) - + # Convert back to TSC reconstructed_tsc = TSCFile() reconstructed_tsc.populate_from_cif_block(cif_block) - + # Verify data is preserved assert reconstructed_tsc.scatterers == original_tsc.scatterers assert len(reconstructed_tsc.data) == len(original_tsc.data) - + for hkl in original_tsc.data: assert hkl in reconstructed_tsc.data - np.testing.assert_allclose( - reconstructed_tsc.data[hkl], - original_tsc.data[hkl], - rtol=1e-6 - ) \ No newline at end of file + np.testing.assert_allclose(reconstructed_tsc.data[hkl], original_tsc.data[hkl], rtol=1e-6) From 7862c0b9bea1f41770cec6d5d6187b41bde284bc Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 12:06:32 +0100 Subject: [PATCH 5/6] More improvements --- qcrboxtools/cif/file_converter/tsc.py | 31 ++++++++++++++++++++++----- tests/cif/convert/test_tsc.py | 6 +++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 2cd8fc2..5a857e2 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -38,7 +38,7 @@ def read_tsc_file(path: Path): try: return TSCFile.from_file(path) except Exception: - raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + raise ValueError(f"Cannot read TSCB file: {str(path)}") from exc elif path.suffix == ".tsc": try: return TSCFile.from_file(path) @@ -46,10 +46,25 @@ def read_tsc_file(path: Path): try: return TSCBFile.from_file(path) except Exception: - raise ValueError(f"Cannot read AFF file: {str(path)}") from exc + raise ValueError(f"Cannot read TSC file: {str(path)}") from exc def parse_header(header_str): + """ + Parses the header section of a TSC file. + + Parameters + ---------- + header_str : str + The header section of the TSC file as a string. + + Returns + ------- + dict + A dictionary containing the parsed header information. + """ + if not header_str.strip(): + return {} header = {} header_split = iter(val.split(":") for val in header_str.strip().split("\n")) @@ -60,8 +75,10 @@ def parse_header(header_str): header[header_key] = header_entry if len(line_split) == 2: header_key, header_entry = line_split - else: + elif len(line_split) == 1 and header_key is not None: header_entry += "\n" + line_split[0] + else: + raise ValueError(f"Malformed header line: {':'.join(line_split)}") header[header_key] = header_entry return header @@ -80,8 +97,10 @@ def parse_tsc_data_line(line: str) -> Tuple[Tuple[int, int, int], np.ndarray]: tuple A tuple containing the indices h, k, l and the array of f0j values. 
""" + h_str, k_str, l_str, *f0j_strs = line.split() - f0js = np.array([float(val.split(",")[0]) + 1j * float(val.split(",")[1]) for val in f0j_strs]) + parts = (val.split(",") for val in f0j_strs) + f0js = np.array([float(real_val) + 1j * float(imag_val) for real_val, imag_val in parts]) return (int(h_str), int(k_str), int(l_str)), f0js @@ -308,7 +327,9 @@ def populate_from_cif_block(self, cif_block: block): if len(all_affs) % n_atoms != 0: raise ValueError("Number of AFF values is not a multiple of number of scatterers.") all_affs = all_affs.reshape((-1, n_atoms)) - self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs, strict=False)} + if len(hkl_tuples) != len(all_affs): + raise ValueError("Number of Miller indices does not match number of AFF value sets.") + self.data = {hkl: affs for hkl, affs in zip(hkl_tuples, all_affs)} class TSCFile(TSCBase): diff --git a/tests/cif/convert/test_tsc.py b/tests/cif/convert/test_tsc.py index f404728..72ee345 100644 --- a/tests/cif/convert/test_tsc.py +++ b/tests/cif/convert/test_tsc.py @@ -85,7 +85,7 @@ def test_parse_header(sample_header_string): def test_parse_header_empty(): """Test header parsing with empty string.""" header = parse_header("") - assert header == {None: "\n"} + assert header == {} def test_parse_tsc_data_line(): @@ -137,7 +137,7 @@ def test_read_tsc_file_invalid_tsc(tmp_path): invalid_tsc = tmp_path / "invalid.tsc" invalid_tsc.write_text("This is not a valid TSC file") - with pytest.raises(ValueError, match="Cannot read AFF file"): + with pytest.raises(ValueError, match="Cannot read TSC file"): read_tsc_file(invalid_tsc) @@ -146,7 +146,7 @@ def test_read_tsc_file_invalid_tscb(tmp_path): invalid_tscb = tmp_path / "invalid.tscb" invalid_tscb.write_bytes(b"This is not a valid TSCB file") - with pytest.raises(ValueError, match="Cannot read AFF file"): + with pytest.raises(ValueError, match="Cannot read TSCB file"): read_tsc_file(invalid_tscb) From 6e3ae5f2b96cd76c98e2e1ed22b57acf0876a7b7 Mon Sep 17 00:00:00 2001 From: Niolon Date: Mon, 29 Sep 2025 12:12:19 +0100 Subject: [PATCH 6/6] Further small improvements to tsc handling module --- qcrboxtools/cif/file_converter/tsc.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/qcrboxtools/cif/file_converter/tsc.py b/qcrboxtools/cif/file_converter/tsc.py index 5a857e2..3e9d221 100644 --- a/qcrboxtools/cif/file_converter/tsc.py +++ b/qcrboxtools/cif/file_converter/tsc.py @@ -97,7 +97,7 @@ def parse_tsc_data_line(line: str) -> Tuple[Tuple[int, int, int], np.ndarray]: tuple A tuple containing the indices h, k, l and the array of f0j values. """ - + h_str, k_str, l_str, *f0j_strs = line.split() parts = (val.split(",") for val in f0j_strs) f0js = np.array([float(real_val) + 1j * float(imag_val) for real_val, imag_val in parts]) @@ -379,9 +379,9 @@ def from_file(cls, filename: Path) -> "TSCFile": new_obj.header.update(parse_header(header_str)) - parsed_iter = iter(parse_tsc_data_line(line) for line in data_str.strip().split("\n")) + parsed_data_lines = iter(parse_tsc_data_line(line) for line in data_str.strip().split("\n")) - new_obj.data = {hkl: f0js for hkl, f0js in parsed_iter} + new_obj.data = {hkl: f0js for hkl, f0js in parsed_data_lines} return new_obj @@ -399,14 +399,12 @@ def to_file(self, filename: Path) -> None: The name of the file to write. 
""" header_str = "\n".join(f"{key}: {value}" for key, value in self.header.items()) - data_iter = iter( - ( - f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} " - + f"{' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" - ) + formatted_data_lines = ( + f"{int(hkl[0])} {int(hkl[1])} {int(hkl[2])} " + + f"{' '.join(f'{np.real(val):.8e},{np.imag(val):.8e}' for val in values)}" for hkl, values in self.data.items() ) - data_str = "\n".join(data_iter) + data_str = "\n".join(formatted_data_lines) with open(filename, "w") as fobj: fobj.write(f"{header_str}\nDATA:\n{data_str}\n")