From 29600f365165a7ecd958a90b9ab97a2d82a1618b Mon Sep 17 00:00:00 2001
From: Dima Molodenskiy
Date: Wed, 12 Nov 2025 11:48:51 +0100
Subject: [PATCH] Use parser from pip

---
 alphapulldown/scripts/parse_input.py          |  15 +-
 alphapulldown/scripts/run_multimer_jobs.py    |  15 +-
 .../scripts/split_jobs_into_clusters.py       |  16 +-
 alphapulldown/utils/create_combinations.py    |  62 ------
 alphapulldown/utils/modelling_setup.py        | 126 +++---------
 setup.cfg                                     |   1 +
 test/check_alphafold2_predictions.py          |  27 ++-
 test/check_alphafold3_predictions.py          |  18 +-
 test/check_alphalink_predictions.py           |  29 +--
 test/test_parse_fold.py                       | 186 ------------------
 test/test_python_imports.py                   |   2 -
 11 files changed, 82 insertions(+), 415 deletions(-)
 delete mode 100644 alphapulldown/utils/create_combinations.py
 delete mode 100644 test/test_parse_fold.py

diff --git a/alphapulldown/scripts/parse_input.py b/alphapulldown/scripts/parse_input.py
index 452952dd..c2282c77 100644
--- a/alphapulldown/scripts/parse_input.py
+++ b/alphapulldown/scripts/parse_input.py
@@ -2,8 +2,7 @@
 from absl import flags, app, logging
 import json
 from alphapulldown.utils.modelling_setup import parse_fold, create_custom_info
-from alphapulldown.utils.create_combinations import process_files
-import io
+from alphapulldown_input_parser import generate_fold_specifications
 
 logging.set_verbosity(logging.INFO)
 
@@ -23,16 +22,12 @@ FLAGS = flags.FLAGS
 
 
 def main(argv):
-    buffer = io.StringIO()
-    _ = process_files(
+    specifications = generate_fold_specifications(
         input_files=FLAGS.input_list,
-        output_path=buffer,
-        exclude_permutations = True
+        delimiter=FLAGS.protein_delimiter,
+        exclude_permutations=True,
     )
-    buffer.seek(0)
-    all_folds = buffer.readlines()
-    all_folds = [x.strip() for x in all_folds]
-    parsed = parse_fold(all_folds, FLAGS.features_directory, FLAGS.protein_delimiter)
+    parsed = parse_fold(specifications, FLAGS.features_directory, FLAGS.protein_delimiter)
     data = create_custom_info(parsed)
 
     with open(FLAGS.output_prefix + "data.json", 'w') as out_f:
diff --git a/alphapulldown/scripts/run_multimer_jobs.py b/alphapulldown/scripts/run_multimer_jobs.py
index 5d09ba34..a92e3807 100644
--- a/alphapulldown/scripts/run_multimer_jobs.py
+++ b/alphapulldown/scripts/run_multimer_jobs.py
@@ -5,7 +5,6 @@
 Authors: Dingquan Yu, Valentin Maurer
 """
 
-import io
 import warnings
 import subprocess
 from absl import app, logging, flags
@@ -14,7 +13,7 @@
 import jax
 gpus = jax.local_devices(backend='gpu')
 from alphapulldown.scripts.run_structure_prediction import FLAGS
-from alphapulldown.utils.create_combinations import process_files
+from alphapulldown_input_parser import generate_fold_specifications
 
 logging.set_verbosity(logging.INFO)
 
@@ -55,16 +54,12 @@ def main(argv):
             DeprecationWarning,
         )
 
-    buffer = io.StringIO()
-    _ = process_files(
+    specifications = generate_fold_specifications(
         input_files=protein_lists,
-        output_path=buffer,
-        exclude_permutations = True
+        delimiter="+",
+        exclude_permutations=True,
    )
-    buffer.seek(0)
-    all_folds = buffer.readlines()
-    all_folds = [x.strip().replace(",", ":") for x in all_folds]
-    all_folds = [x.strip().replace(";", "+") for x in all_folds]
+    all_folds = [spec.replace(",", ":").replace(";", "+") for spec in specifications]
     if FLAGS.dry_run:
         logging.info(f"Dry run: the total number of jobs to be run: {len(all_folds)}")
         sys.exit(0)
diff --git a/alphapulldown/scripts/split_jobs_into_clusters.py b/alphapulldown/scripts/split_jobs_into_clusters.py
index 1781aca6..735c41a7 100644
--- a/alphapulldown/scripts/split_jobs_into_clusters.py
+++ b/alphapulldown/scripts/split_jobs_into_clusters.py
@@ -1,6 +1,5 @@
 import argparse
-import io
-from alphapulldown.utils.create_combinations import process_files
+from alphapulldown_input_parser import generate_fold_specifications
 from alphapulldown.utils.modelling_setup import parse_fold, create_custom_info, create_interactors
 from alphapulldown.objects import MultimericObject
 import pandas as pd
@@ -132,20 +131,21 @@ def main():
     protein_lists = args.protein_lists
     if args.mode == "all_vs_all":
         protein_lists = [args.protein_lists[0], args.protein_lists[0]]
-    # buffer = io.StringIO()
     import time
     start = time.time()
-    all_combinations = process_files(input_files=protein_lists)
+    specifications = generate_fold_specifications(
+        input_files=protein_lists,
+        delimiter=args.protein_delimiter,
+        exclude_permutations=True,
+    )
 
-    all_folds = ["+".join(combo) for combo in all_combinations]
-    all_folds = [x.strip().replace(",", ":") for x in all_folds]
-    all_folds = [x.strip().replace(";", "+") for x in all_folds]
+    all_folds = [spec.replace(",", ":").replace(";", "+") for spec in specifications]
     end = time.time()
     diff1 = end - start
     cluster_jobs(all_folds, args)
     end = time.time()
     diff2 = end - start
-    logger.info(f"process_files steps takes {diff1}s and total time is: {diff2}")
+    logger.info(f"generate_fold_specifications step takes {diff1}s and total time is: {diff2}")
 
 
 if __name__ == "__main__":
diff --git a/alphapulldown/utils/create_combinations.py b/alphapulldown/utils/create_combinations.py
deleted file mode 100644
index 069aab29..00000000
--- a/alphapulldown/utils/create_combinations.py
+++ /dev/null
@@ -1,62 +0,0 @@
-""" Computes cartesian product of lines in multiple files.
-
-    Copyright (c) 2024 European Molecular Biology Laboratory
-
-    Author: Valentin Maurer
-"""
-
-import argparse
-import itertools
-from contextlib import nullcontext
-from typing import List, Union, TextIO
-
-def read_file(filepath : str):
-    with open(filepath, mode = "r", encoding = "utf-8") as file:
-        lines = file.read().splitlines()
-    return list(line.lstrip().rstrip() for line in lines if line)
-
-def process_files(input_files : List[str],
-                  output_path : Union[str, TextIO] = None,
-                  delimiter : str = '+',
-                  exclude_permutations : bool = True
-                  ):
-    """Process the input files to compute the Cartesian product and write to the output file."""
-    lists_of_lines = [read_file(filepath) for filepath in input_files]
-    cartesian_product = list(itertools.product(*lists_of_lines))
-
-    if exclude_permutations:
-        keep = []
-        unique_elements = set()
-        for combination in cartesian_product:
-            sorted_combination = tuple(sorted(combination))
-            if sorted_combination in unique_elements:
-                continue
-            unique_elements.add(sorted_combination)
-            keep.append(combination)
-        cartesian_product = keep
-
-    if output_path is None:
-        return cartesian_product
-    else:
-        context_manager = nullcontext(output_path)
-        if isinstance(output_path, str):
-            context_manager = open(output_path, mode = "w", encoding = "utf-8")
-
-        with context_manager as output_file:
-            for combination in cartesian_product:
-                output_file.write(delimiter.join(combination) + '\n')
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Compute cartesian product of lines in multiple files."
-    )
-    parser.add_argument('input_files', nargs='+', help="List of input files.")
-    parser.add_argument('--output', required=True, help="Path to output file.")
-    parser.add_argument('--delimiter', default='_', help="Delimiter for line from each file.")
-
-    args = parser.parse_args()
-
-    process_files(args.input_files, args.output, args.delimiter)
-
-if __name__ == "__main__":
-    main()
diff --git a/alphapulldown/utils/modelling_setup.py b/alphapulldown/utils/modelling_setup.py
index ff00a11c..fcd5ba00 100644
--- a/alphapulldown/utils/modelling_setup.py
+++ b/alphapulldown/utils/modelling_setup.py
@@ -4,15 +4,15 @@
 #
 import os
-import sys
 import pickle
 import lzma
-from typing import List,Dict,Union
+from typing import Dict, List, Tuple, Union
 import numpy as np
 from alphafold.data.tools import jackhmmer
 from alphafold.data import templates
+from alphapulldown_input_parser import RegionSelection
+from alphapulldown_input_parser import parse_fold as _external_parse_fold
 from alphapulldown.objects import MonomericObject
-from os.path import exists,join
 from alphapulldown.objects import ChoppedObject
 from alphapulldown.utils.file_handling import make_dir_monomer_dictionary
 from absl import logging
 
@@ -20,107 +20,37 @@
 
 
-def parse_fold(input_list, features_directory, protein_delimiter):
-    """
-    Parses a list of protein fold specifications and returns structured folding jobs.
-
-    Args:
-        input_list (list): List of protein fold specifications as strings.
-        features_directory (list): List of directories to search for protein feature files.
-        protein_delimiter (str): Delimiter used to separate different protein folds.
-
-    Returns:
-        list: A list of folding jobs, each represented by a list of dictionaries.
-
-    Raises:
-        FileNotFoundError: If any required protein features are missing.
-        ValueError: If the format of the input specifications is incorrect.
-    """
-    def format_error(spec):
-        print(f"Your format: {spec} is wrong. The program will terminate.")
-        sys.exit(1)
-
-    def extract_copy_and_regions(tokens, spec):
-        # try head copy then tail copy, default to 1
-        if len(tokens) > 1:
-            try:
-                return int(tokens[1]), tokens[2:]
-            except ValueError:
-                pass
-            try:
-                return int(tokens[-1]), tokens[1:-1]
-            except ValueError:
-                pass
-        return 1, tokens[1:]
-
-    def parse_regions(region_tokens, spec):
-        if not region_tokens:
-            return "all"
-        regions = []
-        for tok in region_tokens:
-            parts = tok.split("-")
-            if len(parts) != 2:
-                format_error(spec)
-            try:
-                regions.append(tuple(map(int, parts)))
-            except ValueError:
-                format_error(spec)
-        return regions
-
-    def feature_exists(name):
-        return any(
-            exists(join(dirpath, f"{name}{ext}"))
-            for dirpath in features_directory
-            for ext in (".pkl", ".pkl.xz")
-        )
-
-    def json_exists(name):
-        return any(
-            exists(join(dirpath, name))
-            for dirpath in features_directory
-        )
-
-    all_folding_jobs = []
-    missing_features = set()
-
-    for spec in input_list:
-        formatted_folds = []
-        for pf in spec.split(protein_delimiter):
-            # Handle JSON input
-            if pf.endswith('.json'):
-                json_name = pf
-                if json_exists(json_name):
-                    for d in features_directory:
-                        path = join(d, json_name)
-                        if exists(path):
-                            formatted_folds.append({'json_input': path})
-                            break
-                else:
-                    missing_features.add(json_name)
-                continue
-
-            # Handle protein input
-            tokens = pf.split(":")
-            if not tokens or not tokens[0]:
-                format_error(spec)
+def _normalise_fold_entry(entry: Dict[str, Union[str, RegionSelection]]) -> Dict[str, Union[str, List[Tuple[int, int]]]]:
+    """Convert entries from alphapulldown-input-parser into legacy AlphaPulldown format."""
+    if "json_input" in entry:
+        return {"json_input": entry["json_input"]}
 
-            name = tokens[0]
-            number, region_tokens = extract_copy_and_regions(tokens, spec)
-            regions = parse_regions(region_tokens, spec)
+    if len(entry) != 1:
+        return entry
 
-            if not feature_exists(name):
-                missing_features.add(name)
-                continue
+    name, selection = next(iter(entry.items()))
+    if isinstance(selection, RegionSelection):
+        if selection.is_all:
+            value: Union[str, List[Tuple[int, int]]] = "all"
+        else:
+            value = [(region.start, region.end) for region in selection.regions]
+        return {name: value}
 
-            formatted_folds += [{name: regions} for _ in range(number)]
+    return {name: selection}
 
-        if formatted_folds:
-            all_folding_jobs.append(formatted_folds)
-    if missing_features:
-        raise FileNotFoundError(f"{sorted(missing_features)} not found in {features_directory}")
+def parse_fold(input_list, features_directory, protein_delimiter):
+    """Parse fold specifications using alphapulldown-input-parser and normalise the output."""
+    parsed_jobs = _external_parse_fold(
+        input_list=input_list,
+        features_directory=features_directory,
+        protein_delimiter=protein_delimiter,
+    )
 
-    return all_folding_jobs
+    normalised_jobs: List[List[Dict[str, Union[str, List[Tuple[int, int]]]]]] = []
+    for job in parsed_jobs:
+        normalised_jobs.append([_normalise_fold_entry(entry) for entry in job])
+    return normalised_jobs
 
 
 def pad_input_features(feature_dict: dict, desired_num_res : int, desired_num_msa : int) -> None:
diff --git a/setup.cfg b/setup.cfg
index da8695dc..9e109663 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -57,6 +57,7 @@ include_package_data = True
 python_requires = >=3.8
 install_requires =
     absl-py >= 0.13.0
+    alphapulldown-input-parser
    dm-haiku
    dm-tree>=0.1.6
    h5py>=3.1.0
diff --git a/test/check_alphafold2_predictions.py b/test/check_alphafold2_predictions.py
index 27cce716..171ac52b 100755
--- a/test/check_alphafold2_predictions.py
+++ b/test/check_alphafold2_predictions.py
@@ -5,7 +5,6 @@
 """
 from __future__ import annotations
 
-import io
 import os
 import json
 import pickle
@@ -19,7 +18,7 @@
 from absl.testing import absltest, parameterized
 
 import alphapulldown
-from alphapulldown.utils.create_combinations import process_files
+from alphapulldown_input_parser import generate_fold_specifications
 
 # --------------------------------------------------------------------------- #
 #  configuration / logging                                                     #
@@ -161,16 +160,14 @@ def _args(self, *, plist, mode, script):
                 ) + f"={self.test_protein_lists_dir / plist}",
             ]
         else:
-            buffer = io.StringIO()
-            _ = process_files(
+            specifications = generate_fold_specifications(
                 input_files=[str(self.test_protein_lists_dir / plist)],
-                output_path=buffer,
-                exclude_permutations=True
+                delimiter="+",
+                exclude_permutations=True,
             )
-            buffer.seek(0)
             lines = [
-                x.strip().replace(",", ":").replace(";", "+")
-                for x in buffer.readlines() if x.strip()
+                spec.replace(",", ":").replace(";", "+")
+                for spec in specifications if spec.strip()
             ]
             formatted_input = lines[0] if lines else ""
         return [
@@ -321,16 +318,14 @@ def test_dropout_increases_diversity(self):
         no_dropout_output_dir.mkdir(parents=True, exist_ok=True)
 
         # Use simple test input
-        buffer = io.StringIO()
-        _ = process_files(
+        specifications = generate_fold_specifications(
             input_files=[str(self.protein_lists)],
-            output_path=buffer,
-            exclude_permutations=True
+            delimiter="+",
+            exclude_permutations=True,
         )
-        buffer.seek(0)
         lines = [
-            x.strip().replace(",", ":").replace(";", "+")
-            for x in buffer.readlines() if x.strip()
+            spec.replace(",", ":").replace(";", "+")
+            for spec in specifications if spec.strip()
         ]
         formatted_input = lines[0] if lines else ""
 
diff --git a/test/check_alphafold3_predictions.py b/test/check_alphafold3_predictions.py
index f5019b24..8c258598 100755
--- a/test/check_alphafold3_predictions.py
+++ b/test/check_alphafold3_predictions.py
@@ -6,7 +6,6 @@ wrapper decides *how* each case is executed.
 """
 from __future__ import annotations
 
-import io
 import os
 import subprocess
 import time
@@ -23,7 +22,7 @@
 from absl.testing import absltest, parameterized
 
 import alphapulldown
-from alphapulldown.utils.create_combinations import process_files
+from alphapulldown_input_parser import generate_fold_specifications
 
 
 # --------------------------------------------------------------------------- #
@@ -967,15 +966,16 @@ def _args(self, *, plist, script):
 
         if script == "run_structure_prediction.py":
             # Format from run_multimer_jobs.py input to run_structure_prediction.py input
-            buffer = io.StringIO()
-            _ = process_files(
+            specifications = generate_fold_specifications(
                 input_files=[str(self.test_protein_lists_dir / plist)],
-                output_path=buffer,
-                exclude_permutations = True
+                delimiter="+",
+                exclude_permutations=True,
             )
-            buffer.seek(0)
-            formatted_input_lines = [x.strip().replace(",", ":").replace(";", "+") for x in buffer.readlines() if x.strip()]
-            # Use the first non-empty line as the input string
+            formatted_input_lines = [
+                spec.replace(",", ":").replace(";", "+")
+                for spec in specifications
+                if spec.strip()
+            ]
             formatted_input = formatted_input_lines[0] if formatted_input_lines else ""
             args = [
                 sys.executable,
diff --git a/test/check_alphalink_predictions.py b/test/check_alphalink_predictions.py
index 06505953..6a8dc1a7 100644
--- a/test/check_alphalink_predictions.py
+++ b/test/check_alphalink_predictions.py
@@ -19,7 +19,6 @@ SAVE_PREDICTIONS=1 python check_alphalink_predictions.py
 """
 from __future__ import annotations
 
-import io
 import os
 import subprocess
 import sys
@@ -34,7 +33,7 @@
 from absl.testing import absltest, parameterized
 
 import alphapulldown
-from alphapulldown.utils.create_combinations import process_files
+from alphapulldown_input_parser import generate_fold_specifications
 
 
 # --------------------------------------------------------------------------- #
@@ -519,14 +518,15 @@ def _args(self, *, plist, script):
 
         if script == "run_structure_prediction.py":
             # Format from run_multimer_jobs.py input to run_structure_prediction.py input
-            buffer = io.StringIO()
-            _ = process_files(
+            specifications = generate_fold_specifications(
                 input_files=[str(self.test_protein_lists_dir / plist)],
-                output_path=buffer,
-                exclude_permutations = True
+                delimiter="+",
+                exclude_permutations=True,
             )
-            buffer.seek(0)
-            formatted_input_lines = [x.strip().replace(",", ":").replace(";", "+") for x in buffer.readlines() if x.strip()]
+            formatted_input_lines = [
+                spec.replace(",", ":").replace(";", "+")
+                for spec in specifications if spec.strip()
+            ]
             # Use the first non-empty line as the input string
             formatted_input = formatted_input_lines[0] if formatted_input_lines else ""
             args = [
@@ -756,14 +756,15 @@ def _args_no_crosslinks(self, *, plist, script):
 
         if script == "run_structure_prediction.py":
             # Format from run_multimer_jobs.py input to run_structure_prediction.py input
-            buffer = io.StringIO()
-            _ = process_files(
+            specifications = generate_fold_specifications(
                 input_files=[str(self.test_protein_lists_dir / plist)],
-                output_path=buffer,
-                exclude_permutations = True
+                delimiter="+",
+                exclude_permutations=True,
            )
-            buffer.seek(0)
-            formatted_input_lines = [x.strip().replace(",", ":").replace(";", "+") for x in buffer.readlines() if x.strip()]
+            formatted_input_lines = [
+                spec.replace(",", ":").replace(";", "+")
+                for spec in specifications if spec.strip()
+            ]
             # Use the first non-empty line as the input string
             formatted_input = formatted_input_lines[0] if formatted_input_lines else ""
             args = [
diff --git a/test/test_parse_fold.py b/test/test_parse_fold.py
deleted file mode 100644
index 37458f33..00000000
--- a/test/test_parse_fold.py
+++ /dev/null
@@ -1,186 +0,0 @@
-import logging
-from absl.testing import parameterized
-from unittest import mock
-from alphapulldown.utils.modelling_setup import parse_fold
-
-"""
-Test parse_fold function with different scenarios
-"""
-
-class TestParseFold(parameterized.TestCase):
-
-    def setUp(self) -> None:
-        super().setUp()
-        # Set logging level to INFO
-        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-    @parameterized.named_parameters(
-        {
-            'testcase_name': 'single_protein_no_copy',
-            'input': ['protein1'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': True,
-                'dir1/protein1.pkl.xz': False,
-            },
-            'expected_result': [[{'protein1': 'all'}]],
-        },
-        {
-            'testcase_name': 'single_protein_with_copy_number',
-            'input': ['protein1:2'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': True,
-                'dir1/protein1.pkl.xz': False,
-            },
-            'expected_result': [[{'protein1': 'all'}, {'protein1': 'all'}]],
-        },
-        {
-            'testcase_name': 'single_protein_with_region',
-            'input': ['protein1:1-10'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': True,
-                'dir1/protein1.pkl.xz': False,
-            },
-            'expected_result': [[{'protein1': [(1, 10)]}]],
-        },
-        {
-            'testcase_name': 'single_protein_with_copy_and_regions',
-            'input': ['protein1:2:1-10:20-30'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': True,
-                'dir1/protein1.pkl.xz': False,
-            },
-            'expected_result': [[{'protein1': [(1, 10), (20, 30)]}, {'protein1': [(1, 10), (20, 30)]}]],
-        },
-        {
-            'testcase_name': 'single_protein_with_region_and_copy',
-            'input': ['protein1:1-10:20-30:2'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': True,
-                'dir1/protein1.pkl.xz': False,
-            },
-            'expected_result': [[{'protein1': [(1, 10), (20, 30)]}, {'protein1': [(1, 10), (20, 30)]}]],
-        },
-        {
-            'testcase_name': 'multiple_proteins',
-            'input': ['protein1:2_protein2:1-50'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': True,
-                'dir1/protein1.pkl.xz': False,
-                'dir1/protein2.pkl': True,
-                'dir1/protein2.pkl.xz': False,
-            },
-            'expected_result': [[{'protein1': 'all'}, {'protein1': 'all'}, {'protein2': [(1, 50)]}]],
-        },
-        {
-            'testcase_name': 'missing_features',
-            'input': ['protein1', 'protein2'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': False,
-                'dir1/protein1.pkl.xz': False,
-                'dir1/protein2.pkl': False,
-                'dir1/protein2.pkl.xz': False,
-            },
-            'expected_exception': FileNotFoundError,
-            'expected_exception_message': "['protein1', 'protein2'] not found in ['dir1']",
-        },
-        {
-            'testcase_name': 'invalid_format',
-            'input': ['protein1::1-10'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {},
-            'expected_exception': SystemExit,
-        },
-        {
-            'testcase_name': 'feature_exists_in_multiple_dirs',
-            'input': ['protein1'],
-            'features_directory': ['dir1', 'dir2'],
-            'protein_delimiter': '_',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': False,
-                'dir1/protein1.pkl.xz': False,
-                'dir2/protein1.pkl': True,
-                'dir2/protein1.pkl.xz': False,
-            },
-            'expected_result': [[{'protein1': 'all'}]],
-        },
-        # New test cases for JSON handling
-        {
-            'testcase_name': 'single_json_file',
-            'input': ['rna.json'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '+',
-            'mock_side_effect': {
-                'dir1/rna.json': True,
-            },
-            'expected_result': [[{'json_input': 'dir1/rna.json'}]],
-        },
-        {
-            'testcase_name': 'json_with_protein',
-            'input': ['protein1+rna.json'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '+',
-            'mock_side_effect': {
-                'dir1/protein1.pkl': True,
-                'dir1/protein1.pkl.xz': False,
-                'dir1/rna.json': True,
-            },
-            'expected_result': [[{'protein1': 'all'}, {'json_input': 'dir1/rna.json'}]],
-        },
-        {
-            'testcase_name': 'missing_json_file',
-            'input': ['rna.json'],
-            'features_directory': ['dir1'],
-            'protein_delimiter': '+',
-            'mock_side_effect': {
-                'dir1/rna.json': False,
-            },
-            'expected_exception': FileNotFoundError,
-            'expected_exception_message': "['rna.json'] not found in ['dir1']",
-        },
-        {
-            'testcase_name': 'json_in_multiple_dirs',
-            'input': ['rna.json'],
-            'features_directory': ['dir1', 'dir2'],
-            'protein_delimiter': '+',
-            'mock_side_effect': {
-                'dir1/rna.json': False,
-                'dir2/rna.json': True,
-            },
-            'expected_result': [[{'json_input': 'dir2/rna.json'}]],
-        },
-    )
-    def test_parse_fold(self, input, features_directory, protein_delimiter, mock_side_effect,
-                        expected_result=None, expected_exception=None, expected_exception_message=None):
-        """Test parse_fold with different input scenarios"""
-        with mock.patch('alphapulldown.utils.modelling_setup.exists') as mock_exists, \
-             mock.patch('sys.exit') as mock_exit:
-            mock_exists.side_effect = lambda path: mock_side_effect.get(path, False)
-            # Mock sys.exit to raise SystemExit exception
-            mock_exit.side_effect = SystemExit
-            logging.info(f"Testing with input: {input}, features_directory: {features_directory}, "
-                         f"protein_delimiter: '{protein_delimiter}'")
-            logging.info(f"Mock side effects: {mock_side_effect}")
-            if expected_exception:
-                with self.assertRaises(expected_exception) as context:
-                    result = parse_fold(input, features_directory, protein_delimiter)
-                if expected_exception_message:
-                    self.assertEqual(str(context.exception), expected_exception_message)
-            else:
-                result = parse_fold(input, features_directory, protein_delimiter)
-                logging.info(f"Result: {result}, Expected: {expected_result}")
-                self.assertEqual(result, expected_result)
diff --git a/test/test_python_imports.py b/test/test_python_imports.py
index 4cfa9502..cc5f7b40 100644
--- a/test/test_python_imports.py
+++ b/test/test_python_imports.py
@@ -1,10 +1,8 @@
 from alphapulldown.utils import *
-import io
 import warnings
 import subprocess
 from absl import app
 import os
-from alphapulldown.utils.create_combinations import process_files
 import argparse
 from os import makedirs
 
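
Usage sketch (reviewer aid, not part of the applied diff): the snippet below strings
together the alphapulldown-input-parser calls that the scripts switch to in this patch.
It assumes generate_fold_specifications() returns an iterable of delimiter-joined
specification strings, as the call sites above imply; the input file names and the
features directory are hypothetical placeholders.

    from alphapulldown_input_parser import generate_fold_specifications
    from alphapulldown.utils.modelling_setup import parse_fold, create_custom_info

    # Hypothetical protein lists: one protein name or fold specification per line.
    specifications = generate_fold_specifications(
        input_files=["baits.txt", "candidates.txt"],
        delimiter="+",
        exclude_permutations=True,  # drop A+B vs. B+A duplicates, as the scripts request
    )

    # run_multimer_jobs.py / split_jobs_into_clusters.py normalise the strings like this:
    all_folds = [spec.replace(",", ":").replace(";", "+") for spec in specifications]

    # parse_input.py feeds the specifications into the re-exported, normalising parse_fold:
    parsed = parse_fold(specifications, ["features/"], "+")  # "features/" is a placeholder
    data = create_custom_info(parsed)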