diff --git a/docs/contributing/index.rst b/docs/contributing/index.rst index 0d8a542ff..9b9914133 100644 --- a/docs/contributing/index.rst +++ b/docs/contributing/index.rst @@ -338,11 +338,9 @@ image using the ``Dockerfile`` that was completed in Step 2. Modify generate inputs: -1. Include a key-value pair in the algo_exp_file dictionary that links - the specific algorithm to its expected network file. -2. Obtain the expected network file from the workflow, manually confirm - it is correct, and save it to ``test/generate-inputs/expected``. Name - it as ``{algorithm_name}-{network_file_name}-expected.txt``. +1. Obtain the expected file for each required input of the algorithm from the workflow, + manually confirm each one is correct, and save them to ``test/generate-inputs/expected/{algorithm_name}``. + Name each file ``{algorithm_name}-{input_name}-expected.txt``. Modify parse outputs: diff --git a/spras/dataset.py b/spras/dataset.py index 1346750e3..2c01997a5 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -104,11 +104,12 @@ def load_files_from_dict(self, dataset_dict): f"Edge file {interactome_loc} must have three or four columns but found {num_cols}" ) - node_set = set(self.interactome.Interactor1.unique()) - node_set = node_set.union(set(self.interactome.Interactor2.unique())) + # We get uniqueness afterwards to make `load_files_from_dict` have a well-defined node ordering, + # since algorithms may depend on the order of nodes passed. 
+ nodes = list(pd.concat([self.interactome.Interactor1, self.interactome.Interactor2]).unique()) # Load generic node tables - self.node_table = pd.DataFrame(node_set, columns=[self.NODE_ID]) + self.node_table = pd.DataFrame(nodes, columns=[self.NODE_ID]) for node_file in node_data_files: single_node_table = pd.read_table(os.path.join(data_loc, node_file)) # If we have only 1 column, assume this is an indicator variable diff --git a/test/generate-inputs/expected/allpairs/allpairs-directed_flag-expected.txt b/test/generate-inputs/expected/allpairs/allpairs-directed_flag-expected.txt new file mode 100644 index 000000000..02e4a84d6 --- /dev/null +++ b/test/generate-inputs/expected/allpairs/allpairs-directed_flag-expected.txt @@ -0,0 +1 @@ +false \ No newline at end of file diff --git a/test/generate-inputs/expected/allpairs-network-expected.txt b/test/generate-inputs/expected/allpairs/allpairs-network-expected.txt similarity index 100% rename from test/generate-inputs/expected/allpairs-network-expected.txt rename to test/generate-inputs/expected/allpairs/allpairs-network-expected.txt diff --git a/test/generate-inputs/expected/allpairs/allpairs-nodetypes-expected.txt b/test/generate-inputs/expected/allpairs/allpairs-nodetypes-expected.txt new file mode 100644 index 000000000..4595f96e2 --- /dev/null +++ b/test/generate-inputs/expected/allpairs/allpairs-nodetypes-expected.txt @@ -0,0 +1,3 @@ +#Node Node type +test_A source +C target diff --git a/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt b/test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-edges-expected.txt similarity index 100% rename from test/generate-inputs/expected/bowtiebuilder-edges-expected.txt rename to test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-edges-expected.txt diff --git a/test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-sources-expected.txt b/test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-sources-expected.txt new file mode 100644 index 
000000000..050111aca --- /dev/null +++ b/test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-sources-expected.txt @@ -0,0 +1 @@ +test_A diff --git a/test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-targets-expected.txt b/test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-targets-expected.txt new file mode 100644 index 000000000..3cc58df83 --- /dev/null +++ b/test/generate-inputs/expected/bowtiebuilder/bowtiebuilder-targets-expected.txt @@ -0,0 +1 @@ +C diff --git a/test/generate-inputs/expected/domino/domino-active_genes-expected.txt b/test/generate-inputs/expected/domino/domino-active_genes-expected.txt new file mode 100644 index 000000000..7094db71a --- /dev/null +++ b/test/generate-inputs/expected/domino/domino-active_genes-expected.txt @@ -0,0 +1,2 @@ +ENSG0test_A +ENSG0C diff --git a/test/generate-inputs/expected/domino-network-expected.txt b/test/generate-inputs/expected/domino/domino-network-expected.txt similarity index 100% rename from test/generate-inputs/expected/domino-network-expected.txt rename to test/generate-inputs/expected/domino/domino-network-expected.txt diff --git a/test/generate-inputs/expected/meo-edges-expected.txt b/test/generate-inputs/expected/meo/meo-edges-expected.txt similarity index 100% rename from test/generate-inputs/expected/meo-edges-expected.txt rename to test/generate-inputs/expected/meo/meo-edges-expected.txt diff --git a/test/generate-inputs/expected/meo/meo-sources-expected.txt b/test/generate-inputs/expected/meo/meo-sources-expected.txt new file mode 100644 index 000000000..06cb3030f --- /dev/null +++ b/test/generate-inputs/expected/meo/meo-sources-expected.txt @@ -0,0 +1 @@ +test꧁SEP꧂A diff --git a/test/generate-inputs/expected/meo/meo-targets-expected.txt b/test/generate-inputs/expected/meo/meo-targets-expected.txt new file mode 100644 index 000000000..3cc58df83 --- /dev/null +++ b/test/generate-inputs/expected/meo/meo-targets-expected.txt @@ -0,0 +1 @@ +C diff --git 
a/test/generate-inputs/expected/mincostflow-edges-expected.txt b/test/generate-inputs/expected/mincostflow/mincostflow-edges-expected.txt similarity index 100% rename from test/generate-inputs/expected/mincostflow-edges-expected.txt rename to test/generate-inputs/expected/mincostflow/mincostflow-edges-expected.txt diff --git a/test/generate-inputs/expected/mincostflow/mincostflow-sources-expected.txt b/test/generate-inputs/expected/mincostflow/mincostflow-sources-expected.txt new file mode 100644 index 000000000..050111aca --- /dev/null +++ b/test/generate-inputs/expected/mincostflow/mincostflow-sources-expected.txt @@ -0,0 +1 @@ +test_A diff --git a/test/generate-inputs/expected/mincostflow/mincostflow-targets-expected.txt b/test/generate-inputs/expected/mincostflow/mincostflow-targets-expected.txt new file mode 100644 index 000000000..3cc58df83 --- /dev/null +++ b/test/generate-inputs/expected/mincostflow/mincostflow-targets-expected.txt @@ -0,0 +1 @@ +C diff --git a/test/generate-inputs/expected/omicsintegrator1/omicsintegrator1-dummy_nodes-expected.txt b/test/generate-inputs/expected/omicsintegrator1/omicsintegrator1-dummy_nodes-expected.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/generate-inputs/expected/omicsintegrator1-edges-expected.txt b/test/generate-inputs/expected/omicsintegrator1/omicsintegrator1-edges-expected.txt similarity index 100% rename from test/generate-inputs/expected/omicsintegrator1-edges-expected.txt rename to test/generate-inputs/expected/omicsintegrator1/omicsintegrator1-edges-expected.txt diff --git a/test/generate-inputs/expected/omicsintegrator1/omicsintegrator1-prizes-expected.txt b/test/generate-inputs/expected/omicsintegrator1/omicsintegrator1-prizes-expected.txt new file mode 100644 index 000000000..9bd069b92 --- /dev/null +++ b/test/generate-inputs/expected/omicsintegrator1/omicsintegrator1-prizes-expected.txt @@ -0,0 +1,3 @@ +name prize +test_A 2.0 +C 5.7 diff --git 
a/test/generate-inputs/expected/omicsintegrator2-edges-expected.txt b/test/generate-inputs/expected/omicsintegrator2/omicsintegrator2-edges-expected.txt similarity index 100% rename from test/generate-inputs/expected/omicsintegrator2-edges-expected.txt rename to test/generate-inputs/expected/omicsintegrator2/omicsintegrator2-edges-expected.txt diff --git a/test/generate-inputs/expected/omicsintegrator2/omicsintegrator2-prizes-expected.txt b/test/generate-inputs/expected/omicsintegrator2/omicsintegrator2-prizes-expected.txt new file mode 100644 index 000000000..9bd069b92 --- /dev/null +++ b/test/generate-inputs/expected/omicsintegrator2/omicsintegrator2-prizes-expected.txt @@ -0,0 +1,3 @@ +name prize +test_A 2.0 +C 5.7 diff --git a/test/generate-inputs/expected/pathlinker-network-expected.txt b/test/generate-inputs/expected/pathlinker/pathlinker-network-expected.txt similarity index 100% rename from test/generate-inputs/expected/pathlinker-network-expected.txt rename to test/generate-inputs/expected/pathlinker/pathlinker-network-expected.txt diff --git a/test/generate-inputs/expected/pathlinker/pathlinker-nodetypes-expected.txt b/test/generate-inputs/expected/pathlinker/pathlinker-nodetypes-expected.txt new file mode 100644 index 000000000..4595f96e2 --- /dev/null +++ b/test/generate-inputs/expected/pathlinker/pathlinker-nodetypes-expected.txt @@ -0,0 +1,3 @@ +#Node Node type +test_A source +C target diff --git a/test/generate-inputs/expected/responsenet-edges-expected.txt b/test/generate-inputs/expected/responsenet/responsenet-edges-expected.txt similarity index 100% rename from test/generate-inputs/expected/responsenet-edges-expected.txt rename to test/generate-inputs/expected/responsenet/responsenet-edges-expected.txt diff --git a/test/generate-inputs/expected/responsenet/responsenet-sources-expected.txt b/test/generate-inputs/expected/responsenet/responsenet-sources-expected.txt new file mode 100644 index 000000000..050111aca --- /dev/null +++ 
b/test/generate-inputs/expected/responsenet/responsenet-sources-expected.txt @@ -0,0 +1 @@ +test_A diff --git a/test/generate-inputs/expected/responsenet/responsenet-targets-expected.txt b/test/generate-inputs/expected/responsenet/responsenet-targets-expected.txt new file mode 100644 index 000000000..3cc58df83 --- /dev/null +++ b/test/generate-inputs/expected/responsenet/responsenet-targets-expected.txt @@ -0,0 +1 @@ +C diff --git a/test/generate-inputs/expected/rwr-network-expected.txt b/test/generate-inputs/expected/rwr/rwr-network-expected.txt similarity index 100% rename from test/generate-inputs/expected/rwr-network-expected.txt rename to test/generate-inputs/expected/rwr/rwr-network-expected.txt diff --git a/test/generate-inputs/expected/rwr/rwr-nodes-expected.txt b/test/generate-inputs/expected/rwr/rwr-nodes-expected.txt new file mode 100644 index 000000000..5563e0ca3 --- /dev/null +++ b/test/generate-inputs/expected/rwr/rwr-nodes-expected.txt @@ -0,0 +1,2 @@ +test_A +C diff --git a/test/generate-inputs/expected/strwr-network-expected.txt b/test/generate-inputs/expected/strwr/strwr-network-expected.txt similarity index 100% rename from test/generate-inputs/expected/strwr-network-expected.txt rename to test/generate-inputs/expected/strwr/strwr-network-expected.txt diff --git a/test/generate-inputs/expected/strwr/strwr-sources-expected.txt b/test/generate-inputs/expected/strwr/strwr-sources-expected.txt new file mode 100644 index 000000000..050111aca --- /dev/null +++ b/test/generate-inputs/expected/strwr/strwr-sources-expected.txt @@ -0,0 +1 @@ +test_A diff --git a/test/generate-inputs/expected/strwr/strwr-targets-expected.txt b/test/generate-inputs/expected/strwr/strwr-targets-expected.txt new file mode 100644 index 000000000..3cc58df83 --- /dev/null +++ b/test/generate-inputs/expected/strwr/strwr-targets-expected.txt @@ -0,0 +1 @@ +C diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py index 
f79b673fc..fa6e9b364 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -1,28 +1,14 @@ import filecmp import os +import shutil from pathlib import Path import yaml -from spras import runner - -OUTDIR = "test/generate-inputs/output/" -EXPDIR = "test/generate-inputs/expected/" - -algo_exp_file = { - 'mincostflow': 'edges', - 'meo': 'edges', - 'omicsintegrator1': 'edges', - 'omicsintegrator2': 'edges', - 'domino': 'network', - 'pathlinker': 'network', - 'allpairs': 'network', - 'bowtiebuilder': 'edges', - 'strwr': 'network', - 'rwr': 'network', - 'responsenet': 'edges' -} +from spras.runner import algorithms, get_required_inputs, merge_input, prepare_inputs +OUTDIR = Path("test", "generate-inputs", "output") +EXPDIR = Path("test", "generate-inputs", "expected") class TestGenerateInputs: @classmethod @@ -30,7 +16,9 @@ def setup_class(cls): """ Create the expected output directory """ - Path(OUTDIR).mkdir(parents=True, exist_ok=True) + if OUTDIR.exists(): + shutil.rmtree(OUTDIR) + OUTDIR.mkdir(parents=True, exist_ok=True) def test_prepare_inputs_networks(self): config_loc = os.path.join("test", "generate-inputs", "inputs", "test_config.yaml") @@ -40,16 +28,15 @@ def test_prepare_inputs_networks(self): test_file = "test/generate-inputs/output/test_pickled_dataset.pkl" test_dataset = next((ds for ds in config["datasets"] if ds["label"] == "test_data"), None) - runner.merge_input(test_dataset, test_file) - - for algo in algo_exp_file.keys(): - inputs = runner.get_required_inputs(algo) - filename_map = {input_str: os.path.join("test", "generate-inputs", "output", f"{algo}-{input_str}.txt") - for input_str in inputs} - runner.prepare_inputs(algo, test_file, filename_map) - exp_file_name = algo_exp_file[algo] - assert filecmp.cmp(OUTDIR + f"{algo}-{exp_file_name}.txt", EXPDIR + f"{algo}-{exp_file_name}-expected.txt", - shallow=False) - - for file in filename_map.values(): - assert Path(file).exists() + 
merge_input(test_dataset, test_file) + + for algo in algorithms.keys(): + inputs = get_required_inputs(algo) + (OUTDIR / algo).mkdir(exist_ok=True) + filename_map = {input_str: str(OUTDIR / algo / f"{algo}-{input_str}.txt") for input_str in inputs} + prepare_inputs(algo, test_file, filename_map) + required_inputs = algorithms[algo].required_inputs + for exp_file_name in required_inputs: + assert filecmp.cmp(OUTDIR / algo / f"{algo}-{exp_file_name}.txt", EXPDIR / algo / f"{algo}-{exp_file_name}-expected.txt", + shallow=False), f"{algo} for {exp_file_name}.txt does not match up!" +