Skip to content

Commit 471e1cc

Browse files
Merge pull request #108 from wfcommons/testing
Test improvements
2 parents 3d40837 + 037dda2 commit 471e1cc

File tree

4 files changed

+72
-12
lines changed

4 files changed

+72
-12
lines changed

tests/test_helpers.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@
55
import io
66
import sys
77
import docker
8+
import networkx
89
from docker.errors import ImageNotFound
910

11+
from wfcommons.common import Workflow
12+
13+
1014
def _create_fresh_local_dir(path: str) -> pathlib.Path:
1115
dirpath = pathlib.Path(path)
1216
if dirpath.exists():
@@ -99,4 +103,30 @@ def _get_total_size_of_directory(directory_path: str):
99103
for filename in filenames:
100104
filepath = os.path.join(dirpath, filename)
101105
total_size += os.path.getsize(filepath)
102-
return total_size
106+
return total_size
107+
108+
def _compare_workflows(workflow1: Workflow, workflow_2: Workflow):
    """Assert that two workflows are structurally and quantitatively equivalent.

    Three checks are performed:
      - both workflows contain the same number of tasks;
      - their task graphs are isomorphic (Workflow objects are passed directly
        to networkx.is_isomorphic, so they are presumably networkx graphs —
        TODO confirm against wfcommons.common.Workflow);
      - the total input-file bytes and total output-file bytes, summed over all
        tasks, are identical.

    :param workflow1: first workflow to compare.
    :param workflow_2: second workflow to compare.
    :raises AssertionError: if any of the equivalence checks fails.
    """
    # Same number of tasks
    assert len(workflow1.tasks) == len(workflow_2.tasks)

    # Same task-graph topology
    assert networkx.is_isomorphic(workflow1, workflow_2)

    def _total_file_bytes(workflow: Workflow, attribute: str) -> int:
        # Sum the sizes of every task's input_files or output_files.
        return sum(file.size
                   for task in workflow.tasks.values()
                   for file in getattr(task, attribute))

    # Totals are order-independent, so each workflow is summed on its own;
    # no need to pair tasks up with zip().
    assert _total_file_bytes(workflow1, "input_files") == \
        _total_file_bytes(workflow_2, "input_files")
    assert _total_file_bytes(workflow1, "output_files") == \
        _total_file_bytes(workflow_2, "output_files")

tests/translators_loggers/test_translators_loggers.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@
1414
import sys
1515
import json
1616
import time
17+
import networkx
1718

1819
from tests.test_helpers import _create_fresh_local_dir
1920
from tests.test_helpers import _remove_local_dir_if_it_exists
2021
from tests.test_helpers import _start_docker_container
22+
from tests.test_helpers import _compare_workflows
23+
2124
from wfcommons import BlastRecipe
25+
from wfcommons.common import Workflow, Task
2226
from wfcommons.wfbench import WorkflowBenchmark
2327
from wfcommons.wfbench import DaskTranslator
2428
from wfcommons.wfbench import ParslTranslator
@@ -34,7 +38,7 @@
3438
from wfcommons.wfinstances.logs import TaskVineLogsParser
3539

3640

37-
def _create_workflow_benchmark():
41+
def _create_workflow_benchmark() -> (WorkflowBenchmark, int):
3842
# Create a workflow benchmark object to generate specifications based on a recipe (in /tmp/, whatever)
3943
desired_num_tasks = 45
4044
benchmark_full_path = "/tmp/blast-benchmark-{desired_num_tasks}.json"
@@ -85,8 +89,13 @@ def _additional_setup_swiftt(container):
8589
# Start a redis server in the background
8690
exit_code, output = container.exec_run(
8791
cmd=["bash", "-c", "redis-server"], detach=True, stdout=True, stderr=True)
88-
# Note that exit_code will always be None because of detach=True. So hopefully this works.
89-
# TODO?: check that the vine_worker is running....
92+
# Note that exit_code will always be None because of detach=True.
93+
94+
# Check that the redis-server is up
95+
exit_code, output = container.exec_run(
96+
cmd=["bash", "-c", "redis-cli ping"], stdout=True, stderr=True)
97+
if output.decode().strip() != 'PONG':
98+
raise Exception("Failed to start redis-server...")
9099

91100
additional_setup_methods = {
92101
"dask": noop,
@@ -242,6 +251,7 @@ def test_translator(self, backend) -> None:
242251
# Create workflow benchmark
243252
benchmark, num_tasks = _create_workflow_benchmark()
244253

254+
245255
# Create a local translation directory
246256
str_dirpath = "/tmp/" + backend + "_translated_workflow/"
247257
dirpath = pathlib.Path(str_dirpath)
@@ -270,13 +280,16 @@ def test_translator(self, backend) -> None:
270280
if backend == "pegasus":
271281
parser = PegasusLogsParser(dirpath / "work/wfcommons/pegasus/Blast-Benchmark/run0001/")
272282
elif backend == "taskvine":
273-
parser = TaskVineLogsParser(dirpath / "vine-run-info/", filenames_to_ignore=["cpu-benchmark","stress-ng"])
283+
parser = TaskVineLogsParser(dirpath / "vine-run-info/", filenames_to_ignore=["cpu-benchmark","stress-ng", "wfbench"])
274284
else:
275285
parser = None
276286

277287
if parser:
278288
sys.stderr.write("\nParsing the logs...\n")
279-
workflow = parser.build_workflow("reconstructed_workflow")
280-
# TODO: test more stuff
281-
workflow.write_json(pathlib.Path("/tmp/reconstructed_workflow.json"))
282-
assert(num_tasks == len(workflow.tasks))
289+
reconstructed_workflow : Workflow = parser.build_workflow("reconstructed_workflow")
290+
reconstructed_workflow.write_json(pathlib.Path("/tmp/reconstructed_workflow.json"))
291+
292+
original_workflow : Workflow = benchmark.workflow
293+
294+
_compare_workflows(original_workflow, reconstructed_workflow)
295+

tests/wfbench/test_wfbench.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,19 @@
1313
import pathlib
1414
import sys
1515
import json
16+
import networkx
1617

1718
from tests.test_helpers import _create_fresh_local_dir
1819
from tests.test_helpers import _start_docker_container
1920
from tests.test_helpers import _remove_local_dir_if_it_exists
2021
from tests.test_helpers import _get_total_size_of_directory
22+
from tests.test_helpers import _compare_workflows
23+
2124
from wfcommons import BlastRecipe
2225
from wfcommons.common import Workflow
2326
from wfcommons.wfbench import WorkflowBenchmark, BashTranslator
27+
from wfcommons.wfinstances import Instance
28+
2429

2530
def _directory_content_as_expected(dirpath: pathlib.Path,
2631
workflow: Workflow,
@@ -38,11 +43,16 @@ def _workflow_as_expected(dirpath: pathlib.Path,
3843
num_tasks: int,
3944
cpu_work: int,
4045
percent_cpu: float):
46+
47+
# Some checks based on the generated JSON
48+
#########################################
49+
4150
# Get the generated JSON
4251
json_path = dirpath / f"{workflow.name.lower()}-{num_tasks}.json"
4352
with json_path.open("r") as f:
4453
generated_json = json.load(f)
4554

55+
4656
# Check the number of tasks
4757
assert(len(workflow.tasks) == len(generated_json['workflow']['specification']['tasks']))
4858

@@ -59,7 +69,13 @@ def _workflow_as_expected(dirpath: pathlib.Path,
5969
for file in workflow_task.input_files:
6070
assert(file.file_id in generated_task['inputFiles'])
6171

62-
# TODO: Implement more sanity checks
72+
# Some checks based on an Instance generated from the JSON
73+
##########################################################
74+
75+
# Get the generated Workflow via an Instance
76+
reconstructed_workflow = Instance(json_path).workflow
77+
78+
_compare_workflows(workflow, reconstructed_workflow)
6379

6480
return True
6581

wfcommons/wfinstances/logs/taskvine.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@ class TaskVineLogsParser(LogsParser):
3636
3737
:param vine_run_info_dir: TaskVine's vine-run-info directory.
3838
:type vine_run_info_dir: pathlib.Path
39-
:param filenames_to_ignore: TaskVine considers that executables and package files (e.g., poncho package.tgz)
39+
:param filenames_to_ignore: TaskVine sometimes considers that executables and package files
4040
are input to tasks. This argument is the list of names of files that should be
4141
ignored in the reconstructed instances, which typically do not include such
42-
files at task input.
42+
files as task input. For instance, if reconstructing a workflow from an execution
43+
of a WfBench-generated benchmark, one could pass ["wfbench", "cpu-benchmark", "stress-ng"]
4344
:type filenames_to_ignore: List[str]
4445
:param description: Workflow instance description.
4546
:type description: Optional[str]

0 commit comments

Comments
 (0)