From 2374a2c11a27bce903af90f796c8f07fb7536186 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sat, 30 Aug 2025 17:01:36 -0700 Subject: [PATCH 01/13] refactor!: move container options to shared `containers` key --- config/config.yaml | 49 +++++++------- config/egfr.yaml | 43 ++++++------ spras/config/config.py | 22 ++----- spras/config/container_schema.py | 69 ++++++++++++++++++++ spras/config/schema.py | 18 +---- test/analysis/input/config.yaml | 41 +++++++----- test/analysis/input/egfr.yaml | 41 +++++++----- test/generate-inputs/inputs/test_config.yaml | 11 ++-- 8 files changed, 175 insertions(+), 119 deletions(-) create mode 100644 spras/config/container_schema.py diff --git a/config/config.yaml b/config/config.yaml index 1e5f1d561..177e6634c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -3,30 +3,31 @@ # The length of the hash used to identify a parameter combination hash_length: 7 -# Specify the container framework used by each PRM wrapper. Valid options include: -# - docker (default if not specified) -# - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed -# - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment. -# - There is no support for other environments at the moment. -container_framework: docker - -# Only used if container_framework is set to singularity, this will unpack the singularity containers -# to the local filesystem. This is useful when PRM containers need to run inside another container, -# such as would be the case in an HTCondor/OSPool environment. -# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way -# that persists after the workflow is complete. To clean up the unpacked containers, the user must -# manually delete them. For convenience, these unpacked files will exist in the current working directory -# under `unpacked`. -unpack_singularity: false - -# Allow the user to configure which container registry containers should be pulled from -# Note that this assumes container names are consistent across registries, and that the -# registry being passed doesn't require authentication for pull actions -container_registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio +containers: + # Specify the container framework used by each PRM wrapper. Valid options include: + # - docker (default if not specified) + # - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed + # - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment. + # - There is no support for other environments at the moment. + framework: docker + + # Only used if container_framework is set to singularity, this will unpack the singularity containers + # to the local filesystem. This is useful when PRM containers need to run inside another container, + # such as would be the case in an HTCondor/OSPool environment. + # NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way + # that persists after the workflow is complete. To clean up the unpacked containers, the user must + # manually delete them. For convenience, these unpacked files will exist in the current working directory + # under `unpacked`. + unpack_singularity: false + + # Allow the user to configure which container registry containers should be pulled from + # Note that this assumes container names are consistent across registries, and that the + # registry being passed doesn't require authentication for pull actions + registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm diff --git a/config/egfr.yaml b/config/egfr.yaml index 667cb55f0..f35f407b8 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -1,28 +1,31 @@ # The length of the hash used to identify a parameter combination hash_length: 7 -# Specify the container framework used by each PRM wrapper. Valid options include: -# - docker (default if not specified) -# - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed -# - dsub -- experimental with limited support, used for running on Google Cloud -container_framework: docker +containers: + # Specify the container framework used by each PRM wrapper. Valid options include: + # - docker (default if not specified) + # - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed + # - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment. + # - There is no support for other environments at the moment. + framework: docker -# Only used if container_framework is set to singularity, this will unpack the singularity containers -# to the local filesystem. This is useful when PRM containers need to run inside another container, -# such as would be the case in an HTCondor/OSPool environment. -# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way -# that persists after the workflow is complete. To clean up the unpacked containers, the user must -# manually delete them. -unpack_singularity: false + # Only used if container_framework is set to singularity, this will unpack the singularity containers + # to the local filesystem. This is useful when PRM containers need to run inside another container, + # such as would be the case in an HTCondor/OSPool environment. + # NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way + # that persists after the workflow is complete. To clean up the unpacked containers, the user must + # manually delete them. For convenience, these unpacked files will exist in the current working directory + # under `unpacked`. + unpack_singularity: false -# Allow the user to configure which container registry containers should be pulled from -# Note that this assumes container names are consistent across registries, and that the -# registry being passed doesn't require authentication for pull actions -container_registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio + # Allow the user to configure which container registry containers should be pulled from + # Note that this assumes container names are consistent across registries, and that the + # registry being passed doesn't require authentication for pull actions + registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio algorithms: - name: pathlinker diff --git a/spras/config/config.py b/spras/config/config.py index 605faecf4..53591cd20 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -22,7 +22,8 @@ import numpy as np import yaml -from spras.config.schema import ContainerFramework, RawConfig +from spras.config.container_schema import ContainerSettings, ProcessedContainerSettings +from spras.config.schema import RawConfig from spras.util import NpHashEncoder, hash_params_sha1_base32 config = None @@ -65,10 +66,8 @@ def __init__(self, raw_config: dict[str, Any]): # Directory used for storing output self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir - # Container framework used by PRMs. Valid options are "docker", "dsub", and "singularity" - self.container_framework = None - # The container prefix (host and organization) to use for images. Default is "docker.io/reedcompbio" - self.container_prefix: str = DEFAULT_CONTAINER_PREFIX + # Container settings used by PRMs. + self.container_settings = ProcessedContainerSettings.from_container_settings(parsed_raw_config.containers, parsed_raw_config.hash_length) # A Boolean specifying whether to unpack singularity containers. Default is False self.unpack_singularity = False # A dictionary to store configured datasets against which SPRAS will be run @@ -286,19 +285,6 @@ def process_config(self, raw_config: RawConfig): # Set up a few top-level config variables self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir - if raw_config.container_framework == ContainerFramework.dsub: - warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2) - self.container_framework = raw_config.container_framework - - # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. - if raw_config.unpack_singularity and self.container_framework != "singularity": - warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2) - self.unpack_singularity = raw_config.unpack_singularity - - # Grab registry from the config, and if none is provided default to docker - if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "": - self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner - self.process_datasets(raw_config) self.process_algorithms(raw_config) self.process_analysis(raw_config) diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py new file mode 100644 index 000000000..11b13576f --- /dev/null +++ b/spras/config/container_schema.py @@ -0,0 +1,69 @@ +""" +The separate container schema specification file. +For information about pydantic, see schema.py. + +We move this to a separate file to allow `containers.py` to explicitly take in +this subsection of the configuration. +""" + +import warnings +from dataclasses import dataclass + +from pydantic import BaseModel, ConfigDict + +from spras.config.util import CaseInsensitiveEnum + +DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio" + +class ContainerFramework(CaseInsensitiveEnum): + docker = 'docker' + # TODO: add apptainer variant once #260 gets merged + singularity = 'singularity' + dsub = 'dsub' + +class ContainerRegistry(BaseModel): + base_url: str = "docker.io" + "The domain of the registry" + + owner: str = "reedcompbio" + "The owner or project of the registry" + + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) + +class ContainerSettings(BaseModel): + framework: ContainerFramework = ContainerFramework.docker + unpack_singularity: bool = False + registry: ContainerRegistry + hash_length: int = 7 + + model_config = ConfigDict(extra='forbid') + +@dataclass +class ProcessedContainerSettings: + framework: ContainerFramework = ContainerFramework.docker + unpack_singularity: bool = False + prefix: str = DEFAULT_CONTAINER_PREFIX + hash_length: int = 7 + + @staticmethod + def from_container_settings(settings: ContainerSettings, default_hash_length: int) -> "ProcessedContainerSettings": + if settings.framework == ContainerFramework.dsub: + warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2) + container_framework = settings.framework + + # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. + if settings.unpack_singularity and container_framework != "singularity": + warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2) + unpack_singularity = settings.unpack_singularity + + # Grab registry from the config, and if none is provided default to docker + container_prefix = DEFAULT_CONTAINER_PREFIX + if settings.registry and settings.registry.base_url != "" and settings.registry.owner != "": + container_prefix = settings.registry.base_url + "/" + settings.registry.owner + + return ProcessedContainerSettings( + framework=container_framework, + unpack_singularity=unpack_singularity, + prefix=container_prefix, + hash_length=settings.hash_length or default_hash_length + ) diff --git a/spras/config/schema.py b/spras/config/schema.py index c84ea4384..e1fdb83c8 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -15,6 +15,7 @@ from pydantic import AfterValidator, BaseModel, ConfigDict, Field +from spras.config.container_schema import ContainerSettings from spras.config.util import CaseInsensitiveEnum # Most options here have an `include` property, @@ -89,18 +90,6 @@ def validate(label: str): return label return validate -class ContainerFramework(CaseInsensitiveEnum): - docker = 'docker' - # TODO: add apptainer variant once #260 gets merged - singularity = 'singularity' - dsub = 'dsub' - -class ContainerRegistry(BaseModel): - base_url: str - owner: str = Field(description="The owner or project of the registry") - - model_config = ConfigDict(extra='forbid') - class AlgorithmParams(BaseModel): include: bool directed: Optional[bool] = None @@ -148,10 +137,7 @@ class ReconstructionSettings(BaseModel): model_config = ConfigDict(extra='forbid') class RawConfig(BaseModel): - # TODO: move these container values to a nested container key - container_framework: ContainerFramework = ContainerFramework.docker - unpack_singularity: bool = False - container_registry: ContainerRegistry + containers: ContainerSettings hash_length: int = DEFAULT_HASH_LENGTH "The length of the hash used to identify a parameter combination" diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml index abde6f979..15a5572fa 100644 --- a/test/analysis/input/config.yaml +++ b/test/analysis/input/config.yaml @@ -1,26 +1,31 @@ # The length of the hash used to identify a parameter combination hash_length: 7 -# Specify the container framework. Current supported versions include 'docker' and -# 'singularity'. If container_framework is not specified, SPRAS will default to docker. -container_framework: docker +containers: + # Specify the container framework used by each PRM wrapper. Valid options include: + # - docker (default if not specified) + # - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed + # - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment. + # - There is no support for other environments at the moment. + framework: docker -# Only used if container_framework is set to singularity, this will unpack the singularity containers -# to the local filesystem. This is useful when PRM containers need to run inside another container, -# such as would be the case in an HTCondor/OSPool environment. -# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way -# that persists after the workflow is complete. To clean up the unpacked containers, the user must -# manually delete them. -unpack_singularity: false + # Only used if container_framework is set to singularity, this will unpack the singularity containers + # to the local filesystem. This is useful when PRM containers need to run inside another container, + # such as would be the case in an HTCondor/OSPool environment. + # NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way + # that persists after the workflow is complete. To clean up the unpacked containers, the user must + # manually delete them. For convenience, these unpacked files will exist in the current working directory + # under `unpacked`. + unpack_singularity: false -# Allow the user to configure which container registry containers should be pulled from -# Note that this assumes container names are consistent across registries, and that the -# registry being passed doesn't require authentication for pull actions -container_registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio + # Allow the user to configure which container registry containers should be pulled from + # Note that this assumes container names are consistent across registries, and that the + # registry being passed doesn't require authentication for pull actions + registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio algorithms: - name: "pathlinker" diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml index da4560df9..d26bded2d 100644 --- a/test/analysis/input/egfr.yaml +++ b/test/analysis/input/egfr.yaml @@ -1,26 +1,31 @@ # The length of the hash used to identify a parameter combination hash_length: 7 -# Specify the container framework. Current supported versions include 'docker' and -# 'singularity'. If container_framework is not specified, SPRAS will default to docker. -container_framework: docker +containers: + # Specify the container framework used by each PRM wrapper. Valid options include: + # - docker (default if not specified) + # - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed + # - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment. + # - There is no support for other environments at the moment. + framework: docker -# Only used if container_framework is set to singularity, this will unpack the singularity containers -# to the local filesystem. This is useful when PRM containers need to run inside another container, -# such as would be the case in an HTCondor/OSPool environment. -# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way -# that persists after the workflow is complete. To clean up the unpacked containers, the user must -# manually delete them. -unpack_singularity: false + # Only used if container_framework is set to singularity, this will unpack the singularity containers + # to the local filesystem. This is useful when PRM containers need to run inside another container, + # such as would be the case in an HTCondor/OSPool environment. + # NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way + # that persists after the workflow is complete. To clean up the unpacked containers, the user must + # manually delete them. For convenience, these unpacked files will exist in the current working directory + # under `unpacked`. + unpack_singularity: false -# Allow the user to configure which container registry containers should be pulled from -# Note that this assumes container names are consistent across registries, and that the -# registry being passed doesn't require authentication for pull actions -container_registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio + # Allow the user to configure which container registry containers should be pulled from + # Note that this assumes container names are consistent across registries, and that the + # registry being passed doesn't require authentication for pull actions + registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio algorithms: - name: pathlinker diff --git a/test/generate-inputs/inputs/test_config.yaml b/test/generate-inputs/inputs/test_config.yaml index ed59497b1..0c83017fe 100644 --- a/test/generate-inputs/inputs/test_config.yaml +++ b/test/generate-inputs/inputs/test_config.yaml @@ -1,9 +1,10 @@ hash_length: 7 -container_framework: docker -unpack_singularity: false -container_registry: - base_url: docker.io - owner: reedcompbio +containers: + framework: docker + unpack_singularity: false + registry: + base_url: docker.io + owner: reedcompbio algorithms: - name: "pathlinker" From d0edde5e6eddfd2e52ee4f624f135c474d5c9336 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sun, 31 Aug 2025 00:13:28 +0000 Subject: [PATCH 02/13] fix: update refs to other settings --- Snakefile | 2 +- spras/config/config.py | 4 +--- spras/containers.py | 6 +++--- test/AllPairs/test_ap.py | 4 ++-- test/test_config.py | 14 +++++++------- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Snakefile b/Snakefile index 1daa8dee9..b65c7e13d 100644 --- a/Snakefile +++ b/Snakefile @@ -25,7 +25,7 @@ algorithm_params = _config.config.algorithm_params algorithm_directed = _config.config.algorithm_directed pca_params = _config.config.pca_params hac_params = _config.config.hac_params -FRAMEWORK = _config.config.container_framework +FRAMEWORK = _config.config.container_settings.framework include_aggregate_algo_eval = _config.config.analysis_include_evaluation_aggregate_algo # Return the dataset or gold_standard dictionary from the config file given the label diff --git a/spras/config/config.py b/spras/config/config.py index 53591cd20..7827b6343 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -7,7 +7,7 @@ value. For example import spras.config.config as config -container_framework = config.config.container_framework +container_framework = config.config.container_settings.framework will grab the top level registry configuration option as it appears in the config file """ @@ -68,8 +68,6 @@ def __init__(self, raw_config: dict[str, Any]): self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir # Container settings used by PRMs. self.container_settings = ProcessedContainerSettings.from_container_settings(parsed_raw_config.containers, parsed_raw_config.hash_length) - # A Boolean specifying whether to unpack singularity containers. Default is False - self.unpack_singularity = False # A dictionary to store configured datasets against which SPRAS will be run self.datasets = None # A dictionary to store configured gold standard data against output of SPRAS runs diff --git a/spras/containers.py b/spras/containers.py index b7711c4f6..7fdf9f904 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -144,7 +144,7 @@ def run_container(framework: str, container_suffix: str, command: List[str], vol """ normalized_framework = framework.casefold() - container = config.config.container_prefix + "/" + container_suffix + container = config.config.container_settings.prefix + "/" + container_suffix if normalized_framework == 'docker': return run_container_docker(container, command, volumes, working_dir, environment) elif normalized_framework == 'singularity': @@ -329,7 +329,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ singularity_options.extend(['--env', ",".join(env_to_items(environment))]) # Handle unpacking singularity image if needed. Potentially needed for running nested unprivileged containers - if config.config.unpack_singularity: + if config.config.container_settings.unpack_singularity: # Split the string by "/" path_elements = container.split("/") @@ -388,7 +388,7 @@ def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PureP if isinstance(filename, PurePath): filename = str(filename) - filename_hash = hash_filename(filename, config.config.hash_length) + filename_hash = hash_filename(filename, config.config.container_settings.hash_length) dest = PurePosixPath(base_path, filename_hash) abs_filename = Path(filename).resolve() diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py index a8291f72f..355033f9f 100644 --- a/test/AllPairs/test_ap.py +++ b/test/AllPairs/test_ap.py @@ -81,14 +81,14 @@ def test_allpairs_singularity_unpacked(self): out_path = OUT_DIR / 'sample-out-unpack.txt' out_path.unlink(missing_ok=True) # Indicate via config mechanism that we want to unpack the Singularity container - config.config.unpack_singularity = True + config.config.container_settings.unpack_singularity = True AllPairs.run( nodetypes=str(TEST_DIR / 'input/sample-in-nodetypes.txt'), network=str(TEST_DIR / 'input/sample-in-net.txt'), directed_flag=str(TEST_DIR / 'input' / 'directed-flag-false.txt'), output_file=str(out_path), container_framework="singularity") - config.config.unpack_singularity = False + config.config.container_settings.unpack_singularity = False assert out_path.exists() def test_allpairs_correctness(self): diff --git a/test/test_config.py b/test/test_config.py index 72df9e0b9..80bc5110c 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -161,22 +161,22 @@ def test_config_container_framework_normalization(self): test_config["container_framework"] = "singularity" config.init_global(test_config) - assert (config.config.container_framework == "singularity") + assert (config.config.container_settings.framework == "singularity") # Test singularity with capitalization test_config["container_framework"] = "Singularity" config.init_global(test_config) - assert (config.config.container_framework == "singularity") + assert (config.config.container_settings.framework == "singularity") # Test docker test_config["container_framework"] = "docker" config.init_global(test_config) - assert (config.config.container_framework == "docker") + assert (config.config.container_settings.framework == "docker") # Test docker with capitalization test_config["container_framework"] = "Docker" config.init_global(test_config) - assert (config.config.container_framework == "docker") + assert (config.config.container_settings.framework == "docker") # Test unknown framework test_config["container_framework"] = "badFramework" @@ -188,17 +188,17 @@ def test_config_container_registry(self): test_config["container_registry"]["base_url"] = "docker.io" test_config["container_registry"]["owner"] = "reedcompbio" config.init_global(test_config) - assert (config.config.container_prefix == "docker.io/reedcompbio") + assert (config.config.container_settings.prefix == "docker.io/reedcompbio") test_config["container_registry"]["base_url"] = "another.repo" test_config["container_registry"]["owner"] = "different-owner" config.init_global(test_config) - assert (config.config.container_prefix == "another.repo/different-owner") + assert (config.config.container_settings.prefix == "another.repo/different-owner") test_config["container_registry"]["base_url"] = "" test_config["container_registry"]["owner"] = "" config.init_global(test_config) - assert (config.config.container_prefix == config.DEFAULT_CONTAINER_PREFIX) + assert (config.config.container_settings.prefix == config.DEFAULT_CONTAINER_PREFIX) def test_error_dataset_label(self): test_config = get_test_config() From e9bddcc33cc9123f82cbc2b2af731a830c2704f3 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sun, 31 Aug 2025 00:23:28 +0000 Subject: [PATCH 03/13] test: correct containers inside config --- test/test_config.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test_config.py b/test/test_config.py index 80bc5110c..af6381ae1 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -18,12 +18,14 @@ # individual values of the dict can be changed and the whole initialization can be re-run. def get_test_config(): test_raw_config = { - "container_framework": "singularity", - "container_registry": { - "base_url": "docker.io", - "owner": "reedcompbio", + "containers": { + "framework": "singularity", + "registry": { + "base_url": "docker.io", + "owner": "reedcompbio", + }, + "hash_length": 7, }, - "hash_length": 7, "reconstruction_settings": { "locations": { "reconstruction_dir": "my_dir" From 77b5b110be4b834de65ac236a06c4cd5b38524f1 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sun, 31 Aug 2025 00:44:31 +0000 Subject: [PATCH 04/13] test: use correct containers key --- test/test_config.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/test_config.py b/test/test_config.py index af6381ae1..a16ca6695 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -161,44 +161,44 @@ def test_config_container_framework_normalization(self): # Test singularity test_config = get_test_config() - test_config["container_framework"] = "singularity" + test_config["containers"]["framework"] = "singularity" config.init_global(test_config) assert (config.config.container_settings.framework == "singularity") # Test singularity with capitalization - test_config["container_framework"] = "Singularity" + test_config["containers"]["framework"] = "Singularity" config.init_global(test_config) assert (config.config.container_settings.framework == "singularity") # Test docker - test_config["container_framework"] = "docker" + test_config["containers"]["framework"] = "docker" config.init_global(test_config) assert (config.config.container_settings.framework == "docker") # Test docker with capitalization - test_config["container_framework"] = "Docker" + test_config["containers"]["framework"] = "Docker" config.init_global(test_config) assert (config.config.container_settings.framework == "docker") # Test unknown framework - test_config["container_framework"] = "badFramework" + test_config["containers"]["framework"] = "badFramework" with pytest.raises(ValueError): config.init_global(test_config) def test_config_container_registry(self): test_config = get_test_config() - test_config["container_registry"]["base_url"] = "docker.io" - test_config["container_registry"]["owner"] = "reedcompbio" + test_config["containers"]["registry"]["base_url"] = "docker.io" + test_config["containers"]["registry"]["owner"] = "reedcompbio" config.init_global(test_config) assert (config.config.container_settings.prefix == "docker.io/reedcompbio") - test_config["container_registry"]["base_url"] = "another.repo" - test_config["container_registry"]["owner"] = "different-owner" + test_config["containers"]["registry"]["base_url"] = "another.repo" + test_config["containers"]["registry"]["owner"] = "different-owner" config.init_global(test_config) assert (config.config.container_settings.prefix == "another.repo/different-owner") - test_config["container_registry"]["base_url"] = "" - test_config["container_registry"]["owner"] = "" + test_config["containers"]["registry"]["base_url"] = "" + test_config["containers"]["registry"]["owner"] = "" config.init_global(test_config) assert (config.config.container_settings.prefix == config.DEFAULT_CONTAINER_PREFIX) From 3cadfc479b43d583b1addf4612926994d52bedeb Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sat, 6 Sep 2025 05:18:32 +0000 Subject: [PATCH 05/13] docs: container_registry -> containeres.registry --- docs/contributing/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing/index.rst b/docs/contributing/index.rst index 600d05a02..92221f616 100644 --- a/docs/contributing/index.rst +++ b/docs/contributing/index.rst @@ -285,7 +285,7 @@ Local Neighborhood has no other parameters. Optionally set ``include: false`` for the other pathway reconstruction algorithms to make testing faster. -The config file has an option ``owner`` under the ``container_registry`` +The config file has an option ``owner`` under the ``containers.registry`` settings that controls which Docker Hub account will be used when pulling Docker images. The same Docker Hub account will be used for all images and cannot currently be set different for each algorithm. Set the From 2301443e08e6b17b0fc16431fa30a64643fb5172 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sat, 6 Sep 2025 05:22:45 +0000 Subject: [PATCH 06/13] docs: cmt --- spras/config/container_schema.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index 11b13576f..b777f2a68 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -44,6 +44,14 @@ class ProcessedContainerSettings: unpack_singularity: bool = False prefix: str = DEFAULT_CONTAINER_PREFIX hash_length: int = 7 + """ + The hash length for container-specific usage. This does not appear in + the output folder, but it may show up in logs, and usually never needs + to be tinkered with. By default, this will be the top-level `hash_length`. + + We prefer this `hash_length` in our container-running logic to + avoid a (future) dependency diamond. + """ @staticmethod def from_container_settings(settings: ContainerSettings, default_hash_length: int) -> "ProcessedContainerSettings": From b1f2034f630b7957cf1872adb531372e75cb32e6 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sat, 13 Sep 2025 05:19:28 +0000 Subject: [PATCH 07/13] refactor!: prefer top-level hash length in config --- spras/config/config.py | 4 ++-- spras/config/container_schema.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index 7827b6343..746ee62e8 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -66,14 +66,14 @@ def __init__(self, raw_config: dict[str, Any]): # Directory used for storing output self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir - # Container settings used by PRMs. - self.container_settings = ProcessedContainerSettings.from_container_settings(parsed_raw_config.containers, parsed_raw_config.hash_length) # A dictionary to store configured datasets against which SPRAS will be run self.datasets = None # A dictionary to store configured gold standard data against output of SPRAS runs self.gold_standards = None # The hash length SPRAS will use to identify parameter combinations. self.hash_length = parsed_raw_config.hash_length + # Container settings used by PRMs. + self.container_settings = ProcessedContainerSettings.from_container_settings(parsed_raw_config.containers, self.hash_length) # The list of algorithms to run in the workflow. Each is a dict with 'name' as an expected key. self.algorithms = None # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations. diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index b777f2a68..726313b40 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -34,7 +34,6 @@ class ContainerSettings(BaseModel): framework: ContainerFramework = ContainerFramework.docker unpack_singularity: bool = False registry: ContainerRegistry - hash_length: int = 7 model_config = ConfigDict(extra='forbid') @@ -47,14 +46,15 @@ class ProcessedContainerSettings: """ The hash length for container-specific usage. This does not appear in the output folder, but it may show up in logs, and usually never needs - to be tinkered with. By default, this will be the top-level `hash_length`. + to be tinkered with. This will be the top-level `hash_length` specified + in the config. We prefer this `hash_length` in our container-running logic to avoid a (future) dependency diamond. """ @staticmethod - def from_container_settings(settings: ContainerSettings, default_hash_length: int) -> "ProcessedContainerSettings": + def from_container_settings(settings: ContainerSettings, hash_length: int) -> "ProcessedContainerSettings": if settings.framework == ContainerFramework.dsub: warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2) container_framework = settings.framework @@ -73,5 +73,5 @@ def from_container_settings(settings: ContainerSettings, default_hash_length: in framework=container_framework, unpack_singularity=unpack_singularity, prefix=container_prefix, - hash_length=settings.hash_length or default_hash_length + hash_length=hash_length ) From 5e2c1f101f60c3c37c1327d44cd22af94dd4a5a9 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Fri, 12 Sep 2025 22:28:33 -0700 Subject: [PATCH 08/13] fix: mv hash-length --- test/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_config.py b/test/test_config.py index a16ca6695..fe1a4997b 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -24,8 +24,8 @@ def get_test_config(): "base_url": "docker.io", "owner": "reedcompbio", }, - "hash_length": 7, }, + "hash_length": 7, "reconstruction_settings": { "locations": { "reconstruction_dir": "my_dir" From 3f46af6babf9fbb66fa44a0c3bd4333aedc12698 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Fri, 3 Oct 2025 17:28:09 -0700 Subject: [PATCH 09/13] fix: add apptainer variant --- spras/config/container_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index 726313b40..e85d22e60 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -17,8 +17,8 @@ class ContainerFramework(CaseInsensitiveEnum): docker = 'docker' - # TODO: add apptainer variant once #260 gets merged singularity = 'singularity' + apptainer = 'apptainer' dsub = 'dsub' class ContainerRegistry(BaseModel): From a26ef0b6737cd9d354d68f5047cd2c2521ca9a30 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Fri, 3 Oct 2025 17:29:59 -0700 Subject: [PATCH 10/13] fix: config indentation --- config/config.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index a142bf006..873a43ac4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -19,6 +19,15 @@ containers: # under `unpacked`. unpack_singularity: false + # Allow the user to configure which container registry containers should be pulled from + # Note that this assumes container names are consistent across registries, and that the + # registry being passed doesn't require authentication for pull actions + registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio + # Enabling profiling adds a file called 'usage-profile.tsv' to the output directory of each algorithm. # The contents of this file describe the CPU utilization and peak memory consumption of the algorithm # as seen by its runtime container. @@ -31,15 +40,6 @@ containers: # requirements = versionGE(split(Target.CondorVersion)[1], "24.8.0") && (isenforcingdiskusage =!= true) enable_profiling: false - # Allow the user to configure which container registry containers should be pulled from - # Note that this assumes container names are consistent across registries, and that the - # registry being passed doesn't require authentication for pull actions - registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio - # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm # in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved) From d7ad4026b3e6812cf89278fb4d4b4a94e82ba30c Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Tue, 14 Oct 2025 01:15:23 +0000 Subject: [PATCH 11/13] style: fmt --- spras/config/config.py | 4 ++-- spras/config/schema.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spras/config/config.py b/spras/config/config.py index a8fe88089..db20456eb 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -22,7 +22,7 @@ import numpy as np import yaml -from spras.config.container_schema import ContainerSettings, ProcessedContainerSettings +from spras.config.container_schema import ProcessedContainerSettings from spras.config.schema import RawConfig from spras.util import NpHashEncoder, hash_params_sha1_base32 @@ -292,7 +292,7 @@ def process_config(self, raw_config: RawConfig): # Set up a few top-level config variables self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir - if raw_config.enable_profiling and not raw_config.containers.framework in ["singularity", "apptainer"]: + if raw_config.enable_profiling and raw_config.containers.framework not in ["singularity", "apptainer"]: warnings.warn("enable_profiling is set to true, but the container framework is not singularity/apptainer. This setting will have no effect.") self.enable_profiling = raw_config.enable_profiling diff --git a/spras/config/schema.py b/spras/config/schema.py index 1e71fa277..a1936b0c0 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -13,7 +13,7 @@ import re from typing import Annotated, Optional -from pydantic import AfterValidator, BaseModel, ConfigDict, Field +from pydantic import AfterValidator, BaseModel, ConfigDict from spras.config.container_schema import ContainerSettings from spras.config.util import CaseInsensitiveEnum From 45803d7e42bcda46634c11edf4b4be1b6491b504 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Tue, 14 Oct 2025 01:26:22 +0000 Subject: [PATCH 12/13] fix: apply corrected stacklevel --- spras/config/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/config.py b/spras/config/config.py index db20456eb..25e6f72de 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -293,7 +293,7 @@ def process_config(self, raw_config: RawConfig): self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir if raw_config.enable_profiling and raw_config.containers.framework not in ["singularity", "apptainer"]: - warnings.warn("enable_profiling is set to true, but the container framework is not singularity/apptainer. This setting will have no effect.") + warnings.warn("enable_profiling is set to true, but the container framework is not singularity/apptainer. This setting will have no effect.", stacklevel=2) self.enable_profiling = raw_config.enable_profiling self.process_datasets(raw_config) From 9eb8c90c88e6d8d0e89be1b0178c797f402d094c Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 23 Oct 2025 21:41:43 +0000 Subject: [PATCH 13/13] docs: update containers references --- config/config.yaml | 2 +- config/egfr.yaml | 2 +- docker-wrappers/SPRAS/example_config.yaml | 40 ++++++++++++++--------- docs/_static/config/beginner.yaml | 11 ++++--- docs/_static/config/intermediate.yaml | 11 ++++--- docs/htcondor.rst | 2 +- docs/tutorial/advanced.rst | 3 +- 7 files changed, 42 insertions(+), 29 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 873a43ac4..7879f5e9f 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -10,7 +10,7 @@ containers: # - dsub -- experimental with limited support, used for running on Google Cloud framework: docker - # Only used if container_framework is set to singularity/apptainer, this will unpack the containers + # Only used if framework is set to singularity/apptainer, this will unpack the containers # to the local filesystem. This is useful when PRM containers need to run inside another container, # such as would be the case in an HTCondor/OSPool environment. # NOTE: This unpacks containers to the local filesystem, which will take up space in a way diff --git a/config/egfr.yaml b/config/egfr.yaml index f35f407b8..25e56ab25 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -9,7 +9,7 @@ containers: # - There is no support for other environments at the moment. framework: docker - # Only used if container_framework is set to singularity, this will unpack the singularity containers + # Only used if framework is set to singularity, this will unpack the singularity containers # to the local filesystem. This is useful when PRM containers need to run inside another container, # such as would be the case in an HTCondor/OSPool environment. # NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml index 87e996a9c..db1c2dbbf 100644 --- a/docker-wrappers/SPRAS/example_config.yaml +++ b/docker-wrappers/SPRAS/example_config.yaml @@ -3,21 +3,31 @@ # The length of the hash used to identify a parameter combination hash_length: 7 -# Specify the container framework. Current supported versions include 'docker' and -# 'singularity'. If container_framework is not specified, SPRAS will default to docker. -container_framework: singularity - -# Unpack singularity. See config/config.yaml for details. -unpack_singularity: true - -# Allow the user to configure which container registry containers should be pulled from -# Note that this assumes container names are consistent across registries, and that the -# registry being passed doesn't require authentication for pull actions -container_registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio +containers: + # Specify the container framework used by each PRM wrapper. Valid options include: + # - docker (default if not specified) + # - singularity OR apptainer -- Apptainer (formerly Singularity) is useful in HPC/HTC environments where docker isn't allowed + # - dsub -- experimental with limited support, used for running on Google Cloud + framework: singularity + + # Only used if framework is set to singularity/apptainer, this will unpack the containers + # to the local filesystem. This is useful when PRM containers need to run inside another container, + # such as would be the case in an HTCondor/OSPool environment. + # NOTE: This unpacks containers to the local filesystem, which will take up space in a way + # that persists after the workflow is complete. To clean up the unpacked containers, the user must + # manually delete them. For convenience, these unpacked files will exist in the current working directory + # under `unpacked`. + # Here, we unpack it since we're running on HTCondor. + unpack_singularity: true + + # Allow the user to configure which container registry containers should be pulled from + # Note that this assumes container names are consistent across registries, and that the + # registry being passed doesn't require authentication for pull actions + registry: + base_url: docker.io + # The owner or project of the registry + # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs + owner: reedcompbio # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm diff --git a/docs/_static/config/beginner.yaml b/docs/_static/config/beginner.yaml index 951dd2834..1ddda6dc2 100644 --- a/docs/_static/config/beginner.yaml +++ b/docs/_static/config/beginner.yaml @@ -1,9 +1,10 @@ hash_length: 7 -container_framework: docker -unpack_singularity: false -container_registry: - base_url: docker.io - owner: reedcompbio +containers: + framework: docker + unpack_singularity: false + registry: + base_url: docker.io + owner: reedcompbio # Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change # which algorithms are run in a given experiment. diff --git a/docs/_static/config/intermediate.yaml b/docs/_static/config/intermediate.yaml index 78f5a7489..1f0ba2eb5 100644 --- a/docs/_static/config/intermediate.yaml +++ b/docs/_static/config/intermediate.yaml @@ -1,9 +1,10 @@ hash_length: 7 -container_framework: docker -unpack_singularity: false -container_registry: - base_url: docker.io - owner: reedcompbio +containers: + framework: docker + unpack_singularity: false + registry: + base_url: docker.io + owner: reedcompbio # Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change # which algorithms are run in a given experiment. diff --git a/docs/htcondor.rst b/docs/htcondor.rst index 6103e1414..4a96d4487 100644 --- a/docs/htcondor.rst +++ b/docs/htcondor.rst @@ -71,7 +71,7 @@ it uses the SPRAS apptainer image you created: container_image = < your spras image >.sif Make sure to modify the configuration file to have -``unpack_singularity`` set to ``true``, and ``container_framework`` set +``unpack_singularity`` set to ``true``, and ``containers.framework`` set to ``singularity``: else, the workflow will (likely) fail. Then run ``condor_submit spras.sub``, which will submit SPRAS to diff --git a/docs/tutorial/advanced.rst b/docs/tutorial/advanced.rst index 7a20cc68f..8f7e8b645 100644 --- a/docs/tutorial/advanced.rst +++ b/docs/tutorial/advanced.rst @@ -177,6 +177,7 @@ The global workflow control section in the configuration file allows a user to s .. code-block:: yaml - container_framework: docker + containers: + framework: docker The frameworks include Docker, Apptainer/Singularity, or dsub