From 2cb2a2d7c078cad4ac0c3c48d06b47db35d81559 Mon Sep 17 00:00:00 2001 From: Dirk Barbi Date: Wed, 25 Nov 2020 10:54:16 +0100 Subject: [PATCH 01/52] merged kais last minute config into prep_release --- esm_runscripts/batch_system.py | 4 ++ esm_runscripts/cli.py | 13 ++++++ esm_runscripts/last_minute.py | 75 ++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 esm_runscripts/last_minute.py diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index e77edae..9b073de 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -192,6 +192,10 @@ def write_simple_runscript(config): + config["general"]["jobtype"] + " -v " ) + + if config["general"]["modify_config_file_abspath"]: + tidy_call += " -m " + config["general"]["modify_config_file_abspath"] + elif config["general"]["jobtype"] == "post": tidy_call = "" commands = config["general"]["post_task_list"] diff --git a/esm_runscripts/cli.py b/esm_runscripts/cli.py index 2fc5bfa..c38fe27 100644 --- a/esm_runscripts/cli.py +++ b/esm_runscripts/cli.py @@ -53,6 +53,14 @@ def parse_shargs(): action="store_true", ) + parser.add_argument( + "--modify-config", + "-m", + dest="modify", + help="[m]odify configuration", + default="", # kh 15.07.20 "usermods.yaml" + ) + parser.add_argument( "-j", "--last_jobtype", @@ -112,6 +120,7 @@ def main(): jobtype = "compute" verbose = False inspect = None + modify_config_file = None parsed_args = vars(ARGS) @@ -135,6 +144,8 @@ def main(): verbose = parsed_args["verbose"] if "inspect" in parsed_args: inspect = parsed_args["inspect"] + if "modify" in parsed_args: + modify_config_file = parsed_args["modify"] command_line_config = {} command_line_config["check"] = check @@ -147,6 +158,8 @@ def main(): command_line_config["last_jobtype"] = ARGS.last_jobtype command_line_config["verbose"] = verbose command_line_config["inspect"] = inspect + if modify_config_file: + command_line_config["modify_config_file"] = modify_config_file command_line_config["original_command"] = original_command.strip() command_line_config["started_from"] = os.getcwd() diff --git a/esm_runscripts/last_minute.py b/esm_runscripts/last_minute.py new file mode 100644 index 0000000..85fbb3a --- /dev/null +++ b/esm_runscripts/last_minute.py @@ -0,0 +1,75 @@ +import os +import copy +import esm_parser + +class last_minute_changes: + def __init__(self, config): + self.modify_config_file = config["general"].get("modify_config_file") + + if self.modify_config_file: + self.modify_config_file_abspath = os.path.abspath(modify_config_file) + self.modify_config = esm_parser.yaml_file_to_dict(modify_config_file_abspath) + + config["general"]["modify_config"] = copy.deepcopy(self.modify_config) + config["general"]["modify_config_file_abspath"] = self.modify_config_file_abspath + + config["general"]["original_command"] = config["general"]["original_command"].replace( + self.modify_config_file, + self.modify_config_file_abspath + ) + + config["general"]["command_line_config"]["original_command"] = config["general"]["original_command"] + + else: + self.modify_config_file_abspath = self.modify_config = None + + +def apply_last_minute_changes(config): + config["general"]["modify_config_memo"] = last_minute_changes(config) + + modify_config = config["general"]["modify_config_memo"].modify_config + + if modify_config: + settings = modify_config.get("build_and_run_modifications", {}).get("machine", {}).get("chooseable_settings") + _modify_config_with_settings(config, settings) + + settings = modify_config.get("build_only_modifications", {}).get("machine", {}).get("environment_settings") + _modify_config_with_settings(config, settings) + + settings = modify_config.get("run_only_modifications", {}).get("machine", {}).get("chooseable_settings") + _modify_config_with_settings(config, settings) + + settings = modify_config.get("run_only_modifications", {}).get("batch_system", {}).get("direct_settings") + _modify_config_with_settings(config, settings) + + return config + + +def restore_protected_last_minute_changes(config): + if config["general"]["modify_config_memo"]: + if config["general"]["modify_config_memo"].config: + config["general"]["modify_config"] = config["general"]["modify_config_memo"].config + del config["general"]["modify_config_memo"] + + if config["general"].has_key("modify_config_memo"): # Entry could exist but be False + del config["general"]["modify_config_memo"] + + return config + + +def _modify_config_with_settings(self, config, settings): + if settings: + for k, v in settings.items(): + path_to_key = k.split(".") + entry = path_to_key.pop() + selected_config = config + for k2 in path_to_key: + selected_config = selected_config[k2] + if type(selected_config) == dict: + selected_config[entry] = v + elif type(selected_config) == list: + selected_config.append(entry + "=" + v) + + else: + raise ValueError("unexpected container type (neither dict nor list") + From 289273da4e6ca3817d5209379c9b87bdaa6f254d Mon Sep 17 00:00:00 2001 From: Dirk Barbi Date: Wed, 25 Nov 2020 11:06:50 +0100 Subject: [PATCH 02/52] forgot to export last_minute --- esm_runscripts/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esm_runscripts/__init__.py b/esm_runscripts/__init__.py index a3e52d2..cb6c7da 100644 --- a/esm_runscripts/__init__.py +++ b/esm_runscripts/__init__.py @@ -11,6 +11,7 @@ from .compute import * from .tidy import * from .prepare import * +from .last_minute import * from .postprocess import * from .filelists import * from .tidy import * From ce0f4a93282d3356b51abde28f04069ee3818719 Mon Sep 17 00:00:00 2001 From: Dirk Barbi Date: Wed, 25 Nov 2020 11:51:45 +0100 Subject: [PATCH 03/52] possible bug fixed --- esm_runscripts/last_minute.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/esm_runscripts/last_minute.py b/esm_runscripts/last_minute.py index 85fbb3a..97b2fe0 100644 --- a/esm_runscripts/last_minute.py +++ b/esm_runscripts/last_minute.py @@ -4,7 +4,7 @@ class last_minute_changes: def __init__(self, config): - self.modify_config_file = config["general"].get("modify_config_file") + self.modify_config_file = config["general"]["command_line_config"].get("modify_config_file") if self.modify_config_file: self.modify_config_file_abspath = os.path.abspath(modify_config_file) @@ -18,8 +18,6 @@ def __init__(self, config): self.modify_config_file_abspath ) - config["general"]["command_line_config"]["original_command"] = config["general"]["original_command"] - else: self.modify_config_file_abspath = self.modify_config = None From cf41b55d890dda282f8bcef2c80bcb0db4927aa3 Mon Sep 17 00:00:00 2001 From: Kai Himstedt Date: Wed, 2 Dec 2020 14:16:21 +0100 Subject: [PATCH 04/52] fixes to branch more_dyn_conf_merged --- esm_runscripts/batch_system.py | 5 +++-- esm_runscripts/last_minute.py | 25 ++++++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 9b073de..cafa6f6 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -193,8 +193,9 @@ def write_simple_runscript(config): + " -v " ) - if config["general"]["modify_config_file_abspath"]: - tidy_call += " -m " + config["general"]["modify_config_file_abspath"] + if "modify_config_file_abspath" in config["general"]: + if config["general"]["modify_config_file_abspath"]: + tidy_call += " -m " + config["general"]["modify_config_file_abspath"] elif config["general"]["jobtype"] == "post": tidy_call = "" diff --git a/esm_runscripts/last_minute.py b/esm_runscripts/last_minute.py index 97b2fe0..560d3db 100644 --- a/esm_runscripts/last_minute.py +++ b/esm_runscripts/last_minute.py @@ -4,19 +4,21 @@ class last_minute_changes: def __init__(self, config): - self.modify_config_file = config["general"]["command_line_config"].get("modify_config_file") + self.modify_config_file = config["general"].get("modify_config_file") if self.modify_config_file: - self.modify_config_file_abspath = os.path.abspath(modify_config_file) - self.modify_config = esm_parser.yaml_file_to_dict(modify_config_file_abspath) + self.modify_config_file_abspath = os.path.abspath(self.modify_config_file) + self.modify_config = esm_parser.yaml_file_to_dict(self.modify_config_file_abspath) config["general"]["modify_config"] = copy.deepcopy(self.modify_config) config["general"]["modify_config_file_abspath"] = self.modify_config_file_abspath - config["general"]["original_command"] = config["general"]["original_command"].replace( - self.modify_config_file, - self.modify_config_file_abspath - ) +# kh 27.11.20 "original command" is not available for esm_master (but for esm_runscripts) + if "original_command" in "general": + config["general"]["original_command"] = config["general"]["original_command"].replace( + self.modify_config_file, + self.modify_config_file_abspath + ) else: self.modify_config_file_abspath = self.modify_config = None @@ -45,17 +47,18 @@ def apply_last_minute_changes(config): def restore_protected_last_minute_changes(config): if config["general"]["modify_config_memo"]: - if config["general"]["modify_config_memo"].config: - config["general"]["modify_config"] = config["general"]["modify_config_memo"].config + if config["general"]["modify_config_memo"].modify_config: + config["general"]["modify_config"] = config["general"]["modify_config_memo"].modify_config del config["general"]["modify_config_memo"] - if config["general"].has_key("modify_config_memo"): # Entry could exist but be False +# kh 26.11.20 + if "modify_config_memo" in config["general"]: # Entry could exist but be False del config["general"]["modify_config_memo"] return config -def _modify_config_with_settings(self, config, settings): +def _modify_config_with_settings(config, settings): if settings: for k, v in settings.items(): path_to_key = k.split(".") From 8eb42c28a9407dbc31b0289eb1089e433717fedc Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Tue, 8 Dec 2020 09:04:30 +0100 Subject: [PATCH 05/52] =?UTF-8?q?Bump=20version:=205.0.2=20=E2=86=92=205.0?= =?UTF-8?q?.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- esm_runscripts/__init__.py | 2 +- setup.cfg | 3 +-- setup.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/esm_runscripts/__init__.py b/esm_runscripts/__init__.py index f397007..ecd8cff 100644 --- a/esm_runscripts/__init__.py +++ b/esm_runscripts/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = 'dirk.barbi@awi.de' -__version__ = "5.0.2" +__version__ = "5.0.3" from .sim_objects import * from .batch_system import * diff --git a/setup.cfg b/setup.cfg index 09072c7..fc24715 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.0.2 +current_version = 5.0.3 commit = True tag = True @@ -20,4 +20,3 @@ select = C,E,F,W,B,B950 ignore = E203, E501, W503 [aliases] - diff --git a/setup.py b/setup.py index e4b895b..59e94bb 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/dbarbi/esm_runscripts', - version="5.0.2", + version="5.0.3", zip_safe=False, ) From 20d191e3b2ed6477a7c912d39760c3822fba624a Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 11 Dec 2020 08:58:31 +0100 Subject: [PATCH 06/52] colorful diffs --- esm_runscripts/compute.py | 13 ++++++++++++- setup.py | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/esm_runscripts/compute.py b/esm_runscripts/compute.py index 05b7392..c6b8f4a 100644 --- a/esm_runscripts/compute.py +++ b/esm_runscripts/compute.py @@ -6,6 +6,7 @@ import six import yaml from esm_calendar import Date +from colorama import Fore, Back, Style, init import esm_tools @@ -317,6 +318,16 @@ def strip_python_tags(s): config_file.write(out) return config +def color_diff(diff): + for line in diff: + if line.startswith('+'): + yield Fore.GREEN + line + Fore.RESET + elif line.startswith('-'): + yield Fore.RED + line + Fore.RESET + elif line.startswith('^'): + yield Fore.BLUE + line + Fore.RESET + else: + yield line def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): """ @@ -369,7 +380,7 @@ def update_runscript(fromdir, scriptsdir, tfile, gconfig, file_type): f"{fromdir + '/' + tfile} differs from " + f"{scriptsdir + '/' + tfile}:\n" ) - for line in difflib.unified_diff(script_t, script_o): + for line in color_diff(difflib.unified_diff(script_t, script_o)): differences += line # If the --update flag is used, notify that the target script will diff --git a/setup.py b/setup.py index 2dd9cae..6bc2dd1 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ "esm_motd @ git+https://github.com/esm-tools/esm_motd.git", "psutil", "f90nml", + "colorama", "coloredlogs", "tqdm", "sqlalchemy", From 5565af490720294238dfb6e8d8a1c06cba6ce080 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 11 Dec 2020 08:59:34 +0100 Subject: [PATCH 07/52] allows to exit right away from venv question --- esm_runscripts/virtual_env_builder.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esm_runscripts/virtual_env_builder.py b/esm_runscripts/virtual_env_builder.py index 33735df..45f1f5c 100644 --- a/esm_runscripts/virtual_env_builder.py +++ b/esm_runscripts/virtual_env_builder.py @@ -180,7 +180,10 @@ def _integorate_user_venv(config): choices=[ 'Run in virtualenv (You may set the flag `--contained-run` during your run call or set `general.use_venv: True`)', 'Run using default installation (You may set the flag `--open-run` during your run call or set `general.use_venv: False`)', + "Quit right now to adapt your runscript", ]).ask() # returns value of selection + if "Quit" in response: + sys.exit(0) config['general']['use_venv'] = "Run in virtualenv" in response user_confirmed = questionary.confirm("Are you sure?").ask() if "Run in virtualenv" in response: From 0d201d6d3547ddc177da37cb583482afc1eb9579 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 11 Dec 2020 09:00:03 +0100 Subject: [PATCH 08/52] removes merge markers --- esm_runscripts/cli.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/esm_runscripts/cli.py b/esm_runscripts/cli.py index 199ff27..1fe1192 100644 --- a/esm_runscripts/cli.py +++ b/esm_runscripts/cli.py @@ -65,7 +65,7 @@ def parse_shargs(): "--modify-config", "-m", dest="modify", - help="[m]odify configuration", + help="[m]odify configuration", default="", # kh 15.07.20 "usermods.yaml" ) @@ -161,7 +161,7 @@ def main(): use_venv = parsed_args["contained_run"] if parsed_args["open_run"] is not None: use_venv = not parsed_args["open_run"] - if "modify" in parsed_args: + if "modify" in parsed_args: modify_config_file = parsed_args["modify"] command_line_config = {} @@ -175,12 +175,9 @@ def main(): command_line_config["last_jobtype"] = ARGS.last_jobtype command_line_config["verbose"] = verbose command_line_config["inspect"] = inspect -<<<<<<< HEAD command_line_config["use_venv"] = use_venv -======= if modify_config_file: command_line_config["modify_config_file"] = modify_config_file ->>>>>>> fixes/more_dyn_conf_merged command_line_config["original_command"] = original_command.strip() command_line_config["started_from"] = os.getcwd() From 3a7e538652599628de5508c312872bbbbbaa9cd2 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Thu, 31 Dec 2020 13:11:04 +0100 Subject: [PATCH 09/52] feat(virtual_env_builder): recycles the virtual env if one already exists in the experiment tree If a run crashes, and the user has been working in the virtual environment, they needed to remember to reactivate the environment to continue the run. Now, this check happens for you automatically and the virtual environment under ``$BASE_DIR/$EXP_ID/.venv_esmtools`` is reused if it exists. --- esm_runscripts/virtual_env_builder.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/esm_runscripts/virtual_env_builder.py b/esm_runscripts/virtual_env_builder.py index 33735df..df7fe9e 100644 --- a/esm_runscripts/virtual_env_builder.py +++ b/esm_runscripts/virtual_env_builder.py @@ -109,15 +109,20 @@ def venv_bootstrap(config): config["general"]["command_line_config"]["use_venv"] = config["general"]["use_venv"] if config["general"].get("use_venv", False): if not in_virtualenv(): - print(f"Building virtual env, please be patient (this takes about 3 minutes)...") - start_time = datetime.datetime.now() venv_path = pathlib.Path(config['general']['experiment_dir']).joinpath('.venv_esmtools') - venv_context = _venv_create(venv_path) - _run_python_in_venv(venv_context, ['-m', 'pip', '-q', 'install', '-U', 'pip']) - _run_python_in_venv(venv_context, ['-m', 'pip', '-q', 'install', '-U', 'wheel']) - _install_tools(venv_context, config) - _install_required_plugins(venv_context, config) - print(f"...finished {datetime.datetime.now() - start_time}, restarting your job in the virtual env") + if venv_path.exists(): + print(f"{venv_path} already exists, reusing...") + venv_context = _EnvBuilder(with_pip=True).ensure_directories(venv_path) + else: + print(f"Building virtual env, please be patient (this takes about 3 minutes)...") + start_time = datetime.datetime.now() + venv_path = pathlib.Path(config['general']['experiment_dir']).joinpath('.venv_esmtools') + venv_context = _venv_create(venv_path) + _run_python_in_venv(venv_context, ['-m', 'pip', '-q', 'install', '-U', 'pip']) + _run_python_in_venv(venv_context, ['-m', 'pip', '-q', 'install', '-U', 'wheel']) + _install_tools(venv_context, config) + _install_required_plugins(venv_context, config) + print(f"...finished {datetime.datetime.now() - start_time}, restarting your job in the virtual env") sys.argv[0] = pathlib.Path(sys.argv[0]).name # NOTE(PG): This next line allows the job to restart itself in the # virtual environment. From 379c030b95fc58f6e353f7406986c57bd7d41b59 Mon Sep 17 00:00:00 2001 From: denizural Date: Tue, 5 Jan 2021 12:04:19 +0100 Subject: [PATCH 10/52] =?UTF-8?q?Bump=20version:=205.0.7=20=E2=86=92=205.0?= =?UTF-8?q?.8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- esm_runscripts/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/esm_runscripts/__init__.py b/esm_runscripts/__init__.py index 980aa1a..cd1b625 100644 --- a/esm_runscripts/__init__.py +++ b/esm_runscripts/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = 'dirk.barbi@awi.de' -__version__ = "5.0.7" +__version__ = "5.0.8" from .sim_objects import * from .batch_system import * diff --git a/setup.cfg b/setup.cfg index 2968127..c5cb331 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.0.7 +current_version = 5.0.8 commit = True tag = True diff --git a/setup.py b/setup.py index 250598b..5bba038 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/dbarbi/esm_runscripts', - version="5.0.7", + version="5.0.8", zip_safe=False, ) From a3da441ff653c9e12c93c7c27e7c526ac0454096 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Fri, 29 Jan 2021 17:02:07 +0100 Subject: [PATCH 11/52] allows to define the 'reusable_filetypes' variable inside the general section of a simulation. By default the only reusable files are 'bin' and 'src'. With this change it is possible to also reuse, for example, 'input'. Reused subfolders can be now copied correctly into the work directory (before it was making a mess) --- esm_runscripts/filelists.py | 2 +- esm_runscripts/prepare.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index 646595f..4e8144c 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -154,7 +154,7 @@ def reuse_sources(config): config[model][filetype + "_sources"][categ] = ( config[model]["experiment_" + filetype + "_dir"] + "/" - + config[model][filetype + "_targets"][categ].split("/")[-1] + + config[model][filetype + "_targets"][categ] ) return config diff --git a/esm_runscripts/prepare.py b/esm_runscripts/prepare.py index 3061d90..17093bb 100644 --- a/esm_runscripts/prepare.py +++ b/esm_runscripts/prepare.py @@ -442,7 +442,7 @@ def _add_all_folders(config): "config", "restart_in", ] - config["general"]["reusable_filetypes"] = ["bin", "src"] + config["general"]["reusable_filetypes"] = config["general"].get("reusable_filetypes", ["bin", "src"]) config["general"]["thisrun_dir"] = ( config["general"]["experiment_dir"] From 40190d59a6206f775e6c524f4ae70434c59aec7b Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 3 Feb 2021 11:25:57 +0100 Subject: [PATCH 12/52] feat: allows multiple srun commands to be placed in the sad file --- esm_runscripts/batch_system.py | 150 +++++++++++++++++++++++++++++++++ esm_runscripts/slurm.py | 64 +++++++++----- 2 files changed, 193 insertions(+), 21 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 67d8526..afdd5a4 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -1,4 +1,5 @@ import os +import textwrap import sys import esm_environment @@ -119,6 +120,39 @@ def get_environment(config): env = esm_environment.environment_infos("runtime", config) return env.commands + @staticmethod + def determine_nodelist(config): + setup_name = config['general']['setup_name'] + if config['general'].get('multi_srun'): + for run_type in config['general']['multi_srun']: + print(run_type) + total_tasks = 0 + for model in config['general']['multi_srun'][run_type]['models']: + print(total_tasks) + # determine how many nodes that component needs + if "nproc" in config[model]: + print("Adding to total_tasks") + total_tasks += int(config[model]["nproc"]) + print(total_tasks) + elif "nproca" in config[model] and "nprocb" in config[model]: + print("Adding to total_tasks") + total_tasks += int(config[model]["nproca"])*int(config[model]["nprocb"]) + print(total_tasks) + + # KH 30.04.20: nprocrad is replaced by more flexible + # partitioning using nprocar and nprocbr + if "nprocar" in config[model] and "nprocbr" in config[model]: + if config[model]["nprocar"] != "remove_from_namelist" and config[model]["nprocbr"] != "remove_from_namelist": + print("Adding to total_tasks") + total_tasks += config[model]["nprocar"] * config[model]["nprocbr"] + print(total_tasks) + + else: + continue + config['general']['multi_srun'][run_type]['total_tasks'] = total_tasks + print(config['general']['multi_srun']) + + @staticmethod def get_extra(config): extras = [] @@ -151,11 +185,16 @@ def get_run_commands(config): # here or in compute.py? commands.append( "echo " + line + " >> " + config["general"]["experiment_log_file"] ) + if config['general']['multi_srun']: + return get_run_commands_multisrun(config, commands) commands.append("time " + batch_system["execution_command"] + " &") return commands + + @staticmethod def get_submit_command(config, sadfilename): + # FIXME(PG): Here we need to include a multi-srun thing commands = [] batch_system = config["computer"] if "submit" in batch_system: @@ -175,6 +214,8 @@ def write_simple_runscript(config): sadfilename = batch_system.get_sad_filename(config) header = batch_system.get_batch_header(config) environment = batch_system.get_environment(config) + # NOTE(PG): This next line allows for multi-srun simulations: + batch_system.determine_nodelist(config) extra = batch_system.get_extra(config) if config["general"]["verbose"]: @@ -261,3 +302,112 @@ def submit(config): ) print() return config + + +def get_run_commands_multisrun(config, commands): + default_exec_command = config['computer']["execution_command"] + print("---> This is a multi-srun job.") + print("The default command:") + print(default_exec_command) + print("Will be replaced") + # Since I am already confused, I need to write comments. + # + # The next part is actually a shell script fragment, which will be injected + # into the "sad" file. sad = Sys Admin Dump. It's sad :-( + # + # In this part, we figure out what compute nodes we are using so we can + # specify nodes for each srun command. That means, ECHAM+FESOM will use one + # pre-defined set of nodes, PISM another, and so on. That should be general + # enough to also work for other model combos... + # + # Not sure if this is specific to Mistral as a HPC, Slurm as a batch + # system, or whatever else might pop up... + # @Dirk, please move this where you see it best (I guess slurm.py) + job_node_extraction = r""" + # Job nodes extraction + nodeslurm=$SLURM_JOB_NODELIST + echo "nodeslurm = ${nodeslurm}" + # Get rid of the hostname and surrounding brackets: + tmp=${nodeslurm#"*["} + nodes=${tmp%]*} + # Turn it into an array seperated by newlines: + myarray=(`echo ${nodes} | sed 's/,/\n/g'`) + # + idx=0 + for element in "${myarray[@]}"; do + if [[ "$element" == *"-"* ]]; then + array=(`echo $element | sed 's/-/\n/g'`) + for node in $(seq ${array[0]} ${array[1]}); do + nodelist[$idx]=${node} + idx=${idx}+1 + done + else + nodelist[$idx]=${element} + idx=${idx}+1 + fi + done + + for element in "${nodelist[@]}"; do + echo "${element}" + done + """ + + def assign_nodes(run_type, need_length=False, start_node=0, num_nodes_first_model=0): + template = f""" + # Assign nodes for {run_type} + {run_type}="" + %%NEED_LENGTH%% + for idx in $srbseq {start_node} $srbsrb???-1erberberb; do + if ssbssb $idx == $srbsrb???-1erberb esbesb; then + {run_type}="$scb{run_type}ecb$scbnodelist[$idx]ecb" + else + {run_type}="$scb{run_type}ecb$scbnodelistssb$idxesbecb," + fi + done + echo "{run_type} nodes: $scb{run_type}ecb" + """ + # Since Python f-strings and other braces don't play nicely together, + # we replace some stuff: + # + # For the confused: + # scb = start curly brace { + # ecb = end curly brace } + # ssb = start square brace [ + # esb = end square brace ] + # srb = start round brace ( + # erb = end round brace ) + template = template.replace("scb", "{") + template = template.replace("ecb", "}") + template = template.replace("ssb", "[") + template = template.replace("esb", "]") + template = template.replace("srb", "(") + template = template.replace("erb", ")") + # Get rid of the starting spaces (they come from Python as the string + # is defined inside of this function which is indented (facepalm)) + template = textwrap.dedent(template) + # TODO: Some replacements + if need_length: + length_stuff = r"length=${#nodelist[@]}" + template = template.replace("%%NEED_LENGTH%%", length_stuff) + template = template.replace("???", "length") + else: + template = template.replace("%%NEED_LENGTH%%", "") + template = template.replace("???", str(num_nodes_first_model)) + return template + + + commands.append(textwrap.dedent(job_node_extraction)) + for idx, run_type in enumerate(config['general']['multi_srun']): + if idx == 0: + start_node = run_type + num_nodes_first_model = config['general']['multi_srun'][run_type]['total_tasks'] / config['computer']['cores_per_node'] + num_nodes_first_model = int(num_nodes_first_model) + nodes = assign_nodes(run_type, need_length=False, num_nodes_first_model=num_nodes_first_model) + else: + nodes = assign_nodes(run_type, need_length=True, start_node=start_node) + commands.append(nodes) + for run_type in config['general']['multi_srun']: + new_exec_command = default_exec_command.replace("hostfile_srun", config['general']['multi_srun'][run_type]['hostfile']) + new_exec_command += f" --nodelist ${run_type}" + commands.append("time " + new_exec_command + " &") + return commands diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 90f43de..28c523a 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -53,35 +53,57 @@ def get_jobid(): """ return os.environ.get("SLURM_JOB_ID") + def calc_requirements_multi_srun(self, config): + print("Paul was here...") + for run_type in list(config['general']['multi_srun']): + current_hostfile = self.path+"_"+run_type + print(f"Writing to: {current_hostfile}") + start_proc = 0 + end_proc = 0 + with open(current_hostfile, "w") as hostfile: + for model in config['general']['multi_srun'][run_type]['models']: + start_proc, end_proc = self.mini_calc_reqs(config, model, hostfile, start_proc, end_proc) + config['general']['multi_srun'][run_type]['hostfile'] = os.path.basename(current_hostfile) + + + @staticmethod + def mini_calc_reqs(config, model, hostfile, start_proc, end_proc): + if "nproc" in config[model]: + end_proc = start_proc + int(config[model]["nproc"]) - 1 + elif "nproca" in config[model] and "nprocb" in config[model]: + end_proc = start_proc + int(config[model]["nproca"])*int(config[model]["nprocb"]) - 1 + + # KH 30.04.20: nprocrad is replaced by more flexible + # partitioning using nprocar and nprocbr + if "nprocar" in config[model] and "nprocbr" in config[model]: + if config[model]["nprocar"] != "remove_from_namelist" and config[model]["nprocbr"] != "remove_from_namelist": + end_proc += config[model]["nprocar"] * config[model]["nprocbr"] + + else: + return start_proc, end_proc + if "execution_command" in config[model]: + command = "./" + config[model]["execution_command"] + elif "executable" in config[model]: + command = "./" + config[model]["executable"] + else: + return start_proc, end_proc + hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") + start_proc = end_proc + 1 + return start_proc, end_proc + + def calc_requirements(self, config): """ Calculates requirements and writes them to ``self.path``. """ + if config['general']['multi_srun']: + self.calc_requirements_multi_srun(config) + return start_proc = 0 end_proc = 0 with open(self.path, "w") as hostfile: for model in config["general"]["valid_model_names"]: - if "nproc" in config[model]: - end_proc = start_proc + int(config[model]["nproc"]) - 1 - elif "nproca" in config[model] and "nprocb" in config[model]: - end_proc = start_proc + int(config[model]["nproca"])*int(config[model]["nprocb"]) - 1 - - # KH 30.04.20: nprocrad is replaced by more flexible - # partitioning using nprocar and nprocbr - if "nprocar" in config[model] and "nprocbr" in config[model]: - if config[model]["nprocar"] != "remove_from_namelist" and config[model]["nprocbr"] != "remove_from_namelist": - end_proc += config[model]["nprocar"] * config[model]["nprocbr"] - - else: - continue - if "execution_command" in config[model]: - command = "./" + config[model]["execution_command"] - elif "executable" in config[model]: - command = "./" + config[model]["executable"] - else: - continue - hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") - start_proc = end_proc + 1 + start_proc, end_proc = self.mini_calc_reqs(config, model, hostfile, start_proc, end_proc) @staticmethod From 15dcbf8b8c524a639301559bd947e9767368d17b Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Fri, 12 Feb 2021 13:00:44 +0100 Subject: [PATCH 13/52] fixes the issues with dependencies destroying the editable/branched options of package installation in venv --- esm_runscripts/virtual_env_builder.py | 81 +++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/esm_runscripts/virtual_env_builder.py b/esm_runscripts/virtual_env_builder.py index 5670071..ff14a4c 100644 --- a/esm_runscripts/virtual_env_builder.py +++ b/esm_runscripts/virtual_env_builder.py @@ -59,31 +59,100 @@ def _source_and_run_bin_in_venv(venv_context, command, shell): return subprocess.check_call(command, shell=shell) def _install_tools(venv_context, config): - #_run_bin_in_venv(venv_context, ['pip', 'install', 'git+https://github.com/esm-tools/esm_tools']) + """ + Installs the ESM-Tools packages for a virtual environment, taking into account + the user's specifications for editable packages and desired branches. + + To control which packages are installed in editable mode the user can add the + following to their runscript: + + .. code-block:: yaml + + general: + install__editable: True/False + + To control which package branch is installed (compatible with editable mode and + non-editable mode) the user can add the following to their runscript: + + .. code-block:: yaml + + general: + install__branch: + + Parameters + ---------- + venv_context : type + Some description + config : dict + Configuration dictionary for this run + """ + + # First installation of all packages, with the desired mode (editable/non-editable), + # and branches, together with all their dependencies + _install_tools_general(venv_context, config) + # Some packages, such as `esm_tools` have other packages as dependencies (i.e. + # `esm_parser`). In the previous step, if a package is installed for which a + # dependency is editable and/or branched, this dependency gets back to non-editable + # realese-branch. The following line resinstalls all the editable/branched packages + # again, this time without dependencies. + _install_tools_general(venv_context, config, deps=False) + +def _install_tools_general(venv_context, config, deps=True): + ''' + Actual installer of ESM-Tools packages for virtual environments. Used by + `_install_tools` method to correctly install packages with the user's requested + options for each package (editable/non-editable and branch). See `_install_tools` + documentation for more information. + + Parameters + ---------- + venv_context : type + Some description + config : dict + Configuration dictionary for this run + deps : bool + Boolean indicating whether dependencies should be installed or not + ''' + # Setup the --no-deps flag if necessary + if not deps: + no_deps_flag = "--no-deps" + else: + no_deps_flag = "" + # Loop through the esm_tools packages to be installed for tool in esm_tools_modules: + # Module info (url, editable install, branch...) url = f"https://github.com/esm-tools/{tool}" user_wants_editable = config["general"].get(f"install_{tool}_editable", False) user_wants_branch = config["general"].get(f"install_{tool}_branch") + # If the package is editable install it in /src/esm-tools/ if user_wants_editable: # Make sure the directory exists: src_dir = pathlib.Path(config['general']['experiment_dir'] + f"/src/esm-tools/{tool}") - src_dir.mkdir(parents=True, exist_ok=True) + if not src_dir.exists(): + src_dir.mkdir(parents=True, exist_ok=True) + # Select branch if necessary if user_wants_branch: branch_command = f" -b {user_wants_branch} " else: branch_command = "" + # Clone from git subprocess.check_call(f"git clone --quiet {branch_command} {url} {src_dir}", shell=True) - _run_bin_in_venv(venv_context, ["pip", "install", "-q", f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir]) + # Carry out the editable installation (with or without dependencies) + _run_bin_in_venv(venv_context, ["pip", "install", "-q", f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir, no_deps_flag]) _run_bin_in_venv(venv_context, ["pip", "wheel", "-q", f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", src_dir]) - else: + # If the package is not editable then do a standard installation. + # Note: this step only runs with the `--no-deps` flag if the user has specified + # a branch, as this flags means also that is the second time passing through + # here, and we don't want to waste time on installing everything a second time + # if not necessary. + elif deps or (not deps and user_wants_branch): url = f"git+{url}" if user_wants_branch: url += f"@{user_wants_branch}" # NOTE(PG): We need the -U flag to ensure the branch is actually installed. - _run_bin_in_venv(venv_context, ["pip", "install", '-q', f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-U", url]) + _run_bin_in_venv(venv_context, ["pip", "install", '-q', f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-U", url, no_deps_flag]) _run_bin_in_venv(venv_context, ["pip", "wheel", '-q', f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", url]) - def _install_required_plugins(venv_context, config): required_plugins = [] for sub_cfg_key, sub_cfg in config.items(): From dfd92d96ad61914e6648ed7fcb41fc2e6d515385 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Fri, 12 Feb 2021 13:18:48 +0100 Subject: [PATCH 14/52] a syntax fix --- esm_runscripts/virtual_env_builder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/esm_runscripts/virtual_env_builder.py b/esm_runscripts/virtual_env_builder.py index ff14a4c..4e25eed 100644 --- a/esm_runscripts/virtual_env_builder.py +++ b/esm_runscripts/virtual_env_builder.py @@ -115,9 +115,9 @@ def _install_tools_general(venv_context, config, deps=True): ''' # Setup the --no-deps flag if necessary if not deps: - no_deps_flag = "--no-deps" + no_deps_flag = ["--no-deps"] else: - no_deps_flag = "" + no_deps_flag = [] # Loop through the esm_tools packages to be installed for tool in esm_tools_modules: # Module info (url, editable install, branch...) @@ -138,7 +138,7 @@ def _install_tools_general(venv_context, config, deps=True): # Clone from git subprocess.check_call(f"git clone --quiet {branch_command} {url} {src_dir}", shell=True) # Carry out the editable installation (with or without dependencies) - _run_bin_in_venv(venv_context, ["pip", "install", "-q", f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir, no_deps_flag]) + _run_bin_in_venv(venv_context, ["pip", "install", "-q", f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir] + no_deps_flag) _run_bin_in_venv(venv_context, ["pip", "wheel", "-q", f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", src_dir]) # If the package is not editable then do a standard installation. # Note: this step only runs with the `--no-deps` flag if the user has specified @@ -150,7 +150,7 @@ def _install_tools_general(venv_context, config, deps=True): if user_wants_branch: url += f"@{user_wants_branch}" # NOTE(PG): We need the -U flag to ensure the branch is actually installed. - _run_bin_in_venv(venv_context, ["pip", "install", '-q', f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-U", url, no_deps_flag]) + _run_bin_in_venv(venv_context, ["pip", "install", '-q', f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-U", url] + no_deps_flag) _run_bin_in_venv(venv_context, ["pip", "wheel", '-q', f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", url]) def _install_required_plugins(venv_context, config): From fb6e237fc7b55249542489788dffe7a59478a913 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Fri, 12 Feb 2021 13:53:18 +0100 Subject: [PATCH 15/52] more fixing, testing finally successful --- esm_runscripts/virtual_env_builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esm_runscripts/virtual_env_builder.py b/esm_runscripts/virtual_env_builder.py index 4e25eed..51007c6 100644 --- a/esm_runscripts/virtual_env_builder.py +++ b/esm_runscripts/virtual_env_builder.py @@ -136,7 +136,8 @@ def _install_tools_general(venv_context, config, deps=True): else: branch_command = "" # Clone from git - subprocess.check_call(f"git clone --quiet {branch_command} {url} {src_dir}", shell=True) + if deps: + subprocess.check_call(f"git clone --quiet {branch_command} {url} {src_dir}", shell=True) # Carry out the editable installation (with or without dependencies) _run_bin_in_venv(venv_context, ["pip", "install", "-q", f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir] + no_deps_flag) _run_bin_in_venv(venv_context, ["pip", "wheel", "-q", f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", src_dir]) From d269d9d7e23e042a40849cf2802a3785f80ace46 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Mon, 15 Feb 2021 16:09:12 +0100 Subject: [PATCH 16/52] implementation of EsmToolsDir class --- esm_runscripts/compute.py | 21 +++++++-------------- esm_runscripts/filelists.py | 16 ---------------- esm_runscripts/helpers.py | 32 ++++++-------------------------- esm_runscripts/sim_objects.py | 6 ++---- 4 files changed, 15 insertions(+), 60 deletions(-) diff --git a/esm_runscripts/compute.py b/esm_runscripts/compute.py index 7e68b1e..10c1738 100644 --- a/esm_runscripts/compute.py +++ b/esm_runscripts/compute.py @@ -485,21 +485,14 @@ def copy_tools_to_thisrun(config): # In case there is no esm_tools or namelists in the experiment folder, # copy from the default esm_tools path if not os.path.isdir(tools_dir): - if config['general'].get("use_venv") or esm_rcfile.FUNCTION_PATH.startswith("NONE_YET"): - if config["general"]["verbose"]: - print("Copying standard yamls from: package interal configs") - esm_tools.copy_config_folder(tools_dir) - else: - if config["general"]["verbose"]: - print("Copying from: ", esm_rcfile.FUNCTION_PATH) - shutil.copytree(esm_rcfile.FUNCTION_PATH, tools_dir) + print("Copying standard yamls from: ", esm_rcfile.EsmToolsDir("FUNCTION_PATH")) + esm_tools.copy_config_folder(tools_dir) if not os.path.isdir(namelists_dir): - if esm_rcfile.get_rc_entry("NAMELIST_PATH", "NONE_YET").startswith("NONE_YET"): - if config["general"]["verbose"]: - print("Copying standard namelists from: package internal namelists") - esm_tools.copy_namelist_folder(namelists_dir) - else: - shutil.copytree(esm_rcfile.get_rc_entry("NAMELIST_PATH"), namelists_dir) + print( + "Copying standard namelists from: ", + esm_rcfile.EsmToolsDir("NAMELIST_PATH"), + ) + esm_tools.copy_namelist_folder(namelists_dir) # If ``fromdir`` and ``scriptsdir`` are the same, this is already a computing # simulation which means we want to use the script in the experiment folder, diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index 4e8144c..8dcb2b9 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -566,22 +566,6 @@ def copy_files(config, filetypes, source, target): targetblock = config[model][filetype + "_" + text_target] for categ in sourceblock: file_source = os.path.normpath(sourceblock[categ]) - # NOTE(PG): This is a really, really, REALLY bad hack and it - # makes me physically ill to look at: - # NOTE(MA): The previous implementation was not able to include - # namelists that have no ``namelist`` in their name. This is a more - # general implementation but it enforces the use of the - # ``namelists`` list to be defined for each model with namelists. - namelist_candidates = ( - [item for item in config[model].get("namelists", [])] - + ["namelist"] - ) - isnamelist = any(map(file_source.__contains__, namelist_candidates)) - if source == "init": - if isnamelist and file_source.startswith("NONE_YET"): - file_source = esm_tools.get_namelist_filepath( - file_source.replace("NONE_YET/", "") - ) file_target = os.path.normpath(targetblock[categ]) if config["general"]["verbose"]: print(f"source: {file_source}") diff --git a/esm_runscripts/helpers.py b/esm_runscripts/helpers.py index 71c0d7b..9d8354e 100644 --- a/esm_runscripts/helpers.py +++ b/esm_runscripts/helpers.py @@ -28,32 +28,12 @@ def evaluate(config, job_type, recipe_name): % setup_name ) sys.exit(1) - if config["general"].get("use_venv"): - recipe = esm_tools.get_config_filepath("esm_software/esm_runscripts/esm_runscripts.yaml") - need_to_parse_recipe = True - plugins_bare = esm_tools.get_config_filepath( - "esm_software/esm_runscripts/esm_plugins.yaml" - ) - need_to_parse_plugins = True - elif esm_rcfile.FUNCTION_PATH.startswith("NONE_YET"): - recipe = esm_tools.get_config_filepath( - "esm_software/esm_runscripts/esm_runscripts.yaml" - ) - need_to_parse_recipe = True - plugins_bare = esm_tools.get_config_filepath( - "esm_software/esm_runscripts/esm_plugins.yaml" - ) - need_to_parse_plugins = True - else: - recipe = ( - esm_rcfile.FUNCTION_PATH - + "/esm_software/esm_runscripts/esm_runscripts.yaml" - ) - need_to_parse_recipe = True - plugins_bare = ( - esm_rcfile.FUNCTION_PATH + "/esm_software/esm_runscripts/esm_plugins.yaml" - ) - need_to_parse_plugins = True + + FUNCTION_PATH = esm_rcfile.EsmToolsDir("FUNCTION_PATH") + recipe = FUNCTION_PATH + "esm_software/esm_runscripts/esm_runscripts.yaml" + need_to_parse_recipe = True + plugins_bare = FUNCTION_PATH + "/esm_software/esm_runscripts/esm_plugins.yaml" + need_to_parse_plugins = True framework_recipe = esm_plugin_manager.read_recipe( recipe, {"job_type": job_type}, need_to_parse_recipe diff --git a/esm_runscripts/sim_objects.py b/esm_runscripts/sim_objects.py index fb0ba13..32e2eec 100644 --- a/esm_runscripts/sim_objects.py +++ b/esm_runscripts/sim_objects.py @@ -165,10 +165,8 @@ def distribute_per_model_defaults(self, config): def add_esm_runscripts_defaults_to_config(self, config): - if config['general'].get("use_venv") or esm_rcfile.FUNCTION_PATH.startswith("NONE_YET"): - path_to_file = esm_tools.get_config_filepath("esm_software/esm_runscripts/defaults.yaml") - else: - path_to_file = esm_rcfile.FUNCTION_PATH + "/esm_software/esm_runscripts/defaults.yaml" + FUNCTION_PATH = esm_rcfile.EsmToolsDir("FUNCTION_PATH") + path_to_file = FUNCTION_PATH + "/esm_software/esm_runscripts/defaults.yaml" default_config = esm_parser.yaml_file_to_dict(path_to_file) config["general"]["defaults.yaml"] = default_config config = self.distribute_per_model_defaults(config) From 0d8a59df1bb01e73f6be97e95b96f636379ed6bc Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Mon, 15 Feb 2021 17:33:07 +0100 Subject: [PATCH 17/52] multi_srun issue affecting files without general.multi_srun fixed --- esm_runscripts/slurm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 28c523a..726316a 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -96,7 +96,7 @@ def calc_requirements(self, config): """ Calculates requirements and writes them to ``self.path``. """ - if config['general']['multi_srun']: + if config['general'].get('multi_srun'): self.calc_requirements_multi_srun(config) return start_proc = 0 From f94edfaa7ccd87b083bc8c2f335df61771da4486 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Mon, 15 Feb 2021 17:49:37 +0100 Subject: [PATCH 18/52] further fixes --- esm_runscripts/batch_system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index afdd5a4..651d940 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -150,7 +150,7 @@ def determine_nodelist(config): else: continue config['general']['multi_srun'][run_type]['total_tasks'] = total_tasks - print(config['general']['multi_srun']) + print(config['general']['multi_srun']) @staticmethod @@ -185,7 +185,7 @@ def get_run_commands(config): # here or in compute.py? commands.append( "echo " + line + " >> " + config["general"]["experiment_log_file"] ) - if config['general']['multi_srun']: + if config['general'].get('multi_srun'): return get_run_commands_multisrun(config, commands) commands.append("time " + batch_system["execution_command"] + " &") return commands From 8e4b8745dfd1c609e9200073eed90429eb695e6d Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Tue, 23 Feb 2021 14:11:34 +0100 Subject: [PATCH 19/52] added the possibility to control the reusable_filetypes both from each component and from the general section --- esm_runscripts/filelists.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index 8dcb2b9..bf3d844 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -147,9 +147,28 @@ def complete_sources(config): def reuse_sources(config): if config["general"]["run_number"] == 1: return config - for filetype in config["general"]["reusable_filetypes"]: + + # MA: the changes belowe are to be able to specify model specific reusable_filetypes + # without changing the looping order (a model loop nested inside a file-type loop) + + # Put together all the possible reusable file types + all_reusable_filetypes = [] + for model in config["general"]["valid_model_names"] + ["general"]: + all_reusable_filetypes = list( + set(all_reusable_filetypes) | set(config[model].get("reusable_filetypes", [])) + ) + # Loop through all the reusable file types + for filetype in all_reusable_filetypes: for model in config["general"]["valid_model_names"] + ["general"]: - if filetype + "_sources" in config[model]: + # Get the model-specific reusable_filetypes, if not existing, get the + # general ones + model_reusable_filetypes = config[model].get( + "reusable_filetypes", + config["general"]["reusable_filetypes"] + ) + # If _sources dictionary exists and filetype is in the + # model-specific filetype list then add the sources + if filetype + "_sources" in config[model] and filetype in model_reusable_filetypes: for categ in config[model][filetype + "_sources"]: config[model][filetype + "_sources"][categ] = ( config[model]["experiment_" + filetype + "_dir"] @@ -733,7 +752,13 @@ def complete_all_file_movements(config): def get_movement(config, model, filetype, source, target): if source == "init": - if config["general"]["run_number"] == 1 or filetype not in config["general"]["reusable_filetypes"]: + # Get the model-specific reusable_filetypes, if not existing, get the + # general ones + model_reusable_filetypes = config[model].get( + "reusable_filetypes", + config["general"]["reusable_filetypes"] + ) + if config["general"]["run_number"] == 1 or filetype not in model_reusable_filetypes: return config[model]["file_movements"][filetype]["init_to_exp"] else: return config[model]["file_movements"][filetype]["exp_to_run"] From 31a55d31a8afd6451b31faa36ea95d4042186097 Mon Sep 17 00:00:00 2001 From: Dirk Barbi Date: Tue, 23 Feb 2021 15:54:21 +0100 Subject: [PATCH 20/52] debugging lines --- esm_runscripts/filelists.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index 646595f..c30a615 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -716,6 +716,7 @@ def complete_all_file_movements(config): config = create_missing_file_movement_entries(config) for model in config["general"]["valid_model_names"] + ["general"]: + print(f"Iterating model: {model}") mconfig = config[model] if model == "general": if "defaults.yaml" in mconfig: @@ -733,6 +734,8 @@ def complete_all_file_movements(config): del mconfig["file_movements"][filetype]["all_directions"] if "default" in mconfig["file_movements"]: + if model == "hdmodel": + esm_parser.pprint_config(mconfig) if "all_directions" in mconfig["file_movements"]["default"]: movement_type = mconfig["file_movements"]["default"]["all_directions"] for movement in ['init_to_exp', 'exp_to_run', 'run_to_work', 'work_to_run']: From d5f01520e2b770c74949b3b12d8829fad2a5e9c2 Mon Sep 17 00:00:00 2001 From: Dirk Barbi Date: Tue, 23 Feb 2021 16:30:51 +0100 Subject: [PATCH 21/52] fixed it --- esm_runscripts/filelists.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index c30a615..856ec81 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -713,17 +713,18 @@ def get_method(movement): def complete_all_file_movements(config): + + mconfig = config["general"] + if "defaults.yaml" in mconfig: + if "per_model_defaults" in mconfig["defaults.yaml"]: + if "file_movements" in mconfig["defaults.yaml"]["per_model_defaults"]: + mconfig["file_movements"] = copy.deepcopy(mconfig["defaults.yaml"]["per_model_defaults"]["file_movements"]) + del mconfig["defaults.yaml"]["per_model_defaults"]["file_movements"] + config = create_missing_file_movement_entries(config) for model in config["general"]["valid_model_names"] + ["general"]: - print(f"Iterating model: {model}") mconfig = config[model] - if model == "general": - if "defaults.yaml" in mconfig: - if "per_model_defaults" in mconfig["defaults.yaml"]: - if "file_movements" in mconfig["defaults.yaml"]["per_model_defaults"]: - mconfig["file_movements"] = mconfig["defaults.yaml"]["per_model_defaults"]["file_movements"] - del mconfig["defaults.yaml"]["per_model_defaults"]["file_movements"] if "file_movements" in mconfig: for filetype in config["general"]["all_model_filetypes"] + ["scripts", "unknown"]: if filetype in mconfig["file_movements"]: @@ -734,8 +735,6 @@ def complete_all_file_movements(config): del mconfig["file_movements"][filetype]["all_directions"] if "default" in mconfig["file_movements"]: - if model == "hdmodel": - esm_parser.pprint_config(mconfig) if "all_directions" in mconfig["file_movements"]["default"]: movement_type = mconfig["file_movements"]["default"]["all_directions"] for movement in ['init_to_exp', 'exp_to_run', 'run_to_work', 'work_to_run']: From af6b1020b1a2895d022a83692f5c24ff97796338 Mon Sep 17 00:00:00 2001 From: Dirk Barbi Date: Tue, 23 Feb 2021 16:32:48 +0100 Subject: [PATCH 22/52] =?UTF-8?q?Bump=20version:=205.0.14=20=E2=86=92=205.?= =?UTF-8?q?0.15?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- esm_runscripts/__init__.py | 2 +- setup.cfg | 3 ++- setup.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/esm_runscripts/__init__.py b/esm_runscripts/__init__.py index 3a4c55d..27f3b64 100644 --- a/esm_runscripts/__init__.py +++ b/esm_runscripts/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = 'dirk.barbi@awi.de' -__version__ = "5.0.14" +__version__ = "5.0.15" from .sim_objects import * from .batch_system import * diff --git a/setup.cfg b/setup.cfg index e876150..1838b63 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.0.14 +current_version = 5.0.15 commit = True tag = True @@ -20,3 +20,4 @@ select = C,E,F,W,B,B950 ignore = E203, E501, W503 [aliases] + diff --git a/setup.py b/setup.py index 3dd8bdf..203a103 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/dbarbi/esm_runscripts', - version="5.0.14", + version="5.0.15", zip_safe=False, ) From d54411336fbbfe38e95b3cc7a2222508533eb716 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Thu, 25 Feb 2021 18:27:42 +0100 Subject: [PATCH 23/52] fix: date from environmental variable --- esm_runscripts/prepare.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/esm_runscripts/prepare.py b/esm_runscripts/prepare.py index 3061d90..c5852aa 100644 --- a/esm_runscripts/prepare.py +++ b/esm_runscripts/prepare.py @@ -5,6 +5,26 @@ def run_job(config): helpers.evaluate(config, "prepare", "prepare_recipe") return config +def mini_resolve_variable_date_file(date_file, config): + while "${" in date_file: + pre, post = date_file.split("${", 1) + variable, post = post.split("}", 1) + if "." in variable: + variable_section, variable = variable.split(".") + answer = config[variable_section].get(variable) + else: + answer = config["general"].get(variable) + if not answer: + answer = config.get("env", {}).get(variable) + if not answer: + try: + assert (variable.startswith("env.") or variable.startswith("general.")) + except AssertionError: + print("The date file contains a variable which is not in the >>env<< or >>general<< section. This is not allowed!") + print(f"date_file = {date_file}") + date_file = pre + answer + post + return date_file + def _read_date_file(config): import os @@ -18,6 +38,9 @@ def _read_date_file(config): + config["general"]["setup_name"] + ".date" ) + + date_file = mini_resolve_variable_date_file(date_file, config) + if os.path.isfile(date_file): logging.info("Date file read from %s", date_file) with open(date_file) as date_file: @@ -308,7 +331,7 @@ def set_overall_calendar(config): config["general"]["calendar"] = Calendar(0) return config - + def find_last_prepared_run(config): from esm_calendar import Date, Calendar import os @@ -332,8 +355,8 @@ def find_last_prepared_run(config): next_date = current_date.add(delta_date) end_date = next_date - (0, 0, 1, 0, 0, 0) - - datestamp = ( + + datestamp = ( current_date.format( form=9, givenph=False, givenpm=False, givenps=False ) From bd1aa1129c9e3525c056c2f3026515dc2a90477e Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Thu, 25 Feb 2021 18:50:23 +0100 Subject: [PATCH 24/52] fix: adds exit to last-minute date parse --- esm_runscripts/prepare.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/esm_runscripts/prepare.py b/esm_runscripts/prepare.py index c5852aa..df7a21a 100644 --- a/esm_runscripts/prepare.py +++ b/esm_runscripts/prepare.py @@ -1,4 +1,5 @@ from . import helpers +import sys def run_job(config): @@ -22,6 +23,7 @@ def mini_resolve_variable_date_file(date_file, config): except AssertionError: print("The date file contains a variable which is not in the >>env<< or >>general<< section. This is not allowed!") print(f"date_file = {date_file}") + sys.exit(1) date_file = pre + answer + post return date_file From 6da8d4ede2decb5812e237d6da0bb5fef43124e3 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 26 Feb 2021 08:07:53 +0100 Subject: [PATCH 25/52] =?UTF-8?q?Bump=20version:=205.0.15=20=E2=86=92=205.?= =?UTF-8?q?0.16?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- esm_runscripts/__init__.py | 2 +- setup.cfg | 3 +-- setup.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/esm_runscripts/__init__.py b/esm_runscripts/__init__.py index 27f3b64..4012b34 100644 --- a/esm_runscripts/__init__.py +++ b/esm_runscripts/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = 'dirk.barbi@awi.de' -__version__ = "5.0.15" +__version__ = "5.0.16" from .sim_objects import * from .batch_system import * diff --git a/setup.cfg b/setup.cfg index 1838b63..84c6068 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.0.15 +current_version = 5.0.16 commit = True tag = True @@ -20,4 +20,3 @@ select = C,E,F,W,B,B950 ignore = E203, E501, W503 [aliases] - diff --git a/setup.py b/setup.py index 203a103..4a7d473 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/dbarbi/esm_runscripts', - version="5.0.15", + version="5.0.16", zip_safe=False, ) From 0eabc8655d41e59b0a9b9499ac7d6cc5936d96b5 Mon Sep 17 00:00:00 2001 From: Miguel <63242832+mandresm@users.noreply.github.com> Date: Fri, 26 Feb 2021 19:30:51 +0100 Subject: [PATCH 26/52] typo in comments fixed --- esm_runscripts/filelists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index bf3d844..daa6280 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -148,7 +148,7 @@ def reuse_sources(config): if config["general"]["run_number"] == 1: return config - # MA: the changes belowe are to be able to specify model specific reusable_filetypes + # MA: the changes below are to be able to specify model specific reusable_filetypes # without changing the looping order (a model loop nested inside a file-type loop) # Put together all the possible reusable file types From 988d28456acea1406f49341e94b271068124d63e Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Tue, 2 Mar 2021 15:43:31 +0100 Subject: [PATCH 27/52] suggestions by Deniz --- esm_runscripts/filelists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index bf3d844..75614cf 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -155,7 +155,7 @@ def reuse_sources(config): all_reusable_filetypes = [] for model in config["general"]["valid_model_names"] + ["general"]: all_reusable_filetypes = list( - set(all_reusable_filetypes) | set(config[model].get("reusable_filetypes", [])) + set(all_reusable_filetypes + config[model].get("reusable_filetypes", [])) ) # Loop through all the reusable file types for filetype in all_reusable_filetypes: From df8e88b22430f4271b148d18dca54fb3cb39df01 Mon Sep 17 00:00:00 2001 From: JanStreffing Date: Wed, 10 Mar 2021 17:11:22 +0100 Subject: [PATCH 28/52] first test for MPI + MPI/OMP parallelization --- esm_runscripts/slurm.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 726316a..65f61ed 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -16,6 +16,8 @@ class Slurm: ---------- filename : str The filename for srun commands, defaults to ``hostfile_srun`` + hostlist : str + The hostlist for srun commands, defaults to ``hostlist`` path : str Full path to this file, defaults to ``thisrun_scripts_dir / filename`` @@ -28,6 +30,7 @@ class Slurm: def __init__(self, config): folder = config["general"]["thisrun_scripts_dir"] self.filename = "hostfile_srun" + self.hostlist = "hostlist" self.path = folder + "/" + self.filename @staticmethod @@ -69,7 +72,10 @@ def calc_requirements_multi_srun(self, config): @staticmethod def mini_calc_reqs(config, model, hostfile, start_proc, end_proc): if "nproc" in config[model]: - end_proc = start_proc + int(config[model]["nproc"]) - 1 + if "OMP_NUM_PROC" in config[model]: + end_proc = start_proc + int(config[model]["nproc"])*int(config[model]["OMP_NUM_PROC"]) - 1 + else: + end_proc = start_proc + int(config[model]["nproc"]) - 1 elif "nproca" in config[model] and "nprocb" in config[model]: end_proc = start_proc + int(config[model]["nproca"])*int(config[model]["nprocb"]) - 1 @@ -87,6 +93,19 @@ def mini_calc_reqs(config, model, hostfile, start_proc, end_proc): command = "./" + config[model]["executable"] else: return start_proc, end_proc + print("Jan was here") + if "OMP_NUM_PROC" in config[model]: + print("And here") + cores_per_node = int(config["computer"]["cores_per_node"]) + state_command = ["scontrol show hostnames $((SLURM_JOB_NODELIST))"] #$SLURM_JOB_NODELIST"] + scontrol_output = subprocess.Popen(state_command, stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] + for index_proc in range(0, nproc): + index_host = index_proc+end_proc / cores_per_node + host_value = scontrol_output[index_host] + #slot = iproc+end_proc // cores_per_node + with open(hostlist, "a") as file_object: + file_object.write(host_value) + hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") start_proc = end_proc + 1 return start_proc, end_proc From 474d8a92c3d625590458b37bc8b54d2afbe03f88 Mon Sep 17 00:00:00 2001 From: JanStreffing Date: Thu, 11 Mar 2021 09:51:41 +0100 Subject: [PATCH 29/52] second pass at implementing tasksets --- esm_runscripts/batch_system.py | 22 ++++++++++++++++++ esm_runscripts/slurm.py | 42 ++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 651d940..e59b224 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -260,12 +260,34 @@ def write_simple_runscript(config): sadfile.write(line + "\n") sadfile.write("\n") sadfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n") + if "taskset" in config["general"] and name == "slurm": + sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") + sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") + sadfile.write("IFS=$\'\n\'; set -f" + "\n") + sadfile.write("listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST )))"+"\n") + sadfile.write("unset IFS; set +f" + "\n") + sadfile.write("rank = 0" + "\n") + sadfile.write("current_core = 0" + "\n") + sadfile.write("current_core_mpi = 0" + "\n") + sadfile.write("for model in " + str(config["general"]["valid_model_names"]) + "\n") # TODO: Does this even work? I kind of need it as a list, but that can not be concatenated, also this contains oasis, which is wrong. But maybe it doesn't matter because it has no cores or tasks. + sadfile.write("do" + "\n") + sadfile.write(" eval nb_of_cores=\${tasks_${model}}" + "\n") # TODO: find equivalent to ${tasks_${model}}. Note: this has to be available after entering queueu. So in env. + sadfile.write(" eval nb_of_cores=$((${nb_of_cores}-1))" + "\n") + sadfile.write(" for nb_proc_mpi in `seq 0 ${nb_of_cores}`; do" + "\n") + sadfile.write(" (( index_host = current_core / " + str(config["computer"]["cores_per_node"]) +" ))" + "\n") + sadfile.write(" host_value=${listnodes[${index_host}]}" + "\n") + sadfile.write(" (( slot = current_core % cores_per_compute_node ))" + "\n") + sadfile.write(" echo '$host_value' >> hostlist" + "\n") + #sadfile.write(" (( current_core = current_core + " + config[model]["OMP_NUM_PROC"] +" ))" + "\n") # TODO config[model] does not work here since we are only calling this once. + sadfile.write(" (( current_core = current_core + omp_num_threads_compute_${model} ))" + "\n") # TODO find equivalent to ${tasks_${model}}. Note: this has to be available after entering queueu. So in env. + sadfile.write("done" + "\n\n") for line in commands: sadfile.write(line + "\n") sadfile.write("process=$! \n") sadfile.write("cd " + config["general"]["experiment_scripts_dir"] + "\n") sadfile.write(tidy_call + "\n") + config["general"]["submit_command"] = batch_system.get_submit_command( config, sadfilename ) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 65f61ed..f77c6ce 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -30,7 +30,6 @@ class Slurm: def __init__(self, config): folder = config["general"]["thisrun_scripts_dir"] self.filename = "hostfile_srun" - self.hostlist = "hostlist" self.path = folder + "/" + self.filename @staticmethod @@ -70,10 +69,10 @@ def calc_requirements_multi_srun(self, config): @staticmethod - def mini_calc_reqs(config, model, hostfile, start_proc, end_proc): + def mini_calc_reqs(self,config, model, hostfile, start_proc, end_proc): if "nproc" in config[model]: - if "OMP_NUM_PROC" in config[model]: - end_proc = start_proc + int(config[model]["nproc"])*int(config[model]["OMP_NUM_PROC"]) - 1 + if "omp_num_proc" in config[model]: + end_proc = start_proc + int(config[model]["nproc"])*int(config[model]["omp_num_proc"]) - 1 else: end_proc = start_proc + int(config[model]["nproc"]) - 1 elif "nproca" in config[model] and "nprocb" in config[model]: @@ -93,18 +92,27 @@ def mini_calc_reqs(config, model, hostfile, start_proc, end_proc): command = "./" + config[model]["executable"] else: return start_proc, end_proc - print("Jan was here") - if "OMP_NUM_PROC" in config[model]: - print("And here") - cores_per_node = int(config["computer"]["cores_per_node"]) - state_command = ["scontrol show hostnames $((SLURM_JOB_NODELIST))"] #$SLURM_JOB_NODELIST"] - scontrol_output = subprocess.Popen(state_command, stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0] - for index_proc in range(0, nproc): - index_host = index_proc+end_proc / cores_per_node - host_value = scontrol_output[index_host] - #slot = iproc+end_proc // cores_per_node - with open(hostlist, "a") as file_object: - file_object.write(host_value) + + if "taskset" in config["general"]: + scriptname="script_"+model+".ksh" + with open(self.path, "w") as scriptname: + scriptname.write("#!/bin/ksh"+"\n") + scriptname.write("export OMP_NUM_THREADS=$(("+str(config[model]["omp_num_proc"])+"))"+"n") + scriptname.write(command+"\n") + + progname="prog_"+model+".sh" + print(progname) + with open(self.path, "w") as progname: + progname.write("#!/bin/sh"+"\n") + progname.write("(( init = "+str(end_proc)+" + \$1 ))"+"\n") + progname.write("(( index = init * "+str(config[model]["omp_num_proc"])+")) ))"+"\n") + #import pdb + #pdb.set_trace() + progname.write("(( slot = index % "+str(config("computer"["cores_per_node"]))+"))"+"\n") + progname.write("(( echo "+model+" taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+" - 1 ))"+"\n") + progname.write("taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+")) - 1)) ./script_${model}.ksh"+"\n") + + hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") start_proc = end_proc + 1 @@ -122,7 +130,7 @@ def calc_requirements(self, config): end_proc = 0 with open(self.path, "w") as hostfile: for model in config["general"]["valid_model_names"]: - start_proc, end_proc = self.mini_calc_reqs(config, model, hostfile, start_proc, end_proc) + start_proc, end_proc = self.mini_calc_reqs(self,config, model, hostfile, start_proc, end_proc) @staticmethod From eb968d05b2e0591dfc582f9e6351fe35ff961467 Mon Sep 17 00:00:00 2001 From: JanStreffing Date: Fri, 12 Mar 2021 10:27:47 +0100 Subject: [PATCH 30/52] move hostfile creation into the loop to allow for the creaion of more than one file inside the mini_calc_reqs function --- esm_runscripts/slurm.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index f77c6ce..a6dec5b 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -69,7 +69,7 @@ def calc_requirements_multi_srun(self, config): @staticmethod - def mini_calc_reqs(self,config, model, hostfile, start_proc, end_proc): + def mini_calc_reqs(self,config, model, start_proc, end_proc): if "nproc" in config[model]: if "omp_num_proc" in config[model]: end_proc = start_proc + int(config[model]["nproc"])*int(config[model]["omp_num_proc"]) - 1 @@ -113,10 +113,10 @@ def mini_calc_reqs(self,config, model, hostfile, start_proc, end_proc): progname.write("taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+")) - 1)) ./script_${model}.ksh"+"\n") - - hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") - start_proc = end_proc + 1 - return start_proc, end_proc + with open(self.path, "a") as hostfile: + hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") + start_proc = end_proc + 1 + return start_proc, end_proc def calc_requirements(self, config): @@ -127,10 +127,9 @@ def calc_requirements(self, config): self.calc_requirements_multi_srun(config) return start_proc = 0 - end_proc = 0 - with open(self.path, "w") as hostfile: - for model in config["general"]["valid_model_names"]: - start_proc, end_proc = self.mini_calc_reqs(self,config, model, hostfile, start_proc, end_proc) + end_proc = 0 + for model in config["general"]["valid_model_names"]: + start_proc, end_proc = self.mini_calc_reqs(self,config, model, start_proc, end_proc) @staticmethod From 3c0b18af8175a18d54e94d0733966f452ac3aec7 Mon Sep 17 00:00:00 2001 From: JanStreffing Date: Fri, 12 Mar 2021 15:29:00 +0100 Subject: [PATCH 31/52] fixes from call with Deniz --- esm_runscripts/batch_system.py | 3 ++- esm_runscripts/slurm.py | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index e59b224..2b1d482 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -260,7 +260,8 @@ def write_simple_runscript(config): sadfile.write(line + "\n") sadfile.write("\n") sadfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n") - if "taskset" in config["general"] and name == "slurm": + #if "taskset" in config["general"] and name == "slurm": TODO this line fails + if "taskset" in config["general"]: sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") sadfile.write("IFS=$\'\n\'; set -f" + "\n") diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index a6dec5b..72f1501 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -31,6 +31,7 @@ def __init__(self, config): folder = config["general"]["thisrun_scripts_dir"] self.filename = "hostfile_srun" self.path = folder + "/" + self.filename + self.folder = folder + "/" @staticmethod def check_if_submitted(): @@ -95,28 +96,30 @@ def mini_calc_reqs(self,config, model, start_proc, end_proc): if "taskset" in config["general"]: scriptname="script_"+model+".ksh" - with open(self.path, "w") as scriptname: - scriptname.write("#!/bin/ksh"+"\n") - scriptname.write("export OMP_NUM_THREADS=$(("+str(config[model]["omp_num_proc"])+"))"+"n") - scriptname.write(command+"\n") + with open(self.folder+scriptname, "w") as f: + f.write("#!/bin/ksh"+"\n") + f.write("export OMP_NUM_THREADS=$(("+str(config[model]["omp_num_proc"])+"))"+"n") + f.write(command+"\n") progname="prog_"+model+".sh" print(progname) - with open(self.path, "w") as progname: - progname.write("#!/bin/sh"+"\n") - progname.write("(( init = "+str(end_proc)+" + \$1 ))"+"\n") - progname.write("(( index = init * "+str(config[model]["omp_num_proc"])+")) ))"+"\n") + import pdb + pdb.set_trace() + with open(self.folder+progname, "w") as f: + f.write("#!/bin/sh"+"\n") + f.write("(( init = "+str(end_proc)+" + \$1 ))"+"\n") + f.write("(( index = init * "+str(config[model]["omp_num_proc"])+")) ))"+"\n") #import pdb #pdb.set_trace() - progname.write("(( slot = index % "+str(config("computer"["cores_per_node"]))+"))"+"\n") - progname.write("(( echo "+model+" taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+" - 1 ))"+"\n") - progname.write("taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+")) - 1)) ./script_${model}.ksh"+"\n") + f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+"))"+"\n") #TODO: We need this line, no the one below. But this line fails + f.write("echo "+model+" taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+" - 1"+"\n") + f.write("taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+")) - 1)) ./script_"+model+".ksh"+"\n") with open(self.path, "a") as hostfile: hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") start_proc = end_proc + 1 - return start_proc, end_proc + return start_proc, end_proc def calc_requirements(self, config): From f71db375085589a53f7daab95ceb4c0ebce9f064 Mon Sep 17 00:00:00 2001 From: JanStreffing Date: Fri, 12 Mar 2021 16:47:18 +0100 Subject: [PATCH 32/52] small fixes to make prog and script file match the ksh version --- esm_runscripts/slurm.py | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 72f1501..96f5d79 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -72,10 +72,10 @@ def calc_requirements_multi_srun(self, config): @staticmethod def mini_calc_reqs(self,config, model, start_proc, end_proc): if "nproc" in config[model]: - if "omp_num_proc" in config[model]: - end_proc = start_proc + int(config[model]["nproc"])*int(config[model]["omp_num_proc"]) - 1 - else: - end_proc = start_proc + int(config[model]["nproc"]) - 1 + #if "omp_num_proc" in config[model]: + # end_proc = start_proc + int(config[model]["nproc"])*int(config[model]["omp_num_proc"]) - 1 + #else: + end_proc = start_proc + int(config[model]["nproc"]) - 1 elif "nproca" in config[model] and "nprocb" in config[model]: end_proc = start_proc + int(config[model]["nproca"])*int(config[model]["nprocb"]) - 1 @@ -85,36 +85,32 @@ def mini_calc_reqs(self,config, model, start_proc, end_proc): if config[model]["nprocar"] != "remove_from_namelist" and config[model]["nprocbr"] != "remove_from_namelist": end_proc += config[model]["nprocar"] * config[model]["nprocbr"] - else: - return start_proc, end_proc - if "execution_command" in config[model]: - command = "./" + config[model]["execution_command"] - elif "executable" in config[model]: - command = "./" + config[model]["executable"] else: return start_proc, end_proc if "taskset" in config["general"]: + command = "./" + config[model]["execution_command_script"] scriptname="script_"+model+".ksh" with open(self.folder+scriptname, "w") as f: f.write("#!/bin/ksh"+"\n") - f.write("export OMP_NUM_THREADS=$(("+str(config[model]["omp_num_proc"])+"))"+"n") + f.write("export OMP_NUM_THREADS="+str(config[model]["omp_num_proc"])+"\n") f.write(command+"\n") progname="prog_"+model+".sh" - print(progname) - import pdb - pdb.set_trace() with open(self.folder+progname, "w") as f: f.write("#!/bin/sh"+"\n") - f.write("(( init = "+str(end_proc)+" + \$1 ))"+"\n") - f.write("(( index = init * "+str(config[model]["omp_num_proc"])+")) ))"+"\n") - #import pdb - #pdb.set_trace() - f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+"))"+"\n") #TODO: We need this line, no the one below. But this line fails - f.write("echo "+model+" taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+" - 1"+"\n") - f.write("taskset -c \$slot\"-\"\$((slot + "+str(config[model]["omp_num_proc"])+")) - 1)) ./script_"+model+".ksh"+"\n") + f.write("(( init = "+str(start_proc)+" + $1 ))"+"\n") + f.write("(( index = init * "+str(config[model]["omp_num_proc"])+" ))"+"\n") + f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+" ))"+"\n") + f.write("echo "+model+" taskset -c $slot-$((slot + "+str(config[model]["omp_num_proc"])+" - 1"+"))"+"\n") + f.write("taskset -c $slot-$((slot + "+str(config[model]["omp_num_proc"])+" - 1)) ./script_"+model+".ksh"+"\n") + if "execution_command" in config[model]: + command = "./" + config[model]["execution_command"] + elif "executable" in config[model]: + command = "./" + config[model]["executable"] + else: + return start_proc, end_proc with open(self.path, "a") as hostfile: hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") From ee2189138da18ad2a022001834ba538bbf693049 Mon Sep 17 00:00:00 2001 From: JanStreffing Date: Fri, 12 Mar 2021 17:18:27 +0100 Subject: [PATCH 33/52] small fix for IFS line --- esm_runscripts/batch_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 2b1d482..0115768 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -264,7 +264,7 @@ def write_simple_runscript(config): if "taskset" in config["general"]: sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") - sadfile.write("IFS=$\'\n\'; set -f" + "\n") + sadfile.write("IFS=$'\\n'; set -f" + "\n") sadfile.write("listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST )))"+"\n") sadfile.write("unset IFS; set +f" + "\n") sadfile.write("rank = 0" + "\n") From 75011b3d3aa201192583100e02e75c6505d2cfe4 Mon Sep 17 00:00:00 2001 From: JanStreffing Date: Sat, 13 Mar 2021 22:45:07 +0100 Subject: [PATCH 34/52] status at the end of the day --- esm_runscripts/batch_system.py | 62 ++++++++++++++++++++++------------ esm_runscripts/slurm.py | 32 ++++++++++-------- 2 files changed, 58 insertions(+), 36 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 0115768..c82b33f 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -61,15 +61,25 @@ def get_batch_header(config): this_batch_system = config["computer"] if "sh_interpreter" in this_batch_system: header.append("#!" + this_batch_system["sh_interpreter"]) - tasks = batch_system.calculate_requirements(config) + tasks, nodes = batch_system.calculate_requirements(config) replacement_tags = [("@tasks@", tasks)] - all_flags = [ - "partition_flag", - "time_flag", - "tasks_flag", - "output_flags", - "name_flag", - ] + if "taskset" in config["general"]: + replacement_tags = [("@nodes@", nodes)] + all_flags = [ + "partition_flag", + "time_flag", + "nodes_flag", + "output_flags", + "name_flag", + ] + else: + all_flags = [ + "partition_flag", + "time_flag", + "tasks_flag", + "output_flags", + "name_flag", + ] conditional_flags = [ "accounting_flag", "notification_flag", @@ -94,10 +104,13 @@ def get_batch_header(config): @staticmethod def calculate_requirements(config): tasks = 0 + nodes = 0 if config["general"]["jobtype"] == "compute": for model in config["general"]["valid_model_names"]: if "nproc" in config[model]: tasks += config[model]["nproc"] + if "taskset" in config["general"]: + nodes +=int((config[model]["nproc"]*config[model]["omp_num_threads"])/config['computer']['cores_per_node']) elif "nproca" in config[model] and "nprocb" in config[model]: tasks += config[model]["nproca"] * config[model]["nprocb"] @@ -112,7 +125,7 @@ def calculate_requirements(config): elif config["general"]["jobtype"] == "post": tasks = 1 - return tasks + return tasks, nodes @staticmethod def get_environment(config): @@ -260,27 +273,32 @@ def write_simple_runscript(config): sadfile.write(line + "\n") sadfile.write("\n") sadfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n") - #if "taskset" in config["general"] and name == "slurm": TODO this line fails if "taskset" in config["general"]: sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") + sadfile.write("rm -f ./hostlist" + "\n") sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") sadfile.write("IFS=$'\\n'; set -f" + "\n") sadfile.write("listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST )))"+"\n") sadfile.write("unset IFS; set +f" + "\n") - sadfile.write("rank = 0" + "\n") - sadfile.write("current_core = 0" + "\n") - sadfile.write("current_core_mpi = 0" + "\n") - sadfile.write("for model in " + str(config["general"]["valid_model_names"]) + "\n") # TODO: Does this even work? I kind of need it as a list, but that can not be concatenated, also this contains oasis, which is wrong. But maybe it doesn't matter because it has no cores or tasks. - sadfile.write("do" + "\n") - sadfile.write(" eval nb_of_cores=\${tasks_${model}}" + "\n") # TODO: find equivalent to ${tasks_${model}}. Note: this has to be available after entering queueu. So in env. + sadfile.write("rank=0" + "\n") + sadfile.write("current_core=0" + "\n") + sadfile.write("current_core_mpi=0" + "\n") + for model in config["general"]["valid_model_names"]: + if model != "oasis3mct": + sadfile.write("mpi_tasks_"+model+"="+str(config[model]["nproc"])+ "\n") + sadfile.write("omp_threads_"+model+"="+str(config[model]["omp_num_threads"])+ "\n") + import pdb + #pdb.set_trace() + sadfile.write("for model in " + str(config["general"]["valid_model_names"])[1:-1].replace(',', '').replace('\'', '') +" ;do"+ "\n") + sadfile.write(" eval nb_of_cores=\${mpi_tasks_${model}}" + "\n") sadfile.write(" eval nb_of_cores=$((${nb_of_cores}-1))" + "\n") sadfile.write(" for nb_proc_mpi in `seq 0 ${nb_of_cores}`; do" + "\n") - sadfile.write(" (( index_host = current_core / " + str(config["computer"]["cores_per_node"]) +" ))" + "\n") - sadfile.write(" host_value=${listnodes[${index_host}]}" + "\n") - sadfile.write(" (( slot = current_core % cores_per_compute_node ))" + "\n") - sadfile.write(" echo '$host_value' >> hostlist" + "\n") - #sadfile.write(" (( current_core = current_core + " + config[model]["OMP_NUM_PROC"] +" ))" + "\n") # TODO config[model] does not work here since we are only calling this once. - sadfile.write(" (( current_core = current_core + omp_num_threads_compute_${model} ))" + "\n") # TODO find equivalent to ${tasks_${model}}. Note: this has to be available after entering queueu. So in env. + sadfile.write(" (( index_host = current_core / " + str(config["computer"]["cores_per_node"]) +" ))" + "\n") + sadfile.write(" host_value=${listnodes[${index_host}]}" + "\n") + sadfile.write(" (( slot = current_core % " + str(config["computer"]["cores_per_node"]) +" ))" + "\n") + sadfile.write(" echo $host_value >> hostlist" + "\n") + sadfile.write(" (( current_core = current_core + omp_threads_${model} ))" + "\n") + sadfile.write(" done" + "\n") sadfile.write("done" + "\n\n") for line in commands: sadfile.write(line + "\n") diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 96f5d79..5249035 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -31,7 +31,7 @@ def __init__(self, config): folder = config["general"]["thisrun_scripts_dir"] self.filename = "hostfile_srun" self.path = folder + "/" + self.filename - self.folder = folder + "/" + self.folder = folder + "/../work/" @staticmethod def check_if_submitted(): @@ -70,12 +70,11 @@ def calc_requirements_multi_srun(self, config): @staticmethod - def mini_calc_reqs(self,config, model, start_proc, end_proc): + def mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_core): if "nproc" in config[model]: - #if "omp_num_proc" in config[model]: - # end_proc = start_proc + int(config[model]["nproc"])*int(config[model]["omp_num_proc"]) - 1 - #else: end_proc = start_proc + int(config[model]["nproc"]) - 1 + if "omp_num_threads" in config[model]: + end_core = start_core + int(config[model]["nproc"])*int(config[model]["omp_num_threads"]) - 1 elif "nproca" in config[model] and "nprocb" in config[model]: end_proc = start_proc + int(config[model]["nproca"])*int(config[model]["nprocb"]) - 1 @@ -86,36 +85,39 @@ def mini_calc_reqs(self,config, model, start_proc, end_proc): end_proc += config[model]["nprocar"] * config[model]["nprocbr"] else: - return start_proc, end_proc + return start_proc, start_core, end_proc, end_core if "taskset" in config["general"]: command = "./" + config[model]["execution_command_script"] scriptname="script_"+model+".ksh" with open(self.folder+scriptname, "w") as f: f.write("#!/bin/ksh"+"\n") - f.write("export OMP_NUM_THREADS="+str(config[model]["omp_num_proc"])+"\n") + f.write("export OMP_NUM_THREADS="+str(config[model]["omp_num_threads"])+"\n") f.write(command+"\n") + os.chmod(self.folder+scriptname, 0o755) progname="prog_"+model+".sh" with open(self.folder+progname, "w") as f: f.write("#!/bin/sh"+"\n") - f.write("(( init = "+str(start_proc)+" + $1 ))"+"\n") - f.write("(( index = init * "+str(config[model]["omp_num_proc"])+" ))"+"\n") + f.write("(( init = "+str(start_core)+" + $1 ))"+"\n") + f.write("(( index = init * "+str(config[model]["omp_num_threads"])+" ))"+"\n") f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+" ))"+"\n") - f.write("echo "+model+" taskset -c $slot-$((slot + "+str(config[model]["omp_num_proc"])+" - 1"+"))"+"\n") - f.write("taskset -c $slot-$((slot + "+str(config[model]["omp_num_proc"])+" - 1)) ./script_"+model+".ksh"+"\n") + f.write("echo "+model+" taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1"+"))"+"\n") + f.write("taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1)) ./script_"+model+".ksh"+"\n") + os.chmod(self.folder+progname, 0o755) if "execution_command" in config[model]: command = "./" + config[model]["execution_command"] elif "executable" in config[model]: command = "./" + config[model]["executable"] else: - return start_proc, end_proc + return start_proc, start_core, end_proc, end_core with open(self.path, "a") as hostfile: hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") start_proc = end_proc + 1 - return start_proc, end_proc + start_core = end_core + 1 + return start_proc, start_core, end_proc, end_core def calc_requirements(self, config): @@ -126,9 +128,11 @@ def calc_requirements(self, config): self.calc_requirements_multi_srun(config) return start_proc = 0 + start_core = 0 end_proc = 0 + end_core = 0 for model in config["general"]["valid_model_names"]: - start_proc, end_proc = self.mini_calc_reqs(self,config, model, start_proc, end_proc) + start_proc, start_core, end_proc, end_core = self.mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_core) @staticmethod From c088b1963455a8085daa3cc85c769cde2d497d25 Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Mon, 15 Mar 2021 10:51:21 +0100 Subject: [PATCH 35/52] corrected switches --- esm_runscripts/batch_system.py | 6 +++--- esm_runscripts/slurm.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index c82b33f..9f41271 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -63,7 +63,7 @@ def get_batch_header(config): header.append("#!" + this_batch_system["sh_interpreter"]) tasks, nodes = batch_system.calculate_requirements(config) replacement_tags = [("@tasks@", tasks)] - if "taskset" in config["general"]: + if "taskset" in config["general"] == "true": replacement_tags = [("@nodes@", nodes)] all_flags = [ "partition_flag", @@ -109,7 +109,7 @@ def calculate_requirements(config): for model in config["general"]["valid_model_names"]: if "nproc" in config[model]: tasks += config[model]["nproc"] - if "taskset" in config["general"]: + if "taskset" in config["general"] == "true": nodes +=int((config[model]["nproc"]*config[model]["omp_num_threads"])/config['computer']['cores_per_node']) elif "nproca" in config[model] and "nprocb" in config[model]: tasks += config[model]["nproca"] * config[model]["nprocb"] @@ -273,7 +273,7 @@ def write_simple_runscript(config): sadfile.write(line + "\n") sadfile.write("\n") sadfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n") - if "taskset" in config["general"]: + if "taskset" in config["general"] == "true": sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") sadfile.write("rm -f ./hostlist" + "\n") sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 5249035..5d5ef7b 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -87,7 +87,7 @@ def mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_cor else: return start_proc, start_core, end_proc, end_core - if "taskset" in config["general"]: + if "taskset" in config["general"] == "true": command = "./" + config[model]["execution_command_script"] scriptname="script_"+model+".ksh" with open(self.folder+scriptname, "w") as f: From 595f5d029a6065c8a13fac64fc7169feaeaaadef Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Mon, 15 Mar 2021 14:25:04 +0100 Subject: [PATCH 36/52] better switches again --- esm_runscripts/batch_system.py | 6 +++--- esm_runscripts/slurm.py | 37 +++++++++++++++++----------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 9f41271..53925f2 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -63,7 +63,7 @@ def get_batch_header(config): header.append("#!" + this_batch_system["sh_interpreter"]) tasks, nodes = batch_system.calculate_requirements(config) replacement_tags = [("@tasks@", tasks)] - if "taskset" in config["general"] == "true": + if config["general"].get("taskset", False): replacement_tags = [("@nodes@", nodes)] all_flags = [ "partition_flag", @@ -109,7 +109,7 @@ def calculate_requirements(config): for model in config["general"]["valid_model_names"]: if "nproc" in config[model]: tasks += config[model]["nproc"] - if "taskset" in config["general"] == "true": + if config["general"].get("taskset", False): nodes +=int((config[model]["nproc"]*config[model]["omp_num_threads"])/config['computer']['cores_per_node']) elif "nproca" in config[model] and "nprocb" in config[model]: tasks += config[model]["nproca"] * config[model]["nprocb"] @@ -273,7 +273,7 @@ def write_simple_runscript(config): sadfile.write(line + "\n") sadfile.write("\n") sadfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n") - if "taskset" in config["general"] == "true": + if config["general"].get("taskset", False): sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") sadfile.write("rm -f ./hostlist" + "\n") sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 5d5ef7b..f29dff2 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -87,24 +87,25 @@ def mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_cor else: return start_proc, start_core, end_proc, end_core - if "taskset" in config["general"] == "true": - command = "./" + config[model]["execution_command_script"] - scriptname="script_"+model+".ksh" - with open(self.folder+scriptname, "w") as f: - f.write("#!/bin/ksh"+"\n") - f.write("export OMP_NUM_THREADS="+str(config[model]["omp_num_threads"])+"\n") - f.write(command+"\n") - os.chmod(self.folder+scriptname, 0o755) - - progname="prog_"+model+".sh" - with open(self.folder+progname, "w") as f: - f.write("#!/bin/sh"+"\n") - f.write("(( init = "+str(start_core)+" + $1 ))"+"\n") - f.write("(( index = init * "+str(config[model]["omp_num_threads"])+" ))"+"\n") - f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+" ))"+"\n") - f.write("echo "+model+" taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1"+"))"+"\n") - f.write("taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1)) ./script_"+model+".ksh"+"\n") - os.chmod(self.folder+progname, 0o755) + if config["general"].get("taskset", False): + if "taskset" in config["general"]: + command = "./" + config[model]["execution_command_script"] + scriptname="script_"+model+".ksh" + with open(self.folder+scriptname, "w") as f: + f.write("#!/bin/ksh"+"\n") + f.write("export OMP_NUM_THREADS="+str(config[model]["omp_num_threads"])+"\n") + f.write(command+"\n") + os.chmod(self.folder+scriptname, 0o755) + + progname="prog_"+model+".sh" + with open(self.folder+progname, "w") as f: + f.write("#!/bin/sh"+"\n") + f.write("(( init = "+str(start_core)+" + $1 ))"+"\n") + f.write("(( index = init * "+str(config[model]["omp_num_threads"])+" ))"+"\n") + f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+" ))"+"\n") + f.write("echo "+model+" taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1"+"))"+"\n") + f.write("taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1)) ./script_"+model+".ksh"+"\n") + os.chmod(self.folder+progname, 0o755) if "execution_command" in config[model]: command = "./" + config[model]["execution_command"] From a3fc629ace009b208fe2d12d29c0cecb00c7baa4 Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Mon, 15 Mar 2021 14:29:59 +0100 Subject: [PATCH 37/52] removed unused artibute comment --- esm_runscripts/slurm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index f29dff2..4ff1262 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -16,8 +16,6 @@ class Slurm: ---------- filename : str The filename for srun commands, defaults to ``hostfile_srun`` - hostlist : str - The hostlist for srun commands, defaults to ``hostlist`` path : str Full path to this file, defaults to ``thisrun_scripts_dir / filename`` From a7a2765ff96974cdc4690498f5469d083b02dc1b Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Mon, 15 Mar 2021 18:09:15 +0100 Subject: [PATCH 38/52] is this what you meant, Paul? --- esm_runscripts/slurm.py | 44 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 4ff1262..62db778 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -29,7 +29,6 @@ def __init__(self, config): folder = config["general"]["thisrun_scripts_dir"] self.filename = "hostfile_srun" self.path = folder + "/" + self.filename - self.folder = folder + "/../work/" @staticmethod def check_if_submitted(): @@ -68,7 +67,7 @@ def calc_requirements_multi_srun(self, config): @staticmethod - def mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_core): + def mini_calc_reqs(path, config, model, start_proc, start_core, end_proc, end_core): if "nproc" in config[model]: end_proc = start_proc + int(config[model]["nproc"]) - 1 if "omp_num_threads" in config[model]: @@ -85,25 +84,25 @@ def mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_cor else: return start_proc, start_core, end_proc, end_core + scriptfolder = config["general"]["thisrun_scripts_dir"] + "../work/" if config["general"].get("taskset", False): - if "taskset" in config["general"]: - command = "./" + config[model]["execution_command_script"] - scriptname="script_"+model+".ksh" - with open(self.folder+scriptname, "w") as f: - f.write("#!/bin/ksh"+"\n") - f.write("export OMP_NUM_THREADS="+str(config[model]["omp_num_threads"])+"\n") - f.write(command+"\n") - os.chmod(self.folder+scriptname, 0o755) - - progname="prog_"+model+".sh" - with open(self.folder+progname, "w") as f: - f.write("#!/bin/sh"+"\n") - f.write("(( init = "+str(start_core)+" + $1 ))"+"\n") - f.write("(( index = init * "+str(config[model]["omp_num_threads"])+" ))"+"\n") - f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+" ))"+"\n") - f.write("echo "+model+" taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1"+"))"+"\n") - f.write("taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1)) ./script_"+model+".ksh"+"\n") - os.chmod(self.folder+progname, 0o755) + command = "./" + config[model]["execution_command_script"] + scriptname="script_"+model+".ksh" + with open(scriptfolder+scriptname, "w") as f: + f.write("#!/bin/ksh"+"\n") + f.write("export OMP_NUM_THREADS="+str(config[model]["omp_num_threads"])+"\n") + f.write(command+"\n") + os.chmod(scriptfolder+scriptname, 0o755) + + progname="prog_"+model+".sh" + with open(scriptfolder+progname, "w") as f: + f.write("#!/bin/sh"+"\n") + f.write("(( init = "+str(start_core)+" + $1 ))"+"\n") + f.write("(( index = init * "+str(config[model]["omp_num_threads"])+" ))"+"\n") + f.write("(( slot = index % "+str(config["computer"]["cores_per_node"])+" ))"+"\n") + f.write("echo "+model+" taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1"+"))"+"\n") + f.write("taskset -c $slot-$((slot + "+str(config[model]["omp_num_threads"])+" - 1)) ./script_"+model+".ksh"+"\n") + os.chmod(scriptfolder+progname, 0o755) if "execution_command" in config[model]: command = "./" + config[model]["execution_command"] @@ -112,7 +111,7 @@ def mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_cor else: return start_proc, start_core, end_proc, end_core - with open(self.path, "a") as hostfile: + with open(path, "a") as hostfile: hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") start_proc = end_proc + 1 start_core = end_core + 1 @@ -130,8 +129,9 @@ def calc_requirements(self, config): start_core = 0 end_proc = 0 end_core = 0 + path=self.path for model in config["general"]["valid_model_names"]: - start_proc, start_core, end_proc, end_core = self.mini_calc_reqs(self,config, model, start_proc, start_core, end_proc, end_core) + start_proc, start_core, end_proc, end_core = self.mini_calc_reqs(path ,config, model, start_proc, start_core, end_proc, end_core) @staticmethod From c00addeeb84fa65b933997eaef731d007d6a1089 Mon Sep 17 00:00:00 2001 From: Jan Streffing Date: Mon, 15 Mar 2021 18:48:28 +0100 Subject: [PATCH 39/52] doh --- esm_runscripts/slurm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index 62db778..c7436b5 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -129,9 +129,8 @@ def calc_requirements(self, config): start_core = 0 end_proc = 0 end_core = 0 - path=self.path for model in config["general"]["valid_model_names"]: - start_proc, start_core, end_proc, end_core = self.mini_calc_reqs(path ,config, model, start_proc, start_core, end_proc, end_core) + start_proc, start_core, end_proc, end_core = self.mini_calc_reqs(self.path ,config, model, start_proc, start_core, end_proc, end_core) @staticmethod From 49adcda13fd160751cfd441a26f38f0aa4dea414 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Wed, 17 Mar 2021 18:14:10 +0100 Subject: [PATCH 40/52] solved bug of the content duplication of the hostfile_srun after several calls of esm_runscripts without cleaning the experiment folder. Call to the mini_calc_reqs method modified in all the necessary lines --- esm_runscripts/slurm.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/esm_runscripts/slurm.py b/esm_runscripts/slurm.py index c7436b5..dcf8def 100644 --- a/esm_runscripts/slurm.py +++ b/esm_runscripts/slurm.py @@ -62,12 +62,15 @@ def calc_requirements_multi_srun(self, config): end_proc = 0 with open(current_hostfile, "w") as hostfile: for model in config['general']['multi_srun'][run_type]['models']: - start_proc, end_proc = self.mini_calc_reqs(config, model, hostfile, start_proc, end_proc) + start_proc, start_core, end_proc, end_core = self.mini_calc_reqs( + config, model, hostfile, + start_proc, start_core, end_proc, end_core + ) config['general']['multi_srun'][run_type]['hostfile'] = os.path.basename(current_hostfile) @staticmethod - def mini_calc_reqs(path, config, model, start_proc, start_core, end_proc, end_core): + def mini_calc_reqs(config, model, hostfile, start_proc, start_core, end_proc, end_core): if "nproc" in config[model]: end_proc = start_proc + int(config[model]["nproc"]) - 1 if "omp_num_threads" in config[model]: @@ -110,11 +113,9 @@ def mini_calc_reqs(path, config, model, start_proc, start_core, end_proc, end_co command = "./" + config[model]["executable"] else: return start_proc, start_core, end_proc, end_core - - with open(path, "a") as hostfile: - hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") - start_proc = end_proc + 1 - start_core = end_core + 1 + hostfile.write(str(start_proc) + "-" + str(end_proc) + " " + command + "\n") + start_proc = end_proc + 1 + start_core = end_core + 1 return start_proc, start_core, end_proc, end_core @@ -127,10 +128,11 @@ def calc_requirements(self, config): return start_proc = 0 start_core = 0 - end_proc = 0 + end_proc = 0 end_core = 0 - for model in config["general"]["valid_model_names"]: - start_proc, start_core, end_proc, end_core = self.mini_calc_reqs(self.path ,config, model, start_proc, start_core, end_proc, end_core) + with open(self.path, "w") as hostfile: + for model in config["general"]["valid_model_names"]: + start_proc, start_core, end_proc, end_core = self.mini_calc_reqs(config, model, hostfile, start_proc, start_core, end_proc, end_core) @staticmethod From 28c1934ad059be20b4fa3ff36301913fcbabd0f2 Mon Sep 17 00:00:00 2001 From: denizural Date: Fri, 19 Mar 2021 15:32:57 +0100 Subject: [PATCH 41/52] some improvements for better readability --- esm_runscripts/compute.py | 10 ++++------ esm_runscripts/filelists.py | 39 +++++++++++++++++++++---------------- esm_runscripts/namelists.py | 11 +++++++---- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/esm_runscripts/compute.py b/esm_runscripts/compute.py index 775c04e..58a0b18 100644 --- a/esm_runscripts/compute.py +++ b/esm_runscripts/compute.py @@ -153,9 +153,9 @@ def modify_namelists(config): if config["general"]["verbose"]: six.print_("\n" "- Setting up namelists for this run...") - for model in config["general"]["valid_model_names"]: - six.print_("-" * 80) - six.print_("* %s" % config[model]["model"], "\n") + for index, model in enumerate(config["general"]["valid_model_names"]): + print(f'{index+1}) {config[model]["model"]}') + print() for model in config["general"]["valid_model_names"]: config[model] = Namelist.nmls_load(config[model]) @@ -168,13 +168,12 @@ def modify_namelists(config): ) if config["general"]["verbose"]: - print("end of namelist section") + print("::: end of namelist section\n") return config def copy_files_to_thisrun(config): if config["general"]["verbose"]: - six.print_("=" * 80, "\n") six.print_("PREPARING EXPERIMENT") # Copy files: six.print_("\n" "- File lists populated, proceeding with copy...") @@ -191,7 +190,6 @@ def copy_files_to_thisrun(config): def copy_files_to_work(config): if config["general"]["verbose"]: - six.print_("=" * 80, "\n") six.print_("PREPARING WORK FOLDER") config = copy_files( config, config["general"]["in_filetypes"], source="thisrun", target="work" diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index 856ec81..26d3180 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -416,6 +416,8 @@ def replace_year_placeholder(config): def log_used_files(config): + if config["general"]["verbose"]: + print("\n::: Logging used files") filetypes = config["general"]["relevant_filetypes"] for model in config["general"]["valid_model_names"] + ["general"]: with open( @@ -454,12 +456,11 @@ def log_used_files(config): + config[model][filetype + "_targets"][category] ) if config["general"]["verbose"]: - print( - "- " - + config[model][filetype + "_targets"][category] - + " : " - + config[model][filetype + "_sources"][category] - ) + print() + print((f'- source: ' + f'{config[model][filetype + "_sources"][category]}')) + print((f'- target: ' + f'{config[model][filetype + "_targets"][category]}')) flist.write("\n") flist.write("\n") flist.write(80 * "-") @@ -541,6 +542,8 @@ def resolve_symlinks(file_source): def copy_files(config, filetypes, source, target): + if config["general"]["verbose"]: + print("\n::: Copying files") successful_files = [] missing_files = {} @@ -584,8 +587,9 @@ def copy_files(config, filetypes, source, target): ) file_target = os.path.normpath(targetblock[categ]) if config["general"]["verbose"]: - print(f"source: {file_source}") - print(f" --> target: {file_target}") + print() + print(f"- source: {file_source}") + print(f"- target: {file_target}") if file_source == file_target: if config["general"]["verbose"]: print( @@ -602,7 +606,7 @@ def copy_files(config, filetypes, source, target): # (same as with ``mkdir -p >``) os.makedirs(dest_dir) if not os.path.isfile(file_source): - print(f"File not found: {file_source}...") + print(f"WARNING: File not found: {file_source}") missing_files.update({file_target: file_source}) continue if os.path.isfile(file_target) and filecmp.cmp( @@ -626,27 +630,28 @@ def copy_files(config, filetypes, source, target): if not "files_missing_when_preparing_run" in config["general"]: config["general"]["files_missing_when_preparing_run"] = {} if config["general"]["verbose"]: - six.print_("--- WARNING: These files were missing:") + six.print_("\n\nWARNING: These files were missing:") for missing_file in missing_files: - print(" - " + missing_file + ": " + missing_files[missing_file]) + print(f'- missing source: {missing_files[missing_file]}') + print(f'- missing target: {missing_file}') + print() config["general"]["files_missing_when_preparing_run"].update(missing_files) return config def report_missing_files(config): + # this list is populated by the ``copy_files`` function in filelists.py if "files_missing_when_preparing_run" in config["general"]: config = _check_fesom_missing_files(config) if not config["general"]["files_missing_when_preparing_run"] == {}: - six.print_(80 * "=") print("MISSING FILES:") for missing_file in config["general"]["files_missing_when_preparing_run"]: - print("-- " + missing_file + ": ") - print( - " --> " - + config["general"]["files_missing_when_preparing_run"][missing_file] - ) + print() + print(f'- missing source: {config["general"]["files_missing_when_preparing_run"][missing_file]}') + print(f'- missing target: {missing_file}') if not config["general"]["files_missing_when_preparing_run"] == {}: six.print_(80 * "=") + print() return config diff --git a/esm_runscripts/namelists.py b/esm_runscripts/namelists.py index cec788b..6d16342 100644 --- a/esm_runscripts/namelists.py +++ b/esm_runscripts/namelists.py @@ -207,8 +207,8 @@ def apply_echam_disturbance(config): else: disturbance_file = None if config["general"]["verbose"]: - print( - config["general"]["experiment_scripts_dir"] + print("WARNING: " + + config["general"]["experiment_scripts_dir"] + "/disturb_years.dat", "was not found", ) @@ -288,9 +288,12 @@ def nmls_output(mconfig): for nml_name, nml_obj in six.iteritems(mconfig.get("namelists", {})): all_nmls[nml_name] = nml_obj # PG: or a string representation? for nml_name, nml in all_nmls.items(): - six.print_("Final Contents of ", nml_name, ":") + message = f'\nFinal Contents of {nml_name}:' + six.print_(message) + six.print_(len(message) * '-') nml.write(sys.stdout) - six.print_("\n", 40 * "+ ") + print('-' * 80) + print(f'::: end of the contents of {nml_name}\n') return mconfig From 75ec7503a43f513af2bed965d75c7fa7c4206aab Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Sun, 21 Mar 2021 12:56:59 +0100 Subject: [PATCH 42/52] allows for writing environment files that can be sourced from pre- and postprocessing scripts --- esm_runscripts/batch_system.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 53925f2..f57592f 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -277,7 +277,7 @@ def write_simple_runscript(config): sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") sadfile.write("rm -f ./hostlist" + "\n") sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") - sadfile.write("IFS=$'\\n'; set -f" + "\n") + sadfile.write("IFS=$'\\n'; set -f" + "\n") sadfile.write("listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST )))"+"\n") sadfile.write("unset IFS; set +f" + "\n") sadfile.write("rank=0" + "\n") @@ -297,8 +297,8 @@ def write_simple_runscript(config): sadfile.write(" host_value=${listnodes[${index_host}]}" + "\n") sadfile.write(" (( slot = current_core % " + str(config["computer"]["cores_per_node"]) +" ))" + "\n") sadfile.write(" echo $host_value >> hostlist" + "\n") - sadfile.write(" (( current_core = current_core + omp_threads_${model} ))" + "\n") - sadfile.write(" done" + "\n") + sadfile.write(" (( current_core = current_core + omp_threads_${model} ))" + "\n") + sadfile.write(" done" + "\n") sadfile.write("done" + "\n\n") for line in commands: sadfile.write(line + "\n") @@ -321,8 +321,29 @@ def write_simple_runscript(config): six.print_("Contents of ", self.bs.filename, ":") with open(self.bs.filename, "r") as fin: print(fin.read()) + + # Write the environment in a file that can be sourced from preprocessing and + # postprocessing scripts + batch_system.write_env(config, environment, sadfilename) + return config + @staticmethod + def write_env(config, environment, sadfilename): + folder = config["general"]["thisrun_scripts_dir"] + this_batch_system = config["computer"] + sadfilename_short = sadfilename.split("/")[-1] + envfilename = folder + "/env.sh" + + with open(envfilename, "w") as envfile: + if "sh_interpreter" in this_batch_system: + envfile.write("#!" + this_batch_system["sh_interpreter"] + "\n") + envfile.write(f"# ENVIRONMENT used in {sadfilename_short}\n") + envfile.write("# Use this file to source the environment in your\n") + envfile.write("# preprocessing or postprocessing scripts\n\n") + for line in environment: + envfile.write(line + "\n") + @staticmethod def submit(config): if not config["general"]["check"]: From 46406fc946069ccd5bac832bf322594855286914 Mon Sep 17 00:00:00 2001 From: denizural Date: Sun, 21 Mar 2021 21:26:19 +0100 Subject: [PATCH 43/52] =?UTF-8?q?Bump=20version:=205.0.16=20=E2=86=92=205.?= =?UTF-8?q?0.17?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- esm_runscripts/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/esm_runscripts/__init__.py b/esm_runscripts/__init__.py index 4012b34..56236d3 100644 --- a/esm_runscripts/__init__.py +++ b/esm_runscripts/__init__.py @@ -2,7 +2,7 @@ __author__ = """Dirk Barbi""" __email__ = 'dirk.barbi@awi.de' -__version__ = "5.0.16" +__version__ = "5.0.17" from .sim_objects import * from .batch_system import * diff --git a/setup.cfg b/setup.cfg index 84c6068..44a2ce0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.0.16 +current_version = 5.0.17 commit = True tag = True diff --git a/setup.py b/setup.py index 4a7d473..5f4e096 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,6 @@ test_suite='tests', tests_require=test_requirements, url='https://github.com/dbarbi/esm_runscripts', - version="5.0.16", + version="5.0.17", zip_safe=False, ) From 0962432e1ed01d9277ef7ccee7c5c46bc3b90986 Mon Sep 17 00:00:00 2001 From: denizural Date: Mon, 22 Mar 2021 11:32:02 +0100 Subject: [PATCH 44/52] overcommit feature: possibility to use less CPUs than the number of process --- esm_runscripts/batch_system.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 53925f2..6157e67 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -85,6 +85,7 @@ def get_batch_header(config): "notification_flag", "hyperthreading_flag", "additional_flags", + "overcommit_flag" ] if config["general"]["jobtype"] in ["compute", "tidy_and_resume"]: conditional_flags.append("exclusive_flag") From 794443e511f0ed927b62d8e72441970f55b0cf6d Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Tue, 23 Mar 2021 08:45:52 +0100 Subject: [PATCH 45/52] fix on the SLURM_HOSTFILE variable, now an absolute path --- esm_runscripts/batch_system.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/esm_runscripts/batch_system.py b/esm_runscripts/batch_system.py index 53925f2..1bde18a 100644 --- a/esm_runscripts/batch_system.py +++ b/esm_runscripts/batch_system.py @@ -276,8 +276,8 @@ def write_simple_runscript(config): if config["general"].get("taskset", False): sadfile.write("\n"+"#Creating hostlist for MPI + MPI&OMP heterogeneous parallel job" + "\n") sadfile.write("rm -f ./hostlist" + "\n") - sadfile.write("export SLURM_HOSTFILE=./hostlist" + "\n") - sadfile.write("IFS=$'\\n'; set -f" + "\n") + sadfile.write(f"export SLURM_HOSTFILE={config['general']['thisrun_work_dir']}/hostlist\n") + sadfile.write("IFS=$'\\n'; set -f" + "\n") sadfile.write("listnodes=($(< <( scontrol show hostnames $SLURM_JOB_NODELIST )))"+"\n") sadfile.write("unset IFS; set +f" + "\n") sadfile.write("rank=0" + "\n") @@ -297,8 +297,8 @@ def write_simple_runscript(config): sadfile.write(" host_value=${listnodes[${index_host}]}" + "\n") sadfile.write(" (( slot = current_core % " + str(config["computer"]["cores_per_node"]) +" ))" + "\n") sadfile.write(" echo $host_value >> hostlist" + "\n") - sadfile.write(" (( current_core = current_core + omp_threads_${model} ))" + "\n") - sadfile.write(" done" + "\n") + sadfile.write(" (( current_core = current_core + omp_threads_${model} ))" + "\n") + sadfile.write(" done" + "\n") sadfile.write("done" + "\n\n") for line in commands: sadfile.write(line + "\n") From ec809a2a590a9bd089be6a7c75aed8a1b40740b4 Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Wed, 31 Mar 2021 17:09:11 +0200 Subject: [PATCH 46/52] fixes to the venv editable installs --- esm_runscripts/virtual_env_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esm_runscripts/virtual_env_builder.py b/esm_runscripts/virtual_env_builder.py index 51007c6..92343f7 100644 --- a/esm_runscripts/virtual_env_builder.py +++ b/esm_runscripts/virtual_env_builder.py @@ -140,7 +140,7 @@ def _install_tools_general(venv_context, config, deps=True): subprocess.check_call(f"git clone --quiet {branch_command} {url} {src_dir}", shell=True) # Carry out the editable installation (with or without dependencies) _run_bin_in_venv(venv_context, ["pip", "install", "-q", f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir] + no_deps_flag) - _run_bin_in_venv(venv_context, ["pip", "wheel", "-q", f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", src_dir]) + #_run_bin_in_venv(venv_context, ["pip", "wheel", "-q", f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", src_dir]) # If the package is not editable then do a standard installation. # Note: this step only runs with the `--no-deps` flag if the user has specified # a branch, as this flags means also that is the second time passing through From 0c4b0288f5d5301d34cad1d33f7558040786337b Mon Sep 17 00:00:00 2001 From: Miguel Andres-Martinez Date: Fri, 2 Apr 2021 18:44:27 +0200 Subject: [PATCH 47/52] fixes to the wheel calls for the virtual environment --- esm_runscripts/virtual_env_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esm_runscripts/virtual_env_builder.py b/esm_runscripts/virtual_env_builder.py index 92343f7..7ab3944 100644 --- a/esm_runscripts/virtual_env_builder.py +++ b/esm_runscripts/virtual_env_builder.py @@ -140,7 +140,7 @@ def _install_tools_general(venv_context, config, deps=True): subprocess.check_call(f"git clone --quiet {branch_command} {url} {src_dir}", shell=True) # Carry out the editable installation (with or without dependencies) _run_bin_in_venv(venv_context, ["pip", "install", "-q", f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir] + no_deps_flag) - #_run_bin_in_venv(venv_context, ["pip", "wheel", "-q", f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", src_dir]) + _run_bin_in_venv(venv_context, ["pip", "wheel", "-q", f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", "-e", src_dir] + no_deps_flag) # If the package is not editable then do a standard installation. # Note: this step only runs with the `--no-deps` flag if the user has specified # a branch, as this flags means also that is the second time passing through @@ -152,7 +152,7 @@ def _install_tools_general(venv_context, config, deps=True): url += f"@{user_wants_branch}" # NOTE(PG): We need the -U flag to ensure the branch is actually installed. _run_bin_in_venv(venv_context, ["pip", "install", '-q', f"--find-links={os.environ.get('HOME')}/.cache/pip/wheels", "-U", url] + no_deps_flag) - _run_bin_in_venv(venv_context, ["pip", "wheel", '-q', f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", url]) + _run_bin_in_venv(venv_context, ["pip", "wheel", '-q', f"--wheel-dir={os.environ.get('HOME')}/.cache/pip/wheels", url] + no_deps_flag) def _install_required_plugins(venv_context, config): required_plugins = [] From cb3630792d6e9648939ecbaf59a2595768d378ca Mon Sep 17 00:00:00 2001 From: denizural Date: Mon, 12 Apr 2021 14:32:46 +0200 Subject: [PATCH 48/52] HIST scenario is working at least for T63 --- esm_runscripts/filelists.py | 218 +++++++++++++++++++++++++++--------- 1 file changed, 162 insertions(+), 56 deletions(-) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index b4620c0..ffeef58 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -355,6 +355,20 @@ def assemble_intermediate_files_and_finalize_targets(config): return config +def find_valid_year(config, year): + for entry in config: + min_val = -50000000000 + max_val = 500000000000 + + from_info = float(config[entry].get("from", min_val)) + to_info = float(config[entry].get("to", max_val)) + + if from_info <= year <= to_info: + return entry + # if the current model year is out of the valid bounds, report and exit + print(f"Sorry, no entry found for year {year} in config {config}") + sys.exit(-1) + def replace_year_placeholder(config): for filetype in config["general"]["all_model_filetypes"]: for model in config["general"]["valid_model_names"] + ["general"]: @@ -364,73 +378,165 @@ def replace_year_placeholder(config): filetype + "_additional_information" ]: if file_category in config[model][filetype + "_targets"]: + + all_years = [config["general"]["current_date"].year] + + if ( + "need_timestep_before" + in config[model][ + filetype + "_additional_information" + ][file_category] + ): + all_years.append( + config["general"]["prev_date"].year + ) + if ( + "need_timestep_after" + in config[model][ + filetype + "_additional_information" + ][file_category] + ): + all_years.append( + config["general"]["next_date"].year + ) + if ( + "need_year_before" + in config[model][ + filetype + "_additional_information" + ][file_category] + ): + all_years.append( + config["general"]["current_date"].year - 1 + ) if ( - "@YEAR@" + "need_year_after" + in config[model][ + filetype + "_additional_information" + ][file_category] + ): + all_years.append( + config["general"]["current_date"].year + 1 + ) + + all_years = list( + dict.fromkeys(all_years) + ) # removes duplicates + + # loop over all years (including year_before & after) + # change replace the @YEAR@ variable with the + # corresponding year + for year in all_years: + new_category = file_category + "_year_" + str(year) + + # if the source contains 'from' or 'to' information + # then they have a dict type + if type(config[model][filetype + "_sources"][file_category]) == dict: + + # process the 'from' and 'to' information in + # file sources and targets + config[model][filetype + "_sources"][new_category] = \ + find_valid_year( + config[model][filetype + "_sources"][file_category], + year + ) + + config[model][filetype + "_targets"][new_category] = \ + config[model][filetype + "_targets"][file_category] + + if ( + "@YEAR@" + in config[model][filetype + "_targets"][new_category] + ): + new_target_name = config[model][ + filetype + "_targets" + ][new_category].replace("@YEAR@", str(year)) + + config[model][filetype + "_targets"][ + new_category + ] = new_target_name + + if ( + "@YEAR@" + in config[model][filetype + "_sources"][new_category] + ): + new_source_name = config[model][ + filetype + "_sources" + ][new_category].replace("@YEAR@", str(year)) + + config[model][filetype + "_sources"][ + new_category + ] = new_source_name + # end of the for year loop + + # deniz: new additions for @YEAR_1850@ + # these are the Kinne aerosol files for the background + # aerosol concentration. They are needed for years + # 1849, 1850, and 1851. All these 3 files are the same + # and ECHAM needs them + if ("@YEAR_1850@" in config[model][filetype + "_targets"][file_category] ): - all_years = [config["general"]["current_date"].year] - - if ( - "need_timestep_before" - in config[model][ - filetype + "_additional_information" - ][file_category] - ): - all_years.append( - config["general"]["prev_date"].year - ) - if ( - "need_timestep_after" - in config[model][ - filetype + "_additional_information" - ][file_category] - ): - all_years.append( - config["general"]["next_date"].year - ) - if ( - "need_year_before" - in config[model][ - filetype + "_additional_information" - ][file_category] - ): - all_years.append( - config["general"]["current_date"].year - 1 - ) - if ( - "need_year_after" - in config[model][ - filetype + "_additional_information" - ][file_category] - ): - all_years.append( - config["general"]["current_date"].year + 1 - ) - - all_years = list( - dict.fromkeys(all_years) - ) # removes duplicates - - for year in all_years: - + # only target name is changed since source file is for a fixed year (1850) + for year in [1849, 1850, 1851]: new_category = file_category + "_year_" + str(year) + + # add the sources and targets to the config + config[model][filetype + "_sources"][new_category] = \ + config[model][filetype + "_sources"][file_category] + + config[model][filetype + "_targets"][new_category] = \ + config[model][filetype + "_targets"][file_category] + + # construct the file target and add this to the config new_target_name = config[model][ filetype + "_targets" - ][file_category].replace("@YEAR@", str(year)) - new_source_name = config[model][ - filetype + "_sources" - ][file_category].replace("@YEAR@", str(year)) - + ][new_category].replace("@YEAR_1850@", str(year)) + config[model][filetype + "_targets"][ new_category ] = new_target_name - config[model][filetype + "_sources"][ - new_category - ] = new_source_name - del config[model][filetype + "_targets"][file_category] - del config[model][filetype + "_sources"][file_category] + del config[model][filetype + "_sources"][file_category] + del config[model][filetype + "_targets"][file_category] + # end of if additonal information + + year = config["general"]["current_date"].year + + for file_category in config[model][filetype + "_targets"]: + + if type(config[model][filetype + "_sources"][file_category]) == dict: + config[model][filetype + "_sources"][file_category] = \ + find_valid_year( + config[model][filetype + "_sources"][file_category], + year + ) + if ( + "@YEAR@" + in config[model][filetype + "_targets"][file_category] + ): + new_target_name = config[model][ + filetype + "_targets" + ][file_category].replace("@YEAR@", str(year)) + + config[model][filetype + "_targets"][ + file_category + ] = new_target_name + if ( + "@YEAR@" + in config[model][filetype + "_sources"][file_category] + ): + new_source_name = config[model][ + filetype + "_sources" + ][file_category].replace("@YEAR@", str(year)) + + config[model][filetype + "_sources"][ + file_category + ] = new_source_name + + # end of if filetype in target + # end of model loop + # end of filetype loop return config From cda27a3e90e3f1e1c1bcb5de53b6632b4844ee40 Mon Sep 17 00:00:00 2001 From: denizural Date: Wed, 14 Jul 2021 08:45:35 +0200 Subject: [PATCH 49/52] bugfix for the non-dict variables containing @YEAR@ --- esm_runscripts/filelists.py | 41 +++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index ffeef58..562a505 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -443,6 +443,7 @@ def replace_year_placeholder(config): config[model][filetype + "_targets"][new_category] = \ config[model][filetype + "_targets"][file_category] + # replace @YEAR@ in the targets if ( "@YEAR@" in config[model][filetype + "_targets"][new_category] @@ -455,6 +456,7 @@ def replace_year_placeholder(config): new_category ] = new_target_name + # replace @YEAR@ in the sources if ( "@YEAR@" in config[model][filetype + "_sources"][new_category] @@ -466,6 +468,45 @@ def replace_year_placeholder(config): config[model][filetype + "_sources"][ new_category ] = new_source_name + + # value is not a dictionary. Ie. it does not + # have `from` or `to` attributes. This else + # block preserves these sections in the config. + else: + # create `new_category` from `file_category` + config[model][filetype + "_sources"][new_category] = \ + config[model][filetype + "_sources"][file_category] + + config[model][filetype + "_targets"][new_category] = \ + config[model][filetype + "_targets"][file_category] + + # replace @YEAR@ in the targets + if ( + "@YEAR@" + in config[model][filetype + "_targets"][new_category] + ): + new_target_name = config[model][ + filetype + "_targets" + ][new_category].replace("@YEAR@", str(year)) + + config[model][filetype + "_targets"][ + new_category + ] = new_target_name + + # replace @YEAR@ in the sources + if ( + "@YEAR@" + in config[model][filetype + "_sources"][new_category] + ): + new_source_name = config[model][ + filetype + "_sources" + ][new_category].replace("@YEAR@", str(year)) + + config[model][filetype + "_sources"][ + new_category + ] = new_source_name + + # end if # end of the for year loop # deniz: new additions for @YEAR_1850@ From be54dbd11d881bc6a77a4e3fb610215d53eca6e2 Mon Sep 17 00:00:00 2001 From: denizural Date: Mon, 19 Jul 2021 06:15:51 +0200 Subject: [PATCH 50/52] added support for need_2years_before & after variables --- esm_runscripts/filelists.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/esm_runscripts/filelists.py b/esm_runscripts/filelists.py index 562a505..3b9c8d4 100644 --- a/esm_runscripts/filelists.py +++ b/esm_runscripts/filelists.py @@ -417,6 +417,14 @@ def replace_year_placeholder(config): all_years.append( config["general"]["current_date"].year + 1 ) + + + if "need_2years_before" in config[model][filetype + "_additional_information"][file_category]: + all_years.append(config["general"]["current_date"].year - 2) + + if "need_2years_after" in config[model][filetype + "_additional_information" ][file_category]: + all_years.append(config["general"]["current_date"].year + 2) + all_years = list( dict.fromkeys(all_years) From 5389c6d451ddb016a0f6d5f6200f466ad32b956e Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Wed, 28 Jul 2021 10:00:33 +0200 Subject: [PATCH 51/52] feat(namelist): allows user to override streams by checking what is defined in the namelist --- esm_runscripts/compute.py | 3 +++ esm_runscripts/namelists.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/esm_runscripts/compute.py b/esm_runscripts/compute.py index afae02b..26e6b6e 100644 --- a/esm_runscripts/compute.py +++ b/esm_runscripts/compute.py @@ -163,6 +163,9 @@ def modify_namelists(config): config[model] = Namelist.nmls_remove(config[model]) if model == "echam": config = Namelist.apply_echam_disturbance(config) + # NOTE(PG): This really doesn't belong in modify namelists, but, + # what the hell... + config = Namelist.echam_determine_streams_from_nml(config) config[model] = Namelist.nmls_modify(config[model]) config[model] = Namelist.nmls_finalize( config[model], config["general"]["verbose"] diff --git a/esm_runscripts/namelists.py b/esm_runscripts/namelists.py index 6d16342..ba1a175 100644 --- a/esm_runscripts/namelists.py +++ b/esm_runscripts/namelists.py @@ -235,6 +235,24 @@ def apply_echam_disturbance(config): ) return config + @staticmethod + def echam_determine_streams_from_nml(config): + if "echam" in config["general"]["valid_model_names"]: + nml = config["echam"]["namelists"]["namelist.echam"] + mvstreams = nml["mvstreamctl"] + mvstreams_tags = [nml["filetag"] for nml in mvstreams] + # NOTE(PG): There may still be warnings about missing files -- we + # still need to implement an "allowed missing files" feature, but + # this should now put all the streams away correctly. + if not config["echam"].get("override_streams_from_namelist", False): + config["echam"]["streams"] += mvstreams_tags + else: + # NOTE(PG): I honestly am not sure if this will work, maybe the + # restart will get messed up horribly. This just overrides + # whatever was there in the default. It might be dangerous. + config["echam"]["streams"] = mvstreams_tags + return config + @staticmethod def nmls_finalize(mconfig, verbose): """ From 51d71e650e97b1c4a8ef1a2eaa49c37a8a5f7a47 Mon Sep 17 00:00:00 2001 From: chrisdane Date: Tue, 12 Oct 2021 16:42:18 +0200 Subject: [PATCH 52/52] Revert "feat(namelist): allows user to override streams by checking what is defined in the namelist" This reverts commit 5389c6d451ddb016a0f6d5f6200f466ad32b956e. accidentally pushed --- esm_runscripts/compute.py | 3 --- esm_runscripts/namelists.py | 18 ------------------ 2 files changed, 21 deletions(-) diff --git a/esm_runscripts/compute.py b/esm_runscripts/compute.py index 26e6b6e..afae02b 100644 --- a/esm_runscripts/compute.py +++ b/esm_runscripts/compute.py @@ -163,9 +163,6 @@ def modify_namelists(config): config[model] = Namelist.nmls_remove(config[model]) if model == "echam": config = Namelist.apply_echam_disturbance(config) - # NOTE(PG): This really doesn't belong in modify namelists, but, - # what the hell... - config = Namelist.echam_determine_streams_from_nml(config) config[model] = Namelist.nmls_modify(config[model]) config[model] = Namelist.nmls_finalize( config[model], config["general"]["verbose"] diff --git a/esm_runscripts/namelists.py b/esm_runscripts/namelists.py index ba1a175..6d16342 100644 --- a/esm_runscripts/namelists.py +++ b/esm_runscripts/namelists.py @@ -235,24 +235,6 @@ def apply_echam_disturbance(config): ) return config - @staticmethod - def echam_determine_streams_from_nml(config): - if "echam" in config["general"]["valid_model_names"]: - nml = config["echam"]["namelists"]["namelist.echam"] - mvstreams = nml["mvstreamctl"] - mvstreams_tags = [nml["filetag"] for nml in mvstreams] - # NOTE(PG): There may still be warnings about missing files -- we - # still need to implement an "allowed missing files" feature, but - # this should now put all the streams away correctly. - if not config["echam"].get("override_streams_from_namelist", False): - config["echam"]["streams"] += mvstreams_tags - else: - # NOTE(PG): I honestly am not sure if this will work, maybe the - # restart will get messed up horribly. This just overrides - # whatever was there in the default. It might be dangerous. - config["echam"]["streams"] = mvstreams_tags - return config - @staticmethod def nmls_finalize(mconfig, verbose): """