From 4d040636f9a0f17c591a9e741dd54d228d53b509 Mon Sep 17 00:00:00 2001 From: SJaffa Date: Thu, 8 May 2025 15:11:45 +0100 Subject: [PATCH 01/10] simple bits --- docs/source/conf.py | 5 ++++- docs/source/reference/config_file.rst | 2 ++ docs/source/reference/index.rst | 12 ++++++++++++ .../reference/template_components/task_schemas.rst | 2 ++ 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 docs/source/reference/index.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 9863126..71fbcab 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -226,7 +226,9 @@ def prepare_task_schema_action_info(app: BaseApp): # distribution name (i.e. name on PyPI): with open("../../pyproject.toml") as fp: - dist_name = tomlkit.load(fp)["tool"]["poetry"]["name"] + pyproject_config = tomlkit.load(fp) + dist_name = pyproject_config["tool"]["poetry"]["name"] + supported_python = pyproject_config["tool"]["poetry"]["dependencies"]["python"] extensions = [ "sphinx.ext.autodoc", @@ -274,6 +276,7 @@ def prepare_task_schema_action_info(app: BaseApp): "download_links_table_html": generate_download_links_table(), "github_user": github_user, "github_repo": github_repo, + "supported_python": supported_python, } } diff --git a/docs/source/reference/config_file.rst b/docs/source/reference/config_file.rst index 88912db..4915c1a 100644 --- a/docs/source/reference/config_file.rst +++ b/docs/source/reference/config_file.rst @@ -3,6 +3,8 @@ Configuration file Configurable settings that can be set within the configuration file are listed here. +.. _default_config: + Default config file ~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst new file mode 100644 index 0000000..c5ab46a --- /dev/null +++ b/docs/source/reference/index.rst @@ -0,0 +1,12 @@ +Reference +========= + +.. toctree:: + :maxdepth: 2 + + Python API + Command-line interface + Template components + Demo workflows + Configuration file + Glossary \ No newline at end of file diff --git a/docs/source/reference/template_components/task_schemas.rst b/docs/source/reference/template_components/task_schemas.rst index 6e0a7ea..24f600d 100644 --- a/docs/source/reference/template_components/task_schemas.rst +++ b/docs/source/reference/template_components/task_schemas.rst @@ -1,3 +1,5 @@ +.. _task_schemas: + Task schemas ============ From 00a2ac67fda6f9bfd30999256d0372d002a0dece Mon Sep 17 00:00:00 2001 From: SJaffa Date: Thu, 8 May 2025 15:15:22 +0100 Subject: [PATCH 02/10] add common glossary terms --- docs/source/reference/glossary.rst | 89 +++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/docs/source/reference/glossary.rst b/docs/source/reference/glossary.rst index 8c7c089..626228e 100644 --- a/docs/source/reference/glossary.rst +++ b/docs/source/reference/glossary.rst @@ -1,7 +1,94 @@ Glossary ======== -workflow +API +--- + +Application programming interface. {{ app_name }}'s API allows us to design and execute workflows from within a Python interpreter or Jupyter notebook. + +.. _def_command_files: + +Command files +------------- +If you want to refer to any files that are used as inputs or output, +they should be listed under ``command_files`` in the workflow file + +.. code-block:: console + + command_files: + - label: new_inp_file + name: + name: friction_conductance.inp + + +CLI +--- + +Command line interface. The CLI is typically how we interact with {{ app_name }} on HPC systems. 
+ +cluster +------- + +See HPC + + +Environment/virtual environment +------------------------------- + +An environment is an isolated set of installed software. +Using environments allows you to have multiple copies of the same software installed in different environments so you can run different versions, or to run two pieces of software with competing dependencies on the same machine. +Using and sharing environments helps make your work reproducible because someone can use the same environment on a different machine and be sure they have the same versions of everything. + + +HPC +--- + +High-performance computer/computing + +HPCFlow +------- + +{{ app_description }} + +jobscript +--------- + +A job submission script that is used to queue a job on a batch scheduler system, such as SLURM or SGE. +Jobscripts are generated by {{ app_name }} during workflow submission. + +.. _def_task: + +Tasks +------------- +These are actual usages of a :ref:`task schema `, run with defined inputs. + +.. _def_task_schema: + +Task schema +------------- +This is a template for a task you want to run, +with definitions of the input and outputs that are expected. + +Matflow has many :ref:`built-in task schemas `, but you may want to +write your own. + +.. _def_workflow: + +Workflow -------- A pipeline that processes data in some way. +A workflow is a list of tasks that run one after the other. + + +.. _def_workflow_template: + +Workflow template +------------------ + +A workflow template parameterises a workflow, +providing the required input values for the task schemas of the workflow. +However, it doesn't actually run the :ref:`workflow `. +A workflow template is usually just the list of tasks, +but can optionally include matflow environment, +the :ref:`task schemas `, and the :ref:`command files `. From 3a130b7ef1b6ded0919ce8c26c7a2394e5c4cb41 Mon Sep 17 00:00:00 2001 From: SJaffa Date: Thu, 8 May 2025 15:17:12 +0100 Subject: [PATCH 03/10] remove missed specifics --- docs/source/reference/glossary.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/docs/source/reference/glossary.rst b/docs/source/reference/glossary.rst index 626228e..2587ff5 100644 --- a/docs/source/reference/glossary.rst +++ b/docs/source/reference/glossary.rst @@ -45,10 +45,6 @@ HPC High-performance computer/computing -HPCFlow -------- - -{{ app_description }} jobscript --------- @@ -69,8 +65,7 @@ Task schema This is a template for a task you want to run, with definitions of the input and outputs that are expected. -Matflow has many :ref:`built-in task schemas `, but you may want to -write your own. +{{ app_name }} has many :ref:`built-in task schemas `, but you may want to write your own. .. _def_workflow: @@ -90,5 +85,5 @@ A workflow template parameterises a workflow, providing the required input values for the task schemas of the workflow. However, it doesn't actually run the :ref:`workflow `. A workflow template is usually just the list of tasks, -but can optionally include matflow environment, +but can optionally include {{ app_name }} environment, the :ref:`task schemas `, and the :ref:`command files `. 
From 0cd36a5f09d447ae7e6857502ef5bf920d5c7986 Mon Sep 17 00:00:00 2001 From: SJaffa Date: Thu, 8 May 2025 15:26:21 +0100 Subject: [PATCH 04/10] merge installation differences --- docs/source/alternative_install.rst | 71 ++++++++++++ docs/source/installation.rst | 171 ++++++++++++++++------------ 2 files changed, 171 insertions(+), 71 deletions(-) create mode 100644 docs/source/alternative_install.rst diff --git a/docs/source/alternative_install.rst b/docs/source/alternative_install.rst new file mode 100644 index 0000000..c914c98 --- /dev/null +++ b/docs/source/alternative_install.rst @@ -0,0 +1,71 @@ +:orphan: + +.. _alternative_install: + +.. jinja:: first_ctx + + ################################# + Alternative installation methods + ################################# + + ****************** + {{ app_name }} CLI + ****************** + + The {{ app_name }} CLI can be installed on Linux, macOS, and Windows through a terminal + or shell prompt: + + .. tab-set:: + + .. tab-item:: Linux/macOS + + Open a terminal, paste the command shown below and press enter. + + .. code-block:: bash + + (touch tmp.sh && curl -fsSL https://raw.githubusercontent.com/hpcflow/install-scripts/main/src/install-{{ app_package_name }}.sh > tmp.sh && bash tmp.sh --prerelease --path --univlink) ; rm tmp.sh + + .. tab-item:: Windows + + Open a Powershell terminal, paste the command shown below and press enter. + + .. code-block:: powershell + + & $([scriptblock]::Create((New-Object Net.WebClient).DownloadString('https://raw.githubusercontent.com/hpcflow/install-scripts/main/src/install-{{ app_package_name }}.ps1'))) -PreRelease -UnivLink + + .. admonition:: What does this script do? + :class: note dropdown + + The above command downloads a script from the {{ app_name }} GitHub repository and runs it. The script does the following: + + #. It downloads the latest prerelease version of {{ app_name }} zip archived in a single folder. + #. The archive is extracted and the folder placed in an accessible location. The location depends on the operating system. In Linux it is ``/.local/share/{{ app_package_name }}``. In macOS it is ``~/Library/Application Support/{{ app_package_name }}``. In Windows it is ``Username\AppData\Local\{{ app_package_name }}``. + #. A symbolic link (Linux/macOS) or an alias pointing to the file is created. This allows {{ app_name }} to be run by entering a simple command. + #. A command is added to ``.bashrc``/``.zshrc`` (linux/macOS) or the Powershell profile (Windows) that allows {{ app_name }} to be run from any folder. + + If the script detects that the version of {{ app_name }} it is trying to install is already there, it will stop + running and exit. + + + + .. hint:: + + If you are installing {{ app_name }} on an HPC resource, check that you can connect + to the internet first. You might need to load a proxy module, for example. + + + ******************************** + Download CLI binaries (advanced) + ******************************** + + Binaries are available in two formats, corresponding to the two different formats that + PyInstaller `can generate `_: + + * A single executable file containing everything. + * A folder containing an executable and supporting files. + + Click below to download the {{ app_name }} binary for your platform: + + .. 
raw:: html + + {{ download_links_table_html }} diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 65bac8f..6d6bb01 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -22,82 +22,15 @@ The CLI and the Python package can be used simultaneously. - ****************** - {{ app_name }} CLI - ****************** - - The {{ app_name }} CLI can be installed on Linux, macOS, and Windows through a terminal - or shell prompt: - - .. tab-set:: - - .. tab-item:: Linux/macOS - - Open a terminal, paste the command shown below and press enter. - - .. code-block:: bash - - (touch tmp.sh && curl -fsSL https://raw.githubusercontent.com/hpcflow/install-scripts/main/src/install-{{ app_package_name }}.sh > tmp.sh && bash tmp.sh --prerelease --path --univlink) ; rm tmp.sh - - .. tab-item:: Windows - - Open a Powershell terminal, paste the command shown below and press enter. - - .. code-block:: powershell - - & $([scriptblock]::Create((New-Object Net.WebClient).DownloadString('https://raw.githubusercontent.com/hpcflow/install-scripts/main/src/install-{{ app_package_name }}.ps1'))) -PreRelease -UnivLink - - .. admonition:: What does this script do? - :class: note dropdown - - The above command downloads a script from the {{ app_name }} GitHub repository and runs it. The script does the following: - - #. It downloads the latest prerelease version of {{ app_name }} zip archived in a single folder. - #. The archive is extracted and the folder placed in an accessible location. The location depends on the operating system. In Linux it is ``/.local/share/{{ app_package_name }}``. In macOS it is ``~/Library/Application Support/{{ app_package_name }}``. In Windows it is ``Username\AppData\Local\{{ app_package_name }}``. - #. A symbolic link (Linux/macOS) or an alias pointing to the file is created. This allows {{ app_name }} to be run by entering a simple command. - #. A command is added to ``.bashrc``/``.zshrc`` (linux/macOS) or the Powershell profile (Windows) that allows {{ app_name }} to be run from any folder. - - If the script detects that the version of {{ app_name }} it is trying to install is already there, it will stop - running and exit. - - - - .. hint:: - - If you are installing {{ app_name }} on an HPC resource, check that you can connect - to the internet first. You might need to load a proxy module, for example. - - ***************************** - {{ app_name }} Python package - ***************************** - Using pip ========================== - Use pip to install the Python package from PyPI:: - - pip install {{ dist_name }}=="{{ app_version }}" - - Using conda - =========== + The recommended way to install {{ app_name }} is to + use pip to install the Python package from PyPI:: - Coming soon! + pip install {{ dist_name }} - ******************************** - Download CLI binaries (advanced) - ******************************** - - Binaries are available in two formats, corresponding to the two different formats that - PyInstaller `can generate `_: - - * A single executable file containing everything. - * A folder containing an executable and supporting files. - - Click below to download the {{ app_name }} binary for your platform: - - .. raw:: html - - {{ download_links_table_html }} + This installs the python package, which also gives the CLI version of {{ app_name }}. ************* Release notes @@ -105,3 +38,99 @@ Release notes for this version ({{app_version}}) are `available on GitHub `_. 
Use the version switcher in the top-right corner of the page to download/install other versions.
+
+    Alternative installation methods
+    ================================
+    Although *not currently recommended*,
+    advanced users may wish to use one of the :ref:`alternative installation methods `.
+
+
+    #############
+    Configuration
+    #############
+
+    {{ app_name }} uses a config file to control details of how it executes workflows.
+    A :ref:`default config file ` will be created the first time you submit a workflow.
+    This will work without modification on a personal machine;
+    however, if you are using {{ app_name }} on HPC you will likely need to make some
+    modifications to describe the job scheduler, to configure settings for multiple cores,
+    and to point to your {{ app_name }} environments file.
+
+    `Some examples `_ are given
+    for the University of Manchester's CSF.
+
+    The path to your config file can be found using ``{{ app_module }} manage get-config-path``,
+    or to open the config file directly, use ``{{ app_module }} open config``.
+
+    #############
+    Environments
+    #############
+
+    {{ app_name }} has the concept of environments, similar to Python virtual environments.
+    These are required so that tasks can run using the specific software they require.
+    Your {{ app_name }} environments must be defined in your environments (YAML) file before {{ app_name }}
+    can run workflows, and this environment file must be pointed to in the config file
+    via the ``environment_sources`` key.
+    Once this has been done,
+    your environment file can be opened using ``{{ app_module }} open env-source``.
+
+    Below is an example environments file that defines environments for some commonly used software.
+    This is not a complete list of all the software that can be used with {{ app_name }},
+    and domain-specific tools can be added to the environments file as required.
+
+    You may wish to use this as a template and modify it for your own computer,
+    in particular the ``setup`` sections for each environment.
+
+    .. 
code-block:: yaml + + - name: matlab_env + setup: | + module load apps/binapps/matlab/R2019a + module load apps/binapps/matlab/third-party-toolboxes/mtex/5.3 + executables: + + - label: compile_mtex + instances: + - command: compile-mtex <> <> + num_cores: 1 + parallel_mode: null + + - label: run_compiled_mtex + instances: + - command: ./run_<>.sh $MATLAB_HOME <> + num_cores: 1 + parallel_mode: null + + - label: run_mtex + instances: + - command: matlab -singleCompThread -batch "<> <>" + num_cores: 1 + parallel_mode: null + - command: matlab -batch "<> <>" + num_cores: + start: 2 + stop: 16 + parallel_mode: null + + - name: python_env + executables: + - label: python_script + instances: + - command: python <> <> + num_cores: + start: 1 + stop: 32 + parallel_mode: null + + - name: dream_3D_env + executables: + - label: dream_3D_runner + instances: + - command: /full/path/to/dream3d/DREAM3D-6.5.171-Linux-x86_64/bin/PipelineRunner + num_cores: 1 + parallel_mode: null + - label: python_script + instances: + - command: python <> <> + num_cores: 1 + parallel_mode: null From be4c79379ac4ec578c910a601cc58fc28e2cbff8 Mon Sep 17 00:00:00 2001 From: SJaffa Date: Thu, 8 May 2025 16:13:01 +0100 Subject: [PATCH 05/10] merge user docs changes --- .../advanced_workflow_concepts.rst | 309 +++++++++++++++ .../custom_task_schemas_and_workflows.rst | 362 ++++++++++++++++++ docs/source/user/getting_started/errors.rst | 30 ++ docs/source/user/getting_started/index.rst | 13 + .../getting_started/running_workflows.rst | 68 ++++ docs/source/user/how_to/config.rst | 102 +++-- docs/source/user/how_to/index.rst | 1 + docs/source/user/how_to/resources.rst | 72 ++++ docs/source/user/index.rst | 3 +- 9 files changed, 917 insertions(+), 43 deletions(-) create mode 100644 docs/source/user/getting_started/advanced_workflow_concepts.rst create mode 100644 docs/source/user/getting_started/custom_task_schemas_and_workflows.rst create mode 100644 docs/source/user/getting_started/errors.rst create mode 100644 docs/source/user/getting_started/index.rst create mode 100644 docs/source/user/getting_started/running_workflows.rst create mode 100644 docs/source/user/how_to/resources.rst diff --git a/docs/source/user/getting_started/advanced_workflow_concepts.rst b/docs/source/user/getting_started/advanced_workflow_concepts.rst new file mode 100644 index 0000000..9beaa9b --- /dev/null +++ b/docs/source/user/getting_started/advanced_workflow_concepts.rst @@ -0,0 +1,309 @@ +.. jinja:: first_ctx + +Advanced workflow concepts +########################### + +Resources +---------- + +Requesting resources can be done using a ``resources`` block, either for the whole workflow at the top level, + +.. code-block:: yaml + + resources: + any: + scheduler: sge # Setting the scheduler is not normally needed because a + # `default_scheduler` will be set in the config file. + scheduler_args: + shebang_args: --login + options: + -l: short + + +or at the task level + +.. code-block:: yaml + + - schema: simulate_VE_loading_damask + resources: + any: + # This will use two cores for input file generators and output file parsers + num_cores: 2 + main: + # Use 16 cores for the "main" part of the task (the simulation in this case) + num_cores: 16 + inputs: + ... + + +We can see from above that it is possible to request resources for subsets of the actions +in a task schema. A full list of the different options you can select resources for is given below. 
+
+- ``input_file_generator``
+- ``output_file_parser``
+- ``processing`` (a shortcut for ``input_file_generator`` + ``output_file_parser``)
+- ``main`` (the main part of the action, i.e. not ``processing``)
+- ``any`` (anything not already covered by one of the above options)
+
+These scopes are used when requesting resources (at the workflow or task level),
+and the same values can be used within the schema to select an ``environment``
+by ``scope`` e.g.
+
+.. code-block:: yaml
+
+    actions:
+    - environments:
+      - scope:
+          type: processing
+        environment: damask_parse_env
+      - scope:
+          type: main
+        environment: damask_env
+
+
+{{ app_name }} then looks for a match within your environment definitions for the requested
+resources, and runs the command that matches those resources.
+
+There are lots of :ref:`resource options `
+available that can be requested.
+
+Scheduler arguments can be passed like this e.g. to target high memory nodes:
+
+.. code-block:: yaml
+
+    resources:
+      any:
+        num_cores: 10
+        SGE_parallel_env: smp.pe
+        scheduler_args:
+          options:
+            -l: mem512
+
+Anything specified under `options` is passed directly to the scheduler as a jobscript command (i.e. isn't processed by {{ app_name }} at all).
+
+If you have set resource options at the top level (for the whole workflow), but would like to "unset" them for a particular task,
+you can pass an empty dictionary:
+
+.. code-block:: yaml
+
+    - schema: simulate_VE_loading_damask
+      resources:
+        main:
+          num_cores: 16
+          scheduler_args:
+            options: {} # "Clear" any previous options which have been set.
+      inputs:
+
+
+Task sequences
+----------------
+
+{{ app_name }} can run tasks over a set of independent input values.
+For this, you use a ``sequence``, with a ``nesting_order`` to control the nesting of the loops;
+you can also "zip" two or more lists of inputs by giving them the same level of nesting.
+Lower values of ``nesting_order`` act like the "outer" loop.
+
+.. code-block:: yaml
+
+    tasks:
+    - schema: my_schema
+      sequences:
+      - path: inputs.conductance_value
+        values:
+        - 0
+        - 100
+        - 200
+        nesting_order: 0
+
+Groups
+-------
+
+To combine outputs from multiple elements, you can use a ``group`` in a task schema:
+
+.. code-block:: yaml
+
+    - objective: my_task_schema
+      inputs:
+      - parameter: p2
+        group: my_group
+
+combined with a ``groups`` entry in the task itself:
+
+.. code-block:: yaml
+
+    - schema: my_task_schema
+      groups:
+      - name: my_group
+
+
+Then whichever parameters are linked with the group in the task schema will be received by the task as a list.
+
+Here is an example workflow using sequences and groups that you might wish to run to solidify your understanding:
+
+.. 
code-block:: yaml + + # groups_workflow.yaml + + template_components: + task_schemas: + - objective: s1 + inputs: + - parameter: p1 + outputs: + - parameter: p2 + actions: + - commands: + - command: echo $(( <> + 1 )) # This is printed to stdout + - command: echo $(( <> + 1 )) # This is captured as p2 + stdout: <> + - objective: s2 + inputs: + - parameter: p2 + group: my_group + outputs: + - parameter: p3 + actions: + - commands: + - command: echo <> # This one is printed to stdout + - command: echo $(( <> )) # This is captured as p3 + stdout: <> + tasks: + - schema: s1 + sequences: + - path: inputs.p1 + values: [1, 2] + groups: + - name: my_group + - schema: s2 + + +Task schema shortcuts +--------------------- + +Input file generators +~~~~~~~~~~~~~~~~~~~~~ + +``input_file_generators`` is a convenience shortcut for a python script which generates an input file +for a subsequent action within a task. It's more compact, easier to reference, and has more interaction options. +The first parameter in the input generator (python) function definition must be "path", +which is the file path to ``input_file``, the file you want to create. +Given this is a {{ app_name }} input file, the path is just the file name which will be created in the +execute directory. +The ``input_file`` must point to the label of a file in ``command_files``. +``from_inputs`` defines which of the task schema inputs are required for each of the ``input_file_generators``. + +.. code-block:: yaml + + task_schemas: + - objective: my_task_schema + actions: + - input_file_generators: + - input_file: my_command_file + from_inputs: + - my_input_1 + - my_input_2 + script: <> + +An example is given in [advanced_workflow.yaml](advanced_workflow.yaml), along with the alternative code which would be needed + +to achieve the same result without an input file generator. + +Output file parsers +~~~~~~~~~~~~~~~~~~~ + +``output_file_parsers`` is a shortcut for a python script which processes output files +from previous steps. +The function in the python script must have parameters for each of the files listed +in ``from_files``, and this function should return data in a dictionary. +The output file parser script can also have parameters for any of the task schema inputs, +and these are listed under an ``inputs`` key. +If you want to save results to a file, this can be done in the python function too, +but the function should return a dict. This can be hard-coded in the function, +or via an ``inputs: [path_to_output_file]`` line in the output file parser, +and it will come after the output files in the function signature. + +The "name" of the ``output_file_parsers`` is the parameter returned i.e. + +.. code-block:: yaml + + output_file_parsers: + return_parameter: # This should be listed as an output parameter for the task schema + from_files: + - command_file1 + - command_file2 + script: <> + save_files: + - command_file_you_want_to_save + inputs: + - input1 + - input2 + +The output_file_parser script that is run as the action should return one variable, +rather than a dictionary. This is different behaviour to +a "main" action script. +i.e. ``return the_data`` rather than ``return {"return_parameter": the_data}``. +This is because an output file parser only has one named output parameter, +so a dictionary isn't needed to distinguish different output parameters. + +The :ref:`previous example ` has been reworked and +expanded below to demonstrate ``input_file_generators`` and ``output_file_parsers``. + +.. 
code-block:: yaml + + # workflow.yaml + + template_components: + task_schemas: + - objective: process_some_data + inputs: + - parameter: input_data + outputs: + - parameter: parsed_output + actions: + - input_file_generators: + - input_file: my_input_file + from_inputs: + - input_data + script: <> + environments: + - scope: + type: any + environment: python_env + script_exe: python_script + script: <> + save_files: + - processed_file + output_file_parsers: + parsed_output: + from_files: + - my_input_file + - processed_file + script: <> + save_files: + - parsed_output + +This workflow uses the same python scripts as before, with the addition of + +.. code-block:: python + + # parse_output.py + + import json + def parse_output(my_input_file: str, processed_file: str): + """Do some post-processing of data files. + + In this instance, we're just making a dictionary containing both the input + and output data. + """ + with open(my_input_file, "r") as f: + input_data = json.load(f) + with open(processed_file, "r") as f: + processed_data = json.load(f) + + combined_data = {"input_data": input_data, "output_data": processed_data} + # Save file so we can look at the data + with open("parsed_output.json", "w") as f: + json.dump(combined_data, f, indent=2) + + return {"parsed_output": combined_data} diff --git a/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst b/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst new file mode 100644 index 0000000..23a44b6 --- /dev/null +++ b/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst @@ -0,0 +1,362 @@ +.. jinja:: first_ctx + +Writing custom workflows +######################## + +{{ app_name }} has a number of built-in :ref:`workflows `, which use combinations of the +built-in :ref:`task schemas `. +It is quite possible to mix and match these task schema into new workflows, +and indeed to write your own task schemas to achieve a particular task. + + +Workflow files +-------------- + +In-built {{ app_name }} workflows are split up over a few different files, +but for development, your workflow code can all go in one yaml file. +The workflow template has a top-level key ``template_components`` +underneath which come the ``task_schema``, ``environments`` and ``command_files`` keys. + +The workflow itself goes under a different top-level `tasks` key. + +Components of a task schema +--------------------------- + +Required keys +***************** +- ``objective`` (this is a name or label for the schema) +- ``actions`` (what the task schema actually "does") + +Optional keys +***************** +- ``inputs`` +- ``outputs`` + +{{ app_name }} syntax +--------------------- + +If you want to reference parameters in the action of your task schema, +it should be done using this syntax: +``<>``. + +Similarly, commands defined in an environment can be used like this: +``<>``, and files defined as :ref:`command_files ` +are referenced using ``<>`` e.g. + +.. code-block:: console + + actions: + - commands: + - command: <> job=sub_script_check input=<> interactive + + +Note that while command files can be referenced in an action, they cannot be referenced in this way as an input to a task schema. + +Python scripts however are executed slightly differently, and run the +function defined in your python file which has the same name as the python file. +The ``<> + + +where ``my_script.py`` would start with a function definition like this: + +.. code-block:: python + + def my_script(): + ... 
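+
+As a slightly fuller sketch, the script can take task inputs as function arguments and
+return its outputs as a dictionary (described further below); the names ``my_input`` and
+``my_output`` here are illustrative only:
+
+.. code-block:: python
+
+    # my_script.py
+
+    def my_script(my_input):
+        # The function name must match the file name so it can be found and run.
+        result = my_input * 2
+        # Outputs are returned as a dictionary keyed by output parameter name.
+        return {"my_output": result}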
+ + + +Passing variables around a workflow +----------------------------------- + +Python scripts that are run by top-level actions and which return values directly +(i.e. instead of saving to a file) should return a dictionary of values, +containing keys matching the output parameters defined in the task schema. +e.g. + +.. code-block:: python + + return {output_parameter_1: values, output_parameter_2: other_values} + + +In order for the dictionaries returned from tasks to be accessible to other tasks, +the task schemas needs to set the input and output type accordingly: + +.. code-block:: yaml + + ... + actions: + - script: <> + script_data_in: direct + script_data_out: direct + + +It might however be more appropriate to save results to files instead. + +In addition to passing variables directly, +tasks can read parameters from (and save to) various file formats including JSON and HDF5. + +An example of passing variables directly and via json files is given below. +{{ app_name }} writes the input parameters into a json file ``js_0_act_0_inputs.json``, +and the output into a file ``js_0_act_0_outputs.json``. +These file names are generated automatically, +and {{ app_name }} keeps track of where the various parameters are stored. +So if any parameters saved in json files (or passed directly) are needed as input for another function, +{{ app_name }} can pass them directly or via json as specified in the task schema. +An example is given of both combinations. + +To run this example, create a ``workflow.yaml`` file with the contents below, +along with the ``json_in_json_out.py``, ``json_in_direct_out.py``, and ``mixed_in_direct_out.py`` files. + + +.. code-block:: yaml + + # workflow.yaml + template_components: + task_schemas: + - objective: read_and_save_using_json + inputs: + - parameter: p1 + - parameter: p2 + actions: + - script: <> + script_data_in: json + script_data_out: json + script_exe: python_script + environments: + - scope: + type: any + environment: python_env + outputs: + - parameter: p3 + - objective: read_json_from_another_task + inputs: + - parameter: p3 + actions: + - script: <> + script_data_in: json + script_data_out: direct + script_exe: python_script + environments: + - scope: + type: any + environment: python_env + outputs: + - parameter: p4 + - objective: pass_mixed_from_another_task + inputs: + - parameter: p3 + - parameter: p4 + actions: + - script: <> + script_data_in: + p3: direct # previously saved as json in task read_and_save_using_json + p4: json # previously saved directly in task read_json_from_another_task + script_data_out: direct + script_exe: python_script + environments: + - scope: + type: any + environment: python_env + outputs: + - parameter: p5 + + tasks: + - schema: read_and_save_using_json + inputs: + p1: 1 + p2: 2 + - schema: read_json_from_another_task + - schema: pass_mixed_from_another_task + + +.. code-block:: python + + # json_in_json_out.py + import json + + def json_in_json_out(_input_files, _output_files): + with open(_input_files["json"]) as json_data: + inputs = json.load(json_data) + p1 = inputs["p1"] + p2 = inputs["p2"] + + p3 = p1 + p2 + with open(_output_files["json"], 'w') as f: + json.dump({"p3": p3}, f) + + +.. code-block:: python + + # json_in_direct_out.py + import json + + def json_in_direct_out(_input_files): + with open(_input_files["json"]) as json_data: + inputs = json.load(json_data) + p3 = inputs["p3"] + p4 = p3 + 1 + + print(f"{p3=}") + print(f"{p4=}") + + return {"p4": p4} + + +.. 
code-block:: python + + # mixed_in_json_out.py + import json + + def mixed_in_direct_out(p3, _input_files): + with open(_input_files["json"]) as json_data: + inputs = json.load(json_data) + p4 = inputs["p4"] + p5 = p3 + p4 + + print(f"{p3=}") + print(f"{p4=}") + print(f"{p5=}") + + return {"p5": p5} + +The particular variables names used to pass parameters using json/HDF5 depend on +which language is being used. +For example using MATLAB uses this syntax ``inputs_JSON_path``, ``outputs_HDF5_path`` +instead of the python equivalents ``_input_files`` and ``_output_files``. +See the MTEX examples for more details. + +Writing a workflow +---------------------------- + +A workflow is just a list of tasks, which are run like this + +.. code-block:: yaml + + tasks: + - schema: my_task_schema + inputs: + my_input: input_value + + +A task can find output variables from previous tasks, and use them +as inputs. There is generally no need specify them explicitly, +but this can be done by using the ``input_sources`` key within a task +to tell {{ app_name }} where to obtain input values for a given input parameter, +in combination with the dot notation e.g. + +.. code-block:: yaml + + - schema: print + # Explicitly reference output parameter from a task + input_sources: + string_to_print: task.my_other_task_schema + + +When running a workflow with {{ app_name }}, the required files are copied into a directory +that {{ app_name }} creates, and any output files are saved into the ``execute`` directory. +If you want to keep any of theses files, you should tell {{ app_name }} to copy them to the ``artifacts`` +directory using ``save_files``: + +.. code-block:: yaml + + task_schemas: + - objective: my_task_schema + inputs: + - parameter: my_input + outputs: + - parameter: my_output + actions: + - environments: ... + commands: ... + save_files: + - my_command_file + + +Example workflow +----------------- + +.. _command_files_example_workflow: + +Here we have an example workflow which illustrates use of command files. +To run this example, create a ``workflow.yaml`` file with the contents below, +along with the ``generate_input_file.py`` and ``process_input_file.py`` files. + +Modify the paths to the python scripts under the ``action`` keys to give the full path +to your files. + +You can then run the workflow using ``{{ app_module }} go workflow.yaml``. + +.. code-block:: yaml + + # workflow.yaml + template_components: + task_schemas: + - objective: process_data + inputs: + - parameter: input_data + - parameter: path + default_value: input_file.json + actions: + - script: <> + script_data_in: direct + script_exe: python_script + save_files: # A copy of any command files listed here will be saved in the the artifacts directory + - my_input_file + environments: + - scope: + type: any + environment: python_env + - script: <> + script_exe: python_script + environments: + - scope: + type: any + environment: python_env + save_files: + - processed_file + + command_files: + - label: my_input_file + name: + name: input_file.json + - label: processed_file + name: + name: processed_file.json + + + tasks: + - schema: process_data + inputs: + input_data: [1, 2, 3, 4] + path: input_file.json + +.. code-block:: python + + # generate_input_file.py + import json + def generate_input_file(path: str, input_data: list): + """Generate an input file""" + with open(path, "w") as f: + json.dump(input_data, f, indent=2) + +.. 
code-block:: python
+
+    # process_input_file.py
+    import json
+    def process_input_file():
+        """Process an input file.
+
+        This could be a materials science simulation for example.
+        """
+        with open("input_file.json", "r") as f:
+            data = json.load(f)
+        data = [item * 2 for item in data]
+        with open("processed_file.json", "w") as f:
+            json.dump(data, f, indent=2)
diff --git a/docs/source/user/getting_started/errors.rst b/docs/source/user/getting_started/errors.rst
new file mode 100644
index 0000000..4e6c154
--- /dev/null
+++ b/docs/source/user/getting_started/errors.rst
@@ -0,0 +1,30 @@
+.. jinja:: first_ctx
+
+Common errors
+#############
+
+Certain errors have cropped up multiple times for {{ app_name }} users.
+Here's some advice for those errors.
+
+Submitting a workflow
+-----------------------------
+
+If you get an error which (often) starts with
+
+.. code-block:: console
+
+    ERROR {{ app_module }}.persistence: batch update exception!
+
+and ends with something like
+
+.. code-block:: console
+
+    File "hpcflow/sdk/app.py", line 1150, in read_known_submissions_file
+    File "hpcflow/sdk/app.py", line 1122, in _parse_known_submissions_line
+    ValueError: not enough values to unpack (expected 8, got 6)
+
+This is usually caused by updating the {{ app_name }} version.
+Leftover submissions info causes the newer {{ app_name }} version to get confused.
+The fix? ``{{ app_module }} manage clear-known-subs``.
+This will delete the known submissions file, and the next time you submit a workflow,
+{{ app_name }} will create a new one.
diff --git a/docs/source/user/getting_started/index.rst b/docs/source/user/getting_started/index.rst
new file mode 100644
index 0000000..e85da95
--- /dev/null
+++ b/docs/source/user/getting_started/index.rst
@@ -0,0 +1,13 @@
+Getting started
+###############
+
+This section is intended to help new users run their first workflows.
+
+
+.. toctree::
+   :maxdepth: 3
+
+   Run a demo workflow 
+   Write your own task schema and workflow 
+   Advanced workflow concepts 
+   Common errors 
diff --git a/docs/source/user/getting_started/running_workflows.rst b/docs/source/user/getting_started/running_workflows.rst
new file mode 100644
index 0000000..ceb7362
--- /dev/null
+++ b/docs/source/user/getting_started/running_workflows.rst
@@ -0,0 +1,68 @@
+.. jinja:: first_ctx
+
+Demo workflows
+----------------
+A good way to get started with {{ app_name }} is to run one of the built-in demo workflows.
+This will also test your installation, configuration, and some of your environments.
+
+Submit a workflow
+~~~~~~~~~~~~~~~~~
+
+{{ app_name }} comes with some demo workflows, which can be listed using
+
+.. code-block:: console
+
+    {{ app_module }} demo-workflow --list
+
+
+We can run the following command to copy the in-built workflow file to the current directory
+(note the final dot),
+
+.. code-block:: console
+
+    {{ app_module }} demo-workflow copy . 
+
+which we can then use to submit the workflow.
+
+.. code-block:: console
+
+    {{ app_module }} go 
+
+This small workflow should complete in less than 30s.
+Note that there is also a convenience shortcut for the demo workflows which combines
+the copy-then-submit pattern we saw above:
+
+.. code-block:: console
+
+    {{ app_module }} demo-workflow go 
+
+In general, however, workflows are first written in a YAML file and then submitted using
+``{{ app_module }} go WORKFLOW_FILE``. 
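+
+For example, a minimal workflow file might look like the following sketch, where the schema
+name ``my_schema`` and its input are placeholders for any built-in or custom task schema:
+
+.. code-block:: yaml
+
+    # workflow.yaml
+    tasks:
+      - schema: my_schema
+        inputs:
+          my_input: 1
+
+Submitting this file with ``{{ app_module }} go workflow.yaml`` then creates and runs the workflow.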
+ +Check the status of a workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After submitting a workflow, you can check whether it has run successfully using + +.. code-block:: console + + {{ app_module }} show -f + +For clarification of the output, a legend can be shown using + +.. code-block:: console + + {{ app_module }} show --legend + + +Cancel a workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes you might want to cancel a workflow that is running. Use + +.. code-block:: console + + {{ app_module }} cancel WORKFLOW_REF + +where ``WORKFLOW_REF`` is either the path to the workflow directory, +or the ID of the workflow displayed by ``{{ app_module }} show``. diff --git a/docs/source/user/how_to/config.rst b/docs/source/user/how_to/config.rst index 95d96ea..595ee46 100644 --- a/docs/source/user/how_to/config.rst +++ b/docs/source/user/how_to/config.rst @@ -1,89 +1,107 @@ +.. jinja:: first_ctx + Configuration how-tos --------------------- Get and set config items ~~~~~~~~~~~~~~~~~~~~~~~~ -Using the config sub-command in the |app_name| CLI, we can get configuration items like this: +The configuration file is used to store settings that control the behavior, +such as the machine name, the log level, and the location of the template component source files. +These can also be set and retrieved using the CLI or Python API. -.. jinja:: first_ctx +.. tab-set:: + + .. tab-item:: CLI + + Using the config sub-command in the |app_name| CLI, we can get configuration items like this: - .. code-block:: console + .. code-block:: console - {{ app_package_name }} config get machine + {{ app_package_name }} config get machine - Items can be set like this: + Items can be set like this: - .. code-block:: console + .. code-block:: console - {{ app_package_name }} config set machine my-machine-name + {{ app_package_name }} config set machine my-machine-name - ------------ + .. tab-item:: Python API - In the Python API, we can interact with the |app_name| configuration as below. Note that we must call :meth:`config.save ` to make the config changes persistent, otherwise any changes made will only be temporary. + In the Python API, we can interact with the |app_name| configuration as below. Note that we must call :meth:`config.save ` to make the config changes persistent, otherwise any changes made will only be temporary. - .. code-block:: python + .. code-block:: python - import {{ app_module }} as {{ app_docs_import_conv }} + import {{ app_module }} as {{ app_docs_import_conv }} - # print the value of the `machine` item: - print({{ app_docs_import_conv }}.config.machine) + # print the value of the `machine` item: + print({{ app_docs_import_conv }}.config.machine) - # set the value of the `machine` item: - {{ app_docs_import_conv }}.config.machine = "my-machine-name" + # set the value of the `machine` item: + {{ app_docs_import_conv }}.config.machine = "my-machine-name" - # optionally save the changes to the config file: - {{ app_docs_import_conv }}.config.save() + # optionally save the changes to the config file: + {{ app_docs_import_conv }}.config.save() - If you want to change a configuration item temporarily (just for the current session), you can also provide configuration item values to `load_config` and `reload_config`, like this: + If you want to change a configuration item temporarily (just for the current session), you can also provide configuration item values to `load_config` and `reload_config`, like this: - .. code-block:: python + .. 
code-block:: python - import {{ app_module }} as {{ app_docs_import_conv }} + import {{ app_module }} as {{ app_docs_import_conv }} - # modify the log console level just for this session: - {{ app_docs_import_conv }}.load_config(log_console_level="debug") + # modify the log console level just for this session: + {{ app_docs_import_conv }}.load_config(log_console_level="debug") - See the configuration :ref:`reference documentation ` for a listing of configurable items. +See the configuration :ref:`reference documentation ` for a listing of configurable items. Reset the config to default values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Usually, when |app_name| is invoked, the first thing it does is load the configuration file. However, if you have updated to a newer, incompatible version, sometime your existing configuration file will fail validation. In this case, you can reset the configuration file to its default value by running the following CLI command: +Usually, when |app_name| is invoked, the first thing it does is load the configuration file. However, if you have updated to a newer, incompatible version, sometime your existing configuration file will fail validation. In this case, you can reset the configuration file to its default value. -.. jinja:: first_ctx +.. tab-set:: + + .. tab-item:: CLI - .. code-block:: console + Reset configuration to the default value by running the following CLI command: + + .. code-block:: console - {{ app_package_name }} manage reset-config + {{ app_package_name }} manage reset-config - Within the Python API, the config can be reset like this: + .. tab-item:: Python API - .. code-block:: python + Within the Python API, the config can be reset like this: - import {{ app_module }} as {{ app_docs_import_conv }} + .. code-block:: python - {{ app_docs_import_conv }}.reset_config() + import {{ app_module }} as {{ app_docs_import_conv }} - .. warning:: - - Resetting the configuration will remove any custom configuration you had, including pointers to template component source files (like environment source files). If you want to make a copy of the old file before resetting, you can retrieve its file path like this: :code:`{{ app_package_name }} manage get-config-path`, with the CLI, or, :code:`{{ app_docs_import_conv }}.get_config_path()`, with the Python API. + {{ app_docs_import_conv }}.reset_config() + +.. warning:: + + Resetting the configuration will remove any custom configuration you had, including pointers to template component source files (like environment source files). If you want to make a copy of the old file before resetting, you can retrieve its file path like this: :code:`{{ app_package_name }} manage get-config-path`, with the CLI, or, :code:`{{ app_docs_import_conv }}.get_config_path()`, with the Python API. Clear the known-submissions file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. jinja:: first_ctx +The known-submissions file is used to track running and recent workflow, and is used by the :code:`{{ app_docs_import_conv }}.show` / :code:`{{ app_package_name }} show` command. Sometimes you might need to clear this file, which can be done like this: - The known-submissions file is used to track running and recent workflow, and is used by the :code:`{{ app_docs_import_conv }}.show` / :code:`{{ app_package_name }} show` command. Sometimes you might need to clear this file, which can be done like this: +.. tab-set:: - .. code-block:: console + .. tab-item:: CLI + + .. 
code-block:: console
 
-        {{ app_package_name }} manage clear-known-subs
+            {{ app_package_name }} manage clear-known-subs
 
-    Within the Python API, the equivalent command is:
+    .. tab-item:: Python API
+
+        Within the Python API, the equivalent command is:
 
-    .. code-block:: python
+        .. code-block:: python
 
-        import {{ app_module }} as {{ app_docs_import_conv }}
+            import {{ app_module }} as {{ app_docs_import_conv }}
 
-        {{ app_docs_import_conv }}.clear_known_submissions_file()
+            {{ app_docs_import_conv }}.clear_known_submissions_file()
diff --git a/docs/source/user/how_to/index.rst b/docs/source/user/how_to/index.rst
index ccc8232..5b778a4 100644
--- a/docs/source/user/how_to/index.rst
+++ b/docs/source/user/how_to/index.rst
@@ -14,3 +14,4 @@ This help snippets guide you through common quick tasks in |app_name|.
     Environments
     Meta-tasks
     Loops
+    Resources
\ No newline at end of file
diff --git a/docs/source/user/how_to/resources.rst b/docs/source/user/how_to/resources.rst
new file mode 100644
index 0000000..bfd9ff0
--- /dev/null
+++ b/docs/source/user/how_to/resources.rst
@@ -0,0 +1,72 @@
+.. jinja:: first_ctx
+
+    Resources
+    ---------
+
+    Shell and scheduler arguments
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    When submitting a workflow, {{ app_name }} generates jobscripts that are submitted to
+    the scheduler (if using one), or invoked directly (if not). Depending on how the
+    scheduler is configured by your HPC administrators, you may need to add extra
+    arguments to the shebang line of the jobscript. A shebang line usually looks something
+    like this:
+
+    .. code-block:: bash
+
+        #!/bin/bash
+
+    For example, on an HPC system, you might need to execute the job submission script via
+    a bash *login* shell, meaning the first line in your jobscript should look like this:
+
+    .. code-block:: bash
+
+        #!/bin/bash --login
+
+    To achieve this in {{ app_name }}, we can edit the configuration's `shells` block to
+    look like this (note this excerpt is not a valid configuration on its own!):
+
+    .. code-block:: yaml
+
+        config:
+          shells:
+            bash:
+              defaults:
+                executable_args: [--login]
+
+    In this way, we ensure that wherever a ``bash`` shell command is constructed (such as
+    when constructing the shebang line for a jobscript), ``--login`` will be appended to
+    the shell executable command.
+
+    We can also modify the shell executable path like this:
+
+    .. code-block:: yaml
+
+        config:
+          shells:
+            bash:
+              defaults:
+                executable: /bin/bash # /bin/bash is the default value
+                executable_args: [--login]
+
+    Additionally, there is one other place where the shell command is constructed, which
+    is when {{ app_name }} invokes a commands file to execute a run. Typically, the shell
+    command that you set in the configuration above is sufficient for both. However, if you
+    need these two scenarios to use different shell executables or executable arguments,
+    you can additionally modify the scheduler's ``shebang_executable`` default value in
+    the configuration (which overrides the ``shell`` configuration) like this:
+
+    .. code-block:: yaml
+
+        config:
+          shells:
+            bash:
+              defaults:
+                executable_args: [--login] # applied when invoking command files
+          schedulers:
+            sge:
+              defaults:
+                shebang_executable: [/path/to/bash/executable, arg_1, arg_2] # applied to scheduler shebang only
+
+    Note that in this case (for ``shebang_executable``), the shell executable path must
+    also be specified, in addition to the shell arguments. 
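+
+    As an illustrative sketch of the distinction, with the configuration above (the bash path
+    and ``arg_1``/``arg_2`` are placeholders), the jobscript submitted to the scheduler would
+    begin with
+
+    .. code-block:: bash
+
+        #!/path/to/bash/executable arg_1 arg_2
+
+    while command files for individual runs would still be invoked via ``/bin/bash --login``.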
diff --git a/docs/source/user/index.rst b/docs/source/user/index.rst index ff57afc..2f37905 100644 --- a/docs/source/user/index.rst +++ b/docs/source/user/index.rst @@ -7,6 +7,7 @@ .. toctree:: :maxdepth: 3 - + + Getting started How-to guides Tutorials From cc61b9b43fc40920c7ff2f7fd44cd44335dd7f58 Mon Sep 17 00:00:00 2001 From: S Jaffa Date: Fri, 23 May 2025 12:22:48 +0100 Subject: [PATCH 06/10] Apply suggestions from code review Co-authored-by: Adam Plowman --- .../advanced_workflow_concepts.rst | 34 +++++++++---------- .../custom_task_schemas_and_workflows.rst | 2 +- docs/source/user/getting_started/errors.rst | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/source/user/getting_started/advanced_workflow_concepts.rst b/docs/source/user/getting_started/advanced_workflow_concepts.rst index 9beaa9b..3601a59 100644 --- a/docs/source/user/getting_started/advanced_workflow_concepts.rst +++ b/docs/source/user/getting_started/advanced_workflow_concepts.rst @@ -14,12 +14,12 @@ Requesting resources can be done using a ``resources`` block, either for the who any: scheduler: sge # Setting the scheduler is not normally needed because a # `default_scheduler` will be set in the config file. + shell_args: + executable_args: ["--login"] scheduler_args: - shebang_args: --login - options: + directives: -l: short - or at the task level .. code-block:: yaml @@ -27,11 +27,11 @@ or at the task level - schema: simulate_VE_loading_damask resources: any: - # This will use two cores for input file generators and output file parsers - num_cores: 2 + # This will use two cores for input file generators and output file parsers + num_cores: 2 main: - # Use 16 cores for the "main" part of the task (the simulation in this case) - num_cores: 16 + # Use 16 cores for the "main" part of the task (the simulation in this case) + num_cores: 16 inputs: ... @@ -72,14 +72,14 @@ Scheduler arguments can be passed like this e.g. to target high memory nodes: .. code-block:: yaml resources: - any: - num_cores: 10 - SGE_parallel_env: smp.pe - scheduler_args: - options: - -l: mem512 + any: + num_cores: 10 + SGE_parallel_env: smp.pe + scheduler_args: + directives: + -l: mem512 -Anything specified under `options` is passed directly to the scheduler as a jobscript command (i.e. isn't processed by {{ app_name }} at all). +Anything specified under `directives` is passed directly to the scheduler as a jobscript command (i.e. isn't processed by {{ app_name }} at all). If you have set resource options at the top level (for the whole workflow), but would like to "unset" them for a particular task, @@ -90,9 +90,9 @@ you can pass an empty dictionary: - schema: simulate_VE_loading_damask resources: main: - num_cores: 16 - scheduler_args: - options: {} # "Clear" any previous options which have been set. + num_cores: 16 + scheduler_args: + directives: {} # "Clear" any previous directives which have been set. inputs: diff --git a/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst b/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst index 23a44b6..5861851 100644 --- a/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst +++ b/docs/source/user/getting_started/custom_task_schemas_and_workflows.rst @@ -291,7 +291,7 @@ along with the ``generate_input_file.py`` and ``process_input_file.py`` files. Modify the paths to the python scripts under the ``action`` keys to give the full path to your files. -You can then run the workflow using ``{{ app_module }} go workflow.yaml``. 
+You can then run the workflow using ``{{ app_package_name }} go workflow.yaml``.
 
 .. code-block:: yaml
 
     # workflow.yaml
diff --git a/docs/source/user/getting_started/errors.rst b/docs/source/user/getting_started/errors.rst
index 4e6c154..3f7f250 100644
--- a/docs/source/user/getting_started/errors.rst
+++ b/docs/source/user/getting_started/errors.rst
@@ -25,6 +25,6 @@ and ends with something like
 
 This is usually caused by updating the {{ app_name }} version.
 Leftover submissions info causes the newer {{ app_name }} version to get confused.
-The fix? ``{{ app_module }} manage clear-known-subs``.
+The fix? ``{{ app_package_name }} manage clear-known-subs``.
 This will delete the known submissions file, and the next time you submit a workflow,
 {{ app_name }} will create a new one.
From 22de7a46ba9cd9c856835840042385f1c1969197 Mon Sep 17 00:00:00 2001
From: SJaffa 
Date: Fri, 23 May 2025 12:18:56 +0100
Subject: [PATCH 07/10] Less example environments

---
 docs/source/installation.rst | 72 +++++-----------
 1 file changed, 13 insertions(+), 59 deletions(-)

diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 6d6bb01..c65596a 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -71,66 +71,20 @@
     Your {{ app_name }} environments must be defined in your environments (YAML) file before {{ app_name }}
     can run workflows, and this environment file must be pointed to in the config file
     via the ``environment_sources`` key.
-    Once this has been done,
-    your environment file can be opened using ``{{ app_module }} open env-source``.
+    Once this has been done, your environment file can be opened using ``{{ app_module }} open env-source``.
 
-    Below is an example environments file that defines environments for some commonly used software.
-    This is not a complete list of all the software that can be used with {{ app_name }},
-    and domain-specific tools can be added to the environments file as required.
-
-    You may wish to use this as a template and modify it for your own computer,
-    in particular the ``setup`` sections for each environment.
+    Below is an example environments file that defines an environment for running Python scripts.
+    Domain-specific tools can be added to the environments file as required, each with their own
+    setup instructions for loading that tool on your machine.
 
     .. 
code-block:: yaml - - name: matlab_env - setup: | - module load apps/binapps/matlab/R2019a - module load apps/binapps/matlab/third-party-toolboxes/mtex/5.3 - executables: - - - label: compile_mtex - instances: - - command: compile-mtex <> <> - num_cores: 1 - parallel_mode: null - - - label: run_compiled_mtex - instances: - - command: ./run_<>.sh $MATLAB_HOME <> - num_cores: 1 - parallel_mode: null - - - label: run_mtex - instances: - - command: matlab -singleCompThread -batch "<> <>" - num_cores: 1 - parallel_mode: null - - command: matlab -batch "<> <>" - num_cores: - start: 2 - stop: 16 - parallel_mode: null - - - name: python_env - executables: - - label: python_script - instances: - - command: python <> <> - num_cores: - start: 1 - stop: 32 - parallel_mode: null - - - name: dream_3D_env - executables: - - label: dream_3D_runner - instances: - - command: /full/path/to/dream3d/DREAM3D-6.5.171-Linux-x86_64/bin/PipelineRunner - num_cores: 1 - parallel_mode: null - - label: python_script - instances: - - command: python <> <> - num_cores: 1 - parallel_mode: null + - name: python_env + executables: + - label: python_script + instances: + - command: python "<>" <> + num_cores: + start: 1 + stop: 32 + parallel_mode: null From b223c7a864dedff20167632476585b88a3fe83b4 Mon Sep 17 00:00:00 2001 From: SJaffa Date: Fri, 23 May 2025 12:19:57 +0100 Subject: [PATCH 08/10] Add jinja context --- docs/source/reference/glossary.rst | 116 +++++++++++++++-------------- 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/docs/source/reference/glossary.rst b/docs/source/reference/glossary.rst index 2587ff5..eb21408 100644 --- a/docs/source/reference/glossary.rst +++ b/docs/source/reference/glossary.rst @@ -1,89 +1,91 @@ -Glossary -======== +.. jinja:: first_ctx -API ---- + Glossary + ======== -Application programming interface. {{ app_name }}'s API allows us to design and execute workflows from within a Python interpreter or Jupyter notebook. + API + --- -.. _def_command_files: + Application programming interface. {{ app_name }}'s API allows us to design and execute workflows from within a Python interpreter or Jupyter notebook. -Command files -------------- -If you want to refer to any files that are used as inputs or output, -they should be listed under ``command_files`` in the workflow file + .. _def_command_files: -.. code-block:: console + Command files + ------------- + If you want to refer to any files that are used as inputs or output, + they should be listed under ``command_files`` in the workflow file - command_files: - - label: new_inp_file - name: - name: friction_conductance.inp + .. code-block:: console + command_files: + - label: new_inp_file + name: + name: friction_conductance.inp -CLI ---- -Command line interface. The CLI is typically how we interact with {{ app_name }} on HPC systems. + CLI + --- -cluster -------- + Command line interface. The CLI is typically how we interact with {{ app_name }} on HPC systems. -See HPC + cluster + ------- + See HPC -Environment/virtual environment -------------------------------- -An environment is an isolated set of installed software. -Using environments allows you to have multiple copies of the same software installed in different environments so you can run different versions, or to run two pieces of software with competing dependencies on the same machine. -Using and sharing environments helps make your work reproducible because someone can use the same environment on a different machine and be sure they have the same versions of everything. 
+ Environment/virtual environment + ------------------------------- + An environment is an isolated set of installed software. + Using environments allows you to have multiple copies of the same software installed in different environments so you can run different versions, or to run two pieces of software with competing dependencies on the same machine. + Using and sharing environments helps make your work reproducible because someone can use the same environment on a different machine and be sure they have the same versions of everything. -HPC ---- -High-performance computer/computing + HPC + --- + High-performance computer/computing -jobscript ---------- -A job submission script that is used to queue a job on a batch scheduler system, such as SLURM or SGE. -Jobscripts are generated by {{ app_name }} during workflow submission. + jobscript + --------- -.. _def_task: + A job submission script that is used to queue a job on a batch scheduler system, such as SLURM or SGE. + Jobscripts are generated by {{ app_name }} during workflow submission. -Tasks -------------- -These are actual usages of a :ref:`task schema `, run with defined inputs. + .. _def_task: -.. _def_task_schema: + Tasks + ------------- + These are actual usages of a :ref:`task schema `, run with defined inputs. -Task schema -------------- -This is a template for a task you want to run, -with definitions of the input and outputs that are expected. + .. _def_task_schema: -{{ app_name }} has many :ref:`built-in task schemas `, but you may want to write your own. + Task schema + ------------- + This is a template for a task you want to run, + with definitions of the input and outputs that are expected. -.. _def_workflow: + {{ app_name }} has many :ref:`built-in task schemas `, but you may want to write your own. -Workflow --------- + .. _def_workflow: -A pipeline that processes data in some way. -A workflow is a list of tasks that run one after the other. + Workflow + -------- + A pipeline that processes data in some way. + A workflow is a list of tasks that run one after the other. -.. _def_workflow_template: -Workflow template ------------------- + .. _def_workflow_template: -A workflow template parameterises a workflow, -providing the required input values for the task schemas of the workflow. -However, it doesn't actually run the :ref:`workflow `. -A workflow template is usually just the list of tasks, -but can optionally include {{ app_name }} environment, -the :ref:`task schemas `, and the :ref:`command files `. + Workflow template + ------------------ + + A workflow template parameterises a workflow, + providing the required input values for the task schemas of the workflow. + However, it doesn't actually run the :ref:`workflow `. + A workflow template is usually just the list of tasks, + but can optionally include {{ app_name }} environment, + the :ref:`task schemas `, and the :ref:`command files `. 
From 0d190b7c97a13f6d0f2aff5f0ad39ed0b05b779e Mon Sep 17 00:00:00 2001
From: SJaffa 
Date: Fri, 23 May 2025 15:35:52 +0100
Subject: [PATCH 09/10] Remove broken link to example

---
 .../user/getting_started/advanced_workflow_concepts.rst | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/docs/source/user/getting_started/advanced_workflow_concepts.rst b/docs/source/user/getting_started/advanced_workflow_concepts.rst
index 3601a59..12cb346 100644
--- a/docs/source/user/getting_started/advanced_workflow_concepts.rst
+++ b/docs/source/user/getting_started/advanced_workflow_concepts.rst
@@ -205,10 +205,6 @@ The ``input_file`` must point to the label of a file in ``command_files``.
           - my_input_2
         script: <>
 
-An example is given in [advanced_workflow.yaml](advanced_workflow.yaml), along with the alternative code which would be needed
-
-to achieve the same result without an input file generator.
-
 Output file parsers
 ~~~~~~~~~~~~~~~~~~~
 
From 25161a76a7f62b805a94dc63100368803766b465 Mon Sep 17 00:00:00 2001
From: SJaffa 
Date: Thu, 29 May 2025 10:06:38 +0100
Subject: [PATCH 10/10] Fix last matflow reference

---
 docs/source/user/getting_started/advanced_workflow_concepts.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user/getting_started/advanced_workflow_concepts.rst b/docs/source/user/getting_started/advanced_workflow_concepts.rst
index 12cb346..01feaee 100644
--- a/docs/source/user/getting_started/advanced_workflow_concepts.rst
+++ b/docs/source/user/getting_started/advanced_workflow_concepts.rst
@@ -64,7 +64,7 @@ by ``scope`` e.g.
 
 {{ app_name }} then looks for a match within your environment definitions for the requested
 resources, and runs the command that matches those resources.
 
-There are lots of :ref:`resource options `
+There are lots of :ref:`resource options `
 available that can be requested.
 
 Scheduler arguments can be passed like this e.g. to target high memory nodes: