diff --git a/docs/source/conf.py b/docs/source/conf.py index 71fbcab..1c7acaa 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -203,6 +203,7 @@ def prepare_task_schema_action_info(app: BaseApp): with open("config.jsonc") as fp: jsonc_str = fp.read() + # Strip out comments denoted by // to leave a valid JSON file json_str = re.sub( r'\/\/(?=([^"]*"[^"]*")*[^"]*$).*', "", jsonc_str, flags=re.MULTILINE ) @@ -226,6 +227,8 @@ def prepare_task_schema_action_info(app: BaseApp): # distribution name (i.e. name on PyPI): with open("../../pyproject.toml") as fp: + dist_name = tomlkit.load(fp)["tool"]["poetry"]["name"] + supported_python_versions = tomlkit.load(fp)["tool"]["poetry"]["dependencies"]["python"] pyproject_config = tomlkit.load(fp) dist_name = pyproject_config["tool"]["poetry"]["name"] supported_python = pyproject_config["tool"]["poetry"]["dependencies"]["python"] diff --git a/docs/source/user/tutorials/index.rst b/docs/source/user/tutorials/index.rst index eeb4d1e..c517d9f 100644 --- a/docs/source/user/tutorials/index.rst +++ b/docs/source/user/tutorials/index.rst @@ -4,3 +4,5 @@ Tutorials .. toctree:: :maxdepth: 1 + Beginner: Install MatFlow on your local machine + diff --git a/docs/source/user/tutorials/install-locally.rst b/docs/source/user/tutorials/install-locally.rst new file mode 100644 index 0000000..fd1c47b --- /dev/null +++ b/docs/source/user/tutorials/install-locally.rst @@ -0,0 +1,93 @@ +.. jinja:: first_ctx + +################################################ +Tutorial: Install {{ app_name }} on your local machine +################################################ + +This tutorial will guide you through the process of installing {{ app_name }} on your local machine (laptop or desktop), creating and running some example workflows. +This tutorial is intended for users who are new to {{ app_name }} and want to understand the setup and terminology. 
+Most workflows used in your research will be too large to run on your local machine, +but this tutorial will help you understand the basics of how {{ app_name }} works before you move to setting it up on a cluster. + +Step 1: Set up a Python environment +==================================== + +The first step is to set up a Python environment on your local machine. + +**If you have not already installed Python**, you can download the latest version of Python from the `Python website <https://www.python.org/downloads/>`_. +Follow the instructions on the website for your operating system. + +**If you have already installed Python**, you can check the version of Python installed on your machine by running +``python --version``. + +Check that your version matches one of the ones supported by {{ app_name }}. +You can find the supported Python versions in the :ref:`installation instructions `_. +If your version is not supported, you may need to update to a newer version of Python. + +Next, you will need to set up a virtual environment to install {{ app_name }} and its dependencies. +A virtual environment is a self-contained directory that contains a particular version of Python with all the libraries and dependencies you install. +This allows you to install packages without affecting the system Python installation or other projects, +and when you run a command inside that environment you are certain which versions are being used. + +To create a virtual environment, you can use the `venv <https://docs.python.org/3/library/venv.html>`_ module that comes with Python. +Follow the instructions in the `Python Packaging Guide <https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/>`_ to create and activate a virtual environment. +The convention is to call your environment ``.venv``, but you can call it whatever you like. +We recommend calling it ``{{ app_module }}-env`` to make it clear that this environment is for {{ app_name }}. + +When the environment is activated, you should see the name of the virtual environment in brackets in your terminal prompt. 
+Whenever you are working with Python in the terminal, you can check if it is accessing your system installation of Python or a virtual environment by running ``which python``. +This will print out the path to the Python executable it is calling, so currently the path should be inside the virtual environment folder you just created. + +Step 2: Install {{ app_name }} +======================= + +Once you have created and activated a Python environment (check for the environment name in brackets in your prompt), you can install {{ app_name }} using pip by running +``pip install {{ dist_name }}``. + +This will install the latest version of {{ app_name }} from the Python Package Index (PyPI), and all the dependencies it needs. +Once it has finished, check that {{ app_name }} has been installed correctly by running +``{{ app_module }} --version``. + +This should print the version of {{ app_name }} that you have installed. +If you see an error message saying it doesn't recognise "{{ app_module }}" as a command name, check that you have activated the correct virtual environment and that you have installed {{ app_name }} correctly. + +Step 3: Configure {{ app_name }} for your machine +======================================== + +Now that you have installed {{ app_name }}, you need to set it up for your machine. +{{ app_name }} uses a configuration file to store information about the machine you are running on, such as the number of cores available and the locations of important folders. +This will be stored in your user home directory so that it can be read by {{ app_name }} no matter what project you are working on, or what folder you are working in. + +The configuration file is called ``config.yml`` and is stored in the ``~/.{{ app_name }}-new`` directory (``~`` is a shortcut for your user home directory, and the ``.`` at the start of the directory name indicates that this is a hidden folder). +When you first install {{ app_name }}, the directory and file will not exist. 
+You can either make it yourself or run ``{{ app_name }} init`` to create the ``~/.{{ app_name }}-new`` directory and a ``config.yml`` file inside it with the minimum default settings. + +Step 4: Define workflow +======================== + +Now that you have installed {{ app_name }} and set up the configuration file, you can start defining :ref:`workflows <def_workflow>`. +{{ app_name }} uses a YAML file to define the workflow, which is a text file that describes the steps in the workflow and the parameters for each step. +The workflow file is stored in the directory where you want to run the workflow. + +Step 5: Run the workflow +======================== + +Once you have defined the workflow, you can run it using the command +``{{ app_module }} go <workflow_file>``. + +Step 6: Monitor the workflow +============================ + +You can monitor the progress of the workflow by running +``{{ app_module }} show``. +This will show you the status of each step in the workflow, including whether it is running, completed, or failed. +You can also view the log files generated during the run by running +``{{ app_module }} logs <workflow>``. +This will show you the log files for each step in the workflow, including any error messages or warnings that were generated during the run. + + +Step 7: View the results +======================== + +Once the workflow has finished running, you can view the results in the output directory specified in the workflow file. +The output directory will contain the results of each step in the workflow, as well as any log files generated during the run. 
\ No newline at end of file diff --git a/docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml b/docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml new file mode 100644 index 0000000..5f26e32 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml @@ -0,0 +1,73 @@ +template_components: + task_schemas: + - objective: process_some_data + inputs: + - parameter: input_data + outputs: + - parameter: parsed_output + actions: + - input_file_generators: + - input_file: my_input_file + from_inputs: + - input_data + script: <> + environments: + - scope: + type: any + environment: python_env + script_exe: python_script + script: <> + save_files: + - processed_file + output_file_parsers: + parsed_output: + from_files: + - my_input_file + - processed_file + script: <> + save_files: + - parsed_output + + - objective: process_data_without_input_file_generator + inputs: + - parameter: input_data + - parameter: path + actions: + - script: <> + script_data_in: direct + script_exe: python_script + save_files: + - my_input_file + environments: + - scope: + type: any + environment: python_env + - script: <> + script_exe: python_script + environments: + - scope: + type: any + environment: python_env + save_files: + - processed_file + + command_files: + - label: my_input_file + name: + name: input_file.json + - label: processed_file + name: + name: processed_file.json + - label: parsed_output + name: + name: parsed_output.json + + +tasks: +- schema: process_some_data + inputs: + input_data: [1, 2, 3, 4] +- schema: process_data_without_input_file_generator + inputs: + input_data: [1, 2, 3, 4] + path: input_file.json diff --git a/docs/source/user/tutorials/tutorial_resources/config.yaml b/docs/source/user/tutorials/tutorial_resources/config.yaml new file mode 100644 index 0000000..add93a6 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/config.yaml @@ -0,0 +1,20 @@ +configs: + default: + invocation: + 
+ environment_setup: + match: {} + config: + machine: YOUR-MACHINE-NAME + log_file_path: logs/<>_v<>.log + environment_sources: [~/.matflow-new/envs_local.yaml] + task_schema_sources: [] + command_file_sources: [] + parameter_sources: [] + default_scheduler: direct + default_shell: bash + schedulers: + direct: + defaults: {} + shells: + bash: + defaults: {} \ No newline at end of file diff --git a/docs/source/user/tutorials/tutorial_resources/envs_local.yaml b/docs/source/user/tutorials/tutorial_resources/envs_local.yaml new file mode 100644 index 0000000..a842967 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/envs_local.yaml @@ -0,0 +1,14 @@ +name: temp_python_env + # Any setup steps e.g. loading a module, activating a virtual environment can go here + setup: source venv/bin/activate + # There might be multiple executables in your environment + # e.g. python, abaqus, etc + executables: + # It's probably a good idea to stick with `python_script` for any python + # executables for compatibility with existing tasks which you + # might want to call in your workflow which will expect this label + - label: python_script + instances: + - command: python <> <> + num_cores: 1 + parallel_mode: null \ No newline at end of file diff --git a/docs/source/user/tutorials/tutorial_resources/generate_input_file.py b/docs/source/user/tutorials/tutorial_resources/generate_input_file.py new file mode 100644 index 0000000..e163113 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/generate_input_file.py @@ -0,0 +1,5 @@ +import json +def generate_input_file(path: str, input_data: list): + """Generate an input file""" + with open(path, "w") as f: + json.dump(input_data, f, indent=2) \ No newline at end of file diff --git a/docs/source/user/tutorials/tutorial_resources/greet.py b/docs/source/user/tutorials/tutorial_resources/greet.py new file mode 100644 index 0000000..839fe44 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/greet.py @@ 
-0,0 +1,3 @@ +def greet(greeting: str, name: str): + """Return a greeting""" + return {"string_to_print": f"{greeting}, {name}!"} \ No newline at end of file diff --git a/docs/source/user/tutorials/tutorial_resources/groups.yaml b/docs/source/user/tutorials/tutorial_resources/groups.yaml new file mode 100644 index 0000000..d924d1e --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/groups.yaml @@ -0,0 +1,31 @@ +template_components: + task_schemas: + - objective: s1 + inputs: + - parameter: p1 + outputs: + - parameter: p2 + actions: + - commands: + - command: echo $(( <> + 1 )) # This is printed to stdout + - command: echo $(( <> + 1 )) # This is captured as p2 + stdout: <> + - objective: s2 + inputs: + - parameter: p2 + group: my_group + outputs: + - parameter: p3 + actions: + - commands: + - command: echo <> # This one is printed to stdout + - command: echo $(( <> )) # This is captured as p3 + stdout: <> +tasks: + - schema: s1 + sequences: + - path: inputs.p1 + values: [1, 2] + groups: + - name: my_group + - schema: s2 diff --git a/docs/source/user/tutorials/tutorial_resources/hello.yaml b/docs/source/user/tutorials/tutorial_resources/hello.yaml new file mode 100644 index 0000000..36e5a88 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/hello.yaml @@ -0,0 +1,89 @@ +template_components: + task_schemas: + - objective: greet + inputs: + - parameter: name + default_value: World + - parameter: greeting + default_value: Hello + actions: + - commands: + - command: echo "<>, <>!" 
> printed_string.txt + + - objective: python_greet + inputs: + - parameter: name + default_value: World + - parameter: greeting + default_value: Hello + outputs: + - parameter: string_to_print + actions: + - script: <> + script_data_in: direct + script_data_out: direct + script_exe: python_script + environments: + - scope: + type: any + environment: python_env + + - objective: print + inputs: + - parameter: string_to_print + actions: + - commands: + - command: echo "<>" > printed_string.txt + + # This schema uses the environment `temp_python_env` + # which loads a python venv. + # This is shown in `envs.yaml` in this repo. + - objective: which_python + actions: + - commands: + - command: which python + environments: + - scope: + type: any + environment: temp_python_env + +# Workflow +tasks: +- schema: greet +- schema: greet + inputs: + greeting: What's up + name: doc +- schema: python_greet + inputs: + greeting: Howdy + name: partner +- schema: print +- schema: print + inputs: + string_to_print: another string to print! +- schema: print + # Explicitly reference output parameter from a task + input_sources: + string_to_print: task.python_greet +- schema: print + input_sources: + # Note that local variable will appear first, regardless of its position in the list + string_to_print: [task.python_greet, local] + inputs: + string_to_print: Yet another string to print! 
+- schema: which_python +- schema: greet + sequences: + - path: inputs.greeting + values: + - hey + - see ya later + - in a while + nesting_order: 0 + - path: inputs.name + values: + - you + - alligator + - crocodile + nesting_order: 1 diff --git a/docs/source/user/tutorials/tutorial_resources/parse_output.py b/docs/source/user/tutorials/tutorial_resources/parse_output.py new file mode 100644 index 0000000..d973c32 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/parse_output.py @@ -0,0 +1,18 @@ +import json +def parse_output(my_input_file: str, processed_file: str): + """Do some post-processing of data files. + + In this instance, we're just making a dictionary containing both the input + and output data. + """ + with open(my_input_file, "r") as f: + input_data = json.load(f) + with open(processed_file, "r") as f: + processed_data = json.load(f) + + combined_data = {"input_data": input_data, "output_data": processed_data} + # Save file so we can look at the data + with open("parsed_output.json", "w") as f: + json.dump(combined_data, f, indent=2) + + return {"parsed_output": combined_data} \ No newline at end of file diff --git a/docs/source/user/tutorials/tutorial_resources/process_input_file.py b/docs/source/user/tutorials/tutorial_resources/process_input_file.py new file mode 100644 index 0000000..612fd33 --- /dev/null +++ b/docs/source/user/tutorials/tutorial_resources/process_input_file.py @@ -0,0 +1,11 @@ +import json +def process_input_file(): + """Process an input file. + + This could be a materials science simulation for example. + """ + with open("input_file.json", "r") as f: + data = json.load(f) + data = [item * 2 for item in data] + with open("processed_file.json", "w") as f: + json.dump(data, f, indent=2) \ No newline at end of file