Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/.gradle
/.history
/.idea
*.log
api/src/main/resources/static/
Expand Down
4 changes: 3 additions & 1 deletion deploy/contents/install/app/configure-utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ function api_create_pipeline {
read -r -d '' payload <<-EOF
{
"name":"$pipeline_name",
"description":"$pipeline_description" ${folder_parent_key_value}
"description":"$pipeline_description" ${folder_parent_key_value},
"codePath": "src/",
"docsPath": "docs/"
}
EOF

Expand Down
7 changes: 6 additions & 1 deletion deploy/docker/build-dockers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,8 @@ build_and_push_tool $BASE_TOOLS_DOCKERS_SOURCES_PATH/rockylinux/vanilla "$CP_DIS
build_and_push_tool $BASE_TOOLS_DOCKERS_SOURCES_PATH/cromwell "$CP_DIST_REPO_NAME:tools-base-cromwell-${DOCKERS_VERSION}" "library/cromwell:latest"

# Nextflow
build_and_push_tool $BASE_TOOLS_DOCKERS_SOURCES_PATH/nextflow "$CP_DIST_REPO_NAME:tools-base-nextflow-${DOCKERS_VERSION}" "library/nextflow:latest" --build-arg BASE_IMAGE="library/rockylinux:8.7"
build_and_push_tool $BASE_TOOLS_DOCKERS_SOURCES_PATH/nextflow/latest "$CP_DIST_REPO_NAME:tools-base-nextflow-${DOCKERS_VERSION}" "library/nextflow:latest" --build-arg BASE_IMAGE="library/rockylinux:8.7"
build_and_push_tool $BASE_TOOLS_DOCKERS_SOURCES_PATH/nextflow/nf-25.02.3-edge-rocky8 "$CP_DIST_REPO_NAME:tools-base-nextflow-${DOCKERS_VERSION}" "library/nextflow:25.02.3-edge-rocky8"

# # Snakemake
# build_and_push_tool $BASE_TOOLS_DOCKERS_SOURCES_PATH/snakemake "$CP_DIST_REPO_NAME:tools-base-snakemake-${DOCKERS_VERSION}" "library/snakemake:latest"
Expand Down Expand Up @@ -499,6 +500,10 @@ NGS_TOOLS_DOCKERS_SOURCES_PATH=$DOCKERS_SOURCES_PATH/cp-tools/ngs

RESEARCH_TOOLS_DOCKERS_SOURCES_PATH=$DOCKERS_SOURCES_PATH/cp-tools/research

# Active Learning Docking

build_and_push_tool $RESEARCH_TOOLS_DOCKERS_SOURCES_PATH/al-docking "$CP_DIST_REPO_NAME:tools-research-al-docking-${DOCKERS_VERSION}" "library/al-docking:1.0.0"

### TODO disabled because of error with ubuntu public key
# Spyder with noVNC
#build_and_push_tool $RESEARCH_TOOLS_DOCKERS_SOURCES_PATH/spyder "$CP_DIST_REPO_NAME:tools-research-spyder-novnc-py37-${DOCKERS_VERSION}" "library/spyder-novnc:3.7" --spec "noVNC" --build-arg BASE_IMAGE="$CP_DIST_REPO_NAME:tools-base-ubuntu-novnc-${DOCKERS_VERSION}"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright 2017-2019 EPAM Systems, Inc. (https://www.epam.com/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE=nvidia/cuda:11.3.1-cudnn8-runtime-rockylinux8
FROM ${BASE_IMAGE}

# Nextflow version to install; consumed by the get.nextflow.io script below
ARG NXF_VER="25.02.3-edge"

# Common applications
RUN dnf install -y \
        ca-certificates \
    && dnf install -y \
        which \
        nano \
        diffutils \
        git \
        wget curl \
        zip unzip bzip2 \
        jre-1.8.0-openjdk \
        jre-21-openjdk \
        graphviz \
        python2 \
    && dnf clean all \
    # Register both JREs with 'alternatives' and make Java 21 the default
    && alternatives --install /usr/bin/java java /usr/lib/jvm/jre-21-openjdk/bin/java 21000 \
    && alternatives --install /usr/bin/java java /usr/lib/jvm/jre-1.8.0-openjdk/bin/java 10800 \
    && alternatives --set java /usr/lib/jvm/jre-21-openjdk/bin/java \
    # python2 ships without pip - bootstrap it from a mirrored get-pip.py
    && curl https://cloud-pipeline-oss-builds.s3.amazonaws.com/tools/pip/2.7/get-pip.py | python2 -

# Miniconda installation; $CONDA_DIR/bin is prepended to PATH once here
ENV CONDA_DIR="/opt/conda"
ENV PATH="$CONDA_DIR/bin:$PATH"
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
    bash /tmp/miniconda.sh -b -p $CONDA_DIR && \
    rm -f /tmp/miniconda.sh && \
    conda update -y -n base -c defaults conda && \
    conda clean -afy

# Stick to a specific version, as the installation script at get.nextflow.io may be upgraded without a notification
# NXF_VER will be considered in the installation script
ENV NXF_HOME_ROOT="/opt/nextflow"
ENV NXF_HOME="$NXF_HOME_ROOT/$NXF_VER"

ENV CP_CAP_GE_CONSUMABLE_RESOURCE_NAME_RAM="mem_free"
ENV CP_CAP_GE_CONSUMABLE_RESOURCE_CONSUMABLE_TYPE_RAM="JOB"

# Redefine default analysis and input location to /common, which will be shared across nodes. These values can be also changed during startup
ENV ANALYSIS_DIR="/common/analysis"
ENV INPUT_DIR="/common/input"

# Install nextflow itself into $NXF_HOME
RUN mkdir -p $NXF_HOME && \
    cd $NXF_HOME_ROOT && \
    curl -fsSL get.nextflow.io | bash && \
    mv nextflow "$NXF_HOME/nextflow"

# Wrapper script that injects Cloud-Pipeline-specific configuration before
# delegating to the real launcher at $NXF_HOME/nextflow
ADD nextflow /usr/bin/nextflow
RUN chmod +x /usr/bin/nextflow

# Weblog handler (Flask/python2) used with 'nextflow -with-weblog'
ADD nf-weblog-handler /opt/nf-weblog-handler
RUN chmod +x /opt/nf-weblog-handler/nf-weblog-handler.sh
RUN python2 -m pip install flask

84 changes: 84 additions & 0 deletions deploy/docker/cp-tools/base/nextflow/25.02.3-edge-rocky8/nextflow
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash

# Copyright 2017-2023 EPAM Systems, Inc. (https://www.epam.com/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Wrapper for the real Nextflow launcher ($NXF_HOME/nextflow):
#  - points the Nextflow work dir (NXF_WORK) at a shared location,
#  - generates /etc/nextflow/nextflow.config from CP_CAP_* capabilities,
#  - optionally attaches the nf-weblog-handler via '-with-weblog'.

CP_NF_WORKDIR="${CP_NF_WORKDIR:-${ANALYSIS_DIR}/work}"
CP_NF_TRACE_FILE="${CP_NF_TRACE_FILE_DIR:-$CP_NF_WORKDIR}/trace.txt"
# NOTE(review): the two sync variables below are neither used nor exported in
# this script - presumably consumed by nf-weblog-handler.sh; confirm and
# export them if so.
CP_NF_RUNTIME_DATA_SYNC_PERIOD=${CP_NF_RUNTIME_DATA_SYNC_PERIOD:-300}
CP_NF_RUNTIME_DATA_SYNC_BACKOFF_SEC=10

export NXF_WORK="$CP_NF_WORKDIR"
mkdir -p "$NXF_WORK"

# Detect whether this invocation is a 'nextflow run ...' command
_IS_RUN_COMMAND=
for param in "$@"; do
    if [ "$param" == "run" ]; then
        _IS_RUN_COMMAND="1"
        break
    fi
done

_ARGS=("$@")

# Regenerate the config from scratch on every invocation
NEXTFLOW_CONFIG="/etc/nextflow/nextflow.config"
mkdir -p "$(dirname "$NEXTFLOW_CONFIG")"
rm -f "$NEXTFLOW_CONFIG"

# Container engine configuration (Singularity takes precedence over DinD)
if [ "$CP_CAP_SINGULARITY" == "true" ]; then
    echo "singularity.enabled = true" >> "$NEXTFLOW_CONFIG"
elif [ "$CP_CAP_DIND_CONTAINER" == "true" ]; then
    echo "docker.enabled = true" >> "$NEXTFLOW_CONFIG"
    # Enable the NVIDIA runtime only when a GPU is actually visible
    if nvidia-smi >/dev/null 2>&1; then
        echo "docker.runOptions = '-u \$(id -u):\$(id -g) --privileged --runtime=nvidia'" >> "$NEXTFLOW_CONFIG"
    else
        echo "docker.runOptions = '-u \$(id -u):\$(id -g) --privileged'" >> "$NEXTFLOW_CONFIG"
    fi
fi

# Grid Engine executor configuration
if [ "$CP_CAP_SGE" == "true" ]; then
    echo "process.executor = 'sge'" >> "$NEXTFLOW_CONFIG"
    echo "process.penv = 'local'" >> "$NEXTFLOW_CONFIG"
    echo "process.queue = 'main.q'" >> "$NEXTFLOW_CONFIG"
    echo "process.clusterOptions = '-V'" >> "$NEXTFLOW_CONFIG"
fi

if [ "$_IS_RUN_COMMAND" == "1" ]; then
    # Attach the weblog handler only when it is enabled AND reports healthy
    if [ "$CP_NF_WEBLOG_HANDLER_ENABLED" == "1" ] && /opt/nf-weblog-handler/nf-weblog-handler.sh --check ; then
        echo "Running Nextflow with -with-weblog http://localhost:${CP_NF_WEBLOG_HANDLER_PORT:-8080}/nextflow/event option"
        _ARGS+=('-with-weblog')
        _ARGS+=("http://localhost:${CP_NF_WEBLOG_HANDLER_PORT:-8080}/nextflow/event")
        # CP_NF_WORKDIR is defaulted above, so this is effectively always true;
        # kept as a safety net for an explicitly emptied value
        if [ -n "$CP_NF_WORKDIR" ]; then
            echo "trace.enabled = true" >> "$NEXTFLOW_CONFIG"
            echo "trace.raw = true" >> "$NEXTFLOW_CONFIG"
            echo "trace.file = '${CP_NF_TRACE_FILE}'" >> "$NEXTFLOW_CONFIG"
            echo "trace.fields = 'task_id,hash,native_id,process,tag,name,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes'" >> "$NEXTFLOW_CONFIG"
        fi
    fi
fi

# Pass the generated config to nextflow only if any capability wrote to it
if [ -f "$NEXTFLOW_CONFIG" ]; then
    _ARGS+=('-c')
    _ARGS+=("$NEXTFLOW_CONFIG")
fi

"$NXF_HOME/nextflow" "${_ARGS[@]}"
_result_code="$?"

# Let the weblog handler flush the remaining runtime data before exiting,
# but only if it was enabled for this run (the original called it
# unconditionally, even when the handler was never started)
if [ "$_IS_RUN_COMMAND" == "1" ] && [ "$CP_NF_WEBLOG_HANDLER_ENABLED" == "1" ]; then
    /opt/nf-weblog-handler/nf-weblog-handler.sh --wait-runtime-data -tf "$CP_NF_TRACE_FILE"
fi

exit "$_result_code"

25 changes: 25 additions & 0 deletions deploy/docker/cp-tools/base/nextflow/latest/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
![Nextflow logo](https://raw.githubusercontent.com/nextflow-io/trademark/master/nextflow2014_no-bg.png)


References
=========

https://www.nextflow.io/

https://github.com/nextflow-io/nextflow

https://www.nextflow.io/docs/latest/getstarted.html


Rationale
=========

With the rise of big data, techniques to analyse and run experiments on large datasets are increasingly necessary.

Parallelization and distributed computing are the best ways to tackle this problem, but the tools commonly available to the bioinformatics community often lack good support for these techniques, or provide a model that fits badly with the specific requirements in the bioinformatics domain and, most of the time, require the knowledge of complex tools or low-level APIs.

Nextflow framework is based on the dataflow programming model, which greatly simplifies writing parallel and distributed pipelines without adding unnecessary complexity and letting you concentrate on the flow of data, i.e. the functional logic of the application/algorithm.

It doesn't aim to be another pipeline scripting language yet, but it is built around the idea that the Linux platform is the *lingua franca* of data science, since it provides many simple command line and scripting tools, which by themselves are powerful, but when chained together facilitate complex data manipulations.

In practice, this means that a Nextflow script is defined by composing many different processes. Each process can execute a given bioinformatics tool or scripting language, to which is added the ability to coordinate and synchronize the processes execution by simply specifying their inputs and outputs.
40 changes: 40 additions & 0 deletions deploy/docker/cp-tools/base/nextflow/latest/howto.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# How to setup

## Description
This Docker includes additional component `nf-weblog-handler`.
This `handler` can use the Nextflow `-with-weblog` feature to redirect events to the Cloud-Pipeline `run/{runId}/engine/tasks` API

## How to use

### Manually
To manually configure `nf-weblog-handler`, perform the following steps:
1. Start `nf-weblog-handler` with: `/opt/nf-weblog-handler/nf-weblog-handler.sh --start -p <port [default: 8080]>`
2. Run nextflow with `-with-weblog http://localhost:<port>/nextflow/event` parameter:
```
nextflow run <path-to-nf-file> -with-weblog http://localhost:8080/nextflow/event
```
3. Now `nf-weblog-handler` should send events to the Cloud-Pipeline API, and you should be able to see expanded Nextflow statistics for the run.

### Configure Cloud-Pipeline custom capability

It is also possible to configure this image to use this functionality automatically:

1. Configure new custom capability in `launch.capabilities` system preference, and make it visible:
```
"NF_EVENT_HANDLER": {
"description": "Enables Nextflow WebLog event handler.",
"commands": [
"[ -f /opt/nf-weblog-handler/nf-weblog-handler.sh ] && /opt/nf-weblog-handler/nf-weblog-handler.sh --start --enable-runtime-data -p $CP_NF_WEBLOG_HANDLER_PORT || exit 1"
],
"params": {
"CP_SYNC_TO_STORAGE_BATCH_MODE": "1",
"CP_RUN_ENGINE_TYPE": "NEXTFLOW",
"CP_NF_WEBLOG_HANDLER_ENABLED": "1",
"CP_NF_WEBLOG_HANDLER_PORT": "8080",
"CP_NF_WEBLOG_HANDLER_LOG_FILE": "/var/log/nf_weblog_handler.log"
}
}
```

2. For the particular tool and pipeline, configure to use this `NF_EVENT_HANDLER` capability.
3. Simply run your workload and `nextflow` wrapper will automatically start using it.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Nextflow weblog handler

This Python web application listens for Nextflow weblog plugin events: https://github.com/nextflow-io/nf-weblog
These can be enabled by running Nextflow with the parameter `-with-weblog <url>`,
where `<url>` is the address of the endpoint to which Nextflow will send all such events.

### How to use

[nf-weblog-handler.sh](nf-weblog-handler.sh) - Starter script to launch this handler

1. Run the script to enable Nextflow event handler
```commandline
bash nf-weblog-handler.sh --start --port 8080
```
This command will start the application on port 8080. And events can be submitted to `<app-host>:8080/nextflow/event`

2. Start nextflow run with an additional parameter `-with-weblog <app-host>:8080/nextflow/event`
3. Event handler will consume, batch, and redirect all events to the Cloud-Pipeline `/run/{runId}/engine/tasks` REST API
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2025 EPAM Systems, Inc. (https://www.epam.com/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from flask import Flask, request

from app.cp_api_client import CloudPipelineApi
from app.nextflow_event_handler import NextflowEventHandler
from app.util import parse

# Flask application that exposes the Nextflow weblog endpoints.
app = Flask(__name__)
logger = app.logger

# Configuration, read from the environment at import time.
API = parse.get_required_env("API")
RUN_ID = parse.get_required_env("RUN_ID")
APP_PORT = int(parse.get_required_env("CP_NF_WEBLOG_HANDLER_PORT", 8080))
SYNC_BATCH_SIZE = int(parse.get_required_env("CP_NF_WEBLOG_HANDLER_SYNC_BATCH_SIZE", 10))
SYNC_BATCH_TIMEOUT = int(parse.get_required_env("CP_NF_WEBLOG_HANDLER_SYNC_BATCH_TIMEOUT", 60))
VERBOSE = int(parse.get_required_env("CP_NF_WEBLOG_HANDLER_VERBOSE", 0))

# Verbose mode switches the app logger to DEBUG.
logging_level = logging.DEBUG if VERBOSE == 1 else logging.INFO
logger.setLevel(logging_level)

# Event handler batches incoming weblog events and forwards them to the
# Cloud-Pipeline API; background sync is started immediately.
api_client = CloudPipelineApi(API, RUN_ID, logger)
event_handler = NextflowEventHandler(logger, api_client, RUN_ID, SYNC_BATCH_SIZE, SYNC_BATCH_TIMEOUT)
event_handler.enable_sync()

@app.route('/nextflow/event', methods=['POST'])
def handle_events():
    """Receive a single Nextflow weblog event (JSON POST body) and queue it.

    Always returns "True" so Nextflow is never disturbed by handler errors.
    """
    try:
        data = request.get_json()
        event_handler.put_event(data)
    except Exception:
        # The original `logger.error("...", e)` passed the exception as a
        # %-format argument with no placeholder in the message, which breaks
        # log record formatting; logger.exception logs the traceback instead.
        logger.exception("Can't process event")
    return "True"

@app.route('/nextflow/event/tracefile', methods=['GET'])
def handle_tracefile():
    """Trigger a sync of events from a Nextflow trace file.

    The file path is taken from the 'path' query parameter; a missing or
    empty path is silently ignored. Always returns "True".
    """
    path = request.args.get('path')
    if path:
        # NOTE(review): the second argument looks like a retry/attempt count -
        # confirm against NextflowEventHandler.sync_events_from_trace_file
        event_handler.sync_events_from_trace_file(path, 5)
    return "True"


@app.route('/nextflow/event/flush', methods=['POST'])
def flush_events():
    """Flush all batched events to the Cloud-Pipeline API immediately.

    Always returns "True" so callers are never disturbed by handler errors.
    """
    try:
        event_handler.flush_events()
    except Exception:
        # The original `logger.error("...", e)` passed the exception as a
        # %-format argument with no placeholder in the message, which breaks
        # log record formatting; logger.exception logs the traceback instead.
        logger.exception("Can't flush events")
    return "True"

if __name__ == '__main__':
    # Run the Flask development server on all interfaces, on the port
    # configured via CP_NF_WEBLOG_HANDLER_PORT (default 8080).
    app.run(host='0.0.0.0', port=APP_PORT)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2025 EPAM Systems, Inc. (https://www.epam.com/)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading