From 37802c092cc7155165f12355158dfcec3824d7a9 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 12:16:49 -0500 Subject: [PATCH 01/18] refactor: simplify cluster profile names, convert cluster from yaml to json --- charlie | 4 +- config/biowulf/cluster.yaml | 94 ++++++++++++++ .../cluster_status.sh | 0 config/{slurm-biowulf => biowulf}/config.yaml | 2 +- config/fnlcr/cluster.yaml | 93 ++++++++++++++ .../{slurm-fnlcr => fnlcr}/cluster_status.sh | 0 config/{slurm-fnlcr => fnlcr}/config.yaml | 2 +- config/slurm-biowulf/cluster.json | 120 ------------------ config/slurm-fnlcr/cluster.json | 119 ----------------- config/unknown/cluster.json | 119 ----------------- 10 files changed, 191 insertions(+), 362 deletions(-) create mode 100644 config/biowulf/cluster.yaml rename config/{slurm-biowulf => biowulf}/cluster_status.sh (100%) rename config/{slurm-biowulf => biowulf}/config.yaml (93%) create mode 100644 config/fnlcr/cluster.yaml rename config/{slurm-fnlcr => fnlcr}/cluster_status.sh (100%) rename config/{slurm-fnlcr => fnlcr}/config.yaml (93%) delete mode 100644 config/slurm-biowulf/cluster.json delete mode 100644 config/slurm-fnlcr/cluster.json delete mode 100644 config/unknown/cluster.json diff --git a/charlie b/charlie index 1c91cc1..fb0b2be 100755 --- a/charlie +++ b/charlie @@ -62,7 +62,7 @@ TEMP_DIR="" REFS_DIR="" CLUSTER_PROFILE="config/unknown" if [ "$PLATFORM" == "biowulf" ]; then - CLUSTER_PROFILE="config/slurm-biowulf" + CLUSTER_PROFILE="config/biowulf" PARTITION="ccr,$PARTITION" EXTRA_SINGULARITY_BINDS="/lscratch" CONDA_ACTIVATE='. 
"/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' @@ -70,7 +70,7 @@ if [ "$PLATFORM" == "biowulf" ]; then TEMP_DIR='/lscratch/$SLURM_JOB_ID/' REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" elif [ "$PLATFORM" == "fnlcr" ]; then - CLUSTER_PROFILE="config/slurm-fnlcr" + CLUSTER_PROFILE="config/fnlcr" EXTRA_SINGULARITY_BINDS="/scratch/local" CONDA_ACTIVATE=". '/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" # make sure spooker is in the path diff --git a/config/biowulf/cluster.yaml b/config/biowulf/cluster.yaml new file mode 100644 index 0000000..d3c197e --- /dev/null +++ b/config/biowulf/cluster.yaml @@ -0,0 +1,94 @@ +__default__: + gres: lscratch:256 + mem: 40g + partition: ccr,norm + threads: "2" + time: 4:00:00 + name: "{rule}.{wildcards}" + output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out + error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err +cutadapt: + mem: 120g + threads: "56" + time: 6:00:00 +dcc: + mem: 120g + threads: "4" + time: 4:00:00 +find_circ_align: + mem: 120g + threads: "56" + time: 6:00:00 +find_circ: + mem: 120g + threads: "56" + time: 6:00:00 +mapsplice: + mem: 200g + threads: "56" + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: "4" + time: 4:00:00 +nclscan: + mem: 512g + threads: "56" + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: "4" + time: 4:00:00 +ciri: + mem: 512g + threads: "56" + time: 4:00:00 + partition: largemem +filter_ciri_bam_for_BSJs: + mem: 512g + threads: "4" + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: "56" + time: 12:00:00 +star1p: + mem: 200g + threads: "56" + time: 6:00:00 +star2p: + mem: 200g + threads: "56" + time: 6:00:00 +star_circrnafinder: + mem: 200g + threads: "56" + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: "4" + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: "4" + time: 4:00:00 
+create_circExplorer_linear_spliced_bams: + mem: 120g + threads: "56" + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/slurm-biowulf/cluster_status.sh b/config/biowulf/cluster_status.sh similarity index 100% rename from config/slurm-biowulf/cluster_status.sh rename to config/biowulf/cluster_status.sh diff --git a/config/slurm-biowulf/config.yaml b/config/biowulf/config.yaml similarity index 93% rename from config/slurm-biowulf/config.yaml rename to config/biowulf/config.yaml index 0697a63..efce53f 100644 --- a/config/slurm-biowulf/config.yaml +++ b/config/biowulf/config.yaml @@ -8,7 +8,7 @@ cluster: sbatch --output {cluster.output} --error {cluster.error} --gres {cluster.gres} -cluster-config: "cluster.json" +cluster-config: "cluster.yaml" cluster-status: "cluster_status.sh" jobs: 499 immediate-submit: false diff --git a/config/fnlcr/cluster.yaml b/config/fnlcr/cluster.yaml new file mode 100644 index 0000000..428f49f --- /dev/null +++ b/config/fnlcr/cluster.yaml @@ -0,0 +1,93 @@ +__default__: + mem: 40g + partition: norm + threads: "2" + time: 4:00:00 + name: "{rule}.{wildcards}" + output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out + error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err +cutadapt: + mem: 120g + threads: "32" + time: 6:00:00 +dcc: + mem: 120g + threads: "4" + time: 4:00:00 +find_circ_align: + mem: 120g + threads: "32" + time: 6:00:00 +find_circ: + mem: 120g + threads: "32" + time: 6:00:00 +mapsplice: + mem: 200g + threads: "32" + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: "4" + time: 4:00:00 +nclscan: + mem: 512g + threads: "32" + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: "4" + time: 4:00:00 +ciri: + mem: 512g + threads: "32" + time: 4:00:00 + partition: largemem 
+filter_ciri_bam_for_BSJs: + mem: 512g + threads: "4" + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: "32" + time: 12:00:00 +star1p: + mem: 200g + threads: "32" + time: 6:00:00 +star2p: + mem: 200g + threads: "32" + time: 6:00:00 +star_circrnafinder: + mem: 200g + threads: "32" + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: "4" + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: "4" + time: 4:00:00 +create_circExplorer_linear_spliced_bams: + mem: 120g + threads: "32" + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/slurm-fnlcr/cluster_status.sh b/config/fnlcr/cluster_status.sh similarity index 100% rename from config/slurm-fnlcr/cluster_status.sh rename to config/fnlcr/cluster_status.sh diff --git a/config/slurm-fnlcr/config.yaml b/config/fnlcr/config.yaml similarity index 93% rename from config/slurm-fnlcr/config.yaml rename to config/fnlcr/config.yaml index 8fa374f..6f7e685 100644 --- a/config/slurm-fnlcr/config.yaml +++ b/config/fnlcr/config.yaml @@ -7,7 +7,7 @@ cluster: sbatch --job-name {cluster.name} --output {cluster.output} --error {cluster.error} -cluster-config: "cluster.json" +cluster-config: "cluster.yaml" cluster-status: "cluster_status.sh" jobs: 499 immediate-submit: false diff --git a/config/slurm-biowulf/cluster.json b/config/slurm-biowulf/cluster.json deleted file mode 100644 index 028a2e2..0000000 --- a/config/slurm-biowulf/cluster.json +++ /dev/null @@ -1,120 +0,0 @@ -{ - "__default__": { - "gres": "lscratch:256", - "mem": "40g", - "partition": "ccr,norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - 
}, - "cutadapt": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "56", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "56", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "56", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "56", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "56", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} diff --git a/config/slurm-fnlcr/cluster.json b/config/slurm-fnlcr/cluster.json deleted file mode 100644 index fbc50f9..0000000 --- 
a/config/slurm-fnlcr/cluster.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "__default__": { - "mem": "40g", - "partition": "norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - }, - "cutadapt": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "32", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "32", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "32", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": 
"24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} diff --git a/config/unknown/cluster.json b/config/unknown/cluster.json deleted file mode 100644 index fbc50f9..0000000 --- a/config/unknown/cluster.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "__default__": { - "mem": "40g", - "partition": "norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - }, - "cutadapt": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "32", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "32", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - 
"create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "32", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} From 06f663c7694c3ce00cc784cda1f1485148f23109 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 12:42:14 -0500 Subject: [PATCH 02/18] feat: adapt grid engine profile from https://github.com/Snakemake-Profiles/sge --- config/eddie/cluster.yaml | 93 ++++++++++++++++++++++++++++++++++++++ config/eddie/config.yaml | 26 +++++++++++ config/eddie/sge-cancel.py | 8 ++++ config/eddie/sge-status.py | 69 ++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+) create mode 100644 config/eddie/cluster.yaml create mode 100644 config/eddie/config.yaml create mode 100644 config/eddie/sge-cancel.py create mode 100644 config/eddie/sge-status.py diff --git a/config/eddie/cluster.yaml b/config/eddie/cluster.yaml new file mode 100644 index 0000000..995ad93 --- /dev/null +++ b/config/eddie/cluster.yaml @@ -0,0 +1,93 @@ +__default__: + output: "logs/{rule}.{wildcards}.$JOB_ID.out" + error: "logs/{rule}.{wildcards}.$JOB_ID.err" + mem: 40g + threads: 2 + time: 4:00:00 + name: "{rule}.{wildcards}" + +cutadapt: + mem: 120g + threads: 56 + time: 6:00:00 +dcc: + mem: 120g + threads: 4 + time: 4:00:00 +find_circ_align: + mem: 120g + threads: 56 + time: 6:00:00 +find_circ: + mem: 120g + threads: 56 + time: 6:00:00 +mapsplice: + mem: 200g + threads: 56 + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: 4 + time: 4:00:00 +nclscan: + mem: 512g + threads: 56 + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: 4 + time: 4:00:00 +ciri: + mem: 512g + threads: 56 + time: 4:00:00 + partition: largemem 
+filter_ciri_bam_for_BSJs: + mem: 512g + threads: 4 + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: 56 + time: 12:00:00 +star1p: + mem: 200g + threads: 56 + time: 6:00:00 +star2p: + mem: 200g + threads: 56 + time: 6:00:00 +star_circrnafinder: + mem: 200g + threads: 56 + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: 4 + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: 4 + time: 4:00:00 +create_circExplorer_linear_spliced_bams: + mem: 120g + threads: 56 + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/eddie/config.yaml b/config/eddie/config.yaml new file mode 100644 index 0000000..7718c71 --- /dev/null +++ b/config/eddie/config.yaml @@ -0,0 +1,26 @@ +cluster: qsub + -cwd + -l h_rt={cluster.time} + -l h_vmem={cluster.mem} + -pe sharedmem {cluster.threads} + -N {cluster.name} + -o {cluster.output} + -e {cluster.error} +cluster-config: "cluster.yaml" +cluster-status: "sge-status.py" +cluster-cancel: "sge-cancel.py" +cluster-cancel-nargs: 20 +max-jobs-per-second: 1 +max-status-checks-per-second: 1 +latency-wait: 60 +local-cores: 1 +jobs: 499 +immediate-submit: false +verbose: true +notemp: true +printshellcmds: true +use-singularity: true +rerun-incomplete: true +rerun-triggers: mtime +retries: 2 +keep-going: true diff --git a/config/eddie/sge-cancel.py b/config/eddie/sge-cancel.py new file mode 100644 index 0000000..54f006a --- /dev/null +++ b/config/eddie/sge-cancel.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 +import subprocess as sp +import shlex +import sys + +jobid_list = ", ".join(sys.argv[1:]) + +sp.check_call(shlex.split(f"qdel {jobid_list}")) diff --git a/config/eddie/sge-status.py b/config/eddie/sge-status.py new file mode 100644 index 0000000..3b25265 --- /dev/null 
+++ b/config/eddie/sge-status.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +import re +import subprocess as sp +import shlex +import sys +import time +import logging + +logger = logging.getLogger("__name__") +logger.setLevel(40) + +STATUS_ATTEMPTS = 20 + +jobid = int(sys.argv[1]) +job_status = "running" + +# WARNING this currently has no support for task array jobs + +for i in range(STATUS_ATTEMPTS): + # first try qstat to see if job is running + # we can use `qstat -s pr -u "*"` to check for all running and pending jobs + try: + qstat_res = sp.check_output(shlex.split(f"qstat -s pr")).decode().strip() + + # skip the header using [2:] + res = {int(x.split()[0]): x.split()[4] for x in qstat_res.splitlines()[2:]} + + # job is in an unspecified error state + if "E" in res[jobid]: + job_status = "failed" + break + + job_status = "running" + break + + except sp.CalledProcessError as e: + logger.error("qstat process error") + logger.error(e) + except KeyError as e: + # if the job has finished it won't appear in qstat and we should check qacct + # this will also provide the exit status (0 on success, 128 + exit_status on fail) + # Try getting job with scontrol instead in case sacct is misconfigured + try: + qacct_res = sp.check_output(shlex.split(f"qacct -j {jobid}")) + + exit_code = int( + re.search("exit_status ([0-9]+)", qacct_res.decode()).group(1) + ) + + if exit_code == 0: + job_status = "success" + break + + if exit_code != 0: + job_status = "failed" + break + + except sp.CalledProcessError as e: + logger.warning("qacct process error") + logger.warning(e) + if i >= STATUS_ATTEMPTS - 1: + job_status = "failed" + break + else: + # qacct can be quite slow to update on large servers + time.sleep(5) + pass + +print(job_status) From 9ab541b6219c6b0b43156c24a24d880c195635d1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 12:43:17 -0500 Subject: [PATCH 03/18] style: fix yaml quotes --- config/biowulf/cluster.yaml | 40 
++++++++++++++++++------------------- config/fnlcr/cluster.yaml | 40 ++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/config/biowulf/cluster.yaml b/config/biowulf/cluster.yaml index d3c197e..945419d 100644 --- a/config/biowulf/cluster.yaml +++ b/config/biowulf/cluster.yaml @@ -2,81 +2,81 @@ __default__: gres: lscratch:256 mem: 40g partition: ccr,norm - threads: "2" + threads: 2 time: 4:00:00 name: "{rule}.{wildcards}" - output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out - error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err + output: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out" + error: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" cutadapt: mem: 120g - threads: "56" + threads: 56 time: 6:00:00 dcc: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 find_circ_align: mem: 120g - threads: "56" + threads: 56 time: 6:00:00 find_circ: mem: 120g - threads: "56" + threads: 56 time: 6:00:00 mapsplice: mem: 200g - threads: "56" + threads: 56 time: 48:00:00 mapsplice_postprocess: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 nclscan: mem: 512g - threads: "56" + threads: 56 time: 4:00:00 partition: largemem fastqc: mem: 40g - threads: "4" + threads: 4 time: 4:00:00 ciri: mem: 512g - threads: "56" + threads: 56 time: 4:00:00 partition: largemem filter_ciri_bam_for_BSJs: mem: 512g - threads: "4" + threads: 4 time: 24:00:00 partition: largemem create_index: mem: 200g - threads: "56" + threads: 56 time: 12:00:00 star1p: mem: 200g - threads: "56" + threads: 56 time: 6:00:00 star2p: mem: 200g - threads: "56" + threads: 56 time: 6:00:00 star_circrnafinder: mem: 200g - threads: "56" + threads: 56 time: 6:00:00 estimate_duplication: mem: 200g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_BSJ_bam: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_linear_spliced_bams: mem: 120g - threads: "56" + threads: 56 time: 8:00:00 clear: time: 1:00:00 diff --git 
a/config/fnlcr/cluster.yaml b/config/fnlcr/cluster.yaml index 428f49f..c1e0fb6 100644 --- a/config/fnlcr/cluster.yaml +++ b/config/fnlcr/cluster.yaml @@ -1,81 +1,81 @@ __default__: mem: 40g partition: norm - threads: "2" + threads: 2 time: 4:00:00 name: "{rule}.{wildcards}" - output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out - error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err + output: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out" + error: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" cutadapt: mem: 120g - threads: "32" + threads: 32 time: 6:00:00 dcc: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 find_circ_align: mem: 120g - threads: "32" + threads: 32 time: 6:00:00 find_circ: mem: 120g - threads: "32" + threads: 32 time: 6:00:00 mapsplice: mem: 200g - threads: "32" + threads: 32 time: 48:00:00 mapsplice_postprocess: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 nclscan: mem: 512g - threads: "32" + threads: 32 time: 4:00:00 partition: largemem fastqc: mem: 40g - threads: "4" + threads: 4 time: 4:00:00 ciri: mem: 512g - threads: "32" + threads: 32 time: 4:00:00 partition: largemem filter_ciri_bam_for_BSJs: mem: 512g - threads: "4" + threads: 4 time: 24:00:00 partition: largemem create_index: mem: 200g - threads: "32" + threads: 32 time: 12:00:00 star1p: mem: 200g - threads: "32" + threads: 32 time: 6:00:00 star2p: mem: 200g - threads: "32" + threads: 32 time: 6:00:00 star_circrnafinder: mem: 200g - threads: "32" + threads: 32 time: 6:00:00 estimate_duplication: mem: 200g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_BSJ_bam: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_linear_spliced_bams: mem: 120g - threads: "32" + threads: 32 time: 8:00:00 clear: time: 1:00:00 From d883661891c41a145f6c8234c19686e4af83ae60 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 14:37:23 -0500 Subject: [PATCH 04/18] feat: add --platform & -m=runqsub to support eddie also refactor templating 
logic --- README.md | 1 + charlie | 250 +++++++++++++----------------- config/biowulf/submit_script.sh | 35 +++++ config/config.yaml | 31 ++-- config/eddie/submit_script.sh | 35 +++++ config/fnlcr/submit_script.sh | 35 +++++ resources/NCLscan.config.template | 15 +- 7 files changed, 238 insertions(+), 164 deletions(-) create mode 100644 config/biowulf/submit_script.sh create mode 100644 config/eddie/submit_script.sh create mode 100644 config/fnlcr/submit_script.sh diff --git a/README.md b/README.md index b1867ba..343da1d 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. +--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with scontrol. 
--help|-h : print this help diff --git a/charlie b/charlie index fb0b2be..d0e5186 100755 --- a/charlie +++ b/charlie @@ -5,12 +5,7 @@ # CHARLIE set -eo pipefail -## TODO module statements can only run on biowulf -# decide trigger -trigger="mtime" -# trigger="input" -# trigger="code" ########################################################################################## # functions @@ -36,58 +31,16 @@ function get_platform() { ########################################################################################## # initial setup ########################################################################################## - +# set defaults for global variables # set PIPELINE_HOME PIPELINE_HOME=$(readlink -f $(dirname "$0")) -# set snakefile -SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile" # get github commit tag GIT_COMMIT_TAG=$(get_git_commitid_tag $PIPELINE_HOME) -########################################################################################## -# Some more set up -########################################################################################## - PYTHONVERSION="3" SNAKEMAKEVERSION="7" -CONDA_ACTIVATE='' -PATH_PREPEND='' -MODULE_LOAD='' -PLATFORM=$(get_platform) -PARTITION='norm' -EXTRA_SINGULARITY_BINDS="" -TEMP_DIR="" -REFS_DIR="" -CLUSTER_PROFILE="config/unknown" -if [ "$PLATFORM" == "biowulf" ]; then - CLUSTER_PROFILE="config/biowulf" - PARTITION="ccr,$PARTITION" - EXTRA_SINGULARITY_BINDS="/lscratch" - CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' - MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE" - TEMP_DIR='/lscratch/$SLURM_JOB_ID/' - REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" -elif [ "$PLATFORM" == "fnlcr" ]; then - CLUSTER_PROFILE="config/fnlcr" - EXTRA_SINGULARITY_BINDS="/scratch/local" - CONDA_ACTIVATE=". 
'/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" - # make sure spooker is in the path - PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"' - MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE" - TEMP_DIR="/scratch/local/" - REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/" -else - echo """WARNING: detected platform is $PLATFORM. Please edit the files in config/unknown/ & config.yaml for compatibility with your computing environment - """ -fi - -# set defaults -HOST="hg38" -ADDITIVES="ERCC" -VIRUSES="NC_009333.1" -MANIFEST="${PIPELINE_HOME}/config/samples.tsv" # set variables SCRIPTNAME="$0" @@ -156,8 +109,9 @@ Required Arguments: 2. RUNMODE : [Type: String] Valid options: * init : initialize workdir * dryrun : dry run snakemake to generate DAG - * run : run with slurm - * runlocal : run without submitting to sbatch + * run : run by submitting the job with slurm + * runqsub : run by submitting the job with qsub + * runlocal : run locally without submitting the job to a scheduler ADVANCED RUNMODES (use with caution!!) * unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! * reconfig : recreate config file in WORKDIR (debugging option) EDITS TO config.yaml WILL BE LOST! @@ -172,6 +126,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. +--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with scontrol. 
(--runmode=init only) --help|-h : print this help @@ -212,39 +167,34 @@ function err() { usage && cat <<< " function init() { -# create output folder -if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi -mkdir -p $WORKDIR - -# copy config resources -cp -r ${PIPELINE_HOME}/config $WORKDIR/ - -# copy config template and samples files -if [ ! -f $CONFIGFILE ];then -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/HOST/${HOST}/g" \ - -e "s/ADDITIVES/${ADDITIVES}/g" \ - -e "s/VIRUSES/${VIRUSES}/g" \ - -e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \ - -e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \ - -e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \ - ${PIPELINE_HOME}/config/config.yaml \ - > $CONFIGFILE -fi -if [ ! -f $WORKDIR/nclscan.config ];then -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/resources/NCLscan.config.template > $WORKDIR/nclscan.config -fi - -if [ ! -f $WORKDIR/samples.tsv ];then -cp $MANIFEST $WORKDIR/samples.tsv -fi - -#create log and stats folders -if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi -if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi - -echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WORKDIR/samples.tsv" + # create output folder + if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi + mkdir -p $WORKDIR + + # copy config resources + cp -r ${PIPELINE_HOME}/config $WORKDIR/ + + # copy config template and samples files + if [ ! -f $CONFIGFILE ];then + cat ${PIPELINE_HOME}/config/config.yaml |\ + envsubst '$PIPELINE_HOME $WORKDIR $HOST $ADDITIVES $VIRUSES $TEMP_DIR $REFS_DIR $CLUSTER_PROFILE' \ + > $CONFIGFILE + fi + if [ ! -f $WORKDIR/nclscan.config ];then + cat ${PIPELINE_HOME}/resources/NCLscan.config.template |\ + envsubst '$WORKDIR' |\ + > $WORKDIR/nclscan.config + fi + + if [ ! 
-f $WORKDIR/samples.tsv ];then + cp $MANIFEST $WORKDIR/samples.tsv + fi + + #create log and stats folders + if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi + if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi + + echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WORKDIR/samples.tsv" } @@ -301,16 +251,9 @@ function reconfig(){ # rebuild config file and replace the config.yaml in the WORKDIR # this is only for dev purposes when new key-value pairs are being added to the config file check_essential_files - sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/HOST/${HOST}/g" \ - -e "s/ADDITIVES/${ADDITIVES}/g" \ - -e "s/VIRUSES/${VIRUSES}/g" \ - -e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \ - -e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \ - -e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \ - ${PIPELINE_HOME}/config/config.yaml \ - > $CONFIGFILE + cat ${PIPELINE_HOME}/config/config.yaml |\ + envsubst '$PIPELINE_HOME $WORKDIR $HOST $ADDITIVES $VIRUSES $TEMP_DIR $REFS_DIR $CLUSTER_PROFILE' \ + > $WORKDIR/config.yaml echo "$WORKDIR/config.yaml has been updated!" } @@ -335,6 +278,7 @@ function load_modules() { function runcheck(){ check_essential_files load_modules + set_singularity_binds } ########################################################################################## @@ -374,7 +318,7 @@ function unlock() { function set_singularity_binds() { binds=$( $PIPELINE_HOME/workflow/scripts/set_singularity_bind_paths.py ${WORKDIR}/config.yaml ${WORKDIR}/samples.tsv) - SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds" + export SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds" } ########################################################################################## # PRINT SINGULARITY BINDS ... 
print bound singularity folders for debugging @@ -391,7 +335,7 @@ function printbinds(){ function runlocal() { runcheck - set_singularity_binds + # TODO do not assume $SLURM_JOB_ID exists, may run on other platform without slurm e.g. eddie if [ "$SLURM_JOB_ID" == "" ];then err "runlocal can only be done on an interactive node"; exit 1; fi run "local" } @@ -402,9 +346,12 @@ function runlocal() { function runslurm() { runcheck - set_singularity_binds run "--dry-run " && run "slurm" } +function runqsub() { + runcheck + run "--dry-run " && run "qsub" +} ########################################################################################## # CREATE RUNINFO ... create runinfo.yaml in workdir @@ -508,50 +455,21 @@ function run() { --configfile $CONFIGFILE fi - elif [ "$1" == "slurm" ];then + elif [ "$1" == "slurm" ]; then preruncleanup + cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + > ${WORKDIR}/submit_script.sh + sbatch ${WORKDIR}/submit_script.sh - cat > ${WORKDIR}/submit_script.sbatch << EOF -#!/bin/bash -#SBATCH --job-name="charlie" -#SBATCH --mem=40g -#SBATCH --partition="$PARTITION" -#SBATCH --time=48:00:00 -#SBATCH --cpus-per-task=2 -#SBATCH --mail-type=BEGIN,END,FAIL - -cd \$SLURM_SUBMIT_DIR -$MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD - -snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --use-singularity \ - --singularity-args "$SINGULARITY_BINDS" \ - --use-envmodules \ - --printshellcmds \ - --latency-wait 300 \ - --configfile $CONFIGFILE \ - --profile $CLUSTER_PROFILE \ - -j 500 \ - --rerun-incomplete \ - --rerun-triggers $trigger \ - --retries 2 \ - --keep-going \ - --stats ${WORKDIR}/snakemake.stats \ - 2>&1 | tee ${WORKDIR}/snakemake.log - -if [ "\$?" 
-eq "0" ];then - snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --report ${WORKDIR}/runslurm_snakemake_report.html \ - --configfile $CONFIGFILE -fi - -EOF + elif [ "$1" == "qsub" ]; then - sbatch ${WORKDIR}/submit_script.sbatch + preruncleanup + cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + > ${WORKDIR}/submit_script.sh + pushd $WORKDIR && qsub submit_script.sh && popd elif [ "$1" == "--touch" ];then @@ -636,6 +554,9 @@ function main(){ MANIFEST="${i#*=}" if [ ! -f $MANIFEST ];then err "File $MANIFEST does NOT exist!";fi ;; + -p=*|--platform=*) + PLATFORM="${i#*=}" + ;; -h|--help) usage && exit 0; ;; @@ -645,8 +566,22 @@ function main(){ esac done - WORKDIR=$(readlink -f $WORKDIR) + export WORKDIR=$(readlink -f $WORKDIR) + export PIPELINE_HOME=$(readlink -f $(dirname "$0")) + export SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile" echo "Working Dir: $WORKDIR" + if [ -z "$PLATFORM" ]; then PLATFORM=$(get_platform); fi + export PLATFORM + echo "Platform: $PLATFORM" + + # set defaults + if [ -z "$HOST" ]; then HOST="hg38"; fi + export HOST + if [ -z "$ADDITIVES" ]; then ADDITIVES="ERCC"; fi + export ADDITIVES + if [ -z "$VIRUSES" ]; then VIRUSES="NC_009333.1"; fi + export VIRUSES + export MANIFEST="${PIPELINE_HOME}/config/samples.tsv" if [[ -z "$SING_CACHE_DIR" ]]; then if [[ -d "/data/$USER" ]]; then @@ -657,10 +592,46 @@ function main(){ echo "singularity cache dir (--singcache) is not set, using ${SING_CACHE_DIR}" fi mkdir -p $SING_CACHE_DIR - EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" + export EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" # required files - CONFIGFILE="${WORKDIR}/config.yaml" + export CONFIGFILE="${WORKDIR}/config.yaml" + + # decide trigger + export trigger="mtime" + # trigger="input" + # trigger="code" + + # set variables 
based on the detected platform + if [ "$PLATFORM" == "biowulf" ]; then + CLUSTER_PROFILE="config/biowulf" + PARTITION="ccr,norm" + EXTRA_SINGULARITY_BINDS="/lscratch" + CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' + MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE" + TEMP_DIR='/lscratch/$SLURM_JOB_ID/' + REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" + elif [ "$PLATFORM" == "fnlcr" ]; then + CLUSTER_PROFILE="config/fnlcr" + PARTITION="norm" + EXTRA_SINGULARITY_BINDS="/scratch/local" + CONDA_ACTIVATE=". '/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" + # make sure spooker is in the path + PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"' + MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE" + TEMP_DIR="/scratch/local/" + REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/" + elif [ "$PLATFORM" == "eddie" ]; then + # TODO fill in other variables for eddie + CLUSTER_PROFILE="config/eddie" + MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity" + TEMP_DIR="/exports/eddie/scratch/$USER" + else + CLUSTER_PROFILE="config/unknown" + echo """WARNING: detected platform is $PLATFORM. 
Please edit the files in $CLUSTER_PROFILE & config.yaml for compatibility with your computing environment + """ + fi + export PLATFORM CLUSTER_PROFILE PARTITION EXTRA_SINGULARITY_BINDS CONDA_ACTIVATE MODULE_LOAD TEMP_DIR REFS_DIR # change group to Ziegelbauer_lab before doing anything if [ "$CHANGEGRP" == "1" ]; then change_grp "$allargs"; fi @@ -670,6 +641,7 @@ function main(){ dryrun) dryrun && exit 0;; unlock) unlock && exit 0;; run) runslurm && exit 0;; + runqsub) runqsub && exit 0;; runlocal) runlocal && exit 0;; reset) reset && exit 0;; touch) touch && exit 0;; diff --git a/config/biowulf/submit_script.sh b/config/biowulf/submit_script.sh new file mode 100644 index 0000000..926718b --- /dev/null +++ b/config/biowulf/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#SBATCH --job-name="charlie" +#SBATCH --mem=40g +#SBATCH --partition="$PARTITION" +#SBATCH --time=48:00:00 +#SBATCH --cpus-per-task=2 +#SBATCH --mail-type=BEGIN,END,FAIL + +cd $SLURM_SUBMIT_DIR +$MODULE_LOAD +$EXPORT_SING_CACHE_DIR_CMD + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" -eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runslurm_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/config/config.yaml b/config/config.yaml index c94dce0..8c14a81 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,14 +1,14 @@ ## you probably need to change or comment/uncomment some of these # # The working dir... 
output will be in the results subfolder of the workdir -workdir: "WORKDIR" +workdir: "$WORKDIR" # temporary directory for intermediate files that are not saved -tempdir: "TEMP_DIR" +tempdir: "$TEMP_DIR" # tab delimited samples file ... should have the following 3 columns # sampleName path_to_R1_fastq path_to_R2_fastq -samples: "WORKDIR/samples.tsv" +samples: "$WORKDIR/samples.tsv" # Should the CLEAR pipeline be run? True or False WITHOUT quotes run_clear: True @@ -26,7 +26,7 @@ run_circRNAFinder: True # Should the NCLscan pipeline be run? True or False WITHOUT quotes # This can only be run for PE data run_nclscan: False -nclscan_config: "WORKDIR/nclscan.config" +nclscan_config: "$WORKDIR/nclscan.config" # Should we also run find_circ? True or False WITHOUT quotes run_findcirc: False @@ -38,9 +38,9 @@ findcirc_params: "--noncanonical" # host: "hg38" # additives: "ERCC" # options are ERCC and BAC16Insert # viruses: "NC_009333.1" -host: "HOST" -additives: "ADDITIVES" -viruses: "VIRUSES" +host: "$HOST" +additives: "$ADDITIVES" +viruses: "$VIRUSES" # select viruses and other (ERCC/BAC): options are # ERCC # BAC16Insert @@ -85,14 +85,13 @@ maxsize_host: 1000000000 maxsize_virus: 5000 ## you most probably dont need to change these -scriptsdir: "PIPELINE_HOME/workflow/scripts" -resourcesdir: "PIPELINE_HOME/resources" +scriptsdir: "$PIPELINE_HOME/workflow/scripts" +resourcesdir: "$PIPELINE_HOME/resources" -# default cluster -# cluster: "PIPELINE_HOME/resources/cluster.json" -cluster: "WORKDIR/CLUSTER_PROFILE/cluster.json" +# default cluster config file +cluster: "$WORKDIR/$CLUSTER_PROFILE/cluster.yaml" -adapters: "PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa" +adapters: "$PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa" circexplorer_bsj_circRNA_min_reads: 3 # in addition to "known" and "low-conf" circRNAs identified by circexplorer, we also include those found in back_spliced.bed file but not classified as known/low-conf only if 
the number of reads supporting the BSJ call is greater than this number minreadcount: 3 # this is used to filter circRNAs while creating the per-sample counts table flanksize: 18 # 18bp flank on either side of the BSJ .. used by multiple BSJ callers @@ -107,11 +106,11 @@ high_confidence_core_callers_plus_n: 1 ciri_perl_script: "/opt2/CIRI_v2.0.6/CIRI2.pl" # path in docker container # change this path to a directory containing fasta and GTF files for all host and virus genomes -fastas_gtfs_dir: "REFS_DIR" +fastas_gtfs_dir: "$REFS_DIR" annotation_lookups: - hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" - mm39: "PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" + hg38: "$PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" + mm39: "$PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" containers: base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v7" diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh new file mode 100644 index 0000000..4de3ddc --- /dev/null +++ b/config/eddie/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#$ -N charlie +#$ -cwd +#$ -l h_rt=48:00:00 +#$ -l h_vmem=40g +#$ -pe sharedmem 2 +#$ -o logs/$JOB_NAME-$JOB_ID-$HOSTNAME.out +#$ -e logs/$JOB_NAME-$JOB_ID-$HOSTNAME.err + +$MODULE_LOAD +$EXPORT_SING_CACHE_DIR_CMD + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" 
-eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runqsub_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/config/fnlcr/submit_script.sh b/config/fnlcr/submit_script.sh new file mode 100644 index 0000000..926718b --- /dev/null +++ b/config/fnlcr/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#SBATCH --job-name="charlie" +#SBATCH --mem=40g +#SBATCH --partition="$PARTITION" +#SBATCH --time=48:00:00 +#SBATCH --cpus-per-task=2 +#SBATCH --mail-type=BEGIN,END,FAIL + +cd $SLURM_SUBMIT_DIR +$MODULE_LOAD +$EXPORT_SING_CACHE_DIR_CMD + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" -eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runslurm_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/resources/NCLscan.config.template b/resources/NCLscan.config.template index 0f53d48..dd97366 100644 --- a/resources/NCLscan.config.template +++ b/resources/NCLscan.config.template @@ -7,22 +7,22 @@ NCLscan_dir = /opt2/NCLscan-1.7.0 ## The directory of references and indices ## The script "create_reference.py" would create the needed references and indices here. -NCLscan_ref_dir = WORKDIR/ref/NCLscan_index +NCLscan_ref_dir = $WORKDIR/ref/NCLscan_index ## The following four reference files can be downloaded from the GENCODE website (http://www.gencodegenes.org/). ## The reference genome sequence, eg. /path/to/GRCh37.p13.genome.fa -Reference_genome = WORKDIR/ref/ref.fa +Reference_genome = $WORKDIR/ref/ref.fa ## The gene annotation file, eg. 
/path/to/gencode.v19.annotation.gtf -Gene_annotation = WORKDIR/ref/ref.fixed.gtf +Gene_annotation = $WORKDIR/ref/ref.fixed.gtf ## The protein-coding transcript sequences, eg. /path/to/gencode.v19.pc_transcripts.fa -Protein_coding_transcripts = WORKDIR/ref/ref.transcripts.fa +Protein_coding_transcripts = $WORKDIR/ref/ref.transcripts.fa ## The long non-coding RNA transcript sequences, eg. /path/to/gencode.v19.lncRNA_transcripts.fa -lncRNA_transcripts = WORKDIR/ref/ref.dummy.fa +lncRNA_transcripts = $WORKDIR/ref/ref.dummy.fa ## External tools @@ -68,7 +68,7 @@ SeqOut_bin = {NCLscan_bin}/SeqOut ### Advanced parameters ### ########################### -## The following two parameters indicate the maximal read length (L) and fragment size of the used paired-end RNA-seq data (FASTQ files), where fragment size = 2L + insert size. +## The following two parameters indicate the maximal read length (L) and fragment size of the used paired-end RNA-seq data (FASTQ files), where fragment size = 2L + insert size. ## If L > 151, the users should change these two parameters to (L, 2L + insert size). max_read_len = 151 max_fragment_size = 500 @@ -96,6 +96,3 @@ bwa-mem-t = 56 ## NOTE: The memory usage of each blat process would be up to 4 GB! ## mp_blat_process = 56 - - - From dc80ccc20e52cd754d7198cf7e91ee94683b5356 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 14:48:09 -0500 Subject: [PATCH 05/18] docs: note new platform & runmode options --- CHANGELOG.md | 2 ++ README.md | 6 ++++-- charlie | 8 +++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f65f38f..8da2b20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## CHARLIE development version +- Support Eddie (The University of Edinburgh's HPC cluster) with `--platform=eddie` and `--runmode=qsub`. (#136, @kelly-sovacool) + ## CHARLIE 0.11.1 - CHARLIE was falsely throwing a file permissions error for tempdir values containing bash variables. 
(#118, @kelly-sovacool) diff --git a/README.md b/README.md index 343da1d..ce4630e 100644 --- a/README.md +++ b/README.md @@ -164,7 +164,8 @@ Required Arguments: 2. RUNMODE : [Type: String] Valid options: * init : initialize workdir * dryrun : dry run snakemake to generate DAG - * run : run with slurm + * run : run by submitting the job with slurm + * qsub : run by submitting the job with qsub * runlocal : run without submitting to sbatch ADVANCED RUNMODES (use with caution!!) * unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! @@ -172,6 +173,7 @@ Required Arguments: * reset : DELETE workdir dir and re-init it (debugging option) EDITS TO ALL FILES IN WORKDIR WILL BE LOST! * printbinds: print singularity binds (paths) * local : same as runlocal + * slurm : same as run (run with slurm) Optional Arguments: @@ -181,7 +183,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. ---platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with scontrol. +--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with `scontrol`. --help|-h : print this help diff --git a/charlie b/charlie index d0e5186..c6f4377 100755 --- a/charlie +++ b/charlie @@ -110,7 +110,7 @@ Required Arguments: * init : initialize workdir * dryrun : dry run snakemake to generate DAG * run : run by submitting the job with slurm - * runqsub : run by submitting the job with qsub + * qsub : run by submitting the job with qsub * runlocal : run locally without submitting the job to a scheduler ADVANCED RUNMODES (use with caution!!) 
* unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! @@ -118,6 +118,7 @@ Required Arguments: * reset : DELETE workdir dir and re-init it (debugging option) EDITS TO ALL FILES IN WORKDIR WILL BE LOST! * printbinds: print singularity binds (paths) * local : same as runlocal + * slurm : same as run (run with slurm) Optional Arguments: @@ -641,12 +642,13 @@ function main(){ dryrun) dryrun && exit 0;; unlock) unlock && exit 0;; run) runslurm && exit 0;; - runqsub) runqsub && exit 0;; + slurm) runslurm && exit 0;; # same as run + qsub) runqsub && exit 0;; runlocal) runlocal && exit 0;; + local) runlocal && exit 0;; # hidden option reset) reset && exit 0;; touch) touch && exit 0;; dry) dryrun && exit 0;; # hidden option - local) runlocal && exit 0;; # hidden option reconfig) reconfig && exit 0;; # hidden option for debugging printbinds) printbinds && exit 0;; # hidden option help) usage && exit 0;; # print help From 8f794b204dcd526f103b98f18380d499769a3afe Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 14:56:45 -0500 Subject: [PATCH 06/18] docs: note sge config sources --- config/eddie/README.md | 5 +++++ config/eddie/submit_script.sh | 1 + 2 files changed, 6 insertions(+) create mode 100644 config/eddie/README.md diff --git a/config/eddie/README.md b/config/eddie/README.md new file mode 100644 index 0000000..0229f41 --- /dev/null +++ b/config/eddie/README.md @@ -0,0 +1,5 @@ +these config files were adapted from the following sources: + +- https://github.com/Snakemake-Profiles/sge/tree/e8175c52c0566f4d569e132e748568283c799f78 +- https://github.com/riboviz/riboviz/tree/476ee8c8fed775a795e08f24863adfee7355c486/jobs +- https://nf-co.re/configs/eddie/ diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh index 4de3ddc..148bcef 100644 --- a/config/eddie/submit_script.sh +++ b/config/eddie/submit_script.sh @@ -7,6 +7,7 @@ #$ -o logs/$JOB_NAME-$JOB_ID-$HOSTNAME.out #$ -e 
logs/$JOB_NAME-$JOB_ID-$HOSTNAME.err +. /etc/profile.d/modules.sh $MODULE_LOAD $EXPORT_SING_CACHE_DIR_CMD From b05f54b27d1f4b3dfe1dc80012d02e4c10e38c7d Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:24:59 -0500 Subject: [PATCH 07/18] fix: create sing_cache_dir during init --- charlie | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index c6f4377..3a860e3 100755 --- a/charlie +++ b/charlie @@ -172,6 +172,8 @@ function init() { if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi mkdir -p $WORKDIR + mkdir -p $SING_CACHE_DIR + # copy config resources cp -r ${PIPELINE_HOME}/config $WORKDIR/ @@ -592,7 +594,6 @@ function main(){ fi echo "singularity cache dir (--singcache) is not set, using ${SING_CACHE_DIR}" fi - mkdir -p $SING_CACHE_DIR export EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" # required files From ac602af6e4db8947a0905ecd727553c9bd457117 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:38:14 -0500 Subject: [PATCH 08/18] chore: snakemake must already be in user path for eddie --- charlie | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index 3a860e3..bf419ae 100755 --- a/charlie +++ b/charlie @@ -626,7 +626,8 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity" + MODULE_LOAD="module load python/3.8 singularity" + # snakemake is already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else CLUSTER_PROFILE="config/unknown" From 1d8670ad9d567dc005a651f1c73035841f1d2f86 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:51:27 -0500 Subject: [PATCH 09/18] fix(eddie): do not specify python version --- charlie | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charlie b/charlie 
index bf419ae..a1178bf 100755 --- a/charlie +++ b/charlie @@ -626,7 +626,7 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load python/3.8 singularity" + MODULE_LOAD="module load python singularity" # snakemake is already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else From 442b6cbb35a471bcd44b3f6cd96dc111a0d1edc0 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:53:28 -0500 Subject: [PATCH 10/18] fix: only load singularity module for eddie --- charlie | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charlie b/charlie index a1178bf..5d1d575 100755 --- a/charlie +++ b/charlie @@ -626,8 +626,8 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load python singularity" - # snakemake is already in Taka's path in his bashrc + MODULE_LOAD="module load singularity" + # python & snakemake are already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else CLUSTER_PROFILE="config/unknown" From 2a8bc5b4ac3f504b3614743942f70b45e58bb72a Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Dec 2024 10:49:30 -0500 Subject: [PATCH 11/18] docs: add details to warning message for unknown platform --- charlie | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index 5d1d575..4a635ad 100755 --- a/charlie +++ b/charlie @@ -631,7 +631,9 @@ function main(){ TEMP_DIR="/exports/eddie/scratch/$USER" else CLUSTER_PROFILE="config/unknown" - echo """WARNING: detected platform is $PLATFORM. Please edit the files in $CLUSTER_PROFILE & config.yaml for compatibility with your computing environment + echo """WARNING: detected platform is $PLATFORM. + Please edit the files in $CLUSTER_PROFILE & config.yaml for compatibility with your computing environment. 
+ Also, make sure Singularity, Snakemake $SNAKEMAKEVERSION, Python $PYTHONVERSION, and pandas are installed. """ fi export PLATFORM CLUSTER_PROFILE PARTITION EXTRA_SINGULARITY_BINDS CONDA_ACTIVATE MODULE_LOAD TEMP_DIR REFS_DIR From a89b1ff3dce23a4da8beafd1daa87c8110e97258 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Dec 2024 12:20:41 -0500 Subject: [PATCH 12/18] fix(eddie): add Taka's venv to path temporary workaround until we find out how to make this work more generally --- charlie | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index 4a635ad..594ff11 100755 --- a/charlie +++ b/charlie @@ -626,7 +626,8 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load singularity" + PATH_PREPEND='export PATH="/home/ttakanob/py3.8_venv/bin/:$PATH"' + MODULE_LOAD="module load singularity; $PATH_PREPEND" # python & snakemake are already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else From 191b11e97bb6f82b0c08ce001000f243c3d35ad1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 2 Jan 2025 14:04:03 -0500 Subject: [PATCH 13/18] fix(runlocal): only check slurm job id on biowulf and frce to enforce interactive node only --- charlie | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/charlie b/charlie index 594ff11..a712c9c 100755 --- a/charlie +++ b/charlie @@ -338,8 +338,6 @@ function printbinds(){ function runlocal() { runcheck - # TODO do not assume $SLURM_JOB_ID exists, may run on other platform without slurm e.g. 
eddie
-    if [ "$SLURM_JOB_ID" == "" ];then err "runlocal can only be done on an interactive node"; exit 1; fi
     run "local"
 }
 
@@ -431,6 +429,12 @@ function run() {
 
     if [ "$1" == "local" ];then
 
+        if [ "$PLATFORM" == "biowulf" ] || [ "$PLATFORM" == "fnlcr" ]; then
+            if [ "$SLURM_JOB_ID" == "" ]; then
+                err "runlocal can only be done on an interactive node";
+                exit 1;
+            fi
+        fi
         preruncleanup
 
         $EXPORT_SING_CACHE_DIR_CMD

From ab5ca2edc3c201938e8284272e5d9ee027b0b39e Mon Sep 17 00:00:00 2001
From: Kelly Sovacool
Date: Thu, 9 Jan 2025 14:49:53 -0500
Subject: [PATCH 14/18] fix: do not submit batch job; eddie workers do not have qsub

---
 charlie                       |  4 ++--
 config/eddie/submit_script.sh | 18 +++---------------
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/charlie b/charlie
index a712c9c..08817cd 100755
--- a/charlie
+++ b/charlie
@@ -475,8 +475,8 @@ function run() {
         preruncleanup
         cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\
             envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \
-            > ${WORKDIR}/submit_script.sh
-        pushd $WORKDIR && qsub submit_script.sh && popd
+            > ${WORKDIR}/run_script.sh
+        pushd $WORKDIR && bash run_script.sh && popd
 
     elif [ "$1" == "--touch" ];then
 
diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh
index 148bcef..52637a7 100644
--- a/config/eddie/submit_script.sh
+++ b/config/eddie/submit_script.sh
@@ -1,11 +1,6 @@
-#!/usr/bin/env bash
-#$ -N charlie
-#$ -cwd
-#$ -l h_rt=48:00:00
-#$ -l h_vmem=40g
-#$ -pe sharedmem 2
-#$ -o logs/$JOB_NAME-$JOB_ID-$HOSTNAME.out
-#$ -e logs/$JOB_NAME-$JOB_ID-$HOSTNAME.err
+#!/usr/bin/env bash
+# do not submit this script with qsub
+# as worker nodes cannot submit additional jobs themselves
 
 . /etc/profile.d/modules.sh
 $MODULE_LOAD
@@ -27,10 +22,3 @@ snakemake -s $SNAKEFILE \
     --keep-going \
     --stats ${WORKDIR}/snakemake.stats \
     2>&1 | tee ${WORKDIR}/snakemake.log
-
-if [ "$?" 
-eq "0" ];then - snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --report ${WORKDIR}/runqsub_snakemake_report.html \ - --configfile $CONFIGFILE -fi From 8de9454323e6c104630965846c908aaeb6fe5372 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 17 Jan 2025 15:08:52 -0500 Subject: [PATCH 15/18] feat: add back custom submit & jobscripts from sge profile https://github.com/Snakemake-Profiles/sge/blob/e8175c52c0566f4d569e132e748568283c799f78/%7B%7Bcookiecutter.profile_name%7D%7D/sge-jobscript.sh --- config/eddie/sge-jobscript.sh | 7 + config/eddie/sge-submit.py | 299 ++++++++++++++++++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 config/eddie/sge-jobscript.sh create mode 100644 config/eddie/sge-submit.py diff --git a/config/eddie/sge-jobscript.sh b/config/eddie/sge-jobscript.sh new file mode 100644 index 0000000..e416637 --- /dev/null +++ b/config/eddie/sge-jobscript.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# properties = {properties} + +# exit on first error +set -o errexit + +{exec_job} diff --git a/config/eddie/sge-submit.py b/config/eddie/sge-submit.py new file mode 100644 index 0000000..b77ce4c --- /dev/null +++ b/config/eddie/sge-submit.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 + +import os +import re +import math +import argparse +import subprocess + +# use warnings.warn() rather than print() to output info in this script +# because snakemake expects the jobid to be the only output +import warnings + +from snakemake import io +from snakemake.utils import read_job_properties + +DEFAULT_JOB_NAME = "snakemake_job" +QSUB_DEFAULTS = "-cwd -V" +CLUSTER_CONFIG = "cluster.yaml" + +# SGE syntax for options is `-option [value]` and for resources is `-l name=value` +# we therefore distinguish the two in this script to make it easier to handle. +# We also define some aliases for options and resources so that the rules can +# be more expressive than a list of cryptic SGE resources. 
+ +# We additionally pickup a list of environment modules which will be loaded in the +# jobscript + +OPTION_MAPPING = { + "binding": ("binding",), + "cwd": ("cwd",), + "e": ("e", "error"), + "hard": ("hard",), + "j": ("j", "join"), + "m": ("m", "mail_options"), + "M": ("M", "email"), + "notify": ("notify",), + "now": ("now",), + "N": ("N", "name"), + "o": ("o", "output"), + "P": ("P", "project"), + "p": ("p", "priority"), + "pe": ("pe", "parallel_environment"), + "pty": ("pty",), + "q": ("q", "queue"), + "R": ("R", "reservation"), + "r": ("r", "rerun"), + "soft": ("soft",), + "v": ("v", "variable"), + "V": ("V", "export_env"), +} + +RESOURCE_MAPPING = { + # default queue resources + "qname": ("qname",), + "hostname": ("hostname",), + # "notify" -- conflicts with OPTION_MAPPING + "calendar": ("calendar",), + "min_cpu_interval": ("min_cpu_interval",), + "tmpdir": ("tmpdir",), + "seq_no": ("seq_no",), + "s_rt": ("s_rt", "soft_runtime", "soft_walltime"), + "h_rt": ("h_rt", "time", "runtime", "walltime"), + "s_cpu": ("s_cpu", "soft_cpu"), + "h_cpu": ("h_cpu", "cpu"), + "s_data": ("s_data", "soft_data"), + "h_data": ("h_data", "data"), + "s_stack": ("s_stack", "soft_stack"), + "h_stack": ("h_stack", "stack"), + "s_core": ("s_core", "soft_core"), + "h_core": ("h_core", "core"), + "s_rss": ("s_rss", "soft_resident_set_size"), + "h_rss": ("h_rss", "resident_set_size"), + # default host resources + "slots": ("slots",), + "s_vmem": ("s_vmem", "soft_memory", "soft_virtual_memory"), + # "mem_mb" is a default snakemake resource name which will be passed in + "h_vmem": ("h_vmem", "mem_mb", "mem", "memory", "virtual_memory"), + "s_fsize": ("s_fsize", "soft_file_size"), + # "disk_mb" is a default snakemake resource name which will be passed in + "h_fsize": ("h_fsize", "disk_mb", "file_size"), +} + +IGNORED_RESOURCES = ["mem_mib", "disk_mib"] + +NONREQUESTABLE_RESOURCES = ["tmpdir"] + + +def add_custom_resources(resources, resource_mapping=RESOURCE_MAPPING): + """Adds new resources 
to resource_mapping.
+
+    resources -> dict where key is sge resource name and value is a
+                 single name or a list of names to be used as aliased
+    """
+    for key, val in resources.items():
+        if key not in resource_mapping:
+            resource_mapping[key] = tuple()
+
+        # make sure the resource name itself is an alias
+        resource_mapping[key] += (key,)
+        if isinstance(val, list):
+            for alias in val:
+                if val != key:
+                    resource_mapping[key] += (alias,)
+        else:
+            if val != key:
+                resource_mapping[key] += (val,)
+
+
+def parse_jobscript():
+    """Minimal CLI to require/only accept single positional argument."""
+    p = argparse.ArgumentParser(description="SGE snakemake submit script")
+    p.add_argument("jobscript", help="Snakemake jobscript with job properties.")
+    return p.parse_args().jobscript
+
+
+def parse_qsub_defaults(parsed):
+    """Unpack QSUB_DEFAULTS."""
+    d = parsed.split() if type(parsed) == str else parsed
+
+    options = {}
+    for arg in d:
+        if "=" in arg:
+            k, v = arg.split("=")
+            options[k.strip("-")] = v.strip()
+        else:
+            options[arg.strip("-")] = ""
+    return options
+
+
+def format_job_properties(string):
+    # we use 'rulename' rather than 'rule' for consistency with the --cluster-config
+    # snakemake option
+    if job_properties["type"] == "group":
+        return string.format(rulename="snakejob", jobid=job_properties["jobid"])
+    return string.format(rulename=job_properties["rule"], jobid=job_properties["jobid"])
+
+
+def parse_qsub_settings(
+    source, resource_mapping=RESOURCE_MAPPING, option_mapping=OPTION_MAPPING
+):
+    job_options = {"options": {}, "resources": {}}
+
+    for skey, sval in source.items():
+        found = False
+        for rkey, rval in resource_mapping.items():
+            if skey in IGNORED_RESOURCES:
+                found = True
+                break
+            if skey in rval:
+                found = True
+                # Snakemake resources can only be defined as integers, but SGE interprets
+                # plain integers for memory as bytes. 
This hack means we interpret memory + # requests as megabytes which maps to the snakemake resources "mem_mb" + # and "disk_mb". + if (rkey == "s_vmem") or (rkey == "h_vmem"): + job_options["resources"].update({rkey: str(sval) + "M"}) + elif (rkey == "s_fsize") or (rkey == "h_fsize"): + job_options["resources"].update({rkey: str(sval) + "M"}) + else: + job_options["resources"].update({rkey: sval}) + break + if found: + continue + for okey, oval in option_mapping.items(): + if skey in oval: + found = True + job_options["options"].update({okey: sval}) + break + if not found: + raise KeyError(f"Unknown SGE option or resource: {skey}") + + return job_options + + +def load_cluster_config(path): + """Load config to dict either from absolute path or relative to profile dir.""" + if path: + path = os.path.join(os.path.dirname(__file__), os.path.expandvars(path)) + default_cluster_config = io.load_configfile(path) + else: + default_cluster_config = {} + if "__default__" not in default_cluster_config: + default_cluster_config["__default__"] = {} + return default_cluster_config + + +def ensure_directory_exists(path): + """Check if directory exists and create if not""" + directory = os.path.dirname(path) + if not os.path.exists(directory): + os.makedirs(directory, exist_ok=True) + return + + +def update_double_dict(outer, inner): + """Similar to dict.update() but does the update on nested dictionaries""" + for k, v in outer.items(): + outer[k].update(inner[k]) + + +def sge_option_string(key, val): + if val == "": + return f"-{key}" + if type(val) == bool: + return f"-{key} " + ("yes" if val else "no") + return format_job_properties(f"-{key} {val}") + + +def sge_resource_string(key, val): + if val == "": + return f"-l {key}" + if type(val) == bool: + return f"-{key}=" + ("true" if val else "false") + return f"-l {key}={val}" + + +def submit_job(jobscript, qsub_settings): + """Submit jobscript and return jobid.""" + + # remove any non-requestable resources which have somehow been 
added to + # the resource list + for resource in list(qsub_settings["resources"].keys()): + if resource in NONREQUESTABLE_RESOURCES: + del qsub_settings["resources"][resource] + + flatten = lambda l: [item for sublist in l for item in sublist] + batch_options = flatten( + [sge_option_string(k, v).split() for k, v in qsub_settings["options"].items()] + ) + batch_resources = flatten( + [ + sge_resource_string(k, v).split() + for k, v in qsub_settings["resources"].items() + ] + ) + try: + # -terse means only the jobid is returned rather than the normal 'Your job...' string + jobid = ( + subprocess.check_output( + ["qsub", "-terse"] + batch_options + batch_resources + [jobscript] + ) + .decode() + .rstrip() + ) + except subprocess.CalledProcessError as e: + raise e + except Exception as e: + raise e + return jobid + + +qsub_settings = {"options": {}, "resources": {}} + +jobscript = parse_jobscript() + +# get the job properties dictionary from snakemake +job_properties = read_job_properties(jobscript) + +# load the default cluster config +cluster_config = load_cluster_config(CLUSTER_CONFIG) + +if "__resources__" in cluster_config: + add_custom_resources(cluster_config["__resources__"]) + +# qsub default arguments +update_double_dict( + qsub_settings, parse_qsub_settings(parse_qsub_defaults(QSUB_DEFAULTS)) +) + +# cluster_config defaults +update_double_dict(qsub_settings, parse_qsub_settings(cluster_config["__default__"])) + +# resources defined in the snakemake file (note that these must be integer) +# we pass an empty dictionary for option_mapping because options should not be +# specified in the snakemake file +update_double_dict( + qsub_settings, + parse_qsub_settings(job_properties.get("resources", {}), option_mapping={}), +) + +# get any rule specific options/resources from the default cluster config +update_double_dict( + qsub_settings, + parse_qsub_settings(cluster_config.get(job_properties.get("rule"), {})), +) + +# get any options/resources specified through 
the --cluster-config command line argument +update_double_dict( + qsub_settings, parse_qsub_settings(job_properties.get("cluster", {})) +) + +# ensure qsub output dirs exist +for o in ("o", "e"): + ensure_directory_exists(qsub_settings["options"][o]) if o in qsub_settings[ + "options" + ] else None + +# submit job and echo id back to Snakemake (must be the only stdout) +print(submit_job(jobscript, qsub_settings)) From e0778f10785e486ee27d16a952544335f0bee8bd Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 21 Jan 2025 12:17:34 -0500 Subject: [PATCH 16/18] chore: make scripts executable --- config/eddie/sge-cancel.py | 0 config/eddie/sge-jobscript.sh | 0 config/eddie/sge-status.py | 0 config/eddie/submit_script.sh | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 config/eddie/sge-cancel.py mode change 100644 => 100755 config/eddie/sge-jobscript.sh mode change 100644 => 100755 config/eddie/sge-status.py mode change 100644 => 100755 config/eddie/submit_script.sh diff --git a/config/eddie/sge-cancel.py b/config/eddie/sge-cancel.py old mode 100644 new mode 100755 diff --git a/config/eddie/sge-jobscript.sh b/config/eddie/sge-jobscript.sh old mode 100644 new mode 100755 diff --git a/config/eddie/sge-status.py b/config/eddie/sge-status.py old mode 100644 new mode 100755 diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh old mode 100644 new mode 100755 From 1740bc2c86242873364e5763131d763701abd0f8 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 21 Jan 2025 12:19:01 -0500 Subject: [PATCH 17/18] fix: use qsub -V & mem_mib --- config/eddie/add_mem_mib.py | 17 +++++++++++++++ config/eddie/cluster.yaml | 42 +++++++++++++++++++------------------ config/eddie/config.yaml | 4 ++-- 3 files changed, 41 insertions(+), 22 deletions(-) create mode 100644 config/eddie/add_mem_mib.py diff --git a/config/eddie/add_mem_mib.py b/config/eddie/add_mem_mib.py new file mode 100644 index 0000000..6a8f102 --- /dev/null +++ 
b/config/eddie/add_mem_mib.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +import ruamel.yaml + +yaml = ruamel.yaml.YAML() +yaml.preserve_quotes = True +yaml.explicit_start = True + +with open("cluster.yaml", "r") as infile: + data = yaml.load(infile) + +for k, v in data.items(): + if "mem" in v: + data[k]["mem_mib"] = int(v["mem"].rstrip("g")) * 1024 + +with open("cluster.yaml.2", "w") as outfile: + yaml.dump(data, outfile) diff --git a/config/eddie/cluster.yaml b/config/eddie/cluster.yaml index 995ad93..1351a3f 100644 --- a/config/eddie/cluster.yaml +++ b/config/eddie/cluster.yaml @@ -1,89 +1,91 @@ __default__: output: "logs/{rule}.{wildcards}.$JOB_ID.out" error: "logs/{rule}.{wildcards}.$JOB_ID.err" - mem: 40g + mem_mib: 40960 threads: 2 time: 4:00:00 name: "{rule}.{wildcards}" +test: + mem_mib: 10240 cutadapt: - mem: 120g + mem_mib: 122880 threads: 56 time: 6:00:00 dcc: - mem: 120g + mem_mib: 122880 threads: 4 time: 4:00:00 find_circ_align: - mem: 120g + mem_mib: 122880 threads: 56 time: 6:00:00 find_circ: - mem: 120g + mem_mib: 122880 threads: 56 time: 6:00:00 mapsplice: - mem: 200g + mem_mib: 204800 threads: 56 time: 48:00:00 mapsplice_postprocess: - mem: 120g + mem_mib: 122880 threads: 4 time: 4:00:00 nclscan: - mem: 512g + mem_mib: 524288 threads: 56 time: 4:00:00 partition: largemem fastqc: - mem: 40g + mem_mib: 40960 threads: 4 time: 4:00:00 ciri: - mem: 512g + mem_mib: 524288 threads: 56 time: 4:00:00 partition: largemem filter_ciri_bam_for_BSJs: - mem: 512g + mem_mib: 524288 threads: 4 time: 24:00:00 partition: largemem create_index: - mem: 200g + mem_mib: 204800 threads: 56 time: 12:00:00 star1p: - mem: 200g + mem_mib: 204800 threads: 56 time: 6:00:00 star2p: - mem: 200g + mem_mib: 204800 threads: 56 time: 6:00:00 star_circrnafinder: - mem: 200g + mem_mib: 204800 threads: 56 time: 6:00:00 estimate_duplication: - mem: 200g + mem_mib: 204800 threads: 4 time: 4:00:00 create_circExplorer_BSJ_bam: - mem: 120g + mem_mib: 122880 threads: 4 time: 4:00:00 
create_circExplorer_linear_spliced_bams: - mem: 120g + mem_mib: 122880 threads: 56 time: 8:00:00 clear: time: 1:00:00 split_splice_reads_BAM_create_BW: - mem: 120g + mem_mib: 122880 time: 24:00:00 split_linear_reads_BAM_create_BW: - mem: 120g + mem_mib: 122880 time: 24:00:00 alignment_stats: time: 1:00:00 diff --git a/config/eddie/config.yaml b/config/eddie/config.yaml index 7718c71..0cac0cb 100644 --- a/config/eddie/config.yaml +++ b/config/eddie/config.yaml @@ -1,7 +1,7 @@ cluster: qsub - -cwd + -terse -cwd -V -l h_rt={cluster.time} - -l h_vmem={cluster.mem} + -l h_vmem={cluster.mem_mib}M -pe sharedmem {cluster.threads} -N {cluster.name} -o {cluster.output} From 440cee977a3cd6c3d7df5ea8ab751dfa2f0bbb8f Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 21 Jan 2025 12:32:30 -0500 Subject: [PATCH 18/18] fix: set singularity cachedir & tmpdir in submission script for eddie, need to set cachedir to /exports/eddie/scratch/$USER/.singularity and set sing tmpdir to \$TMPDIR --- charlie | 11 +++++++---- config/biowulf/submit_script.sh | 2 +- config/eddie/submit_script.sh | 3 ++- config/fnlcr/submit_script.sh | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/charlie b/charlie index 08817cd..c36a096 100755 --- a/charlie +++ b/charlie @@ -437,7 +437,8 @@ function run() { fi preruncleanup - $EXPORT_SING_CACHE_DIR_CMD + export SINGULARITY_CACHEDIR=$SING_CACHE_DIR + export SINGULARITY_TMPDIR=$TMPDIR snakemake -s $SNAKEFILE\ --directory $WORKDIR \ @@ -466,7 +467,7 @@ function run() { preruncleanup cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ - envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $SING_CACHE_DIR $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ > ${WORKDIR}/submit_script.sh sbatch ${WORKDIR}/submit_script.sh @@ -474,7 +475,7 @@ function run() { preruncleanup cat 
${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ - envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $SING_CACHE_DIR $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ > ${WORKDIR}/run_script.sh pushd $WORKDIR && bash run_script.sh && popd @@ -593,12 +594,14 @@ function main(){ if [[ -z "$SING_CACHE_DIR" ]]; then if [[ -d "/data/$USER" ]]; then SING_CACHE_DIR="/data/$USER/.singularity" + elif [[ "$PLATFORM" == "eddie" ]]; then + SING_CACHE_DIR="/exports/eddie/scratch/$USER/.singularity" else SING_CACHE_DIR="${WORKDIR}/.singularity" fi + export SING_CACHE_DIR echo "singularity cache dir (--singcache) is not set, using ${SING_CACHE_DIR}" fi - export EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" # required files export CONFIGFILE="${WORKDIR}/config.yaml" diff --git a/config/biowulf/submit_script.sh b/config/biowulf/submit_script.sh index 926718b..8f7087d 100644 --- a/config/biowulf/submit_script.sh +++ b/config/biowulf/submit_script.sh @@ -8,7 +8,7 @@ cd $SLURM_SUBMIT_DIR $MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR snakemake -s $SNAKEFILE \ --directory $WORKDIR \ diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh index 52637a7..fa7617f 100755 --- a/config/eddie/submit_script.sh +++ b/config/eddie/submit_script.sh @@ -4,7 +4,8 @@ . 
/etc/profile.d/modules.sh $MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR +export SINGULARITY_TMPDIR=$TMPDIR snakemake -s $SNAKEFILE \ --directory $WORKDIR \ diff --git a/config/fnlcr/submit_script.sh b/config/fnlcr/submit_script.sh index 926718b..8f7087d 100644 --- a/config/fnlcr/submit_script.sh +++ b/config/fnlcr/submit_script.sh @@ -8,7 +8,7 @@ cd $SLURM_SUBMIT_DIR $MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR snakemake -s $SNAKEFILE \ --directory $WORKDIR \