diff --git a/CHANGELOG.md b/CHANGELOG.md index f65f38f..8da2b20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## CHARLIE development version +- Support Eddie (The University of Edinburgh's HPC cluster) with `--platform=eddie` and `--runmode=qsub`. (#136, @kelly-sovacool) + ## CHARLIE 0.11.1 - CHARLIE was falsely throwing a file permissions error for tempdir values containing bash variables. (#118, @kelly-sovacool) diff --git a/README.md b/README.md index b1867ba..ce4630e 100644 --- a/README.md +++ b/README.md @@ -164,7 +164,8 @@ Required Arguments: 2. RUNMODE : [Type: String] Valid options: * init : initialize workdir * dryrun : dry run snakemake to generate DAG - * run : run with slurm + * run : run by submitting the job with slurm + * qsub : run by submitting the job with qsub * runlocal : run without submitting to sbatch ADVANCED RUNMODES (use with caution!!) * unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! @@ -172,6 +173,7 @@ Required Arguments: * reset : DELETE workdir dir and re-init it (debugging option) EDITS TO ALL FILES IN WORKDIR WILL BE LOST! * printbinds: print singularity binds (paths) * local : same as runlocal + * slurm : same as run (run with slurm) Optional Arguments: @@ -181,6 +183,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. +--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with `scontrol`. 
--help|-h : print this help diff --git a/charlie b/charlie index 1c91cc1..c36a096 100755 --- a/charlie +++ b/charlie @@ -5,12 +5,7 @@ # CHARLIE set -eo pipefail -## TODO module statements can only run on biowulf -# decide trigger -trigger="mtime" -# trigger="input" -# trigger="code" ########################################################################################## # functions @@ -36,58 +31,16 @@ function get_platform() { ########################################################################################## # initial setup ########################################################################################## - +# set defaults for global variables # set PIPELINE_HOME PIPELINE_HOME=$(readlink -f $(dirname "$0")) -# set snakefile -SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile" # get github commit tag GIT_COMMIT_TAG=$(get_git_commitid_tag $PIPELINE_HOME) -########################################################################################## -# Some more set up -########################################################################################## - PYTHONVERSION="3" SNAKEMAKEVERSION="7" -CONDA_ACTIVATE='' -PATH_PREPEND='' -MODULE_LOAD='' -PLATFORM=$(get_platform) -PARTITION='norm' -EXTRA_SINGULARITY_BINDS="" -TEMP_DIR="" -REFS_DIR="" -CLUSTER_PROFILE="config/unknown" -if [ "$PLATFORM" == "biowulf" ]; then - CLUSTER_PROFILE="config/slurm-biowulf" - PARTITION="ccr,$PARTITION" - EXTRA_SINGULARITY_BINDS="/lscratch" - CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' - MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE" - TEMP_DIR='/lscratch/$SLURM_JOB_ID/' - REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" -elif [ "$PLATFORM" == "fnlcr" ]; then - CLUSTER_PROFILE="config/slurm-fnlcr" - EXTRA_SINGULARITY_BINDS="/scratch/local" - CONDA_ACTIVATE=". 
'/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" - # make sure spooker is in the path - PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"' - MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE" - TEMP_DIR="/scratch/local/" - REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/" -else - echo """WARNING: detected platform is $PLATFORM. Please edit the files in config/unknown/ & config.yaml for compatibility with your computing environment - """ -fi - -# set defaults -HOST="hg38" -ADDITIVES="ERCC" -VIRUSES="NC_009333.1" -MANIFEST="${PIPELINE_HOME}/config/samples.tsv" # set variables SCRIPTNAME="$0" @@ -156,14 +109,16 @@ Required Arguments: 2. RUNMODE : [Type: String] Valid options: * init : initialize workdir * dryrun : dry run snakemake to generate DAG - * run : run with slurm - * runlocal : run without submitting to sbatch + * run : run by submitting the job with slurm + * qsub : run by submitting the job with qsub + * runlocal : run locally without submitting the job to a scheduler ADVANCED RUNMODES (use with caution!!) * unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! * reconfig : recreate config file in WORKDIR (debugging option) EDITS TO config.yaml WILL BE LOST! * reset : DELETE workdir dir and re-init it (debugging option) EDITS TO ALL FILES IN WORKDIR WILL BE LOST! * printbinds: print singularity binds (paths) * local : same as runlocal + * slurm : same as run (run with slurm) Optional Arguments: @@ -172,6 +127,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. 
+--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with scontrol. (--runmode=init only) --help|-h : print this help @@ -212,39 +168,36 @@ function err() { usage && cat <<< " function init() { -# create output folder -if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi -mkdir -p $WORKDIR - -# copy config resources -cp -r ${PIPELINE_HOME}/config $WORKDIR/ - -# copy config template and samples files -if [ ! -f $CONFIGFILE ];then -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/HOST/${HOST}/g" \ - -e "s/ADDITIVES/${ADDITIVES}/g" \ - -e "s/VIRUSES/${VIRUSES}/g" \ - -e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \ - -e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \ - -e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \ - ${PIPELINE_HOME}/config/config.yaml \ - > $CONFIGFILE -fi -if [ ! -f $WORKDIR/nclscan.config ];then -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/resources/NCLscan.config.template > $WORKDIR/nclscan.config -fi - -if [ ! -f $WORKDIR/samples.tsv ];then -cp $MANIFEST $WORKDIR/samples.tsv -fi - -#create log and stats folders -if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi -if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi - -echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WORKDIR/samples.tsv" + # create output folder + if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi + mkdir -p $WORKDIR + + mkdir -p $SING_CACHE_DIR + + # copy config resources + cp -r ${PIPELINE_HOME}/config $WORKDIR/ + + # copy config template and samples files + if [ ! -f $CONFIGFILE ];then + cat ${PIPELINE_HOME}/config/config.yaml |\ + envsubst '$PIPELINE_HOME $WORKDIR $HOST $ADDITIVES $VIRUSES $TEMP_DIR $REFS_DIR $CLUSTER_PROFILE' \ + > $CONFIGFILE + fi + if [ ! 
-f $WORKDIR/nclscan.config ];then + cat ${PIPELINE_HOME}/resources/NCLscan.config.template |\ + envsubst '$WORKDIR' \ + > $WORKDIR/nclscan.config + fi + + if [ ! -f $WORKDIR/samples.tsv ];then + cp $MANIFEST $WORKDIR/samples.tsv + fi + + #create log and stats folders + if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi + if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi + + echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WORKDIR/samples.tsv" } @@ -301,16 +254,9 @@ function reconfig(){ # rebuild config file and replace the config.yaml in the WORKDIR # this is only for dev purposes when new key-value pairs are being added to the config file check_essential_files - sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/HOST/${HOST}/g" \ - -e "s/ADDITIVES/${ADDITIVES}/g" \ - -e "s/VIRUSES/${VIRUSES}/g" \ - -e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \ - -e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \ - -e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \ - ${PIPELINE_HOME}/config/config.yaml \ - > $CONFIGFILE + cat ${PIPELINE_HOME}/config/config.yaml |\ + envsubst '$PIPELINE_HOME $WORKDIR $HOST $ADDITIVES $VIRUSES $TEMP_DIR $REFS_DIR $CLUSTER_PROFILE' \ + > $WORKDIR/config.yaml echo "$WORKDIR/config.yaml has been updated!" 
} @@ -335,6 +281,7 @@ function load_modules() { function runcheck(){ check_essential_files load_modules + set_singularity_binds } ########################################################################################## @@ -374,7 +321,7 @@ function unlock() { function set_singularity_binds() { binds=$( $PIPELINE_HOME/workflow/scripts/set_singularity_bind_paths.py ${WORKDIR}/config.yaml ${WORKDIR}/samples.tsv) - SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds" + export SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds" } ########################################################################################## # PRINT SINGULARITY BINDS ... print bound singularity folders for debugging @@ -391,8 +338,6 @@ function printbinds(){ function runlocal() { runcheck - set_singularity_binds - if [ "$SLURM_JOB_ID" == "" ];then err "runlocal can only be done on an interactive node"; exit 1; fi run "local" } @@ -402,9 +347,12 @@ function runlocal() { function runslurm() { runcheck - set_singularity_binds run "--dry-run " && run "slurm" } +function runqsub() { + runcheck + run "--dry-run " && run "qsub" +} ########################################################################################## # CREATE RUNINFO ... 
create runinfo.yaml in workdir @@ -481,9 +429,16 @@ function run() { if [ "$1" == "local" ];then + if [ "$PLATFORM" == "biowulf" ] || [ "$PLATFORM" == "fnlcr" ]; then + if [ "$SLURM_JOB_ID" == "" ]; then + err "runlocal can only be done on an interactive node"; + exit 1; + fi + fi preruncleanup - $EXPORT_SING_CACHE_DIR_CMD + export SINGULARITY_CACHEDIR=$SING_CACHE_DIR + export SINGULARITY_TMPDIR=$TMPDIR snakemake -s $SNAKEFILE\ --directory $WORKDIR \ @@ -508,50 +463,21 @@ function run() { --configfile $CONFIGFILE fi - elif [ "$1" == "slurm" ];then + elif [ "$1" == "slurm" ]; then preruncleanup + cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $SING_CACHE_DIR $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + > ${WORKDIR}/submit_script.sh + sbatch ${WORKDIR}/submit_script.sh - cat > ${WORKDIR}/submit_script.sbatch << EOF -#!/bin/bash -#SBATCH --job-name="charlie" -#SBATCH --mem=40g -#SBATCH --partition="$PARTITION" -#SBATCH --time=48:00:00 -#SBATCH --cpus-per-task=2 -#SBATCH --mail-type=BEGIN,END,FAIL - -cd \$SLURM_SUBMIT_DIR -$MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD - -snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --use-singularity \ - --singularity-args "$SINGULARITY_BINDS" \ - --use-envmodules \ - --printshellcmds \ - --latency-wait 300 \ - --configfile $CONFIGFILE \ - --profile $CLUSTER_PROFILE \ - -j 500 \ - --rerun-incomplete \ - --rerun-triggers $trigger \ - --retries 2 \ - --keep-going \ - --stats ${WORKDIR}/snakemake.stats \ - 2>&1 | tee ${WORKDIR}/snakemake.log - -if [ "\$?"
-eq "0" ];then - snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --report ${WORKDIR}/runslurm_snakemake_report.html \ - --configfile $CONFIGFILE -fi - -EOF + elif [ "$1" == "qsub" ]; then - sbatch ${WORKDIR}/submit_script.sbatch + preruncleanup + cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $SING_CACHE_DIR $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + > ${WORKDIR}/run_script.sh + pushd $WORKDIR && bash run_script.sh && popd elif [ "$1" == "--touch" ];then @@ -636,6 +562,9 @@ function main(){ MANIFEST="${i#*=}" if [ ! -f $MANIFEST ];then err "File $MANIFEST does NOT exist!";fi ;; + -p=*|--platform=*) + PLATFORM="${i#*=}" + ;; -h|--help) usage && exit 0; ;; @@ -645,22 +574,77 @@ function main(){ esac done - WORKDIR=$(readlink -f $WORKDIR) + export WORKDIR=$(readlink -f $WORKDIR) + export PIPELINE_HOME=$(readlink -f $(dirname "$0")) + export SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile" echo "Working Dir: $WORKDIR" + if [ -z "$PLATFORM" ]; then PLATFORM=$(get_platform); fi + export PLATFORM + echo "Platform: $PLATFORM" + + # set defaults + if [ -z "$HOST" ]; then HOST="hg38"; fi + export HOST + if [ -z "$ADDITIVES" ]; then ADDITIVES="ERCC"; fi + export ADDITIVES + if [ -z "$VIRUSES" ]; then VIRUSES="NC_009333.1"; fi + export VIRUSES + export MANIFEST="${PIPELINE_HOME}/config/samples.tsv" if [[ -z "$SING_CACHE_DIR" ]]; then if [[ -d "/data/$USER" ]]; then SING_CACHE_DIR="/data/$USER/.singularity" + elif [[ "$PLATFORM" == "eddie" ]]; then + SING_CACHE_DIR="/exports/eddie/scratch/$USER/.singularity" else SING_CACHE_DIR="${WORKDIR}/.singularity" fi + export SING_CACHE_DIR echo "singularity cache dir (--singcache) is not set, using ${SING_CACHE_DIR}" fi - mkdir -p $SING_CACHE_DIR - EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" # required files - CONFIGFILE="${WORKDIR}/config.yaml" + export CONFIGFILE="${WORKDIR}/config.yaml" + + # decide trigger + export 
trigger="mtime" + # trigger="input" + # trigger="code" + + # set variables based on the detected platform + if [ "$PLATFORM" == "biowulf" ]; then + CLUSTER_PROFILE="config/biowulf" + PARTITION="ccr,norm" + EXTRA_SINGULARITY_BINDS="/lscratch" + CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' + MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE" + TEMP_DIR='/lscratch/$SLURM_JOB_ID/' + REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" + elif [ "$PLATFORM" == "fnlcr" ]; then + CLUSTER_PROFILE="config/fnlcr" + PARTITION="norm" + EXTRA_SINGULARITY_BINDS="/scratch/local" + CONDA_ACTIVATE=". '/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" + # make sure spooker is in the path + PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"' + MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE" + TEMP_DIR="/scratch/local/" + REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/" + elif [ "$PLATFORM" == "eddie" ]; then + # TODO fill in other variables for eddie + CLUSTER_PROFILE="config/eddie" + PATH_PREPEND='export PATH="/home/ttakanob/py3.8_venv/bin/:$PATH"' + MODULE_LOAD="module load singularity; $PATH_PREPEND" + # python & snakemake are already in Taka's path in his bashrc + TEMP_DIR="/exports/eddie/scratch/$USER" + else + CLUSTER_PROFILE="config/unknown" + echo """WARNING: detected platform is $PLATFORM. + Please edit the files in $CLUSTER_PROFILE & config.yaml for compatibility with your computing environment. + Also, make sure Singularity, Snakemake $SNAKEMAKEVERSION, Python $PYTHONVERSION, and pandas are installed. 
+ """ + fi + export PLATFORM CLUSTER_PROFILE PARTITION EXTRA_SINGULARITY_BINDS CONDA_ACTIVATE MODULE_LOAD TEMP_DIR REFS_DIR # change group to Ziegelbauer_lab before doing anything if [ "$CHANGEGRP" == "1" ]; then change_grp "$allargs"; fi @@ -670,11 +654,13 @@ function main(){ dryrun) dryrun && exit 0;; unlock) unlock && exit 0;; run) runslurm && exit 0;; + slurm) runslurm && exit 0;; # same as run + qsub) runqsub && exit 0;; runlocal) runlocal && exit 0;; + local) runlocal && exit 0;; # hidden option reset) reset && exit 0;; touch) touch && exit 0;; dry) dryrun && exit 0;; # hidden option - local) runlocal && exit 0;; # hidden option reconfig) reconfig && exit 0;; # hidden option for debugging printbinds) printbinds && exit 0;; # hidden option help) usage && exit 0;; # print help diff --git a/config/biowulf/cluster.yaml b/config/biowulf/cluster.yaml new file mode 100644 index 0000000..945419d --- /dev/null +++ b/config/biowulf/cluster.yaml @@ -0,0 +1,94 @@ +__default__: + gres: lscratch:256 + mem: 40g + partition: ccr,norm + threads: 2 + time: 4:00:00 + name: "{rule}.{wildcards}" + output: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out" + error: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" +cutadapt: + mem: 120g + threads: 56 + time: 6:00:00 +dcc: + mem: 120g + threads: 4 + time: 4:00:00 +find_circ_align: + mem: 120g + threads: 56 + time: 6:00:00 +find_circ: + mem: 120g + threads: 56 + time: 6:00:00 +mapsplice: + mem: 200g + threads: 56 + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: 4 + time: 4:00:00 +nclscan: + mem: 512g + threads: 56 + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: 4 + time: 4:00:00 +ciri: + mem: 512g + threads: 56 + time: 4:00:00 + partition: largemem +filter_ciri_bam_for_BSJs: + mem: 512g + threads: 4 + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: 56 + time: 12:00:00 +star1p: + mem: 200g + threads: 56 + time: 6:00:00 +star2p: + mem: 200g + threads: 56 + time: 6:00:00 
+star_circrnafinder: + mem: 200g + threads: 56 + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: 4 + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: 4 + time: 4:00:00 +create_circExplorer_linear_spliced_bams: + mem: 120g + threads: 56 + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/slurm-biowulf/cluster_status.sh b/config/biowulf/cluster_status.sh similarity index 100% rename from config/slurm-biowulf/cluster_status.sh rename to config/biowulf/cluster_status.sh diff --git a/config/slurm-biowulf/config.yaml b/config/biowulf/config.yaml similarity index 93% rename from config/slurm-biowulf/config.yaml rename to config/biowulf/config.yaml index 0697a63..efce53f 100644 --- a/config/slurm-biowulf/config.yaml +++ b/config/biowulf/config.yaml @@ -8,7 +8,7 @@ cluster: sbatch --output {cluster.output} --error {cluster.error} --gres {cluster.gres} -cluster-config: "cluster.json" +cluster-config: "cluster.yaml" cluster-status: "cluster_status.sh" jobs: 499 immediate-submit: false diff --git a/config/biowulf/submit_script.sh b/config/biowulf/submit_script.sh new file mode 100644 index 0000000..8f7087d --- /dev/null +++ b/config/biowulf/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#SBATCH --job-name="charlie" +#SBATCH --mem=40g +#SBATCH --partition="$PARTITION" +#SBATCH --time=48:00:00 +#SBATCH --cpus-per-task=2 +#SBATCH --mail-type=BEGIN,END,FAIL + +cd $SLURM_SUBMIT_DIR +$MODULE_LOAD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete 
\ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" -eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runslurm_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/config/config.yaml b/config/config.yaml index c94dce0..8c14a81 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,14 +1,14 @@ ## you probably need to change or comment/uncomment some of these # # The working dir... output will be in the results subfolder of the workdir -workdir: "WORKDIR" +workdir: "$WORKDIR" # temporary directory for intermediate files that are not saved -tempdir: "TEMP_DIR" +tempdir: "$TEMP_DIR" # tab delimited samples file ... should have the following 3 columns # sampleName path_to_R1_fastq path_to_R2_fastq -samples: "WORKDIR/samples.tsv" +samples: "$WORKDIR/samples.tsv" # Should the CLEAR pipeline be run? True or False WITHOUT quotes run_clear: True @@ -26,7 +26,7 @@ run_circRNAFinder: True # Should the NCLscan pipeline be run? True or False WITHOUT quotes # This can only be run for PE data run_nclscan: False -nclscan_config: "WORKDIR/nclscan.config" +nclscan_config: "$WORKDIR/nclscan.config" # Should we also run find_circ? 
True or False WITHOUT quotes run_findcirc: False @@ -38,9 +38,9 @@ findcirc_params: "--noncanonical" # host: "hg38" # additives: "ERCC" # options are ERCC and BAC16Insert # viruses: "NC_009333.1" -host: "HOST" -additives: "ADDITIVES" -viruses: "VIRUSES" +host: "$HOST" +additives: "$ADDITIVES" +viruses: "$VIRUSES" # select viruses and other (ERCC/BAC): options are # ERCC # BAC16Insert @@ -85,14 +85,13 @@ maxsize_host: 1000000000 maxsize_virus: 5000 ## you most probably dont need to change these -scriptsdir: "PIPELINE_HOME/workflow/scripts" -resourcesdir: "PIPELINE_HOME/resources" +scriptsdir: "$PIPELINE_HOME/workflow/scripts" +resourcesdir: "$PIPELINE_HOME/resources" -# default cluster -# cluster: "PIPELINE_HOME/resources/cluster.json" -cluster: "WORKDIR/CLUSTER_PROFILE/cluster.json" +# default cluster config file +cluster: "$WORKDIR/$CLUSTER_PROFILE/cluster.yaml" -adapters: "PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa" +adapters: "$PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa" circexplorer_bsj_circRNA_min_reads: 3 # in addition to "known" and "low-conf" circRNAs identified by circexplorer, we also include those found in back_spliced.bed file but not classified as known/low-conf only if the number of reads supporting the BSJ call is greater than this number minreadcount: 3 # this is used to filter circRNAs while creating the per-sample counts table flanksize: 18 # 18bp flank on either side of the BSJ .. 
used by multiple BSJ callers @@ -107,11 +106,11 @@ high_confidence_core_callers_plus_n: 1 ciri_perl_script: "/opt2/CIRI_v2.0.6/CIRI2.pl" # path in docker container # change this path to a directory containing fasta and GTF files for all host and virus genomes -fastas_gtfs_dir: "REFS_DIR" +fastas_gtfs_dir: "$REFS_DIR" annotation_lookups: - hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" - mm39: "PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" + hg38: "$PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" + mm39: "$PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" containers: base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v7" diff --git a/config/eddie/README.md b/config/eddie/README.md new file mode 100644 index 0000000..0229f41 --- /dev/null +++ b/config/eddie/README.md @@ -0,0 +1,5 @@ +these config files were adapted from the following sources: + +- https://github.com/Snakemake-Profiles/sge/tree/e8175c52c0566f4d569e132e748568283c799f78 +- https://github.com/riboviz/riboviz/tree/476ee8c8fed775a795e08f24863adfee7355c486/jobs +- https://nf-co.re/configs/eddie/ diff --git a/config/eddie/add_mem_mib.py b/config/eddie/add_mem_mib.py new file mode 100644 index 0000000..6a8f102 --- /dev/null +++ b/config/eddie/add_mem_mib.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +import ruamel.yaml + +yaml = ruamel.yaml.YAML() +yaml.preserve_quotes = True +yaml.explicit_start = True + +with open("cluster.yaml", "r") as infile: + data = yaml.load(infile) + +for k, v in data.items(): + if "mem" in v: + data[k]["mem_mib"] = int(v["mem"].rstrip("g")) * 1024 + +with open("cluster.yaml.2", "w") as outfile: + yaml.dump(data, outfile) diff --git a/config/eddie/cluster.yaml b/config/eddie/cluster.yaml new file mode 100644 index 0000000..1351a3f --- /dev/null +++ b/config/eddie/cluster.yaml @@ -0,0 +1,95 @@ +__default__: + output: "logs/{rule}.{wildcards}.$JOB_ID.out" + error: "logs/{rule}.{wildcards}.$JOB_ID.err" + mem_mib: 40960 + threads: 2 + time: 4:00:00 + name: 
"{rule}.{wildcards}" + +test: + mem_mib: 10240 +cutadapt: + mem_mib: 122880 + threads: 56 + time: 6:00:00 +dcc: + mem_mib: 122880 + threads: 4 + time: 4:00:00 +find_circ_align: + mem_mib: 122880 + threads: 56 + time: 6:00:00 +find_circ: + mem_mib: 122880 + threads: 56 + time: 6:00:00 +mapsplice: + mem_mib: 204800 + threads: 56 + time: 48:00:00 +mapsplice_postprocess: + mem_mib: 122880 + threads: 4 + time: 4:00:00 +nclscan: + mem_mib: 524288 + threads: 56 + time: 4:00:00 + partition: largemem +fastqc: + mem_mib: 40960 + threads: 4 + time: 4:00:00 +ciri: + mem_mib: 524288 + threads: 56 + time: 4:00:00 + partition: largemem +filter_ciri_bam_for_BSJs: + mem_mib: 524288 + threads: 4 + time: 24:00:00 + partition: largemem +create_index: + mem_mib: 204800 + threads: 56 + time: 12:00:00 +star1p: + mem_mib: 204800 + threads: 56 + time: 6:00:00 +star2p: + mem_mib: 204800 + threads: 56 + time: 6:00:00 +star_circrnafinder: + mem_mib: 204800 + threads: 56 + time: 6:00:00 +estimate_duplication: + mem_mib: 204800 + threads: 4 + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem_mib: 122880 + threads: 4 + time: 4:00:00 +create_circExplorer_linear_spliced_bams: + mem_mib: 122880 + threads: 56 + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem_mib: 122880 + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem_mib: 122880 + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/eddie/config.yaml b/config/eddie/config.yaml new file mode 100644 index 0000000..0cac0cb --- /dev/null +++ b/config/eddie/config.yaml @@ -0,0 +1,26 @@ +cluster: qsub + -terse -cwd -V + -l h_rt={cluster.time} + -l h_vmem={cluster.mem_mib}M + -pe sharedmem {cluster.threads} + -N {cluster.name} + -o {cluster.output} + -e {cluster.error} +cluster-config: "cluster.yaml" +cluster-status: "sge-status.py" +cluster-cancel: "sge-cancel.py" +cluster-cancel-nargs: 20 +max-jobs-per-second: 1 
+max-status-checks-per-second: 1 +latency-wait: 60 +local-cores: 1 +jobs: 499 +immediate-submit: false +verbose: true +notemp: true +printshellcmds: true +use-singularity: true +rerun-incomplete: true +rerun-triggers: mtime +retries: 2 +keep-going: true diff --git a/config/eddie/sge-cancel.py b/config/eddie/sge-cancel.py new file mode 100755 index 0000000..54f006a --- /dev/null +++ b/config/eddie/sge-cancel.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 +import subprocess as sp +import shlex +import sys + +jobid_list = ", ".join(sys.argv[1:]) + +sp.check_call(shlex.split(f"qdel {jobid_list}")) diff --git a/config/eddie/sge-jobscript.sh b/config/eddie/sge-jobscript.sh new file mode 100755 index 0000000..e416637 --- /dev/null +++ b/config/eddie/sge-jobscript.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# properties = {properties} + +# exit on first error +set -o errexit + +{exec_job} diff --git a/config/eddie/sge-status.py b/config/eddie/sge-status.py new file mode 100755 index 0000000..3b25265 --- /dev/null +++ b/config/eddie/sge-status.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +import re +import subprocess as sp +import shlex +import sys +import time +import logging + +logger = logging.getLogger("__name__") +logger.setLevel(40) + +STATUS_ATTEMPTS = 20 + +jobid = int(sys.argv[1]) +job_status = "running" + +# WARNING this currently has no support for task array jobs + +for i in range(STATUS_ATTEMPTS): + # first try qstat to see if job is running + # we can use `qstat -s pr -u "*"` to check for all running and pending jobs + try: + qstat_res = sp.check_output(shlex.split(f"qstat -s pr")).decode().strip() + + # skip the header using [2:] + res = {int(x.split()[0]): x.split()[4] for x in qstat_res.splitlines()[2:]} + + # job is in an unspecified error state + if "E" in res[jobid]: + job_status = "failed" + break + + job_status = "running" + break + + except sp.CalledProcessError as e: + logger.error("qstat process error") + logger.error(e) + except KeyError as e: + # if the job has 
finished it won't appear in qstat and we should check qacct + # this will also provide the exit status (0 on success, 128 + exit_status on fail) + # Try getting job with scontrol instead in case sacct is misconfigured + try: + qacct_res = sp.check_output(shlex.split(f"qacct -j {jobid}")) + + exit_code = int( + re.search("exit_status ([0-9]+)", qacct_res.decode()).group(1) + ) + + if exit_code == 0: + job_status = "success" + break + + if exit_code != 0: + job_status = "failed" + break + + except sp.CalledProcessError as e: + logger.warning("qacct process error") + logger.warning(e) + if i >= STATUS_ATTEMPTS - 1: + job_status = "failed" + break + else: + # qacct can be quite slow to update on large servers + time.sleep(5) + pass + +print(job_status) diff --git a/config/eddie/sge-submit.py b/config/eddie/sge-submit.py new file mode 100644 index 0000000..b77ce4c --- /dev/null +++ b/config/eddie/sge-submit.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 + +import os +import re +import math +import argparse +import subprocess + +# use warnings.warn() rather than print() to output info in this script +# because snakemake expects the jobid to be the only output +import warnings + +from snakemake import io +from snakemake.utils import read_job_properties + +DEFAULT_JOB_NAME = "snakemake_job" +QSUB_DEFAULTS = "-cwd -V" +CLUSTER_CONFIG = "cluster.yaml" + +# SGE syntax for options is `-option [value]` and for resources is `-l name=value` +# we therefore distinguish the two in this script to make it easier to handle. +# We also define some aliases for options and resources so that the rules can +# be more expressive than a list of cryptic SGE resources. 
+ +# We additionally pickup a list of environment modules which will be loaded in the +# jobscript + +OPTION_MAPPING = { + "binding": ("binding",), + "cwd": ("cwd",), + "e": ("e", "error"), + "hard": ("hard",), + "j": ("j", "join"), + "m": ("m", "mail_options"), + "M": ("M", "email"), + "notify": ("notify",), + "now": ("now",), + "N": ("N", "name"), + "o": ("o", "output"), + "P": ("P", "project"), + "p": ("p", "priority"), + "pe": ("pe", "parallel_environment"), + "pty": ("pty",), + "q": ("q", "queue"), + "R": ("R", "reservation"), + "r": ("r", "rerun"), + "soft": ("soft",), + "v": ("v", "variable"), + "V": ("V", "export_env"), +} + +RESOURCE_MAPPING = { + # default queue resources + "qname": ("qname",), + "hostname": ("hostname",), + # "notify" -- conflicts with OPTION_MAPPING + "calendar": ("calendar",), + "min_cpu_interval": ("min_cpu_interval",), + "tmpdir": ("tmpdir",), + "seq_no": ("seq_no",), + "s_rt": ("s_rt", "soft_runtime", "soft_walltime"), + "h_rt": ("h_rt", "time", "runtime", "walltime"), + "s_cpu": ("s_cpu", "soft_cpu"), + "h_cpu": ("h_cpu", "cpu"), + "s_data": ("s_data", "soft_data"), + "h_data": ("h_data", "data"), + "s_stack": ("s_stack", "soft_stack"), + "h_stack": ("h_stack", "stack"), + "s_core": ("s_core", "soft_core"), + "h_core": ("h_core", "core"), + "s_rss": ("s_rss", "soft_resident_set_size"), + "h_rss": ("h_rss", "resident_set_size"), + # default host resources + "slots": ("slots",), + "s_vmem": ("s_vmem", "soft_memory", "soft_virtual_memory"), + # "mem_mb" is a default snakemake resource name which will be passed in + "h_vmem": ("h_vmem", "mem_mb", "mem", "memory", "virtual_memory"), + "s_fsize": ("s_fsize", "soft_file_size"), + # "disk_mb" is a default snakemake resource name which will be passed in + "h_fsize": ("h_fsize", "disk_mb", "file_size"), +} + +IGNORED_RESOURCES = ["mem_mib", "disk_mib"] + +NONREQUESTABLE_RESOURCES = ["tmpdir"] + + +def add_custom_resources(resources, resource_mapping=RESOURCE_MAPPING): + """Adds new resources 
to resource_mapping. + + resources -> dict where key is sge resource name and value is a + single name or a list of names to be used as aliased + """ + for key, val in resources.items(): + if key not in resource_mapping: + resource_mapping[key] = tuple() + + # make sure the resource name itself is an alias + resource_mapping[key] += (key,) + if isinstance(val, list): + for alias in val: + if val != key: + resource_mapping[key] += (alias,) + else: + if val != key: + resource_mapping[key] += (val,) + + +def parse_jobscript(): + """Minimal CLI to require/only accept single positional argument.""" + p = argparse.ArgumentParser(description="SGE snakemake submit script") + p.add_argument("jobscript", help="Snakemake jobscript with job properties.") + return p.parse_args().jobscript + + +def parse_qsub_defaults(parsed): + """Unpack QSUB_DEFAULTS.""" + d = parsed.split() if type(parsed) == str else parsed + + options = {} + for arg in d: + if "=" in arg: + k, v = arg.split("=") + options[k.strip("-")] = v.strip() + else: + options[arg.strip("-")] = "" + return options + + +def format_job_properties(string): + # we use 'rulename' rather than 'rule' for consistency with the --cluster-config + # snakemake option + if job_properties["type"] == "group": + return string.format(rulename="snakejob", jobid=job_properties["jobid"]) + return string.format(rulename="snakejob", jobid=job_properties["jobid"]) + + +def parse_qsub_settings( + source, resource_mapping=RESOURCE_MAPPING, option_mapping=OPTION_MAPPING +): + job_options = {"options": {}, "resources": {}} + + for skey, sval in source.items(): + found = False + for rkey, rval in resource_mapping.items(): + if skey in IGNORED_RESOURCES: + found = True + break + if skey in rval: + found = True + # Snakemake resources can only be defined as integers, but SGE interprets + # plain integers for memory as bytes. 
This hack means we interpret memory + # requests as megabytes which maps to the snakemake resources "mem_mb" + # and "disk_mb". + if (rkey == "s_vmem") or (rkey == "h_vmem"): + job_options["resources"].update({rkey: str(sval) + "M"}) + elif (rkey == "s_fsize") or (rkey == "h_fsize"): + job_options["resources"].update({rkey: str(sval) + "M"}) + else: + job_options["resources"].update({rkey: sval}) + break + if found: + continue + for okey, oval in option_mapping.items(): + if skey in oval: + found = True + job_options["options"].update({okey: sval}) + break + if not found: + raise KeyError(f"Unknown SGE option or resource: {skey}") + + return job_options + + +def load_cluster_config(path): + """Load config to dict either from absolute path or relative to profile dir.""" + if path: + path = os.path.join(os.path.dirname(__file__), os.path.expandvars(path)) + default_cluster_config = io.load_configfile(path) + else: + default_cluster_config = {} + if "__default__" not in default_cluster_config: + default_cluster_config["__default__"] = {} + return default_cluster_config + + +def ensure_directory_exists(path): + """Check if directory exists and create if not""" + directory = os.path.dirname(path) + if not os.path.exists(directory): + os.makedirs(directory, exist_ok=True) + return + + +def update_double_dict(outer, inner): + """Similar to dict.update() but does the update on nested dictionaries""" + for k, v in outer.items(): + outer[k].update(inner[k]) + + +def sge_option_string(key, val): + if val == "": + return f"-{key}" + if type(val) == bool: + return f"-{key} " + ("yes" if val else "no") + return format_job_properties(f"-{key} {val}") + + +def sge_resource_string(key, val): + if val == "": + return f"-l {key}" + if type(val) == bool: + return f"-l {key}=" + ("true" if val else "false") + return f"-l {key}={val}" + + +def submit_job(jobscript, qsub_settings): + """Submit jobscript and return jobid.""" + + # remove any non-requestable resources which have somehow been
added to + # the resource list + for resource in list(qsub_settings["resources"].keys()): + if resource in NONREQUESTABLE_RESOURCES: + del qsub_settings["resources"][resource] + + flatten = lambda l: [item for sublist in l for item in sublist] + batch_options = flatten( + [sge_option_string(k, v).split() for k, v in qsub_settings["options"].items()] + ) + batch_resources = flatten( + [ + sge_resource_string(k, v).split() + for k, v in qsub_settings["resources"].items() + ] + ) + try: + # -terse means only the jobid is returned rather than the normal 'Your job...' string + jobid = ( + subprocess.check_output( + ["qsub", "-terse"] + batch_options + batch_resources + [jobscript] + ) + .decode() + .rstrip() + ) + except subprocess.CalledProcessError as e: + raise e + except Exception as e: + raise e + return jobid + + +qsub_settings = {"options": {}, "resources": {}} + +jobscript = parse_jobscript() + +# get the job properties dictionary from snakemake +job_properties = read_job_properties(jobscript) + +# load the default cluster config +cluster_config = load_cluster_config(CLUSTER_CONFIG) + +if "__resources__" in cluster_config: + add_custom_resources(cluster_config["__resources__"]) + +# qsub default arguments +update_double_dict( + qsub_settings, parse_qsub_settings(parse_qsub_defaults(QSUB_DEFAULTS)) +) + +# cluster_config defaults +update_double_dict(qsub_settings, parse_qsub_settings(cluster_config["__default__"])) + +# resources defined in the snakemake file (note that these must be integer) +# we pass an empty dictionary for option_mapping because options should not be +# specified in the snakemake file +update_double_dict( + qsub_settings, + parse_qsub_settings(job_properties.get("resources", {}), option_mapping={}), +) + +# get any rule specific options/resources from the default cluster config +update_double_dict( + qsub_settings, + parse_qsub_settings(cluster_config.get(job_properties.get("rule"), {})), +) + +# get any options/resources specified through 
the --cluster-config command line argument +update_double_dict( + qsub_settings, parse_qsub_settings(job_properties.get("cluster", {})) +) + +# ensure qsub output dirs exist +for o in ("o", "e"): + ensure_directory_exists(qsub_settings["options"][o]) if o in qsub_settings[ + "options" + ] else None + +# submit job and echo id back to Snakemake (must be the only stdout) +print(submit_job(jobscript, qsub_settings)) diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh new file mode 100755 index 0000000..fa7617f --- /dev/null +++ b/config/eddie/submit_script.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# do not submit this script with qsub +# as worker nodes cannot submit additional jobs themselves + +. /etc/profile.d/modules.sh +$MODULE_LOAD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR +export SINGULARITY_TMPDIR=$TMPDIR + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log diff --git a/config/fnlcr/cluster.yaml b/config/fnlcr/cluster.yaml new file mode 100644 index 0000000..c1e0fb6 --- /dev/null +++ b/config/fnlcr/cluster.yaml @@ -0,0 +1,93 @@ +__default__: + mem: 40g + partition: norm + threads: 2 + time: 4:00:00 + name: "{rule}.{wildcards}" + output: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out" + error: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" +cutadapt: + mem: 120g + threads: 32 + time: 6:00:00 +dcc: + mem: 120g + threads: 4 + time: 4:00:00 +find_circ_align: + mem: 120g + threads: 32 + time: 6:00:00 +find_circ: + mem: 120g + threads: 32 + time: 6:00:00 +mapsplice: + mem: 200g + threads: 32 + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: 4 + time: 4:00:00
+nclscan: + mem: 512g + threads: 32 + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: 4 + time: 4:00:00 +ciri: + mem: 512g + threads: 32 + time: 4:00:00 + partition: largemem +filter_ciri_bam_for_BSJs: + mem: 512g + threads: 4 + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: 32 + time: 12:00:00 +star1p: + mem: 200g + threads: 32 + time: 6:00:00 +star2p: + mem: 200g + threads: 32 + time: 6:00:00 +star_circrnafinder: + mem: 200g + threads: 32 + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: 4 + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: 4 + time: 4:00:00 +create_circExplorer_linear_spliced_bams: + mem: 120g + threads: 32 + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/slurm-fnlcr/cluster_status.sh b/config/fnlcr/cluster_status.sh similarity index 100% rename from config/slurm-fnlcr/cluster_status.sh rename to config/fnlcr/cluster_status.sh diff --git a/config/slurm-fnlcr/config.yaml b/config/fnlcr/config.yaml similarity index 93% rename from config/slurm-fnlcr/config.yaml rename to config/fnlcr/config.yaml index 8fa374f..6f7e685 100644 --- a/config/slurm-fnlcr/config.yaml +++ b/config/fnlcr/config.yaml @@ -7,7 +7,7 @@ cluster: sbatch --job-name {cluster.name} --output {cluster.output} --error {cluster.error} -cluster-config: "cluster.json" +cluster-config: "cluster.yaml" cluster-status: "cluster_status.sh" jobs: 499 immediate-submit: false diff --git a/config/fnlcr/submit_script.sh b/config/fnlcr/submit_script.sh new file mode 100644 index 0000000..8f7087d --- /dev/null +++ b/config/fnlcr/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#SBATCH --job-name="charlie" +#SBATCH --mem=40g +#SBATCH --partition="$PARTITION" +#SBATCH --time=48:00:00 
+#SBATCH --cpus-per-task=2 +#SBATCH --mail-type=BEGIN,END,FAIL + +cd $SLURM_SUBMIT_DIR +$MODULE_LOAD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" -eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runslurm_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/config/slurm-biowulf/cluster.json b/config/slurm-biowulf/cluster.json deleted file mode 100644 index 028a2e2..0000000 --- a/config/slurm-biowulf/cluster.json +++ /dev/null @@ -1,120 +0,0 @@ -{ - "__default__": { - "gres": "lscratch:256", - "mem": "40g", - "partition": "ccr,norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - }, - "cutadapt": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "56", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "56", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "56", - "time": "4:00:00", - "partition": "largemem" - }, - 
"filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "56", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "56", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} diff --git a/config/slurm-fnlcr/cluster.json b/config/slurm-fnlcr/cluster.json deleted file mode 100644 index fbc50f9..0000000 --- a/config/slurm-fnlcr/cluster.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "__default__": { - "mem": "40g", - "partition": "norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - }, - "cutadapt": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "32", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": 
"4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "32", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "32", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} diff --git a/config/unknown/cluster.json b/config/unknown/cluster.json deleted file mode 100644 index fbc50f9..0000000 --- a/config/unknown/cluster.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "__default__": { - "mem": "40g", - "partition": "norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - }, - "cutadapt": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": 
"120g", - "threads": "32", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "32", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "32", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "32", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} diff --git a/resources/NCLscan.config.template b/resources/NCLscan.config.template index 0f53d48..dd97366 100644 --- a/resources/NCLscan.config.template +++ b/resources/NCLscan.config.template @@ -7,22 +7,22 @@ NCLscan_dir = /opt2/NCLscan-1.7.0 ## The directory of references and indices ## The script "create_reference.py" would create the 
needed references and indices here. -NCLscan_ref_dir = WORKDIR/ref/NCLscan_index +NCLscan_ref_dir = $WORKDIR/ref/NCLscan_index ## The following four reference files can be downloaded from the GENCODE website (http://www.gencodegenes.org/). ## The reference genome sequence, eg. /path/to/GRCh37.p13.genome.fa -Reference_genome = WORKDIR/ref/ref.fa +Reference_genome = $WORKDIR/ref/ref.fa ## The gene annotation file, eg. /path/to/gencode.v19.annotation.gtf -Gene_annotation = WORKDIR/ref/ref.fixed.gtf +Gene_annotation = $WORKDIR/ref/ref.fixed.gtf ## The protein-coding transcript sequences, eg. /path/to/gencode.v19.pc_transcripts.fa -Protein_coding_transcripts = WORKDIR/ref/ref.transcripts.fa +Protein_coding_transcripts = $WORKDIR/ref/ref.transcripts.fa ## The long non-coding RNA transcript sequences, eg. /path/to/gencode.v19.lncRNA_transcripts.fa -lncRNA_transcripts = WORKDIR/ref/ref.dummy.fa +lncRNA_transcripts = $WORKDIR/ref/ref.dummy.fa ## External tools @@ -68,7 +68,7 @@ SeqOut_bin = {NCLscan_bin}/SeqOut ### Advanced parameters ### ########################### -## The following two parameters indicate the maximal read length (L) and fragment size of the used paired-end RNA-seq data (FASTQ files), where fragment size = 2L + insert size. +## The following two parameters indicate the maximal read length (L) and fragment size of the used paired-end RNA-seq data (FASTQ files), where fragment size = 2L + insert size. ## If L > 151, the users should change these two parameters to (L, 2L + insert size). max_read_len = 151 max_fragment_size = 500 @@ -96,6 +96,3 @@ bwa-mem-t = 56 ## NOTE: The memory usage of each blat process would be up to 4 GB! ## mp_blat_process = 56 - - -