diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ddba75..1a4baf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@
 # Changelog
+## [v1.8.7]
+- Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`).
+- Make the minimap2 options uniform by prefixing them with `--minimap2_`
+- Remove unnecessary configurations
+
 ## [v1.8.6]
 - Fix bugs originated after the workflow simplification.
 
diff --git a/README.md b/README.md
index 8466502..1b64478 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 # Updates
 See [CHANGELOG](#CHANGELOG.md) for more details.
 
-**UPDATE 05/2024**: The `--aligner minimap2` mode now runs in multiple processes, splitting the target genome in fragments of at least `--tgtSize` bases; individual contigs and scaffolds **will not be fragmented**, and each chunk will contain entire sequences, unless the `--mm2_lowmem` option is provided. The old approach is still accessible through the `--mm2_full_alignment` option. The anaconda recipe with the dependencies has been updated, so please ensure to re-create the container where needed. This optimization allows to perform a `minimap2` liftover of the panTro6 to the hg38 genomes on a 16-cores Ryzen 7 8700G 64G Ubuntu machine in under half an hour
+**UPDATE 05/2024**: The `--aligner minimap2` mode now runs in multiple processes, splitting the target genome into fragments of at least `--tgtSize` bases; individual contigs and scaffolds **will not be fragmented**, and each chunk will contain entire sequences, unless the `--minimap2_lowmem` option is provided. The old approach is still accessible through the `--minimap2_full_alignment` option. The anaconda recipe with the dependencies has been updated, so please make sure to re-create the container where needed. This optimization allows a `minimap2` liftover of panTro6 to hg38 to complete in under half an hour on a 16-core Ryzen 7 8700G Ubuntu machine with 64 GB of RAM.
 
 **UPDATE 14/12/2022**: Now the NCBI/iGenomes accession have to be provided in the `--source`/`--target` field, and then use the appropriate `--igenomes_source`/`--ncbi_source` and `--igenomes_target`/`--ncbi_target` as a modifier.
 
diff --git a/conf/base.config b/conf/base.config
index 1eac65e..d6f5128 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,7 +10,7 @@ process {
     cpus = {
         def baseCpu = 1
         if (params.max_cpus){
-            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem
+            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu
         } else {
             baseCpu
         }
@@ -36,7 +36,7 @@ process {
     cpus = {
         def baseCpu = 1
         if (params.max_cpus){
-            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem
+            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu
         } else {
             baseCpu
         }
@@ -62,7 +62,7 @@ process {
     cpus = {
         def baseCpu = 1
         if (params.max_cpus){
-            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem
+            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu
         } else {
             baseCpu
         }
@@ -88,7 +88,7 @@ process {
     cpus = {
         def baseCpu = 2 * task.attempt
         if (params.max_cpus){
-            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem
+            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu
         } else {
             baseCpu
         }
@@ -114,13 +114,13 @@ process {
     cpus = {
         def baseCpu = params.gsalign_threads ? params.gsalign_threads as int * task.attempt : 1 * task.attempt
         if (params.max_cpus){
-            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem
+            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu
         } else {
             baseCpu
         }
     }
     memory = {
-        def baseMem = 6.GB * task.attempt
+        def baseMem = params.gsalign_memory as nextflow.util.MemoryUnit * task.attempt
         if (params.max_memory){
             baseMem.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1 ? params.max_memory as nextflow.util.MemoryUnit : baseMem
         } else {
@@ -140,13 +140,13 @@ process {
     cpus = {
         def baseCpu = params.minimap2_threads ? params.minimap2_threads as int * task.attempt : 1 * task.attempt
         if (params.max_cpus){
-            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem
+            baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu
         } else {
             baseCpu
         }
     }
     memory = {
-        def baseMem = 8.GB * task.attempt
+        def baseMem = params.minimap2_memory as nextflow.util.MemoryUnit * task.attempt
         if (params.max_memory){
             baseMem.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1 ? params.max_memory as nextflow.util.MemoryUnit : baseMem
         } else {
diff --git a/conf/eddie.config b/conf/eddie.config
index 4e67945..4d10571 100644
--- a/conf/eddie.config
+++ b/conf/eddie.config
@@ -1,58 +1,71 @@
 /*
  * ------------------------------------------------------
- * Based on the nf-core/rnaseq Nextflow base config file
+ * Custom eddie (singularity/apptainer) config file
  * ------------------------------------------------------
 */
-//Profile config names for nf-core/configs
+// New parameters to customize eddie behaviour
 params {
-    // iGenomes reference base
-    saveReference = true
-    igenomes_base = '/exports/igmm/eddie/BioinformaticsResources/igenomes'
     config_profile_description = 'University of Edinburgh (eddie) cluster profile using anaconda tweaked by nf-core/configs.'
-    config_profile_contact = 'Andrea Talenti (@RenzoTale88)'
-    config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
-    extra_cluster_options = ""
-    enable_conda = false
-    cache_dir = null
-    max_memory = 2048.GB
-    max_cpus = 64
-    max_time = 240.h
-    scratch = false
-    queue_size = 100
-    rl9 = true
-    project = "uoe_baseline"
+    config_profile_contact = 'Andrea Talenti (@RenzoTale88)'
+    config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
+    extra_cluster_options = ""
+    enable_conda = false
+    singularity_cache_dir = null
+    max_memory = 2048.GB
+    max_cpus = 64
+    max_time = 240.h
+    scratch = false
+    queue_size = 100
+    project = "uoe_baseline"
 }
-
+
 executor {
-    name = "sge"
-    queueSize = "${params.queue_size}"
+    name = "sge"
+    queueSize = params.queue_size
 }
-
+
 process {
-    clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -l rl9=${params.rl9} -P ${params.project} ${params.extra_cluster_options}" : "-R y -l rl9=${params.rl9} -P ${params.project ?: ''} ${params.extra_cluster_options}" }
-    scratch = params.scratch
-    penv = { task.cpus > 1 ? "sharedmem" : null }
+    stageInMode = 'symlink'
+    scratch = 'false'
+    penv = { task.cpus > 1 ? "sharedmem" : null }
+    // This will override each job's clusterOptions.
+    // This is necessary to allow many users' jobs to run on Eddie.
+    // For each job, we add an extra 8 GB of memory.
+    // For example, if a process asks for 16 GB of RAM (task.memory), the job will reserve 24 GB of RAM.
+    // The process will still use 16 GB (task.memory), leaving 8 GB for other system processes.
+    // This is very useful for Java programs, which allocate task.memory RAM for their virtual machine.
+    // It also leaves enough memory for singularity to unpack images.
     // common SGE error statuses
-    errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
-    maxErrors = '-1'
-    maxRetries = 3
-
-    beforeScript =
-    """
-    . /etc/profile.d/modules.sh
-    module load singularity
-    export SINGULARITY_TMPDIR="\$TMPDIR"
-    """
+    errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
+    maxErrors = '-1'
+    maxRetries = 3
+
+    clusterOptions = {
+        def tot_memory = task.memory.toMega() + 8192
+        def memory_per_core = tot_memory / task.cpus
+        "-l h_vmem=${memory_per_core}M -R y -P ${params.project} ${params.extra_cluster_options}"
+    }
+    beforeScript =
+    """
+    . /etc/profile.d/modules.sh
+    module load igmm/apps/singularity/3
+    export SINGULARITY_TMPDIR="\$TMPDIR"
+    export CUDA_VISIBLE_DEVICES=-1
+    """
 }
 
 env {
-  MALLOC_ARENA_MAX=1
+    MALLOC_ARENA_MAX=1
 }
 
 singularity {
-    envWhitelist = "SINGULARITY_TMPDIR,TMPDIR"
-    runOptions = '-p -B "$TMPDIR"'
-    enabled = true
-    autoMounts = true
-}
\ No newline at end of file
+    envWhitelist = "APPTAINER_TMPDIR,SINGULARITY_TMPDIR,TMPDIR,CUDA_VISIBLE_DEVICES"
+    runOptions = '-p -B "$TMPDIR"'
+    enabled = true
+    autoMounts = true
+    // Define the singularity cache directory depending on the presence of the NFX_SGE_PROJECT variable.
+    // Users without a compute project can't access the shared cache directory.
+    // They therefore need to store singularity images in the work directory.
+    cacheDir = params.singularity_cache_dir
+}
diff --git a/conf/eddie_conda.config b/conf/eddie_conda.config
index 7e77c0a..60f92bb 100644
--- a/conf/eddie_conda.config
+++ b/conf/eddie_conda.config
@@ -1,52 +1,58 @@
 /*
  * ------------------------------------------------------
- * Based on the nf-core/rnaseq Nextflow base config file
+ * Custom eddie (anaconda) config file
  * ------------------------------------------------------
 */
-//Profile config names for nf-core/configs
+
+// New parameters to customize eddie behaviour
 params {
-    // iGenomes reference base
-    saveReference = true
-    igenomes_base = '/exports/igmm/eddie/BioinformaticsResources/igenomes'
     config_profile_description = 'University of Edinburgh (eddie) cluster profile using anaconda tweaked by nf-core/configs.'
-    config_profile_contact = 'Andrea Talenti (@RenzoTale88)'
-    config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
-    extra_cluster_options = ""
-    enable_conda = false
-    cache_dir = null
-    max_memory = 2048.GB
-    max_cpus = 64
-    max_time = 240.h
-    scratch = false
-    queue_size = 100
-    rl9 = true
-    project = "uoe_baseline"
+    config_profile_contact = 'Andrea Talenti (@RenzoTale88)'
+    config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
+    extra_cluster_options = ""
+    enable_conda = false
+    singularity_cache_dir = null
+    max_memory = 2048.GB
+    max_cpus = 64
+    max_time = 240.h
+    scratch = false
+    queue_size = 100
+    project = "uoe_baseline"
 }
-
+
 executor {
-    name = "sge"
-    queueSize = "${params.queue_size}"
+    name = "sge"
+    queueSize = params.queue_size
 }
 
 process {
-    clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -l rl9=${params.rl9} -P ${params.project} ${params.extra_cluster_options}" : "-R y -l rl9=${params.rl9} -P ${params.project ?: ''} ${params.extra_cluster_options}" }
-    scratch = params.scratch
-    penv = { task.cpus > 1 ? "sharedmem" : null }
+    stageInMode = 'symlink'
+    scratch = 'false'
+    penv = { task.cpus > 1 ? "sharedmem" : null }
+    // This will override each job's clusterOptions.
+    // This is necessary to allow many users' jobs to run on Eddie.
+    // For each job, we add an extra 8 GB of memory.
+    // For example, if a process asks for 16 GB of RAM (task.memory), the job will reserve 24 GB of RAM.
+    // The process will still use 16 GB (task.memory), leaving 8 GB for other system processes.
+    // This is very useful for Java programs, which allocate task.memory RAM for their virtual machine.
+    // It also leaves enough memory for singularity to unpack images.
    // common SGE error statuses
-    errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
-    maxErrors = '-1'
-    maxRetries = 3
-
-    beforeScript =
-    """
-    . /etc/profile.d/modules.sh
-    module load anaconda/2024.02
-    """
+    errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
+    maxErrors = '-1'
+    maxRetries = 3
 
-    withName: mafstats {
-        conda = "$projectDir/assets/maf-environment.yml"
+    clusterOptions = {
+        def tot_memory = task.memory.toMega() + 8192
+        def memory_per_core = tot_memory / task.cpus
+        "-l h_vmem=${memory_per_core}M -R y -P ${params.project} ${params.extra_cluster_options}"
     }
+    beforeScript =
+    """
+    . /etc/profile.d/modules.sh
+    module load anaconda
+    export CUDA_VISIBLE_DEVICES=-1
+    """
 }
 
 env {
diff --git a/conf/params.config b/conf/params.config
deleted file mode 100644
index f051f43..0000000
--- a/conf/params.config
+++ /dev/null
@@ -1,38 +0,0 @@
-params {
-    source = null
-    target = null
-    ncbi_source = false
-    ncbi_target = false
-    igenomes_source = false
-    igenomes_target = false
-    distance = 'medium'
-    aligner = 'lastz'
-    srcSize = 20000000
-    tgtSize = 10000000
-    tgtOvlp = 100000
-    srcOvlp = 0
-    qscores = null
-    outdir = "${launchDir}/OUTPUTS"
-    annotation = null
-    annotation_format = null
-    custom = null
-    chainCustom = null
-    chain_name = 'liftover'
-    liftover_algorithm = 'liftover'
-    maf_tgt_name = 'tgt'
-    igenomes_base = 's3://ngi-igenomes/igenomes/'
-    igenomes_ignore = false
-    no_maf = false
-    no_netsynt = false
-    mafTools = null
-    reciprocal_best = false
-    minimap2_threads = 2
-    gsalign_threads = 2
-    help = false
-    mamba = false
-    publish_dir_mode = 'copy'
-    extra_cluster_options = null
-    custom_config_version = 'master'
-    custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
-    my_config = null
-}
diff --git a/docs/alignments.md b/docs/alignments.md
index 3ac77cb..2ff5f11 100644
--- a/docs/alignments.md
+++ b/docs/alignments.md
@@ -102,6 +102,6 @@ The workflow now minimizes the memory impact of `minimap2` by generating an `.mm
 
 The default `minimap2` behaviour is now to align each sequence from the target genome separately, using one task at the time. This should achieve a good balance of number of processes and low number of cores per process.
 
-If the user wishes to use a single process, as in the previous version of the workflow, they can do so by providing `--mm2_full_alignment`. This will perform a single genome-to-genome process. You might want to increase the number of cores provided to minimap2 with `--minimap2_threads`.
+If the user wishes to use a single process, as in the previous version of the workflow, they can do so by providing `--minimap2_full_alignment`. This will perform a single genome-to-genome alignment process. You might want to increase the number of cores provided to minimap2 with `--minimap2_threads` and the memory with, e.g., `--minimap2_memory 8.GB`.
 
-If the user needs to perform the alignment in a particularly low-memory environment, they can provide `--mm2_lowmem`. This will perform the scattering of the target genome using `--tgtSize`, and with the overlap specified in `--tgtOvlp`.
+If the user needs to perform the alignment in a particularly low-memory environment, they can provide `--minimap2_lowmem`. This will scatter the target genome into chunks of `--tgtSize` bases, with the overlap specified in `--tgtOvlp`.
diff --git a/docs/changelog.md b/docs/changelog.md
index 6575586..1a4baf3 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,4 +1,15 @@
 # Changelog
+## [v1.8.7]
+- Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`).
+- Make the minimap2 options uniform by prefixing them with `--minimap2_`
+- Remove unnecessary configurations
+
+## [v1.8.6]
+- Fix bugs originated after the workflow simplification.
+
+## [v1.8.5]
+- Upkeep release.
+
 ## [v1.8.4]
 - Fix broken anaconda environment creation due to wrong repository order
 - Better schema file
diff --git a/main.nf b/main.nf
index a76f558..882412e 100644
--- a/main.nf
+++ b/main.nf
@@ -29,14 +29,11 @@ workflow {
         exit 0
     }
 
-    // If params.custom is set, define that as distance
-    if ( params.custom != '' && params.distance == 'custom' ) { params.distance = 'custom' }
-
     // If params.custom is set, define that as distance
     if ( !params.source && !params.target ) { log.error "You did not provide a source and a target files."; exit 1 }
     if ( !params.source && params.target ) { log.error "You did not provide a source file."; exit 1 }
     if ( params.source && !params.target ) { log.error "You did not provide a target file."; exit 1 }
-    if ( params.mm2_full_alignment && params.mm2_lowmem ) { log.error "Incompatible options: --mm2_lowmem and --mm2_full_alignment."; exit 1 }
+    if ( params.minimap2_full_alignment && params.minimap2_lowmem ) { log.error "Incompatible options: --minimap2_lowmem and --minimap2_full_alignment."; exit 1 }
 
     // Print run informations
     log.info '''
@@ -93,11 +90,11 @@ no_maf           : $params.no_maf"""
     if (params.gsalign_threads && params.aligner == 'gsalign'){
         log.info"""low memory (mm2): $params.gsalign_threads"""
     }
-    if (params.mm2_lowmem){
-        log.info"""low memory (mm2): $params.mm2_lowmem"""
+    if (params.minimap2_lowmem){
+        log.info"""low memory (mm2): $params.minimap2_lowmem"""
     }
-    if (params.mm2_full_alignment){
-        log.info"""full-alignment : $params.mm2_full_alignment"""
+    if (params.minimap2_full_alignment){
+        log.info"""full-alignment : $params.minimap2_full_alignment"""
     }
     if (params.mafTools){
         log.info"""mafTools : $params.mafTools"""
diff --git a/modules/processes/minimap2/main.nf b/modules/processes/minimap2/main.nf
index 763bbb6..11f5a5e 100644
--- a/modules/processes/minimap2/main.nf
+++ b/modules/processes/minimap2/main.nf
@@ -10,10 +10,11 @@ process minimap2 {
 
     output:
     tuple val(srcname), val(tgtname), file("${srcname}.${tgtname}.psl"), emit: al_files_ch
+    tuple val(srcname), val(tgtname), file("${srcname}.${tgtname}.paf.gz"), emit: paf_files_ch
 
     script:
     def mm2_args = "-cx asm10"
-    if (params.custom) {
+    if (params.custom && params.distance == "custom") {
         mm2_args = params.custom
     } else if (params.distance == 'near'){
         mm2_args = "-cx asm5"
@@ -25,8 +26,8 @@
         mm2_args = "-cx asm10"
     }
     """
-    minimap2 -t ${task.cpus} ${mm2_args} --cap-kalloc 100m --cap-sw-mem 50m --cs=long ${srcfile} ${tgtfile} |
-        paftools.js view -f maf - |
+    minimap2 -t ${task.cpus} ${mm2_args} --cs=long ${srcfile} ${tgtfile} | gzip -c > ${srcname}.${tgtname}.paf.gz
+    paftools.js view -f maf ${srcname}.${tgtname}.paf.gz |
         maf-convert psl - |
         liftUp -type=.psl stdout ${srclift} warn stdin |
         liftUp -type=.psl -pslQ ${srcname}.${tgtname}.psl ${tgtlift} warn stdin
diff --git a/modules/processes/preprocess/main.nf b/modules/processes/preprocess/main.nf
index 3a7e50e..9bf7b3a 100644
--- a/modules/processes/preprocess/main.nf
+++ b/modules/processes/preprocess/main.nf
@@ -142,13 +142,11 @@
     if [ -z \$myvalue ]; then
        myvalue=`faSize -tab ${source} | awk '\$1=="baseCount" {print \$2}'`
    fi
-    mkdir ./SPLIT_src && chmod a+rw ./SPLIT_src
-    faSplit size -oneFile -lift=source.lift ${source} \$myvalue SPLIT_src/src
+    mkdir ./SPLIT_src && faSplit size -oneFile -lift=source.lift ${source} \$myvalue SPLIT_src/src
    """
    else
    """
-    mkdir ./SPLIT_src && chmod a+rw ./SPLIT_src
-    faSplit size -lift=source.lift -extra=${params.srcOvlp} ${source} ${params.srcSize} SPLIT_src/
+    mkdir ./SPLIT_src && faSplit size -lift=source.lift -extra=${params.srcOvlp} ${source} ${params.srcSize} SPLIT_src/
    """
 
    stub:
@@ -236,31 +234,27 @@
    script:
    if( params.aligner == "blat" )
    """
-    mkdir ./SPLIT_tgt && chmod a+rw ./SPLIT_tgt
-    faSplit size -oneFile -lift=target.lift -extra=500 ${target} 4500 SPLIT_tgt/tmp
+    mkdir ./SPLIT_tgt && faSplit size -oneFile -lift=target.lift -extra=500 ${target} 4500 SPLIT_tgt/tmp
    """
-    else if ( params.aligner.toLowerCase() == "gsalign" || (params.aligner == 'minimap2' && params.mm2_full_alignment) )
+    else if ( params.aligner.toLowerCase() == "gsalign" || (params.aligner == 'minimap2' && params.minimap2_full_alignment) )
    """
    myvalue=`faSize -tab ${target} | awk '\$1=="maxSize" {print \$2}'`
    if [ -z \$myvalue ]; then
        myvalue=`faSize -tab ${target} | awk '\$1=="baseCount" {print \$2}'`
    fi
-    mkdir ./SPLIT_tgt && chmod a+rw ./SPLIT_tgt
-    faSplit size -oneFile -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt
+    mkdir ./SPLIT_tgt && faSplit size -oneFile -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt
    """
-    else if ( params.aligner == "minimap2" && !params.mm2_full_alignment && !params.mm2_lowmem )
+    else if ( params.aligner == "minimap2" && !params.minimap2_full_alignment && !params.minimap2_lowmem )
    """
    myvalue=`faSize -tab ${target} | awk '\$1=="maxSize" {print \$2}'`
    if [ -z \$myvalue ]; then
        myvalue=`faSize -tab ${target} | awk '\$1=="baseCount" {print \$2}'`
    fi
-    mkdir ./SPLIT_tgt && chmod a+rw ./SPLIT_tgt
-    faSplit size -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt
+    mkdir ./SPLIT_tgt && faSplit size -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt
    """
    else
    """
-    mkdir SPLIT_tgt && chmod a+rw SPLIT_tgt
-    faSplit size -lift=target.lift -extra=${params.tgtOvlp} ${target} ${params.tgtSize} SPLIT_tgt/
+    mkdir SPLIT_tgt && faSplit size -lift=target.lift -extra=${params.tgtOvlp} ${target} ${params.tgtSize} SPLIT_tgt/
    """
 
    stub:
@@ -447,7 +441,7 @@ process makeSizeT {
 
 process make_mmi {
    tag "mmi"
-    label 'medium'
+    label 'minimap2'
 
    input:
    path fasta
@@ -461,7 +455,7 @@
        minimap2_conf = params.custom
    }
    """
-    minimap2 ${minimap2_conf} -d ${fasta.baseName}.mmi ${fasta}
+    minimap2 -t ${task.cpus} ${minimap2_conf} -d ${fasta.baseName}.mmi ${fasta}
    """
 
    stub:
diff --git a/modules/subworkflows/preprocess.nf b/modules/subworkflows/preprocess.nf
index 29ee5f8..b31f3b2 100644
--- a/modules/subworkflows/preprocess.nf
+++ b/modules/subworkflows/preprocess.nf
@@ -34,7 +34,7 @@ workflow PREPROC {
    // split and group target
    splittgt(ch_target)
    tgt_lift = splittgt.out.tgt_lift_ch
-    if ( params.aligner.toLowerCase() == 'gsalign' || (params.aligner == 'minimap2' && params.mm2_full_alignment) ){
+    if ( params.aligner.toLowerCase() == 'gsalign' || (params.aligner == 'minimap2' && params.minimap2_full_alignment) ){
        ch_fragm_tgt_out = splittgt.out.tgtsplit_ch
        ch_fragm_tgt_fa = splittgt.out.tgtfas_ch
            .flatten()
@@ -49,12 +49,18 @@
 
    // If minimap2 requested, convert reference to mmi to save memory
    if (params.aligner.toLowerCase() == 'minimap2'){
-        ch_fragm_src_fa = ch_fragm_src_fa | make_mmi | map{it -> [it.baseName, it]}
+        if (!params.skip_mmi){
+            ch_fragm_src_fa = ch_fragm_src_fa | make_mmi | map{it -> [it.baseName, it]}
+        } else {
+            ch_fragm_src_fa = ch_fragm_src_fa | map{it -> [it.baseName, it]}
+        }
    } else {
        ch_fragm_src_fa = ch_fragm_src_fa.map{it -> [it.baseName, it]}
    }
 
    // Prepare pairs of sequences
+    ch_fragm_src_fa.count().subscribe{ nqry -> log.info "Found ${nqry} source fragments" }
+    ch_fragm_tgt_fa.count().subscribe{ ntgt -> log.info "Found ${ntgt} target fragments" }
    ch_fragm_src_fa
        .combine(ch_fragm_tgt_fa)
        .transpose()
diff --git a/nextflow.config b/nextflow.config
index 891b0cc..679f3bc 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -38,10 +38,13 @@ params {
    mafTools = null
    report = false
    reciprocal_best = false
-    mm2_full_alignment = false
-    mm2_lowmem = false
+    minimap2_full_alignment = false
+    minimap2_lowmem = false
+    minimap2_memory = "8.GB"
    minimap2_threads = 2
+    skip_mmi = false
    gsalign_threads = 2
+    gsalign_memory = "6.GB"
    max_memory = (Runtime.runtime.maxMemory() as nextflow.util.MemoryUnit).toGiga().GB
    max_cpus = Runtime.runtime.availableProcessors() == 1 ? 1 : Runtime.runtime.availableProcessors() - 1
    max_time = '240.h'
@@ -76,12 +79,12 @@
        executor.queueSize = Runtime.runtime.availableProcessors() == 1 ? 1 : Runtime.runtime.availableProcessors() - 1
        executor.submitRateLimit = '4sec'
    }
-    eddie {
+    roslin_eddie {
        includeConfig 'conf/eddie.config'
        singularity.enabled = true
        singularity.autoMounts = true
    }
-    eddie_conda {
+    roslin_eddie_conda {
        conda.enabled = true
        includeConfig 'conf/eddie_conda.config'
        process.conda = "$projectDir/environment.yml"
@@ -179,6 +182,16 @@ dag {
    file = "${params.outdir}/reports/pipeline_dag.html"
 }
 
+// Define shell behaviour
+process.shell = [
+    "bash",
+    "-C",       // No clobber - prevent output redirection from overwriting files.
+    "-e",       // Exit if a tool returns a non-zero status/exit code
+    "-u",       // Treat unset variables and parameters as an error
+    "-o",       // Returns the status of the last command to exit..
+    "pipefail"  // ..with a non-zero status or zero if all successfully execute
+]
+
 manifest {
    name = 'evotools/nf-LO'
    author = 'Andrea Talenti'
@@ -187,5 +200,5 @@
    mainScript = 'main.nf'
    nextflowVersion = '>=21.10.0'
    defaultBranch = 'main'
-    version = '1.8.6'
+    version = '1.8.7'
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 109b383..824e9ad 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -109,23 +109,41 @@
                    "type": "string",
                    "description": "Alignment custom options"
                },
-                "minimap2_threads": {
-                    "type": "integer",
-                    "default": 2,
-                    "description": "Threads for the minimap2 alignment"
+                "gsalign_memory": {
+                    "type": "string",
+                    "default": "6.GB",
+                    "description": "Memory for the GSAlign alignment",
+                    "fa_icon": "fas fa-memory",
+                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$"
                },
                "gsalign_threads": {
                    "type": "integer",
                    "default": 2,
                    "description": "Threads for the GSAlign alignment"
                },
-                "mm2_full_alignment": {
+                "skip_mmi": {
+                    "type": "boolean",
+                    "description": "Skip MMI index generation"
+                },
+                "minimap2_threads": {
+                    "type": "integer",
+                    "default": 2,
+                    "description": "Threads for the minimap2 alignment"
+                },
+                "minimap2_memory": {
+                    "type": "string",
+                    "default": "8.GB",
+                    "description": "Memory for the minimap2 alignment",
+                    "fa_icon": "fas fa-memory",
+                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$"
+                },
+                "minimap2_full_alignment": {
                    "type": "boolean",
                    "default": false,
                    "hidden": true,
                    "description": "Full minimap2 alignment; faster, but requires more memory"
                },
-                "mm2_lowmem": {
+                "minimap2_lowmem": {
                    "type": "boolean",
                    "default": false,
                    "hidden": true,
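
As a usage sketch (not part of the patch itself): the options renamed and added above could be exercised from the command line roughly as below. The FASTA paths and the output directory are placeholders, and the memory value simply mirrors the `--minimap2_memory 8.GB` default set in `nextflow.config`.

```bash
# Hypothetical invocation of the pipeline using the v1.8.7 minimap2 options;
# source.fa, target.fa and ./OUTPUTS are placeholders, not files shipped with the repo.
nextflow run evotools/nf-LO \
    --source source.fa \
    --target target.fa \
    --aligner minimap2 \
    --distance near \
    --minimap2_threads 4 \
    --minimap2_memory 8.GB \
    --outdir ./OUTPUTS
```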