From 7100f9ebc23880af9458d4ef81c9bf497fc35370 Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:57:13 +0100 Subject: [PATCH 01/12] Update release version, changelog and docs --- CHANGELOG.md | 5 ++++ conf/base.config | 4 +-- conf/eddie.config | 3 +-- conf/eddie_conda.config | 3 +-- conf/params.config | 38 ---------------------------- docs/changelog.md | 11 ++++++++ modules/processes/preprocess/main.nf | 22 ++++++---------- nextflow.config | 8 +++--- nextflow_schema.json | 26 ++++++++++++++----- 9 files changed, 53 insertions(+), 67 deletions(-) delete mode 100644 conf/params.config diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ddba75..b83c25d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ # Changelog +## [v1.8.6] +- Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`). +- Uniform minimap2 options by prefixing with `--minimap2_` +- Remove unnecessary configurations + ## [v1.8.6] - Fix bugs originated after the workflow simplification. diff --git a/conf/base.config b/conf/base.config index 1eac65e..db74a02 100644 --- a/conf/base.config +++ b/conf/base.config @@ -120,7 +120,7 @@ process { } } memory = { - def baseMem = 6.GB * task.attempt + def baseMem = params.gsalign_memory as nextflow.util.MemoryUnit * task.attempt if (params.max_memory){ baseMem.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1 ? params.max_memory as nextflow.util.MemoryUnit : baseMem } else { @@ -146,7 +146,7 @@ process { } } memory = { - def baseMem = 8.GB * task.attempt + def baseMem = params.mm2_memory as nextflow.util.MemoryUnit * task.attempt if (params.max_memory){ baseMem.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1 ? 
params.max_memory as nextflow.util.MemoryUnit : baseMem } else { diff --git a/conf/eddie.config b/conf/eddie.config index 4e67945..c7d9549 100644 --- a/conf/eddie.config +++ b/conf/eddie.config @@ -19,7 +19,6 @@ params { max_time = 240.h scratch = false queue_size = 100 - rl9 = true project = "uoe_baseline" } @@ -29,7 +28,7 @@ executor { } process { - clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -l rl9=${params.rl9} -P ${params.project} ${params.extra_cluster_options}" : "-R y -l rl9=${params.rl9} -P ${params.project ?: ''} ${params.extra_cluster_options}" } + clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -P ${params.project} ${params.extra_cluster_options}" : "-R y -P ${params.project ?: ''} ${params.extra_cluster_options}" } scratch = params.scratch penv = { task.cpus > 1 ? "sharedmem" : null } diff --git a/conf/eddie_conda.config b/conf/eddie_conda.config index 7e77c0a..51b2691 100644 --- a/conf/eddie_conda.config +++ b/conf/eddie_conda.config @@ -19,7 +19,6 @@ params { max_time = 240.h scratch = false queue_size = 100 - rl9 = true project = "uoe_baseline" } @@ -29,7 +28,7 @@ executor { } process { - clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -l rl9=${params.rl9} -P ${params.project} ${params.extra_cluster_options}" : "-R y -l rl9=${params.rl9} -P ${params.project ?: ''} ${params.extra_cluster_options}" } + clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -P ${params.project} ${params.extra_cluster_options}" : "-R y -P ${params.project ?: ''} ${params.extra_cluster_options}" } scratch = params.scratch penv = { task.cpus > 1 ? 
"sharedmem" : null } diff --git a/conf/params.config b/conf/params.config deleted file mode 100644 index f051f43..0000000 --- a/conf/params.config +++ /dev/null @@ -1,38 +0,0 @@ -params { - source = null - target = null - ncbi_source = false - ncbi_target = false - igenomes_source = false - igenomes_target = false - distance = 'medium' - aligner = 'lastz' - srcSize = 20000000 - tgtSize = 10000000 - tgtOvlp = 100000 - srcOvlp = 0 - qscores = null - outdir = "${launchDir}/OUTPUTS" - annotation = null - annotation_format = null - custom = null - chainCustom = null - chain_name = 'liftover' - liftover_algorithm = 'liftover' - maf_tgt_name = 'tgt' - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false - no_maf = false - no_netsynt = false - mafTools = null - reciprocal_best = false - minimap2_threads = 2 - gsalign_threads = 2 - help = false - mamba = false - publish_dir_mode = 'copy' - extra_cluster_options = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - my_config = null -} diff --git a/docs/changelog.md b/docs/changelog.md index 6575586..b83c25d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,4 +1,15 @@ # Changelog +## [v1.8.6] +- Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`). +- Uniform minimap2 options by prefixing with `--minimap2_` +- Remove unnecessary configurations + +## [v1.8.6] +- Fix bugs originated after the workflow simplification. + +## [v1.8.5] +- Upkeep release. 
+ ## [v1.8.4] - Fix broken anaconda environment creation due to wrong repository order - Better schema file diff --git a/modules/processes/preprocess/main.nf b/modules/processes/preprocess/main.nf index 3a7e50e..66f8766 100644 --- a/modules/processes/preprocess/main.nf +++ b/modules/processes/preprocess/main.nf @@ -142,13 +142,11 @@ process splitsrc { if [ -z \$myvalue ]; then myvalue=`faSize -tab ${source} | awk '\$1=="baseCount" {print \$2}'` fi - mkdir ./SPLIT_src && chmod a+rw ./SPLIT_src - faSplit size -oneFile -lift=source.lift ${source} \$myvalue SPLIT_src/src + mkdir ./SPLIT_src && faSplit size -oneFile -lift=source.lift ${source} \$myvalue SPLIT_src/src """ else """ - mkdir ./SPLIT_src && chmod a+rw ./SPLIT_src - faSplit size -lift=source.lift -extra=${params.srcOvlp} ${source} ${params.srcSize} SPLIT_src/ + mkdir ./SPLIT_src && faSplit size -lift=source.lift -extra=${params.srcOvlp} ${source} ${params.srcSize} SPLIT_src/ """ stub: @@ -236,31 +234,27 @@ process splittgt { script: if( params.aligner == "blat" ) """ - mkdir ./SPLIT_tgt && chmod a+rw ./SPLIT_tgt - faSplit size -oneFile -lift=target.lift -extra=500 ${target} 4500 SPLIT_tgt/tmp + mkdir ./SPLIT_tgt && faSplit size -oneFile -lift=target.lift -extra=500 ${target} 4500 SPLIT_tgt/tmp """ - else if ( params.aligner.toLowerCase() == "gsalign" || (params.aligner == 'minimap2' && params.mm2_full_alignment) ) + else if ( params.aligner.toLowerCase() == "gsalign" || (params.aligner == 'minimap2' && params.minimap2_full_alignment) ) """ myvalue=`faSize -tab ${target} | awk '\$1=="maxSize" {print \$2}'` if [ -z \$myvalue ]; then myvalue=`faSize -tab ${target} | awk '\$1=="baseCount" {print \$2}'` fi - mkdir ./SPLIT_tgt && chmod a+rw ./SPLIT_tgt - faSplit size -oneFile -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt + mkdir ./SPLIT_tgt && faSplit size -oneFile -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt """ - else if ( params.aligner == "minimap2" && !params.mm2_full_alignment && 
!params.mm2_lowmem ) + else if ( params.aligner == "minimap2" && !params.minimap2_full_alignment && !params.minimap2_lowmem ) """ myvalue=`faSize -tab ${target} | awk '\$1=="maxSize" {print \$2}'` if [ -z \$myvalue ]; then myvalue=`faSize -tab ${target} | awk '\$1=="baseCount" {print \$2}'` fi - mkdir ./SPLIT_tgt && chmod a+rw ./SPLIT_tgt - faSplit size -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt + mkdir ./SPLIT_tgt && faSplit size -lift=target.lift ${target} \$myvalue SPLIT_tgt/tgt """ else """ - mkdir SPLIT_tgt && chmod a+rw SPLIT_tgt - faSplit size -lift=target.lift -extra=${params.tgtOvlp} ${target} ${params.tgtSize} SPLIT_tgt/ + mkdir SPLIT_tgt && faSplit size -lift=target.lift -extra=${params.tgtOvlp} ${target} ${params.tgtSize} SPLIT_tgt/ """ stub: diff --git a/nextflow.config b/nextflow.config index 891b0cc..f8c0ab4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,10 +38,12 @@ params { mafTools = null report = false reciprocal_best = false - mm2_full_alignment = false - mm2_lowmem = false + minimap2_full_alignment = false + minimap2_lowmem = false + minimap2_memory = "8.GB" minimap2_threads = 2 gsalign_threads = 2 + gsalign_memory = "6.GB" max_memory = (Runtime.runtime.maxMemory() as nextflow.util.MemoryUnit).toGiga().GB max_cpus = Runtime.runtime.availableProcessors() == 1 ? 
1 : Runtime.runtime.availableProcessors() - 1 max_time = '240.h' @@ -187,5 +189,5 @@ manifest { mainScript = 'main.nf' nextflowVersion = '>=21.10.0' defaultBranch = 'main' - version = '1.8.6' + version = '1.8.7' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 109b383..40d6609 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -109,23 +109,37 @@ "type": "string", "description": "Alignment custom options" }, - "minimap2_threads": { - "type": "integer", - "default": 2, - "description": "Threads for the minimap2 alignment" + "gsalign_memory": { + "type": "string", + "default": "6.GB", + "description": "Memory for the GSAlign alignment", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$" }, "gsalign_threads": { "type": "integer", "default": 2, "description": "Threads for the GSAlign alignment" }, - "mm2_full_alignment": { + "minimap2_threads": { + "type": "integer", + "default": 2, + "description": "Threads for the minimap2 alignment" + }, + "minimap2_memory": { + "type": "string", + "default": "8.GB", + "description": "Memory for the minimap2 alignment", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$" + }, + "minimap2_full_alignment": { "type": "boolean", "default": false, "hidden": true, "description": "Full minimap2 alignment; faster, but requires more memory" }, - "mm2_lowmem": { + "minimap2_lowmem": { "type": "boolean", "default": false, "hidden": true, From 3e0047a5bf37c57b933de330cac7a0a9d22635d7 Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:58:11 +0100 Subject: [PATCH 02/12] Further docs updates --- docs/alignments.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/alignments.md b/docs/alignments.md index 3ac77cb..2ff5f11 100644 --- a/docs/alignments.md +++ b/docs/alignments.md @@ -102,6 +102,6 @@ The workflow now minimizes the memory impact of `minimap2` by generating an 
`.mm The default `minimap2` behaviour is now to align each sequence from the target genome separately, using one task at the time. This should achieve a good balance of number of processes and low number of cores per process. -If the user wishes to use a single process, as in the previous version of the workflow, they can do so by providing `--mm2_full_alignment`. This will perform a single genome-to-genome process. You might want to increase the number of cores provided to minimap2 with `--minimap2_threads`. +If the user wishes to use a single process, as in the previous version of the workflow, they can do so by providing `--minimap2_full_alignment`. This will perform a single genome-to-genome process. You might want to increase the number of cores provided to minimap2 with `--minimap2_threads` and memory with e.g. `--minimap2_memory 8.GB`. -If the user needs to perform the alignment in a particularly low-memory environment, they can provide `--mm2_lowmem`. This will perform the scattering of the target genome using `--tgtSize`, and with the overlap specified in `--tgtOvlp`. +If the user needs to perform the alignment in a particularly low-memory environment, they can provide `--minimap2_lowmem`. This will perform the scattering of the target genome using `--tgtSize`, and with the overlap specified in `--tgtOvlp`. From 50d6b35a226e934d1ab72e370588bbb15b8ec6fb Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Wed, 2 Apr 2025 10:58:51 +0100 Subject: [PATCH 03/12] Update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8466502..1b64478 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ # Updates See [CHANGELOG](#CHANGELOG.md) for more details. 
-**UPDATE 05/2024**: The `--aligner minimap2` mode now runs in multiple processes, splitting the target genome in fragments of at least `--tgtSize` bases; individual contigs and scaffolds **will not be fragmented**, and each chunk will contain entire sequences, unless the `--mm2_lowmem` option is provided. The old approach is still accessible through the `--mm2_full_alignment` option. The anaconda recipe with the dependencies has been updated, so please ensure to re-create the container where needed. This optimization allows to perform a `minimap2` liftover of the panTro6 to the hg38 genomes on a 16-cores Ryzen 7 8700G 64G Ubuntu machine in under half an hour +**UPDATE 05/2024**: The `--aligner minimap2` mode now runs in multiple processes, splitting the target genome into fragments of at least `--tgtSize` bases; individual contigs and scaffolds **will not be fragmented**, and each chunk will contain entire sequences, unless the `--minimap2_lowmem` option is provided. The old approach is still accessible through the `--minimap2_full_alignment` option. The anaconda recipe with the dependencies has been updated, so please make sure to re-create the container where needed. This optimization makes it possible to perform a `minimap2` liftover of the panTro6 to the hg38 genome on a 16-core Ryzen 7 8700G 64 GB Ubuntu machine in under half an hour. **UPDATE 14/12/2022**: Now the NCBI/iGenomes accession have to be provided in the `--source`/`--target` field, and then use the appropriate `--igenomes_source`/`--ncbi_source` and `--igenomes_target`/`--ncbi_target` as a modifier. 
From 97100b75db7a73f2140c58de03af3f488d9b765b Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Wed, 2 Apr 2025 11:00:51 +0100 Subject: [PATCH 04/12] Bop --- CHANGELOG.md | 2 +- docs/changelog.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b83c25d..1a4baf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ # Changelog -## [v1.8.6] +## [v1.8.7] - Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`). - Uniform minimap2 options by prefixing with `--minimap2_` - Remove unnecessary configurations diff --git a/docs/changelog.md b/docs/changelog.md index b83c25d..1a4baf3 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,5 @@ # Changelog -## [v1.8.6] +## [v1.8.7] - Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`). - Uniform minimap2 options by prefixing with `--minimap2_` - Remove unnecessary configurations From 72d3996e1e9bc3b75bab7812ff06a712cf3f0671 Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Fri, 4 Apr 2025 11:49:49 +0100 Subject: [PATCH 05/12] Refuse --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index db74a02..c4c7974 100644 --- a/conf/base.config +++ b/conf/base.config @@ -146,7 +146,7 @@ process { } } memory = { - def baseMem = params.mm2_memory as nextflow.util.MemoryUnit * task.attempt + def baseMem = params.minimap2_memory as nextflow.util.MemoryUnit * task.attempt if (params.max_memory){ baseMem.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1 ? 
params.max_memory as nextflow.util.MemoryUnit : baseMem } else { From 87d291d340f24f4acd0192b958d8ce809f5dee21 Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Fri, 4 Apr 2025 13:35:48 +0100 Subject: [PATCH 06/12] Further tweaks --- main.nf | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index a76f558..882412e 100644 --- a/main.nf +++ b/main.nf @@ -29,14 +29,11 @@ workflow { exit 0 } - // If params.custom is set, define that as distance - if ( params.custom != '' && params.distance == 'custom' ) { params.distance = 'custom' } - // If params.custom is set, define that as distance if ( !params.source && !params.target ) { log.error "You did not provide a source and a target files."; exit 1 } if ( !params.source && params.target ) { log.error "You did not provide a source file."; exit 1 } if ( params.source && !params.target ) { log.error "You did not provide a target file."; exit 1 } - if ( params.mm2_full_alignment && params.mm2_lowmem ) { log.error "Incompatible options: --mm2_lowmem and --mm2_full_alignment."; exit 1 } + if ( params.minimap2_full_alignment && params.minimap2_lowmem ) { log.error "Incompatible options: --minimap2_lowmem and --minimap2_full_alignment."; exit 1 } // Print run informations log.info ''' @@ -93,11 +90,11 @@ no_maf : $params.no_maf""" if (params.gsalign_threads && params.aligner == 'gsalign'){ log.info"""low memory (mm2): $params.gsalign_threads""" } - if (params.mm2_lowmem){ - log.info"""low memory (mm2): $params.mm2_lowmem""" + if (params.minimap2_lowmem){ + log.info"""low memory (mm2): $params.minimap2_lowmem""" } - if (params.mm2_full_alignment){ - log.info"""full-alignment : $params.mm2_full_alignment""" + if (params.minimap2_full_alignment){ + log.info"""full-alignment : $params.minimap2_full_alignment""" } if (params.mafTools){ log.info"""mafTools : $params.mafTools""" From 2323d8536830738ffcdee68cb5c2e19e315cedcc Mon Sep 17 00:00:00 2001 
From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Fri, 4 Apr 2025 13:59:33 +0100 Subject: [PATCH 07/12] Fix mmi generation to use multiple threads and minimap2 configuration --- modules/processes/preprocess/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/processes/preprocess/main.nf b/modules/processes/preprocess/main.nf index 66f8766..9bf7b3a 100644 --- a/modules/processes/preprocess/main.nf +++ b/modules/processes/preprocess/main.nf @@ -441,7 +441,7 @@ process makeSizeT { process make_mmi { tag "mmi" - label 'medium' + label 'minimap2' input: path fasta @@ -455,7 +455,7 @@ process make_mmi { minimap2_conf = params.custom } """ - minimap2 ${minimap2_conf} -d ${fasta.baseName}.mmi ${fasta} + minimap2 -t ${task.cpus} ${minimap2_conf} -d ${fasta.baseName}.mmi ${fasta} """ stub: From 11a3bbb92944564b415217588b95f9b8d7bc6d17 Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Tue, 17 Jun 2025 09:53:49 +0100 Subject: [PATCH 08/12] Fix old reference --- modules/subworkflows/preprocess.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/subworkflows/preprocess.nf b/modules/subworkflows/preprocess.nf index 29ee5f8..acfff67 100644 --- a/modules/subworkflows/preprocess.nf +++ b/modules/subworkflows/preprocess.nf @@ -34,7 +34,7 @@ workflow PREPROC { // split and group target splittgt(ch_target) tgt_lift = splittgt.out.tgt_lift_ch - if ( params.aligner.toLowerCase() == 'gsalign' || (params.aligner == 'minimap2' && params.mm2_full_alignment) ){ + if ( params.aligner.toLowerCase() == 'gsalign' || (params.aligner == 'minimap2' && params.minimap2_full_alignment) ){ ch_fragm_tgt_out = splittgt.out.tgtsplit_ch ch_fragm_tgt_fa = splittgt.out.tgtfas_ch .flatten() From 59feb0281b931830d5d20caf8809010398a0417c Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Tue, 17 Jun 2025 09:56:19 +0100 Subject: 
[PATCH 09/12] Fix configuration --- conf/base.config | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/base.config b/conf/base.config index c4c7974..d6f5128 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,7 @@ process { cpus = { def baseCpu = 1 if (params.max_cpus){ - baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem + baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu } else { baseCpu } @@ -36,7 +36,7 @@ process { cpus = { def baseCpu = 1 if (params.max_cpus){ - baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem + baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu } else { baseCpu } @@ -62,7 +62,7 @@ process { cpus = { def baseCpu = 1 if (params.max_cpus){ - baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem + baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu } else { baseCpu } @@ -88,7 +88,7 @@ process { cpus = { def baseCpu = 2 * task.attempt if (params.max_cpus){ - baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem + baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu } else { baseCpu } @@ -114,7 +114,7 @@ process { cpus = { def baseCpu = params.gsalign_threads ? params.gsalign_threads as int * task.attempt : 1 * task.attempt if (params.max_cpus){ - baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem + baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu } else { baseCpu } @@ -140,7 +140,7 @@ process { cpus = { def baseCpu = params.minimap2_threads ? params.minimap2_threads as int * task.attempt : 1 * task.attempt if (params.max_cpus){ - baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem + baseCpu.compareTo(params.max_cpus as int) == 1 ? 
params.max_cpus as int : baseCpu } else { baseCpu } From b0fd269d3a5f61800e07c60c0fda71d4c26f5dd6 Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Tue, 17 Jun 2025 10:54:28 +0100 Subject: [PATCH 10/12] Multiple patches to the configurations --- conf/eddie.config | 92 ++++++++++++++++++++++++----------------- conf/eddie_conda.config | 73 +++++++++++++++++--------------- nextflow.config | 14 ++++++- 3 files changed, 105 insertions(+), 74 deletions(-) diff --git a/conf/eddie.config b/conf/eddie.config index c7d9549..4d10571 100644 --- a/conf/eddie.config +++ b/conf/eddie.config @@ -1,57 +1,71 @@ /* * ------------------------------------------------------ - * Based on the nf-core/rnaseq Nextflow base config file + * Custom eddie (singularity/apptainer) config file * ------------------------------------------------------ */ - //Profile config names for nf-core/configs +// New parameters specific to customize eddie behaviour params { - // iGenomes reference base - saveReference = true - igenomes_base = '/exports/igmm/eddie/BioinformaticsResources/igenomes' config_profile_description = 'University of Edinburgh (eddie) cluster profile using anaconda tweaked by nf-core/configs.' 
- config_profile_contact = 'Andrea Talenti (@RenzoTale88)' - config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie - extra_cluster_options = "" - enable_conda = false - cache_dir = null - max_memory = 2048.GB - max_cpus = 64 - max_time = 240.h - scratch = false - queue_size = 100 - project = "uoe_baseline" + config_profile_contact = 'Andrea Talenti (@RenzoTale88)' + config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie + extra_cluster_options = "" + enable_conda = false + singularity_cache_dir = null + max_memory = 2048.GB + max_cpus = 64 + max_time = 240.h + scratch = false + queue_size = 100 + project = "uoe_baseline" } - + executor { - name = "sge" - queueSize = "${params.queue_size}" + name = "sge" + queueSize = params.queue_size } - + process { - clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -P ${params.project} ${params.extra_cluster_options}" : "-R y -P ${params.project ?: ''} ${params.extra_cluster_options}" } - scratch = params.scratch - penv = { task.cpus > 1 ? "sharedmem" : null } + stageInMode = 'symlink' + scratch = 'false' + penv = { task.cpus > 1 ? "sharedmem" : null } + // This will override all jobs clusterOptions + // This is necessary to allow jobs to run on Eddie for many users + // For each job, we add an extra 8 Gb of memory. + // For example, the process asked 16 Gb of RAM (task.memory). The job will reserve 24 Gb of RAM. + // The process will still use 16 Gb (task.memory) leaving 8 Gb for other system processes. + // This is very useful any JAVA programs which allocate task.memory RAM for its Virtual Machine + // Also it leaves enough memory for singularity to unpack images. 
// common SGE error statuses - errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} - maxErrors = '-1' - maxRetries = 3 - - beforeScript = - """ - . /etc/profile.d/modules.sh - module load singularity - export SINGULARITY_TMPDIR="\$TMPDIR" - """ + errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} + maxErrors = '-1' + maxRetries = 3 + + clusterOptions = { + def tot_memory = task.memory.toMega() + 8192 + def memory_per_core = tot_memory / task.cpus + "-l h_vmem=${memory_per_core}M -R y -P ${params.project} ${params.extra_cluster_options}" + } + beforeScript = + """ + . /etc/profile.d/modules.sh + module load igmm/apps/singularity/3 + export SINGULARITY_TMPDIR="\$TMPDIR" + export CUDA_VISIBLE_DEVICES=-1 + """ } env { - MALLOC_ARENA_MAX=1 + MALLOC_ARENA_MAX=1 } singularity { - envWhitelist = "SINGULARITY_TMPDIR,TMPDIR" - runOptions = '-p -B "$TMPDIR"' - enabled = true - autoMounts = true -} \ No newline at end of file + envWhitelist = "APPTAINER_TMPDIR,SINGULARITY_TMPDIR,TMPDIR,CUDA_VISIBLE_DEVICES" + runOptions = '-p -B "$TMPDIR"' + enabled = true + autoMounts = true + // Define the singularity cache directory depending on the presence of the NFX_SGE_PROJECT variable + // User without compute project can't access to the shared cache directory. + // So, they need to store singularity images into the work directory. 
+ cacheDir = params.singularity_cache_dir +} diff --git a/conf/eddie_conda.config b/conf/eddie_conda.config index 51b2691..60f92bb 100644 --- a/conf/eddie_conda.config +++ b/conf/eddie_conda.config @@ -1,51 +1,58 @@ /* * ------------------------------------------------------ - * Based on the nf-core/rnaseq Nextflow base config file + * Custom eddie (anaconda) config file * ------------------------------------------------------ */ - //Profile config names for nf-core/configs + +// New parameters specific to customize eddie behaviour params { - // iGenomes reference base - saveReference = true - igenomes_base = '/exports/igmm/eddie/BioinformaticsResources/igenomes' config_profile_description = 'University of Edinburgh (eddie) cluster profile using anaconda tweaked by nf-core/configs.' - config_profile_contact = 'Andrea Talenti (@RenzoTale88)' - config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie - extra_cluster_options = "" - enable_conda = false - cache_dir = null - max_memory = 2048.GB - max_cpus = 64 - max_time = 240.h - scratch = false - queue_size = 100 - project = "uoe_baseline" + config_profile_contact = 'Andrea Talenti (@RenzoTale88)' + config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie + extra_cluster_options = "" + enable_conda = false + singularity_cache_dir = null + max_memory = 2048.GB + max_cpus = 64 + max_time = 240.h + scratch = false + queue_size = 100 + project = "uoe_baseline" } - + executor { - name = "sge" - queueSize = "${params.queue_size}" + name = "sge" + queueSize = params.queue_size } process { - clusterOptions = { task.memory ? 
"-l h_vmem=${task.memory.bytes/task.cpus} -R y -P ${params.project} ${params.extra_cluster_options}" : "-R y -P ${params.project ?: ''} ${params.extra_cluster_options}" } - scratch = params.scratch - penv = { task.cpus > 1 ? "sharedmem" : null } + stageInMode = 'symlink' + scratch = 'false' + penv = { task.cpus > 1 ? "sharedmem" : null } + // This will override all jobs clusterOptions + // This is necessary to allow jobs to run on Eddie for many users + // For each job, we add an extra 8 Gb of memory. + // For example, the process asked 16 Gb of RAM (task.memory). The job will reserve 24 Gb of RAM. + // The process will still use 16 Gb (task.memory) leaving 8 Gb for other system processes. + // This is very useful any JAVA programs which allocate task.memory RAM for its Virtual Machine + // Also it leaves enough memory for singularity to unpack images. // common SGE error statuses - errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} - maxErrors = '-1' - maxRetries = 3 - - beforeScript = - """ - . /etc/profile.d/modules.sh - module load anaconda/2024.02 - """ + errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} + maxErrors = '-1' + maxRetries = 3 - withName: mafstats { - conda = "$projectDir/assets/maf-environment.yml" + clusterOptions = { + def tot_memory = task.memory.toMega() + 8192 + def memory_per_core = tot_memory / task.cpus + "-l h_vmem=${memory_per_core}M -R y -P ${params.project} ${params.extra_cluster_options}" } + beforeScript = + """ + . /etc/profile.d/modules.sh + module load anaconda + export CUDA_VISIBLE_DEVICES=-1 + """ } env { diff --git a/nextflow.config b/nextflow.config index f8c0ab4..b4059af 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,12 +78,12 @@ profiles { executor.queueSize = Runtime.runtime.availableProcessors() == 1 ? 
1 : Runtime.runtime.availableProcessors() - 1 executor.submitRateLimit = '4sec' } - eddie { + roslin_eddie { includeConfig 'conf/eddie.config' singularity.enabled = true singularity.autoMounts = true } - eddie_conda { + roslin_eddie_conda { conda.enabled = true includeConfig 'conf/eddie_conda.config' process.conda = "$projectDir/environment.yml" @@ -181,6 +181,16 @@ dag { file = "${params.outdir}/reports/pipeline_dag.html" } +// Define shell behaviour +process.shell = [ + "bash", + "-C", // No clobber - prevent output redirection from overwriting files. + "-e", // Exit if a tool returns a non-zero status/exit code + "-u", // Treat unset variables and parameters as an error + "-o", // Returns the status of the last command to exit.. + "pipefail" // ..with a non-zero status or zero if all successfully execute +] + manifest { name = 'evotools/nf-LO' author = 'Andrea Talenti' From dee9e1158b6ba2c3cc0adb1f81bb79b299eb311c Mon Sep 17 00:00:00 2001 From: Andrea Talenti <23279528+RenzoTale88@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:12:00 +0100 Subject: [PATCH 11/12] Improve minimap2 workflow --- modules/processes/minimap2/main.nf | 7 ++++--- modules/subworkflows/preprocess.nf | 6 +++++- nextflow.config | 1 + nextflow_schema.json | 4 ++++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/processes/minimap2/main.nf b/modules/processes/minimap2/main.nf index 763bbb6..11f5a5e 100644 --- a/modules/processes/minimap2/main.nf +++ b/modules/processes/minimap2/main.nf @@ -10,10 +10,11 @@ process minimap2 { output: tuple val(srcname), val(tgtname), file("${srcname}.${tgtname}.psl"), emit: al_files_ch + tuple val(srcname), val(tgtname), file("${srcname}.${tgtname}.paf.gz"), emit: paf_files_ch script: def mm2_args = "-cx asm10" - if (params.custom) { + if (params.custom && params.distance == "custom") { mm2_args = params.custom } else if (params.distance == 'near'){ mm2_args = "-cx asm5" @@ -25,8 +26,8 @@ process minimap2 { mm2_args = "-cx asm10" } """ 
- minimap2 -t ${task.cpus} ${mm2_args} --cap-kalloc 100m --cap-sw-mem 50m --cs=long ${srcfile} ${tgtfile} | - paftools.js view -f maf - | + minimap2 -t ${task.cpus} ${mm2_args} --cs=long ${srcfile} ${tgtfile} | gzip -c > ${srcname}.${tgtname}.paf.gz + paftools.js view -f maf ${srcname}.${tgtname}.paf.gz | maf-convert psl - | liftUp -type=.psl stdout ${srclift} warn stdin | liftUp -type=.psl -pslQ ${srcname}.${tgtname}.psl ${tgtlift} warn stdin diff --git a/modules/subworkflows/preprocess.nf b/modules/subworkflows/preprocess.nf index acfff67..2dc8136 100644 --- a/modules/subworkflows/preprocess.nf +++ b/modules/subworkflows/preprocess.nf @@ -49,7 +49,11 @@ workflow PREPROC { // If minimap2 requested, convert reference to mmi to save memory if (params.aligner.toLowerCase() == 'minimap2'){ - ch_fragm_src_fa = ch_fragm_src_fa | make_mmi | map{it -> [it.baseName, it]} + if (!params.skip_mmi){ + ch_fragm_src_fa = ch_fragm_src_fa | make_mmi | map{it -> [it.baseName, it]} + } else { + ch_fragm_src_fa = ch_fragm_src_fa | map{it -> [it.baseName, it]} + } } else { ch_fragm_src_fa = ch_fragm_src_fa.map{it -> [it.baseName, it]} } diff --git a/nextflow.config b/nextflow.config index b4059af..679f3bc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -42,6 +42,7 @@ params { minimap2_lowmem = false minimap2_memory = "8.GB" minimap2_threads = 2 + skip_mmi = false gsalign_threads = 2 gsalign_memory = "6.GB" max_memory = (Runtime.runtime.maxMemory() as nextflow.util.MemoryUnit).toGiga().GB diff --git a/nextflow_schema.json b/nextflow_schema.json index 40d6609..824e9ad 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -121,6 +121,10 @@ "default": 2, "description": "Threads for the GSAlign alignment" }, + "skip_mmi": { + "type": "boolean", + "description": "Skip MMI index generation" + }, "minimap2_threads": { "type": "integer", "default": 2, From 80096646990280c3fe88c6e2a28012e47b82474a Mon Sep 17 00:00:00 2001 From: Andrea Talenti 
<23279528+RenzoTale88@users.noreply.github.com> Date: Wed, 19 Nov 2025 09:39:34 +0000 Subject: [PATCH 12/12] More logging --- modules/subworkflows/preprocess.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/subworkflows/preprocess.nf b/modules/subworkflows/preprocess.nf index 2dc8136..b31f3b2 100644 --- a/modules/subworkflows/preprocess.nf +++ b/modules/subworkflows/preprocess.nf @@ -59,6 +59,8 @@ workflow PREPROC { } // Prepare pairs of sequences + ch_fragm_src_fa.count().subscribe{ nqry -> log.info "Found ${nqry} source fragments" } + ch_fragm_tgt_fa.count().subscribe{ ntgt -> log.info "Found ${ntgt} target fragments" } ch_fragm_src_fa .combine(ch_fragm_tgt_fa) .transpose()