evotools · RenzoTale88 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,9 @@
 # Changelog
+## [v1.8.7]
+- Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`).
+- Unify minimap2 option names by prefixing them with `--minimap2_` (e.g. `--mm2_lowmem` is now `--minimap2_lowmem`).
+- Remove unnecessary configurations
+
 ## [v1.8.6]
 - Fix bugs originated after the workflow simplification.
 

diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 # Updates
 See [CHANGELOG](#CHANGELOG.md) for more details.
 
-**UPDATE 05/2024**: The `--aligner minimap2` mode now runs in multiple processes, splitting the target genome in fragments of at least `--tgtSize` bases; individual contigs and scaffolds **will not be fragmented**, and each chunk will contain entire sequences, unless the `--mm2_lowmem` option is provided. The old approach is still accessible through the `--mm2_full_alignment` option. The anaconda recipe with the dependencies has been updated, so please ensure to re-create the container where needed. This optimization allows to perform a `minimap2` liftover of the panTro6 to the hg38 genomes on a 16-cores Ryzen 7 8700G 64G Ubuntu machine in under half an hour
+**UPDATE 05/2024**: The `--aligner minimap2` mode now runs in multiple processes, splitting the target genome into fragments of at least `--tgtSize` bases; individual contigs and scaffolds **will not be fragmented**, and each chunk will contain entire sequences, unless the `--minimap2_lowmem` option is provided. The old approach is still accessible through the `--minimap2_full_alignment` option. The anaconda recipe with the dependencies has been updated, so please make sure to re-create the container where needed. This optimization makes it possible to perform a `minimap2` liftover of the panTro6 genome to the hg38 genome on a 16-core Ryzen 7 8700G Ubuntu machine with 64 GB of RAM in under half an hour.
 
 **UPDATE 14/12/2022**: Now the NCBI/iGenomes accession have to be provided in the `--source`/`--target` field, and then use the appropriate `--igenomes_source`/`--ncbi_source` and `--igenomes_target`/`--ncbi_target` as a modifier.
 

diff --git a/conf/base.config b/conf/base.config
@@ -10,7 +10,7 @@ process {
     cpus =  {
       def baseCpu = 1
       if (params.max_cpus){
-        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseMem
+        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseCpu
       } else {
         baseCpu
       }
@@ -36,7 +36,7 @@ process {
     cpus =  {
       def baseCpu = 1
       if (params.max_cpus){
-        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseMem
+        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseCpu
       } else {
         baseCpu
       }
@@ -62,7 +62,7 @@ process {
     cpus =  {
       def baseCpu = 1
       if (params.max_cpus){
-        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseMem
+        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseCpu
       } else {
         baseCpu
       }
@@ -88,7 +88,7 @@ process {
     cpus =  {
       def baseCpu = 2 * task.attempt
       if (params.max_cpus){
-        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseMem
+        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseCpu
       } else {
         baseCpu
       }
@@ -114,13 +114,13 @@ process {
     cpus =  {
       def baseCpu = params.gsalign_threads ? params.gsalign_threads as int * task.attempt : 1 * task.attempt
       if (params.max_cpus){
-        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseMem
+        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int : baseCpu
       } else {
         baseCpu
       }
     }
     memory = {
-      def baseMem = 6.GB * task.attempt
+      def baseMem = (params.gsalign_memory ? params.gsalign_memory as nextflow.util.MemoryUnit : 6.GB) * task.attempt // falls back to the previous 6.GB default when --gsalign_memory is unset
       if (params.max_memory){
         baseMem.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1 ? params.max_memory as nextflow.util.MemoryUnit : baseMem
       } else {
@@ -140,13 +140,13 @@ process {
     cpus =  {
       def baseCpu = params.minimap2_threads ? params.minimap2_threads as int * task.attempt : 1 * task.attempt
       if (params.max_cpus){
-        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseMem
+        baseCpu.compareTo(params.max_cpus as int) == 1 ? params.max_cpus as int  : baseCpu
       } else {
         baseCpu
       }
     }
     memory = {
-      def baseMem = 8.GB * task.attempt
+      def baseMem = (params.minimap2_memory ? params.minimap2_memory as nextflow.util.MemoryUnit : 8.GB) * task.attempt // falls back to the previous 8.GB default when --minimap2_memory is unset
       if (params.max_memory){
         baseMem.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1 ? params.max_memory as nextflow.util.MemoryUnit : baseMem
       } else {

diff --git a/conf/eddie.config b/conf/eddie.config
@@ -1,58 +1,71 @@
 /*
  * ------------------------------------------------------
- *  Based on the nf-core/rnaseq Nextflow base config file
+ *  Custom eddie (singularity/apptainer) config file
  * ------------------------------------------------------
  */
- //Profile config names for nf-core/configs
+// New parameters specific to customize eddie behaviour
 params {
-  // iGenomes reference base
-  saveReference = true
-  igenomes_base = '/exports/igmm/eddie/BioinformaticsResources/igenomes'
   config_profile_description = 'University of Edinburgh (eddie) cluster profile using anaconda tweaked by nf-core/configs.'
-  config_profile_contact = 'Andrea Talenti (@RenzoTale88)'
-  config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
-  extra_cluster_options = ""
-  enable_conda = false
-  cache_dir = null
-  max_memory = 2048.GB
-  max_cpus = 64
-  max_time = 240.h
-  scratch = false
-  queue_size = 100
-  rl9 = true
-  project = "uoe_baseline"
+  config_profile_contact     = 'Andrea Talenti (@RenzoTale88)'
+  config_profile_url         = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
+  extra_cluster_options      = ""
+  enable_conda               = false
+  singularity_cache_dir      = null
+  max_memory                 = 2048.GB
+  max_cpus                   = 64
+  max_time                   = 240.h
+  scratch                    = false
+  queue_size                 = 100
+  project                    = "uoe_baseline"
 }
- 
+
 executor {
-  name = "sge"
-  queueSize = "${params.queue_size}"
+  name                       = "sge"
+  queueSize                  = params.queue_size
 }
- 
+
 process {
-  clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -l rl9=${params.rl9} -P ${params.project} ${params.extra_cluster_options}" : "-R y -l rl9=${params.rl9} -P ${params.project ?: ''} ${params.extra_cluster_options}" }
-  scratch = params.scratch
-  penv = { task.cpus > 1 ? "sharedmem" : null }
+  stageInMode                = 'symlink'
+  scratch                    = params.scratch // honour the `scratch` parameter declared in params (false by default)
+  penv                       = { task.cpus > 1 ? "sharedmem" : null }
 
+  // This will override all jobs clusterOptions
+  // This is necessary to allow jobs to run on Eddie for many users
+  // For each job, we add an extra 8 GB of memory.
+  // For example, if the process asks for 16 GB of RAM (task.memory), the job will reserve 24 GB of RAM.
+  // The process will still use 16 GB (task.memory), leaving 8 GB for other system processes.
+  // This is very useful for any Java programs, which allocate task.memory RAM for their virtual machine.
+  // It also leaves enough memory for singularity to unpack images.
   // common SGE error statuses
-  errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
-  maxErrors = '-1'
-  maxRetries = 3
-
-  beforeScript =
-  """
-  . /etc/profile.d/modules.sh
-  module load singularity
-  export SINGULARITY_TMPDIR="\$TMPDIR"
-  """
+  errorStrategy              = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
+  maxErrors                  = '-1'
+  maxRetries                 = 3
+
+  clusterOptions           = {
+    // Flags shared by every job; h_vmem is only requested when the task declares memory (task.memory may be null).
+    def base_options = "-R y -P ${params.project} ${params.extra_cluster_options}"
+    task.memory ? "-l h_vmem=${(task.memory.toMega() + 8192) / task.cpus}M ${base_options}" : base_options
+  }
+  beforeScript             =
+    """
+    . /etc/profile.d/modules.sh
+    module load igmm/apps/singularity/3
+    export SINGULARITY_TMPDIR="\$TMPDIR"
+    export CUDA_VISIBLE_DEVICES=-1
+    """
 }
 
 env {
-  MALLOC_ARENA_MAX=1
+    MALLOC_ARENA_MAX=1
 }
 
 singularity {
-  envWhitelist = "SINGULARITY_TMPDIR,TMPDIR"
-  runOptions = '-p -B "$TMPDIR"'
-  enabled = true
-  autoMounts = true
-}
+    envWhitelist             = "APPTAINER_TMPDIR,SINGULARITY_TMPDIR,TMPDIR,CUDA_VISIBLE_DEVICES"
+    runOptions               = '-p -B "$TMPDIR"'
+    enabled                  = true
+    autoMounts               = true
+    // Singularity cache directory, taken from params.singularity_cache_dir (null unless the user sets it).
+    // Users without a compute project cannot access the shared cache directory,
+    // so they need to store singularity images in the work directory.
+    cacheDir                 = params.singularity_cache_dir
+}
diff --git a/conf/eddie_conda.config b/conf/eddie_conda.config
@@ -1,52 +1,58 @@
 /*
  * ------------------------------------------------------
- *  Based on the nf-core/rnaseq Nextflow base config file
+ *  Custom eddie (anaconda) config file
  * ------------------------------------------------------
  */
- //Profile config names for nf-core/configs
+
+// New parameters specific to customize eddie behaviour
 params {
-  // iGenomes reference base
-  saveReference = true
-  igenomes_base = '/exports/igmm/eddie/BioinformaticsResources/igenomes'
   config_profile_description = 'University of Edinburgh (eddie) cluster profile using anaconda tweaked by nf-core/configs.'
-  config_profile_contact = 'Andrea Talenti (@RenzoTale88)'
-  config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
-  extra_cluster_options = ""
-  enable_conda = false
-  cache_dir = null
-  max_memory = 2048.GB
-  max_cpus = 64
-  max_time = 240.h
-  scratch = false
-  queue_size = 100
-  rl9 = true
-  project = "uoe_baseline"
+  config_profile_contact     = 'Andrea Talenti (@RenzoTale88)'
+  config_profile_url         = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing'// Add parameter to specify extra flags for eddie
+  extra_cluster_options      = ""
+  enable_conda               = false
+  singularity_cache_dir      = null
+  max_memory                 = 2048.GB
+  max_cpus                   = 64
+  max_time                   = 240.h
+  scratch                    = false
+  queue_size                 = 100
+  project                    = "uoe_baseline"
 }
- 
+
 executor {
-  name = "sge"
-  queueSize = "${params.queue_size}"
+  name                       = "sge"
+  queueSize                  = params.queue_size
 }
 
 process {
-  clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus} -R y -l rl9=${params.rl9} -P ${params.project} ${params.extra_cluster_options}" : "-R y -l rl9=${params.rl9} -P ${params.project ?: ''} ${params.extra_cluster_options}" }
-  scratch = params.scratch
-  penv = { task.cpus > 1 ? "sharedmem" : null }
+  stageInMode                = 'symlink'
+  scratch                    = params.scratch // honour the `scratch` parameter declared in params (false by default)
+  penv                       = { task.cpus > 1 ? "sharedmem" : null }
 
+  // This will override all jobs clusterOptions
+  // This is necessary to allow jobs to run on Eddie for many users
+  // For each job, we add an extra 8 GB of memory.
+  // For example, if the process asks for 16 GB of RAM (task.memory), the job will reserve 24 GB of RAM.
+  // The process will still use 16 GB (task.memory), leaving 8 GB for other system processes.
+  // This is very useful for any Java programs, which allocate task.memory RAM for their virtual machine.
+  // It also leaves enough memory for singularity to unpack images.
   // common SGE error statuses
-  errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
-  maxErrors = '-1'
-  maxRetries = 3
-
-  beforeScript =
-  """
-  . /etc/profile.d/modules.sh
-  module load anaconda/2024.02
-  """
+  errorStrategy              = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'}
+  maxErrors                  = '-1'
+  maxRetries                 = 3
 
-  withName: mafstats {
-    conda = "$projectDir/assets/maf-environment.yml" 
+  clusterOptions           = {
+    // Flags shared by every job; h_vmem is only requested when the task declares memory (task.memory may be null).
+    def base_options = "-R y -P ${params.project} ${params.extra_cluster_options}"
+    task.memory ? "-l h_vmem=${(task.memory.toMega() + 8192) / task.cpus}M ${base_options}" : base_options
   }
+  beforeScript             =
+    """
+    . /etc/profile.d/modules.sh
+    module load anaconda
+    export CUDA_VISIBLE_DEVICES=-1
+    """
 }
 
 env {

diff --git a/conf/params.config b/conf/params.config
diff --git a/docs/alignments.md b/docs/alignments.md
@@ -102,6 +102,6 @@ The workflow now minimizes the memory impact of `minimap2` by generating an `.mm
 
 The default `minimap2` behaviour is now to align each sequence from the target genome separately, using one task at the time. This should achieve a good balance of number of processes and low number of cores per process.
 
-If the user wishes to use a single process, as in the previous version of the workflow, they can do so by providing `--mm2_full_alignment`. This will perform a single genome-to-genome process. You might want to increase the number of cores provided to minimap2 with `--minimap2_threads`.
+If the user wishes to use a single process, as in the previous version of the workflow, they can do so by providing `--minimap2_full_alignment`. This will perform a single genome-to-genome process. You might want to increase the number of cores provided to minimap2 with `--minimap2_threads` and memory with e.g. `--minimap2_memory 8.GB`.
 
-If the user needs to perform the alignment in a particularly low-memory environment, they can provide `--mm2_lowmem`. This will perform the scattering of the target genome using `--tgtSize`, and with the overlap specified in `--tgtOvlp`.
+If the user needs to perform the alignment in a particularly low-memory environment, they can provide `--minimap2_lowmem`. This will perform the scattering of the target genome using `--tgtSize`, and with the overlap specified in `--tgtOvlp`.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,4 +1,15 @@
 # Changelog
+## [v1.8.7]
+- Add options to tweak minimap2 and GSAlign memory (e.g. `--gsalign_memory 6.GB` or `--minimap2_memory 8.GB`).
+- Unify minimap2 option names by prefixing them with `--minimap2_` (e.g. `--mm2_lowmem` is now `--minimap2_lowmem`).
+- Remove unnecessary configurations
+
+## [v1.8.6]
+- Fix bugs originated after the workflow simplification.
+
+## [v1.8.5]
+- Upkeep release.
+
 ## [v1.8.4]
 - Fix broken anaconda environment creation due to wrong repository order 
 - Better schema file

diff --git a/main.nf b/main.nf
@@ -29,14 +29,11 @@ workflow {
                 exit 0
         }
 
-        // If params.custom is set, define that as distance
-        if ( params.custom != '' && params.distance == 'custom' ) { params.distance = 'custom' }
-
         // If params.custom is set, define that as distance
         if ( !params.source && !params.target ) { log.error "You did not provide a source and a target files."; exit 1 }
         if ( !params.source && params.target ) { log.error "You did not provide a source file."; exit 1 }
         if ( params.source && !params.target ) { log.error "You did not provide a target file."; exit 1 }
-        if ( params.mm2_full_alignment && params.mm2_lowmem ) { log.error "Incompatible options: --mm2_lowmem and --mm2_full_alignment."; exit 1 }
+        if ( params.minimap2_full_alignment && params.minimap2_lowmem ) { log.error "Incompatible options: --minimap2_lowmem and --minimap2_full_alignment."; exit 1 } // message names the renamed options
 
         // Print run informations
         log.info '''
@@ -93,11 +90,11 @@ no_maf          : $params.no_maf"""
         if (params.gsalign_threads && params.aligner == 'gsalign'){
                 log.info"""low memory (mm2): $params.gsalign_threads"""
         } 
-        if (params.mm2_lowmem){
-                log.info"""low memory (mm2): $params.mm2_lowmem"""
+        if (params.minimap2_lowmem){
+                log.info"""low memory (mm2): $params.minimap2_lowmem"""
         } 
-        if (params.mm2_full_alignment){
-                log.info"""full-alignment  : $params.mm2_full_alignment"""
+        if (params.minimap2_full_alignment){
+                log.info"""full-alignment  : $params.minimap2_full_alignment"""
         } 
         if (params.mafTools){
                 log.info"""mafTools        : $params.mafTools"""