From 62819e1e73657bbbd6c745f4c33418930b642d30 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Wed, 2 Apr 2025 21:11:39 +0200 Subject: [PATCH 01/12] re-organize with modules / remove workflow and call properly twice the same process / improve reproducibility by using software version configuration / set profile to use either singularity/docker/conda / re-work on README / add test profile / add local and hpc ressource config file --- MobiCT.nf | 495 +---------------------- README.md | 130 +++++- config/ressources/conda/bcftools.yaml | 7 + config/ressources/conda/bwa.yaml | 7 + config/ressources/conda/ensembl-vep.yaml | 7 + config/ressources/conda/fastp.yaml | 7 + config/ressources/conda/fgbio.yaml | 7 + config/ressources/conda/gatk4.yaml | 7 + config/ressources/conda/multiqc.yaml | 7 + config/ressources/conda/picard.yaml | 7 + config/ressources/conda/samtools.yaml | 7 + config/ressources/conda/vardict.yaml | 7 + config/ressources/hpc.config | 92 +++++ config/ressources/local.config | 48 +++ config/softwares.config | 52 +++ modules/bcftools.nf | 17 + modules/bwamem.nf | 24 ++ modules/fastp.nf | 29 ++ modules/fgbio.nf | 76 ++++ modules/gatk.nf | 133 ++++++ modules/multiqc.nf | 34 ++ modules/picard.nf | 43 ++ modules/samtools.nf | 19 + modules/vardict.nf | 26 ++ modules/vep.nf | 32 ++ nextflow.config | 92 ++++- 26 files changed, 914 insertions(+), 498 deletions(-) create mode 100644 config/ressources/conda/bcftools.yaml create mode 100644 config/ressources/conda/bwa.yaml create mode 100644 config/ressources/conda/ensembl-vep.yaml create mode 100644 config/ressources/conda/fastp.yaml create mode 100644 config/ressources/conda/fgbio.yaml create mode 100644 config/ressources/conda/gatk4.yaml create mode 100644 config/ressources/conda/multiqc.yaml create mode 100644 config/ressources/conda/picard.yaml create mode 100644 config/ressources/conda/samtools.yaml create mode 100644 config/ressources/conda/vardict.yaml create mode 100644 config/ressources/hpc.config create 
mode 100644 config/ressources/local.config create mode 100644 config/softwares.config create mode 100644 modules/bcftools.nf create mode 100644 modules/bwamem.nf create mode 100644 modules/fastp.nf create mode 100644 modules/fgbio.nf create mode 100644 modules/gatk.nf create mode 100644 modules/multiqc.nf create mode 100644 modules/picard.nf create mode 100644 modules/samtools.nf create mode 100644 modules/vardict.nf create mode 100644 modules/vep.nf diff --git a/MobiCT.nf b/MobiCT.nf index 338c373..259a139 100644 --- a/MobiCT.nf +++ b/MobiCT.nf @@ -23,479 +23,20 @@ email = 's-cabelloaguilar@chu-montpellier.fr' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// PROCESSES - -// Convert the demultiplexed, raw sequencing FASTQ files to BAM -process ConvertFastqToSam { - tag "$sample_id" - - input: - tuple val('sample_id'), path(fastq) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.bam") - - """ - gatk FastqToSam \ - --FASTQ ${fastq[0]} \ - --FASTQ2 ${fastq[1]} \ - --OUTPUT ${sample_id}${extension}.bam \ - --SAMPLE_NAME ${sample_id} \ - --TMP_DIR ${params.tmp_dir} - """ -} - -// Extraction of UMIs from the insert reads -process ExtractUmis { - tag "$sample_id" - - input: - tuple val(sample_id), path(bam_file) - val struct_r1 - val struct_r2 - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.bam") - - """ - fgbio ExtractUmisFromBam \ - -i ${bam_file} \ - -o ${sample_id}${extension}.bam \ - -r ${struct_r1} ${struct_r2} \ - -t RX \ - -a true - """ -} - -// Convert the BAM file with UMI extracted reads to a FASTQ file -process ConvertSamToFastq { - tag "$sample_id" - - input: - tuple val(sample_id), path(bam_file) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.R[1,2].fq") - - """ - gatk SamToFastq \ - -I ${bam_file} \ - -F ${sample_id}${extension}.R1.fq \ - -F2 ${sample_id}${extension}.R2.fq \ - --CLIPPING_ATTRIBUTE XT \ - 
--CLIPPING_ACTION 2 \ - --MAX_RECORDS_IN_RAM 50000000 - """ -} - -// Adapter and quality trimming -process Fastp { - tag "$sample_id" - - publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true, pattern: '*.{json,html}' - - input: - tuple val(sample_id), path(fastq) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.R[1,2].fq") - file "${sample_id}.QC.fastp.json" - file "${sample_id}.QC.fastp.html" - - script: - """ - fastp \ - -i ${fastq[0]} \ - -o ${sample_id}${extension}.R1.fq \ - -I ${fastq[1]} \ - -O ${sample_id}${extension}.R2.fq \ - -g -W 5 -q 20 -u 40 -x -3 -l 75 -c \ - -j ${sample_id}.QC.fastp.json \ - -h ${sample_id}.QC.fastp.html \ - -w 12 - """ -} - -// Alignement before deduplication: Align quality and adapter trimmed reads to -// the reference genome -process BWAmem { - tag "$sample_id" - clusterOptions '-n 10' - - input: - tuple val(sample_id), path(fastq) - val opt_bwa - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.bam") - - """ - bwa mem \ - ${opt_bwa} \ - -M ${params.ref} \ - ${fastq[0]} \ - ${fastq[1]} \ - | \ - samtools view -bh -o ${sample_id}${extension}.bam - """ -} - -// Merge the two BAM files containing: -// 1: the UMI information: output of ExtractUmis process -// 2: the alignment coordinate information: output of bwaMEM process -process MergeBam { - tag "$sample_id" - - input: - tuple val(sample_id), path(bam_aligned), path(bam_unmapped) - val extension - - - output: - tuple val(sample_id), file("${sample_id}${extension}.ba[i,m]") - - """ - gatk MergeBamAlignment \ - --ATTRIBUTES_TO_RETAIN X0 \ - --ATTRIBUTES_TO_REMOVE NM \ - --ATTRIBUTES_TO_REMOVE MD \ - --ALIGNED_BAM ${bam_aligned} \ - --UNMAPPED_BAM ${bam_unmapped} \ - --OUTPUT ${sample_id}${extension}.bam \ - --REFERENCE_SEQUENCE ${params.ref} \ - --SORT_ORDER 'queryname' \ - --ALIGNED_READS_ONLY true \ - --MAX_INSERTIONS_OR_DELETIONS -1 \ - --PRIMARY_ALIGNMENT_STRATEGY MostDistant \ - 
--ALIGNER_PROPER_PAIR_FLAGS true \ - --CREATE_INDEX true \ - --CLIP_OVERLAPPING_READS false - """ -} - -// -process UmiMergeFilt { - tag "$sample_id" - - input: - tuple val(sample_id), path(bam) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.bam") - - """ - samtools view \ - -f2 \ - -bh ${bam} \ - > ${sample_id}${extension}.bam - """ -} - -// Identify and group reads originating from the same source molecule -// The user can control how many errors/mismatches are allowed in the UMI sequence when assigning source molecules (--edits=n). -process GroupReads { - tag "$sample_id" - - publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true, pattern: '*.txt' - - input: - tuple val(sample_id), path(bam) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.bam"), emit: nextout - file "${sample_id}.QC.family_size_counts.txt" - - """ - fgbio GroupReadsByUmi \ - -i ${bam} \ - -o ${sample_id}${extension}.bam \ - --strategy=adjacency \ - --edits=1 \ - -t RX \ - -f ${sample_id}.QC.family_size_counts.txt - """ -} - -// Generate consensus reads -// Calculate the consensus sequence, Reads that occur as singletons are -// discarded by default but this can be changed by setting the –min-reads flag -// to 1, in so doing the single read will be considered the consensus. 
-process CallConsensus { - tag "$sample_id" - - input: - tuple val(sample_id), path(bam) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.bam") - - """ - fgbio CallMolecularConsensusReads \ - -i ${bam} \ - -o ${sample_id}${extension}.bam \ - --error-rate-post-umi 40 \ - --error-rate-pre-umi 45 \ - --output-per-base-tags false \ - --min-reads 2 \ - --max-reads 50 \ - --read-name-prefix='consensus' - """ -} - -workflow RerunConvertSamToFastq { - take: - tuple_input - extension - main: - ConvertSamToFastq(tuple_input, extension) - emit: - final_out = ConvertSamToFastq.out -} - -workflow RerunBWAmem { - take: - tuple_input - opt_bwa - extension - main: - BWAmem(tuple_input, opt_bwa, extension) - emit: - final_out = BWAmem.out -} - -// Sort the consensus_mapped.bam with the consensus_unmapped.bam to prepare them as input for the next step -process SortConsensus { - tag "$sample_id" - - input: - tuple val(sample_id), path(bam) - val extension - - output: - tuple val(sample_id), path("${sample_id}${extension}.sort.bam") - - script: - - """ - gatk SortSam \ - -I ${bam} \ - --OUTPUT ${sample_id}${extension}.sort.bam \ - --SORT_ORDER queryname - """ -} - -workflow RerunSortConsensus { - take: - tuple_input - extension - main: - SortConsensus(tuple_input, extension) - emit: - final_out = SortConsensus.out -} - -// Finally, merge the consensus_mapped.bam with the consensus_unmapped.bam to -// retain the UMI group information. 
-process MergeBam2 { - tag "$sample_id" - - publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true - - input: - tuple val(sample_id), path(bam_aligned), path(bam_unmapped) - val extension - - output: - tuple val(sample_id), path("${sample_id}${extension}.ba[m,i]") - - """ - gatk MergeBamAlignment \ - --ATTRIBUTES_TO_RETAIN X0 \ - --ATTRIBUTES_TO_RETAIN RX \ - --ALIGNED_BAM ${bam_aligned} \ - --UNMAPPED_BAM ${bam_unmapped} \ - --OUTPUT ${sample_id}${extension}.bam \ - --REFERENCE_SEQUENCE ${params.ref} \ - --SORT_ORDER coordinate \ - --ADD_MATE_CIGAR true \ - --MAX_INSERTIONS_OR_DELETIONS -1 \ - --PRIMARY_ALIGNMENT_STRATEGY MostDistant \ - --ALIGNER_PROPER_PAIR_FLAGS true \ - --CREATE_INDEX true \ - --CLIP_OVERLAPPING_READS false - """ -} - -// Variant calling step using vardict -process VarDict { - tag "$sample_id" - - input: - tuple val(sample_id), path(bami) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.vcf") - - """ - vardict \ - -G ${params.ref} \ - -f 0.0005 \ - -N ${sample_id} \ - -b ${bami[1]} \ - -c 1 \ - -S 2 \ - -E 3 \ - -g 4 ${params.bed} \ - | $params.teststrandbias \ - | $params.var2vcf > ${sample_id}${extension}.vcf - """ -} - -// Annotation step using VEP -process AnnotationVEP { - tag "$sample_id" - - publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true - - input: - tuple val(sample_id), path(vcf) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.vcf") - - """ - vep \ - -i ${vcf} \ - -o ${sample_id}${extension}.vcf \ - --cache \ - --dir_cache ${params.cache} \ - --offline \ - --force_overwrite \ - --vcf \ - --numbers \ - --refseq \ - --symbol \ - --hgvs \ - --canonical \ - --max_af \ - --fasta ${params.fasta} - """ -} - -process BedToIntervalList { - - input: - path dict - path bed - val extension - - output: - file "${extension}.interval_list" - - """ - picard BedToIntervalList \ - --SEQUENCE_DICTIONARY ${dict} \ - --INPUT ${bed} \ - 
--OUTPUT ${extension}.interval_list - """ -} - -// Extraction of read quality metrics before deduplication -process CollectHsMetrics { - tag "$sample_id" - - publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true - - input: - tuple val(sample_id), file(bam) - file bed - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.txt") - - """ - picard CollectHsMetrics \ - --REFERENCE_SEQUENCE ${params.ref} \ - --BAIT_INTERVALS ${bed} \ - --TARGET_INTERVALS ${bed} \ - --INPUT ${bam} \ - --OUTPUT ${sample_id}${extension}.txt - """ -} - -workflow RerunCollectHsMetrics { - take: - tuple_input - bed - extension - main: - CollectHsMetrics(tuple_input, bed, extension) - emit: - final_out = CollectHsMetrics.out -} - -process BCFtools_stats { - tag "${sample_id}" - - publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true - - input: - tuple val(sample_id), path(file) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}.stats") - - """ - bcftools stats ${file} > ${sample_id}${extension}.stats - """ -} - -// Generate a multi-quality control report from collected metrics data (process -// collectmetrics2 output). 
-process MultiQC { - tag "${sample_id}" - - publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true - - input: - tuple val(sample_id), path(file) - val extension - - output: - tuple val(sample_id), file("${sample_id}${extension}") - - """ - multiqc ${params.outdir}/${sample_id} -o ${sample_id}${extension} - """ -} - -process MultiQC_ALL { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - - input: - tuple val(sample_id), path(file) - val extension - - output: - file("${extension}") - - """ - multiqc ${params.outdir} -o ${extension} - """ -} - +// Include modules +include {ConvertFastqToSam; MergeBam; MergeBam2; SortConsensus; SortConsensus as RerunSortConsensus; ConvertSamToFastq; ConvertSamToFastq as RerunConvertSamToFastq } from "$baseDir/modules/gatk.nf" +include {BedToIntervalList; CollectHsMetrics; CollectHsMetrics as RerunCollectHsMetrics;} from "$baseDir/modules/picard.nf" +include {BWAmem; BWAmem as RerunBWAmem;} from "$baseDir/modules/bwamem.nf" +include {BCFtools_stats} from "$baseDir/modules/bcftools.nf" +include {Fastp} from "$baseDir/modules/fastp.nf" +include {ExtractUmis; GroupReads; CallConsensus; } from "$baseDir/modules/fgbio.nf" +include {MultiQC; MultiQC_ALL; } from "$baseDir/modules/multiqc.nf" +include {UmiMergeFilt} from "$baseDir/modules/samtools.nf" +include {VarDict} from "$baseDir/modules/vardict.nf" +include {AnnotationVEP} from "$baseDir/modules/vep.nf" + + +// Define the workflow workflow { Channel.fromFilePairs(params.fastq, checkIfExists:true) .filter{ v -> v=~ params.filter_fastq} @@ -518,11 +59,11 @@ workflow { // 3. 
Post process deduplication RerunConvertSamToFastq(CallConsensus.out, ".3.unmapped") - RerunBWAmem(RerunConvertSamToFastq.out.final_out, "-t 10 -Y", ".3.consensus_mapped") + RerunBWAmem(RerunConvertSamToFastq.out, "-t 10 -Y", ".3.consensus_mapped") SortConsensus(CallConsensus.out, ".3.unmapped") - RerunSortConsensus(RerunBWAmem.out.final_out, ".3.mapped") + RerunSortConsensus(RerunBWAmem.out, ".3.mapped") - RerunSortConsensus.out.final_out.join(SortConsensus.out).set{bams_consensus} + RerunSortConsensus.out.join(SortConsensus.out).set{bams_consensus} MergeBam2(bams_consensus, ".3.merged") // 4. Variant Calling & Annotation @@ -532,7 +73,7 @@ workflow { // Quality Controls BedToIntervalList(params.dict, params.bed, params.bed.tokenize('.')[0].tokenize('/')[-1]) CollectHsMetrics(BWAmem.out, BedToIntervalList.out, ".QC.HsMetrics.1") - RerunCollectHsMetrics(RerunBWAmem.out.final_out, BedToIntervalList.out, ".QC.HsMetrics.3") + RerunCollectHsMetrics(RerunBWAmem.out, BedToIntervalList.out, ".QC.HsMetrics.3") BCFtools_stats(VarDict.out, ".QC.bcftools_stats") MultiQC(BCFtools_stats.out, ".QC.multiQC") diff --git a/README.md b/README.md index 74e72ff..d08d2e0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,22 @@ # MobiCT ctDNA Analysis pipeline. Version 1.0.0 -# Introduction +## Table of Contents + +* [Introduction](#introduction) +* [Quick Start](#quick-start) +* [Installation](#installation) + * [Nextflow](#nextflow) + * [Execution environment](#execution-environment) + * [Container platform](#container-platform) + * [Docker](#docker) + * [Singularity](#singularity) + * [Conda](#conda) +* [Usage](#usage) +* [Pipeline output](#pipeline-output) +* [Data availability](#data-availability) + +## Introduction Mobict is an analysis pipeline designed for detecting SNV and small InDels in circulating tumor DNA (ctDNA) obtained through non-invasive liquid biopsy, serving diagnostic, prognostic, and therapeutic purposes. The pipeline is built using Nextflow. 
The associated publication is available here: /doi/.../ @@ -20,13 +35,118 @@ The associated publication is available here: /doi/.../ 4. Download the reference genome 5. Download the datasets needed by VEP (see https://github.com/Ensembl/ensembl-vep) 6. Edit the *.config* file with input and output files/paths -7. Activate your conda environment -8. Run MobiCT on your Dataset +7. Run MobiCT on your Dataset `Nextflow -log /output_directory/my.log run MobiCT.nf -c nextflow.config` -# Pipeline output +## Installation + +The prerequisites to run the pipeline are: + + * [Nextflow](https://www.nextflow.io/) >= 22.04.0 + * [Docker](https://www.docker.com) or [Singularity](https://sylabs.io/singularity/) or [conda](https://juke34.github.io/fix-anaconda-licensing-issues/en/pages/conda-distrib/) + +### Nextflow + + * Via conda + +
+ See here + + ```bash + conda create -n nextflow + conda activate nextflow + conda install bioconda::nextflow + ``` +
+ + * Manually +
+ See here + Nextflow runs on most POSIX systems (Linux, macOS, etc) and can typically be installed by running these commands: + + ```bash + # Make sure 11 or later is installed on your computer by using the command: + java -version + + # Install Nextflow by entering this command in your terminal(it creates a file nextflow in the current dir): + curl -s https://get.nextflow.io | bash + + # Add Nextflow binary to your user's PATH: + mv nextflow ~/bin/ + # OR system-wide installation: + # sudo mv nextflow /usr/local/bin + ``` +
+ +### Execution environment + +#### Container platform + +To run the workflow you will need a container platform: docker or singularity. + +##### Docker + +Please follow the instructions at the [Docker website](https://docs.docker.com/desktop/) + +##### Singularity + +Please follow the instructions at the [Singularity website](https://docs.sylabs.io/guides/latest/admin-guide/installation.html) + +#### Conda + +It is recommended to use a container platform. However, if you prefer to use Conda, please follow the instructions available [here](https://juke34.github.io/fix-anaconda-licensing-issues/en/pages/conda-installation/) + +## Usage + +### Profile + +To run the workflow you must select a profile according to the container platform you want to use: +- `singularity`, a profile using Singularity containers to run the softwares +- `docker`, a profile using Docker containers to run the softwares +- `conda`, a profile using conda environments to run the softwares + + +### From Release + +#### Default usage +The command will look like that: + +```bash +nextflow run SimCab-CHU/MobiCT -r v1.0.0 -profile docker +``` + +#### Test + +```bash +nextflow run SimCab-CHU/MobiCT -r v1.0.0 -profile docker,test +``` + +### From repo + +```bash +# clone the workflow repository +git clone https://github.com/SimCab-CHU/MobiCT.git + +# Move in it +cd MobiCT +``` +#### Default usage + +```bash +# Run MobiCT +nextflow run MobiCT.nf -profile docker +``` + +#### Test + +```bash +# Run MobiCT +nextflow run MobiCT.nf -profile docker,test +``` + +## Pipeline output The output files are stored in the directory you specified using the **outdir** parameter in the .config file. The **outdir** contains a folder per sample plus a **multiQC** folder. Each sample folder contains a deduplicated and aligned *.bam* file and its index, an annotated *.vcf* file, 3 metrics files (*.HsMetrics.1.txt*, *.HsMetrics.3.txt* and *QC.bcftools_stats.stats*). 
-# Data availability +## Data availability Raw sequencing data (FASTQ files) of commercial controls used in the study are available @ https://www.ncbi.nlm.nih.gov/sra/PRJNA1209006. diff --git a/config/ressources/conda/bcftools.yaml b/config/ressources/conda/bcftools.yaml new file mode 100644 index 0000000..fc17e15 --- /dev/null +++ b/config/ressources/conda/bcftools.yaml @@ -0,0 +1,7 @@ +name: bcftools +channels: + - conda-forge + - bioconda + +dependencies: + - bcftools=1.9 \ No newline at end of file diff --git a/config/ressources/conda/bwa.yaml b/config/ressources/conda/bwa.yaml new file mode 100644 index 0000000..7f710ae --- /dev/null +++ b/config/ressources/conda/bwa.yaml @@ -0,0 +1,7 @@ +name: bwa +channels: + - conda-forge + - bioconda + +dependencies: + - bwa=0.7.19 \ No newline at end of file diff --git a/config/ressources/conda/ensembl-vep.yaml b/config/ressources/conda/ensembl-vep.yaml new file mode 100644 index 0000000..a9ee1c5 --- /dev/null +++ b/config/ressources/conda/ensembl-vep.yaml @@ -0,0 +1,7 @@ +name: ensembl-vep +channels: + - conda-forge + - bioconda + +dependencies: + - ensembl-vep=113.0 \ No newline at end of file diff --git a/config/ressources/conda/fastp.yaml b/config/ressources/conda/fastp.yaml new file mode 100644 index 0000000..8672409 --- /dev/null +++ b/config/ressources/conda/fastp.yaml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + +dependencies: + - fastp=0.24.0 \ No newline at end of file diff --git a/config/ressources/conda/fgbio.yaml b/config/ressources/conda/fgbio.yaml new file mode 100644 index 0000000..348db93 --- /dev/null +++ b/config/ressources/conda/fgbio.yaml @@ -0,0 +1,7 @@ +name: fgbio +channels: + - conda-forge + - bioconda + +dependencies: + - fgbio=2.5.0 \ No newline at end of file diff --git a/config/ressources/conda/gatk4.yaml b/config/ressources/conda/gatk4.yaml new file mode 100644 index 0000000..079a8dc --- /dev/null +++ b/config/ressources/conda/gatk4.yaml @@ -0,0 +1,7 @@ +name: gatk4 
+channels: + - conda-forge + - bioconda + +dependencies: + - gatk4=4.6.1.0 \ No newline at end of file diff --git a/config/ressources/conda/multiqc.yaml b/config/ressources/conda/multiqc.yaml new file mode 100644 index 0000000..f630102 --- /dev/null +++ b/config/ressources/conda/multiqc.yaml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + +dependencies: + - multiqc=1.28 \ No newline at end of file diff --git a/config/ressources/conda/picard.yaml b/config/ressources/conda/picard.yaml new file mode 100644 index 0000000..61da0f6 --- /dev/null +++ b/config/ressources/conda/picard.yaml @@ -0,0 +1,7 @@ +name: picard +channels: + - conda-forge + - bioconda + +dependencies: + - picard=3.3.0 \ No newline at end of file diff --git a/config/ressources/conda/samtools.yaml b/config/ressources/conda/samtools.yaml new file mode 100644 index 0000000..9beff77 --- /dev/null +++ b/config/ressources/conda/samtools.yaml @@ -0,0 +1,7 @@ +name: samtools +channels: + - conda-forge + - bioconda + +dependencies: + - samtools=1.6 \ No newline at end of file diff --git a/config/ressources/conda/vardict.yaml b/config/ressources/conda/vardict.yaml new file mode 100644 index 0000000..9af8e25 --- /dev/null +++ b/config/ressources/conda/vardict.yaml @@ -0,0 +1,7 @@ +name: vardict +channels: + - conda-forge + - bioconda + +dependencies: + - vardict=2018.10.18 \ No newline at end of file diff --git a/config/ressources/hpc.config b/config/ressources/hpc.config new file mode 100644 index 0000000..9156f0c --- /dev/null +++ b/config/ressources/hpc.config @@ -0,0 +1,92 @@ +process { + cpus = 1 + time = '1h' + maxForks = 20 + shell = ['/bin/bash', '-euo', 'pipefail'] + stageOutMode = 'rsync' + + withLabel: 'bcftools' { + cpus = 1 + time = '1h' + } + withLabel: 'bwamem' { + cpus = 10 + time = '24h' + } + withLabel: 'bowtie' { + cpus = 16 + time = '4h' + } + withLabel: 'bowtie2' { + cpus = 16 + time = '4h' + } + withLabel: 'bwa' { + cpus = 16 + time = '4h' + } + withLabel: 'fastp' { + cpus = 
16 + time = '2h' + } + withLabel: 'fastqc' { + cpus = 8 + time = '1h' + } + withLabel: 'hisat2' { + cpus = 16 + time = '4h' + } + withLabel: 'kallisto' { + cpus = 16 + time = '4h' + } + withLabel: 'graphmap2' { + cpus = 16 + time = '4h' + } + withLabel: 'minimap2' { + cpus = 16 + time = '4h' + } + withLabel: 'multiqc' { + cpus = 8 + time = '1h' + } + withLabel: 'mummer4' { + cpus = 16 + time = '4h' + } + withLabel: 'ngmlr' { + cpus = 16 + time = '1h' + } + withLabel: 'novoalign' { + cpus = 16 + time = '4h' + } + withLabel: 'salmon' { + cpus = 16 + time = '4h' + } + withLabel: 'samtools' { + cpus = 16 + time = '2h' + } + withLabel: 'seqkit' { + cpus = 8 + time = '4h' + } + withLabel: 'seqtk' { + cpus = 16 + time = '4h' + } + withLabel: 'star' { + cpus = 16 + time = '4h' + } + withLabel: 'subread' { + cpus = 16 + time = '4h' + } +} diff --git a/config/ressources/local.config b/config/ressources/local.config new file mode 100644 index 0000000..9774c2c --- /dev/null +++ b/config/ressources/local.config @@ -0,0 +1,48 @@ +process { + cpus = 1 + time = '1h' + maxForks = 8 + shell = ['/bin/bash', '-euo', 'pipefail'] + stageOutMode = 'rsync' + + withLabel: 'bcftools' { + cpus = 4 + time = '1h' + } + withLabel: 'bwamem' { + cpus = 4 + time = '4h' + } + withLabel: 'fastp' { + cpus = 4 + time = '4h' + } + withLabel: 'fgbio' { + cpus = 4 + time = '4h' + } + withLabel: 'gatk4' { + cpus = 4 + time = '4h' + } + withLabel: 'multiqc' { + cpus = 4 + time = '2h' + } + withLabel: 'picard' { + cpus = 4 + time = '1h' + } + withLabel: 'samtools' { + cpus = 4 + time = '4h' + } + withLabel: 'vardict' { + cpus = 2 + time = '4h' + } + withLabel: 'vep' { + cpus = 4 + time = '4h' + } +} diff --git a/config/softwares.config b/config/softwares.config new file mode 100644 index 0000000..a3564c8 --- /dev/null +++ b/config/softwares.config @@ -0,0 +1,52 @@ +process { + withLabel: 'bcftools' { + conda = { singularity.enabled || docker.enabled ? 
'' : "$baseDir/config/ressources/conda/bcftools.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/bcftools:1.9--h4da6232_0' : + docker.enabled ? 'quay.io/biocontainers/bcftools:1.9--h4da6232_0' : '' } + } + withLabel: 'bwamem' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/bwa.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/bwa:0.7.19--h577a1d6_0' : + docker.enabled ? 'quay.io/biocontainers/bwa:0.7.19--h577a1d6_0' : '' } + } + withLabel: 'fastp' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/fastp.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/fastp:0.24.0--heae3180_1' : + docker.enabled ? 'quay.io/biocontainers/fastp:0.24.0--heae3180_1' : '' } + } + withLabel: 'fgbio' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/fgbio.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/fgbio:2.5.0--hdfd78af_1' : + docker.enabled ? 'quay.io/biocontainers/fgbio:2.5.0--hdfd78af_1' : '' } + } + withLabel: 'gatk4' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/gatk4.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/gatk4:4.6.1.0--py310hdfd78af_0' : + docker.enabled ? 'quay.io/biocontainers/gatk4:4.6.1.0--py310hdfd78af_0' : '' } + } + withLabel: 'multiqc' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/multiqc.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/multiqc:1.28--pyhdfd78af_0' : + docker.enabled ? 'quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0' : '' } + } + withLabel: 'picard' { + conda = { singularity.enabled || docker.enabled ? 
'' : "$baseDir/config/ressources/conda/picard.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + docker.enabled ? 'quay.io/biocontainers/picard:3.3.0--hdfd78af_0' : '' } + } + withLabel: 'samtools' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/samtools.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/samtools:1.6--h5fe306e_11' : + docker.enabled ? 'quay.io/biocontainers/samtools:1.6--h5fe306e_11' : '' } + } + withLabel: 'vardict' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/vardict.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/vardict:2018.10.18--pl526_1' : + docker.enabled ? 'quay.io/biocontainers/vardict:2018.10.18--pl526_1' : '' } + } + withLabel: 'vep' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/ensembl-vep.yaml" } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/ensembl-vep:113.0--pl5321h2a3209d_0' : + docker.enabled ? 
'quay.io/biocontainers/ensembl-vep:113.0--pl5321h2a3209d_0' : '' } + } +} \ No newline at end of file diff --git a/modules/bcftools.nf b/modules/bcftools.nf new file mode 100644 index 0000000..961c5c2 --- /dev/null +++ b/modules/bcftools.nf @@ -0,0 +1,17 @@ +process BCFtools_stats { + tag "${sample_id}" + label 'bcftools' + + publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true + + input: + tuple val(sample_id), path(file) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.stats") + + """ + bcftools stats ${file} > ${sample_id}${extension}.stats + """ +} diff --git a/modules/bwamem.nf b/modules/bwamem.nf new file mode 100644 index 0000000..6a46753 --- /dev/null +++ b/modules/bwamem.nf @@ -0,0 +1,24 @@ +// Alignement before deduplication: Align quality and adapter trimmed reads to +// the reference genome +process BWAmem { + tag "$sample_id" + label 'bwamem' + + input: + tuple val(sample_id), path(fastq) + val opt_bwa + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.bam") + + """ + bwa mem \ + ${opt_bwa} \ + -M ${params.ref} \ + ${fastq[0]} \ + ${fastq[1]} \ + | \ + samtools view -bh -o ${sample_id}${extension}.bam + """ +} \ No newline at end of file diff --git a/modules/fastp.nf b/modules/fastp.nf new file mode 100644 index 0000000..2856a8e --- /dev/null +++ b/modules/fastp.nf @@ -0,0 +1,29 @@ +// Adapter and quality trimming +process Fastp { + tag "$sample_id" + label 'fastp' + + publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true, pattern: '*.{json,html}' + + input: + tuple val(sample_id), path(fastq) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.R[1,2].fq") + file "${sample_id}.QC.fastp.json" + file "${sample_id}.QC.fastp.html" + + script: + """ + fastp \ + -i ${fastq[0]} \ + -o ${sample_id}${extension}.R1.fq \ + -I ${fastq[1]} \ + -O ${sample_id}${extension}.R2.fq \ + -g -W 5 -q 20 -u 40 -x -3 -l 75 -c \ + -j 
${sample_id}.QC.fastp.json \ + -h ${sample_id}.QC.fastp.html \ + -w 12 + """ +} \ No newline at end of file diff --git a/modules/fgbio.nf b/modules/fgbio.nf new file mode 100644 index 0000000..cd9b802 --- /dev/null +++ b/modules/fgbio.nf @@ -0,0 +1,76 @@ +// Extraction of UMIs from the insert reads +process ExtractUmis { + tag "$sample_id" + label 'fgbio' + + input: + tuple val(sample_id), path(bam_file) + val struct_r1 + val struct_r2 + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.bam") + + """ + fgbio ExtractUmisFromBam \ + -i ${bam_file} \ + -o ${sample_id}${extension}.bam \ + -r ${struct_r1} ${struct_r2} \ + -t RX \ + -a true + """ +} + +// Identify and group reads originating from the same source molecule +// The user can control how many errors/mismatches are allowed in the UMI sequence when assigning source molecules (--edits=n). +process GroupReads { + tag "$sample_id" + + publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true, pattern: '*.txt' + + input: + tuple val(sample_id), path(bam) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.bam"), emit: nextout + file "${sample_id}.QC.family_size_counts.txt" + + """ + fgbio GroupReadsByUmi \ + -i ${bam} \ + -o ${sample_id}${extension}.bam \ + --strategy=adjacency \ + --edits=1 \ + -t RX \ + -f ${sample_id}.QC.family_size_counts.txt + """ +} + +// Generate consensus reads +// Calculate the consensus sequence, Reads that occur as singletons are +// discarded by default but this can be changed by setting the –min-reads flag +// to 1, in so doing the single read will be considered the consensus. 
+process CallConsensus { + tag "$sample_id" + + input: + tuple val(sample_id), path(bam) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.bam") + + """ + fgbio CallMolecularConsensusReads \ + -i ${bam} \ + -o ${sample_id}${extension}.bam \ + --error-rate-post-umi 40 \ + --error-rate-pre-umi 45 \ + --output-per-base-tags false \ + --min-reads 2 \ + --max-reads 50 \ + --read-name-prefix='consensus' + """ +} \ No newline at end of file diff --git a/modules/gatk.nf b/modules/gatk.nf new file mode 100644 index 0000000..8050325 --- /dev/null +++ b/modules/gatk.nf @@ -0,0 +1,133 @@ +// Convert the demultiplexed, raw sequencing FASTQ files to BAM +process ConvertFastqToSam { + tag "$sample_id" + label 'gatk4' + + input: + tuple val('sample_id'), path(fastq) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.bam") + + """ + gatk FastqToSam \ + --FASTQ ${fastq[0]} \ + --FASTQ2 ${fastq[1]} \ + --OUTPUT ${sample_id}${extension}.bam \ + --SAMPLE_NAME ${sample_id} \ + --TMP_DIR ${params.tmp_dir} + """ +} + +// Convert the BAM file with UMI extracted reads to a FASTQ file +process ConvertSamToFastq { + tag "$sample_id" + label 'gatk4' + + input: + tuple val(sample_id), path(bam_file) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.R[1,2].fq") + + """ + gatk SamToFastq \ + -I ${bam_file} \ + -F ${sample_id}${extension}.R1.fq \ + -F2 ${sample_id}${extension}.R2.fq \ + --CLIPPING_ATTRIBUTE XT \ + --CLIPPING_ACTION 2 \ + --MAX_RECORDS_IN_RAM 50000000 + """ +} + +// Merge the two BAM files containing: +// 1: the UMI information: output of ExtractUmis process +// 2: the alignment coordinate information: output of bwaMEM process +process MergeBam { + tag "$sample_id" + label 'gatk4' + + input: + tuple val(sample_id), path(bam_aligned), path(bam_unmapped) + val extension + + + output: + tuple val(sample_id), file("${sample_id}${extension}.ba[i,m]") + + """ + gatk MergeBamAlignment \ + 
--ATTRIBUTES_TO_RETAIN X0 \ + --ATTRIBUTES_TO_REMOVE NM \ + --ATTRIBUTES_TO_REMOVE MD \ + --ALIGNED_BAM ${bam_aligned} \ + --UNMAPPED_BAM ${bam_unmapped} \ + --OUTPUT ${sample_id}${extension}.bam \ + --REFERENCE_SEQUENCE ${params.ref} \ + --SORT_ORDER 'queryname' \ + --ALIGNED_READS_ONLY true \ + --MAX_INSERTIONS_OR_DELETIONS -1 \ + --PRIMARY_ALIGNMENT_STRATEGY MostDistant \ + --ALIGNER_PROPER_PAIR_FLAGS true \ + --CREATE_INDEX true \ + --CLIP_OVERLAPPING_READS false + """ +} + +// Sort the consensus_mapped.bam with the consensus_unmapped.bam to prepare them as input for the next step +process SortConsensus { + tag "$sample_id" + label 'gatk4' + + input: + tuple val(sample_id), path(bam) + val extension + + output: + tuple val(sample_id), path("${sample_id}${extension}.sort.bam") + + script: + + """ + gatk SortSam \ + -I ${bam} \ + --OUTPUT ${sample_id}${extension}.sort.bam \ + --SORT_ORDER queryname + """ +} + +// Finally, merge the consensus_mapped.bam with the consensus_unmapped.bam to +// retain the UMI group information. 
+process MergeBam2 { + tag "$sample_id" + label 'gatk4' + + publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true + + input: + tuple val(sample_id), path(bam_aligned), path(bam_unmapped) + val extension + + output: + tuple val(sample_id), path("${sample_id}${extension}.ba[m,i]") + + """ + gatk MergeBamAlignment \ + --ATTRIBUTES_TO_RETAIN X0 \ + --ATTRIBUTES_TO_RETAIN RX \ + --ALIGNED_BAM ${bam_aligned} \ + --UNMAPPED_BAM ${bam_unmapped} \ + --OUTPUT ${sample_id}${extension}.bam \ + --REFERENCE_SEQUENCE ${params.ref} \ + --SORT_ORDER coordinate \ + --ADD_MATE_CIGAR true \ + --MAX_INSERTIONS_OR_DELETIONS -1 \ + --PRIMARY_ALIGNMENT_STRATEGY MostDistant \ + --ALIGNER_PROPER_PAIR_FLAGS true \ + --CREATE_INDEX true \ + --CLIP_OVERLAPPING_READS false + """ +} \ No newline at end of file diff --git a/modules/multiqc.nf b/modules/multiqc.nf new file mode 100644 index 0000000..2c10719 --- /dev/null +++ b/modules/multiqc.nf @@ -0,0 +1,34 @@ +// Generate a multi-quality control report from collected metrics data (process +// collectmetrics2 output). 
+process MultiQC { + tag "${sample_id}" + label 'multiqc' + + publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true + + input: + tuple val(sample_id), path(file) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}") + + """ + multiqc ${params.outdir}/${sample_id} -o ${sample_id}${extension} + """ +} + +process MultiQC_ALL { + publishDir "${params.outdir}", mode: 'copy', overwrite: true + + input: + tuple val(sample_id), path(file) + val extension + + output: + file("${extension}") + + """ + multiqc ${params.outdir} -o ${extension} + """ +} \ No newline at end of file diff --git a/modules/picard.nf b/modules/picard.nf new file mode 100644 index 0000000..798e6d1 --- /dev/null +++ b/modules/picard.nf @@ -0,0 +1,43 @@ +process BedToIntervalList { + label 'picard' + + input: + path dict + path bed + val extension + + output: + file "${extension}.interval_list" + + """ + picard BedToIntervalList \ + --SEQUENCE_DICTIONARY ${dict} \ + --INPUT ${bed} \ + --OUTPUT ${extension}.interval_list + """ +} + +// Extraction of read quality metrics before deduplication +process CollectHsMetrics { + tag "$sample_id" + label 'picard' + + publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true + + input: + tuple val(sample_id), file(bam) + file bed + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.txt") + + """ + picard CollectHsMetrics \ + --REFERENCE_SEQUENCE ${params.ref} \ + --BAIT_INTERVALS ${bed} \ + --TARGET_INTERVALS ${bed} \ + --INPUT ${bam} \ + --OUTPUT ${sample_id}${extension}.txt + """ +} diff --git a/modules/samtools.nf b/modules/samtools.nf new file mode 100644 index 0000000..435f7c9 --- /dev/null +++ b/modules/samtools.nf @@ -0,0 +1,19 @@ +// +process UmiMergeFilt { + tag "$sample_id" + label 'samtools' + + input: + tuple val(sample_id), path(bam) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.bam") + + """ + samtools view \ + -f2 \ + 
-bh ${bam} \ + > ${sample_id}${extension}.bam + """ +} \ No newline at end of file diff --git a/modules/vardict.nf b/modules/vardict.nf new file mode 100644 index 0000000..8087217 --- /dev/null +++ b/modules/vardict.nf @@ -0,0 +1,26 @@ +// Variant calling step using vardict +process VarDict { + tag "$sample_id" + label 'verdict' + + input: + tuple val(sample_id), path(bami) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.vcf") + + """ + vardict \ + -G ${params.ref} \ + -f 0.0005 \ + -N ${sample_id} \ + -b ${bami[1]} \ + -c 1 \ + -S 2 \ + -E 3 \ + -g 4 ${params.bed} \ + | $params.teststrandbias \ + | $params.var2vcf > ${sample_id}${extension}.vcf + """ +} diff --git a/modules/vep.nf b/modules/vep.nf new file mode 100644 index 0000000..ff331a7 --- /dev/null +++ b/modules/vep.nf @@ -0,0 +1,32 @@ +// Annotation step using VEP +process AnnotationVEP { + tag "$sample_id" + label 'vep' + + publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true + + input: + tuple val(sample_id), path(vcf) + val extension + + output: + tuple val(sample_id), file("${sample_id}${extension}.vcf") + + """ + vep \ + -i ${vcf} \ + -o ${sample_id}${extension}.vcf \ + --cache \ + --dir_cache ${params.cache} \ + --offline \ + --force_overwrite \ + --vcf \ + --numbers \ + --refseq \ + --symbol \ + --hgvs \ + --canonical \ + --max_af \ + --fasta ${params.fasta} + """ +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 9293def..f46ef73 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,21 +1,81 @@ -docker.enabled = true -conda.enabled = true +manifest { + name = 'MobiCT' + author = '' + homePage = 'https://github.com/SimCab-CHU/MobiCT' + description = 'ctDNA Analysis pipeline' + mainScript = 'MobiCT.nf' + nextflowVersion = '>=22.04.0' + version = '1.0.0' +} + +// meta parameters dag.overwrite = true -process.executor = 'slurm' -workDir = "/path/to/workDir" params { - struct_r1 = "5M2S+T" - struct_r2 = "5M2S+T" - 
ref = "/path/to/referenceGenome.fasta" - dict = "/path/to/referenceGenome.dict" - fastq = "/path/to/fastq/files/*_{R1,R2}_001.fastq.gz" - filter_fastq = /^((?!Undetermined).)*$/ - outdir = "/output/path" - bed = "/path/to/bedFile" + struct_r1 = "5M2S+T" + struct_r2 = "5M2S+T" + ref = "/path/to/referenceGenome.fasta" + dict = "/path/to/referenceGenome.dict" + fastq = "/path/to/fastq/files/*_{R1,R2}_001.fastq.gz" + filter_fastq = /^((?!Undetermined).)*$/ + outdir = "/path/to/outdir" + bed = "/path/to/bedFile" teststrandbias = "/path/to/teststrandbias.R/used/by/vardict" - var2vcf = "/path/to/var2vcf_valid.pl/used/by/vardict" - cache = "/path/to/cacheVEP" - fasta = "/path/to/fastaVEP" - tmp_dir = "/path/to/a/tmp/dir/" (optional) + var2vcf = "/path/to/var2vcf_valid.pl/used/by/vardict" + cache = "/path/to/cacheVEP" + fasta = "/path/to/fastaVEP" + pipeline_report="pipeline_report" +} + +profiles { + + slurm { + process.executor = 'slurm' + } + + docker { + docker.enabled = true + docker.runOptions='-u "$( id -u ):$( id -g )"' + includeConfig "$baseDir/config/softwares.config" + } + + singularity { + singularity.enabled = true + includeConfig "$baseDir/config/softwares.config" + } + conda { + conda.enabled = true + includeConfig "$baseDir/config/softwares.config" + } + test{ + params { + bed = "$baseDir/Example_data/panel_lung.bed" + fastq = "$baseDir/Example_data/*_{R1,R2}_001.fastq.gz" + outdir = "result_test" + ref = "/path/to/referenceGenome.fasta" + dict = "/path/to/referenceGenome.dict" + } + } +} + +resume = true + +timeline { + enabled = true + file = "${params.pipeline_report}/execution_timeline_${new Date().format('yyyyMMddHHmmss')}.html" +} + +report { + enabled = true + file = "${params.pipeline_report}/execution_report_${new Date().format('yyyyMMddHHmmss')}.html" +} + +trace { + enabled = true + file = "${params.pipeline_report}/execution_trace_${new Date().format('yyyyMMddHHmmss')}.txt" +} + +dag { + enabled = true + file = 
"${params.pipeline_report}/pipeline_dag.svg" } From 1bdc7420dd71fcd3e6d1c8a30dc1817ce472b0fa Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Wed, 2 Apr 2025 21:12:08 +0200 Subject: [PATCH 02/12] fix hpc config file --- config/ressources/hpc.config | 76 ++++++++---------------------------- 1 file changed, 16 insertions(+), 60 deletions(-) diff --git a/config/ressources/hpc.config b/config/ressources/hpc.config index 9156f0c..785ecc4 100644 --- a/config/ressources/hpc.config +++ b/config/ressources/hpc.config @@ -6,87 +6,43 @@ process { stageOutMode = 'rsync' withLabel: 'bcftools' { - cpus = 1 + cpus = 4 time = '1h' } withLabel: 'bwamem' { - cpus = 10 - time = '24h' - } - withLabel: 'bowtie' { - cpus = 16 - time = '4h' - } - withLabel: 'bowtie2' { - cpus = 16 - time = '4h' - } - withLabel: 'bwa' { - cpus = 16 - time = '4h' - } - withName: 'fastp' { - cpus = 16 - time = '2h' - } - withLabel: 'fastqc' { - cpus = 8 - time = '1h' - } - withLabel: 'hisat2' { cpus = 16 time = '4h' } - withLabel: 'kallisto' { - cpus = 16 + withLabel: 'fastp' { + cpus = 4 time = '4h' } - withLabel: 'graphmap2' { - cpus = 16 + withLabel: 'fgbio' { + cpus = 4 time = '4h' } - withLabel: 'minimap2' { - cpus = 16 + withLabel: 'gatk4' { + cpus = 4 time = '4h' } - withLabel: 'multiqc' { - cpus = 8 - time = '1h' - } - withLabel: 'mummer4' { - cpus = 16 - time = '4h' + withName: 'multiqc' { + cpus = 4 + time = '2h' } - withLabel: 'ngmlr' { - cpus = 16 + withLabel: 'picard' { + cpus = 4 time = '1h' } - withLabel: 'novoalign' { - cpus = 16 - time = '4h' - } - withLabel: 'salmon' { - cpus = 16 - time = '4h' - } withLabel: 'samtools' { - cpus = 16 - time = '2h' - } - withLabel: 'seqkit' { cpus = 8 time = '4h' } - withLabel: 'seqtk' { - cpus = 16 + withLabel: 'vardict' { + cpus = 2 time = '4h' } - withLabel: 'star' { - cpus = 16 - time = '4h' - } - withLabel: 'subread' { - cpus = 16 + withLabel: 'vep' { + cpus = 4 time = '4h' } } From 91d3a79977ec9d9d5d1d8f4e25f3d51e6656eb06 Mon Sep 17 00:00:00 2001 
From: Jacques Dainat Date: Thu, 3 Apr 2025 18:03:32 +0200 Subject: [PATCH 03/12] add multithreading --- modules/bwamem.nf | 1 + modules/fastp.nf | 1 + modules/vep.nf | 1 + 3 files changed, 3 insertions(+) diff --git a/modules/bwamem.nf b/modules/bwamem.nf index 6a46753..17fded8 100644 --- a/modules/bwamem.nf +++ b/modules/bwamem.nf @@ -14,6 +14,7 @@ process BWAmem { """ bwa mem \ + -t ${task.cpus} \ ${opt_bwa} \ -M ${params.ref} \ ${fastq[0]} \ diff --git a/modules/fastp.nf b/modules/fastp.nf index 2856a8e..c0ade77 100644 --- a/modules/fastp.nf +++ b/modules/fastp.nf @@ -17,6 +17,7 @@ process Fastp { script: """ fastp \ + --thread ${task.cpus} \ -i ${fastq[0]} \ -o ${sample_id}${extension}.R1.fq \ -I ${fastq[1]} \ diff --git a/modules/vep.nf b/modules/vep.nf index ff331a7..d0d8ace 100644 --- a/modules/vep.nf +++ b/modules/vep.nf @@ -14,6 +14,7 @@ process AnnotationVEP { """ vep \ + --fork ${task.cpus} \ -i ${vcf} \ -o ${sample_id}${extension}.vcf \ --cache \ From e6ae5123b8040b0d352bbe33106a7a280886c38f Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 3 Apr 2025 18:13:30 +0200 Subject: [PATCH 04/12] I forgot to commit that one ^^ --- modules/bcftools.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/bcftools.nf b/modules/bcftools.nf index 961c5c2..297d942 100644 --- a/modules/bcftools.nf +++ b/modules/bcftools.nf @@ -12,6 +12,6 @@ process BCFtools_stats { tuple val(sample_id), file("${sample_id}${extension}.stats") """ - bcftools stats ${file} > ${sample_id}${extension}.stats + bcftools stats --threads ${task.cpus} ${file} > ${sample_id}${extension}.stats """ } From aa6a728fe075ff253268aecac3361d19ee1591c3 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 3 Apr 2025 20:57:59 +0200 Subject: [PATCH 05/12] make genome ref as a channel to check early its existence to avoid proces crash and inform user at the very beginning --- MobiCT.nf | 17 ++++++++++------- modules/bwamem.nf | 3 ++- modules/gatk.nf | 6 ++++-- 
modules/picard.nf | 3 ++- modules/vardict.nf | 3 ++- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/MobiCT.nf b/MobiCT.nf index 259a139..33d0e0c 100644 --- a/MobiCT.nf +++ b/MobiCT.nf @@ -41,16 +41,19 @@ workflow { Channel.fromFilePairs(params.fastq, checkIfExists:true) .filter{ v -> v=~ params.filter_fastq} .set{read_pairs_fastq} + Channel.fromPath(params.ref, checkIfExists: true) + .ifEmpty { exit 1, "Cannot find genome matching ${params.ref}!\n" } + .set{genomeref} // 1. Preprocess deduplication ConvertFastqToSam(read_pairs_fastq, ".1.unmapped") ExtractUmis(ConvertFastqToSam.out, params.struct_r1, params.struct_r2, ".1.umi_extracted") ConvertSamToFastq(ExtractUmis.out, ".1.umi_extracted") Fastp(ConvertSamToFastq.out, ".1.umi_extracted.trimmed") - BWAmem(Fastp.out[0], "-t 10", ".1.umi_extracted.aligned") + BWAmem(Fastp.out[0], genomeref, "-t 10", ".1.umi_extracted.aligned") BWAmem.out.join(ExtractUmis.out).set{bams_umis} - MergeBam(bams_umis, ".1.merged") + MergeBam(bams_umis, genomeref, ".1.merged") UmiMergeFilt(MergeBam.out, ".1.filtered") // 2. Process deduplication @@ -59,21 +62,21 @@ workflow { // 3. Post process deduplication RerunConvertSamToFastq(CallConsensus.out, ".3.unmapped") - RerunBWAmem(RerunConvertSamToFastq.out, "-t 10 -Y", ".3.consensus_mapped") + RerunBWAmem(RerunConvertSamToFastq.out, genomeref, "-t 10 -Y", ".3.consensus_mapped") SortConsensus(CallConsensus.out, ".3.unmapped") RerunSortConsensus(RerunBWAmem.out, ".3.mapped") RerunSortConsensus.out.join(SortConsensus.out).set{bams_consensus} - MergeBam2(bams_consensus, ".3.merged") + MergeBam2(bams_consensus, genomeref, ".3.merged") // 4. 
Variant Calling & Annotation - VarDict(MergeBam2.out, ".4.vardict") + VarDict(MergeBam2.out, genomeref, ".4.vardict") AnnotationVEP(VarDict.out, ".4.vardict.vep") // Quality Controls BedToIntervalList(params.dict, params.bed, params.bed.tokenize('.')[0].tokenize('/')[-1]) - CollectHsMetrics(BWAmem.out, BedToIntervalList.out, ".QC.HsMetrics.1") - RerunCollectHsMetrics(RerunBWAmem.out, BedToIntervalList.out, ".QC.HsMetrics.3") + CollectHsMetrics(BWAmem.out, BedToIntervalList.out, genomeref, ".QC.HsMetrics.1") + RerunCollectHsMetrics(RerunBWAmem.out, BedToIntervalList.out, genomeref, ".QC.HsMetrics.3") BCFtools_stats(VarDict.out, ".QC.bcftools_stats") MultiQC(BCFtools_stats.out, ".QC.multiQC") diff --git a/modules/bwamem.nf b/modules/bwamem.nf index 17fded8..5a9f779 100644 --- a/modules/bwamem.nf +++ b/modules/bwamem.nf @@ -6,6 +6,7 @@ process BWAmem { input: tuple val(sample_id), path(fastq) + path genomeref val opt_bwa val extension @@ -16,7 +17,7 @@ process BWAmem { bwa mem \ -t ${task.cpus} \ ${opt_bwa} \ - -M ${params.ref} \ + -M ${genomeref} \ ${fastq[0]} \ ${fastq[1]} \ | \ diff --git a/modules/gatk.nf b/modules/gatk.nf index 8050325..fc24f32 100644 --- a/modules/gatk.nf +++ b/modules/gatk.nf @@ -52,6 +52,7 @@ process MergeBam { input: tuple val(sample_id), path(bam_aligned), path(bam_unmapped) + path genomeref val extension @@ -66,7 +67,7 @@ process MergeBam { --ALIGNED_BAM ${bam_aligned} \ --UNMAPPED_BAM ${bam_unmapped} \ --OUTPUT ${sample_id}${extension}.bam \ - --REFERENCE_SEQUENCE ${params.ref} \ + --REFERENCE_SEQUENCE ${genomeref} \ --SORT_ORDER 'queryname' \ --ALIGNED_READS_ONLY true \ --MAX_INSERTIONS_OR_DELETIONS -1 \ @@ -109,6 +110,7 @@ process MergeBam2 { input: tuple val(sample_id), path(bam_aligned), path(bam_unmapped) + path genomeref val extension output: @@ -121,7 +123,7 @@ process MergeBam2 { --ALIGNED_BAM ${bam_aligned} \ --UNMAPPED_BAM ${bam_unmapped} \ --OUTPUT ${sample_id}${extension}.bam \ - --REFERENCE_SEQUENCE ${params.ref} \ + 
--REFERENCE_SEQUENCE ${genomeref} \ --SORT_ORDER coordinate \ --ADD_MATE_CIGAR true \ --MAX_INSERTIONS_OR_DELETIONS -1 \ diff --git a/modules/picard.nf b/modules/picard.nf index 798e6d1..d37316f 100644 --- a/modules/picard.nf +++ b/modules/picard.nf @@ -27,6 +27,7 @@ process CollectHsMetrics { input: tuple val(sample_id), file(bam) file bed + path genomeref val extension output: @@ -34,7 +35,7 @@ process CollectHsMetrics { """ picard CollectHsMetrics \ - --REFERENCE_SEQUENCE ${params.ref} \ + --REFERENCE_SEQUENCE ${genomeref} \ --BAIT_INTERVALS ${bed} \ --TARGET_INTERVALS ${bed} \ --INPUT ${bam} \ diff --git a/modules/vardict.nf b/modules/vardict.nf index 8087217..c0a4335 100644 --- a/modules/vardict.nf +++ b/modules/vardict.nf @@ -5,6 +5,7 @@ process VarDict { input: tuple val(sample_id), path(bami) + path genomeref val extension output: @@ -12,7 +13,7 @@ process VarDict { """ vardict \ - -G ${params.ref} \ + -G ${genomeref} \ -f 0.0005 \ -N ${sample_id} \ -b ${bami[1]} \ From 5fecc31ee8ded28a5311e6f9e4031b2dc8156d92 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 3 Apr 2025 20:58:34 +0200 Subject: [PATCH 06/12] add test for profile existence at the very beginning --- MobiCT.nf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MobiCT.nf b/MobiCT.nf index 33d0e0c..173be62 100644 --- a/MobiCT.nf +++ b/MobiCT.nf @@ -23,6 +23,14 @@ email = 's-cabelloaguilar@chu-montpellier.fr' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// Check profiles +if ( + workflow.profile.contains('singularity') || + workflow.profile.contains('docker') || + workflow.profile.contains('conda') + ) { "executer selected" } +else { exit 1, "No executer selected, please use: -profile docker/singularity/conda (choose one of the 3 possibilities)"} + // Include modules include {ConvertFastqToSam; MergeBam; MergeBam2; SortConsensus; SortConsensus as RerunSortConsensus; ConvertSamToFastq; ConvertSamToFastq as RerunConvertSamToFastq } from 
"$baseDir/modules/gatk.nf" include {BedToIntervalList; CollectHsMetrics; CollectHsMetrics as RerunCollectHsMetrics;} from "$baseDir/modules/picard.nf" From 341f9feb4a7095b60e8858e3d59e91a2007e8c9d Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 8 Apr 2025 13:55:00 +0200 Subject: [PATCH 07/12] add pigz --- config/ressources/conda/pigz.yaml | 7 +++++++ modules/pigz.nf | 29 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 config/ressources/conda/pigz.yaml create mode 100644 modules/pigz.nf diff --git a/config/ressources/conda/pigz.yaml b/config/ressources/conda/pigz.yaml new file mode 100644 index 0000000..e18ebd6 --- /dev/null +++ b/config/ressources/conda/pigz.yaml @@ -0,0 +1,7 @@ +name: gatk4 +channels: + - conda-forge + - bioconda + +dependencies: + - pigz=2.8 \ No newline at end of file diff --git a/modules/pigz.nf b/modules/pigz.nf new file mode 100644 index 0000000..25bb466 --- /dev/null +++ b/modules/pigz.nf @@ -0,0 +1,29 @@ +// +process DealWithRef { + tag "$genome_fasta" + label 'pigz' + + input: + path(genome_fasta) + + output: + path "*.fa", emit: genomerefok + + script: + genomeReady = genome_fasta.baseName.replaceAll(/\.(gz)$/, '') // remove .gz + genomeReady = genomeReady.replaceAll(/\.(fasta|fa)$/, '') // remove .fasta or .fa + genomeFa = genomeReady + ".fa" + """ + + + # DEALING WITH GZIPPED FILES to uncompress if needed + extension=\$(echo ${genome_fasta} | awk -F . 
'{print \$NF}') + + if [[ \${extension} == "gz" ]];then + pigz -dck -p ${task.cpus} ${genome_fasta} > ${genomeFa} + else + # copy fa + ln -s ${genome_fasta} ${genomeFa} + fi + """ +} \ No newline at end of file From cb5874f10b0a53079dc9b2bc0b58d106889f46ee Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 8 Apr 2025 13:58:57 +0200 Subject: [PATCH 08/12] add CI with config for 2 cpus --- .github/workflows/ci.yml | 62 +++++++++++++++++++++++++++++++++++++ config/ressources/ci.config | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 config/ressources/ci.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..fff23c8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,62 @@ +# This is a basic workflow to help you get started with Actions + +name: CI + +# Controls when the action will run. +# Triggers the workflow on push or pull request events +on: [push, pull_request, workflow_dispatch] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + + # This workflow contains a second job called "build2" + test_MobiCT: + # avoid to run twice push and PR + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name + + # The type of runner that the job will run on + runs-on: ubuntu-24.04 + services: + docker: + image: docker:dind + options: --privileged --shm-size=2g + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Install/cache OpenJDK + - name: Cache OpenJDK + id: cache-openjdk + uses: actions/cache@v3 + with: + path: /usr/lib/jvm/java-11-openjdk-amd64 + key: ${{ runner.os 
}}-openjdk-11 + restore-keys: | + ${{ runner.os }}-openjdk-11 + - name: Install openjdk + if: ${{ steps.cache-openjdk.outputs.cache-hit != 'true' }} + run: sudo apt-get install openjdk-11-jdk + # Install/cache nextflow + - name: Cache nextflow + id: cache-nextflow + uses: actions/cache@v3 + with: + path: /usr/local/bin/nextflow + key: ${{ runner.os }}-nextflow + restore-keys: | + ${{ runner.os }}-nextflow + - name: Install Nextflox + if: ${{ steps.cache-nextflow.outputs.cache-hit != 'true' }} + run: curl -s https://get.nextflow.io | bash && mv nextflow /usr/local/bin && chmod +x /usr/local/bin/nextflow + + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + # Run tests sequentialy + - name: test + run: nextflow run -ansi-log -profile docker,test,ci MobiCT.nf + diff --git a/config/ressources/ci.config b/config/ressources/ci.config new file mode 100644 index 0000000..8c79032 --- /dev/null +++ b/config/ressources/ci.config @@ -0,0 +1,56 @@ +process { + cpus = 1 + time = '1h' + maxForks = 8 + shell = ['/bin/bash', '-euo', 'pipefail'] + stageOutMode = 'rsync' + + withLabel: 'bcftools' { + cpus = 2 + time = '1h' + } + withLabel: 'BwaIndex' { + cpus = 1 + time = '4h' + } + withLabel: 'BWAmem' { + cpus = 2 + time = '4h' + } + withLabel: 'fastp' { + cpus = 2 + time = '4h' + } + withLabel: 'fgbio' { + cpus = 2 + time = '4h' + } + withLabel: 'gatk4' { + cpus = 2 + time = '4h' + } + withLabel: 'pigz' { + cpus = 2 + time = '4h' + } + withLabel: 'multiqc' { + cpus = 2 + time = '2h' + } + withLabel: 'picard' { + cpus = 2 + time = '1h' + } + withLabel: 'samtools' { + cpus = 2 + time = '4h' + } + withLabel: 'vardict' { + cpus = 1 + time = '4h' + } + withLabel: 'vep' { + cpus = 2 + time = '4h' + } +} From dfa02ce03b9cf3646c0f582d75e6c9fdb4a9c5c9 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 8 Apr 2025 14:01:13 +0200 Subject: [PATCH 09/12] simplify options, deal with gz ref, use channesl more than params --- 
MobiCT.nf | 58 +++++++++++++++++----------- config/ressources/conda/vardict.yaml | 2 +- config/ressources/hpc.config | 12 +++++- config/ressources/local.config | 14 +++++-- config/softwares.config | 9 ++++- modules/bwamem.nf | 26 +++++++++++-- modules/fgbio.nf | 29 ++++++++------ modules/gatk.nf | 3 +- modules/multiqc.nf | 17 +------- modules/picard.nf | 15 +++++++ modules/samtools.nf | 34 ++++++++++++++++ modules/vardict.nf | 12 +++--- modules/vep.nf | 10 ++--- nextflow.config | 27 +++++++------ 14 files changed, 182 insertions(+), 86 deletions(-) diff --git a/MobiCT.nf b/MobiCT.nf index 173be62..0106c7b 100644 --- a/MobiCT.nf +++ b/MobiCT.nf @@ -33,13 +33,14 @@ else { exit 1, "No executer selected, please use: -profile docker/singularity/co // Include modules include {ConvertFastqToSam; MergeBam; MergeBam2; SortConsensus; SortConsensus as RerunSortConsensus; ConvertSamToFastq; ConvertSamToFastq as RerunConvertSamToFastq } from "$baseDir/modules/gatk.nf" -include {BedToIntervalList; CollectHsMetrics; CollectHsMetrics as RerunCollectHsMetrics;} from "$baseDir/modules/picard.nf" -include {BWAmem; BWAmem as RerunBWAmem;} from "$baseDir/modules/bwamem.nf" +include {CreateSequenceDictionary; BedToIntervalList; CollectHsMetrics; CollectHsMetrics as RerunCollectHsMetrics;} from "$baseDir/modules/picard.nf" +include {BwaIndex; BWAmem; BWAmem as RerunBWAmem;} from "$baseDir/modules/bwamem.nf" include {BCFtools_stats} from "$baseDir/modules/bcftools.nf" include {Fastp} from "$baseDir/modules/fastp.nf" include {ExtractUmis; GroupReads; CallConsensus; } from "$baseDir/modules/fgbio.nf" -include {MultiQC; MultiQC_ALL; } from "$baseDir/modules/multiqc.nf" -include {UmiMergeFilt} from "$baseDir/modules/samtools.nf" +include {MultiQC; MultiQC as MultiQC_ALL; } from "$baseDir/modules/multiqc.nf" +include {Faidx; UmiMergeFilt; Sam2bam; Sam2bam as ReSam2bam} from "$baseDir/modules/samtools.nf" +include {DealWithRef} from "$baseDir/modules/pigz.nf" include {VarDict} from 
"$baseDir/modules/vardict.nf" include {AnnotationVEP} from "$baseDir/modules/vep.nf" @@ -53,45 +54,56 @@ workflow { .ifEmpty { exit 1, "Cannot find genome matching ${params.ref}!\n" } .set{genomeref} + + // htslib-based tools (e.g., samtools, bcftools) Require BGZF-compressed FASTA and Use .fai index (samtools faidx) + DealWithRef(genomeref.collect()) + DealWithRef.out.genomerefok.set{genomerefok} + + Faidx(genomerefok).set{genome_fai} + + // Create the genome index + BwaIndex(genomerefok) + + // Picard/GATK can use gzipped compressed fasta but need a .dict file + // Create the genome dictionary + CreateSequenceDictionary(genomerefok) + .set{genome_dict} + // 1. Preprocess deduplication ConvertFastqToSam(read_pairs_fastq, ".1.unmapped") ExtractUmis(ConvertFastqToSam.out, params.struct_r1, params.struct_r2, ".1.umi_extracted") ConvertSamToFastq(ExtractUmis.out, ".1.umi_extracted") Fastp(ConvertSamToFastq.out, ".1.umi_extracted.trimmed") - BWAmem(Fastp.out[0], genomeref, "-t 10", ".1.umi_extracted.aligned") + BWAmem(Fastp.out[0], genomerefok, BwaIndex.out, "", ".1.umi_extracted.aligned") + Sam2bam(BWAmem.out.tuple_sample_sam) - BWAmem.out.join(ExtractUmis.out).set{bams_umis} - MergeBam(bams_umis, genomeref, ".1.merged") + Sam2bam.out.join(ExtractUmis.out).set{bams_umis} + MergeBam(bams_umis, genomerefok, genome_dict, ".1.merged") UmiMergeFilt(MergeBam.out, ".1.filtered") // 2. Process deduplication GroupReads(UmiMergeFilt.out, ".2.umi_grouped") - CallConsensus(GroupReads.out.nextout, ".2.consensus_unmapped") + CallConsensus(GroupReads.out.nextout, genome_fai, ".2.consensus_unmapped") // 3. 
Post process deduplication RerunConvertSamToFastq(CallConsensus.out, ".3.unmapped") - RerunBWAmem(RerunConvertSamToFastq.out, genomeref, "-t 10 -Y", ".3.consensus_mapped") + RerunBWAmem(RerunConvertSamToFastq.out, genomerefok, BwaIndex.out, " -Y", ".3.consensus_mapped") SortConsensus(CallConsensus.out, ".3.unmapped") - RerunSortConsensus(RerunBWAmem.out, ".3.mapped") + ReSam2bam(RerunBWAmem.out.tuple_sample_sam) + RerunSortConsensus(ReSam2bam.out, ".3.mapped") RerunSortConsensus.out.join(SortConsensus.out).set{bams_consensus} - MergeBam2(bams_consensus, genomeref, ".3.merged") + MergeBam2(bams_consensus, genomerefok, genome_dict, ".3.merged") // 4. Variant Calling & Annotation - VarDict(MergeBam2.out, genomeref, ".4.vardict") - AnnotationVEP(VarDict.out, ".4.vardict.vep") + VarDict(MergeBam2.out, genomerefok, genome_fai, params.bed,".4.vardict") + AnnotationVEP(VarDict.out, genomerefok, ".4.vardict.vep") // Quality Controls - BedToIntervalList(params.dict, params.bed, params.bed.tokenize('.')[0].tokenize('/')[-1]) - CollectHsMetrics(BWAmem.out, BedToIntervalList.out, genomeref, ".QC.HsMetrics.1") - RerunCollectHsMetrics(RerunBWAmem.out, BedToIntervalList.out, genomeref, ".QC.HsMetrics.3") + BedToIntervalList(genome_dict, params.bed, params.bed.tokenize('.')[0].tokenize('/')[-1]) + CollectHsMetrics(Sam2bam.out, BedToIntervalList.out, genomerefok, genome_fai, ".QC.HsMetrics.1") + RerunCollectHsMetrics(ReSam2bam.out, BedToIntervalList.out, genomerefok, genome_fai, ".QC.HsMetrics.3") BCFtools_stats(VarDict.out, ".QC.bcftools_stats") MultiQC(BCFtools_stats.out, ".QC.multiQC") - - Channel.empty() - .mix( MultiQC.out ) - .map { sample, files -> files } - .collect() - .set { log_files } - MultiQC_ALL(log_files, "all.QC.multiQC") + MultiQC_ALL(BCFtools_stats.out.collect(), "all.QC.multiQC") } diff --git a/config/ressources/conda/vardict.yaml b/config/ressources/conda/vardict.yaml index 9af8e25..b6ab698 100644 --- a/config/ressources/conda/vardict.yaml +++ 
b/config/ressources/conda/vardict.yaml @@ -4,4 +4,4 @@ channels: - bioconda dependencies: - - vardict=2018.10.18 \ No newline at end of file + - vardict-java=1.8.3 \ No newline at end of file diff --git a/config/ressources/hpc.config b/config/ressources/hpc.config index 785ecc4..1be18d1 100644 --- a/config/ressources/hpc.config +++ b/config/ressources/hpc.config @@ -9,7 +9,11 @@ process { cpus = 4 time = '1h' } - withLabel: 'bwamem' { + withName: 'BwaIndex' { + cpus = 1 + time = '4h' + } + withName: 'BWAmem' { cpus = 16 time = '4h' } @@ -25,6 +29,10 @@ process { cpus = 4 time = '4h' } + withLabel: 'pigz' { + cpus = 8 + time = '4h' + } withName: 'multiqc' { cpus = 4 time = '2h' @@ -38,7 +46,7 @@ process { time = '4h' } withLabel: 'vardict' { - cpus = 2 + cpus = 1 time = '4h' } withLabel: 'vep' { diff --git a/config/ressources/local.config b/config/ressources/local.config index 9774c2c..5deefbc 100644 --- a/config/ressources/local.config +++ b/config/ressources/local.config @@ -9,7 +9,11 @@ process { cpus = 4 time = '1h' } - withLabel: 'bwamem' { + withName: 'BwaIndex' { + cpus = 1 + time = '4h' + } + withName: 'BWAmem' { cpus = 4 time = '4h' } @@ -25,7 +29,11 @@ process { cpus = 4 time = '4h' } - withName: 'multiqc' { + withLabel: 'pigz' { + cpus = 4 + time = '4h' + } + withLabel: 'multiqc' { cpus = 4 time = '2h' } @@ -38,7 +46,7 @@ process { time = '4h' } withLabel: 'vardict' { - cpus = 2 + cpus = 1 time = '4h' } withLabel: 'vep' { diff --git a/config/softwares.config b/config/softwares.config index a3564c8..05bbf04 100644 --- a/config/softwares.config +++ b/config/softwares.config @@ -34,6 +34,11 @@ process { container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : docker.enabled ? 'quay.io/biocontainers/picard:3.3.0--hdfd78af_0' : '' } } + withLabel: 'pigz' { + conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/pigz.yaml" } + container = { singularity.enabled ? 
'https://depot.galaxyproject.org/singularity/pigz:2.8' : + docker.enabled ? 'quay.io/biocontainers/pigz:2.8' : '' } + } withLabel: 'samtools' { conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/samtools.yaml" } container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/samtools:1.6--h5fe306e_11' : @@ -41,8 +46,8 @@ process { } withLabel: 'vardict' { conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/vardict.yaml" } - container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/vardict:2018.10.18--pl526_1' : - docker.enabled ? 'quay.io/biocontainers/vardict:2018.10.18--pl526_1' : '' } + container = { singularity.enabled ? 'https://depot.galaxyproject.org/singularity/vardict-java:1.8.3--hdfd78af_0' : + docker.enabled ? 'quay.io/biocontainers/vardict-java:1.8.3--hdfd78af_0' : '' } } withLabel: 'vep' { conda = { singularity.enabled || docker.enabled ? '' : "$baseDir/config/ressources/conda/ensembl-vep.yaml" } diff --git a/modules/bwamem.nf b/modules/bwamem.nf index 5a9f779..ccf6d8b 100644 --- a/modules/bwamem.nf +++ b/modules/bwamem.nf @@ -1,3 +1,19 @@ +process BwaIndex { + label 'bwamem' + tag "$genome_fasta" + + input: + path(genome_fasta) + + output: + path("*") + + script: + """ + bwa index $genome_fasta -p ${genome_fasta.baseName} + """ +} + // Alignement before deduplication: Align quality and adapter trimmed reads to // the reference genome process BWAmem { @@ -7,20 +23,22 @@ process BWAmem { input: tuple val(sample_id), path(fastq) path genomeref + path bwa_index_files val opt_bwa val extension output: - tuple val(sample_id), file("${sample_id}${extension}.bam") + tuple val(sample_id), file("${sample_id}${extension}.sam"), emit: tuple_sample_sam + path "*.log", emit: bwamem_summary """ bwa mem \ -t ${task.cpus} \ ${opt_bwa} \ - -M ${genomeref} \ + -M \ + ${genomeref.baseName} \ ${fastq[0]} \ ${fastq[1]} \ - | \ - samtools view -bh -o 
${sample_id}${extension}.bam + > ${sample_id}${extension}.sam 2> ${sample_id}${extension}.log """ } \ No newline at end of file diff --git a/modules/fgbio.nf b/modules/fgbio.nf index cd9b802..227a1b7 100644 --- a/modules/fgbio.nf +++ b/modules/fgbio.nf @@ -12,6 +12,7 @@ process ExtractUmis { output: tuple val(sample_id), file("${sample_id}${extension}.bam") + script: """ fgbio ExtractUmisFromBam \ -i ${bam_file} \ @@ -26,7 +27,7 @@ process ExtractUmis { // The user can control how many errors/mismatches are allowed in the UMI sequence when assigning source molecules (--edits=n). process GroupReads { tag "$sample_id" - + label 'fgbio' publishDir "${params.outdir}/${sample_id}", mode: 'copy', overwrite: true, pattern: '*.txt' input: @@ -37,6 +38,7 @@ process GroupReads { tuple val(sample_id), file("${sample_id}${extension}.bam"), emit: nextout file "${sample_id}.QC.family_size_counts.txt" + script: """ fgbio GroupReadsByUmi \ -i ${bam} \ @@ -54,23 +56,26 @@ process GroupReads { // to 1, in so doing the single read will be considered the consensus. 
process CallConsensus { tag "$sample_id" + label 'fgbio' input: tuple val(sample_id), path(bam) + path genome_fai val extension output: tuple val(sample_id), file("${sample_id}${extension}.bam") - """ - fgbio CallMolecularConsensusReads \ - -i ${bam} \ - -o ${sample_id}${extension}.bam \ - --error-rate-post-umi 40 \ - --error-rate-pre-umi 45 \ - --output-per-base-tags false \ - --min-reads 2 \ - --max-reads 50 \ - --read-name-prefix='consensus' - """ + script: + """ + fgbio CallMolecularConsensusReads \ + -i ${bam} \ + -o ${sample_id}${extension}.bam \ + --error-rate-post-umi 40 \ + --error-rate-pre-umi 45 \ + --output-per-base-tags false \ + --min-reads 2 \ + --max-reads 50 \ + --read-name-prefix='consensus' + """ } \ No newline at end of file diff --git a/modules/gatk.nf b/modules/gatk.nf index fc24f32..10f00cd 100644 --- a/modules/gatk.nf +++ b/modules/gatk.nf @@ -16,7 +16,6 @@ process ConvertFastqToSam { --FASTQ2 ${fastq[1]} \ --OUTPUT ${sample_id}${extension}.bam \ --SAMPLE_NAME ${sample_id} \ - --TMP_DIR ${params.tmp_dir} """ } @@ -53,6 +52,7 @@ process MergeBam { input: tuple val(sample_id), path(bam_aligned), path(bam_unmapped) path genomeref + path genome_dict val extension @@ -111,6 +111,7 @@ process MergeBam2 { input: tuple val(sample_id), path(bam_aligned), path(bam_unmapped) path genomeref + path genome_dict val extension output: diff --git a/modules/multiqc.nf b/modules/multiqc.nf index 2c10719..d1627d1 100644 --- a/modules/multiqc.nf +++ b/modules/multiqc.nf @@ -14,21 +14,6 @@ process MultiQC { tuple val(sample_id), file("${sample_id}${extension}") """ - multiqc ${params.outdir}/${sample_id} -o ${sample_id}${extension} - """ -} - -process MultiQC_ALL { - publishDir "${params.outdir}", mode: 'copy', overwrite: true - - input: - tuple val(sample_id), path(file) - val extension - - output: - file("${extension}") - - """ - multiqc ${params.outdir} -o ${extension} + multiqc -p . 
-o ${sample_id}${extension} """ } \ No newline at end of file diff --git a/modules/picard.nf b/modules/picard.nf index d37316f..e638538 100644 --- a/modules/picard.nf +++ b/modules/picard.nf @@ -1,3 +1,17 @@ +process CreateSequenceDictionary { + label 'picard' + + input: + path genome + + output: + file "*.dict" + + """ + picard CreateSequenceDictionary R=${genome} O=${genome}.dict + """ +} + process BedToIntervalList { label 'picard' @@ -28,6 +42,7 @@ process CollectHsMetrics { tuple val(sample_id), file(bam) file bed path genomeref + path genome_fai val extension output: diff --git a/modules/samtools.nf b/modules/samtools.nf index 435f7c9..6954802 100644 --- a/modules/samtools.nf +++ b/modules/samtools.nf @@ -1,3 +1,19 @@ +// +process Faidx { + tag "$genome_fasta" + label 'samtools' + + input: + path(genome_fasta) + + output: + path("*") + + """ + samtools faidx $genome_fasta + """ +} + // process UmiMergeFilt { tag "$sample_id" @@ -16,4 +32,22 @@ process UmiMergeFilt { -bh ${bam} \ > ${sample_id}${extension}.bam """ +} + +process Sam2bam { + label 'samtools' + tag "$sample" + + input: + tuple val(sample), path(sam) + + output: + tuple val(sample), path ("*.bam"), emit: tuple_sample_bam + + script: + + """ + samtools view -@ ${task.cpus} ${sam} -bh -o ${sam.baseName}.bam + """ + } \ No newline at end of file diff --git a/modules/vardict.nf b/modules/vardict.nf index c0a4335..f6e6e46 100644 --- a/modules/vardict.nf +++ b/modules/vardict.nf @@ -1,18 +1,20 @@ // Variant calling step using vardict process VarDict { tag "$sample_id" - label 'verdict' + label 'vardict' input: tuple val(sample_id), path(bami) path genomeref + path genomeref_fai + path bed val extension output: tuple val(sample_id), file("${sample_id}${extension}.vcf") """ - vardict \ + vardict-java \ -G ${genomeref} \ -f 0.0005 \ -N ${sample_id} \ @@ -20,8 +22,8 @@ process VarDict { -c 1 \ -S 2 \ -E 3 \ - -g 4 ${params.bed} \ - | $params.teststrandbias \ - | $params.var2vcf > 
${sample_id}${extension}.vcf + -g 4 ${bed} \ + | teststrandbias.R \ + | var2vcf_valid.pl > ${sample_id}${extension}.vcf """ } diff --git a/modules/vep.nf b/modules/vep.nf index d0d8ace..c95075e 100644 --- a/modules/vep.nf +++ b/modules/vep.nf @@ -7,19 +7,20 @@ process AnnotationVEP { input: tuple val(sample_id), path(vcf) + path genomeref val extension output: tuple val(sample_id), file("${sample_id}${extension}.vcf") + script: + def config = params.cache?.trim() ? "--offline --cache --dir_cache ${params.cache} --max_af " : "--database" """ vep \ + ${config} \ --fork ${task.cpus} \ -i ${vcf} \ -o ${sample_id}${extension}.vcf \ - --cache \ - --dir_cache ${params.cache} \ - --offline \ --force_overwrite \ --vcf \ --numbers \ @@ -27,7 +28,6 @@ process AnnotationVEP { --symbol \ --hgvs \ --canonical \ - --max_af \ - --fasta ${params.fasta} + --fasta ${genomeref} """ } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index f46ef73..b5f4e35 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,23 +12,21 @@ manifest { dag.overwrite = true params { - struct_r1 = "5M2S+T" - struct_r2 = "5M2S+T" - ref = "/path/to/referenceGenome.fasta" - dict = "/path/to/referenceGenome.dict" + // main params + ref = "/path/to/referenceGenome.fasta" // If you’re using a VEP cache (e.g. Ensembl’s), use the matching reference FASTA from Ensembl fastq = "/path/to/fastq/files/*_{R1,R2}_001.fastq.gz" - filter_fastq = /^((?!Undetermined).)*$/ outdir = "/path/to/outdir" bed = "/path/to/bedFile" - teststrandbias = "/path/to/teststrandbias.R/used/by/vardict" - var2vcf = "/path/to/var2vcf_valid.pl/used/by/vardict" - cache = "/path/to/cacheVEP" - fasta = "/path/to/fastaVEP" + cache = "" // Path to the VEP cache. 
If you’re using a VEP cache, use the matching reference FASTA from Ensembl: + + // secondary params + struct_r1 = "5M2S+T" + struct_r2 = "5M2S+T" + filter_fastq = /^((?!Undetermined).)*$/ pipeline_report="pipeline_report" } profiles { - slurm { process.executor = 'slurm' } @@ -47,13 +45,18 @@ profiles { conda.enabled = true includeConfig "$baseDir/config/softwares.config" } + local { + includeConfig "$baseDir/config/ressources/local.config" + } + ci { + includeConfig "$baseDir/config/ressources/ci.config" + } test{ params { bed = "$baseDir/Example_data/panel_lung.bed" fastq = "$baseDir/Example_data/*_{R1,R2}_001.fastq.gz" outdir = "result_test" - ref = "/path/to/referenceGenome.fasta" - dict = "/path/to/referenceGenome.dict" + ref = "https://ftp.ensembl.org/pub/release-113/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.7.fa.gz" } } } From 2c48e98befbc12475f8dbec8b1daa92b7bff3843 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 8 Apr 2025 14:11:08 +0200 Subject: [PATCH 10/12] fix ID to be similar to ensembl --- Example_data/panel_lung.bed | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/Example_data/panel_lung.bed b/Example_data/panel_lung.bed index 621c4a5..fe0c88b 100644 --- a/Example_data/panel_lung.bed +++ b/Example_data/panel_lung.bed @@ -1,8 +1,5 @@ -chr12 25245268 25245388 NM_033360 1 -chr15 90088586 90088747 NM_002168 1 -chr2 208248368 208248660 NM_005896 1 -chr7 55173921 55174041 NM_005228 1 -chr7 55174711 55174831 NM_005228 1 -chr7 55181292 55181478 NM_005228 1 -chr7 55191718 55191874 NM_005228 1 -chr7 140753274 140753394 NM_004333 1 +7 55173921 55174041 NM_005228 1 +7 55174711 55174831 NM_005228 1 +7 55181292 55181478 NM_005228 1 +7 55191718 55191874 NM_005228 1 +7 140753274 140753394 NM_004333 1 From 9d9da31d6bb0b1e515f3e1dcda25cabe6ed0d3d4 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 8 Apr 2025 14:11:40 +0200 Subject: [PATCH 11/12] fix maxfork --- config/ressources/ci.config | 2 +- 
config/ressources/local.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/ressources/ci.config b/config/ressources/ci.config index 8c79032..968c27c 100644 --- a/config/ressources/ci.config +++ b/config/ressources/ci.config @@ -1,7 +1,7 @@ process { cpus = 1 time = '1h' - maxForks = 8 + maxForks = 2 shell = ['/bin/bash', '-euo', 'pipefail'] stageOutMode = 'rsync' diff --git a/config/ressources/local.config b/config/ressources/local.config index 5deefbc..327eeca 100644 --- a/config/ressources/local.config +++ b/config/ressources/local.config @@ -1,7 +1,7 @@ process { cpus = 1 time = '1h' - maxForks = 8 + maxForks = 4 shell = ['/bin/bash', '-euo', 'pipefail'] stageOutMode = 'rsync' From 18a5fe1146af26d6cc69859b00a309719f6ff3f3 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 8 Apr 2025 14:12:07 +0200 Subject: [PATCH 12/12] add comment --- nextflow.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nextflow.config b/nextflow.config index b5f4e35..c539348 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,9 +45,11 @@ profiles { conda.enabled = true includeConfig "$baseDir/config/softwares.config" } + // Use maximum 4 cpu per process local { includeConfig "$baseDir/config/ressources/local.config" } + // Use maximum 2 cpu per process ci { includeConfig "$baseDir/config/ressources/ci.config" }