From 16ecb0f83e50fe82a85fc503477b28f47a0cabd6 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Wed, 25 Jun 2025 13:54:58 +0200 Subject: [PATCH 1/7] rename multiqc config file --- .../{multiqc_conf.yml => multiqc_config.yaml} | 25 ++++--------------- 1 file changed, 5 insertions(+), 20 deletions(-) rename config/{multiqc_conf.yml => multiqc_config.yaml} (52%) diff --git a/config/multiqc_conf.yml b/config/multiqc_config.yaml similarity index 52% rename from config/multiqc_conf.yml rename to config/multiqc_config.yaml index 39b2f4d..5e0f92a 100644 --- a/config/multiqc_conf.yml +++ b/config/multiqc_config.yaml @@ -2,28 +2,16 @@ title: "RAIN report" run_modules: - fastqc - - bowtie2 - - bwa - - bwa-mem - - hisat2 - - star - - picard/markdups + - picard module_order: - - fastqc: - name: FastQC (raw) - path_filters: - - "*logs_raw*" - - bowtie2 - - bwa - - bwa-mem - - hisat2 - - star - fastqc: name: FastQC (ali) path_filters: - "*logs_ali*" - - picard/markdups + - picard: + path_filters: + - "*marked_dup_metrics.txt" - fastqc: name: FastQC (markDuplicates) path_filters: @@ -31,7 +19,4 @@ module_order: - fastqc: name: FastQC (ClipOverlap) path_filters: - - "*logs_clip*" -sp: - picard/markdups: - fn: "*marked_dup_metrics.txt" \ No newline at end of file + - "*logs_clip*" \ No newline at end of file From 3ad6bbd4b9d57903b4f68d7eba69bd0459027cc2 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Wed, 25 Jun 2025 14:31:46 +0200 Subject: [PATCH 2/7] new multiqc version. remove -p option make it faster... --- config/softwares.config | 4 +++- modules/multiqc.nf | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/config/softwares.config b/config/softwares.config index b01edd1..70c204e 100644 --- a/config/softwares.config +++ b/config/softwares.config @@ -52,7 +52,9 @@ process { container = singularity.enabled ? 
"${params.sifPath}/sapin.sif" : "sapin" } withLabel: 'multiqc' { - container = 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' + // container = 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' + //container = 'quay.io/biocontainers/multiqc:1.27--pyhdfd78af_0' + container = 'quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0' } withLabel: "pluviometer" { container = singularity.enabled ? "${params.sifPath}/pluviometer.sif" : "pluviometer" diff --git a/modules/multiqc.nf b/modules/multiqc.nf index 2d9c59a..1c4c34e 100644 --- a/modules/multiqc.nf +++ b/modules/multiqc.nf @@ -12,6 +12,6 @@ process multiqc { script: """ - multiqc -p . -c ${multiqc_config} + multiqc . -c ${multiqc_config} """ } \ No newline at end of file From 58bc52f1945006407ef52be67adf596a433143fb Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Wed, 25 Jun 2025 14:32:21 +0200 Subject: [PATCH 3/7] add fastqc params --- rain.nf | 66 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/rain.nf b/rain.nf index b55788f..4170a1f 100644 --- a/rain.nf +++ b/rain.nf @@ -9,6 +9,8 @@ import java.nio.file.* // STEP 0 - parameters //************************************************* +/* ---- Params specific to RAIN ---- */ + // Input/output params params.reads = null // "/path/to/reads_{1,2}.fastq.gz/or/folder" params.genome = null // "/path/to/genome.fa" @@ -16,20 +18,24 @@ params.annotation = null // "/path/to/annotations.gff3" params.outdir = "rain_result" params.clipoverlap = false -/* Specific AliNe params (some are shared with RAIN)*/ +// Edit counting params +edit_site_tools = ["reditools2", "reditools3", "jacusa2", "sapin"] +params.edit_site_tool = "reditools3" +params.edit_threshold = 1 +params.aggregation_mode = "all" +params.multiqc_config = "$baseDir/config/multiqc_config.yaml" // MultiQC config file + + +/* ---- Params shared between RAIN and AliNe ---- */ // Read feature params read_type_allowed = [ 'short_paired', 
'short_single', 'pacbio', 'ont' ] params.read_type = null // short_paired, short_single, pacbio, ont strandedness_allowed = [ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'OSR', 'auto' ] // see https://github.com/Juke34/AliNe for more information params.strandedness = null -params.read_length = null // Use by star to set the sjdbOverhang parameter +params.fastqc = false -// Edit counting params -edit_site_tools = ["reditools2", "reditools3", "jacusa2", "sapin"] -params.edit_site_tool = "reditools3" -params.edit_threshold = 1 -params.aggregation_mode = "all" +/* ---- Params Specific to AliNe ---- */ // Aline profiles aline_profile_allowed = [ 'docker', 'singularity', 'local', 'itrop' ] @@ -84,18 +90,19 @@ def helpMSG() { control1,path/to/data1.fastq.bam,,auto,short_single control2,path/to/data2_R1.fastq.gz,path/to/data2_R2.fastq.gz,auto,short_paired --genome Path to the reference genome in FASTA format. - --read_type Type of reads among this list ${read_type_allowed} (no default) + --read_type Type of reads among this list ${read_type_allowed} [no default] Output: - --output Path to the output directory (default: $params.outdir) + --output Path to the output directory [default: $params.outdir] Optional input: --aligner Aligner to use [default: $params.aligner] - --edit_site_tool Tool used for detecting edited sites. Default: $params.edit_site_tool - --strandedness Set the strandedness for all your input reads (default: null). In auto mode salmon will guess the library type for each fastq sample. [ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'OSR', 'auto' ] - --edit_threshold Minimal number of edited reads to count a site as edited (default: 1) + --edit_site_tool Tool used for detecting edited sites. [default: $params.edit_site_tool] + --strandedness Set the strandedness for all your input reads [default: $params.strandedness]. In auto mode salmon will guess the library type for each fastq sample. 
[ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'OSR', 'auto' ] + --edit_threshold Minimal number of edited reads to count a site as edited [default: $params.edit_threshold] --aggregation_mode Mode for aggregating edition counts mapped on genomic features. See documentation for details. Options are: "all" (default) or "cds_longest" - --clipoverlap Clip overlapping sequences in read pairs to avoid double counting. (default: false) + --clipoverlap Clip overlapping sequences in read pairs to avoid double counting. [default: $params.clipoverlap] + --fastqc run fastqc on main steps [default: $params.fastqc] Nextflow options: -profile Change the profile of nextflow both the engine and executor more details on github README [debug, test, itrop, singularity, local, docker] @@ -136,7 +143,7 @@ include { AliNe as ALIGNMENT } from "./modules/aline.nf" include { extract_libtype } from "./modules/bash.nf" include {bamutil_clipoverlap} from './modules/bamutil.nf' include {fastp} from './modules/fastp.nf' -include {fastqc as fastqc_raw; fastqc as fastqc_ali; fastqc as fastqc_dup; fastqc as fastqc_clip} from './modules/fastqc.nf' +include {fastqc as fastqc_ali; fastqc as fastqc_dup; fastqc as fastqc_clip} from './modules/fastqc.nf' include {gatk_markduplicates } from './modules/gatk.nf' include {multiqc} from './modules/multiqc.nf' include {fasta_unzip} from "$baseDir/modules/pigz.nf" @@ -199,6 +206,8 @@ def aline_profile = aline_profile_list.join(',') workflow { main: + + Channel.empty().set{logs} // logs channel // ---------------------------------------------------------------------------- // --- DEAL WITH REFERENCE --- // check if reference exists @@ -391,6 +400,11 @@ workflow { // sort the bam files Channel.empty().set{sorted_bam} sorted_bam = samtools_sort_bam( bams ) + // stat on aligned reads + if(params.fastqc){ + fastqc_ali(sorted_bam, "ali") + logs.concat(fastqc_ali.out).set{logs} // save log + } // 
---------------------------------------------------------------------------- // DEAL WITH FASTQ FILES @@ -559,31 +573,28 @@ workflow { log.info "The following bam file(s) will be processed by RAIN:" tuple_sample_sortedbam.view() - // STEP 1 QC with fastp ? - Channel.empty().set{logs} - // stat on aligned reads - fastqc_ali(tuple_sample_sortedbam, "ali") - logs.concat(fastqc_ali.out).set{logs} // save log // remove duplicates gatk_markduplicates(tuple_sample_sortedbam) logs.concat(gatk_markduplicates.out.log).set{logs} // save log - // stat on bam without duplicates∂ - fastqc_dup(gatk_markduplicates.out.tuple_sample_dedupbam, "dup") - logs.concat(fastqc_dup.out).set{logs} // save log + // stat on bam without duplicates + if(params.fastqc){ + fastqc_dup(gatk_markduplicates.out.tuple_sample_dedupbam, "dup") + logs.concat(fastqc_dup.out).set{logs} // save log + } // Clip overlap if (params.clipoverlap) { bamutil_clipoverlap(gatk_markduplicates.out.tuple_sample_dedupbam) tuple_sample_bam_processed = bamutil_clipoverlap.out.tuple_sample_clipoverbam // stat on bam with overlap clipped - fastqc_clip(tuple_sample_bam_processed, "clip") - logs.concat(fastqc_clip.out).set{logs} // save log + if(params.fastqc){ + fastqc_clip(tuple_sample_bam_processed, "clip") + logs.concat(fastqc_clip.out).set{logs} // save log + } } else { tuple_sample_bam_processed = gatk_markduplicates.out.tuple_sample_dedupbam } // index bam samtools_index(tuple_sample_bam_processed) - // report with multiqc - // multiqc(logs.collect(),params.multiqc_config) // Select site detection tool switch (params.edit_site_tool) { @@ -611,6 +622,9 @@ workflow { exit(1, "Wrong edit site tool was passed") } + // ------------------- MULTIQC ----------------- + multiqc(logs.collect(),params.multiqc_config) + } From 16bacc1edb65ead0b91c9c3a5ae1a77125feea4d Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 26 Jun 2025 12:10:03 +0200 Subject: [PATCH 4/7] add annotation handling and fix nf-cmd.sh print --- 
modules/aline.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/aline.nf b/modules/aline.nf index 7b81aa2..3038bc8 100644 --- a/modules/aline.nf +++ b/modules/aline.nf @@ -15,6 +15,7 @@ process AliNe { val read_type val aligner val library_type + val annotation val cache_dir // String when: @@ -34,11 +35,12 @@ process AliNe { read_type, aligner, library_type, + "--annotation ${annotation}", "--data_type rna", "--outdir $task.workDir/AliNe", ].join(" ") // Copy command to shell script in work dir for reference/debugging. - file("$task.workDir/nf-cmd.sh").text = nxf_cmd.join(" ") + file("$task.workDir/nf-cmd.sh").text = nxf_cmd // Run nextflow command locally def process = nxf_cmd.execute(null, cache_path.toFile()) process.waitFor() From 638de425f1bd3ccde038cfb70e4585a0aad215bf Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 26 Jun 2025 12:12:06 +0200 Subject: [PATCH 5/7] remove aline ressource config. Same profile name implemented in AliNe allow kind of transitivity --- config/resources/base_aline.config | 7 -- config/resources/custom_aline.config | 97 ---------------------------- nextflow.config | 2 - 3 files changed, 106 deletions(-) delete mode 100644 config/resources/base_aline.config delete mode 100644 config/resources/custom_aline.config diff --git a/config/resources/base_aline.config b/config/resources/base_aline.config deleted file mode 100644 index 761f7a6..0000000 --- a/config/resources/base_aline.config +++ /dev/null @@ -1,7 +0,0 @@ -process { - cpus = 1 - time = '1h' - maxForks = 2 - shell = ['/bin/bash', '-euo', 'pipefail'] - stageOutMode = 'rsync' -} diff --git a/config/resources/custom_aline.config b/config/resources/custom_aline.config deleted file mode 100644 index dc0c73d..0000000 --- a/config/resources/custom_aline.config +++ /dev/null @@ -1,97 +0,0 @@ -process { - cpus = 1 - time = '1h' - maxForks = 8 - shell = ['/bin/bash', '-euo', 'pipefail'] - stageOutMode = 'rsync' - - withLabel: 'bash' { - cpus = 1 - 
time = '1h' - } - withLabel: 'bbmap' { - cpus = 2 - time = '4h' - } - withLabel: 'bowtie' { - cpus = 2 - time = '4h' - } - withLabel: 'bowtie2' { - cpus = 2 - time = '4h' - } - withLabel: 'bwa' { - cpus = 2 - time = '4h' - } - withName: 'fastp' { - cpus = 2 - time = '2h' - } - withLabel: 'fastqc' { - cpus = 6 - time = '1h' - } - withLabel: 'hisat2' { - cpus = 2 - time = '4h' - } - withLabel: 'kallisto' { - cpus = 2 - time = '4h' - } - withLabel: 'graphmap2' { - cpus = 2 - time = '4h' - } - withLabel: 'minimap2' { - cpus = 2 - time = '4h' - } - withLabel: 'multiqc' { - cpus = 4 - time = '1h' - } - withLabel: 'jacusa2' { - cpus = 4 - memory = "8G" - time = '1d' - } - withLabel: 'mummer4' { - cpus = 2 - time = '4h' - } - withLabel: 'ngmlr' { - cpus = 4 - time = '1h' - } - withLabel: 'novoalign' { - cpus = 2 - time = '4h' - } - withLabel: 'salmon' { - cpus = 2 - time = '4h' - } - withLabel: 'samtools' { - cpus = 4 - time = '2h' - } - withLabel: 'seqkit' { - cpus = 2 - time = '4h' - } - withLabel: 'seqtk' { - cpus = 2 - time = '4h' - } - withLabel: 'star' { - cpus = 7 - time = '4h' - } - withLabel: 'subread' { - cpus = 2 - time = '4h' - } -} diff --git a/nextflow.config b/nextflow.config index 5a112b9..8cffd99 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,7 +59,6 @@ profiles { includeConfig "$baseDir/config/resources/local.config" } test { - params.aline_profiles = "${baseDir}/config/resources/base_aline.config" params.aligner = "STAR" params.reads = "${baseDir}/data/chr21/chr21_small_R1.fastq.gz" params.genome = "${baseDir}/data/chr21/chr21_small.fasta.gz" @@ -68,7 +67,6 @@ profiles { params.read_type = "short_single" } test2 { - params.aline_profiles = "${baseDir}/config/resources/base_aline.config" params.aligner = "STAR" params.reads = "${baseDir}/data/chr21/" params.genome = "${baseDir}/data/chr21/chr21_small.fasta.gz" From 6f174868da044eeaced926fec060f3de647d4466 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 26 Jun 2025 12:14:04 +0200 
Subject: [PATCH 6/7] add AliNe config file prefilled for User --- nextflow_aline.config | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 nextflow_aline.config diff --git a/nextflow_aline.config b/nextflow_aline.config new file mode 100644 index 0000000..827da5f --- /dev/null +++ b/nextflow_aline.config @@ -0,0 +1,42 @@ +// These are extra parameters specific to AliNe not handled by the RAIN pipeline + +params { + + // + samtools_stats = false // activate samtools stats + seqtk_sample_size = 50000 // to change default AliNe sample size used to guess library type + read_length = "" // Used by star to set the sjdbOverhang parameter but also by some other aligners + relax = "" // + + // Aligner extra parameters + bbmap_options = '' + bowtie_options = '' + bowtie2_options = '' + bwaaln_options = '' + bwamem_options = '' + bwamem2_options = '' + bwasw_options = '' + graphmap2_options = '' // owler option is possible + hisat2_options = '' + kallisto_options = '' + kallisto_index_options = '' // e.g. to use --distinguish, --make-unique, etc... + last_options = '' + last_index_options = '' + minimap2_options = '' + minimap2_index_options = '' // -k, -w, -H and -I + ngmlr_options = '' + novoalign_options = '' + novoalign_license = '' // license. 
You can ask for one month free trial license at http://www.novocraft.com/products/novoalign/ + nucmer_options = '' + salmon_options = '' + salmon_index_options = '' + star_options = '' + star_index_options = '' + star_2pass = false + subread_options = '' + sublong_options = '' + + // to store general information about pipeline execution + pipeline_report = "pipeline_report" + monochrome_logs = false +} From 5dc4cb39cf616ae2a18c4badddc5caabf0e445a7 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 26 Jun 2025 12:17:00 +0200 Subject: [PATCH 7/7] add fastqc activation param linked to AliNe; Send annotation to AliNe; use AliNe v1.5.2; Move gxf normalisation at the top before sending annotation file to AliNe --- rain.nf | 76 +++++++++++++++++++++------------------------------------ 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/rain.nf b/rain.nf index 4170a1f..4a5cfb2 100644 --- a/rain.nf +++ b/rain.nf @@ -8,9 +8,9 @@ import java.nio.file.* //************************************************* // STEP 0 - parameters //************************************************* - +// ------------------------------------ /* ---- Params specific to RAIN ---- */ - +// ------------------------------------ // Input/output params params.reads = null // "/path/to/reads_{1,2}.fastq.gz/or/folder" params.genome = null // "/path/to/genome.fa" @@ -23,10 +23,19 @@ edit_site_tools = ["reditools2", "reditools3", "jacusa2", "sapin"] params.edit_site_tool = "reditools3" params.edit_threshold = 1 params.aggregation_mode = "all" +// Report params params.multiqc_config = "$baseDir/config/multiqc_config.yaml" // MultiQC config file +/* Specific tool params */ +params.region = "" // e.g. 
chr21 - Used to limit the analysis to a specific region by REDITOOLS2 + +// others +params.help = null +params.monochrome_logs = false // if true, no color in logs -/* ---- Params shared between RAIN and AliNe ---- */ +// -------------------------------------------------- +/* ---- Params shared between RAIN and AliNe ---- */ +// -------------------------------------------------- // Read feature params read_type_allowed = [ 'short_paired', 'short_single', 'pacbio', 'ont' ] @@ -35,30 +44,19 @@ strandedness_allowed = [ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', params.strandedness = null params.fastqc = false +// ------------------------------------- /* ---- Params Specific to AliNe ---- */ - +// ------------------------------------- +// The rest of params are in nextflow_aline.config file // Aline profiles aline_profile_allowed = [ 'docker', 'singularity', 'local', 'itrop' ] - // Aline ressource config used -params.aline_profiles = "$baseDir/config/resources/custom_aline.config" // e.g. "docker, singularity,itrop,local" - +params.aline_profiles = "$baseDir/nextflow_aline.config" // e.g. "docker, singularity,itrop,local" +// made in aline but params here because it is main step +params.trimming_fastp = false // Aligner params -align_tools = ['hisat2', "STAR"] +align_tools = [ 'bbmap', 'bowtie', 'bowtie2', 'bwaaln', 'bwamem', 'bwamem2', 'bwasw', 'graphmap2', 'hisat2', 'kallisto', 'last', 'minimap2', 'novoalign', 'nucmer', 'ngmlr', 'salmon', 'star', 'subread', 'sublong' ] params.aligner = 'hisat2' -params.bowtie2_options = '' -params.hisat2_options = '' -params.star_options = '' - -/* Specific tool params */ -params.region = "" // e.g. 
chr21 - Used to limit the analysis to a specific region by REDITOOLS2 - -// Report params -params.multiqc_config = "$baseDir/config/multiqc_conf.yml" - -// other -params.help = null -params.monochrome_logs = false // if true, no color in logs //************************************************* // STEP 1 - HELP @@ -159,25 +157,6 @@ include {pluviometer} from "./modules/pluviometer.nf" // STEP 3 - Deal with parameters //************************************************* -// Check aligner params. Can be a list (comma or space separated) -def aligner_list=[] -if( !params.aligner ){ - exit 1, "Error: parameter is empty, please provide a aligner(s) among this list ${align_tools}.\n" -} else { - str_list = params.aligner.tokenize(',') - str_list.each { - str_list2 = it.tokenize(' ') - str_list2.each { - if ( ! (it in align_tools) ){ - exit 1, "Error: <${it}> aligner not accepted, please provide aligner(s) among this list ${align_tools}.\n" - } - else{ - aligner_list.add(it) - } - } - } -} - // Check edit site tool params. Does not accept list yet, but validates input. if ( ! 
(params.edit_site_tool in edit_site_tools) ){ exit 1, "Error: <${it}> edit site tool not accepted, please provide a tool in this list ${edit_site_tools}.\n" @@ -231,7 +210,9 @@ workflow { .ifEmpty { exit 1, "Cannot find annotation matching ${params.annotation}!\n" } .set{annotation} } - + // normalize the annotation + normalize_gxf(annotation) + normalize_gxf.out.gff.set{clean_annotation} // ---------------------------------------------------------------------------- // DEAL WITH CSV FILE FIRST // ---------------------------------------------------------------------------- @@ -470,8 +451,9 @@ workflow { } Channel.empty().set{aline_alignments_all} if (aline_data_in){ + ALIGNMENT ( - 'Juke34/AliNe -r v1.5.1', // Select pipeline + 'Juke34/AliNe -r v1.5.2', // Select pipeline "${workflow.resume?'-resume':''} -profile ${aline_profile}", // workflow opts supplied as params for flexibility "-config ${params.aline_profiles}", "--reads ${aline_data_in}", @@ -479,6 +461,7 @@ workflow { "--read_type ${params.read_type}", "--aligner ${params.aligner}", "--strandedness ${params.strandedness}", + clean_annotation, workflow.workDir.resolve('Juke34/AliNe').toUriString() ) @@ -602,21 +585,18 @@ workflow { // Create a fasta index file of the reference genome samtools_fasta_index(genome.collect()) jacusa2(samtools_index.out.tuple_sample_bam_bamindex, samtools_fasta_index.out.tuple_fasta_fastaindex.collect()) - normalize_gxf(annotation.collect()) - pluviometer(jacusa2.out.tuple_sample_jacusa2_table, normalize_gxf.out.gff.collect(), "jacusa2") + pluviometer(jacusa2.out.tuple_sample_jacusa2_table, clean_annotation, "jacusa2") break case "sapin": sapin(tuple_sample_bam_processed, genome.collect()) break case "reditools2": reditools2(samtools_index.out.tuple_sample_bam_bamindex, genome.collect(), params.region) - normalize_gxf(annotation.collect()) - pluviometer(reditools2.out.tuple_sample_serial_table, normalize_gxf.out.gff.collect(), "reditools2") + 
pluviometer(reditools2.out.tuple_sample_serial_table, clean_annotation, "reditools2") break case "reditools3": reditools3(samtools_index.out.tuple_sample_bam_bamindex, genome.collect()) - normalize_gxf(annotation.collect()) - pluviometer(reditools3.out.tuple_sample_serial_table, normalize_gxf.out.gff.collect(), "reditools3") + pluviometer(reditools3.out.tuple_sample_serial_table, clean_annotation, "reditools3") break default: exit(1, "Wrong edit site tool was passed")