From b7d3b58c2ee2634958cc74b40b40a95146898fdb Mon Sep 17 00:00:00 2001 From: earx Date: Wed, 14 May 2025 09:59:18 +0200 Subject: [PATCH 1/9] rename reditools pubish directory and output file --- modules/reditools2.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/reditools2.nf b/modules/reditools2.nf index f052544..7cb9219 100644 --- a/modules/reditools2.nf +++ b/modules/reditools2.nf @@ -1,6 +1,6 @@ process reditools2 { label "reditools2" - publishDir("${params.outdir}/edit_tables", mode: "copy") + publishDir("${params.outdir}/reditools", mode: "copy") input: tuple(val(sample), path(bam), path(bamindex)) @@ -8,10 +8,10 @@ process reditools2 { val region output: - tuple(val(sample), path("serial_table.txt"), emit: tuple_sample_serial_table) + tuple(val(sample), path("edit_table.txt"), emit: tuple_sample_serial_table) script: """ - reditools.py -f ${bam} -r ${genome} -S -o serial_table.txt + reditools.py -f ${bam} -r ${genome} -o edit_table.txt """ } From 3238b43e17bd91d5bc255e21853ea7179c206302 Mon Sep 17 00:00:00 2001 From: earx Date: Wed, 14 May 2025 14:06:40 +0200 Subject: [PATCH 2/9] add strand orientation option to reditools command --- modules/reditools2.nf | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/modules/reditools2.nf b/modules/reditools2.nf index 7cb9219..fe1753b 100644 --- a/modules/reditools2.nf +++ b/modules/reditools2.nf @@ -11,7 +11,21 @@ process reditools2 { tuple(val(sample), path("edit_table.txt"), emit: tuple_sample_serial_table) script: + if (params.library_type in ["ISR", "SR"]) { + // First-strand oriented + strand_orientation = "2" + } else if (params.library_type in ["ISF", "SF"]) { + // Second-strand oriented + strand_orientation = "1" + } else if (params.library_type in ["IU", "U"]) { + // Unstranded + strand_orientation = "0" + } else { + // Unsupported: Pass the library type string so that it's reported in + // the reditools error message + strand_orientation = params.library_type + } """ - reditools.py -f ${bam} -r ${genome} -o edit_table.txt + reditools.py -f ${bam} -r ${genome} -s ${strand_orientation} -o edit_table.txt """ } From 08a2d416435c732eaa386baaf9d66a3fa1a6057b Mon Sep 17 00:00:00 2001 From: earx Date: Wed, 14 May 2025 14:15:29 +0200 Subject: [PATCH 3/9] Add link to explanation of library type terms --- modules/reditools2.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/reditools2.nf b/modules/reditools2.nf index fe1753b..2b9f08c 100644 --- a/modules/reditools2.nf +++ b/modules/reditools2.nf @@ -11,6 +11,9 @@ process reditools2 { tuple(val(sample), path("edit_table.txt"), emit: tuple_sample_serial_table) script: + + // Set the strand orientation parameter from the library type parameter + // Terms explained in https://salmon.readthedocs.io/en/latest/library_type.html if (params.library_type in ["ISR", "SR"]) { // First-strand oriented strand_orientation = "2" @@ -25,6 +28,7 @@ process reditools2 { // the reditools error message strand_orientation = params.library_type } + """ reditools.py -f ${bam} -r ${genome} -s ${strand_orientation} -o edit_table.txt """ From 27248ef89260f546e9d9e96368eb9ce32b9a1e56 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Fri, 16 May 2025 18:00:50 +0200 Subject: [PATCH 4/9] extract libtype from AliNe and use dictionary as first value in channels --- config/softwares.config | 3 +++ modules/aline.nf | 2 +- modules/bamutil.nf | 13 ++++++------ modules/bash.nf | 21 ++++++++++++++++++ modules/fastp.nf | 38 ++++++++++++++++++++------------- modules/fastqc.nf | 16 +++++++------- modules/gatk.nf | 22 +++++++++---------- modules/jacusa2.nf | 17 ++++++++------- modules/multiqc.nf | 14 ++++++------ modules/reditools2.nf | 17 ++++++++------- modules/samtools.nf | 18 ++++++++-------- modules/sapin.nf | 13 ++++++------ rain.nf | 47 ++++++++++++++++++++++++++++++++++++----- 13 files changed, 157 insertions(+), 84 deletions(-) create mode 100644 modules/bash.nf diff --git a/config/softwares.config b/config/softwares.config index c790adb..d827ca4 100644 --- a/config/softwares.config +++ b/config/softwares.config @@ -2,6 +2,9 @@ process { withLabel: 'agat' { container = 'quay.io/biocontainers/agat:1.4.2--pl5321hdfd78af_0' } + withLabel: 'bash' { + container = 'ubuntu:24.04' + } withLabel: 'bamutil' { container = 'quay.io/biocontainers/bamutil:1.0.15--h43eeafb_4' } diff --git a/modules/aline.nf b/modules/aline.nf index be6eed3..73e9f8d 100644 --- a/modules/aline.nf +++ b/modules/aline.nf @@ -30,7 +30,7 @@ process AliNe { profile, config, reads, - "--genome ${genome}", + "--reference ${genome}", read_type, aligner, library_type, diff --git a/modules/bamutil.nf b/modules/bamutil.nf index 17cc7c9..723ea29 100644 --- a/modules/bamutil.nf +++ b/modules/bamutil.nf @@ -1,18 +1,19 @@ process bamutil_clipoverlap { label 'bamutil' - tag "$sample_id" + tag "${meta.id}" publishDir "${params.outdir}/bamutil_clipoverlap", mode: 'copy' input: - tuple val(sample_id), path(bam) + tuple val(meta), path(bam) output: - tuple val(sample_id), path ("*clipoverlap.bam"), emit: tuple_sample_clipoverbam + tuple val(meta), path ("*clipoverlap.bam"), emit: tuple_sample_clipoverbam path ("*_bamutil_clipoverlap.log"), emit: log script: - """ - bam clipOverlap --storeOrig CG --poolSize 50000000 --in ${bam} --out ${bam}_clipoverlap.bam --stats > ${sample_id}_bamutil_clipoverlap.log - """ + + """ + bam clipOverlap --storeOrig CG --poolSize 50000000 --in ${bam} --out ${bam}_clipoverlap.bam --stats > ${meta.id}_bamutil_clipoverlap.log + """ } diff --git a/modules/bash.nf b/modules/bash.nf new file mode 100644 index 0000000..e877bf6 --- /dev/null +++ b/modules/bash.nf @@ -0,0 +1,21 @@ +/* +Here are described all processes related to bash +*/ + +// A process to compute the mean read length of a FASTQ +process extract_libtype { + label 'bash' + tag "$id" + + input: + tuple val(id), path(samlmon_json) + + output: + tuple val(id), env(LIBTYPE), emit: tuple_id_libtype + + script: + """ + LIBTYPE=\$(grep expected_format ${samlmon_json} | awk '{print \$2}' | tr -d '",\n') + """ + +} \ No newline at end of file diff --git a/modules/fastp.nf b/modules/fastp.nf index 2f48aeb..74401da 100644 --- a/modules/fastp.nf +++ b/modules/fastp.nf @@ -1,49 +1,57 @@ process fastp { label 'fastp' - publishDir "${params.output}/${id}/qc", mode: 'copy' + tag "${meta.id}" + publishDir "${params.output}/${meta.id}/qc", mode: 'copy' + input: - tuple val(id), path(illumina) + tuple val(meta), path(illumina) val(phred_type) + output: - tuple val(id), path("*_R?_clean.fastq") - path("${id}_fastp_report.html") + tuple val(meta), path("*_R?_clean.fastq") + path("${meta.id}_fastp_report.html") + script: """ if [ "${phred_type}" == "64" ] then fastp -i ${illumina[0]} -I ${illumina[1]} \ - -o ${id}_R1_clean.fastq -O ${id}_R2_clean.fastq \ + -o ${meta.id}_R1_clean.fastq -O ${meta.id}_R2_clean.fastq \ --phred64 \ - --detect_adapter_for_pe --html ${id}_fastp_report.html + --detect_adapter_for_pe --html ${meta.id}_fastp_report.html else fastp -i ${illumina[0]} -I ${illumina[1]} \ - -o ${id}_R1_clean.fastq -O ${id}_R2_clean.fastq \ - --detect_adapter_for_pe --html ${id}_fastp_report.html + -o ${meta.id}_R1_clean.fastq -O ${meta.id}_R2_clean.fastq \ + --detect_adapter_for_pe --html ${meta.id}_fastp_report.html fi """ } process fastp_hybrid { label 'fastp' + tag "${meta.id}" publishDir "${params.output}/${id}/qc", mode: 'copy' + input: - tuple val(id), path(illuminaR1), path(illuminaR2), path(ont) + tuple val(meta), path(illuminaR1), path(illuminaR2), path(ont) val(phred_type) + output: - tuple val(id), path("${id}_R1_clean.fastq"),path("${id}_R2_clean.fastq"), path(ont), emit: trimmed_hybrid - path("${id}_fastp_report.html") + tuple val(meta), path("${meta.id}_R1_clean.fastq"),path("${meta.id}_R2_clean.fastq"), path(ont), emit: trimmed_hybrid + path("${meta.id}_fastp_report.html") + script: """ if [ "${phred_type}" == "64" ] then fastp -i ${illuminaR1} -I ${illuminaR2} \ - -o ${id}_R1_clean.fastq -O ${id}_R2_clean.fastq \ + -o ${meta.id}_R1_clean.fastq -O ${meta.id}_R2_clean.fastq \ --phred64 \ - --detect_adapter_for_pe --html ${id}_fastp_report.html + --detect_adapter_for_pe --html ${meta.id}_fastp_report.html else fastp -i ${illuminaR1} -I ${illuminaR2} \ - -o ${id}_R1_clean.fastq -O ${id}_R2_clean.fastq \ - --detect_adapter_for_pe --html ${id}_fastp_report.html + -o ${meta.id}_R1_clean.fastq -O ${meta.id}_R2_clean.fastq \ + --detect_adapter_for_pe --html ${meta.id}_fastp_report.html fi """ } diff --git a/modules/fastqc.nf b/modules/fastqc.nf index 9aa5518..b83e6bb 100644 --- a/modules/fastqc.nf +++ b/modules/fastqc.nf @@ -1,19 +1,19 @@ process fastqc { label 'fastqc' - tag "$sample_id" + tag "${meta.id}" publishDir "${params.outdir}/FastQC", mode: 'copy' input: - tuple val(sample_id), path(reads) - val (suffix) + tuple val(meta), path(reads) + val (suffix) output: - path ("fastqc_${sample_id}_logs_${suffix}") + path ("fastqc_${meta.id}_logs_${suffix}") script: - """ - mkdir fastqc_${sample_id}_logs_${suffix} - fastqc -t ${task.cpus} -o fastqc_${sample_id}_logs_${suffix} -q ${reads} - """ + """ + mkdir fastqc_${meta.id}_logs_${suffix} + fastqc -t ${task.cpus} -o fastqc_${meta.id}_logs_${suffix} -q ${reads} + """ } diff --git a/modules/gatk.nf b/modules/gatk.nf index 32d25ff..c6cb602 100644 --- a/modules/gatk.nf +++ b/modules/gatk.nf @@ -1,22 +1,22 @@ process gatk_markduplicates { label 'gatk' - tag "$sample_id" + tag "${meta.id}" publishDir "${params.outdir}/gatk_markduplicates", mode: 'copy' input: - tuple val(sample_id), path(bam) + tuple val(meta), path(bam) output: - tuple val(sample_id), path ("*_marked_duplicates.bam"), emit: tuple_sample_dedupbam - path ("*_marked_dup_metrics.txt") , emit: log + tuple val(meta), path ("*_marked_duplicates.bam"), emit: tuple_sample_dedupbam + path ("*_marked_dup_metrics.txt") , emit: log script: - """ - gatk MarkDuplicates \ - -I ${bam} \ - -O ${bam.baseName}_marked_duplicates.bam \ - -M ${bam.baseName}_marked_dup_metrics.txt \ - --REMOVE_DUPLICATES - """ + """ + gatk MarkDuplicates \ + -I ${bam} \ + -O ${bam.baseName}_marked_duplicates.bam \ + -M ${bam.baseName}_marked_dup_metrics.txt \ + --REMOVE_DUPLICATES + """ } diff --git a/modules/jacusa2.nf b/modules/jacusa2.nf index 12ad31f..e789f87 100644 --- a/modules/jacusa2.nf +++ b/modules/jacusa2.nf @@ -1,18 +1,19 @@ process jacusa2 { label "jacusa2" + tag "${meta.id}" publishDir("${params.outdir}/jacusa2", mode: "copy") input: - tuple(val(sample), path(bam), path(bamindex)) - tuple(path(genome), path(fastaindex)) + tuple(val(meta), path(bam), path(bamindex)) + tuple(path(genome), path(fastaindex)) output: - path("*.txt") - path("*.filtered") - // tuple(sample, path("filtered_output.txt", emit: tuple_sample_jacusa2_table)) + path("*.txt") + path("*.filtered") + // tuple(sample, path("filtered_output.txt", emit: tuple_sample_jacusa2_table)) script: - """ - java -Xmx${task.memory.toMega()}M -jar /usr/local/bin/JACUSA_v2.0.4.jar call-1 -a D -f V -p ${task.cpus} -r jacusa_out.txt -c 1 -s -R ${genome} ${bam} - """ + """ + java -Xmx${task.memory.toMega()}M -jar /usr/local/bin/JACUSA_v2.0.4.jar call-1 -a D -f V -p ${task.cpus} -r jacusa_out.txt -c 1 -s -R ${genome} ${bam} + """ } \ No newline at end of file diff --git a/modules/multiqc.nf b/modules/multiqc.nf index bdeab5b..2d9c59a 100644 --- a/modules/multiqc.nf +++ b/modules/multiqc.nf @@ -3,15 +3,15 @@ process multiqc { publishDir "${params.outdir}/MultiQC", mode: 'copy' input: - path log_files - path multiqc_config + path log_files + path multiqc_config output: - path "*multiqc_report.html" - path "*_data" + path "*multiqc_report.html" + path "*_data" script: - """ - multiqc -p . -c ${multiqc_config} - """ + """ + multiqc -p . -c ${multiqc_config} + """ } \ No newline at end of file diff --git a/modules/reditools2.nf b/modules/reditools2.nf index 2b9f08c..e3b19b8 100644 --- a/modules/reditools2.nf +++ b/modules/reditools2.nf @@ -1,32 +1,33 @@ process reditools2 { label "reditools2" publishDir("${params.outdir}/reditools", mode: "copy") + tag "${meta.id}" input: - tuple(val(sample), path(bam), path(bamindex)) - path genome - val region + tuple(val(meta), path(bam), path(bamindex)) + path genome + val region output: - tuple(val(sample), path("edit_table.txt"), emit: tuple_sample_serial_table) + tuple(val(meta), path("edit_table.txt"), emit: tuple_sample_serial_table) script: // Set the strand orientation parameter from the library type parameter // Terms explained in https://salmon.readthedocs.io/en/latest/library_type.html - if (params.library_type in ["ISR", "SR"]) { + if (meta.libtype in ["ISR", "SR"]) { // First-strand oriented strand_orientation = "2" - } else if (params.library_type in ["ISF", "SF"]) { + } else if (meta.libtype in ["ISF", "SF"]) { // Second-strand oriented strand_orientation = "1" - } else if (params.library_type in ["IU", "U"]) { + } else if (meta.libtype in ["IU", "U"]) { // Unstranded strand_orientation = "0" } else { // Unsupported: Pass the library type string so that it's reported in // the reditools error message - strand_orientation = params.library_type + strand_orientation = meta.libtype } """ diff --git a/modules/samtools.nf b/modules/samtools.nf index aecdf40..0a7a55f 100644 --- a/modules/samtools.nf +++ b/modules/samtools.nf @@ -1,13 +1,13 @@ process samtools_index { label "samtools" - tag "$sample" + tag "${meta.id}" publishDir("${params.outdir}/bam_indices", mode:"copy") input: - tuple(val(sample), path(bam)) + tuple(val(meta), path(bam)) output: - tuple(val(sample), path(bam), path("*.bai"), emit: tuple_sample_bam_bamindex) + tuple(val(meta), path(bam), path("*.bai"), emit: tuple_sample_bam_bamindex) script: """ @@ -17,17 +17,17 @@ process samtools_index { process samtools_fasta_index { label "samtools" - tag "genome" + tag "${fasta}" publishDir("${params.outdir}/fasta_indices", mode:"copy") input: - path(fasta) + path(fasta) output: - tuple(path(fasta), path("${fasta}.fai"), emit: tuple_fasta_fastaindex) + tuple(path(fasta), path("${fasta}.fai"), emit: tuple_fasta_fastaindex) script: - """ - samtools faidx ${fasta} - """ + """ + samtools faidx ${fasta} + """ } diff --git a/modules/sapin.nf b/modules/sapin.nf index 9b6dec2..fe9f57a 100644 --- a/modules/sapin.nf +++ b/modules/sapin.nf @@ -1,16 +1,17 @@ process sapin { label "sapin" + tag "${meta.id}" publishDir("${params.outdir}/sapin", mode: "copy") input: - tuple(val(sample), path(bam)) - path(reference) + tuple(val(meta), path(bam)) + path(reference) output: - path("restable.tsv") + path("restable.tsv") script: - """ - sapin -a ${bam} -f ${reference} > restable.tsv - """ + """ + sapin -a ${bam} -f ${reference} > restable.tsv + """ } diff --git a/rain.nf b/rain.nf index c75dc4b..1acc9ce 100644 --- a/rain.nf +++ b/rain.nf @@ -127,6 +127,7 @@ Report Parameters // STEP 2 - Include needed modules //************************************************* include { AliNe as ALIGNMENT } from "./modules/aline.nf" +include { extract_libtype } from "./modules/bash.nf" include {bamutil_clipoverlap} from './modules/bamutil.nf' include {fastp} from './modules/fastp.nf' include {fastqc as fastqc_raw; fastqc as fastqc_ali; fastqc as fastqc_dup; fastqc as fastqc_clip} from './modules/fastqc.nf' @@ -194,9 +195,9 @@ workflow { fasta_uncompress(genome_raw) fasta_uncompress.out.genomeFa.set{genome_ch} // set genome to the output of fasta_uncompress - + // Perform AliNe alignment ALIGNMENT ( - 'Juke34/AliNe -r v1.3.0', // Select pipeline + 'Juke34/AliNe -r v1.4.0', // Select pipeline "${workflow.resume?'-resume':''} -profile ${aline_profile}", // workflow opts supplied as params for flexibility "-config ${params.aline_profiles}", "--reads ${params.reads}", @@ -206,16 +207,52 @@ workflow { "--library_type ${params.library_type}", workflow.workDir.resolve('Juke34/AliNe').toUriString() ) + + // GET TUPLE [ID, BAM] FILES ALIGNMENT.out.output .map { dir -> files("$dir/alignment/*/*.bam", checkIfExists: true) // Find BAM files inside the output directory } .flatten() // Ensure we emit each file separately - .map { bam -> tuple(bam.baseName, bam) } // Convert each BAM file into a tuple, with the base name as the first element + .map { bam -> + def name = bam.getName().split('_seqkit')[0] // Extract the base name of the BAM file. _seqkit is the separator. + tuple(name, bam) + } // Convert each BAM file into a tuple, with the base name as the first element .set { aline_alignments } // Store the channel + + if (params.library_type.contains("auto") ) { + log.info "Library type is set to auto, extracting it from salmon output" + // GET TUPLE [ID, OUTPUT_SALMON_LIBTYPE] FILES + ALIGNMENT.out.output + .map { dir -> + files("$dir/salmon_libtype/*/*.json", checkIfExists: true) // Find BAM files inside the output directory + } + .flatten() // Ensure we emit each file separately + .map { json -> + def name = json.getParent().getName().split('_seqkit')[0] // Extract the base name of the BAM file. _seqkit is the separator. The name is in the fodler containing the json file. Why take this one? Because it is the same as teh bam name set by Aline. It will be used to sync both values + tuple(name, json) + } // Convert each BAM file into a tuple, with the base name as the first element + .set { aline_libtype } // Store the channel + // Extract the library type from the JSON file + aline_libtype = extract_libtype(aline_libtype) + aline_alignments.join(aline_libtype) + .map { key, val1, val2 -> tuple(key, val1, val2) } + .set { aline_alignments_all } + } else { + log.info "Library type is set to ${params.library_type}, no need to extract it from salmon output" + aline_alignments_all = aline_alignments.map { name, bam -> tuple(name, bam, params.library_type) } + } + + // transform [ID, BAM, LIBTYPE] into [[id: 'ID', libtype: 'LIBTYPE'], file('BAM')] + aline_alignments_all = aline_alignments_all.map { id, file, lib -> + def meta = [ id: id, libtype: lib ] + tuple(meta, file) + } + + aline_alignments_all.view() - // aline_alignments.view() - rain(aline_alignments, genome_ch) + // call rain + rain(aline_alignments_all, genome_ch) } workflow rain { From 216c8776839e4c3caebaf6a7eddbde3f93d4018a Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Mon, 19 May 2025 13:51:07 +0200 Subject: [PATCH 5/9] get bam from file/URL or folder path --- rain.nf | 250 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 179 insertions(+), 71 deletions(-) diff --git a/rain.nf b/rain.nf index 1acc9ce..42b700b 100644 --- a/rain.nf +++ b/rain.nf @@ -10,7 +10,8 @@ import java.nio.file.* //************************************************* // Input/output params -params.reads = "/path/to/reads_{1,2}.fastq.gz/or/folder" +params.reads = null // "/path/to/reads_{1,2}.fastq.gz/or/folder" +params.bams = null // "/path/to/reads.bam/or/folder" params.genome = "/path/to/genome.fa" params.annotation = "/path/to/annotations.gff3" params.outdir = "result" @@ -43,35 +44,21 @@ params.region = "" // e.g. chr21 - Used to limit the analysis to a specific regi // Report params params.multiqc_config = "$baseDir/config/multiqc_conf.yml" +// other +params.help = null +params.monochrome_logs = false // if true, no color in logs + //************************************************* -// STEP 1 - LOG INFO +// STEP 1 - HELP //************************************************* -// Header message -log.info """ -IRD -.-./`) .-------. ______ -\\ .-.')| _ _ \\ | _ `''. -/ `-' \\| ( ' ) | | _ | ) _ \\ - `-'`\"`|(_ o _) / |( ''_' ) | - .---. | (_,_).' __ | . (_) `. | - | | | |\\ \\ | ||(_ ._) ' - | | | | \\ `' /| (_.\\.' / - | | | | \\ / | .' - '---' ''-' `'-' '-----'` - - -RAIN - RNA Alterations Investigation using Nextflow -=================================================== - -""" - +log.info header() if (params.help) { exit 0, helpMSG() } // Help Message def helpMSG() { log.info """ - ********* RAIN - RNA Alterations Investigation using Nextflow ********* + RAIN - RNA Alterations Investigation using Nextflow - v${workflow.manifest.version} Usage example: nextflow run main.nf --illumina short_reads_Ecoli --genus Escherichia --species coli --species_taxid 562 -profile docker -resume @@ -187,7 +174,8 @@ def aline_profile = aline_profile_list.join(',') workflow { main: - // check if genome exists + // --- DEAL WITH REFERENCE --- + // check if reference exists Channel.fromPath(params.genome, checkIfExists: true) .ifEmpty { exit 1, "Cannot find genome matching ${params.genome}!\n" } .set{genome_raw} @@ -195,62 +183,118 @@ workflow { fasta_uncompress(genome_raw) fasta_uncompress.out.genomeFa.set{genome_ch} // set genome to the output of fasta_uncompress - // Perform AliNe alignment - ALIGNMENT ( - 'Juke34/AliNe -r v1.4.0', // Select pipeline - "${workflow.resume?'-resume':''} -profile ${aline_profile}", // workflow opts supplied as params for flexibility - "-config ${params.aline_profiles}", - "--reads ${params.reads}", - genome_ch, - "--read_type ${params.read_type}", - "--aligner ${params.aligner}", - "--library_type ${params.library_type}", - workflow.workDir.resolve('Juke34/AliNe').toUriString() - ) - - // GET TUPLE [ID, BAM] FILES - ALIGNMENT.out.output - .map { dir -> - files("$dir/alignment/*/*.bam", checkIfExists: true) // Find BAM files inside the output directory + // DEAL WITH BAM FILES + if ( params.bams ) { + + def bam_list=[] + def path_bams = params.bams + def via_URL = false + if( path_bams.indexOf(',') >= 0) { + // Cut into list with coma separator + str_list = path_bams.tokenize(',') + // loop over elements + str_list.each { + str_list2 = it.tokenize(' ') + str_list2.each { + if ( it.endsWith('.bam') ){ + if (it.startsWith('https:') || it.startsWith('s3:') || it.startsWith('az:') || it.startsWith('gs:') || it.startsWith('ftp:') ) { + log.info "This bam input is an URL: ${it}" + via_URL = true + } + bam_list.add(file(it)) // use file insted of File for URL + } + } + } } - .flatten() // Ensure we emit each file separately - .map { bam -> - def name = bam.getName().split('_seqkit')[0] // Extract the base name of the BAM file. _seqkit is the separator. - tuple(name, bam) - } // Convert each BAM file into a tuple, with the base name as the first element - .set { aline_alignments } // Store the channel - - if (params.library_type.contains("auto") ) { - log.info "Library type is set to auto, extracting it from salmon output" - // GET TUPLE [ID, OUTPUT_SALMON_LIBTYPE] FILES + else { + File input_reads = new File(path_bams) + if(input_reads.exists()){ + if ( input_reads.isDirectory()) { + log.info "The input ${path_bams} is a folder!\n" + // in case of folder provided, add a trailing slash if missing + path_bams = "${input_reads}" + "/" + } + else { + log.info "The input ${path_bams} is a file!\n" + if ( path_bams.endsWith('.bam') ){ + if (path_bams.startsWith('https:') || path_bams.startsWith('s3:') || path_bams.startsWith('az:') || path_bams.startsWith('gs:') || path_bams.startsWith('ftp:') ) { + log.info "This bam input is an URL: ${it}" + via_URL = true + } + bam_list = path_bams + } + } + } + } + if (via_URL ){ + my_samples = Channel.of(bam_list) + bams = my_samples.flatten().map { it -> [it.name.split('_')[0], it] } + .groupTuple() + .ifEmpty { exit 1, "Cannot find reads matching ${path_reads}!\n" } + } else { + bams = Channel.fromFilePairs("${path_bams}/*.bam", size: 1, checkIfExists: true) + .ifEmpty { exit 1, "Cannot find reads matching ${path_bams}!\n" } + } + bams + } + + // DEAL WITH FASTQ FILES + // Perform AliNe alignment + if (params.reads) { + + ALIGNMENT ( + 'Juke34/AliNe -r v1.4.0', // Select pipeline + "${workflow.resume?'-resume':''} -profile ${aline_profile}", // workflow opts supplied as params for flexibility + "-config ${params.aline_profiles}", + "--reads ${params.reads}", + genome_ch, + "--read_type ${params.read_type}", + "--aligner ${params.aligner}", + "--library_type ${params.library_type}", + workflow.workDir.resolve('Juke34/AliNe').toUriString() + ) + + // GET TUPLE [ID, BAM] FILES ALIGNMENT.out.output .map { dir -> - files("$dir/salmon_libtype/*/*.json", checkIfExists: true) // Find BAM files inside the output directory + files("$dir/alignment/*/*.bam", checkIfExists: true) // Find BAM files inside the output directory } .flatten() // Ensure we emit each file separately - .map { json -> - def name = json.getParent().getName().split('_seqkit')[0] // Extract the base name of the BAM file. _seqkit is the separator. The name is in the fodler containing the json file. Why take this one? Because it is the same as teh bam name set by Aline. It will be used to sync both values - tuple(name, json) + .map { bam -> + def name = bam.getName().split('_seqkit')[0] // Extract the base name of the BAM file. _seqkit is the separator. + tuple(name, bam) } // Convert each BAM file into a tuple, with the base name as the first element - .set { aline_libtype } // Store the channel - // Extract the library type from the JSON file - aline_libtype = extract_libtype(aline_libtype) - aline_alignments.join(aline_libtype) - .map { key, val1, val2 -> tuple(key, val1, val2) } - .set { aline_alignments_all } - } else { - log.info "Library type is set to ${params.library_type}, no need to extract it from salmon output" - aline_alignments_all = aline_alignments.map { name, bam -> tuple(name, bam, params.library_type) } - } + .set { aline_alignments } // Store the channel + + if (params.library_type.contains("auto") ) { + log.info "Library type is set to auto, extracting it from salmon output" + // GET TUPLE [ID, OUTPUT_SALMON_LIBTYPE] FILES + ALIGNMENT.out.output + .map { dir -> + files("$dir/salmon_libtype/*/*.json", checkIfExists: true) // Find BAM files inside the output directory + } + .flatten() // Ensure we emit each file separately + .map { json -> + def name = json.getParent().getName().split('_seqkit')[0] // Extract the base name of the BAM file. _seqkit is the separator. The name is in the fodler containing the json file. Why take this one? Because it is the same as teh bam name set by Aline. It will be used to sync both values + tuple(name, json) + } // Convert each BAM file into a tuple, with the base name as the first element + .set { aline_libtype } // Store the channel + // Extract the library type from the JSON file + aline_libtype = extract_libtype(aline_libtype) + aline_alignments.join(aline_libtype) + .map { key, val1, val2 -> tuple(key, val1, val2) } + .set { aline_alignments_all } + } else { + log.info "Library type is set to ${params.library_type}, no need to extract it from salmon output" + aline_alignments_all = aline_alignments.map { name, bam -> tuple(name, bam, params.library_type) } + } - // transform [ID, BAM, LIBTYPE] into [[id: 'ID', libtype: 'LIBTYPE'], file('BAM')] - aline_alignments_all = aline_alignments_all.map { id, file, lib -> - def meta = [ id: id, libtype: lib ] - tuple(meta, file) + // transform [ID, BAM, LIBTYPE] into [[id: 'ID', libtype: 'LIBTYPE'], file('BAM')] + aline_alignments_all = aline_alignments_all.map { id, file, lib -> + def meta = [ id: id, libtype: lib ] + tuple(meta, file) + } } - - aline_alignments_all.view() - // call rain rain(aline_alignments_all, genome_ch) } @@ -291,4 +335,68 @@ workflow rain { jacusa2(samtools_index.out.tuple_sample_bam_bamindex, samtools_fasta_index.out.tuple_fasta_fastaindex) sapin(bamutil_clipoverlap.out.tuple_sample_clipoverbam, genome) normalize_gxf(params.annotation) +} + + +//************************************************* +def header(){ + // Log colors ANSI codes + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_white = params.monochrome_logs ? '' : "\033[0;37m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + + return """ + -${c_dim}--------------------------------------------------${c_reset}- + ${c_blue}.-./`) ${c_white}.-------. ${c_red} ______${c_reset} + ${c_blue}\\ .-.')${c_white}| _ _ \\ ${c_red} | _ `''.${c_reset} French National + ${c_blue}/ `-' \\${c_white}| ( ' ) | ${c_red} | _ | ) _ \\${c_reset} + ${c_blue} `-'`\"`${c_white}|(_ o _) / ${c_red} |( ''_' ) |${c_reset} Research Institute for + ${c_blue} .---. ${c_white}| (_,_).' __ ${c_red}| . (_) `. |${c_reset} + ${c_blue} | | ${c_white}| |\\ \\ | |${c_red}|(_ ._) '${c_reset} Sustainable Development + ${c_blue} | | ${c_white}| | \\ `' /${c_red}| (_.\\.' /${c_reset} + ${c_blue} | | ${c_white}| | \\ / ${c_red}| .'${c_reset} + ${c_blue} '---' ${c_white}''-' `'-' ${c_red}'-----'`${c_reset} + ${c_purple} RAIN - RNA Alterations Investigation using Nextflow - v${workflow.manifest.version}${c_reset} + -${c_dim}--------------------------------------------------${c_reset}- + """.stripIndent() +} + +/************** onComplete ***************/ + +workflow.onComplete { + + // Log colors ANSI codes + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + + if (workflow.success) { + log.info "\n${c_green} RAIN pipeline complete!" + } else { + log.error "${c_red}Oops .. something went wrong}" + } + + log.info " The results are available in the ‘${params.outdir}’ directory." + + def dateComplete = workflow.complete.format("dd-MMM-yyyy HH:mm:ss") + def duration = workflow.duration + def succeeded = workflow.stats['succeededCount'] ?: 0 + def cached = workflow.stats['cachedCount'] ?: 0 + log.info """ + ================ RAIN Pipeline Summary ================ + Completed at: ${dateComplete} + UUID : ${workflow.sessionId} + Duration : ${duration} + Succeeded : ${succeeded} + Cached : ${cached} + ======================================================= + ${c_reset} + """ } \ No newline at end of file From 3f2a1438176740d466cf872f1a3b9df405075648 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 27 May 2025 13:16:11 +0200 Subject: [PATCH 6/9] deal with bam files and csv file as input --- lib/Rain_utilities.groovy | 24 ++++ modules/samtools.nf | 27 ++++ rain.nf | 273 ++++++++++++++++++++++++++------------ 3 files changed, 240 insertions(+), 84 deletions(-) create mode 100644 lib/Rain_utilities.groovy diff --git a/lib/Rain_utilities.groovy b/lib/Rain_utilities.groovy new file mode 100644 index 0000000..d692afa --- /dev/null +++ b/lib/Rain_utilities.groovy @@ -0,0 +1,24 @@ +// dedicated to the utility functions +class Rain_utilities { + + // Function to chek if we + public static Boolean is_url(file) { + + // check if the file is a URL + if (file =~ /^(http|https|ftp|s3|az|gs):\/\/.*/) { + return true + } else { + return false + } + } + // Function to chek if we + public static Boolean is_fastq(file) { + + // check if the file is a URL + if (file =~ /.*(fq|fastq|fq.gz|fastq.gz)$/) { + return true + } else { + return false + } + } +} \ No newline at end of file diff --git a/modules/samtools.nf b/modules/samtools.nf index 0a7a55f..86cc39a 100644 --- a/modules/samtools.nf +++ b/modules/samtools.nf @@ -31,3 +31,30 @@ process samtools_fasta_index { samtools faidx ${fasta} """ } + +/* +http://www.htslib.org/doc/samtools-sort.html +Sort alignments by leftmost coordinates +*/ +process samtools_sort_bam { + label "samtools" + tag "${meta.id}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path ("*_sorted.bam"), emit: tuple_sample_sortedbam + + script: + + """ + if [[ \$(samtools view -H ${bam}| awk '/^@HD/ { for(i=1;i<=NF;i++) if(\$i ~ /^SO:/) print \$i }') == *"coordinate"* ]]; then + echo "Already sorted by coordinate" + ln -s ${bam} ${bam.baseName}_sorted.bam + else + echo "Not sorted by coordinate" + samtools sort -@ ${task.cpus} ${bam} -o ${bam.baseName}_sorted.bam + fi + """ +} \ No newline at end of file diff --git a/rain.nf b/rain.nf index 42b700b..c18d34c 100644 --- a/rain.nf +++ b/rain.nf @@ -10,14 +10,12 @@ import java.nio.file.* //************************************************* // Input/output params -params.reads = null // "/path/to/reads_{1,2}.fastq.gz/or/folder" -params.bams = null // "/path/to/reads.bam/or/folder" -params.genome = "/path/to/genome.fa" -params.annotation = "/path/to/annotations.gff3" -params.outdir = "result" -params.reads_extension = ".fastq.gz" // Extension used to detect reads in folder -params.paired_reads_pattern = "_{1,2}" - +params.reads = null // "/path/to/reads_{1,2}.fastq.gz/or/folder" +params.bam = null // "/path/to/reads.bam/or/folder" +params.csv = null // "/path/to/reads.bam/or/folder" +params.genome = null // "/path/to/genome.fa" +params.annotation = null // "/path/to/annotations.gff3" +params.outdir = "result" /* Specific AliNe params (some are shared with RAIN)*/ @@ -25,6 +23,8 @@ params.paired_reads_pattern = "_{1,2}" read_type_allowed = [ 'short_paired', 'short_single', 'pacbio', 'ont' ] params.read_type = "short_paired" // short_paired, short_single, pacbio, ont params.library_type = "auto" // can be 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'OSR', 'auto' - see https://github.com/Juke34/AliNe for more information +params.bam_library_type = null // can be 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'OSR', 'auto' - see https://github.com/Juke34/AliNe for more information + // Aline profiles aline_profile_allowed = [ 'docker', 'singularity', 'local', 'itrop' ] @@ -61,27 +61,30 @@ def helpMSG() { RAIN - RNA Alterations Investigation using Nextflow - v${workflow.manifest.version} Usage example: - nextflow run main.nf --illumina short_reads_Ecoli --genus Escherichia --species coli --species_taxid 562 -profile docker -resume - --help prints the help section + nextflow run rain.nf -profile docker --genome /path/to/genome.fa --annotation /path/to/annotation.gff3 --reads /path/to/reads_folder --output /path/to/output --aligner hisat2 - Input sequences: - --reads path to the illumina read file (fastq or fastq.gz) (default: $params.reads) - --genome path to the genome (default: $params.genome) + Parameters: + --help Prints the help section - Annotation input: - --annotation path to a GFF3 file with annotations of genomic features + Input sequences: + --annotation Path to the annotation file (GFF or GTF) + --bam Path to the bam file or folder. + --csv Path to the csv file containing fastq/bam files and their metadata. The csv file must contain a 'sample' column with the sample name, a 'input_1' column with the path to the bam file/or fastq1 file, a 'input_2' column with the path to R2 fastq file in case of paired data (either empty), and a 'strandedness' column with the library type (e.g. U, IU, MU, OU, ISF, ISR, MSF, MSR, OSF, OSR). + --reads Path to the reads file or folder. If a folder is provided, all the files with proper extension (.fq, .fastq, .fq.gz, .fastq.gz) in the folder will be used. You can provide remote files (commma separated list). + --genome Path to the reference genome in FASTA format. Output: - --output path to the output directory (default: $params.outdir) + --output Path to the output directory (default: $params.outdir) Optional input: - --aligner Aligner to use [default: $params.aligner] - --read_type type of reads among this list ${read_type_allowed} (default: short_paired) - --reads_extension Extension of the read files [default: $params.reads_extension] + --aligner Aligner to use [default: $params.aligner] + --read_type Type of reads among this list ${read_type_allowed} (default: short_paired) + --library_type Set the library_type of your fastq reads (default: auto). In auto mode salmon will guess the library type for each sample. [ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'OSR', 'auto' ] + --bam_library_type Set the library_type of your BAM reads (default: null). When using BAM you must set a library type: [ 'U', 'IU', 'MU', 'OU', 'ISF', 'ISR', 'MSF', 'MSR', 'OSF', 'OSR', 'auto' ] + Nextflow options: - -profile change the profile of nextflow both the engine and executor more details on github README [debug, test, itrop, singularity, local, docker] - -resume resume the workflow where it stopped + -profile Change the profile of nextflow both the engine and executor more details on github README [debug, test, itrop, singularity, local, docker] """ } @@ -90,22 +93,24 @@ def helpMSG() { log.info """ General Parameters - genome : ${params.genome} - annotation : ${params.annotation} - reads : ${params.reads} - read_type : ${params.read_type} - outdir : ${params.outdir} + annotation : ${params.annotation} + bam : ${params.bam} + csv : ${params.csv} + reads : ${params.reads} + genome : ${params.genome} + + reads_library_type : ${params.library_type} + bam_library_type : ${params.bam_library_type} + outdir : ${params.outdir} Alignment Parameters - aline_profiles : ${params.aline_profiles} - aligner : ${params.aligner} - library_type : ${params.library_type} - paired_reads_pattern : ${params.paired_reads_pattern} - reads_extension : ${params.reads_extension} + aline_profiles : ${params.aline_profiles} + aligner : ${params.aligner} + reads_library_type : ${params.library_type} Report Parameters MultiQC parameters - multiqc_config : ${params.multiqc_config} + multiqc_config : ${params.multiqc_config} """ @@ -121,7 +126,7 @@ include {fastqc as fastqc_raw; fastqc as fastqc_ali; fastqc as fastqc_dup; fastq include {gatk_markduplicates } from './modules/gatk.nf' include {multiqc} from './modules/multiqc.nf' include {fasta_uncompress} from "$baseDir/modules/pigz.nf" -include {samtools_index; samtools_fasta_index} from './modules/samtools.nf' +include {samtools_index; samtools_fasta_index; samtools_sort_bam} from './modules/samtools.nf' include {reditools2} from "./modules/reditools2.nf" include {jacusa2} from "./modules/jacusa2.nf" include {sapin} from "./modules/sapin.nf" @@ -173,80 +178,175 @@ def aline_profile = aline_profile_list.join(',') workflow { main: - +// ---------------------------------------------------------------------------- // --- DEAL WITH REFERENCE --- // check if reference exists Channel.fromPath(params.genome, checkIfExists: true) - .ifEmpty { exit 1, "Cannot find genome matching ${params.genome}!\n" } - .set{genome_raw} + .ifEmpty { exit 1, "Cannot find genome matching ${params.genome}!\n" } + .set{genome_raw} // uncompress it if needed fasta_uncompress(genome_raw) fasta_uncompress.out.genomeFa.set{genome_ch} // set genome to the output of fasta_uncompress +// ---------------------------------------------------------------------------- + // --- DEAL WITH ANNOTATION --- + Channel.empty().set{annotation_ch} + if (params.annotation){ + Channel.fromPath(params.annotation, checkIfExists: true) + .ifEmpty { exit 1, "Cannot find annotation matching ${params.annotation}!\n" } + .set{annotation_ch} + } +// ---------------------------------------------------------------------------- + def path_csv = params.csv + def path_bam = params.bam + def path_reads = params.reads + def has_fastq = false + // ---------------------- DEAL WITH BAM FILES ---------------------- + Channel.empty().set{sorted_bam} + if ( path_bam || path_csv ) { + + def bam_list=[] + def via_url = false + def via_csv = false + + if ( path_csv && path_csv.endsWith('.csv') ){ + log.info "Using CSV input file: ${path_csv}" + // --------- BAM CSV CASE --------- + via_csv = true + File input_csv = new File(path_csv) + if(!input_csv.exists()){ + error "The input ${params.csv} file does not exist!\n" + } + + bams = Channel.fromPath(params.csv) + .splitCsv(header: true, sep: ',') + .map { row -> + if(row.sample == null || row.sample.trim() == ''){ + error "The input ${params.csv} file does not contain a 'sample' column!\n" + } + def sample_id = row.sample + if(row.input_1 == null || row.input_1.trim() == ''){ + error "The input ${params.csv} file does not contain a 'input_1' column!\n" + } + if( row.input_1.toString().endsWith('.bam') ){ + def input_1 = file(row.input_1.trim()) + + if (! Rain_utilities.is_url(input_1) ) { + if (! input_1.exists() ) { + error "The input ${input_1} file does not does not exits!\n" + } + } else { + log.info "This bam input is an URL: ${input_1}" + } + + if(row.strandedness == null || row.strandedness.trim() == ''){ + error "The input ${params.csv} file does not contain a 'strandedness' column!\n" + } + def libtype = row.strandedness ?: 'U' + def meta = [ id: sample_id, libtype: libtype ] + return tuple(meta, input_1) + } + else if ( Rain_utilities.is_fastq(row.input_1.trim()) ) { + has_fastq = true + /* def input_1 = file(row.input_1.trim()) + if (! Rain_utilities.is_url(input_1) ) { + if (! input_1.exists() ) { + error "The input ${input_1} file does not does not exits!\n" + } + } else { + log.info "This fastq input is an URL: ${input_1}" + } + */ + } + else { + error "The input ${row.input_1} file is not a BAM or FASTQ file!\n" + } + } + } + else { - // DEAL WITH BAM FILES - if ( params.bams ) { - - def bam_list=[] - def path_bams = params.bams - def via_URL = false - if( path_bams.indexOf(',') >= 0) { - // Cut into list with coma separator - str_list = path_bams.tokenize(',') - // loop over elements - str_list.each { - str_list2 = it.tokenize(' ') - str_list2.each { - if ( it.endsWith('.bam') ){ - if (it.startsWith('https:') || it.startsWith('s3:') || it.startsWith('az:') || it.startsWith('gs:') || it.startsWith('ftp:') ) { - log.info "This bam input is an URL: ${it}" - via_URL = true + // --------- BAM LIST CASE --------- + if( path_bam.indexOf(',') >= 0) { + // Cut into list with coma separator + str_list = path_bam.tokenize(',') + // loop over elements + str_list.each { + str_list2 = it.tokenize(' ') + str_list2.each { + if ( it.endsWith('.bam') ){ + if ( Rain_utilities.is_url(it) ) { + log.info "This bam input is an URL: ${it}" + via_url = true + } + bam_list.add(file(it)) // use file insted of File for URL } - bam_list.add(file(it)) // use file insted of File for URL } } } - } - else { - File input_reads = new File(path_bams) - if(input_reads.exists()){ - if ( input_reads.isDirectory()) { - log.info "The input ${path_bams} is a folder!\n" - // in case of folder provided, add a trailing slash if missing - path_bams = "${input_reads}" + "/" - } - else { - log.info "The input ${path_bams} is a file!\n" - if ( path_bams.endsWith('.bam') ){ - if (path_bams.startsWith('https:') || path_bams.startsWith('s3:') || path_bams.startsWith('az:') || path_bams.startsWith('gs:') || path_bams.startsWith('ftp:') ) { - log.info "This bam input is an URL: ${it}" - via_URL = true + else { + // --------- BAM FOLDER CASE --------- + File input_reads = new File(path_bam) + if(input_reads.exists()){ + if ( input_reads.isDirectory()) { + log.info "The input ${path_bam} is a folder!" + // in case of folder provided, add a trailing slash if missing + path_bam = "${input_reads}" + "/*.bam" + } + // --------- BAM FILE CASE --------- + else { + if ( path_bam.endsWith('.bam') ){ + log.info "The input ${path_bam} is a bam file!" + if ( Rain_utilities.is_url(path_bam) ) { + log.info "This bam input is an URL: ${path_bam}" + via_url = true + } } - bam_list = path_bams } } } } - if (via_URL ){ - my_samples = Channel.of(bam_list) - bams = my_samples.flatten().map { it -> [it.name.split('_')[0], it] } - .groupTuple() - .ifEmpty { exit 1, "Cannot find reads matching ${path_reads}!\n" } - } else { - bams = Channel.fromFilePairs("${path_bams}/*.bam", size: 1, checkIfExists: true) - .ifEmpty { exit 1, "Cannot find reads matching ${path_bams}!\n" } + + // Add lib + if (! via_csv) { + if (via_url ){ + my_samples = Channel.of(bam_list) + bams = my_samples.flatten().map { it -> [it.name.split('_')[0], it] } + .groupTuple() + .ifEmpty { exit 1, "Cannot find reads matching ${path_reads}!\n" } + } else { + bams = Channel.fromFilePairs(path_bam, size: 1, checkIfExists: true) + .ifEmpty { exit 1, "Cannot find reads matching ${path_bam}!\n" } + } + + // check if bam library type is provided when bam files are used + bams.collect().map { list -> + if (!list.isEmpty() && !params.bam_library_type ) { + exit 1, "⚠️ When using BAM files, the library type must be provided. Please use the parameter --bam_library_type .\n" + } + } + // Set structure with dictionary as first value + bams = bams.map { id, bam_file -> + def meta = [ id: id, libtype: params.bam_library_type ] + tuple(meta, bam_file) + } } - bams + // sort the bam files + sorted_bam = samtools_sort_bam( bams ) } - +// ---------------------------------------------------------------------------- // DEAL WITH FASTQ FILES // Perform AliNe alignment - if (params.reads) { + Channel.empty().set{aline_alignments_all} + if (path_reads || ( path_csv && has_fastq) ) { + + if ( path_csv.endsWith('.csv') ){ + path_reads = path_csv + } ALIGNMENT ( 'Juke34/AliNe -r v1.4.0', // Select pipeline "${workflow.resume?'-resume':''} -profile ${aline_profile}", // workflow opts supplied as params for flexibility "-config ${params.aline_profiles}", - "--reads ${params.reads}", + "--reads ${path_reads}", genome_ch, "--read_type ${params.read_type}", "--aligner ${params.aligner}", @@ -295,8 +395,12 @@ workflow { tuple(meta, file) } } + // call rain - rain(aline_alignments_all, genome_ch) + all_bams = aline_alignments_all.mix(sorted_bam) + log.info "The following bam file(s) will be processed by RAIN:" + all_bams.view() + rain(all_bams, genome_ch, annotation_ch) } workflow rain { @@ -304,6 +408,7 @@ workflow rain { take: tuple_sample_sortedbam genome + annnotation main: @@ -334,7 +439,7 @@ workflow rain { samtools_fasta_index(genome) jacusa2(samtools_index.out.tuple_sample_bam_bamindex, samtools_fasta_index.out.tuple_fasta_fastaindex) sapin(bamutil_clipoverlap.out.tuple_sample_clipoverbam, genome) - normalize_gxf(params.annotation) + normalize_gxf(annnotation) } From 8a456b1eeb9798c3fd13ca9beb3410c75139c879 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 27 May 2025 13:23:23 +0200 Subject: [PATCH 7/9] fix test --- rain.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rain.nf b/rain.nf index c18d34c..f8efe50 100644 --- a/rain.nf +++ b/rain.nf @@ -338,7 +338,7 @@ workflow { Channel.empty().set{aline_alignments_all} if (path_reads || ( path_csv && has_fastq) ) { - if ( path_csv.endsWith('.csv') ){ + if ( path_csv && path_csv.endsWith('.csv') ){ path_reads = path_csv } From ec654e887e312a0d290ed24ea1b049340c6c2805 Mon Sep 17 00:00:00 2001 From: earx Date: Tue, 27 May 2025 14:28:54 +0200 Subject: [PATCH 8/9] fix conflicting dependencies --- docker/reditools2/env_reditools2.yml | 56 ++++++++++++++-------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/docker/reditools2/env_reditools2.yml b/docker/reditools2/env_reditools2.yml index 863b736..014b6be 100644 --- a/docker/reditools2/env_reditools2.yml +++ b/docker/reditools2/env_reditools2.yml @@ -1,3 +1,5 @@ +name: myenv + channels: - bioconda - conda-forge @@ -6,8 +8,8 @@ dependencies: - _openmp_mutex=4.5=2_gnu - attr=2.5.1=h166bdaf_1 - bzip2=1.0.8=h4bc722e_7 - - c-ares=1.34.4=hb9d3cd8_0 - - ca-certificates=2025.1.31=hbcca054_0 + - c-ares=1.34.5=hb9d3cd8_0 + - ca-certificates=2025.4.26=hbd8a1cb_0 - certifi=2019.11.28=py27h8c360ce_1 - htslib=1.17=h6bc39ce_1 - icu=73.2=h59595ed_0 @@ -16,55 +18,53 @@ dependencies: - ld_impl_linux-64=2.43=h712a8e2_4 - libcap=2.71=h39aace5_0 - libcurl=7.87.0=h6312ad2_0 - - libdeflate=1.23=h4ddbbb0_0 - - libedit=3.1.20250104=pl5321h7949ede_0 + - libdeflate=1.24=h86f0d12_0 + - libedit=3.1.20191231=he28a2e2_2 - libev=4.33=hd590300_2 - - libfabric=2.0.0=ha770c72_1 - - libfabric1=2.0.0=h14e6f36_1 + - libfabric=2.1.0=ha770c72_0 + - libfabric1=2.1.0=h14e6f36_0 - libffi=3.2.1=he1b5a44_1007 - - libgcc=14.2.0=h767d61c_2 - - libgcc-ng=14.2.0=h69a702a_2 - - libgcrypt-lib=1.11.0=hb9d3cd8_2 - - libgfortran=14.2.0=h69a702a_2 - - libgfortran5=14.2.0=hf1ad2bd_2 - - libgomp=14.2.0=h767d61c_2 - - libgpg-error=1.51=hbd13f7d_1 + - libgcc=15.1.0=h767d61c_2 + - libgcc-ng=15.1.0=h69a702a_2 + - libgcrypt-lib=1.11.1=hb9d3cd8_0 + - libgfortran=15.1.0=h69a702a_2 + - libgfortran5=15.1.0=hcea5267_2 + - libgomp=15.1.0=h767d61c_2 + - libgpg-error=1.55=h3f2d84a_0 - libhwloc=2.11.2=default_he43201b_1000 - libiconv=1.18=h4ce23a2_1 - - liblzma=5.6.4=hb9d3cd8_0 - libnghttp2=1.51.0=hdcd2b5c_0 - libnl=3.11.0=hb9d3cd8_0 - - libsqlite=3.46.0=hde9e2c9_0 - libssh2=1.10.0=haa6b8db_3 - - libstdcxx=14.2.0=h8f9b012_2 - - libstdcxx-ng=14.2.0=h4852527_2 - - libsystemd0=257.3=h3dc2cb9_0 - - libudev1=257.3=h9a4d06a_0 + - libstdcxx=15.1.0=h8f9b012_2 + - libstdcxx-ng=15.1.0=h4852527_2 + - libsystemd0=256.9=h2774228_0 + - libudev1=257.4=h9a4d06a_0 - libxml2=2.12.7=hc051c1a_1 - libzlib=1.2.13=h4ab18f5_6 - - lz4-c=1.10.0=h5888daf_1 + - lz4-c=1.9.4=hcb278e6_0 - mpi=1.0.1=mpich - mpi4py=3.0.3=py27hf423c55_1 - mpich=4.3.0=h1a8bee6_100 - - ncurses=6.5=h2d0b736_3 - - netifaces=0.10.9=py27hdf8410d_1002 + - ncurses=6.2=h58526e2_4 + - netifaces=0.10.4=py27_1 - openssl=1.1.1w=hd590300_0 - pip=20.1.1=pyh9f0ad1d_0 - psutil=5.7.0=py27hdf8410d_1 - pysam=0.20.0=py27h7835474_0 - python=2.7.15=h5a48372_1011_cpython - python_abi=2.7=1_cp27mu - - rdma-core=56.0=h5888daf_0 - - readline=8.2=h8c095d6_2 - - samtools=1.18=hd87286a_0 + - rdma-core=55.0=h5888daf_0 + - readline=8.1=h46c0cb4_0 + - samtools=1.12=h9aed4be_1 - setuptools=44.0.0=py27_0 - sortedcontainers=2.4.0=pyhd8ed1ab_0 - - sqlite=3.46.0=h6d4b2fc_0 + - sqlite=3.37.0=h9cd32fc_0 - tabix=1.11=hdfd78af_0 - tk=8.6.13=noxft_h4845f30_101 - - ucx=1.18.0=hfd9a62f_1 + - ucx=1.18.0=hfd9a62f_3 - wheel=0.37.1=pyhd8ed1ab_0 - xz=5.2.6=h166bdaf_0 - zlib=1.2.13=h4ab18f5_6 - zstd=1.5.6=ha6fb4c9_0 - + \ No newline at end of file From 625c1cfd19270502165ef5d1389e6b9db32ce943 Mon Sep 17 00:00:00 2001 From: earx Date: Tue, 27 May 2025 14:45:15 +0200 Subject: [PATCH 9/9] Remove unnecessary commands --- docker/reditools2/Dockerfile | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/docker/reditools2/Dockerfile b/docker/reditools2/Dockerfile index e1cf014..08f5071 100644 --- a/docker/reditools2/Dockerfile +++ b/docker/reditools2/Dockerfile @@ -1,15 +1,11 @@ FROM condaforge/miniforge3:24.7.1-2 -# Auto-activating Conda setup for Python 2.7 -RUN conda config --add channels bioconda \ - && conda config --add channels conda-forge \ - && conda create -n myenv \ - && echo "conda activate myenv" >> ~/.bashrc - # Tool-specific dependencies COPY env_reditools2.yml . -RUN conda env update -n myenv --file env_reditools2.yml \ - && conda clean --all -f -y +RUN conda env create -n myenv --file env_reditools2.yml \ + && conda clean --all -f -y \ + && echo "conda activate myenv" >> ~/.bashrc # Auto-activating Conda setup for Python 2.7 + # Get and install tools RUN git clone --depth=1 https://github.com/BioinfoUNIBA/REDItools2.git reditools2