Skip to content

Commit 5449c15

Browse files
committed
use CSV for AliNe_HE
1 parent da62508 commit 5449c15

4 files changed

Lines changed: 192 additions & 85 deletions

File tree

modules/bash.nf

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,68 @@ process transform_bases_fasta {
8383
awk '/^>/ {print; next} {gsub(/A/,"G"); gsub(/a/,"g"); print}' ${fasta} > ${output_name}
8484
"""
8585
}
86+
}
87+
88+
/*
 * Write a single CSV row describing one sample for AliNe, built from converted FASTQ reads.
 * Row layout: sample,fastq_1,fastq_2,strandedness,read_type
 * No header line is written here, and fastq_2 stays empty when only one FASTQ file is given.
 */
process create_aline_csv_he {
    label 'bash'
    tag "${file_id}"

    input:
        tuple val(meta), path(fastq)

    output:
        path "*.csv", emit: csv

    script:
        // Values carried by meta win; otherwise fall back to the first FASTQ file name or to the defaults.
        // file_id is assigned without `def`, exactly as before (presumably so that `tag` can resolve it - confirm).
        file_id = meta.file_id ?: RainUtils.get_file_id(fastq[0])
        def sample_id = meta.sample_id ?: RainUtils.get_file_id(fastq[0])
        def strandedness = meta.strandedness ?: "auto"
        def read_type = meta.read_type ?: params.read_type

        if (!fastq[1]) {
            // Single-end: the fastq_2 column is left empty
            """
            fastq0=\$(readlink -f ${fastq[0]})
            echo "${sample_id},\${fastq0},,${strandedness},${read_type}" > ${file_id}.csv
            """
        } else {
            // Paired-end: both mates are resolved to their real paths
            """
            fastq0=\$(readlink -f ${fastq[0]})
            fastq1=\$(readlink -f ${fastq[1]})
            echo "${sample_id},\${fastq0},\${fastq1},${strandedness},${read_type}" > ${file_id}.csv
            """
        }
}
123+
124+
/*
 * Collect the per-sample CSV files for AliNe input into a single CSV.
 * Writes hyper_editing_samples.csv: a header line
 * (sample,fastq_1,fastq_2,strandedness,read_type) followed by the content of
 * every input CSV, and publishes it to output_dir.
 */
process collect_aline_csv_he {
    label 'bash'
    publishDir("${output_dir}", mode:"copy", pattern: "*.csv")

    input:
        val all_csv     // Per-sample CSV file path(s) to concatenate (received as plain values)
        val output_dir  // Directory the merged CSV is published to

    output:
        path "*.csv", emit: csv

    script:
        // Accept a single path as well as a list of paths
        def csv_files = (all_csv instanceof Collection) ? all_csv : [all_csv]
        // Single-quote every path (escaping embedded single quotes) so that bash does not
        // word-split or glob-expand paths containing spaces or special characters
        def list_csv_bash = csv_files.collect { "'" + it.toString().replace("'", "'\\''") + "'" }.join(" ")
        """
        echo "sample,fastq_1,fastq_2,strandedness,read_type" > hyper_editing_samples.csv
        for entry in ${list_csv_bash}; do
            cat "\$entry" >> hyper_editing_samples.csv
        done
        """
}

modules/samtools.nf

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ process samtools_split_mapped_unmapped {
7979
// _seqkit suffix is added by aline when starting from fastq, when starting from BAM, there is no _seqkit suffix.
8080
// We want to keep the same base name for both cases, because it is used to recognize the sample name (sample name is what is before the first occurrence of _seqkit)
8181
// It is needed at the step of sequence restoration, where we join the aligned BAM with the original unmapped BAM based on the sample name. (see hyper-editing.nf)
82-
def suffix = base.contains('_seqkit') ? '' : '_seqkit'
82+
def suffix = base.contains('_AliNe') ? '' : '_AliNe'
8383
"""
8484
# Extract mapped reads (SAM flag -F 4: exclude unmapped)
8585
samtools view -@ ${task.cpus} -b -F 4 ${bam} > ${base}${suffix}_mapped.bam
@@ -126,10 +126,11 @@ process convert_to_fastq {
126126
process samtools_merge_bams {
127127
label "samtools"
128128
tag "${meta.id}"
129-
publishDir("${params.outdir}/merged_bam", mode:"copy")
129+
publishDir("${params.outdir}/${output}", mode:"copy")
130130

131131
input:
132132
tuple val(meta), path(bam1), path(bam2)
133+
val output
133134

134135
output:
135136
tuple val(meta), path("*_merged.bam"), emit: merged_bam

0 commit comments

Comments
 (0)