Juke34
diff --git a/‎modules/bash.nf‎
Lines changed: 64 additions & 0 deletions b/‎modules/bash.nf‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎modules/samtools.nf‎
Lines changed: 3 additions & 2 deletions b/‎modules/samtools.nf‎
Lines changed: 3 additions & 2 deletions
@@ -83,4 +83,68 @@ process transform_bases_fasta {
             awk '/^>/ {print; next} {gsub(/A/,"G"); gsub(/a/,"g"); print}' ${fasta} > ${output_name}
             """
         }
+}
+
+/*
+ * Create CSV file for AliNe input from converted FASTQ reads
+ * Generates a CSV with columns: sample,fastq_1,fastq_2,strandedness,read_type
+ */
+process create_aline_csv_he {
+    label 'bash'
+    tag "${file_id}"
+
+    input:
+        // meta  : map; the keys sample_id, file_id, strandedness and read_type are used when set
+        // fastq : one FASTQ file (single-end) or two FASTQ files (paired-end)
+        tuple val(meta), path(fastq)
+
+    output:
+        // One CSV per task, named <file_id>.csv, holding a single row and no header
+        path "*.csv", emit: csv
+
+    script:
+        // A single staged file is handed over as a bare path while several files come as a
+        // collection; normalise to a list so that index 0/1 always means "first/second file".
+        def reads = fastq instanceof Collection ? fastq.toList() : [fastq]
+
+        // Values found in meta win; otherwise fall back to the file name, "auto", or the pipeline parameter
+        def sample_id = meta.sample_id ?: RainUtils.get_file_id(reads[0])
+        // Deliberately assigned without `def`: the `tag` directive above refers to this name.
+        // NOTE(review): confirm the tag is resolved after this assignment in the Nextflow version in use.
+        file_id = meta.file_id ?: RainUtils.get_file_id(reads[0])
+        def strandedness = meta.strandedness ?: "auto"
+        def read_type = meta.read_type ?: params.read_type
+
+        // Paired-end: resolve the second mate as well. Single-end: keep the fastq_2 column empty.
+        def resolve_mate2 = reads.size() > 1 ? "fastq1=\$(readlink -f ${reads[1]})" : 'fastq1=""'
+
+        // readlink -f turns the staged symlink into the absolute path of the real file,
+        // so the CSV row stays valid outside this task's work directory.
+        """
+        fastq0=\$(readlink -f ${reads[0]})
+        ${resolve_mate2}
+        echo "${sample_id},\${fastq0},\${fastq1},${strandedness},${read_type}" > ${file_id}.csv
+        """
+}
+
+/*
+ * Collect CSV files for AliNe input from converted FASTQ reads
+ * Generates a single CSV with columns: sample,fastq_1,fastq_2,strandedness,read_type
+ */
+process collect_aline_csv_he {
+    label 'bash'
+    publishDir("${output_dir}", mode:"copy", pattern: "*.csv")
+
+    input:
+        val all_csv     // list of per-sample CSV file paths; received as plain values and read in place by `cat`
+        val output_dir  // directory the merged CSV is published to (see publishDir above)
+
+    output:
+        path "*.csv", emit: csv
+
+    script:
+        // Join the paths with single spaces so the bash `for` loop below can iterate over them.
+        // Declared with `def` to keep the variable local to this task.
+        def csv_paths = all_csv.join(" ")
+        // Write the header once, then append every per-sample row file in the order received.
+        """
+        echo "sample,fastq_1,fastq_2,strandedness,read_type" > hyper_editing_samples.csv
+        for entry in ${csv_paths}; do
+            cat \$entry >> hyper_editing_samples.csv
+        done
+        """
 }
@@ -79,7 +79,7 @@ process samtools_split_mapped_unmapped {
         // _seqkit suffix is added by aline when starting from fastq, when starting from BAM, there is no _seqkit suffix. 
         // We want to keep the same base name for both cases, because it is used to recognize the sample name (sample name is what is bofore the first occurrence of _seqkit)
         // It is needed at the step of sequence restoration, where we join the aligned BAM with the original unmapped BAM based on the sample name. (see hyper-editing.nf)
-        def suffix = base.contains('_seqkit') ? '' : '_seqkit'
+        def suffix = base.contains('_AliNe') ? '' : '_AliNe'
         """
         # Extract mapped reads (SAM flag -F 4: exclude unmapped)
         samtools view -@ ${task.cpus} -b -F 4 ${bam} > ${base}${suffix}_mapped.bam
@@ -126,10 +126,11 @@ process convert_to_fastq {
 process samtools_merge_bams {
     label "samtools"
     tag "${meta.id}"
-    publishDir("${params.outdir}/merged_bam", mode:"copy")
+    publishDir("${params.outdir}/${output}", mode:"copy")
 
     input:
         tuple val(meta), path(bam1), path(bam2)
+        val output
 
     output:
         tuple val(meta), path("*_merged.bam"), emit: merged_bam