Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions bin/restore_sequences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""
Restore original sequences in BAM files from original unmapped BAM.
This tool replaces modified sequences with their original counterparts.
"""

import sys
import argparse
import pysam


def fix_bam(bam_aligned, bam_unmapped, bam_out):
    """Restore original read sequences in an aligned BAM.

    For each read in *bam_aligned*, look up the read of the same name in
    the original unmapped BAM and copy back its sequence and qualities,
    dropping the MD tag (which described the modified sequence).

    Args:
        bam_aligned: path to the aligned BAM whose sequences were modified.
        bam_unmapped: path to the original unmapped BAM (source of truth).
        bam_out: path of the output BAM to write.
    """
    unmapped = pysam.AlignmentFile(bam_unmapped, "rb")
    try:
        # Build an in-memory name index over the unmapped BAM for fast
        # read-name lookups.
        unmapped_index = pysam.IndexedReads(unmapped)
        unmapped_index.build()

        # pysam.Samfile is the deprecated alias of AlignmentFile.
        with pysam.AlignmentFile(bam_aligned, "rb") as bamfile, \
                pysam.AlignmentFile(bam_out, "wb", template=bamfile) as out:
            for read in bamfile:
                try:
                    matches = unmapped_index.find(read.query_name)
                except KeyError:
                    # Not in the unmapped BAM: keep the read as-is instead
                    # of silently dropping it from the output.
                    out.write(read)
                    continue
                for orig_read in matches:
                    # Paired-end mates share a query name: only copy from
                    # the matching mate, not whichever comes first.
                    if read.is_paired and orig_read.is_read1 != read.is_read1:
                        continue
                    # Assigning query_sequence resets the qualities, so
                    # qualities must be set afterwards.
                    read.query_sequence = orig_read.query_sequence
                    read.query_qualities = orig_read.query_qualities
                    # The MD tag refers to the old sequence; remove it.
                    read.set_tag("MD", None)
                    break
                # NOTE(review): if the aligner reverse-complemented the read
                # (read.is_reverse), the forward-strand original sequence is
                # copied as-is — confirm this is intended downstream.
                out.write(read)
    finally:
        unmapped.close()


def main():
    """Parse command-line arguments and run the sequence restoration."""
    parser = argparse.ArgumentParser(description=__doc__)
    # All three paths are mandatory; argparse enforces their presence.
    options = (
        ("-b", "--bam", "aligned BAM file"),
        ("-u", "--unmapped", "original unmapped BAM file"),
        ("-o", "--output", "output BAM file"),
    )
    for short_flag, long_flag, help_text in options:
        parser.add_argument(short_flag, long_flag, required=True, help=help_text)
    opts = parser.parse_args()

    fix_bam(opts.bam, opts.unmapped, opts.output)


if __name__ == "__main__":
    sys.exit(main())
4 changes: 2 additions & 2 deletions build_containers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ if [ "$build_docker" = true ]; then
# List of image names
image_list=( )

for dir in docker/*; do
for dir in containers/docker/*; do
cd "${dir}"
imgname=$(echo $dir | rev | cut -d/ -f1 | rev)
image_list+=(${imgname})
Expand Down Expand Up @@ -161,7 +161,7 @@ if [ "$build_singularity" = true ]; then
sif_list=( )

# Loop through all .def files in singularity/ subdirectories
for def_file in singularity/*/*.def; do
for def_file in containers/singularity/*/*.def; do
# Extract image name from directory
imgname=$(basename "$(dirname "$def_file")")
sif_output="sif_images/${imgname}.sif"
Expand Down
2 changes: 1 addition & 1 deletion config/softwares.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ process {
container = 'quay.io/biocontainers/star:2.7.10b--h6b7c446_1'
}
withLabel: 'samtools' {
container = 'quay.io/biocontainers/samtools:1.3.1--h0cf4675_11'
container = 'quay.io/biocontainers/samtools:1.23--h96c455f_0'
}
withLabel: "sapin" {
container = singularity.enabled ? "${params.sifPath}/sapin.sif" : "sapin"
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dependencies:
- typing_extensions=4.13.2=pyh29332c3_0
- tzdata=2025b=h78e105d_0
- wheel=0.45.1=pyhd8ed1ab_1
- pysam=0.23.3=py312h47d5410_1
- pip:
- flameprof==0.4
prefix: /home/earx/miniforge3/envs/rain-stats-scripts
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dependencies:
- typing_extensions=4.13.2=pyh29332c3_0
- tzdata=2025b=h78e105d_0
- wheel=0.45.1=pyhd8ed1ab_1
- pysam=0.23.3=py312h47d5410_1
- pip:
- flameprof==0.4
prefix: /home/earx/miniforge3/envs/rain-stats-scripts
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion modules/aline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ process AliNe {
pipeline_name,
profile,
config,
reads,
"--reads ${reads}",
"--reference ${genome}",
read_type,
aligner,
Expand Down
65 changes: 65 additions & 0 deletions modules/bash.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,69 @@ process extract_libtype {
LIBTYPE=\$(grep expected_format ${samlmon_json} | awk '{print \$2}' | tr -d '",\n')
"""

}

/*
* Transform A to G bases in FASTQ reads for hyper-editing detection
* Converts all adenosine (A) nucleotides to guanosine (G) in sequence lines
* Handles both single-end and paired-end reads
*/
process transform_bases_fastq {
    label 'bash'
    tag "${meta.id}"
    // Only the converted FASTQs are published to the caller-supplied directory.
    publishDir("${output_dir}", mode:"copy", pattern: "*_AtoG.fastq.gz")

    input:
    tuple val(meta), path(fastq) // Can be single file or multiple files (R1, R2, singles)
    val output_dir

    output:
    tuple val(meta), path("*_AtoG.fastq.gz"), emit: converted_fastq

    script:
    """
    # Process all FASTQ files (handles both single-end and paired-end)
    for fq in ${fastq}; do
        # Strip either a .fastq.gz or a .fq.gz extension to get the base name.
        base=\$(basename "\${fq}" .fastq.gz)
        base=\$(basename "\${base}" .fq.gz)

        # Decompress, convert A to G in sequence lines (every 4th line starting from line 2),
        # then recompress
        # NOTE(review): assumes gzipped, strictly 4-line FASTQ records — confirm
        # upstream guarantees this (wrapped FASTQ would corrupt the conversion).
        zcat "\${fq}" | awk 'NR%4==2 {gsub(/A/,"G"); gsub(/a/,"g")} {print}' | gzip > "\${base}_AtoG.fastq.gz"
    done
    """
}

/*
* Transform A to G bases in reference genome FASTA for hyper-editing detection
* Converts all adenosine (A) nucleotides to guanosine (G) in sequence lines (non-header lines)
*/
process transform_bases_fasta {
    label 'bash'
    tag "${fasta.baseName}"
    // Only the converted FASTA is published to the caller-supplied directory.
    publishDir("${output_dir}", mode:"copy", pattern: "*_AtoG.fa")

    input:
    path fasta
    val output_dir

    output:
    path "*_AtoG.fa", emit: converted_fasta

    script:
    def is_compressed = fasta.name.endsWith('.gz')
    // Anchor the extension match: the unanchored /\.fa(sta)?/ would also strip
    // a ".fa"/".fasta" occurring mid-name (e.g. "x.fasta.v2" -> "x.v2").
    def base_name = fasta.baseName.replaceAll(/\.fa(sta)?$/, '')
    def output_name = "${base_name}_AtoG.fa"

    if (is_compressed) {
        """
        # Decompress, convert A to G in non-header lines, save uncompressed
        zcat ${fasta} | awk '/^>/ {print; next} {gsub(/A/,"G"); gsub(/a/,"g"); print}' > ${output_name}
        """
    } else {
        """
        # Convert A to G in non-header lines (lines not starting with >)
        awk '/^>/ {print; next} {gsub(/A/,"G"); gsub(/a/,"g"); print}' ${fasta} > ${output_name}
        """
    }
}
7 changes: 4 additions & 3 deletions modules/pluviometer.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process pluviometer {
label "pluviometer"
publishDir("${params.outdir}/feature_edits", mode: "copy")
publishDir("${params.outdir}/pluviometer/${tool_format}", mode: "copy")
tag "${meta.id}"

input:
Expand All @@ -9,7 +9,7 @@ process pluviometer {
val(tool_format)

output:
tuple(val(meta), path("features.tsv"), path("aggregates.tsv"), path("pluviometer.log"), emit: tuple_sample_feature_edits)
tuple(val(meta), path("*features.tsv"), path("*aggregates.tsv"), path("*pluviometer.log"), emit: tuple_sample_feature_edits)


script:
Expand All @@ -22,6 +22,7 @@ process pluviometer {
--cov 1 \
--edit_threshold ${params.edit_threshold} \
--threads ${task.cpus} \
--aggregation_mode ${params.aggregation_mode}
--aggregation_mode ${params.aggregation_mode} \
--output ${meta.id}
"""
}
27 changes: 27 additions & 0 deletions modules/python.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
Here are described all processes related to python
*/

/*
 * Restore original read sequences in aligned BAM files from the original
 * unmapped BAM (not FASTQ — the lookup is done by read name via pysam).
 * Used after alignment with A-to-G converted sequences to restore original bases.
 */
process restore_original_sequences {
    label "pluviometer"
    // Tag by sample id for consistency with the other processes in this pipeline.
    tag "${meta.id}"
    publishDir("${output_dir}", mode:"copy", pattern: "*_restored.bam")

    input:
    tuple( val(meta), path(bam), path(bam_unmapped))
    val output_dir

    output:
    tuple val(meta), path("*_restored.bam"), emit: restored_bam

    script:
    def output_name = "${bam.baseName}_restored.bam"

    """
    restore_sequences.py -b ${bam} -u ${bam_unmapped} -o ${output_name}
    """
}
81 changes: 81 additions & 0 deletions modules/samtools.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,85 @@ process samtools_sort_bam {
samtools sort -@ ${task.cpus} ${bam} -o ${bam.baseName}_sorted.bam
fi
"""
}

/*
* Split BAM file into mapped and unmapped reads
*/
/*
 * Split BAM file into mapped and unmapped reads (SAM flag 0x4).
 */
process samtools_split_mapped_unmapped {
    label "samtools"
    tag "${meta.id}"
    publishDir("${params.outdir}/split_bam", mode:"copy", pattern: "*_{mapped,unmapped}.bam")

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*_mapped.bam"), emit: mapped_bam
    tuple val(meta), path("*_unmapped.bam"), emit: unmapped_bam

    script:
    def base = bam.baseName
    // _seqkit suffix is added by aline when starting from fastq, when starting from BAM, there is no _seqkit suffix.
    // We want to keep the same base name for both cases, because it is used to recognize the sample name (sample name is what is before the first occurrence of _seqkit)
    // It is needed at the step of sequence restoration, where we join the aligned BAM with the original unmapped BAM based on the sample name. (see hyper-editing.nf)
    def suffix = base.contains('_seqkit') ? '' : '_seqkit'
    """
    # Extract mapped reads (SAM flag -F 4: exclude unmapped)
    samtools view -@ ${task.cpus} -b -F 4 ${bam} > ${base}${suffix}_mapped.bam

    # Extract unmapped reads (SAM flag -f 4: include only unmapped)
    samtools view -@ ${task.cpus} -b -f 4 ${bam} > ${base}${suffix}_unmapped.bam
    """
}

/*
* Convert BAM to FASTQ format
* Extracts reads from BAM and converts to FASTQ for re-alignment
*/
process convert_to_fastq {
    label "samtools"
    tag "${meta.id}"
    publishDir("${output_dir}", mode:"copy", pattern: "*.fastq*")

    input:
    tuple val(meta), path(bam)
    val output_dir

    output:
    tuple val(meta), path("*.fastq.gz"), emit: fastq_reads

    script:
    def output_base = "${bam.baseName}"
    """
    # Check if BAM contains paired-end or single-end reads
    # (count of reads with the PAIRED flag 0x1; zero means single-end)
    if samtools view -c -f 1 ${bam} | grep -q "^0\$"; then
        # Single-end reads
        samtools fastq -@ ${task.cpus} ${bam} | gzip > ${output_base}.fastq.gz
    else
        # Paired-end reads (.gz extension makes samtools fastq compress the output)
        # NOTE(review): samtools fastq expects name-collated input for correct
        # mate pairing — confirm upstream BAMs are name-sorted/collated, or a
        # `samtools collate` step is needed before this call.
        samtools fastq -@ ${task.cpus} -1 ${output_base}_R1.fastq.gz -2 ${output_base}_R2.fastq.gz ${bam}
    fi
    """
}

/*
* Merge two BAM files
* Combines aligned reads from two BAM files into a single output BAM
*/
/*
 * Merge two BAM files into a single BAM named after the sample id.
 */
process samtools_merge_bams {
    label "samtools"
    tag "${meta.id}"
    publishDir("${params.outdir}/merged_bam", mode:"copy")

    input:
    tuple val(meta), path(bam1), path(bam2)

    output:
    tuple val(meta), path("*_merged.bam"), emit: merged_bam

    script:
    // Name the merged output after the sample id so downstream globs match.
    def merged_name = "${meta.id}_merged.bam"
    """
    samtools merge -@ ${task.cpus} ${merged_name} ${bam1} ${bam2}
    """
}
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ profiles {
}
test {
params.aligner = "STAR"
params.reads = "${baseDir}/data/chr21/chr21_small_R1.fastq.gz"
params.reads = "${baseDir}/data/chr21/chr21_small_R1.fastq.gz,${baseDir}/data/chr21/chr21_small_R2.fastq.gz"
params.genome = "${baseDir}/data/chr21/chr21_small.fasta.gz"
params.annotation = "${baseDir}/data/chr21/chr21_small_filtered.gff3.gz"
params.strandedness = "ISR"
Expand Down
Loading