From 71fb269c29a910d58a6065697b67938dfb7bdbcd Mon Sep 17 00:00:00 2001
From: earx <eascarrunz.dev@mailfence.com>
Date: Tue, 24 Jun 2025 10:59:20 +0200
Subject: [PATCH 1/5] fix strandedness input

---
 modules/jacusa2.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/jacusa2.nf b/modules/jacusa2.nf
index 60f6a87..ec658c9 100644
--- a/modules/jacusa2.nf
+++ b/modules/jacusa2.nf
@@ -25,9 +25,7 @@ process jacusa2 {
             // Unstranded
             jacusa_strand_param = "UNSTRANDED"
         } else {
-            // Unsupported: Pass the library type string so that it's reported in
-            // the Jacusa2 error message
-            jacusa_strand_param = meta.libtype
+            jacusa_strand_param = "UNSTRANDED"
         }
         """
         java -Xmx${task.memory.toMega()}M -jar /usr/local/bin/JACUSA_v2.0.4.jar \

From 8bfbf8a280dd8a40f80ddc93d4453e030f9ddb73 Mon Sep 17 00:00:00 2001
From: earx <eascarrunz.dev@mailfence.com>
Date: Tue, 24 Jun 2025 11:17:45 +0200
Subject: [PATCH 2/5] set output format to extended BED6 and add comments
 explaining other options

---
 modules/jacusa2.nf | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/modules/jacusa2.nf b/modules/jacusa2.nf
index ec658c9..58c1e05 100644
--- a/modules/jacusa2.nf
+++ b/modules/jacusa2.nf
@@ -28,10 +28,16 @@ process jacusa2 {
             jacusa_strand_param = "UNSTRANDED"
         }
         """
+        # -A    Show all sites, inc. sites without variants
+        # -f    Use BED-6 extended output format
+        # -p    Number of threads
+        # -r    Path of output file
+        # -c    Minimum coverage: 1 read
+        # -s    Store feature-filtered results in another file
         java -Xmx${task.memory.toMega()}M -jar /usr/local/bin/JACUSA_v2.0.4.jar \
             call-1 \
             -A \
-            -f V \
+            -f B \
             -p ${task.cpus} \
             -r ${base_name}.site_edits_jacusa2.txt \
             -c 1 \

From e72470906c4ce8e1bb258c63a1f07f5c98bde528 Mon Sep 17 00:00:00 2001
From: earx <eascarrunz.dev@mailfence.com>
Date: Tue, 24 Jun 2025 11:24:07 +0200
Subject: [PATCH 3/5] Add usage note to Dockerfile

---
 docker/jacusa2/Dockerfile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker/jacusa2/Dockerfile b/docker/jacusa2/Dockerfile
index 208e659..5d431f9 100644
--- a/docker/jacusa2/Dockerfile
+++ b/docker/jacusa2/Dockerfile
@@ -7,3 +7,6 @@ RUN wget https://github.com/dieterich-lab/JACUSA2/releases/download/v2.0.4/JACUS
 RUN echo "#!/usr/bin/env bash" > /usr/local/bin/jacusa2
 RUN echo "java -jar /usr/local/bin/JACUSA_v2.0.4.jar $@" >> /usr/local/bin/jacusa2
 RUN chmod +x /usr/local/bin/jacusa2
+
+# ⚠️ IMPORTANT
+# Run the Docker image with the option `--ulimit nofile=1024:1024`

From d40714efa5e1c8c2c658c2df704a37e43d28496e Mon Sep 17 00:00:00 2001
From: earx <eascarrunz.dev@mailfence.com>
Date: Tue, 24 Jun 2025 12:14:02 +0200
Subject: [PATCH 4/5] add Jacusa2 reader. new field `score` in SiteVariantData

---
 bin/stats/site_variant_readers.py | 62 +++++++++++++++++++++++++++++--
 bin/stats/utils.py                |  1 +
 2 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/bin/stats/site_variant_readers.py b/bin/stats/site_variant_readers.py
index 896c2a6..47e95cd 100644
--- a/bin/stats/site_variant_readers.py
+++ b/bin/stats/site_variant_readers.py
@@ -7,6 +7,22 @@
 REDITOOLS_FIELDS = ["Seqid", "Position", "Reference", "Strand", "Coverage", "MeanQ", "Frequencies"]
 REDITOOLS_FIELD_INDEX = {field: i for i, field in  enumerate(REDITOOLS_FIELDS)}
 
+# Jacusa output file fields in the extended BED6 format
+JACUSA_FIELDS = ["contig", "start", "end", "name", "score", "strand", "bases11", "info", "filter", "ref"]
+JACUSA_FIELDS_INDEX = {field: i for i, field in enumerate(JACUSA_FIELDS)}
+
+def skip_comments(handle: TextIO, s: str) -> Optional[str]:
+    """
+    Read and return the next line in a text file handle that does not start with the comment prefix `s`.
+    """
+    line: str = handle.readline()
+    
+    while line.startswith(s):
+        line = handle.readline()
+
+    return line
+
+
 class RNAVariantReader(ABC):
     """Abstract class defining the API for readers"""
 
@@ -48,7 +64,8 @@ def read(self) -> Optional[SiteVariantData]:
             strand=self.strand,
             coverage=1,
             mean_quality=30.0,
-            frequencies=self.frequencies
+            frequencies=self.frequencies,
+            score=0.0
         )
         self.position += 1
 
@@ -128,7 +145,8 @@ def _parse_parts(self) -> SiteVariantData:
             strand=strand,
             coverage=int(self.parts[REDITOOLS_FIELD_INDEX["Coverage"]]),
             mean_quality=float(self.parts[REDITOOLS_FIELD_INDEX["MeanQ"]]),
-            frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(",") + [0])
+            frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(",") + [0]),
+            score=0.0
         )
 
     def read(self) -> Optional[SiteVariantData]:
@@ -147,7 +165,6 @@ def close(self) -> None:
         """Close the file"""
         self.file_handle.close()
     
-
 class Reditools2Reader(ReditoolsXReader):
     def parse_strand(self) -> int:
         strand = int(self.parts[REDITOOLS_FIELD_INDEX["Strand"]])
@@ -174,3 +191,42 @@ def parse_strand(self) -> int:
             case _:
                 raise Exception(f"Invalid strand value: {strand_str}")
 
+class Jacusa2Reader():
+    def __init__(self, file_handle: TextIO) -> None:
+        self.file_handle: TextIO = file_handle
+
+        line = skip_comments(self.file_handle, "##")
+
+        # Check the Jacusa header
+        assert line.lstrip('#').split('\t') == JACUSA_FIELDS
+        
+        return None
+    
+    def read(self):
+        line: str = self.file_handle.readline()
+        parts: list[str] = line.split('\t')
+
+        reference_nuc_str: str = parts[JACUSA_FIELDS_INDEX["ref"]]
+
+        strand_str: str = parts[JACUSA_FIELDS_INDEX["strand"]]
+
+        match strand_str:
+            case '.':
+                strand = 0
+            case '+':
+                strand = 1
+            case '-':
+                strand -1
+
+        frequencies=np.int32(parts[JACUSA_FIELDS_INDEX["bases11"]].split(',') + [0])
+        
+        return SiteVariantData(
+            seqid=parts[JACUSA_FIELDS_INDEX["contig"]],
+            position=int(parts[JACUSA_FIELDS_INDEX["start"]]),
+            reference=NUC_STR_TO_IND[reference_nuc_str],
+            strand=strand,
+            coverage=sum(frequencies),
+            mean_quality=float("nan"),
+            frequencies=frequencies,
+            score=float(parts[JACUSA_FIELDS_INDEX["score"]])
+        )
diff --git a/bin/stats/utils.py b/bin/stats/utils.py
index 33d7012..5b6cc75 100644
--- a/bin/stats/utils.py
+++ b/bin/stats/utils.py
@@ -49,3 +49,4 @@ class SiteVariantData:
     coverage: int
     mean_quality: float
     frequencies: NDArray[np.int32]
+    score: float

From 885e4a92b4ef3852ad7a136098d4b45cf4fe9272 Mon Sep 17 00:00:00 2001
From: earx <eascarrunz.dev@mailfence.com>
Date: Tue, 24 Jun 2025 13:35:24 +0200
Subject: [PATCH 5/5] make pluviometer take jacusa2 input

---
 bin/stats/pluviometer.py          |  5 ++++-
 bin/stats/site_variant_readers.py | 12 ++++++++----
 modules/jacusa2.nf                |  6 +++---
 rain.nf                           |  2 ++
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/bin/stats/pluviometer.py b/bin/stats/pluviometer.py
index 3f725e1..976472a 100644
--- a/bin/stats/pluviometer.py
+++ b/bin/stats/pluviometer.py
@@ -13,7 +13,7 @@
 import numpy as np
 from numpy.typing import NDArray
 from typing import TextIO, NamedTuple, Optional, Generator
-from site_variant_readers import RNAVariantReader, Reditools2Reader, Reditools3Reader
+from site_variant_readers import RNAVariantReader, Reditools2Reader, Reditools3Reader, Jacusa2Reader
 import argparse
 from contextlib import nullcontext
 import sys
@@ -520,6 +520,9 @@ def scan_and_count(self, reader: RNAVariantReader) -> None:
             case "reditools3":
                 print("Reditools3 format\n")
                 sv_reader: RNAVariantReader = Reditools3Reader(sv_handle)
+            case "jacusa2":
+                print("Jacusa2 BED6 extended format")
+                sv_reader: RNAVariantReader = Jacusa2Reader(sv_handle)
             case _:
                 raise Exception(f'Unimplemented format "{args.format}"')
 
diff --git a/bin/stats/site_variant_readers.py b/bin/stats/site_variant_readers.py
index 47e95cd..f352a59 100644
--- a/bin/stats/site_variant_readers.py
+++ b/bin/stats/site_variant_readers.py
@@ -198,12 +198,16 @@ def __init__(self, file_handle: TextIO) -> None:
         line = skip_comments(self.file_handle, "##")
 
         # Check the Jacusa header
-        assert line.lstrip('#').split('\t') == JACUSA_FIELDS
+        assert line.strip().lstrip('#').split('\t') == JACUSA_FIELDS
         
         return None
     
-    def read(self):
-        line: str = self.file_handle.readline()
+    def read(self) -> Optional[SiteVariantData]:
+        line: str = self.file_handle.readline().strip()
+
+        if line == "":
+            return None
+
         parts: list[str] = line.split('\t')
 
         reference_nuc_str: str = parts[JACUSA_FIELDS_INDEX["ref"]]
@@ -222,7 +226,7 @@ def read(self):
         
         return SiteVariantData(
             seqid=parts[JACUSA_FIELDS_INDEX["contig"]],
-            position=int(parts[JACUSA_FIELDS_INDEX["start"]]),
+            position=int(parts[JACUSA_FIELDS_INDEX["start"]]),  # Jacusa2 position is 0-based
             reference=NUC_STR_TO_IND[reference_nuc_str],
             strand=strand,
             coverage=sum(frequencies),
diff --git a/modules/jacusa2.nf b/modules/jacusa2.nf
index 58c1e05..20332a6 100644
--- a/modules/jacusa2.nf
+++ b/modules/jacusa2.nf
@@ -15,13 +15,13 @@ process jacusa2 {
         base_name = bam.BaseName
         // Set the strand orientation parameter from the library type parameter
         // Terms explained in https://salmon.readthedocs.io/en/latest/library_type.html
-        if (meta.libtype in ["ISR", "SR"]) {
+        if (meta.strandedness in ["ISR", "SR"]) {
             // First-strand oriented
             jacusa_strand_param = "FR_SECONDSTRAND"
-        } else if (meta.libtype in ["ISF", "SF"]) {
+        } else if (meta.strandedness in ["ISF", "SF"]) {
             // Second-strand oriented
             jacusa_strand_param = "RF_FIRSTSTRAND"
-        } else if (meta.libtype in ["IU", "U"]) {
+        } else if (meta.strandedness in ["IU", "U"]) {
             // Unstranded
             jacusa_strand_param = "UNSTRANDED"
         } else {
diff --git a/rain.nf b/rain.nf
index 278b928..b55788f 100644
--- a/rain.nf
+++ b/rain.nf
@@ -591,6 +591,8 @@ workflow {
                 // Create a fasta index file of the reference genome
                 samtools_fasta_index(genome.collect())
                 jacusa2(samtools_index.out.tuple_sample_bam_bamindex, samtools_fasta_index.out.tuple_fasta_fastaindex.collect())
+                normalize_gxf(annotation.collect())
+                pluviometer(jacusa2.out.tuple_sample_jacusa2_table, normalize_gxf.out.gff.collect(), "jacusa2")
                 break
             case "sapin":
                 sapin(tuple_sample_bam_processed, genome.collect())