Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion bin/stats/pluviometer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import numpy as np
from numpy.typing import NDArray
from typing import TextIO, NamedTuple, Optional, Generator
from site_variant_readers import RNAVariantReader, Reditools2Reader, Reditools3Reader
from site_variant_readers import RNAVariantReader, Reditools2Reader, Reditools3Reader, Jacusa2Reader
import argparse
from contextlib import nullcontext
import sys
Expand Down Expand Up @@ -520,6 +520,9 @@ def scan_and_count(self, reader: RNAVariantReader) -> None:
case "reditools3":
print("Reditools3 format\n")
sv_reader: RNAVariantReader = Reditools3Reader(sv_handle)
case "jacusa2":
print("Jacusa2 BED6 extended format")
sv_reader: RNAVariantReader = Jacusa2Reader(sv_handle)
case _:
raise Exception(f'Unimplemented format "{args.format}"')

Expand Down
66 changes: 63 additions & 3 deletions bin/stats/site_variant_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@
REDITOOLS_FIELDS = ["Seqid", "Position", "Reference", "Strand", "Coverage", "MeanQ", "Frequencies"]
REDITOOLS_FIELD_INDEX = {field: i for i, field in enumerate(REDITOOLS_FIELDS)}

# Jacusa output file fields in the extended BED6 format
JACUSA_FIELDS = ["contig", "start", "end", "name", "score", "strand", "bases11", "info", "filter", "ref"]
JACUSA_FIELDS_INDEX = {field: i for i, field in enumerate(JACUSA_FIELDS)}

def skip_comments(handle: TextIO, s: str) -> Optional[str]:
"""
Read and return the next line in a text file handle that does not start with the comment prefix `s`.
"""
line: str = handle.readline()

while line.startswith(s):
line = handle.readline()

return line


class RNAVariantReader(ABC):
"""Abstract class defining the API for readers"""

Expand Down Expand Up @@ -48,7 +64,8 @@ def read(self) -> Optional[SiteVariantData]:
strand=self.strand,
coverage=1,
mean_quality=30.0,
frequencies=self.frequencies
frequencies=self.frequencies,
score=0.0
)
self.position += 1

Expand Down Expand Up @@ -128,7 +145,8 @@ def _parse_parts(self) -> SiteVariantData:
strand=strand,
coverage=int(self.parts[REDITOOLS_FIELD_INDEX["Coverage"]]),
mean_quality=float(self.parts[REDITOOLS_FIELD_INDEX["MeanQ"]]),
frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(",") + [0])
frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(",") + [0]),
score=0.0
)

def read(self) -> Optional[SiteVariantData]:
Expand All @@ -147,7 +165,6 @@ def close(self) -> None:
"""Close the file"""
self.file_handle.close()


class Reditools2Reader(ReditoolsXReader):
def parse_strand(self) -> int:
strand = int(self.parts[REDITOOLS_FIELD_INDEX["Strand"]])
Expand All @@ -174,3 +191,46 @@ def parse_strand(self) -> int:
case _:
raise Exception(f"Invalid strand value: {strand_str}")

class Jacusa2Reader():
def __init__(self, file_handle: TextIO) -> None:
self.file_handle: TextIO = file_handle

line = skip_comments(self.file_handle, "##")

# Check the Jacusa header
assert line.strip().lstrip('#').split('\t') == JACUSA_FIELDS

return None

def read(self) -> Optional[SiteVariantData]:
line: str = self.file_handle.readline().strip()

if line == "":
return None

parts: list[str] = line.split('\t')

reference_nuc_str: str = parts[JACUSA_FIELDS_INDEX["ref"]]

strand_str: str = parts[JACUSA_FIELDS_INDEX["strand"]]

match strand_str:
case '.':
strand = 0
case '+':
strand = 1
case '-':
strand -1

frequencies=np.int32(parts[JACUSA_FIELDS_INDEX["bases11"]].split(',') + [0])

return SiteVariantData(
seqid=parts[JACUSA_FIELDS_INDEX["contig"]],
position=int(parts[JACUSA_FIELDS_INDEX["start"]]), # Jacusa2 position is 0-based
reference=NUC_STR_TO_IND[reference_nuc_str],
strand=strand,
coverage=sum(frequencies),
mean_quality=float("nan"),
frequencies=frequencies,
score=float(parts[JACUSA_FIELDS_INDEX["score"]])
)
1 change: 1 addition & 0 deletions bin/stats/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ class SiteVariantData:
coverage: int
mean_quality: float
frequencies: NDArray[np.int32]
score: float
3 changes: 3 additions & 0 deletions docker/jacusa2/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ RUN wget https://github.com/dieterich-lab/JACUSA2/releases/download/v2.0.4/JACUS
RUN echo "#!/usr/bin/env bash" > /usr/local/bin/jacusa2
RUN echo "java -jar /usr/local/bin/JACUSA_v2.0.4.jar $@" >> /usr/local/bin/jacusa2
RUN chmod +x /usr/local/bin/jacusa2

# ⚠️ IMPORTANT
# Run the Docker image with the option `--ulimit nofile=1024:1024`
18 changes: 11 additions & 7 deletions modules/jacusa2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,29 @@ process jacusa2 {
base_name = bam.BaseName
// Set the strand orientation parameter from the library type parameter
// Terms explained in https://salmon.readthedocs.io/en/latest/library_type.html
if (meta.libtype in ["ISR", "SR"]) {
if (meta.strandedness in ["ISR", "SR"]) {
// First-strand oriented
jacusa_strand_param = "FR_SECONDSTRAND"
} else if (meta.libtype in ["ISF", "SF"]) {
} else if (meta.strandedness in ["ISF", "SF"]) {
// Second-strand oriented
jacusa_strand_param = "RF_FIRSTSTRAND"
} else if (meta.libtype in ["IU", "U"]) {
} else if (meta.strandedness in ["IU", "U"]) {
// Unstranded
jacusa_strand_param = "UNSTRANDED"
} else {
// Unsupported: Pass the library type string so that it's reported in
// the Jacusa2 error message
jacusa_strand_param = meta.libtype
jacusa_strand_param = "UNSTRANDED"
}
"""
# -A Show all sites, inc. sites without variants
# -f Use BED-6 extended output format
# -p Number of threads
# -r Path of output file
# -c Minimum coverage: 1 read
# -s Store feature-filtered results in another file
java -Xmx${task.memory.toMega()}M -jar /usr/local/bin/JACUSA_v2.0.4.jar \
call-1 \
-A \
-f V \
-f B \
-p ${task.cpus} \
-r ${base_name}.site_edits_jacusa2.txt \
-c 1 \
Expand Down
2 changes: 2 additions & 0 deletions rain.nf
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,8 @@ workflow {
// Create a fasta index file of the reference genome
samtools_fasta_index(genome.collect())
jacusa2(samtools_index.out.tuple_sample_bam_bamindex, samtools_fasta_index.out.tuple_fasta_fastaindex.collect())
normalize_gxf(annotation.collect())
pluviometer(jacusa2.out.tuple_sample_jacusa2_table, normalize_gxf.out.gff.collect(), "jacusa2")
break
case "sapin":
sapin(tuple_sample_bam_processed, genome.collect())
Expand Down