Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ result/*
**/__pycache__/
.DS_Store
sif_images/*
rain_result/*
23 changes: 17 additions & 6 deletions bin/stats/pluviometer.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,13 @@ def parse_cli_input() -> argparse.Namespace:
def write_output_file_header(handle: TextIO) -> int:
return handle.write(
"SeqID\tGrandParentID\tParentID\tFeatureID\tType\tStart\tEnd\tStrand\tCoveredSites"
+ "\tSiteFreqs["
+ "\tGenomeBases["
+ ",".join(BASE_TYPES)
+ "]"
+ "\tSiteBasePairs["
+ ",".join(MATCH_MISMATCH_TYPES)
+ "]"
+ "\tReadFreqs["
+ "\tReadBasePairs["
+ ",".join(MATCH_MISMATCH_TYPES)
+ "]"
+ "\n"
Expand All @@ -102,7 +105,7 @@ class SiteFilter:
def __init__(self, cov_threshold: int, edit_threshold: int) -> None:
self.cov_threshold: int = cov_threshold
self.edit_threshold: int = edit_threshold
self.frequencies: NDArray[np.int32] = np.zeros(4, np.int32)
self.frequencies: NDArray[np.int32] = np.zeros(5, np.int32)

def apply(self, variant_data: SiteVariantData) -> None:
if variant_data.coverage >= self.cov_threshold:
Expand All @@ -127,9 +130,11 @@ def __init__(self, site_filter: SiteFilter) -> None:
Rows and column indices correspond to bases in alphabetic order (ACGT)
Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) do not represent edits, and should remain 0
"""
self.edit_read_freqs: NDArray[np.int32] = np.zeros((4, 4), dtype=np.int32)
self.edit_read_freqs: NDArray[np.int32] = np.zeros((5, 5), dtype=np.int32)

self.edit_site_freqs: NDArray[np.int32] = np.zeros((5, 5), dtype=np.int32)

self.edit_site_freqs: NDArray[np.int32] = np.zeros((4, 4), dtype=np.int32)
self.genome_base_freqs: NDArray[np.int32] = np.zeros(5, dtype=np.int32)

self.filter = site_filter

Expand All @@ -144,13 +149,19 @@ def update(self, variant_data: SiteVariantData) -> None:
self.filter.apply(variant_data)
self.edit_site_freqs[i, :] += self.filter.frequencies

self.genome_base_freqs[i] += 1

return None

def report(self, output_handle: TextIO) -> int:
b = 0

# Write the number of covered sites
b += output_handle.write(str(self.edit_site_freqs.sum()))
b += output_handle.write(str(self.genome_base_freqs.sum()))
b += output_handle.write("\t")

# Write the base frequencies in the genome
b += write_base_array(output_handle, self.genome_base_freqs)
b += output_handle.write("\t")

# Write edited sites
Expand Down
4 changes: 2 additions & 2 deletions bin/stats/site_variant_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(self, strand: int, edit: str) -> None:
assert edit in EDIT_TYPES + NONEDIT_TYPES
self.reference: int = NUC_STR_TO_IND[edit[0]]
self.edited: str = edit[1]
self.frequencies: NDArray[np.int32] = np.zeros(4, dtype=np.int32)
self.frequencies: NDArray[np.int32] = np.zeros(5, dtype=np.int32)
self.frequencies[NUC_STR_TO_IND[self.edited]] = 1

return None
Expand Down Expand Up @@ -128,7 +128,7 @@ def _parse_parts(self) -> SiteVariantData:
strand=strand,
coverage=int(self.parts[REDITOOLS_FIELD_INDEX["Coverage"]]),
mean_quality=float(self.parts[REDITOOLS_FIELD_INDEX["MeanQ"]]),
frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(","))
frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(",") + [0])
)

def read(self) -> Optional[SiteVariantData]:
Expand Down
12 changes: 12 additions & 0 deletions bin/stats/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from numpy.typing import NDArray
from dataclasses import dataclass
import itertools
from itertools import permutations

BASE_TYPES = ["A", "C", "G", "T"]

Expand Down Expand Up @@ -37,6 +38,17 @@ def argmax(collection) -> int:
"""
return max(range(len(collection)), key=lambda i: collection[i])

nucs = ('A', 'C', 'G', 'T')

def make_variant_header_dict():
nuc_pairs = permutations(nucs, 2)
keys = ((x, NUC_STR_TO_IND[y]) for x, y in nuc_pairs)
return {k:v for v, k in enumerate(keys)}

header_dict = make_variant_header_dict()

for item in header_dict.items():
print(item)

@dataclass(frozen=True, slots=True)
class SiteVariantData:
Expand Down
9 changes: 5 additions & 4 deletions modules/reditools2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,20 @@ process reditools2 {

// Set the strand orientation parameter from the library type parameter
// Terms explained in https://salmon.readthedocs.io/en/latest/library_type.html
if (meta.libtype in ["ISR", "SR"]) {
if (meta.strandedness in ["ISR", "SR"]) {
// First-strand oriented
strand_orientation = "2"
} else if (meta.libtype in ["ISF", "SF"]) {
} else if (meta.strandedness in ["ISF", "SF"]) {
// Second-strand oriented
strand_orientation = "1"
} else if (meta.libtype in ["IU", "U"]) {
} else if (meta.strandedness in ["IU", "U"]) {
// Unstranded
strand_orientation = "0"
} else {
// Unsupported: Pass the library type string so that it's reported in
// the reditools error message
strand_orientation = meta.libtype
print("invalid strand \"${meta.strandedness}\"")
strand_orientation = meta.strandedness
}
base_name = bam.BaseName

Expand Down
3 changes: 2 additions & 1 deletion modules/reditools3.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ process reditools3 {
} else {
// Unsupported: Pass the library type string so that it's reported in
// the reditools error message
strand_orientation = "0"
print("invalid strand ${meta.strandedness}")
strand_orientation = meta.strandedness
}
base_name = bam.BaseName

Expand Down
6 changes: 3 additions & 3 deletions rain.nf
Original file line number Diff line number Diff line change
Expand Up @@ -574,12 +574,12 @@ workflow {
if (params.clipoverlap) {
bamutil_clipoverlap(gatk_markduplicates.out.tuple_sample_dedupbam)
tuple_sample_bam_processed = bamutil_clipoverlap.out.tuple_sample_clipoverbam
// stat on bam with overlap clipped
fastqc_clip(tuple_sample_bam_processed, "clip")
logs.concat(fastqc_clip.out).set{logs} // save log
} else {
tuple_sample_bam_processed = gatk_markduplicates.out.tuple_sample_dedupbam
}
// stat on bam with overlap clipped
fastqc_clip(tuple_sample_bam_processed, "clip")
logs.concat(fastqc_clip.out).set{logs} // save log
// index bam
samtools_index(tuple_sample_bam_processed)
// report with multiqc
Expand Down