Skip to content

Commit cf9b422

Browse files
authored
Merge pull request #35 from Juke34/reditools2
Reditools2
2 parents a7d740c + 86a2657 commit cf9b422

File tree

7 files changed

+42
-16
lines changed

7 files changed

+42
-16
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ result/*
66
**/__pycache__/
77
.DS_Store
88
sif_images/*
9+
rain_result/*

bin/stats/pluviometer.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,13 @@ def parse_cli_input() -> argparse.Namespace:
8888
def write_output_file_header(handle: TextIO) -> int:
8989
return handle.write(
9090
"SeqID\tGrandParentID\tParentID\tFeatureID\tType\tStart\tEnd\tStrand\tCoveredSites"
91-
+ "\tSiteFreqs["
91+
+ "\tGenomeBases["
92+
+ ",".join(BASE_TYPES)
93+
+ "]"
94+
+ "\tSiteBasePairs["
9295
+ ",".join(MATCH_MISMATCH_TYPES)
9396
+ "]"
94-
+ "\tReadFreqs["
97+
+ "\tReadBasePairs["
9598
+ ",".join(MATCH_MISMATCH_TYPES)
9699
+ "]"
97100
+ "\n"
@@ -102,7 +105,7 @@ class SiteFilter:
102105
def __init__(self, cov_threshold: int, edit_threshold: int) -> None:
103106
self.cov_threshold: int = cov_threshold
104107
self.edit_threshold: int = edit_threshold
105-
self.frequencies: NDArray[np.int32] = np.zeros(4, np.int32)
108+
self.frequencies: NDArray[np.int32] = np.zeros(5, np.int32)
106109

107110
def apply(self, variant_data: SiteVariantData) -> None:
108111
if variant_data.coverage >= self.cov_threshold:
@@ -127,9 +130,11 @@ def __init__(self, site_filter: SiteFilter) -> None:
127130
Rows and column indices correspond to bases in alphabetic order (ACGT)
128131
Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) do not represent edits, and should remain 0
129132
"""
130-
self.edit_read_freqs: NDArray[np.int32] = np.zeros((4, 4), dtype=np.int32)
133+
self.edit_read_freqs: NDArray[np.int32] = np.zeros((5, 5), dtype=np.int32)
134+
135+
self.edit_site_freqs: NDArray[np.int32] = np.zeros((5, 5), dtype=np.int32)
131136

132-
self.edit_site_freqs: NDArray[np.int32] = np.zeros((4, 4), dtype=np.int32)
137+
self.genome_base_freqs: NDArray[np.int32] = np.zeros(5, dtype=np.int32)
133138

134139
self.filter = site_filter
135140

@@ -144,13 +149,19 @@ def update(self, variant_data: SiteVariantData) -> None:
144149
self.filter.apply(variant_data)
145150
self.edit_site_freqs[i, :] += self.filter.frequencies
146151

152+
self.genome_base_freqs[i] += 1
153+
147154
return None
148155

149156
def report(self, output_handle: TextIO) -> int:
150157
b = 0
151158

152159
# Write the number of covered sites
153-
b += output_handle.write(str(self.edit_site_freqs.sum()))
160+
b += output_handle.write(str(self.genome_base_freqs.sum()))
161+
b += output_handle.write("\t")
162+
163+
# Write the base frequencies in the genome
164+
b += write_base_array(output_handle, self.genome_base_freqs)
154165
b += output_handle.write("\t")
155166

156167
# Write edited sites

bin/stats/site_variant_readers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def __init__(self, strand: int, edit: str) -> None:
3535
assert edit in EDIT_TYPES + NONEDIT_TYPES
3636
self.reference: int = NUC_STR_TO_IND[edit[0]]
3737
self.edited: str = edit[1]
38-
self.frequencies: NDArray[np.int32] = np.zeros(4, dtype=np.int32)
38+
self.frequencies: NDArray[np.int32] = np.zeros(5, dtype=np.int32)
3939
self.frequencies[NUC_STR_TO_IND[self.edited]] = 1
4040

4141
return None
@@ -128,7 +128,7 @@ def _parse_parts(self) -> SiteVariantData:
128128
strand=strand,
129129
coverage=int(self.parts[REDITOOLS_FIELD_INDEX["Coverage"]]),
130130
mean_quality=float(self.parts[REDITOOLS_FIELD_INDEX["MeanQ"]]),
131-
frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(","))
131+
frequencies=np.int32(self.parts[REDITOOLS_FIELD_INDEX["Frequencies"]][1:-1].split(",") + [0])
132132
)
133133

134134
def read(self) -> Optional[SiteVariantData]:

bin/stats/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from numpy.typing import NDArray
33
from dataclasses import dataclass
44
import itertools
5+
from itertools import permutations
56

67
BASE_TYPES = ["A", "C", "G", "T"]
78

@@ -37,6 +38,17 @@ def argmax(collection) -> int:
3738
"""
3839
return max(range(len(collection)), key=lambda i: collection[i])
3940

41+
nucs = ('A', 'C', 'G', 'T')
42+
43+
def make_variant_header_dict():
44+
nuc_pairs = permutations(nucs, 2)
45+
keys = ((x, NUC_STR_TO_IND[y]) for x, y in nuc_pairs)
46+
return {k:v for v, k in enumerate(keys)}
47+
48+
header_dict = make_variant_header_dict()
49+
50+
for item in header_dict.items():
51+
print(item)
4052

4153
@dataclass(frozen=True, slots=True)
4254
class SiteVariantData:

modules/reditools2.nf

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,20 @@ process reditools2 {
1515

1616
// Set the strand orientation parameter from the library type parameter
1717
// Terms explained in https://salmon.readthedocs.io/en/latest/library_type.html
18-
if (meta.libtype in ["ISR", "SR"]) {
18+
if (meta.strandedness in ["ISR", "SR"]) {
1919
// First-strand oriented
2020
strand_orientation = "2"
21-
} else if (meta.libtype in ["ISF", "SF"]) {
21+
} else if (meta.strandedness in ["ISF", "SF"]) {
2222
// Second-strand oriented
2323
strand_orientation = "1"
24-
} else if (meta.libtype in ["IU", "U"]) {
24+
} else if (meta.strandedness in ["IU", "U"]) {
2525
// Unstranded
2626
strand_orientation = "0"
2727
} else {
2828
// Unsupported: Pass the library type string so that it's reported in
2929
// the reditools error message
30-
strand_orientation = meta.libtype
30+
print("invalid strand \"${meta.strandedness}\"")
31+
strand_orientation = meta.strandedness
3132
}
3233
base_name = bam.BaseName
3334

modules/reditools3.nf

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ process reditools3 {
2626
} else {
2727
// Unsupported: Pass the library type string so that it's reported in
2828
// the reditools error message
29-
strand_orientation = "0"
29+
print("invalid strand ${meta.strandedness}")
30+
strand_orientation = meta.strandedness
3031
}
3132
base_name = bam.BaseName
3233

rain.nf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -574,12 +574,12 @@ workflow {
574574
if (params.clipoverlap) {
575575
bamutil_clipoverlap(gatk_markduplicates.out.tuple_sample_dedupbam)
576576
tuple_sample_bam_processed = bamutil_clipoverlap.out.tuple_sample_clipoverbam
577+
// stat on bam with overlap clipped
578+
fastqc_clip(tuple_sample_bam_processed, "clip")
579+
logs.concat(fastqc_clip.out).set{logs} // save log
577580
} else {
578581
tuple_sample_bam_processed = gatk_markduplicates.out.tuple_sample_dedupbam
579582
}
580-
// stat on bam with overlap clipped
581-
fastqc_clip(tuple_sample_bam_processed, "clip")
582-
logs.concat(fastqc_clip.out).set{logs} // save log
583583
// index bam
584584
samtools_index(tuple_sample_bam_processed)
585585
// report with multiqc

0 commit comments

Comments
 (0)