Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion bin/stats/pluviometer.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def __init__(self, site_filter: SiteFilter) -> None:
Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) do not represent edits, and should remain 0
"""
self.edit_read_freqs: NDArray[np.int32] = np.zeros((5, 5), dtype=np.int32)

self.edit_site_freqs: NDArray[np.int32] = np.zeros((5, 5), dtype=np.int32)

self.genome_base_freqs: NDArray[np.int32] = np.zeros(5, dtype=np.int32)
Expand Down
6 changes: 3 additions & 3 deletions bin/stats/site_variant_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ def __init__(self, strand: int, edit: str) -> None:
self.position: int = 0
self.strand:int = strand
assert edit in EDIT_TYPES + NONEDIT_TYPES
self.reference: int = NUC_STR_TO_IND[edit[0]]
self.reference: int = NUC_STR_TO_IND.get(edit[0], 4)
self.edited: str = edit[1]
self.frequencies: NDArray[np.int32] = np.zeros(5, dtype=np.int32)
self.frequencies[NUC_STR_TO_IND[self.edited]] = 1
self.frequencies[NUC_STR_TO_IND.get(self.edited, 4)] = 1

return None

Expand Down Expand Up @@ -124,7 +124,7 @@ def _parse_parts(self) -> SiteVariantData:
return SiteVariantData(
seqid=self.parts[REDITOOLS_FIELD_INDEX["Seqid"]],
position=int(self.parts[REDITOOLS_FIELD_INDEX["Position"]]) - 1, # Convert Reditools 1-based index to Python's 0-based index
reference=NUC_STR_TO_IND[reference_nuc_str],
reference=NUC_STR_TO_IND.get(reference_nuc_str, 4),
strand=strand,
coverage=int(self.parts[REDITOOLS_FIELD_INDEX["Coverage"]]),
mean_quality=float(self.parts[REDITOOLS_FIELD_INDEX["MeanQ"]]),
Expand Down
21 changes: 20 additions & 1 deletion bin/stats/test_pluviometer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from Bio.SeqRecord import SeqRecord
from site_variant_readers import TestReader
from pluviometer import RecordManager
from pluviometer import SiteFilter
from pluviometer import write_output_file_header
from contextlib import nullcontext
from typing import Generator
from utils import SiteVariantData
Expand Down Expand Up @@ -32,6 +34,14 @@ def parse_cli_input() -> argparse.Namespace:
type=str,
help="Name of the output file (leave empty to write to stdout)",
)
parser.add_argument(
"--aggregation_mode",
"-a",
type=str,
default="all",
choices=["all", "cds_longest"],
help='Mode for aggregating counts: "all" aggregates features of every transcript; "cds_longest" aggregates features of the longest CDS or non-coding transcript',
)

return parser.parse_args()

Expand All @@ -41,6 +51,10 @@ def parse_cli_input() -> argparse.Namespace:
with (open(args.gff) as gff_handle,
open(args.output, "w") if len(args.output) > 0 else nullcontext(sys.stdout) as output_handle):
records: Generator[SeqRecord, None, None] = GFF.parse(gff_handle)

global_filter: SiteFilter = SiteFilter(cov_threshold=1, edit_threshold=1)

write_output_file_header(output_handle)

for record in records:
sv_reader = TestReader(
Expand All @@ -49,5 +63,10 @@ def parse_cli_input() -> argparse.Namespace:
)
sv_data: SiteVariantData = sv_reader.read()

manager: RecordManager = RecordManager(record, output_handle)
manager: RecordManager = RecordManager(
record,
global_filter,
output_handle,
args.aggregation_mode
)
manager.scan_and_count(sv_reader)