Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 161 additions & 12 deletions README.md

Large diffs are not rendered by default.

134 changes: 7 additions & 127 deletions bin/SeqFeature_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ def get_transcript_like(self: SeqFeature) -> list[tuple[str, str, int]]:
setattr(SeqFeature, "parent_list", [""])


def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
def make_chimaeras(self: SeqFeature, record_id: str) -> None:
"""
Create chimaeras out of all the feature types of the sub-features.

The chimaeric features are added as sub-features, with their feature ID and feature types suffixed with "-chimaera"
"""
target_type_locations: dict[str, list[SimpleLocation | CompoundLocation]] = {}

for transcript in self.sub_features:
Expand All @@ -53,6 +58,7 @@ def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
else:
target_type_locations[child.type] = child.location.parts

# Create a dict of the feature types to chimaerize
chimaeric_type_locations: dict[str, SimpleLocation | CompoundLocation] = {
key: location_union(location_parts) for key, location_parts in target_type_locations.items()
}
Expand All @@ -65,136 +71,10 @@ def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
qualifiers={"Parent": self.id}
)

# if key == "exon" or key == "CDS":
# logging.info(
# f"Record {record_id} · Created {key} chimaera of feature {self.id}: {len(transcript_like_list)} transcripts were merged into one transcript of {len(chimaeric_location_cds_or_exon.parts)} elements"
# )

chimaera.sub_features = []
chimaera.is_chimaera = True
self.sub_features.append(chimaera)

return None

setattr(SeqFeature, "make_chimaeras2", make_chimaeras2)


def make_chimaeras(self: SeqFeature, record_id: str) -> list[SeqFeature]:
    """
    Merge the transcripts of this feature into chimaeric sub-features.

    The transcript-like sub-features are those containing at least one "CDS"
    part; if none contains a CDS, those containing at least one "exon" part
    are used instead. The locations of the selected parts are merged with
    `location_union` into a single "CDS-chimaera" (or "exon-chimaera")
    feature. When the selected transcripts carry "five_prime_utr" or
    "three_prime_utr" parts, one chimaeric UTR feature is created for each
    kind, using only the first (5') or last (3') part of the merged location.

    Every chimaera is appended to `self.sub_features`, flagged with
    `is_chimaera = True`, given an empty `sub_features` list, and receives the
    ID `self.id + "-chimaera"` with `self.id` as its "Parent" qualifier.
    Note that all chimaeras created for one feature share the same ID.

    Parameters
    ----------
    record_id
        Identifier of the enclosing record; only used in the log message.

    Returns
    -------
    list[SeqFeature]
        The newly created chimaeric features, in creation order. The list is
        empty when the feature has no sub-features or when none of them
        contains a CDS or an exon.
    """
    sub_features = getattr(self, "sub_features", None)
    if not sub_features:
        return []

    def transcripts_with(part_type: str) -> list:
        """Return the sub-features that own at least one part of `part_type`."""
        return [
            transcript
            for transcript in sub_features
            if any(part.type == part_type for part in transcript.sub_features)
        ]

    # Prefer CDS-bearing transcripts; fall back to exon-bearing ones.
    chimaeric_type_cds_or_exon: str = "CDS"
    transcript_like_list = transcripts_with("CDS")
    if not transcript_like_list:
        chimaeric_type_cds_or_exon = "exon"
        transcript_like_list = transcripts_with("exon")

    if not transcript_like_list:
        # Previously returned None here, contradicting the declared return type.
        return []

    def locations_of(part_type: str) -> list:
        """Collect the locations of all `part_type` parts of the selected transcripts."""
        return [
            part.location
            for transcript in transcript_like_list
            for part in transcript.sub_features
            if part.type == part_type
        ]

    new_chimaeras: list[SeqFeature] = []

    def add_chimaera(location, base_type: str) -> None:
        """Create one chimaeric feature and register it as a sub-feature of `self`."""
        chimaera = SeqFeature(
            location=location,
            type=base_type + "-chimaera",
            id=self.id + "-chimaera",
            qualifiers={"Parent": self.id},
        )
        chimaera.is_chimaera = True
        chimaera.sub_features = []
        self.sub_features.append(chimaera)
        new_chimaeras.append(chimaera)

    # Gather all locations before appending any chimaera to self.sub_features.
    target_locations_cds_or_exon = locations_of(chimaeric_type_cds_or_exon)
    target_locations_five_prime_utr = locations_of("five_prime_utr")
    target_locations_three_prime_utr = locations_of("three_prime_utr")

    chimaeric_location_cds_or_exon = location_union(target_locations_cds_or_exon)
    logging.info(
        f"Record {record_id} · Created {chimaeric_type_cds_or_exon} chimaera of feature {self.id}: {len(transcript_like_list)} transcripts were merged into one transcript of {len(chimaeric_location_cds_or_exon.parts)} elements"
    )
    add_chimaera(chimaeric_location_cds_or_exon, chimaeric_type_cds_or_exon)

    if target_locations_five_prime_utr:
        # Pick only the first element so that there is only one 5'-UTR
        add_chimaera(
            location_union(target_locations_five_prime_utr).parts[0],
            "five_prime_utr",
        )

    if target_locations_three_prime_utr:
        # Pick only the last element so that there is only one 3'-UTR
        add_chimaera(
            location_union(target_locations_three_prime_utr).parts[-1],
            "three_prime_utr",
        )

    return new_chimaeras


# Attach make_chimaeras to the SeqFeature class under the attribute name "make_chimaeras"
setattr(SeqFeature, "make_chimaeras", make_chimaeras)
121 changes: 0 additions & 121 deletions bin/feature_aggregator.py

This file was deleted.

8 changes: 4 additions & 4 deletions bin/MultiCounter.py → bin/multi_counter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from utils import SiteVariantData
from utils import RNASiteVariantData
import numpy as np
from numpy.typing import NDArray
from SiteFilter import SiteFilter
from site_filter import SiteFilter
from typing import TextIO

class MultiCounter:
Expand All @@ -12,7 +12,7 @@ def __init__(self, site_filter: SiteFilter) -> None:
Tallies of the numbers of reads per edit type
This is a numpy matrix where the rows represent the reference base and the columns the edited base
Rows and column indices correspond to bases in alphabetic order (ACGT)
Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) do not represent edits, and should remain 0
Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) represent reads where the base is unchanged
"""
self.edit_read_freqs: NDArray[np.int64] = np.zeros((5, 5), dtype=np.int64)
self.edit_site_freqs: NDArray[np.int64] = np.zeros((5, 5), dtype=np.int64)
Expand All @@ -23,7 +23,7 @@ def __init__(self, site_filter: SiteFilter) -> None:

return None

def update(self, variant_data: SiteVariantData) -> None:
def update(self, variant_data: RNASiteVariantData) -> None:
"""Increment the counters from the data in a SiteVariantData object."""
i: int = variant_data.reference

Expand Down
Loading