Skip to content

Commit ced6a58

Browse files
authored
Merge pull request #48 from Juke34/new-pluviometer
New pluviometer
2 parents 27ebedf + 7c3b6f3 commit ced6a58

17 files changed

+864
-789
lines changed

README.md

Lines changed: 161 additions & 12 deletions
Large diffs are not rendered by default.

bin/SeqFeature_extensions.py

Lines changed: 7 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,12 @@ def get_transcript_like(self: SeqFeature) -> list[tuple[str, str, int]]:
4040
setattr(SeqFeature, "parent_list", [""])
4141

4242

43-
def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
43+
def make_chimaeras(self: SeqFeature, record_id: str) -> None:
44+
"""
45+
Create chimaeras out of all the feature types of the sub-features.
46+
47+
The chimaeric features are added as sub-features, with their feature IDs and feature types suffixed with "-chimaera".
48+
"""
4449
target_type_locations: dict[str, list[SimpleLocation | CompoundLocation]] = {}
4550

4651
for transcript in self.sub_features:
@@ -53,6 +58,7 @@ def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
5358
else:
5459
target_type_locations[child.type] = child.location.parts
5560

61+
# Create a dict of the feature types to chimaerize
5662
chimaeric_type_locations: dict[str, SimpleLocation | CompoundLocation] = {
5763
key: location_union(location_parts) for key, location_parts in target_type_locations.items()
5864
}
@@ -65,136 +71,10 @@ def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
6571
qualifiers={"Parent": self.id}
6672
)
6773

68-
# if key == "exon" or key == "CDS":
69-
# logging.info(
70-
# f"Record {record_id} · Created {key} chimaera of feature {self.id}: {len(transcript_like_list)} transcripts were merged into one transcript of {len(chimaeric_location_cds_or_exon.parts)} elements"
71-
# )
72-
7374
chimaera.sub_features = []
7475
chimaera.is_chimaera = True
7576
self.sub_features.append(chimaera)
7677

7778
return None
7879

79-
setattr(SeqFeature, "make_chimaeras2", make_chimaeras2)
80-
81-
82-
def make_chimaeras(self: SeqFeature, record_id: str) -> list[SeqFeature]:
83-
"""
84-
If the feature contains
85-
"""
86-
if hasattr(self, "sub_features"):
87-
if len(self.sub_features) == 0:
88-
return []
89-
else:
90-
return []
91-
92-
new_chimaeras: list[SeqFeature] = []
93-
94-
transcript_like_list: list[SeqFeature] = list(
95-
filter(
96-
lambda transcript: any(map(lambda part: part.type == "CDS", transcript.sub_features)),
97-
self.sub_features,
98-
)
99-
)
100-
101-
if len(transcript_like_list) == 0:
102-
chimaeric_type_cds_or_exon: str = "exon"
103-
transcript_like_list: list[SeqFeature] = list(
104-
filter(
105-
lambda transcript: any(
106-
map(lambda part: part.type == "exon", transcript.sub_features)
107-
),
108-
self.sub_features,
109-
)
110-
)
111-
else:
112-
chimaeric_type_cds_or_exon: str = "CDS"
113-
114-
if len(transcript_like_list) == 0:
115-
return None
116-
117-
target_locations_cds_or_exon: list[SimpleLocation | CompoundLocation] = []
118-
target_locations_five_prime_utr: list[SimpleLocation | CompoundLocation] = []
119-
target_locations_three_prime_utr: list[SimpleLocation | CompoundLocation] = []
120-
for transcript in transcript_like_list:
121-
target_locations_cds_or_exon.extend(
122-
list(
123-
map(
124-
lambda part: part.location,
125-
filter(
126-
lambda part: part.type == chimaeric_type_cds_or_exon,
127-
transcript.sub_features,
128-
),
129-
)
130-
)
131-
)
132-
target_locations_five_prime_utr.extend(
133-
list(
134-
map(
135-
lambda part: part.location,
136-
filter(lambda part: part.type == "five_prime_utr", transcript.sub_features),
137-
)
138-
)
139-
)
140-
target_locations_three_prime_utr.extend(
141-
list(
142-
map(
143-
lambda part: part.location,
144-
filter(lambda part: part.type == "three_prime_utr", transcript.sub_features),
145-
)
146-
)
147-
)
148-
149-
chimaeric_location_cds_or_exon: SimpleLocation | CompoundLocation = location_union(
150-
target_locations_cds_or_exon
151-
)
152-
logging.info(
153-
f"Record {record_id} · Created {chimaeric_type_cds_or_exon} chimaera of feature {self.id}: {len(transcript_like_list)} transcripts were merged into one transcript of {len(chimaeric_location_cds_or_exon.parts)} elements"
154-
)
155-
156-
chimaeric_feature_cds_or_exon: SeqFeature = SeqFeature(
157-
location=chimaeric_location_cds_or_exon,
158-
type=chimaeric_type_cds_or_exon + "-chimaera",
159-
id=self.id + "-chimaera",
160-
qualifiers={"Parent": self.id},
161-
)
162-
chimaeric_feature_cds_or_exon.is_chimaera = True
163-
chimaeric_feature_cds_or_exon.sub_features = []
164-
self.sub_features.append(chimaeric_feature_cds_or_exon)
165-
new_chimaeras.append(chimaeric_feature_cds_or_exon)
166-
167-
if len(target_locations_five_prime_utr) > 0:
168-
chimaeric_location_five_prime_utr: SimpleLocation | CompoundLocation = location_union(
169-
target_locations_five_prime_utr
170-
).parts[0] # Pick only the first element so that there is only one 5'-UTR
171-
chimaeric_feature_five_prime_utr: SeqFeature = SeqFeature(
172-
location=chimaeric_location_five_prime_utr,
173-
type="five_prime_utr-chimaera",
174-
id=self.id + "-chimaera",
175-
qualifiers={"Parent": self.id},
176-
)
177-
chimaeric_feature_five_prime_utr.is_chimaera = True
178-
chimaeric_feature_five_prime_utr.sub_features = []
179-
self.sub_features.append(chimaeric_feature_five_prime_utr)
180-
new_chimaeras.append(chimaeric_feature_five_prime_utr)
181-
182-
if len(target_locations_three_prime_utr) > 0:
183-
chimaeric_location_three_prime_utr: SimpleLocation | CompoundLocation = location_union(
184-
target_locations_three_prime_utr
185-
).parts[-1] # Pick only the last element so that there is only one 3'-UTR
186-
chimaeric_feature_three_prime_utr: SeqFeature = SeqFeature(
187-
location=chimaeric_location_three_prime_utr,
188-
type="three_prime_utr-chimaera",
189-
id=self.id + "-chimaera",
190-
qualifiers={"Parent": self.id},
191-
)
192-
chimaeric_feature_three_prime_utr.is_chimaera = True
193-
chimaeric_feature_three_prime_utr.sub_features = []
194-
self.sub_features.append(chimaeric_feature_three_prime_utr)
195-
new_chimaeras.append(chimaeric_feature_three_prime_utr)
196-
197-
return new_chimaeras
198-
199-
20080
setattr(SeqFeature, "make_chimaeras", make_chimaeras)

bin/feature_aggregator.py

Lines changed: 0 additions & 121 deletions
This file was deleted.
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from utils import SiteVariantData
1+
from utils import RNASiteVariantData
22
import numpy as np
33
from numpy.typing import NDArray
4-
from SiteFilter import SiteFilter
4+
from site_filter import SiteFilter
55
from typing import TextIO
66

77
class MultiCounter:
@@ -12,7 +12,7 @@ def __init__(self, site_filter: SiteFilter) -> None:
1212
Tallies of the numbers of reads per edit type
1313
This is a numpy matrix where the rows represent the reference base and the columns the edited base
1414
Rows and column indices correspond to bases in alphabetic order (ACGT)
15-
Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) do not represent edits, and should remain 0
15+
Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) represent reads where the base is unchanged
1616
"""
1717
self.edit_read_freqs: NDArray[np.int64] = np.zeros((5, 5), dtype=np.int64)
1818
self.edit_site_freqs: NDArray[np.int64] = np.zeros((5, 5), dtype=np.int64)
@@ -23,7 +23,7 @@ def __init__(self, site_filter: SiteFilter) -> None:
2323

2424
return None
2525

26-
def update(self, variant_data: SiteVariantData) -> None:
26+
def update(self, variant_data: RNASiteVariantData) -> None:
2727
"""Increment the counters from the data in a SiteVariantData object."""
2828
i: int = variant_data.reference
2929

0 commit comments

Comments
 (0)