Juke34 · eascarrunz · Aug 1, 2025 · Jul 28, 2025 · Jul 28, 2025 · Jul 29, 2025
diff --git a/README.md b/README.md
diff --git a/bin/SeqFeature_extensions.py b/bin/SeqFeature_extensions.py
@@ -40,7 +40,12 @@ def get_transcript_like(self: SeqFeature) -> list[tuple[str, str, int]]:
 setattr(SeqFeature, "parent_list", [""])
 
 
-def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
+def make_chimaeras(self: SeqFeature, record_id: str) -> None:
+    """
+    Create chimaeras out of all the feature types of the sub-features.
+
+    The chimaeric features are appended to this feature's sub-features, with their feature ID and feature type suffixed with "-chimaera".
+    """
     target_type_locations: dict[str, list[SimpleLocation | CompoundLocation]] = {}
 
     for transcript in self.sub_features:
@@ -53,6 +58,7 @@ def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
             else:
                 target_type_locations[child.type] = child.location.parts
 
+    # Map each feature type to the union of all the locations collected for it
     chimaeric_type_locations: dict[str, SimpleLocation | CompoundLocation] = {
         key: location_union(location_parts) for key, location_parts in target_type_locations.items()
     }
@@ -65,136 +71,10 @@ def make_chimaeras2(self: SeqFeature, record_id: str) -> None:
             qualifiers={"Parent": self.id}
         )
 
-        # if key == "exon" or key == "CDS":
-        #     logging.info(
-        #         f"Record {record_id} · Created {key} chimaera of feature {self.id}: {len(transcript_like_list)} transcripts were merged into one transcript of {len(chimaeric_location_cds_or_exon.parts)} elements"
-        #     )
-
         chimaera.sub_features = []
         chimaera.is_chimaera = True
         self.sub_features.append(chimaera)
 
     return None
 
-setattr(SeqFeature, "make_chimaeras2", make_chimaeras2)
-
-
-def make_chimaeras(self: SeqFeature, record_id: str) -> list[SeqFeature]:
-    """
-    If the feature contains
-    """
-    if hasattr(self, "sub_features"):
-        if len(self.sub_features) == 0:
-            return []
-    else:
-        return []
-
-    new_chimaeras: list[SeqFeature] = []
-
-    transcript_like_list: list[SeqFeature] = list(
-        filter(
-            lambda transcript: any(map(lambda part: part.type == "CDS", transcript.sub_features)),
-            self.sub_features,
-        )
-    )
-
-    if len(transcript_like_list) == 0:
-        chimaeric_type_cds_or_exon: str = "exon"
-        transcript_like_list: list[SeqFeature] = list(
-            filter(
-                lambda transcript: any(
-                    map(lambda part: part.type == "exon", transcript.sub_features)
-                ),
-                self.sub_features,
-            )
-        )
-    else:
-        chimaeric_type_cds_or_exon: str = "CDS"
-
-    if len(transcript_like_list) == 0:
-        return None
-
-    target_locations_cds_or_exon: list[SimpleLocation | CompoundLocation] = []
-    target_locations_five_prime_utr: list[SimpleLocation | CompoundLocation] = []
-    target_locations_three_prime_utr: list[SimpleLocation | CompoundLocation] = []
-    for transcript in transcript_like_list:
-        target_locations_cds_or_exon.extend(
-            list(
-                map(
-                    lambda part: part.location,
-                    filter(
-                        lambda part: part.type == chimaeric_type_cds_or_exon,
-                        transcript.sub_features,
-                    ),
-                )
-            )
-        )
-        target_locations_five_prime_utr.extend(
-            list(
-                map(
-                    lambda part: part.location,
-                    filter(lambda part: part.type == "five_prime_utr", transcript.sub_features),
-                )
-            )
-        )
-        target_locations_three_prime_utr.extend(
-            list(
-                map(
-                    lambda part: part.location,
-                    filter(lambda part: part.type == "three_prime_utr", transcript.sub_features),
-                )
-            )
-        )
-
-    chimaeric_location_cds_or_exon: SimpleLocation | CompoundLocation = location_union(
-        target_locations_cds_or_exon
-    )
-    logging.info(
-        f"Record {record_id} · Created {chimaeric_type_cds_or_exon} chimaera of feature {self.id}: {len(transcript_like_list)} transcripts were merged into one transcript of {len(chimaeric_location_cds_or_exon.parts)} elements"
-    )
-
-    chimaeric_feature_cds_or_exon: SeqFeature = SeqFeature(
-        location=chimaeric_location_cds_or_exon,
-        type=chimaeric_type_cds_or_exon + "-chimaera",
-        id=self.id + "-chimaera",
-        qualifiers={"Parent": self.id},
-    )
-    chimaeric_feature_cds_or_exon.is_chimaera = True
-    chimaeric_feature_cds_or_exon.sub_features = []
-    self.sub_features.append(chimaeric_feature_cds_or_exon)
-    new_chimaeras.append(chimaeric_feature_cds_or_exon)
-
-    if len(target_locations_five_prime_utr) > 0:
-        chimaeric_location_five_prime_utr: SimpleLocation | CompoundLocation = location_union(
-            target_locations_five_prime_utr
-        ).parts[0]  # Pick only the first element so that there is only one 5'-UTR
-        chimaeric_feature_five_prime_utr: SeqFeature = SeqFeature(
-            location=chimaeric_location_five_prime_utr,
-            type="five_prime_utr-chimaera",
-            id=self.id + "-chimaera",
-            qualifiers={"Parent": self.id},
-        )
-        chimaeric_feature_five_prime_utr.is_chimaera = True
-        chimaeric_feature_five_prime_utr.sub_features = []
-        self.sub_features.append(chimaeric_feature_five_prime_utr)
-        new_chimaeras.append(chimaeric_feature_five_prime_utr)
-
-    if len(target_locations_three_prime_utr) > 0:
-        chimaeric_location_three_prime_utr: SimpleLocation | CompoundLocation = location_union(
-            target_locations_three_prime_utr
-        ).parts[-1]  # Pick only the last element so that there is only one 3'-UTR
-        chimaeric_feature_three_prime_utr: SeqFeature = SeqFeature(
-            location=chimaeric_location_three_prime_utr,
-            type="three_prime_utr-chimaera",
-            id=self.id + "-chimaera",
-            qualifiers={"Parent": self.id},
-        )
-        chimaeric_feature_three_prime_utr.is_chimaera = True
-        chimaeric_feature_three_prime_utr.sub_features = []
-        self.sub_features.append(chimaeric_feature_three_prime_utr)
-        new_chimaeras.append(chimaeric_feature_three_prime_utr)
-
-    return new_chimaeras
-
-
 setattr(SeqFeature, "make_chimaeras", make_chimaeras)
diff --git a/bin/feature_aggregator.py b/bin/feature_aggregator.py
diff --git a/bin/MultiCounter.py → bin/multi_counter.py b/bin/MultiCounter.py → bin/multi_counter.py
@@ -1,7 +1,7 @@
-from utils import SiteVariantData
+from utils import RNASiteVariantData
 import numpy as np
 from numpy.typing import NDArray
-from SiteFilter import SiteFilter
+from site_filter import SiteFilter
 from typing import TextIO
 
 class MultiCounter:
@@ -12,7 +12,7 @@ def __init__(self, site_filter: SiteFilter) -> None:
         Tallies of the numbers of reads per edit type
         This is a numpy matrix where the rows represent the reference base and the columns the edited base
         Rows and column indices correspond to bases in alphabetic order (ACGT)
-        Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) do not represent edits, and should remain 0
+        Cells whose row and column correspond to the same base (e.g. (0,0) -> (A,A)) count reads where the base is unchanged
         """
         self.edit_read_freqs: NDArray[np.int64] = np.zeros((5, 5), dtype=np.int64)
         self.edit_site_freqs: NDArray[np.int64] = np.zeros((5, 5), dtype=np.int64)
@@ -23,7 +23,7 @@ def __init__(self, site_filter: SiteFilter) -> None:
 
         return None
 
-    def update(self, variant_data: SiteVariantData) -> None:
+    def update(self, variant_data: RNASiteVariantData) -> None:
         """Increment the counters from the data in a SiteVariantData object."""
         i: int = variant_data.reference