From b39bc7d083545e1fcc34982401b2eeedfe5450f4 Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Fri, 28 Nov 2025 14:29:46 +0000
Subject: [PATCH 01/10] chore: format with ruff

---
 Mikado/parsers/bed12.py | 1016 ++++++++++++++++++++++++++-------------
 1 file changed, 685 insertions(+), 331 deletions(-)

diff --git a/Mikado/parsers/bed12.py b/Mikado/parsers/bed12.py
index a28cad7f..7c95ffe4 100644
--- a/Mikado/parsers/bed12.py
+++ b/Mikado/parsers/bed12.py
@@ -40,11 +40,13 @@
 
 codons = copy.deepcopy(CodonTable.ambiguous_dna_by_id[1]._codon_table)
 codons.start_codons = ["ATG"]
-standard = CodonTable.AmbiguousCodonTable(codons,
-                                          IUPACData.ambiguous_dna_letters,
-                                          IUPACData.ambiguous_dna_values,
-                                          IUPACData.extended_protein_letters,
-                                          IUPACData.extended_protein_values)
+standard = CodonTable.AmbiguousCodonTable(
+    codons,
+    IUPACData.ambiguous_dna_letters,
+    IUPACData.ambiguous_dna_values,
+    IUPACData.extended_protein_letters,
+    IUPACData.extended_protein_values,
+)
 assert standard.start_codons == ["ATG"]
 assert CodonTable.ambiguous_dna_by_id[1].start_codons != ["ATG"]
 
@@ -66,16 +68,20 @@ def get_tables(table, to_stop=False, gap=None, stop_symbol="*"):
     if dual_coding:
         c = dual_coding[0]
         if to_stop:
-            raise ValueError("You cannot use 'to_stop=True' with this table "
-                             "as it contains {} codon(s) which can be both "
-                             " STOP and an  amino acid (e.g. '{}' -> '{}' or "
-                             "STOP)."
-                             .format(len(dual_coding), c, forward_table[c]))
-        warnings.warn("This table contains {} codon(s) which code(s) for both "
-                      "STOP and an amino acid (e.g. '{}' -> '{}' or STOP). "
-                      "Such codons will be translated as amino acid."
-                      .format(len(dual_coding), c, forward_table[c]),
-                      BiopythonWarning)
+            raise ValueError(
+                "You cannot use 'to_stop=True' with this table "
+                "as it contains {} codon(s) which can be both "
+                " STOP and an  amino acid (e.g. '{}' -> '{}' or "
+                "STOP).".format(len(dual_coding), c, forward_table[c])
+            )
+        warnings.warn(
+            "This table contains {} codon(s) which code(s) for both "
+            "STOP and an amino acid (e.g. '{}' -> '{}' or STOP). "
+            "Such codons will be translated as amino acid.".format(
+                len(dual_coding), c, forward_table[c]
+            ),
+            BiopythonWarning,
+        )
 
     for stop in stop_codons:
         forward_table[stop] = stop_symbol
@@ -88,7 +94,9 @@ def get_tables(table, to_stop=False, gap=None, stop_symbol="*"):
     return forward_table, getter, valid_letters
 
 
-def _translate_str(sequence, table, stop_symbol="*", to_stop=False, cds=False, pos_stop="X", gap=None):
+def _translate_str(
+    sequence, table, stop_symbol="*", to_stop=False, cds=False, pos_stop="X", gap=None
+):
     """Translate nucleotide string into a protein string (PRIVATE).
 
     Arguments:
@@ -153,33 +161,50 @@ def _translate_str(sequence, table, stop_symbol="*", to_stop=False, cds=False, p
     # Check that the pos_stop is a single character
     # By default this is the "X" character (equivalent to "N" for nucleotides)
     if not (isinstance(pos_stop, (bytes, str)) and len(pos_stop) == 1):
-        raise ValueError("Pos_stop must be a single character, not {pos_stop}".format(pos_stop=pos_stop))
+        raise ValueError(
+            "Pos_stop must be a single character, not {pos_stop}".format(
+                pos_stop=pos_stop
+            )
+        )
     if isinstance(pos_stop, bytes):
         pos_stop = pos_stop.decode()
 
     if cds and len(sequence) % 3 != 0:
-        raise CodonTable.TranslationError("Sequence length {0} is not a multiple of three".format(
-            len(sequence)
-        ))
+        raise CodonTable.TranslationError(
+            "Sequence length {0} is not a multiple of three".format(len(sequence))
+        )
     elif gap is not None and (not isinstance(gap, str) or len(gap) > 1):
         raise ValueError("Gap character should be a single character string.")
 
-    forward_table, getter, valid_letters = get_tables(table, to_stop=to_stop, gap=gap, stop_symbol=stop_symbol)
+    forward_table, getter, valid_letters = get_tables(
+        table, to_stop=to_stop, gap=gap, stop_symbol=stop_symbol
+    )
 
     sequence = sequence.upper()
     if not valid_letters.issuperset(set(sequence)):
-        raise CodonTable.TranslationError("Invalid letters in the sequence: {}".format(
-            set.difference(set(sequence), valid_letters)
-        ))
-
-    amino_acids = getter(np.array(
-        [sequence[start:start + 3] for start in range(0, len(sequence) - len(sequence) % 3, 3)], dtype="<U"))
+        raise CodonTable.TranslationError(
+            "Invalid letters in the sequence: {}".format(
+                set.difference(set(sequence), valid_letters)
+            )
+        )
+
+    amino_acids = getter(
+        np.array(
+            [
+                sequence[start : start + 3]
+                for start in range(0, len(sequence) - len(sequence) % 3, 3)
+            ],
+            dtype="<U",
+        )
+    )
 
     if cds and amino_acids[0] != "M":
         if sequence[0:3] in table.start_codons:
             amino_acids[0] = "M"
         else:
-            raise CodonTable.TranslationError("First codon '{0}' is not a start codon".format(sequence[:3]))
+            raise CodonTable.TranslationError(
+                "First codon '{0}' is not a start codon".format(sequence[:3])
+            )
 
     nones = np.where((amino_acids == "None") | (amino_acids is None))[0]
     amino_acids[nones] = pos_stop
@@ -188,13 +213,18 @@ def _translate_str(sequence, table, stop_symbol="*", to_stop=False, cds=False, p
 
     if cds and found_stops > 1:
         raise CodonTable.TranslationError(
-            "Extra in-frame stop codon found. Sequence: {sequence}".format(sequence=sequence))
+            "Extra in-frame stop codon found. Sequence: {sequence}".format(
+                sequence=sequence
+            )
+        )
     elif cds and found_stops and _stop_locations[0] < len(amino_acids) - 1:
         raise CodonTable.TranslationError(
             "Extra in-frame stop codon. Sequence:\n{sequence}\n{spaces}^^^".format(
-            sequence=sequence, spaces=" " * _stop_locations[0] * 3))
+                sequence=sequence, spaces=" " * _stop_locations[0] * 3
+            )
+        )
     if to_stop and found_stops > 0:
-        amino_acids = amino_acids[:_stop_locations[0]]
+        amino_acids = amino_acids[: _stop_locations[0]]
 
     return "".join(amino_acids)
 
@@ -202,7 +232,6 @@ def _translate_str(sequence, table, stop_symbol="*", to_stop=False, cds=False, p
 # These classes do contain lots of things, it is correct like it is
 # pylint: disable=too-many-instance-attributes
 class BED12:
-
     """
     BED12 parsing class.
     """
@@ -211,18 +240,20 @@ class BED12:
 
     _attribute_pattern = re.compile(r"([^;]*)=([^$=]*)(?:;|$)")
 
-    def __init__(self, *args: Union[str, list, tuple, GffLine],
-                 fasta_index=None,
-                 phase=None,
-                 sequence=None,
-                 transcriptomic=False,
-                 max_regression=0,
-                 start_adjustment=True,
-                 coding=True,
-                 lenient=False,
-                 table=0,
-                 logger=create_null_logger()):
-
+    def __init__(
+        self,
+        *args: Union[str, list, tuple, GffLine],
+        fasta_index=None,
+        phase=None,
+        sequence=None,
+        transcriptomic=False,
+        max_regression=0,
+        start_adjustment=True,
+        coding=True,
+        lenient=False,
+        table=0,
+        logger=create_null_logger(),
+    ):
         """
         :param args: the BED12 line.
         :type args: (str, list, tuple, GffLine)
@@ -324,7 +355,7 @@ def __init__(self, *args: Union[str, list, tuple, GffLine],
         self.name = ""
         self.score = 0
         self.strand = None
-        self.rgb = ''
+        self.rgb = ""
         self.stop_codon = self.start_codon = None
         self.__has_start = self.__has_stop = False
         self.__block_sizes = np.zeros(1, dtype=np.int64)
@@ -356,7 +387,7 @@ def __init__(self, *args: Union[str, list, tuple, GffLine],
         self._line = args[0]
         if isinstance(self._line, str) or self._line is None:
             if self._line is None:
-                self._line = ''
+                self._line = ""
             self._line = self._line.rstrip()
             if len(self._line) == 0 or self._line[0] == "#":
                 self.header = True
@@ -384,7 +415,9 @@ def __init__(self, *args: Union[str, list, tuple, GffLine],
                     fasta_length = len(sequence)
                 elif fasta_index:
                     if isinstance(fasta_index, pysam.FastaFile):
-                        fasta_length = fasta_index.get_reference_length(self._line.chrom)
+                        fasta_length = fasta_index.get_reference_length(
+                            self._line.chrom
+                        )
                     elif isinstance(fasta_index, pyfaidx.Fasta):
                         sequence = fasta_index[self._line.chrom]
                         fasta_length = len(fasta_index[self._line.chrom])
@@ -395,7 +428,9 @@ def __init__(self, *args: Union[str, list, tuple, GffLine],
                 self.__set_values_from_gff(fasta_length)
 
         elif not (isinstance(self._line, list) or isinstance(self._line, tuple)):
-            raise InvalidParsingFormat("I need an ordered array, not {0}".format(type(self._line)))
+            raise InvalidParsingFormat(
+                "I need an ordered array, not {0}".format(type(self._line))
+            )
         else:
             self._fields = self._line
             print("Line", self._fields)
@@ -404,7 +439,11 @@ def __init__(self, *args: Union[str, list, tuple, GffLine],
         self.__check_validity(transcriptomic, fasta_index, sequence)
 
         if self.invalid and self.coding:
-            self.logger.debug("%s cannot be coding as it is invalid (reason: %s)", self.chrom, self.invalid_reason)
+            self.logger.debug(
+                "%s cannot be coding as it is invalid (reason: %s)",
+                self.chrom,
+                self.invalid_reason,
+            )
             self.coding = False
 
         if self.coding and self.phase is None:
@@ -438,7 +477,9 @@ def table(self):
 
     @table.setter
     def table(self, table):
-        if isinstance(table, bool):  # Boolean can be considered as int so this requires special handling
+        if isinstance(
+            table, bool
+        ):  # Boolean can be considered as int so this requires special handling
             raise ValueError(f"Invalid table specified: {table} (type {type(table)})")
         elif table is not None and not isinstance(table, (int, float, bytes, str)):
             raise ValueError(f"Invalid table specified: {table} (type {type(table)})")
@@ -461,16 +502,21 @@ def table(self, table):
             self.__table_index = 0
         elif isinstance(table, int):
             if table not in ambiguous_dna_by_id.keys():
-                raise ValueError(f"Invalid table code specified: {table}. Available codes: "
-                                 f"{', '.join([str(_) for _ in ambiguous_dna_by_id.keys()])}")
+                raise ValueError(
+                    f"Invalid table code specified: {table}. Available codes: "
+                    f"{', '.join([str(_) for _ in ambiguous_dna_by_id.keys()])}"
+                )
             self.__table = ambiguous_dna_by_id[table]
-            assert self.__table.start_codons == ["ATG"] if table == 0 else True, f"Invalid codons for table 0: " \
-                                                                                 f"{self.__table.start_codons}"
+            assert self.__table.start_codons == ["ATG"] if table == 0 else True, (
+                f"Invalid codons for table 0: {self.__table.start_codons}"
+            )
             self.__table_index = table
         elif isinstance(table, str):
             if table not in ambiguous_dna_by_name.keys():
-                raise ValueError(f"Invalid table name specified: {table}. Available table: "
-                                 f"{', '.join([str(_) for _ in ambiguous_dna_by_name.keys()])}")
+                raise ValueError(
+                    f"Invalid table name specified: {table}. Available table: "
+                    f"{', '.join([str(_) for _ in ambiguous_dna_by_name.keys()])}"
+                )
             self.__table = ambiguous_dna_by_name[table]
             self.__table_index = ambiguous_dna_by_name[table].id
         return
@@ -482,11 +528,15 @@ def parent(self, parent):
         self.__parent = [parent]
 
     def __getstate__(self):
-
-        state = copy.deepcopy(dict((key, val) for key, val in self.__dict__.items()
-                                   if key not in ("_BED12_table") and
-                                   not isinstance(val, logging.Logger) and
-                                   not isinstance(val, CodonTable.CodonTable)))
+        state = copy.deepcopy(
+            dict(
+                (key, val)
+                for key, val in self.__dict__.items()
+                if key not in ("_BED12_table")
+                and not isinstance(val, logging.Logger)
+                and not isinstance(val, CodonTable.CodonTable)
+            )
+        )
 
         return state
 
@@ -496,7 +546,6 @@ def __setstate__(self, state):
         self.table = self.__table_index
 
     def _parse_attributes(self, attributes):
-
         """
         Private method that parses the last field of the GFF line.
         :return:
@@ -526,15 +575,24 @@ def _parse_attributes(self, attributes):
                 continue
 
     def __set_values_from_fields(self):
-
         """
         Private method that sets the correct values from the fields derived from the input line.
         :return:
         """
-        self.chrom, self.start, self.end, \
-            self.name, self.score, self.strand, \
-            self.thick_start, self.thick_end, self.rgb, \
-            self.block_count, block_sizes, block_starts = self._fields[:12]
+        (
+            self.chrom,
+            self.start,
+            self.end,
+            self.name,
+            self.score,
+            self.strand,
+            self.thick_start,
+            self.thick_end,
+            self.rgb,
+            self.block_count,
+            block_sizes,
+            block_starts,
+        ) = self._fields[:12]
 
         # Reduce memory usage
         intern(self.chrom)
@@ -567,7 +625,6 @@ def __set_values_from_fields(self):
         return
 
     def __set_values_from_bed12(self, line):
-
         self.__setstate__(line.__getstate__())
         return
 
@@ -577,19 +634,24 @@ def __set_values_from_gff(self, fasta_length):
         :return:
         """
 
-        (self.chrom, self.thick_start,
-         self.thick_end, self.strand, self.name) = (self._line.chrom,
-                                                    self._line.start,
-                                                    self._line.end, self._line.strand, self._line.id)
+        (self.chrom, self.thick_start, self.thick_end, self.strand, self.name) = (
+            self._line.chrom,
+            self._line.start,
+            self._line.end,
+            self._line.strand,
+            self._line.id,
+        )
         intern(self.chrom)
         if self.name is None:
-            raise InvalidParsingFormat("{self} should have the name property defined".format(self=repr(self)))
+            raise InvalidParsingFormat(
+                "{self} should have the name property defined".format(self=repr(self))
+            )
         self.start = 1
         self.end = fasta_length
         self.score = self._line.score
         self.rgb = None
         self.block_count = 1
-        self.block_sizes = [self.thick_end - self.thick_start +1]
+        self.block_sizes = [self.thick_end - self.thick_start + 1]
         self.block_starts = [self.thick_start]
         self.has_start_codon = False
         self.has_stop_codon = False
@@ -609,15 +671,21 @@ def __check_validity(self, transcriptomic, fasta_index, sequence):
 
         if transcriptomic is True and self.coding is True:
             if not (fasta_index is not None or sequence is not None):
-                self.logger.debug("No further check on the validity of %s as no sequence has been provided.",
-                                  self.chrom)
+                self.logger.debug(
+                    "No further check on the validity of %s as no sequence has been provided.",
+                    self.chrom,
+                )
                 return
 
         if transcriptomic is True:
             self.has_start_codon = False
             self.has_stop_codon = False
 
-        if transcriptomic is True and self.coding is True and (fasta_index is not None or sequence is not None):
+        if (
+            transcriptomic is True
+            and self.coding is True
+            and (fasta_index is not None or sequence is not None)
+        ):
             self.logger.debug("Starting to check the validity of %s", self.chrom)
             self.validity_checked = True
             if sequence is not None:
@@ -628,7 +696,9 @@ def __check_validity(self, transcriptomic, fasta_index, sequence):
                     sequence = str(sequence)
             else:
                 if self.id not in fasta_index:
-                    self.logger.warning("%s not found in the index. Aborting the check, we will trust the ORF as-is.")
+                    self.logger.warning(
+                        "%s not found in the index. Aborting the check, we will trust the ORF as-is."
+                    )
                     self.__in_index = False
                     return
                 self.fasta_length = len(fasta_index[self.id])
@@ -647,17 +717,32 @@ def __check_validity(self, transcriptomic, fasta_index, sequence):
 
             if self.strand != "-":
                 orf_sequence = sequence[
-                               (self.thick_start - 1 if not self.phase else self.start + self.phase - 1):self.thick_end]
+                    (
+                        self.thick_start - 1
+                        if not self.phase
+                        else self.start + self.phase - 1
+                    ) : self.thick_end
+                ]
             else:
                 orf_sequence = Seq.reverse_complement(
-                    sequence[(self.thick_start - 1):(
-                        self.thick_end if not self.phase else self.end - (3 - self.phase) % 3)])
+                    sequence[
+                        (self.thick_start - 1) : (
+                            self.thick_end
+                            if not self.phase
+                            else self.end - (3 - self.phase) % 3
+                        )
+                    ]
+                )
 
             self.start_codon = str(orf_sequence)[:3].upper()
             self.stop_codon = str(orf_sequence[-3:]).upper()
 
-            if self.start_codon in self.table.start_codons and (self.phase is None or self.phase == 0):
-                self.logger.debug("Found start codon for %s. Setting phase to 0", self.chrom)
+            if self.start_codon in self.table.start_codons and (
+                self.phase is None or self.phase == 0
+            ):
+                self.logger.debug(
+                    "Found start codon for %s. Setting phase to 0", self.chrom
+                )
                 self.has_start_codon = True
                 self.phase = 0
             else:
@@ -674,25 +759,39 @@ def __check_validity(self, transcriptomic, fasta_index, sequence):
                 elif self.strand == "-" and self.thick_start - self.start < 3:
                     self.thick_start = 1
 
-            self.logger.debug("%s with start codon (%s) and stop codon (%s). Valid: %s",
-                              self.chrom, self.has_start_codon, self.has_stop_codon, not self.invalid)
+            self.logger.debug(
+                "%s with start codon (%s) and stop codon (%s). Valid: %s",
+                self.chrom,
+                self.has_start_codon,
+                self.has_stop_codon,
+                not self.invalid,
+            )
 
             # Get only a proper multiple of three
             if self.lenient is False and self.coding is True:
                 if self.strand != "-":
                     orf_sequence = sequence[
-                                   (self.thick_start - 1 if not self.phase
-                                    else self.start + self.phase - 1):self.thick_end]
+                        (
+                            self.thick_start - 1
+                            if not self.phase
+                            else self.start + self.phase - 1
+                        ) : self.thick_end
+                    ]
                 else:
                     orf_sequence = Seq.reverse_complement(
                         sequence[
-                        (self.thick_start - 1):
-                        (self.thick_end if not self.phase else self.end - self.phase)])
+                            (self.thick_start - 1) : (
+                                self.thick_end
+                                if not self.phase
+                                else self.end - self.phase
+                            )
+                        ]
+                    )
 
                 last_pos = -3 - ((len(orf_sequence)) % 3)
-                translated_seq = _translate_str(orf_sequence[:last_pos],
-                                                table=self.table,
-                                                gap='N')
+                translated_seq = _translate_str(
+                    orf_sequence[:last_pos], table=self.table, gap="N"
+                )
 
                 self._internal_stop_codons = str(translated_seq).count("*")
                 if self._internal_stop_codons == 0 and len(orf_sequence[last_pos:]) > 3:
@@ -702,25 +801,31 @@ def __check_validity(self, transcriptomic, fasta_index, sequence):
                         if self.strand == "-":
                             self.thick_start += -last_pos % 3
                             self.logger.warning(
-                                f"Shifting the position of the thick start of {self.name} by {-last_pos % 3}")
+                                f"Shifting the position of the thick start of {self.name} by {-last_pos % 3}"
+                            )
                         else:
                             self.thick_end -= -last_pos % 3
                             self.logger.warning(
-                                f"Shifting the position of the thick end of {self.name} by {-last_pos % 3}")
+                                f"Shifting the position of the thick end of {self.name} by {-last_pos % 3}"
+                            )
 
             del self.invalid
             if self.__is_invalid() is True:
                 return
 
     def _adjust_start(self, sequence, orf_sequence):
-
         if len(orf_sequence) != (self.thick_end - self.thick_start + 1 - self.phase):
             # We are checking that the sequence of the ORF (provided as argument) is the same length as
             # the imputed length of the ORF
-            raise ValueError("The provided orf_sequence of length {lorf} is different from the imputed length of the\
+            raise ValueError(
+                "The provided orf_sequence of length {lorf} is different from the imputed length of the\
  ORF for {sid} (total {ltotal}; thick start {self.thick_start}, thick end {self.thick_end}, phase {self.phase})".format(
-                lorf=len(orf_sequence), ltotal=self.thick_end - self.thick_start + 1 - self.phase,
-                sid=self.name, self=self))
+                    lorf=len(orf_sequence),
+                    ltotal=self.thick_end - self.thick_start + 1 - self.phase,
+                    sid=self.name,
+                    self=self,
+                )
+            )
 
         # Let's check UPstream first.
         # This means that we DO NOT have a starting Met and yet we are starting far upstream.
@@ -728,11 +833,19 @@ def _adjust_start(self, sequence, orf_sequence):
         if self.strand == "+" and self.thick_start > 3:
             for pos in range(self.thick_start, 3, -3):
                 self.thick_start -= 3
-                codon = sequence[pos - 4:pos - 1]
-                is_start, is_stop = ((codon in self.table.start_codons),
-                                     (codon in self.table.stop_codons))
-                self.logger.debug("Checking pos %s (%s) for %s, start: %s; stop: %s",
-                                  pos, codon, self.chrom, is_start, is_stop)
+                codon = sequence[pos - 4 : pos - 1]
+                is_start, is_stop = (
+                    (codon in self.table.start_codons),
+                    (codon in self.table.stop_codons),
+                )
+                self.logger.debug(
+                    "Checking pos %s (%s) for %s, start: %s; stop: %s",
+                    pos,
+                    codon,
+                    self.chrom,
+                    is_start,
+                    is_stop,
+                )
                 if is_start:
                     # We have found a valid methionine.
                     break
@@ -742,20 +855,27 @@ def _adjust_start(self, sequence, orf_sequence):
                     assert self.invalid is True
                     self.logger.debug(
                         "Found in-frame stop codon for %s while expanding, stopping here. Invalid: %s (reason %s)",
-                                        self.chrom, self.invalid, self.invalid_reason)
+                        self.chrom,
+                        self.invalid,
+                        self.invalid_reason,
+                    )
                     break
                 continue
 
         elif self.strand == "-" and self.end - self.thick_end > 3:
             for pos in range(self.thick_end, self.end - 3, 3):
                 self.thick_end += 3
-                codon = Seq.reverse_complement(sequence[pos - 3:pos])
-                is_start, is_stop = ((codon in self.table.start_codons),
-                                     (codon in self.table.stop_codons))
+                codon = Seq.reverse_complement(sequence[pos - 3 : pos])
+                is_start, is_stop = (
+                    (codon in self.table.start_codons),
+                    (codon in self.table.stop_codons),
+                )
                 if is_start:
                     # We have found a valid methionine.
-                    self.logger.debug("Found correct start codon for %s while expanding, stopping here.",
-                                      self.chrom)
+                    self.logger.debug(
+                        "Found correct start codon for %s while expanding, stopping here.",
+                        self.chrom,
+                    )
                     break
                 elif is_stop:
                     self.stop_codon = codon
@@ -763,7 +883,10 @@ def _adjust_start(self, sequence, orf_sequence):
                     assert self.invalid is True
                     self.logger.debug(
                         "Found in-frame stop codon for %s while expanding, stopping here. Invalid: %s (reason %s)",
-                        self.chrom, self.invalid, self.invalid_reason)
+                        self.chrom,
+                        self.invalid,
+                        self.invalid_reason,
+                    )
                     break
         else:
             self._regression(orf_sequence)
@@ -786,30 +909,45 @@ def _adjust_start(self, sequence, orf_sequence):
                 else:
                     self.phase = 0
         else:
-            self.logger.debug("Setting phase of %s at 0 (end: %s; thick end: %s; thick start %s)",
-                              self.chrom, self.end, self.thick_end, self.thick_start)
+            self.logger.debug(
+                "Setting phase of %s at 0 (end: %s; thick end: %s; thick start %s)",
+                self.chrom,
+                self.end,
+                self.thick_end,
+                self.thick_start,
+            )
             self.phase = 0
 
         del self.invalid
         if self.invalid:
-            self.logger.debug("%s is not coding after checking. Reason: %s", self.chrom, self.invalid_reason)
+            self.logger.debug(
+                "%s is not coding after checking. Reason: %s",
+                self.chrom,
+                self.invalid_reason,
+            )
             self.coding = False
 
     def _regression(self, orf_sequence):
         self.logger.debug(
             "Starting the regression algorithm to find an internal start for %s (end: %s; thick start/end: %s, %s; phase %s)",
-            self.chrom, self.end, self.thick_start, self.thick_end, self.phase)
+            self.chrom,
+            self.end,
+            self.thick_start,
+            self.thick_end,
+            self.phase,
+        )
         if self.strand != "-":
             # self.thick_start = self.phase + 3
-            self.logger.debug("Starting to analyse %s; positions %s-%s",
-                              self.chrom,
-                              self.phase + 3,
-                              self.phase + 3 + int(len(orf_sequence) * self.max_regression),
-                              )
-            for pos in range(self.phase + 3,
-                             int(len(orf_sequence) * self.max_regression),
-                             3):
-                codon = orf_sequence[pos:pos + 3]
+            self.logger.debug(
+                "Starting to analyse %s; positions %s-%s",
+                self.chrom,
+                self.phase + 3,
+                self.phase + 3 + int(len(orf_sequence) * self.max_regression),
+            )
+            for pos in range(
+                self.phase + 3, int(len(orf_sequence) * self.max_regression), 3
+            ):
+                codon = orf_sequence[pos : pos + 3]
                 # self.logger.debug("Testing position %s-%s (%s)", pos, pos + 3, codon)
                 if codon in self.table.start_codons:
                     # Now we have to shift the start accordingly
@@ -819,20 +957,26 @@ def _regression(self, orf_sequence):
                     break
                 else:
                     continue
-            self.logger.debug("Final internal coords for %s: %s-%s", self.chrom, self.thick_start, self.thick_end)
+            self.logger.debug(
+                "Final internal coords for %s: %s-%s",
+                self.chrom,
+                self.thick_start,
+                self.thick_end,
+            )
         elif self.strand == "-":
             if self.end - self.thick_end < 3:
                 self.phase = (3 - (self.end - self.thick_end) % 3) % 3
-            self.logger.debug("Starting to analyse %s (phase %s); positions %s-%s",
-                              self.chrom,
-                              self.phase,
-                              self.phase + 3,
-                              self.phase + 3 + int(len(orf_sequence) * self.max_regression),
-                              )
-            for pos in range(self.phase + 3,
-                             int(len(orf_sequence) * self.max_regression),
-                             3):
-                codon = orf_sequence[pos:pos + 3]
+            self.logger.debug(
+                "Starting to analyse %s (phase %s); positions %s-%s",
+                self.chrom,
+                self.phase,
+                self.phase + 3,
+                self.phase + 3 + int(len(orf_sequence) * self.max_regression),
+            )
+            for pos in range(
+                self.phase + 3, int(len(orf_sequence) * self.max_regression), 3
+            ):
+                codon = orf_sequence[pos : pos + 3]
                 # self.logger.debug("Testing position %s-%s (%s)", pos, pos + 3, codon)
                 if codon in self.table.start_codons:
                     # Now we have to shift the start accordingly
@@ -840,7 +984,12 @@ def _regression(self, orf_sequence):
                     self.thick_end -= pos
                     self.phase = 0
                     break
-            self.logger.debug("Final internal coords for %s: %s-%s", self.chrom, self.thick_start, self.thick_end)
+            self.logger.debug(
+                "Final internal coords for %s: %s-%s",
+                self.chrom,
+                self.thick_start,
+                self.thick_end,
+            )
 
     def __repr__(self):
         return pp.saferepr(self.__dict__)
@@ -849,7 +998,6 @@ def __hash__(self):
         return hash(frozenset(self.__getstate__()))
 
     def __str__(self):
-
         if self.header is True:
             if self._line is not None:
                 return self._line
@@ -879,8 +1027,12 @@ def __str__(self):
         line.append(self.block_count)
         line.append(",".join([str(x) for x in self.block_sizes]))
         line.append(",".join([str(x) for x in self.block_starts]))
-        attributes = dict((key.lower(), val) for key, val in self.attributes.items() if key.lower() not in
-                          ("geneid", "gene_id", "name", "phase", "coding", "alias", "id"))
+        attributes = dict(
+            (key.lower(), val)
+            for key, val in self.attributes.items()
+            if key.lower()
+            not in ("geneid", "gene_id", "name", "phase", "coding", "alias", "id")
+        )
         if self.parent is not None:
             attributes["Parent"] = self.parent[0]
             assert "Parent" in attributes
@@ -892,14 +1044,25 @@ def __str__(self):
                 parent = None
             attributes["Parent"] = parent
         if attributes:
-            line.append(";".join(f"{key}={val}" for key, val in attributes.items() if val is not None))
+            line.append(
+                ";".join(
+                    f"{key}={val}" for key, val in attributes.items() if val is not None
+                )
+            )
         return "\t".join([str(x) for x in line])
 
     def __eq__(self, other):
-        for key in ["chrom", "strand", "start",
-                    "end", "thick_start", "thick_end",
-                    "block_count", "block_sizes",
-                    "block_starts"]:
+        for key in [
+            "chrom",
+            "strand",
+            "start",
+            "end",
+            "thick_start",
+            "thick_end",
+            "block_count",
+            "block_sizes",
+            "block_starts",
+        ]:
             if getattr(self, key) != getattr(other, key):
                 return False
         return True
@@ -908,11 +1071,9 @@ def __len__(self):
         return self.end - self.start + 1
 
     def copy(self):
-
         return copy.deepcopy(self)
 
     def as_simple_dict(self):
-
         return {
             "chrom": self.chrom,
             "id": self.id,
@@ -1024,6 +1185,7 @@ def id(self):
             return self.chrom
         else:
             return self.name
+
     # pylint: enable=invalid-name
 
     @property
@@ -1047,9 +1209,10 @@ def lenient(self):
         return self.__lenient
 
     def __is_invalid(self):
-
         if self._internal_stop_codons >= 1:
-            self.invalid_reason = "{} internal stop codons found".format(self._internal_stop_codons)
+            self.invalid_reason = "{} internal stop codons found".format(
+                self._internal_stop_codons
+            )
             return True
 
         if self.fasta_length is None:
@@ -1060,12 +1223,14 @@ def __is_invalid(self):
                 pass
             else:
                 invalid = "thickStart {0} <start {1}: {2}; end {3} <thickEnd {4} {5}"
-                self.invalid_reason = invalid.format(self.thick_start,
-                                                     self.start,
-                                                     self.thick_start < self.start,
-                                                     self.end,
-                                                     self.thick_end,
-                                                     self.thick_end > self.end)
+                self.invalid_reason = invalid.format(
+                    self.thick_start,
+                    self.start,
+                    self.thick_start < self.start,
+                    self.end,
+                    self.thick_end,
+                    self.thick_end > self.end,
+                )
                 return True
 
         if self.transcriptomic is True:
@@ -1075,8 +1240,7 @@ def __is_invalid(self):
 
             if len(self) != self.fasta_length:
                 self.invalid_reason = "FASTA length != BED length: {0} vs. {1}".format(
-                    self.fasta_length,
-                    len(self)
+                    self.fasta_length, len(self)
                 )
                 return True
 
@@ -1085,19 +1249,29 @@ def __is_invalid(self):
             else:
                 if (self.cds_len - self.phase) % 3 != 0:
                     if self.strand == "+" and self.thick_end != self.end:
-                        self.invalid_reason = "Invalid CDS length: {0} % 3 = {1} ({2}-{3}, {4})".format(
-                            self.cds_len - self.phase,
-                            (self.cds_len - self.phase) % 3,
-                            self.thick_start, self.thick_end, self.phase)
+                        self.invalid_reason = (
+                            "Invalid CDS length: {0} % 3 = {1} ({2}-{3}, {4})".format(
+                                self.cds_len - self.phase,
+                                (self.cds_len - self.phase) % 3,
+                                self.thick_start,
+                                self.thick_end,
+                                self.phase,
+                            )
+                        )
                         return True
                     elif self.strand == "-" and self.thick_start != self.start:
-                        self.invalid_reason = "Invalid CDS length: {0} % 3 = {1} ({2}-{3}, {4})".format(
-                            self.cds_len - self.phase,
-                            (self.cds_len - self.phase) % 3,
-                            self.thick_start, self.thick_end, self.phase)
+                        self.invalid_reason = (
+                            "Invalid CDS length: {0} % 3 = {1} ({2}-{3}, {4})".format(
+                                self.cds_len - self.phase,
+                                (self.cds_len - self.phase) % 3,
+                                self.thick_start,
+                                self.thick_end,
+                                self.phase,
+                            )
+                        )
                         return True
 
-        self.invalid_reason = ''
+        self.invalid_reason = ""
         return False
 
     @property
@@ -1133,7 +1307,9 @@ def start(self, value):
         try:
             value = int(value)
         except (ValueError, TypeError):
-            raise ValueError("Start must be an integer, not {}! Value: {}".format(type(value), value))
+            raise ValueError(
+                "Start must be an integer, not {}! Value: {}".format(type(value), value)
+            )
         self.__start = value
         del self.invalid
 
@@ -1151,7 +1327,9 @@ def end(self, value):
         try:
             value = int(value)
         except (ValueError, TypeError):
-            raise ValueError("End must be an integer, not {}! Value: {}".format(type(value), value))
+            raise ValueError(
+                "End must be an integer, not {}! Value: {}".format(type(value), value)
+            )
         self.__end = value
         del self.invalid
 
@@ -1169,7 +1347,11 @@ def thick_start(self, value):
         try:
             value = int(value)
         except (ValueError, TypeError):
-            raise ValueError("Thick start must be an integer, not {}! Value: {}".format(type(value), value))
+            raise ValueError(
+                "Thick start must be an integer, not {}! Value: {}".format(
+                    type(value), value
+                )
+            )
         self.__thick_start = value
         del self.invalid
 
@@ -1187,7 +1369,11 @@ def thick_end(self, value):
         try:
             value = int(value)
         except (ValueError, TypeError):
-            raise ValueError("Thick end must be an integer, not {}! Value: {}".format(type(value), value))
+            raise ValueError(
+                "Thick end must be an integer, not {}! Value: {}".format(
+                    type(value), value
+                )
+            )
         self.__thick_end = value
         del self.invalid
 
@@ -1211,10 +1397,12 @@ def phase(self):
 
     @phase.setter
     def phase(self, val):
-
         if val not in (None, 0, 1, 2):
-            raise ValueError("Invalid frame specified for {}: {}. Must be None or 0, 1, 2".format(
-                self.name, val))
+            raise ValueError(
+                "Invalid frame specified for {}: {}. Must be None or 0, 1, 2".format(
+                    self.name, val
+                )
+            )
         elif self.transcriptomic is True and val not in (0, 1, 2):
             raise ValueError("A transcriptomic BED cannot have null frame.")
         del self.invalid
@@ -1234,7 +1422,11 @@ def block_count(self, value):
         try:
             value = int(value)
         except (ValueError, TypeError):
-            raise ValueError("Block count must be an integer, not {}! Value: {}".format(type(value), value))
+            raise ValueError(
+                "Block count must be an integer, not {}! Value: {}".format(
+                    type(value), value
+                )
+            )
         self.__block_count = value
         del self.invalid
 
@@ -1263,9 +1455,11 @@ def block_starts(self):
     def block_starts(self, starts):
         starts = np.array(starts)
         if not issubclass(starts.dtype.type, np.int64):
-            raise TypeError("Block sizes should be integers! Dtype: {}; array: {}".format(
-                starts.dtype, starts
-            ))
+            raise TypeError(
+                "Block sizes should be integers! Dtype: {}; array: {}".format(
+                    starts.dtype, starts
+                )
+            )
         self.__block_starts = starts
         del self.invalid
 
@@ -1290,11 +1484,20 @@ def _max_regression(self):
     def _max_regression(self, value):
         if not (isinstance(value, (int, float)) and 0 <= value <= 1):
             raise ValueError(
-                "Invalid value specified for _max_regression (must be between 0 and 1): {}".format(value))
+                "Invalid value specified for _max_regression (must be between 0 and 1): {}".format(
+                    value
+                )
+            )
         self.__max_regression = value
 
-    def expand(self, sequence, upstream, downstream, expand_orf=False, logger=create_null_logger()):
-
+    def expand(
+        self,
+        sequence,
+        upstream,
+        downstream,
+        expand_orf=False,
+        logger=create_null_logger(),
+    ):
         # TODO this needs revising. The expand_orf key does not act as it should, as the thick start and
         #  end are changed even when it is set to False.
 
@@ -1317,102 +1520,159 @@ def expand(self, sequence, upstream, downstream, expand_orf=False, logger=create
         """
 
         if upstream < 0 or downstream < 0:
-            raise ValueError("Upstream and downstream must be 0 or positive, not {upstream} and {downstream}".format(
-                upstream=upstream, downstream=downstream))
+            raise ValueError(
+                "Upstream and downstream must be 0 or positive, not {upstream} and {downstream}".format(
+                    upstream=upstream, downstream=downstream
+                )
+            )
 
         if len(sequence) != len(self) + upstream + downstream:
             raise ValueError(
                 "When trying to expand the original sequence of length {lself} of {sid} by {upstream} upstream and {downstream} downstream nucleotides, \
 the total length ({total}) is different from the length of the provided sequence ({lseq}).".format(
-                    sid=self.id, lself=len(self), upstream=upstream, downstream=downstream, total=len(self) + upstream + downstream,
-                    lseq=len(sequence)))
+                    sid=self.id,
+                    lself=len(self),
+                    upstream=upstream,
+                    downstream=downstream,
+                    total=len(self) + upstream + downstream,
+                    lseq=len(sequence),
+                )
+            )
 
         if len(self) == len(sequence):
             logger.debug(
                 "The length of the sequence for {sid} is identical to the length of the original object. No action needed.".format(
-                    sid=self.id))
+                    sid=self.id
+                )
+            )
             return
 
         if self.transcriptomic is False:
-            raise ValueError("This is not a transcriptomic BED12, I cannot expand it!\n{sself}".format(
-                sself=repr(self)))
+            raise ValueError(
+                "This is not a transcriptomic BED12, I cannot expand it!\n{sself}".format(
+                    sself=repr(self)
+                )
+            )
         if self.strand == "-":
             raise NotImplementedError(
-                "{sid} is on the negative strand, I can only expand ORFs on the sense strand".format(sid=self.id))
+                "{sid} is on the negative strand, I can only expand ORFs on the sense strand".format(
+                    sid=self.id
+                )
+            )
 
-        old_sequence = sequence[upstream:len(self) + upstream]
+        old_sequence = sequence[upstream : len(self) + upstream]
         if len(old_sequence) + upstream + downstream != len(sequence):
             raise ValueError(
                 "When trying to expand the original sequence of length {lself} of {sid} by {upstream} upstream and {downstream} downstream nucleotides, \
 the length of the *imputed* old sequence ({lold}) does not tally up with the new sequence ({lnew})".format(
-                    lself=len(self), sid=self.id, upstream=upstream, downstream=downstream,
-                    lold=len(old_sequence), lnew=len(sequence)))
+                    lself=len(self),
+                    sid=self.id,
+                    upstream=upstream,
+                    downstream=downstream,
+                    lold=len(old_sequence),
+                    lnew=len(sequence),
+                )
+            )
         self.fasta_length = len(sequence)
 
         # I presume that the sequence is already in the right orientation
         old_start_pos = self.thick_start + self.phase - 1
         old_end_pos = self.thick_end - (self.thick_end - old_start_pos) % 3
         old_orf = old_sequence[old_start_pos:old_end_pos].upper()
-        logger.debug("Old sequence of %s (%s bps): %s[...]%s", self.id, len(old_sequence),
-                     old_sequence[:10], old_sequence[-10:])
-        logger.debug("Old ORF of %s (%s bps, phase %s): %s[...]%s", self.id, len(old_orf), self.phase,
-                     old_orf[:10], old_orf[-10:])
+        logger.debug(
+            "Old sequence of %s (%s bps): %s[...]%s",
+            self.id,
+            len(old_sequence),
+            old_sequence[:10],
+            old_sequence[-10:],
+        )
+        logger.debug(
+            "Old ORF of %s (%s bps, phase %s): %s[...]%s",
+            self.id,
+            len(old_orf),
+            self.phase,
+            old_orf[:10],
+            old_orf[-10:],
+        )
         # TODO: this function should not fail for non-coding transcripts
         assert len(old_orf) > 0, (old_start_pos, old_end_pos)
         assert len(old_orf) % 3 == 0, (old_start_pos, old_end_pos)
 
         old_pep = _translate_str(old_orf, self.table, gap="N")
         if "*" in old_pep and old_pep.find("*") < len(old_pep) - 1:
-            logger.error("Stop codon found within the ORF of %s (pos %s of %s; phase %s). This is invalid!",
-                         self.id, old_pep.find("*"), len(old_pep), self.phase)
+            logger.error(
+                "Stop codon found within the ORF of %s (pos %s of %s; phase %s). This is invalid!",
+                self.id,
+                old_pep.find("*"),
+                len(old_pep),
+                self.phase,
+            )
 
         self.start_codon = old_orf[:3]
         self.stop_codon = old_orf[-3:]
-        logger.debug("%s: start codon %s, old start %s (%s); stop codon %s, old stop %s (%s)",
-                     self.name, self.start_codon, self.thick_start + self.phase,
-                     (self.thick_start + self.phase + upstream),
-                     self.stop_codon, self.thick_end, (self.thick_end + upstream))
+        logger.debug(
+            "%s: start codon %s, old start %s (%s); stop codon %s, old stop %s (%s)",
+            self.name,
+            self.start_codon,
+            self.thick_start + self.phase,
+            (self.thick_start + self.phase + upstream),
+            self.stop_codon,
+            self.thick_end,
+            (self.thick_end + upstream),
+        )
         # Now expand
         self.end = len(sequence)
         self.thick_start += upstream
         self.thick_end += upstream
         start_codon = str(self.start_codon).upper()
         stop_codon = str(self.stop_codon).upper()
-        self.has_start_codon = (start_codon in self.table.start_codons)
-        self.has_stop_codon = (stop_codon in self.table.stop_codons)
-        self.logger.debug("%s has start codon (%s): %s", self.chrom, start_codon, self.has_start_codon)
-        self.logger.debug("%s has stop codon (%s): %s", self.chrom, stop_codon, self.has_stop_codon)
+        self.has_start_codon = start_codon in self.table.start_codons
+        self.has_stop_codon = stop_codon in self.table.stop_codons
+        self.logger.debug(
+            "%s has start codon (%s): %s", self.chrom, start_codon, self.has_start_codon
+        )
+        self.logger.debug(
+            "%s has stop codon (%s): %s", self.chrom, stop_codon, self.has_stop_codon
+        )
         if expand_orf is True and not (self.has_start_codon and self.has_stop_codon):
             if not self.has_start_codon:
-                for pos in range(old_start_pos + upstream,
-                                 0,
-                                 -3):
-                    codon = sequence[pos:pos + 3].upper()
+                for pos in range(old_start_pos + upstream, 0, -3):
+                    codon = sequence[pos : pos + 3].upper()
 
                     self.thick_start = pos + 1
                     if codon in self.table.start_codons:
                         # self.thick_start = pos
                         self.start_codon = codon
                         self.__has_start = True
-                        logger.debug("Position %d, codon %s. Start codon found.", pos, codon)
+                        logger.debug(
+                            "Position %d, codon %s. Start codon found.", pos, codon
+                        )
                         break
                 if self.start_codon not in self.table.start_codons:
                     self.phase = (self.thick_start - 1) % 3
-                    logger.debug("No start codon found for %s. Thick start %s, new phase: %s",
-                                 self.id, self.thick_start, self.phase)
+                    logger.debug(
+                        "No start codon found for %s. Thick start %s, new phase: %s",
+                        self.id,
+                        self.thick_start,
+                        self.phase,
+                    )
                     self.thick_start = 1
                 else:
                     self.phase = 0
                     self.__has_start = True
 
-            coding_seq = sequence[self.thick_start + self.phase - 1:self.end]
+            coding_seq = sequence[self.thick_start + self.phase - 1 : self.end]
             if len(coding_seq) % 3 != 0:
                 # Only get a multiple of three
-                coding_seq = coding_seq[:-((len(coding_seq)) % 3)]
+                coding_seq = coding_seq[: -((len(coding_seq)) % 3)]
             prot_seq = _translate_str(coding_seq, table=self.table, gap="N")
             if "*" in prot_seq:
-                self.thick_end = self.thick_start + self.phase - 1 + (1 + prot_seq.find("*")) * 3
-                self.stop_codon = coding_seq[prot_seq.find("*") * 3:(1 + prot_seq.find("*")) * 3].upper()
+                self.thick_end = (
+                    self.thick_start + self.phase - 1 + (1 + prot_seq.find("*")) * 3
+                )
+                self.stop_codon = coding_seq[
+                    prot_seq.find("*") * 3 : (1 + prot_seq.find("*")) * 3
+                ].upper()
                 self.__has_stop = True
                 logger.debug("New stop codon for %s: %s", self.name, self.thick_end)
 
@@ -1426,7 +1686,6 @@ def expand(self, sequence, upstream, downstream, expand_orf=False, logger=create
 
     @property
     def blocks(self):
-
         """This will return the coordinates of the blocks, with a 1-offset (as in GFF3)"""
 
         # First thing: calculate where each start point will be
@@ -1436,9 +1695,15 @@ def blocks(self):
 
         return list(zip(_bstarts, _bends))
 
-    def to_transcriptomic(self, sequence=None, fasta_index=None, start_adjustment=False,
-                          lenient=False, alias=None, coding=True):
-
+    def to_transcriptomic(
+        self,
+        sequence=None,
+        fasta_index=None,
+        start_adjustment=False,
+        lenient=False,
+        alias=None,
+        coding=True,
+    ):
         """This method will return a transcriptomic version of the BED12. If the object is already transcriptomic,
         it will return itself."""
 
@@ -1462,27 +1727,42 @@ def to_transcriptomic(self, sequence=None, fasta_index=None, start_adjustment=Fa
 
         # Check thick start and end are defined
 
-        assert tStart is not None and tEnd is not None, f"The thick start, thick end of {self.id} are invalid " \
-                                                        f"as they are outside of the defined exons.\nThick start: " \
-                                                        f"{self.thick_start}\nThick end: {self.thick_end}\n" \
-                                                        f"Exons: {self.blocks}"
+        assert tStart is not None and tEnd is not None, (
+            f"The thick start, thick end of {self.id} are invalid "
+            f"as they are outside of the defined exons.\nThick start: "
+            f"{self.thick_start}\nThick end: {self.thick_end}\n"
+            f"Exons: {self.blocks}"
+        )
 
         if self.strand == "+":
             bsizes = self.block_sizes[:]
         else:
             bsizes = np.flip(self.block_sizes)
-            tStart, tEnd = self.block_sizes.sum() - tEnd, self.block_sizes.sum() - tStart
+            tStart, tEnd = (
+                self.block_sizes.sum() - tEnd,
+                self.block_sizes.sum() - tStart,
+            )
 
         bstarts = np.concatenate([np.zeros(1, dtype=np.int64), bsizes[:-1].cumsum()])
         if not (len(bstarts) == len(bsizes) == self.block_count):
-            raise ValueError("""In {self.id} ({self.chrom}:{self.start}-{self.end}) there is a discrepancy between block \
+            raise ValueError(
+                """In {self.id} ({self.chrom}:{self.start}-{self.end}) there is a discrepancy between block \
 starts (# {lbstarts}, {bstarts}), block sizes (# {lbsizes}, {bsizes}) and block counts (# {self.block_count}). \
-This is invalid""".format(self=self, lbstarts=len(bstarts), lbsizes=len(bsizes), bstarts=bstarts, bsizes=bsizes))
+This is invalid""".format(
+                    self=self,
+                    lbstarts=len(bstarts),
+                    lbsizes=len(bsizes),
+                    bstarts=bstarts,
+                    bsizes=bsizes,
+                )
+            )
 
         if self.coding:
-            new_name = "ID={};coding={};phase={}".format(self.name.split(";")[0],
-                                                         self.coding,
-                                                         self.phase if self.phase is not None else 0)
+            new_name = "ID={};coding={};phase={}".format(
+                self.name.split(";")[0],
+                self.coding,
+                self.phase if self.phase is not None else 0,
+            )
         else:
             new_name = "ID={};coding={}".format(self.name.split(";")[0], self.coding)
 
@@ -1492,31 +1772,32 @@ def to_transcriptomic(self, sequence=None, fasta_index=None, start_adjustment=Fa
         if not self.coding:
             tStart, tEnd = 0, 1
 
-        new = list((self.name.split(";")[0],
-                    0,
-                    self.block_sizes.sum(),
-                    new_name,
-                    self.score,
-                    "+"))
-
-        new.extend(list((
-            tStart,
-            tEnd,
-            self.rgb,
-            self.block_count,
-            bsizes,
-            bstarts
-        )))
-
-        new = BED12(new,
-                    phase=self.phase,
-                    sequence=sequence,
-                    coding=self.coding,
-                    fasta_index=fasta_index,
-                    transcriptomic=True,
-                    lenient=lenient,
-                    start_adjustment=start_adjustment)
-        assert isinstance(new, type(self)), f"The new object is of type {type(new)} instead of {type(self)}!"
+        new = list(
+            (
+                self.name.split(";")[0],
+                0,
+                self.block_sizes.sum(),
+                new_name,
+                self.score,
+                "+",
+            )
+        )
+
+        new.extend(list((tStart, tEnd, self.rgb, self.block_count, bsizes, bstarts)))
+
+        new = BED12(
+            new,
+            phase=self.phase,
+            sequence=sequence,
+            coding=self.coding,
+            fasta_index=fasta_index,
+            transcriptomic=True,
+            lenient=lenient,
+            start_adjustment=start_adjustment,
+        )
+        assert isinstance(new, type(self)), (
+            f"The new object is of type {type(new)} instead of {type(self)}!"
+        )
         return new
 
     @property
@@ -1544,7 +1825,9 @@ def logger(self, logger):
             if not isinstance(logger, logging.Logger):
                 raise TypeError(
                     "Objects of type {tself} accept only logging.Logger instances as loggers, not {tlog}!".format(
-                        tself=type(self), tlog=type(logger)))
+                        tself=type(self), tlog=type(logger)
+                    )
+                )
             self.__logger = logger
         self.__logger.propagate = False
 
@@ -1556,15 +1839,18 @@ class Bed12Parser(Parser):
 
     __annot_type__ = "bed12"
 
-    def __init__(self, handle,
-                 fasta_index=None,
-                 transcriptomic=False,
-                 max_regression=0,
-                 start_adjustment=True,
-                 is_gff=False,
-                 coding=False,
-                 logger=create_null_logger(),
-                 table=0):
+    def __init__(
+        self,
+        handle,
+        fasta_index=None,
+        transcriptomic=False,
+        max_regression=0,
+        start_adjustment=True,
+        is_gff=False,
+        coding=False,
+        logger=create_null_logger(),
+        table=0,
+    ):
         """
         Constructor method.
         :param handle: the input BED file.
@@ -1591,7 +1877,7 @@ def __init__(self, handle,
         self.__closed = False
         self.header = False
         self.__table = table
-        self._is_bed12 = (not is_gff)
+        self._is_bed12 = not is_gff
         self.__line_counter = 0
 
     @staticmethod
@@ -1599,8 +1885,8 @@ def __set_fasta_index(fasta_index):
         if isinstance(fasta_index, dict):
             # check that this is a bona fide dictionary ...
             assert isinstance(
-                fasta_index[random.choice(fasta_index.keys())],
-                Bio.SeqRecord.SeqRecord)
+                fasta_index[random.choice(fasta_index.keys())], Bio.SeqRecord.SeqRecord
+            )
         elif fasta_index is not None:
             if isinstance(fasta_index, (str, bytes)):
                 if isinstance(fasta_index, bytes):
@@ -1615,14 +1901,23 @@ def __iter__(self):
         return self
 
     def __next__(self, seq=None):
-
         try:
             if self._is_bed12 is True:
                 return self.bed_next()
             else:
                 return self.gff_next()
-        except (ValueError, KeyError, TypeError, UnicodeError, AttributeError, AssertionError, InvalidParsingFormat) as exc:
-            raise InvalidParsingFormat(f"This is not a valid BED12 file! Exception: {exc}")
+        except (
+            ValueError,
+            KeyError,
+            TypeError,
+            UnicodeError,
+            AttributeError,
+            AssertionError,
+            InvalidParsingFormat,
+        ) as exc:
+            raise InvalidParsingFormat(
+                f"This is not a valid BED12 file! Exception: {exc}"
+            )
 
     def __getstate__(self):
         state = super().__getstate__()
@@ -1649,17 +1944,29 @@ def bed_next(self):
             line = next(self._handle)
             self.__line_counter += 1
             try:
-                bed12 = BED12(line,
-                              fasta_index=self.fasta_index,
-                              transcriptomic=self.transcriptomic,
-                              max_regression=self._max_regression,
-                              coding=self.coding,
-                              table=self.__table,
-                              logger=self.logger,
-                              start_adjustment=self.start_adjustment)
-            except (ValueError, TypeError, CodonTable.TranslationError, KeyError, InvalidParsingFormat) as exc:
+                bed12 = BED12(
+                    line,
+                    fasta_index=self.fasta_index,
+                    transcriptomic=self.transcriptomic,
+                    max_regression=self._max_regression,
+                    coding=self.coding,
+                    table=self.__table,
+                    logger=self.logger,
+                    start_adjustment=self.start_adjustment,
+                )
+            except (
+                ValueError,
+                TypeError,
+                CodonTable.TranslationError,
+                KeyError,
+                InvalidParsingFormat,
+            ) as exc:
                 error = "Invalid line for file {name}, line {counter}:\n{line}\nError: {exc}".format(
-                    name=self.name, counter=self.__line_counter, line=line.rstrip(), exc=exc)
+                    name=self.name,
+                    counter=self.__line_counter,
+                    line=line.rstrip(),
+                    exc=exc,
+                )
                 raise InvalidParsingFormat(error)
         return bed12
 
@@ -1675,25 +1982,47 @@ def gff_next(self):
             self.__line_counter += 1
             try:
                 gff_line = GffLine(line)
-            except (ValueError, TypeError, CodonTable.TranslationError, KeyError, InvalidParsingFormat) as exc:
+            except (
+                ValueError,
+                TypeError,
+                CodonTable.TranslationError,
+                KeyError,
+                InvalidParsingFormat,
+            ) as exc:
                 error = "Invalid line for file {name}, line {counter}:\n{line}\nError: {exc}".format(
-                    name=self.name, counter=self.__line_counter, line=line.rstrip(), exc=exc)
+                    name=self.name,
+                    counter=self.__line_counter,
+                    line=line.rstrip(),
+                    exc=exc,
+                )
                 raise InvalidParsingFormat(error)
 
             if gff_line.feature != "CDS":
                 continue
             # Compatibility with BED12
             try:
-                bed12 = BED12(gff_line,
-                              fasta_index=self.fasta_index,
-                              transcriptomic=self.transcriptomic,
-                              max_regression=self._max_regression,
-                              table=self.__table,
-                              start_adjustment=self.start_adjustment,
-                              logger=self.logger)
-            except (ValueError, TypeError, CodonTable.TranslationError, KeyError, InvalidParsingFormat) as exc:
+                bed12 = BED12(
+                    gff_line,
+                    fasta_index=self.fasta_index,
+                    transcriptomic=self.transcriptomic,
+                    max_regression=self._max_regression,
+                    table=self.__table,
+                    start_adjustment=self.start_adjustment,
+                    logger=self.logger,
+                )
+            except (
+                ValueError,
+                TypeError,
+                CodonTable.TranslationError,
+                KeyError,
+                InvalidParsingFormat,
+            ) as exc:
                 error = "Invalid line for file {name}, line {counter}:\n{line}\nError: {exc}".format(
-                    name=self.name, counter=self.__line_counter, line=line.rstrip(), exc=exc)
+                    name=self.name,
+                    counter=self.__line_counter,
+                    line=line.rstrip(),
+                    exc=exc,
+                )
                 raise InvalidParsingFormat(error)
         # raise NotImplementedError("Still working on this!")
         return bed12
@@ -1719,7 +2048,10 @@ def _max_regression(self):
     def _max_regression(self, value):
         if not (isinstance(value, (int, float)) and 0 <= value <= 1):
             raise ValueError(
-                "Invalid value specified for _max_regression (must be between 0 and 1): {}".format(value))
+                "Invalid value specified for _max_regression (must be between 0 and 1): {}".format(
+                    value
+                )
+            )
         self.__max_regression = value
 
     @property
@@ -1736,21 +2068,21 @@ def coding(self, coding):
 
 
 class Bed12ParseWrapper(mp.Process):
-
-    def __init__(self,
-                 identifier=None,
-                 rec_queue=None,
-                 return_queue=None,
-                 log_queue=None,
-                 level="DEBUG",
-                 fasta_index=None,
-                 transcriptomic=False,
-                 max_regression=0,
-                 is_gff=False,
-                 coding=False,
-                 start_adjustment=True,
-                 table=0):
-
+    def __init__(
+        self,
+        identifier=None,
+        rec_queue=None,
+        return_queue=None,
+        log_queue=None,
+        level="DEBUG",
+        fasta_index=None,
+        transcriptomic=False,
+        max_regression=0,
+        is_gff=False,
+        coding=False,
+        start_adjustment=True,
+        table=0,
+    ):
         """
         :param send_queue:
         :type send_queue: mp.Queue
@@ -1775,8 +2107,8 @@ def __init__(self,
         if isinstance(fasta_index, dict):
             # check that this is a bona fide dictionary ...
             assert isinstance(
-                fasta_index[random.choice(fasta_index.keys())],
-                Bio.SeqRecord.SeqRecord)
+                fasta_index[random.choice(fasta_index.keys())], Bio.SeqRecord.SeqRecord
+            )
         elif fasta_index is not None:
             if isinstance(fasta_index, (str, bytes)):
                 if isinstance(fasta_index, bytes):
@@ -1790,7 +2122,7 @@ def __init__(self,
         self.__closed = False
         self.header = False
         self.__table = table
-        self._is_bed12 = (not is_gff)
+        self._is_bed12 = not is_gff
 
     def bed_next(self, line, sequence=None):
         """
@@ -1799,15 +2131,23 @@ def bed_next(self, line, sequence=None):
         """
 
         try:
-            bed12 = BED12(line,
-                          logger=self.logger,
-                          sequence=sequence,
-                          transcriptomic=self.transcriptomic,
-                          max_regression=self._max_regression,
-                          start_adjustment=self.start_adjustment,
-                          coding=self.coding,
-                          table=self.__table)
-        except (ValueError, TypeError, CodonTable.TranslationError, KeyError, InvalidParsingFormat) as exc:
+            bed12 = BED12(
+                line,
+                logger=self.logger,
+                sequence=sequence,
+                transcriptomic=self.transcriptomic,
+                max_regression=self._max_regression,
+                start_adjustment=self.start_adjustment,
+                coding=self.coding,
+                table=self.__table,
+            )
+        except (
+            ValueError,
+            TypeError,
+            CodonTable.TranslationError,
+            KeyError,
+            InvalidParsingFormat,
+        ) as exc:
             raise InvalidParsingFormat("Invalid line: {}".format(line))
         return bed12
 
@@ -1819,20 +2159,28 @@ def gff_next(self, line, sequence):
 
         try:
             line = GffLine(line)
-        except (ValueError, TypeError, CodonTable.TranslationError, KeyError, InvalidParsingFormat) as exc:
+        except (
+            ValueError,
+            TypeError,
+            CodonTable.TranslationError,
+            KeyError,
+            InvalidParsingFormat,
+        ) as exc:
             error = "Invalid line:\n{}".format(line)
             raise InvalidParsingFormat(error)
 
         if line.feature != "CDS":
             return None
             # Compatibility with BED12
-        bed12 = BED12(line,
-                      logger=self.logger,
-                      sequence=sequence,
-                      transcriptomic=self.transcriptomic,
-                      max_regression=self._max_regression,
-                      start_adjustment=self.start_adjustment,
-                      table=self.__table)
+        bed12 = BED12(
+            line,
+            logger=self.logger,
+            sequence=sequence,
+            transcriptomic=self.transcriptomic,
+            max_regression=self._max_regression,
+            start_adjustment=self.start_adjustment,
+            table=self.__table,
+        )
         # raise NotImplementedError("Still working on this!")
         return bed12
 
@@ -1868,13 +2216,19 @@ def run(self, *args, **kwargs):
                 if not row or row.header is True:
                     continue
                 if row.invalid is True:
-                    self.logger.warning("Invalid entry, reason: %s\n%s",
-                                        row.invalid_reason,
-                                        row)
+                    self.logger.warning(
+                        "Invalid entry, reason: %s\n%s", row.invalid_reason, row
+                    )
                     continue
-                # self.cache[num] = 
+                # self.cache[num] =
                 self.return_queue.put((num, msgpack.dumps(row.as_simple_dict())))
             except AttributeError:
                 pass
-            except (ValueError, TypeError, CodonTable.TranslationError, KeyError, InvalidParsingFormat) as exc:
+            except (
+                ValueError,
+                TypeError,
+                CodonTable.TranslationError,
+                KeyError,
+                InvalidParsingFormat,
+            ) as exc:
                 raise InvalidParsingFormat(line)

From a4191362f44cc70985cdececa46025b908788b6d Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Fri, 28 Nov 2025 14:50:55 +0000
Subject: [PATCH 02/10] fix: prevent treating NNN as a stop codon #469

---
 Mikado/parsers/bed12.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Mikado/parsers/bed12.py b/Mikado/parsers/bed12.py
index 7c95ffe4..64c30e21 100644
--- a/Mikado/parsers/bed12.py
+++ b/Mikado/parsers/bed12.py
@@ -790,7 +790,7 @@ def __check_validity(self, transcriptomic, fasta_index, sequence):
 
                 last_pos = -3 - ((len(orf_sequence)) % 3)
                 translated_seq = _translate_str(
-                    orf_sequence[:last_pos], table=self.table, gap="N"
+                    orf_sequence[:last_pos], table=self.table
                 )
 
                 self._internal_stop_codons = str(translated_seq).count("*")
@@ -1598,7 +1598,7 @@ def expand(
         assert len(old_orf) > 0, (old_start_pos, old_end_pos)
         assert len(old_orf) % 3 == 0, (old_start_pos, old_end_pos)
 
-        old_pep = _translate_str(old_orf, self.table, gap="N")
+        old_pep = _translate_str(old_orf, self.table)
         if "*" in old_pep and old_pep.find("*") < len(old_pep) - 1:
             logger.error(
                 "Stop codon found within the ORF of %s (pos %s of %s; phase %s). This is invalid!",
@@ -1665,7 +1665,7 @@ def expand(
             if len(coding_seq) % 3 != 0:
                 # Only get a multiple of three
                 coding_seq = coding_seq[: -((len(coding_seq)) % 3)]
-            prot_seq = _translate_str(coding_seq, table=self.table, gap="N")
+            prot_seq = _translate_str(coding_seq, table=self.table)
             if "*" in prot_seq:
                 self.thick_end = (
                     self.thick_start + self.phase - 1 + (1 + prot_seq.find("*")) * 3

From 4cb9770f3e8da8626b56960c692acf997dccb7ea Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Fri, 28 Nov 2025 14:52:11 +0000
Subject: [PATCH 03/10] fix: update actions/cache to v4 in gh actions

---
 .github/workflows/python-package.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index a06fb747..dc068fbf 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -23,14 +23,14 @@ jobs:
         sudo apt update
         sudo apt install -y build-essential zlib1g-dev zlib1g
     - uses: actions/checkout@v2
-    - uses: actions/cache@v2
+    - uses: actions/cache@v4
       if: startsWith(runner.os, 'Linux')
       with:
         path: ~/.cache/pip
         key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
         restore-keys: |
           ${{ runner.os }}-pip-
-    - uses: actions/cache@v2
+    - uses: actions/cache@v4
       if: startsWith(runner.os, 'macOS')
       with:
         path: ~/Library/Caches/pip
@@ -39,7 +39,7 @@ jobs:
           ${{ runner.os }}-pip-
     - name: Cache conda
       id: cache-miniconda
-      uses: actions/cache@v2
+      uses: actions/cache@v4
       env:
         CACHE_NUMBER: 0
       with:

From 9a0fe2fd82c327bbb8ee0b9f0cb03848a15e8871 Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Mon, 1 Dec 2025 12:38:15 +0000
Subject: [PATCH 04/10] chore: format with ruff

---
 Mikado/tests/test_bed12.py | 80 ++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 34 deletions(-)

diff --git a/Mikado/tests/test_bed12.py b/Mikado/tests/test_bed12.py
index f507f46a..6c4d1d44 100644
--- a/Mikado/tests/test_bed12.py
+++ b/Mikado/tests/test_bed12.py
@@ -5,7 +5,6 @@
 
 
 class TestTranslate(unittest.TestCase):
-
     """
     >>>
     >>> table = CodonTable.ambiguous_dna_by_id[1]
@@ -29,7 +28,6 @@ class TestTranslate(unittest.TestCase):
     """
 
     def test_from_Bio(self):
-
         self.assertEqual(_translate_str("TAN", standard, pos_stop="X"), "X")
         for codon, amino in standard.forward_table.forward_table.items():
             self.assertEqual(_translate_str(codon, standard), amino)
@@ -51,12 +49,16 @@ def test_from_Bio(self):
             _translate_str("AAACCCTAG", standard, cds=True)
         with self.assertRaises(CodonTable.TranslationError) as exc:
             _translate_str("ATGCCCTAGCCCTAG", standard, cds=True)
-        self.assertTrue(str(exc.exception).startswith("Extra in-frame stop codon found."),
-                        str(exc.exception))
+        self.assertTrue(
+            str(exc.exception).startswith("Extra in-frame stop codon found."),
+            str(exc.exception),
+        )
         with self.assertRaises(CodonTable.TranslationError) as exc:
             _translate_str("ATGCCCTAGCCCTAT", standard, cds=True)
-        self.assertTrue(str(exc.exception).startswith("Extra in-frame stop codon. Sequence:"),
-                        str(exc.exception))
+        self.assertTrue(
+            str(exc.exception).startswith("Extra in-frame stop codon. Sequence:"),
+            str(exc.exception),
+        )
         for invalid in (10, "AB", b"NT"):
             with self.assertRaises(ValueError):
                 _translate_str("ATGCCCTAG", standard, cds=True, gap=invalid)
@@ -85,12 +87,20 @@ def test_ncbi_standard(self):
         self.assertEqual(_translate_str("ATGCCCTAG", standard, cds=True), "MP*")
         self.assertEqual(_translate_str("ATGCCCTAG", standard, to_stop=True), "MP")
         self.assertEqual(_translate_str("CTGCCCTAG", standard, cds=True), "MP*")
-        self.assertEqual(_translate_str("CTGCCCTAG", standard, cds=True, to_stop=True), "MP")
+        self.assertEqual(
+            _translate_str("CTGCCCTAG", standard, cds=True, to_stop=True), "MP"
+        )
 
     def test_ambiguous(self):
         ambigouous = None
         for key, table in CodonTable.ambiguous_dna_by_id.items():
-            amb = 0 < len([c for c in table._codon_table.forward_table.keys() if c in table.stop_codons])
+            amb = 0 < len(
+                [
+                    c
+                    for c in table._codon_table.forward_table.keys()
+                    if c in table.stop_codons
+                ]
+            )
             if amb:
                 amb = table
                 break
@@ -120,15 +130,12 @@ def test_set_table(self):
 
 
 class Bed12GenToTrans(unittest.TestCase):
-
     def setUp(self):
         pass
 
     def test_positive_mono_transfer(self):
-
         string_bed = "1\t10\t500\ttest\t0\t+\t300\t390\t0\t1\t490\t0"
 
-
         bed = BED12(string_bed)
         self.assertFalse(bed.invalid)
         self.assertFalse(bed.header)
@@ -150,10 +157,9 @@ def test_positive_mono_transfer(self):
         self.assertEqual(tbed.thick_end, 380)
         self.assertTrue(tbed.has_start_codon)
         self.assertTrue(tbed.has_stop_codon)
-        self.assertEqual(seq[tbed.thick_start - 1:tbed.thick_end], "ATG" * 29 + "TGA")
+        self.assertEqual(seq[tbed.thick_start - 1 : tbed.thick_end], "ATG" * 29 + "TGA")
 
     def test_negative_mono_transfer(self):
-
         string_bed = "1\t10\t500\ttest\t0\t-\t300\t390\t0\t1\t490\t0"
 
         bed = BED12(string_bed)
@@ -179,10 +185,9 @@ def test_negative_mono_transfer(self):
         self.assertTrue(tbed.has_start_codon)
         self.assertTrue(tbed.has_stop_codon)
 
-        self.assertEqual(seq[tbed.thick_start - 1:tbed.thick_end], "ATG" * 29 + "TGA")
+        self.assertEqual(seq[tbed.thick_start - 1 : tbed.thick_end], "ATG" * 29 + "TGA")
 
     def test_diexonic_pos_transfer(self):
-
         string_bed = "1\t10\t1000\ttest\t0\t+\t80\t920\t0\t2\t190,200\t0,790"
         bed = BED12(string_bed)
         self.assertFalse(bed.invalid or bed.header)
@@ -206,10 +211,9 @@ def test_diexonic_pos_transfer(self):
         self.assertTrue(tbed.has_start_codon)
         self.assertTrue(tbed.has_stop_codon)
 
-        self.assertEqual(seq[tbed.thick_start - 1:tbed.thick_end], "ATG" * 79 + "TAA")
+        self.assertEqual(seq[tbed.thick_start - 1 : tbed.thick_end], "ATG" * 79 + "TAA")
 
     def test_diexonic_neg_transfer(self):
-
         string_bed = "1\t10\t1000\ttest\t0\t-\t80\t920\t0\t2\t190,200\t0,790"
         bed = BED12(string_bed)
         self.assertFalse(bed.invalid or bed.header)
@@ -233,10 +237,9 @@ def test_diexonic_neg_transfer(self):
         self.assertTrue(tbed.has_start_codon)
         self.assertTrue(tbed.has_stop_codon)
 
-        self.assertEqual(seq[tbed.thick_start - 1:tbed.thick_end], "ATG" * 79 + "TAA")
+        self.assertEqual(seq[tbed.thick_start - 1 : tbed.thick_end], "ATG" * 79 + "TAA")
 
     def test_wheat_1(self):
-
         string_bed = "chr7A\t207087445\t207089574\tTraesCS7A01G235400.1\t0\t-\t207087615\t207088433\t0\t3\t457,393,30\t0,603,2099"
         string_seq = """CGCGTCGGTGCATCCGGATACGTCGCCTGGGCTACACAATGGCGCTGATCGATTGGATAG
 AACTGAGTGATGATGCAGAGATTATTGAATTGAGCAGTAGCGAGGAGAATGTCGAAGAAT
@@ -272,10 +275,13 @@ def test_wheat_1(self):
 
         bed = BED12(string_bed)
         self.assertFalse(bed.invalid or bed.header)
-        self.assertEqual(bed.start, 207087445+1)
+        self.assertEqual(bed.start, 207087445 + 1)
         self.assertEqual(bed.end, 207089574)
         self.assertEqual(bed.strand, "-")
-        self.assertEqual(bed.blocks, [(207087446,207087902), (207088049,207088441), (207089545,207089574) ])
+        self.assertEqual(
+            bed.blocks,
+            [(207087446, 207087902), (207088049, 207088441), (207089545, 207089574)],
+        )
         self.assertEqual(bed.thick_start, 207087616)
         self.assertEqual(bed.thick_end, 207088433)
 
@@ -284,11 +290,12 @@ def test_wheat_1(self):
         self.assertEqual(tbed.thick_end - tbed.thick_start + 1, 672)
         self.assertEqual(tbed.thick_start, string_seq.index("ATGGCGCTGATCGATTGGA") + 1)
         self.assertEqual(tbed.thick_start, 39)
-        self.assertEqual(tbed.thick_end, string_seq.index("CTCGGCAGATAG") + len("CTCGGCAGATAG"))
-        self.assertEqual(string_cds, string_seq[tbed.thick_start - 1:tbed.thick_end])
+        self.assertEqual(
+            tbed.thick_end, string_seq.index("CTCGGCAGATAG") + len("CTCGGCAGATAG")
+        )
+        self.assertEqual(string_cds, string_seq[tbed.thick_start - 1 : tbed.thick_end])
 
     def test_mono_pos_bed_with_phase(self):
-
         string = "1\t10\t101\tID=test;phase=2;coding=True\t0\t+\t10\t101\t0\t1\t91\t0"
 
         seq = "A" + "CGG" * 29 + "TAA"
@@ -311,7 +318,6 @@ def test_mono_pos_bed_with_phase(self):
         self.assertTrue(tbed.transcriptomic)
 
     def test_mono_neg_bed_with_phase(self):
-
         string = "1\t10\t101\tID=test;phase=2;coding=True\t0\t-\t10\t101\t0\t1\t91\t0"
 
         seq = "A" + "CGG" * 29 + "TAA"
@@ -334,7 +340,6 @@ def test_mono_neg_bed_with_phase(self):
         self.assertTrue(tbed.transcriptomic)
 
     def test_diex_pos_bed_with_phase_one(self):
-
         string = "1\t10\t111\tID=test;phase=1;coding=True\t0\t+\t10\t101\t0\t1\t101\t0"
 
         seq = "A" + "CGG" * 29 + "TAA" + "A" * 10
@@ -357,7 +362,6 @@ def test_diex_pos_bed_with_phase_one(self):
         self.assertTrue(tbed.has_stop_codon)
 
     def test_diex_neg_bed_with_phase_one(self):
-
         string = "1\t10\t300\tID=test;phase=1;coding=True\t0\t-\t70\t300\t0\t2\t90,100\t0,190"
 
         seq = "A" + "CGG" * 42 + "TAA" + "A" * 60
@@ -380,7 +384,6 @@ def test_diex_neg_bed_with_phase_one(self):
         self.assertTrue(tbed.has_stop_codon)
 
     def test_diex_pos_bed_with_phase_two(self):
-
         string = "1\t9\t111\tID=test;phase=2;coding=True\t0\t+\t9\t101\t0\t1\t102\t0"
 
         seq = "GA" + "CGG" * 29 + "TAA" + "A" * 10
@@ -403,7 +406,6 @@ def test_diex_pos_bed_with_phase_two(self):
         self.assertTrue(tbed.has_stop_codon)
 
     def test_diex_neg_bed_with_phase_two(self):
-
         string = "1\t10\t301\tID=test;phase=2;coding=True\t0\t-\t70\t301\t0\t2\t90,101\t0,190"
 
         seq = "GA" + "CGG" * 42 + "TAA" + "A" * 60
@@ -426,7 +428,6 @@ def test_diex_neg_bed_with_phase_two(self):
         self.assertTrue(tbed.has_stop_codon)
 
     def test_tran_to_bed12_neg(self):
-
         for end, phase in [(299, 0), (300, 1), (301, 2)]:
             with self.subTest():
                 t = Transcript()
@@ -443,9 +444,20 @@ def test_tran_to_bed12_neg(self):
                 self.assertFalse(r.invalid)
 
     def test_touching_exons(self):
-
-        bed12line = ["chr1", 172601, 175626, "ID=foo.1", 100, "-", 172601, 175626, "0,0,0", 3, "199,1281,861,",
-                     "0,199,2164"]
+        bed12line = [
+            "chr1",
+            172601,
+            175626,
+            "ID=foo.1",
+            100,
+            "-",
+            172601,
+            175626,
+            "0,0,0",
+            3,
+            "199,1281,861,",
+            "0,199,2164",
+        ]
         bed = BED12(bed12line, transcriptomic=False)
         self.assertFalse(bed.invalid, bed.invalid_reason)
         t = Transcript(bed)
@@ -455,4 +467,4 @@ def test_touching_exons(self):
 
 
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()

From 199b690ce1f19d5002fae33500232fd6d5d548e7 Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Mon, 1 Dec 2025 12:42:24 +0000
Subject: [PATCH 05/10] feat: update unittest for the NNN change #469

---
 Mikado/tests/test_bed12.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Mikado/tests/test_bed12.py b/Mikado/tests/test_bed12.py
index 6c4d1d44..1bd32114 100644
--- a/Mikado/tests/test_bed12.py
+++ b/Mikado/tests/test_bed12.py
@@ -14,6 +14,10 @@ class TestTranslate(unittest.TestCase):
     '*'
     >>> _translate_str("TAN", table)
     'X'
+    >>> _translate_str("NNN", table)
+    'X'
+    >>> _translate_str("NNN", table, gap="N")
+    '*'
     >>> _translate_str("TAN", table, pos_stop="@")
     '@'
     >>> _translate_str("TA?", table)
@@ -37,6 +41,8 @@ def test_from_Bio(self):
 
         self.assertEqual(_translate_str("TAN", standard, pos_stop="U"), "U")
         self.assertEqual(_translate_str("TAN", standard, pos_stop=b"U"), "U")
+        self.assertEqual(_translate_str("NNN", standard), "X")
+        self.assertEqual(_translate_str("NNN", standard, gap="N"), "*")
         with self.assertRaises(CodonTable.TranslationError):
             _translate_str("TA?", standard)
 

From 0b0df26d564ef090efa84ba83bc5b5fd365806e9 Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Mon, 1 Dec 2025 13:02:54 +0000
Subject: [PATCH 06/10] fix: use latest miniforge version in gh actions

---
 .github/workflows/python-package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index dc068fbf..13dce218 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -52,7 +52,7 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
         miniforge-variant: Mambaforge
-        miniforge-version: 4.9.2-4
+        miniforge-version: latest
         # mamba-version: "*"
         # channels: conda-forge, defaults
         channels: conda-forge, bioconda, defaults, anaconda

From e0a12f37c54adeff7734ceddc36824d7b5de844a Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Wed, 3 Dec 2025 14:20:13 +0000
Subject: [PATCH 07/10] fix: resolve libmamba "Could not solve for environment
 specs" error by updating to pysam==0.23.3 and diamond==2.1.16 and removing
 the anaconda and defaults channels

---
 environment.yml  | 8 +++-----
 requirements.txt | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/environment.yml b/environment.yml
index daed53ab..692109a5 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,9 +1,7 @@
 name: mikado2
 channels:
-  - bioconda
   - conda-forge
-  - defaults
-  - anaconda
+  - bioconda
 dependencies:
   - python>=3.9,<3.11
   - cython==0.29.32
@@ -15,7 +13,7 @@ dependencies:
   - networkx==2.8.7
   - numpy==1.23.3
   - pandas==1.5.0
-  - pysam==0.19.1
+  - pysam==0.23.3
   - pyyaml==6.0.1
   - scipy==1.11.1
   - snakemake==6.15.5
@@ -35,5 +33,5 @@ dependencies:
   - samtools>=1.11
   - htslib>=1.11
   - prodigal==2.6.3
-  - diamond==2.0.11
+  - diamond==2.1.16
   - portcullis==1.2.4
diff --git a/requirements.txt b/requirements.txt
index 563fffc1..dbde9262 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ msgpack==1.0.4
 networkx==2.8.7
 numpy==1.23.3
 pandas==1.5.0
-pysam==0.19.1
+pysam==0.23.3
 pyyaml==6.0.1
 scipy==1.11.1
 snakemake==6.15.5

From b4e64f1e696a07bf1f13d9b764f6c45e8ac0ba0b Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Wed, 3 Dec 2025 14:23:36 +0000
Subject: [PATCH 08/10] feat(ci): update gh actions codeql-analysis.yml

---
 .github/workflows/codeql-analysis.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 46acc186..2527fcbf 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -39,7 +39,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v1
+      uses: github/codeql-action/init@v2
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -50,7 +50,7 @@ jobs:
     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v1
+      uses: github/codeql-action/autobuild@v2
 
     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl
@@ -64,4 +64,4 @@ jobs:
     #   make release
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v1
+      uses: github/codeql-action/analyze@v2

From 09ae6d2299f9f154df03efa103eebd983c3984de Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Wed, 3 Dec 2025 14:24:43 +0000
Subject: [PATCH 09/10] feat: pin versions in pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2fbd8447..119b5a3b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools","wheel","cython","numpy","scipy"]
+requires = ["setuptools==65.4.1","wheel==0.37.1","Cython>=0.29.32","numpy==1.23.3","scipy==1.11.1"]
 build-backend = "setuptools.build_meta"
 
 [pytest]

From 01ea8406a2cdefb295f134a03d1c401968431807 Mon Sep 17 00:00:00 2001
From: Gemy Kaithakottil <gemygk@gmail.com>
Date: Wed, 3 Dec 2025 14:28:41 +0000
Subject: [PATCH 10/10] feat(ci): start using mamba-org/setup-micromamba
 instead of conda-incubator/setup-miniconda

feat(ci): set channel_alias to the prefix.dev (Pixi) package repository - https://repo.prefix.dev

feat(ci): update actions to latest versions
---
 .github/workflows/python-package.yml | 49 ++++++++++++++--------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 13dce218..56143b19 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -11,8 +11,7 @@ jobs:
         shell: bash -el {0}
     strategy:
       matrix:
-        # python-version: [ "3.8", "3.9" ]
-        python-version: [ "3.9" ]
+        python-version: [ "3.9", "3.10" ]
         # os: [ubuntu-latest, macos-latest]
         os: [ubuntu-latest]
     steps:
@@ -22,7 +21,11 @@ jobs:
         export DEBIAN_FRONTEND=noninteractive
         sudo apt update
         sudo apt install -y build-essential zlib1g-dev zlib1g
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v5
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v6
+      with:
+        python-version: ${{ matrix.python-version }}
     - uses: actions/cache@v4
       if: startsWith(runner.os, 'Linux')
       with:
@@ -46,37 +49,33 @@ jobs:
         path: ~/conda_pkgs_dir
         key:
           ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('./environment.yml') }}
-    - uses: actions/checkout@v4
-    - uses: conda-incubator/setup-miniconda@v2
-      name: setup-Mambaforge
+    - uses: mamba-org/setup-micromamba@v2
+      name: setup-Micromamba
       with:
-        python-version: ${{ matrix.python-version }}
-        miniforge-variant: Mambaforge
-        miniforge-version: latest
-        # mamba-version: "*"
-        # channels: conda-forge, defaults
-        channels: conda-forge, bioconda, defaults, anaconda
-        channel-priority: true
-        activate-environment: "mikado2"
         environment-file: ./environment.yml
-        use-mamba: true
-        # use-only-tar-bz2: true  # IMPORTANT: This needs to be set for caching to work properly!
-    - name: Verify conda environment
+        environment-name: mikado2
+        cache-environment: true
+        condarc: |
+          channels:
+            - conda-forge
+            - bioconda
+          channel_priority: flexible
+          channel_alias: https://repo.prefix.dev
+    - name: Verify micromamba environment
       run: |
-        conda info --envs
-        conda env list
-        conda activate mikado2
-        conda list
+        micromamba info
+        micromamba activate mikado2
+        micromamba list
     - name: Install dependencies
       run: |
-        conda activate mikado2
+        micromamba activate mikado2
         python --version
         gcc --version
         pip --version
+        pip install numpy==1.23.3 cython==0.29.32 pytest-cov
         pip install -r requirements.txt
         python -c "import pysam; print(pysam.__version__)"
-        pip install Cython pytest-cov
-        python setup.py develop
+        pip install --no-deps --editable .
     - name: Test light
       run: |
         pytest -m slow Mikado/tests/test_light.py::LightTest::test_subprocess_multi_empty_orfs
@@ -90,7 +89,7 @@ jobs:
         pytest -m 'not triage';
     - name: Upload coverage to Codecov
       if: startsWith(runner.os, 'Linux')
-      uses: codecov/codecov-action@v1
+      uses: codecov/codecov-action@v3
     # - name: Test daijin
     #   if: startsWith(runner.os, 'Linux')
     #   run: |