From 415865d67d4fb1cbae2dc3b71883b693ad43bab4 Mon Sep 17 00:00:00 2001 From: Sam Chorlton <> Date: Sat, 24 May 2025 16:50:23 -0700 Subject: [PATCH] try adding capsid --- sierralocal/nucaminohook.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sierralocal/nucaminohook.py b/sierralocal/nucaminohook.py index 6a87c72..2273032 100644 --- a/sierralocal/nucaminohook.py +++ b/sierralocal/nucaminohook.py @@ -73,6 +73,7 @@ def __init__(self, algorithm, binary=None, program='post'): # initialize gene map self.pol_start = 2085 self.pol_nuc_map = { + 'CA': (1186, 1878), # NOTE(review): both coordinates lie before pol_start (2085) - confirm the pol-relative gene map handles this 'PR': (2253, 2549), 'RT': (2550, 4229), # incorrectly includes RNAse, emulating sierrapy 'IN': (4230, 5096) @@ -374,8 +375,6 @@ def get_genes(self, pol_aligned_sites, pol_first_aa, pol_last_aa): """ Determines the first POL gene that is present in the query sequence, by virtue of gene breakpoints - TODO: sierra uses different minimum numbers of sites per gene - (40, 60 and 30 for PR, RT and IN) @param pol_aligned_sites: list, sublist holds alignment program output aligned POL sites @param pol_first_aa: int, location of first amino acid in pol @@ -385,7 +384,12 @@ def get_genes(self, pol_aligned_sites, pol_first_aa, pol_last_aa): first na position in pol, last na position in pol] """ # good here - min_overlap = {'PR': 40, 'RT': 60, 'IN': 30} + min_overlap = { + 'PR': 40, + 'RT': 60, + 'IN': 30, + 'CA': 30, # arbitrary: no source could be found for the PR/RT/IN thresholds above + } genes = [] for gene, bounds in self.gene_map.items(): aa_start, aa_end = bounds