From 7752853ca7c95fd29aee91b8fafc90f841970ca0 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 6 May 2025 06:20:18 -0400
Subject: [PATCH 01/21] processing amelogenin from UAS sample details report
 [skip ci]

---
 lusSTR/data/str_markers.json | 19 +++++++++++++++++++
 lusSTR/scripts/marker.py     | 24 ++++++++++++++++++++++++
 lusSTR/wrappers/convert.py   |  2 --
 lusSTR/wrappers/format.py    |  3 ++-
 4 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/lusSTR/data/str_markers.json b/lusSTR/data/str_markers.json
index 38c613d5..1527c157 100644
--- a/lusSTR/data/str_markers.json
+++ b/lusSTR/data/str_markers.json
@@ -1,4 +1,23 @@
 {
+    "AMELOGENIN": {
+        "BasesToSubtract": 47,
+        "NumRepeats": 1,
+        "Repeats": [
+            "AAAGTG"
+        ],
+        "NumBasesToSeparate": 0,
+        "ReverseCompNeeded": "No",
+        "LUS": "",
+        "Sec": "",
+        "Tert": "",
+        "Foren_5": 26,
+        "Foren_3": 37,
+        "Power_5": 10,
+        "Power_3": 37,
+        "Custom_5": 10,
+        "Custom_3": 37,
+        "Alleles": ["0", "1"]
+    },
     "CSF1PO": {
         "BasesToSubtract": 0,
         "NumRepeats": 1,
diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py
index ab91ae6f..bef170b6 100644
--- a/lusSTR/scripts/marker.py
+++ b/lusSTR/scripts/marker.py
@@ -355,6 +355,29 @@ def summary(self):
         ]
 
 
+class STRMarker_Amelogenin(STRMarker):
+    @property
+    def canonical(self):
+        if self.uas_sequence == "AAAGTG":
+            return "Y"
+        else:
+            return "X"
+
+    @property
+    def summary(self):
+        return [
+            self.uas_sequence,
+            self.forward_sequence,
+            self.custom_sequence,
+            self.uas_sequence,
+            self.uas_sequence,
+            self.uas_sequence,
+            self.canonical,
+            "NA",
+            "NA",
+        ]
+
+
 class STRMarker_D8S1179(STRMarker):
     @property
     def flank_5p(self):
@@ -1742,6 +1765,7 @@ def flank_5p(self):
 
 def STRMarkerObject(locus, sequence, software, custom=False, kit="forenseq"):
     constructors = {
+        "AMELOGENIN": STRMarker_Amelogenin,
         "D8S1179": STRMarker_D8S1179,
         "D13S317": STRMarker_D13S317,
         "D20S482": STRMarker_D20S482,
diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index cc79317b..4841d242 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -58,8 +58,6 @@ def format_table(input, software, kit="forenseq", custom=False):
             locus = "PENTA E"
         if locus == "DYS385A/B" or locus == "DYS385":
             locus = "DYS385A-B"
-        if locus == "AMELOGENIN":
-            continue
         metadata = str_marker_data[locus]
         if kit == "forenseq":
             remove_5p = metadata["Foren_5"]
diff --git a/lusSTR/wrappers/format.py b/lusSTR/wrappers/format.py
index 410bc303..4e7c080f 100644
--- a/lusSTR/wrappers/format.py
+++ b/lusSTR/wrappers/format.py
@@ -59,7 +59,7 @@ def parse_str_table_from_sheet(infile, sheet, exclude=None):
 
 
 def uas_format(infile, sexloci=False):
-    auto_strs = parse_str_table_from_sheet(infile, sheet="Autosomal STRs", exclude=["Amelogenin"])
+    auto_strs = parse_str_table_from_sheet(infile, sheet="Autosomal STRs")
     sex_strs = None
     if sexloci is True:
         y_strs = parse_str_table_from_sheet(infile, "Y STRs")
@@ -71,6 +71,7 @@ def uas_format(infile, sexloci=False):
 def nonuas_load(inpath, software, sexloci=False):
     """Format a directory of STRait Razor/GeneMarker output files."""
     locus_list = [
+        "AMELOGENIN",
         "CSF1PO",
         "D10S1248",
         "D12S391",

From fb465c4d715b31e1c34ff3e23ea0f34c99040e73 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 7 May 2025 06:27:29 -0400
Subject: [PATCH 02/21] steps through convert can use amelogenin [skip ci]

---
 lusSTR/data/str_markers.json |  4 +--
 lusSTR/scripts/marker.py     | 68 +++++++++++++++++++++++++++++-------
 lusSTR/wrappers/convert.py   |  6 +++-
 lusSTR/wrappers/format.py    |  2 +-
 4 files changed, 64 insertions(+), 16 deletions(-)

diff --git a/lusSTR/data/str_markers.json b/lusSTR/data/str_markers.json
index 1527c157..1cd584b6 100644
--- a/lusSTR/data/str_markers.json
+++ b/lusSTR/data/str_markers.json
@@ -1,6 +1,6 @@
 {
     "AMELOGENIN": {
-        "BasesToSubtract": 47,
+        "BasesToSubtract": 0,
         "NumRepeats": 1,
         "Repeats": [
             "AAAGTG"
@@ -16,7 +16,7 @@
         "Power_3": 37,
         "Custom_5": 10,
         "Custom_3": 37,
-        "Alleles": ["0", "1"]
+        "Alleles": ["X", "Y"]
     },
     "CSF1PO": {
         "BasesToSubtract": 0,
diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py
index bef170b6..666bf76e 100644
--- a/lusSTR/scripts/marker.py
+++ b/lusSTR/scripts/marker.py
@@ -63,7 +63,10 @@ def __init__(self, locus, sequence, software, custom=False, kit="forenseq"):
 
     @property
     def repeat_size(self):
-        return len(self.data["LUS"])
+        if self.data["LUS"] != "":
+            return len(self.data["LUS"])
+        else:
+            return 1
 
     @property
     def repeats(self):
@@ -356,6 +359,20 @@ def summary(self):
 
 
 class STRMarker_Amelogenin(STRMarker):
+    @property
+    def forward_sequence(self):
+        if self.software == "uas":
+            return self.sequence
+        front, back = self._uas_bases_to_trim()
+        if len(self.sequence) == 0:
+            back = None
+        else:
+            back *= -1
+        if self.sequence[front:back] == "":
+            return ""
+        else:
+            return self.sequence[front:back]
+
     @property
     def canonical(self):
         if self.uas_sequence == "AAAGTG":
@@ -363,19 +380,46 @@ def canonical(self):
         else:
             return "X"
 
+    @property
+    def convert(self):
+        if self.forward_sequence == "":
+            return ""
+        else:
+            return self.forward_sequence
+
+    @property
+    def custom_brack(self):
+        if self.forward_sequence == "":
+            return ""
+        else:
+            return "NA"
+
     @property
     def summary(self):
-        return [
-            self.uas_sequence,
-            self.forward_sequence,
-            self.custom_sequence,
-            self.uas_sequence,
-            self.uas_sequence,
-            self.uas_sequence,
-            self.canonical,
-            "NA",
-            "NA",
-        ]
+        if self.uas_sequence == "AAAGTG":
+            return [
+                "AAAGTG",
+                "AAAGTG",
+                "AAAGTG",
+                "AAAGTG",
+                "NA",
+                "NA",
+                "Y",
+                "NA",
+                "NA",
+            ]
+        elif self.uas_sequence == "":
+            return [
+                "",
+                "",
+                "",
+                "",
+                "NA",
+                "NA",
+                "X",
+                "NA",
+                "NA",
+            ]
 
 
 class STRMarker_D8S1179(STRMarker):
diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index 4841d242..dbef0c5b 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -65,7 +65,11 @@ def format_table(input, software, kit="forenseq", custom=False):
         else:
             remove_5p = metadata["Power_5"]
             remove_3p = metadata["Power_3"]
-        if len(sequence) <= (remove_5p + remove_3p) and software != "uas":
+        if (
+            len(sequence) <= (remove_5p + remove_3p)
+            and software != "uas"
+            and locus != "AMELOGENIN"
+        ):
             flank_summary = [
                 sampleid,
                 project,
diff --git a/lusSTR/wrappers/format.py b/lusSTR/wrappers/format.py
index 4e7c080f..1ce3aa08 100644
--- a/lusSTR/wrappers/format.py
+++ b/lusSTR/wrappers/format.py
@@ -71,7 +71,7 @@ def uas_format(infile, sexloci=False):
 def nonuas_load(inpath, software, sexloci=False):
     """Format a directory of STRait Razor/GeneMarker output files."""
     locus_list = [
-        "AMELOGENIN",
+        "Amelogenin",
         "CSF1PO",
         "D10S1248",
         "D12S391",

From 7a7ad3bdfdbdfc5323f2365c2b033283369d0667 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 8 May 2025 06:32:00 -0400
Subject: [PATCH 03/21] fixed convert step for crappy sequences in amel and
 filtering amel sequences [skip ci]

---
 lusSTR/data/filters.json          | 18 +++++++++++++
 lusSTR/scripts/filter_settings.py | 38 +++++++++++++++++++++++++++-
 lusSTR/scripts/marker.py          | 42 ++++++++++++++++++++-----------
 lusSTR/wrappers/convert.py        |  2 +-
 lusSTR/wrappers/filter.py         |  4 +++
 5 files changed, 88 insertions(+), 16 deletions(-)

diff --git a/lusSTR/data/filters.json b/lusSTR/data/filters.json
index 64f06b4b..d241505c 100644
--- a/lusSTR/data/filters.json
+++ b/lusSTR/data/filters.json
@@ -1,4 +1,22 @@
 {
+    "AMELOGENIN": {
+        "MinimumNumberReadsForDynamicThresholds": 650,
+        "DetectionThresholdStaticCount": 10,
+        "DetectionThresholdDynamicPercent": 0,
+        "DetectionThresholdUse": "Static",
+        "AnalyticalThresholdStaticCount": 20,
+        "AnalyticalThresholdDynamicPercent": 0.017,
+        "AnalyticalThresholdUse": "Both",
+        "StochasticThresholdStaticCount": 20,
+        "StochasticThresholdDynamicPercent": 0.017,
+        "StochasticThresholdUse": "Both",
+        "MinimumHeterozygousBalanceThresholdDynamicPercent": 0.50,
+        "SameSizeThresholdDynamicPercent": 0,
+        "StutterThresholdDynamicPercent": 0,
+        "StutterForwardThresholdDynamicPercent": 0,
+        "Intercept": 0,
+        "Slope": 0
+    },
     "CSF1PO": {
         "MinimumNumberReadsForDynamicThresholds": 650,
         "DetectionThresholdStaticCount": 10,
diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py
index e639a33b..63d1fc76 100644
--- a/lusSTR/scripts/filter_settings.py
+++ b/lusSTR/scripts/filter_settings.py
@@ -28,7 +28,9 @@ def get_filter_metadata_file():
 
 def filters(locus_allele_info, locus, locus_reads, datatype, brack_col):
     metadata = filter_marker_data[locus]
-    if len(locus_allele_info) == 1:
+    if locus == "AMELOGENIN":
+        locus_allele_info = filter_amel(metadata, locus_allele_info, locus_reads)
+    elif len(locus_allele_info) == 1:
         locus_allele_info = single_allele_thresholds(metadata, locus_reads, locus_allele_info)
     else:
         locus_allele_info, locus_reads = multiple_allele_thresholds(
@@ -42,6 +44,40 @@ def filters(locus_allele_info, locus, locus_reads, datatype, brack_col):
     return locus_allele_info
 
 
+def filter_amel(metadata, amel_df, locus_reads):
+    for filter in ["Detection", "Analytical"]:
+        use = metadata[f"{filter}ThresholdUse"]
+        count = metadata[f"{filter}ThresholdStaticCount"]
+        perc = metadata[f"{filter}ThresholdDynamicPercent"]
+        thresh_perc = round(perc * locus_reads, 1)
+        if (
+            use.lower() == "dynamic"
+            and locus_reads < metadata["MinimumNumberReadsForDynamicThresholds"]
+        ):
+            use = "static"
+        if use.lower() == "both":
+            thresh = thresh_perc if thresh_perc >= count else count
+        elif use.lower() == "static":
+            thresh = count
+        elif use.lower() == "dynamic":
+            thresh = thresh_perc
+        if filter == "Detection":
+            amel_dt = amel_df[amel_df["Reads"] >= thresh].reset_index(drop=True)
+            locus_reads = amel_df["Reads"].sum()
+        else:
+            for i in range(len(amel_dt)):
+                al_reads = amel_dt.loc[i, "Reads"]
+                if al_reads < thresh:
+                    amel_dt.loc[i, ["allele_type", "perc_noise"]] = [
+                        "BelowAT",
+                        round(al_reads / locus_reads, 3),
+                    ]
+                else:
+                    amel_dt.loc[i, "allele_tpye"] = "Typed"
+    print(amel_dt)
+    return amel_dt
+
+
 def single_allele_thresholds(metadata, locus_reads, single_all_df):
     if thresholds("Detection", metadata, locus_reads, single_all_df["Reads"][0])[1] is False:
         single_all_df = pd.DataFrame()
diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py
index 666bf76e..2400cfaf 100644
--- a/lusSTR/scripts/marker.py
+++ b/lusSTR/scripts/marker.py
@@ -377,8 +377,10 @@ def forward_sequence(self):
     def canonical(self):
         if self.uas_sequence == "AAAGTG":
             return "Y"
-        else:
+        elif self.uas_sequence == "":
             return "X"
+        else:
+            return self.uas_sequence
 
     @property
     def convert(self):
@@ -396,19 +398,19 @@ def custom_brack(self):
 
     @property
     def summary(self):
-        if self.uas_sequence == "AAAGTG":
-            return [
-                "AAAGTG",
-                "AAAGTG",
-                "AAAGTG",
-                "AAAGTG",
-                "NA",
-                "NA",
-                "Y",
-                "NA",
-                "NA",
-            ]
-        elif self.uas_sequence == "":
+        # if self.uas_sequence == "AAAGTG":
+        #    return [
+        #        "AAAGTG",
+        #        "AAAGTG",
+        #        "AAAGTG",
+        #        "AAAGTG",
+        #        "NA",
+        #        "NA",
+        #        "Y",
+        #        "NA",
+        #        "NA",
+        #    ]
+        if self.uas_sequence == "":
             return [
                 "",
                 "",
@@ -420,6 +422,18 @@ def summary(self):
                 "NA",
                 "NA",
             ]
+        else:
+            return [
+                self.uas_sequence,
+                self.forward_sequence,
+                self.custom_sequence,
+                self.convert,
+                self.convert,
+                self.custom_brack,
+                self.canonical,
+                "NA",
+                "NA",
+            ]
 
 
 class STRMarker_D8S1179(STRMarker):
diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index dbef0c5b..2ad397f5 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -69,7 +69,7 @@ def format_table(input, software, kit="forenseq", custom=False):
             len(sequence) <= (remove_5p + remove_3p)
             and software != "uas"
             and locus != "AMELOGENIN"
-        ):
+        ) or (locus == "AMELOGENIN" and len(sequence) < (remove_5p + remove_3p)):
             flank_summary = [
                 sampleid,
                 project,
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 782b6be2..ce422c62 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -28,6 +28,7 @@
 
 
 strs = [
+    "AMELOGENIN",
     "CSF1PO",
     "D10S1248",
     "D12S391",
@@ -146,6 +147,8 @@ def process_strs(dict_loc, datatype, seq_col, brack_col):
             filtered_df = filtered_df.replace({"nan": None})
             final_df = pd.concat([final_df, filtered_df])
             flags_df = pd.concat([flags_df, flags(filtered_df, datatype)])
+    # elif locus == "AMELOGENIN":
+    #    final_df = pd.concet([final_df, data_order])
     if datatype == "ce" or datatype == "ngs":
         try:
             final_df = final_df.astype({"CE_Allele": "float64", "Reads": "int"})
@@ -155,6 +158,7 @@ def process_strs(dict_loc, datatype, seq_col, brack_col):
 
 
 def EFM_output(profile, outfile, profile_type, data_type, col, sex, separate=False):
+    profile = profile[profile["Locus"] != "AMELOGENIN"]
     if profile_type == "reference":
         profile = profile.query("allele_type == 'Typed'")
     else:

From 086c7fe0036473fea0b87f818003acfd933fbbf5 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 13 May 2025 06:18:52 -0400
Subject: [PATCH 04/21] fixed typo in amel filtering function [skip ci]

---
 lusSTR/scripts/filter_settings.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py
index 63d1fc76..77ed5d0c 100644
--- a/lusSTR/scripts/filter_settings.py
+++ b/lusSTR/scripts/filter_settings.py
@@ -73,8 +73,7 @@ def filter_amel(metadata, amel_df, locus_reads):
                         round(al_reads / locus_reads, 3),
                     ]
                 else:
-                    amel_dt.loc[i, "allele_tpye"] = "Typed"
-    print(amel_dt)
+                    amel_dt.loc[i, "allele_type"] = "Typed"
     return amel_dt
 
 

From 4634d425c97a519f9060dcfe1e9838c2f34b9eed Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 14 May 2025 06:18:47 -0400
Subject: [PATCH 05/21] amelogenin now plotting correctly in pdf [skip ci]

---
 lusSTR/wrappers/filter.py | 40 ++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index ce422c62..19736cee 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -147,13 +147,11 @@ def process_strs(dict_loc, datatype, seq_col, brack_col):
             filtered_df = filtered_df.replace({"nan": None})
             final_df = pd.concat([final_df, filtered_df])
             flags_df = pd.concat([flags_df, flags(filtered_df, datatype)])
-    # elif locus == "AMELOGENIN":
-    #    final_df = pd.concet([final_df, data_order])
-    if datatype == "ce" or datatype == "ngs":
-        try:
-            final_df = final_df.astype({"CE_Allele": "float64", "Reads": "int"})
-        except KeyError:
-            final_df = None
+    # if datatype == "ce" or datatype == "ngs":
+    #    try:
+    #        final_df = final_df.astype({"CE_Allele": "float64", "Reads": "int"})
+    #    except KeyError:
+    #        final_df = None
     return final_df, flags_df
 
 
@@ -266,6 +264,7 @@ def determine_max_num_alleles(allele_heights):
 
 
 def STRmix_output(profile, outdir, profile_type, data_type, seq_col):
+    profile = profile[profile["Locus"] != "AMELOGENIN"]
     Path(outdir).mkdir(parents=True, exist_ok=True)
     if profile_type == "reference":
         filtered_df = profile.query("allele_type == 'Typed'")
@@ -365,7 +364,6 @@ def format_ref_table(new_rows, sample_data, datatype):
 
 def marker_plots(df, output_name, sex, wd="."):
     Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
-    df["CE_Allele"] = df["CE_Allele"].astype(float)
     filt_df = df[df["allele_type"] == "Typed"]
     for sample_id in df["SampleID"].unique():
         with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
@@ -398,6 +396,12 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
             n += 1
             colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
             marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
+            if marker == "AMELOGENIN":
+                for i, row in marker_df.iterrows():
+                    marker_df.loc[i, "CE_Allele"] = (
+                        0 if marker_df.loc[i, "CE_Allele"] == "X" else 1
+                    )
+            marker_df["CE_Allele"] = marker_df["CE_Allele"].astype(float)
             ax = fig.add_subplot(6, 5, n)
             p = ax.bar(
                 marker_df["CE_Allele"],
@@ -411,15 +415,25 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
                 ax.text(round(min(marker_df["CE_Allele"])) - 0.9, at + (at * 0.1), f"AT", size=12)
             labels = marker_df["Type"].unique()
             handles = [plt.Rectangle((0, 0), 1, 1, color=colors[l]) for l in labels]
+            if marker == "AMELOGENIN":
+                plt.xlim(-1, 2)
+                ax.set_xticks(np.arange(-1, 3, 1))
+                labels_x = ["", "X", "Y", ""]
+                ax.set_xticklabels(labels_x)
             if not filters:
                 plt.legend(handles, labels, title="Allele Type")
             else:
                 for i, row in marker_df.iterrows():
-                    marker_df.loc[i, "Label"] = (
-                        str(int(marker_df.loc[i, "CE_Allele"]))
-                        if ".0" in str(marker_df.loc[i, "CE_Allele"])
-                        else str(marker_df.loc[i, "CE_Allele"])
-                    )
+                    if marker == "AMELOGENIN":
+                        marker_df.loc[i, "Label"] = (
+                            "X" if marker_df.loc[i, "CE_Allele"] == 0 else "Y"
+                        )
+                    else:
+                        marker_df.loc[i, "Label"] = (
+                            str(int(marker_df.loc[i, "CE_Allele"]))
+                            if ".0" in str(marker_df.loc[i, "CE_Allele"])
+                            else str(marker_df.loc[i, "CE_Allele"])
+                        )
                 ax.bar_label(p, labels=marker_df["Label"])
             if sameyaxis:
                 plt.ylim(0, max_yvalue)

From 992e608c8b3de2dd602a22bb9d9e5e0993e2fa6f Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 15 May 2025 06:20:05 -0400
Subject: [PATCH 06/21] fixed bug in combining reads when using custom sequence
 ranges [skip ci]

---
 lusSTR/wrappers/convert.py | 42 +++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index 2ad397f5..cc54fb6f 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -220,8 +220,36 @@ def check_vwa(marker, sequence, software, custom):
     return new_marker
 
 
-def combine_reads(table, columns):
-    comb_table = table.groupby(columns[:-1], as_index=False)["Reads"].sum()
+def combine_reads(table, columns, custom=False):
+    if custom:
+        print(table)
+        comb_table = (
+            table.groupby(
+                [
+                    "SampleID",
+                    "Project",
+                    "Analysis",
+                    "Locus",
+                    "Custom_Range_Sequence",
+                    "Custom_Bracketed_Notation",
+                    "CE_Allele",
+                ]
+            )
+            .agg(
+                {
+                    "UAS_Output_Sequence": lambda x: ", ".join(x),
+                    "Forward_Strand_Sequence": lambda x: ", ".join(x),
+                    "UAS_Output_Bracketed_Notation": lambda x: ", ".join(x),
+                    "Forward_Strand_Bracketed_Notation": lambda x: ", ".join(x),
+                    "LUS": lambda x: ", ".join(x),
+                    "LUS_Plus": lambda x: ", ".join(x),
+                    "Reads": "sum",
+                }
+            )
+            .reset_index()
+        )
+    else:
+        comb_table = table.groupby(columns[:-1], as_index=False)["Reads"].sum()
     sorted = sort_table(comb_table)
     return sorted
 
@@ -239,7 +267,7 @@ def remove_columns(column_list, remove_list):
     return column_list
 
 
-def create_custom_outputtable(columns, table):
+def create_custom_outputtable(columns, table, custom):
     remove_list = [
         "UAS_Output_Sequence",
         "Forward_Strand_Sequence",
@@ -275,7 +303,9 @@ def main(input, out, kit, software, sex, nocombine, custom):
                 sex_final_table = combine_reads(sex_final_table, sex_columns)
                 sex_final_table.to_csv(f"{full_table_name}_sexloci.txt", sep="\t", index=False)
                 if custom:
-                    sex_table_custom = create_custom_outputtable(sex_columns, sex_final_table)
+                    sex_table_custom = create_custom_outputtable(
+                        sex_columns, sex_final_table, custom=True
+                    )
                     sex_table_custom.to_csv(f"{output_name}_sexloci.txt", index=False, sep="\t")
         else:
             sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False)
@@ -289,7 +319,9 @@ def main(input, out, kit, software, sex, nocombine, custom):
             autosomal_final_table = combine_reads(autosomal_final_table, columns)
             autosomal_final_table.to_csv(f"{full_table_name}.txt", sep="\t", index=False)
             if custom:
-                custom_table_comb = create_custom_outputtable(columns, autosomal_final_table)
+                custom_table_comb = create_custom_outputtable(
+                    columns, autosomal_final_table, custom=True
+                )
                 custom_table_comb.to_csv(out, sep="\t", index=False)
     else:
         autosomal_final_table.to_csv(out, sep="\t", index=False)

From dc557e0fce5e9102e8630ff0960ce4a45e8de12d Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Fri, 16 May 2025 05:47:27 -0400
Subject: [PATCH 07/21] fixed bug with custom sequence ranges in amel [skip ci]

---
 lusSTR/scripts/marker.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py
index 2400cfaf..4468f335 100644
--- a/lusSTR/scripts/marker.py
+++ b/lusSTR/scripts/marker.py
@@ -373,6 +373,22 @@ def forward_sequence(self):
         else:
             return self.sequence[front:back]
 
+    @property
+    def custom_sequence(self):
+        if self.custom:
+            custom_front = self.data["Custom_5"]
+            custom_back = self.data["Custom_3"]
+            if custom_back == 0:
+                custom_back = None
+            else:
+                custom_back *= -1
+            if self.sequence[custom_front:custom_back] == "":
+                return ""
+            else:
+                return self.sequence[custom_front:custom_back]
+        else:
+            return None
+
     @property
     def canonical(self):
         if self.uas_sequence == "AAAGTG":
@@ -398,18 +414,6 @@ def custom_brack(self):
 
     @property
     def summary(self):
-        # if self.uas_sequence == "AAAGTG":
-        #    return [
-        #        "AAAGTG",
-        #        "AAAGTG",
-        #        "AAAGTG",
-        #        "AAAGTG",
-        #        "NA",
-        #        "NA",
-        #        "Y",
-        #        "NA",
-        #        "NA",
-        #    ]
         if self.uas_sequence == "":
             return [
                 "",

From 4ce279cabae641554810b5b88374aa2789648bda Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Mon, 19 May 2025 06:31:31 -0400
Subject: [PATCH 08/21] began implementing amel into GUI marker plots [skip ci]

---
 lusSTR/cli/gui.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index 1971a6a9..4b0dcfc3 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -201,7 +201,13 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
         col = cols[n]
         container = col.container(border=True)
         sample_locus = sample_df["SampleID"].unique() + "_" + marker
-        marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
+        for i, row in sample_df.iterrows():
+            if sample_df.loc[i, "Locus"] == "AMELOGENIN":
+                sample_df.loc[i, "CE_Allele"] = 0 if sample_df.loc[i, "CE_Allele"] == "X" else 1
+        sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
+        marker_df = sample_df[sample_df["Locus"] == marker].sort_values(
+            by=["CE_Allele", "allele_type"], ascending=[False, True]
+        )
         if sample_locus in flagged_df["key"].values:
             marker = f"⚠️{marker}⚠️"
         plot = interactive_plots(marker_df, marker, max_yvalue, increase_value, all=True)
@@ -240,9 +246,14 @@ def interactive_plots(df, locus, ymax, increase, all=False):
     )
     plot.add_hline(y=at, line_width=3, line_dash="dot", line_color="gray")
     plot.add_annotation(text=f"AT", x=min_x + 0.1, y=at, showarrow=False, yshift=10)
-    plot.update_layout(
-        xaxis=dict(range=[min_x, max_x], tickmode="array", tickvals=np.arange(min_x, max_x, 1))
-    )
+    if locus == "AMELOGENIN":
+        plot.update_layout(
+            xaxis=dict(range=[-1, 2], tickmode="array", tickvals=["", "X", "Y", ""])
+        )
+    else:
+        plot.update_layout(
+            xaxis=dict(range=[min_x, max_x], tickmode="array", tickvals=np.arange(min_x, max_x, 1))
+        )
     if all:
         plot.update_layout(
             yaxis=dict(range=[0, ymax], tickmode="array", tickvals=np.arange(0, ymax, increase))
@@ -307,6 +318,10 @@ def interactive_setup(df1, file):
             )
         interactive_plots_allmarkers(sample_df, flags)
     else:
+        for i, row in sample_df.iterrows():
+            if sample_df.loc[i, "Locus"] == "AMELOGENIN":
+                sample_df.loc[i, "CE_Allele"] = 0 if sample_df.loc[i, "CE_Allele"] == "X" else 1
+        sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
         locus_key = f"{sample}_{locus}"
         if locus_key not in st.session_state:
             st.session_state[locus_key] = sample_df[sample_df["Locus"] == locus].reset_index(

From 65b878a2e32a012481a6d63a76a8344d8152a9db Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 20 May 2025 12:28:06 -0400
Subject: [PATCH 09/21] fixed custom range for amel [skip ci]

---
 lusSTR/data/str_markers.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lusSTR/data/str_markers.json b/lusSTR/data/str_markers.json
index 1cd584b6..cf030cb4 100644
--- a/lusSTR/data/str_markers.json
+++ b/lusSTR/data/str_markers.json
@@ -14,8 +14,8 @@
         "Foren_3": 37,
         "Power_5": 10,
         "Power_3": 37,
-        "Custom_5": 10,
-        "Custom_3": 37,
+        "Custom_5": 0,
+        "Custom_3": 0,
         "Alleles": ["X", "Y"]
     },
     "CSF1PO": {

From 01472ee309f9df239d1f4c7462e51e27e9e02088 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 20 May 2025 12:29:31 -0400
Subject: [PATCH 10/21] handling samples with no sequences passing filters
 [skip ci]

---
 lusSTR/scripts/filter_settings.py | 22 ++++++++++++----------
 lusSTR/scripts/marker.py          |  3 +--
 lusSTR/wrappers/convert.py        |  8 ++++++--
 lusSTR/wrappers/filter.py         | 30 ++++++++++++++++++------------
 4 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py
index 77ed5d0c..88430dc6 100644
--- a/lusSTR/scripts/filter_settings.py
+++ b/lusSTR/scripts/filter_settings.py
@@ -30,17 +30,19 @@ def filters(locus_allele_info, locus, locus_reads, datatype, brack_col):
     metadata = filter_marker_data[locus]
     if locus == "AMELOGENIN":
         locus_allele_info = filter_amel(metadata, locus_allele_info, locus_reads)
-    elif len(locus_allele_info) == 1:
-        locus_allele_info = single_allele_thresholds(metadata, locus_reads, locus_allele_info)
     else:
-        locus_allele_info, locus_reads = multiple_allele_thresholds(
-            metadata, locus_reads, locus_allele_info
-        )
-        locus_allele_info = ce_filtering(
-            locus_allele_info, locus_reads, metadata, datatype, brack_col
-        )
-        if datatype != "ce":
-            locus_allele_info = same_size_filter(locus_allele_info, metadata, datatype)
+        locus_allele_info["CE_Allele"] = locus_allele_info["CE_Allele"].astype(float)
+        if len(locus_allele_info) == 1:
+            locus_allele_info = single_allele_thresholds(metadata, locus_reads, locus_allele_info)
+        else:
+            locus_allele_info, locus_reads = multiple_allele_thresholds(
+                metadata, locus_reads, locus_allele_info
+            )
+            locus_allele_info = ce_filtering(
+                locus_allele_info, locus_reads, metadata, datatype, brack_col
+            )
+            if datatype != "ce":
+                locus_allele_info = same_size_filter(locus_allele_info, metadata, datatype)
     return locus_allele_info
 
 
diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py
index 4468f335..95aa2baa 100644
--- a/lusSTR/scripts/marker.py
+++ b/lusSTR/scripts/marker.py
@@ -376,8 +376,7 @@ def forward_sequence(self):
     @property
     def custom_sequence(self):
         if self.custom:
-            custom_front = self.data["Custom_5"]
-            custom_back = self.data["Custom_3"]
+            custom_front, custom_back = self._uas_bases_to_trim()
             if custom_back == 0:
                 custom_back = None
             else:
diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index cc54fb6f..8115c09f 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -65,8 +65,13 @@ def format_table(input, software, kit="forenseq", custom=False):
         else:
             remove_5p = metadata["Power_5"]
             remove_3p = metadata["Power_3"]
+        if custom:
+            if metadata["Custom_5"] < 0:
+                remove_5p = remove_5p - metadata["Custom_5"]
+            if metadata["Custom_3"] < 0:
+                remove_3p = remove_3p - metadata["Custom_3"]
         if (
-            len(sequence) <= (remove_5p + remove_3p)
+            len(sequence) <= (remove_5p + remove_3p + len(metadata["LUS"]))
             and software != "uas"
             and locus != "AMELOGENIN"
         ) or (locus == "AMELOGENIN" and len(sequence) < (remove_5p + remove_3p)):
@@ -222,7 +227,6 @@ def check_vwa(marker, sequence, software, custom):
 
 def combine_reads(table, columns, custom=False):
     if custom:
-        print(table)
         comb_table = (
             table.groupby(
                 [
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 19736cee..17607a3d 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -155,7 +155,7 @@ def process_strs(dict_loc, datatype, seq_col, brack_col):
     return final_df, flags_df
 
 
-def EFM_output(profile, outfile, profile_type, data_type, col, sex, separate=False):
+def EFM_output(profile, id_list, outfile, profile_type, data_type, col, sex, separate=False):
     profile = profile[profile["Locus"] != "AMELOGENIN"]
     if profile_type == "reference":
         profile = profile.query("allele_type == 'Typed'")
@@ -263,7 +263,7 @@ def determine_max_num_alleles(allele_heights):
     return max_num_alleles
 
 
-def STRmix_output(profile, outdir, profile_type, data_type, seq_col):
+def STRmix_output(profile, outdir, profile_type, data_type, seq_col, id_list):
     profile = profile[profile["Locus"] != "AMELOGENIN"]
     Path(outdir).mkdir(parents=True, exist_ok=True)
     if profile_type == "reference":
@@ -288,7 +288,6 @@ def STRmix_output(profile, outdir, profile_type, data_type, seq_col):
         {"Locus": {"VWA": "vWA", "PENTA D": "PentaD", "PENTA E": "PentaE"}}, inplace=True
     )
     Path(outdir).mkdir(exist_ok=True)
-    id_list = strmix_profile["SampleID"].unique()
     for id in id_list:
         sample_df = strmix_profile[strmix_profile["SampleID"] == id].reset_index(drop=True)
         if profile_type == "evidence":
@@ -366,13 +365,17 @@ def marker_plots(df, output_name, sex, wd="."):
     Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
     filt_df = df[df["allele_type"] == "Typed"]
     for sample_id in df["SampleID"].unique():
-        with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
-            make_plot(filt_df, sample_id, filters=True, at=False)
-            pdf.savefig()
-            make_plot(df, sample_id)
-            pdf.savefig()
-            make_plot(df, sample_id, sameyaxis=True)
-            pdf.savefig()
+        if df[df["SampleID"] == sample_id].empty:
+            print(f"{sample_id} does not have any reads passing filter. Skipping to next sample.")
+        else:
+            with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
+                if not filt_df[filt_df["SampleID"] == sample_id].empty:
+                    make_plot(filt_df, sample_id, filters=True, at=False)
+                    pdf.savefig()
+                make_plot(df, sample_id)
+                pdf.savefig()
+                make_plot(df, sample_id, sameyaxis=True)
+                pdf.savefig()
 
 
 def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
@@ -505,13 +508,16 @@ def process_input(
             STRmix_output(full_df, outpath, profile_type, data_type, seq_col)
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
+        id_list = full_df["SampleID"].unique()
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col)
         if final_df is not None:
             marker_plots(final_df, input_name, sex)
             if output_type == "efm" or output_type == "mpsproto":
-                EFM_output(final_df, outpath, profile_type, data_type, brack_col, sex, separate)
+                EFM_output(
+                    final_df, id_list, outpath, profile_type, data_type, brack_col, sex, separate
+                )
             else:
-                STRmix_output(final_df, outpath, profile_type, data_type, seq_col)
+                STRmix_output(final_df, outpath, profile_type, data_type, seq_col, id_list)
             if info:
                 name = os.path.basename(outpath)
                 final_df.to_csv(f"{outpath}/{input_name}_sequence_info.csv", index=False)

From ebc7fc3b2646d9b97e21c6d8769c60ab403d1f0c Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 20 May 2025 13:43:10 -0400
Subject: [PATCH 11/21] fixed plotting amel in gui [skip ci]

---
 lusSTR/cli/gui.py         | 15 +++++++--------
 lusSTR/wrappers/filter.py |  6 ++----
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index 4b0dcfc3..e5f88f2e 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -248,7 +248,7 @@ def interactive_plots(df, locus, ymax, increase, all=False):
     plot.add_annotation(text=f"AT", x=min_x + 0.1, y=at, showarrow=False, yshift=10)
     if locus == "AMELOGENIN":
         plot.update_layout(
-            xaxis=dict(range=[-1, 2], tickmode="array", tickvals=["", "X", "Y", ""])
+            xaxis=dict(tickvals=np.arange(-1, 2, 1), tickmode="array", ticktext=["", "X", "Y", ""])
         )
     else:
         plot.update_layout(
@@ -318,15 +318,14 @@ def interactive_setup(df1, file):
             )
         interactive_plots_allmarkers(sample_df, flags)
     else:
-        for i, row in sample_df.iterrows():
-            if sample_df.loc[i, "Locus"] == "AMELOGENIN":
-                sample_df.loc[i, "CE_Allele"] = 0 if sample_df.loc[i, "CE_Allele"] == "X" else 1
-        sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
+        plot_df = sample_df
+        for i, row in plot_df.iterrows():
+            if plot_df.loc[i, "Locus"] == "AMELOGENIN":
+                plot_df.loc[i, "CE_Allele"] = 0 if plot_df.loc[i, "CE_Allele"] == "X" else 1
+        plot_df["CE_Allele"] = pd.to_numeric(plot_df["CE_Allele"])
         locus_key = f"{sample}_{locus}"
         if locus_key not in st.session_state:
-            st.session_state[locus_key] = sample_df[sample_df["Locus"] == locus].reset_index(
-                drop=True
-            )
+            st.session_state[locus_key] = plot_df[plot_df["Locus"] == locus].reset_index(drop=True)
         Type = [
             "Deleted",
             "Typed",
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 17607a3d..17dc321f 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -155,7 +155,7 @@ def process_strs(dict_loc, datatype, seq_col, brack_col):
     return final_df, flags_df
 
 
-def EFM_output(profile, id_list, outfile, profile_type, data_type, col, sex, separate=False):
+def EFM_output(profile, outfile, profile_type, data_type, col, sex, separate=False):
     profile = profile[profile["Locus"] != "AMELOGENIN"]
     if profile_type == "reference":
         profile = profile.query("allele_type == 'Typed'")
@@ -513,9 +513,7 @@ def process_input(
         if final_df is not None:
             marker_plots(final_df, input_name, sex)
             if output_type == "efm" or output_type == "mpsproto":
-                EFM_output(
-                    final_df, id_list, outpath, profile_type, data_type, brack_col, sex, separate
-                )
+                EFM_output(final_df, outpath, profile_type, data_type, brack_col, sex, separate)
             else:
                 STRmix_output(final_df, outpath, profile_type, data_type, seq_col, id_list)
             if info:

From c2929b1ebcdd34b21369807a181d16d22605c9d9 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 21 May 2025 16:34:05 -0400
Subject: [PATCH 12/21] added blank plots for missing loci [skip ci]

---
 lusSTR/wrappers/filter.py | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 17dc321f..780d1c45 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -147,11 +147,6 @@ def process_strs(dict_loc, datatype, seq_col, brack_col):
             filtered_df = filtered_df.replace({"nan": None})
             final_df = pd.concat([final_df, filtered_df])
             flags_df = pd.concat([flags_df, flags(filtered_df, datatype)])
-    # if datatype == "ce" or datatype == "ngs":
-    #    try:
-    #        final_df = final_df.astype({"CE_Allele": "float64", "Reads": "int"})
-    #    except KeyError:
-    #        final_df = None
     return final_df, flags_df
 
 
@@ -361,7 +356,7 @@ def format_ref_table(new_rows, sample_data, datatype):
     return sort_df
 
 
-def marker_plots(df, output_name, sex, wd="."):
+def marker_plots(df, output_name, wd="."):
     Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
     filt_df = df[df["allele_type"] == "Typed"]
     for sample_id in df["SampleID"].unique():
@@ -370,15 +365,15 @@ def marker_plots(df, output_name, sex, wd="."):
         else:
             with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
                 if not filt_df[filt_df["SampleID"] == sample_id].empty:
-                    make_plot(filt_df, sample_id, filters=True, at=False)
+                    make_plot(filt_df, sample_id, output_name, filters=True, at=False)
                     pdf.savefig()
-                make_plot(df, sample_id)
+                make_plot(df, sample_id, output_name)
                 pdf.savefig()
-                make_plot(df, sample_id, sameyaxis=True)
+                make_plot(df, sample_id, output_name, sameyaxis=True)
                 pdf.savefig()
 
 
-def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
+def make_plot(df, sample_id, output_name, sameyaxis=False, filters=False, at=True):
     sample_df = df[df["SampleID"] == sample_id].copy()
     conditions = [
         sample_df["allele_type"].str.contains("Typed"),
@@ -394,18 +389,20 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
     increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
     fig = plt.figure(figsize=(30, 30))
     n = 0
-    for marker in sample_df["Locus"].unique():
-        if marker in strs or marker in ystrs:
-            n += 1
-            colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
-            marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
+    str_list = ystrs if "sexloci" in output_name else strs
+    for marker in str_list:
+        n += 1
+        colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
+        marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
+        ax = fig.add_subplot(6, 5, n)
+        if not marker_df.empty:
             if marker == "AMELOGENIN":
                 for i, row in marker_df.iterrows():
                     marker_df.loc[i, "CE_Allele"] = (
                         0 if marker_df.loc[i, "CE_Allele"] == "X" else 1
                     )
             marker_df["CE_Allele"] = marker_df["CE_Allele"].astype(float)
-            ax = fig.add_subplot(6, 5, n)
+            # ax = fig.add_subplot(6, 5, n)
             p = ax.bar(
                 marker_df["CE_Allele"],
                 marker_df["Reads"],
@@ -448,7 +445,7 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
                     1.0,
                 )
             )
-            ax.title.set_text(marker)
+        ax.title.set_text(marker)
     if sameyaxis:
         title = "Marker Plots for All Alleles With Same Y-Axis Scale"
     elif filters:
@@ -501,7 +498,7 @@ def process_input(
         )
     if nofiltering:
         full_df["allele_type"] = "Typed"
-        marker_plots(full_df, input_name, sex)
+        marker_plots(full_df, input_name)
         if output_type == "efm" or output_type == "mpsproto":
             EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, separate)
         else:
@@ -511,7 +508,7 @@ def process_input(
         id_list = full_df["SampleID"].unique()
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col)
         if final_df is not None:
-            marker_plots(final_df, input_name, sex)
+            marker_plots(final_df, input_name)
             if output_type == "efm" or output_type == "mpsproto":
                 EFM_output(final_df, outpath, profile_type, data_type, brack_col, sex, separate)
             else:

From 694c980227016a9a7cdca9e0a86e2ab8c4169237 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Fri, 23 May 2025 14:02:19 -0400
Subject: [PATCH 13/21] made str lists specific for each kit [skip ci]

---
 lusSTR/workflows/strs.smk |   3 +-
 lusSTR/wrappers/filter.py | 100 +++++++++++++++++++++++++++++++-------
 2 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/lusSTR/workflows/strs.smk b/lusSTR/workflows/strs.smk
index 3e2ad4e7..fb77fa4d 100644
--- a/lusSTR/workflows/strs.smk
+++ b/lusSTR/workflows/strs.smk
@@ -150,7 +150,8 @@ rule filter:
         filters=config["nofilters"],
         strand=config["strand"],
         custom=config["custom_ranges"],
-        sex=config["sex"]
+        sex=config["sex"],
+        kit=config["kit"]
     script:
         lusSTR.wrapper("filter")
 
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 780d1c45..1f818d2b 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -27,7 +27,34 @@
 import sys
 
 
-strs = [
+p_strs = [
+    "AMELOGENIN",
+    "CSF1PO",
+    "D10S1248",
+    "D12S391",
+    "D13S317",
+    "D16S539",
+    "D18S51",
+    "D19S433",
+    "D1S1656",
+    "D21S11",
+    "D22S1045",
+    "D2S1338",
+    "D2S441",
+    "D3S1358",
+    "D5S818",
+    "D6S1043",
+    "D7S820",
+    "D8S1179",
+    "FGA",
+    "PENTA D",
+    "PENTA E",
+    "TH01",
+    "TPOX",
+    "VWA",
+]
+
+f_strs = [
     "AMELOGENIN",
     "CSF1PO",
     "D10S1248",
@@ -58,7 +85,7 @@
     "VWA",
 ]
 
-ystrs = [
+p_ystrs = [
     "DYS19",
     "DYS385A-B",
     "DYS389II",
@@ -82,6 +109,31 @@
     "Y-GATA-H4",
 ]
 
+f_ystrs = [
+    "DYS19",
+    "DYS385A-B",
+    "DYS389II",
+    "DYS390",
+    "DYS391",
+    "DYS392",
+    "DYS437",
+    "DYS438",
+    "DYS439",
+    "DYS448",
+    "DYS460",
+    "DYS481",
+    "DYS505",
+    "DYS522",
+    "DYS533",
+    "DYS549",
+    "DYS570",
+    "DYS576",
+    "DYS612",
+    "DYS635",
+    "DYS643",
+    "Y-GATA-H4",
+]
+
 
 def get_filter_metadata_file():
     return importlib.resources.files("lusSTR") / "data/filters.json"
@@ -91,9 +143,11 @@ def get_filter_metadata_file():
     filter_marker_data = json.load(fh)
 
 
-def process_strs(dict_loc, datatype, seq_col, brack_col):
+def process_strs(dict_loc, datatype, seq_col, brack_col, kit):
     final_df = pd.DataFrame()
     flags_df = pd.DataFrame()
+    strs = p_strs if kit == "powerseq" else f_strs
+    ystrs = p_ystrs if kit == "powerseq" else f_ystrs
     for key, value in dict_loc.items():
         data = dict_loc[key].reset_index(drop=True)
         if datatype == "ce":
@@ -150,20 +204,20 @@ def process_strs(dict_loc, datatype, seq_col, brack_col):
     return final_df, flags_df
 
 
-def EFM_output(profile, outfile, profile_type, data_type, col, sex, separate=False):
+def EFM_output(profile, outfile, profile_type, data_type, col, sex, kit, separate=False):
     profile = profile[profile["Locus"] != "AMELOGENIN"]
     if profile_type == "reference":
         profile = profile.query("allele_type == 'Typed'")
     else:
         profile = profile.query("allele_type != ['BelowAT', 'Deleted']")
-    efm_profile = populate_efm_profile(profile, data_type, col, sex)
+    efm_profile = populate_efm_profile(profile, data_type, col, sex, kit)
     if separate:
         write_sample_specific_efm_profiles(efm_profile, profile_type, data_type, outfile)
     else:
         write_aggregate_efm_profile(efm_profile, profile_type, data_type, outfile)
 
 
-def populate_efm_profile(profile, data_type, colname, sex):
+def populate_efm_profile(profile, data_type, colname, sex, kit):
     if data_type == "ce":
         prof_col = "CE_Allele"
     elif data_type == "lusplus":
@@ -186,6 +240,8 @@ def populate_efm_profile(profile, data_type, colname, sex):
             allele_heights[row.SampleID][row.Locus][row.Allele] = int(row.Reads)
     max_num_alleles = determine_max_num_alleles(allele_heights)
     reformatted_profile = list()
+    strs = p_strs if kit == "powerseq" else f_strs
+    ystrs = p_ystrs if kit == "powerseq" else f_ystrs
     for sampleid, loci in allele_heights.items():
         for locusid, alleles in loci.items():
             allele_list, height_list = list(), list()
@@ -356,7 +412,7 @@ def format_ref_table(new_rows, sample_data, datatype):
     return sort_df
 
 
-def marker_plots(df, output_name, wd="."):
+def marker_plots(df, output_name, kit, wd="."):
     Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True)
     filt_df = df[df["allele_type"] == "Typed"]
     for sample_id in df["SampleID"].unique():
@@ -365,15 +421,15 @@ def marker_plots(df, output_name, wd="."):
         else:
             with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
                 if not filt_df[filt_df["SampleID"] == sample_id].empty:
-                    make_plot(filt_df, sample_id, output_name, filters=True, at=False)
+                    make_plot(filt_df, sample_id, output_name, kit, filters=True, at=False)
                     pdf.savefig()
-                make_plot(df, sample_id, output_name)
+                make_plot(df, sample_id, output_name, kit)
                 pdf.savefig()
-                make_plot(df, sample_id, output_name, sameyaxis=True)
+                make_plot(df, sample_id, output_name, kit, sameyaxis=True)
                 pdf.savefig()
 
 
-def make_plot(df, sample_id, output_name, sameyaxis=False, filters=False, at=True):
+def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, at=True):
     sample_df = df[df["SampleID"] == sample_id].copy()
     conditions = [
         sample_df["allele_type"].str.contains("Typed"),
@@ -389,7 +445,10 @@ def make_plot(df, sample_id, output_name, sameyaxis=False, filters=False, at=Tru
     increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
     fig = plt.figure(figsize=(30, 30))
     n = 0
-    str_list = ystrs if "sexloci" in output_name else strs
+    if kit == "powerseq":
+        str_list = p_ystrs if "sexloci" in output_name else p_strs
+    else:
+        str_list = f_ystrs if "sexloci" in output_name else f_strs
     for marker in str_list:
         n += 1
         colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
@@ -478,6 +537,7 @@ def process_input(
     profile_type,
     data_type,
     output_type,
+    kit,
     strand="forward",
     nofiltering=False,
     separate=False,
@@ -498,19 +558,21 @@ def process_input(
         )
     if nofiltering:
         full_df["allele_type"] = "Typed"
-        marker_plots(full_df, input_name)
+        marker_plots(full_df, input_name, kit)
         if output_type == "efm" or output_type == "mpsproto":
-            EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, separate)
+            EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, kit, separate)
         else:
             STRmix_output(full_df, outpath, profile_type, data_type, seq_col)
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         id_list = full_df["SampleID"].unique()
-        final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col)
+        final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col, kit)
         if final_df is not None:
-            marker_plots(final_df, input_name)
+            marker_plots(final_df, input_name, kit)
             if output_type == "efm" or output_type == "mpsproto":
-                EFM_output(final_df, outpath, profile_type, data_type, brack_col, sex, separate)
+                EFM_output(
+                    final_df, outpath, profile_type, data_type, brack_col, sex, kit, separate
+                )
             else:
                 STRmix_output(final_df, outpath, profile_type, data_type, seq_col, id_list)
             if info:
@@ -534,6 +596,7 @@ def main(
     strand,
     custom,
     sex,
+    kit,
 ):
     input = str(input)
     if profile_type not in ("evidence", "reference"):
@@ -553,6 +616,7 @@ def main(
             profile_type,
             data_type,
             output_type,
+            kit,
             strand=strand,
             nofiltering=nofilters,
             separate=separate,
@@ -568,6 +632,7 @@ def main(
         profile_type,
         data_type,
         output_type,
+        kit,
         strand=strand,
         nofiltering=nofilters,
         separate=separate,
@@ -590,4 +655,5 @@ def main(
         strand=snakemake.params.strand,
         custom=snakemake.params.custom,
         sex=snakemake.params.sex,
+        kit=snakemake.params.kit,
     )

From f089083466d49871e67d11b87936cc4b11245f36 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Tue, 27 May 2025 06:03:46 -0400
Subject: [PATCH 14/21] added empty plots to GUI for missing markers [skip ci]

---
 lusSTR/cli/gui.py | 70 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 2 deletions(-)

diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index e5f88f2e..588cfcbb 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -22,6 +22,7 @@
 import pandas as pd
 from pathlib import Path
 import plotly.express as px
+import plotly.graph_objs as go
 import streamlit as st
 from streamlit_option_menu import option_menu
 import yaml
@@ -146,6 +147,64 @@ def main():
 #                     lusSTR Home Page                              #
 #####################################################################
 
+p_strs = [
+    "AMELOGENIN",
+    "CSF1PO",
+    "D10S1248",
+    "D12S391",
+    "D13S317",
+    "D16S539",
+    "D18S51",
+    "D19S433",
+    "D1S1656",
+    "D21S11",
+    "D22S1045",
+    "D2S1338",
+    "D2S441",
+    "D3S1358",
+    "D5S818",
+    "D6S1043",
+    "D7S820",
+    "D8S1179",
+    "FGA",
+    "PENTA D",
+    "PENTA E",
+    "TH01",
+    "TPOX",
+    "VWA",
+]
+
+f_strs = [
+    "AMELOGENIN",
+    "CSF1PO",
+    "D10S1248",
+    "D12S391",
+    "D13S317",
+    "D16S539",
+    "D17S1301",
+    "D18S51",
+    "D19S433",
+    "D1S1656",
+    "D20S482",
+    "D21S11",
+    "D22S1045",
+    "D2S1338",
+    "D2S441",
+    "D3S1358",
+    "D4S2408",
+    "D5S818",
+    "D6S1043",
+    "D7S820",
+    "D8S1179",
+    "D9S1122",
+    "FGA",
+    "PENTA D",
+    "PENTA E",
+    "TH01",
+    "TPOX",
+    "VWA",
+]
+
 
 def show_home_page():
 
@@ -197,7 +256,9 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
     max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
     increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
     n = 0
-    for marker in sample_df["Locus"].unique():
+    all_loci = f_strs if st.session_state.kit == "forenseq" else p_strs
+    missing_loci = [x for x in all_loci if x not in sample_df["Locus"].unique()]
+    for marker in all_loci:
         col = cols[n]
         container = col.container(border=True)
         sample_locus = sample_df["SampleID"].unique() + "_" + marker
@@ -210,7 +271,12 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
         )
         if sample_locus in flagged_df["key"].values:
             marker = f"⚠️{marker}⚠️"
-        plot = interactive_plots(marker_df, marker, max_yvalue, increase_value, all=True)
+        if marker in missing_loci:
+            marker = f"⚠️{marker}⚠️"
+            plot = go.Figure()
+            plot.update_layout(title=marker)
+        else:
+            plot = interactive_plots(marker_df, marker, max_yvalue, increase_value, all=True)
         container.plotly_chart(plot, use_container_width=True)
         if n == 3:
             n = 0

From cd44d52ffcb51eb8ded25ed8dcf1d5efbad668d5 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 28 May 2025 06:07:54 -0400
Subject: [PATCH 15/21] removed extra marker in powerseq list [skip ci]

---
 lusSTR/cli/gui.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index 588cfcbb..bcd3a005 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -163,7 +163,6 @@ def main():
     "D2S441",
     "D3S1358",
     "D5S818",
-    "D6S1043",
     "D7S820",
     "D8S1179",
     "FGA",

From 1fb78462be75572ed0a1e54e14b771b21b5c4fb1 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 28 May 2025 06:08:11 -0400
Subject: [PATCH 16/21] removed extra marker in powerseq list [skip ci]

---
 lusSTR/wrappers/filter.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 1f818d2b..37bbb7eb 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -43,7 +43,6 @@
     "D2S441",
     "D3S1358",
     "D5S818",
-    "D6S1043",
     "D7S820",
     "D8S1179",
     "FGA",
@@ -205,7 +204,6 @@ def process_strs(dict_loc, datatype, seq_col, brack_col, kit):
 
 
 def EFM_output(profile, outfile, profile_type, data_type, col, sex, kit, separate=False):
-    profile = profile[profile["Locus"] != "AMELOGENIN"]
     if profile_type == "reference":
         profile = profile.query("allele_type == 'Typed'")
     else:
@@ -266,7 +264,8 @@ def populate_efm_profile(profile, data_type, colname, sex, kit):
     for col in height_columns:
         efm_profile[col] = efm_profile[col].astype("Int64")
     efm_profile = efm_profile.sort_values(by=["SampleName", "Marker"])
-    return efm_profile
+    efm_profile_noamel = efm_profile[efm_profile["Marker"] != "AMELOGENIN"]
+    return efm_profile_noamel
 
 
 def write_sample_specific_efm_profiles(efm_profile, profile_type, data_type, outdir):
@@ -314,7 +313,7 @@ def determine_max_num_alleles(allele_heights):
     return max_num_alleles
 
 
-def STRmix_output(profile, outdir, profile_type, data_type, seq_col, id_list):
+def STRmix_output(profile, outdir, profile_type, data_type, seq_col):
     profile = profile[profile["Locus"] != "AMELOGENIN"]
     Path(outdir).mkdir(parents=True, exist_ok=True)
     if profile_type == "reference":
@@ -339,6 +338,7 @@ def STRmix_output(profile, outdir, profile_type, data_type, seq_col, id_list):
         {"Locus": {"VWA": "vWA", "PENTA D": "PentaD", "PENTA E": "PentaE"}}, inplace=True
     )
     Path(outdir).mkdir(exist_ok=True)
+    id_list = strmix_profile["SampleID"].unique()
     for id in id_list:
         sample_df = strmix_profile[strmix_profile["SampleID"] == id].reset_index(drop=True)
         if profile_type == "evidence":
@@ -565,7 +565,6 @@ def process_input(
             STRmix_output(full_df, outpath, profile_type, data_type, seq_col)
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
-        id_list = full_df["SampleID"].unique()
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col, brack_col, kit)
         if final_df is not None:
             marker_plots(final_df, input_name, kit)
@@ -574,7 +573,7 @@ def process_input(
                     final_df, outpath, profile_type, data_type, brack_col, sex, kit, separate
                 )
             else:
-                STRmix_output(final_df, outpath, profile_type, data_type, seq_col, id_list)
+                STRmix_output(final_df, outpath, profile_type, data_type, seq_col)
             if info:
                 name = os.path.basename(outpath)
                 final_df.to_csv(f"{outpath}/{input_name}_sequence_info.csv", index=False)

From eec3ac129c70d257656136d35494e3d8a0618945 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 28 May 2025 06:11:37 -0400
Subject: [PATCH 17/21] began updating tests [skip ci]

---
 .../LUSPlus_sequence_info.csv                 | 42 ++++++-------
 .../NGS_stutter_test/Sample1_nofilter.csv     | 54 ++++++++--------
 .../data/STRaitRazor_output_test_A001.csv     |  2 +
 .../tests/data/STRait_Razor_test_output.csv   | 34 ++++++++++
 lusSTR/tests/data/UAS_bulk_test.csv           |  4 ++
 lusSTR/tests/data/lusstr_output.csv           |  2 +
 .../Positive_Control_evidence_ngs.csv         | 62 +++++++++----------
 lusSTR/tests/data/testformat.csv              |  2 +
 8 files changed, 123 insertions(+), 79 deletions(-)

diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
index 5a2541dd..84c079d7 100644
--- a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
+++ b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
@@ -1,24 +1,24 @@
 SampleID,Locus,CE_Allele,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter
-Sample1,D4S2408,10.0,10_10_0,1022,Typed,,,,,,
-Sample1,D4S2408,9.0,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,,
-Sample1,D4S2408,8.0,8_8_0,1050,Typed,,,,,,
-Sample1,D8S1179,14.0,14_12_1_0,869,Typed,,,,,,
-Sample1,D8S1179,13.0,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212
-Sample1,D8S1179,12.0,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201
-Sample1,D9S1122,13.0,13_11,948,Typed,,,,,,
-Sample1,D9S1122,12.0,12_10,108,-1_stutter,13_11,,948.0,,,0.114
-Sample1,D9S1122,11.0,11_11,991,Typed,,,,,,
-Sample1,D9S1122,10.0,10_10,87,-1_stutter,11_11,,991.0,,,0.088
-Sample1,FGA,23.0,23_15_3_0,1436,Typed,,,,,,
-Sample1,FGA,22.0,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182
-Sample1,FGA,21.0,21_13_3_0,48,BelowAT,,,,,0.013,
-Sample1,FGA,20.0,20_12_3_0,1750,Typed,,,,,,
-Sample1,FGA,18.0,18_10_3_0,181,Typed,,,,,,
-Sample1,FGA,17.0,17_9_3_0,15,BelowAT,,,,,0.004,
-Sample1,PENTA D,15.0,15_15,50,Typed,,,,,,
-Sample1,PENTA D,13.0,13_13,1000,Typed,,,,,,
+Sample1,D4S2408,10,10_10_0,1022,Typed,,,,,,
+Sample1,D4S2408,9,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,,
+Sample1,D4S2408,8,8_8_0,1050,Typed,,,,,,
+Sample1,D8S1179,14,14_12_1_0,869,Typed,,,,,,
+Sample1,D8S1179,13,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212
+Sample1,D8S1179,12,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201
+Sample1,D9S1122,13,13_11,948,Typed,,,,,,
+Sample1,D9S1122,12,12_10,108,-1_stutter,13_11,,948.0,,,0.114
+Sample1,D9S1122,11,11_11,991,Typed,,,,,,
+Sample1,D9S1122,10,10_10,87,-1_stutter,11_11,,991.0,,,0.088
+Sample1,FGA,23,23_15_3_0,1436,Typed,,,,,,
+Sample1,FGA,22,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182
+Sample1,FGA,21,21_13_3_0,48,BelowAT,,,,,0.013,
+Sample1,FGA,20,20_12_3_0,1750,Typed,,,,,,
+Sample1,FGA,18,18_10_3_0,181,Typed,,,,,,
+Sample1,FGA,17,17_9_3_0,15,BelowAT,,,,,0.004,
+Sample1,PENTA D,15,15_15,50,Typed,,,,,,
+Sample1,PENTA D,13,13_13,1000,Typed,,,,,,
 Sample1,PENTA E,7.0,7_7,505,Typed,,,,,,
-Sample1,TH01,7.0,7_7,2197,Typed,,,,,,
-Sample1,TH01,6.0,6_6,1632,Typed,,,,,,
-Sample1,TH01,5.0,5_5,66,BelowAT,,,,,0.017,
+Sample1,TH01,7,7_7,2197,Typed,,,,,,
+Sample1,TH01,6,6_6,1632,Typed,,,,,,
+Sample1,TH01,5,5_5,66,BelowAT,,,,,0.017,
 Sample1,TPOX,11.0,11_11,15,BelowAT,,,,,1.0,
diff --git a/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv b/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv
index 7531c6f3..b83fce36 100644
--- a/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv
+++ b/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv
@@ -1,28 +1,28 @@
 Locus,CE Allele,Allele Seq,Reads
-D4S2408,8.0,ATCTATCTATCTATCTATCTATCTATCTATCT,1000
-D4S2408,9.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357
-D4S2408,10.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,900
-D8S1179,12.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,26
-D8S1179,12.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,11
-D8S1179,13.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,95
-D8S1179,13.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,89
-D8S1179,14.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,739
-D8S1179,14.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,130
-D9S1122,10.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,87
-D9S1122,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,991
-D9S1122,12.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,108
-D9S1122,13.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,948
-FGA,17.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,15
-FGA,18.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,181
-FGA,20.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1750
-FGA,21.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,48
-FGA,22.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,262
-FGA,23.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1436
-PentaD,13.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,1000
-PentaD,15.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,50
-PentaE,7.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505
-TH01,5.0,AATGAATGAATGAATGAATG,66
-TH01,6.0,AATGAATGAATGAATGAATGAATG,1632
-TH01,7.0,AATGAATGAATGAATGAATGAATGAATG,2197
-TPOX,11.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,15
-vWA,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,6
+D4S2408,8,ATCTATCTATCTATCTATCTATCTATCTATCT,1000
+D4S2408,9,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357
+D4S2408,10,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,900
+D8S1179,12,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,26
+D8S1179,12,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,11
+D8S1179,13,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,95
+D8S1179,13,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,89
+D8S1179,14,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,739
+D8S1179,14,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,130
+D9S1122,10,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,87
+D9S1122,11,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,991
+D9S1122,12,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,108
+D9S1122,13,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,948
+FGA,17,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,15
+FGA,18,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,181
+FGA,20,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1750
+FGA,21,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,48
+FGA,22,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,262
+FGA,23,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1436
+PentaD,13,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,1000
+PentaD,15,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,50
+PentaE,7,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505
+TH01,5,AATGAATGAATGAATGAATG,66
+TH01,6,AATGAATGAATGAATGAATGAATG,1632
+TH01,7,AATGAATGAATGAATGAATGAATGAATG,2197
+TPOX,11,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,15
+vWA,16,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,6
diff --git a/lusSTR/tests/data/STRaitRazor_output_test_A001.csv b/lusSTR/tests/data/STRaitRazor_output_test_A001.csv
index 219ee03f..c4a2a09a 100644
--- a/lusSTR/tests/data/STRaitRazor_output_test_A001.csv
+++ b/lusSTR/tests/data/STRaitRazor_output_test_A001.csv
@@ -1,4 +1,6 @@
 Locus,Total_Reads,Sequence,SampleID,Project,Analysis
+Amelogenin,226,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A001,NA,NA
+Amelogenin,162,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A001,NA,NA
 CSF1PO,547,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A001,NA,NA
 CSF1PO,25,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A001,NA,NA
 CSF1PO,7,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A001,NA,NA
diff --git a/lusSTR/tests/data/STRait_Razor_test_output.csv b/lusSTR/tests/data/STRait_Razor_test_output.csv
index 57449926..959433cf 100644
--- a/lusSTR/tests/data/STRait_Razor_test_output.csv
+++ b/lusSTR/tests/data/STRait_Razor_test_output.csv
@@ -1,4 +1,6 @@
 Locus,Total_Reads,Sequence,SampleID,Project,Analysis
+Amelogenin,226,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A001,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,162,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A001,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,547,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A001,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,25,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A001,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,7,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A001,STRait_Razor_test_output,STRait_Razor_test_output
@@ -610,6 +612,8 @@ vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A001,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A001,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A001,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,249,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A002,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,171,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A002,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,498,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A002,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,402,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A002,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,41,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A002,STRait_Razor_test_output,STRait_Razor_test_output
@@ -1450,6 +1454,8 @@ vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATTGATAG
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGGCAGACAGATAGATCAAT,A002,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGGTGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A002,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATAAAT,A002,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,313,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A003,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,167,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A003,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,696,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A003,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,35,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A003,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,10,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A003,STRait_Razor_test_output,STRait_Razor_test_output
@@ -2313,6 +2319,8 @@ vWA,3,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAG
 vWA,3,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A003,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A003,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A003,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,178,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A004,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,135,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A004,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,469,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A004,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,381,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A004,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,22,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A004,STRait_Razor_test_output,STRait_Razor_test_output
@@ -3173,6 +3181,11 @@ vWA,2,AATACATAGGATGGATGGATAGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A004,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A004,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A004,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,322,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A005,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,299,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A005,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,3,TAGTGGGTGGATTCATCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A005,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTTCAGTTCCTACCAC,A005,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGTGGATGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A005,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,817,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A005,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,28,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A005,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,13,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A005,STRait_Razor_test_output,STRait_Razor_test_output
@@ -4101,6 +4114,9 @@ vWA,2,AATACATAGGATGGATAGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A005,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATGGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A005,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGTTCAAT,A005,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,255,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A006,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,196,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A006,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGCGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A006,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,429,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A006,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,390,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A006,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,59,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A006,STRait_Razor_test_output,STRait_Razor_test_output
@@ -4892,6 +4908,8 @@ vWA,2,AATACATAGGATGGATGGATAGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A006,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAAACAGACAGATAGATCAAT,A006,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGACAGATAGATCAAT,A006,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,230,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A007,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,212,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A007,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,864,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A007,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,57,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A007,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,8,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A007,STRait_Razor_test_output,STRait_Razor_test_output
@@ -5885,6 +5903,10 @@ vWA,3,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A007,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A007,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A007,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,385,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A008,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,259,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A008,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,3,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACCGTTCCTACCAC,A008,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGTGGGTGGATACTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A008,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,523,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A008,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,483,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A008,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,30,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A008,STRait_Razor_test_output,STRait_Razor_test_output
@@ -6808,6 +6830,8 @@ vWA,2,AATACATAGAATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A008,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A008,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,GATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A008,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,317,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A009,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,194,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A009,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,407,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A009,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,338,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A009,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,16,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A009,STRait_Razor_test_output,STRait_Razor_test_output
@@ -7722,6 +7746,9 @@ vWA,2,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGGCAGATAGATCAAT,A009,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGACAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A009,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A009,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,227,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A010,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,143,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A010,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTATAGTTCCTACCAT,A010,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,664,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A010,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,39,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A010,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,2,CGTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A010,STRait_Razor_test_output,STRait_Razor_test_output
@@ -8718,6 +8745,8 @@ vWA,3,GATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAG
 vWA,3,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATCAAT,A010,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATAGACAGACAGATAGATCAAT,A010,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGATAGATCAAT,A010,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,276,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A011,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,228,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A011,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,449,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A011,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,272,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A011,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,28,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A011,STRait_Razor_test_output,STRait_Razor_test_output
@@ -9677,6 +9706,11 @@ vWA,3,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAG
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGATAGATCAAT,A011,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACGGACAGACAGATAGATCAAT,A011,STRait_Razor_test_output,STRait_Razor_test_output
 vWA,2,AATACATAGGATGGATGGATAGATGGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGATCAAT,A011,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,418,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A012,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,339,TAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A012,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGTGTGTTGATTCTCTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCAC,A012,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGTGGGTGGATTCTTCGTCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A012,STRait_Razor_test_output,STRait_Razor_test_output
+Amelogenin,2,TAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTATCAAGTGGTCCCAATTTTACAGTTCCTACCAT,A012,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,1131,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A012,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,43,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A012,STRait_Razor_test_output,STRait_Razor_test_output
 CSF1PO,13,CTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTT,A012,STRait_Razor_test_output,STRait_Razor_test_output
diff --git a/lusSTR/tests/data/UAS_bulk_test.csv b/lusSTR/tests/data/UAS_bulk_test.csv
index 88663214..064b8952 100644
--- a/lusSTR/tests/data/UAS_bulk_test.csv
+++ b/lusSTR/tests/data/UAS_bulk_test.csv
@@ -1,4 +1,6 @@
 Locus,Reads,Repeat Sequence,SampleID,Project,Analysis
+Amelogenin,143,,Positive Control,Project1,Analysis1
+Amelogenin,283,AAAGTG,Positive Control,Project1,Analysis1
 D1S1656,33,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1
 D1S1656,13,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,Positive Control,Project1,Analysis1
 D1S1656,231,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1
@@ -127,6 +129,8 @@ D22S1045,13,ATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1
 D22S1045,146,ATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1,Analysis1
 D22S1045,1746,ATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1,Analysis1
 D22S1045,27,ATTATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,Positive Control,Project1,Analysis1
+Amelogenin,143,,Positive Control2,Project1,Analysis1
+Amelogenin,283,AAAGTG,Positive Control2,Project1,Analysis1
 D1S1656,33,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control2,Project1,Analysis1
 D1S1656,13,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,Positive Control2,Project1,Analysis1
 D1S1656,231,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control2,Project1,Analysis1
diff --git a/lusSTR/tests/data/lusstr_output.csv b/lusSTR/tests/data/lusstr_output.csv
index a53ea100..cce2ce0c 100644
--- a/lusSTR/tests/data/lusstr_output.csv
+++ b/lusSTR/tests/data/lusstr_output.csv
@@ -1,4 +1,6 @@
 Locus,Reads,Repeat Sequence,SampleID,Project,Analysis
+Amelogenin,143,,Positive Control,Project1,Analysis1
+Amelogenin,283,AAAGTG,Positive Control,Project1,Analysis1
 D1S1656,33,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1
 D1S1656,13,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,Positive Control,Project1,Analysis1
 D1S1656,231,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1
diff --git a/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv b/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv
index f5901b88..ab397eff 100644
--- a/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv
+++ b/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv
@@ -1,6 +1,6 @@
 Locus,CE Allele,Allele Seq,Reads
-CSF1PO,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,29
-CSF1PO,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,702
+CSF1PO,11.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,29
+CSF1PO,12.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,702
 D10S1248,12.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,163
 D10S1248,13.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,1050
 D10S1248,14.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,116
@@ -23,12 +23,12 @@ D18S51,15.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAG
 D18S51,16.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,1009
 D18S51,17.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,165
 D18S51,18.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,999
-D19S433,12.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,122
-D19S433,13.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,1782
-D19S433,14.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,1621
-D1S1656,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,33
-D1S1656,12.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,231
-D1S1656,13.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,162
+D19S433,12.0,CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT,122
+D19S433,13.0,CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT,1782
+D19S433,14.0,CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT,1621
+D1S1656,11.0,CACACACACACCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,33
+D1S1656,12.0,CACACACACACCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,231
+D1S1656,13.0,CACACACACATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,162
 D20S482,13.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,337
 D20S482,14.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,3136
 D20S482,15.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,2731
@@ -37,10 +37,10 @@ D21S11,29.0,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATC
 D21S11,31.2,TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA,1064
 D22S1045,15.0,ATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,146
 D22S1045,16.0,ATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,1746
-D2S1338,21.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,246
-D2S1338,22.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,2165
-D2S1338,24.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,259
-D2S1338,25.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,1656
+D2S1338,21.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,246
+D2S1338,22.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,2165
+D2S1338,24.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,259
+D2S1338,25.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,1656
 D2S441,10.0,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1781
 D2S441,14.0,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA,1330
 D3S1358,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,135
@@ -48,15 +48,15 @@ D3S1358,17.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT
 D3S1358,18.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1855
 D4S2408,8.0,ATCTATCTATCTATCTATCTATCTATCTATCT,38
 D4S2408,9.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357
-D5S818,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,21
-D5S818,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,382
-D6S1043,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,219
-D6S1043,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,2088
-D6S1043,19.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,138
-D6S1043,20.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,1487
-D7S820,8.0,GATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,517
-D7S820,10.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,33
-D7S820,11.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,522
+D5S818,11.0,CTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,21
+D5S818,12.0,CTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,382
+D6S1043,11.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,219
+D6S1043,12.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,2088
+D6S1043,19.0,ATCTATCTATCTATCTATCTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,138
+D6S1043,20.0,ATCTATCTATCTATCTATCTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,1487
+D7S820,8.0,AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATC,517
+D7S820,10.0,AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC,33
+D7S820,11.0,AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC,522
 D8S1179,13.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,135
 D8S1179,14.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,971
 D8S1179,15.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,661
@@ -64,21 +64,21 @@ D9S1122,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,131
 D9S1122,11.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,115
 D9S1122,12.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,1551
 D9S1122,12.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,1427
-FGA,19.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,135
-FGA,20.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1043
-FGA,22.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,182
-FGA,23.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1023
+FGA,19.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,135
+FGA,20.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,1043
+FGA,22.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,182
+FGA,23.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,1023
 PentaD,12.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,289
 PentaD,13.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,278
-PentaE,7.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505
-PentaE,14.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,261
+PentaE,7.0,TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT,505
+PentaE,14.0,TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT,261
 TH01,5.0,AATGAATGAATGAATGAATG,247
 TH01,6.0,AATGAATGAATGAATGAATGAATG,4542
 TH01,8.3,AATGAATGAATGAATGAATGATGAATGAATGAATG,151
 TH01,9.3,AATGAATGAATGAATGAATGAATGATGAATGAATGAATG,3581
 TPOX,10.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,59
 TPOX,11.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,1216
-vWA,15.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,23
-vWA,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,377
-vWA,18.0,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,43
-vWA,19.0,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,325
+vWA,15.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGA,23
+vWA,16.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGA,377
+vWA,18.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGA,43
+vWA,19.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGA,325
diff --git a/lusSTR/tests/data/testformat.csv b/lusSTR/tests/data/testformat.csv
index a53ea100..cce2ce0c 100644
--- a/lusSTR/tests/data/testformat.csv
+++ b/lusSTR/tests/data/testformat.csv
@@ -1,4 +1,6 @@
 Locus,Reads,Repeat Sequence,SampleID,Project,Analysis
+Amelogenin,143,,Positive Control,Project1,Analysis1
+Amelogenin,283,AAAGTG,Positive Control,Project1,Analysis1
 D1S1656,33,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1
 D1S1656,13,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,Positive Control,Project1,Analysis1
 D1S1656,231,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,Positive Control,Project1,Analysis1

From 740c5eaa388a4747fac68db3fe2551d86bcc5f2a Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Mon, 2 Jun 2025 05:57:38 -0400
Subject: [PATCH 18/21] updated remaining tests

---
 .../tests/data/genemarker/genemarker_test.csv |  2 +
 .../tests/data/genemarker/genemarker_test.txt |  2 +
 .../genemarker/genemarker_test_flanks.txt     |  2 +
 lusSTR/tests/data/lusstr_output.txt           |  2 +
 .../Positive_Control_evidence_ngs.csv         | 62 +++++++++----------
 lusSTR/wrappers/convert.py                    |  4 +-
 lusSTR/wrappers/filter.py                     |  4 --
 7 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/lusSTR/tests/data/genemarker/genemarker_test.csv b/lusSTR/tests/data/genemarker/genemarker_test.csv
index 142ac4b4..944ea126 100644
--- a/lusSTR/tests/data/genemarker/genemarker_test.csv
+++ b/lusSTR/tests/data/genemarker/genemarker_test.csv
@@ -1,4 +1,6 @@
 Locus,Total_Reads,Sequence,SampleID,Project,Analysis
+Amelogenin,14189,TCAGCTATGAGGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCACCAGCTTCCCA,2800M_strresults_filtered,NA,NA
+Amelogenin,11986,TCAGCTATGAGGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCATCAGCTTCCCA,2800M_strresults_filtered,NA,NA
 PentaE,6733,TAATGATTACATAACATACATGTGTGTAAAGTGCTTAGTATCATGATTGATACATGGAAAGAATTCTCTTATTTGGGTTATTAATTGAGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGAC,2800M_strresults_filtered,NA,NA
 PentaE,4746,TAATGATTACATAACATACATGTGTGTAAAGTGCTTAGTATCATGATTGATACATGGAAAGAATTCTCTTATTTGGGTTATTAATTGAGAAAACTCCTTACAATTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTGAGAC,2800M_strresults_filtered,NA,NA
 D18S51,602,AGGCTGCAGTGAGCCATGTTCATGCCACTGCACTTCACTCTGAGTGACAAATTGAGACCTTGTCTCAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAGGAAAGAAAGAGAAAAAGAAAAGAAATAGTAGCAACTGTTATTGTA,2800M_strresults_filtered,NA,NA
diff --git a/lusSTR/tests/data/genemarker/genemarker_test.txt b/lusSTR/tests/data/genemarker/genemarker_test.txt
index 3b3fc202..326eb682 100644
--- a/lusSTR/tests/data/genemarker/genemarker_test.txt
+++ b/lusSTR/tests/data/genemarker/genemarker_test.txt
@@ -69,3 +69,5 @@ SampleID	Project	Analysis	Locus	UAS_Output_Sequence	Forward_Strand_Sequence	UAS_
 2800M_strresults_filtered	NA	NA	D10S1248	GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA	GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA	[GGAA]12	[GGAA]12	12	12_12	12_12	741
 2800M_strresults_filtered	NA	NA	CSF1PO	AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT	ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT	[AGAT]12	[ATCT]12	12	12_12	12_12_0	14044
 2800M_strresults_filtered	NA	NA	CSF1PO	AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT	ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT	[AGAT]11	[ATCT]11	11	11_11	11_11_0	1047
+2800M_strresults_filtered	NA	NA	AMELOGENIN	GGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAG	NA	NA	14189
+2800M_strresults_filtered	NA	NA	AMELOGENIN	GGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAG	GGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAG	NA	NA	11986
diff --git a/lusSTR/tests/data/genemarker/genemarker_test_flanks.txt b/lusSTR/tests/data/genemarker/genemarker_test_flanks.txt
index 9740c817..a445bbc4 100644
--- a/lusSTR/tests/data/genemarker/genemarker_test_flanks.txt
+++ b/lusSTR/tests/data/genemarker/genemarker_test_flanks.txt
@@ -71,3 +71,5 @@ SampleID	Project	Analysis	Locus	Reads	CE_Allele	Full_Sequence	5_Flank_Bracketed_
 2800M_strresults_filtered	NA	NA	D10S1248	741	12	CCCCAGGACCAATCTGGTCACAAACATATTAATGAATTGAACAAATGAGTGAGTGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA	CCCC AGGA CCAA TCTG GTCA CAAA CATA TTAA TGAA TT GAAC AAAT [GAGT]2	[GGAA]12		 
 2800M_strresults_filtered	NA	NA	CSF1PO	14044	12	CTAAGTACTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTTCTATCTATGAAGGCAGTTACTGTTAATATCTTCATTTTACAGGTAGGAAAACTGAGACACAGGGTGGTTAGCAACCTGCTAGTCCTTGGCAGACTCAG	CTA AGTA CT TCCT	[ATCT]12	A [ATCT]3 T [CTAT]2 GAAG GCAG TTAC TGTT AATA TCTT CATT TTAC AGGT AGGA AAAC TGAG ACAC AGGG TGGT TAG CA ACCT GCTA GTCC TTGG CAGA CTCA G	 
 2800M_strresults_filtered	NA	NA	CSF1PO	1047	11	CTAAGTACTTCCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTAATCTATCTATCTTCTATCTATGAAGGCAGTTACTGTTAATATCTTCATTTTACAGGTAGGAAAACTGAGACACAGGGTGGTTAGCAACCTGCTAGTCCTTGGCAGACTCAG	CTA AGTA CT TCCT	[ATCT]11	A [ATCT]3 T [CTAT]2 GAAG GCAG TTAC TGTT AATA TCTT CATT TTAC AGGT AGGA AAAC TGAG ACAC AGGG TGGT TAG CA ACCT GCTA GTCC TTGG CAGA CTCA G	 
+2800M_strresults_filtered	NA	NA	AMELOGENIN	14189	GGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAG	TCAGCTATGAGGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAGTGGTCCTGATTTTACAGTTCCTACCACCAGCTTCCCA	T C A G C T A T G A	GGTAATTTTTCTCTTTACTAATTTTGACCATTGTTTGCGTTAACAATGCCCTGGGCTCTGTAAAGAATAGTGTGTTGATTCTTTATCCCAGATGTTTCTCAAG	T [G]2 T [C]2 T G A [T]4 A C A G [T]2 [C]2 T A [C]2 A [C]2 A G C [T]2 [C]3 A	Possible indel or partial sequence
+2800M_strresults_filtered	NA	NA	AMELOGENIN	11986	GGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAG	TCAGCTATGAGGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAGTGGTCCCAATTTTACAGTTCCTACCATCAGCTTCCCA	T C A G C T A T G A	GGTAATTTTTCTCTTTACTAATTTTGATCACTGTTTGCATTAGCAGTCCCCTGGGCTCTGTAAAGAATAGTGGGTGGATTCTTCATCCCAAATAAAGTGGTTTCTCAAG	T [G]2 T [C]3 [A]2 [T]4 A C A G [T]2 [C]2 T A [C]2 A T C A G C [T]2 [C]3 A	Possible indel or partial sequence
diff --git a/lusSTR/tests/data/lusstr_output.txt b/lusSTR/tests/data/lusstr_output.txt
index d86c37dd..852445e8 100644
--- a/lusSTR/tests/data/lusstr_output.txt
+++ b/lusSTR/tests/data/lusstr_output.txt
@@ -127,3 +127,5 @@ Positive_Control	Project1	Analysis1	D10S1248	GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGA
 Positive_Control	Project1	Analysis1	CSF1PO	AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT	ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT	[AGAT]12	[ATCT]12	12	12_12	12_12_0	702
 Positive_Control	Project1	Analysis1	CSF1PO	AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT	ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT	[AGAT]11	[ATCT]11	11	11_11	11_11_0	29
 Positive_Control	Project1	Analysis1	CSF1PO	AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT	ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT	[AGAT]13	[ATCT]13	13	13_13	13_13_0	11
+Positive_Control	Project1	Analysis1	AMELOGENIN	AAAGTG	AAAGTG	AAAGTG	AAAGTG	Y	NA	NA	283
+Positive_Control	Project1	Analysis1	AMELOGENIN				NA	X	NA	NA	143
diff --git a/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv b/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv
index ab397eff..f5901b88 100644
--- a/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv
+++ b/lusSTR/tests/data/lusstr_output/Positive_Control_evidence_ngs.csv
@@ -1,6 +1,6 @@
 Locus,CE Allele,Allele Seq,Reads
-CSF1PO,11.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,29
-CSF1PO,12.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,702
+CSF1PO,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,29
+CSF1PO,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,702
 D10S1248,12.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,163
 D10S1248,13.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,1050
 D10S1248,14.0,GGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAA,116
@@ -23,12 +23,12 @@ D18S51,15.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAG
 D18S51,16.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,1009
 D18S51,17.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,165
 D18S51,18.0,AGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAAAGAGAGAG,999
-D19S433,12.0,CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT,122
-D19S433,13.0,CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT,1782
-D19S433,14.0,CTCTCTTTCTTCCTCTCTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTACCTTCTTTCCTT,1621
-D1S1656,11.0,CACACACACACCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,33
-D1S1656,12.0,CACACACACACCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,231
-D1S1656,13.0,CACACACACATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,162
+D19S433,12.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,122
+D19S433,13.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,1782
+D19S433,14.0,AAGGAAAGAAGGTAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAGAGAGGAAGAAAGAGAG,1621
+D1S1656,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,33
+D1S1656,12.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGGTGTGTGTGTG,231
+D1S1656,13.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATGTGTGTGTG,162
 D20S482,13.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,337
 D20S482,14.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,3136
 D20S482,15.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,2731
@@ -37,10 +37,10 @@ D21S11,29.0,TCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATC
 D21S11,31.2,TCTATCTATCTATCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATATCTATCTATCTATCATCTATCTATCCATATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATATCTA,1064
 D22S1045,15.0,ATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,146
 D22S1045,16.0,ATTATTATTATTATTATTATTATTATTATTATTATTATTACTATTATT,1746
-D2S1338,21.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,246
-D2S1338,22.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,2165
-D2S1338,24.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,259
-D2S1338,25.0,GGAAGGAAGGACGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGCAGGCAGGCAGGCAGGCAGGCAGGCA,1656
+D2S1338,21.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,246
+D2S1338,22.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,2165
+D2S1338,24.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,259
+D2S1338,25.0,TGCCTGCCTGCCTGCCTGCCTGCCTGCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCGTCCTTCCTTCC,1656
 D2S441,10.0,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1781
 D2S441,14.0,TCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATTTATCTATCTA,1330
 D3S1358,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,135
@@ -48,15 +48,15 @@ D3S1358,17.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT
 D3S1358,18.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,1855
 D4S2408,8.0,ATCTATCTATCTATCTATCTATCTATCTATCT,38
 D4S2408,9.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357
-D5S818,11.0,CTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,21
-D5S818,12.0,CTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,382
-D6S1043,11.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,219
-D6S1043,12.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,2088
-D6S1043,19.0,ATCTATCTATCTATCTATCTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,138
-D6S1043,20.0,ATCTATCTATCTATCTATCTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,1487
-D7S820,8.0,AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATC,517
-D7S820,10.0,AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC,33
-D7S820,11.0,AAACTATCAATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC,522
+D5S818,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,21
+D5S818,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAG,382
+D6S1043,11.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,219
+D6S1043,12.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAT,2088
+D6S1043,19.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,138
+D6S1043,20.0,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATACATAGATAGATAGATAGATAGAT,1487
+D7S820,8.0,GATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,517
+D7S820,10.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,33
+D7S820,11.0,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGATTGATAGTTT,522
 D8S1179,13.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,135
 D8S1179,14.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,971
 D8S1179,15.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,661
@@ -64,21 +64,21 @@ D9S1122,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,131
 D9S1122,11.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,115
 D9S1122,12.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,1551
 D9S1122,12.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,1427
-FGA,19.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,135
-FGA,20.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,1043
-FGA,22.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,182
-FGA,23.0,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,1023
+FGA,19.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,135
+FGA,20.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1043
+FGA,22.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,182
+FGA,23.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1023
 PentaD,12.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,289
 PentaD,13.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,278
-PentaE,7.0,TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT,505
-PentaE,14.0,TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT,261
+PentaE,7.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505
+PentaE,14.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,261
 TH01,5.0,AATGAATGAATGAATGAATG,247
 TH01,6.0,AATGAATGAATGAATGAATGAATG,4542
 TH01,8.3,AATGAATGAATGAATGAATGATGAATGAATGAATG,151
 TH01,9.3,AATGAATGAATGAATGAATGAATGATGAATGAATGAATG,3581
 TPOX,10.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,59
 TPOX,11.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,1216
-vWA,15.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGA,23
-vWA,16.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGATAGA,377
-vWA,18.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGA,43
-vWA,19.0,TAGATGGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGACAGACAGACAGACAGATAGA,325
+vWA,15.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,23
+vWA,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,377
+vWA,18.0,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,43
+vWA,19.0,TCTATCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,325
diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index 8115c09f..47599c80 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -74,7 +74,9 @@ def format_table(input, software, kit="forenseq", custom=False):
             len(sequence) <= (remove_5p + remove_3p + len(metadata["LUS"]))
             and software != "uas"
             and locus != "AMELOGENIN"
-        ) or (locus == "AMELOGENIN" and len(sequence) < (remove_5p + remove_3p)):
+        ) or (
+            software != "uas" and locus == "AMELOGENIN" and len(sequence) < (remove_5p + remove_3p)
+        ):
             flank_summary = [
                 sampleid,
                 project,
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 37bbb7eb..3869c5e6 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -119,15 +119,11 @@
     "DYS438",
     "DYS439",
     "DYS448",
-    "DYS460",
     "DYS481",
-    "DYS505",
-    "DYS522",
     "DYS533",
     "DYS549",
     "DYS570",
     "DYS576",
-    "DYS612",
     "DYS635",
     "DYS643",
     "Y-GATA-H4",

From 1f03738016bccb478900f0dca201f9a7af296a62 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 4 Jun 2025 06:10:46 -0400
Subject: [PATCH 19/21] fixed formatting issues; added str lists as json file

---
 lusSTR/cli/gui.py          |  87 ++++++-----------------
 lusSTR/data/str_lists.json | 101 ++++++++++++++++++++++++++
 lusSTR/wrappers/filter.py  | 141 +++++++------------------------------
 3 files changed, 148 insertions(+), 181 deletions(-)
 create mode 100644 lusSTR/data/str_lists.json

diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py
index bcd3a005..29b95ffb 100644
--- a/lusSTR/cli/gui.py
+++ b/lusSTR/cli/gui.py
@@ -44,6 +44,14 @@ def get_filter_metadata_file():
     filter_marker_data = json.load(fh)
 
 
+def get_strlist_file():
+    return importlib.resources.files("lusSTR") / "data/str_lists.json"
+
+
+with open(get_strlist_file(), "r") as fh:
+    str_lists = json.load(fh)
+
+
 # ------------ Function to Generate config.yaml File ---------- #
 
 
@@ -147,63 +155,6 @@ def main():
 #                     lusSTR Home Page                              #
 #####################################################################
 
-p_strs = [
-    "AMELOGENIN",
-    "CSF1PO",
-    "D10S1248",
-    "D12S391",
-    "D13S317",
-    "D16S539",
-    "D18S51",
-    "D19S433",
-    "D1S1656",
-    "D21S11",
-    "D22S1045",
-    "D2S1338",
-    "D2S441",
-    "D3S1358",
-    "D5S818",
-    "D7S820",
-    "D8S1179",
-    "FGA",
-    "PENTA D",
-    "PENTA E",
-    "TH01",
-    "TPOX",
-    "VWA",
-]
-
-f_strs = [
-    "AMELOGENIN",
-    "CSF1PO",
-    "D10S1248",
-    "D12S391",
-    "D13S317",
-    "D16S539",
-    "D17S1301",
-    "D18S51",
-    "D19S433",
-    "D1S1656",
-    "D20S482",
-    "D21S11",
-    "D22S1045",
-    "D2S1338",
-    "D2S441",
-    "D3S1358",
-    "D4S2408",
-    "D5S818",
-    "D6S1043",
-    "D7S820",
-    "D8S1179",
-    "D9S1122",
-    "FGA",
-    "PENTA D",
-    "PENTA E",
-    "TH01",
-    "TPOX",
-    "VWA",
-]
-
 
 def show_home_page():
 
@@ -255,15 +206,21 @@ def interactive_plots_allmarkers(sample_df, flagged_df):
     max_yvalue = (int(math.ceil(max_reads / n)) * n) + n
     increase_value = int(math.ceil((max_yvalue / 5) / n)) * n
     n = 0
-    all_loci = f_strs if st.session_state.kit == "forenseq" else p_strs
+    all_loci = (
+        str_lists["forenseq_strs"]
+        if st.session_state.kit == "forenseq"
+        else str_lists["powerseq_strs"]
+    )
     missing_loci = [x for x in all_loci if x not in sample_df["Locus"].unique()]
     for marker in all_loci:
         col = cols[n]
         container = col.container(border=True)
         sample_locus = sample_df["SampleID"].unique() + "_" + marker
-        for i, row in sample_df.iterrows():
-            if sample_df.loc[i, "Locus"] == "AMELOGENIN":
-                sample_df.loc[i, "CE_Allele"] = 0 if sample_df.loc[i, "CE_Allele"] == "X" else 1
+        sample_df["CE_Allele"] = np.where(  # recode in the column; keep sample_df a DataFrame
+            sample_df["Locus"] == "AMELOGENIN",
+            np.where(sample_df["CE_Allele"] == "X", 0, 1),  # X -> 0, any other value -> 1
+            sample_df["CE_Allele"],
+        )
         sample_df["CE_Allele"] = pd.to_numeric(sample_df["CE_Allele"])
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(
             by=["CE_Allele", "allele_type"], ascending=[False, True]
@@ -384,9 +341,11 @@ def interactive_setup(df1, file):
         interactive_plots_allmarkers(sample_df, flags)
     else:
         plot_df = sample_df
-        for i, row in plot_df.iterrows():
-            if plot_df.loc[i, "Locus"] == "AMELOGENIN":
-                plot_df.loc[i, "CE_Allele"] = 0 if plot_df.loc[i, "CE_Allele"] == "X" else 1
+        plot_df["CE_Allele"] = np.where(  # recode the frame that is converted and plotted below
+            plot_df["Locus"] == "AMELOGENIN",
+            np.where(plot_df["CE_Allele"] == "X", 0, 1),  # X -> 0, any other value -> 1
+            plot_df["CE_Allele"],
+        )
         plot_df["CE_Allele"] = pd.to_numeric(plot_df["CE_Allele"])
         locus_key = f"{sample}_{locus}"
         if locus_key not in st.session_state:
diff --git a/lusSTR/data/str_lists.json b/lusSTR/data/str_lists.json
new file mode 100644
index 00000000..e1d54894
--- /dev/null
+++ b/lusSTR/data/str_lists.json
@@ -0,0 +1,101 @@
+{
+
+    "powerseq_strs" : [
+        "AMELOGENIN",
+        "CSF1PO",
+        "D10S1248",
+        "D12S391",
+        "D13S317",
+        "D16S539",
+        "D18S51",
+        "D19S433",
+        "D1S1656",
+        "D21S11",
+        "D22S1045",
+        "D2S1338",
+        "D2S441",
+        "D3S1358",
+        "D5S818",
+        "D7S820",
+        "D8S1179",
+        "FGA",
+        "PENTA D",
+        "PENTA E",
+        "TH01",
+        "TPOX",
+        "VWA"
+    ],
+    "forenseq_strs" : [
+        "AMELOGENIN",
+        "CSF1PO",
+        "D10S1248",
+        "D12S391",
+        "D13S317",
+        "D16S539",
+        "D17S1301",
+        "D18S51",
+        "D19S433",
+        "D1S1656",
+        "D20S482",
+        "D21S11",
+        "D22S1045",
+        "D2S1338",
+        "D2S441",
+        "D3S1358",
+        "D4S2408",
+        "D5S818",
+        "D6S1043",
+        "D7S820",
+        "D8S1179",
+        "D9S1122",
+        "FGA",
+        "PENTA D",
+        "PENTA E",
+        "TH01",
+        "TPOX",
+        "VWA"
+    ],
+    "powerseq_ystrs" : [
+        "DYS19",
+        "DYS385A-B",
+        "DYS389II",
+        "DYS390",
+        "DYS391",
+        "DYS392",
+        "DYS393",
+        "DYS437",
+        "DYS438",
+        "DYS439",
+        "DYS448",
+        "DYS456",
+        "DYS458",
+        "DYS481",
+        "DYS533",
+        "DYS549",
+        "DYS570",
+        "DYS576",
+        "DYS635",
+        "DYS643",
+        "Y-GATA-H4"
+    ],
+    "forenseq_ystrs" : [
+        "DYS19",
+        "DYS385A-B",
+        "DYS389II",
+        "DYS390",
+        "DYS391",
+        "DYS392",
+        "DYS437",
+        "DYS438",
+        "DYS439",
+        "DYS448",
+        "DYS481",
+        "DYS533",
+        "DYS549",
+        "DYS570",
+        "DYS576",
+        "DYS635",
+        "DYS643",
+        "Y-GATA-H4"
+    ]
+}
\ No newline at end of file
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 3869c5e6..3b3c7981 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -27,109 +27,6 @@
 import sys
 
 
-p_strs = [
-    "AMELOGENIN",
-    "CSF1PO",
-    "D10S1248",
-    "D12S391",
-    "D13S317",
-    "D16S539",
-    "D18S51",
-    "D19S433",
-    "D1S1656",
-    "D21S11",
-    "D22S1045",
-    "D2S1338",
-    "D2S441",
-    "D3S1358",
-    "D5S818",
-    "D7S820",
-    "D8S1179",
-    "FGA",
-    "PENTA D",
-    "PENTA E",
-    "TH01",
-    "TPOX",
-    "VWA",
-]
-
-f_strs = [
-    "AMELOGENIN",
-    "CSF1PO",
-    "D10S1248",
-    "D12S391",
-    "D13S317",
-    "D16S539",
-    "D17S1301",
-    "D18S51",
-    "D19S433",
-    "D1S1656",
-    "D20S482",
-    "D21S11",
-    "D22S1045",
-    "D2S1338",
-    "D2S441",
-    "D3S1358",
-    "D4S2408",
-    "D5S818",
-    "D6S1043",
-    "D7S820",
-    "D8S1179",
-    "D9S1122",
-    "FGA",
-    "PENTA D",
-    "PENTA E",
-    "TH01",
-    "TPOX",
-    "VWA",
-]
-
-p_ystrs = [
-    "DYS19",
-    "DYS385A-B",
-    "DYS389II",
-    "DYS390",
-    "DYS391",
-    "DYS392",
-    "DYS393",
-    "DYS437",
-    "DYS438",
-    "DYS439",
-    "DYS448",
-    "DYS456",
-    "DYS458",
-    "DYS481",
-    "DYS533",
-    "DYS549",
-    "DYS570",
-    "DYS576",
-    "DYS635",
-    "DYS643",
-    "Y-GATA-H4",
-]
-
-f_ystrs = [
-    "DYS19",
-    "DYS385A-B",
-    "DYS389II",
-    "DYS390",
-    "DYS391",
-    "DYS392",
-    "DYS437",
-    "DYS438",
-    "DYS439",
-    "DYS448",
-    "DYS481",
-    "DYS533",
-    "DYS549",
-    "DYS570",
-    "DYS576",
-    "DYS635",
-    "DYS643",
-    "Y-GATA-H4",
-]
-
-
 def get_filter_metadata_file():
     return importlib.resources.files("lusSTR") / "data/filters.json"
 
@@ -138,11 +35,19 @@ def get_filter_metadata_file():
     filter_marker_data = json.load(fh)
 
 
+def get_strlist_file():
+    return importlib.resources.files("lusSTR") / "data/str_lists.json"
+
+
+with open(get_strlist_file(), "r") as fh:
+    str_lists = json.load(fh)
+
+
 def process_strs(dict_loc, datatype, seq_col, brack_col, kit):
     final_df = pd.DataFrame()
     flags_df = pd.DataFrame()
-    strs = p_strs if kit == "powerseq" else f_strs
-    ystrs = p_ystrs if kit == "powerseq" else f_ystrs
+    strs = str_lists["powerseq_strs"] if kit == "powerseq" else str_lists["forenseq_strs"]
+    ystrs = str_lists["powerseq_ystrs"] if kit == "powerseq" else str_lists["forenseq_ystrs"]
     for key, value in dict_loc.items():
         data = dict_loc[key].reset_index(drop=True)
         if datatype == "ce":
@@ -234,8 +139,8 @@ def populate_efm_profile(profile, data_type, colname, sex, kit):
             allele_heights[row.SampleID][row.Locus][row.Allele] = int(row.Reads)
     max_num_alleles = determine_max_num_alleles(allele_heights)
     reformatted_profile = list()
-    strs = p_strs if kit == "powerseq" else f_strs
-    ystrs = p_ystrs if kit == "powerseq" else f_ystrs
+    strs = str_lists["powerseq_strs"] if kit == "powerseq" else str_lists["forenseq_strs"]
+    ystrs = str_lists["powerseq_ystrs"] if kit == "powerseq" else str_lists["forenseq_ystrs"]
     for sampleid, loci in allele_heights.items():
         for locusid, alleles in loci.items():
             allele_list, height_list = list(), list()
@@ -442,9 +347,13 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
     fig = plt.figure(figsize=(30, 30))
     n = 0
     if kit == "powerseq":
-        str_list = p_ystrs if "sexloci" in output_name else p_strs
+        str_list = (
+            str_lists["powerseq_ystrs"] if "sexloci" in output_name else str_lists["powerseq_strs"]
+        )
     else:
-        str_list = f_ystrs if "sexloci" in output_name else f_strs
+        str_list = (
+            str_lists["forenseq_ystrs"] if "sexloci" in output_name else str_lists["forenseq_strs"]
+        )
     for marker in str_list:
         n += 1
         colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"}
@@ -457,7 +366,6 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
                         0 if marker_df.loc[i, "CE_Allele"] == "X" else 1
                     )
             marker_df["CE_Allele"] = marker_df["CE_Allele"].astype(float)
-            # ax = fig.add_subplot(6, 5, n)
             p = ax.bar(
                 marker_df["CE_Allele"],
                 marker_df["Reads"],
@@ -478,16 +386,15 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
             if not filters:
                 plt.legend(handles, labels, title="Allele Type")
             else:
+                marker_df["Label"] = None
                 for i, row in marker_df.iterrows():
                     if marker == "AMELOGENIN":
-                        marker_df.loc[i, "Label"] = (
-                            "X" if marker_df.loc[i, "CE_Allele"] == 0 else "Y"
-                        )
+                        row["Label"] = "X" if row["CE_Allele"] == 0 else "Y"
                     else:
-                        marker_df.loc[i, "Label"] = (
-                            str(int(marker_df.loc[i, "CE_Allele"]))
-                            if ".0" in str(marker_df.loc[i, "CE_Allele"])
-                            else str(marker_df.loc[i, "CE_Allele"])
+                        row["Label"] = (
+                            str(int(row["CE_Allele"]))
+                            if ".0" in str(row["CE_Allele"])
+                            else str(row["CE_Allele"])
                         )
                 ax.bar_label(p, labels=marker_df["Label"])
             if sameyaxis:

From ac814c78b9874bd71923ede6a47a8b01293ca25d Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 4 Jun 2025 07:16:39 -0400
Subject: [PATCH 20/21] fixed bug

---
 lusSTR/wrappers/filter.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 3b3c7981..812713e2 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -386,12 +386,11 @@ def make_plot(df, sample_id, output_name, kit, sameyaxis=False, filters=False, a
             if not filters:
                 plt.legend(handles, labels, title="Allele Type")
             else:
-                marker_df["Label"] = None
                 for i, row in marker_df.iterrows():
                     if marker == "AMELOGENIN":
-                        row["Label"] = "X" if row["CE_Allele"] == 0 else "Y"
+                        marker_df.loc[i, "Label"] = "X" if row["CE_Allele"] == 0 else "Y"
                     else:
-                        row["Label"] = (
+                        marker_df.loc[i, "Label"] = (
                             str(int(row["CE_Allele"]))
                             if ".0" in str(row["CE_Allele"])
                             else str(row["CE_Allele"])

From ae3c813da8cd606ec82b1c7830553983de57b5bd Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 5 Jun 2025 12:07:17 -0400
Subject: [PATCH 21/21] simplified convert code

---
 lusSTR/wrappers/convert.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index 47599c80..49a7062d 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -70,13 +70,10 @@ def format_table(input, software, kit="forenseq", custom=False):
                 remove_5p = remove_5p - metadata["Custom_5"]
             if metadata["Custom_3"] < 0:
                 remove_3p = remove_3p - metadata["Custom_3"]
-        if (
-            len(sequence) <= (remove_5p + remove_3p + len(metadata["LUS"]))
-            and software != "uas"
-            and locus != "AMELOGENIN"
-        ) or (
-            software != "uas" and locus == "AMELOGENIN" and len(sequence) < (remove_5p + remove_3p)
-        ):
+        locus_min_length = remove_5p + remove_3p + len(metadata["LUS"])
+        if locus == "AMELOGENIN":
+            locus_min_length -= 1  # Amelogenin is flagged only when shorter than its flanks
+        if software != "uas" and len(sequence) <= locus_min_length:
             flank_summary = [
                 sampleid,
                 project,