Implementing lusSTR workflow for Amelogenin locus #85
Changes from all commits: 7752853, fb465c4, 7a7ad3b, 086c7fe, 4634d42, 992e608, dc557e0, 4ce279c, 65b878a, 01472ee, ebc7fc3, c2929b1, 694c980, f089083, cd44d52, 1fb7846, eec3ac1, 740c5ea, 1f03738, ac814c7, ae3c813
@@ -0,0 +1,101 @@
{
    "powerseq_strs" : [
        "AMELOGENIN",
        "CSF1PO",
        "D10S1248",
        "D12S391",
        "D13S317",
        "D16S539",
        "D18S51",
        "D19S433",
        "D1S1656",
        "D21S11",
        "D22S1045",
        "D2S1338",
        "D2S441",
        "D3S1358",
        "D5S818",
        "D7S820",
        "D8S1179",
        "FGA",
        "PENTA D",
        "PENTA E",
        "TH01",
        "TPOX",
        "VWA"
    ],
    "forenseq_strs" : [
        "AMELOGENIN",
        "CSF1PO",
        "D10S1248",
        "D12S391",
        "D13S317",
        "D16S539",
        "D17S1301",
        "D18S51",
        "D19S433",
        "D1S1656",
        "D20S482",
        "D21S11",
        "D22S1045",
        "D2S1338",
        "D2S441",
        "D3S1358",
        "D4S2408",
        "D5S818",
        "D6S1043",
        "D7S820",
        "D8S1179",
        "D9S1122",
        "FGA",
        "PENTA D",
        "PENTA E",
        "TH01",
        "TPOX",
        "VWA"
    ],
    "powerseq_ystrs" : [
        "DYS19",
        "DYS385A-B",
        "DYS389II",
        "DYS390",
        "DYS391",
        "DYS392",
        "DYS393",
        "DYS437",
        "DYS438",
        "DYS439",
        "DYS448",
        "DYS456",
        "DYS458",
        "DYS481",
        "DYS533",
        "DYS549",
        "DYS570",
        "DYS576",
        "DYS635",
        "DYS643",
        "Y-GATA-H4"
    ],
    "forenseq_ystrs" : [
        "DYS19",
        "DYS385A-B",
        "DYS389II",
        "DYS390",
        "DYS391",
        "DYS392",
        "DYS437",
        "DYS438",
        "DYS439",
        "DYS448",
        "DYS481",
        "DYS533",
        "DYS549",
        "DYS570",
        "DYS576",
        "DYS635",
        "DYS643",
        "Y-GATA-H4"
    ]
}
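The kit panels above are plain JSON, so the rest of the workflow can look up the right locus list by kit and marker type. As a minimal sketch of how such a config could be consumed (the file name, helper name, and kit/key arguments here are illustrative assumptions, not the lusSTR API):

    import json

    def load_marker_list(config_path, kit="forenseq", ystrs=False):
        # Hypothetical helper for illustration only; lusSTR's real loading code may differ.
        with open(config_path) as fh:
            marker_config = json.load(fh)
        key = f"{kit}_ystrs" if ystrs else f"{kit}_strs"
        return marker_config[key]

    # Example: load_marker_list("marker_lists.json", kit="powerseq")
    # -> ["AMELOGENIN", "CSF1PO", "D10S1248", ...]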
@@ -28,20 +28,57 @@ def get_filter_metadata_file():

def filters(locus_allele_info, locus, locus_reads, datatype, brack_col):
    metadata = filter_marker_data[locus]
    if len(locus_allele_info) == 1:
        locus_allele_info = single_allele_thresholds(metadata, locus_reads, locus_allele_info)
    if locus == "AMELOGENIN":
        locus_allele_info = filter_amel(metadata, locus_allele_info, locus_reads)
|
Comment on lines
-31
to
+32
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's nothing necessarily wrong with how you've updated this code, but we could keep the nesting complexity to a minimum with something like this, right? if locus == "AMELOGENIN":
# ...
elif len(locus_allele_info) == 1:
# ...
else:
# ...
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was just trying to avoid have to repeat code (line 34 would have to be in both blocks). |
||
    else:
        locus_allele_info, locus_reads = multiple_allele_thresholds(
            metadata, locus_reads, locus_allele_info
        )
        locus_allele_info = ce_filtering(
            locus_allele_info, locus_reads, metadata, datatype, brack_col
        )
        if datatype != "ce":
            locus_allele_info = same_size_filter(locus_allele_info, metadata, datatype)
        locus_allele_info["CE_Allele"] = locus_allele_info["CE_Allele"].astype(float)
        if len(locus_allele_info) == 1:
            locus_allele_info = single_allele_thresholds(metadata, locus_reads, locus_allele_info)
        else:
            locus_allele_info, locus_reads = multiple_allele_thresholds(
                metadata, locus_reads, locus_allele_info
            )
            locus_allele_info = ce_filtering(
                locus_allele_info, locus_reads, metadata, datatype, brack_col
            )
            if datatype != "ce":
                locus_allele_info = same_size_filter(locus_allele_info, metadata, datatype)
    return locus_allele_info
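Regarding the review thread above: if the shared step the author refers to (line 34) is the CE_Allele conversion, one possible compromise keeps the reviewer's flat structure without repeating that line by returning early for Amelogenin. This is only a sketch of an alternative, not the code merged in this pull request:

    def filters(locus_allele_info, locus, locus_reads, datatype, brack_col):
        metadata = filter_marker_data[locus]
        if locus == "AMELOGENIN":
            # Amelogenin alleles are X/Y, so the numeric CE_Allele cast is skipped.
            return filter_amel(metadata, locus_allele_info, locus_reads)
        # Shared conversion written once for all other loci.
        locus_allele_info["CE_Allele"] = locus_allele_info["CE_Allele"].astype(float)
        if len(locus_allele_info) == 1:
            locus_allele_info = single_allele_thresholds(metadata, locus_reads, locus_allele_info)
        else:
            locus_allele_info, locus_reads = multiple_allele_thresholds(
                metadata, locus_reads, locus_allele_info
            )
            locus_allele_info = ce_filtering(
                locus_allele_info, locus_reads, metadata, datatype, brack_col
            )
            if datatype != "ce":
                locus_allele_info = same_size_filter(locus_allele_info, metadata, datatype)
        return locus_allele_info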
def filter_amel(metadata, amel_df, locus_reads):
    for filter in ["Detection", "Analytical"]:
        use = metadata[f"{filter}ThresholdUse"]
        count = metadata[f"{filter}ThresholdStaticCount"]
        perc = metadata[f"{filter}ThresholdDynamicPercent"]
        thresh_perc = round(perc * locus_reads, 1)
        if (
            use.lower() == "dynamic"
            and locus_reads < metadata["MinimumNumberReadsForDynamicThresholds"]
        ):
            use = "static"
        if use.lower() == "both":
            thresh = thresh_perc if thresh_perc >= count else count
        elif use.lower() == "static":
            thresh = count
        elif use.lower() == "dynamic":
            thresh = thresh_perc
        if filter == "Detection":
            amel_dt = amel_df[amel_df["Reads"] >= thresh].reset_index(drop=True)
            locus_reads = amel_df["Reads"].sum()
        else:
            for i in range(len(amel_dt)):
                al_reads = amel_dt.loc[i, "Reads"]
                if al_reads < thresh:
                    amel_dt.loc[i, ["allele_type", "perc_noise"]] = [
                        "BelowAT",
                        round(al_reads / locus_reads, 3),
                    ]
                else:
                    amel_dt.loc[i, "allele_type"] = "Typed"
    return amel_dt
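To make the threshold selection in filter_amel concrete, here is a small worked example; the metadata values below are invented for illustration and are not the defaults shipped with lusSTR:

    # Hypothetical filter settings for the Detection/Analytical passes above.
    metadata = {
        "DetectionThresholdUse": "both",
        "DetectionThresholdStaticCount": 10,
        "DetectionThresholdDynamicPercent": 0.015,
        "AnalyticalThresholdUse": "dynamic",
        "AnalyticalThresholdStaticCount": 30,
        "AnalyticalThresholdDynamicPercent": 0.03,
        "MinimumNumberReadsForDynamicThresholds": 50,
    }
    locus_reads = 1000
    # Detection ("both"): thresh = max(round(0.015 * 1000, 1), 10) = 15.0 reads.
    # Analytical ("dynamic"): thresh = round(0.03 * 1000, 1) = 30.0 reads.
    # If locus_reads were 40 (< 50), a "dynamic" setting would fall back to "static".
    # Alleles passing Detection are kept; those under the Analytical threshold are
    # labeled "BelowAT" with their fraction of locus reads recorded in "perc_noise".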
def single_allele_thresholds(metadata, locus_reads, single_all_df):
    if thresholds("Detection", metadata, locus_reads, single_all_df["Reads"][0])[1] is False:
        single_all_df = pd.DataFrame()
I feel like this plotly import pattern is an elaborate ruse so people can put "go figure" in their code 😆
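For anyone outside the joke, the pun presumably refers to plotly's conventional alias import (the import itself is not shown in this hunk, so this is an assumption about what is being referenced):

    import plotly.graph_objects as go  # the standard alias, hence "go.Figure()" -> "go figure"

    fig = go.Figure(data=[go.Bar(x=["X", "Y"], y=[12, 7])])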