diff --git a/moalmanac/datasources/README.md b/datasources/README.md similarity index 100% rename from moalmanac/datasources/README.md rename to datasources/README.md diff --git a/moalmanac/datasources/acmg/README.md b/datasources/acmg/README.md similarity index 100% rename from moalmanac/datasources/acmg/README.md rename to datasources/acmg/README.md diff --git a/moalmanac/datasources/acmg/acmg.secondaryfindings.v3.txt b/datasources/acmg/acmg.secondaryfindings.v3.txt similarity index 100% rename from moalmanac/datasources/acmg/acmg.secondaryfindings.v3.txt rename to datasources/acmg/acmg.secondaryfindings.v3.txt diff --git a/moalmanac/datasources/cancergenecensus/README.md b/datasources/cancergenecensus/README.md similarity index 100% rename from moalmanac/datasources/cancergenecensus/README.md rename to datasources/cancergenecensus/README.md diff --git a/moalmanac/datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv b/datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv similarity index 100% rename from moalmanac/datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv rename to datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv diff --git a/moalmanac/datasources/cancergenecensus/diff_versions.py b/datasources/cancergenecensus/diff_versions.py similarity index 100% rename from moalmanac/datasources/cancergenecensus/diff_versions.py rename to datasources/cancergenecensus/diff_versions.py diff --git a/moalmanac/datasources/cancergenecensus/extract_genes.py b/datasources/cancergenecensus/extract_genes.py similarity index 100% rename from moalmanac/datasources/cancergenecensus/extract_genes.py rename to datasources/cancergenecensus/extract_genes.py diff --git a/moalmanac/datasources/cancerhotspots/README.md b/datasources/cancerhotspots/README.md similarity index 100% rename from moalmanac/datasources/cancerhotspots/README.md rename to datasources/cancerhotspots/README.md diff --git a/moalmanac/datasources/cancerhotspots/hotspots.txt 
b/datasources/cancerhotspots/hotspots.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/hotspots.txt rename to datasources/cancerhotspots/hotspots.txt diff --git a/moalmanac/datasources/cancerhotspots/hotspots3d.txt b/datasources/cancerhotspots/hotspots3d.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/hotspots3d.txt rename to datasources/cancerhotspots/hotspots3d.txt diff --git a/moalmanac/datasources/cancerhotspots/hotspots_v2.txt b/datasources/cancerhotspots/hotspots_v2.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/hotspots_v2.txt rename to datasources/cancerhotspots/hotspots_v2.txt diff --git a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls b/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls rename to datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls diff --git a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt b/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt rename to datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt diff --git a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt b/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt rename to datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt diff --git a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py b/datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py rename to datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py diff --git 
a/moalmanac/datasources/clinvar/README.md b/datasources/clinvar/README.md similarity index 100% rename from moalmanac/datasources/clinvar/README.md rename to datasources/clinvar/README.md diff --git a/moalmanac/datasources/clinvar/prepare_clinvar.py b/datasources/clinvar/prepare_clinvar.py similarity index 100% rename from moalmanac/datasources/clinvar/prepare_clinvar.py rename to datasources/clinvar/prepare_clinvar.py diff --git a/moalmanac/datasources/clinvar/variant_summary.lite.txt b/datasources/clinvar/variant_summary.lite.txt similarity index 100% rename from moalmanac/datasources/clinvar/variant_summary.lite.txt rename to datasources/clinvar/variant_summary.lite.txt diff --git a/moalmanac/datasources/cosmic/README.md b/datasources/cosmic/README.md similarity index 100% rename from moalmanac/datasources/cosmic/README.md rename to datasources/cosmic/README.md diff --git a/moalmanac/datasources/cosmic/prepare_cosmic.py b/datasources/cosmic/prepare_cosmic.py similarity index 100% rename from moalmanac/datasources/cosmic/prepare_cosmic.py rename to datasources/cosmic/prepare_cosmic.py diff --git a/moalmanac/datasources/exac/README.md b/datasources/exac/README.md similarity index 100% rename from moalmanac/datasources/exac/README.md rename to datasources/exac/README.md diff --git a/moalmanac/datasources/exac/build_exac.sh b/datasources/exac/build_exac.sh similarity index 100% rename from moalmanac/datasources/exac/build_exac.sh rename to datasources/exac/build_exac.sh diff --git a/moalmanac/datasources/exac/expand_exac.py b/datasources/exac/expand_exac.py similarity index 100% rename from moalmanac/datasources/exac/expand_exac.py rename to datasources/exac/expand_exac.py diff --git a/moalmanac/datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt b/datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt similarity index 100% rename from moalmanac/datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt rename to datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt diff 
--git a/moalmanac/datasources/gsea_gene_sets/README.md b/datasources/gsea_gene_sets/README.md similarity index 100% rename from moalmanac/datasources/gsea_gene_sets/README.md rename to datasources/gsea_gene_sets/README.md diff --git a/moalmanac/datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt b/datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt similarity index 100% rename from moalmanac/datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt rename to datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt diff --git a/moalmanac/datasources/hereditary/README.md b/datasources/hereditary/README.md similarity index 100% rename from moalmanac/datasources/hereditary/README.md rename to datasources/hereditary/README.md diff --git a/moalmanac/datasources/hereditary/hereditary.txt b/datasources/hereditary/hereditary.txt similarity index 100% rename from moalmanac/datasources/hereditary/hereditary.txt rename to datasources/hereditary/hereditary.txt diff --git a/moalmanac/datasources/lawrence/README.md b/datasources/lawrence/README.md similarity index 100% rename from moalmanac/datasources/lawrence/README.md rename to datasources/lawrence/README.md diff --git a/moalmanac/datasources/lawrence/lawrence_ST2.txt b/datasources/lawrence/lawrence_ST2.txt similarity index 100% rename from moalmanac/datasources/lawrence/lawrence_ST2.txt rename to datasources/lawrence/lawrence_ST2.txt diff --git a/moalmanac/datasources/lawrence/lawrence_mapped_ontology.txt b/datasources/lawrence/lawrence_mapped_ontology.txt similarity index 100% rename from moalmanac/datasources/lawrence/lawrence_mapped_ontology.txt rename to datasources/lawrence/lawrence_mapped_ontology.txt diff --git a/moalmanac/datasources/lawrence/map_oncotree_lawrence.ipynb b/datasources/lawrence/map_oncotree_lawrence.ipynb similarity index 100% rename from moalmanac/datasources/lawrence/map_oncotree_lawrence.ipynb rename to datasources/lawrence/map_oncotree_lawrence.ipynb diff --git a/moalmanac/datasources/moalmanac/README.md 
b/datasources/moalmanac/README.md similarity index 100% rename from moalmanac/datasources/moalmanac/README.md rename to datasources/moalmanac/README.md diff --git a/moalmanac/datasources/moalmanac/create_almanac_db.py b/datasources/moalmanac/create_almanac_db.py similarity index 100% rename from moalmanac/datasources/moalmanac/create_almanac_db.py rename to datasources/moalmanac/create_almanac_db.py diff --git a/moalmanac/datasources/moalmanac/molecular-oncology-almanac.json b/datasources/moalmanac/molecular-oncology-almanac.json similarity index 100% rename from moalmanac/datasources/moalmanac/molecular-oncology-almanac.json rename to datasources/moalmanac/molecular-oncology-almanac.json diff --git a/moalmanac/datasources/oncotree/README.md b/datasources/oncotree/README.md similarity index 100% rename from moalmanac/datasources/oncotree/README.md rename to datasources/oncotree/README.md diff --git a/moalmanac/datasources/oncotree/get_oncotree.py b/datasources/oncotree/get_oncotree.py similarity index 100% rename from moalmanac/datasources/oncotree/get_oncotree.py rename to datasources/oncotree/get_oncotree.py diff --git a/moalmanac/datasources/oncotree/oncotree.2023-03-09.txt b/datasources/oncotree/oncotree.2023-03-09.txt similarity index 100% rename from moalmanac/datasources/oncotree/oncotree.2023-03-09.txt rename to datasources/oncotree/oncotree.2023-03-09.txt diff --git a/moalmanac/datasources/preclinical/README.md b/datasources/preclinical/README.md similarity index 100% rename from moalmanac/datasources/preclinical/README.md rename to datasources/preclinical/README.md diff --git a/moalmanac/datasources/preclinical/almanac-gdsc-mappings.json b/datasources/preclinical/almanac-gdsc-mappings.json similarity index 100% rename from moalmanac/datasources/preclinical/almanac-gdsc-mappings.json rename to datasources/preclinical/almanac-gdsc-mappings.json diff --git a/moalmanac/datasources/preclinical/annotated/README.md b/datasources/preclinical/annotated/README.md 
similarity index 100% rename from moalmanac/datasources/preclinical/annotated/README.md rename to datasources/preclinical/annotated/README.md diff --git a/moalmanac/datasources/preclinical/annotated/annotate-copy-numbers.py b/datasources/preclinical/annotated/annotate-copy-numbers.py similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-copy-numbers.py rename to datasources/preclinical/annotated/annotate-copy-numbers.py diff --git a/moalmanac/datasources/preclinical/annotated/annotate-fusions.py b/datasources/preclinical/annotated/annotate-fusions.py similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-fusions.py rename to datasources/preclinical/annotated/annotate-fusions.py diff --git a/moalmanac/datasources/preclinical/annotated/annotate-molecular-features.sh b/datasources/preclinical/annotated/annotate-molecular-features.sh similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-molecular-features.sh rename to datasources/preclinical/annotated/annotate-molecular-features.sh diff --git a/moalmanac/datasources/preclinical/annotated/annotate-variants.py b/datasources/preclinical/annotated/annotate-variants.py similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-variants.py rename to datasources/preclinical/annotated/annotate-variants.py diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt b/datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt rename to datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt b/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt similarity index 100% rename from 
moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt rename to datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt b/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt rename to datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.txt b/datasources/preclinical/annotated/cell-lines.fusions.annotated.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.txt rename to datasources/preclinical/annotated/cell-lines.fusions.annotated.txt diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt b/datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt rename to datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt diff --git a/moalmanac/datasources/preclinical/cell-lines.pkl b/datasources/preclinical/cell-lines.pkl similarity index 100% rename from moalmanac/datasources/preclinical/cell-lines.pkl rename to datasources/preclinical/cell-lines.pkl diff --git a/moalmanac/datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb b/datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb similarity index 100% rename from moalmanac/datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb rename to datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb diff --git a/moalmanac/datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb 
b/datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb similarity index 100% rename from moalmanac/datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb rename to datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb diff --git a/moalmanac/datasources/preclinical/formatted/README.md b/datasources/preclinical/formatted/README.md similarity index 100% rename from moalmanac/datasources/preclinical/formatted/README.md rename to datasources/preclinical/formatted/README.md diff --git a/moalmanac/datasources/preclinical/formatted/almanac-gdsc-mappings.json b/datasources/preclinical/formatted/almanac-gdsc-mappings.json similarity index 100% rename from moalmanac/datasources/preclinical/formatted/almanac-gdsc-mappings.json rename to datasources/preclinical/formatted/almanac-gdsc-mappings.json diff --git a/moalmanac/datasources/preclinical/formatted/cell-line-names.formatted.txt b/datasources/preclinical/formatted/cell-line-names.formatted.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-line-names.formatted.txt rename to datasources/preclinical/formatted/cell-line-names.formatted.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.copy-numbers.txt b/datasources/preclinical/formatted/cell-lines.copy-numbers.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.copy-numbers.txt rename to datasources/preclinical/formatted/cell-lines.copy-numbers.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.fusions.txt b/datasources/preclinical/formatted/cell-lines.fusions.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.fusions.txt rename to datasources/preclinical/formatted/cell-lines.fusions.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.somatic-variants.txt b/datasources/preclinical/formatted/cell-lines.somatic-variants.txt 
similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.somatic-variants.txt rename to datasources/preclinical/formatted/cell-lines.somatic-variants.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.summary.txt b/datasources/preclinical/formatted/cell-lines.summary.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.summary.txt rename to datasources/preclinical/formatted/cell-lines.summary.txt diff --git a/moalmanac/datasources/preclinical/formatted/sanger.gdsc.txt b/datasources/preclinical/formatted/sanger.gdsc.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/sanger.gdsc.txt rename to datasources/preclinical/formatted/sanger.gdsc.txt diff --git a/moalmanac/datasources/preclinical/generate-dictionary.ipynb b/datasources/preclinical/generate-dictionary.ipynb similarity index 100% rename from moalmanac/datasources/preclinical/generate-dictionary.ipynb rename to datasources/preclinical/generate-dictionary.ipynb diff --git a/moalmanac/datasources/preclinical/source/README.md b/datasources/preclinical/source/README.md similarity index 100% rename from moalmanac/datasources/preclinical/source/README.md rename to datasources/preclinical/source/README.md diff --git a/moalmanac/datasources/preclinical/source/ccle-2019/README.md b/datasources/preclinical/source/ccle-2019/README.md similarity index 100% rename from moalmanac/datasources/preclinical/source/ccle-2019/README.md rename to datasources/preclinical/source/ccle-2019/README.md diff --git a/moalmanac/datasources/preclinical/source/depmap/README.md b/datasources/preclinical/source/depmap/README.md similarity index 100% rename from moalmanac/datasources/preclinical/source/depmap/README.md rename to datasources/preclinical/source/depmap/README.md diff --git a/moalmanac/datasources/preclinical/source/gdsc/README.md b/datasources/preclinical/source/gdsc/README.md similarity index 100% rename from 
moalmanac/datasources/preclinical/source/gdsc/README.md rename to datasources/preclinical/source/gdsc/README.md diff --git a/example_output/example_output.actionable.txt b/example_output/example_output.actionable.txt index 863f2f4..909dca8 100644 --- a/example_output/example_output.actionable.txt +++ b/example_output/example_output.actionable.txt @@ -1,22 +1,22 @@ -score_bin sensitive_predictive_implication resistance_predictive_implication prognostic_predictive_implication feature_type feature alteration_type alteration tumor_f total_coverage exac_af exac_common clinvar sensitive_score_bin sensitive_therapy_name sensitive_therapy_strategy sensitive_therapy_type sensitive_oncotree_code sensitive_description sensitive_citation sensitive_url resistance_score_bin resistance_therapy_name resistance_therapy_strategy resistance_therapy_type resistance_oncotree_code resistance_description resistance_citation resistance_url prognostic_score_bin favorable_prognosis prognostic_oncotree_code prognostic_description prognostic_citation prognostic_url number_germline_mutations_in_gene validation_total_coverage validation_tumor_f validation_detection_power feature_display preclinical_efficacy_observed patient_id tumor_sample_barcode normal_sample_barcode -Putatively Actionable FDA-Approved Guideline Guideline Somatic Variant BRAF Missense p.V600E 0.6316 152 1.6e-05 0.0 Putatively Actionable Binimetinib + Encorafenib MEK inhibition + B-RAF inhibition Targeted therapy NSCLC The U.S. Food and Drug Administration (FDA) approved encorafenib in combination with binimetinib for the treatment of adult patients with metastatic non-small cell lung cancer (NSCLC) with BRAF V600E mutation, as detected by an FDA-approved test. Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf. Revised October 2023. Accessed November 1, 2023. 
https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf Putatively Actionable Panitumumab EGFR inhibition Targeted therapy COADREAD Panitumumab (Vectibix) is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with metastatic colorectal cancer, BRAF V600E makes response to panitumumab or cetuximab highly unlikely unless given with a BRAF inhibitor. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.4.2018. © National Comprehensive Cancer Network, Inc. 2018. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Putatively Actionable 0 COADREAD BRAF V600E alterations are associated with an unfavorable prognosis in MSI-low and microsatellite-stable patients. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf 1.0 4.0 0.0 0.161 BRAF p.V600E (Missense) example example_tumor_profile example_normal_profile -Putatively Actionable Guideline Copy Number CDK4 Amplification 0.0 0.0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy WDLS Palbociclib (Ibrance) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with well-differentiated liposarcoma and CDK4 amplification. CDK4 amplification is characteristic of well-differentiated and dedifferentiated liposarcomas, and palbociclib shows activity in this context. 
Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Soft Tissue Sarcoma V.1.2021. © National Comprehensive Cancer Network, Inc. 2019. All rights reserved. Accessed November 19th, 2019. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/sarcoma_blocks.pdf CDK4 Amplification 1 example -Putatively Actionable Clinical evidence Copy Number CDKN2A Deletion 0.0 0.0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy ULMS A patient with uterine leiomyosarcoma whose tumor harbored a CDKN2A mutant which inactivated p16INK4a experienced clinical benefit from treatment with palbociclib. Elvin JA, Gay LM, Ort R, et al. Clinical Benefit in Response to Palbociclib Treatment in Refractory Uterine Leiomyosarcomas with a Common CDKN2A Alteration. Oncologist. 2017;22(4):416-421. https://doi.org/10.1634/theoncologist.2016-0310 CDKN2A Deletion 1 example -Putatively Actionable Preclinical Guideline Copy Number TP53 Deletion 0.0 0.0 Putatively Actionable Talazoparib PARP inhibition Targeted therapy OS Response to talazoparib in osteosarcoma cell lines was associated with homologous recombination deficiency in a study of 5 cancer cell lines. Osteosarcoma cell lines MG63 and ZK-58 displayed the highest sensitivity to talazoparib, SaOS-2 and MNNG-HOS displayed intermediate sensitivity, and U2OS cells remained resistant. Cell lines MG63, ZK-58, and MNNG-HOS scored HRD-LOH positive according to a score (Abkevich et al. 2012). MG63 cells harbored copy losses in BAP1, FANCA, and FANCD2 while ZK-58 carried disruptive copy loss in BARD1 and copy gain in FANCD2. SaOS-2 cells harbored copy losses in CHEK2 and TP53 and MNNG-HOS cells have copy loss of ATM and disruptive copy gains in PTEN and FANCD2. The talazoparib-resistant cell line, U2OS, carried a heterozygous BRCA2 copy loss and one intact BRCA2 alelle. 
Engert F, Kovac M, Baumhoer D, Nathrath M, Fulda S. Osteosarcoma cells with genetic signatures of BRCAness are susceptible to the PARP inhibitor talazoparib alone or in combination with chemotherapeutics. Oncotarget. 2016;8(30):48794-48806. https://www.clinicalkey.com/#!/content/playContent/1-s2.0-S1470204513700494 Putatively Actionable 0 MM Deletion of 17p13 leads to LoH of TP53 and is considered a high-risk feature of multiple myeloma. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Multiple Myeloma V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/myeloma_blocks.pdf 1.0 TP53 Deletion 1 example -Putatively Actionable Clinical evidence Copy Number BRAF Amplification 0.0 0.0 Putatively Actionable Vemurafenib B-RAF inhibition Targeted therapy MEL Amplification of BRAF may predict resistance to RAF inhibition. Wagle, Nikhil, et al. MAP kinase pathway alterations in BRAF-mutant melanoma patients with acquired resistance to combined RAF/MEK inhibition. Cancer discovery 4.1 (2014): 61-68. https://doi.org/10.1158/2159-8290.CD-13-0631 1.0 BRAF Amplification example -Investigate Actionability FDA-Approved Guideline Guideline Microsatellite Stability MSI-High Investigate Actionability Dostarlimab-gxly PD-1/PD-L1 inhibition Immunotherapy UCEC The U.S. Food and Drug Administration (FDA) granted approval to dostarlimab-gxly in combination with carboplatin and paclitaxel, followed by single agent dostarlimab-gxly, for the treatment of adult patients primary advanced or recurrent endometrial cancer that is mismatch repair deficient (dMMR), as determined by an FDA-approved test, or microsatellite-instability-high. GlaxoSmithKline LLC. Jemperli (dostarlimab-gxly) [package insert]. U.S. Food and Drug Administration website. 
https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf. Revised July 2023. Accessed September 6, 2023. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf Investigate Actionability 5-Fluorouracil Thymidylate synthase inhibition Chemotherapy COADREAD 5-Fluorouracil is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with MSI-High colorectal cancer. These patients appear to not benefit from, and may be resistant to, 5-fluorouracil therapy. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Investigate Actionability 1 COADREAD Patients with MSI-High colorectal cancer often have a favorable prognosis. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf MSI-High example -Investigate Actionability Guideline Rearrangement COL1A1 Fusion COL1A1--CITED4 0.0 0.0 Investigate Actionability Imatinib PDGF-R inhibition Targeted therapy DFSP Imatinib (Gleevec) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with localized or metastatic dermatofibrosarcoma tumors containing t(17;22)(q22;q13). Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Dermatofibrosarcoma V.1.2017. © National Comprehensive Cancer Network, Inc. 2017. 
All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/dfsp_blocks.pdf COL1A1--CITED4 Fusion 0 example -Investigate Actionability Inferential Mutational Signature SBS5 v3.4 0.224 Investigate Actionability Cisplatin Platinum-based chemotherapy Chemotherapy BLCA COSMIC Signature 5 is associated with somatic ERCC2 mutations, which may suggest sensitivity to cisplatin based chemotherapy. Kim J, Mouw KW, Polak P, et al. Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors. Nat Genet. 2016;48(6):600-606. https://doi.org/10.1038/ng.3557 SBS5 (22%) example -Investigate Actionability Inferential Mutational Signature SBS10b v3.4 0.119 Investigate Actionability Durvalumab PD-1/PD-L1 inhibition Immunotherapy COSMIC Signature 10 is observed in some of the most hypermutant samples and recurrent POLE mutations. POLE mutant tumors are being included along with MMR-deficient tumors in several ongoing trials for sensitivity to immunotherapy. Mouw KW, Goldberg MS, Konstantinopoulos PA, D'Andrea AD. DNA Damage and Repair Biomarkers of Immunotherapy Response Cancer Discov. 2017; 7(7):675-693. https://doi.org/10.1158/2159-8290.CD-17-0226 SBS10b (12%) example -Investigate Actionability Inferential Aneuploidy Whole genome doubling Investigate Actionability 0 WGD was associated with adverse survival pan-cancer in patients with advanced disease and in cancers with heterogeneous clinical outcomes, even following the development of metastasis. Bielski CM, Zehir A, Penson AV, et al. Genome doubling shapes the evolution and prognosis of advanced cancers Nat Genet. 2018; 50(8):1189-1195. 
https://doi.org/10.1038/s41588-018-0165-1 example -Investigate Actionability FDA-Approved Germline Variant BRCA2 Frameshift p.S1982fs 0.5 100 0.0 0.0 Pathogenic Investigate Actionability Abiraterone acetate + Niraparib Antiandrogen + PARP inhibition Combination therapy PRAD The U.S. Food and Drug Administration (FDA) granted approval to niraparib, a poly (ADP-ribose) polymerase (PARP) inhibitor, and abiraterone acetate, a CYP17 inhibitor indicated with prednisone for the treatment of adult patients with deleterious or suspected deleterious BRCA-mutated (BRCAm) metastatic castration-resistant prostate cancer (mCRPC). Janssen Biotech, Inc. Akeega (niraparib and abiraterone acetate) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf. Revised August 2023. Accessed September 6, 2023. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf BRCA2 p.S1982fs (Frameshift) example example_tumor_profile example_normal_profile -Investigate Actionability Clinical evidence Somatic Variant MSH2 Missense p.D887N 0.8039 51 0.0 0.0 Investigate Actionability Pembrolizumab PD-1/PD-L1 inhibition Immunotherapy COADREAD Patients with defects in DNA mismatch repair genes may have enhanced sensitivity to immune checkpoint blockade. Le DT, Uram JN, Wang H, et al. PD-1 Blockade in Tumors with Mismatch-Repair Deficiency. N Engl J Med. 2015;372(26):2509-20. https://doi.org/10.1056/NEJMoa1500596 0.0 0.0 0.0 MSH2 p.D887N (Missense) example example_tumor_profile example_normal_profile -Investigate Actionability Preclinical Copy Number FGFR2 Deletion 0.0 0.0 Investigate Actionability Infigratinib FGFR inhibition Targeted therapy COADREAD A study of 32 Cancer Cell Line Encyclopedia cell lines demonstrating FGFR1 and FGFR2 amplification demonstrated sensitivity to Infigratinib. Guagnano V, Kauffmann A, Wohrle S, et al. 
FGFR genetic alterations predict for sensitivity to NVP-BGJ398, a selective pan-FGFR inhibitor. Cancer Discov. 2012;2(12):1118-33. https://doi.org/10.1158/2159-8290.CD-12-0210 FGFR2 Deletion example -Investigate Actionability Guideline Somatic Variant STAG2 Missense p.F467I 0.3571 56 0.0 0.0 Investigate Actionability 0 MDS The National Comprehensive Cancer Network® (NCCN®) highlights STAG2 nonsense, frameshift, and splice site variants as being associated with a poor prognosis in patients with myelodysplastic syndromes. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 27.0 0.0 0.9881 STAG2 p.F467I (Missense) example example_tumor_profile example_normal_profile -Investigate Actionability Guideline Somatic Variant ZRSR2 Missense p.N261I 0.4019 107 0.0 0.0 Investigate Actionability 0 MDS The National Comprehensive Cancer Network® (NCCN®) highlights ZRSR2 nonsense and frameshift variants as being associated with a poor prognosis in patients with myelodysplastic syndromes. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org. 
https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 29.0 0.0 0.9987 ZRSR2 p.N261I (Missense) example example_tumor_profile example_normal_profile -Biologically Relevant Somatic Variant NTRK2 Missense p.H300Y 0.6136 44 0.0 0.0 17.0 0.0 0.9982 NTRK2 p.H300Y (Missense) example example_tumor_profile example_normal_profile -Biologically Relevant Somatic Variant PDGFRB Missense p.G674E 0.4 40 0.0 0.0 222.0 0.0 1.0 PDGFRB p.G674E (Missense) example example_tumor_profile example_normal_profile -Biologically Relevant Germline Variant BRAF Nonsense p.R509* 0.5 100 0.0 0.0 BRAF p.R509* (Nonsense) example example_tumor_profile example_normal_profile -Biologically Relevant Microsatellite Stability Supporting variants PRDM2 p.E282del (Deletion) Supporting variants: PRDM2 p.E282del (Deletion) example -Biologically Relevant Mutational Signature SBS7a v3.4 0.424 SBS7a (42%) example -Biologically Relevant Mutational Signature SBS30 v3.4 0.231 SBS30 (23%) example +score_bin sensitive_predictive_implication resistance_predictive_implication prognostic_predictive_implication feature_type feature alteration_type alteration tumor_f total_coverage exac_af exac_common clinvar sensitive_score_bin sensitive_therapy_name sensitive_therapy_strategy sensitive_therapy_type sensitive_oncotree_code sensitive_description sensitive_citation sensitive_url resistance_score_bin resistance_therapy_name resistance_therapy_strategy resistance_therapy_type resistance_oncotree_code resistance_description resistance_citation resistance_url prognostic_score_bin favorable_prognosis prognostic_oncotree_code prognostic_description prognostic_citation prognostic_url number_germline_mutations_in_gene validation_total_coverage validation_tumor_f validation_detection_power feature_display preclinical_efficacy_observed patient_id tumor_sample_barcode normal_sample_barcode +Putatively Actionable FDA-Approved Guideline Guideline Somatic Variant BRAF Missense p.V600E 0.6316 152 1.60E-05 0 Putatively 
Actionable Binimetinib + Encorafenib MEK inhibition + B-RAF inhibition Targeted therapy NSCLC "The U.S. Food and Drug Administration (FDA) approved encorafenib in combination with binimetinib for the treatment of adult patients with metastatic non-small cell lung cancer (NSCLC) with BRAF V600E mutation, as detected by an FDA-approved test." "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf. Revised October 2023. Accessed November 1, 2023." https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf Putatively Actionable Panitumumab EGFR inhibition Targeted therapy COADREAD "Panitumumab (Vectibix) is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with metastatic colorectal cancer, BRAF V600E makes response to panitumumab or cetuximab highly unlikely unless given with a BRAF inhibitor. " "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.4.2018. © National Comprehensive Cancer Network, Inc. 2018. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Putatively Actionable 0 COADREAD BRAF V600E alterations are associated with an unfavorable prognosis in MSI-low and microsatellite-stable patients. "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org." 
https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf 1 4 0 0.161 BRAF p.V600E (Missense) example example_tumor_profile example_normal_profile +Putatively Actionable Guideline Copy Number CDK4 Amplification 0 0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy WDLS "Palbociclib (Ibrance) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with well-differentiated liposarcoma and CDK4 amplification. CDK4 amplification is characteristic of well-differentiated and dedifferentiated liposarcomas, and palbociclib shows activity in this context." "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Soft Tissue Sarcoma V.1.2021. © National Comprehensive Cancer Network, Inc. 2019. All rights reserved. Accessed November 19th, 2019. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/sarcoma_blocks.pdf CDK4 Amplification example +Putatively Actionable Clinical evidence Copy Number CDKN2A Deletion 0 0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy ULMS A patient with uterine leiomyosarcoma whose tumor harbored a CDKN2A mutant which inactivated p16INK4a experienced clinical benefit from treatment with palbociclib. "Elvin JA, Gay LM, Ort R, et al. Clinical Benefit in Response to Palbociclib Treatment in Refractory Uterine Leiomyosarcomas with a Common CDKN2A Alteration. Oncologist. 2017;22(4):416-421." https://doi.org/10.1634/theoncologist.2016-0310 CDKN2A Deletion example +Putatively Actionable Preclinical Guideline Copy Number TP53 Deletion 0 0 Putatively Actionable Talazoparib PARP inhibition Targeted therapy OS "Response to talazoparib in osteosarcoma cell lines was associated with homologous recombination deficiency in a study of 5 cancer cell lines. 
Osteosarcoma cell lines MG63 and ZK-58 displayed the highest sensitivity to talazoparib, SaOS-2 and MNNG-HOS displayed intermediate sensitivity, and U2OS cells remained resistant. Cell lines MG63, ZK-58, and MNNG-HOS scored HRD-LOH positive according to a score (Abkevich et al. 2012). MG63 cells harbored copy losses in BAP1, FANCA, and FANCD2 while ZK-58 carried disruptive copy loss in BARD1 and copy gain in FANCD2. SaOS-2 cells harbored copy losses in CHEK2 and TP53 and MNNG-HOS cells have copy loss of ATM and disruptive copy gains in PTEN and FANCD2. The talazoparib-resistant cell line, U2OS, carried a heterozygous BRCA2 copy loss and one intact BRCA2 alelle." "Engert F, Kovac M, Baumhoer D, Nathrath M, Fulda S. Osteosarcoma cells with genetic signatures of BRCAness are susceptible to the PARP inhibitor talazoparib alone or in combination with chemotherapeutics. Oncotarget. 2016;8(30):48794-48806." https://www.clinicalkey.com/#!/content/playContent/1-s2.0-S1470204513700494 Putatively Actionable 0 MM Deletion of 17p13 leads to LoH of TP53 and is considered a high-risk feature of multiple myeloma. "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Multiple Myeloma V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/myeloma_blocks.pdf 1 TP53 Deletion example +Putatively Actionable Clinical evidence Copy Number BRAF Amplification 0 0 Putatively Actionable Vemurafenib B-RAF inhibition Targeted therapy MEL Amplification of BRAF may predict resistance to RAF inhibition. "Wagle, Nikhil, et al. MAP kinase pathway alterations in BRAF-mutant melanoma patients with acquired resistance to combined RAF/MEK inhibition. Cancer discovery 4.1 (2014): 61-68." 
https://doi.org/10.1158/2159-8290.CD-13-0631 1 BRAF Amplification example +Investigate Actionability FDA-Approved Guideline Guideline Microsatellite Stability MSI-High Investigate Actionability Dostarlimab-gxly PD-1/PD-L1 inhibition Immunotherapy UCEC "The U.S. Food and Drug Administration (FDA) granted approval to dostarlimab-gxly in combination with carboplatin and paclitaxel, followed by single agent dostarlimab-gxly, for the treatment of adult patients primary advanced or recurrent endometrial cancer that is mismatch repair deficient (dMMR), as determined by an FDA-approved test, or microsatellite-instability-high." "GlaxoSmithKline LLC. Jemperli (dostarlimab-gxly) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf. Revised July 2023. Accessed September 6, 2023." https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf Investigate Actionability 5-Fluorouracil Thymidylate synthase inhibition Chemotherapy COADREAD "5-Fluorouracil is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with MSI-High colorectal cancer. These patients appear to not benefit from, and may be resistant to, 5-fluorouracil therapy." "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Investigate Actionability 1 COADREAD Patients with MSI-High colorectal cancer often have a favorable prognosis. "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. 
Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf MSI-High example +Investigate Actionability Guideline Rearrangement COL1A1 Fusion COL1A1--CITED4 0 0 Investigate Actionability Imatinib PDGF-R inhibition Targeted therapy DFSP Imatinib (Gleevec) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with localized or metastatic dermatofibrosarcoma tumors containing t(17;22)(q22;q13). "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Dermatofibrosarcoma V.1.2017. © National Comprehensive Cancer Network, Inc. 2017. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/dfsp_blocks.pdf COL1A1--CITED4 Fusion example +Investigate Actionability Inferential Mutational Signature SBS5 v3.4 0.224 Investigate Actionability Cisplatin Platinum-based chemotherapy Chemotherapy BLCA "COSMIC Signature 5 is associated with somatic ERCC2 mutations, which may suggest sensitivity to cisplatin based chemotherapy." "Kim J, Mouw KW, Polak P, et al. Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors. Nat Genet. 2016;48(6):600-606." https://doi.org/10.1038/ng.3557 SBS5 (22%) example +Investigate Actionability Inferential Mutational Signature SBS10b v3.4 0.119 Investigate Actionability Durvalumab PD-1/PD-L1 inhibition Immunotherapy COSMIC Signature 10 is observed in some of the most hypermutant samples and recurrent POLE mutations. POLE mutant tumors are being included along with MMR-deficient tumors in several ongoing trials for sensitivity to immunotherapy. "Mouw KW, Goldberg MS, Konstantinopoulos PA, D'Andrea AD. 
DNA Damage and Repair Biomarkers of Immunotherapy Response Cancer Discov. 2017; 7(7):675-693." https://doi.org/10.1158/2159-8290.CD-17-0226 SBS10b (12%) example +Investigate Actionability Inferential Aneuploidy Whole genome doubling Investigate Actionability 0 "WGD was associated with adverse survival pan-cancer in patients with advanced disease and in cancers with heterogeneous clinical outcomes, even following the development of metastasis." "Bielski CM, Zehir A, Penson AV, et al. Genome doubling shapes the evolution and prognosis of advanced cancers Nat Genet. 2018; 50(8):1189-1195." https://doi.org/10.1038/s41588-018-0165-1 example +Investigate Actionability FDA-Approved Germline Variant BRCA2 Frameshift p.S1982fs 0.5 100 0 0 Pathogenic Investigate Actionability Abiraterone acetate + Niraparib Antiandrogen + PARP inhibition Combination therapy PRAD "The U.S. Food and Drug Administration (FDA) granted approval to niraparib, a poly (ADP-ribose) polymerase (PARP) inhibitor, and abiraterone acetate, a CYP17 inhibitor indicated with prednisone for the treatment of adult patients with deleterious or suspected deleterious BRCA-mutated (BRCAm) metastatic castration-resistant prostate cancer (mCRPC)." "Janssen Biotech, Inc. Akeega (niraparib and abiraterone acetate) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf. Revised August 2023. Accessed September 6, 2023." https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf BRCA2 p.S1982fs (Frameshift) example example_tumor_profile example_normal_profile +Investigate Actionability Clinical evidence Somatic Variant MSH2 Missense p.D887N 0.8039 51 0 0 Investigate Actionability Pembrolizumab PD-1/PD-L1 inhibition Immunotherapy COADREAD Patients with defects in DNA mismatch repair genes may have enhanced sensitivity to immune checkpoint blockade. "Le DT, Uram JN, Wang H, et al. 
PD-1 Blockade in Tumors with Mismatch-Repair Deficiency. N Engl J Med. 2015;372(26):2509-20." https://doi.org/10.1056/NEJMoa1500596 0 0 0 MSH2 p.D887N (Missense) example example_tumor_profile example_normal_profile +Investigate Actionability Preclinical Copy Number FGFR2 Deletion 0 0 Investigate Actionability Infigratinib FGFR inhibition Targeted therapy COADREAD A study of 32 Cancer Cell Line Encyclopedia cell lines demonstrating FGFR1 and FGFR2 amplification demonstrated sensitivity to Infigratinib. "Guagnano V, Kauffmann A, Wohrle S, et al. FGFR genetic alterations predict for sensitivity to NVP-BGJ398, a selective pan-FGFR inhibitor. Cancer Discov. 2012;2(12):1118-33." https://doi.org/10.1158/2159-8290.CD-12-0210 FGFR2 Deletion example +Investigate Actionability Guideline Somatic Variant STAG2 Missense p.F467I 0.3571 56 0 0 Investigate Actionability 0 MDS "The National Comprehensive Cancer Network® (NCCN®) highlights STAG2 nonsense, frameshift, and splice site variants as being associated with a poor prognosis in patients with myelodysplastic syndromes." "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 27 0 0.9881 STAG2 p.F467I (Missense) example example_tumor_profile example_normal_profile +Investigate Actionability Guideline Somatic Variant ZRSR2 Missense p.N261I 0.4019 107 0 0 Investigate Actionability 0 MDS The National Comprehensive Cancer Network® (NCCN®) highlights ZRSR2 nonsense and frameshift variants as being associated with a poor prognosis in patients with myelodysplastic syndromes. 
"Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 29 0 0.9987 ZRSR2 p.N261I (Missense) example example_tumor_profile example_normal_profile +Biologically Relevant Somatic Variant NTRK2 Missense p.H300Y 0.6136 44 0 0 17 0 0.9982 NTRK2 p.H300Y (Missense) example example_tumor_profile example_normal_profile +Biologically Relevant Somatic Variant PDGFRB Missense p.G674E 0.4 40 0 0 222 0 1 PDGFRB p.G674E (Missense) example example_tumor_profile example_normal_profile +Biologically Relevant Germline Variant BRAF Nonsense p.R509* 0.5 100 0 0 BRAF p.R509* (Nonsense) example example_tumor_profile example_normal_profile +Biologically Relevant Microsatellite Stability Supporting variants PRDM2 p.E282del (Deletion) Supporting variants: PRDM2 p.E282del (Deletion) example +Biologically Relevant Mutational Signature SBS7a v3.4 0.424 SBS7a (42%) example +Biologically Relevant Mutational Signature SBS30 v3.4 0.231 SBS30 (23%) example \ No newline at end of file diff --git a/moalmanac/README.md b/moalmanac/README.md index a65e169..72d6cd1 100644 --- a/moalmanac/README.md +++ b/moalmanac/README.md @@ -2,11 +2,13 @@ Molecular Oncology Almanac can be run by executing either `moalmanac.py` with [standard input formats](#standard-usage) or `simplified_input.py` with [simplified inputs](#simplified-input). Please follow the [installation instructions](../README.md#installation) before use. ## Standard usage -Molecular Oncology Almanac may be executed on any combination of input data but does require a patient_id to label output files. 
Additional settings can be set by modifying the [config.ini](#configini) file and column names may be modified by editing the [colnames.ini](#colnamesini) file. +Molecular Oncology Almanac may be executed on any combination of input data but does require a patient_id to label output files. Additional settings can be set by modifying the [config.ini](#configini) file and column names may be modified by editing the [colnames.ini](#colnamesini) file. The [config.ini](config.ini) and [annotation-databases.ini](annotation-databases.ini) must be passed as arguments to moalmanac.py. Required arguments: ``` --patient_id patient identifier + --config ini file that contains configuration details, config.ini + --dbs ini file that contains database paths for annotation, annotation-databases.ini ``` Optional arguments: @@ -28,6 +30,7 @@ Optional arguments: --disable_matchmaking remove patient-to-cell line matchmaking from report --description description of patient --output-directory specify location of produced outputs + --preclinical-dbs path to preclinical-databases.ini file ``` Example: @@ -46,8 +49,11 @@ python moalmanac.py \ --validation_handle "../example_data/example_patient.rna.somatic.snvs.maf" \ --purity 0.85 \ --ploidy 4.02 \ - --ms_status "msih" - --wgd + --ms_status "msih" \ + --wgd \ + --config config.ini \ + --dbs annotation-databases.ini \ + --preclinical-dbs preclinical-databases.ini ``` These example inputs may also be processed by executing `run_example.py`. 
@@ -85,6 +91,8 @@ This is also described in the [description of inputs](../docs/description-of-inp Required arguments: ``` --patient_id patient identifier + --config ini file that contains configuration details, config.ini + --dbs ini file that contains database paths for annotation, annotation-databases.ini ``` Optional arguments: @@ -98,6 +106,7 @@ Optional arguments: --wgd specify the occurence of whole genome duplication --description description of patient --output-directory specify location of produced outputs + --preclinical-dbs path to preclinical-databases.ini file ``` Example: @@ -110,8 +119,11 @@ python simplified_input.py \ --input "../example_data/example_patient.simplified_input.txt" \ --purity 0.85 \ --ploidy 4.02 \ - --ms_status "msih" - --wgd + --ms_status "msih" \ + --wgd \ + --config config.ini \ + --dbs annotation-databases.ini \ + --preclinical-dbs preclinical-databases.ini ``` ## Configuration @@ -128,8 +140,6 @@ The configuration file [config.ini](config.ini) lets users change settings, thre - `signatures` allows users to specify the minimum required contribution to consider mutational signatures - `validation_sequencing` allows users to specify minimum power and allelic fraction to consider for variants from validation sequencing - `feature_types` allows users to specify strings for considered feature types -- `databases` specifies file paths for databases used for annotation, found in the `moalmanac/databases/` folder -- `preclinical` specifies file paths for datasources used for preclinical functions, [model_similarity](../docs/description-of-outputs.md#profile-to-cell-line-matchmaking) and [preclinical efficacy](../docs/description-of-outputs.md#preclinical-efficacy) ### colnames.ini The configuration file [colnames.ini](colnames.ini) lets users change strings associated with column names for input and output files. 
The file contains the following relevant sections, @@ -137,6 +147,12 @@ The configuration file [colnames.ini](colnames.ini) lets users change strings as Other sections in this configuration file are used internally to MOAlmanac for processing. +### annotation-databases.ini +The configuration file [annotation-databases.ini](annotation-databases.ini) lets users change paths to datasources being used to annotate genomic variants within the algorithm. This file contains a single `databases` section, with either a relative or absolute path being set to the `root` variable. By default, this points to the [datasources/](../datasources/) folder in the root directory of this repository. + +### preclinical-databases.ini +Similar to `annotation-databases.ini`, the configuration file [preclinical-databases.ini](preclinical-databases.ini) lets users change paths to datasources being used for preclinical comparison functions, [model_similarity](../docs/description-of-outputs.md#profile-to-cell-line-matchmaking) and [preclinical efficacy](../docs/description-of-outputs.md#preclinical-efficacy). + ## Citation If you find this tool or any code herein useful, please cite: > [Reardon, B., Moore, N.D., Moore, N.S., *et al*. Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology. *Nat Cancer* (2021). 
https://doi.org/10.1038/s43018-021-00243-3](https://www.nature.com/articles/s43018-021-00243-3) diff --git a/moalmanac/annotation-databases.ini b/moalmanac/annotation-databases.ini new file mode 100644 index 0000000..200f099 --- /dev/null +++ b/moalmanac/annotation-databases.ini @@ -0,0 +1,15 @@ +[databases] +root = ../datasources +almanac_handle = ${root}/moalmanac/molecular-oncology-almanac.json +cancerhotspots_handle = ${root}/cancerhotspots/hotspots_v2.txt +3dcancerhotspots_handle = ${root}/cancerhotspots/hotspots3d.txt +cgc_handle = ${root}/cancergenecensus/cancer_gene_census_v97.genes.tsv +cosmic_handle = ${root}/cosmic/CosmicMutantExport_v97.lite.txt +gsea_pathways_handle = ${root}/gsea_gene_sets/GSEA_cancer_gene_sets.txt +gsea_modules_handle = ${root}/gsea_gene_sets/c4.cm.v6.0.symbols.txt +exac_handle = ${root}/exac/exac.expanded.r1.txt +acmg_handle = ${root}/acmg/acmg.secondaryfindings.v3.txt +clinvar_handle = ${root}/clinvar/variant_summary.lite.txt +hereditary_handle = ${root}/hereditary/hereditary.txt +oncotree_handle = ${root}/oncotree/oncotree.2023-03-09.txt +lawrence_handle = ${root}/lawrence/lawrence_mapped_ontology.txt \ No newline at end of file diff --git a/moalmanac/annotator.py b/moalmanac/annotator.py index 2c1bf90..51b181e 100644 --- a/moalmanac/annotator.py +++ b/moalmanac/annotator.py @@ -8,9 +8,6 @@ import features from config import COLNAMES -from config import CONFIG - -EXAC_CONFIG = CONFIG['exac'] class Annotator: @@ -45,28 +42,28 @@ def annotate(cls, df, dbs, importer, bin_name, comparison_columns): return df @classmethod - def annotate_almanac(cls, df, dbs, ontology): + def annotate_almanac(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) return df @classmethod - def annotate_germline(cls, df, dbs, ontology): + def annotate_germline(cls, df, dbs, ontology, config): df[cls.score_bin] = 
cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) df = ACMG.annotate(df, dbs) df = ClinVar.annotate(df, dbs) df = Hereditary.annotate(df, dbs) - df = ExACExtended.annotate(df, dbs) + df = ExACExtended.annotate(df, dbs, config) df = MSI.annotate(df) return df @classmethod - def annotate_simple(cls, df, dbs, ontology): + def annotate_simple(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerHotspots3D.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) @@ -79,23 +76,23 @@ def annotate_simple(cls, df, dbs, ontology): return df @classmethod - def annotate_somatic(cls, df, dbs, ontology): + def annotate_somatic(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerHotspots3D.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) df = Cosmic.annotate(df, dbs) df = GSEACancerPathways.annotate(df, dbs) df = GSEACancerModules.annotate(df, dbs) - df = ExAC.annotate(df, dbs) + df = ExAC.annotate(df, dbs, config) df = MSI.annotate(df) return df @classmethod - def annotate_somatic_no_exac(cls, df, dbs, ontology): + def annotate_somatic_no_exac(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerHotspots3D.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) @@ -381,32 +378,21 @@ class Almanac: } } - feature_types_section = 'feature_types' - 
feature_types_config = CONFIG[feature_types_section] - aneuploidy = feature_types_config['aneuploidy'] - burden = feature_types_config['burden'] - copynumber_variant = feature_types_config['cna'] - fusion = feature_types_config['fusion'] - germline_variant = feature_types_config['germline'] - microsatellite_status = feature_types_config['microsatellite'] - signature = feature_types_config['signature'] - somatic_variant = feature_types_config['mut'] - @classmethod - def annotate(cls, df, dbs, ontology): + def annotate(cls, df, dbs, ontology, config): db = datasources.Almanac.import_ds(dbs) ds = db['content'] list_genes = db['genes'] annotation_function_dict = { - cls.aneuploidy: cls.annotate_aneuploidy, - cls.burden: cls.annotate_burden, - cls.copynumber_variant: cls.annotate_copy_number, - cls.fusion: cls.annotate_fusion, - cls.germline_variant: cls.annotate_variants, - cls.microsatellite_status: cls.annotate_microsatellite_stability, - cls.signature: cls.annotate_signatures, - cls.somatic_variant: cls.annotate_variants + config['feature_types']['aneuploidy']: cls.annotate_aneuploidy, + config['feature_types']['burden']: cls.annotate_burden, + config['feature_types']['cna']: cls.annotate_copy_number, + config['feature_types']['fusion']: cls.annotate_fusion, + config['feature_types']['germline']: cls.annotate_variants, + config['feature_types']['microsatellite']: cls.annotate_microsatellite_stability, + config['feature_types']['signature']: cls.annotate_signatures, + config['feature_types']['mut']: cls.annotate_variants } for feature_type, group in df.groupby(cls.feature_type): @@ -422,7 +408,13 @@ def annotate(cls, df, dbs, ontology): .astype(float) ) - if feature_type in [cls.somatic_variant, cls.germline_variant, cls.copynumber_variant, cls.fusion]: + simple_biomarkers = [ + config['feature_types']['mut'], + config['feature_types']['germline'], + config['feature_types']['cna'], + config['feature_types']['fusion'] + ] + if feature_type in simple_biomarkers: idx = 
group[cls.feature].isin(list_genes) df.loc[group[~idx].index, cls.bin_name] = 0 group = group[group[cls.feature].isin(list_genes)] @@ -981,18 +973,15 @@ class ExAC: af = datasources.ExAC.af bin_name = Annotator.exac_common_bin - exac_common_threshold = EXAC_CONFIG['exac_common_af_threshold'] str_columns = [chr, ref, alt] int_columns = [start] - somatic = CONFIG['feature_types']['mut'] - germline = CONFIG['feature_types']['germline'] feature_type = features.Features.feature_type @classmethod - def append_exac_af(cls, df, ds, ds_columns): - variants, not_variants = cls.subset_for_variants(df) + def append_exac_af(cls, df, ds, ds_columns, variant_biomarker_types): + variants, not_variants = cls.subset_for_variants(df, variant_biomarker_types) ds = ds.loc[:, ds_columns] for column, data_type in [(cls.str_columns, str), (cls.int_columns, float), (cls.int_columns, int)]: @@ -1017,15 +1006,24 @@ def append_exac_af(cls, df, ds, ds_columns): return result @classmethod - def annotate(cls, df, dbs): + def annotate(cls, df, dbs, config): df_dropped = cls.drop_existing_columns(df) ds = datasources.ExAC.import_ds(dbs) - df_annotated = cls.append_exac_af(df_dropped, ds, [cls.chr, cls.start, cls.ref, cls.alt, cls.af]) - df_annotated[cls.bin_name] = cls.annotate_common_af(df_annotated[cls.af]) + df_annotated = cls.append_exac_af( + df=df_dropped, + ds=ds, + ds_columns=[cls.chr, cls.start, cls.ref, cls.alt, cls.af], + variant_biomarker_types=[config['feature_types']['mut'], config['feature_types']['germline']] + ) + common_allele_frequency_threshold=config['exac']['exac_common_af_threshold'] + df_annotated[cls.bin_name] = cls.annotate_common_af( + series_exac_af=df_annotated[cls.af], + threshold=common_allele_frequency_threshold + ) return features.Features.preallocate_missing_columns(df_annotated) @classmethod - def annotate_common_af(cls, series_exac_af): + def annotate_common_af(cls, series_exac_af, threshold): if not series_exac_af.empty: series = pd.Series(float(0.0), 
index=series_exac_af.index.tolist()) condition = (series_exac_af @@ -1034,7 +1032,7 @@ def annotate_common_af(cls, series_exac_af): .astype(float).mean(axis=1) .fillna(0.0) ) - idx = condition.astype(float) >= float(cls.exac_common_threshold) + idx = condition.astype(float) >= float(threshold) series[idx] = float(1.0) return series else: @@ -1049,8 +1047,8 @@ def format_columns(cls, dataframe, column, data_type): return dataframe.loc[dataframe.index, column].astype(data_type) @classmethod - def subset_for_variants(cls, dataframe): - idx = dataframe[cls.feature_type].isin([cls.somatic, cls.germline]) + def subset_for_variants(cls, dataframe, variant_biomarker_types): + idx = dataframe[cls.feature_type].isin(variant_biomarker_types) return dataframe[idx].copy(), dataframe[~idx].copy() @@ -1083,11 +1081,20 @@ class ExACExtended: an_afr, an_amr, an_eas, an_fin, an_nfe, an_sas, an_oth] @classmethod - def annotate(cls, df, dbs): + def annotate(cls, df, dbs, config): df_dropped = ExAC.drop_existing_columns(df) ds = datasources.ExACExtended.import_ds(dbs) - df_annotated = ExAC.append_exac_af(df_dropped, ds, cls.ds_columns) - df_annotated[ExAC.bin_name] = ExAC.annotate_common_af(df_annotated[ExAC.af]) + df_annotated = ExAC.append_exac_af( + df=df_dropped, + ds=ds, + ds_columns=cls.ds_columns, + variant_biomarker_types=[config['feature_types']['mut'], config['feature_types']['germline']] + ) + common_allele_frequency_threshold = config['exac']['exac_common_af_threshold'] + df_annotated[ExAC.bin_name] = ExAC.annotate_common_af( + series_exac_af=df_annotated[ExAC.af], + threshold=common_allele_frequency_threshold + ) return features.Features.preallocate_missing_columns(df_annotated) @@ -1156,16 +1163,14 @@ class OverlapValidation: validation_coverage = COLNAMES[section]['validation_coverage'] validation_detection_power = COLNAMES[section]['validation_detection_power'] - somatic_variants = CONFIG['feature_types']['mut'] - merge_cols = [gene, alt_type, alt] fill_cols = [tumor_f, 
validation_tumor_f, validation_coverage] @classmethod - def append_validation(cls, primary, validation): + def append_validation(cls, primary, validation, biomarker_type): df = cls.drop_validation_columns(primary) df = cls.merge_data_frames(df, validation, cls.merge_cols) - idx = cls.get_mutation_index(df) + idx = cls.get_mutation_index(df, biomarker_type) for column in cls.fill_cols: df.loc[idx, column] = Annotator.fill_na( dataframe=df.loc[idx, :], @@ -1198,8 +1203,8 @@ def drop_validation_columns(cls, df): return df.drop([cls.validation_tumor_f, cls.validation_coverage], axis=1) @classmethod - def get_mutation_index(cls, df): - return df[df[cls.feature_type].eq(cls.somatic_variants)].index + def get_mutation_index(cls, df, biomarker_type): + return df[df[cls.feature_type].eq(biomarker_type)].index @classmethod def merge_data_frames(cls, df1, df2, columns): @@ -1310,12 +1315,6 @@ class PreclinicalMatchmaking: feature_display = COLNAMES[section]['feature_display'] predictive_implication = COLNAMES[section]['predictive_implication'] - feature_types_section = 'feature_types' - feature_types_config = CONFIG[feature_types_section] - copy_number = feature_types_config['cna'] - fusion = feature_types_config['fusion'] - somatic_variant = feature_types_config['mut'] - evidence_map = { 'FDA-Approved': 5, 'Guideline': 4, 'Clinical trial': 3, 'Clinical evidence': 2, 'Preclinical': 1, 'Inferential': 0} @@ -1328,14 +1327,18 @@ class PreclinicalMatchmaking: fusions_gene2 = 'fusions_gene2' @classmethod - def annotate(cls, input_dict, dbs): - input_variants = input_dict[cls.somatic_variant] - input_copy_number_alterations = input_dict[cls.copy_number] - input_fusions = input_dict[cls.fusion] + def annotate(cls, input_dict, dbs, config): + copy_number = config['feature_types']['cna'] + fusion = config['feature_types']['fusion'] + somatic_variant = config['feature_types']['mut'] + + input_variants = input_dict[somatic_variant] + input_copy_number_alterations = 
input_dict[copy_number] + input_fusions = input_dict[fusion] - variants = cls.annotate_somatic_variants(input_variants, dbs) - copy_number_alterations = cls.annotate_copy_numbers(input_copy_number_alterations, dbs) - fusions, fusions_gene1, fusions_gene2 = cls.annotate_fusions(input_fusions, dbs) + variants = cls.annotate_somatic_variants(input_variants, dbs, somatic_variant) + copy_number_alterations = cls.annotate_copy_numbers(input_copy_number_alterations, dbs, copy_number) + fusions, fusions_gene1, fusions_gene2 = cls.annotate_fusions(input_fusions, dbs, fusion) return { cls.variants: variants, cls.cnas: copy_number_alterations, @@ -1345,12 +1348,12 @@ def annotate(cls, input_dict, dbs): } @classmethod - def annotate_copy_numbers(cls, df, dbs): + def annotate_copy_numbers(cls, df, dbs, biomarker_type_string): almanac = datasources.Almanac.import_ds(dbs) almanac_genes = datasources.Almanac.import_genes(dbs) - df = df[df[cls.feature_type].eq(cls.copy_number)] - db = Almanac.subset_records(almanac['content'], cls.feature_type, cls.copy_number) + df = df[df[cls.feature_type].eq(biomarker_type_string)] + db = Almanac.subset_records(almanac['content'], cls.feature_type, biomarker_type_string) db = pd.DataFrame(db) column_map = {cls.gene: cls.feature, cls.direction: cls.alteration_type} @@ -1363,12 +1366,12 @@ def annotate_copy_numbers(cls, df, dbs): return df @classmethod - def annotate_fusions(cls, df, dbs): + def annotate_fusions(cls, df, dbs, biomarker_type_string): almanac = datasources.Almanac.import_ds(dbs) almanac_genes = datasources.Almanac.import_genes(dbs) - df = df[df[cls.feature_type].eq(cls.fusion)] - db = Almanac.subset_records(almanac['content'], cls.feature_type, cls.fusion) + df = df[df[cls.feature_type].eq(biomarker_type_string)] + db = Almanac.subset_records(almanac['content'], cls.feature_type, biomarker_type_string) db = pd.DataFrame(db) column_map = {cls.rearrangement_type: cls.alteration_type} @@ -1503,12 +1506,12 @@ def 
annotate_fusions_matching(cls, df, db, db_genes, consider_partner=False): return df @classmethod - def annotate_somatic_variants(cls, df, dbs): + def annotate_somatic_variants(cls, df, dbs, biomarker_type_string): almanac = datasources.Almanac.import_ds(dbs) almanac_genes = datasources.Almanac.import_genes(dbs) - df = df[df[cls.feature_type].eq(cls.somatic_variant)] - db = Almanac.subset_records(almanac['content'], cls.feature_type, cls.somatic_variant) + df = df[df[cls.feature_type].eq(biomarker_type_string)] + db = Almanac.subset_records(almanac['content'], cls.feature_type, biomarker_type_string) db = pd.DataFrame(db) replacement_dictionary = {'Oncogenic Mutations': '', 'Activating mutation': ''} diff --git a/moalmanac/config.ini b/moalmanac/config.ini index 399c6aa..6a38ea3 100644 --- a/moalmanac/config.ini +++ b/moalmanac/config.ini @@ -8,7 +8,7 @@ include_preclinical_efficacy_in_actionability_report = on plot_preclinical_efficacy = on [versions] -interpreter = 0.6.0 +interpreter = 0.7.0 database = v.2024-04-11 [exac] @@ -46,30 +46,4 @@ microsatellite = Microsatellite Stability burden = Mutational Burden signature = Mutational Signature aneuploidy = Aneuploidy -knockdown = Knockdown - -[databases] -almanac_handle = datasources/moalmanac/molecular-oncology-almanac.json -cancerhotspots_handle = datasources/cancerhotspots/hotspots_v2.txt -3dcancerhotspots_handle = datasources/cancerhotspots/hotspots3d.txt -cgc_handle = datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv -cosmic_handle = datasources/cosmic/CosmicMutantExport_v97.lite.txt -gsea_pathways_handle = datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt -gsea_modules_handle = datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt -exac_handle = datasources/exac/exac.expanded.r1.txt -acmg_handle = datasources/acmg/acmg.secondaryfindings.v3.txt -clinvar_handle = datasources/clinvar/variant_summary.lite.txt -hereditary_handle = datasources/hereditary/hereditary.txt -oncotree_handle = 
datasources/oncotree/oncotree.2023-03-09.txt -lawrence_handle = datasources/lawrence/lawrence_mapped_ontology.txt - -[preclinical] -almanac_gdsc_mappings = datasources/preclinical/formatted/almanac-gdsc-mappings.json -summary = datasources/preclinical/formatted/cell-lines.summary.txt -variants = datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt -copynumbers = datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt -fusions = datasources/preclinical/annotated/cell-lines.fusions.annotated.txt -fusions1 = datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt -fusions2 = datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt -gdsc = datasources/preclinical/formatted/sanger.gdsc.txt -dictionary = datasources/preclinical/cell-lines.pkl +knockdown = Knockdown \ No newline at end of file diff --git a/moalmanac/config.py b/moalmanac/config.py index 42bada4..3a66f5d 100644 --- a/moalmanac/config.py +++ b/moalmanac/config.py @@ -1,15 +1,8 @@ import configparser -default_config = 'config.ini' default_colnames = 'colnames.ini' -def create_config(): - config = configparser.ConfigParser() - config.read(default_config) - return config - - def create_colnames_dict(config): dictionary = {} for section in config.sections(): @@ -25,5 +18,4 @@ def create_colnames(): return create_colnames_dict(config) -CONFIG = create_config() COLNAMES = create_colnames() diff --git a/moalmanac/datasources.py b/moalmanac/datasources.py index 741871f..60fa523 100644 --- a/moalmanac/datasources.py +++ b/moalmanac/datasources.py @@ -2,7 +2,6 @@ from reader import Reader from config import COLNAMES -from config import CONFIG class Datasources: @@ -73,24 +72,6 @@ class Datasources: an_sas = COLNAMES[datasources_section]['exac_sas_an'] an_oth = COLNAMES[datasources_section]['exac_oth_an'] - @classmethod - def generate_db_dict(cls, config): - return { - 'almanac_handle': config.get('databases', 'almanac_handle'), - 
'hotspots_handle': config.get('databases', 'cancerhotspots_handle'), - '3dhotspots_handle': config.get('databases', '3dcancerhotspots_handle'), - 'clinvar_handle': config.get('databases', 'clinvar_handle'), - 'cgc_handle': config.get('databases', 'cgc_handle'), - 'cosmic_handle': config.get('databases', 'cosmic_handle'), - 'gsea_pathways_handle': config.get('databases', 'gsea_pathways_handle'), - 'gsea_modules_handle': config.get('databases', 'gsea_modules_handle'), - 'exac_handle': config.get('databases', 'exac_handle'), - 'acmg_handle': config.get('databases', 'acmg_handle'), - 'hereditary_handle': config.get('databases', 'hereditary_handle'), - 'oncotree_handle': config.get('databases', 'oncotree_handle'), - 'lawrence_handle': config.get('databases', 'lawrence_handle') - } - class ACMG: gene = Datasources.feature @@ -196,7 +177,7 @@ def format_cancerhotspots(cls, df): @classmethod def import_ds(cls, dbs): - df = Reader.safe_read(dbs['hotspots_handle'], '\t', cls.column_map) + df = Reader.safe_read(dbs['cancerhotspots_handle'], '\t', cls.column_map) return cls.format_cancerhotspots(df) @@ -211,7 +192,7 @@ class CancerHotspots3D: @classmethod def import_ds(cls, dbs): - return Reader.safe_read(dbs['3dhotspots_handle'], '\t', cls.column_map) + return Reader.safe_read(dbs['3dcancerhotspots_handle'], '\t', cls.column_map) class ClinVar: @@ -408,16 +389,6 @@ def import_ds(cls, dbs): class Preclinical: section = 'preclinical' - summary_handle = CONFIG[section]['summary'] - variants_handle = CONFIG[section]['variants'] - cnas_handle = CONFIG[section]['copynumbers'] - fusions_handle = CONFIG[section]['fusions'] - fusions_gene1_handle = CONFIG[section]['fusions1'] - fusions_gene2_handle = CONFIG[section]['fusions2'] - gdsc_handle = CONFIG[section]['gdsc'] - mappings_handle = CONFIG[section]['almanac_gdsc_mappings'] - dictionary_handle = CONFIG[section]['dictionary'] - feature = COLNAMES[section]['feature'] partner = COLNAMES[section]['partner'] gene = 
COLNAMES[section]['gene'] @@ -437,12 +408,6 @@ class Preclinical: mappings = 'mappings' dictionary = 'dictionary' - feature_type = Datasources.feature_type - feature_types_section = 'feature_types' - variant_type = CONFIG[feature_types_section]['mut'] - copy_number_type = CONFIG[feature_types_section]['cna'] - fusion_type = CONFIG[feature_types_section]['fusion'] - @classmethod def create_convert_names_dict(cls, dataframe, map_from, map_to): return dataframe.loc[:, [map_from, map_to]].dropna().set_index(map_from)[map_to].to_dict() @@ -452,16 +417,16 @@ def generate_sample_list(dataframe, use_column, sample_column): return dataframe[dataframe[use_column].astype(bool).astype(int).eq(1)][sample_column].sort_values().tolist() @classmethod - def import_dbs(cls): - summary = Reader.read(cls.summary_handle, delimiter='\t') - variants = Reader.read(cls.variants_handle, delimiter='\t', low_memory=False) - cnas = Reader.read(cls.cnas_handle, delimiter='\t', low_memory=False) - fusions = Reader.read(cls.fusions_handle, delimiter='\t', low_memory=False) - fusions1 = Reader.read(cls.fusions_gene1_handle, delimiter='\t', low_memory=False) - fusions2 = Reader.read(cls.fusions_gene2_handle, delimiter='\t', low_memory=False) - gdsc = Reader.read(cls.gdsc_handle, delimiter='\t', low_memory=False) - mappings = Reader.read_json(cls.mappings_handle) - dictionary = Reader.read_pickle(cls.dictionary_handle) + def import_dbs(cls, paths_dictionary): + summary = Reader.read(paths_dictionary['summary'], delimiter='\t') + variants = Reader.read(paths_dictionary['variants'], delimiter='\t', low_memory=False) + cnas = Reader.read(paths_dictionary['copynumbers'], delimiter='\t', low_memory=False) + fusions = Reader.read(paths_dictionary['fusions'], delimiter='\t', low_memory=False) + fusions1 = Reader.read(paths_dictionary['fusions1'], delimiter='\t', low_memory=False) + fusions2 = Reader.read(paths_dictionary['fusions2'], delimiter='\t', low_memory=False) + gdsc = 
Reader.read(paths_dictionary['gdsc'], delimiter='\t', low_memory=False) + mappings = Reader.read_json(paths_dictionary['almanac_gdsc_mappings']) + dictionary = Reader.read_pickle(paths_dictionary['dictionary']) ccle_map = cls.create_convert_names_dict(summary, cls.ccle_name, cls.broad) sanger_map = cls.create_convert_names_dict(summary, cls.sanger, cls.broad) diff --git a/moalmanac/evaluator.py b/moalmanac/evaluator.py index d541700..d684c89 100644 --- a/moalmanac/evaluator.py +++ b/moalmanac/evaluator.py @@ -4,11 +4,10 @@ import datasources import features -from config import CONFIG from config import COLNAMES -class Evaluator(object): +class Evaluator: """ Evaluate based on annotated bins """ @@ -81,20 +80,6 @@ class Evaluator(object): microsatellite_section = 'microsatellite' supporting_variants = COLNAMES[microsatellite_section]['supporting_variants'] - feature_type_section = 'feature_types' - mut_type = CONFIG[feature_type_section]['mut'] - copynumber_type = CONFIG[feature_type_section]['cna'] - germline_type = CONFIG[feature_type_section]['germline'] - fusion_type = CONFIG[feature_type_section]['fusion'] - burden_type = CONFIG[feature_type_section]['burden'] - microsatellite_type = CONFIG[feature_type_section]['microsatellite'] - signature_type = CONFIG[feature_type_section]['signature'] - aneuploidy_type = CONFIG[feature_type_section]['aneuploidy'] - - mutations_section = 'mutations' - min_coverage = CONFIG[mutations_section]['min_coverage'] - min_af = CONFIG[mutations_section]['min_af'] - @classmethod def assign_bin(cls, df, bin_column, bin_label): series_score_bin = df.loc[:, cls.score_bin] @@ -145,17 +130,18 @@ def remap_almanac_bins(series, old_value, new_value): return series.astype(int).replace(to_replace=old_value, value=new_value) @classmethod - def remove_low_allele_fraction_variants(cls, df): - idx_mut = df[df[cls.feature_type].isin([cls.mut_type, cls.germline_type])].index - idx_low_quality = 
df[df[cls.tumor_f].astype(float).lt(float(cls.min_af))].index + def remove_low_allele_fraction_variants(cls, df, minimum_allele_fraction=0.05): + # hard coding somatic and germline variant strings until future refactor + idx_mut = df[df[cls.feature_type].isin(['Somatic Variant', 'Germline Variant'])].index + idx_low_quality = df[df[cls.tumor_f].astype(float).lt(float(minimum_allele_fraction))].index idx_low_quality_muts = idx_mut.intersection(idx_low_quality) idx = df.index.difference(idx_low_quality_muts) return df.loc[idx, :] @classmethod - def remove_low_coverage_variants(cls, df): - idx_mut = df[df[cls.feature_type].isin([cls.mut_type, cls.germline_type])].index - idx_low_quality = df[df[cls.coverage].astype(float).le(float(cls.min_coverage))].index + def remove_low_coverage_variants(cls, df, minimum_coverage=15): + idx_mut = df[df[cls.feature_type].isin(['Somatic Variant', 'Germline Variant'])].index + idx_low_quality = df[df[cls.coverage].astype(float).le(float(minimum_coverage))].index idx_low_quality_muts = idx_mut.intersection(idx_low_quality) idx = df.index.difference(idx_low_quality_muts) return df.loc[idx, :] @@ -193,46 +179,48 @@ def create_string_list(series): return ', '.join(map(str, series.unique())) @classmethod - def display_aneuploidy(cls, df, idx, feature): - return df.loc[idx, feature] + def display_aneuploidy(cls, df, idx): + return df.loc[idx, Evaluator.feature] @classmethod - def display_burden(cls, df, idx, alt): - return df.loc[idx, alt].astype(str) + def display_burden(cls, df, idx): + return df.loc[idx, Evaluator.alt].astype(str) @classmethod - def display_copynumber(cls, df, idx, feature, alt_type): - gene = df.loc[idx, feature] - direction = df.loc[idx, alt_type] + def display_copynumber(cls, df, idx): + gene = df.loc[idx, Evaluator.feature] + direction = df.loc[idx, Evaluator.alt_type] # Copy Number: CDKN2A Deletion return gene + ' ' + direction @classmethod - def display_fusion(cls, df, idx, alt): - fusion = df.loc[idx, alt] + def 
display_fusion(cls, df, idx): + fusion = df.loc[idx, Evaluator.alt] # Rearrangement: BCR--ABL1 Fusion return fusion + ' Fusion' @classmethod - def display_microsatellite_stability(cls, df, idx, feature): - return df.loc[idx, feature] + def display_microsatellite_stability(cls, df, idx): + return df.loc[idx, Evaluator.feature] @classmethod - def display_microsatellite_variants(cls, df, idx, feature, alt): - return df.loc[idx, feature] + ': ' + df.loc[idx, alt] + def display_microsatellite_variants(cls, df, idx): + return df.loc[idx, Evaluator.feature] + ': ' + df.loc[idx, Evaluator.alt] @classmethod - def display_signature(cls, df, idx, feature, alt): - signature = df.loc[idx, feature].str.replace('COSMIC Signature', 'COSMIC Signature (version 2)') - contribution = df.loc[idx, alt].astype(float).multiply(100).round(0).astype(int).astype(str) + def display_signature(cls, df, idx): + # before_string = "COSMIC Signature" + # after_string = f"COSMIC Signature (version {version})" + signature = df.loc[idx, Evaluator.feature]#.str.replace(before_string, after_string) + contribution = df.loc[idx, Evaluator.alt].astype(float).multiply(100).round(0).astype(int).astype(str) # Signature: Cosmic Signature 7 (65%) return signature + ' (' + contribution + '%)' @classmethod - def display_variant(cls, df, idx, feature, alt_type, alt): - gene = df.loc[idx, feature] - protein_change = df.loc[idx, alt] - variant_class = df.loc[idx, alt_type] + def display_variant(cls, df, idx): + gene = df.loc[idx, Evaluator.feature] + protein_change = df.loc[idx, Evaluator.alt] + variant_class = df.loc[idx, Evaluator.alt_type] # exon, pathogenic, cDNA, linebreaks # Gene p.Foo (c.DNA) # Exon 12 Missense @@ -240,14 +228,14 @@ def display_variant(cls, df, idx, feature, alt_type, alt): return gene + ' ' + protein_change + ' (' + variant_class + ')' @classmethod - def evaluate(cls, somatic, germline, ms_variants, ms_status, burden, signatures, wgd): + def evaluate(cls, somatic, germline, ms_variants, 
ms_status, burden, signatures, wgd, config): somatic = cls.format_mutations(somatic) germline = cls.format_mutations(germline) germline = Evaluator.remove_benign_variants(germline) germline = Evaluator.remove_common_variants(germline) - ms_variants_summary = cls.summarize_ms_variants(ms_variants) + ms_variants_summary = cls.summarize_ms_variants(ms_variants, config) if not burden.loc[0, Evaluator.high_burden_boolean]: burden = burden.drop(burden.index[0]) @@ -257,48 +245,37 @@ def evaluate(cls, somatic, germline, ms_variants, ms_status, burden, signatures, actionable_list.append(Evaluator.subset_almanac_bin(dataframe)) df = features.Features.concat_list_of_dataframes(list_of_dataframes=actionable_list) - df[Evaluator.feature_display] = cls.format_feature_display( - df, Evaluator.feature_display, - Evaluator.feature_type, Evaluator.feature, - Evaluator.alt_type, Evaluator.alt) + df[Evaluator.feature_display] = cls.format_feature_display(df=df, config=config) return df.sort_values(cls.sort_columns, ascending=False) @classmethod - def format_feature_display(cls, df, feature_display_column, - feature_type_column, feature_column, - alt_type_column, alt_column): - idx_somatic = df[feature_type_column].isin([Evaluator.mut_type]) - idx_germline = df[feature_type_column].isin([Evaluator.germline_type]) - idx_cn = df[feature_type_column].isin([Evaluator.copynumber_type]) - idx_fusion = df[feature_type_column].isin([Evaluator.fusion_type]) - idx_msi = df[feature_type_column].isin([Evaluator.microsatellite_type]) + def format_feature_display(cls, df, config): + display_column = Evaluator.feature_display + feature_type_column = Evaluator.feature_type + feature_column = Evaluator.feature + + biomarker_types = config['feature_types'] + idx_somatic = df[feature_type_column].isin([biomarker_types['mut']]) + idx_germline = df[feature_type_column].isin([biomarker_types['germline']]) + idx_cn = df[feature_type_column].isin([biomarker_types['cna']]) + idx_fusion = 
df[feature_type_column].isin([biomarker_types['fusion']]) + idx_msi = df[feature_type_column].isin([biomarker_types['microsatellite']]) idx_msi_variants = df[feature_column].isin([Evaluator.supporting_variants]) idx_msi = idx_msi & ~idx_msi_variants - idx_burden = df[feature_type_column].isin([Evaluator.burden_type]) - idx_signature = df[feature_type_column].isin([Evaluator.signature_type]) - idx_wgd = df[feature_column].isin([Evaluator.aneuploidy_type]) - - df.loc[idx_wgd, feature_display_column] = cls.display_aneuploidy( - df, idx_wgd, feature_column) - df.loc[idx_somatic, feature_display_column] = cls.display_variant( - df, idx_somatic, feature_column, alt_type_column, alt_column) - df.loc[idx_germline, feature_display_column] = cls.display_variant( - df, idx_germline, feature_column, alt_type_column, alt_column) - df.loc[idx_cn, feature_display_column] = cls.display_copynumber( - df, idx_cn, feature_column, alt_type_column) - df.loc[idx_fusion, feature_display_column] = cls.display_fusion( - df, idx_fusion, alt_column) - df.loc[idx_msi, feature_display_column] = cls.display_microsatellite_stability( - df, idx_msi, feature_column) - df.loc[idx_msi_variants, feature_display_column] = cls.display_microsatellite_variants( - df, idx_msi_variants, feature_column, alt_column) - df.loc[idx_burden, feature_display_column] = cls.display_burden( - df, idx_burden, alt_column) - df.loc[idx_signature, feature_display_column] = cls.display_signature( - df, idx_signature, feature_column, alt_column) - df.loc[idx_wgd, feature_display_column] = cls.display_aneuploidy( - df, idx_wgd, feature_column) - return df.loc[:, feature_display_column] + idx_burden = df[feature_type_column].isin([biomarker_types['burden']]) + idx_signature = df[feature_type_column].isin([biomarker_types['signature']]) + idx_wgd = df[feature_column].isin([biomarker_types['aneuploidy']]) + + df.loc[idx_somatic, display_column] = cls.display_variant(df=df, idx=idx_somatic) + df.loc[idx_germline, 
display_column] = cls.display_variant(df=df, idx=idx_germline) + df.loc[idx_cn, display_column] = cls.display_copynumber(df=df, idx=idx_cn) + df.loc[idx_fusion, display_column] = cls.display_fusion(df=df, idx=idx_fusion) + df.loc[idx_msi, display_column] = cls.display_microsatellite_stability(df=df, idx=idx_msi) + df.loc[idx_msi_variants, display_column] = cls.display_microsatellite_variants(df=df, idx=idx_msi_variants) + df.loc[idx_burden, display_column] = cls.display_burden(df=df, idx=idx_burden) + df.loc[idx_signature, display_column] = cls.display_signature(df=df, idx=idx_signature) + df.loc[idx_wgd, display_column] = cls.display_aneuploidy(df=df, idx=idx_wgd) + return df.loc[:, display_column] @classmethod def format_mutations(cls, df): @@ -312,22 +289,15 @@ def format_variant_classification(cls, series): return series.str.replace('_Mutation', '') @classmethod - def summarize_ms_variants(cls, df): + def summarize_ms_variants(cls, df, config): df = cls.format_mutations(df) msi_summary = features.Features.create_empty_dataframe() if not df.empty: feature = Evaluator.supporting_variants - feature_displays = cls.format_feature_display( - df, - Evaluator.feature_display, - Evaluator.feature_type, - Evaluator.feature, - Evaluator.alt_type, - Evaluator.alt - ) + feature_displays = cls.format_feature_display(df=df, config=config) feature_displays_list = cls.create_string_list(feature_displays) - msi_summary.loc[0, Evaluator.feature_type] = Evaluator.microsatellite_type + msi_summary.loc[0, Evaluator.feature_type] = config['feature_types']['microsatellite'] msi_summary.loc[0, Evaluator.feature] = feature msi_summary.loc[0, Evaluator.alt] = feature_displays_list msi_summary.loc[0, Evaluator.almanac_bin] = 1 @@ -336,7 +306,7 @@ def summarize_ms_variants(cls, df): return msi_summary -class Integrative(object): +class Integrative: feature = datasources.Datasources.feature feature_type = datasources.Datasources.feature_type alt_type = datasources.Datasources.alt_type @@ 
-357,13 +327,13 @@ def create_integrated_df(cls, genes): return pd.DataFrame(None, columns=cls.columns, index=genes) @classmethod - def evaluate(cls, somatic, germline, dbs, feature_types): + def evaluate(cls, somatic, germline, dbs, config): genes = cls.return_datasource_genes(dbs) df = cls.create_integrated_df(genes) - somatic_mutations = cls.extract_feature_type(somatic, feature_types['mutation']) - somatic_copynumbers = cls.extract_feature_type(somatic, feature_types['copynumber']) - somatic_fusions = cls.extract_feature_type(somatic, feature_types['fusion']) + somatic_mutations = cls.extract_feature_type(somatic, config['feature_types']['mut']) + somatic_copynumbers = cls.extract_feature_type(somatic, config['feature_types']['cna']) + somatic_fusions = cls.extract_feature_type(somatic, config['feature_types']['fusion']) for gene in df.index: gene_muts = somatic_mutations[somatic_mutations[cls.feature].astype(str) == gene] diff --git a/moalmanac/features.py b/moalmanac/features.py index bc3ec3d..b0195e2 100644 --- a/moalmanac/features.py +++ b/moalmanac/features.py @@ -9,9 +9,6 @@ from reader import Reader from config import COLNAMES -from config import CONFIG - -SIGNATURES_CONFIG = CONFIG['signatures'] class Features: @@ -120,28 +117,21 @@ def preallocate_missing_columns(cls, df): class Aneuploidy: aneuploidy = 'aneuploidy' - - feature_type_section = 'feature_types' - feature_type = CONFIG[feature_type_section][aneuploidy] - aneuploidy_section = aneuploidy wgd = COLNAMES[aneuploidy]['wgd'] wgd_string = COLNAMES[aneuploidy]['wgd_string'] @classmethod - def summarize(cls, boolean): + def summarize(cls, boolean, config): df = Features.create_empty_dataframe() + feature_type = config['feature_types']['aneuploidy'] if boolean: - df.loc[0, Features.feature_type] = cls.feature_type + df.loc[0, Features.feature_type] = feature_type df.loc[0, Features.feature] = cls.wgd_string return df class BurdenReader: - feature_type_section = 'feature_types' - feature_type = 
CONFIG[feature_type_section]['burden'] - feature_type_mutations = CONFIG[feature_type_section]['mut'] - burden_section = 'burden' patient_id = COLNAMES[burden_section]['patient'] tumor_type = COLNAMES[burden_section]['tumor_type'] @@ -212,15 +202,16 @@ def evaluate_high_burden_boolean(cls, boolean): return Features.not_high_burden @classmethod - def import_feature(cls, handle, patient, variants, dbs): + def import_feature(cls, handle, patient, variants, dbs, config): if os.path.exists(handle): bases_covered = float(Reader.read(handle, '\t', index_col=False).columns.tolist()[0]) else: bases_covered = np.nan df = cls.create_burden_series(patient, bases_covered) - df[Features.feature_type] = cls.feature_type - mutations = variants[variants[Features.feature_type] == cls.feature_type_mutations].shape[0] + biomarker_type = config['feature_types']['burden'] + df[Features.feature_type] = biomarker_type + mutations = variants[variants[Features.feature_type] == config['feature_types']['mut']].shape[0] mutational_burden = cls.calculate_burden(mutations, bases_covered) df[cls.n_nonsyn_mutations] = mutations @@ -239,18 +230,13 @@ def import_feature(cls, handle, patient, variants, dbs): class CopyNumber: - config = CONFIG['seg'] - amplification = config['amp'] - deletion = config['del'] - feature_type = CONFIG['feature_types']['cna'] - @staticmethod def format_cn_gene(series): new_series = series.str.split(' ', expand=True).loc[:, 0] return new_series @classmethod - def import_feature(cls, called_handle, not_called_handle): + def import_feature(cls, called_handle, not_called_handle, config): if called_handle: column_map = CopyNumberCalled.create_column_map() handle = called_handle @@ -259,13 +245,17 @@ def import_feature(cls, called_handle, not_called_handle): handle = not_called_handle df = Features.import_if_path_exists(handle, '\t', column_map, comment_character="#") + + amplification_string = config['seg']['amp'] + deletion_string = config['seg']['del'] if not df.empty: - 
df[Features.feature_type] = Features.annotate_feature_type(cls.feature_type, df.index) + biomarker_type = config['feature_types']['cna'] + df[Features.feature_type] = Features.annotate_feature_type(biomarker_type, df.index) df[Features.feature] = cls.format_cn_gene(df[Features.feature]) if called_handle: - seg_accept, seg_reject = CopyNumberCalled.process_calls(df) + seg_accept, seg_reject = CopyNumberCalled.process_calls(df, amplification_string, deletion_string) else: - seg_accept, seg_reject = CopyNumberTotal.process_calls(df) + seg_accept, seg_reject = CopyNumberTotal.process_calls(df, config) else: seg_accept = Features.create_empty_dataframe() seg_reject = Features.create_empty_dataframe() @@ -283,21 +273,21 @@ def create_column_map(): } @classmethod - def filter_calls(cls, series): - return series.fillna('').isin([cls.amplification, cls.deletion]) + def filter_calls(cls, series, amp_string, del_string): + return series.fillna('').isin([amp_string, del_string]) @classmethod - def process_calls(cls, dataframe): - idx = cls.filter_calls(dataframe[Features.alt_type]) + def process_calls(cls, dataframe, amp_string, del_string): + idx = cls.filter_calls(dataframe[Features.alt_type], amp_string, del_string) return dataframe[idx], dataframe[~idx] class CopyNumberTotal(CopyNumber): @classmethod - def annotate_amp_del(cls, idx, idx_amp, idx_del): + def annotate_amp_del(cls, idx, idx_amp, idx_del, amp_string, del_string): series = pd.Series('', index=idx) - series[idx_amp] = cls.amplification - series[idx_del] = cls.deletion + series[idx_amp] = amp_string + series[idx_del] = del_string return series @staticmethod @@ -323,7 +313,7 @@ def drop_duplicate_genes(cls, df): ) @classmethod - def filter_by_threshold(cls, df, percentile_amp, percentile_del): + def filter_by_threshold(cls, df, percentile_amp, percentile_del, amp_string, del_string): unique_segments = cls.get_unique_segments(df) threshold_amp = Features.calculate_percentile(unique_segments, percentile_amp) 
threshold_del = Features.calculate_percentile(unique_segments, percentile_del) @@ -331,7 +321,7 @@ def filter_by_threshold(cls, df, percentile_amp, percentile_del): idx_amp = df[df[Features.segment_mean].astype(float) >= float(threshold_amp)].index idx_del = df[df[Features.segment_mean].astype(float) <= float(threshold_del)].index - df[Features.alt_type] = cls.annotate_amp_del(df.index, idx_amp, idx_del) + df[Features.alt_type] = cls.annotate_amp_del(df.index, idx_amp, idx_del, amp_string, del_string) idx_accept = df[df[Features.alt_type] != ''].index idx_unique = Features.drop_duplicate_genes(df.loc[idx_accept, :], Features.segment_mean) @@ -346,10 +336,12 @@ def get_unique_segments(df): return df.drop_duplicates([Features.chr, Features.start])[Features.segment_mean] @classmethod - def process_calls(cls, dataframe): - amp_percentile = cls.config['amp_percentile'] - del_percentile = cls.config['del_percentile'] - return cls.filter_by_threshold(dataframe, amp_percentile, del_percentile) + def process_calls(cls, dataframe, config): + amp_percentile = config['seg']['amp_percentile'] + del_percentile = config['seg']['del_percentile'] + amp_string = config['seg']['amp'] + del_string = config['seg']['del'] + return cls.filter_by_threshold(dataframe, amp_percentile, del_percentile, amp_string, del_string) class CoverageMetrics: @@ -391,14 +383,9 @@ def split_counts(series): class CosmicSignatures: - feature_type_section = 'feature_types' - feature_type = CONFIG[feature_type_section]['signature'] - signature_section = 'signatures' patient_id = COLNAMES[signature_section]['patient'] - min_contribution = SIGNATURES_CONFIG['min_contribution'] - input_section = 'mutational_signature_input' input_signature = COLNAMES[input_section]['signature'] input_contribution = COLNAMES[input_section]['contribution'] @@ -413,21 +400,23 @@ def create_column_map(cls): } @classmethod - def import_feature(cls, path): + def import_feature(cls, path, config): """Loads and formats Cosmic 
Mutational Signatures based on provided file path.""" column_map = cls.create_column_map() df = Features.import_if_path_exists(path, delimiter='\t', column_map=column_map) if not df.empty: - df[Features.feature_type] = cls.feature_type + biomarker_type = config['feature_types']['signature'] + minimum_contribution = config['signatures']['min_contribution'] + df[Features.feature_type] = biomarker_type df[Features.alt_type] = 'v3.4' df[Features.alt] = cls.round_contributions(df[Features.alt]) - idx = cls.index_for_minimum_contribution(df[Features.alt]) + idx = cls.index_for_minimum_contribution(series=df[Features.alt], minimum_value=minimum_contribution) return df[idx] else: return Features.create_empty_dataframe() @classmethod - def index_for_minimum_contribution(cls, series, minimum_value=min_contribution): + def index_for_minimum_contribution(cls, series, minimum_value=0.06): """Subsets the provided SBS signatures to those that pass the minimum contribution, specified in config.ini""" return series.astype(float) >= float(minimum_value) @@ -438,41 +427,38 @@ def round_contributions(series, decimals=3): class Fusion: - config = CONFIG['fusion'] - alt_type = config['alt_type'] - leftbreakpoint = config['leftbreakpoint'] - rightbreakpoint = config['rightbreakpoint'] - spanningfrags_min = config['spanningfrags_min'] - - feature_type = CONFIG['feature_types']['fusion'] - @classmethod - def create_colmap(cls): + def create_colmap(cls, config): section = 'fusion_input' column_names = COLNAMES[section] + + leftbreakpoint = config['fusion']['leftbreakpoint'] + rightbreakpoint = config['fusion']['rightbreakpoint'] return { column_names['name']: Features.feature, column_names['spanningfrags']: Features.spanningfrags, - column_names[cls.leftbreakpoint]: cls.leftbreakpoint, - column_names[cls.rightbreakpoint]: cls.rightbreakpoint + column_names[leftbreakpoint]: leftbreakpoint, + column_names[rightbreakpoint]: rightbreakpoint } @staticmethod - def 
filter_by_spanning_fragment_count(series, minimum=spanningfrags_min): + def filter_by_spanning_fragment_count(series, minimum=5.0): minimum = int(float(minimum)) return series[series.astype(int).ge(minimum)].index @classmethod - def import_feature(cls, handle): - column_map = cls.create_colmap() + def import_feature(cls, handle, config): + column_map = cls.create_colmap(config) df = Features.import_if_path_exists(handle, '\t', column_map, index_col=False) if not df.empty: split_genes = cls.split_genes(df[Features.feature]) df[Features.left_gene] = split_genes[Features.left_gene] df[Features.right_gene] = split_genes[Features.right_gene] - left = cls.split_breakpoint(df[cls.leftbreakpoint]) - right = cls.split_breakpoint(df[cls.rightbreakpoint]) + leftbreakpoint = config['fusion']['leftbreakpoint'] + rightbreakpoint = config['fusion']['rightbreakpoint'] + left = cls.split_breakpoint(df[leftbreakpoint]) + right = cls.split_breakpoint(df[rightbreakpoint]) df[Features.chr] = left[Features.chr] df[Features.start] = left[Features.start] @@ -480,13 +466,18 @@ def import_feature(cls, handle): df[Features.left_start] = left[Features.start] df[Features.right_chr] = right[Features.chr] df[Features.right_start] = right[Features.start] - df.drop([cls.leftbreakpoint, cls.rightbreakpoint], axis=1, inplace=True) + df.drop([leftbreakpoint, rightbreakpoint], axis=1, inplace=True) - df[Features.feature_type] = Features.annotate_feature_type(cls.feature_type, df.index) - df[Features.alt_type] = cls.alt_type + biomarker_type = config['feature_types']['fusion'] + df[Features.feature_type] = Features.annotate_feature_type(biomarker_type, df.index) + df[Features.alt_type] = config['fusion']['alt_Type'] df[Features.alt] = df[Features.feature] - idx_min_spanning_fragments = cls.filter_by_spanning_fragment_count(df[Features.spanningfrags]) + min_fragments = config['fusion']['spanningfrags_min'] + idx_min_spanning_fragments = cls.filter_by_spanning_fragment_count( + 
series=df[Features.spanningfrags], + minimum=min_fragments + ) idx_unique = Features.drop_duplicate_genes(df.loc[idx_min_spanning_fragments, :], Features.feature) fusions_unique = df.loc[idx_unique, :] @@ -521,11 +512,7 @@ def split_breakpoint(series_breakpoint): class MicrosatelliteReader: - microsatellite = 'microsatellite' - feature_type_section = 'feature_types' - feature_type = CONFIG[feature_type_section][microsatellite] - - microsatellite_section = microsatellite + microsatellite_section = 'microsatellite' msih = COLNAMES[microsatellite_section]['msih'] msil = COLNAMES[microsatellite_section]['msil'] mss = COLNAMES[microsatellite_section]['mss'] @@ -543,9 +530,10 @@ def map_status(cls, status): return cls.status_map[status] @classmethod - def summarize(cls, status): + def summarize(cls, status, config): df = Features.create_empty_dataframe() - df.loc[0, Features.feature_type] = cls.feature_type + biomarker_type = config['feature_types']['microsatellite'] + df.loc[0, Features.feature_type] = biomarker_type df.loc[0, Features.feature] = cls.map_status(status) return df @@ -636,13 +624,12 @@ def return_variants_non_coding(cls, df): class MAFGermline(MAF): - feature_type = CONFIG['feature_types']['germline'] - @classmethod - def import_feature(cls, handle): + def import_feature(cls, handle, config): df = cls.import_maf(handle) + biomarker_type = config['feature_types']['germline'] if not df.empty: - df = cls.format_maf(df, cls.feature_type) + df = cls.format_maf(df, biomarker_type) coding_variants = cls.return_variants_coding(df) else: coding_variants = cls.create_empty_dataframe() @@ -651,13 +638,12 @@ def import_feature(cls, handle): class MAFSomatic(MAF): - feature_type = CONFIG['feature_types']['mut'] - @classmethod - def import_feature(cls, handle): + def import_feature(cls, handle, config): df = cls.import_maf(handle) + biomarker_type = config['feature_types']['mut'] if not df.empty: - df = cls.format_maf(df, cls.feature_type) + df = cls.format_maf(df, 
biomarker_type) coding_variants = cls.return_variants_coding(df) non_coding_variants = cls.return_variants_non_coding(df) else: @@ -682,8 +668,8 @@ class MAFValidation(MAF): columns = [gene, alt, alt_type, validation_tumor_f, validation_coverage] @classmethod - def import_feature(cls, handle): - df, df_reject = MAFSomatic.import_feature(handle) + def import_feature(cls, handle, config): + df, df_reject = MAFSomatic.import_feature(handle, config) df = df.drop(df.columns[df.columns.str.contains('validation')], axis=1) df = df.rename(columns=cls.column_map).loc[:, cls.columns] return df, df_reject diff --git a/moalmanac/illustrator.py b/moalmanac/illustrator.py index 36ae986..3f6e120 100644 --- a/moalmanac/illustrator.py +++ b/moalmanac/illustrator.py @@ -1,5 +1,4 @@ from config import COLNAMES -from config import CONFIG import io import base64 @@ -137,13 +136,6 @@ def create_title(string): class ValidationOverlap(Illustrator): - config_section = 'validation_sequencing' - min_af = float(CONFIG[config_section]['min_af_for_annotation']) - min_power = float(CONFIG[config_section]['min_power']) - - feature_type_section = 'feature_types' - feature_type_mutation = CONFIG[feature_type_section]['mut'] - section = 'validation_sequencing' gene = COLNAMES[section]['gene'] feature_type = COLNAMES[section]['feature_type'] @@ -164,8 +156,8 @@ def create_gene_alt_string(cls, data): return data[cls.gene].astype(str) + ' ' + data[cls.alt].astype(str) @classmethod - def format_data(cls, df): - idx = df[df[cls.feature_type] == cls.feature_type_mutation].index + def format_data(cls, df, biomarker_type_string): + idx = df[df[cls.feature_type] == biomarker_type_string].index data = df.loc[idx, cls.columns].fillna(0.0) data[cls.coverage] = pd.to_numeric(data[cls.coverage]) data[cls.tumor_f] = pd.to_numeric(data[cls.tumor_f]) @@ -175,7 +167,7 @@ def format_data(cls, df): return data @classmethod - def plot_overlap_af(cls, data, title=''): + def plot_overlap_af(cls, data, title='', 
minimum_power=0.95, minimum_allele_fraction=0.05): fig = plt.figure(figsize=(7, 7)) ax = plt.subplot() @@ -190,22 +182,22 @@ def plot_overlap_af(cls, data, title=''): plt.yticks(fontsize=14) plt.xticks(fontsize=14) - powered = data[data[cls.validation_detection_power].astype(float) >= cls.min_power] - lowpower = data[data[cls.validation_detection_power].astype(float) < cls.min_power] + powered = data[data[cls.validation_detection_power].astype(float) >= minimum_power] + lowpower = data[data[cls.validation_detection_power].astype(float) < minimum_power] labels = cls.create_gene_alt_string(data) for idx in data.index: primary_tumor_f = float(data.loc[idx, cls.tumor_f]) validation_tumor_f = float(data.loc[idx, cls.validation_tumor_f]) - if (primary_tumor_f >= cls.min_af) & (validation_tumor_f >= cls.min_af): + if (primary_tumor_f >= minimum_allele_fraction) & (validation_tumor_f >= minimum_allele_fraction): ax.annotate(labels[idx], (primary_tumor_f, validation_tumor_f)) plt.scatter(powered[cls.tumor_f].tolist(), powered[cls.validation_tumor_f].tolist(), color=Illustrator.tableau10['blue'], - label=''.join(['Detection power in validation sequencing >= {}'.format(cls.min_power)])) + label=''.join(['Detection power in validation sequencing >= {}'.format(minimum_power)])) plt.scatter(lowpower[cls.tumor_f].tolist(), lowpower[cls.validation_tumor_f].tolist(), color=Illustrator.tableau10['grey'], - label=''.join(['Detection power in validation sequencing < {}'.format(cls.min_power)])) + label=''.join(['Detection power in validation sequencing < {}'.format(minimum_power)])) plt.xlim(-0.01, 1.01) plt.ylim(-0.01, 1.01) @@ -222,7 +214,16 @@ def plot_overlap_af(cls, data, title=''): return fig @classmethod - def generate_dna_rna_plot(cls, df, patient_id, folder): - data = cls.format_data(df) - figure = cls.plot_overlap_af(data, title=patient_id) + def generate_dna_rna_plot(cls, df, patient_id, folder, config): + biomarker_type = config['feature_types']['mut'] + minimum_power = 
float(config['validation_sequencing']['min_power']) + minimum_allele_fraction = float(config['validation_sequencing']['min_af_for_annotation']) + + data = cls.format_data(df=df, biomarker_type_string=biomarker_type) + figure = cls.plot_overlap_af( + data=data, + title=patient_id, + minimum_power=minimum_power, + minimum_allele_fraction=minimum_allele_fraction + ) Illustrator.save_fig(figure, folder, patient_id, 'validation_overlap.png') diff --git a/moalmanac/investigator.py b/moalmanac/investigator.py index cfb4847..9b3c762 100644 --- a/moalmanac/investigator.py +++ b/moalmanac/investigator.py @@ -6,7 +6,6 @@ from datasources import Preclinical from illustrator import PreclinicalEfficacy -from config import CONFIG from config import COLNAMES @@ -58,26 +57,6 @@ class Investigator(object): ic50 = COLNAMES[preclinical_section]['ic50'] tested_subfeature = COLNAMES[preclinical_section]['tested_subfeature'] - feature_type_section = 'feature_types' - feature_type_mut = CONFIG[feature_type_section]['mut'] - feature_type_germline = CONFIG[feature_type_section]['germline'] - feature_type_cna = CONFIG[feature_type_section]['cna'] - feature_type_fusion = CONFIG[feature_type_section]['fusion'] - feature_type_burden = CONFIG[feature_type_section]['burden'] - feature_type_signature = CONFIG[feature_type_section]['signature'] - feature_types = { - 'variant': feature_type_mut, - 'germline': feature_type_germline, - 'copy_number': feature_type_cna, - 'fusion': feature_type_fusion, - 'burden': feature_type_burden, - 'signature': feature_type_signature - } - - input_dtypes = [feature_types['variant'], - feature_types['copy_number'], - feature_types['fusion']] - @staticmethod def list_feature_combinations(split_feature, feature_length): return ['.'.join(split_feature[:i]) for i in range(1, feature_length + 1)] @@ -122,7 +101,7 @@ def calculate_mann_whitney_u(series1, series2): return stats.mannwhitneyu(series1, series2, alternative='two-sided') @classmethod - def create(cls, dbs, 
df_actionable): + def create(cls, dbs, df_actionable, config): summary = dbs[cls.summary] variants = dbs[cls.variants] cnas = dbs[cls.cnas] @@ -131,8 +110,14 @@ def create(cls, dbs, df_actionable): genes = dbs[cls.gene] mappings = dbs[cls.mappings] + input_dtypes = [ + config['feature_types']['mut'], + config['feature_types']['cna'], + config['feature_types']['fusion'] + ] + samples = Preclinical.generate_sample_list(summary, cls.use_column, cls.model_id) - idx_feature_type = df_actionable[cls.feature_type].isin(cls.input_dtypes) + idx_feature_type = df_actionable[cls.feature_type].isin(input_dtypes) idx_sensitive = ~(df_actionable[cls.sensitive_therapy].isnull() | df_actionable[cls.sensitive_therapy].eq('')) dictionary = {} @@ -144,7 +129,7 @@ def create(cls, dbs, df_actionable): feature_display = df_actionable.loc[index, cls.feature_display] index_dict = {} if mapped: - feature_dictionary = cls.split_samples_by_wt_mut(df_actionable.loc[index, :], dbs, samples) + feature_dictionary = cls.split_samples_by_wt_mut(df_actionable.loc[index, :], dbs, samples, config) features = list(feature_dictionary) for therapy in mapped: therapy_dict = {} @@ -228,12 +213,12 @@ def populate_feature_dictionary(cls, groups, all_samples): return dictionary @classmethod - def select_split_function(cls, feature_type): - if feature_type == cls.feature_types['variant']: + def select_split_function(cls, feature_type, variant_string, copy_number_string, fusion_string): + if feature_type == variant_string: return cls.split_samples_for_variants - elif feature_type == cls.feature_types['copy_number']: + elif feature_type == copy_number_string: return cls.split_samples_for_copy_numbers - elif feature_type == cls.feature_types['fusion']: + elif feature_type == fusion_string: return cls.split_samples_for_fusions else: return cls.split_exit @@ -245,10 +230,19 @@ def split_exit(cls, dbs, series, samples): exit() @classmethod - def split_samples_by_wt_mut(cls, series, dbs, samples): + def 
split_samples_by_wt_mut(cls, series, dbs, samples, config): feature_type = series.loc[cls.feature_type] - split_function = cls.select_split_function(feature_type) - return split_function(dbs, series, samples) + split_function = cls.select_split_function( + feature_type=feature_type, + variant_string=config['feature_types']['mut'], + copy_number_string=config['feature_types']['cna'], + fusion_string=config['feature_types']['fusion'] + ) + return split_function( + dbs=dbs, + series=series, + all_samples=samples + ) @classmethod def split_samples_for_copy_numbers(cls, dbs, series, all_samples): diff --git a/moalmanac/matchmaker.py b/moalmanac/matchmaker.py index 542e9ed..2320d61 100644 --- a/moalmanac/matchmaker.py +++ b/moalmanac/matchmaker.py @@ -10,7 +10,6 @@ from datasources import Preclinical as DatasourcePreclinical from config import COLNAMES -from config import CONFIG class Matchmaker: @@ -27,20 +26,14 @@ class Matchmaker: feature_columns = [feature_type, feature, alt_type, alt] cgc_bin = 'cgc_bin' - - feature_types_section = 'feature_types' - feature_types_config = CONFIG[feature_types_section] - cn = feature_types_config['cna'] - rearrangement = feature_types_config['fusion'] - variant = feature_types_config['mut'] fusion = 'Fusion' @classmethod - def concat_case_comparisons(cls, somatic, dbs): + def concat_case_comparisons(cls, somatic, dbs, variant_string, copy_number_string, rearrangement_string): somatic[cls.model_id] = cls.case_profile - case_variants = cls.subset_dataframe_eq(somatic, cls.feature_type, cls.variant) - case_cns = cls.subset_dataframe_eq(somatic, cls.feature_type, cls.cn) - case_fusions = cls.subset_dataframe_eq(somatic, cls.feature_type, cls.rearrangement) + case_variants = cls.subset_dataframe_eq(somatic, cls.feature_type, variant_string) + case_cns = cls.subset_dataframe_eq(somatic, cls.feature_type, copy_number_string) + case_fusions = cls.subset_dataframe_eq(somatic, cls.feature_type, rearrangement_string) if case_fusions.shape[0] > 
0: case_fusions = cls.format_fusions(case_fusions) @@ -54,24 +47,34 @@ def concat_case_comparisons(cls, somatic, dbs): fusion_columns = [cls.feature, cls.partner, cls.model_id] fusions = cls.concat_dataframes(case_fusions, comparison_fusions, fusion_columns) - variants[cls.feature_type] = cls.variant - copy_numbers[cls.feature_type] = cls.cn - fusions[cls.feature_type] = cls.rearrangement + variants[cls.feature_type] = variant_string + copy_numbers[cls.feature_type] = copy_number_string + fusions[cls.feature_type] = rearrangement_string fusions[cls.alt_type] = cls.fusion return { - cls.variant: variants, - cls.cn: copy_numbers, - cls.rearrangement: fusions + variant_string: variants, + copy_number_string: copy_numbers, + rearrangement_string: fusions } @classmethod - def compare(cls, dbs, dbs_preclinical, somatic, case_sample_id): + def compare(cls, dbs, dbs_preclinical, somatic, case_sample_id, config): cgc = DatasourceCGC.import_ds(dbs) almanac = DatasourceAlmanac.import_ds(dbs) - merged = cls.concat_case_comparisons(somatic, dbs_preclinical) - annotated = AnnotatorPreclinicalMatchmaking.annotate(merged, dbs) + somatic_variant_biomarker_type_string = config['feature_types']['mut'] + copy_number_variant_biomarker_type_string = config['feature_types']['cna'] + fusion_biomarker_type_string = config['feature_types']['fusion'] + + merged = cls.concat_case_comparisons( + somatic = somatic, + dbs = dbs_preclinical, + variant_string = somatic_variant_biomarker_type_string, + copy_number_string = copy_number_variant_biomarker_type_string, + rearrangement_string = fusion_biomarker_type_string + ) + annotated = AnnotatorPreclinicalMatchmaking.annotate(merged, dbs, config) samples_to_use = cls.subset_samples(dbs_preclinical) calculated = SNFTypesCGCwithEvidence.calculate(annotated, samples_to_use, cgc, almanac) diff --git a/moalmanac/moalmanac.py b/moalmanac/moalmanac.py index b89223b..5470b66 100644 --- a/moalmanac/moalmanac.py +++ b/moalmanac/moalmanac.py @@ -1,7 +1,6 @@ 
import time import argparse import os -import pandas as pd import subprocess import annotator @@ -16,7 +15,7 @@ import writer from config import COLNAMES -from config import CONFIG +from reader import Ini snv_handle = 'snv_handle' indel_handle = 'indel_handle' @@ -50,28 +49,7 @@ ontology = COLNAMES[oncotree_section]['ontology'] code = COLNAMES[oncotree_section]['code'] -feature_type_section = 'feature_types' -feature_type_mut = CONFIG[feature_type_section]['mut'] -feature_type_germline = CONFIG[feature_type_section]['germline'] -feature_type_cna = CONFIG[feature_type_section]['cna'] -feature_type_fusion = CONFIG[feature_type_section]['fusion'] -feature_type_burden = CONFIG[feature_type_section]['burden'] -feature_type_signature = CONFIG[feature_type_section]['signature'] -feature_type_microsatellite = CONFIG[feature_type_section]['microsatellite'] -feature_type_aneuploidy = CONFIG[feature_type_section]['aneuploidy'] -feature_types = { - 'mutation': feature_type_mut, - 'germline': feature_type_germline, - 'copynumber': feature_type_cna, - 'fusion': feature_type_fusion, - 'burden': feature_type_burden, - 'signature': feature_type_signature, - 'microsatellite': feature_type_microsatellite, - 'aneuploidy': feature_type_aneuploidy -} - generate_illustrations = 'generate_illustrations' -TOGGLE_FEATURES = CONFIG['function_toggle'] def create_metadata_dictionary(input_dictionary): @@ -95,9 +73,9 @@ def format_output_directory(directory): return directory -def load_and_process_mutational_signatures(input, dbs, tumor_type): - signatures = features.CosmicSignatures.import_feature(input) - annotated = annotator.Annotator.annotate_almanac(signatures, dbs, tumor_type) +def load_and_process_mutational_signatures(input, dbs, tumor_type, config): + signatures = features.CosmicSignatures.import_feature(input, config) + annotated = annotator.Annotator.annotate_almanac(signatures, dbs, tumor_type, config) evaluated = evaluator.Evaluator.evaluate_almanac(annotated) return evaluated @@ 
-110,18 +88,17 @@ def plot_preclinical_efficacy(dictionary, folder, label): writer.Illustrations.write(figure, folder, label, f"{figure_name}.png") -def process_preclinical_efficacy(dbs, dataframe, folder, label, plot: bool = False): - efficacy_dictionary = investigator.SensitivityDictionary.create(dbs, dataframe) +def process_preclinical_efficacy(dbs, dataframe, folder, label, config, plot: bool = False): + efficacy_dictionary = investigator.SensitivityDictionary.create(dbs, dataframe, config) if plot: plot_preclinical_efficacy(efficacy_dictionary, folder, label) efficacy_summary = investigator.SummaryDataFrame.create(efficacy_dictionary, dataframe, label) return efficacy_dictionary, efficacy_summary -def main(patient, inputs, output_folder): +def main(patient, inputs, output_folder, config, dbs, dbs_preclinical=None): metadata_dictionary = create_metadata_dictionary(patient) - dbs = datasources.Datasources.generate_db_dict(CONFIG) output_folder = format_output_directory(output_folder) if output_folder != "": execute_cmd(f"mkdir -p {output_folder}") @@ -132,47 +109,81 @@ def main(patient, inputs, output_folder): metadata_dictionary[ontology] = mapped_ontology[ontology] metadata_dictionary[code] = mapped_ontology[code] - df_snv, df_snv_reject = features.MAFSomatic.import_feature(inputs[snv_handle]) - df_indel, df_indel_reject = features.MAFSomatic.import_feature(inputs[indel_handle]) - df_cnv, df_cnv_reject = features.CopyNumber.import_feature(inputs[called_cn_handle], inputs[cnv_handle]) - df_fusion, df_fusion_reject = features.Fusion.import_feature(inputs[fusion_handle]) + df_snv, df_snv_reject = features.MAFSomatic.import_feature(inputs[snv_handle], config) + df_indel, df_indel_reject = features.MAFSomatic.import_feature(inputs[indel_handle], config) + df_cnv, df_cnv_reject = features.CopyNumber.import_feature(inputs[called_cn_handle], inputs[cnv_handle], config) + df_fusion, df_fusion_reject = features.Fusion.import_feature(inputs[fusion_handle], config) 
accepted_variants = [df_snv, df_indel, df_cnv, df_fusion] filtered_variants = [df_snv_reject, df_indel_reject, df_cnv_reject, df_fusion_reject] somatic_variants = features.Features.concat_list_of_dataframes(accepted_variants) somatic_filtered = features.Features.concat_list_of_dataframes(filtered_variants) - germline_variants, germline_reject = features.MAFGermline.import_feature(inputs[germline_handle]) + germline_variants, germline_reject = features.MAFGermline.import_feature(inputs[germline_handle], config) if not somatic_variants.empty: - annotated_somatic = annotator.Annotator.annotate_somatic(somatic_variants, dbs, metadata_dictionary[code]) + annotated_somatic = annotator.Annotator.annotate_somatic( + df=somatic_variants, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) evaluated_somatic = evaluator.Evaluator.evaluate_somatic(annotated_somatic) - validation_accept, validation_reject = features.MAFValidation.import_feature(inputs[validation_handle]) + validation_accept, validation_reject = features.MAFValidation.import_feature(inputs[validation_handle], config) if not validation_accept.empty: - evaluated_somatic = annotator.OverlapValidation.append_validation(evaluated_somatic, validation_accept) - illustrator.ValidationOverlap.generate_dna_rna_plot(evaluated_somatic, string_id, output_folder) + evaluated_somatic = annotator.OverlapValidation.append_validation( + primary=evaluated_somatic, + validation=validation_accept, + biomarker_type=config['feature_types']['mut']) + illustrator.ValidationOverlap.generate_dna_rna_plot(evaluated_somatic, string_id, output_folder, config) else: evaluated_somatic = features.Features.create_empty_dataframe() if not germline_variants.empty: - annotated_germline = annotator.Annotator.annotate_germline(germline_variants, dbs, metadata_dictionary[code]) + annotated_germline = annotator.Annotator.annotate_germline( + germline_variants, + dbs, + metadata_dictionary[code], + config=config + ) evaluated_germline = 
evaluator.Evaluator.evaluate_germline(annotated_germline) else: evaluated_germline = features.Features.create_empty_dataframe() evaluated_somatic = annotator.OverlapSomaticGermline.append_germline_hits(evaluated_somatic, evaluated_germline) - integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, feature_types) + integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, config) - somatic_burden = features.BurdenReader.import_feature(inputs[bases_covered_handle], metadata_dictionary, somatic_variants, dbs) + somatic_burden = features.BurdenReader.import_feature( + handle=inputs[bases_covered_handle], + patient=metadata_dictionary, + variants=somatic_variants, + dbs=dbs, + config=config + ) - patient_wgd = features.Aneuploidy.summarize(metadata_dictionary[wgd]) - patient_ms_status = features.MicrosatelliteReader.summarize(metadata_dictionary[ms_status]) + patient_wgd = features.Aneuploidy.summarize(metadata_dictionary[wgd], config) + patient_ms_status = features.MicrosatelliteReader.summarize(metadata_dictionary[ms_status], config) metadata_dictionary[ms_status] = features.MicrosatelliteReader.map_status(metadata_dictionary[ms_status]) - annotated_burden = annotator.Annotator.annotate_almanac(somatic_burden, dbs, metadata_dictionary[code]) - annotated_wgd = annotator.Annotator.annotate_almanac(patient_wgd, dbs, metadata_dictionary[code]) - annotated_ms_status = annotator.Annotator.annotate_almanac(patient_ms_status, dbs, metadata_dictionary[code]) + annotated_burden = annotator.Annotator.annotate_almanac( + df=somatic_burden, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) + annotated_wgd = annotator.Annotator.annotate_almanac( + df=patient_wgd, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) + annotated_ms_status = annotator.Annotator.annotate_almanac( + df=patient_ms_status, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) evaluated_burden = 
evaluator.Evaluator.evaluate_almanac(annotated_burden) evaluated_wgd = evaluator.Evaluator.evaluate_almanac(annotated_wgd) @@ -182,55 +193,69 @@ def main(patient, inputs, output_folder): evaluated_mutational_signatures = load_and_process_mutational_signatures( input=inputs[mutational_signatures_path], dbs=dbs, - tumor_type=code + tumor_type=code, + config=config ) actionable = evaluator.Actionable.evaluate( - evaluated_somatic, - evaluated_germline, - evaluated_ms_variants, - evaluated_ms_status, - evaluated_burden, - evaluated_mutational_signatures, - evaluated_wgd + somatic=evaluated_somatic, + germline=evaluated_germline, + ms_variants=evaluated_ms_variants, + ms_status=evaluated_ms_status, + burden=evaluated_burden, + signatures=evaluated_mutational_signatures, + wgd=evaluated_wgd, + config=config ) strategies = evaluator.Strategies.report_therapy_strategies(actionable) + function_toggle = config['function_toggle'] + efficacy_summary = investigator.SummaryDataFrame.create_empty_dataframe() - efficacy_dictionary = {} - cell_lines_dictionary = {} - preclinical_efficacy_on = TOGGLE_FEATURES.getboolean('calculate_preclinical_efficacy') + preclinical_efficacy_on = function_toggle.getboolean('calculate_preclinical_efficacy') # The input argument --disable_matchmaking will be removed in the next non-backwards compatible release - model_similarity_on = TOGGLE_FEATURES.getboolean('calculate_model_similarity') and not inputs[disable_matchmaking] + model_similarity_on = function_toggle.getboolean('calculate_model_similarity') and not inputs[disable_matchmaking] similarity_results = matchmaker.Matchmaker.create_empty_output() similarity_summary = {} - if preclinical_efficacy_on or model_similarity_on: - dbs_preclinical = datasources.Preclinical.import_dbs() - cell_lines_dictionary = dbs_preclinical['dictionary'] - if preclinical_efficacy_on: - plot_preclinical = TOGGLE_FEATURES.getboolean('plot_preclinical_efficacy') - efficacy_results = process_preclinical_efficacy( - 
dbs_preclinical, - actionable, - output_folder, - string_id, - plot=plot_preclinical - ) - efficacy_dictionary = efficacy_results[0] - efficacy_summary = efficacy_results[1] - - actionable = annotator.PreclinicalEfficacy.annotate( - actionable, - efficacy_summary, - efficacy_dictionary, - append_lookup=TOGGLE_FEATURES.getboolean('include_preclinical_efficacy_in_actionability_report') - ) - if model_similarity_on: - similarity_results = matchmaker.Matchmaker.compare(dbs, dbs_preclinical, evaluated_somatic, string_id) - similarity_summary = matchmaker.Report.create_report_dictionary(similarity_results, cell_lines_dictionary) + if dbs_preclinical is not None: + if preclinical_efficacy_on or model_similarity_on: + dbs_preclinical = datasources.Preclinical.import_dbs(dbs_preclinical) + cell_lines_dictionary = dbs_preclinical['dictionary'] + if preclinical_efficacy_on: + plot_preclinical = function_toggle.getboolean('plot_preclinical_efficacy') + efficacy_results = process_preclinical_efficacy( + dbs=dbs_preclinical, + dataframe=actionable, + folder=output_folder, + label=string_id, + config=config, + plot=plot_preclinical + ) + efficacy_dictionary = efficacy_results[0] + efficacy_summary = efficacy_results[1] + + actionable = annotator.PreclinicalEfficacy.annotate( + actionable, + efficacy_summary, + efficacy_dictionary, + append_lookup=function_toggle.getboolean('include_preclinical_efficacy_in_actionability_report') + ) + + if model_similarity_on: + similarity_results = matchmaker.Matchmaker.compare( + dbs=dbs, + dbs_preclinical=dbs_preclinical, + somatic=evaluated_somatic, + case_sample_id=string_id, + config=config + ) + similarity_summary = matchmaker.Report.create_report_dictionary( + similarity_results, + cell_lines_dictionary + ) writer.Actionable.write(actionable, string_id, output_folder) writer.GermlineACMG.write(evaluated_germline, string_id, output_folder) @@ -245,13 +270,14 @@ def main(patient, inputs, output_folder): 
writer.PreclinicalEfficacy.write(efficacy_summary, string_id, output_folder) writer.PreclinicalMatchmaking.write(similarity_results, string_id, output_folder) - if TOGGLE_FEATURES.getboolean('generate_actionability_report'): + if function_toggle.getboolean('generate_actionability_report'): report_dictionary = reporter.Reporter.generate_dictionary(evaluated_somatic, metadata_dictionary) - include_similarity = TOGGLE_FEATURES.getboolean('include_model_similarity_in_actionability_report') + include_similarity = function_toggle.getboolean('include_model_similarity_in_actionability_report') reporter.Reporter.generate_actionability_report( - actionable, - report_dictionary, + actionable=actionable, + report_dictionary=report_dictionary, + config=config, similarity=similarity_summary if include_similarity else None, output_directory=output_folder ) @@ -260,66 +286,121 @@ def main(patient, inputs, output_folder): if __name__ == "__main__": start_time = time.time() - arg_parser = argparse.ArgumentParser(prog='Molecular Oncology Almanac', - description='A clinical interpretation algorithm for cancer genomics.') - arg_parser.add_argument('--patient_id', - help='patient id label', - required=True) - arg_parser.add_argument('--description', - default='', - help='description of patient') - arg_parser.add_argument('--tumor_type', - default='Unknown', - help='reported tumor type') - arg_parser.add_argument('--stage', - default='Unknown', - help='disease stage') - arg_parser.add_argument('--snv_handle', - default='', - help='handle for SNV MAF') - arg_parser.add_argument('--indel_handle', - default='', - help='handle for InDel MAF') - arg_parser.add_argument('--bases_covered_handle', - default='', - help='handle for a text file which contains the numeric number of somatic bases') - arg_parser.add_argument('--called_cn_handle', - default='', - help='handle for called copy number alterations file, used over --cnv_handle') - arg_parser.add_argument('--cnv_handle', - default='', - 
help='handle for annotated seg file') - arg_parser.add_argument('--fusion_handle', - default='', - help='handle for STAR Fusion output, .final.abridged') - arg_parser.add_argument('--germline_handle', - default='', - help='handle for Germline MAF') - arg_parser.add_argument('--validation_handle', - default='', - help='handle for SNV MAF called from validation sequencing') - arg_parser.add_argument('--ms_status', - default='unk', - choices=['msih', 'msil', 'mss', 'unk'], - help='microsatellite instability status') - arg_parser.add_argument('--mutational_signatures', - default='', - help='file for SBS signature contributions, version 3.4') - arg_parser.add_argument('--purity', - default='Unknown', - help='Tumor purity') - arg_parser.add_argument('--ploidy', - default='Unknown', - help='Tumor ploidy') - arg_parser.add_argument('--wgd', - action='store_true', - help='Specify the occurrence of whole genome duplication') - arg_parser.add_argument('--disable_matchmaking', - action='store_true', - help='Disable matchmaking in report') - arg_parser.add_argument('--output_directory', - default=None, - help='Output directory for generated files') + arg_parser = argparse.ArgumentParser( + prog='Molecular Oncology Almanac', + description='A clinical interpretation algorithm for cancer genomics.' 
+ ) + arg_parser.add_argument( + '--patient_id', + help='patient id label', + required=True + ) + arg_parser.add_argument( + '--description', + default='', + help='description of patient' + ) + arg_parser.add_argument( + '--tumor_type', + default='Unknown', + help='reported tumor type' + ) + arg_parser.add_argument( + '--stage', + default='Unknown', + help='disease stage' + ) + arg_parser.add_argument( + '--snv_handle', + default='', + help='handle for SNV MAF' + ) + arg_parser.add_argument( + '--indel_handle', + default='', + help='handle for InDel MAF' + ) + arg_parser.add_argument( + '--bases_covered_handle', + default='', + help='handle for a text file which contains the numeric number of somatic bases' + ) + arg_parser.add_argument( + '--called_cn_handle', + default='', + help='handle for called copy number alterations file, used over --cnv_handle' + ) + arg_parser.add_argument( + '--cnv_handle', + default='', + help='handle for annotated seg file' + ) + arg_parser.add_argument( + '--fusion_handle', + default='', + help='handle for STAR Fusion output, .final.abridged' + ) + arg_parser.add_argument( + '--germline_handle', + default='', + help='handle for Germline MAF' + ) + arg_parser.add_argument( + '--validation_handle', + default='', + help='handle for SNV MAF called from validation sequencing' + ) + arg_parser.add_argument( + '--ms_status', + default='unk', + choices=['msih', 'msil', 'mss', 'unk'], + help='microsatellite instability status' + ) + arg_parser.add_argument( + '--mutational_signatures', + default='', + help='file for SBS signature contributions, version 3.4' + ) + arg_parser.add_argument( + '--purity', + default='Unknown', + help='Tumor purity' + ) + arg_parser.add_argument( + '--ploidy', + default='Unknown', + help='Tumor ploidy' + ) + arg_parser.add_argument( + '--wgd', + action='store_true', + help='Specify the occurrence of whole genome duplication' + ) + arg_parser.add_argument( + '--disable_matchmaking', + action='store_true', + 
help='Disable matchmaking in report' + ) + arg_parser.add_argument( + '--output_directory', + default=None, + help='Output directory for generated files' + ) + arg_parser.add_argument( + '--config', '-c', + required=True, + help='ini file that contains configuration details' + ) + arg_parser.add_argument( + '--dbs', + required=True, + help='ini file that contains database paths' + ) + arg_parser.add_argument( + '--preclinical-dbs', + required=False, + help='ini file that contains preclinical file paths' + ) args = arg_parser.parse_args() patient_dict = { @@ -348,7 +429,23 @@ def main(patient, inputs, output_folder): output_directory = args.output_directory if args.output_directory else os.getcwd() - main(patient_dict, inputs_dict, output_directory) + config_ini = Ini.read(args.config, extended_interpolation=False, convert_to_dictionary=False) + + db_paths = Ini.read(args.dbs, extended_interpolation=True, convert_to_dictionary=True) + if args.preclinical_dbs: + preclinical_db_paths = Ini.read(args.preclinical_dbs, extended_interpolation=True, convert_to_dictionary=True) + else: + preclinical_db_paths = None + + print(db_paths) + main( + patient=patient_dict, + inputs=inputs_dict, + output_folder=output_directory, + config=config_ini, + dbs=db_paths['databases'], + dbs_preclinical=preclinical_db_paths['preclinical'] + ) end_time = time.time() time_statement = "Molecular Oncology Almanac runtime: %s seconds" % round((end_time - start_time), 4) diff --git a/moalmanac/preclinical-databases.ini b/moalmanac/preclinical-databases.ini new file mode 100644 index 0000000..d6e2086 --- /dev/null +++ b/moalmanac/preclinical-databases.ini @@ -0,0 +1,11 @@ +[preclinical] +root = ../datasources +almanac_gdsc_mappings = ${root}/preclinical/formatted/almanac-gdsc-mappings.json +summary = ${root}/preclinical/formatted/cell-lines.summary.txt +variants = ${root}/preclinical/annotated/cell-lines.somatic-variants.annotated.txt +copynumbers = 
${root}/preclinical/annotated/cell-lines.copy-numbers.annotated.txt +fusions = ${root}/preclinical/annotated/cell-lines.fusions.annotated.txt +fusions1 = ${root}/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt +fusions2 = ${root}/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt +gdsc = ${root}/preclinical/formatted/sanger.gdsc.txt +dictionary = ${root}/preclinical/cell-lines.pkl \ No newline at end of file diff --git a/moalmanac/reader.py b/moalmanac/reader.py index c935ff7..ce61219 100644 --- a/moalmanac/reader.py +++ b/moalmanac/reader.py @@ -1,9 +1,38 @@ +import configparser import json import pandas as pd import pickle -class Reader(object): +class Ini: + @classmethod + def read(cls, path, extended_interpolation=False, convert_to_dictionary=False): + ini = cls.load(path, extended_interpolation=extended_interpolation) + if convert_to_dictionary: + return cls.convert_ini_to_dictionary(ini) + else: + return ini + + @staticmethod + def convert_ini_to_dictionary(ini): + dictionary = {} + for section in ini.sections(): + dictionary[section] = {} + for (key, value) in ini.items(section): + dictionary[section][key] = value + return dictionary + + @staticmethod + def load(path, extended_interpolation=False): + if extended_interpolation: + config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation()) + else: + config = configparser.ConfigParser() + config.read(path) + return config + + +class Reader: @staticmethod def check_comment_rows(handle, comment_character): skip_rows = 0 diff --git a/moalmanac/reporter.py b/moalmanac/reporter.py index bb48023..21f8828 100644 --- a/moalmanac/reporter.py +++ b/moalmanac/reporter.py @@ -5,7 +5,6 @@ import os from config import COLNAMES -from config import CONFIG class Reporter: @@ -23,23 +22,22 @@ class Reporter: ms_status = COLNAMES[report_section]['ms_status'] @classmethod - def drop_double_fusion(cls, dataframe): + def drop_double_fusion(cls, dataframe, biomarker_type_string): 
feature_type = COLNAMES[cls.report_section]['feature_type'] alt = COLNAMES[cls.report_section]['alteration'] - rearrangement = CONFIG['feature_types']['fusion'] - idx_rearrangement = dataframe[dataframe[feature_type].eq(rearrangement)].index + idx_rearrangement = dataframe[dataframe[feature_type].eq(biomarker_type_string)].index idx_rearrangement_keep = dataframe.loc[idx_rearrangement, :].drop_duplicates([alt], keep='first').index idx_rearrangement_drop = idx_rearrangement.difference(idx_rearrangement_keep) idx_keep = dataframe.index.difference(idx_rearrangement_drop) return dataframe.loc[idx_keep, :] @classmethod - def format_alterations(cls, dataframe): + def format_alterations(cls, dataframe, config): if dataframe.empty: return dataframe - dataframe = cls.drop_double_fusion(dataframe) + dataframe = cls.drop_double_fusion(dataframe, biomarker_type_string=config['feature_types']['fusion']) lookup = COLNAMES['datasources'] columns = [lookup['sensitivity'], lookup['resistance'], lookup['prognosis']] @@ -73,7 +71,7 @@ def format_clinical_columns(series, convert_to_float=False): return series @classmethod - def generate_actionability_report(cls, actionable, report_dictionary, similarity=None, output_directory=None): + def generate_actionability_report(cls, actionable, report_dictionary, config, similarity=None, output_directory=None): report = ActionabilityReport() report.add_metadata( name=report_dictionary['patient_id'], @@ -89,13 +87,13 @@ def generate_actionability_report(cls, actionable, report_dictionary, similarity msi=report_dictionary['microsatellite_status'] ) - versions = cls.generate_version_dictionary() + versions = cls.generate_version_dictionary(config) report.add_versions( software=versions['software'], database=versions['database'] ) - actionable = cls.format_alterations(actionable) + actionable = cls.format_alterations(dataframe=actionable, config=config) report.add_alterations(actionable) report.add_similar_profiles(similarity) @@ -120,10 +118,10 @@ 
def generate_date(): return datetime.date.today().strftime("%b %d %Y") @classmethod - def generate_version_dictionary(cls): + def generate_version_dictionary(cls, config): version_section = 'versions' - software_version = CONFIG[version_section]['interpreter'] - database_version = CONFIG[version_section]['database'] + software_version = config[version_section]['interpreter'] + database_version = config[version_section]['database'] return { 'software': software_version, 'database': database_version diff --git a/moalmanac/run_deconstructsigs.R b/moalmanac/run_deconstructsigs.R deleted file mode 100644 index 89900a3..0000000 --- a/moalmanac/run_deconstructsigs.R +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/Rscript -library("deconstructSigs") - -args = commandArgs(trailingOnly=TRUE) -patient_id = args[1] -snv_handle = args[2] -sample = args[3] -ref = args[4] -alt = args[5] -chr = args[6] -pos = args[7] -folder = args[8] - -maf = read.csv(snv_handle, sep = '\t', comment.char = '#') -names(maf) <- tolower(names(maf)) -cols = c(sample, ref, alt, chr, pos) -maf <- maf[colnames(maf) %in% cols] - -maf$tumor_sample_barcode <- sapply(maf$tumor_sample_barcode, as.factor) -maf$reference_allele <- sapply(maf$reference_allele, as.factor) -maf$tumor_seq_allele2 <- sapply(maf$tumor_seq_allele2, as.factor) -maf$chromosome <- sapply(maf$chromosome, as.factor) - -unique.samples = unique(maf$tumor_sample_barcode) - -sigs.input <- mut.to.sigs.input(mut.ref = maf, - sample.id = sample, chr = chr, - pos = pos, ref = ref, - alt = alt) - -temp.filename <- paste(folder, patient_id, ".sigs.context.txt", sep = "") -write.table(sigs.input, file = temp.filename, sep = '\t', row.names = FALSE) - -for (sample_ in unique.samples) { - output.sigs <- whichSignatures(tumor.ref = sigs.input, - signatures.ref = signatures.cosmic, sample.id = sample_, - context = TRUE, tri.counts.method = 'default') - - temp.filename = paste(folder, patient_id, ".sigs.cosmic.txt", sep = "") - write.table(output.sigs, file = 
temp.filename, sep = '\t', row.names = FALSE) -} diff --git a/moalmanac/run_example.py b/moalmanac/run_example.py index a8400f5..75718b4 100644 --- a/moalmanac/run_example.py +++ b/moalmanac/run_example.py @@ -1,9 +1,13 @@ import moalmanac import os -import time import subprocess +import time + +from datetime import date -patient_dict = { +from reader import Ini + +metadata_dictionary = { 'patient_id': 'example', 'reported_tumor_type': 'MEL', 'stage': 'Metastatic', @@ -14,7 +18,7 @@ 'microsatellite_status': 'msih' } -empty_dict = { +input_dictionary_empty = { 'snv_handle': '', 'indel_handle': '', 'bases_covered_handle': '', @@ -27,7 +31,7 @@ 'disable_matchmaking': False } -example_dict = { +input_dictionary = { 'snv_handle': '../example_data/example_patient.capture.somatic.snvs.maf', 'indel_handle': '../example_data/example_patient.capture.somatic.indels.maf', 'bases_covered_handle': '../example_data/example_patient.capture.somatic.coverage.txt', @@ -40,12 +44,21 @@ 'disable_matchmaking': False } +config_ini_path = "config.ini" +dbs_ini_path = "annotation-databases.ini" +dbs_preclinical_ini_path = "preclinical-databases.ini" + +config_ini = Ini.read(config_ini_path, extended_interpolation=False, convert_to_dictionary=False) +db_paths = Ini.read(dbs_ini_path, extended_interpolation=True, convert_to_dictionary=True) +preclinical_db_paths = Ini.read(dbs_preclinical_ini_path, extended_interpolation=True, convert_to_dictionary=True) + def execute_cmd(command): subprocess.call(command, shell=True) -output_directory = "example" +today = date.today().isoformat() +output_directory = f"{today}-example-outputs" if output_directory != "": cmd = f"mkdir -p {output_directory}" execute_cmd(cmd) @@ -53,7 +66,14 @@ def execute_cmd(command): output_directory = os.getcwd() start_time = time.time() -moalmanac.main(patient_dict, example_dict, output_directory) +moalmanac.main( + patient=metadata_dictionary, + inputs=input_dictionary, + output_folder=output_directory, + 
config=config_ini, + dbs=db_paths['databases'], + dbs_preclinical=preclinical_db_paths['preclinical'] +) end_time = time.time() time_statement = "Molecular Oncology Almanac runtime: %s seconds" % round((end_time - start_time), 4) diff --git a/moalmanac/simplified_input.py b/moalmanac/simplified_input.py index 5bdfc09..ba87257 100644 --- a/moalmanac/simplified_input.py +++ b/moalmanac/simplified_input.py @@ -17,7 +17,7 @@ import writer from config import COLNAMES -from config import CONFIG +from reader import Ini snv_handle = 'snv_handle' indel_handle = 'indel_handle' @@ -84,34 +84,41 @@ def subset_by_feature_type(dataframe): return somatic, germline -def main(patient, input_file, output_folder): - dbs = datasources.Datasources.generate_db_dict(CONFIG) +def main(patient, input_file, output_folder, config, dbs, dbs_preclinical=None): + metadata_dictionary = moalmanac.create_metadata_dictionary(patient) + output_folder = moalmanac.format_output_directory(output_folder) if output_folder != "": moalmanac.execute_cmd(f"mkdir -p {output_folder}") - string_id = patient[patient_id] + string_id = metadata_dictionary[patient_id] - mapped_ontology = ontologymapper.OntologyMapper.map(dbs, patient[tumor_type]) - patient[ontology] = mapped_ontology[ontology] - patient[code] = mapped_ontology[code] + mapped_ontology = ontologymapper.OntologyMapper.map(dbs, metadata_dictionary[tumor_type]) + metadata_dictionary[ontology] = mapped_ontology[ontology] + metadata_dictionary[code] = mapped_ontology[code] alterations = features.Simple.import_feature(input_file) - annotated_alterations = annotator.Annotator.annotate_simple(alterations, dbs, patient[code]) + annotated_alterations = annotator.Annotator.annotate_simple(alterations, dbs, patient[code], config=config) evaluated_alterations = evaluator.Evaluator.evaluate_somatic(annotated_alterations) evaluated_somatic, evaluated_germline = subset_by_feature_type(evaluated_alterations) evaluated_somatic = 
annotator.OverlapSomaticGermline.append_germline_hits(evaluated_somatic, evaluated_germline) integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, feature_types) - somatic_burden = features.BurdenReader.import_feature('', patient, evaluated_somatic, dbs) - patient_wgd = features.Aneuploidy.summarize(patient[wgd]) - patient_ms_status = features.MicrosatelliteReader.summarize(patient[ms_status]) + somatic_burden = features.BurdenReader.import_feature( + handle='', + patient=metadata_dictionary, + variants=evaluated_somatic, + dbs=dbs, + config=config + ) + patient_wgd = features.Aneuploidy.summarize(patient[wgd], config=config) + patient_ms_status = features.MicrosatelliteReader.summarize(patient[ms_status], config=config) patient[ms_status] = features.MicrosatelliteReader.map_status(patient[ms_status]) - annotated_burden = annotator.Annotator.annotate_almanac(somatic_burden, dbs, patient[code]) - annotated_wgd = annotator.Annotator.annotate_almanac(patient_wgd, dbs, patient[code]) - annotated_ms_status = annotator.Annotator.annotate_almanac(patient_ms_status, dbs, patient[code]) + annotated_burden = annotator.Annotator.annotate_almanac(somatic_burden, dbs, patient[code], config=config) + annotated_wgd = annotator.Annotator.annotate_almanac(patient_wgd, dbs, patient[code], config=config) + annotated_ms_status = annotator.Annotator.annotate_almanac(patient_ms_status, dbs, patient[code], config=config) evaluated_burden = evaluator.Evaluator.evaluate_almanac(annotated_burden) evaluated_wgd = evaluator.Evaluator.evaluate_almanac(annotated_wgd) @@ -125,14 +132,14 @@ def main(patient, input_file, output_folder): ms_status=evaluated_ms_status, burden=evaluated_burden, signatures=features.Features.create_empty_dataframe(), - wgd=evaluated_wgd + wgd=evaluated_wgd, + config=config ) strategies = evaluator.Strategies.report_therapy_strategies(actionable) + function_toggle = config['function_toggle'] efficacy_summary = 
investigator.SummaryDataFrame.create_empty_dataframe() - efficacy_dictionary = {} - cell_lines_dictionary = {} preclinical_efficacy_on = TOGGLE_FEATURES.getboolean('calculate_preclinical_efficacy') # The input argument --disable_matchmaking will be removed in the next non-backwards compatible release @@ -146,10 +153,11 @@ def main(patient, input_file, output_folder): if preclinical_efficacy_on: plot_preclinical = TOGGLE_FEATURES.getboolean('plot_preclinical_efficacy') efficacy_results = moalmanac.process_preclinical_efficacy( - dbs_preclinical, - actionable, - output_folder, - string_id, + dbs=dbs_preclinical, + dataframe=actionable, + folder=output_folder, + label=string_id, + config=config, plot=plot_preclinical ) efficacy_dictionary = efficacy_results[0] @@ -162,8 +170,17 @@ def main(patient, input_file, output_folder): append_lookup=TOGGLE_FEATURES.getboolean('include_preclinical_efficacy_in_actionability_report') ) if model_similarity_on: - similarity_results = matchmaker.Matchmaker.compare(dbs, dbs_preclinical, evaluated_somatic, string_id) - similarity_summary = matchmaker.Report.create_report_dictionary(similarity_results, cell_lines_dictionary) + similarity_results = matchmaker.Matchmaker.compare( + dbs=dbs, + dbs_preclinical=dbs_preclinical, + somatic=evaluated_somatic, + case_sample_id=string_id, + config=config + ) + similarity_summary = matchmaker.Report.create_report_dictionary( + similarity_results, + cell_lines_dictionary + ) writer.Actionable.write(actionable, string_id, output_folder) writer.GermlineACMG.write(evaluated_germline, string_id, output_folder) @@ -177,13 +194,14 @@ def main(patient, input_file, output_folder): writer.PreclinicalEfficacy.write(efficacy_summary, string_id, output_folder) writer.PreclinicalMatchmaking.write(similarity_results, string_id, output_folder) - if TOGGLE_FEATURES.getboolean('generate_actionability_report'): - report_dictionary = reporter.Reporter.generate_dictionary(evaluated_somatic, patient) + if 
function_toggle.getboolean('generate_actionability_report'): + report_dictionary = reporter.Reporter.generate_dictionary(evaluated_somatic, metadata_dictionary) - include_similarity = TOGGLE_FEATURES.getboolean('include_model_similarity_in_actionability_report') + include_similarity = function_toggle.getboolean('include_model_similarity_in_actionability_report') reporter.Reporter.generate_actionability_report( - actionable, - report_dictionary, + actionable=actionable, + report_dictionary=report_dictionary, + config=config, similarity=similarity_summary if include_similarity else None, output_directory=output_folder ) @@ -194,33 +212,66 @@ def main(patient, input_file, output_folder): arg_parser = argparse.ArgumentParser(prog='Molecular Oncology Almanac using simplified input', description='Annotates only using the Molecular Oncology Almanac database') - arg_parser.add_argument('--patient_id', - help='patient id label', - required=True) - arg_parser.add_argument('--stage', - default='Unknown', - help='disease stage') - arg_parser.add_argument('--tumor_type', - default='Unknown', - help='reported tumor type') - arg_parser.add_argument('--input', - help='Tab delimited file of observed alterations') - arg_parser.add_argument('--ms_status', - default='unk', - choices=['msih', 'msil', 'mss', 'unk'], - help='microsatellite instability status') - arg_parser.add_argument('--purity', - default='Unknown', - help='Tumor purity') - arg_parser.add_argument('--ploidy', - default='Unknown', - help='Tumor ploidy') - arg_parser.add_argument('--wgd', - action='store_true', - help='Specify the occurrence of whole genome duplication') - arg_parser.add_argument('--output_directory', - default=None, - help='Output directory for generated files') + arg_parser.add_argument( + '--patient_id', + help='patient id label', + required=True + ) + arg_parser.add_argument( + '--stage', + default='Unknown', + help='disease stage' + ) + arg_parser.add_argument( + '--tumor_type', + default='Unknown', 
+ help='reported tumor type' + ) + arg_parser.add_argument( + '--input', + help='Tab delimited file of observed alterations' + ) + arg_parser.add_argument( + '--ms_status', + default='unk', + choices=['msih', 'msil', 'mss', 'unk'], + help='microsatellite instability status' + ) + arg_parser.add_argument( + '--purity', + default='Unknown', + help='Tumor purity' + ) + arg_parser.add_argument( + '--ploidy', + default='Unknown', + help='Tumor ploidy' + ) + arg_parser.add_argument( + '--wgd', + action='store_true', + help='Specify the occurrence of whole genome duplication' + ) + arg_parser.add_argument( + '--output_directory', + default=None, + help='Output directory for generated files' + ) + arg_parser.add_argument( + '--config', '-c', + required=True, + help='ini file that contains configuration details' + ) + arg_parser.add_argument( + '--dbs', + required=True, + help='ini file that contains database paths ' + ) + arg_parser.add_argument( + '--preclinical-dbs', + required=False, + help='ini file that contains preclinical file paths' + ) args = arg_parser.parse_args() patient_dict = { @@ -236,7 +287,22 @@ def main(patient, input_file, output_folder): output_directory = args.output_directory if args.output_directory else os.getcwd() - main(patient_dict, args.input, output_directory) + config_ini = Ini.read(args.config, extended_interpolation=False, convert_to_dictionary=False) + + db_paths = Ini.read(args.dbs, extended_interpolation=True, convert_to_dictionary=True) + if args.preclinical_dbs: + preclinical_db_paths = Ini.read(args.preclinical_dbs, extended_interpolation=True, convert_to_dictionary=True) + else: + preclinical_db_paths = None + + main( + patient=patient_dict, + input_file=args.input, + output_folder=output_directory, + config=config_ini, + dbs=db_paths, + dbs_preclinical=preclinical_db_paths + ) end_time = time.time() time_statement = "Molecular Oncology Almanac runtime: %s seconds" % round((end_time - start_time), 4) diff --git 
a/moalmanac/test/README.md b/moalmanac/test/README.md new file mode 100644 index 0000000..e69de29 diff --git a/moalmanac/test/annotator_tests.py b/moalmanac/test/annotator_tests.py index 286028b..5a43b13 100644 --- a/moalmanac/test/annotator_tests.py +++ b/moalmanac/test/annotator_tests.py @@ -9,7 +9,6 @@ from datasources import Preclinical as datasources_Preclinical from features import Features from investigator import SensitivityDictionary -from config import CONFIG class UnitTestAnnotator(unittest.TestCase): @@ -58,7 +57,9 @@ def test_annotate(self): gene = ACMG.gene bin_name = ACMG.bin_name df = pd.DataFrame({gene: ['TP53', 'FOO', 'PMS2', 'TSC1', 'AR']}) - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt' + } annotated = ACMG.annotate(df, dbs) expected_result = pd.Series([1, 0, 1, 1, 0], name=bin_name) @@ -161,7 +162,7 @@ def test_update_series_with_best_match(self): "doi": "10.1126/science.1062538", "pmid": "11423618", "nct": '', "last_updated": "6/13/19", "feature_display": "ABL1 p.T315I (Missense)", "predictive_implication_map": 1.0}, {}] - somatic_variant = Almanac.somatic_variant + somatic_variant = 'Somatic Variant' series = pd.Series(dtype=object) for columns in [Almanac.column_map_sensitive, Almanac.column_map_resistance, Almanac.column_map_prognostic]: @@ -194,9 +195,9 @@ def test_append_exac_af(self): alt = ExAC.alt af = ExAC.af feature_type = Features.feature_type - somatic = CONFIG['feature_types']['mut'] - germline = CONFIG['feature_types']['germline'] - cn = CONFIG['feature_types']['cna'] + somatic = 'Somatic Variant' + germline = 'Germline Variant' + cn = 'Copy Number' df = pd.DataFrame({chr: [1, 2, 3, 1], start: [100, 101, 103, 100], @@ -209,13 +210,18 @@ def test_append_exac_af(self): ref: ["C", "A", "T"], alt: ["G", "G", "G"], af: [1, 0.5, 0.001]}) - result = ExAC.append_exac_af(df, exac, [chr, start, ref, alt, af]) + biomarker_types = [somatic, germline] + result = 
ExAC.append_exac_af( + df=df, + ds=exac, + ds_columns=[chr, start, ref, alt, af], + variant_biomarker_types=biomarker_types) self.assertEqual([1, 0, 0, 0], result[af].tolist()) def test_annotate_common_af(self): - exac_common_threshold = ExAC.exac_common_threshold + exac_common_threshold = 0.001 series = pd.Series([float(exac_common_threshold) - 0.01, float(exac_common_threshold) + 0.01]) - result = ExAC.annotate_common_af(series) + result = ExAC.annotate_common_af(series, threshold=exac_common_threshold) self.assertEqual(0.0, result.loc[0]) self.assertEqual(1.0, result.loc[1]) @@ -245,7 +251,11 @@ class UnitTestValidation(unittest.TestCase): }) def test_append_validation(self): - result = OverlapValidation.append_validation(UnitTestValidation.dataframe1, UnitTestValidation.dataframe2) + result = OverlapValidation.append_validation( + UnitTestValidation.dataframe1, + UnitTestValidation.dataframe2, + biomarker_type='Somatic Variant' + ) result = result.fillna('') self.assertEqual(UnitTestValidation.dataframe1['feature'].tolist(), result['feature'].tolist()) self.assertEqual([0.20, '', 0.66, 0.0], result['validation_tumor_f'].tolist()) @@ -284,7 +294,7 @@ def test_get_mutation_index(self): dataframe = pd.DataFrame(['Somatic Variant', 'bar', 'foo'], columns=[OverlapValidation.feature_type]) solution = ['Somatic Variant'] solution_index = pd.Index([0]) - result = OverlapValidation.get_mutation_index(dataframe) + result = OverlapValidation.get_mutation_index(dataframe, biomarker_type='Somatic Variant') self.assertEqual(solution[0], dataframe.loc[result[0], OverlapValidation.feature_type]) self.assertEqual(solution_index[0], result[0]) @@ -327,8 +337,26 @@ class UnitTestPreclinicalEfficacy(unittest.TestCase): 'pvalue_mww': [2.322E-12, 7.627E-17, 0.835] } df2 = pd.DataFrame(data_dictionary, index=[0, 1, 2]) - dbs_preclinical = datasources_Preclinical.import_dbs() - efficacy_dictionary = SensitivityDictionary.create(dbs_preclinical, df1) + dbs_dictionary = { + 
'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': '../datasources/preclinical/cell-lines.pkl' + } + config = { + 'feature_types': { + 'mut': 'Somatic Variant', + 'cna': 'Copy Number', + 'fusion': 'Rearrangement' + } + } + dbs_preclinical = datasources_Preclinical.import_dbs(dbs_dictionary) + efficacy_dictionary = SensitivityDictionary.create(dbs_preclinical, df1, config=config) def test_annotate(self): result = PreclinicalEfficacy.annotate( @@ -352,13 +380,27 @@ def test_series_for_significance(self): class UnitTestPreclinicalMatchmaking(unittest.TestCase): def test_annotate_copy_numbers(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 
'../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type alteration = PreclinicalMatchmaking.alteration - copy_number = PreclinicalMatchmaking.copy_number + copy_number = 'Copy Number' df = pd.DataFrame({ feature: ['CDKN2A', 'CDKN2A', 'KRAS'], @@ -366,7 +408,7 @@ def test_annotate_copy_numbers(self): }) df[feature_type] = copy_number df[alteration] = pd.NA - result = PreclinicalMatchmaking.annotate_copy_numbers(df, dbs) + result = PreclinicalMatchmaking.annotate_copy_numbers(df, dbs, biomarker_type_string=copy_number) expected_cdkn2a_del = { 'feature_match_1': 1, @@ -420,12 +462,26 @@ def test_annotate_copy_numbers(self): self.assertEqual(result.loc[2, key], expected_kras_amp[key]) def test_annotate_fusions(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 
'../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type partner = PreclinicalMatchmaking.partner - fusion = PreclinicalMatchmaking.fusion + fusion = 'Rearrangement' model_id = PreclinicalMatchmaking.model_id df = pd.DataFrame({ @@ -435,7 +491,7 @@ def test_annotate_fusions(self): df[alteration_type] = 'Fusion' df[model_id] = 'case' - result, group1, group2 = PreclinicalMatchmaking.annotate_fusions(df, dbs) + result, group1, group2 = PreclinicalMatchmaking.annotate_fusions(df, dbs, biomarker_type_string=fusion) expected_index_0 = { 'feature_match_1': 1, @@ -668,12 +724,27 @@ def test_annotate_fusions(self): self.assertEqual(group2.loc[1, 'gsea_modules_bin'], expected_index_1_group2['gsea_modules_bin']) def test_annotate_fusions_matching(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 
'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } + feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type partner = PreclinicalMatchmaking.partner - fusion = PreclinicalMatchmaking.fusion + fusion = 'Rearrangement' model_id = PreclinicalMatchmaking.model_id evidence_map_str = PreclinicalMatchmaking.evidence_map_str merged = PreclinicalMatchmaking.merged @@ -715,13 +786,27 @@ def test_annotate_fusions_matching(self): self.assertEqual(result.loc[index, key], value) def test_annotate_somatic_variants(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type alteration = PreclinicalMatchmaking.alteration - somatic_variant = PreclinicalMatchmaking.somatic_variant + somatic_variant = 'Somatic Variant' df = pd.DataFrame({ feature: ['BRAF', 
'BRAF', 'IDH1', 'CDKN2A'], @@ -730,7 +815,7 @@ def test_annotate_somatic_variants(self): }) df[feature_type] = somatic_variant - result = PreclinicalMatchmaking.annotate_somatic_variants(df, dbs) + result = PreclinicalMatchmaking.annotate_somatic_variants(df, dbs, biomarker_type_string=somatic_variant) expected_braf_1 = { 'feature_match_1': 1, @@ -816,11 +901,25 @@ def test_annotate_match_1(self): def test_annotate_match_2(self): match_2 = PreclinicalMatchmaking.match_2 - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - copy_number = PreclinicalMatchmaking.copy_number - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type @@ -880,11 +979,25 @@ def test_annotate_match_2(self): def test_annotate_match_3(self): match_3 = 
PreclinicalMatchmaking.match_3 - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - copy_number = PreclinicalMatchmaking.copy_number - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type @@ -953,10 +1066,25 @@ def test_annotate_match_3(self): def test_annotate_match_4(self): match_4 = PreclinicalMatchmaking.match_4 - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 
'../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type @@ -1002,11 +1130,25 @@ def test_annotate_match_4(self): self.assertEqual(result.loc[3, match_4], result.loc[3, 'expectation1']) def test_format_db(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 
'../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - copy_number = PreclinicalMatchmaking.copy_number - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type diff --git a/moalmanac/test/datasources_tests.py b/moalmanac/test/datasources_tests.py index 8e5a8db..7d95676 100644 --- a/moalmanac/test/datasources_tests.py +++ b/moalmanac/test/datasources_tests.py @@ -28,7 +28,18 @@ def test_import_dbs(self): fusions = Preclinical.fusions gdsc = Preclinical.gdsc mappings = Preclinical.mappings - dbs = Preclinical.import_dbs() + dbs_dictionary = { + 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': '../datasources/preclinical/cell-lines.pkl' + } + dbs = Preclinical.import_dbs(dbs_dictionary) for label in [summary, variants, cnas, fusions, gdsc]: self.assertEqual(type(dbs[label]), type(pd.DataFrame())) diff --git a/moalmanac/test/evaluator_tests.py b/moalmanac/test/evaluator_tests.py index afceb40..4c27b4b 
100644 --- a/moalmanac/test/evaluator_tests.py +++ b/moalmanac/test/evaluator_tests.py @@ -110,32 +110,32 @@ def test_remap_almanac_bins(self): def test_remove_low_allele_fraction_variants(self): feature_type = Evaluator.feature_type - mut_type = Evaluator.mut_type - germline_type = Evaluator.germline_type + mut_type = 'Somatic Variant' + germline_type = 'Germline Variant' tumor_f = Evaluator.tumor_f - min_af = Evaluator.min_af + min_af = 0.05 low_af = float(min_af) - 0.01 high_af = float(min_af) + 0.01 df = pd.DataFrame({feature_type: [mut_type, mut_type, germline_type, germline_type, 'Aneuploidy'], tumor_f: [low_af, high_af, low_af, high_af, np.nan]}) - subsetted = Evaluator.remove_low_allele_fraction_variants(df) + subsetted = Evaluator.remove_low_allele_fraction_variants(df, minimum_allele_fraction=min_af) self.assertEqual([1, 3, 4], subsetted.index.tolist()) def test_remove_low_coverage_variants(self): feature_type = Evaluator.feature_type - mut_type = Evaluator.mut_type - germline_type = Evaluator.germline_type + mut_type = 'Somatic Variant' + germline_type = 'Germline Variant' coverage = Evaluator.coverage - min_coverage = Evaluator.min_coverage + min_coverage = 15 low_coverage = float(min_coverage) - 10 high_coverage = float(min_coverage) + 10 df = pd.DataFrame({feature_type: [mut_type, mut_type, germline_type, germline_type, 'Aneuploidy'], coverage: [low_coverage, high_coverage, low_coverage, high_coverage, np.nan]}) - subsetted = Evaluator.remove_low_coverage_variants(df) + subsetted = Evaluator.remove_low_coverage_variants(df, minimum_coverage=min_coverage) self.assertEqual([1, 3, 4], subsetted.index.tolist()) def test_remove_benign_variants(self): @@ -166,17 +166,17 @@ def test_create_string_list(self): self.assertEqual('the, quick, fox', Actionable.create_string_list(series)) def test_display_aneuploidy(self): - feature = Evaluator.aneuploidy_type + feature = Evaluator.feature df = pd.DataFrame({feature: ['A', 'B', 'C']}) idx = [0, 2] - series = 
Actionable.display_aneuploidy(df, idx, feature) + series = Actionable.display_aneuploidy(df, idx) self.assertEqual(['A', 'C'], series.tolist()) def test_display_burden(self): alt = Evaluator.alt df = pd.DataFrame({alt: ["10 mutations per Mb", "20", "30 mutations per Mb"]}) idx = [0, 2] - series = Actionable.display_burden(df, idx, alt) + series = Actionable.display_burden(df, idx) self.assertEqual(['10 mutations per Mb', '30 mutations per Mb'], series.tolist()) def test_display_copynumber(self): @@ -185,21 +185,21 @@ def test_display_copynumber(self): df = pd.DataFrame({feature: ['Foo', 'Bar', 'FooBar'], alt_type: ['Amp', 'Amp', 'Del']}) idx = [0, 2] - series = Actionable.display_copynumber(df, idx, feature, alt_type) + series = Actionable.display_copynumber(df, idx) self.assertEqual(['Foo Amp', 'FooBar Del'], series.tolist()) def test_display_fusion(self): alt = Evaluator.alt df = pd.DataFrame({alt: ['Foo--Bar', 'Bar--Foo', 'FooBar--Alpha']}) idx = [0, 2] - series = Actionable.display_fusion(df, idx, alt) + series = Actionable.display_fusion(df, idx) self.assertEqual(['Foo--Bar Fusion', 'FooBar--Alpha Fusion'], series.tolist()) def test_display_microsatellite_stability(self): feature = Evaluator.feature df = pd.DataFrame({feature: ['A', 'B', 'C']}) idx = [0, 2] - series = Actionable.display_microsatellite_stability(df, idx, feature) + series = Actionable.display_microsatellite_stability(df, idx) self.assertEqual(['A', 'C'], series.tolist()) def test_display_microsatellite_variants(self): @@ -208,7 +208,7 @@ def test_display_microsatellite_variants(self): df = pd.DataFrame({feature: ['Foo', '', 'Bar'], alt: ['Amp', '', 'Del']}) idx = [0, 2] - series = Actionable.display_microsatellite_variants(df, idx, feature, alt) + series = Actionable.display_microsatellite_variants(df, idx) self.assertEqual(['Foo: Amp', 'Bar: Del'], series.tolist()) def test_display_signature(self): @@ -216,7 +216,7 @@ def test_display_signature(self): alt = Evaluator.alt df = 
pd.DataFrame({feature: ['Signature 1', '', 'Signature 2'], alt: [0.523, '', 0.0145]}) idx = [0, 2] - series = Actionable.display_signature(df, idx, feature, alt) + series = Actionable.display_signature(df, idx) self.assertEqual(['Signature 1 (52%)', 'Signature 2 (1%)'], series.tolist()) @@ -228,7 +228,7 @@ def test_display_variant(self): alt_type: ['Missense', 'Nonsense', 'Frameshift'], alt: ['p.V600E', 'p.N500*', 'p.L151fs*']}) idx = [0, 2] - series = Actionable.display_variant(df, idx, feature, alt_type, alt) + series = Actionable.display_variant(df, idx) self.assertEqual(['Foo p.V600E (Missense)', 'FooBar p.L151fs* (Frameshift)'], series.tolist()) def test_format_variant_classification(self): diff --git a/moalmanac/test/features_tests.py b/moalmanac/test/features_tests.py index 4143d43..a591582 100644 --- a/moalmanac/test/features_tests.py +++ b/moalmanac/test/features_tests.py @@ -2,8 +2,8 @@ import pandas as pd from moalmanac import features -from config import CONFIG, COLNAMES - +from config import COLNAMES +from reader import Ini class UnitTestFeatures(unittest.TestCase): def test_annotate_feature_type(self): @@ -69,19 +69,30 @@ def test_create_column_map(self): self.assertEqual(column_map['call'], features.Features.alt_type) def test_filter_calls(self): + amp_string = 'Amplification' + del_string = 'Deletion' tmp = pd.Series(['Amplification', 'Deletion', '', 'Deletion']) - idx = features.CopyNumberCalled.filter_calls(tmp) + idx = features.CopyNumberCalled.filter_calls(series=tmp, amp_string=amp_string, del_string=del_string) self.assertEqual([0, 1, 3], idx[idx].index.tolist()) self.assertEqual([2], idx[~idx].index.tolist()) class UnitTestCopyNumberTotal(unittest.TestCase): def test_annotate_amp_del(self): + amp_string = 'Amplification' + del_string = 'Deletion' index = pd.Index([0, 1, 2]) index_amp = pd.Index([0]) index_del = pd.Index([2]) - expected = [features.CopyNumberTotal.amplification, '', features.CopyNumberTotal.deletion] - 
self.assertEqual(expected, features.CopyNumberTotal.annotate_amp_del(index, index_amp, index_del).tolist()) + expected = [amp_string, '', del_string] + result = features.CopyNumberTotal.annotate_amp_del( + idx=index, + idx_amp=index_amp, + idx_del=index_del, + amp_string=amp_string, + del_string=del_string + ) + self.assertEqual(expected, result.tolist()) def test_create_column_map(self): column_map = features.CopyNumberTotal.create_column_map() @@ -102,12 +113,20 @@ def test_drop_duplicate_genes(self): self.assertEqual(True, idx in features.CopyNumberTotal.drop_duplicate_genes(df)) def test_filter_by_threshold(self): + amp_string = 'Amplification' + del_string = 'Deletion' values = pd.Series(range(1, 101)) df = pd.DataFrame({features.Features.feature: values, features.Features.chr: values, features.Features.start: values, features.Features.segment_mean: values}) - accept, reject = features.CopyNumberTotal.filter_by_threshold(df, 97.5, 2.5) + accept, reject = features.CopyNumberTotal.filter_by_threshold( + df=df, + percentile_amp=97.5, + percentile_del=2.5, + amp_string=amp_string, + del_string=del_string + ) expected = pd.Index([0, 1, 2, 97, 98, 99]) for idx in expected: self.assertEqual(True, idx in accept.index) @@ -157,13 +176,17 @@ def test_subset_significant_signatures(self): class UnitTestFusion(unittest.TestCase): def test_create_column_map(self): - column_map = features.Fusion.create_colmap() + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + column_map = features.Fusion.create_colmap(config) + leftbreakpoint = 'leftbreakpoint' + rightbreakpoint = 'rightbreakpoint' + values = list(column_map.values()) self.assertEqual(4, len(column_map)) self.assertEqual(features.Features.feature, values[0]) self.assertEqual(features.Features.spanningfrags, values[1]) - self.assertEqual(features.Fusion.leftbreakpoint, values[2]) - self.assertEqual(features.Fusion.rightbreakpoint, values[3]) + self.assertEqual(leftbreakpoint, 
values[2]) + self.assertEqual(rightbreakpoint, values[3]) def test_filter_by_spanning_fragment_count(self): series = pd.Series([4, 5, 6]) diff --git a/moalmanac/test/investigator_tests.py b/moalmanac/test/investigator_tests.py index dc636db..30dc662 100644 --- a/moalmanac/test/investigator_tests.py +++ b/moalmanac/test/investigator_tests.py @@ -4,7 +4,7 @@ from datasources import Preclinical from investigator import Investigator, SensitivityDictionary - +from reader import Ini class UnitTestSensitivityDictionary(unittest.TestCase): def test_calculate_series_exp(self): @@ -54,7 +54,18 @@ def test_calculate_mann_whitney_u(self): self.assertTrue(math.isnan(statistic)) def test_create(self): - dbs = Preclinical.import_dbs() + dbs_paths = { + 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': '../datasources/preclinical/cell-lines.pkl' + } + dbs = Preclinical.import_dbs(dbs_paths) data_dictionary = { 'feature_type': ['Somatic Variant'], 'feature': ['BRAF'], @@ -67,7 +78,8 @@ def test_create(self): actionable = pd.DataFrame(data_dictionary, index=[0]) expected_dabrafenib = '2.322e-12' expected_trametinib = '2.344e-09' - result = SensitivityDictionary.create(dbs, actionable) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + result = SensitivityDictionary.create(dbs, actionable, 
config) self.assertEqual(result[0]['Dabrafenib']['BRAF']['comparison']['pvalue_mww'], expected_dabrafenib) self.assertEqual(result[0]['Trametinib']['BRAF']['comparison']['pvalue_mww'], expected_trametinib) @@ -104,17 +116,27 @@ def test_generate_feature_strings(self): self.assertEqual(result, ['CDKN2A Copy Number Deletion', 'CDKN2A Copy Number', 'CDKN2A']) def test_select_split_function(self): - var = Investigator.feature_types['variant'] - cn = Investigator.feature_types['copy_number'] - fusion = Investigator.feature_types['fusion'] + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + var_string = config['feature_types']['mut'] + cn_string = config['feature_types']['cna'] + fusion_string = config['feature_types']['fusion'] var_function = SensitivityDictionary.split_samples_for_variants cn_function = SensitivityDictionary.split_samples_for_copy_numbers fusion_function = SensitivityDictionary.split_samples_for_fusions - self.assertEqual(SensitivityDictionary.select_split_function(var), var_function) - self.assertEqual(SensitivityDictionary.select_split_function(cn), cn_function) - self.assertEqual(SensitivityDictionary.select_split_function(fusion), fusion_function) + self.assertEqual( + SensitivityDictionary.select_split_function(var_string, var_string, cn_string, fusion_string), + var_function + ) + self.assertEqual( + SensitivityDictionary.select_split_function(cn_string, var_string, cn_string, fusion_string), + cn_function + ) + self.assertEqual( + SensitivityDictionary.select_split_function(fusion_string, var_string, cn_string, fusion_string), + fusion_function + ) def test_split_samples_by_wt_mut(self): gene = SensitivityDictionary.gene @@ -161,9 +183,11 @@ def test_split_samples_by_wt_mut(self): fusions: db_fusions } - results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples) - results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples) - 
results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + + results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples, config) + results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples, config) + results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples, config) self.assertEqual(results_variants['BRAF']['samples'][0], ['B', 'C', 'D']) self.assertEqual(results_variants['BRAF']['samples'][1], ['A', 'E']) self.assertEqual(results_variants['BRAF Somatic Variant']['samples'][0], ['E']) @@ -217,7 +241,8 @@ def test_split_samples_for_copy_numbers(self): cnas: db_cnas, } - results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples, config) self.assertEqual(results_cnas['CDKN2A']['samples'][0], ['A', 'C', 'D', 'E']) self.assertEqual(results_cnas['CDKN2A']['samples'][1], ['B']) self.assertEqual(results_cnas['CDKN2A Copy Number']['samples'][0], ['A', 'D', 'E']) @@ -253,7 +278,8 @@ def test_split_samples_for_fusions(self): fusions: db_fusions } - results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples, config) self.assertEqual(results_fusions['TMPRSS2']['samples'][0], ['A', 'B', 'E']) self.assertEqual(results_fusions['TMPRSS2']['samples'][1], ['C', 'D']) self.assertEqual(results_fusions['ERG']['samples'][0], ['A', 'B', 'D', 'E']) @@ -294,7 +320,8 @@ def test_split_samples_for_variants(self): variants: 
db_variants } - results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples, config) self.assertEqual(results_variants['BRAF']['samples'][0], ['B', 'C', 'D']) self.assertEqual(results_variants['BRAF']['samples'][1], ['A', 'E']) self.assertEqual(results_variants['BRAF Somatic Variant']['samples'][0], ['E']) diff --git a/moalmanac/wrapper_deconstructsigs.sh b/moalmanac/wrapper_deconstructsigs.sh deleted file mode 100644 index 1b6307c..0000000 --- a/moalmanac/wrapper_deconstructsigs.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -patient_id=$1 -snv_handle=$2 -sample=$3 -ref=$4 -alt=$5 -chr=$6 -pos=$7 -folder=$8 - -Rscript run_deconstructsigs.R ${patient_id} ${snv_handle} ${sample} ${ref} ${alt} ${chr} ${pos} ${folder}