From 6e369efb23b98b5cfcb55e54801d7be1fab1bb96 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Wed, 3 Jul 2024 20:31:13 -0400 Subject: [PATCH 01/19] Refactored main function to pass config.ini as argument, instead of as global. DB paths separated into two separate ini files --- moalmanac/annotation-databases.ini | 14 + moalmanac/annotator.py | 161 ++++++------ moalmanac/config.ini | 38 +-- moalmanac/config.py | 8 - moalmanac/datasources.py | 59 +---- moalmanac/evaluator.py | 153 +++++------ moalmanac/features.py | 157 ++++++----- moalmanac/illustrator.py | 39 +-- moalmanac/investigator.py | 54 ++-- moalmanac/matchmaker.py | 43 +-- moalmanac/moalmanac.py | 392 +++++++++++++++++----------- moalmanac/preclinical-databases.ini | 10 + moalmanac/reader.py | 31 ++- moalmanac/reporter.py | 23 +- moalmanac/run_example.py | 27 +- 15 files changed, 638 insertions(+), 571 deletions(-) create mode 100644 moalmanac/annotation-databases.ini create mode 100644 moalmanac/preclinical-databases.ini diff --git a/moalmanac/annotation-databases.ini b/moalmanac/annotation-databases.ini new file mode 100644 index 0000000..65503a9 --- /dev/null +++ b/moalmanac/annotation-databases.ini @@ -0,0 +1,14 @@ +[databases] +almanac_handle = datasources/moalmanac/molecular-oncology-almanac.json +cancerhotspots_handle = datasources/cancerhotspots/hotspots_v2.txt +3dcancerhotspots_handle = datasources/cancerhotspots/hotspots3d.txt +cgc_handle = datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv +cosmic_handle = datasources/cosmic/CosmicMutantExport_v97.lite.txt +gsea_pathways_handle = datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt +gsea_modules_handle = datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt +exac_handle = datasources/exac/exac.expanded.r1.txt +acmg_handle = datasources/acmg/acmg.secondaryfindings.v3.txt +clinvar_handle = datasources/clinvar/variant_summary.lite.txt +hereditary_handle = datasources/hereditary/hereditary.txt +oncotree_handle = 
datasources/oncotree/oncotree.2023-03-09.txt +lawrence_handle = datasources/lawrence/lawrence_mapped_ontology.txt \ No newline at end of file diff --git a/moalmanac/annotator.py b/moalmanac/annotator.py index 2c1bf90..8bad42b 100644 --- a/moalmanac/annotator.py +++ b/moalmanac/annotator.py @@ -8,9 +8,6 @@ import features from config import COLNAMES -from config import CONFIG - -EXAC_CONFIG = CONFIG['exac'] class Annotator: @@ -45,28 +42,28 @@ def annotate(cls, df, dbs, importer, bin_name, comparison_columns): return df @classmethod - def annotate_almanac(cls, df, dbs, ontology): + def annotate_almanac(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) return df @classmethod - def annotate_germline(cls, df, dbs, ontology): + def annotate_germline(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) df = ACMG.annotate(df, dbs) df = ClinVar.annotate(df, dbs) df = Hereditary.annotate(df, dbs) - df = ExACExtended.annotate(df, dbs) + df = ExACExtended.annotate(df, dbs, config) df = MSI.annotate(df) return df @classmethod - def annotate_simple(cls, df, dbs, ontology): + def annotate_simple(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerHotspots3D.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) @@ -79,23 +76,23 @@ def annotate_simple(cls, df, dbs, ontology): return df @classmethod - def annotate_somatic(cls, df, dbs, ontology): + def annotate_somatic(cls, df, dbs, ontology, config): df[cls.score_bin] = 
cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerHotspots3D.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) df = Cosmic.annotate(df, dbs) df = GSEACancerPathways.annotate(df, dbs) df = GSEACancerModules.annotate(df, dbs) - df = ExAC.annotate(df, dbs) + df = ExAC.annotate(df, dbs, config) df = MSI.annotate(df) return df @classmethod - def annotate_somatic_no_exac(cls, df, dbs, ontology): + def annotate_somatic_no_exac(cls, df, dbs, ontology, config): df[cls.score_bin] = cls.preallocate_bin(cls.score_bin, df.index) - df = Almanac.annotate(df, dbs, ontology) + df = Almanac.annotate(df, dbs, ontology, config) df = CancerHotspots.annotate(df, dbs) df = CancerHotspots3D.annotate(df, dbs) df = CancerGeneCensus.annotate(df, dbs) @@ -381,32 +378,21 @@ class Almanac: } } - feature_types_section = 'feature_types' - feature_types_config = CONFIG[feature_types_section] - aneuploidy = feature_types_config['aneuploidy'] - burden = feature_types_config['burden'] - copynumber_variant = feature_types_config['cna'] - fusion = feature_types_config['fusion'] - germline_variant = feature_types_config['germline'] - microsatellite_status = feature_types_config['microsatellite'] - signature = feature_types_config['signature'] - somatic_variant = feature_types_config['mut'] - @classmethod - def annotate(cls, df, dbs, ontology): + def annotate(cls, df, dbs, ontology, config): db = datasources.Almanac.import_ds(dbs) ds = db['content'] list_genes = db['genes'] annotation_function_dict = { - cls.aneuploidy: cls.annotate_aneuploidy, - cls.burden: cls.annotate_burden, - cls.copynumber_variant: cls.annotate_copy_number, - cls.fusion: cls.annotate_fusion, - cls.germline_variant: cls.annotate_variants, - cls.microsatellite_status: cls.annotate_microsatellite_stability, - cls.signature: cls.annotate_signatures, - cls.somatic_variant: 
cls.annotate_variants + config['feature_types']['aneuploidy']: cls.annotate_aneuploidy, + config['feature_types']['burden']: cls.annotate_burden, + config['feature_types']['cna']: cls.annotate_copy_number, + config['feature_types']['fusion']: cls.annotate_fusion, + config['feature_types']['germline']: cls.annotate_variants, + config['feature_types']['microsatellite']: cls.annotate_microsatellite_stability, + config['feature_types']['signature']: cls.annotate_signatures, + config['feature_types']['mut']: cls.annotate_variants } for feature_type, group in df.groupby(cls.feature_type): @@ -422,7 +408,13 @@ def annotate(cls, df, dbs, ontology): .astype(float) ) - if feature_type in [cls.somatic_variant, cls.germline_variant, cls.copynumber_variant, cls.fusion]: + simple_biomarkers = [ + config['feature_types']['mut'], + config['feature_types']['germline'], + config['feature_types']['cna'], + config['feature_types']['fusion'] + ] + if feature_type in simple_biomarkers: idx = group[cls.feature].isin(list_genes) df.loc[group[~idx].index, cls.bin_name] = 0 group = group[group[cls.feature].isin(list_genes)] @@ -981,18 +973,15 @@ class ExAC: af = datasources.ExAC.af bin_name = Annotator.exac_common_bin - exac_common_threshold = EXAC_CONFIG['exac_common_af_threshold'] str_columns = [chr, ref, alt] int_columns = [start] - somatic = CONFIG['feature_types']['mut'] - germline = CONFIG['feature_types']['germline'] feature_type = features.Features.feature_type @classmethod - def append_exac_af(cls, df, ds, ds_columns): - variants, not_variants = cls.subset_for_variants(df) + def append_exac_af(cls, df, ds, ds_columns, variant_biomarker_types): + variants, not_variants = cls.subset_for_variants(df, variant_biomarker_types) ds = ds.loc[:, ds_columns] for column, data_type in [(cls.str_columns, str), (cls.int_columns, float), (cls.int_columns, int)]: @@ -1017,15 +1006,24 @@ def append_exac_af(cls, df, ds, ds_columns): return result @classmethod - def annotate(cls, df, dbs): + def 
annotate(cls, df, dbs, config): df_dropped = cls.drop_existing_columns(df) ds = datasources.ExAC.import_ds(dbs) - df_annotated = cls.append_exac_af(df_dropped, ds, [cls.chr, cls.start, cls.ref, cls.alt, cls.af]) - df_annotated[cls.bin_name] = cls.annotate_common_af(df_annotated[cls.af]) + df_annotated = cls.append_exac_af( + df=df_dropped, + ds=ds, + ds_columns=[cls.chr, cls.start, cls.ref, cls.alt, cls.af], + variant_biomarker_types=[config['feature_types']['mut'], config['feature_types']['germline']] + ) + common_allele_frequency_threshold=config['exac']['exac_common_af_threshold'] + df_annotated[cls.bin_name] = cls.annotate_common_af( + series_exac_af=df_annotated[cls.af], + threshold=common_allele_frequency_threshold + ) return features.Features.preallocate_missing_columns(df_annotated) @classmethod - def annotate_common_af(cls, series_exac_af): + def annotate_common_af(cls, series_exac_af, threshold): if not series_exac_af.empty: series = pd.Series(float(0.0), index=series_exac_af.index.tolist()) condition = (series_exac_af @@ -1034,7 +1032,7 @@ def annotate_common_af(cls, series_exac_af): .astype(float).mean(axis=1) .fillna(0.0) ) - idx = condition.astype(float) >= float(cls.exac_common_threshold) + idx = condition.astype(float) >= float(threshold) series[idx] = float(1.0) return series else: @@ -1049,8 +1047,8 @@ def format_columns(cls, dataframe, column, data_type): return dataframe.loc[dataframe.index, column].astype(data_type) @classmethod - def subset_for_variants(cls, dataframe): - idx = dataframe[cls.feature_type].isin([cls.somatic, cls.germline]) + def subset_for_variants(cls, dataframe, variant_biomarker_types): + idx = dataframe[cls.feature_type].isin(variant_biomarker_types) return dataframe[idx].copy(), dataframe[~idx].copy() @@ -1083,11 +1081,20 @@ class ExACExtended: an_afr, an_amr, an_eas, an_fin, an_nfe, an_sas, an_oth] @classmethod - def annotate(cls, df, dbs): + def annotate(cls, df, dbs, config): df_dropped = ExAC.drop_existing_columns(df) 
ds = datasources.ExACExtended.import_ds(dbs) - df_annotated = ExAC.append_exac_af(df_dropped, ds, cls.ds_columns) - df_annotated[ExAC.bin_name] = ExAC.annotate_common_af(df_annotated[ExAC.af]) + df_annotated = ExAC.append_exac_af( + df=df_dropped, + ds=ds, + ds_columns=[cls.chr, cls.start, cls.ref, cls.alt, cls.af], + variant_biomarker_types=[config['feature_types']['mut'], config['feature_types']['germline']] + ) + common_allele_frequency_threshold = config['exac']['exac_common_af_threshold'] + df_annotated[ExAC.bin_name] = ExAC.annotate_common_af( + series_exac_af=df_annotated[ExAC.af], + threshold=common_allele_frequency_threshold + ) return features.Features.preallocate_missing_columns(df_annotated) @@ -1156,16 +1163,14 @@ class OverlapValidation: validation_coverage = COLNAMES[section]['validation_coverage'] validation_detection_power = COLNAMES[section]['validation_detection_power'] - somatic_variants = CONFIG['feature_types']['mut'] - merge_cols = [gene, alt_type, alt] fill_cols = [tumor_f, validation_tumor_f, validation_coverage] @classmethod - def append_validation(cls, primary, validation): + def append_validation(cls, primary, validation, biomarker_type): df = cls.drop_validation_columns(primary) df = cls.merge_data_frames(df, validation, cls.merge_cols) - idx = cls.get_mutation_index(df) + idx = cls.get_mutation_index(df, biomarker_type) for column in cls.fill_cols: df.loc[idx, column] = Annotator.fill_na( dataframe=df.loc[idx, :], @@ -1198,8 +1203,8 @@ def drop_validation_columns(cls, df): return df.drop([cls.validation_tumor_f, cls.validation_coverage], axis=1) @classmethod - def get_mutation_index(cls, df): - return df[df[cls.feature_type].eq(cls.somatic_variants)].index + def get_mutation_index(cls, df, biomarker_type): + return df[df[cls.feature_type].eq(biomarker_type)].index @classmethod def merge_data_frames(cls, df1, df2, columns): @@ -1310,12 +1315,6 @@ class PreclinicalMatchmaking: feature_display = COLNAMES[section]['feature_display'] 
predictive_implication = COLNAMES[section]['predictive_implication'] - feature_types_section = 'feature_types' - feature_types_config = CONFIG[feature_types_section] - copy_number = feature_types_config['cna'] - fusion = feature_types_config['fusion'] - somatic_variant = feature_types_config['mut'] - evidence_map = { 'FDA-Approved': 5, 'Guideline': 4, 'Clinical trial': 3, 'Clinical evidence': 2, 'Preclinical': 1, 'Inferential': 0} @@ -1328,14 +1327,18 @@ class PreclinicalMatchmaking: fusions_gene2 = 'fusions_gene2' @classmethod - def annotate(cls, input_dict, dbs): - input_variants = input_dict[cls.somatic_variant] - input_copy_number_alterations = input_dict[cls.copy_number] - input_fusions = input_dict[cls.fusion] + def annotate(cls, input_dict, dbs, config): + copy_number = config['feature_types']['cna'] + fusion = config['feature_types']['fusion'] + somatic_variant = config['feature_types']['mut'] + + input_variants = input_dict[somatic_variant] + input_copy_number_alterations = input_dict[copy_number] + input_fusions = input_dict[fusion] - variants = cls.annotate_somatic_variants(input_variants, dbs) - copy_number_alterations = cls.annotate_copy_numbers(input_copy_number_alterations, dbs) - fusions, fusions_gene1, fusions_gene2 = cls.annotate_fusions(input_fusions, dbs) + variants = cls.annotate_somatic_variants(input_variants, dbs, somatic_variant) + copy_number_alterations = cls.annotate_copy_numbers(input_copy_number_alterations, dbs, copy_number) + fusions, fusions_gene1, fusions_gene2 = cls.annotate_fusions(input_fusions, dbs, fusion) return { cls.variants: variants, cls.cnas: copy_number_alterations, @@ -1345,12 +1348,12 @@ def annotate(cls, input_dict, dbs): } @classmethod - def annotate_copy_numbers(cls, df, dbs): + def annotate_copy_numbers(cls, df, dbs, biomarker_type_string): almanac = datasources.Almanac.import_ds(dbs) almanac_genes = datasources.Almanac.import_genes(dbs) - df = df[df[cls.feature_type].eq(cls.copy_number)] - db = 
Almanac.subset_records(almanac['content'], cls.feature_type, cls.copy_number) + df = df[df[cls.feature_type].eq(biomarker_type_string)] + db = Almanac.subset_records(almanac['content'], cls.feature_type, biomarker_type_string) db = pd.DataFrame(db) column_map = {cls.gene: cls.feature, cls.direction: cls.alteration_type} @@ -1363,12 +1366,12 @@ def annotate_copy_numbers(cls, df, dbs): return df @classmethod - def annotate_fusions(cls, df, dbs): + def annotate_fusions(cls, df, dbs, biomarker_type_string): almanac = datasources.Almanac.import_ds(dbs) almanac_genes = datasources.Almanac.import_genes(dbs) - df = df[df[cls.feature_type].eq(cls.fusion)] - db = Almanac.subset_records(almanac['content'], cls.feature_type, cls.fusion) + df = df[df[cls.feature_type].eq(biomarker_type_string)] + db = Almanac.subset_records(almanac['content'], cls.feature_type, biomarker_type_string) db = pd.DataFrame(db) column_map = {cls.rearrangement_type: cls.alteration_type} @@ -1503,12 +1506,12 @@ def annotate_fusions_matching(cls, df, db, db_genes, consider_partner=False): return df @classmethod - def annotate_somatic_variants(cls, df, dbs): + def annotate_somatic_variants(cls, df, dbs, biomarker_type_string): almanac = datasources.Almanac.import_ds(dbs) almanac_genes = datasources.Almanac.import_genes(dbs) - df = df[df[cls.feature_type].eq(cls.somatic_variant)] - db = Almanac.subset_records(almanac['content'], cls.feature_type, cls.somatic_variant) + df = df[df[cls.feature_type].eq(biomarker_type_string)] + db = Almanac.subset_records(almanac['content'], cls.feature_type, biomarker_type_string) db = pd.DataFrame(db) replacement_dictionary = {'Oncogenic Mutations': '', 'Activating mutation': ''} diff --git a/moalmanac/config.ini b/moalmanac/config.ini index 399c6aa..a22562b 100644 --- a/moalmanac/config.ini +++ b/moalmanac/config.ini @@ -1,11 +1,11 @@ [function_toggle] ; Use this section to enable or disable functions performed by MOAlmanac by writing either 'on' or 'off' 
-calculate_model_similarity = on -calculate_preclinical_efficacy = on +calculate_model_similarity = off +calculate_preclinical_efficacy = off generate_actionability_report = on -include_model_similarity_in_actionability_report = on -include_preclinical_efficacy_in_actionability_report = on -plot_preclinical_efficacy = on +include_model_similarity_in_actionability_report = off +include_preclinical_efficacy_in_actionability_report = off +plot_preclinical_efficacy = off [versions] interpreter = 0.6.0 @@ -46,30 +46,4 @@ microsatellite = Microsatellite Stability burden = Mutational Burden signature = Mutational Signature aneuploidy = Aneuploidy -knockdown = Knockdown - -[databases] -almanac_handle = datasources/moalmanac/molecular-oncology-almanac.json -cancerhotspots_handle = datasources/cancerhotspots/hotspots_v2.txt -3dcancerhotspots_handle = datasources/cancerhotspots/hotspots3d.txt -cgc_handle = datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv -cosmic_handle = datasources/cosmic/CosmicMutantExport_v97.lite.txt -gsea_pathways_handle = datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt -gsea_modules_handle = datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt -exac_handle = datasources/exac/exac.expanded.r1.txt -acmg_handle = datasources/acmg/acmg.secondaryfindings.v3.txt -clinvar_handle = datasources/clinvar/variant_summary.lite.txt -hereditary_handle = datasources/hereditary/hereditary.txt -oncotree_handle = datasources/oncotree/oncotree.2023-03-09.txt -lawrence_handle = datasources/lawrence/lawrence_mapped_ontology.txt - -[preclinical] -almanac_gdsc_mappings = datasources/preclinical/formatted/almanac-gdsc-mappings.json -summary = datasources/preclinical/formatted/cell-lines.summary.txt -variants = datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt -copynumbers = datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt -fusions = datasources/preclinical/annotated/cell-lines.fusions.annotated.txt -fusions1 = 
datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt -fusions2 = datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt -gdsc = datasources/preclinical/formatted/sanger.gdsc.txt -dictionary = datasources/preclinical/cell-lines.pkl +knockdown = Knockdown \ No newline at end of file diff --git a/moalmanac/config.py b/moalmanac/config.py index 42bada4..3a66f5d 100644 --- a/moalmanac/config.py +++ b/moalmanac/config.py @@ -1,15 +1,8 @@ import configparser -default_config = 'config.ini' default_colnames = 'colnames.ini' -def create_config(): - config = configparser.ConfigParser() - config.read(default_config) - return config - - def create_colnames_dict(config): dictionary = {} for section in config.sections(): @@ -25,5 +18,4 @@ def create_colnames(): return create_colnames_dict(config) -CONFIG = create_config() COLNAMES = create_colnames() diff --git a/moalmanac/datasources.py b/moalmanac/datasources.py index 741871f..60fa523 100644 --- a/moalmanac/datasources.py +++ b/moalmanac/datasources.py @@ -2,7 +2,6 @@ from reader import Reader from config import COLNAMES -from config import CONFIG class Datasources: @@ -73,24 +72,6 @@ class Datasources: an_sas = COLNAMES[datasources_section]['exac_sas_an'] an_oth = COLNAMES[datasources_section]['exac_oth_an'] - @classmethod - def generate_db_dict(cls, config): - return { - 'almanac_handle': config.get('databases', 'almanac_handle'), - 'hotspots_handle': config.get('databases', 'cancerhotspots_handle'), - '3dhotspots_handle': config.get('databases', '3dcancerhotspots_handle'), - 'clinvar_handle': config.get('databases', 'clinvar_handle'), - 'cgc_handle': config.get('databases', 'cgc_handle'), - 'cosmic_handle': config.get('databases', 'cosmic_handle'), - 'gsea_pathways_handle': config.get('databases', 'gsea_pathways_handle'), - 'gsea_modules_handle': config.get('databases', 'gsea_modules_handle'), - 'exac_handle': config.get('databases', 'exac_handle'), - 'acmg_handle': 
config.get('databases', 'acmg_handle'), - 'hereditary_handle': config.get('databases', 'hereditary_handle'), - 'oncotree_handle': config.get('databases', 'oncotree_handle'), - 'lawrence_handle': config.get('databases', 'lawrence_handle') - } - class ACMG: gene = Datasources.feature @@ -196,7 +177,7 @@ def format_cancerhotspots(cls, df): @classmethod def import_ds(cls, dbs): - df = Reader.safe_read(dbs['hotspots_handle'], '\t', cls.column_map) + df = Reader.safe_read(dbs['cancerhotspots_handle'], '\t', cls.column_map) return cls.format_cancerhotspots(df) @@ -211,7 +192,7 @@ class CancerHotspots3D: @classmethod def import_ds(cls, dbs): - return Reader.safe_read(dbs['3dhotspots_handle'], '\t', cls.column_map) + return Reader.safe_read(dbs['3dcancerhotspots_handle'], '\t', cls.column_map) class ClinVar: @@ -408,16 +389,6 @@ def import_ds(cls, dbs): class Preclinical: section = 'preclinical' - summary_handle = CONFIG[section]['summary'] - variants_handle = CONFIG[section]['variants'] - cnas_handle = CONFIG[section]['copynumbers'] - fusions_handle = CONFIG[section]['fusions'] - fusions_gene1_handle = CONFIG[section]['fusions1'] - fusions_gene2_handle = CONFIG[section]['fusions2'] - gdsc_handle = CONFIG[section]['gdsc'] - mappings_handle = CONFIG[section]['almanac_gdsc_mappings'] - dictionary_handle = CONFIG[section]['dictionary'] - feature = COLNAMES[section]['feature'] partner = COLNAMES[section]['partner'] gene = COLNAMES[section]['gene'] @@ -437,12 +408,6 @@ class Preclinical: mappings = 'mappings' dictionary = 'dictionary' - feature_type = Datasources.feature_type - feature_types_section = 'feature_types' - variant_type = CONFIG[feature_types_section]['mut'] - copy_number_type = CONFIG[feature_types_section]['cna'] - fusion_type = CONFIG[feature_types_section]['fusion'] - @classmethod def create_convert_names_dict(cls, dataframe, map_from, map_to): return dataframe.loc[:, [map_from, map_to]].dropna().set_index(map_from)[map_to].to_dict() @@ -452,16 +417,16 @@ def 
generate_sample_list(dataframe, use_column, sample_column): return dataframe[dataframe[use_column].astype(bool).astype(int).eq(1)][sample_column].sort_values().tolist() @classmethod - def import_dbs(cls): - summary = Reader.read(cls.summary_handle, delimiter='\t') - variants = Reader.read(cls.variants_handle, delimiter='\t', low_memory=False) - cnas = Reader.read(cls.cnas_handle, delimiter='\t', low_memory=False) - fusions = Reader.read(cls.fusions_handle, delimiter='\t', low_memory=False) - fusions1 = Reader.read(cls.fusions_gene1_handle, delimiter='\t', low_memory=False) - fusions2 = Reader.read(cls.fusions_gene2_handle, delimiter='\t', low_memory=False) - gdsc = Reader.read(cls.gdsc_handle, delimiter='\t', low_memory=False) - mappings = Reader.read_json(cls.mappings_handle) - dictionary = Reader.read_pickle(cls.dictionary_handle) + def import_dbs(cls, paths_dictionary): + summary = Reader.read(paths_dictionary['summary'], delimiter='\t') + variants = Reader.read(paths_dictionary['variants'], delimiter='\t', low_memory=False) + cnas = Reader.read(paths_dictionary['copynumbers'], delimiter='\t', low_memory=False) + fusions = Reader.read(paths_dictionary['fusions'], delimiter='\t', low_memory=False) + fusions1 = Reader.read(paths_dictionary['fusions1'], delimiter='\t', low_memory=False) + fusions2 = Reader.read(paths_dictionary['fusions2'], delimiter='\t', low_memory=False) + gdsc = Reader.read(paths_dictionary['gdsc'], delimiter='\t', low_memory=False) + mappings = Reader.read_json(paths_dictionary['almanac_gdsc_mappings']) + dictionary = Reader.read_pickle(paths_dictionary['dictionary']) ccle_map = cls.create_convert_names_dict(summary, cls.ccle_name, cls.broad) sanger_map = cls.create_convert_names_dict(summary, cls.sanger, cls.broad) diff --git a/moalmanac/evaluator.py b/moalmanac/evaluator.py index d541700..57d94d9 100644 --- a/moalmanac/evaluator.py +++ b/moalmanac/evaluator.py @@ -4,11 +4,10 @@ import datasources import features -from config import CONFIG 
from config import COLNAMES -class Evaluator(object): +class Evaluator: """ Evaluate based on annotated bins """ @@ -81,20 +80,6 @@ class Evaluator(object): microsatellite_section = 'microsatellite' supporting_variants = COLNAMES[microsatellite_section]['supporting_variants'] - feature_type_section = 'feature_types' - mut_type = CONFIG[feature_type_section]['mut'] - copynumber_type = CONFIG[feature_type_section]['cna'] - germline_type = CONFIG[feature_type_section]['germline'] - fusion_type = CONFIG[feature_type_section]['fusion'] - burden_type = CONFIG[feature_type_section]['burden'] - microsatellite_type = CONFIG[feature_type_section]['microsatellite'] - signature_type = CONFIG[feature_type_section]['signature'] - aneuploidy_type = CONFIG[feature_type_section]['aneuploidy'] - - mutations_section = 'mutations' - min_coverage = CONFIG[mutations_section]['min_coverage'] - min_af = CONFIG[mutations_section]['min_af'] - @classmethod def assign_bin(cls, df, bin_column, bin_label): series_score_bin = df.loc[:, cls.score_bin] @@ -145,17 +130,18 @@ def remap_almanac_bins(series, old_value, new_value): return series.astype(int).replace(to_replace=old_value, value=new_value) @classmethod - def remove_low_allele_fraction_variants(cls, df): - idx_mut = df[df[cls.feature_type].isin([cls.mut_type, cls.germline_type])].index - idx_low_quality = df[df[cls.tumor_f].astype(float).lt(float(cls.min_af))].index + def remove_low_allele_fraction_variants(cls, df, minimum_allele_fraction=0.05): + # hard coding somatic and germline variant strings until future refactor + idx_mut = df[df[cls.feature_type].isin(['Somatic Variant', 'Germline Variant'])].index + idx_low_quality = df[df[cls.tumor_f].astype(float).lt(float(minimum_allele_fraction))].index idx_low_quality_muts = idx_mut.intersection(idx_low_quality) idx = df.index.difference(idx_low_quality_muts) return df.loc[idx, :] @classmethod - def remove_low_coverage_variants(cls, df): - idx_mut = 
df[df[cls.feature_type].isin([cls.mut_type, cls.germline_type])].index - idx_low_quality = df[df[cls.coverage].astype(float).le(float(cls.min_coverage))].index + def remove_low_coverage_variants(cls, df, minimum_coverage=15): + idx_mut = df[df[cls.feature_type].isin(['Somatic Variant', 'Germline Variant'])].index + idx_low_quality = df[df[cls.coverage].astype(float).le(float(minimum_coverage))].index idx_low_quality_muts = idx_mut.intersection(idx_low_quality) idx = df.index.difference(idx_low_quality_muts) return df.loc[idx, :] @@ -193,46 +179,48 @@ def create_string_list(series): return ', '.join(map(str, series.unique())) @classmethod - def display_aneuploidy(cls, df, idx, feature): - return df.loc[idx, feature] + def display_aneuploidy(cls, df, idx): + return df.loc[idx, Evaluator.feature] @classmethod - def display_burden(cls, df, idx, alt): - return df.loc[idx, alt].astype(str) + def display_burden(cls, df, idx): + return df.loc[idx, Evaluator.alt].astype(str) @classmethod - def display_copynumber(cls, df, idx, feature, alt_type): - gene = df.loc[idx, feature] - direction = df.loc[idx, alt_type] + def display_copynumber(cls, df, idx): + gene = df.loc[idx, Evaluator.feature] + direction = df.loc[idx, Evaluator.alt_type] # Copy Number: CDKN2A Deletion return gene + ' ' + direction @classmethod - def display_fusion(cls, df, idx, alt): - fusion = df.loc[idx, alt] + def display_fusion(cls, df, idx): + fusion = df.loc[idx, Evaluator.alt] # Rearrangement: BCR--ABL1 Fusion return fusion + ' Fusion' @classmethod - def display_microsatellite_stability(cls, df, idx, feature): - return df.loc[idx, feature] + def display_microsatellite_stability(cls, df, idx): + return df.loc[idx, Evaluator.feature] @classmethod - def display_microsatellite_variants(cls, df, idx, feature, alt): - return df.loc[idx, feature] + ': ' + df.loc[idx, alt] + def display_microsatellite_variants(cls, df, idx): + return df.loc[idx, Evaluator.feature] + ': ' + df.loc[idx, Evaluator.alt] @classmethod 
- def display_signature(cls, df, idx, feature, alt): - signature = df.loc[idx, feature].str.replace('COSMIC Signature', 'COSMIC Signature (version 2)') - contribution = df.loc[idx, alt].astype(float).multiply(100).round(0).astype(int).astype(str) + def display_signature(cls, df, idx): + #before_string = "COSMIC Signature" + #after_string = f"COSMIC Signature (version {version})" + signature = df.loc[idx, Evaluator.feature]#.str.replace(before_string, after_string) + contribution = df.loc[idx, Evaluator.alt].astype(float).multiply(100).round(0).astype(int).astype(str) # Signature: Cosmic Signature 7 (65%) return signature + ' (' + contribution + '%)' @classmethod - def display_variant(cls, df, idx, feature, alt_type, alt): - gene = df.loc[idx, feature] - protein_change = df.loc[idx, alt] - variant_class = df.loc[idx, alt_type] + def display_variant(cls, df, idx): + gene = df.loc[idx, Evaluator.feature] + protein_change = df.loc[idx, Evaluator.alt] + variant_class = df.loc[idx, Evaluator.alt_type] # exon, pathogenic, cDNA, linebreaks # Gene p.Foo (c.DNA) # Exon 12 Missense @@ -240,7 +228,7 @@ def display_variant(cls, df, idx, feature, alt_type, alt): return gene + ' ' + protein_change + ' (' + variant_class + ')' @classmethod - def evaluate(cls, somatic, germline, ms_variants, ms_status, burden, signatures, wgd): + def evaluate(cls, somatic, germline, ms_variants, ms_status, burden, signatures, wgd, config): somatic = cls.format_mutations(somatic) germline = cls.format_mutations(germline) @@ -257,48 +245,43 @@ def evaluate(cls, somatic, germline, ms_variants, ms_status, burden, signatures, actionable_list.append(Evaluator.subset_almanac_bin(dataframe)) df = features.Features.concat_list_of_dataframes(list_of_dataframes=actionable_list) - df[Evaluator.feature_display] = cls.format_feature_display( - df, Evaluator.feature_display, - Evaluator.feature_type, Evaluator.feature, - Evaluator.alt_type, Evaluator.alt) + df[Evaluator.feature_display] = 
cls.format_feature_display(df=df, config=config) + # df, Evaluator.feature_display, + # Evaluator.feature_type, Evaluator.feature, + # Evaluator.alt_type, Evaluator.alt) return df.sort_values(cls.sort_columns, ascending=False) @classmethod - def format_feature_display(cls, df, feature_display_column, - feature_type_column, feature_column, - alt_type_column, alt_column): - idx_somatic = df[feature_type_column].isin([Evaluator.mut_type]) - idx_germline = df[feature_type_column].isin([Evaluator.germline_type]) - idx_cn = df[feature_type_column].isin([Evaluator.copynumber_type]) - idx_fusion = df[feature_type_column].isin([Evaluator.fusion_type]) - idx_msi = df[feature_type_column].isin([Evaluator.microsatellite_type]) + def format_feature_display(cls, df, config): + display_column = Evaluator.feature_display + feature_type_column = Evaluator.feature_type + feature_column = Evaluator.feature + # alt_type_column = Evaluator.alt_type + # alt_column = Evaluator.alt + #sig_version = config['signatures']['version'] + + biomarker_types = config['feature_types'] + idx_somatic = df[feature_type_column].isin([biomarker_types['mut']]) + idx_germline = df[feature_type_column].isin([biomarker_types['germline']]) + idx_cn = df[feature_type_column].isin([biomarker_types['cna']]) + idx_fusion = df[feature_type_column].isin([biomarker_types['fusion']]) + idx_msi = df[feature_type_column].isin([biomarker_types['microsatellite']]) idx_msi_variants = df[feature_column].isin([Evaluator.supporting_variants]) idx_msi = idx_msi & ~idx_msi_variants - idx_burden = df[feature_type_column].isin([Evaluator.burden_type]) - idx_signature = df[feature_type_column].isin([Evaluator.signature_type]) - idx_wgd = df[feature_column].isin([Evaluator.aneuploidy_type]) - - df.loc[idx_wgd, feature_display_column] = cls.display_aneuploidy( - df, idx_wgd, feature_column) - df.loc[idx_somatic, feature_display_column] = cls.display_variant( - df, idx_somatic, feature_column, alt_type_column, alt_column) - 
df.loc[idx_germline, feature_display_column] = cls.display_variant( - df, idx_germline, feature_column, alt_type_column, alt_column) - df.loc[idx_cn, feature_display_column] = cls.display_copynumber( - df, idx_cn, feature_column, alt_type_column) - df.loc[idx_fusion, feature_display_column] = cls.display_fusion( - df, idx_fusion, alt_column) - df.loc[idx_msi, feature_display_column] = cls.display_microsatellite_stability( - df, idx_msi, feature_column) - df.loc[idx_msi_variants, feature_display_column] = cls.display_microsatellite_variants( - df, idx_msi_variants, feature_column, alt_column) - df.loc[idx_burden, feature_display_column] = cls.display_burden( - df, idx_burden, alt_column) - df.loc[idx_signature, feature_display_column] = cls.display_signature( - df, idx_signature, feature_column, alt_column) - df.loc[idx_wgd, feature_display_column] = cls.display_aneuploidy( - df, idx_wgd, feature_column) - return df.loc[:, feature_display_column] + idx_burden = df[feature_type_column].isin([biomarker_types['burden']]) + idx_signature = df[feature_type_column].isin([biomarker_types['signature']]) + idx_wgd = df[feature_column].isin([biomarker_types['aneuploidy']]) + + df.loc[idx_somatic, display_column] = cls.display_variant(df=df, idx=idx_somatic) + df.loc[idx_germline, display_column] = cls.display_variant(df=df, idx=idx_germline) + df.loc[idx_cn, display_column] = cls.display_copynumber(df=df, idx=idx_cn) + df.loc[idx_fusion, display_column] = cls.display_fusion(df=df, idx=idx_fusion) + df.loc[idx_msi, display_column] = cls.display_microsatellite_stability(df=df, idx=idx_msi) + df.loc[idx_msi_variants, display_column] = cls.display_microsatellite_variants(df=df, idx=idx_msi_variants) + df.loc[idx_burden, display_column] = cls.display_burden(df=df, idx=idx_burden) + df.loc[idx_signature, display_column] = cls.display_signature(df=df, idx=idx_signature) + df.loc[idx_wgd, display_column] = cls.display_aneuploidy(df=df, idx=idx_wgd) + return df.loc[:, display_column] 
@classmethod def format_mutations(cls, df): @@ -336,7 +319,7 @@ def summarize_ms_variants(cls, df): return msi_summary -class Integrative(object): +class Integrative: feature = datasources.Datasources.feature feature_type = datasources.Datasources.feature_type alt_type = datasources.Datasources.alt_type @@ -357,13 +340,13 @@ def create_integrated_df(cls, genes): return pd.DataFrame(None, columns=cls.columns, index=genes) @classmethod - def evaluate(cls, somatic, germline, dbs, feature_types): + def evaluate(cls, somatic, germline, dbs, config): genes = cls.return_datasource_genes(dbs) df = cls.create_integrated_df(genes) - somatic_mutations = cls.extract_feature_type(somatic, feature_types['mutation']) - somatic_copynumbers = cls.extract_feature_type(somatic, feature_types['copynumber']) - somatic_fusions = cls.extract_feature_type(somatic, feature_types['fusion']) + somatic_mutations = cls.extract_feature_type(somatic, config['feature_types']['mut']) + somatic_copynumbers = cls.extract_feature_type(somatic, config['feature_types']['cna']) + somatic_fusions = cls.extract_feature_type(somatic, config['feature_types']['fusion']) for gene in df.index: gene_muts = somatic_mutations[somatic_mutations[cls.feature].astype(str) == gene] diff --git a/moalmanac/features.py b/moalmanac/features.py index bc3ec3d..164068a 100644 --- a/moalmanac/features.py +++ b/moalmanac/features.py @@ -9,9 +9,6 @@ from reader import Reader from config import COLNAMES -from config import CONFIG - -SIGNATURES_CONFIG = CONFIG['signatures'] class Features: @@ -120,28 +117,21 @@ def preallocate_missing_columns(cls, df): class Aneuploidy: aneuploidy = 'aneuploidy' - - feature_type_section = 'feature_types' - feature_type = CONFIG[feature_type_section][aneuploidy] - aneuploidy_section = aneuploidy wgd = COLNAMES[aneuploidy]['wgd'] wgd_string = COLNAMES[aneuploidy]['wgd_string'] @classmethod - def summarize(cls, boolean): + def summarize(cls, boolean, config): df = Features.create_empty_dataframe() + 
feature_type = config['feature_types']['aneuploidy'] if boolean: - df.loc[0, Features.feature_type] = cls.feature_type + df.loc[0, Features.feature_type] = feature_type df.loc[0, Features.feature] = cls.wgd_string return df class BurdenReader: - feature_type_section = 'feature_types' - feature_type = CONFIG[feature_type_section]['burden'] - feature_type_mutations = CONFIG[feature_type_section]['mut'] - burden_section = 'burden' patient_id = COLNAMES[burden_section]['patient'] tumor_type = COLNAMES[burden_section]['tumor_type'] @@ -212,15 +202,16 @@ def evaluate_high_burden_boolean(cls, boolean): return Features.not_high_burden @classmethod - def import_feature(cls, handle, patient, variants, dbs): + def import_feature(cls, handle, patient, variants, dbs, config): if os.path.exists(handle): bases_covered = float(Reader.read(handle, '\t', index_col=False).columns.tolist()[0]) else: bases_covered = np.nan df = cls.create_burden_series(patient, bases_covered) - df[Features.feature_type] = cls.feature_type - mutations = variants[variants[Features.feature_type] == cls.feature_type_mutations].shape[0] + biomarker_type = config['feature_types']['burden'] + df[Features.feature_type] = biomarker_type + mutations = variants[variants[Features.feature_type] == config['feature_types']['mut']].shape[0] mutational_burden = cls.calculate_burden(mutations, bases_covered) df[cls.n_nonsyn_mutations] = mutations @@ -239,10 +230,10 @@ def import_feature(cls, handle, patient, variants, dbs): class CopyNumber: - config = CONFIG['seg'] - amplification = config['amp'] - deletion = config['del'] - feature_type = CONFIG['feature_types']['cna'] + #config = CONFIG['seg'] + #amplification = config['amp'] + #deletion = config['del'] + #feature_type = CONFIG['feature_types']['cna'] @staticmethod def format_cn_gene(series): @@ -250,7 +241,7 @@ def format_cn_gene(series): return new_series @classmethod - def import_feature(cls, called_handle, not_called_handle): + def import_feature(cls, 
called_handle, not_called_handle, config): if called_handle: column_map = CopyNumberCalled.create_column_map() handle = called_handle @@ -259,13 +250,17 @@ def import_feature(cls, called_handle, not_called_handle): handle = not_called_handle df = Features.import_if_path_exists(handle, '\t', column_map, comment_character="#") + + amplification_string = config['seg']['amp'] + deletion_string = config['seg']['del'] if not df.empty: - df[Features.feature_type] = Features.annotate_feature_type(cls.feature_type, df.index) + biomarker_type = config['feature_types']['cna'] + df[Features.feature_type] = Features.annotate_feature_type(biomarker_type, df.index) df[Features.feature] = cls.format_cn_gene(df[Features.feature]) if called_handle: - seg_accept, seg_reject = CopyNumberCalled.process_calls(df) + seg_accept, seg_reject = CopyNumberCalled.process_calls(df, amplification_string, deletion_string) else: - seg_accept, seg_reject = CopyNumberTotal.process_calls(df) + seg_accept, seg_reject = CopyNumberTotal.process_calls(df, config) else: seg_accept = Features.create_empty_dataframe() seg_reject = Features.create_empty_dataframe() @@ -283,21 +278,21 @@ def create_column_map(): } @classmethod - def filter_calls(cls, series): - return series.fillna('').isin([cls.amplification, cls.deletion]) + def filter_calls(cls, series, amp_string, del_string): + return series.fillna('').isin([amp_string, del_string]) @classmethod - def process_calls(cls, dataframe): - idx = cls.filter_calls(dataframe[Features.alt_type]) + def process_calls(cls, dataframe, amp_string, del_string): + idx = cls.filter_calls(dataframe[Features.alt_type], amp_string, del_string) return dataframe[idx], dataframe[~idx] class CopyNumberTotal(CopyNumber): @classmethod - def annotate_amp_del(cls, idx, idx_amp, idx_del): + def annotate_amp_del(cls, idx, idx_amp, idx_del, amp_string, del_string): series = pd.Series('', index=idx) - series[idx_amp] = cls.amplification - series[idx_del] = cls.deletion + series[idx_amp] 
= amp_string + series[idx_del] = del_string return series @staticmethod @@ -323,7 +318,7 @@ def drop_duplicate_genes(cls, df): ) @classmethod - def filter_by_threshold(cls, df, percentile_amp, percentile_del): + def filter_by_threshold(cls, df, percentile_amp, percentile_del, amp_string, del_string): unique_segments = cls.get_unique_segments(df) threshold_amp = Features.calculate_percentile(unique_segments, percentile_amp) threshold_del = Features.calculate_percentile(unique_segments, percentile_del) @@ -331,7 +326,7 @@ def filter_by_threshold(cls, df, percentile_amp, percentile_del): idx_amp = df[df[Features.segment_mean].astype(float) >= float(threshold_amp)].index idx_del = df[df[Features.segment_mean].astype(float) <= float(threshold_del)].index - df[Features.alt_type] = cls.annotate_amp_del(df.index, idx_amp, idx_del) + df[Features.alt_type] = cls.annotate_amp_del(df.index, idx_amp, idx_del, amp_string, del_string) idx_accept = df[df[Features.alt_type] != ''].index idx_unique = Features.drop_duplicate_genes(df.loc[idx_accept, :], Features.segment_mean) @@ -346,10 +341,12 @@ def get_unique_segments(df): return df.drop_duplicates([Features.chr, Features.start])[Features.segment_mean] @classmethod - def process_calls(cls, dataframe): - amp_percentile = cls.config['amp_percentile'] - del_percentile = cls.config['del_percentile'] - return cls.filter_by_threshold(dataframe, amp_percentile, del_percentile) + def process_calls(cls, dataframe, config): + amp_percentile = config['seg']['amp_percentile'] + del_percentile = config['seg']['del_percentile'] + amp_string = config['seg']['amp'] + del_string = config['seg']['del'] + return cls.filter_by_threshold(dataframe, amp_percentile, del_percentile, amp_string, del_string) class CoverageMetrics: @@ -391,14 +388,9 @@ def split_counts(series): class CosmicSignatures: - feature_type_section = 'feature_types' - feature_type = CONFIG[feature_type_section]['signature'] - signature_section = 'signatures' patient_id = 
COLNAMES[signature_section]['patient'] - min_contribution = SIGNATURES_CONFIG['min_contribution'] - input_section = 'mutational_signature_input' input_signature = COLNAMES[input_section]['signature'] input_contribution = COLNAMES[input_section]['contribution'] @@ -413,21 +405,23 @@ def create_column_map(cls): } @classmethod - def import_feature(cls, path): + def import_feature(cls, path, config): """Loads and formats Cosmic Mutational Signatures based on provided file path.""" column_map = cls.create_column_map() df = Features.import_if_path_exists(path, delimiter='\t', column_map=column_map) if not df.empty: - df[Features.feature_type] = cls.feature_type + biomarker_type = config['feature_types']['signature'] + minimum_contribution = config['signatures']['min_contribution'] + df[Features.feature_type] = biomarker_type df[Features.alt_type] = 'v3.4' df[Features.alt] = cls.round_contributions(df[Features.alt]) - idx = cls.index_for_minimum_contribution(df[Features.alt]) + idx = cls.index_for_minimum_contribution(series=df[Features.alt], minimum_value=minimum_contribution) return df[idx] else: return Features.create_empty_dataframe() @classmethod - def index_for_minimum_contribution(cls, series, minimum_value=min_contribution): + def index_for_minimum_contribution(cls, series, minimum_value=0.06): """Subsets the provided SBS signatures to those that pass the minimum contribution, specified in config.ini""" return series.astype(float) >= float(minimum_value) @@ -438,41 +432,38 @@ def round_contributions(series, decimals=3): class Fusion: - config = CONFIG['fusion'] - alt_type = config['alt_type'] - leftbreakpoint = config['leftbreakpoint'] - rightbreakpoint = config['rightbreakpoint'] - spanningfrags_min = config['spanningfrags_min'] - - feature_type = CONFIG['feature_types']['fusion'] - @classmethod - def create_colmap(cls): + def create_colmap(cls, config): section = 'fusion_input' column_names = COLNAMES[section] + + leftbreakpoint = 
config['fusion']['leftbreakpoint'] + rightbreakpoint = config['fusion']['rightbreakpoint'] return { column_names['name']: Features.feature, column_names['spanningfrags']: Features.spanningfrags, - column_names[cls.leftbreakpoint]: cls.leftbreakpoint, - column_names[cls.rightbreakpoint]: cls.rightbreakpoint + column_names[leftbreakpoint]: leftbreakpoint, + column_names[rightbreakpoint]: rightbreakpoint } @staticmethod - def filter_by_spanning_fragment_count(series, minimum=spanningfrags_min): + def filter_by_spanning_fragment_count(series, minimum=5.0): minimum = int(float(minimum)) return series[series.astype(int).ge(minimum)].index @classmethod - def import_feature(cls, handle): - column_map = cls.create_colmap() + def import_feature(cls, handle, config): + column_map = cls.create_colmap(config) df = Features.import_if_path_exists(handle, '\t', column_map, index_col=False) if not df.empty: split_genes = cls.split_genes(df[Features.feature]) df[Features.left_gene] = split_genes[Features.left_gene] df[Features.right_gene] = split_genes[Features.right_gene] - left = cls.split_breakpoint(df[cls.leftbreakpoint]) - right = cls.split_breakpoint(df[cls.rightbreakpoint]) + leftbreakpoint = config['fusion']['leftbreakpoint'] + rightbreakpoint = config['fusion']['rightbreakpoint'] + left = cls.split_breakpoint(df[leftbreakpoint]) + right = cls.split_breakpoint(df[rightbreakpoint]) df[Features.chr] = left[Features.chr] df[Features.start] = left[Features.start] @@ -480,13 +471,18 @@ def import_feature(cls, handle): df[Features.left_start] = left[Features.start] df[Features.right_chr] = right[Features.chr] df[Features.right_start] = right[Features.start] - df.drop([cls.leftbreakpoint, cls.rightbreakpoint], axis=1, inplace=True) + df.drop([leftbreakpoint, rightbreakpoint], axis=1, inplace=True) - df[Features.feature_type] = Features.annotate_feature_type(cls.feature_type, df.index) - df[Features.alt_type] = cls.alt_type + biomarker_type = config['feature_types']['fusion'] + 
df[Features.feature_type] = Features.annotate_feature_type(biomarker_type, df.index) + df[Features.alt_type] = config['fusion']['alt_Type'] df[Features.alt] = df[Features.feature] - idx_min_spanning_fragments = cls.filter_by_spanning_fragment_count(df[Features.spanningfrags]) + min_fragments = config['fusion']['spanningfrags_min'] + idx_min_spanning_fragments = cls.filter_by_spanning_fragment_count( + series=df[Features.spanningfrags], + minimum=min_fragments + ) idx_unique = Features.drop_duplicate_genes(df.loc[idx_min_spanning_fragments, :], Features.feature) fusions_unique = df.loc[idx_unique, :] @@ -521,11 +517,7 @@ def split_breakpoint(series_breakpoint): class MicrosatelliteReader: - microsatellite = 'microsatellite' - feature_type_section = 'feature_types' - feature_type = CONFIG[feature_type_section][microsatellite] - - microsatellite_section = microsatellite + microsatellite_section = 'microsatellite' msih = COLNAMES[microsatellite_section]['msih'] msil = COLNAMES[microsatellite_section]['msil'] mss = COLNAMES[microsatellite_section]['mss'] @@ -543,9 +535,10 @@ def map_status(cls, status): return cls.status_map[status] @classmethod - def summarize(cls, status): + def summarize(cls, status, config): df = Features.create_empty_dataframe() - df.loc[0, Features.feature_type] = cls.feature_type + biomarker_type = config['feature_types']['microsatellite'] + df.loc[0, Features.feature_type] = biomarker_type df.loc[0, Features.feature] = cls.map_status(status) return df @@ -636,13 +629,12 @@ def return_variants_non_coding(cls, df): class MAFGermline(MAF): - feature_type = CONFIG['feature_types']['germline'] - @classmethod - def import_feature(cls, handle): + def import_feature(cls, handle, config): df = cls.import_maf(handle) + biomarker_type = config['feature_types']['germline'] if not df.empty: - df = cls.format_maf(df, cls.feature_type) + df = cls.format_maf(df, biomarker_type) coding_variants = cls.return_variants_coding(df) else: coding_variants = 
cls.create_empty_dataframe() @@ -651,13 +643,12 @@ def import_feature(cls, handle): class MAFSomatic(MAF): - feature_type = CONFIG['feature_types']['mut'] - @classmethod - def import_feature(cls, handle): + def import_feature(cls, handle, config): df = cls.import_maf(handle) + biomarker_type = config['feature_types']['mut'] if not df.empty: - df = cls.format_maf(df, cls.feature_type) + df = cls.format_maf(df, biomarker_type) coding_variants = cls.return_variants_coding(df) non_coding_variants = cls.return_variants_non_coding(df) else: @@ -682,8 +673,8 @@ class MAFValidation(MAF): columns = [gene, alt, alt_type, validation_tumor_f, validation_coverage] @classmethod - def import_feature(cls, handle): - df, df_reject = MAFSomatic.import_feature(handle) + def import_feature(cls, handle, config): + df, df_reject = MAFSomatic.import_feature(handle, config) df = df.drop(df.columns[df.columns.str.contains('validation')], axis=1) df = df.rename(columns=cls.column_map).loc[:, cls.columns] return df, df_reject diff --git a/moalmanac/illustrator.py b/moalmanac/illustrator.py index 36ae986..3f6e120 100644 --- a/moalmanac/illustrator.py +++ b/moalmanac/illustrator.py @@ -1,5 +1,4 @@ from config import COLNAMES -from config import CONFIG import io import base64 @@ -137,13 +136,6 @@ def create_title(string): class ValidationOverlap(Illustrator): - config_section = 'validation_sequencing' - min_af = float(CONFIG[config_section]['min_af_for_annotation']) - min_power = float(CONFIG[config_section]['min_power']) - - feature_type_section = 'feature_types' - feature_type_mutation = CONFIG[feature_type_section]['mut'] - section = 'validation_sequencing' gene = COLNAMES[section]['gene'] feature_type = COLNAMES[section]['feature_type'] @@ -164,8 +156,8 @@ def create_gene_alt_string(cls, data): return data[cls.gene].astype(str) + ' ' + data[cls.alt].astype(str) @classmethod - def format_data(cls, df): - idx = df[df[cls.feature_type] == cls.feature_type_mutation].index + def format_data(cls, 
df, biomarker_type_string): + idx = df[df[cls.feature_type] == biomarker_type_string].index data = df.loc[idx, cls.columns].fillna(0.0) data[cls.coverage] = pd.to_numeric(data[cls.coverage]) data[cls.tumor_f] = pd.to_numeric(data[cls.tumor_f]) @@ -175,7 +167,7 @@ def format_data(cls, df): return data @classmethod - def plot_overlap_af(cls, data, title=''): + def plot_overlap_af(cls, data, title='', minimum_power=0.95, minimum_allele_fraction=0.05): fig = plt.figure(figsize=(7, 7)) ax = plt.subplot() @@ -190,22 +182,22 @@ def plot_overlap_af(cls, data, title=''): plt.yticks(fontsize=14) plt.xticks(fontsize=14) - powered = data[data[cls.validation_detection_power].astype(float) >= cls.min_power] - lowpower = data[data[cls.validation_detection_power].astype(float) < cls.min_power] + powered = data[data[cls.validation_detection_power].astype(float) >= minimum_power] + lowpower = data[data[cls.validation_detection_power].astype(float) < minimum_power] labels = cls.create_gene_alt_string(data) for idx in data.index: primary_tumor_f = float(data.loc[idx, cls.tumor_f]) validation_tumor_f = float(data.loc[idx, cls.validation_tumor_f]) - if (primary_tumor_f >= cls.min_af) & (validation_tumor_f >= cls.min_af): + if (primary_tumor_f >= minimum_allele_fraction) & (validation_tumor_f >= minimum_allele_fraction): ax.annotate(labels[idx], (primary_tumor_f, validation_tumor_f)) plt.scatter(powered[cls.tumor_f].tolist(), powered[cls.validation_tumor_f].tolist(), color=Illustrator.tableau10['blue'], - label=''.join(['Detection power in validation sequencing >= {}'.format(cls.min_power)])) + label=''.join(['Detection power in validation sequencing >= {}'.format(minimum_power)])) plt.scatter(lowpower[cls.tumor_f].tolist(), lowpower[cls.validation_tumor_f].tolist(), color=Illustrator.tableau10['grey'], - label=''.join(['Detection power in validation sequencing < {}'.format(cls.min_power)])) + label=''.join(['Detection power in validation sequencing < {}'.format(minimum_power)])) 
plt.xlim(-0.01, 1.01) plt.ylim(-0.01, 1.01) @@ -222,7 +214,16 @@ def plot_overlap_af(cls, data, title=''): return fig @classmethod - def generate_dna_rna_plot(cls, df, patient_id, folder): - data = cls.format_data(df) - figure = cls.plot_overlap_af(data, title=patient_id) + def generate_dna_rna_plot(cls, df, patient_id, folder, config): + biomarker_type = config['feature_types']['mut'] + minimum_power = float(config['validation_sequencing']['min_power']) + minimum_allele_fraction = float(config['validation_sequencing']['min_af_for_annotation']) + + data = cls.format_data(df=df, biomarker_type_string=biomarker_type) + figure = cls.plot_overlap_af( + data=data, + title=patient_id, + minimum_power=minimum_power, + minimum_allele_fraction=minimum_allele_fraction + ) Illustrator.save_fig(figure, folder, patient_id, 'validation_overlap.png') diff --git a/moalmanac/investigator.py b/moalmanac/investigator.py index cfb4847..0c2d6fc 100644 --- a/moalmanac/investigator.py +++ b/moalmanac/investigator.py @@ -6,7 +6,6 @@ from datasources import Preclinical from illustrator import PreclinicalEfficacy -from config import CONFIG from config import COLNAMES @@ -58,26 +57,6 @@ class Investigator(object): ic50 = COLNAMES[preclinical_section]['ic50'] tested_subfeature = COLNAMES[preclinical_section]['tested_subfeature'] - feature_type_section = 'feature_types' - feature_type_mut = CONFIG[feature_type_section]['mut'] - feature_type_germline = CONFIG[feature_type_section]['germline'] - feature_type_cna = CONFIG[feature_type_section]['cna'] - feature_type_fusion = CONFIG[feature_type_section]['fusion'] - feature_type_burden = CONFIG[feature_type_section]['burden'] - feature_type_signature = CONFIG[feature_type_section]['signature'] - feature_types = { - 'variant': feature_type_mut, - 'germline': feature_type_germline, - 'copy_number': feature_type_cna, - 'fusion': feature_type_fusion, - 'burden': feature_type_burden, - 'signature': feature_type_signature - } - - input_dtypes = 
[feature_types['variant'], - feature_types['copy_number'], - feature_types['fusion']] - @staticmethod def list_feature_combinations(split_feature, feature_length): return ['.'.join(split_feature[:i]) for i in range(1, feature_length + 1)] @@ -122,7 +101,7 @@ def calculate_mann_whitney_u(series1, series2): return stats.mannwhitneyu(series1, series2, alternative='two-sided') @classmethod - def create(cls, dbs, df_actionable): + def create(cls, dbs, df_actionable, config): summary = dbs[cls.summary] variants = dbs[cls.variants] cnas = dbs[cls.cnas] @@ -131,8 +110,14 @@ def create(cls, dbs, df_actionable): genes = dbs[cls.gene] mappings = dbs[cls.mappings] + input_dtypes = [ + config['feature_types']['mut'], + config['feature_types']['cna'], + config['feature_types']['fusion'] + ] + samples = Preclinical.generate_sample_list(summary, cls.use_column, cls.model_id) - idx_feature_type = df_actionable[cls.feature_type].isin(cls.input_dtypes) + idx_feature_type = df_actionable[cls.feature_type].isin(input_dtypes) idx_sensitive = ~(df_actionable[cls.sensitive_therapy].isnull() | df_actionable[cls.sensitive_therapy].eq('')) dictionary = {} @@ -228,12 +213,12 @@ def populate_feature_dictionary(cls, groups, all_samples): return dictionary @classmethod - def select_split_function(cls, feature_type): - if feature_type == cls.feature_types['variant']: + def select_split_function(cls, feature_type, variant_string, copy_number_string, fusion_string): + if feature_type == variant_string: return cls.split_samples_for_variants - elif feature_type == cls.feature_types['copy_number']: + elif feature_type == copy_number_string: return cls.split_samples_for_copy_numbers - elif feature_type == cls.feature_types['fusion']: + elif feature_type == fusion_string: return cls.split_samples_for_fusions else: return cls.split_exit @@ -245,10 +230,19 @@ def split_exit(cls, dbs, series, samples): exit() @classmethod - def split_samples_by_wt_mut(cls, series, dbs, samples): + def 
split_samples_by_wt_mut(cls, series, dbs, samples, config): feature_type = series.loc[cls.feature_type] - split_function = cls.select_split_function(feature_type) - return split_function(dbs, series, samples) + split_function = cls.select_split_function( + feature_type=feature_type, + variant_string=config['feature_types']['mut'], + copy_number_string=config['feature_types']['cna'], + fusion_string=config['feature_types']['fusion'] + ) + return split_function( + dbs=dbs, + series=series, + all_samples=samples + ) @classmethod def split_samples_for_copy_numbers(cls, dbs, series, all_samples): diff --git a/moalmanac/matchmaker.py b/moalmanac/matchmaker.py index 542e9ed..2320d61 100644 --- a/moalmanac/matchmaker.py +++ b/moalmanac/matchmaker.py @@ -10,7 +10,6 @@ from datasources import Preclinical as DatasourcePreclinical from config import COLNAMES -from config import CONFIG class Matchmaker: @@ -27,20 +26,14 @@ class Matchmaker: feature_columns = [feature_type, feature, alt_type, alt] cgc_bin = 'cgc_bin' - - feature_types_section = 'feature_types' - feature_types_config = CONFIG[feature_types_section] - cn = feature_types_config['cna'] - rearrangement = feature_types_config['fusion'] - variant = feature_types_config['mut'] fusion = 'Fusion' @classmethod - def concat_case_comparisons(cls, somatic, dbs): + def concat_case_comparisons(cls, somatic, dbs, variant_string, copy_number_string, rearrangement_string): somatic[cls.model_id] = cls.case_profile - case_variants = cls.subset_dataframe_eq(somatic, cls.feature_type, cls.variant) - case_cns = cls.subset_dataframe_eq(somatic, cls.feature_type, cls.cn) - case_fusions = cls.subset_dataframe_eq(somatic, cls.feature_type, cls.rearrangement) + case_variants = cls.subset_dataframe_eq(somatic, cls.feature_type, variant_string) + case_cns = cls.subset_dataframe_eq(somatic, cls.feature_type, copy_number_string) + case_fusions = cls.subset_dataframe_eq(somatic, cls.feature_type, rearrangement_string) if case_fusions.shape[0] > 
0: case_fusions = cls.format_fusions(case_fusions) @@ -54,24 +47,34 @@ def concat_case_comparisons(cls, somatic, dbs): fusion_columns = [cls.feature, cls.partner, cls.model_id] fusions = cls.concat_dataframes(case_fusions, comparison_fusions, fusion_columns) - variants[cls.feature_type] = cls.variant - copy_numbers[cls.feature_type] = cls.cn - fusions[cls.feature_type] = cls.rearrangement + variants[cls.feature_type] = variant_string + copy_numbers[cls.feature_type] = copy_number_string + fusions[cls.feature_type] = rearrangement_string fusions[cls.alt_type] = cls.fusion return { - cls.variant: variants, - cls.cn: copy_numbers, - cls.rearrangement: fusions + variant_string: variants, + copy_number_string: copy_numbers, + rearrangement_string: fusions } @classmethod - def compare(cls, dbs, dbs_preclinical, somatic, case_sample_id): + def compare(cls, dbs, dbs_preclinical, somatic, case_sample_id, config): cgc = DatasourceCGC.import_ds(dbs) almanac = DatasourceAlmanac.import_ds(dbs) - merged = cls.concat_case_comparisons(somatic, dbs_preclinical) - annotated = AnnotatorPreclinicalMatchmaking.annotate(merged, dbs) + somatic_variant_biomarker_type_string = config['feature_types']['mut'] + copy_number_variant_biomarker_type_string = config['feature_types']['cna'] + fusion_biomarker_type_string = config['feature_types']['fusion'] + + merged = cls.concat_case_comparisons( + somatic = somatic, + dbs = dbs_preclinical, + variant_string = somatic_variant_biomarker_type_string, + copy_number_string = copy_number_variant_biomarker_type_string, + rearrangement_string = fusion_biomarker_type_string + ) + annotated = AnnotatorPreclinicalMatchmaking.annotate(merged, dbs, config) samples_to_use = cls.subset_samples(dbs_preclinical) calculated = SNFTypesCGCwithEvidence.calculate(annotated, samples_to_use, cgc, almanac) diff --git a/moalmanac/moalmanac.py b/moalmanac/moalmanac.py index b89223b..c47ff4c 100644 --- a/moalmanac/moalmanac.py +++ b/moalmanac/moalmanac.py @@ -1,7 +1,6 @@ 
import time import argparse import os -import pandas as pd import subprocess import annotator @@ -16,7 +15,7 @@ import writer from config import COLNAMES -from config import CONFIG +from reader import Ini snv_handle = 'snv_handle' indel_handle = 'indel_handle' @@ -50,28 +49,7 @@ ontology = COLNAMES[oncotree_section]['ontology'] code = COLNAMES[oncotree_section]['code'] -feature_type_section = 'feature_types' -feature_type_mut = CONFIG[feature_type_section]['mut'] -feature_type_germline = CONFIG[feature_type_section]['germline'] -feature_type_cna = CONFIG[feature_type_section]['cna'] -feature_type_fusion = CONFIG[feature_type_section]['fusion'] -feature_type_burden = CONFIG[feature_type_section]['burden'] -feature_type_signature = CONFIG[feature_type_section]['signature'] -feature_type_microsatellite = CONFIG[feature_type_section]['microsatellite'] -feature_type_aneuploidy = CONFIG[feature_type_section]['aneuploidy'] -feature_types = { - 'mutation': feature_type_mut, - 'germline': feature_type_germline, - 'copynumber': feature_type_cna, - 'fusion': feature_type_fusion, - 'burden': feature_type_burden, - 'signature': feature_type_signature, - 'microsatellite': feature_type_microsatellite, - 'aneuploidy': feature_type_aneuploidy -} - generate_illustrations = 'generate_illustrations' -TOGGLE_FEATURES = CONFIG['function_toggle'] def create_metadata_dictionary(input_dictionary): @@ -95,9 +73,9 @@ def format_output_directory(directory): return directory -def load_and_process_mutational_signatures(input, dbs, tumor_type): - signatures = features.CosmicSignatures.import_feature(input) - annotated = annotator.Annotator.annotate_almanac(signatures, dbs, tumor_type) +def load_and_process_mutational_signatures(input, dbs, tumor_type, config): + signatures = features.CosmicSignatures.import_feature(input, config) + annotated = annotator.Annotator.annotate_almanac(signatures, dbs, tumor_type, config) evaluated = evaluator.Evaluator.evaluate_almanac(annotated) return evaluated @@ 
-110,18 +88,17 @@ def plot_preclinical_efficacy(dictionary, folder, label): writer.Illustrations.write(figure, folder, label, f"{figure_name}.png") -def process_preclinical_efficacy(dbs, dataframe, folder, label, plot: bool = False): - efficacy_dictionary = investigator.SensitivityDictionary.create(dbs, dataframe) +def process_preclinical_efficacy(dbs, dataframe, folder, label, config, plot: bool = False): + efficacy_dictionary = investigator.SensitivityDictionary.create(dbs, dataframe, config) if plot: plot_preclinical_efficacy(efficacy_dictionary, folder, label) efficacy_summary = investigator.SummaryDataFrame.create(efficacy_dictionary, dataframe, label) return efficacy_dictionary, efficacy_summary -def main(patient, inputs, output_folder): +def main(patient, inputs, output_folder, config, dbs, dbs_preclinical=None): metadata_dictionary = create_metadata_dictionary(patient) - dbs = datasources.Datasources.generate_db_dict(CONFIG) output_folder = format_output_directory(output_folder) if output_folder != "": execute_cmd(f"mkdir -p {output_folder}") @@ -132,47 +109,81 @@ def main(patient, inputs, output_folder): metadata_dictionary[ontology] = mapped_ontology[ontology] metadata_dictionary[code] = mapped_ontology[code] - df_snv, df_snv_reject = features.MAFSomatic.import_feature(inputs[snv_handle]) - df_indel, df_indel_reject = features.MAFSomatic.import_feature(inputs[indel_handle]) - df_cnv, df_cnv_reject = features.CopyNumber.import_feature(inputs[called_cn_handle], inputs[cnv_handle]) - df_fusion, df_fusion_reject = features.Fusion.import_feature(inputs[fusion_handle]) + df_snv, df_snv_reject = features.MAFSomatic.import_feature(inputs[snv_handle], config) + df_indel, df_indel_reject = features.MAFSomatic.import_feature(inputs[indel_handle], config) + df_cnv, df_cnv_reject = features.CopyNumber.import_feature(inputs[called_cn_handle], inputs[cnv_handle], config) + df_fusion, df_fusion_reject = features.Fusion.import_feature(inputs[fusion_handle], config) 
accepted_variants = [df_snv, df_indel, df_cnv, df_fusion] filtered_variants = [df_snv_reject, df_indel_reject, df_cnv_reject, df_fusion_reject] somatic_variants = features.Features.concat_list_of_dataframes(accepted_variants) somatic_filtered = features.Features.concat_list_of_dataframes(filtered_variants) - germline_variants, germline_reject = features.MAFGermline.import_feature(inputs[germline_handle]) + germline_variants, germline_reject = features.MAFGermline.import_feature(inputs[germline_handle], config) if not somatic_variants.empty: - annotated_somatic = annotator.Annotator.annotate_somatic(somatic_variants, dbs, metadata_dictionary[code]) + annotated_somatic = annotator.Annotator.annotate_somatic( + df=somatic_variants, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) evaluated_somatic = evaluator.Evaluator.evaluate_somatic(annotated_somatic) - validation_accept, validation_reject = features.MAFValidation.import_feature(inputs[validation_handle]) + validation_accept, validation_reject = features.MAFValidation.import_feature(inputs[validation_handle], config) if not validation_accept.empty: - evaluated_somatic = annotator.OverlapValidation.append_validation(evaluated_somatic, validation_accept) - illustrator.ValidationOverlap.generate_dna_rna_plot(evaluated_somatic, string_id, output_folder) + evaluated_somatic = annotator.OverlapValidation.append_validation( + primary=evaluated_somatic, + validation=validation_accept, + biomarker_type=config['feature_types']['mut']) + illustrator.ValidationOverlap.generate_dna_rna_plot(evaluated_somatic, string_id, output_folder, config) else: evaluated_somatic = features.Features.create_empty_dataframe() if not germline_variants.empty: - annotated_germline = annotator.Annotator.annotate_germline(germline_variants, dbs, metadata_dictionary[code]) + annotated_germline = annotator.Annotator.annotate_germline( + germline_variants, + dbs, + metadata_dictionary[code], + config=config + ) evaluated_germline = 
evaluator.Evaluator.evaluate_germline(annotated_germline) else: evaluated_germline = features.Features.create_empty_dataframe() evaluated_somatic = annotator.OverlapSomaticGermline.append_germline_hits(evaluated_somatic, evaluated_germline) - integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, feature_types) + integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, config) - somatic_burden = features.BurdenReader.import_feature(inputs[bases_covered_handle], metadata_dictionary, somatic_variants, dbs) + somatic_burden = features.BurdenReader.import_feature( + handle=inputs[bases_covered_handle], + patient=metadata_dictionary, + variants=somatic_variants, + dbs=dbs, + config=config + ) - patient_wgd = features.Aneuploidy.summarize(metadata_dictionary[wgd]) - patient_ms_status = features.MicrosatelliteReader.summarize(metadata_dictionary[ms_status]) + patient_wgd = features.Aneuploidy.summarize(metadata_dictionary[wgd], config) + patient_ms_status = features.MicrosatelliteReader.summarize(metadata_dictionary[ms_status], config) metadata_dictionary[ms_status] = features.MicrosatelliteReader.map_status(metadata_dictionary[ms_status]) - annotated_burden = annotator.Annotator.annotate_almanac(somatic_burden, dbs, metadata_dictionary[code]) - annotated_wgd = annotator.Annotator.annotate_almanac(patient_wgd, dbs, metadata_dictionary[code]) - annotated_ms_status = annotator.Annotator.annotate_almanac(patient_ms_status, dbs, metadata_dictionary[code]) + annotated_burden = annotator.Annotator.annotate_almanac( + df=somatic_burden, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) + annotated_wgd = annotator.Annotator.annotate_almanac( + df=patient_wgd, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) + annotated_ms_status = annotator.Annotator.annotate_almanac( + df=patient_ms_status, + dbs=dbs, + ontology=metadata_dictionary[code], + config=config + ) evaluated_burden = 
evaluator.Evaluator.evaluate_almanac(annotated_burden) evaluated_wgd = evaluator.Evaluator.evaluate_almanac(annotated_wgd) @@ -182,55 +193,65 @@ def main(patient, inputs, output_folder): evaluated_mutational_signatures = load_and_process_mutational_signatures( input=inputs[mutational_signatures_path], dbs=dbs, - tumor_type=code + tumor_type=code, + config=config ) actionable = evaluator.Actionable.evaluate( - evaluated_somatic, - evaluated_germline, - evaluated_ms_variants, - evaluated_ms_status, - evaluated_burden, - evaluated_mutational_signatures, - evaluated_wgd + somatic=evaluated_somatic, + germline=evaluated_germline, + ms_variants=evaluated_ms_variants, + ms_status=evaluated_ms_status, + burden=evaluated_burden, + signatures=evaluated_mutational_signatures, + wgd=evaluated_wgd, + config=config ) strategies = evaluator.Strategies.report_therapy_strategies(actionable) + function_toggle = config['function_toggle'] + efficacy_summary = investigator.SummaryDataFrame.create_empty_dataframe() - efficacy_dictionary = {} - cell_lines_dictionary = {} - preclinical_efficacy_on = TOGGLE_FEATURES.getboolean('calculate_preclinical_efficacy') + # efficacy_dictionary = {} + # cell_lines_dictionary = {} + preclinical_efficacy_on = function_toggle.getboolean('calculate_preclinical_efficacy') # The input argument --disable_matchmaking will be removed in the next non-backwards compatible release - model_similarity_on = TOGGLE_FEATURES.getboolean('calculate_model_similarity') and not inputs[disable_matchmaking] + model_similarity_on = function_toggle.getboolean('calculate_model_similarity') and not inputs[disable_matchmaking] similarity_results = matchmaker.Matchmaker.create_empty_output() similarity_summary = {} - if preclinical_efficacy_on or model_similarity_on: - dbs_preclinical = datasources.Preclinical.import_dbs() - cell_lines_dictionary = dbs_preclinical['dictionary'] - if preclinical_efficacy_on: - plot_preclinical = 
TOGGLE_FEATURES.getboolean('plot_preclinical_efficacy') - efficacy_results = process_preclinical_efficacy( - dbs_preclinical, - actionable, - output_folder, - string_id, - plot=plot_preclinical - ) - efficacy_dictionary = efficacy_results[0] - efficacy_summary = efficacy_results[1] - - actionable = annotator.PreclinicalEfficacy.annotate( - actionable, - efficacy_summary, - efficacy_dictionary, - append_lookup=TOGGLE_FEATURES.getboolean('include_preclinical_efficacy_in_actionability_report') - ) - if model_similarity_on: - similarity_results = matchmaker.Matchmaker.compare(dbs, dbs_preclinical, evaluated_somatic, string_id) - similarity_summary = matchmaker.Report.create_report_dictionary(similarity_results, cell_lines_dictionary) + if dbs_preclinical is not None: + if preclinical_efficacy_on or model_similarity_on: + dbs_preclinical = datasources.Preclinical.import_dbs(dbs_preclinical) + cell_lines_dictionary = dbs_preclinical['dictionary'] + if preclinical_efficacy_on: + plot_preclinical = function_toggle.getboolean('plot_preclinical_efficacy') + efficacy_results = process_preclinical_efficacy( + dbs=dbs_preclinical, + dataframe=actionable, + folder=output_folder, + label=string_id, + config=config, + plot=plot_preclinical + ) + efficacy_dictionary = efficacy_results[0] + efficacy_summary = efficacy_results[1] + + actionable = annotator.PreclinicalEfficacy.annotate( + actionable, + efficacy_summary, + efficacy_dictionary, + append_lookup=function_toggle.getboolean('include_preclinical_efficacy_in_actionability_report') + ) + + if model_similarity_on: + similarity_results = matchmaker.Matchmaker.compare(dbs, dbs_preclinical, evaluated_somatic, string_id) + similarity_summary = matchmaker.Report.create_report_dictionary( + similarity_results, + cell_lines_dictionary + ) writer.Actionable.write(actionable, string_id, output_folder) writer.GermlineACMG.write(evaluated_germline, string_id, output_folder) @@ -245,81 +266,137 @@ def main(patient, inputs, output_folder): 
writer.PreclinicalEfficacy.write(efficacy_summary, string_id, output_folder) writer.PreclinicalMatchmaking.write(similarity_results, string_id, output_folder) - if TOGGLE_FEATURES.getboolean('generate_actionability_report'): + if function_toggle.getboolean('generate_actionability_report'): report_dictionary = reporter.Reporter.generate_dictionary(evaluated_somatic, metadata_dictionary) - include_similarity = TOGGLE_FEATURES.getboolean('include_model_similarity_in_actionability_report') + include_similarity = function_toggle.getboolean('include_model_similarity_in_actionability_report') reporter.Reporter.generate_actionability_report( - actionable, - report_dictionary, - similarity=similarity_summary if include_similarity else None, - output_directory=output_folder + actionable = actionable, + report_dictionary = report_dictionary, + config = config, + similarity = similarity_summary if include_similarity else None, + output_directory = output_folder ) if __name__ == "__main__": start_time = time.time() - arg_parser = argparse.ArgumentParser(prog='Molecular Oncology Almanac', - description='A clinical interpretation algorithm for cancer genomics.') - arg_parser.add_argument('--patient_id', - help='patient id label', - required=True) - arg_parser.add_argument('--description', - default='', - help='description of patient') - arg_parser.add_argument('--tumor_type', - default='Unknown', - help='reported tumor type') - arg_parser.add_argument('--stage', - default='Unknown', - help='disease stage') - arg_parser.add_argument('--snv_handle', - default='', - help='handle for SNV MAF') - arg_parser.add_argument('--indel_handle', - default='', - help='handle for InDel MAF') - arg_parser.add_argument('--bases_covered_handle', - default='', - help='handle for a text file which contains the numeric number of somatic bases') - arg_parser.add_argument('--called_cn_handle', - default='', - help='handle for called copy number alterations file, used over --cnv_handle') - 
arg_parser.add_argument('--cnv_handle', - default='', - help='handle for annotated seg file') - arg_parser.add_argument('--fusion_handle', - default='', - help='handle for STAR Fusion output, .final.abridged') - arg_parser.add_argument('--germline_handle', - default='', - help='handle for Germline MAF') - arg_parser.add_argument('--validation_handle', - default='', - help='handle for SNV MAF called from validation sequencing') - arg_parser.add_argument('--ms_status', - default='unk', - choices=['msih', 'msil', 'mss', 'unk'], - help='microsatellite instability status') - arg_parser.add_argument('--mutational_signatures', - default='', - help='file for SBS signature contributions, version 3.4') - arg_parser.add_argument('--purity', - default='Unknown', - help='Tumor purity') - arg_parser.add_argument('--ploidy', - default='Unknown', - help='Tumor ploidy') - arg_parser.add_argument('--wgd', - action='store_true', - help='Specify the occurrence of whole genome duplication') - arg_parser.add_argument('--disable_matchmaking', - action='store_true', - help='Disable matchmaking in report') - arg_parser.add_argument('--output_directory', - default=None, - help='Output directory for generated files') + arg_parser = argparse.ArgumentParser( + prog='Molecular Oncology Almanac', + description='A clinical interpretation algorithm for cancer genomics.' 
+ ) + arg_parser.add_argument( + '--patient_id', + help='patient id label', + required=True + ) + arg_parser.add_argument( + '--description', + default='', + help='description of patient' + ) + arg_parser.add_argument( + '--tumor_type', + default='Unknown', + help='reported tumor type' + ) + arg_parser.add_argument( + '--stage', + default='Unknown', + help='disease stage' + ) + arg_parser.add_argument( + '--snv_handle', + default='', + help='handle for SNV MAF' + ) + arg_parser.add_argument( + '--indel_handle', + default='', + help='handle for InDel MAF' + ) + arg_parser.add_argument( + '--bases_covered_handle', + default='', + help='handle for a text file which contains the numeric number of somatic bases' + ) + arg_parser.add_argument( + '--called_cn_handle', + default='', + help='handle for called copy number alterations file, used over --cnv_handle' + ) + arg_parser.add_argument( + '--cnv_handle', + default='', + help='handle for annotated seg file' + ) + arg_parser.add_argument( + '--fusion_handle', + default='', + help='handle for STAR Fusion output, .final.abridged' + ) + arg_parser.add_argument( + '--germline_handle', + default='', + help='handle for Germline MAF' + ) + arg_parser.add_argument( + '--validation_handle', + default='', + help='handle for SNV MAF called from validation sequencing' + ) + arg_parser.add_argument( + '--ms_status', + default='unk', + choices=['msih', 'msil', 'mss', 'unk'], + help='microsatellite instability status' + ) + arg_parser.add_argument( + '--mutational_signatures', + default='', + help='file for SBS signature contributions, version 3.4' + ) + arg_parser.add_argument( + '--purity', + default='Unknown', + help='Tumor purity' + ) + arg_parser.add_argument( + '--ploidy', + default='Unknown', + help='Tumor ploidy' + ) + arg_parser.add_argument( + '--wgd', + action='store_true', + help='Specify the occurrence of whole genome duplication' + ) + arg_parser.add_argument( + '--disable_matchmaking', + action='store_true', + 
help='Disable matchmaking in report' + ) + arg_parser.add_argument( + '--output_directory', + default=None, + help='Output directory for generated files' + ) + arg_parser.add_argument( + '--config', '-c', + required=True, + help='ini file that contains configuration details ' + ) + arg_parser.add_argument( + '--dbs', + required=True, + help='ini file that contains database paths ' + ) + arg_parser.add_argument( + '--preclinical-dbs', + required=False, + help='ini file that contains preclinical file paths' + ) args = arg_parser.parse_args() patient_dict = { @@ -348,7 +425,22 @@ def main(patient, inputs, output_folder): output_directory = args.output_directory if args.output_directory else os.getcwd() - main(patient_dict, inputs_dict, output_directory) + config_ini = Ini.read(args.config, extended_interpolation=False, convert_to_dictionary=False) + + db_paths = Ini.read(args.dbs, extended_interpolation=False, convert_to_dictionary=True) + if args.preclinical_dbs: + preclinical_db_paths = Ini.read(args.preclinical_dbs, extended_interpolation=False, convert_to_dictionary=True) + else: + preclinical_db_paths = None + + main( + patient=patient_dict, + inputs=inputs_dict, + output_folder=output_directory, + config=config_ini, + dbs=db_paths['databases'], + dbs_preclinical=preclinical_db_paths['preclinical'] + ) end_time = time.time() time_statement = "Molecular Oncology Almanac runtime: %s seconds" % round((end_time - start_time), 4) diff --git a/moalmanac/preclinical-databases.ini b/moalmanac/preclinical-databases.ini new file mode 100644 index 0000000..de60614 --- /dev/null +++ b/moalmanac/preclinical-databases.ini @@ -0,0 +1,10 @@ +[preclinical] +almanac_gdsc_mappings = datasources/preclinical/formatted/almanac-gdsc-mappings.json +summary = datasources/preclinical/formatted/cell-lines.summary.txt +variants = datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt +copynumbers = datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt 
+fusions = datasources/preclinical/annotated/cell-lines.fusions.annotated.txt +fusions1 = datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt +fusions2 = datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt +gdsc = datasources/preclinical/formatted/sanger.gdsc.txt +dictionary = datasources/preclinical/cell-lines.pkl \ No newline at end of file diff --git a/moalmanac/reader.py b/moalmanac/reader.py index c935ff7..ce61219 100644 --- a/moalmanac/reader.py +++ b/moalmanac/reader.py @@ -1,9 +1,38 @@ +import configparser import json import pandas as pd import pickle -class Reader(object): +class Ini: + @classmethod + def read(cls, path, extended_interpolation=False, convert_to_dictionary=False): + ini = cls.load(path, extended_interpolation=extended_interpolation) + if convert_to_dictionary: + return cls.convert_ini_to_dictionary(ini) + else: + return ini + + @staticmethod + def convert_ini_to_dictionary(ini): + dictionary = {} + for section in ini.sections(): + dictionary[section] = {} + for (key, value) in ini.items(section): + dictionary[section][key] = value + return dictionary + + @staticmethod + def load(path, extended_interpolation=False): + if extended_interpolation: + config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation()) + else: + config = configparser.ConfigParser() + config.read(path) + return config + + +class Reader: @staticmethod def check_comment_rows(handle, comment_character): skip_rows = 0 diff --git a/moalmanac/reporter.py b/moalmanac/reporter.py index bb48023..e0c0ef8 100644 --- a/moalmanac/reporter.py +++ b/moalmanac/reporter.py @@ -5,7 +5,7 @@ import os from config import COLNAMES -from config import CONFIG +#from config import CONFIG class Reporter: @@ -23,23 +23,22 @@ class Reporter: ms_status = COLNAMES[report_section]['ms_status'] @classmethod - def drop_double_fusion(cls, dataframe): + def drop_double_fusion(cls, dataframe, biomarker_type_string): feature_type = 
COLNAMES[cls.report_section]['feature_type'] alt = COLNAMES[cls.report_section]['alteration'] - rearrangement = CONFIG['feature_types']['fusion'] - idx_rearrangement = dataframe[dataframe[feature_type].eq(rearrangement)].index + idx_rearrangement = dataframe[dataframe[feature_type].eq(biomarker_type_string)].index idx_rearrangement_keep = dataframe.loc[idx_rearrangement, :].drop_duplicates([alt], keep='first').index idx_rearrangement_drop = idx_rearrangement.difference(idx_rearrangement_keep) idx_keep = dataframe.index.difference(idx_rearrangement_drop) return dataframe.loc[idx_keep, :] @classmethod - def format_alterations(cls, dataframe): + def format_alterations(cls, dataframe, config): if dataframe.empty: return dataframe - dataframe = cls.drop_double_fusion(dataframe) + dataframe = cls.drop_double_fusion(dataframe, biomarker_type_string=config['feature_types']['fusion']) lookup = COLNAMES['datasources'] columns = [lookup['sensitivity'], lookup['resistance'], lookup['prognosis']] @@ -73,7 +72,7 @@ def format_clinical_columns(series, convert_to_float=False): return series @classmethod - def generate_actionability_report(cls, actionable, report_dictionary, similarity=None, output_directory=None): + def generate_actionability_report(cls, actionable, report_dictionary, config, similarity=None, output_directory=None): report = ActionabilityReport() report.add_metadata( name=report_dictionary['patient_id'], @@ -89,13 +88,13 @@ def generate_actionability_report(cls, actionable, report_dictionary, similarity msi=report_dictionary['microsatellite_status'] ) - versions = cls.generate_version_dictionary() + versions = cls.generate_version_dictionary(config) report.add_versions( software=versions['software'], database=versions['database'] ) - actionable = cls.format_alterations(actionable) + actionable = cls.format_alterations(dataframe=actionable, config=config) report.add_alterations(actionable) report.add_similar_profiles(similarity) @@ -120,10 +119,10 @@ def 
generate_date(): return datetime.date.today().strftime("%b %d %Y") @classmethod - def generate_version_dictionary(cls): + def generate_version_dictionary(cls, config): version_section = 'versions' - software_version = CONFIG[version_section]['interpreter'] - database_version = CONFIG[version_section]['database'] + software_version = config[version_section]['interpreter'] + database_version = config[version_section]['database'] return { 'software': software_version, 'database': database_version diff --git a/moalmanac/run_example.py b/moalmanac/run_example.py index a8400f5..87c12c4 100644 --- a/moalmanac/run_example.py +++ b/moalmanac/run_example.py @@ -3,7 +3,9 @@ import time import subprocess -patient_dict = { +from reader import Ini + +metadata_dictionary = { 'patient_id': 'example', 'reported_tumor_type': 'MEL', 'stage': 'Metastatic', @@ -14,7 +16,7 @@ 'microsatellite_status': 'msih' } -empty_dict = { +input_dictionary_empty = { 'snv_handle': '', 'indel_handle': '', 'bases_covered_handle': '', @@ -27,7 +29,7 @@ 'disable_matchmaking': False } -example_dict = { +input_dictionary = { 'snv_handle': '../example_data/example_patient.capture.somatic.snvs.maf', 'indel_handle': '../example_data/example_patient.capture.somatic.indels.maf', 'bases_covered_handle': '../example_data/example_patient.capture.somatic.coverage.txt', @@ -40,12 +42,20 @@ 'disable_matchmaking': False } +config_ini_path = "config.ini" +dbs_ini_path = "annotation-databases.ini" +dbs_preclinical_ini_path = "preclinical-databases.ini" + +config_ini = Ini.read(config_ini_path, extended_interpolation=False, convert_to_dictionary=False) +db_paths = Ini.read(dbs_ini_path, extended_interpolation=False, convert_to_dictionary=True) +preclinical_db_paths = Ini.read(dbs_preclinical_ini_path, extended_interpolation=False, convert_to_dictionary=True) + def execute_cmd(command): subprocess.call(command, shell=True) -output_directory = "example" +output_directory = "2024-07-03-config-edits" if output_directory != 
"": cmd = f"mkdir -p {output_directory}" execute_cmd(cmd) @@ -53,7 +63,14 @@ def execute_cmd(command): output_directory = os.getcwd() start_time = time.time() -moalmanac.main(patient_dict, example_dict, output_directory) +moalmanac.main( + patient=metadata_dictionary, + inputs=input_dictionary, + output_folder=output_directory, + config=config_ini, + dbs=db_paths['databases'], + dbs_preclinical=preclinical_db_paths['preclinical'] +) end_time = time.time() time_statement = "Molecular Oncology Almanac runtime: %s seconds" % round((end_time - start_time), 4) From 670d7042911ac88d1ccca712dcac4d14da0d5602 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Wed, 3 Jul 2024 21:03:49 -0400 Subject: [PATCH 02/19] Test revisions in progress --- moalmanac/evaluator.py | 19 +++++---------- moalmanac/test/README.md | 0 moalmanac/test/annotator_tests.py | 36 +++++++++++++++++++++++++---- moalmanac/test/datasources_tests.py | 13 ++++++++++- moalmanac/test/evaluator_tests.py | 34 +++++++++++++-------------- 5 files changed, 67 insertions(+), 35 deletions(-) create mode 100644 moalmanac/test/README.md diff --git a/moalmanac/evaluator.py b/moalmanac/evaluator.py index 57d94d9..3031baf 100644 --- a/moalmanac/evaluator.py +++ b/moalmanac/evaluator.py @@ -209,8 +209,8 @@ def display_microsatellite_variants(cls, df, idx): @classmethod def display_signature(cls, df, idx): - #before_string = "COSMIC Signature" - #after_string = f"COSMIC Signature (version {version})" + # before_string = "COSMIC Signature" + # after_string = f"COSMIC Signature (version {version})" signature = df.loc[idx, Evaluator.feature]#.str.replace(before_string, after_string) contribution = df.loc[idx, Evaluator.alt].astype(float).multiply(100).round(0).astype(int).astype(str) # Signature: Cosmic Signature 7 (65%) @@ -235,7 +235,7 @@ def evaluate(cls, somatic, germline, ms_variants, ms_status, burden, signatures, germline = Evaluator.remove_benign_variants(germline) germline = 
Evaluator.remove_common_variants(germline) - ms_variants_summary = cls.summarize_ms_variants(ms_variants) + ms_variants_summary = cls.summarize_ms_variants(ms_variants, config) if not burden.loc[0, Evaluator.high_burden_boolean]: burden = burden.drop(burden.index[0]) @@ -295,22 +295,15 @@ def format_variant_classification(cls, series): return series.str.replace('_Mutation', '') @classmethod - def summarize_ms_variants(cls, df): + def summarize_ms_variants(cls, df, config): df = cls.format_mutations(df) msi_summary = features.Features.create_empty_dataframe() if not df.empty: feature = Evaluator.supporting_variants - feature_displays = cls.format_feature_display( - df, - Evaluator.feature_display, - Evaluator.feature_type, - Evaluator.feature, - Evaluator.alt_type, - Evaluator.alt - ) + feature_displays = cls.format_feature_display(df=df, config=config) feature_displays_list = cls.create_string_list(feature_displays) - msi_summary.loc[0, Evaluator.feature_type] = Evaluator.microsatellite_type + msi_summary.loc[0, Evaluator.feature_type] = config['feature_types']['microsatellite'] msi_summary.loc[0, Evaluator.feature] = feature msi_summary.loc[0, Evaluator.alt] = feature_displays_list msi_summary.loc[0, Evaluator.almanac_bin] = 1 diff --git a/moalmanac/test/README.md b/moalmanac/test/README.md new file mode 100644 index 0000000..e69de29 diff --git a/moalmanac/test/annotator_tests.py b/moalmanac/test/annotator_tests.py index 286028b..7a8cb47 100644 --- a/moalmanac/test/annotator_tests.py +++ b/moalmanac/test/annotator_tests.py @@ -9,7 +9,8 @@ from datasources import Preclinical as datasources_Preclinical from features import Features from investigator import SensitivityDictionary -from config import CONFIG +# from reader import Ini +# from config import CONFIG class UnitTestAnnotator(unittest.TestCase): @@ -58,7 +59,9 @@ def test_annotate(self): gene = ACMG.gene bin_name = ACMG.bin_name df = pd.DataFrame({gene: ['TP53', 'FOO', 'PMS2', 'TSC1', 'AR']}) - dbs = 
Datasources.generate_db_dict(CONFIG) + dbs = { + 'acmg_handle': '../datasources/acmg.secondaryfindings.v3.txt' + } annotated = ACMG.annotate(df, dbs) expected_result = pd.Series([1, 0, 1, 1, 0], name=bin_name) @@ -327,7 +330,18 @@ class UnitTestPreclinicalEfficacy(unittest.TestCase): 'pvalue_mww': [2.322E-12, 7.627E-17, 0.835] } df2 = pd.DataFrame(data_dictionary, index=[0, 1, 2]) - dbs_preclinical = datasources_Preclinical.import_dbs() + dbs_dictionary = { + 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines-summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': '../datasources/preclinical/cell-lines.pkl' + } + dbs_preclinical = datasources_Preclinical.import_dbs(dbs_dictionary) efficacy_dictionary = SensitivityDictionary.create(dbs_preclinical, df1) def test_annotate(self): @@ -352,7 +366,21 @@ def test_series_for_significance(self): class UnitTestPreclinicalMatchmaking(unittest.TestCase): def test_annotate_copy_numbers(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 
'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type diff --git a/moalmanac/test/datasources_tests.py b/moalmanac/test/datasources_tests.py index 8e5a8db..027a9b9 100644 --- a/moalmanac/test/datasources_tests.py +++ b/moalmanac/test/datasources_tests.py @@ -28,7 +28,18 @@ def test_import_dbs(self): fusions = Preclinical.fusions gdsc = Preclinical.gdsc mappings = Preclinical.mappings - dbs = Preclinical.import_dbs() + dbs_dictionary = { + 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines-summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': '../datasources/preclinical/cell-lines.pkl' + } + dbs = Preclinical.import_dbs(dbs_dictionary) for label in [summary, variants, cnas, fusions, gdsc]: 
self.assertEqual(type(dbs[label]), type(pd.DataFrame())) diff --git a/moalmanac/test/evaluator_tests.py b/moalmanac/test/evaluator_tests.py index afceb40..ce7f7ad 100644 --- a/moalmanac/test/evaluator_tests.py +++ b/moalmanac/test/evaluator_tests.py @@ -110,32 +110,32 @@ def test_remap_almanac_bins(self): def test_remove_low_allele_fraction_variants(self): feature_type = Evaluator.feature_type - mut_type = Evaluator.mut_type - germline_type = Evaluator.germline_type + mut_type = 'Somatic Variant' + germline_type = 'Germline Variant' tumor_f = Evaluator.tumor_f - min_af = Evaluator.min_af + min_af = 0.05 low_af = float(min_af) - 0.01 high_af = float(min_af) + 0.01 df = pd.DataFrame({feature_type: [mut_type, mut_type, germline_type, germline_type, 'Aneuploidy'], tumor_f: [low_af, high_af, low_af, high_af, np.nan]}) - subsetted = Evaluator.remove_low_allele_fraction_variants(df) + subsetted = Evaluator.remove_low_allele_fraction_variants(df, minimum_allele_fraction=min_af) self.assertEqual([1, 3, 4], subsetted.index.tolist()) def test_remove_low_coverage_variants(self): feature_type = Evaluator.feature_type - mut_type = Evaluator.mut_type - germline_type = Evaluator.germline_type + mut_type = 'Somatic Variant' + germline_type = 'Germline Variant' coverage = Evaluator.coverage - min_coverage = Evaluator.min_coverage + min_coverage = 15 low_coverage = float(min_coverage) - 10 high_coverage = float(min_coverage) + 10 df = pd.DataFrame({feature_type: [mut_type, mut_type, germline_type, germline_type, 'Aneuploidy'], coverage: [low_coverage, high_coverage, low_coverage, high_coverage, np.nan]}) - subsetted = Evaluator.remove_low_coverage_variants(df) + subsetted = Evaluator.remove_low_coverage_variants(df, minimum_coverage=min_coverage) self.assertEqual([1, 3, 4], subsetted.index.tolist()) def test_remove_benign_variants(self): @@ -166,17 +166,17 @@ def test_create_string_list(self): self.assertEqual('the, quick, fox', Actionable.create_string_list(series)) def 
test_display_aneuploidy(self): - feature = Evaluator.aneuploidy_type + feature = 'Aneuploidy' df = pd.DataFrame({feature: ['A', 'B', 'C']}) idx = [0, 2] - series = Actionable.display_aneuploidy(df, idx, feature) + series = Actionable.display_aneuploidy(df, idx) self.assertEqual(['A', 'C'], series.tolist()) def test_display_burden(self): alt = Evaluator.alt df = pd.DataFrame({alt: ["10 mutations per Mb", "20", "30 mutations per Mb"]}) idx = [0, 2] - series = Actionable.display_burden(df, idx, alt) + series = Actionable.display_burden(df, idx) self.assertEqual(['10 mutations per Mb', '30 mutations per Mb'], series.tolist()) def test_display_copynumber(self): @@ -185,21 +185,21 @@ def test_display_copynumber(self): df = pd.DataFrame({feature: ['Foo', 'Bar', 'FooBar'], alt_type: ['Amp', 'Amp', 'Del']}) idx = [0, 2] - series = Actionable.display_copynumber(df, idx, feature, alt_type) + series = Actionable.display_copynumber(df, idx) self.assertEqual(['Foo Amp', 'FooBar Del'], series.tolist()) def test_display_fusion(self): alt = Evaluator.alt df = pd.DataFrame({alt: ['Foo--Bar', 'Bar--Foo', 'FooBar--Alpha']}) idx = [0, 2] - series = Actionable.display_fusion(df, idx, alt) + series = Actionable.display_fusion(df, idx) self.assertEqual(['Foo--Bar Fusion', 'FooBar--Alpha Fusion'], series.tolist()) def test_display_microsatellite_stability(self): feature = Evaluator.feature df = pd.DataFrame({feature: ['A', 'B', 'C']}) idx = [0, 2] - series = Actionable.display_microsatellite_stability(df, idx, feature) + series = Actionable.display_microsatellite_stability(df, idx) self.assertEqual(['A', 'C'], series.tolist()) def test_display_microsatellite_variants(self): @@ -208,7 +208,7 @@ def test_display_microsatellite_variants(self): df = pd.DataFrame({feature: ['Foo', '', 'Bar'], alt: ['Amp', '', 'Del']}) idx = [0, 2] - series = Actionable.display_microsatellite_variants(df, idx, feature, alt) + series = Actionable.display_microsatellite_variants(df, idx) self.assertEqual(['Foo: 
Amp', 'Bar: Del'], series.tolist()) def test_display_signature(self): @@ -216,7 +216,7 @@ def test_display_signature(self): alt = Evaluator.alt df = pd.DataFrame({feature: ['Signature 1', '', 'Signature 2'], alt: [0.523, '', 0.0145]}) idx = [0, 2] - series = Actionable.display_signature(df, idx, feature, alt) + series = Actionable.display_signature(df, idx) self.assertEqual(['Signature 1 (52%)', 'Signature 2 (1%)'], series.tolist()) @@ -228,7 +228,7 @@ def test_display_variant(self): alt_type: ['Missense', 'Nonsense', 'Frameshift'], alt: ['p.V600E', 'p.N500*', 'p.L151fs*']}) idx = [0, 2] - series = Actionable.display_variant(df, idx, feature, alt_type, alt) + series = Actionable.display_variant(df, idx) self.assertEqual(['Foo p.V600E (Missense)', 'FooBar p.L151fs* (Frameshift)'], series.tolist()) def test_format_variant_classification(self): From b4520ee361711a6e7988bbdb42222713f77ebc8f Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 21:23:04 -0400 Subject: [PATCH 03/19] Regression tests match --- moalmanac/annotator.py | 4 ++- moalmanac/config.ini | 10 +++---- moalmanac/features.py | 5 ---- moalmanac/investigator.py | 2 +- moalmanac/test/datasources_tests.py | 18 ++++++------- moalmanac/test/evaluator_tests.py | 2 +- moalmanac/test/features_tests.py | 41 ++++++++++++++++++++++------- 7 files changed, 51 insertions(+), 31 deletions(-) diff --git a/moalmanac/annotator.py b/moalmanac/annotator.py index 8bad42b..b7b22e4 100644 --- a/moalmanac/annotator.py +++ b/moalmanac/annotator.py @@ -1015,6 +1015,8 @@ def annotate(cls, df, dbs, config): ds_columns=[cls.chr, cls.start, cls.ref, cls.alt, cls.af], variant_biomarker_types=[config['feature_types']['mut'], config['feature_types']['germline']] ) + print(df_annotated['exac_af'].head()) + print(df_annotated['exac_af'].isnull().value_counts()) common_allele_frequency_threshold=config['exac']['exac_common_af_threshold'] df_annotated[cls.bin_name] = cls.annotate_common_af( 
series_exac_af=df_annotated[cls.af], @@ -1087,7 +1089,7 @@ def annotate(cls, df, dbs, config): df_annotated = ExAC.append_exac_af( df=df_dropped, ds=ds, - ds_columns=[cls.chr, cls.start, cls.ref, cls.alt, cls.af], + ds_columns=cls.ds_columns, variant_biomarker_types=[config['feature_types']['mut'], config['feature_types']['germline']] ) common_allele_frequency_threshold = config['exac']['exac_common_af_threshold'] diff --git a/moalmanac/config.ini b/moalmanac/config.ini index a22562b..9e21e3c 100644 --- a/moalmanac/config.ini +++ b/moalmanac/config.ini @@ -1,11 +1,11 @@ [function_toggle] ; Use this section to enable or disable functions performed by MOAlmanac by writing either 'on' or 'off' -calculate_model_similarity = off -calculate_preclinical_efficacy = off +calculate_model_similarity = on +calculate_preclinical_efficacy = on generate_actionability_report = on -include_model_similarity_in_actionability_report = off -include_preclinical_efficacy_in_actionability_report = off -plot_preclinical_efficacy = off +include_model_similarity_in_actionability_report = on +include_preclinical_efficacy_in_actionability_report = on +plot_preclinical_efficacy = on [versions] interpreter = 0.6.0 diff --git a/moalmanac/features.py b/moalmanac/features.py index 164068a..b0195e2 100644 --- a/moalmanac/features.py +++ b/moalmanac/features.py @@ -230,11 +230,6 @@ def import_feature(cls, handle, patient, variants, dbs, config): class CopyNumber: - #config = CONFIG['seg'] - #amplification = config['amp'] - #deletion = config['del'] - #feature_type = CONFIG['feature_types']['cna'] - @staticmethod def format_cn_gene(series): new_series = series.str.split(' ', expand=True).loc[:, 0] diff --git a/moalmanac/investigator.py b/moalmanac/investigator.py index 0c2d6fc..9b3c762 100644 --- a/moalmanac/investigator.py +++ b/moalmanac/investigator.py @@ -129,7 +129,7 @@ def create(cls, dbs, df_actionable, config): feature_display = df_actionable.loc[index, cls.feature_display] index_dict = {} if 
mapped: - feature_dictionary = cls.split_samples_by_wt_mut(df_actionable.loc[index, :], dbs, samples) + feature_dictionary = cls.split_samples_by_wt_mut(df_actionable.loc[index, :], dbs, samples, config) features = list(feature_dictionary) for therapy in mapped: therapy_dict = {} diff --git a/moalmanac/test/datasources_tests.py b/moalmanac/test/datasources_tests.py index 027a9b9..82f0cd4 100644 --- a/moalmanac/test/datasources_tests.py +++ b/moalmanac/test/datasources_tests.py @@ -29,15 +29,15 @@ def test_import_dbs(self): gdsc = Preclinical.gdsc mappings = Preclinical.mappings dbs_dictionary = { - 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', - 'summary': '../datasources/preclinical/formatted/cell-lines-summary.txt', - 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', - 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', - 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', - 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', - 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', - 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', - 'dictionary': '../datasources/preclinical/cell-lines.pkl' + 'almanac_gdsc_mappings': 'datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': 'datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': 'datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': 'datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': 
'datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': 'datasources/preclinical/cell-lines.pkl' } dbs = Preclinical.import_dbs(dbs_dictionary) diff --git a/moalmanac/test/evaluator_tests.py b/moalmanac/test/evaluator_tests.py index ce7f7ad..4c27b4b 100644 --- a/moalmanac/test/evaluator_tests.py +++ b/moalmanac/test/evaluator_tests.py @@ -166,7 +166,7 @@ def test_create_string_list(self): self.assertEqual('the, quick, fox', Actionable.create_string_list(series)) def test_display_aneuploidy(self): - feature = 'Aneuploidy' + feature = Evaluator.feature df = pd.DataFrame({feature: ['A', 'B', 'C']}) idx = [0, 2] series = Actionable.display_aneuploidy(df, idx) diff --git a/moalmanac/test/features_tests.py b/moalmanac/test/features_tests.py index 4143d43..a591582 100644 --- a/moalmanac/test/features_tests.py +++ b/moalmanac/test/features_tests.py @@ -2,8 +2,8 @@ import pandas as pd from moalmanac import features -from config import CONFIG, COLNAMES - +from config import COLNAMES +from reader import Ini class UnitTestFeatures(unittest.TestCase): def test_annotate_feature_type(self): @@ -69,19 +69,30 @@ def test_create_column_map(self): self.assertEqual(column_map['call'], features.Features.alt_type) def test_filter_calls(self): + amp_string = 'Amplification' + del_string = 'Deletion' tmp = pd.Series(['Amplification', 'Deletion', '', 'Deletion']) - idx = features.CopyNumberCalled.filter_calls(tmp) + idx = features.CopyNumberCalled.filter_calls(series=tmp, amp_string=amp_string, del_string=del_string) self.assertEqual([0, 1, 3], idx[idx].index.tolist()) self.assertEqual([2], idx[~idx].index.tolist()) class UnitTestCopyNumberTotal(unittest.TestCase): def test_annotate_amp_del(self): + amp_string = 'Amplification' + del_string = 'Deletion' index = pd.Index([0, 1, 2]) index_amp = pd.Index([0]) index_del = pd.Index([2]) - expected = [features.CopyNumberTotal.amplification, '', features.CopyNumberTotal.deletion] - self.assertEqual(expected, 
features.CopyNumberTotal.annotate_amp_del(index, index_amp, index_del).tolist()) + expected = [amp_string, '', del_string] + result = features.CopyNumberTotal.annotate_amp_del( + idx=index, + idx_amp=index_amp, + idx_del=index_del, + amp_string=amp_string, + del_string=del_string + ) + self.assertEqual(expected, result.tolist()) def test_create_column_map(self): column_map = features.CopyNumberTotal.create_column_map() @@ -102,12 +113,20 @@ def test_drop_duplicate_genes(self): self.assertEqual(True, idx in features.CopyNumberTotal.drop_duplicate_genes(df)) def test_filter_by_threshold(self): + amp_string = 'Amplification' + del_string = 'Deletion' values = pd.Series(range(1, 101)) df = pd.DataFrame({features.Features.feature: values, features.Features.chr: values, features.Features.start: values, features.Features.segment_mean: values}) - accept, reject = features.CopyNumberTotal.filter_by_threshold(df, 97.5, 2.5) + accept, reject = features.CopyNumberTotal.filter_by_threshold( + df=df, + percentile_amp=97.5, + percentile_del=2.5, + amp_string=amp_string, + del_string=del_string + ) expected = pd.Index([0, 1, 2, 97, 98, 99]) for idx in expected: self.assertEqual(True, idx in accept.index) @@ -157,13 +176,17 @@ def test_subset_significant_signatures(self): class UnitTestFusion(unittest.TestCase): def test_create_column_map(self): - column_map = features.Fusion.create_colmap() + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + column_map = features.Fusion.create_colmap(config) + leftbreakpoint = 'leftbreakpoint' + rightbreakpoint = 'rightbreakpoint' + values = list(column_map.values()) self.assertEqual(4, len(column_map)) self.assertEqual(features.Features.feature, values[0]) self.assertEqual(features.Features.spanningfrags, values[1]) - self.assertEqual(features.Fusion.leftbreakpoint, values[2]) - self.assertEqual(features.Fusion.rightbreakpoint, values[3]) + self.assertEqual(leftbreakpoint, values[2]) + 
self.assertEqual(rightbreakpoint, values[3]) def test_filter_by_spanning_fragment_count(self): series = pd.Series([4, 5, 6]) From 9e6766864b2a39b17d5b5184ce410c6f4301db51 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 21:48:08 -0400 Subject: [PATCH 04/19] 1 unit test still failing --- moalmanac/moalmanac.py | 20 ++- moalmanac/test/annotator_tests.py | 231 ++++++++++++++++++++------- moalmanac/test/investigator_tests.py | 57 +++++-- 3 files changed, 227 insertions(+), 81 deletions(-) diff --git a/moalmanac/moalmanac.py b/moalmanac/moalmanac.py index c47ff4c..05ff8c3 100644 --- a/moalmanac/moalmanac.py +++ b/moalmanac/moalmanac.py @@ -213,8 +213,6 @@ def main(patient, inputs, output_folder, config, dbs, dbs_preclinical=None): function_toggle = config['function_toggle'] efficacy_summary = investigator.SummaryDataFrame.create_empty_dataframe() - # efficacy_dictionary = {} - # cell_lines_dictionary = {} preclinical_efficacy_on = function_toggle.getboolean('calculate_preclinical_efficacy') # The input argument --disable_matchmaking will be removed in the next non-backwards compatible release @@ -247,7 +245,13 @@ def main(patient, inputs, output_folder, config, dbs, dbs_preclinical=None): ) if model_similarity_on: - similarity_results = matchmaker.Matchmaker.compare(dbs, dbs_preclinical, evaluated_somatic, string_id) + similarity_results = matchmaker.Matchmaker.compare( + dbs=dbs, + dbs_preclinical=dbs_preclinical, + somatic=evaluated_somatic, + case_sample_id=string_id, + config=config + ) similarity_summary = matchmaker.Report.create_report_dictionary( similarity_results, cell_lines_dictionary @@ -271,11 +275,11 @@ def main(patient, inputs, output_folder, config, dbs, dbs_preclinical=None): include_similarity = function_toggle.getboolean('include_model_similarity_in_actionability_report') reporter.Reporter.generate_actionability_report( - actionable = actionable, - report_dictionary = report_dictionary, - config = config, - similarity = 
similarity_summary if include_similarity else None, - output_directory = output_folder + actionable=actionable, + report_dictionary=report_dictionary, + config=config, + similarity=similarity_summary if include_similarity else None, + output_directory=output_folder ) diff --git a/moalmanac/test/annotator_tests.py b/moalmanac/test/annotator_tests.py index 7a8cb47..b512eeb 100644 --- a/moalmanac/test/annotator_tests.py +++ b/moalmanac/test/annotator_tests.py @@ -60,7 +60,7 @@ def test_annotate(self): bin_name = ACMG.bin_name df = pd.DataFrame({gene: ['TP53', 'FOO', 'PMS2', 'TSC1', 'AR']}) dbs = { - 'acmg_handle': '../datasources/acmg.secondaryfindings.v3.txt' + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt' } annotated = ACMG.annotate(df, dbs) @@ -164,7 +164,7 @@ def test_update_series_with_best_match(self): "doi": "10.1126/science.1062538", "pmid": "11423618", "nct": '', "last_updated": "6/13/19", "feature_display": "ABL1 p.T315I (Missense)", "predictive_implication_map": 1.0}, {}] - somatic_variant = Almanac.somatic_variant + somatic_variant = 'Somatic Variant' series = pd.Series(dtype=object) for columns in [Almanac.column_map_sensitive, Almanac.column_map_resistance, Almanac.column_map_prognostic]: @@ -197,9 +197,9 @@ def test_append_exac_af(self): alt = ExAC.alt af = ExAC.af feature_type = Features.feature_type - somatic = CONFIG['feature_types']['mut'] - germline = CONFIG['feature_types']['germline'] - cn = CONFIG['feature_types']['cna'] + somatic = 'Somatic Variant' + germline = 'Germline Variant' + cn = 'Copy Number' df = pd.DataFrame({chr: [1, 2, 3, 1], start: [100, 101, 103, 100], @@ -212,13 +212,18 @@ def test_append_exac_af(self): ref: ["C", "A", "T"], alt: ["G", "G", "G"], af: [1, 0.5, 0.001]}) - result = ExAC.append_exac_af(df, exac, [chr, start, ref, alt, af]) + biomarker_types = [somatic, germline] + result = ExAC.append_exac_af( + df=df, + ds=exac, + ds_columns=[chr, start, ref, alt, af], + variant_biomarker_types=biomarker_types) 
self.assertEqual([1, 0, 0, 0], result[af].tolist()) def test_annotate_common_af(self): - exac_common_threshold = ExAC.exac_common_threshold + exac_common_threshold = 0.001 series = pd.Series([float(exac_common_threshold) - 0.01, float(exac_common_threshold) + 0.01]) - result = ExAC.annotate_common_af(series) + result = ExAC.annotate_common_af(series, threshold=exac_common_threshold) self.assertEqual(0.0, result.loc[0]) self.assertEqual(1.0, result.loc[1]) @@ -248,7 +253,11 @@ class UnitTestValidation(unittest.TestCase): }) def test_append_validation(self): - result = OverlapValidation.append_validation(UnitTestValidation.dataframe1, UnitTestValidation.dataframe2) + result = OverlapValidation.append_validation( + UnitTestValidation.dataframe1, + UnitTestValidation.dataframe2, + biomarker_type='Somatic Variant' + ) result = result.fillna('') self.assertEqual(UnitTestValidation.dataframe1['feature'].tolist(), result['feature'].tolist()) self.assertEqual([0.20, '', 0.66, 0.0], result['validation_tumor_f'].tolist()) @@ -287,7 +296,7 @@ def test_get_mutation_index(self): dataframe = pd.DataFrame(['Somatic Variant', 'bar', 'foo'], columns=[OverlapValidation.feature_type]) solution = ['Somatic Variant'] solution_index = pd.Index([0]) - result = OverlapValidation.get_mutation_index(dataframe) + result = OverlapValidation.get_mutation_index(dataframe, biomarker_type='Somatic Variant') self.assertEqual(solution[0], dataframe.loc[result[0], OverlapValidation.feature_type]) self.assertEqual(solution_index[0], result[0]) @@ -331,18 +340,25 @@ class UnitTestPreclinicalEfficacy(unittest.TestCase): } df2 = pd.DataFrame(data_dictionary, index=[0, 1, 2]) dbs_dictionary = { - 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', - 'summary': '../datasources/preclinical/formatted/cell-lines-summary.txt', - 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', - 'copynumbers': 
'../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', - 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', - 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', - 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', - 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', - 'dictionary': '../datasources/preclinical/cell-lines.pkl' + 'almanac_gdsc_mappings': 'datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': 'datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': 'datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': 'datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': 'datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': 'datasources/preclinical/cell-lines.pkl' + } + config = { + 'feature_types': { + 'mut': 'Somatic Variant', + 'cna': 'Copy Number', + 'fusion': 'Rearrangement' + } } dbs_preclinical = datasources_Preclinical.import_dbs(dbs_dictionary) - efficacy_dictionary = SensitivityDictionary.create(dbs_preclinical, df1) + efficacy_dictionary = SensitivityDictionary.create(dbs_preclinical, df1, config=config) def test_annotate(self): result = PreclinicalEfficacy.annotate( @@ -367,26 +383,26 @@ def test_series_for_significance(self): class UnitTestPreclinicalMatchmaking(unittest.TestCase): def test_annotate_copy_numbers(self): dbs = { - 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 
'../datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': '../datasources/hereditary/hereditary.txt', - 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type alteration = PreclinicalMatchmaking.alteration - copy_number = PreclinicalMatchmaking.copy_number + copy_number = 'Copy 
Number' df = pd.DataFrame({ feature: ['CDKN2A', 'CDKN2A', 'KRAS'], @@ -394,7 +410,7 @@ def test_annotate_copy_numbers(self): }) df[feature_type] = copy_number df[alteration] = pd.NA - result = PreclinicalMatchmaking.annotate_copy_numbers(df, dbs) + result = PreclinicalMatchmaking.annotate_copy_numbers(df, dbs, biomarker_type_string=copy_number) expected_cdkn2a_del = { 'feature_match_1': 1, @@ -448,12 +464,26 @@ def test_annotate_copy_numbers(self): self.assertEqual(result.loc[2, key], expected_kras_amp[key]) def test_annotate_fusions(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type partner = PreclinicalMatchmaking.partner - fusion = PreclinicalMatchmaking.fusion + fusion = 'Rearrangement' model_id = PreclinicalMatchmaking.model_id df = pd.DataFrame({ @@ -463,7 +493,7 @@ def test_annotate_fusions(self): df[alteration_type] = 'Fusion' df[model_id] = 'case' - result, group1, group2 = 
PreclinicalMatchmaking.annotate_fusions(df, dbs) + result, group1, group2 = PreclinicalMatchmaking.annotate_fusions(df, dbs, biomarker_type_string=fusion) expected_index_0 = { 'feature_match_1': 1, @@ -696,12 +726,26 @@ def test_annotate_fusions(self): self.assertEqual(group2.loc[1, 'gsea_modules_bin'], expected_index_1_group2['gsea_modules_bin']) def test_annotate_fusions_matching(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type partner = PreclinicalMatchmaking.partner - fusion = PreclinicalMatchmaking.fusion + fusion = 'Fusion' model_id = PreclinicalMatchmaking.model_id evidence_map_str = PreclinicalMatchmaking.evidence_map_str merged = PreclinicalMatchmaking.merged @@ -743,13 +787,27 @@ def test_annotate_fusions_matching(self): self.assertEqual(result.loc[index, key], value) def test_annotate_somatic_variants(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 
'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type alteration = PreclinicalMatchmaking.alteration - somatic_variant = PreclinicalMatchmaking.somatic_variant + somatic_variant = 'Somatic Variant' df = pd.DataFrame({ feature: ['BRAF', 'BRAF', 'IDH1', 'CDKN2A'], @@ -758,7 +816,7 @@ def test_annotate_somatic_variants(self): }) df[feature_type] = somatic_variant - result = PreclinicalMatchmaking.annotate_somatic_variants(df, dbs) + result = PreclinicalMatchmaking.annotate_somatic_variants(df, dbs, biomarker_type_string=somatic_variant) expected_braf_1 = { 'feature_match_1': 1, @@ -844,11 +902,25 @@ def test_annotate_match_1(self): def test_annotate_match_2(self): match_2 = PreclinicalMatchmaking.match_2 - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 
'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - copy_number = PreclinicalMatchmaking.copy_number - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type @@ -908,11 +980,25 @@ def test_annotate_match_2(self): def test_annotate_match_3(self): match_3 = PreclinicalMatchmaking.match_3 - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 
'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - copy_number = PreclinicalMatchmaking.copy_number - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type @@ -981,10 +1067,25 @@ def test_annotate_match_3(self): def test_annotate_match_4(self): match_4 = PreclinicalMatchmaking.match_4 - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + 
somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type @@ -1030,11 +1131,25 @@ def test_annotate_match_4(self): self.assertEqual(result.loc[3, match_4], result.loc[3, 'expectation1']) def test_format_db(self): - dbs = Datasources.generate_db_dict(CONFIG) + dbs = { + 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': 'datasources/hereditary/hereditary.txt', + 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + } almanac = datasource_Almanac.import_ds(dbs) - copy_number = PreclinicalMatchmaking.copy_number - fusion = PreclinicalMatchmaking.fusion - somatic_variant = PreclinicalMatchmaking.somatic_variant + copy_number = 'Copy Number' + fusion = 'Rearrangement' + somatic_variant = 'Somatic Variant' feature = PreclinicalMatchmaking.feature alteration_type = PreclinicalMatchmaking.alteration_type diff --git a/moalmanac/test/investigator_tests.py b/moalmanac/test/investigator_tests.py index dc636db..f8a9a50 100644 --- a/moalmanac/test/investigator_tests.py +++ b/moalmanac/test/investigator_tests.py @@ -4,7 +4,7 @@ from datasources import Preclinical from investigator import Investigator, SensitivityDictionary - +from 
reader import Ini class UnitTestSensitivityDictionary(unittest.TestCase): def test_calculate_series_exp(self): @@ -54,7 +54,18 @@ def test_calculate_mann_whitney_u(self): self.assertTrue(math.isnan(statistic)) def test_create(self): - dbs = Preclinical.import_dbs() + dbs_paths = { + 'almanac_gdsc_mappings': 'datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': 'datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': 'datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': 'datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': 'datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': 'datasources/preclinical/cell-lines.pkl' + } + dbs = Preclinical.import_dbs(dbs_paths) data_dictionary = { 'feature_type': ['Somatic Variant'], 'feature': ['BRAF'], @@ -67,7 +78,8 @@ def test_create(self): actionable = pd.DataFrame(data_dictionary, index=[0]) expected_dabrafenib = '2.322e-12' expected_trametinib = '2.344e-09' - result = SensitivityDictionary.create(dbs, actionable) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + result = SensitivityDictionary.create(dbs, actionable, config) self.assertEqual(result[0]['Dabrafenib']['BRAF']['comparison']['pvalue_mww'], expected_dabrafenib) self.assertEqual(result[0]['Trametinib']['BRAF']['comparison']['pvalue_mww'], expected_trametinib) @@ -104,17 +116,27 @@ def test_generate_feature_strings(self): self.assertEqual(result, ['CDKN2A Copy Number Deletion', 'CDKN2A Copy Number', 'CDKN2A']) def test_select_split_function(self): - var = Investigator.feature_types['variant'] - cn = Investigator.feature_types['copy_number'] 
- fusion = Investigator.feature_types['fusion'] + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + var_string = config['feature_types']['mut'] + cn_string = config['feature_types']['cna'] + fusion_string = config['feature_types']['fusion'] var_function = SensitivityDictionary.split_samples_for_variants cn_function = SensitivityDictionary.split_samples_for_copy_numbers fusion_function = SensitivityDictionary.split_samples_for_fusions - self.assertEqual(SensitivityDictionary.select_split_function(var), var_function) - self.assertEqual(SensitivityDictionary.select_split_function(cn), cn_function) - self.assertEqual(SensitivityDictionary.select_split_function(fusion), fusion_function) + self.assertEqual( + SensitivityDictionary.select_split_function(var_string, var_string, cn_string, fusion_string), + var_function + ) + self.assertEqual( + SensitivityDictionary.select_split_function(cn_string, var_string, cn_string, fusion_string), + cn_function + ) + self.assertEqual( + SensitivityDictionary.select_split_function(fusion_string, var_string, cn_string, fusion_string), + fusion_function + ) def test_split_samples_by_wt_mut(self): gene = SensitivityDictionary.gene @@ -161,9 +183,11 @@ def test_split_samples_by_wt_mut(self): fusions: db_fusions } - results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples) - results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples) - results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + + results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples, config) + results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples, config) + results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples, config) 
self.assertEqual(results_variants['BRAF']['samples'][0], ['B', 'C', 'D']) self.assertEqual(results_variants['BRAF']['samples'][1], ['A', 'E']) self.assertEqual(results_variants['BRAF Somatic Variant']['samples'][0], ['E']) @@ -217,7 +241,8 @@ def test_split_samples_for_copy_numbers(self): cnas: db_cnas, } - results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + results_cnas = SensitivityDictionary.split_samples_by_wt_mut(data.loc[1, :], dbs, samples, config) self.assertEqual(results_cnas['CDKN2A']['samples'][0], ['A', 'C', 'D', 'E']) self.assertEqual(results_cnas['CDKN2A']['samples'][1], ['B']) self.assertEqual(results_cnas['CDKN2A Copy Number']['samples'][0], ['A', 'D', 'E']) @@ -253,7 +278,8 @@ def test_split_samples_for_fusions(self): fusions: db_fusions } - results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + results_fusions = SensitivityDictionary.split_samples_by_wt_mut(data.loc[2, :], dbs, samples, config) self.assertEqual(results_fusions['TMPRSS2']['samples'][0], ['A', 'B', 'E']) self.assertEqual(results_fusions['TMPRSS2']['samples'][1], ['C', 'D']) self.assertEqual(results_fusions['ERG']['samples'][0], ['A', 'B', 'D', 'E']) @@ -294,7 +320,8 @@ def test_split_samples_for_variants(self): variants: db_variants } - results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples) + config = Ini.read('config.ini', extended_interpolation=False, convert_to_dictionary=False) + results_variants = SensitivityDictionary.split_samples_by_wt_mut(data.loc[0, :], dbs, samples, config) self.assertEqual(results_variants['BRAF']['samples'][0], ['B', 'C', 'D']) self.assertEqual(results_variants['BRAF']['samples'][1], ['A', 'E']) self.assertEqual(results_variants['BRAF Somatic 
Variant']['samples'][0], ['E']) From 367374bffb81dfde1ee0660371f67dade27968c2 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 22:20:50 -0400 Subject: [PATCH 05/19] Annotator unit test resolved --- moalmanac/annotator.py | 2 -- moalmanac/test/annotator_tests.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/moalmanac/annotator.py b/moalmanac/annotator.py index b7b22e4..51b181e 100644 --- a/moalmanac/annotator.py +++ b/moalmanac/annotator.py @@ -1015,8 +1015,6 @@ def annotate(cls, df, dbs, config): ds_columns=[cls.chr, cls.start, cls.ref, cls.alt, cls.af], variant_biomarker_types=[config['feature_types']['mut'], config['feature_types']['germline']] ) - print(df_annotated['exac_af'].head()) - print(df_annotated['exac_af'].isnull().value_counts()) common_allele_frequency_threshold=config['exac']['exac_common_af_threshold'] df_annotated[cls.bin_name] = cls.annotate_common_af( series_exac_af=df_annotated[cls.af], diff --git a/moalmanac/test/annotator_tests.py b/moalmanac/test/annotator_tests.py index b512eeb..fd4afce 100644 --- a/moalmanac/test/annotator_tests.py +++ b/moalmanac/test/annotator_tests.py @@ -741,11 +741,12 @@ def test_annotate_fusions_matching(self): 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' } + feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type alteration_type = PreclinicalMatchmaking.alteration_type partner = PreclinicalMatchmaking.partner - fusion = 'Fusion' + fusion = 'Rearrangement' model_id = PreclinicalMatchmaking.model_id evidence_map_str = PreclinicalMatchmaking.evidence_map_str merged = PreclinicalMatchmaking.merged From b7e3ff9973b6dc05c9976927bd5a8e5f3bf125e3 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 22:52:45 -0400 Subject: [PATCH 06/19] Added time to default output folder from run_example --- moalmanac/annotation-databases.ini | 27 ++-- 
moalmanac/preclinical-databases.ini | 19 +-- moalmanac/run_example.py | 11 +- moalmanac/test/annotator_tests.py | 228 +++++++++++++-------------- moalmanac/test/datasources_tests.py | 18 +-- moalmanac/test/investigator_tests.py | 18 +-- 6 files changed, 163 insertions(+), 158 deletions(-) diff --git a/moalmanac/annotation-databases.ini b/moalmanac/annotation-databases.ini index 65503a9..200f099 100644 --- a/moalmanac/annotation-databases.ini +++ b/moalmanac/annotation-databases.ini @@ -1,14 +1,15 @@ [databases] -almanac_handle = datasources/moalmanac/molecular-oncology-almanac.json -cancerhotspots_handle = datasources/cancerhotspots/hotspots_v2.txt -3dcancerhotspots_handle = datasources/cancerhotspots/hotspots3d.txt -cgc_handle = datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv -cosmic_handle = datasources/cosmic/CosmicMutantExport_v97.lite.txt -gsea_pathways_handle = datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt -gsea_modules_handle = datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt -exac_handle = datasources/exac/exac.expanded.r1.txt -acmg_handle = datasources/acmg/acmg.secondaryfindings.v3.txt -clinvar_handle = datasources/clinvar/variant_summary.lite.txt -hereditary_handle = datasources/hereditary/hereditary.txt -oncotree_handle = datasources/oncotree/oncotree.2023-03-09.txt -lawrence_handle = datasources/lawrence/lawrence_mapped_ontology.txt \ No newline at end of file +root = ../datasources +almanac_handle = ${root}/moalmanac/molecular-oncology-almanac.json +cancerhotspots_handle = ${root}/cancerhotspots/hotspots_v2.txt +3dcancerhotspots_handle = ${root}/cancerhotspots/hotspots3d.txt +cgc_handle = ${root}/cancergenecensus/cancer_gene_census_v97.genes.tsv +cosmic_handle = ${root}/cosmic/CosmicMutantExport_v97.lite.txt +gsea_pathways_handle = ${root}/gsea_gene_sets/GSEA_cancer_gene_sets.txt +gsea_modules_handle = ${root}/gsea_gene_sets/c4.cm.v6.0.symbols.txt +exac_handle = ${root}/exac/exac.expanded.r1.txt +acmg_handle = 
${root}/acmg/acmg.secondaryfindings.v3.txt +clinvar_handle = ${root}/clinvar/variant_summary.lite.txt +hereditary_handle = ${root}/hereditary/hereditary.txt +oncotree_handle = ${root}/oncotree/oncotree.2023-03-09.txt +lawrence_handle = ${root}/lawrence/lawrence_mapped_ontology.txt \ No newline at end of file diff --git a/moalmanac/preclinical-databases.ini b/moalmanac/preclinical-databases.ini index de60614..d6e2086 100644 --- a/moalmanac/preclinical-databases.ini +++ b/moalmanac/preclinical-databases.ini @@ -1,10 +1,11 @@ [preclinical] -almanac_gdsc_mappings = datasources/preclinical/formatted/almanac-gdsc-mappings.json -summary = datasources/preclinical/formatted/cell-lines.summary.txt -variants = datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt -copynumbers = datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt -fusions = datasources/preclinical/annotated/cell-lines.fusions.annotated.txt -fusions1 = datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt -fusions2 = datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt -gdsc = datasources/preclinical/formatted/sanger.gdsc.txt -dictionary = datasources/preclinical/cell-lines.pkl \ No newline at end of file +root = ../datasources +almanac_gdsc_mappings = ${root}/preclinical/formatted/almanac-gdsc-mappings.json +summary = ${root}/preclinical/formatted/cell-lines.summary.txt +variants = ${root}/preclinical/annotated/cell-lines.somatic-variants.annotated.txt +copynumbers = ${root}/preclinical/annotated/cell-lines.copy-numbers.annotated.txt +fusions = ${root}/preclinical/annotated/cell-lines.fusions.annotated.txt +fusions1 = ${root}/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt +fusions2 = ${root}/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt +gdsc = ${root}/preclinical/formatted/sanger.gdsc.txt +dictionary = ${root}/preclinical/cell-lines.pkl \ No newline at end of file diff --git a/moalmanac/run_example.py 
b/moalmanac/run_example.py index 87c12c4..75718b4 100644 --- a/moalmanac/run_example.py +++ b/moalmanac/run_example.py @@ -1,7 +1,9 @@ import moalmanac import os -import time import subprocess +import time + +from datetime import date from reader import Ini @@ -47,15 +49,16 @@ dbs_preclinical_ini_path = "preclinical-databases.ini" config_ini = Ini.read(config_ini_path, extended_interpolation=False, convert_to_dictionary=False) -db_paths = Ini.read(dbs_ini_path, extended_interpolation=False, convert_to_dictionary=True) -preclinical_db_paths = Ini.read(dbs_preclinical_ini_path, extended_interpolation=False, convert_to_dictionary=True) +db_paths = Ini.read(dbs_ini_path, extended_interpolation=True, convert_to_dictionary=True) +preclinical_db_paths = Ini.read(dbs_preclinical_ini_path, extended_interpolation=True, convert_to_dictionary=True) def execute_cmd(command): subprocess.call(command, shell=True) -output_directory = "2024-07-03-config-edits" +today = date.today().isoformat() +output_directory = f"{today}-example-outputs" if output_directory != "": cmd = f"mkdir -p {output_directory}" execute_cmd(cmd) diff --git a/moalmanac/test/annotator_tests.py b/moalmanac/test/annotator_tests.py index fd4afce..399a806 100644 --- a/moalmanac/test/annotator_tests.py +++ b/moalmanac/test/annotator_tests.py @@ -60,7 +60,7 @@ def test_annotate(self): bin_name = ACMG.bin_name df = pd.DataFrame({gene: ['TP53', 'FOO', 'PMS2', 'TSC1', 'AR']}) dbs = { - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt' + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt' } annotated = ACMG.annotate(df, dbs) @@ -340,15 +340,15 @@ class UnitTestPreclinicalEfficacy(unittest.TestCase): } df2 = pd.DataFrame(data_dictionary, index=[0, 1, 2]) dbs_dictionary = { - 'almanac_gdsc_mappings': 'datasources/preclinical/formatted/almanac-gdsc-mappings.json', - 'summary': 'datasources/preclinical/formatted/cell-lines.summary.txt', - 'variants': 
'datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', - 'copynumbers': 'datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', - 'fusions': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', - 'fusions1': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', - 'fusions2': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', - 'gdsc': 'datasources/preclinical/formatted/sanger.gdsc.txt', - 'dictionary': 'datasources/preclinical/cell-lines.pkl' + 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': '../datasources/preclinical/cell-lines.pkl' } config = { 'feature_types': { @@ -383,19 +383,19 @@ def test_series_for_significance(self): class UnitTestPreclinicalMatchmaking(unittest.TestCase): def test_annotate_copy_numbers(self): dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 
'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type @@ -465,19 +465,19 @@ def test_annotate_copy_numbers(self): def test_annotate_fusions(self): dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 
'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type @@ -727,19 +727,19 @@ def test_annotate_fusions(self): def test_annotate_fusions_matching(self): dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 
'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' } feature = PreclinicalMatchmaking.feature @@ -789,19 +789,19 @@ def test_annotate_fusions_matching(self): def test_annotate_somatic_variants(self): dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 
'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' } feature = PreclinicalMatchmaking.feature feature_type = PreclinicalMatchmaking.feature_type @@ -904,19 +904,19 @@ def test_annotate_match_2(self): match_2 = PreclinicalMatchmaking.match_2 
dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' } almanac = datasource_Almanac.import_ds(dbs) copy_number = 'Copy Number' @@ 
-982,19 +982,19 @@ def test_annotate_match_3(self): match_3 = PreclinicalMatchmaking.match_3 dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': 
'../datasources/lawrence/lawrence_mapped_ontology.txt' } almanac = datasource_Almanac.import_ds(dbs) copy_number = 'Copy Number' @@ -1069,19 +1069,19 @@ def test_annotate_match_4(self): match_4 = PreclinicalMatchmaking.match_4 dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': '../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', 
+ 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' } almanac = datasource_Almanac.import_ds(dbs) copy_number = 'Copy Number' @@ -1133,19 +1133,19 @@ def test_annotate_match_4(self): def test_format_db(self): dbs = { - 'almanac_handle': 'datasources/moalmanac/molecular-oncology-almanac.json', - 'cancerhotspots_handle': 'datasources/cancerhotspots/hotspots_v2.txt', - '3dcancerhotspots_handle': 'datasources/cancerhotspots/hotspots3d.txt', - 'cgc_handle': 'datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', - 'cosmic_handle': 'datasources/cosmic/CosmicMutantExport_v97.lite.txt', - 'gsea_pathways_handle': 'datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', - 'gsea_modules_handle': 'datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', - 'exac_handle': 'datasources/exac/exac.expanded.r1.txt', - 'acmg_handle': 'datasources/acmg/acmg.secondaryfindings.v3.txt', - 'clinvar_handle': 'datasources/clinvar/variant_summary.lite.txt', - 'hereditary_handle': 'datasources/hereditary/hereditary.txt', - 'oncotree_handle': 'datasources/oncotree/oncotree.2023-03-09.txt', - 'lawrence_handle': 'datasources/lawrence/lawrence_mapped_ontology.txt' + 'almanac_handle': '../datasources/moalmanac/molecular-oncology-almanac.json', + 'cancerhotspots_handle': '../datasources/cancerhotspots/hotspots_v2.txt', + '3dcancerhotspots_handle': '../datasources/cancerhotspots/hotspots3d.txt', + 'cgc_handle': '../datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv', + 'cosmic_handle': '../datasources/cosmic/CosmicMutantExport_v97.lite.txt', + 'gsea_pathways_handle': '../datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt', + 'gsea_modules_handle': '../datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt', + 'exac_handle': '../datasources/exac/exac.expanded.r1.txt', + 'acmg_handle': '../datasources/acmg/acmg.secondaryfindings.v3.txt', + 'clinvar_handle': 
'../datasources/clinvar/variant_summary.lite.txt', + 'hereditary_handle': '../datasources/hereditary/hereditary.txt', + 'oncotree_handle': '../datasources/oncotree/oncotree.2023-03-09.txt', + 'lawrence_handle': '../datasources/lawrence/lawrence_mapped_ontology.txt' } almanac = datasource_Almanac.import_ds(dbs) copy_number = 'Copy Number' diff --git a/moalmanac/test/datasources_tests.py b/moalmanac/test/datasources_tests.py index 82f0cd4..7d95676 100644 --- a/moalmanac/test/datasources_tests.py +++ b/moalmanac/test/datasources_tests.py @@ -29,15 +29,15 @@ def test_import_dbs(self): gdsc = Preclinical.gdsc mappings = Preclinical.mappings dbs_dictionary = { - 'almanac_gdsc_mappings': 'datasources/preclinical/formatted/almanac-gdsc-mappings.json', - 'summary': 'datasources/preclinical/formatted/cell-lines.summary.txt', - 'variants': 'datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', - 'copynumbers': 'datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', - 'fusions': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', - 'fusions1': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', - 'fusions2': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', - 'gdsc': 'datasources/preclinical/formatted/sanger.gdsc.txt', - 'dictionary': 'datasources/preclinical/cell-lines.pkl' + 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': 
'../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 'dictionary': '../datasources/preclinical/cell-lines.pkl' } dbs = Preclinical.import_dbs(dbs_dictionary) diff --git a/moalmanac/test/investigator_tests.py b/moalmanac/test/investigator_tests.py index f8a9a50..30dc662 100644 --- a/moalmanac/test/investigator_tests.py +++ b/moalmanac/test/investigator_tests.py @@ -55,15 +55,15 @@ def test_calculate_mann_whitney_u(self): def test_create(self): dbs_paths = { - 'almanac_gdsc_mappings': 'datasources/preclinical/formatted/almanac-gdsc-mappings.json', - 'summary': 'datasources/preclinical/formatted/cell-lines.summary.txt', - 'variants': 'datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', - 'copynumbers': 'datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', - 'fusions': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', - 'fusions1': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', - 'fusions2': 'datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', - 'gdsc': 'datasources/preclinical/formatted/sanger.gdsc.txt', - 'dictionary': 'datasources/preclinical/cell-lines.pkl' + 'almanac_gdsc_mappings': '../datasources/preclinical/formatted/almanac-gdsc-mappings.json', + 'summary': '../datasources/preclinical/formatted/cell-lines.summary.txt', + 'variants': '../datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt', + 'copynumbers': '../datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt', + 'fusions': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.txt', + 'fusions1': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt', + 'fusions2': '../datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt', + 'gdsc': '../datasources/preclinical/formatted/sanger.gdsc.txt', + 
'dictionary': '../datasources/preclinical/cell-lines.pkl' } dbs = Preclinical.import_dbs(dbs_paths) data_dictionary = { From 92e9759f4eb8eac65b84dad4ed912c87ee81b1d4 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 22:54:51 -0400 Subject: [PATCH 07/19] Update to example actionable output --- example_output/example_output.actionable.txt | 44 ++++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/example_output/example_output.actionable.txt b/example_output/example_output.actionable.txt index 863f2f4..909dca8 100644 --- a/example_output/example_output.actionable.txt +++ b/example_output/example_output.actionable.txt @@ -1,22 +1,22 @@ -score_bin sensitive_predictive_implication resistance_predictive_implication prognostic_predictive_implication feature_type feature alteration_type alteration tumor_f total_coverage exac_af exac_common clinvar sensitive_score_bin sensitive_therapy_name sensitive_therapy_strategy sensitive_therapy_type sensitive_oncotree_code sensitive_description sensitive_citation sensitive_url resistance_score_bin resistance_therapy_name resistance_therapy_strategy resistance_therapy_type resistance_oncotree_code resistance_description resistance_citation resistance_url prognostic_score_bin favorable_prognosis prognostic_oncotree_code prognostic_description prognostic_citation prognostic_url number_germline_mutations_in_gene validation_total_coverage validation_tumor_f validation_detection_power feature_display preclinical_efficacy_observed patient_id tumor_sample_barcode normal_sample_barcode -Putatively Actionable FDA-Approved Guideline Guideline Somatic Variant BRAF Missense p.V600E 0.6316 152 1.6e-05 0.0 Putatively Actionable Binimetinib + Encorafenib MEK inhibition + B-RAF inhibition Targeted therapy NSCLC The U.S. 
Food and Drug Administration (FDA) approved encorafenib in combination with binimetinib for the treatment of adult patients with metastatic non-small cell lung cancer (NSCLC) with BRAF V600E mutation, as detected by an FDA-approved test. Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf. Revised October 2023. Accessed November 1, 2023. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf Putatively Actionable Panitumumab EGFR inhibition Targeted therapy COADREAD Panitumumab (Vectibix) is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with metastatic colorectal cancer, BRAF V600E makes response to panitumumab or cetuximab highly unlikely unless given with a BRAF inhibitor. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.4.2018. © National Comprehensive Cancer Network, Inc. 2018. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Putatively Actionable 0 COADREAD BRAF V600E alterations are associated with an unfavorable prognosis in MSI-low and microsatellite-stable patients. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org. 
https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf 1.0 4.0 0.0 0.161 BRAF p.V600E (Missense) example example_tumor_profile example_normal_profile -Putatively Actionable Guideline Copy Number CDK4 Amplification 0.0 0.0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy WDLS Palbociclib (Ibrance) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with well-differentiated liposarcoma and CDK4 amplification. CDK4 amplification is characteristic of well-differentiated and dedifferentiated liposarcomas, and palbociclib shows activity in this context. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Soft Tissue Sarcoma V.1.2021. © National Comprehensive Cancer Network, Inc. 2019. All rights reserved. Accessed November 19th, 2019. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/sarcoma_blocks.pdf CDK4 Amplification 1 example -Putatively Actionable Clinical evidence Copy Number CDKN2A Deletion 0.0 0.0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy ULMS A patient with uterine leiomyosarcoma whose tumor harbored a CDKN2A mutant which inactivated p16INK4a experienced clinical benefit from treatment with palbociclib. Elvin JA, Gay LM, Ort R, et al. Clinical Benefit in Response to Palbociclib Treatment in Refractory Uterine Leiomyosarcomas with a Common CDKN2A Alteration. Oncologist. 2017;22(4):416-421. https://doi.org/10.1634/theoncologist.2016-0310 CDKN2A Deletion 1 example -Putatively Actionable Preclinical Guideline Copy Number TP53 Deletion 0.0 0.0 Putatively Actionable Talazoparib PARP inhibition Targeted therapy OS Response to talazoparib in osteosarcoma cell lines was associated with homologous recombination deficiency in a study of 5 cancer cell lines. 
Osteosarcoma cell lines MG63 and ZK-58 displayed the highest sensitivity to talazoparib, SaOS-2 and MNNG-HOS displayed intermediate sensitivity, and U2OS cells remained resistant. Cell lines MG63, ZK-58, and MNNG-HOS scored HRD-LOH positive according to a score (Abkevich et al. 2012). MG63 cells harbored copy losses in BAP1, FANCA, and FANCD2 while ZK-58 carried disruptive copy loss in BARD1 and copy gain in FANCD2. SaOS-2 cells harbored copy losses in CHEK2 and TP53 and MNNG-HOS cells have copy loss of ATM and disruptive copy gains in PTEN and FANCD2. The talazoparib-resistant cell line, U2OS, carried a heterozygous BRCA2 copy loss and one intact BRCA2 alelle. Engert F, Kovac M, Baumhoer D, Nathrath M, Fulda S. Osteosarcoma cells with genetic signatures of BRCAness are susceptible to the PARP inhibitor talazoparib alone or in combination with chemotherapeutics. Oncotarget. 2016;8(30):48794-48806. https://www.clinicalkey.com/#!/content/playContent/1-s2.0-S1470204513700494 Putatively Actionable 0 MM Deletion of 17p13 leads to LoH of TP53 and is considered a high-risk feature of multiple myeloma. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Multiple Myeloma V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/myeloma_blocks.pdf 1.0 TP53 Deletion 1 example -Putatively Actionable Clinical evidence Copy Number BRAF Amplification 0.0 0.0 Putatively Actionable Vemurafenib B-RAF inhibition Targeted therapy MEL Amplification of BRAF may predict resistance to RAF inhibition. Wagle, Nikhil, et al. MAP kinase pathway alterations in BRAF-mutant melanoma patients with acquired resistance to combined RAF/MEK inhibition. Cancer discovery 4.1 (2014): 61-68. 
https://doi.org/10.1158/2159-8290.CD-13-0631 1.0 BRAF Amplification example -Investigate Actionability FDA-Approved Guideline Guideline Microsatellite Stability MSI-High Investigate Actionability Dostarlimab-gxly PD-1/PD-L1 inhibition Immunotherapy UCEC The U.S. Food and Drug Administration (FDA) granted approval to dostarlimab-gxly in combination with carboplatin and paclitaxel, followed by single agent dostarlimab-gxly, for the treatment of adult patients primary advanced or recurrent endometrial cancer that is mismatch repair deficient (dMMR), as determined by an FDA-approved test, or microsatellite-instability-high. GlaxoSmithKline LLC. Jemperli (dostarlimab-gxly) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf. Revised July 2023. Accessed September 6, 2023. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf Investigate Actionability 5-Fluorouracil Thymidylate synthase inhibition Chemotherapy COADREAD 5-Fluorouracil is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with MSI-High colorectal cancer. These patients appear to not benefit from, and may be resistant to, 5-fluorouracil therapy. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Investigate Actionability 1 COADREAD Patients with MSI-High colorectal cancer often have a favorable prognosis. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. 
Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf MSI-High example -Investigate Actionability Guideline Rearrangement COL1A1 Fusion COL1A1--CITED4 0.0 0.0 Investigate Actionability Imatinib PDGF-R inhibition Targeted therapy DFSP Imatinib (Gleevec) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with localized or metastatic dermatofibrosarcoma tumors containing t(17;22)(q22;q13). Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Dermatofibrosarcoma V.1.2017. © National Comprehensive Cancer Network, Inc. 2017. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/dfsp_blocks.pdf COL1A1--CITED4 Fusion 0 example -Investigate Actionability Inferential Mutational Signature SBS5 v3.4 0.224 Investigate Actionability Cisplatin Platinum-based chemotherapy Chemotherapy BLCA COSMIC Signature 5 is associated with somatic ERCC2 mutations, which may suggest sensitivity to cisplatin based chemotherapy. Kim J, Mouw KW, Polak P, et al. Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors. Nat Genet. 2016;48(6):600-606. https://doi.org/10.1038/ng.3557 SBS5 (22%) example -Investigate Actionability Inferential Mutational Signature SBS10b v3.4 0.119 Investigate Actionability Durvalumab PD-1/PD-L1 inhibition Immunotherapy COSMIC Signature 10 is observed in some of the most hypermutant samples and recurrent POLE mutations. POLE mutant tumors are being included along with MMR-deficient tumors in several ongoing trials for sensitivity to immunotherapy. Mouw KW, Goldberg MS, Konstantinopoulos PA, D'Andrea AD. DNA Damage and Repair Biomarkers of Immunotherapy Response Cancer Discov. 
2017; 7(7):675-693. https://doi.org/10.1158/2159-8290.CD-17-0226 SBS10b (12%) example -Investigate Actionability Inferential Aneuploidy Whole genome doubling Investigate Actionability 0 WGD was associated with adverse survival pan-cancer in patients with advanced disease and in cancers with heterogeneous clinical outcomes, even following the development of metastasis. Bielski CM, Zehir A, Penson AV, et al. Genome doubling shapes the evolution and prognosis of advanced cancers Nat Genet. 2018; 50(8):1189-1195. https://doi.org/10.1038/s41588-018-0165-1 example -Investigate Actionability FDA-Approved Germline Variant BRCA2 Frameshift p.S1982fs 0.5 100 0.0 0.0 Pathogenic Investigate Actionability Abiraterone acetate + Niraparib Antiandrogen + PARP inhibition Combination therapy PRAD The U.S. Food and Drug Administration (FDA) granted approval to niraparib, a poly (ADP-ribose) polymerase (PARP) inhibitor, and abiraterone acetate, a CYP17 inhibitor indicated with prednisone for the treatment of adult patients with deleterious or suspected deleterious BRCA-mutated (BRCAm) metastatic castration-resistant prostate cancer (mCRPC). Janssen Biotech, Inc. Akeega (niraparib and abiraterone acetate) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf. Revised August 2023. Accessed September 6, 2023. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf BRCA2 p.S1982fs (Frameshift) example example_tumor_profile example_normal_profile -Investigate Actionability Clinical evidence Somatic Variant MSH2 Missense p.D887N 0.8039 51 0.0 0.0 Investigate Actionability Pembrolizumab PD-1/PD-L1 inhibition Immunotherapy COADREAD Patients with defects in DNA mismatch repair genes may have enhanced sensitivity to immune checkpoint blockade. Le DT, Uram JN, Wang H, et al. PD-1 Blockade in Tumors with Mismatch-Repair Deficiency. N Engl J Med. 2015;372(26):2509-20. 
https://doi.org/10.1056/NEJMoa1500596 0.0 0.0 0.0 MSH2 p.D887N (Missense) example example_tumor_profile example_normal_profile -Investigate Actionability Preclinical Copy Number FGFR2 Deletion 0.0 0.0 Investigate Actionability Infigratinib FGFR inhibition Targeted therapy COADREAD A study of 32 Cancer Cell Line Encyclopedia cell lines demonstrating FGFR1 and FGFR2 amplification demonstrated sensitivity to Infigratinib. Guagnano V, Kauffmann A, Wohrle S, et al. FGFR genetic alterations predict for sensitivity to NVP-BGJ398, a selective pan-FGFR inhibitor. Cancer Discov. 2012;2(12):1118-33. https://doi.org/10.1158/2159-8290.CD-12-0210 FGFR2 Deletion example -Investigate Actionability Guideline Somatic Variant STAG2 Missense p.F467I 0.3571 56 0.0 0.0 Investigate Actionability 0 MDS The National Comprehensive Cancer Network® (NCCN®) highlights STAG2 nonsense, frameshift, and splice site variants as being associated with a poor prognosis in patients with myelodysplastic syndromes. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 27.0 0.0 0.9881 STAG2 p.F467I (Missense) example example_tumor_profile example_normal_profile -Investigate Actionability Guideline Somatic Variant ZRSR2 Missense p.N261I 0.4019 107 0.0 0.0 Investigate Actionability 0 MDS The National Comprehensive Cancer Network® (NCCN®) highlights ZRSR2 nonsense and frameshift variants as being associated with a poor prognosis in patients with myelodysplastic syndromes. Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. 
All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org. https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 29.0 0.0 0.9987 ZRSR2 p.N261I (Missense) example example_tumor_profile example_normal_profile -Biologically Relevant Somatic Variant NTRK2 Missense p.H300Y 0.6136 44 0.0 0.0 17.0 0.0 0.9982 NTRK2 p.H300Y (Missense) example example_tumor_profile example_normal_profile -Biologically Relevant Somatic Variant PDGFRB Missense p.G674E 0.4 40 0.0 0.0 222.0 0.0 1.0 PDGFRB p.G674E (Missense) example example_tumor_profile example_normal_profile -Biologically Relevant Germline Variant BRAF Nonsense p.R509* 0.5 100 0.0 0.0 BRAF p.R509* (Nonsense) example example_tumor_profile example_normal_profile -Biologically Relevant Microsatellite Stability Supporting variants PRDM2 p.E282del (Deletion) Supporting variants: PRDM2 p.E282del (Deletion) example -Biologically Relevant Mutational Signature SBS7a v3.4 0.424 SBS7a (42%) example -Biologically Relevant Mutational Signature SBS30 v3.4 0.231 SBS30 (23%) example +score_bin sensitive_predictive_implication resistance_predictive_implication prognostic_predictive_implication feature_type feature alteration_type alteration tumor_f total_coverage exac_af exac_common clinvar sensitive_score_bin sensitive_therapy_name sensitive_therapy_strategy sensitive_therapy_type sensitive_oncotree_code sensitive_description sensitive_citation sensitive_url resistance_score_bin resistance_therapy_name resistance_therapy_strategy resistance_therapy_type resistance_oncotree_code resistance_description resistance_citation resistance_url prognostic_score_bin favorable_prognosis prognostic_oncotree_code prognostic_description prognostic_citation prognostic_url number_germline_mutations_in_gene validation_total_coverage validation_tumor_f validation_detection_power feature_display preclinical_efficacy_observed patient_id tumor_sample_barcode 
normal_sample_barcode +Putatively Actionable FDA-Approved Guideline Guideline Somatic Variant BRAF Missense p.V600E 0.6316 152 1.60E-05 0 Putatively Actionable Binimetinib + Encorafenib MEK inhibition + B-RAF inhibition Targeted therapy NSCLC "The U.S. Food and Drug Administration (FDA) approved encorafenib in combination with binimetinib for the treatment of adult patients with metastatic non-small cell lung cancer (NSCLC) with BRAF V600E mutation, as detected by an FDA-approved test." "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf. Revised October 2023. Accessed November 1, 2023." https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/210496s014lbl.pdf Putatively Actionable Panitumumab EGFR inhibition Targeted therapy COADREAD "Panitumumab (Vectibix) is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with metastatic colorectal cancer, BRAF V600E makes response to panitumumab or cetuximab highly unlikely unless given with a BRAF inhibitor. " "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.4.2018. © National Comprehensive Cancer Network, Inc. 2018. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Putatively Actionable 0 COADREAD BRAF V600E alterations are associated with an unfavorable prognosis in MSI-low and microsatellite-stable patients. "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed March 20 2019. To view the most recent and complete version of the guideline, go online to NCCN.org." 
https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf 1 4 0 0.161 BRAF p.V600E (Missense) example example_tumor_profile example_normal_profile +Putatively Actionable Guideline Copy Number CDK4 Amplification 0 0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy WDLS "Palbociclib (Ibrance) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with well-differentiated liposarcoma and CDK4 amplification. CDK4 amplification is characteristic of well-differentiated and dedifferentiated liposarcomas, and palbociclib shows activity in this context." "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Soft Tissue Sarcoma V.1.2021. © National Comprehensive Cancer Network, Inc. 2019. All rights reserved. Accessed November 19th, 2019. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/sarcoma_blocks.pdf CDK4 Amplification example +Putatively Actionable Clinical evidence Copy Number CDKN2A Deletion 0 0 Putatively Actionable Palbociclib CDK4/6 inhibition Targeted therapy ULMS A patient with uterine leiomyosarcoma whose tumor harbored a CDKN2A mutant which inactivated p16INK4a experienced clinical benefit from treatment with palbociclib. "Elvin JA, Gay LM, Ort R, et al. Clinical Benefit in Response to Palbociclib Treatment in Refractory Uterine Leiomyosarcomas with a Common CDKN2A Alteration. Oncologist. 2017;22(4):416-421." https://doi.org/10.1634/theoncologist.2016-0310 CDKN2A Deletion example +Putatively Actionable Preclinical Guideline Copy Number TP53 Deletion 0 0 Putatively Actionable Talazoparib PARP inhibition Targeted therapy OS "Response to talazoparib in osteosarcoma cell lines was associated with homologous recombination deficiency in a study of 5 cancer cell lines. 
Osteosarcoma cell lines MG63 and ZK-58 displayed the highest sensitivity to talazoparib, SaOS-2 and MNNG-HOS displayed intermediate sensitivity, and U2OS cells remained resistant. Cell lines MG63, ZK-58, and MNNG-HOS scored HRD-LOH positive according to a score (Abkevich et al. 2012). MG63 cells harbored copy losses in BAP1, FANCA, and FANCD2 while ZK-58 carried disruptive copy loss in BARD1 and copy gain in FANCD2. SaOS-2 cells harbored copy losses in CHEK2 and TP53 and MNNG-HOS cells have copy loss of ATM and disruptive copy gains in PTEN and FANCD2. The talazoparib-resistant cell line, U2OS, carried a heterozygous BRCA2 copy loss and one intact BRCA2 alelle." "Engert F, Kovac M, Baumhoer D, Nathrath M, Fulda S. Osteosarcoma cells with genetic signatures of BRCAness are susceptible to the PARP inhibitor talazoparib alone or in combination with chemotherapeutics. Oncotarget. 2016;8(30):48794-48806." https://www.clinicalkey.com/#!/content/playContent/1-s2.0-S1470204513700494 Putatively Actionable 0 MM Deletion of 17p13 leads to LoH of TP53 and is considered a high-risk feature of multiple myeloma. "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Multiple Myeloma V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/myeloma_blocks.pdf 1 TP53 Deletion example +Putatively Actionable Clinical evidence Copy Number BRAF Amplification 0 0 Putatively Actionable Vemurafenib B-RAF inhibition Targeted therapy MEL Amplification of BRAF may predict resistance to RAF inhibition. "Wagle, Nikhil, et al. MAP kinase pathway alterations in BRAF-mutant melanoma patients with acquired resistance to combined RAF/MEK inhibition. Cancer discovery 4.1 (2014): 61-68." 
https://doi.org/10.1158/2159-8290.CD-13-0631 1 BRAF Amplification example +Investigate Actionability FDA-Approved Guideline Guideline Microsatellite Stability MSI-High Investigate Actionability Dostarlimab-gxly PD-1/PD-L1 inhibition Immunotherapy UCEC "The U.S. Food and Drug Administration (FDA) granted approval to dostarlimab-gxly in combination with carboplatin and paclitaxel, followed by single agent dostarlimab-gxly, for the treatment of adult patients primary advanced or recurrent endometrial cancer that is mismatch repair deficient (dMMR), as determined by an FDA-approved test, or microsatellite-instability-high." "GlaxoSmithKline LLC. Jemperli (dostarlimab-gxly) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf. Revised July 2023. Accessed September 6, 2023." https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/761174s006lbl.pdf Investigate Actionability 5-Fluorouracil Thymidylate synthase inhibition Chemotherapy COADREAD "5-Fluorouracil is not recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with MSI-High colorectal cancer. These patients appear to not benefit from, and may be resistant to, 5-fluorouracil therapy." "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf Investigate Actionability 1 COADREAD Patients with MSI-High colorectal cancer often have a favorable prognosis. "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Colon Cancer V.2.2016. © National Comprehensive Cancer Network, Inc. 2016. All rights reserved. 
Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/colon_blocks.pdf MSI-High example +Investigate Actionability Guideline Rearrangement COL1A1 Fusion COL1A1--CITED4 0 0 Investigate Actionability Imatinib PDGF-R inhibition Targeted therapy DFSP Imatinib (Gleevec) is recommended by the National Comprehensive Cancer Network® (NCCN®) as a treatment option for patients with localized or metastatic dermatofibrosarcoma tumors containing t(17;22)(q22;q13). "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Dermatofibrosarcoma V.1.2017. © National Comprehensive Cancer Network, Inc. 2017. All rights reserved. Accessed November 5 2016. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/dfsp_blocks.pdf COL1A1--CITED4 Fusion example +Investigate Actionability Inferential Mutational Signature SBS5 v3.4 0.224 Investigate Actionability Cisplatin Platinum-based chemotherapy Chemotherapy BLCA "COSMIC Signature 5 is associated with somatic ERCC2 mutations, which may suggest sensitivity to cisplatin based chemotherapy." "Kim J, Mouw KW, Polak P, et al. Somatic ERCC2 mutations are associated with a distinct genomic signature in urothelial tumors. Nat Genet. 2016;48(6):600-606." https://doi.org/10.1038/ng.3557 SBS5 (22%) example +Investigate Actionability Inferential Mutational Signature SBS10b v3.4 0.119 Investigate Actionability Durvalumab PD-1/PD-L1 inhibition Immunotherapy COSMIC Signature 10 is observed in some of the most hypermutant samples and recurrent POLE mutations. POLE mutant tumors are being included along with MMR-deficient tumors in several ongoing trials for sensitivity to immunotherapy. "Mouw KW, Goldberg MS, Konstantinopoulos PA, D'Andrea AD. 
DNA Damage and Repair Biomarkers of Immunotherapy Response Cancer Discov. 2017; 7(7):675-693." https://doi.org/10.1158/2159-8290.CD-17-0226 SBS10b (12%) example +Investigate Actionability Inferential Aneuploidy Whole genome doubling Investigate Actionability 0 "WGD was associated with adverse survival pan-cancer in patients with advanced disease and in cancers with heterogeneous clinical outcomes, even following the development of metastasis." "Bielski CM, Zehir A, Penson AV, et al. Genome doubling shapes the evolution and prognosis of advanced cancers Nat Genet. 2018; 50(8):1189-1195." https://doi.org/10.1038/s41588-018-0165-1 example +Investigate Actionability FDA-Approved Germline Variant BRCA2 Frameshift p.S1982fs 0.5 100 0 0 Pathogenic Investigate Actionability Abiraterone acetate + Niraparib Antiandrogen + PARP inhibition Combination therapy PRAD "The U.S. Food and Drug Administration (FDA) granted approval to niraparib, a poly (ADP-ribose) polymerase (PARP) inhibitor, and abiraterone acetate, a CYP17 inhibitor indicated with prednisone for the treatment of adult patients with deleterious or suspected deleterious BRCA-mutated (BRCAm) metastatic castration-resistant prostate cancer (mCRPC)." "Janssen Biotech, Inc. Akeega (niraparib and abiraterone acetate) [package insert]. U.S. Food and Drug Administration website. https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf. Revised August 2023. Accessed September 6, 2023." https://www.accessdata.fda.gov/drugsatfda_docs/label/2023/216793s000lbl.pdf BRCA2 p.S1982fs (Frameshift) example example_tumor_profile example_normal_profile +Investigate Actionability Clinical evidence Somatic Variant MSH2 Missense p.D887N 0.8039 51 0 0 Investigate Actionability Pembrolizumab PD-1/PD-L1 inhibition Immunotherapy COADREAD Patients with defects in DNA mismatch repair genes may have enhanced sensitivity to immune checkpoint blockade. "Le DT, Uram JN, Wang H, et al. 
PD-1 Blockade in Tumors with Mismatch-Repair Deficiency. N Engl J Med. 2015;372(26):2509-20." https://doi.org/10.1056/NEJMoa1500596 0 0 0 MSH2 p.D887N (Missense) example example_tumor_profile example_normal_profile +Investigate Actionability Preclinical Copy Number FGFR2 Deletion 0 0 Investigate Actionability Infigratinib FGFR inhibition Targeted therapy COADREAD A study of 32 Cancer Cell Line Encyclopedia cell lines demonstrating FGFR1 and FGFR2 amplification demonstrated sensitivity to Infigratinib. "Guagnano V, Kauffmann A, Wohrle S, et al. FGFR genetic alterations predict for sensitivity to NVP-BGJ398, a selective pan-FGFR inhibitor. Cancer Discov. 2012;2(12):1118-33." https://doi.org/10.1158/2159-8290.CD-12-0210 FGFR2 Deletion example +Investigate Actionability Guideline Somatic Variant STAG2 Missense p.F467I 0.3571 56 0 0 Investigate Actionability 0 MDS "The National Comprehensive Cancer Network® (NCCN®) highlights STAG2 nonsense, frameshift, and splice site variants as being associated with a poor prognosis in patients with myelodysplastic syndromes." "Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 27 0 0.9881 STAG2 p.F467I (Missense) example example_tumor_profile example_normal_profile +Investigate Actionability Guideline Somatic Variant ZRSR2 Missense p.N261I 0.4019 107 0 0 Investigate Actionability 0 MDS The National Comprehensive Cancer Network® (NCCN®) highlights ZRSR2 nonsense and frameshift variants as being associated with a poor prognosis in patients with myelodysplastic syndromes. 
"Referenced with permission from the NCCN Clinical Practice Guidelines in Oncology (NCCN Guidelines®) for Myelodysplastic Syndromes V.2.2023. © National Comprehensive Cancer Network, Inc. 2023. All rights reserved. Accessed November 2, 2023. To view the most recent and complete version of the guideline, go online to NCCN.org." https://www.nccn.org/professionals/physician_gls/pdf/mds_blocks.pdf 29 0 0.9987 ZRSR2 p.N261I (Missense) example example_tumor_profile example_normal_profile +Biologically Relevant Somatic Variant NTRK2 Missense p.H300Y 0.6136 44 0 0 17 0 0.9982 NTRK2 p.H300Y (Missense) example example_tumor_profile example_normal_profile +Biologically Relevant Somatic Variant PDGFRB Missense p.G674E 0.4 40 0 0 222 0 1 PDGFRB p.G674E (Missense) example example_tumor_profile example_normal_profile +Biologically Relevant Germline Variant BRAF Nonsense p.R509* 0.5 100 0 0 BRAF p.R509* (Nonsense) example example_tumor_profile example_normal_profile +Biologically Relevant Microsatellite Stability Supporting variants PRDM2 p.E282del (Deletion) Supporting variants: PRDM2 p.E282del (Deletion) example +Biologically Relevant Mutational Signature SBS7a v3.4 0.424 SBS7a (42%) example +Biologically Relevant Mutational Signature SBS30 v3.4 0.231 SBS30 (23%) example \ No newline at end of file From 8dba89f7bdf407c51914705246a8145354c85ba8 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 22:57:24 -0400 Subject: [PATCH 08/19] Moved datasources to root dir of repo --- .../datasources => datasources}/README.md | 0 .../acmg/README.md | 0 .../acmg/acmg.secondaryfindings.v3.txt | 0 .../cancergenecensus/README.md | 0 .../cancer_gene_census_v97.genes.tsv | 0 .../cancergenecensus/diff_versions.py | 0 .../cancergenecensus/extract_genes.py | 0 .../cancerhotspots/README.md | 0 .../cancerhotspots/hotspots.txt | 0 .../cancerhotspots/hotspots3d.txt | 0 .../cancerhotspots/hotspots_v2.txt | 0 .../prep_3dhotspots/3d_hotspots.xls | Bin .../prep_3dhotspots/3d_hotspots_T2.txt 
| 0 .../prep_3dhotspots/3d_hotspots_T5.txt | 0 .../prep_3dhotspots/prep3dhotspots.py | 0 .../clinvar/README.md | 0 .../clinvar/prepare_clinvar.py | 0 .../clinvar/variant_summary.lite.txt | 0 .../cosmic/README.md | 0 .../cosmic/prepare_cosmic.py | 0 .../exac/README.md | 0 .../exac/build_exac.sh | 0 .../exac/expand_exac.py | 0 .../gsea_gene_sets/GSEA_cancer_gene_sets.txt | 0 .../gsea_gene_sets/README.md | 0 .../gsea_gene_sets/c4.cm.v6.0.symbols.txt | 0 .../hereditary/README.md | 0 .../hereditary/hereditary.txt | 0 .../lawrence/README.md | 0 .../lawrence/lawrence_ST2.txt | 0 .../lawrence/lawrence_mapped_ontology.txt | 0 .../lawrence/map_oncotree_lawrence.ipynb | 0 .../moalmanac/README.md | 0 .../moalmanac/create_almanac_db.py | 0 .../moalmanac/molecular-oncology-almanac.json | 0 .../oncotree/README.md | 0 datasources/oncotree/get-oncotree.ipynb | 76 ++++++++++++++++++ .../oncotree/get_oncotree.py | 0 .../oncotree/oncotree.2023-03-09.txt | 0 .../preclinical/README.md | 0 .../preclinical/almanac-gdsc-mappings.json | 0 .../preclinical/annotated/README.md | 0 .../annotated/annotate-copy-numbers.py | 0 .../preclinical/annotated/annotate-fusions.py | 0 .../annotated/annotate-molecular-features.sh | 0 .../annotated/annotate-variants.py | 0 .../cell-lines.copy-numbers.annotated.txt | 0 .../cell-lines.fusions.annotated.gene1.txt | 0 .../cell-lines.fusions.annotated.gene2.txt | 0 .../cell-lines.fusions.annotated.txt | 0 .../cell-lines.somatic-variants.annotated.txt | 0 .../preclinical/cell-lines.pkl | Bin .../formatted/0.map-almanac-to-gdsc.ipynb | 0 ...process-cell-line-molecular-features.ipynb | 0 .../preclinical/formatted/README.md | 0 .../formatted/almanac-gdsc-mappings.json | 0 .../formatted/cell-line-names.formatted.txt | 0 .../formatted/cell-lines.copy-numbers.txt | 0 .../formatted/cell-lines.fusions.txt | 0 .../formatted/cell-lines.somatic-variants.txt | 0 .../formatted/cell-lines.summary.txt | 0 .../preclinical/formatted/sanger.gdsc.txt | 0 
.../preclinical/generate-dictionary.ipynb | 0 .../preclinical/source/README.md | 0 .../preclinical/source/ccle-2019/README.md | 0 .../preclinical/source/depmap/README.md | 0 .../preclinical/source/gdsc/README.md | 0 67 files changed, 76 insertions(+) rename {moalmanac/datasources => datasources}/README.md (100%) rename {moalmanac/datasources => datasources}/acmg/README.md (100%) rename {moalmanac/datasources => datasources}/acmg/acmg.secondaryfindings.v3.txt (100%) rename {moalmanac/datasources => datasources}/cancergenecensus/README.md (100%) rename {moalmanac/datasources => datasources}/cancergenecensus/cancer_gene_census_v97.genes.tsv (100%) rename {moalmanac/datasources => datasources}/cancergenecensus/diff_versions.py (100%) rename {moalmanac/datasources => datasources}/cancergenecensus/extract_genes.py (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/README.md (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/hotspots.txt (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/hotspots3d.txt (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/hotspots_v2.txt (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/prep_3dhotspots/3d_hotspots.xls (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt (100%) rename {moalmanac/datasources => datasources}/cancerhotspots/prep_3dhotspots/prep3dhotspots.py (100%) rename {moalmanac/datasources => datasources}/clinvar/README.md (100%) rename {moalmanac/datasources => datasources}/clinvar/prepare_clinvar.py (100%) rename {moalmanac/datasources => datasources}/clinvar/variant_summary.lite.txt (100%) rename {moalmanac/datasources => datasources}/cosmic/README.md (100%) rename {moalmanac/datasources => datasources}/cosmic/prepare_cosmic.py (100%) rename {moalmanac/datasources => 
datasources}/exac/README.md (100%) rename {moalmanac/datasources => datasources}/exac/build_exac.sh (100%) rename {moalmanac/datasources => datasources}/exac/expand_exac.py (100%) rename {moalmanac/datasources => datasources}/gsea_gene_sets/GSEA_cancer_gene_sets.txt (100%) rename {moalmanac/datasources => datasources}/gsea_gene_sets/README.md (100%) rename {moalmanac/datasources => datasources}/gsea_gene_sets/c4.cm.v6.0.symbols.txt (100%) rename {moalmanac/datasources => datasources}/hereditary/README.md (100%) rename {moalmanac/datasources => datasources}/hereditary/hereditary.txt (100%) rename {moalmanac/datasources => datasources}/lawrence/README.md (100%) rename {moalmanac/datasources => datasources}/lawrence/lawrence_ST2.txt (100%) rename {moalmanac/datasources => datasources}/lawrence/lawrence_mapped_ontology.txt (100%) rename {moalmanac/datasources => datasources}/lawrence/map_oncotree_lawrence.ipynb (100%) rename {moalmanac/datasources => datasources}/moalmanac/README.md (100%) rename {moalmanac/datasources => datasources}/moalmanac/create_almanac_db.py (100%) rename {moalmanac/datasources => datasources}/moalmanac/molecular-oncology-almanac.json (100%) rename {moalmanac/datasources => datasources}/oncotree/README.md (100%) create mode 100644 datasources/oncotree/get-oncotree.ipynb rename {moalmanac/datasources => datasources}/oncotree/get_oncotree.py (100%) rename {moalmanac/datasources => datasources}/oncotree/oncotree.2023-03-09.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/README.md (100%) rename {moalmanac/datasources => datasources}/preclinical/almanac-gdsc-mappings.json (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/README.md (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/annotate-copy-numbers.py (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/annotate-fusions.py (100%) rename {moalmanac/datasources => 
datasources}/preclinical/annotated/annotate-molecular-features.sh (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/annotate-variants.py (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/cell-lines.copy-numbers.annotated.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/cell-lines.fusions.annotated.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/annotated/cell-lines.somatic-variants.annotated.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/cell-lines.pkl (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/0.map-almanac-to-gdsc.ipynb (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/1.process-cell-line-molecular-features.ipynb (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/README.md (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/almanac-gdsc-mappings.json (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/cell-line-names.formatted.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/cell-lines.copy-numbers.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/cell-lines.fusions.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/cell-lines.somatic-variants.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/cell-lines.summary.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/formatted/sanger.gdsc.txt (100%) rename {moalmanac/datasources => datasources}/preclinical/generate-dictionary.ipynb (100%) rename {moalmanac/datasources => datasources}/preclinical/source/README.md (100%) 
rename {moalmanac/datasources => datasources}/preclinical/source/ccle-2019/README.md (100%) rename {moalmanac/datasources => datasources}/preclinical/source/depmap/README.md (100%) rename {moalmanac/datasources => datasources}/preclinical/source/gdsc/README.md (100%) diff --git a/moalmanac/datasources/README.md b/datasources/README.md similarity index 100% rename from moalmanac/datasources/README.md rename to datasources/README.md diff --git a/moalmanac/datasources/acmg/README.md b/datasources/acmg/README.md similarity index 100% rename from moalmanac/datasources/acmg/README.md rename to datasources/acmg/README.md diff --git a/moalmanac/datasources/acmg/acmg.secondaryfindings.v3.txt b/datasources/acmg/acmg.secondaryfindings.v3.txt similarity index 100% rename from moalmanac/datasources/acmg/acmg.secondaryfindings.v3.txt rename to datasources/acmg/acmg.secondaryfindings.v3.txt diff --git a/moalmanac/datasources/cancergenecensus/README.md b/datasources/cancergenecensus/README.md similarity index 100% rename from moalmanac/datasources/cancergenecensus/README.md rename to datasources/cancergenecensus/README.md diff --git a/moalmanac/datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv b/datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv similarity index 100% rename from moalmanac/datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv rename to datasources/cancergenecensus/cancer_gene_census_v97.genes.tsv diff --git a/moalmanac/datasources/cancergenecensus/diff_versions.py b/datasources/cancergenecensus/diff_versions.py similarity index 100% rename from moalmanac/datasources/cancergenecensus/diff_versions.py rename to datasources/cancergenecensus/diff_versions.py diff --git a/moalmanac/datasources/cancergenecensus/extract_genes.py b/datasources/cancergenecensus/extract_genes.py similarity index 100% rename from moalmanac/datasources/cancergenecensus/extract_genes.py rename to datasources/cancergenecensus/extract_genes.py diff --git 
a/moalmanac/datasources/cancerhotspots/README.md b/datasources/cancerhotspots/README.md similarity index 100% rename from moalmanac/datasources/cancerhotspots/README.md rename to datasources/cancerhotspots/README.md diff --git a/moalmanac/datasources/cancerhotspots/hotspots.txt b/datasources/cancerhotspots/hotspots.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/hotspots.txt rename to datasources/cancerhotspots/hotspots.txt diff --git a/moalmanac/datasources/cancerhotspots/hotspots3d.txt b/datasources/cancerhotspots/hotspots3d.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/hotspots3d.txt rename to datasources/cancerhotspots/hotspots3d.txt diff --git a/moalmanac/datasources/cancerhotspots/hotspots_v2.txt b/datasources/cancerhotspots/hotspots_v2.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/hotspots_v2.txt rename to datasources/cancerhotspots/hotspots_v2.txt diff --git a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls b/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls rename to datasources/cancerhotspots/prep_3dhotspots/3d_hotspots.xls diff --git a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt b/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt rename to datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T2.txt diff --git a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt b/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt rename to datasources/cancerhotspots/prep_3dhotspots/3d_hotspots_T5.txt diff --git 
a/moalmanac/datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py b/datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py similarity index 100% rename from moalmanac/datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py rename to datasources/cancerhotspots/prep_3dhotspots/prep3dhotspots.py diff --git a/moalmanac/datasources/clinvar/README.md b/datasources/clinvar/README.md similarity index 100% rename from moalmanac/datasources/clinvar/README.md rename to datasources/clinvar/README.md diff --git a/moalmanac/datasources/clinvar/prepare_clinvar.py b/datasources/clinvar/prepare_clinvar.py similarity index 100% rename from moalmanac/datasources/clinvar/prepare_clinvar.py rename to datasources/clinvar/prepare_clinvar.py diff --git a/moalmanac/datasources/clinvar/variant_summary.lite.txt b/datasources/clinvar/variant_summary.lite.txt similarity index 100% rename from moalmanac/datasources/clinvar/variant_summary.lite.txt rename to datasources/clinvar/variant_summary.lite.txt diff --git a/moalmanac/datasources/cosmic/README.md b/datasources/cosmic/README.md similarity index 100% rename from moalmanac/datasources/cosmic/README.md rename to datasources/cosmic/README.md diff --git a/moalmanac/datasources/cosmic/prepare_cosmic.py b/datasources/cosmic/prepare_cosmic.py similarity index 100% rename from moalmanac/datasources/cosmic/prepare_cosmic.py rename to datasources/cosmic/prepare_cosmic.py diff --git a/moalmanac/datasources/exac/README.md b/datasources/exac/README.md similarity index 100% rename from moalmanac/datasources/exac/README.md rename to datasources/exac/README.md diff --git a/moalmanac/datasources/exac/build_exac.sh b/datasources/exac/build_exac.sh similarity index 100% rename from moalmanac/datasources/exac/build_exac.sh rename to datasources/exac/build_exac.sh diff --git a/moalmanac/datasources/exac/expand_exac.py b/datasources/exac/expand_exac.py similarity index 100% rename from moalmanac/datasources/exac/expand_exac.py rename to 
datasources/exac/expand_exac.py diff --git a/moalmanac/datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt b/datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt similarity index 100% rename from moalmanac/datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt rename to datasources/gsea_gene_sets/GSEA_cancer_gene_sets.txt diff --git a/moalmanac/datasources/gsea_gene_sets/README.md b/datasources/gsea_gene_sets/README.md similarity index 100% rename from moalmanac/datasources/gsea_gene_sets/README.md rename to datasources/gsea_gene_sets/README.md diff --git a/moalmanac/datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt b/datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt similarity index 100% rename from moalmanac/datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt rename to datasources/gsea_gene_sets/c4.cm.v6.0.symbols.txt diff --git a/moalmanac/datasources/hereditary/README.md b/datasources/hereditary/README.md similarity index 100% rename from moalmanac/datasources/hereditary/README.md rename to datasources/hereditary/README.md diff --git a/moalmanac/datasources/hereditary/hereditary.txt b/datasources/hereditary/hereditary.txt similarity index 100% rename from moalmanac/datasources/hereditary/hereditary.txt rename to datasources/hereditary/hereditary.txt diff --git a/moalmanac/datasources/lawrence/README.md b/datasources/lawrence/README.md similarity index 100% rename from moalmanac/datasources/lawrence/README.md rename to datasources/lawrence/README.md diff --git a/moalmanac/datasources/lawrence/lawrence_ST2.txt b/datasources/lawrence/lawrence_ST2.txt similarity index 100% rename from moalmanac/datasources/lawrence/lawrence_ST2.txt rename to datasources/lawrence/lawrence_ST2.txt diff --git a/moalmanac/datasources/lawrence/lawrence_mapped_ontology.txt b/datasources/lawrence/lawrence_mapped_ontology.txt similarity index 100% rename from moalmanac/datasources/lawrence/lawrence_mapped_ontology.txt rename to datasources/lawrence/lawrence_mapped_ontology.txt diff --git 
a/moalmanac/datasources/lawrence/map_oncotree_lawrence.ipynb b/datasources/lawrence/map_oncotree_lawrence.ipynb similarity index 100% rename from moalmanac/datasources/lawrence/map_oncotree_lawrence.ipynb rename to datasources/lawrence/map_oncotree_lawrence.ipynb diff --git a/moalmanac/datasources/moalmanac/README.md b/datasources/moalmanac/README.md similarity index 100% rename from moalmanac/datasources/moalmanac/README.md rename to datasources/moalmanac/README.md diff --git a/moalmanac/datasources/moalmanac/create_almanac_db.py b/datasources/moalmanac/create_almanac_db.py similarity index 100% rename from moalmanac/datasources/moalmanac/create_almanac_db.py rename to datasources/moalmanac/create_almanac_db.py diff --git a/moalmanac/datasources/moalmanac/molecular-oncology-almanac.json b/datasources/moalmanac/molecular-oncology-almanac.json similarity index 100% rename from moalmanac/datasources/moalmanac/molecular-oncology-almanac.json rename to datasources/moalmanac/molecular-oncology-almanac.json diff --git a/moalmanac/datasources/oncotree/README.md b/datasources/oncotree/README.md similarity index 100% rename from moalmanac/datasources/oncotree/README.md rename to datasources/oncotree/README.md diff --git a/datasources/oncotree/get-oncotree.ipynb b/datasources/oncotree/get-oncotree.ipynb new file mode 100644 index 0000000..188c6fb --- /dev/null +++ b/datasources/oncotree/get-oncotree.ipynb @@ -0,0 +1,76 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "15aef7c6", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9006f380", + "metadata": {}, + "outputs": [], + "source": [ + "request = 'http://oncotree.mskcc.org/api/tumorTypes/tree?version=oncotree_latest_stable'\n", + "r = requests.get(request)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "875a3e89", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "dict_keys(['OVARY', 'LYMPH', 'SOFT_TISSUE', 'THYROID', 'PLEURA', 'PANCREAS', 'BILIARY_TRACT', 'BREAST', 'HEAD_NECK', 'EYE', 'AMPULLA_OF_VATER', 'THYMUS', 'CERVIX', 'PNS', 'BOWEL', 'BONE', 'SKIN', 'BLADDER', 'BRAIN', 'ADRENAL_GLAND', 'PROSTATE', 'LUNG', 'PENIS', 'UTERUS', 'OTHER', 'TESTIS', 'LIVER', 'PERITONEUM', 'MYELOID', 'VULVA', 'KIDNEY', 'STOMACH'])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r.json()['TISSUE']['children'].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c4d27c3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "moalmanac", + "language": "python", + "name": "moalmanac" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/moalmanac/datasources/oncotree/get_oncotree.py b/datasources/oncotree/get_oncotree.py similarity index 100% rename from moalmanac/datasources/oncotree/get_oncotree.py rename to datasources/oncotree/get_oncotree.py diff --git a/moalmanac/datasources/oncotree/oncotree.2023-03-09.txt b/datasources/oncotree/oncotree.2023-03-09.txt similarity index 100% rename from moalmanac/datasources/oncotree/oncotree.2023-03-09.txt rename to datasources/oncotree/oncotree.2023-03-09.txt diff --git a/moalmanac/datasources/preclinical/README.md b/datasources/preclinical/README.md similarity index 100% rename from moalmanac/datasources/preclinical/README.md rename to datasources/preclinical/README.md diff --git a/moalmanac/datasources/preclinical/almanac-gdsc-mappings.json b/datasources/preclinical/almanac-gdsc-mappings.json similarity index 100% rename from 
moalmanac/datasources/preclinical/almanac-gdsc-mappings.json rename to datasources/preclinical/almanac-gdsc-mappings.json diff --git a/moalmanac/datasources/preclinical/annotated/README.md b/datasources/preclinical/annotated/README.md similarity index 100% rename from moalmanac/datasources/preclinical/annotated/README.md rename to datasources/preclinical/annotated/README.md diff --git a/moalmanac/datasources/preclinical/annotated/annotate-copy-numbers.py b/datasources/preclinical/annotated/annotate-copy-numbers.py similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-copy-numbers.py rename to datasources/preclinical/annotated/annotate-copy-numbers.py diff --git a/moalmanac/datasources/preclinical/annotated/annotate-fusions.py b/datasources/preclinical/annotated/annotate-fusions.py similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-fusions.py rename to datasources/preclinical/annotated/annotate-fusions.py diff --git a/moalmanac/datasources/preclinical/annotated/annotate-molecular-features.sh b/datasources/preclinical/annotated/annotate-molecular-features.sh similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-molecular-features.sh rename to datasources/preclinical/annotated/annotate-molecular-features.sh diff --git a/moalmanac/datasources/preclinical/annotated/annotate-variants.py b/datasources/preclinical/annotated/annotate-variants.py similarity index 100% rename from moalmanac/datasources/preclinical/annotated/annotate-variants.py rename to datasources/preclinical/annotated/annotate-variants.py diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt b/datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt rename to datasources/preclinical/annotated/cell-lines.copy-numbers.annotated.txt diff 
--git a/moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt b/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt rename to datasources/preclinical/annotated/cell-lines.fusions.annotated.gene1.txt diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt b/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt rename to datasources/preclinical/annotated/cell-lines.fusions.annotated.gene2.txt diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.txt b/datasources/preclinical/annotated/cell-lines.fusions.annotated.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.fusions.annotated.txt rename to datasources/preclinical/annotated/cell-lines.fusions.annotated.txt diff --git a/moalmanac/datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt b/datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt similarity index 100% rename from moalmanac/datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt rename to datasources/preclinical/annotated/cell-lines.somatic-variants.annotated.txt diff --git a/moalmanac/datasources/preclinical/cell-lines.pkl b/datasources/preclinical/cell-lines.pkl similarity index 100% rename from moalmanac/datasources/preclinical/cell-lines.pkl rename to datasources/preclinical/cell-lines.pkl diff --git a/moalmanac/datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb b/datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb similarity index 100% rename from moalmanac/datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb rename to 
datasources/preclinical/formatted/0.map-almanac-to-gdsc.ipynb diff --git a/moalmanac/datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb b/datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb similarity index 100% rename from moalmanac/datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb rename to datasources/preclinical/formatted/1.process-cell-line-molecular-features.ipynb diff --git a/moalmanac/datasources/preclinical/formatted/README.md b/datasources/preclinical/formatted/README.md similarity index 100% rename from moalmanac/datasources/preclinical/formatted/README.md rename to datasources/preclinical/formatted/README.md diff --git a/moalmanac/datasources/preclinical/formatted/almanac-gdsc-mappings.json b/datasources/preclinical/formatted/almanac-gdsc-mappings.json similarity index 100% rename from moalmanac/datasources/preclinical/formatted/almanac-gdsc-mappings.json rename to datasources/preclinical/formatted/almanac-gdsc-mappings.json diff --git a/moalmanac/datasources/preclinical/formatted/cell-line-names.formatted.txt b/datasources/preclinical/formatted/cell-line-names.formatted.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-line-names.formatted.txt rename to datasources/preclinical/formatted/cell-line-names.formatted.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.copy-numbers.txt b/datasources/preclinical/formatted/cell-lines.copy-numbers.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.copy-numbers.txt rename to datasources/preclinical/formatted/cell-lines.copy-numbers.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.fusions.txt b/datasources/preclinical/formatted/cell-lines.fusions.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.fusions.txt rename to 
datasources/preclinical/formatted/cell-lines.fusions.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.somatic-variants.txt b/datasources/preclinical/formatted/cell-lines.somatic-variants.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.somatic-variants.txt rename to datasources/preclinical/formatted/cell-lines.somatic-variants.txt diff --git a/moalmanac/datasources/preclinical/formatted/cell-lines.summary.txt b/datasources/preclinical/formatted/cell-lines.summary.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/cell-lines.summary.txt rename to datasources/preclinical/formatted/cell-lines.summary.txt diff --git a/moalmanac/datasources/preclinical/formatted/sanger.gdsc.txt b/datasources/preclinical/formatted/sanger.gdsc.txt similarity index 100% rename from moalmanac/datasources/preclinical/formatted/sanger.gdsc.txt rename to datasources/preclinical/formatted/sanger.gdsc.txt diff --git a/moalmanac/datasources/preclinical/generate-dictionary.ipynb b/datasources/preclinical/generate-dictionary.ipynb similarity index 100% rename from moalmanac/datasources/preclinical/generate-dictionary.ipynb rename to datasources/preclinical/generate-dictionary.ipynb diff --git a/moalmanac/datasources/preclinical/source/README.md b/datasources/preclinical/source/README.md similarity index 100% rename from moalmanac/datasources/preclinical/source/README.md rename to datasources/preclinical/source/README.md diff --git a/moalmanac/datasources/preclinical/source/ccle-2019/README.md b/datasources/preclinical/source/ccle-2019/README.md similarity index 100% rename from moalmanac/datasources/preclinical/source/ccle-2019/README.md rename to datasources/preclinical/source/ccle-2019/README.md diff --git a/moalmanac/datasources/preclinical/source/depmap/README.md b/datasources/preclinical/source/depmap/README.md similarity index 100% rename from 
moalmanac/datasources/preclinical/source/depmap/README.md rename to datasources/preclinical/source/depmap/README.md diff --git a/moalmanac/datasources/preclinical/source/gdsc/README.md b/datasources/preclinical/source/gdsc/README.md similarity index 100% rename from moalmanac/datasources/preclinical/source/gdsc/README.md rename to datasources/preclinical/source/gdsc/README.md From abdd8a0f0aa838c465391b6234c69270a9b4efce Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 23:00:35 -0400 Subject: [PATCH 09/19] removed --- datasources/oncotree/get-oncotree.ipynb | 76 ------------------------- 1 file changed, 76 deletions(-) delete mode 100644 datasources/oncotree/get-oncotree.ipynb diff --git a/datasources/oncotree/get-oncotree.ipynb b/datasources/oncotree/get-oncotree.ipynb deleted file mode 100644 index 188c6fb..0000000 --- a/datasources/oncotree/get-oncotree.ipynb +++ /dev/null @@ -1,76 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "15aef7c6", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "import pandas" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9006f380", - "metadata": {}, - "outputs": [], - "source": [ - "request = 'http://oncotree.mskcc.org/api/tumorTypes/tree?version=oncotree_latest_stable'\n", - "r = requests.get(request)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "875a3e89", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['OVARY', 'LYMPH', 'SOFT_TISSUE', 'THYROID', 'PLEURA', 'PANCREAS', 'BILIARY_TRACT', 'BREAST', 'HEAD_NECK', 'EYE', 'AMPULLA_OF_VATER', 'THYMUS', 'CERVIX', 'PNS', 'BOWEL', 'BONE', 'SKIN', 'BLADDER', 'BRAIN', 'ADRENAL_GLAND', 'PROSTATE', 'LUNG', 'PENIS', 'UTERUS', 'OTHER', 'TESTIS', 'LIVER', 'PERITONEUM', 'MYELOID', 'VULVA', 'KIDNEY', 'STOMACH'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"r.json()['TISSUE']['children'].keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0c4d27c3", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "moalmanac", - "language": "python", - "name": "moalmanac" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From d412bdab1095d2a6c57f622f46f445b5c002a444 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 23:01:03 -0400 Subject: [PATCH 10/19] Removed deconstructSigs R wrapper --- moalmanac/run_deconstructsigs.R | 41 --------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 moalmanac/run_deconstructsigs.R diff --git a/moalmanac/run_deconstructsigs.R b/moalmanac/run_deconstructsigs.R deleted file mode 100644 index 89900a3..0000000 --- a/moalmanac/run_deconstructsigs.R +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/Rscript -library("deconstructSigs") - -args = commandArgs(trailingOnly=TRUE) -patient_id = args[1] -snv_handle = args[2] -sample = args[3] -ref = args[4] -alt = args[5] -chr = args[6] -pos = args[7] -folder = args[8] - -maf = read.csv(snv_handle, sep = '\t', comment.char = '#') -names(maf) <- tolower(names(maf)) -cols = c(sample, ref, alt, chr, pos) -maf <- maf[colnames(maf) %in% cols] - -maf$tumor_sample_barcode <- sapply(maf$tumor_sample_barcode, as.factor) -maf$reference_allele <- sapply(maf$reference_allele, as.factor) -maf$tumor_seq_allele2 <- sapply(maf$tumor_seq_allele2, as.factor) -maf$chromosome <- sapply(maf$chromosome, as.factor) - -unique.samples = unique(maf$tumor_sample_barcode) - -sigs.input <- mut.to.sigs.input(mut.ref = maf, - sample.id = sample, chr = chr, - pos = pos, ref = ref, - alt = alt) - -temp.filename <- 
paste(folder, patient_id, ".sigs.context.txt", sep = "") -write.table(sigs.input, file = temp.filename, sep = '\t', row.names = FALSE) - -for (sample_ in unique.samples) { - output.sigs <- whichSignatures(tumor.ref = sigs.input, - signatures.ref = signatures.cosmic, sample.id = sample_, - context = TRUE, tri.counts.method = 'default') - - temp.filename = paste(folder, patient_id, ".sigs.cosmic.txt", sep = "") - write.table(output.sigs, file = temp.filename, sep = '\t', row.names = FALSE) -} From 64a22a74f6f5f0c3c2419dcf23287334d74307e0 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 23:01:19 -0400 Subject: [PATCH 11/19] Removed deconstructSigs wrapper --- moalmanac/wrapper_deconstructsigs.sh | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 moalmanac/wrapper_deconstructsigs.sh diff --git a/moalmanac/wrapper_deconstructsigs.sh b/moalmanac/wrapper_deconstructsigs.sh deleted file mode 100644 index 1b6307c..0000000 --- a/moalmanac/wrapper_deconstructsigs.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -patient_id=$1 -snv_handle=$2 -sample=$3 -ref=$4 -alt=$5 -chr=$6 -pos=$7 -folder=$8 - -Rscript run_deconstructsigs.R ${patient_id} ${snv_handle} ${sample} ${ref} ${alt} ${chr} ${pos} ${folder} From b92c27b5d21ff3f52ba05b5064923d7f0de9fba4 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 23:24:44 -0400 Subject: [PATCH 12/19] Updated documentation --- moalmanac/README.md | 30 ++++-- moalmanac/moalmanac.py | 4 +- moalmanac/simplified_input.py | 169 +++++++++++++++++++++++----------- 3 files changed, 139 insertions(+), 64 deletions(-) diff --git a/moalmanac/README.md b/moalmanac/README.md index a65e169..72d6cd1 100644 --- a/moalmanac/README.md +++ b/moalmanac/README.md @@ -2,11 +2,13 @@ Molecular Oncology Almanac can be run by executing either `moalmanac.py` with [standard input formats](#standard-usage) or `simplified_input.py` with [simplified inputs](#simplified-input). 
Please follow the [installation instructions](../README.md#installation) before use. ## Standard usage -Molecular Oncology Almanac may be executed on any combination of input data but does require a patient_id to label output files. Additional settings can be set by modifying the [config.ini](#configini) file and column names may be modified by editing the [colnames.ini](#colnamesini) file. +Molecular Oncology Almanac may be executed on any combination of input data but does require a patient_id to label output files. Additional settings can be set by modifying the [config.ini](#configini) file and column names may be modified by editing the [colnames.ini](#colnamesini) file. The [config.ini](config.ini) and [annotation-databases.ini](annotation-databases.ini) must be passed as arguments to moalmanac.py. Required arguments: ``` --patient_id patient identifier + --config ini file that contains configuration details, config.ini + --dbs ini file that contains database paths for annotation, annotation-databases.ini ``` Optional arguments: @@ -28,6 +30,7 @@ Optional arguments: --disable_matchmaking remove patient-to-cell line matchmaking from report --description description of patient --output-directory specify location of produced outputs + --preclinical-dbs path to preclinical-databases.ini file ``` Example: @@ -46,8 +49,11 @@ python moalmanac.py \ --validation_handle "../example_data/example_patient.rna.somatic.snvs.maf" \ --purity 0.85 \ --ploidy 4.02 \ - --ms_status "msih" - --wgd + --ms_status "msih" \ + --wgd \ + --config config.ini \ + --dbs annotation-databases.ini \ + --preclinical-dbs preclinical-databases.ini ``` These example inputs may also be processed by executing `run_example.py`. 
@@ -85,6 +91,8 @@ This is also described in the [description of inputs](../docs/description-of-inp Required arguments: ``` --patient_id patient identifier + --config ini file that contains configuration details, config.ini + --dbs ini file that contains database paths for annotation, annotation-databases.ini ``` Optional arguments: @@ -98,6 +106,7 @@ Optional arguments: --wgd specify the occurence of whole genome duplication --description description of patient --output-directory specify location of produced outputs + --preclinical-dbs path to preclinical-databases.ini file ``` Example: @@ -110,8 +119,11 @@ python simplified_input.py \ --input "../example_data/example_patient.simplified_input.txt" \ --purity 0.85 \ --ploidy 4.02 \ - --ms_status "msih" - --wgd + --ms_status "msih" \ + --wgd \ + --config config.ini \ + --dbs annotation-databases.ini \ + --preclinical-dbs preclinical-databases.ini ``` ## Configuration @@ -128,8 +140,6 @@ The configuration file [config.ini](config.ini) lets users change settings, thre - `signatures` allows users to specify the minimum required contribution to consider mutational signatures - `validation_sequencing` allows users to specify minimum power and allelic fraction to consider for variants from validation sequencing - `feature_types` allows users to specify strings for considered feature types -- `databases` specifies file paths for databases used for annotation, found in the `moalmanac/databases/` folder -- `preclinical` specifies file paths for datasources used for preclinical functions, [model_similarity](../docs/description-of-outputs.md#profile-to-cell-line-matchmaking) and [preclinical efficacy](../docs/description-of-outputs.md#preclinical-efficacy) ### colnames.ini The configuration file [colnames.ini](colnames.ini) lets users change strings associated with column names for input and output files. 
The file contains the following relevant sections, @@ -137,6 +147,12 @@ The configuration file [colnames.ini](colnames.ini) lets users change strings as Other sections in this configuration file are used internally to MOAlmanac for processing. +### annotation-databases.ini +The configuration file [annotation-databases.ini](annotation-databases.ini) lets users change paths to datasources being used to annotate genomic variants within the algorithm. This file contains a single `databases` section, with either a relative or absolute path being set to the `root` variable. By default, this points to the [datasources/](../datasources/) folder in the root directory of this repository. + +### preclinical-databases.ini +Similar to `annotation-databases.ini`, the configuration file [preclinical-databases.ini](preclinical-databases.ini) lets users change paths to datasources being used for preclinical comparison functions, [model_similarity](../docs/description-of-outputs.md#profile-to-cell-line-matchmaking) and [preclinical efficacy](../docs/description-of-outputs.md#preclinical-efficacy). + ## Citation If you find this tool or any code herein useful, please cite: > [Reardon, B., Moore, N.D., Moore, N.S., *et al*. Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology. *Nat Cancer* (2021). 
https://doi.org/10.1038/s43018-021-00243-3](https://www.nature.com/articles/s43018-021-00243-3) diff --git a/moalmanac/moalmanac.py b/moalmanac/moalmanac.py index 05ff8c3..1b5572a 100644 --- a/moalmanac/moalmanac.py +++ b/moalmanac/moalmanac.py @@ -389,12 +389,12 @@ def main(patient, inputs, output_folder, config, dbs, dbs_preclinical=None): arg_parser.add_argument( '--config', '-c', required=True, - help='ini file that contains configuration details ' + help='ini file that contains configuration details' ) arg_parser.add_argument( '--dbs', required=True, - help='ini file that contains database paths ' + help='ini file that contains database paths' ) arg_parser.add_argument( '--preclinical-dbs', diff --git a/moalmanac/simplified_input.py b/moalmanac/simplified_input.py index 5bdfc09..51d57a8 100644 --- a/moalmanac/simplified_input.py +++ b/moalmanac/simplified_input.py @@ -17,7 +17,7 @@ import writer from config import COLNAMES -from config import CONFIG +from reader import Ini snv_handle = 'snv_handle' indel_handle = 'indel_handle' @@ -84,34 +84,41 @@ def subset_by_feature_type(dataframe): return somatic, germline -def main(patient, input_file, output_folder): - dbs = datasources.Datasources.generate_db_dict(CONFIG) +def main(patient, input_file, output_folder, config, dbs, dbs_preclinical=None): + metadata_dictionary = moalmanac.create_metadata_dictionary(patient) + output_folder = moalmanac.format_output_directory(output_folder) if output_folder != "": moalmanac.execute_cmd(f"mkdir -p {output_folder}") - string_id = patient[patient_id] + string_id = metadata_dictionary[patient_id] - mapped_ontology = ontologymapper.OntologyMapper.map(dbs, patient[tumor_type]) - patient[ontology] = mapped_ontology[ontology] - patient[code] = mapped_ontology[code] + mapped_ontology = ontologymapper.OntologyMapper.map(dbs, metadata_dictionary[tumor_type]) + metadata_dictionary[ontology] = mapped_ontology[ontology] + metadata_dictionary[code] = mapped_ontology[code] alterations = 
features.Simple.import_feature(input_file) - annotated_alterations = annotator.Annotator.annotate_simple(alterations, dbs, patient[code]) + annotated_alterations = annotator.Annotator.annotate_simple(alterations, dbs, patient[code], config=config) evaluated_alterations = evaluator.Evaluator.evaluate_somatic(annotated_alterations) evaluated_somatic, evaluated_germline = subset_by_feature_type(evaluated_alterations) evaluated_somatic = annotator.OverlapSomaticGermline.append_germline_hits(evaluated_somatic, evaluated_germline) integrated = evaluator.Integrative.evaluate(evaluated_somatic, evaluated_germline, dbs, feature_types) - somatic_burden = features.BurdenReader.import_feature('', patient, evaluated_somatic, dbs) - patient_wgd = features.Aneuploidy.summarize(patient[wgd]) - patient_ms_status = features.MicrosatelliteReader.summarize(patient[ms_status]) + somatic_burden = features.BurdenReader.import_feature( + handle='', + patient=metadata_dictionary, + variants=evaluated_somatic, + dbs=dbs, + config=config + ) + patient_wgd = features.Aneuploidy.summarize(patient[wgd], config=config) + patient_ms_status = features.MicrosatelliteReader.summarize(patient[ms_status], config=config) patient[ms_status] = features.MicrosatelliteReader.map_status(patient[ms_status]) - annotated_burden = annotator.Annotator.annotate_almanac(somatic_burden, dbs, patient[code]) - annotated_wgd = annotator.Annotator.annotate_almanac(patient_wgd, dbs, patient[code]) - annotated_ms_status = annotator.Annotator.annotate_almanac(patient_ms_status, dbs, patient[code]) + annotated_burden = annotator.Annotator.annotate_almanac(somatic_burden, dbs, patient[code], config=config) + annotated_wgd = annotator.Annotator.annotate_almanac(patient_wgd, dbs, patient[code], config=config) + annotated_ms_status = annotator.Annotator.annotate_almanac(patient_ms_status, dbs, patient[code], config=config) evaluated_burden = evaluator.Evaluator.evaluate_almanac(annotated_burden) evaluated_wgd = 
evaluator.Evaluator.evaluate_almanac(annotated_wgd) @@ -125,14 +132,14 @@ def main(patient, input_file, output_folder): ms_status=evaluated_ms_status, burden=evaluated_burden, signatures=features.Features.create_empty_dataframe(), - wgd=evaluated_wgd + wgd=evaluated_wgd, + config=config ) strategies = evaluator.Strategies.report_therapy_strategies(actionable) + function_toggle = config['function_toggle'] efficacy_summary = investigator.SummaryDataFrame.create_empty_dataframe() - efficacy_dictionary = {} - cell_lines_dictionary = {} preclinical_efficacy_on = TOGGLE_FEATURES.getboolean('calculate_preclinical_efficacy') # The input argument --disable_matchmaking will be removed in the next non-backwards compatible release @@ -146,10 +153,11 @@ def main(patient, input_file, output_folder): if preclinical_efficacy_on: plot_preclinical = TOGGLE_FEATURES.getboolean('plot_preclinical_efficacy') efficacy_results = moalmanac.process_preclinical_efficacy( - dbs_preclinical, - actionable, - output_folder, - string_id, + dbs=dbs_preclinical, + dataframe=actionable, + folder=output_folder, + label=string_id, + config=config, plot=plot_preclinical ) efficacy_dictionary = efficacy_results[0] @@ -162,8 +170,17 @@ def main(patient, input_file, output_folder): append_lookup=TOGGLE_FEATURES.getboolean('include_preclinical_efficacy_in_actionability_report') ) if model_similarity_on: - similarity_results = matchmaker.Matchmaker.compare(dbs, dbs_preclinical, evaluated_somatic, string_id) - similarity_summary = matchmaker.Report.create_report_dictionary(similarity_results, cell_lines_dictionary) + similarity_results = matchmaker.Matchmaker.compare( + dbs=dbs, + dbs_preclinical=dbs_preclinical, + somatic=evaluated_somatic, + case_sample_id=string_id, + config=config + ) + similarity_summary = matchmaker.Report.create_report_dictionary( + similarity_results, + cell_lines_dictionary + ) writer.Actionable.write(actionable, string_id, output_folder) 
writer.GermlineACMG.write(evaluated_germline, string_id, output_folder) @@ -177,13 +194,14 @@ def main(patient, input_file, output_folder): writer.PreclinicalEfficacy.write(efficacy_summary, string_id, output_folder) writer.PreclinicalMatchmaking.write(similarity_results, string_id, output_folder) - if TOGGLE_FEATURES.getboolean('generate_actionability_report'): - report_dictionary = reporter.Reporter.generate_dictionary(evaluated_somatic, patient) + if function_toggle.getboolean('generate_actionability_report'): + report_dictionary = reporter.Reporter.generate_dictionary(evaluated_somatic, metadata_dictionary) - include_similarity = TOGGLE_FEATURES.getboolean('include_model_similarity_in_actionability_report') + include_similarity = function_toggle.getboolean('include_model_similarity_in_actionability_report') reporter.Reporter.generate_actionability_report( - actionable, - report_dictionary, + actionable=actionable, + report_dictionary=report_dictionary, + config=config, similarity=similarity_summary if include_similarity else None, output_directory=output_folder ) @@ -194,33 +212,66 @@ def main(patient, input_file, output_folder): arg_parser = argparse.ArgumentParser(prog='Molecular Oncology Almanac using simplified input', description='Annotates only using the Molecular Oncology Almanac database') - arg_parser.add_argument('--patient_id', - help='patient id label', - required=True) - arg_parser.add_argument('--stage', - default='Unknown', - help='disease stage') - arg_parser.add_argument('--tumor_type', - default='Unknown', - help='reported tumor type') - arg_parser.add_argument('--input', - help='Tab delimited file of observed alterations') - arg_parser.add_argument('--ms_status', - default='unk', - choices=['msih', 'msil', 'mss', 'unk'], - help='microsatellite instability status') - arg_parser.add_argument('--purity', - default='Unknown', - help='Tumor purity') - arg_parser.add_argument('--ploidy', - default='Unknown', - help='Tumor ploidy') - 
arg_parser.add_argument('--wgd', - action='store_true', - help='Specify the occurrence of whole genome duplication') - arg_parser.add_argument('--output_directory', - default=None, - help='Output directory for generated files') + arg_parser.add_argument( + '--patient_id', + help='patient id label', + required=True + ) + arg_parser.add_argument( + '--stage', + default='Unknown', + help='disease stage' + ) + arg_parser.add_argument( + '--tumor_type', + default='Unknown', + help='reported tumor type' + ) + arg_parser.add_argument( + '--input', + help='Tab delimited file of observed alterations' + ) + arg_parser.add_argument( + '--ms_status', + default='unk', + choices=['msih', 'msil', 'mss', 'unk'], + help='microsatellite instability status' + ) + arg_parser.add_argument( + '--purity', + default='Unknown', + help='Tumor purity' + ) + arg_parser.add_argument( + '--ploidy', + default='Unknown', + help='Tumor ploidy' + ) + arg_parser.add_argument( + '--wgd', + action='store_true', + help='Specify the occurrence of whole genome duplication' + ) + arg_parser.add_argument( + '--output_directory', + default=None, + help='Output directory for generated files' + ) + arg_parser.add_argument( + '--config', '-c', + required=True, + help='ini file that contains configuration details' + ) + arg_parser.add_argument( + '--dbs', + required=True, + help='ini file that contains database paths ' + ) + arg_parser.add_argument( + '--preclinical-dbs', + required=False, + help='ini file that contains preclinical file paths' + ) args = arg_parser.parse_args() patient_dict = { @@ -236,6 +287,14 @@ def main(patient, input_file, output_folder): output_directory = args.output_directory if args.output_directory else os.getcwd() + config_ini = Ini.read(args.config, extended_interpolation=False, convert_to_dictionary=False) + + db_paths = Ini.read(args.dbs, extended_interpolation=False, convert_to_dictionary=True) + if args.preclinical_dbs: + preclinical_db_paths = Ini.read(args.preclinical_dbs, 
extended_interpolation=False, convert_to_dictionary=True) + else: + preclinical_db_paths = None + main(patient_dict, args.input, output_directory) end_time = time.time() From 7a34c9d03096ef847d8d564834c4a10f54754e77 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 23:35:10 -0400 Subject: [PATCH 13/19] Increased minor version of algorithm --- moalmanac/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moalmanac/config.ini b/moalmanac/config.ini index 9e21e3c..6a38ea3 100644 --- a/moalmanac/config.ini +++ b/moalmanac/config.ini @@ -8,7 +8,7 @@ include_preclinical_efficacy_in_actionability_report = on plot_preclinical_efficacy = on [versions] -interpreter = 0.6.0 +interpreter = 0.7.0 database = v.2024-04-11 [exac] From bd7d377245dba455abaf3d66f084d783931529e1 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 23:42:59 -0400 Subject: [PATCH 14/19] Removed commented out code --- moalmanac/evaluator.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/moalmanac/evaluator.py b/moalmanac/evaluator.py index 3031baf..d684c89 100644 --- a/moalmanac/evaluator.py +++ b/moalmanac/evaluator.py @@ -246,9 +246,6 @@ def evaluate(cls, somatic, germline, ms_variants, ms_status, burden, signatures, df = features.Features.concat_list_of_dataframes(list_of_dataframes=actionable_list) df[Evaluator.feature_display] = cls.format_feature_display(df=df, config=config) - # df, Evaluator.feature_display, - # Evaluator.feature_type, Evaluator.feature, - # Evaluator.alt_type, Evaluator.alt) return df.sort_values(cls.sort_columns, ascending=False) @classmethod @@ -256,9 +253,6 @@ def format_feature_display(cls, df, config): display_column = Evaluator.feature_display feature_type_column = Evaluator.feature_type feature_column = Evaluator.feature - # alt_type_column = Evaluator.alt_type - # alt_column = Evaluator.alt - #sig_version = config['signatures']['version'] biomarker_types = config['feature_types'] idx_somatic = 
df[feature_type_column].isin([biomarker_types['mut']]) From 40bcbb77570a8a76847740bbe4fb9bd028d7667f Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Tue, 9 Jul 2024 23:48:25 -0400 Subject: [PATCH 15/19] Removed additional commented code --- moalmanac/reporter.py | 1 - moalmanac/test/annotator_tests.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/moalmanac/reporter.py b/moalmanac/reporter.py index e0c0ef8..21f8828 100644 --- a/moalmanac/reporter.py +++ b/moalmanac/reporter.py @@ -5,7 +5,6 @@ import os from config import COLNAMES -#from config import CONFIG class Reporter: diff --git a/moalmanac/test/annotator_tests.py b/moalmanac/test/annotator_tests.py index 399a806..5a43b13 100644 --- a/moalmanac/test/annotator_tests.py +++ b/moalmanac/test/annotator_tests.py @@ -9,8 +9,6 @@ from datasources import Preclinical as datasources_Preclinical from features import Features from investigator import SensitivityDictionary -# from reader import Ini -# from config import CONFIG class UnitTestAnnotator(unittest.TestCase): From d69eaa6d2fe9874616a8f489c9f33b91f73ad26a Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Wed, 10 Jul 2024 00:02:19 -0400 Subject: [PATCH 16/19] Removed unused ini reader --- moalmanac/reader.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/moalmanac/reader.py b/moalmanac/reader.py index ce61219..74ff8df 100644 --- a/moalmanac/reader.py +++ b/moalmanac/reader.py @@ -5,14 +5,6 @@ class Ini: - @classmethod - def read(cls, path, extended_interpolation=False, convert_to_dictionary=False): - ini = cls.load(path, extended_interpolation=extended_interpolation) - if convert_to_dictionary: - return cls.convert_ini_to_dictionary(ini) - else: - return ini - @staticmethod def convert_ini_to_dictionary(ini): dictionary = {} From 6ec4477cffd3e33f218884b2f0b99ea679afb809 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Wed, 10 Jul 2024 00:03:00 -0400 Subject: [PATCH 17/19] just kidding --- moalmanac/reader.py | 8 ++++++++ 1 file changed, 
8 insertions(+) diff --git a/moalmanac/reader.py b/moalmanac/reader.py index 74ff8df..ce61219 100644 --- a/moalmanac/reader.py +++ b/moalmanac/reader.py @@ -5,6 +5,14 @@ class Ini: + @classmethod + def read(cls, path, extended_interpolation=False, convert_to_dictionary=False): + ini = cls.load(path, extended_interpolation=extended_interpolation) + if convert_to_dictionary: + return cls.convert_ini_to_dictionary(ini) + else: + return ini + @staticmethod def convert_ini_to_dictionary(ini): dictionary = {} From 3e400bfd450200ec8eb61bc84c24724a9317fe11 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Wed, 10 Jul 2024 00:07:36 -0400 Subject: [PATCH 18/19] Set extended interpolation to True for db loading --- moalmanac/moalmanac.py | 5 +++-- moalmanac/simplified_input.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/moalmanac/moalmanac.py b/moalmanac/moalmanac.py index 1b5572a..5470b66 100644 --- a/moalmanac/moalmanac.py +++ b/moalmanac/moalmanac.py @@ -431,12 +431,13 @@ def main(patient, inputs, output_folder, config, dbs, dbs_preclinical=None): config_ini = Ini.read(args.config, extended_interpolation=False, convert_to_dictionary=False) - db_paths = Ini.read(args.dbs, extended_interpolation=False, convert_to_dictionary=True) + db_paths = Ini.read(args.dbs, extended_interpolation=True, convert_to_dictionary=True) if args.preclinical_dbs: - preclinical_db_paths = Ini.read(args.preclinical_dbs, extended_interpolation=False, convert_to_dictionary=True) + preclinical_db_paths = Ini.read(args.preclinical_dbs, extended_interpolation=True, convert_to_dictionary=True) else: preclinical_db_paths = None + print(db_paths) main( patient=patient_dict, inputs=inputs_dict, diff --git a/moalmanac/simplified_input.py b/moalmanac/simplified_input.py index 51d57a8..7264c20 100644 --- a/moalmanac/simplified_input.py +++ b/moalmanac/simplified_input.py @@ -289,9 +289,9 @@ def main(patient, input_file, output_folder, config, dbs, dbs_preclinical=None): config_ini 
= Ini.read(args.config, extended_interpolation=False, convert_to_dictionary=False) - db_paths = Ini.read(args.dbs, extended_interpolation=False, convert_to_dictionary=True) + db_paths = Ini.read(args.dbs, extended_interpolation=True, convert_to_dictionary=True) if args.preclinical_dbs: - preclinical_db_paths = Ini.read(args.preclinical_dbs, extended_interpolation=False, convert_to_dictionary=True) + preclinical_db_paths = Ini.read(args.preclinical_dbs, extended_interpolation=True, convert_to_dictionary=True) else: preclinical_db_paths = None From 5e5d773288e4714937e33ec520b78be34bfd2828 Mon Sep 17 00:00:00 2001 From: Brendan Reardon Date: Thu, 11 Jul 2024 10:14:02 -0400 Subject: [PATCH 19/19] revised input args for simplified input --- moalmanac/simplified_input.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/moalmanac/simplified_input.py b/moalmanac/simplified_input.py index 7264c20..ba87257 100644 --- a/moalmanac/simplified_input.py +++ b/moalmanac/simplified_input.py @@ -295,7 +295,14 @@ def main(patient, input_file, output_folder, config, dbs, dbs_preclinical=None): else: preclinical_db_paths = None - main(patient_dict, args.input, output_directory) + main( + patient=patient_dict, + input_file=args.input, + output_folder=output_directory, + config=config_ini, + dbs=db_paths, + dbs_preclinical=preclinical_db_paths + ) end_time = time.time() time_statement = "Molecular Oncology Almanac runtime: %s seconds" % round((end_time - start_time), 4)