Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
1. Outliers detection and batch detection & correction are shown in <a href="https://nbviewer.jupyter.org/github/BostonGene/MFP/blob/master/Methods_Description_-_Batch_correction.ipynb">Methods_Description_-_Batch_correction.ipynb</a> (Melanoma data)
1. Clusters generation from scaled signatures is shown in <a href="clustering_example.py">clustering_example.py</a>. The process is the same for melanoma and pancan analysis
1. Finally, having a dataset with scaled signatures and known clusters, we can classify other datasets using <a href="classification_example.py">classification_example.py</a>
1. A walkthrough of the clustering and classification (labeling samples as IE, IE/F, F, or D) is illustrated in a Python notebook.


_.ipynb files can be opened at https://nbviewer.jupyter.org/ or downloaded as HTML files from the upstream_html folder_
Expand All @@ -30,4 +31,4 @@ The Molecular Functional (MF) Portrait is a planetary schematic representation o

Visual tool available at https://science.bostongene.com/tumor-portrait/<br>

© 2020 BostonGene Corporation.
© 2020 BostonGene Corporation.
48 changes: 48 additions & 0 deletions final_clusters.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
MFP
GSM478912 D
GSM478913 D
GSM478937 D
GSM478939 D
GSM478946 D
GSM478947 D
GSM478948 D
GSM478914 F
GSM478915 F
GSM478916 F
GSM478925 F
GSM478943 F
GSM478932 F
GSM478924 F
GSM478942 F
GSM478944 F
GSM478945 F
GSM478956 F
GSM478917 D
GSM478920 IE
GSM478949 D
GSM478963 D
GSM478918 IE
GSM478919 IE
GSM478926 IE
GSM478927 IE
GSM478952 IE
GSM478960 IE
GSM478922 IE/F
GSM478923 IE/F
GSM478961 IE/F
GSM478955 F
GSM478934 IE
GSM478935 IE
GSM478954 IE
GSM478958 IE
GSM478950 IE
GSM478928
GSM478929
GSM478930
GSM478959
GSM478933 IE
GSM478936 D
GSM478938 D
GSM478940 IE/F
GSM478941 F
GSM478951 IE
4 changes: 2 additions & 2 deletions portraits/clustering.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import warnings

import community # louvain
import community.community_louvain as community
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
Expand Down Expand Up @@ -122,7 +122,7 @@ def clustering_profile_metrics_plot(cluster_metrics, num_clusters_ylim_max=7):
clusters_perc = pd.DataFrame([x.value_counts() for x in cluster_metrics.perc],
index=cluster_metrics.index).iloc[:, :10]

clusters_perc.plot(kind='bar', stached=True, ax=next(af), offset=.5)
clusters_perc.plot(kind='bar', stacked=True, ax=next(af)) #offset=.5 was specified in the original code

ax.set_xticks(ax.get_xticks() - .5)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
Expand Down
33 changes: 33 additions & 0 deletions portraits/detect_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd
from portraits.clustering import clustering_profile_metrics, clustering_profile_metrics_plot
from portraits.utils import read_gene_sets, ssgsea_formula, median_scale


def detect_type(data, threshold, scores):
ser = data.loc[threshold].perc # here threshold and ser were added to the original code
cmeans = pd.DataFrame({cg: scores.loc[samps.index].mean() for cg, samps in ser.groupby(ser)})
mapper = {}
deltas = (cmeans.loc[['Angiogenesis', 'Endothelium', 'CAF', 'Matrix', 'Matrix_remodeling']].mean() -
cmeans.loc[['MHCII', 'Antitumor_cytokines', 'Coactivation_molecules',
'B_cells', 'NK_cells', 'Checkpoint_inhibition',
'Effector_cells', 'T_cells', 'Th1_signature',
'T_cell_traffic', 'MHCI']].mean()).sort_values()

mapper[deltas.index[-1]] = 'F' # That's fibrotic
mapper[deltas.index[0]] = 'IE' # Immune enriched, non-fibrotic
cmeans.pop(deltas.index[-1])
cmeans.pop(deltas.index[0])

deltas = (cmeans.loc[['Angiogenesis', 'Endothelium', 'CAF', 'Matrix', 'Matrix_remodeling',
'Protumor_cytokines', 'Neutrophil_signature', 'Granulocyte_traffic',
'Macrophages', 'Macrophage_DC_traffic', 'MDSC_traffic', 'MDSC',
'Th2_signature', 'T_reg_traffic', 'Treg', 'M1_signatures', 'MHCII',
'Antitumor_cytokines', 'Coactivation_molecules', 'B_cells', 'NK_cells',
'Checkpoint_inhibition', 'Effector_cells', 'T_cells', 'Th1_signature',
'T_cell_traffic', 'MHCI', 'EMT_signature']].mean() -
cmeans.loc['Proliferation_rate']).sort_values()

mapper[deltas.index[-1]] = 'IE/F' # Immune enriched & fibrotic
mapper[deltas.index[0]] = 'D' # Desert
return ser.map(mapper).rename('MFP')

Loading