18 changes: 9 additions & 9 deletions src/CalculateCoElutionScores.py
@@ -141,7 +141,7 @@ def loadElutionData(self, elutionProfileF, frac_count = 2, max_count_cutoff=1):
pro_list.append(protID)
else:
removed += 1
print "finished processing %s\n removed %i (%.2f, total: %i, after filtering: %i) proteins found in less than %i fraction" % (elutionProfileF, removed, removed/(removed + len(prot2Index)), removed + len(prot2Index), len(prot2Index), frac_count)
print("finished processing %s\n removed %i (%.2f, total: %i, after filtering: %i) proteins found in less than %i fraction" % (elutionProfileF, removed, removed/(removed + len(prot2Index)), removed + len(prot2Index), len(prot2Index), frac_count))
elutionProfileFH.close()
elutionMat = np.nan_to_num(np.matrix(elutionMat))
return elutionMat, prot2Index
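Note: a subtlety in the hunk above: removed/(removed + len(prot2Index)) is integer division under Python 2, so the %.2f field prints 0.00 whenever removed is smaller than the total; under Python 3 the same / is true division. If the script must keep running under Python 2, forcing true division is the usual fix. A minimal sketch, not part of this diff:

    # Sketch only: force true division under Python 2 and 3 alike.
    from __future__ import division  # no-op under Python 3

    removed, kept = 12, 88
    ratio = removed / (removed + kept)  # 0.12 under both interpreters
    print("%.2f" % ratio)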
@@ -609,7 +609,7 @@ def load_genemania(self):
for fp in self.files: # for debugging, I only used the first three files
filename = str(fp.split('/')[-1])
if filename.startswith(f_evidence):
print "Processing: %s" % (filename)
print("Processing: %s" % (filename))
fh = urllib2.urlopen(fp)
fh.readline()
for line in fh:
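Note: urllib2 exists only under Python 2, so converting the print alone does not make load_genemania Python 3-ready; there the module is split into urllib.request and urllib.error. A version-agnostic import along these lines would be needed (a sketch under that assumption, not part of this diff; the same applies to the urllib2.HTTPError handler in get_current_string_ver further down):

    # Sketch only: pick the right urlopen/HTTPError for the running interpreter.
    try:
        from urllib2 import urlopen, HTTPError          # Python 2
    except ImportError:
        from urllib.request import urlopen              # Python 3
        from urllib.error import HTTPError

    fh = urlopen(fp)   # fp is the file URL from the loop above
    fh.readline()      # skip the header line, as the code above does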
@@ -1082,8 +1082,8 @@ def cv_eval(self, data, targets, folds= 5):
this_targets = []
i = 1
for train, test in skf.split(data, targets):
#print "Processing fold %i" % i
print "Processing data..."
#print("Processing fold %i" % i)
print("Processing data...")
i += 1
self.fit(data[train], targets[train])
probs.extend(self.predict_proba(data[test]))
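Note: cv_eval follows the standard scikit-learn pattern: StratifiedKFold yields index arrays per fold, the model is refit on each training split, and predicted probabilities are pooled over the held-out splits. A self-contained miniature of the same loop (toy data; the RandomForest stand-in is an assumption, chosen only because the repo's CLF_Wrapper offers an RF option):

    # Minimal sketch of the loop above with scikit-learn on toy data.
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import StratifiedKFold

    data = np.random.rand(100, 8)            # 100 samples, 8 features
    targets = np.random.randint(0, 2, 100)   # binary labels

    clf = RandomForestClassifier(n_estimators=50)
    skf = StratifiedKFold(n_splits=5)
    probs, this_targets = [], []
    for train, test in skf.split(data, targets):
        clf.fit(data[train], targets[train])
        probs.extend(clf.predict_proba(data[test]))   # per-class probabilities
        this_targets.extend(targets[test])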
@@ -1108,7 +1108,7 @@ class MLP_wrapper(object):


def __init__(self):
print "Using MLP with Keras/tensorflow"
print("Using MLP with Keras/tensorflow")
self.model = Sequential()

def fit(self, data, labels):
@@ -1133,12 +1133,12 @@ def predict(self, toPred):
class SAE_wrapper(MLP_wrapper):

def __init__(self):
print "Using stacked autoencoder"
print("Using stacked autoencoder")


def fit(self, data, labels):
- print data.shape
- print len(labels)
+ print(data.shape)
+ print(len(labels))
num_features = data.shape[1]
input = Input(shape=(num_features,))
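Note: SAE_wrapper.fit builds its network with the Keras functional API, starting from an Input layer sized to the feature count; the rest of the method is collapsed in this view. A stacked autoencoder consistent with that opening might look as follows; every layer size and training setting here is an assumption for illustration, not taken from the repository:

    # Hedged sketch of a stacked autoencoder fit; sizes are illustrative only.
    from keras.layers import Dense, Input
    from keras.models import Model

    def fit_sae(data, num_features):
        inp = Input(shape=(num_features,))
        # Encoder: progressively narrower layers.
        enc = Dense(128, activation='relu')(inp)
        enc = Dense(64, activation='relu')(enc)
        # Decoder mirrors the encoder; the net learns to reconstruct its input.
        dec = Dense(128, activation='relu')(enc)
        dec = Dense(num_features, activation='sigmoid')(dec)
        autoencoder = Model(inp, dec)
        autoencoder.compile(optimizer='adam', loss='mse')
        autoencoder.fit(data, data, epochs=10, batch_size=32, verbose=0)
        return Model(inp, enc)   # the trained encoder half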

@@ -1250,7 +1250,7 @@ def get_current_string_ver(self):
return response.getroot()[0][0].text
except urllib2.HTTPError as err:
error_message = err.read()
- print error_message
+ print(error_message)
sys.exit()
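Note: every edit in this file is the same mechanical change: statement-form print becomes the print() function. With a single parenthesized argument the new spelling is already valid under Python 2 as well, so these hunks are safe on both interpreters; only multi-argument calls or the file=/sep=/end= keywords additionally need the __future__ import. A sketch:

    # Sketch only: make print a real function under Python 2 (no-op under 3).
    from __future__ import print_function
    import sys

    print("finished", "processing", sep=" ")   # multi-arg form, both versions
    print("error", file=sys.stderr)            # keyword form, both versions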


62 changes: 31 additions & 31 deletions src/GoldStandard.py
@@ -50,19 +50,19 @@ def make_reference_data(self, db_clusters, orthmap="", found_prots=""):
self.complexes.addComplex("%i;%s;%s" % (i, db_clust.name, compl), tmp_clust.complexes[compl])
i += 1

print "Total number of complexes %i in %s" % (total_complexes, self.name)
print "Number of complexes after ortholog mapping %i complexes in %s" % (len(self.complexes.complexes), self.name)
print("Total number of complexes %i in %s" % (total_complexes, self.name))
print("Number of complexes after ortholog mapping %i complexes in %s" % (len(self.complexes.complexes), self.name))

if found_prots != "":
self.complexes.remove_proteins(found_prots)
print "After removing not indetified proteins %i number of complexes in % s" % (len(self.complexes.complexes), self.name)
print("After removing not indetified proteins %i number of complexes in % s" % (len(self.complexes.complexes), self.name))

self.complexes.filter_complexes()
print "After size filtering %i number of complexes in % s" % (len(self.complexes.complexes), self.name)
print("After size filtering %i number of complexes in % s" % (len(self.complexes.complexes), self.name))

self.complexes.merge_complexes()
self.complexes.filter_complexes()
print "After mergning %i number of complexes in % s" % (len(self.complexes.complexes), self.name)
print("After mergning %i number of complexes in % s" % (len(self.complexes.complexes), self.name))

self.make_pos_neg_ppis()

@@ -227,8 +227,8 @@ def n_fols_split(self, num_folds, number_pruning, overlap="False"):

round += 1

print "length of complex set one: " + str(len(itemindex_one))
print "length of complex set two: " + str(len(itemindex_zero))
print("length of complex set one: " + str(len(itemindex_one)))
print("length of complex set two: " + str(len(itemindex_zero)))

# randomize clusters
#rnd.shuffle(ref_cluster_ids)
@@ -272,16 +272,16 @@ def n_fols_split(self, num_folds, number_pruning, overlap="False"):
len_over_positive = len(train[0] & evaluate[0])
len_over_negative = len(train[1] & evaluate[1])

- print len_train_positive
- print len_eva_positive
- print len_train_negative
- print len_eva_negative
+ print(len_train_positive)
+ print(len_eva_positive)
+ print(len_train_negative)
+ print(len_eva_negative)


print "number of train and evaluation PPIs:"
print len_train_positive + len_train_negative
print "number of overlapped PPIs:"
print len_over_positive + len_over_negative
print("number of train and evaluation PPIs:")
print(len_train_positive + len_train_negative)
print("number of overlapped PPIs:")
print(len_over_positive + len_over_negative)

out_folds.append((training, evaluation))
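Note: the block above is a leakage check: train and evaluate each appear to hold a (positive PPIs, negative PPIs) pair of sets, so the & intersections count pairs shared between the training and evaluation folds, which should be zero for a clean split. In miniature, under that assumed layout:

    # Toy illustration of the overlap check; the (positives, negatives)
    # tuple layout is inferred from the code above, not confirmed.
    train    = ({"A-B", "C-D"}, {"E-F"})
    evaluate = ({"C-D", "G-H"}, {"I-J"})
    overlap = len(train[0] & evaluate[0]) + len(train[1] & evaluate[1])
    print(overlap)   # 1 -> the pair "C-D" leaks into evaluation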

@@ -346,12 +346,12 @@ def n_fols_split(self, num_folds, number_pruning, overlap="False"):
# len_over_negative = len(train[1] & evaluate[1])
#
#
- # print len_train_positive
- # print len_eva_positive
- # print len_over_positive
- # print len_train_negative
- # print len_eva_negative
- # print len_over_negative
+ # print(len_train_positive)
+ # print(len_eva_positive)
+ # print(len_over_positive)
+ # print(len_train_negative)
+ # print(len_eva_negative)
+ # print(len_over_negative)
# sys.exit()
#
# out_folds.append((training, evaluation))
@@ -476,9 +476,9 @@ def split_into_n_fold2(self, n_fold, val_ppis, no_overlapp=False): # what is va

training_evaluation_dictionary["turpleKey"].append((training, evaluation))

print "the number of training negatives and positives for corss validation "
print len(training.get_negative())
print len(training.get_positive())
print("the number of training negatives and positives for corss validation ")
print(len(training.get_negative()))
print(len(training.get_positive()))

return training_evaluation_dictionary

@@ -747,7 +747,7 @@ def read_file(self, clusterF):
all_proteins_count += len(prots)
clusterFH.close()

print "Average size of predicted complexes is: " + str((all_proteins_count)/i)
print("Average size of predicted complexes is: " + str((all_proteins_count)/i))

def write_cuslter_file(self, outF):
outFH = open(outF, "w")
@@ -1044,9 +1044,9 @@ def get_complexes(self):
# go_to_prot_map = {}
# prot_to_go_map = {}
# quickgoURL = "http://www.ebi.ac.uk/QuickGO-Old/GAnnotation?goid=GO:0043234&tax=%s&format=tsv&limit=1000000000&evidence=IDA,IPI,EXP," % (self.taxid)
- # print quickgoURL
- # print self.taxid
- # print "the url is: ..."
+ # print(quickgoURL)
+ # print(self.taxid)
+ # print("the url is: ...")
# quickgoURL_FH = urllib2.urlopen(quickgoURL)
# quickgoURL_FH.readline()
# for line in quickgoURL_FH:
@@ -1094,7 +1094,7 @@ def __init__(self, taxid, inparanoid_cutoff=1, foundProts = set([])):
xmldoc = self.getXML()
self.orthmap, self.orthgroups = self.parseXML(xmldoc)
else:
print "Taxid:%s not supported" % taxid
print("Taxid:%s not supported" % taxid)

def mapProtein(self, prot):
if prot not in self.orthmap: return None
@@ -1127,7 +1127,7 @@ def mapComplexes(self, clusters):
if prot in self.orthmap:
mapped_members.add(self.orthmap[prot])
# else:
# print "No map for %s" % prot
# print("No map for %s" % prot)

if len(mapped_members)==0:
todel.add(clust)
@@ -1246,4 +1246,4 @@ def getids(ids_raw):
else:
outmap[protA] = protB
outgroups.append(orthgroup)
- self.orthmap, self.orthgroups = outmap, outgroups
\ No newline at end of file
+ self.orthmap, self.orthgroups = outmap, outgroups
46 changes: 23 additions & 23 deletions src/main.py
@@ -82,50 +82,50 @@ def main():
args.fun_anno_source = args.fun_anno_source.upper()

#Create feature combination
- if args.feature_selection == "00000000":
- print "Select at least one feature"
+ if args.feature_selection == "00000000":
+ print("Select at least one feature")
sys.exit()

this_scores = utils.get_fs_comb(args.feature_selection)
print "\t".join([fs.name for fs in this_scores])
print("\t".join([fs.name for fs in this_scores]))

# Initialize CLF
- use_rf = args.classifier == "RF"
+ use_rf = args.classifier == "RF"
clf = CS.CLF_Wrapper(args.num_cores, use_rf)

# Load elution data
- foundprots, elution_datas = utils.load_data(args.input_dir, this_scores, fc=args.frac_count, mfc=args.elution_max_count)
+ foundprots, elution_datas = utils.load_data(args.input_dir, this_scores, fc=args.frac_count, mfc=args.elution_max_count)

# Generate reference data set
gs = ""
if ((args.taxid != "" and args.ppi != "") or (args.cluster != "" and args.ppi != "" )):
print "Refernce from cluster and PPI are nor compatiple. Please supply ppi or complex reference, not both!"
print("Refernce from cluster and PPI are nor compatiple. Please supply ppi or complex reference, not both!")
sys.exit()

if args.taxid == "" and args.ppi == "" and args.cluster == "":
print "Please supply a reference by setting taxid, cluster, or ppi tag"
print("Please supply a reference by setting taxid, cluster, or ppi tag")
sys.exit()

gs_clusters = []
if (args.taxid != "" and args.cluster == "" and args.ppi == ""):
print "Loading clusters from GO, CORUM, and Intact"
print("Loading clusters from GO, CORUM, and Intact")
gs_clusters.extend(utils.get_reference_from_net(args.taxid))

if args.cluster != "":
print "Loading complexes from file"
print("Loading complexes from file")
if args.mode == "FA":
gs_clusters.append(GS.FileClusters(args.cluster, "all"))
else:
gs_clusters.append(GS.FileClusters(args.cluster, foundprots))

if args.ppi != "":
print "Reading PPI file from %s" % args.reference
print("Reading PPI file from %s" % args.reference)
gs = Goldstandard_from_PPI_File(args.ppi, foundprots)



- print gs_clusters
- if len(gs_clusters)>0:
+ print(gs_clusters)
+ if len(gs_clusters)>0:
gs = utils.create_goldstandard(gs_clusters, args.taxid, foundprots)

output_dir = args.output_dir + os.sep + args.output_prefix
@@ -139,26 +139,26 @@ def main():
if args.precalcualted_score_file == "NONE":
scoreCalc.calculate_coelutionDatas(gs)
else:
- scoreCalc.readTable(args.precalcualted_score_file, gs)
+ scoreCalc.readTable(args.precalcualted_score_file, gs)

- print scoreCalc.scores.shape
+ print(scoreCalc.scores.shape)

functionalData = ""
gs.positive = set(gs.positive & set(scoreCalc.ppiToIndex.keys()))
gs.negative = set(gs.negative & set(scoreCalc.ppiToIndex.keys()))
gs.rebalance()

- print len(gs.positive)
- print len(gs.negative)
+ print(len(gs.positive))
+ print(len(gs.negative))


if args.mode != "EXP":
print "Loading functional data"
print("Loading functional data")
functionalData = utils.get_FA_data(args.fun_anno_source, args.taxid, args.fun_anno_file)
print "Dimension of fun anno " + str(functionalData.scores.shape)
print("Dimension of fun anno " + str(functionalData.scores.shape))


print "Start benchmarking"
print("Start benchmarking")

if args.mode == "EXP":
utils.cv_bench_clf(scoreCalc, clf, gs, output_dir, format="pdf", verbose=True, folds = 5)
@@ -172,7 +172,7 @@ def main():
utils.cv_bench_clf(functionalData, clf, gs, output_dir, format="pdf", verbose=True, folds= 5)

# PPI evaluation
- print utils.cv_bench_clf(scoreCalc, clf, gs, args.output_dir, verbose=False, format="pdf", folds=5)
+ print(utils.cv_bench_clf(scoreCalc, clf, gs, args.output_dir, verbose=False, format="pdf", folds=5))
#print "I am here"

network = utils.make_predictions(scoreCalc, args.mode, clf, gs, fun_anno=functionalData)
@@ -197,14 +197,14 @@ def main():
pred_clusters = GS.Clusters(False)
pred_clusters.read_file("%s.clust.txt" % (output_dir))
overlapped_complexes_with_reference = gs.get_complexes().get_overlapped_complexes_set(pred_clusters)
print "# of complexes in reference dataset: " + str(len(overlapped_complexes_with_reference))
print("# of complexes in reference dataset: " + str(len(overlapped_complexes_with_reference)))
#clust_scores, header = utils.clustering_evaluation(gs.complexes, pred_clusters, "", False)
clust_scores, header, composite_score = utils.clustering_evaluation(gs.complexes, pred_clusters, "", False)
outFH = open("%s.eval.txt" % (output_dir), "w")
header = header.split("\t")
clust_scores = clust_scores.split("\t")
for i, head in enumerate(header):
print "%s\t%s" % (head, clust_scores[i])
print("%s\t%s" % (head, clust_scores[i]))
print >> outFH, "%s\t%s" % (head, clust_scores[i])
outFH.close()
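Note: one context line in this hunk, print >> outFH, ..., was not migrated; under Python 3 print is a plain function there, so >> becomes an unsupported operand and the line fails at runtime. The Python 3 spelling (also valid under Python 2 with the print_function import) would be:

    # Python 2 only (current line):
    #   print >> outFH, "%s\t%s" % (head, clust_scores[i])
    # Python 3, or both with 'from __future__ import print_function':
    print("%s\t%s" % (head, clust_scores[i]), file=outFH)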

Expand All @@ -214,4 +214,4 @@ def main():
except KeyboardInterrupt:
pass

- #11000100 (MI, Bayes, PCC+N)
\ No newline at end of file
+ #11000100 (MI, Bayes, PCC+N)