18 changes: 9 additions & 9 deletions src/CalculateCoElutionScores.py
@@ -141,7 +141,7 @@ def loadElutionData(self, elutionProfileF, frac_count = 2, max_count_cutoff=1):
pro_list.append(protID)
else:
removed += 1
print "finished processing %s\n removed %i (%.2f, total: %i, after filtering: %i) proteins found in less than %i fraction" % (elutionProfileF, removed, removed/(removed + len(prot2Index)), removed + len(prot2Index), len(prot2Index), frac_count)
print("finished processing %s\n removed %i (%.2f, total: %i, after filtering: %i) proteins found in less than %i fraction" % (elutionProfileF, removed, removed/(removed + len(prot2Index)), removed + len(prot2Index), len(prot2Index), frac_count))
elutionProfileFH.close()
elutionMat = np.nan_to_num(np.matrix(elutionMat))
return elutionMat, prot2Index
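Note: a subtlety in the hunk above: removed/(removed + len(prot2Index)) is integer division under Python 2, so the %.2f field prints 0.00 whenever removed is smaller than the total; under Python 3 the same / is true division. If the script must keep running under Python 2, forcing true division is the usual fix. A minimal sketch, not part of this diff:

    # Sketch only: force true division under Python 2 and 3 alike.
    from __future__ import division  # no-op under Python 3

    removed, kept = 12, 88
    ratio = removed / (removed + kept)  # 0.12 under both interpreters
    print("%.2f" % ratio)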
@@ -609,7 +609,7 @@ def load_genemania(self):
for fp in self.files: # for debugging, I only used the first three files
filename = str(fp.split('/')[-1])
if filename.startswith(f_evidence):
print "Processing: %s" % (filename)
print("Processing: %s" % (filename))
fh = urllib2.urlopen(fp)
fh.readline()
for line in fh:
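Note: urllib2 exists only under Python 2, so converting the print alone does not make load_genemania Python 3-ready; there the module is split into urllib.request and urllib.error. A version-agnostic import along these lines would be needed (a sketch under that assumption, not part of this diff; the same applies to the urllib2.HTTPError handler in get_current_string_ver further down):

    # Sketch only: pick the right urlopen/HTTPError for the running interpreter.
    try:
        from urllib2 import urlopen, HTTPError          # Python 2
    except ImportError:
        from urllib.request import urlopen              # Python 3
        from urllib.error import HTTPError

    fh = urlopen(fp)   # fp is the file URL from the loop above
    fh.readline()      # skip the header line, as the code above does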
@@ -1082,8 +1082,8 @@ def cv_eval(self, data, targets, folds= 5):
this_targets = []
i = 1
for train, test in skf.split(data, targets):
#print "Processing fold %i" % i
print "Processing data..."
#print("Processing fold %i" % i)
print("Processing data...")
i += 1
self.fit(data[train], targets[train])
probs.extend(self.predict_proba(data[test]))
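Note: cv_eval follows the standard scikit-learn pattern: StratifiedKFold yields index arrays per fold, the model is refit on each training split, and predicted probabilities are pooled over the held-out splits. A self-contained miniature of the same loop (toy data; the RandomForest stand-in is an assumption, chosen only because the repo's CLF_Wrapper offers an RF option):

    # Minimal sketch of the loop above with scikit-learn on toy data.
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import StratifiedKFold

    data = np.random.rand(100, 8)            # 100 samples, 8 features
    targets = np.random.randint(0, 2, 100)   # binary labels

    clf = RandomForestClassifier(n_estimators=50)
    skf = StratifiedKFold(n_splits=5)
    probs, this_targets = [], []
    for train, test in skf.split(data, targets):
        clf.fit(data[train], targets[train])
        probs.extend(clf.predict_proba(data[test]))   # per-class probabilities
        this_targets.extend(targets[test])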
@@ -1108,7 +1108,7 @@ class MLP_wrapper(object):


def __init__(self):
print "Using MLP with Keras/tensorflow"
print("Using MLP with Keras/tensorflow")
self.model = Sequential()

def fit(self, data, labels):
@@ -1133,12 +1133,12 @@ def predict(self, toPred):
class SAE_wrapper(MLP_wrapper):

def __init__(self):
print "Using stacked autoencoder"
print("Using stacked autoencoder")


def fit(self, data, labels):
- print data.shape
- print len(labels)
+ print(data.shape)
+ print(len(labels))
num_features = data.shape[1]
input = Input(shape=(num_features,))
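Note: SAE_wrapper.fit builds its network with the Keras functional API, starting from an Input layer sized to the feature count; the rest of the method is collapsed in this view. A stacked autoencoder consistent with that opening might look as follows; every layer size and training setting here is an assumption for illustration, not taken from the repository:

    # Hedged sketch of a stacked autoencoder fit; sizes are illustrative only.
    from keras.layers import Dense, Input
    from keras.models import Model

    def fit_sae(data, num_features):
        inp = Input(shape=(num_features,))
        # Encoder: progressively narrower layers.
        enc = Dense(128, activation='relu')(inp)
        enc = Dense(64, activation='relu')(enc)
        # Decoder mirrors the encoder; the net learns to reconstruct its input.
        dec = Dense(128, activation='relu')(enc)
        dec = Dense(num_features, activation='sigmoid')(dec)
        autoencoder = Model(inp, dec)
        autoencoder.compile(optimizer='adam', loss='mse')
        autoencoder.fit(data, data, epochs=10, batch_size=32, verbose=0)
        return Model(inp, enc)   # the trained encoder half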

@@ -1250,7 +1250,7 @@ def get_current_string_ver(self):
return response.getroot()[0][0].text
except urllib2.HTTPError as err:
error_message = err.read()
- print error_message
+ print(error_message)
sys.exit()
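Note: every edit in this file is the same mechanical change: statement-form print becomes the print() function. With a single parenthesized argument the new spelling is already valid under Python 2 as well, so these hunks are safe on both interpreters; only multi-argument calls or the file=/sep=/end= keywords additionally need the __future__ import. A sketch:

    # Sketch only: make print a real function under Python 2 (no-op under 3).
    from __future__ import print_function
    import sys

    print("finished", "processing", sep=" ")   # multi-arg form, both versions
    print("error", file=sys.stderr)            # keyword form, both versions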


62 changes: 31 additions & 31 deletions src/GoldStandard.py
@@ -50,19 +50,19 @@ def make_reference_data(self, db_clusters, orthmap="", found_prots=""):
self.complexes.addComplex("%i;%s;%s" % (i, db_clust.name, compl), tmp_clust.complexes[compl])
i += 1

print "Total number of complexes %i in %s" % (total_complexes, self.name)
print "Number of complexes after ortholog mapping %i complexes in %s" % (len(self.complexes.complexes), self.name)
print("Total number of complexes %i in %s" % (total_complexes, self.name))
print("Number of complexes after ortholog mapping %i complexes in %s" % (len(self.complexes.complexes), self.name))

if found_prots != "":
self.complexes.remove_proteins(found_prots)
print "After removing not indetified proteins %i number of complexes in % s" % (len(self.complexes.complexes), self.name)
print("After removing not indetified proteins %i number of complexes in % s" % (len(self.complexes.complexes), self.name))

self.complexes.filter_complexes()
print "After size filtering %i number of complexes in % s" % (len(self.complexes.complexes), self.name)
print("After size filtering %i number of complexes in % s" % (len(self.complexes.complexes), self.name))

self.complexes.merge_complexes()
self.complexes.filter_complexes()
print "After mergning %i number of complexes in % s" % (len(self.complexes.complexes), self.name)
print("After mergning %i number of complexes in % s" % (len(self.complexes.complexes), self.name))

self.make_pos_neg_ppis()

@@ -227,8 +227,8 @@ def n_fols_split(self, num_folds, number_pruning, overlap="False"):

round += 1

print "length of complex set one: " + str(len(itemindex_one))
print "length of complex set two: " + str(len(itemindex_zero))
print("length of complex set one: " + str(len(itemindex_one)))
print("length of complex set two: " + str(len(itemindex_zero)))

# randomize clusters
#rnd.shuffle(ref_cluster_ids)
@@ -272,16 +272,16 @@ def n_fols_split(self, num_folds, number_pruning, overlap="False"):
len_over_positive = len(train[0] & evaluate[0])
len_over_negative = len(train[1] & evaluate[1])

- print len_train_positive
- print len_eva_positive
- print len_train_negative
- print len_eva_negative
+ print(len_train_positive)
+ print(len_eva_positive)
+ print(len_train_negative)
+ print(len_eva_negative)


print "number of train and evaluation PPIs:"
print len_train_positive + len_train_negative
print "number of overlapped PPIs:"
print len_over_positive + len_over_negative
print("number of train and evaluation PPIs:")
print(len_train_positive + len_train_negative)
print("number of overlapped PPIs:")
print(len_over_positive + len_over_negative)

out_folds.append((training, evaluation))
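Note: the block above is a leakage check: train and evaluate each appear to hold a (positive PPIs, negative PPIs) pair of sets, so the & intersections count pairs shared between the training and evaluation folds, which should be zero for a clean split. In miniature, under that assumed layout:

    # Toy illustration of the overlap check; the (positives, negatives)
    # tuple layout is inferred from the code above, not confirmed.
    train    = ({"A-B", "C-D"}, {"E-F"})
    evaluate = ({"C-D", "G-H"}, {"I-J"})
    overlap = len(train[0] & evaluate[0]) + len(train[1] & evaluate[1])
    print(overlap)   # 1 -> the pair "C-D" leaks into evaluation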

@@ -346,12 +346,12 @@ def n_fols_split(self, num_folds, number_pruning, overlap="False"):
# len_over_negative = len(train[1] & evaluate[1])
#
#
- # print len_train_positive
- # print len_eva_positive
- # print len_over_positive
- # print len_train_negative
- # print len_eva_negative
- # print len_over_negative
+ # print(len_train_positive)
+ # print(len_eva_positive)
+ # print(len_over_positive)
+ # print(len_train_negative)
+ # print(len_eva_negative)
+ # print(len_over_negative)
# sys.exit()
#
# out_folds.append((training, evaluation))
@@ -476,9 +476,9 @@ def split_into_n_fold2(self, n_fold, val_ppis, no_overlapp=False): # what is va

training_evaluation_dictionary["turpleKey"].append((training, evaluation))

print "the number of training negatives and positives for corss validation "
print len(training.get_negative())
print len(training.get_positive())
print("the number of training negatives and positives for corss validation ")
print(len(training.get_negative()))
print(len(training.get_positive()))

return training_evaluation_dictionary

@@ -747,7 +747,7 @@ def read_file(self, clusterF):
all_proteins_count += len(prots)
clusterFH.close()

print "Average size of predicted complexes is: " + str((all_proteins_count)/i)
print("Average size of predicted complexes is: " + str((all_proteins_count)/i))

def write_cuslter_file(self, outF):
outFH = open(outF, "w")
@@ -1044,9 +1044,9 @@ def get_complexes(self):
# go_to_prot_map = {}
# prot_to_go_map = {}
# quickgoURL = "http://www.ebi.ac.uk/QuickGO-Old/GAnnotation?goid=GO:0043234&tax=%s&format=tsv&limit=1000000000&evidence=IDA,IPI,EXP," % (self.taxid)
- # print quickgoURL
- # print self.taxid
- # print "the url is: ..."
+ # print(quickgoURL)
+ # print(self.taxid)
+ # print("the url is: ...")
# quickgoURL_FH = urllib2.urlopen(quickgoURL)
# quickgoURL_FH.readline()
# for line in quickgoURL_FH:
@@ -1094,7 +1094,7 @@ def __init__(self, taxid, inparanoid_cutoff=1, foundProts = set([])):
xmldoc = self.getXML()
self.orthmap, self.orthgroups = self.parseXML(xmldoc)
else:
print "Taxid:%s not supported" % taxid
print("Taxid:%s not supported" % taxid)

def mapProtein(self, prot):
if prot not in self.orthmap: return None
@@ -1127,7 +1127,7 @@ def mapComplexes(self, clusters):
if prot in self.orthmap:
mapped_members.add(self.orthmap[prot])
# else:
# print "No map for %s" % prot
# print("No map for %s" % prot)

if len(mapped_members)==0:
todel.add(clust)
@@ -1246,4 +1246,4 @@ def getids(ids_raw):
else:
outmap[protA] = protB
outgroups.append(orthgroup)
- self.orthmap, self.orthgroups = outmap, outgroups
\ No newline at end of file
+ self.orthmap, self.orthgroups = outmap, outgroups
46 changes: 23 additions & 23 deletions src/main.py
@@ -82,50 +82,50 @@ def main():
args.fun_anno_source = args.fun_anno_source.upper()

#Create feature combination
- if args.feature_selection == "00000000":
- print "Select at least one feature"
+ if args.feature_selection == "00000000":
+ print("Select at least one feature")
sys.exit()

this_scores = utils.get_fs_comb(args.feature_selection)
print "\t".join([fs.name for fs in this_scores])
print("\t".join([fs.name for fs in this_scores]))

# Initialize CLF
- use_rf = args.classifier == "RF"
+ use_rf = args.classifier == "RF"
clf = CS.CLF_Wrapper(args.num_cores, use_rf)

# Load elution data
- foundprots, elution_datas = utils.load_data(args.input_dir, this_scores, fc=args.frac_count, mfc=args.elution_max_count)
+ foundprots, elution_datas = utils.load_data(args.input_dir, this_scores, fc=args.frac_count, mfc=args.elution_max_count)

# Generate reference data set
gs = ""
if ((args.taxid != "" and args.ppi != "") or (args.cluster != "" and args.ppi != "" )):
print "Refernce from cluster and PPI are nor compatiple. Please supply ppi or complex reference, not both!"
print("Refernce from cluster and PPI are nor compatiple. Please supply ppi or complex reference, not both!")
sys.exit()

if args.taxid == "" and args.ppi == "" and args.cluster == "":
print "Please supply a reference by setting taxid, cluster, or ppi tag"
print("Please supply a reference by setting taxid, cluster, or ppi tag")
sys.exit()

gs_clusters = []
if (args.taxid != "" and args.cluster == "" and args.ppi == ""):
print "Loading clusters from GO, CORUM, and Intact"
print("Loading clusters from GO, CORUM, and Intact")
gs_clusters.extend(utils.get_reference_from_net(args.taxid))

if args.cluster != "":
print "Loading complexes from file"
print("Loading complexes from file")
if args.mode == "FA":
gs_clusters.append(GS.FileClusters(args.cluster, "all"))
else:
gs_clusters.append(GS.FileClusters(args.cluster, foundprots))

if args.ppi != "":
print "Reading PPI file from %s" % args.reference
print("Reading PPI file from %s" % args.reference)
gs = Goldstandard_from_PPI_File(args.ppi, foundprots)



- print gs_clusters
- if len(gs_clusters)>0:
+ print(gs_clusters)
+ if len(gs_clusters)>0:
gs = utils.create_goldstandard(gs_clusters, args.taxid, foundprots)

output_dir = args.output_dir + os.sep + args.output_prefix
@@ -139,26 +139,26 @@ def main():
if args.precalcualted_score_file == "NONE":
scoreCalc.calculate_coelutionDatas(gs)
else:
- scoreCalc.readTable(args.precalcualted_score_file, gs)
+ scoreCalc.readTable(args.precalcualted_score_file, gs)

- print scoreCalc.scores.shape
+ print(scoreCalc.scores.shape)

functionalData = ""
gs.positive = set(gs.positive & set(scoreCalc.ppiToIndex.keys()))
gs.negative = set(gs.negative & set(scoreCalc.ppiToIndex.keys()))
gs.rebalance()

- print len(gs.positive)
- print len(gs.negative)
+ print(len(gs.positive))
+ print(len(gs.negative))


if args.mode != "EXP":
print "Loading functional data"
print("Loading functional data")
functionalData = utils.get_FA_data(args.fun_anno_source, args.taxid, args.fun_anno_file)
print "Dimension of fun anno " + str(functionalData.scores.shape)
print("Dimension of fun anno " + str(functionalData.scores.shape))


print "Start benchmarking"
print("Start benchmarking")

if args.mode == "EXP":
utils.cv_bench_clf(scoreCalc, clf, gs, output_dir, format="pdf", verbose=True, folds = 5)
@@ -172,7 +172,7 @@ def main():
utils.cv_bench_clf(functionalData, clf, gs, output_dir, format="pdf", verbose=True, folds= 5)

# PPI evaluation
- print utils.cv_bench_clf(scoreCalc, clf, gs, args.output_dir, verbose=False, format="pdf", folds=5)
+ print(utils.cv_bench_clf(scoreCalc, clf, gs, args.output_dir, verbose=False, format="pdf", folds=5))
#print "I am here"

network = utils.make_predictions(scoreCalc, args.mode, clf, gs, fun_anno=functionalData)
@@ -197,14 +197,14 @@ def main():
pred_clusters = GS.Clusters(False)
pred_clusters.read_file("%s.clust.txt" % (output_dir))
overlapped_complexes_with_reference = gs.get_complexes().get_overlapped_complexes_set(pred_clusters)
print "# of complexes in reference dataset: " + str(len(overlapped_complexes_with_reference))
print("# of complexes in reference dataset: " + str(len(overlapped_complexes_with_reference)))
#clust_scores, header = utils.clustering_evaluation(gs.complexes, pred_clusters, "", False)
clust_scores, header, composite_score = utils.clustering_evaluation(gs.complexes, pred_clusters, "", False)
outFH = open("%s.eval.txt" % (output_dir), "w")
header = header.split("\t")
clust_scores = clust_scores.split("\t")
for i, head in enumerate(header):
print "%s\t%s" % (head, clust_scores[i])
print("%s\t%s" % (head, clust_scores[i]))
print >> outFH, "%s\t%s" % (head, clust_scores[i])
outFH.close()
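Note: one context line in this hunk, print >> outFH, ..., was not migrated; under Python 3 print is a plain function there, so >> becomes an unsupported operand and the line fails at runtime. The Python 3 spelling (also valid under Python 2 with the print_function import) would be:

    # Python 2 only (current line):
    #   print >> outFH, "%s\t%s" % (head, clust_scores[i])
    # Python 3, or both with 'from __future__ import print_function':
    print("%s\t%s" % (head, clust_scores[i]), file=outFH)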

Expand All @@ -214,4 +214,4 @@ def main():
except KeyboardInterrupt:
pass

- #11000100 (MI, Bayes, PCC+N)
\ No newline at end of file
+ #11000100 (MI, Bayes, PCC+N)