forked from bmmoore43/MotifDiscovery
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_same_kmers.py
More file actions
103 lines (90 loc) · 2.64 KB
/
get_same_kmers.py
File metadata and controls
103 lines (90 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# get overlap of 2 kmer lists
import os, sys
# Command-line setup.
# argv[1] is the directory that is scanned for "*_imp*" input files.
# Validate it BEFORE opening the output file: opening with "w" truncates any
# existing all_clusters_cre_rank.txt, which should not happen on a bad call.
if len(sys.argv) < 2:
    sys.exit("usage: %s <directory_with_imp_files>" % sys.argv[0])
dir1 = sys.argv[1]
if not os.path.isdir(dir1):
    sys.exit("error: %r is not a directory" % dir1)
# Merged table is written to the current working directory.
out = open("all_clusters_cre_rank.txt", "w")
def add_to_dict(inp, D, curr_list, count, rank_col=1):
    """Merge one tab-separated file into the per-k-mer table ``D``.

    The first line of ``inp`` is treated as a header and discarded. For every
    remaining non-blank line, the first field is the k-mer key and the field
    at index ``rank_col`` is the value stored for this file.

    Args:
        inp: An open, iterable text stream (file object or ``io.StringIO``).
        D: Dict mapping k-mer -> list of string values, one slot per file
            processed so far. Modified in place.
        curr_list: List of k-mers encountered in files with ``count != 0``.
            Modified in place; new k-mers are appended in encounter order.
        count: Zero-based index of the file being merged, i.e. the column
            slot this file's values must occupy in each row of ``D``.
        rank_col: Index of the field holding the value (default 1).

    Returns:
        The tuple ``(D, curr_list)`` (the same objects that were passed in).

    Raises:
        IndexError: If a non-blank line has no field at index ``rank_col``.
    """
    inp.readline()  # discard the header row
    # Set mirror of curr_list so membership tests are O(1) instead of O(n).
    seen = set(curr_list)
    for line in inp:
        fields = line.strip().split('\t')
        if fields == ['']:
            # Blank line (e.g. trailing newline at end of file): nothing to add.
            continue
        kmer = fields[0]
        value = fields[rank_col]
        if count != 0 and kmer not in seen:
            seen.add(kmer)
            curr_list.append(kmer)
        row = D.setdefault(kmer, [])
        if len(row) > count:
            # This file already contributed a value for this k-mer
            # (duplicate line); keep the first so columns stay aligned.
            continue
        # Fill the slots of every earlier file that lacked this k-mer so the
        # new value lands in column `count`.
        row.extend(["NA"] * (count - len(row)))
        row.append(value)
    return D, curr_list
def add_to_dict2(inp, D, curr_list, count, rank_col=6):
    """Merge one tab-separated file into ``D``, reading the seventh field.

    The first line of ``inp`` is treated as a header and discarded. For every
    remaining non-blank line, the first field is the k-mer key and the field
    at index ``rank_col`` (default 6) is the value stored for this file.

    Args:
        inp: An open, iterable text stream (file object or ``io.StringIO``).
        D: Dict mapping k-mer -> list of string values, one slot per file
            processed so far. Modified in place.
        curr_list: List of k-mers encountered in files with ``count != 0``.
            Modified in place; new k-mers are appended in encounter order.
        count: Zero-based index of the file being merged, i.e. the column
            slot this file's values must occupy in each row of ``D``.
        rank_col: Index of the field holding the value (default 6).

    Returns:
        The tuple ``(D, curr_list)`` (the same objects that were passed in).

    Raises:
        IndexError: If a non-blank line has no field at index ``rank_col``.
    """
    inp.readline()  # discard the header row
    # Set mirror of curr_list so membership tests are O(1) instead of O(n).
    seen = set(curr_list)
    for line in inp:
        fields = line.strip().split('\t')
        if fields == ['']:
            # Blank line (e.g. trailing newline at end of file): nothing to add.
            continue
        kmer = fields[0]
        value = fields[rank_col]
        if count != 0 and kmer not in seen:
            seen.add(kmer)
            curr_list.append(kmer)
        row = D.setdefault(kmer, [])
        if len(row) > count:
            # This file already contributed a value for this k-mer
            # (duplicate line); keep the first so columns stay aligned.
            continue
        # Fill the slots of every earlier file that lacked this k-mer so the
        # new value lands in column `count`.
        row.extend(["NA"] * (count - len(row)))
        row.append(value)
    return D, curr_list
# Driver: merge every matching file in dir1 into one k-mer x file table and
# write it to `out`.
kdict = {}            # k-mer -> list of values, one slot per merged file
title_list = []       # column titles: file name up to the first "_imp"
current_k_list = []   # bookkeeping list threaded through the loaders
count = 0             # number of files merged so far (= next column index)
try:
    # Sort the listing: os.listdir() returns entries in arbitrary order, which
    # would make the column order differ between runs/machines.
    for fname in sorted(os.listdir(dir1)):
        if fname.endswith(("_imp_avgrank_RF.txt", "_imp")):
            loader = add_to_dict      # value taken from the second column
        elif fname.endswith("_imp_scaled.txt"):
            loader = add_to_dict2     # value taken from the seventh column
        else:
            continue
        title_list.append(fname.strip().split("_imp")[0])
        # `with` guarantees the input is closed even if the loader raises.
        with open(os.path.join(dir1, fname)) as inp:
            kdict, current_k_list = loader(inp, kdict, current_k_list, count)
        count += 1

    print(kdict)  # dump of the merged table to stdout (kept from original)

    out.write("kmer\t%s\n" % "\t".join(title_list))
    for key, data in kdict.items():
        # Rows whose recorded values are all identical are skipped, exactly as
        # before. NOTE(review): the intent of this filter is not evident from
        # the code (it also drops k-mers recorded only once) -- confirm.
        if len(set(data)) == 1:
            continue
        # Pad k-mers missing from the last file(s) so every row has exactly
        # one field per header column, and join without a trailing tab.
        padded = data + ["NA"] * (count - len(data))
        out.write("%s\t%s\n" % (key, "\t".join(padded)))
finally:
    # Close (and flush) the output even when reading an input fails.
    out.close()