-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnw.py
More file actions
117 lines (97 loc) · 2.5 KB
/
nw.py
File metadata and controls
117 lines (97 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def zeros(m,n):
return [[0]*n for _ in xrange(m)]
def match_score(k,l):
fname='%enter file path here%'
handle=open(fname)
matrix=[]
col,row={},{}
idx,idr=0,0
first_line=0
for line in handle:
if not line.startswith('#'):
if first_line==0:
first_line=1
for i in line.split():
col[i]=idx
idx+=1
else:
a=line.split()
x=a.pop(0)
row[x]=idr
matrix.append(a)
idr+=1
return int(matrix[col[k]][row[l]])
def nw (seq1,seq2,pen):
m=len(seq1)
n=len(seq2)
table=zeros(m+1,n+1)
for i in range(0,m+1):
table[i][0]=pen*i
for j in range(0,n+1):
table[0][j]=pen*j
for i in range(1,m+1):
for j in range(1,n+1):
match=table[i-1][j-1]+match_score(seq1[i-1],seq2[j-1])
delete=table[i-1][j]+pen
insert=table[i][j-1]+pen
table[i][j]=max(match,insert,delete)
align1,align2='',''
i,j=m,n
while i>0 and j>0:
current=table[i][j]
diaganol=table[i-1][j-1]
up=table[i][j-1]
left=table[i-1][j]
if current==diaganol+match_score(seq1[i-1],seq2[j-1]):
align1+=seq1[i-1]
align2+=seq2[j-1]
i,j=i-1,j-1
elif current==left+pen:
align1+=seq1[i-1]
align2+='-'
i-=1
elif current==up+pen:
align1+='-'
align2+=seq2[j-1]
j-=1
while i>0:
align1+=seq1[i-1]
align2+='-'
i-=1
while j>0:
align1+='-'
align2+=seq2[j-1]
j-=1
seqalign(align1,align2)
def seqalign (align1,align2):
align1=align1[::-1]
align2=align2[::-1]
i,j=0,0
seqscore=0
identity=0
symbol=''
for i in range(0,len(align1)):
if align1[i]==align2[i]:
identity+=1
seqscore+=match_score(align1[i],align2[i])
symbol+= '|'
elif align1[i]=='-' or align2[i]=='-':
seqscore+=pen
symbol+=' '
elif align1[i]!=align2[i] and align1[i]!='-' and align2[i]!='-':
seqscore+=match_score(align1[i],align2[i])
symbol+=' '
identity= float(identity)/len(align1) *100
print 'Sequence similarity:', seqscore
print 'Identity Score', identity,'%'
print align1
print symbol
print align2
seq4='ADCNSRQCLCRPM'
seq5='ASCSNRCKCRDP'
seq1='GLSDGEWQLVLNVWGKVEADVAGHGQEVLIRLFKGHPETLEKFDKFKHLKSEDEMKASEDLKKHGNTVLTALGGILKKKGHHEAELTPLAQSHATKHKIPVKYLEFISEAIIQVLQSKHPGDFGADAQGAMSKALELFRNDMAAKYKELG'
seq2='GLSDGEWQQVLNVWGKVEADIAGHGQEVLIRLFTGHPETLEKFDKFKHLKTEAEMKASEDLKKHGTVVLTALGGILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISDAIIHVLHSKHPGDFGADAQGAMTKALELFRNDIAAKYKELG'
seq3='GLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDRFKHLKSEDEMKASEDLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIPVKYLEFISEAIIQVLQSKHPGDFGADAQGAMNKALELFRKDMASNYKELG'
pen=-8
nw(seq1,seq2,pen)
nw(seq1,seq3,pen)