-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathgetTransmembraneRegions.py
More file actions
executable file
·81 lines (73 loc) · 2.8 KB
/
getTransmembraneRegions.py
File metadata and controls
executable file
·81 lines (73 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python2
# Script created by Bobby Kim and Nick Schafer on 1/8/13
# This script assumes that you have a file called "pdbtmall"
# in the current directory. This file is an XML version of the
# PDBTM database, which can be found here:
# http://pdbtm.enzim.hu/?_=/download/files
# The script takes two arguments:
# python getTransmembraneRegions.py pdbidlist
# It then searches for transmembrane regions that correspond to
# that pdbids in the pdbidlist and writes the information out to a
# file called "pdbidCHAINID.tm". The format of that file is a
# single column of 1s and 0s, each row corresponding to one
# residue, where 1 means that it is transmembrane and 0 means
# that it is not. WARNING: In its original form, the script
# searches for transmembrane HELICES only, but this can be
# changed by searching for other "regiontype"s. For information
# about other region types, see:
# http://pdbtm.enzim.hu/?_=/docs/manual
import sys
import xml.etree.ElementTree as ET
import os.path
def remove_namespace(doc, namespace):
"""Remove namespace in the passed document in place."""
ns = u'{%s}' % namespace
nsl = len(ns)
for elem in doc.getiterator():
if elem.tag.startswith(ns):
elem.tag = elem.tag[nsl:]
def cyto_peri_switcher():
global cyto_peri_switch
if cyto_peri_switch == 1:
cyto_peri_switch = 3
elif cyto_peri_switch == 3:
cyto_peri_switch = 1
cyto_peri_switch = 3
# import data from file
pdbfile = open("pdbtmall","r")
pdbtm = ET.parse(pdbfile)
remove_namespace(pdbtm, 'http://pdbtm.enzim.hu')
idlistfile=sys.argv[1]
idlist=open(idlistfile, "r")
for line in iter(idlist):
# find transmembrane regions and print them out
print line
pdbid=line[0:4]
print pdbid
chainid=line[5]
print chainid
outputfilename='./%s%s.tm' % (pdbid, chainid)
if os.path.isfile(outputfilename):
print "%s already exists, skipping" % outputfilename
continue
outputfile=open(outputfilename,"w")
prev_regiontype = ''
transmembraneregions=pdbtm.findall("./pdbtm[@ID='%s']/CHAIN[@CHAINID='%s']/REGION" % (pdbid, chainid))
print pdbid, chainid
for region in transmembraneregions:
start = int(region.get('seq_beg'))
end = int(region.get('seq_end'))
regiontype = region.get('type')
print region.get('seq_beg'), region.get('seq_end'), region.get('type')
for i in range(start,end+1):
if regiontype == 'H':
outputfile.write('2\n')
else:
if prev_regiontype == 'H':
cyto_peri_switcher()
if cyto_peri_switch == 1:
outputfile.write('1\n')
elif cyto_peri_switch == 3:
outputfile.write('3\n')
prev_regiontype = regiontype
outputfile.close()