-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathbsbiomart.py
More file actions
70 lines (59 loc) · 1.74 KB
/
bsbiomart.py
File metadata and controls
70 lines (59 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import bioservices as bs
import sys
def gethumanshortname(outfile):
    """Write a tab-separated table of human Ensembl gene IDs and short names.

    Asks Ensembl BioMart (``hsapiens_gene_ensembl`` dataset) for the
    ``ensembl_gene_id`` and ``wikigene_name`` attributes and writes one
    ``<gene id><TAB><short name>`` line to ``outfile`` for every gene whose
    short name is non-empty.

    Args:
        outfile: Path of the file to write; it is opened in ``'w'`` mode and
            therefore overwritten.
    """
    mart = bs.BioMart(host = 'www.ensembl.org')
    mart.add_dataset_to_xml('hsapiens_gene_ensembl')
    for attribute in ('ensembl_gene_id', 'wikigene_name'):
        mart.add_attribute_to_xml(attribute)
    response = mart.query(mart.get_xml())

    # {ENSG: shortname}; the value is None when the row had no name column.
    # A later row for the same gene ID replaces the earlier value.
    shortnames = {}
    for row in response.split('\n'):
        fields = [str(field) for field in row.split('\t')]
        if len(fields) > 2:
            # Rows with more than two columns are ignored.
            continue
        # split() always yields at least one field, so there are 1 or 2 here.
        shortnames[fields[0]] = fields[1] if len(fields) == 2 else None

    with open(outfile, 'w') as out:
        # Genes whose short name is None or empty are left out of the table.
        out.writelines(
            ensembl + '\t' + shortname + '\n'
            for ensembl, shortname in shortnames.items()
            if shortname
        )
def mousetohuman(genelist, outfile):
    """Write the human orthologs of the mouse genes listed in a file.

    Asks Ensembl BioMart (``mmusculus_gene_ensembl`` dataset) for the
    ``ensembl_gene_id`` and ``hsapiens_homolog_ensembl_gene`` attributes and
    writes, one per line, the human gene ID of every mouse gene that appears
    in ``genelist`` and has a non-empty human ortholog.

    Output lines follow the order in which BioMart returned the mouse genes,
    not the order of ``genelist``.

    Args:
        genelist: Path to a text file with one Ensembl mouse gene ID per
            line; surrounding whitespace is stripped from each line.
        outfile: Path of the file to write; it is opened in ``'w'`` mode and
            therefore overwritten.
    """
    # Read the requested IDs first so that a missing or unreadable gene list
    # fails before any BioMart call is made and before outfile is truncated.
    # A set keeps the membership test in the write loop O(1) per gene.
    with open(genelist, 'r') as infh:
        wanted = {line.strip() for line in infh}

    # Get all mouse/human ortholog relationships.
    mart = bs.BioMart(host='www.ensembl.org')
    mart.add_dataset_to_xml('mmusculus_gene_ensembl')
    mart.add_attribute_to_xml('ensembl_gene_id')
    mart.add_attribute_to_xml('hsapiens_homolog_ensembl_gene')
    response = mart.query(mart.get_xml())

    # NOTE(review): a later row for the same mouse gene replaces the earlier
    # one, so if BioMart returns several rows per gene (presumably the case
    # for one-to-many orthologs -- TODO confirm) only the last is written.
    orthologs = {}  # {ENSMUSG: ENSG}
    for row in response.split('\n'):
        fields = row.split('\t')
        if len(fields) == 2:
            # Row carries a human ortholog column (possibly empty).
            orthologs[fields[0]] = fields[1]
        elif len(fields) == 1:
            # Row has no human ortholog column.
            orthologs[fields[0]] = None
        # Rows with more than two columns are ignored.

    with open(outfile, 'w') as outfh:
        for mousegene, humangene in orthologs.items():
            # Skip genes without an ortholog (None or empty string) and
            # genes that were not requested.
            if humangene and mousegene in wanted:
                outfh.write(humangene + '\n')
if __name__ == '__main__':
    # Command line: <script> <mouse_gene_list> <output_file>
    # Without both arguments, exit with a usage message (sys.exit with a
    # string prints it to stderr and exits with a non-zero status) instead
    # of failing with a bare IndexError. Extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit('usage: {0} <mouse_gene_list> <output_file>'.format(sys.argv[0]))
    mousetohuman(sys.argv[1], sys.argv[2])