-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBS_test.py
More file actions
138 lines (92 loc) · 3.8 KB
/
BS_test.py
File metadata and controls
138 lines (92 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 12 15:14:50 2014
@author: willem
"""
from bioservices import *
class Features(object):
    """Gene list read from a text file, with KEGG / UniProt / QuickGO lookups.

    The input file holds one KEGG gene name per line.  The lookup methods
    rely on ``KEGGParser`` and ``QuickGO`` being available at module level
    (the file imports them via ``from bioservices import *``).
    """

    def __init__(self, id):
        """Load the gene names stored in the file at path ``id``.

        Args:
            id: Path of a text file with one gene name per line.  The
                parameter name is kept for existing callers even though it
                shadows the ``id`` builtin.
        """
        # Path of the source file; re-used by parser().
        self.id = id
        # Gene names in file order, surrounding whitespace stripped.
        self.list = self._read_gene_file(id)

    @staticmethod
    def _read_gene_file(path):
        """Return every line of the text file at ``path``, stripped."""
        # The context manager closes the handle even if reading fails.
        with open(path, "r") as handle:
            return [line.strip() for line in handle]

    def __str__(self):
        return "Takes a list of protein names and outputs a file containing each proteins' GO terms."

    def items(self):
        """Return the file's constituents as a list."""
        return self.list

    def conv_to_up(self, organism):
        """Convert KEGG gene names to UniProt IDs.

        Args:
            organism: KEGG organism code; it is prefixed to every gene name
                as ``organism + ":" + gene`` to build the KEGG query.

        Returns:
            A list parallel to ``self.list``.  Each element is the list of
            UniProt IDs found for that gene, or an empty list when the KEGG
            entry could not be parsed or carries no UniProt cross-reference.
        """
        kegg = KEGGParser()
        uniprot_ids = []
        for gene in self.list:
            entry = kegg.get(organism + ":" + gene)
            try:
                parsed = kegg.parse(entry)
                ids = str(parsed['dblinks']['UniProt']).split()
            except (AttributeError, KeyError, TypeError):
                # The original swallowed AttributeError and then re-used the
                # previous gene's parsed entry (or hit a NameError on the
                # first gene).  Record "no IDs" instead so the result stays
                # index-aligned with self.list.
                # NOTE(review): KeyError/TypeError presumably cover entries
                # without a UniProt dblink or an unparsed result -- confirm
                # against the installed bioservices version.
                ids = []
            uniprot_ids.append(ids)
        return uniprot_ids

    def file_up(self, filename, organism):
        """Create a text file listing each gene with its UniProt IDs.

        Each output line is the gene name followed by all of its UniProt
        IDs, separated by single spaces.

        Args:
            filename: Path of the file to (over)write.
            organism: KEGG organism code, forwarded to ``conv_to_up``.
        """
        # Do the lookups first so a failing query does not leave a
        # truncated output file behind.
        up_list = self.conv_to_up(organism)
        with open(filename, "w") as out:
            for gene, ids in zip(self.list, up_list):
                # ids is a list; the original concatenated it directly to a
                # str, which raised TypeError.
                out.write(gene + " " + " ".join(ids) + "\n")

    def file_up_first(self, filename, organism):
        """Create a text file listing each gene with its first UniProt ID.

        Genes without any UniProt ID are written with an empty ID field.

        Args:
            filename: Path of the file to (over)write.
            organism: KEGG organism code, forwarded to ``conv_to_up``.
        """
        up_list = self.conv_to_up(organism)
        with open(filename, "w") as out:
            for gene, ids in zip(self.list, up_list):
                first = ids[0] if ids else ""
                out.write(gene + " " + first + "\n")

    def go_list(self, organism, tax=9606):
        """Query QuickGO for the annotations of each gene's first UniProt ID.

        Args:
            organism: KEGG organism code, forwarded to ``conv_to_up``.
            tax: Taxonomy identifier passed to QuickGO.  Defaults to 9606,
                the value that was previously hard-coded.

        Returns:
            A list parallel to ``self.list`` holding whatever
            ``QuickGO.Annotation`` returns for each gene; genes without a
            UniProt ID get an empty string so the indices stay aligned.
        """
        up_list = self.conv_to_up(organism)
        go_obj = QuickGO()
        annotations = []
        for ids in up_list:
            if not ids:
                annotations.append("")
                continue
            annotations.append(
                go_obj.Annotation(
                    protein=ids[0],
                    frmt="tsv",
                    tax=tax,
                    source="UniProt",
                    col="proteinName,goID,goName,with",
                )
            )
        return annotations

    def go_list_file(self, organism, filename):
        """Write each gene name followed by its QuickGO annotation to a file.

        Every record has the form ``<gene> - <annotation>``.

        Args:
            organism: KEGG organism code, forwarded to ``go_list``.
            filename: Path of the file to (over)write.
        """
        go_list_local = self.go_list(organism)
        with open(filename, "w") as out:
            for gene, annotation in zip(self.items(), go_list_local):
                # assumes the annotation is text (frmt="tsv") -- TODO confirm.
                # Terminate the record if the annotation does not already
                # end with a newline, so consecutive records never merge.
                if not annotation.endswith("\n"):
                    annotation += "\n"
                out.write(gene + " - " + annotation)

    def parser(self):
        """Parse a file containing a list of genes, separated by newlines.

        Reads the file this instance was created from (``self.id``).  The
        original passed the instance itself to ``open``, which raised
        TypeError.  A bare path passed in place of ``self`` (calling the
        function through the class on Python 3) is still accepted.

        Returns:
            The stripped lines of the file as a list.
        """
        path = self.id if isinstance(self, Features) else self
        return Features._read_gene_file(path)
# def uniprot_id(self,organism):
# """ UniProt IDs for a list of KEGG Gene IDs.
# """
# kegg_class = KEGGParser()
# gene_kegg_entry = kegg_class.get(organism:self)
# parsed_entry = kegg_class.parse(gene_kegg_entry)
# uniprot_id = str(parsed_entry['dblinks']['UniProt'])
#
#
# def go_attributes(self):
# """Uses BioServices to query QuickGO Gene Ontology database.
#
# Returns a Pandas.DataFrame, displaying only columns 4 & 5.
#
# """
# #Initialize BioServices QuickGO object
# bioservices_quickgo_obj = QuickGO()
# #Search QuickGO for protein UniProt ID
# res = bioservices_quickgo_obj.Annotation_from_protein(protein=str(Features.part_attrib(self,'uniprot_id')))
#
# #Use Pandas.DataFrame method object iloc to select specific columns
# print res.iloc[:,[4,5]]