BS_test/BS_test.py at master · willemveerman/BS_test · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 12 15:14:50 2014

@author: willem
"""

from bioservices import *

class Features(object):
    """Gene list read from a text file, with KEGG -> UniProt -> GO lookups.

    The input file holds one KEGG gene name per line.  The lookup methods
    use ``KEGGParser`` and ``QuickGO``, which this module obtains through
    its ``from bioservices import *`` import.

    Attributes:
        id: Path of the gene-list file given to the constructor.
        list: Gene names read from that file, one per line, whitespace
            stripped.
    """

    def __init__(self, id):
        """Load the gene names stored in the file at path *id*."""
        self.id = id
        self.list = self._read_lines(id)

    @staticmethod
    def _read_lines(path):
        """Return every line of the file at *path*, whitespace-stripped."""
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(path, "r") as handle:
            return [line.strip() for line in handle]

    def __str__(self):
        return "Takes a list of protein names and outputs a file containing each proteins' GO terms."

    def items(self):
        """Return the file's constituents as a list."""
        return self.list

    def conv_to_up(self, organism):
        """Convert KEGG gene names to UniProt IDs.

        Args:
            organism: KEGG organism code; each gene is queried as
                ``organism + ":" + gene``.

        Returns:
            A list parallel to ``self.list``.  Each element is the list of
            UniProt IDs found for that gene, or an empty list when the KEGG
            entry could not be parsed or carries no UniProt cross-reference.
        """
        kegg_class = KEGGParser()
        uniprot_IDs_list = []

        for gene in self.list:
            gene_kegg_entry = kegg_class.get(organism + ":" + gene)
            try:
                parsed_entry = kegg_class.parse(gene_kegg_entry)
                uniprot_field = parsed_entry['dblinks']['UniProt']
            except (AttributeError, KeyError, TypeError):
                # No usable entry for this gene (presumably get() returned
                # something other than entry text -- TODO confirm).  Record
                # an empty result instead of reusing the previous gene's
                # entry, so positions stay aligned with self.list.
                uniprot_IDs_list.append([])
                continue
            uniprot_IDs_list.append(str(uniprot_field).split())

        return uniprot_IDs_list

    def file_up(self, filename, organism):
        """Create a text file listing each gene with all its UniProt IDs.

        Each line is the gene name followed by its UniProt IDs, separated by
        single spaces; a gene without IDs is written on its own.

        Args:
            filename: Path of the file to write (overwritten if present).
            organism: KEGG organism code passed to ``conv_to_up``.
        """
        # Run the lookups first so a failed query does not leave behind a
        # truncated output file.
        up_list = self.conv_to_up(organism)

        with open(filename, "w") as out:
            for gene, ids in zip(self.list, up_list):
                # ``ids`` is a list of strings; join it rather than
                # concatenating a list onto a str.
                out.write(" ".join([gene] + list(ids)) + "\n")

    def file_up_first(self, filename, organism):
        """Create a text file listing each gene with its first UniProt ID.

        Args:
            filename: Path of the file to write (overwritten if present).
            organism: KEGG organism code passed to ``conv_to_up``.
        """
        up_list = self.conv_to_up(organism)

        with open(filename, "w") as out:
            for gene, ids in zip(self.list, up_list):
                # ids[:1] is empty for a gene without IDs, so no IndexError.
                out.write(" ".join([gene] + list(ids[:1])) + "\n")

    def go_list(self, organism, tax=9606):
        """Fetch QuickGO annotations for the first UniProt ID of each gene.

        Args:
            organism: KEGG organism code passed to ``conv_to_up``.
            tax: Taxonomy identifier forwarded to ``QuickGO.Annotation``.
                Defaults to 9606, the value that used to be hard-coded.

        Returns:
            A list parallel to ``self.list`` holding whatever
            ``QuickGO.Annotation`` returned for each gene (requested as
            ``frmt="tsv"``), or ``""`` for a gene with no UniProt ID.
        """
        up_list = self.conv_to_up(organism)
        go_obj = QuickGO()

        go_list = []
        for ids in up_list:
            if not ids:
                # Nothing to query; keep the slot so indices still match.
                go_list.append("")
                continue
            go_list.append(go_obj.Annotation(
                protein=ids[0], frmt="tsv", tax=tax, source="UniProt",
                col="proteinName,goID,goName,with"))

        return go_list

    def go_list_file(self, organism, filename, tax=9606):
        """Write each gene name followed by its QuickGO annotation text.

        Args:
            organism: KEGG organism code passed to ``go_list``.
            filename: Path of the file to write (overwritten if present).
            tax: Taxonomy identifier passed to ``go_list``.
        """
        go_list_local = self.go_list(organism, tax=tax)

        with open(filename, "w") as out:
            for gene, annotation in zip(self.items(), go_list_local):
                record = gene + " - " + annotation
                # Terminate the record so an empty or unterminated
                # annotation does not run into the next gene's line.
                if not record.endswith("\n"):
                    record += "\n"
                out.write(record)

    def parser(self):
        """Parse a file containing a list of genes, separated by newlines.

        Returns:
            The stripped lines of the file at ``self.id``.
        """
        # Read from the stored path.  A plain path passed in place of an
        # instance (``Features.parser(path)``) has no ``id`` attribute and
        # is used directly.
        path = getattr(self, "id", self)
        return Features._read_lines(path)

#    def uniprot_id(self,organism):
#        """ UniProt IDs for a list of KEGG Gene IDs.
#        """
#        kegg_class = KEGGParser()
#        gene_kegg_entry = kegg_class.get(organism:self)
#        parsed_entry = kegg_class.parse(gene_kegg_entry)
#        uniprot_id = str(parsed_entry['dblinks']['UniProt'])
#
#
#    def go_attributes(self):
#        """Uses BioServices to query QuickGO Gene Ontology database.
#
#            Returns a Pandas.DataFrame, displaying only columns 4 & 5.
#
#        """
#        #Initialize BioServices QuickGO object
#        bioservices_quickgo_obj = QuickGO()
#        #Search QuickGO for protein UniProt ID
#        res = bioservices_quickgo_obj.Annotation_from_protein(protein=str(Features.part_attrib(self,'uniprot_id')))
#
#        #Use Pandas.DataFrame method object iloc to select specific columns
#        print res.iloc[:,[4,5]]