-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathFasta.py
More file actions
93 lines (75 loc) · 3.18 KB
/
Fasta.py
File metadata and controls
93 lines (75 loc) · 3.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import RetrieveOrthogroup as RO
import TreeUtils
class FastaFile:
    """
    A FastaFile instance reads a fasta file and stores each sequence seen in the file,
    as well as the pubGeneID of each sequence and the species ID of the species it came
    from, and the domains in each sequence.

    All per-sequence data is kept in parallel lists: entry i of names, sequences and
    speciesIDs (and, once domains have been computed, of domains, domainStarts and
    domainEnds) describes the same sequence.
    """

    def __init__(self, fasta=None, hmmfile=""):
        """
        Reads input from a fasta file. If unspecified, then an object with empty
        names, sequences, and speciesIDs lists is returned.

        The domain lists always exist after construction; they stay empty unless
        an hmmfile is given.

        Args:
            fasta   (list): A list representation of a fasta file (with headers untouched).
                            None or an empty list yields an empty object.
            hmmfile (str ): path to hmm model of domains in sequence. If empty, no
                            domain search is performed.
        """
        # None is used as the default instead of a shared mutable [] literal
        if not fasta:
            self.names, self.sequences, self.speciesIDs = [], [], []
        else:
            self.names, self.sequences, self.speciesIDs = RO.fastaToSeqs(fasta)

        # Always define the domain lists so the domain getters never hit a
        # missing attribute on an object built without an hmm file
        self.domains, self.domainStarts, self.domainEnds = [], [], []
        if hmmfile:
            self._loadDomains(hmmfile)

    def _loadDomains(self, hmmfile):
        """
        Rebuilds the domain lists by running TreeUtils.findDomains on every stored
        sequence, in order, so that they stay parallel to self.sequences.

        Args:
            hmmfile (str ): path to hmm model of domains in sequence
        """
        self.domains, self.domainStarts, self.domainEnds = [], [], []
        for sequence in self.sequences:
            starts, ends, domains = TreeUtils.findDomains(sequence, hmmfile)
            self.domainStarts.append(starts)
            self.domainEnds.append(ends)
            self.domains.append(domains)

    #Basically another init: call it on an existing (typically empty) instance
    def initFromGeneID(self, geneID, level, hmmfile):
        """
        Given a gene ID and an orthogroup level to work at, pulls the containing orthogroup
        from orthoDB and parses it. Any data previously held by this object is replaced.

        Args:
            geneID  (str ): The gene ID whose orthogroup to search for
            level   (str ): The NCBI Taxonomy ID level at which to search
            hmmfile (str ): path to hmm model of domains in sequence
        """
        # The level is also passed as the species argument of the lookups
        species = level
        oid = RO.findOrthogroupID(geneID, level, species)
        fasta = RO.getFasta(oid, level, species)
        self.names, self.sequences, self.speciesIDs = RO.fastaToSeqs(fasta)
        self._loadDomains(hmmfile)

    #Getters (Setting is not allowed)
    def length(self):
        """Returns the number of sequences stored in the object"""
        return len(self.sequences)

    def get(self, index):
        """Returns the ith entry from the object as a (name, sequence, speciesID) tuple"""
        return (self.names[index], self.sequences[index], self.speciesIDs[index])

    def getName(self, index):
        """Returns the name of the ith entry"""
        return self.names[index]

    def getSequence(self, index):
        """Returns the sequence of the ith entry"""
        return self.sequences[index]

    def getSpeciesID(self, index):
        """Returns the species ID of the ith entry"""
        return self.speciesIDs[index]

    def getDomains(self, index):
        """Returns the domains found in the ith entry"""
        return self.domains[index]

    def getDomainStarts(self, index):
        """Returns the domain starts found for the ith entry"""
        return self.domainStarts[index]

    def getDomainEnds(self, index):
        """Returns the domain ends found for the ith entry"""
        return self.domainEnds[index]

    def getAllDomains(self):
        """Returns the list holding the domains of every entry"""
        return self.domains

    def getAllSequences(self):
        """Returns the list holding every sequence"""
        return self.sequences

    def getAllSpeciesIDs(self):
        """Returns the list holding the species ID of every entry"""
        return self.speciesIDs

    def delete(self, index):
        """
        Deletes the ith entry from the object, including its domain information
        when domains have been computed.

        Raises:
            IndexError: if index is out of range
        """
        parallel = [self.names, self.sequences, self.speciesIDs]
        # The domain lists are empty (or absent) when no hmm file was used; only
        # pop from them when populated so they stay aligned with the sequences
        for attr in ("domains", "domainStarts", "domainEnds"):
            values = getattr(self, attr, None)
            if values:
                parallel.append(values)
        for values in parallel:
            values.pop(index)