-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathget_seqs_from_fasta.py
More file actions
65 lines (49 loc) · 2 KB
/
get_seqs_from_fasta.py
File metadata and controls
65 lines (49 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
import argparse
from Bio import SeqIO
import sys
# Pull specified sequences out of a fasta file.
#
# Matt Gitzendanner
# University of Florida
#
# Usage: get_seqs_from_fasta.py -i file.fa -o subset.fa -s seq_id_1 seq_id_2 ...
#
# Script version; interpolated into the text printed by the --version flag.
__version__= 1.0 # June 25, 2020
def open_files(in_file, out):
    """Open the input file for reading and the output file for appending.

    The output file is opened in append mode, so any content already in
    *out* is kept and new data is added after it.

    Args:
        in_file: Path of the file to read.
        out: Path of the file to append to.

    Returns:
        A ``(in_fh, out_fh)`` tuple of open text-mode file handles. The
        caller is responsible for closing both.

    Raises:
        SystemExit: With exit status 1, after printing a message to stderr,
            when either file cannot be opened.
    """
    try:
        in_fh = open(in_file, 'r')
    except IOError:
        print(f"Can't open file for reading: {in_file}", file=sys.stderr)
        # Non-zero status so shells and pipelines can detect the failure.
        sys.exit(1)

    try:
        out_fh = open(out, 'a')
    except IOError:
        # Do not leave the already-open input handle dangling on this path.
        in_fh.close()
        print(f"Can't open file for writing: {out}", file=sys.stderr)
        sys.exit(1)

    return in_fh, out_fh
def pull_seq(seqs_in, in_format, seqs_out, out_format, seq_list, verbose):
    """Copy the records whose ID appears in *seq_list* to the output.

    Records are read from *seqs_in* with ``SeqIO.parse`` and each matching
    record is written to *seqs_out* with its own ``SeqIO.write`` call, in the
    order the records occur in the input.

    Args:
        seqs_in: Input passed to ``SeqIO.parse`` (an open file handle when
            called from ``main``).
        in_format: Format name handed to ``SeqIO.parse``.
        seqs_out: Output passed to ``SeqIO.write``.
        out_format: Format name handed to ``SeqIO.write``.
        seq_list: Iterable of record IDs to keep; compared against
            ``record.id`` for exact equality.
        verbose: When true, print one line per record written.
    """
    # Build the lookup once: membership tests are then constant-time instead
    # of a scan of the whole ID list for every record in the input.
    wanted = frozenset(seq_list)

    for record in SeqIO.parse(seqs_in, in_format):
        if record.id not in wanted:
            continue
        if verbose:
            print(f"Writing sequence for {record.id}")
        SeqIO.write(record, seqs_out, out_format)
def main():
    """Parse the command line and copy the requested sequences.

    Reads the input/output paths, their formats, the list of sequence IDs
    and the verbosity flag from the command line, opens both files with
    ``open_files`` and hands them to ``pull_seq``. Both file handles are
    closed before returning, even if the copy raises.
    """
    parser = argparse.ArgumentParser(prog='get_seqs_from_fasta.py')
    # Both paths are mandatory: without them open() would receive None and
    # fail with a TypeError instead of a usage message.
    parser.add_argument("-i", "--in_file", required=True, help="input file")
    parser.add_argument("-f", "--in_format", help="File format of input file.",
                        default='fasta')
    parser.add_argument("-o", "--out_file", required=True, help="output file")
    parser.add_argument("-x", "--out_format", help="File format of output file.",
                        default='fasta')
    parser.add_argument('-s', '--sequences', nargs='+', default=[],
                        help="List of sequence IDs to keep")
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    parser.add_argument('--version', action='version',
                        version=f'%(prog)s Version: {__version__}:')

    args = parser.parse_args()

    in_fh, out_fh = open_files(args.in_file, args.out_file)
    # Closing the handles explicitly guarantees the output is flushed to disk
    # rather than relying on interpreter shutdown.
    with in_fh, out_fh:
        pull_seq(in_fh, args.in_format, out_fh, args.out_format,
                 args.sequences, args.verbose)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()