-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathRFPfilterreadlength.py
More file actions
35 lines (28 loc) · 1.21 KB
/
RFPfilterreadlength.py
File metadata and controls
35 lines (28 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from Bio.SeqIO.QualityIO import FastqGeneralIterator
import sys
import os
import gzip
import argparse
def fastqtrimmer(allowedlengths, infile):
    """Copy the reads of a gzipped FASTQ file whose length is allowed.

    Records are streamed from ``infile``; each one whose sequence length is
    in ``allowedlengths`` is written as a four-line FASTQ record (with a
    bare ``+`` separator line) to a new gzipped file in the same directory
    as the input.

    The output name keeps the first two dot-separated fields of the input's
    base name and appends ``.lengthfiltered.fastq.gz``, for example
    ``sample.R1.fastq.gz`` -> ``sample.R1.lengthfiltered.fastq.gz``.

    A progress line is printed every million reads. A ``ValueError`` raised
    while parsing ends the filtering of this file and is reported on
    stdout; reads written before the error remain in the output file.

    Args:
        allowedlengths: Iterable of integer read lengths to keep.
        infile: Path to a gzip-compressed FASTQ file.
    """
    # Split only the base name on dots, so that dots in directory
    # components ('./', '../', 'run.1/') cannot leak into the output name,
    # and a base name without any dot no longer raises IndexError.
    directory, basename = os.path.split(infile)
    stem = '.'.join(basename.split('.')[:2])
    outfilename = os.path.join(directory, stem + '.lengthfiltered.fastq.gz')

    # The FASTQ parser works on text lines. On Python 3 that means opening
    # the gzip streams in text mode; on Python 2 binary handles already
    # yield str, so the original modes are kept there.
    if sys.version_info[0] >= 3:
        read_mode, write_mode = 'rt', 'wt'
    else:
        read_mode, write_mode = 'rb', 'wb'

    # Build the lookup once; membership is tested for every read.
    allowed = frozenset(allowedlengths)

    # Stays None until the first record has been parsed, so the error
    # handler below never touches an unbound name.
    title = None
    with gzip.open(infile, read_mode) as infh, \
            gzip.open(outfilename, write_mode) as outfh:
        try:
            records = FastqGeneralIterator(infh)
            for counter, (title, seq, qual) in enumerate(records, 1):
                if counter % 1000000 == 0:
                    print('On read {0} of {1}.'.format(counter, infile))
                if len(seq) in allowed:
                    outfh.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
        except ValueError as err:
            # The record that failed was never yielded, so ``title`` is the
            # last read parsed successfully, not the broken one. Report the
            # parser's own message instead of guessing the cause.
            if title is None:
                position = 'before the first read'
            else:
                position = 'after read {0}'.format(title)
            print('Stopped filtering {0} {1}: {2}'.format(infile, position, err))
if __name__ == '__main__':
    # Command-line entry point: length-filter every listed FASTQ file in turn.
    parser = argparse.ArgumentParser()
    # Both options are mandatory. required=True makes argparse print a usage
    # error when one is missing instead of failing later on None.split().
    parser.add_argument('--fastqfiles', type = str, required = True, help = 'Comma separated list of fastq files to trim.')
    parser.add_argument('--allowedlengths', type = str, required = True, help = 'Comma separated list of allowed read lengths.')
    args = parser.parse_args()

    # Tolerate spaces around the commas and a trailing comma.
    length_fields = [field.strip() for field in args.allowedlengths.split(',')]
    try:
        allowedlengths = [int(field) for field in length_fields if field]
    except ValueError:
        parser.error('--allowedlengths must be a comma separated list of integers.')
    if not allowedlengths:
        parser.error('--allowedlengths must contain at least one read length.')

    fastqfiles = [name.strip() for name in args.fastqfiles.split(',')]
    for fastqfile in fastqfiles:
        # Skip empty entries left by stray commas.
        if fastqfile:
            fastqtrimmer(allowedlengths, fastqfile)