-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfastqDumpCoords.py
More file actions
executable file
·55 lines (48 loc) · 2.2 KB
/
fastqDumpCoords.py
File metadata and controls
executable file
·55 lines (48 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
import os
import argparse
import logging
from Bio.SeqIO.QualityIO import FastqGeneralIterator
def getOptions():
    """Parse the command line and return the resulting argparse namespace.

    Recognised options:
        -i / --input   name of the FASTQ file to read (required)
        -o / --out     name of the CSV table to write (required)
        -g / --log     name of a log file (optional)

    The returned namespace exposes them as ``input``, ``out`` and ``log``;
    ``log`` is ``None`` when -g is not supplied.
    """
    cli = argparse.ArgumentParser(
        description="Takes a fastq file and creates a table containing time and x y coordinate information."
    )

    # (short flag, long flag, required?, help text) for every supported option
    option_table = (
        ("-i", "--input", True, "Name of FASTQ file [Required]"),
        ("-o", "--out", True, "Name of the output CSV table [Required]"),
        ("-g", "--log", False, "Log File"),
    )
    for short_flag, long_flag, is_required, help_text in option_table:
        cli.add_argument(
            short_flag,
            long_flag,
            dest=long_flag.lstrip("-"),
            action='store',
            required=is_required,
            help=help_text,
        )

    return cli.parse_args()
def setLogger(fname, loglevel):
    """Configure the logging module to write records to a file.

    fname    -- name of the file that receives the log records
    loglevel -- minimum level to record (e.g. logging.INFO)

    Each record is formatted as "<timestamp> - <level name> - <message>".
    """
    log_settings = {
        'filename': fname,
        'level': loglevel,
        'format': '%(asctime)s - %(levelname)s - %(message)s',
    }
    logging.basicConfig(**log_settings)
def headerSplit(header):
    """Split a FASTQ read header into its machine/coordinate fields.

    Two header layouts are recognised:

    * Headers containing a space (created using CASAVA 1.8+):
      ``machine:runid:flowid:lane:tile:x:y <comment>``.  The read number is
      the first character of the comment.  The run id and flowcell id are
      parsed but not returned.
    * Headers without a space (created using older versions of CASAVA):
      ``machine:lane:tile:x:y#<suffix>``.  The read number is the last
      character of the suffix when that character is '1' or '2'; otherwise
      it defaults to the integer 1.

    Returns the list ``[machine, lane, tile, xcoord, ycoord, read]``.  All
    items are strings except for the default read number, which is the
    integer 1 (kept that way for backward compatibility).

    Raises ValueError when the header does not have the expected number of
    fields, and IndexError when a header ends directly after its first space.
    """
    if ' ' in header:
        # Header was created using CASAVA 1.8+
        # Split on the FIRST space only: anything after the comment field
        # (e.g. an extra "length=36" annotation) stays in the comment
        # instead of breaking the two-way unpacking.
        (mhead, suphead) = header.split(' ', 1)
        fields = mhead.split(':')
        if len(fields) != 7:
            raise ValueError("Unrecognized CASAVA 1.8+ FASTQ header: %r" % (header,))
        (machine, _runid, _flowid, lane, tile, xcoord, ycoord) = fields
        read = suphead[0]
    else:
        # Header was created using older versions of CASAVA
        parts = header.split('#')
        fields = parts[0].split(':')
        if len(parts) != 2 or len(fields) != 5:
            raise ValueError("Unrecognized FASTQ header: %r" % (header,))
        (machine, lane, tile, xcoord, ycoord) = fields
        suphead = parts[1]
        if suphead.endswith(('1', '2')):
            read = suphead[-1]
        else:
            # No pair suffix present: treat the read as read 1
            read = 1
    return [machine, lane, tile, xcoord, ycoord, read]
def main():
    """Read the FASTQ file and write one CSV row per read.

    Each row holds the fields returned by headerSplit() for the read's
    header followed by the read's sequence; a column-name row is written
    first.
    """
    args = getOptions()

    # Log to the file given with -g; without it, send records to the null device
    log_target = args.log or os.devnull
    setLogger(log_target, logging.INFO)

    column_names = ('machine', 'lane', 'tile', 'x-coord', 'y-coord', 'readNum', 'sequence')

    with open(args.input, 'r') as fastq_handle, open(args.out, 'w') as table_handle:
        table_handle.write(','.join(column_names) + "\n")

        # The quality string is not part of the output table
        for title, bases, _quality in FastqGeneralIterator(fastq_handle):
            row = [str(field) for field in headerSplit(title)]
            row.append(bases)
            table_handle.write(','.join(row) + "\n")
# Run the conversion only when executed as a script, then note completion in the log.
if __name__ == "__main__":
    main()
    logging.info("Script complete.")