-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdummyData.py3
More file actions
60 lines (48 loc) · 1.85 KB
/
dummyData.py3
File metadata and controls
60 lines (48 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#USAGE: python3 dummyData.py3
#--n [Number of sequences in file] --m [Rate of mutation (a decimal number times the length)]
#--l [Length of each sequence] --f [Name of output file] --y [Number of years]
import argparse, os, random, textwrap
#Defaults: 720 sequences of 600 bases spread over 18 years, each with up to
#0.01 * length base substitutions, appended to the fasta file dummyData.fas
BASES = ["G", "T", "A", "C"] #Altering base list can alter GC content
END_YEAR = 2019 #Years are numbered up to, but not including, this year
FIRST_ID = 11000 #Id number given to the first sequence
LINE_WIDTH = 60 #Maximum number of bases per output line


def parse_settings(argv=None):
    """Read the command line and fill in defaults for omitted options.

    argv is the list of argument strings to parse; None means the
    arguments of the running process.

    Returns the tuple (seq_num, seq_len, max_mutations, file_name, years).
    max_mutations is the --m rate multiplied by the sequence length,
    rounded to a whole number and never below zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--n', type=int)
    parser.add_argument('--l', type=int)
    parser.add_argument('--m', type=float)
    parser.add_argument('--f', type=str)
    parser.add_argument('--y', type=int)
    args = parser.parse_args(argv)

    seq_num = 720 if args.n is None else args.n
    seq_len = 600 if args.l is None else args.l
    rate = 0.01 if args.m is None else args.m
    file_name = "dummyData.fas" if args.f is None else args.f
    #--y only controls the number of years; the file name comes from --f
    years = 18 if args.y is None else args.y
    #random.randint only accepts whole numbers, so convert the product here
    max_mutations = max(0, round(seq_len * rate))
    return seq_num, seq_len, max_mutations, file_name, years


def generate_fasta(seq_num, seq_len, max_mutations, years):
    """Build the fasta text for the whole data set.

    A random starting sequence of seq_len bases is created, then
    seq_num // years records are emitted for each year from
    END_YEAR - years up to END_YEAR - 1 (a remainder of seq_num is not
    emitted). Before each record is written, between 0 and max_mutations
    random positions of the sequence are overwritten with a random base.
    The same sequence is reused from record to record, so substitutions
    accumulate through the file.

    Each record is a ">id_year" header line, the sequence wrapped at
    LINE_WIDTH characters, and a blank line. Ids count up from FIRST_ID.

    Returns the text as one string; it is empty when years is not
    positive.
    """
    if years <= 0:
        #No years to cover; this also avoids dividing by zero below
        return ""
    per_year = seq_num // years

    #Create a base sequence from which all other sequences will be derived
    sequence = [random.choice(BASES) for _ in range(seq_len)]
    #An empty sequence has no position to mutate
    mutation_limit = max(0, max_mutations) if sequence else 0

    records = []
    id_num = FIRST_ID
    for year in range(END_YEAR - years, END_YEAR):
        for _ in range(per_year):
            #Induce mutation (alter mutation_limit to alter the number of
            #random base point mutations)
            for _ in range(random.randint(0, mutation_limit)):
                sequence[random.randrange(len(sequence))] = random.choice(BASES)
            header = ">" + str(id_num) + "_" + str(year) + "\n"
            body = textwrap.fill(''.join(sequence), LINE_WIDTH)
            records.append(header + body + "\n\n")
            id_num += 1
    return "".join(records)


def main(argv=None):
    """Generate the dummy data set and append it to the output file.

    argv is passed on to parse_settings; None means the arguments of the
    running process.
    """
    seq_num, seq_len, max_mutations, file_name, years = parse_settings(argv)
    outtext = generate_fasta(seq_num, seq_len, max_mutations, years)
    #Append mode: an existing file is extended rather than overwritten
    with open(file_name, 'a') as outfile:
        outfile.write(outtext)


if __name__ == "__main__":
    main()