-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcgmlst.smk
More file actions
78 lines (56 loc) · 1.27 KB
/
cgmlst.smk
File metadata and controls
78 lines (56 loc) · 1.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from pathlib import Path
import os.path
import os
# Force the conventional thread-count variables of OpenBLAS, OpenMP, MKL and
# numexpr to '1' for this process and everything it spawns.
# NOTE(review): presumably so each job stays on a single core — confirm.
os.environ.update(
    dict.fromkeys(
        (
            'OPENBLAS_NUM_THREADS',
            'OMP_NUM_THREADS',
            'MKL_NUM_THREADS',
            'NUMEXPR_NUM_THREADS',
        ),
        '1',
    )
)
# Sample names: the stem of every '*.fasta' file found directly in genomes/.
# glob() yields entries in arbitrary, filesystem-dependent order, so the list is
# sorted to keep the expanded input lists of the rules below identical from run
# to run.
NAMES = sorted(fasta.stem for fasta in Path('genomes/').glob('*.fasta'))
# Give `update` priority over `create_table` whenever both rules could produce
# the same requested file.
# NOTE(review): the two rules declare different outputs ('.updated' vs
# 'calls.csv'), so this directive looks redundant — confirm before removing.
ruleorder: update > create_table
# Target rule: it has no action of its own and only requests 'pristine.csv'.
# Being the first rule in this file, it is the default target when the file is
# run on its own.
rule cgmlst:
    input: 'pristine.csv'
# Run `fsac call` on one genome: genomes/{name}.fasta -> jsons/{name}.json.
# The allele location is read from config['alleles'].
# Every substituted value carries the ':q' format spec so that Snakemake
# shell-quotes it; paths containing spaces or shell metacharacters therefore
# reach fsac as a single argument instead of being split by the shell.
rule call:
    input:
        'genomes/{name}.fasta'
    output:
        'jsons/{name}.json'
    threads:
        1
    conda:
        'envs/fsac.yaml'
    shell:
        'fsac call -a {config[alleles]:q} -i {input:q} -o {output:q}'
# Run `fsac update` once all per-genome JSON files exist, then touch the empty
# flag file '.updated' so downstream rules can depend on this step.
# The command works on the whole jsons/ and genomes/ directories; the input list
# only enforces that every `call` job has finished first.
# config['alleles'] is shell-quoted (':q') so a path with spaces or shell
# metacharacters is passed to fsac as one argument.
rule update:
    input:
        expand('jsons/{name}.json', name=NAMES)
    output:
        touch('.updated')
    conda:
        'envs/fsac.yaml'
    shell:
        'fsac update -a {config[alleles]:q} -j jsons/ -g genomes/'
# Tabulate the contents of jsons/ into the comma-delimited file 'calls.csv'.
# The inputs are never substituted into the command; they exist to order this
# rule after every `call` job and after the `update` flag file has been written.
rule create_table:
    input:
        calls=expand('jsons/{name}.json', name=NAMES),
        updated='.updated'
    output: 'calls.csv'
    conda: 'envs/fsac.yaml'
    shell:
        'fsac tabulate -j jsons/ -o {output} -d ,'
# Filter the call table down to "pristine" rows: rows in which no value is
# below 1. Reads the table produced by `create_table` and writes 'pristine.csv'
# with the same header and index column.
# NOTE(review): values < 1 presumably mark missing or imperfect allele calls —
# confirm against the fsac output format.
rule create_pristine:
    input:
        rules.create_table.output
    output:
        'pristine.csv'
    run:
        import pandas as pd

        calls = pd.read_csv(input[0], sep=',', index_col=0, header=0)

        # Vectorized row mask instead of iterating with iterrows(): True for
        # rows that contain at least one value < 1. NaN compares as False, so
        # rows with NaN are kept exactly as before. A plain boolean array is
        # used so that duplicate index labels cannot affect the selection.
        has_bad_call = (calls < 1).any(axis=1).to_numpy(dtype=bool)
        pristine = calls.loc[~has_bad_call]

        pristine.to_csv(output[0], sep=',', header=True, index=True)
# Housekeeping rule: runs `rm` on everything matched by the glob {TEMPDIR}/*.
# NOTE(review): TEMPDIR is not assigned anywhere in this file; it presumably has
# to be defined by an including workflow — confirm, otherwise the command
# cannot be formatted.
rule clean:
    shell: 'rm {TEMPDIR}/*'