-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSnakefile
More file actions
109 lines (100 loc) · 4.03 KB
/
Snakefile
File metadata and controls
109 lines (100 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Snakefile - MutationScan Master Workflow
configfile: "config/config.yaml"

import os

# ---------------------------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------------------------
# Each setting below falls back to a default when its key is absent from
# `config`.

# The entry point is strictly a local directory of provided genomes.
GENOMES_DIR = config.get("local_genomes", "data/local_genomes")
TARGETS_FILE = config.get("targets_file", "config/acr_targets.txt")
DEFAULT_PDB = config.get("default_pdb", "data/5o66.pdb")
JOB_NAME = config.get("job_name", "default_run")
LEGACY_RESULTS_DIR = config.get("legacy_results_dir", "data/results")

# skip_extraction is compared as lower-cased text, so a YAML boolean and the
# textual spellings below are all accepted as "on".
_TRUTHY_SPELLINGS = ("1", "true", "yes", "y")
SKIP_EXTRACTION = str(config.get("skip_extraction", False)).lower() in _TRUTHY_SPELLINGS

# Root of this run's output tree. The rule outputs in this file are built from
# OUT_DIR, so a different job_name yields a separate set of results.
OUT_DIR = f"data/output/{JOB_NAME}"

# Where downstream rules read proteins/refs from. With skip_extraction on, the
# defaults point at the legacy results tree instead of this run's output tree.
# An explicit proteins_dir / refs_dir entry in config overrides either default.
_inputs_root = LEGACY_RESULTS_DIR if SKIP_EXTRACTION else OUT_DIR
PROTEINS_INPUT_DIR = config.get("proteins_dir", f"{_inputs_root}/proteins")
REFS_INPUT_DIR = config.get("refs_dir", f"{_inputs_root}/refs")
# ---------------------------------------------------------------------------
# MASTER RULE
# ---------------------------------------------------------------------------
rule all:
    # Aggregate target: it produces nothing itself and only requests the
    # deliverables listed below, all located under OUT_DIR.
    input:
        genomics_report=f"{OUT_DIR}/1_genomics_report.csv",
        epistasis_networks=f"{OUT_DIR}/2_epistasis_networks.csv",
        # A directory, declared with directory() by rule biochemical_epistasis.
        network_plots_dir=f"{OUT_DIR}/ControlScan_Networks",
        docking_report=f"{OUT_DIR}/3_biophysics_docking.csv",
# ---------------------------------------------------------------------------
# PHASE 1A: PROTEIN EXTRACTION
# ---------------------------------------------------------------------------
rule extract_proteins:
    # Runs src/scripts/02a_extract_proteins.py with the genomes directory and
    # the targets file as inputs. Declared outputs are the proteins/ and refs/
    # directories under OUT_DIR plus a marker file.
    input:
        genomes_dir=GENOMES_DIR,
        targets_file=TARGETS_FILE,
    output:
        proteins_dir=directory(f"{OUT_DIR}/proteins"),
        refs_dir=directory(f"{OUT_DIR}/refs"),
        # NOTE(review): the marker sits inside the proteins_dir directory
        # output; the path is kept as-is because the script is expected to
        # write it there - confirm against 02a_extract_proteins.py.
        marker=f"{OUT_DIR}/proteins/.proteins_extracted",
    params:
        uniprot_taxid=config.get("uniprot_taxid", ""),
        out_dir=OUT_DIR,
        # Pass the normalized boolean rather than the raw config value. The raw
        # value can be text such as "no" or "false", which is truthy if the
        # script tests it directly, and would then disagree with the
        # SKIP_EXTRACTION flag that selects PROTEINS_INPUT_DIR/REFS_INPUT_DIR.
        skip_extraction=SKIP_EXTRACTION,
    script:
        "src/scripts/02a_extract_proteins.py"
# ---------------------------------------------------------------------------
# PHASE 1B: VARIANT CALLING
# ---------------------------------------------------------------------------
rule call_variants:
    # Runs src/scripts/02b_call_variants.py on the protein and reference
    # directories. Those two paths come from PROTEINS_INPUT_DIR and
    # REFS_INPUT_DIR, so they may point at this run's output tree, at the
    # legacy results tree, or at explicit config overrides.
    input:
        proteins_dir=PROTEINS_INPUT_DIR,
        refs_dir=REFS_INPUT_DIR,
    output:
        report=f"{OUT_DIR}/1_genomics_report.csv",
        marker=f"{OUT_DIR}/.variants_called",
    params:
        out_dir=OUT_DIR,
    script:
        "src/scripts/02b_call_variants.py"
# ---------------------------------------------------------------------------
# PHASE 2: BIOCHEMICAL EPISTASIS
# ---------------------------------------------------------------------------
rule biochemical_epistasis:
    # Runs src/scripts/03_biochemical_epistasis.py on the genomics report
    # (the `report` output of rule call_variants). Declared outputs are a
    # networks CSV and the ControlScan_Networks directory.
    input:
        report=f"{OUT_DIR}/1_genomics_report.csv",
    output:
        networks=f"{OUT_DIR}/2_epistasis_networks.csv",
        plots_dir=directory(f"{OUT_DIR}/ControlScan_Networks"),
    params:
        out_dir=OUT_DIR,
    script:
        "src/scripts/03_biochemical_epistasis.py"
# ---------------------------------------------------------------------------
# PHASE 3: OPENMM DYNAMICS & HTVS DOCKING
# ---------------------------------------------------------------------------
rule htvs_biophysics:
    # Runs src/scripts/04_htvs_biophysics.py on the epistasis networks CSV, the
    # proteins directory and a PDB structure file. Declared outputs are the
    # docking report, the Mutated_Structures directory and a README text file.
    input:
        networks=f"{OUT_DIR}/2_epistasis_networks.csv",
        proteins_dir=PROTEINS_INPUT_DIR,
        pdb_file=DEFAULT_PDB,
    output:
        docking_report=f"{OUT_DIR}/3_biophysics_docking.csv",
        mutated_pdbs=directory(f"{OUT_DIR}/Mutated_Structures"),
        readme=f"{OUT_DIR}/README_Biophysics.txt",
    params:
        # Same path as input.pdb_file, also exposed to the script as a param.
        pdb=DEFAULT_PDB,
        chain_map=config.get("chain_map", ""),
        ligand=config.get("ligand", ""),
        # presumably the docking search-box centre - confirm against
        # 04_htvs_biophysics.py; each coordinate defaults to 0.0.
        center_x=config.get("center_x", 0.0),
        center_y=config.get("center_y", 0.0),
        center_z=config.get("center_z", 0.0),
        # The config key is "md_stiffness"; the script sees it as `stiffness`.
        stiffness=config.get("md_stiffness", 500.0),
        out_dir=OUT_DIR,
    script:
        "src/scripts/04_htvs_biophysics.py"