-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_validation_controls.py
More file actions
495 lines (404 loc) · 19 KB
/
run_validation_controls.py
File metadata and controls
495 lines (404 loc) · 19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
#!/usr/bin/env python
"""
AutoScan Validation Controls - Phase 2 Quality Assurance
Implements two critical validation controls to ensure docking engine reliability:
CONTROL 1: Redocking Accuracy Test (Positive Control)
- Objective: Prove Vina can map a drug to its known binding site
- Test: Extract drug from 1HVR (HIV Protease), randomize coords, redock
- Pass Criteria: RMSD < 2.0 Angstrom between crystal and redocked pose
- Purpose: Validates that the docking algorithm works correctly
CONTROL 2: Specificity Lineup Test (Negative Control)
- Objective: Prove the engine discriminates between active and inactive molecules
- Test: Dock Ciprofloxacin with 50 chemical decoys into 2XCT (Gyrase)
- Pass Criteria: Ciprofloxacin ranks in Top 3 (top ~5%)
- Purpose: Validates that high scores indicate real binding, not random noise
This script orchestrates both tests and generates a comprehensive validation report.
"""
import os
import sys
import subprocess
import logging
from pathlib import Path
from datetime import datetime
from typing import Dict, Tuple, Optional
import json
# ============================================================================
# Configuration
# ============================================================================
WORKSPACE_ROOT = Path(__file__).parent
TESTS_DIR = WORKSPACE_ROOT / "tests"
BENCHMARK_DIR = WORKSPACE_ROOT / "benchmark"
VALIDATION_OUTPUT_DIR = WORKSPACE_ROOT / "workspace" / "validation_controls" / datetime.now().strftime("%Y%m%d_%H%M%S")
VALIDATION_REPORT = VALIDATION_OUTPUT_DIR / "VALIDATION_REPORT.txt"
VALIDATION_JSON = VALIDATION_OUTPUT_DIR / "validation_results.json"
VALIDATION_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# ============================================================================
# Logging Setup
# ============================================================================
logging.basicConfig(
level=logging.INFO,
format="[%(asctime)s] %(name)s - %(levelname)s - %(message)s",
handlers=[
logging.FileHandler(VALIDATION_OUTPUT_DIR / "validation_controls.log"),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
# ============================================================================
# Control 1: Redocking Accuracy Test (Positive Control)
# ============================================================================
def run_control_1_redocking_test() -> Dict[str, any]:
"""
Control 1: Redocking Accuracy Test
Proves that Vina can accurately map a drug back to its physical binding site.
Pass Criteria: RMSD < 2.0 Å between crystal and redocked pose
"""
logger.info("=" * 80)
logger.info("CONTROL 1: REDOCKING ACCURACY TEST (Positive Control)")
logger.info("=" * 80)
logger.info("Objective: Prove Vina accurately finds known binding site without hallucinating")
logger.info("Test Molecule: XK2 (bound inhibitor in 1HVR HIV Protease)")
logger.info("")
results = {
"control": "1_redocking_accuracy",
"status": "pending",
"timestamp": datetime.now().isoformat(),
"pass_criteria": "RMSD < 2.0 Angstrom",
"details": {}
}
# Check if benchmark PDB exists
pdb_1hvr = BENCHMARK_DIR / "1HVR.pdb"
if not pdb_1hvr.exists():
logger.error(f"PDB file not found: {pdb_1hvr}")
results["status"] = "failed"
results["error"] = f"PDB file missing: {pdb_1hvr}"
return results
logger.info(f"[OK] Found PDB: {pdb_1hvr}")
# Run benchmark_suite.py
logger.info("")
logger.info("Running benchmark_suite.py for 1HVR redocking test...")
logger.info("This will:")
logger.info(" 1. Extract XK2 ligand from 1HVR crystal structure")
logger.info(" 2. Randomize its 3D coordinates (±2.0 Angstrom translation + rotation)")
logger.info(" 3. Run Vina blind docking to recover the crystal pose")
logger.info(" 4. Calculate RMSD between crystal and redocked poses")
logger.info("")
try:
cmd = [
sys.executable,
str(TESTS_DIR / "benchmark_suite.py"),
"--mode", "local",
"--target", "1HVR" # Run only 1HVR test
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
if result.returncode == 0:
logger.info("benchmark_suite.py completed successfully")
results["status"] = "completed"
results["details"]["benchmark_output"] = "See workspace/benchmark_suite/ for detailed results"
else:
logger.error(f"benchmark_suite.py failed: {result.stderr[:500]}")
results["status"] = "failed"
results["error"] = result.stderr[:500]
return results
except Exception as e:
logger.error(f"Error running benchmark_suite.py: {e}")
results["status"] = "error"
results["error"] = str(e)
return results
# Parse benchmark results
try:
# Look for the most recent benchmark report
benchmark_results_dir = WORKSPACE_ROOT / "workspace" / "benchmark_suite"
if benchmark_results_dir.exists():
# Get the most recent timestamped directory
latest_benchmark = max(
(d for d in benchmark_results_dir.iterdir() if d.is_dir()),
key=lambda p: p.stat().st_mtime,
default=None
)
if latest_benchmark:
csv_file = latest_benchmark / "benchmark_results.csv"
if csv_file.exists():
import csv
with open(csv_file, 'r') as f:
reader = csv.DictReader(f)
for row in reader:
if row.get('PDB_ID') == '1HVR':
# Extract RMSD_equiv as proxy for actual RMSD
rmsd_equiv = float(row.get('RMSD_Equiv', 999.0))
results["details"]["rmsd_equiv"] = rmsd_equiv
results["details"]["crystal_score"] = row.get('Crystal_Score')
results["details"]["random_score"] = row.get('Random_Score')
# Pass/Fail decision
if rmsd_equiv < 2.0:
results["pass"] = True
results["conclusion"] = f"PASS - RMSD {rmsd_equiv:.3f} A < 2.0 A threshold"
logger.info(f"[PASS] RMSD {rmsd_equiv:.3f} Angstrom (threshold: 2.0 Angstrom)")
else:
results["pass"] = False
results["conclusion"] = f"FAIL - RMSD {rmsd_equiv:.3f} A >= 2.0 A threshold"
logger.warning(f"[FAIL] RMSD {rmsd_equiv:.3f} Angstrom (threshold: 2.0 Angstrom)")
break
except Exception as e:
logger.warning(f"Could not parse benchmark results: {e}")
results["details"]["parse_error"] = str(e)
logger.info("")
return results
# ============================================================================
# Control 2: Specificity Lineup Test (Negative Control)
# ============================================================================
def run_control_2_specificity_test() -> Dict[str, any]:
"""
Control 2: Specificity Lineup Test (Chemical Enrichment)
Proves that the docking engine discriminates between active and inactive molecules.
Does NOT just ascribe high binding scores to everything.
Pass Criteria: Ciprofloxacin ranks in Top 3 (top ~5% of 51 molecules)
"""
logger.info("")
logger.info("=" * 80)
logger.info("CONTROL 2: SPECIFICITY LINEUP TEST (Negative Control)")
logger.info("=" * 80)
logger.info("Objective: Prove engine discriminates actives from inactive decoys")
logger.info("Active Molecule: Ciprofloxacin (known S. aureus Gyrase inhibitor)")
logger.info("Decoys: 50 chemically similar but structurally distinct molecules")
logger.info("Test System: 2XCT (S. aureus Gyrase - well-characterized target)")
logger.info("")
results = {
"control": "2_specificity_enrichment",
"status": "pending",
"timestamp": datetime.now().isoformat(),
"pass_criteria": "Ciprofloxacin ranks <= 3 (top ~5%)",
"details": {}
}
logger.info("Running chemical_benchmark_enrichment.py...")
logger.info("This will:")
logger.info(" 1. Prepare Ciprofloxacin for docking into 2XCT Gyrase")
logger.info(" 2. Generate 50 structurally diverse decoy molecules")
logger.info(" 3. Dock all 51 molecules (1 active + 50 decoys)")
logger.info(" 4. Rank by binding affinity (lower = stronger binding)")
logger.info(" 5. Check if Ciprofloxacin appears in Top 3")
logger.info("")
try:
cmd = [
sys.executable,
str(TESTS_DIR / "chemical_benchmark_enrichment.py")
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=1800) # 30 min timeout
if result.returncode == 0:
logger.info("chemical_benchmark_enrichment.py completed successfully")
results["status"] = "completed"
results["details"]["enrichment_output"] = "See workspace/chemical_enrichment/ for detailed results"
else:
logger.error(f"chemical_benchmark_enrichment.py failed: {result.stderr[:500]}")
results["status"] = "failed"
results["error"] = result.stderr[:500]
return results
except Exception as e:
logger.error(f"Error running chemical_benchmark_enrichment.py: {e}")
results["status"] = "error"
results["error"] = str(e)
return results
# Parse enrichment results
try:
# Look for the most recent enrichment report
enrichment_results_dir = WORKSPACE_ROOT / "workspace" / "chemical_enrichment"
if enrichment_results_dir.exists():
# Get the most recent timestamped directory
latest_enrichment = max(
(d for d in enrichment_results_dir.iterdir() if d.is_dir()),
key=lambda p: p.stat().st_mtime,
default=None
)
if latest_enrichment:
csv_file = latest_enrichment / "enrichment_results.csv"
if csv_file.exists():
import pandas as pd
df = pd.read_csv(csv_file)
# Find Ciprofloxacin
cipro_rows = df[df['SMILES'] == 'C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O']
if not cipro_rows.empty:
cipro_row = cipro_rows.iloc[0]
cipro_rank = int(cipro_row.get('Rank', 999))
cipro_score = float(cipro_row.get('Score', 0.0))
results["details"]["ciprofloxacin_rank"] = cipro_rank
results["details"]["ciprofloxacin_score"] = cipro_score
results["details"]["total_molecules"] = len(df)
# Pass/Fail decision
if cipro_rank <= 3:
results["pass"] = True
results["conclusion"] = f"PASS - Ciprofloxacin ranks #{cipro_rank} (top 5%)"
logger.info(f"[PASS] Ciprofloxacin ranked #{cipro_rank} with score {cipro_score:.3f} kcal/mol")
else:
results["pass"] = False
results["conclusion"] = f"FAIL - Ciprofloxacin ranks #{cipro_rank} (not in top 3)"
logger.warning(f"[FAIL] Ciprofloxacin ranked #{cipro_rank}, expected <= 3")
else:
logger.warning("Could not find Ciprofloxacin in results")
results["details"]["error"] = "Ciprofloxacin not found in results"
except Exception as e:
logger.warning(f"Could not parse enrichment results: {e}")
results["details"]["parse_error"] = str(e)
logger.info("")
return results
# ============================================================================
# Report Generation
# ============================================================================
def generate_validation_report(control1_results: Dict, control2_results: Dict) -> None:
"""Generate comprehensive validation report."""
# Determine overall status
control1_pass = control1_results.get("pass", False)
control2_pass = control2_results.get("pass", False)
all_pass = control1_pass and control2_pass
report = f"""
{'=' * 80}
AutoScan Engine Validation Controls - Phase 2 Quality Assurance
{'=' * 80}
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
EXECUTIVE SUMMARY
{'-' * 80}
Overall Status: {'ALL PASS' if all_pass else 'FAILURES DETECTED'}
Control 1 (Redocking Accuracy): {'PASS' if control1_pass else 'FAIL'}
Control 2 (Specificity Enrichment): {'PASS' if control2_pass else 'FAIL'}
{'=' * 80}
CONTROL 1: REDOCKING ACCURACY TEST (Positive Control)
{'=' * 80}
Objective:
Prove that AutoScan/Vina can accurately map a drug to its known crystal binding
site without hallucinating or failing to find true interactions.
Test System:
PDB: 1HVR (HIV Protease)
Ligand: XK2 (known bound inhibitor)
Methodology:
1. Extract bound ligand from crystal structure
2. Randomize its 3D coordinates (±2.0 Angstrom translation + random rotation)
3. Run blind docking to recover crystal pose
4. Calculate RMSD between crystal and redocked positions
Pass Criteria:
RMSD < 2.0 Angstroms (standard for successful docking)
Results:
Status: {control1_results.get('status', 'unknown')}
RMSD Equiv: {control1_results.get('details', {}).get('rmsd_equiv', 'N/A')} Angstrom
Crystal Score: {control1_results.get('details', {}).get('crystal_score', 'N/A')} kcal/mol
Random Score: {control1_results.get('details', {}).get('random_score', 'N/A')} kcal/mol
Conclusion:
{control1_results.get('conclusion', 'No results available')}
Interpretation:
{'[OK] The docking engine successfully recovers known binding modes with high accuracy.' if control1_pass else '[FAIL] The engine failed to recover the known binding mode. This suggests issues with either scoring function or search algorithm.'}
{'=' * 80}
CONTROL 2: SPECIFICITY LINEUP TEST (Negative Control)
{'=' * 80}
Objective:
Prove that AutoScan does NOT indiscriminately assign high binding scores. The
engine must distinguish between a known active and many inactive decoys.
Test System:
PDB: 2XCT (S. aureus Gyrase)
Active: Ciprofloxacin (known gyrase inhibitor, well-characterized)
Decoys: 50 diverse drug-like molecules (NSAIDs, phenols, anilines, etc.)
Methodology:
1. Dock Ciprofloxacin into Gyrase
2. Generate/dock 50 structurally distinct but drug-like decoys
3. Rank all 51 molecules by binding affinity
4. Check if Ciprofloxacin ranks in top 3 (top ~5%)
Pass Criteria:
Ciprofloxacin requires rank <= 3 of 51 molecules
Meaning: Ciprofloxacin must be in top 5%, NOT randomly scattered
Results:
Status: {control2_results.get('status', 'unknown')}
Ciprofloxacin Rank: {control2_results.get('details', {}).get('ciprofloxacin_rank', 'N/A')}
Ciprofloxacin Score: {control2_results.get('details', {}).get('ciprofloxacin_score', 'N/A')} kcal/mol
Total Molecules Docked: {control2_results.get('details', {}).get('total_molecules', 'N/A')}
Conclusion:
{control2_results.get('conclusion', 'No results available')}
Interpretation:
{'[OK] The engine robustly discriminates true binders from random decoys.' if control2_pass else '[FAIL] The engine failed to rank the known active above decoys. This suggests non-selective scoring.'}
{'=' * 80}
OVERALL RECOMMENDATION
{'=' * 80}
"""
if all_pass:
report += f"""
STATUS: APPROVED FOR PRODUCTION
Both controls PASSED. The AutoScan engine:
[OK] Accurately finds known binding modes (Control 1)
[OK] Discriminates actives from decoys (Control 2)
Next Steps:
1. Complete full benchmark suite (10+ proteins)
2. Validate consensus scoring module
3. Perform epistatic mutation validation
4. Deploy to production pipeline
"""
else:
report += f"""
STATUS: REQUIRES INVESTIGATION
One or more controls FAILED:
"""
if not control1_pass:
report += f"""
[FAIL] Control 1 (Redocking): {control1_results.get('conclusion', 'N/A')}
-> Check: (1) Scoring function parameters, (2) Search exhaustiveness, (3) Box sizing
"""
if not control2_pass:
report += f"""
[FAIL] Control 2 (Specificity): {control2_results.get('conclusion', 'N/A')}
-> Check: (1) Ligand preparation (protonation state), (2) Coordinate generation, (3) PDBQT conversion
"""
report += """
Next Steps:
1. Review error logs in workspace/validation_controls/
2. Adjust parameters and re-run controls
3. Do not proceed to production until BOTH controls pass
"""
report += f"""
{'=' * 80}
Log Files and Detailed Results:
Report: {VALIDATION_REPORT}
Results JSON: {VALIDATION_JSON}
Detailed logs: {VALIDATION_OUTPUT_DIR/('validation_controls.log')}
Benchmark Suite Results: {WORKSPACE_ROOT / 'workspace' / 'benchmark_suite'}
Chemical Enrichment Results: {WORKSPACE_ROOT / 'workspace' / 'chemical_enrichment'}
{'=' * 80}
"""
# Write report to file
with open(VALIDATION_REPORT, 'w') as f:
f.write(report)
# Write JSON results
validation_json = {
"timestamp": datetime.now().isoformat(),
"overall_pass": all_pass,
"control_1": control1_results,
"control_2": control2_results,
}
with open(VALIDATION_JSON, 'w') as f:
json.dump(validation_json, f, indent=2)
# Print report to console
print(report)
logger.info(f"Validation report saved to: {VALIDATION_REPORT}")
logger.info(f"Results JSON saved to: {VALIDATION_JSON}")
# ============================================================================
# Main
# ============================================================================
def main():
logger.info("")
logger.info("=" * 80)
logger.info("AutoScan Validation Controls - Initialization")
logger.info("=" * 80)
logger.info("")
logger.info(f"Workspace: {WORKSPACE_ROOT}")
logger.info(f"Output Directory: {VALIDATION_OUTPUT_DIR}")
logger.info("")
# Run Control 1
control1_results = run_control_1_redocking_test()
# Run Control 2
control2_results = run_control_2_specificity_test()
# Generate Report
generate_validation_report(control1_results, control2_results)
# Exit code based on results
if control1_results.get("pass") and control2_results.get("pass"):
logger.info("\n[SUCCESS] ALL VALIDATION CONTROLS PASSED")
return 0
else:
logger.error(f"\n[FAILURE] VALIDATION CONTROLS FAILED - See report for details")
return 1
if __name__ == "__main__":
exit_code = main()
sys.exit(exit_code)