Skip to content

Commit a793918

Browse files
Add more detailed failure statuses
1 parent b3c7fe2 commit a793918

File tree

6 files changed

+233
-9
lines changed

6 files changed

+233
-9
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,5 @@ test_parse_8ip3.py
208208
/pdb_benchmark
209209
/7cqv_dir
210210
/tests/troubleshoot/8IP3_validation
211+
/ionerdss/benchmark/trials
212+
/ionerdss/benchmark_output

benchmark/run_validation_benchmark.py

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,71 @@
1313
import logging
1414
import traceback
1515
from pathlib import Path
16+
from typing import Optional
1617

1718
from ionerdss.model.pdb import PDBModelBuilder
1819
from ionerdss.model import pdb
20+
from ionerdss.model.pdb.structure_validation import get_disconnected_design_message
1921

2022
FAST_VALIDATION_ITERATIONS = 100000
2123

2224

25+
def _classify_failed_assembly(sim_result, target_assembly_size: int) -> str:
    """Map a failed validation run onto the "UA" / "OA" status codes.

    Args:
        sim_result: Object exposing ``largest_observed_assembly_size``.
        target_assembly_size: Size of the target assembly to compare against.

    Returns:
        ``"UA"`` when the largest observed assembly is strictly smaller than
        the target size; ``"OA"`` in every other case, including equality.
    """
    fell_short = sim_result.largest_observed_assembly_size < target_assembly_size
    return "UA" if fell_short else "OA"
30+
31+
32+
def _contains_nerdss_crash_signature(message: Optional[str]) -> bool:
    """Report whether *message* mentions a known NERDSS runtime-crash marker.

    Matching is a case-insensitive substring search against a fixed list of
    markers. ``None`` and the empty string never match.

    Args:
        message: Free-form text (a warning, an exception string, log content).

    Returns:
        True if any crash marker occurs in the text, otherwise False.
    """
    if not message:
        return False

    haystack = message.lower()
    for marker in (
        "segmentation fault",
        "segfault",
        "core dumped",
        "signal 11",
        "sigsegv",
        "abort trap",
        "stack trace",
    ):
        if marker in haystack:
            return True
    return False
48+
49+
50+
def _detect_nerdss_crash(sim_result) -> bool:
    """Return True when the simulation result shows clear evidence of a NERDSS crash.

    Two sources are inspected, in order:

    1. ``sim_result.warning_message``.
    2. The ``output.log`` file inside ``sim_result.simulation_dir``.

    Both attributes are optional; a missing attribute is treated as "no
    evidence from that source".

    Args:
        sim_result: Simulation result object; read via ``getattr`` only.

    Returns:
        True if either source contains a crash signature. A log that is
        missing or cannot be opened/read yields False rather than an error.
    """
    if _contains_nerdss_crash_signature(getattr(sim_result, "warning_message", None)):
        return True

    simulation_dir = getattr(sim_result, "simulation_dir", None)
    if simulation_dir is None:
        return False

    log_path = Path(simulation_dir) / "output.log"
    try:
        # Open directly instead of checking exists() first: this avoids the
        # check-then-use race and keeps every filesystem error (missing file,
        # permission denied, path is a directory) inside the handler below.
        with log_path.open(encoding="utf-8", errors="replace") as log_file:
            # No signature contains a newline, so scanning line by line finds
            # the same matches without loading the whole log into memory.
            return any(_contains_nerdss_crash_signature(line) for line in log_file)
    except (OSError, ValueError):
        # ValueError covers malformed paths (e.g. an embedded NUL byte).
        return False
67+
68+
69+
def _status_for_failed_validation(sim_result, target_assembly_size: int) -> str:
    """Choose the most specific status code for a failed validation run.

    Precedence:

    1. ``"NC"`` when crash evidence is detected for the run.
    2. ``"UA"`` / ``"OA"`` when the result carries a non-``None``
       ``largest_observed_assembly_size``.
    3. ``"Failed_Assembly"`` (the older, ambiguous label) otherwise.

    Args:
        sim_result: Simulation result object; size metadata is optional.
        target_assembly_size: Size of the target assembly.

    Returns:
        One of ``"NC"``, ``"UA"``, ``"OA"`` or ``"Failed_Assembly"``.
    """
    if _detect_nerdss_crash(sim_result):
        return "NC"

    if getattr(sim_result, "largest_observed_assembly_size", None) is None:
        # No size metadata available: keep the legacy label.
        return "Failed_Assembly"

    return _classify_failed_assembly(sim_result, target_assembly_size)
79+
80+
2381
def _run_validation_attempt(
2482
system,
2583
workspace_manager,
@@ -136,6 +194,20 @@ def main():
136194
chain_types_count = len(rep_instances)
137195

138196
print(f" -> Extracted {chains_count} chains across {chain_types_count} unique types.")
197+
198+
if chains_count < 2:
199+
status = "FP"
200+
print(" -> Too few protein chains remain after coarse-graining; marking as FP.")
201+
continue
202+
203+
disconnected_design_message = get_disconnected_design_message(
204+
system,
205+
prefix="Validation preflight warning",
206+
)
207+
if disconnected_design_message is not None:
208+
status = "DC"
209+
print(f" -> {disconnected_design_message}")
210+
continue
139211

140212
# Create a range of titration rates, scaled for each molecular species
141213
base_rate = 0.0 # 0.25e-3
@@ -187,20 +259,23 @@ def main():
187259
print(f" -> Validation Complete! RMSD: {rmsd:.4f} nm")
188260
else:
189261
print(" -> Simulation ran but did not yield a full matching assembly.")
262+
target_assembly_size = sum(artifacts.target_counts.values())
263+
status = _status_for_failed_validation(sim_result, target_assembly_size)
190264
if sim_result.warning_message:
191265
print(f" Warning: {sim_result.warning_message}")
192266
if "[Errno 2] No such file" in sim_result.warning_message or "invalid literal format" in sim_result.warning_message or "invalid literal for int" in sim_result.warning_message:
193267
status = "Crashed"
194-
else:
195-
status = "Failed_Assembly"
196-
print(" Error: target composition did not appear in the histogram after both validation runs.")
197-
else:
198-
status = "Failed_Assembly"
268+
elif status in {"UA", "OA"}:
269+
print(
270+
" Error: target assembly was not found after both validation runs. "
271+
f"Largest observed assembly size was {sim_result.largest_observed_assembly_size} "
272+
f"vs target size {target_assembly_size}."
273+
)
199274

200275
_dump_nerdss_log(Path(sim_result.simulation_dir), pdb_id)
201276

202277
except Exception as e:
203-
status = "Crashed"
278+
status = "NC" if _contains_nerdss_crash_signature(str(e)) else "Crashed"
204279
print(f" -> Error encountered benchmarking {pdb_id}: {e}")
205280
if "DATA/restart.dat or any RESTART snapshot" in str(e):
206281
print(" Error: target composition appeared in the histogram, but no restart snapshot contained the full assembly.")

ionerdss/model/pdb/main.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pathlib import Path
1212
import logging
1313
import math
14+
import warnings
1415
import numpy as np
1516

1617
from ionerdss.model.components.system import System
@@ -241,10 +242,11 @@ def notice(self, message, *args, **kwargs):
241242

242243
disconnected_design_message = get_disconnected_design_message(
243244
system,
244-
prefix="Preflight error",
245+
prefix="Preflight warning",
245246
)
246247
if disconnected_design_message is not None:
247-
raise ValueError(disconnected_design_message)
248+
self.workspace_manager.logger.warning(disconnected_design_message)
249+
warnings.warn(disconnected_design_message, RuntimeWarning)
248250

249251
# Calculate default molecule counts if not provided (using stoichiometry)
250252
if molecule_counts is None:

ionerdss/model/pdb/structure_validation.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ class StructureValidationSimulationResult:
8080
full_assembly_found: bool
8181
warning_message: Optional[str]
8282
first_full_assembly_time: Optional[float]
83+
largest_observed_assembly_size: int
8384
observed_coordinates: Optional[Dict[str, Tuple[float, float, float]]]
8485

8586

@@ -1013,6 +1014,75 @@ def _find_observed_com_coordinates_in_restart_snapshots(
10131014
)
10141015

10151016

1017+
def _get_largest_complex_json_size(complex_json_file: Union[str, Path]) -> int:
    """Return the size of the largest complex recorded in one COMPLEXES JSON snapshot.

    A record only counts when it is a dict whose ``"names"`` and ``"coords"``
    entries are both lists of equal length; its size is ``len(names)``.
    Records that do not meet this shape are ignored.

    Args:
        complex_json_file: Path to the JSON snapshot (read as UTF-8).

    Returns:
        The largest qualifying record size, or 0 when none qualifies.

    Raises:
        ValueError: If the top-level JSON value is not a list.
    """
    snapshot = json.loads(Path(complex_json_file).read_text(encoding="utf-8"))
    if not isinstance(snapshot, list):
        raise ValueError(f"Malformed COMPLEXES JSON snapshot: {complex_json_file}")

    def _is_well_formed(record) -> bool:
        # Only dict records with matching names/coords lists are counted.
        if not isinstance(record, dict):
            return False
        names = record.get("names")
        coords = record.get("coords")
        return isinstance(names, list) and isinstance(coords, list) and len(names) == len(coords)

    return max(
        (len(record["names"]) for record in snapshot if _is_well_formed(record)),
        default=0,
    )
1034+
1035+
1036+
def _get_largest_complex_json_size_in_snapshots(complexes_dir: Union[str, Path]) -> int:
    """Return the largest complex size found in any COMPLEXES JSON snapshot.

    Args:
        complexes_dir: Location handed to ``_iter_complex_json_candidates``
            to enumerate snapshot files.

    Returns:
        The maximum per-snapshot size, or 0 when no snapshot could be
        evaluated.
    """
    best_so_far = 0
    for snapshot_path in _iter_complex_json_candidates(complexes_dir):
        try:
            snapshot_best = _get_largest_complex_json_size(snapshot_path)
        except Exception:
            # Best effort: a snapshot that cannot be read or parsed is skipped.
            continue
        if snapshot_best > best_so_far:
            best_so_far = snapshot_best
    return best_so_far
1045+
1046+
1047+
def _get_largest_restart_component_size(restart_file: Union[str, Path]) -> int:
    """Return the size of the largest connected component in a restart snapshot.

    The snapshot is parsed with ``_parse_restart_snapshot``. Every molecule id
    present in the parsed coordinates is a node; edges come from the parsed
    adjacency mapping. Neighbors that have no coordinate entry are ignored.

    Args:
        restart_file: Path to the restart snapshot.

    Returns:
        Number of molecules in the largest component, or 0 when the snapshot
        contains no molecules.
    """
    adjacency, restart_coords, _unused_mol_names = _parse_restart_snapshot(restart_file)
    # Give molecules without any recorded bond an explicit empty neighbor set.
    for mol_id in restart_coords:
        adjacency.setdefault(mol_id, set())

    seen: set[int] = set()
    biggest = 0
    for start in sorted(restart_coords):
        if start in seen:
            continue

        # Iterative depth-first walk over the component containing `start`.
        seen.add(start)
        pending = [start]
        members = 0
        while pending:
            node = pending.pop()
            members += 1
            for neighbor in sorted(adjacency.get(node, ())):
                if neighbor in seen or neighbor not in restart_coords:
                    continue
                seen.add(neighbor)
                pending.append(neighbor)

        if members > biggest:
            biggest = members

    return biggest
1073+
1074+
1075+
def _get_largest_restart_component_size_in_snapshots(primary_restart_file: Union[str, Path]) -> int:
    """Return the largest connected-component size across restart snapshots.

    Args:
        primary_restart_file: Path handed to
            ``_iter_restart_snapshot_candidates`` to enumerate the snapshot
            files to inspect.

    Returns:
        The maximum per-snapshot component size, or 0 when no snapshot could
        be evaluated.
    """
    best_so_far = 0
    for snapshot_path in _iter_restart_snapshot_candidates(primary_restart_file):
        try:
            snapshot_best = _get_largest_restart_component_size(snapshot_path)
        except Exception:
            # Best effort: a snapshot that cannot be parsed is skipped.
            continue
        if snapshot_best > best_so_far:
            best_so_far = snapshot_best
    return best_so_far
1084+
1085+
10161086
def run_structure_validation_simulation(
10171087
artifacts: StructureValidationArtifacts,
10181088
nerdss_dir: Union[str, Path],
@@ -1065,8 +1135,10 @@ def run_structure_validation_simulation(
10651135
warning_message = None
10661136
observed_coordinates = None
10671137
selected_restart_file = restart_file
1138+
largest_observed_assembly_size = 0
10681139
complex_json_candidates = _iter_complex_json_candidates(complexes_dir)
10691140
if complex_json_candidates:
1141+
largest_observed_assembly_size = _get_largest_complex_json_size_in_snapshots(complexes_dir)
10701142
try:
10711143
observed_coordinates, selected_restart_file = _find_observed_com_coordinates_in_complex_json_snapshots(
10721144
complexes_dir=complexes_dir,
@@ -1086,6 +1158,7 @@ def run_structure_validation_simulation(
10861158
)
10871159
warnings.warn(fallback_warning, RuntimeWarning)
10881160
warning_message = fallback_warning
1161+
largest_observed_assembly_size = _get_largest_restart_component_size_in_snapshots(restart_file)
10891162
try:
10901163
observed_coordinates, selected_restart_file = _find_observed_com_coordinates_in_restart_snapshots(
10911164
system_psf_file=system_psf_file,
@@ -1122,5 +1195,6 @@ def run_structure_validation_simulation(
11221195
full_assembly_found=full_assembly_found,
11231196
warning_message=warning_message,
11241197
first_full_assembly_time=first_full_assembly_time,
1198+
largest_observed_assembly_size=largest_observed_assembly_size,
11251199
observed_coordinates=observed_coordinates,
11261200
)

ionerdss/tests/test_structure_validation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ def get_system(self):
452452

453453
builder = PDBModelBuilder("test_input.pdb")
454454

455-
with pytest.raises(ValueError, match="Preflight error: the designed assembly graph is disconnected"):
455+
with pytest.warns(RuntimeWarning, match="Preflight warning: the designed assembly graph is disconnected"):
456456
builder.build_system(
457457
workspace_path=str(tmp_path / "workspace"),
458458
generate_visualizations=False,
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from importlib.util import module_from_spec, spec_from_file_location
2+
from pathlib import Path
3+
from types import SimpleNamespace
4+
5+
6+
# Load the benchmark script straight from its file path so the tests can call
# its private helpers through the `benchmark` module object.
BENCHMARK_SCRIPT = Path(__file__).resolve().parents[2].joinpath(
    "benchmark",
    "run_validation_benchmark.py",
)
_SPEC = spec_from_file_location("run_validation_benchmark", BENCHMARK_SCRIPT)
# Fail fast with a clear assertion if no loadable spec could be built.
assert _SPEC is not None and _SPEC.loader is not None
benchmark = module_from_spec(_SPEC)
_SPEC.loader.exec_module(benchmark)
11+
12+
13+
def test_status_for_failed_validation_returns_ua_when_largest_assembly_is_too_small():
    """A largest assembly below the target size is reported as "UA"."""
    undersized_result = SimpleNamespace(
        simulation_dir=Path("/tmp/does_not_exist"),
        warning_message="Validation warning: no full assembly matching target.",
        largest_observed_assembly_size=3,
    )

    status = benchmark._status_for_failed_validation(undersized_result, target_assembly_size=4)

    assert status == "UA"
21+
22+
23+
def test_status_for_failed_validation_returns_oa_when_largest_assembly_meets_or_exceeds_target():
    """A largest assembly equal to the target size is reported as "OA"."""
    full_size_result = SimpleNamespace(
        simulation_dir=Path("/tmp/does_not_exist"),
        warning_message="Validation warning: no full assembly matching target.",
        largest_observed_assembly_size=4,
    )

    status = benchmark._status_for_failed_validation(full_size_result, target_assembly_size=4)

    assert status == "OA"
31+
32+
33+
def test_status_for_failed_validation_returns_nc_when_warning_contains_crash_signature():
    """A crash signature in the warning message wins over size-based codes."""
    crashed_result = SimpleNamespace(
        simulation_dir=Path("/tmp/does_not_exist"),
        warning_message="NERDSS terminated with Segmentation fault (core dumped).",
        largest_observed_assembly_size=2,
    )

    status = benchmark._status_for_failed_validation(crashed_result, target_assembly_size=4)

    assert status == "NC"
41+
42+
43+
def test_status_for_failed_validation_returns_nc_when_output_log_contains_crash_signature(tmp_path):
    """A crash signature found only in output.log still yields "NC"."""
    run_dir = tmp_path / "validation_output" / "1"
    run_dir.mkdir(parents=True)
    run_dir.joinpath("output.log").write_text(
        "fatal error: abort trap\nSegmentation fault\n",
        encoding="utf-8",
    )
    # The warning message itself carries no crash signature.
    logged_crash_result = SimpleNamespace(
        simulation_dir=run_dir,
        warning_message="Validation warning: simulation did not produce a full assembly.",
        largest_observed_assembly_size=2,
    )

    status = benchmark._status_for_failed_validation(logged_crash_result, target_assembly_size=4)

    assert status == "NC"
57+
58+
59+
def test_status_for_failed_validation_falls_back_to_old_failed_assembly_label_without_size_metadata():
    """Without size metadata the legacy "Failed_Assembly" label is returned."""
    # Deliberately omits largest_observed_assembly_size.
    legacy_result = SimpleNamespace(
        simulation_dir=Path("/tmp/does_not_exist"),
        warning_message="Validation warning: simulation did not produce a full assembly.",
    )

    status = benchmark._status_for_failed_validation(legacy_result, target_assembly_size=4)

    assert status == "Failed_Assembly"
66+
67+
68+
def test_contains_nerdss_crash_signature_matches_common_runtime_crash_messages():
    """Typical crash texts match; an ordinary validation warning does not."""
    has_crash_signature = benchmark._contains_nerdss_crash_signature

    assert has_crash_signature("Segmentation fault (core dumped)")
    assert has_crash_signature("Received signal 11 while running NERDSS")
    assert not has_crash_signature("Validation warning: no full assembly was found")

0 commit comments

Comments
 (0)