Skip to content

Commit 136960b

Browse files
update restart parser
1 parent 8ddcb24 commit 136960b

File tree

2 files changed

+67
-50
lines changed

2 files changed

+67
-50
lines changed

ionerdss/model/pdb/structure_validation.py

Lines changed: 63 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -411,75 +411,90 @@ def _parse_restart_snapshot(
411411
adjacency: Dict[int, set[int]] = defaultdict(set)
412412
restart_coords: Dict[int, Tuple[float, float, float]] = {}
413413
idx = start_idx + 2
414-
for block_idx in range(molecule_count):
414+
for _ in range(molecule_count):
415415
header = lines[idx].split()
416-
if len(header) < 2:
416+
if len(header) < 5:
417417
raise ValueError(f"Malformed molecule block header in restart file near line {idx + 1}")
418418
mol_id = int(header[0])
419-
block_start = idx
420-
coord_tokens = lines[block_start + 2].split()
419+
idx += 1
420+
421+
metadata_tokens = lines[idx].split()
422+
if len(metadata_tokens) < 6:
423+
raise ValueError(f"Malformed molecule metadata line in restart file near line {idx + 1}")
424+
idx += 1
425+
426+
coord_tokens = lines[idx].split()
421427
if len(coord_tokens) < 3:
422-
raise ValueError(f"Malformed coordinate line in restart file near line {block_start + 3}")
428+
raise ValueError(f"Malformed coordinate line in restart file near line {idx + 1}")
423429
restart_coords[mol_id] = (
424430
float(coord_tokens[0]),
425431
float(coord_tokens[1]),
426432
float(coord_tokens[2]),
427433
)
434+
idx += 1
428435

429-
# Real restart.dat files write floating-point metadata immediately after
430-
# the block header and use a compact two-line-per-interface layout.
431-
# Older synthetic tests in this repo used a simplified integer-only
432-
# metadata line and an older three-line-per-interface layout.
433-
metadata_tokens = lines[block_start + 1].split()
434-
real_restart_layout = any(
435-
any(ch in token.lower() for ch in (".", "e"))
436-
for token in metadata_tokens
437-
)
436+
free_list_line = lines[idx].split()
437+
if not free_list_line:
438+
raise ValueError(f"Malformed free interface list in restart file near line {idx + 1}")
439+
free_list_size = int(free_list_line[0])
440+
idx += 1
438441

439-
if real_restart_layout:
440-
partner_idx = block_start + 4
441-
else:
442-
partner_idx = block_start + 5
442+
bound_list_line = lines[idx].split()
443+
if not bound_list_line:
444+
raise ValueError(f"Malformed bound interface list in restart file near line {idx + 1}")
445+
bound_list_size = int(bound_list_line[0])
446+
idx += 1
443447

444-
partner_line = lines[partner_idx].split()
448+
partner_line = lines[idx].split()
445449
if not partner_line:
446-
raise ValueError(f"Malformed partner list in restart file near line {partner_idx + 1}")
450+
raise ValueError(f"Malformed bound partner list in restart file near line {idx + 1}")
447451
partner_count = int(partner_line[0])
448452
partner_ids = [int(value) for value in partner_line[1:1 + partner_count]]
453+
idx += 1
454+
455+
iface_count_line = lines[idx].split()
456+
if not iface_count_line:
457+
raise ValueError(f"Malformed interface count in restart file near line {idx + 1}")
458+
iface_count = int(iface_count_line[0])
459+
idx += 1
460+
461+
if bound_list_size != partner_count:
462+
raise ValueError(
463+
"Mismatch between bound interface count and bound partner count in "
464+
f"{restart_file} near line {idx}."
465+
)
449466

450467
for partner_id in partner_ids:
451468
if partner_id != mol_id:
452469
adjacency[mol_id].add(partner_id)
453470
adjacency[partner_id].add(mol_id)
454471

455-
if block_idx == molecule_count - 1:
456-
idx = len(lines)
457-
continue
458-
459-
next_mol_id = mol_id + 1
460-
next_idx = None
461-
for candidate_idx in range(block_start + 1, len(lines) - 1):
462-
candidate = lines[candidate_idx].split()
463-
if len(candidate) != 5:
464-
continue
465-
if candidate[0] != str(next_mol_id):
466-
continue
467-
candidate_metadata = lines[candidate_idx + 1].split()
468-
candidate_real_layout = any(
469-
any(ch in token.lower() for ch in (".", "e"))
470-
for token in candidate_metadata
471-
)
472-
if candidate_real_layout != real_restart_layout:
473-
continue
474-
next_idx = candidate_idx
475-
break
476-
477-
if next_idx is None:
478-
raise ValueError(
479-
f"Could not locate the next molecule block after molecule id {mol_id} in {restart_file}"
480-
)
481-
482-
idx = next_idx
472+
for _iface_idx in range(iface_count):
473+
iface_line = lines[idx].split()
474+
if len(iface_line) < 6:
475+
raise ValueError(f"Malformed interface header in restart file near line {idx + 1}")
476+
is_bound = int(iface_line[5])
477+
idx += 1
478+
479+
iface_coord_line = lines[idx].split()
480+
if len(iface_coord_line) < 3:
481+
raise ValueError(f"Malformed interface coordinate line in restart file near line {idx + 1}")
482+
idx += 1
483+
484+
if is_bound:
485+
bound_partner_line = lines[idx].split()
486+
if len(bound_partner_line) < 3:
487+
raise ValueError(f"Malformed bound interface payload in restart file near line {idx + 1}")
488+
idx += 1
489+
490+
for list_name in ("prevlist", "prevmyface", "prevpface", "prevnorm", "ps_prev", "prevsep"):
491+
list_line = lines[idx].split()
492+
if not list_line:
493+
raise ValueError(f"Malformed {list_name} list in restart file near line {idx + 1}")
494+
list_size = int(list_line[0])
495+
if len(list_line) < 1 + list_size:
496+
raise ValueError(f"Truncated {list_name} list in restart file near line {idx + 1}")
497+
idx += 1
483498

484499
return adjacency, restart_coords
485500

ionerdss/tests/test_structure_validation.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -211,13 +211,14 @@ def test_observed_structure_extraction_uses_one_connected_component():
211211
)
212212

213213
def _restart_block(mol_id: int, partners: list[int], coord) -> list[str]:
214+
bound_iface_line = " ".join([str(len(partners))] + [str(i) for i in range(len(partners))])
214215
return [
215216
f"{mol_id} 0 0 0 0",
216217
"1.0 0 0 0 0 0",
217218
f"{coord[0]} {coord[1]} {coord[2]}",
218219
"4 0 1 2 3",
220+
bound_iface_line,
219221
" ".join([str(len(partners))] + [str(pid) for pid in partners]),
220-
"0",
221222
"4",
222223
"0 0 0 0 \0 0",
223224
"0.0 0.0 0.0",
@@ -302,13 +303,14 @@ def test_restart_snapshot_search_falls_back_to_restart_directory():
302303
)
303304

304305
def _restart_block(mol_id: int, partners: list[int], coord) -> list[str]:
306+
bound_iface_line = " ".join([str(len(partners))] + [str(i) for i in range(len(partners))])
305307
return [
306308
f"{mol_id} 0 0 0 0",
307309
"1.0 0 0 0 0 0",
308310
f"{coord[0]} {coord[1]} {coord[2]}",
309311
"4 0 1 2 3",
312+
bound_iface_line,
310313
" ".join([str(len(partners))] + [str(pid) for pid in partners]),
311-
"0",
312314
"4",
313315
"0 0 0 0 \0 0",
314316
"0.0 0.0 0.0",

0 commit comments

Comments
 (0)