Skip to content

Commit c02f79d

Browse files
Completely disable PSF-file-based parsing
1 parent f845f71 commit c02f79d

File tree

2 files changed

+53
-63
lines changed

2 files changed

+53
-63
lines changed

ionerdss/model/pdb/structure_validation.py

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -648,36 +648,14 @@ def _extract_observed_com_coordinates(
648648
restart_file: Union[str, Path],
649649
target_counts: Mapping[str, int],
650650
) -> Dict[str, Tuple[float, float, float]]:
651-
"""Extract COM coordinates from one connected final-frame assembly."""
652-
com_records = _parse_psf_com_records(system_psf_file)
653-
xyz_coords = None
654-
if final_coords_file is not None and Path(final_coords_file).exists():
655-
xyz_coords = _parse_xyz_coordinates(final_coords_file)
656-
adjacency, restart_coords, restart_mol_names = _parse_restart_snapshot(restart_file)
651+
"""Extract COM coordinates from one connected final-frame assembly using restart-native typing only."""
652+
del system_psf_file, final_coords_file
657653

658-
mol_id_to_coord: Dict[int, Tuple[float, float, float]] = {}
659-
psf_mol_id_to_name: Dict[int, str] = {}
660-
for atom_index, mol_id, mol_name in com_records:
661-
psf_mol_id_to_name[mol_id] = mol_name
662-
if mol_id in restart_coords:
663-
mol_id_to_coord[mol_id] = restart_coords[mol_id]
664-
elif xyz_coords is not None:
665-
mol_id_to_coord[mol_id] = tuple(xyz_coords[atom_index].tolist())
666-
else:
667-
raise ValueError(f"Missing coordinates for molecule id {mol_id} in restart snapshot {restart_file}")
654+
adjacency, restart_coords, restart_mol_names = _parse_restart_snapshot(restart_file)
655+
for mol_id in restart_coords:
668656
adjacency.setdefault(mol_id, set())
669657

670-
name_maps_to_try: list[Dict[int, str]] = [psf_mol_id_to_name]
671-
if restart_mol_names and restart_mol_names != psf_mol_id_to_name:
672-
name_maps_to_try.append(restart_mol_names)
673-
674-
matching_components: list[list[int]] = []
675-
selected_mol_id_to_name = psf_mol_id_to_name
676-
for candidate_name_map in name_maps_to_try:
677-
matching_components = _find_matching_components(adjacency, candidate_name_map, target_counts)
678-
if matching_components:
679-
selected_mol_id_to_name = candidate_name_map
680-
break
658+
matching_components = _find_matching_components(adjacency, restart_mol_names, target_counts)
681659

682660
if not matching_components:
683661
raise ValueError(
@@ -696,7 +674,7 @@ def _extract_observed_com_coordinates(
696674
observed = {}
697675
type_counts: Dict[str, int] = {}
698676
for mol_id in selected_component:
699-
mol_name = selected_mol_id_to_name[mol_id]
677+
mol_name = restart_mol_names[mol_id]
700678
copy_idx = type_counts.get(mol_name, 0)
701679
type_counts[mol_name] = copy_idx + 1
702680

@@ -705,7 +683,7 @@ def _extract_observed_com_coordinates(
705683
else:
706684
key = mol_name
707685

708-
observed[key] = mol_id_to_coord[mol_id]
686+
observed[key] = restart_coords[mol_id]
709687

710688
missing = sorted(set(target_counts) - set(type_counts))
711689
if missing:

ionerdss/tests/test_structure_validation.py

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -293,24 +293,24 @@ def test_observed_structure_extraction_uses_one_connected_component():
293293
encoding="utf-8",
294294
)
295295

296-
def _restart_block(mol_id: int, partners: list[int], coord) -> list[str]:
296+
def _restart_block(mol_id: int, iface_count: int, partners: list[int], coord) -> list[str]:
297297
bound_iface_line = " ".join([str(len(partners))] + [str(i) for i in range(len(partners))])
298298
return [
299299
f"{mol_id} 0 0 0 0",
300300
"1.0 0 0 0 0 0",
301301
f"{coord[0]} {coord[1]} {coord[2]}",
302-
"4 0 1 2 3",
302+
" ".join([str(iface_count)] + [str(i) for i in range(iface_count)]),
303303
bound_iface_line,
304304
" ".join([str(len(partners))] + [str(pid) for pid in partners]),
305-
"4",
306-
"0 0 0 0 \0 0",
307-
"0.0 0.0 0.0",
308-
"1 1 0 0 \0 0",
309-
"0.0 0.0 0.0",
310-
"2 2 0 0 \0 0",
311-
"0.0 0.0 0.0",
312-
"3 3 0 0 \0 0",
313-
"0.0 0.0 0.0",
305+
str(iface_count),
306+
*[
307+
token
308+
for iface_idx in range(iface_count)
309+
for token in (
310+
f"{iface_idx} {iface_idx} 0 0 \0 0",
311+
"0.0 0.0 0.0",
312+
)
313+
],
314314
"0",
315315
"0",
316316
"0",
@@ -320,15 +320,19 @@ def _restart_block(mol_id: int, partners: list[int], coord) -> list[str]:
320320
]
321321

322322
restart_lines = [
323+
"#MolTemplates",
324+
"0 A 4",
325+
"1 H 3",
326+
"2 L 2",
323327
"#All Molecules and coordinates",
324328
"6 6",
325329
]
326-
restart_lines.extend(_restart_block(0, [1, 2], (0.0, 0.0, 0.0)))
327-
restart_lines.extend(_restart_block(1, [0, 2], (1.0, 0.0, 0.0)))
328-
restart_lines.extend(_restart_block(2, [0, 1], (0.0, 1.0, 0.0)))
329-
restart_lines.extend(_restart_block(3, [4, 5], (10.0, 10.0, 0.0)))
330-
restart_lines.extend(_restart_block(4, [3, 5], (10.0, 0.0, 0.0)))
331-
restart_lines.extend(_restart_block(5, [3, 4], (10.0, 11.0, 0.0)))
330+
restart_lines.extend(_restart_block(0, 4, [1, 2], (0.0, 0.0, 0.0)))
331+
restart_lines.extend(_restart_block(1, 3, [0, 2], (1.0, 0.0, 0.0)))
332+
restart_lines.extend(_restart_block(2, 2, [0, 1], (0.0, 1.0, 0.0)))
333+
restart_lines.extend(_restart_block(3, 4, [4, 5], (10.0, 10.0, 0.0)))
334+
restart_lines.extend(_restart_block(4, 3, [3, 5], (10.0, 0.0, 0.0)))
335+
restart_lines.extend(_restart_block(5, 2, [3, 4], (10.0, 11.0, 0.0)))
332336
restart_path.write_text("\n".join(restart_lines), encoding="utf-8")
333337

334338
observed = _extract_observed_com_coordinates(
@@ -385,24 +389,24 @@ def test_restart_snapshot_search_falls_back_to_restart_directory():
385389
encoding="utf-8",
386390
)
387391

388-
def _restart_block(mol_id: int, partners: list[int], coord) -> list[str]:
392+
def _restart_block(mol_id: int, iface_count: int, partners: list[int], coord) -> list[str]:
389393
bound_iface_line = " ".join([str(len(partners))] + [str(i) for i in range(len(partners))])
390394
return [
391395
f"{mol_id} 0 0 0 0",
392396
"1.0 0 0 0 0 0",
393397
f"{coord[0]} {coord[1]} {coord[2]}",
394-
"4 0 1 2 3",
398+
" ".join([str(iface_count)] + [str(i) for i in range(iface_count)]),
395399
bound_iface_line,
396400
" ".join([str(len(partners))] + [str(pid) for pid in partners]),
397-
"4",
398-
"0 0 0 0 \0 0",
399-
"0.0 0.0 0.0",
400-
"1 1 0 0 \0 0",
401-
"0.0 0.0 0.0",
402-
"2 2 0 0 \0 0",
403-
"0.0 0.0 0.0",
404-
"3 3 0 0 \0 0",
405-
"0.0 0.0 0.0",
401+
str(iface_count),
402+
*[
403+
token
404+
for iface_idx in range(iface_count)
405+
for token in (
406+
f"{iface_idx} {iface_idx} 0 0 \0 0",
407+
"0.0 0.0 0.0",
408+
)
409+
],
406410
"0",
407411
"0",
408412
"0",
@@ -412,21 +416,29 @@ def _restart_block(mol_id: int, partners: list[int], coord) -> list[str]:
412416
]
413417

414418
primary_lines = [
419+
"#MolTemplates",
420+
"0 A 4",
421+
"1 H 3",
422+
"2 L 2",
415423
"#All Molecules and coordinates",
416424
"3 3",
417425
]
418-
primary_lines.extend(_restart_block(0, [], (10.0, 0.0, 0.0)))
419-
primary_lines.extend(_restart_block(1, [], (20.0, 0.0, 0.0)))
420-
primary_lines.extend(_restart_block(2, [], (30.0, 0.0, 0.0)))
426+
primary_lines.extend(_restart_block(0, 4, [], (10.0, 0.0, 0.0)))
427+
primary_lines.extend(_restart_block(1, 3, [], (20.0, 0.0, 0.0)))
428+
primary_lines.extend(_restart_block(2, 2, [], (30.0, 0.0, 0.0)))
421429
primary_restart_path.write_text("\n".join(primary_lines), encoding="utf-8")
422430

423431
older_lines = [
432+
"#MolTemplates",
433+
"0 A 4",
434+
"1 H 3",
435+
"2 L 2",
424436
"#All Molecules and coordinates",
425437
"3 3",
426438
]
427-
older_lines.extend(_restart_block(0, [1, 2], (0.0, 0.0, 0.0)))
428-
older_lines.extend(_restart_block(1, [0, 2], (1.0, 0.0, 0.0)))
429-
older_lines.extend(_restart_block(2, [0, 1], (0.0, 1.0, 0.0)))
439+
older_lines.extend(_restart_block(0, 4, [1, 2], (0.0, 0.0, 0.0)))
440+
older_lines.extend(_restart_block(1, 3, [0, 2], (1.0, 0.0, 0.0)))
441+
older_lines.extend(_restart_block(2, 2, [0, 1], (0.0, 1.0, 0.0)))
430442
older_restart_path.write_text("\n".join(older_lines), encoding="utf-8")
431443

432444
observed, used_restart = _find_observed_com_coordinates_in_restart_snapshots(

0 commit comments

Comments
 (0)