@@ -832,6 +832,27 @@ def _find_matching_components(
832832 return matching_components
833833
834834
835+ def _build_observed_coordinate_map (
836+ mol_names : Sequence [str ],
837+ coords : Sequence [Sequence [float ]],
838+ target_counts : Mapping [str , int ],
839+ ) -> Dict [str , Tuple [float , float , float ]]:
840+ """Build the observed coordinate map with stable per-copy labels for repeated molecule types."""
841+ observed : Dict [str , Tuple [float , float , float ]] = {}
842+ type_counts : Dict [str , int ] = {}
843+ for mol_name , coord in zip (mol_names , coords ):
844+ copy_idx = type_counts .get (mol_name , 0 )
845+ type_counts [mol_name ] = copy_idx + 1
846+ key = f"{ mol_name } _{ copy_idx } " if target_counts .get (mol_name , 1 ) > 1 else mol_name
847+ observed [key ] = tuple (float (value ) for value in coord )
848+
849+ missing = sorted (set (target_counts ) - set (type_counts ))
850+ if missing :
851+ raise ValueError (f"Missing COM coordinates for molecule types: { missing } " )
852+
853+ return observed
854+
855+
835856def _restart_snapshot_sort_key (restart_path : Path ) -> Tuple [Tuple [int , ...], str ]:
836857 """Sort restart snapshot files from earliest to latest by embedded numeric suffixes."""
837858 numeric_parts = tuple (int (part ) for part in re .findall (r"\d+" , restart_path .stem ))
@@ -859,6 +880,77 @@ def _iter_restart_snapshot_candidates(primary_restart_file: Union[str, Path]) ->
859880 return candidates
860881
861882
883+ def _complex_snapshot_sort_key (snapshot_path : Path ) -> Tuple [Tuple [int , ...], str ]:
884+ """Sort COMPLEXES JSON snapshots by embedded numeric suffixes."""
885+ numeric_parts = tuple (int (part ) for part in re .findall (r"\d+" , snapshot_path .stem ))
886+ return numeric_parts , snapshot_path .name
887+
888+
889+ def _iter_complex_json_candidates (complexes_dir : Union [str , Path ]) -> list [Path ]:
890+ """Return COMPLEXES JSON snapshots newest-to-oldest."""
891+ directory = Path (complexes_dir )
892+ if not directory .is_dir ():
893+ return []
894+
895+ json_files = [path for path in directory .iterdir () if path .is_file () and path .suffix == ".json" ]
896+ return sorted (json_files , key = _complex_snapshot_sort_key , reverse = True )
897+
898+
899+ def _extract_observed_com_coordinates_from_complex_json (
900+ complex_json_file : Union [str , Path ],
901+ target_counts : Mapping [str , int ],
902+ ) -> Dict [str , Tuple [float , float , float ]]:
903+ """Extract one matching complex directly from a COMPLEXES JSON snapshot."""
904+ payload = json .loads (Path (complex_json_file ).read_text (encoding = "utf-8" ))
905+ if not isinstance (payload , list ):
906+ raise ValueError (f"Malformed COMPLEXES JSON snapshot: { complex_json_file } " )
907+
908+ matching_complexes : list [tuple [list [str ], list [Sequence [float ]]]] = []
909+ for complex_record in payload :
910+ if not isinstance (complex_record , dict ):
911+ continue
912+ names = complex_record .get ("names" )
913+ coords = complex_record .get ("coords" )
914+ if not isinstance (names , list ) or not isinstance (coords , list ) or len (names ) != len (coords ):
915+ continue
916+ if dict (Counter (str (name ) for name in names )) == dict (target_counts ):
917+ matching_complexes .append (([str (name ) for name in names ], coords ))
918+
919+ if not matching_complexes :
920+ raise ValueError (
921+ "No complex in the COMPLEXES JSON snapshot matches the designed "
922+ f"target composition { dict (target_counts )} ."
923+ )
924+
925+ selected_names , selected_coords = matching_complexes [0 ]
926+ if len (matching_complexes ) > 1 :
927+ warnings .warn (
928+ "Multiple full assemblies matching the validation target were present in the COMPLEXES "
929+ f"snapshot { Path (complex_json_file ).name } ; selecting the first matching complex." ,
930+ RuntimeWarning ,
931+ )
932+
933+ return _build_observed_coordinate_map (selected_names , selected_coords , target_counts )
934+
935+
936+ def _find_observed_com_coordinates_in_complex_json_snapshots (
937+ complexes_dir : Union [str , Path ],
938+ target_counts : Mapping [str , int ],
939+ ) -> Tuple [Dict [str , Tuple [float , float , float ]], Path ]:
940+ """Search COMPLEXES JSON snapshots newest-to-oldest for a matching full assembly."""
941+ errors : list [str ] = []
942+ for candidate_json in _iter_complex_json_candidates (complexes_dir ):
943+ try :
944+ observed = _extract_observed_com_coordinates_from_complex_json (candidate_json , target_counts )
945+ return observed , candidate_json
946+ except Exception as exc :
947+ errors .append (f"{ candidate_json .name } : { exc } " )
948+
949+ raise ValueError (
950+ "No matching assembly was found in DATA/COMPLEXES JSON snapshots. " + " | " .join (errors )
951+ )
952+
953+
862954def _extract_observed_com_coordinates (
863955 system_psf_file : Union [str , Path ],
864956 final_coords_file : Optional [Union [str , Path ]],
@@ -889,23 +981,9 @@ def _extract_observed_com_coordinates(
889981 )
890982
891983 observed = {}
892- type_counts : Dict [str , int ] = {}
893- for mol_id in selected_component :
894- mol_name = restart_mol_names [mol_id ]
895- copy_idx = type_counts .get (mol_name , 0 )
896- type_counts [mol_name ] = copy_idx + 1
897-
898- if target_counts .get (mol_name , 1 ) > 1 :
899- key = f"{ mol_name } _{ copy_idx } "
900- else :
901- key = mol_name
902-
903- observed [key ] = restart_coords [mol_id ]
904-
905- missing = sorted (set (target_counts ) - set (type_counts ))
906- if missing :
907- raise ValueError (f"Missing COM coordinates for molecule types: { missing } " )
908-
984+ selected_names = [restart_mol_names [mol_id ] for mol_id in selected_component ]
985+ selected_coords = [restart_coords [mol_id ] for mol_id in selected_component ]
986+ observed = _build_observed_coordinate_map (selected_names , selected_coords , target_counts )
909987 return observed
910988
911989
@@ -960,16 +1038,21 @@ def run_structure_validation_simulation(
9601038
9611039 simulation_dir = work_dir / sim_dir_name / str (sim_index )
9621040 histogram_file = simulation_dir / "DATA" / "histogram_complexes_time.dat"
1041+ complexes_dir = simulation_dir / "DATA" / "COMPLEXES"
9631042 final_coords_file = simulation_dir / "DATA" / "final_coords.xyz"
9641043 system_psf_file = simulation_dir / "DATA" / "system.psf"
9651044 restart_file = simulation_dir / "DATA" / "restart.dat"
9661045
967- hist_times , hist_comps , hist_matrix = parse_complex_histogram (histogram_file )
1046+ hist_times = np .asarray ([])
1047+ hist_comps = []
1048+ hist_matrix = None
1049+ if histogram_file .exists ():
1050+ hist_times , hist_comps , hist_matrix = parse_complex_histogram (histogram_file )
9681051 target_comp = dict (artifacts .target_counts )
9691052
9701053 first_full_assembly_time = None
9711054 full_assembly_found = False
972- if len (hist_times ) > 0 :
1055+ if len (hist_times ) > 0 and hist_matrix is not None :
9731056 for col_idx , comp in enumerate (hist_comps ):
9741057 if comp == target_comp :
9751058 counts = np .asarray (hist_matrix [:, [col_idx ]].todense ()).ravel ()
@@ -982,26 +1065,53 @@ def run_structure_validation_simulation(
9821065 warning_message = None
9831066 observed_coordinates = None
9841067 selected_restart_file = restart_file
985- if full_assembly_found :
1068+ complex_json_candidates = _iter_complex_json_candidates (complexes_dir )
1069+ if complex_json_candidates :
1070+ try :
1071+ observed_coordinates , selected_restart_file = _find_observed_com_coordinates_in_complex_json_snapshots (
1072+ complexes_dir = complexes_dir ,
1073+ target_counts = artifacts .target_counts ,
1074+ )
1075+ full_assembly_found = True
1076+ except Exception as exc :
1077+ warning_message = (
1078+ "Validation warning: no full assembly matching the designed one-copy target "
1079+ f"{ target_comp } was found in { complexes_dir } ."
1080+ )
1081+ warnings .warn (warning_message , RuntimeWarning )
1082+ else :
1083+ fallback_warning = (
1084+ "Validation warning: no COMPLEXES JSON snapshots were found in "
1085+ f"{ complexes_dir } ; falling back to restart snapshots."
1086+ )
1087+ warnings .warn (fallback_warning , RuntimeWarning )
1088+ warning_message = fallback_warning
9861089 try :
9871090 observed_coordinates , selected_restart_file = _find_observed_com_coordinates_in_restart_snapshots (
9881091 system_psf_file = system_psf_file ,
9891092 final_coords_file = final_coords_file ,
9901093 restart_file = restart_file ,
9911094 target_counts = artifacts .target_counts ,
9921095 )
993- except Exception as exc :
994- raise ValueError (
995- "The target composition appeared in the histogram, but no matching connected "
996- "assembly was found in DATA/restart.dat or any RESTART snapshot. "
997- f"{ exc } "
998- ) from exc
999- else :
1000- warning_message = (
1096+ full_assembly_found = True
1097+ except Exception :
1098+ full_assembly_found = False
1099+
1100+ if not full_assembly_found :
1101+ no_assembly_warning = (
10011102 "Validation warning: no full assembly matching the designed one-copy target "
1002- f"{ target_comp } was found in { histogram_file } ."
1103+ f"{ target_comp } was found in { complexes_dir if complex_json_candidates else restart_file } ."
1104+ )
1105+ warning_message = (
1106+ no_assembly_warning
1107+ if warning_message is None
1108+ else f"{ warning_message } { no_assembly_warning } "
10031109 )
1004- warnings .warn (warning_message , RuntimeWarning )
1110+ if complex_json_candidates :
1111+ warnings .warn (no_assembly_warning , RuntimeWarning )
1112+ else :
1113+ if warning_message is not None and not warning_message .endswith ("snapshots." ):
1114+ warnings .warn (warning_message , RuntimeWarning )
10051115
10061116 return StructureValidationSimulationResult (
10071117 simulation_dir = simulation_dir ,
0 commit comments