From 1d84a80bd4b1bde0d8972849d445c93ee1fafabb Mon Sep 17 00:00:00 2001 From: bbh-pharm Date: Mon, 16 Feb 2026 23:00:34 +0900 Subject: [PATCH 01/13] feat(wrapper/rdkit): add read_mols function --- src/gmol/base/wrapper/rdkit.py | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index 7e51c7c..fe6af7b 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -3,6 +3,7 @@ from collections.abc import Iterable from itertools import count from pathlib import Path +from typing import cast from rdkit import Chem from rdkit.Chem import AllChem @@ -105,6 +106,67 @@ def generate_conformer( raise +def read_mols( + file_path: Path | str, + sanitize: bool = True, + remove_h: bool = False, +) -> list[Chem.Mol]: + """Read a molecular file into a list of RDKit Mol objects. + + Supported formats: ``.mol2``, ``.sdf``, ``.pdb``. + + :param file_path: Path to the molecular file. + :param sanitize: If True, sanitize each molecule after reading. + :param remove_h: If True, remove explicit hydrogens from each molecule. + :returns: List of RDKit Mol objects; entries that failed to parse are omitted. + """ + file_path = Path(file_path) + + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + ext = file_path.suffix.lower() + + try: + if ext == ".mol2": + mol = cast( + Chem.Mol | None, + Chem.MolFromMol2File( + str(file_path), sanitize=False, removeHs=False + ), + ) + mols = [mol] if mol is not None else [] + elif ext == ".sdf": + with Chem.SDMolSupplier( + str(file_path), sanitize=False, removeHs=False + ) as suppl: + mols = [m for m in suppl if m is not None] # pyright: ignore[reportUnnecessaryComparison] + elif ext == ".pdb": + mol = cast( + Chem.Mol | None, + Chem.MolFromPDBFile( + str(file_path), sanitize=False, removeHs=False + ), + ) + mols = [mol] if mol is not None else [] + else: + raise ValueError( + f"Unsupported file extension '{ext}'. Supported formats: .mol2, .sdf, .pdbqt, .pdb." + ) + + if sanitize: + for mol in mols: + Chem.SanitizeMol(mol) + + if remove_h: + mols = [Chem.RemoveHs(mol, sanitize=sanitize) for mol in mols] + + except Exception as exc: + raise ValueError(f"Error processing molecule: {exc}") from exc + + return mols + + _end_re = re.compile(r"^END\s+", re.MULTILINE) From 97e11a502ff2d1ac8e0b2ca4933d17f4d7b98016 Mon Sep 17 00:00:00 2001 From: bbh-pharm Date: Mon, 16 Feb 2026 23:01:11 +0900 Subject: [PATCH 02/13] feat(test): add ligand test data and rdkit tests for reading and writing ligands --- tests/test_data/ligands/5dwr_ligand.mol2 | 143 +++++++++++++++++++++++ tests/test_data/ligands/5dwr_ligand.pdb | 116 ++++++++++++++++++ tests/test_data/ligands/5dwr_ligand.sdf | 122 +++++++++++++++++++ tests/test_data/ligands/8a0b_ligand.mol2 | 58 +++++++++ tests/test_data/ligands/8a0b_ligand.pdb | 48 ++++++++ tests/test_data/ligands/8a0b_ligand.sdf | 54 +++++++++ tests/wrapper/rdkit_test.py | 139 +++++++++++++++++++++- 7 files changed, 679 insertions(+), 1 deletion(-) create mode 100644 tests/test_data/ligands/5dwr_ligand.mol2 create mode 100644 tests/test_data/ligands/5dwr_ligand.pdb create mode 100644 tests/test_data/ligands/5dwr_ligand.sdf create mode 100644 tests/test_data/ligands/8a0b_ligand.mol2 create mode 100644 tests/test_data/ligands/8a0b_ligand.pdb create mode 100644 tests/test_data/ligands/8a0b_ligand.sdf diff --git a/tests/test_data/ligands/5dwr_ligand.mol2 b/tests/test_data/ligands/5dwr_ligand.mol2 new file mode 100644 index 0000000..ab7274f --- /dev/null +++ b/tests/test_data/ligands/5dwr_ligand.mol2 @@ -0,0 +1,143 @@ +# Name: 5dwr_ligand +# Creating user name: suminyi +# Creation time: Sun Sep 30 06:48:38 2018 + +# Modifying user name: suminyi +# Modification time: Sun Sep 30 06:48:38 2018 + +@MOLECULE +5dwr_ligand + 56 59 1 1 0 +SMALL +MMFF94_CHARGES + + +@ATOM + 1 C1 -40.9660 -1.8740 4.5420 C.3 1 MOL 0.0000 + 2 C2 -40.8620 -3.0170 5.5460 C.3 1 MOL 0.0000 + 3 C3 -40.7410 -4.4010 4.8800 C.3 1 MOL 0.5030 + 4 C11 -39.0090 -2.8770 0.4690 C.ar 1 MOL 0.1170 + 5 C12 -40.9940 -2.5110 -1.0020 C.2 1 MOL 0.5438 + 6 C13 -42.4570 -2.2000 -0.8760 C.ar 1 MOL 0.3962 + 7 C14 -43.2690 -2.2150 -2.0050 C.ar 1 MOL -0.1500 + 8 C15 -44.6380 -2.1050 -1.8270 C.ar 1 MOL -0.1500 + 9 C16 -45.1360 -1.9630 -0.5540 C.ar 1 MOL 0.1900 + 10 C17 -44.2610 -1.9200 0.5320 C.ar 1 MOL 0.3100 + 11 C18 -44.7350 -1.7710 1.9400 C.ar 1 MOL 0.0000 + 12 C19 -45.6500 -0.7960 2.3210 C.ar 1 MOL 0.1900 + 13 C20 -46.1530 -0.6720 3.5920 C.ar 1 MOL -0.1500 + 14 C21 -45.7270 -1.5680 4.5600 C.ar 1 MOL -0.1500 + 15 C22 -44.8000 -2.5500 4.2510 C.ar 1 MOL -0.1500 + 16 C23 -44.3300 -2.6200 2.9600 C.ar 1 MOL 0.1900 + 17 F2 -43.3970 -3.5570 2.6630 F 1 MOL -0.1900 + 18 F1 -46.0540 0.0830 1.3780 F 1 MOL -0.1900 + 19 F -46.4730 -1.8840 -0.3750 F 1 MOL -0.1900 + 20 N3 -42.9350 -2.0310 0.3670 N.ar 1 MOL -0.6200 + 21 O -40.5010 -2.8530 -2.0760 O.2 1 MOL -0.5700 + 22 N2 -40.3140 -2.4310 0.1650 N.am 1 MOL -0.5470 + 23 C10 -37.9850 -2.8640 -0.4850 C.ar 1 MOL 0.1600 + 24 N1 -36.7290 -3.2430 -0.2500 N.ar 1 MOL -0.6200 + 25 C9 -36.4530 -3.6660 0.9820 C.ar 1 MOL 0.1600 + 26 C8 -37.3800 -3.7140 2.0020 C.ar 1 MOL -0.1500 + 27 C7 -38.6900 -3.3130 1.7630 C.ar 1 MOL -0.1435 + 28 C5 -39.7190 -3.2860 2.8850 C.3 1 MOL 0.1435 + 29 C4 -39.5270 -4.3670 3.9490 C.3 1 MOL 0.0000 + 30 N -40.5840 -5.4540 5.8860 N.4 1 MOL -0.8530 + 31 C6 -39.7900 -1.9210 3.5720 C.3 1 MOL 0.0000 + 32 C -41.0440 -0.5240 5.2440 C.3 1 MOL 0.0000 + 33 H1 -41.8925 -2.0091 3.9647 H 1 MOL 0.0000 + 34 H2 -41.7626 -3.0106 6.1776 H 1 MOL 0.0000 + 35 H3 -39.9733 -2.8519 6.1729 H 1 MOL 0.0000 + 36 H4 -41.6484 -4.6036 4.2922 H 1 MOL 0.0000 + 37 H5 -42.8421 -2.3102 -2.9969 H 1 MOL 0.1500 + 38 H6 -45.3071 -2.1305 -2.6795 H 1 MOL 0.1500 + 39 H7 -46.8663 0.1079 3.8330 H 1 MOL 0.1500 + 40 H8 -46.1224 -1.4998 5.5670 H 1 MOL 0.1500 + 41 H9 -44.4539 -3.2454 5.0071 H 1 MOL 0.1500 + 42 H10 -40.8049 -1.9943 0.9189 H 1 MOL 0.3700 + 43 H11 -38.2304 -2.5221 -1.4840 H 1 MOL 0.1500 + 44 H12 -35.4413 -3.9914 1.1956 H 1 MOL 0.1500 + 45 H13 -37.0887 -4.0629 2.9861 H 1 MOL 0.1500 + 46 H14 -40.6958 -3.4691 2.4136 H 1 MOL 0.0000 + 47 H15 -39.4152 -5.3455 3.4591 H 1 MOL 0.0000 + 48 H16 -38.6233 -4.1452 4.5356 H 1 MOL 0.0000 + 49 H17 -41.3904 -5.4638 6.4908 H 1 MOL 0.4500 + 50 H18 -39.7562 -5.2757 6.4327 H 1 MOL 0.4500 + 51 H19 -40.4975 -6.3472 5.4269 H 1 MOL 0.4500 + 52 H20 -38.8561 -1.7452 4.1260 H 1 MOL 0.0000 + 53 H21 -39.9173 -1.1380 2.8099 H 1 MOL 0.0000 + 54 H22 -41.8994 -0.5193 5.9356 H 1 MOL 0.0000 + 55 H23 -41.1733 0.2716 4.4955 H 1 MOL 0.0000 + 56 H24 -40.1157 -0.3497 5.8078 H 1 MOL 0.0000 +@BOND + 1 2 1 1 + 2 1 31 1 + 3 1 32 1 + 4 3 2 1 + 5 29 3 1 + 6 3 30 1 + 7 4 22 1 + 8 4 23 ar + 9 27 4 ar + 10 5 6 1 + 11 5 21 2 + 12 22 5 am + 13 6 7 ar + 14 6 20 ar + 15 7 8 ar + 16 8 9 ar + 17 10 9 ar + 18 9 19 1 + 19 10 11 1 + 20 20 10 ar + 21 11 12 ar + 22 11 16 ar + 23 12 13 ar + 24 12 18 1 + 25 13 14 ar + 26 15 14 ar + 27 16 15 ar + 28 16 17 1 + 29 23 24 ar + 30 25 24 ar + 31 26 25 ar + 32 27 26 ar + 33 28 27 1 + 34 28 29 1 + 35 28 31 1 + 36 1 33 1 + 37 2 34 1 + 38 2 35 1 + 39 3 36 1 + 40 7 37 1 + 41 8 38 1 + 42 13 39 1 + 43 14 40 1 + 44 15 41 1 + 45 22 42 1 + 46 23 43 1 + 47 25 44 1 + 48 26 45 1 + 49 28 46 1 + 50 29 47 1 + 51 29 48 1 + 52 30 49 1 + 53 30 50 1 + 54 30 51 1 + 55 31 52 1 + 56 31 53 1 + 57 32 54 1 + 58 32 55 1 + 59 32 56 1 +@SUBSTRUCTURE + 1 MOL 1 PERM 0 **** **** 0 ROOT +@NORMAL +@ALT_TYPE +MMFF94_ALT_TYPE_SET +MMFF94 1 CR 2 CR 3 CR 28 CR 29 CR 31 CR 32 CR 4 CB 6 CB 7 CB 8 CB 9 CB \ +10 CB 11 CB 12 CB 13 CB 14 CB 15 CB 16 CB 23 CB 25 CB \ +26 CB 27 CB 5 C=ON 33 HC 34 HC 35 HC 36 HC 37 HC 38 HC \ +39 HC 40 HC 41 HC 43 HC 44 HC 45 HC 46 HC 47 HC 48 HC \ +52 HC 53 HC 54 HC 55 HC 56 HC 42 HNCC 49 HNR+ 50 HNR+ \ +51 HNR+ 21 O=CN 20 NPYD 24 NPYD 30 NR+ 22 NC=O 17 F \ +18 F 19 F diff --git a/tests/test_data/ligands/5dwr_ligand.pdb b/tests/test_data/ligands/5dwr_ligand.pdb new file mode 100644 index 0000000..50bb8a6 --- /dev/null +++ b/tests/test_data/ligands/5dwr_ligand.pdb @@ -0,0 +1,116 @@ +COMPND 5dwr_ligand +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +ATOM 1 C1 MOL A 1 -40.966 -1.874 4.542 1.00 0.00 C +ATOM 2 C2 MOL A 1 -40.862 -3.017 5.546 1.00 0.00 C +ATOM 3 C3 MOL A 1 -40.741 -4.401 4.880 1.00 0.00 C +ATOM 4 C11 MOL A 1 -39.009 -2.877 0.469 1.00 0.00 C +ATOM 5 C12 MOL A 1 -40.994 -2.511 -1.002 1.00 0.00 C +ATOM 6 C13 MOL A 1 -42.457 -2.200 -0.876 1.00 0.00 C +ATOM 7 C14 MOL A 1 -43.269 -2.215 -2.005 1.00 0.00 C +ATOM 8 C15 MOL A 1 -44.638 -2.105 -1.827 1.00 0.00 C +ATOM 9 C16 MOL A 1 -45.136 -1.963 -0.554 1.00 0.00 C +ATOM 10 C17 MOL A 1 -44.261 -1.920 0.532 1.00 0.00 C +ATOM 11 C18 MOL A 1 -44.735 -1.771 1.940 1.00 0.00 C +ATOM 12 C19 MOL A 1 -45.650 -0.796 2.321 1.00 0.00 C +ATOM 13 C20 MOL A 1 -46.153 -0.672 3.592 1.00 0.00 C +ATOM 14 C21 MOL A 1 -45.727 -1.568 4.560 1.00 0.00 C +ATOM 15 C22 MOL A 1 -44.800 -2.550 4.251 1.00 0.00 C +ATOM 16 C23 MOL A 1 -44.330 -2.620 2.960 1.00 0.00 C +ATOM 17 F2 MOL A 1 -43.397 -3.557 2.663 1.00 0.00 F +ATOM 18 F1 MOL A 1 -46.054 0.083 1.378 1.00 0.00 F +ATOM 19 F MOL A 1 -46.473 -1.884 -0.375 1.00 0.00 F +ATOM 20 N3 MOL A 1 -42.935 -2.031 0.367 1.00 0.00 N +ATOM 21 O MOL A 1 -40.501 -2.853 -2.076 1.00 0.00 O +ATOM 22 N2 MOL A 1 -40.314 -2.431 0.165 1.00 0.00 N +ATOM 23 C10 MOL A 1 -37.985 -2.864 -0.485 1.00 0.00 C +ATOM 24 N1 MOL A 1 -36.729 -3.243 -0.250 1.00 0.00 N +ATOM 25 C9 MOL A 1 -36.453 -3.666 0.982 1.00 0.00 C +ATOM 26 C8 MOL A 1 -37.380 -3.714 2.002 1.00 0.00 C +ATOM 27 C7 MOL A 1 -38.690 -3.313 1.763 1.00 0.00 C +ATOM 28 C5 MOL A 1 -39.719 -3.286 2.885 1.00 0.00 C +ATOM 29 C4 MOL A 1 -39.527 -4.367 3.949 1.00 0.00 C +ATOM 30 N MOL A 1 -40.584 -5.454 5.886 1.00 0.00 N1+ +ATOM 31 C6 MOL A 1 -39.790 -1.921 3.572 1.00 0.00 C +ATOM 32 C MOL A 1 -41.044 -0.524 5.244 1.00 0.00 C +ATOM 33 H1 MOL A 1 -41.892 -2.009 3.965 1.00 0.00 H +ATOM 34 H2 MOL A 1 -41.763 -3.011 6.178 1.00 0.00 H +ATOM 35 H3 MOL A 1 -39.973 -2.852 6.173 1.00 0.00 H +ATOM 36 H4 MOL A 1 -41.648 -4.604 4.292 1.00 0.00 H +ATOM 37 H5 MOL A 1 -42.842 -2.310 -2.997 1.00 0.00 H +ATOM 38 H6 MOL A 1 -45.307 -2.131 -2.679 1.00 0.00 H +ATOM 39 H7 MOL A 1 -46.866 0.108 3.833 1.00 0.00 H +ATOM 40 H8 MOL A 1 -46.122 -1.500 5.567 1.00 0.00 H +ATOM 41 H9 MOL A 1 -44.454 -3.245 5.007 1.00 0.00 H +ATOM 42 H10 MOL A 1 -40.805 -1.994 0.919 1.00 0.00 H +ATOM 43 H11 MOL A 1 -38.230 -2.522 -1.484 1.00 0.00 H +ATOM 44 H12 MOL A 1 -35.441 -3.991 1.196 1.00 0.00 H +ATOM 45 H13 MOL A 1 -37.089 -4.063 2.986 1.00 0.00 H +ATOM 46 H14 MOL A 1 -40.696 -3.469 2.414 1.00 0.00 H +ATOM 47 H15 MOL A 1 -39.415 -5.346 3.459 1.00 0.00 H +ATOM 48 H16 MOL A 1 -38.623 -4.145 4.536 1.00 0.00 H +ATOM 49 H17 MOL A 1 -41.390 -5.464 6.491 1.00 0.00 H +ATOM 50 H18 MOL A 1 -39.756 -5.276 6.433 1.00 0.00 H +ATOM 51 H19 MOL A 1 -40.498 -6.347 5.427 1.00 0.00 H +ATOM 52 H20 MOL A 1 -38.856 -1.745 4.126 1.00 0.00 H +ATOM 53 H21 MOL A 1 -39.917 -1.138 2.810 1.00 0.00 H +ATOM 54 H22 MOL A 1 -41.899 -0.519 5.936 1.00 0.00 H +ATOM 55 H23 MOL A 1 -41.173 0.272 4.495 1.00 0.00 H +ATOM 56 H24 MOL A 1 -40.116 -0.350 5.808 1.00 0.00 H +CONECT 1 2 31 32 33 +CONECT 2 1 3 34 35 +CONECT 3 2 29 30 36 +CONECT 4 22 23 23 27 +CONECT 5 6 21 21 22 +CONECT 6 5 7 7 20 +CONECT 7 6 6 8 37 +CONECT 8 7 9 9 38 +CONECT 9 8 8 10 19 +CONECT 10 9 11 20 20 +CONECT 11 10 12 12 16 +CONECT 12 11 11 13 18 +CONECT 13 12 14 14 39 +CONECT 14 13 13 15 40 +CONECT 15 14 16 16 41 +CONECT 16 11 15 15 17 +CONECT 17 16 +CONECT 18 12 +CONECT 19 9 +CONECT 20 6 10 10 +CONECT 21 5 5 +CONECT 22 4 5 42 +CONECT 23 4 4 24 43 +CONECT 24 23 25 25 +CONECT 25 24 24 26 44 +CONECT 26 25 27 27 45 +CONECT 27 4 26 26 28 +CONECT 28 27 29 31 46 +CONECT 29 3 28 47 48 +CONECT 30 3 49 50 51 +CONECT 31 1 28 52 53 +CONECT 32 1 54 55 56 +CONECT 33 1 +CONECT 34 2 +CONECT 35 2 +CONECT 36 3 +CONECT 37 7 +CONECT 38 8 +CONECT 39 13 +CONECT 40 14 +CONECT 41 15 +CONECT 42 22 +CONECT 43 23 +CONECT 44 25 +CONECT 45 26 +CONECT 46 28 +CONECT 47 29 +CONECT 48 29 +CONECT 49 30 +CONECT 50 30 +CONECT 51 30 +CONECT 52 31 +CONECT 53 31 +CONECT 54 32 +CONECT 55 32 +CONECT 56 32 +MASTER 0 0 0 0 0 0 0 0 56 0 56 0 +END diff --git a/tests/test_data/ligands/5dwr_ligand.sdf b/tests/test_data/ligands/5dwr_ligand.sdf new file mode 100644 index 0000000..d6323f4 --- /dev/null +++ b/tests/test_data/ligands/5dwr_ligand.sdf @@ -0,0 +1,122 @@ +5dwr_ligand + OpenBabel02162622323D + + 56 59 0 0 1 0 0 0 0 0999 V2000 + -40.9660 -1.8740 4.5420 C 0 0 2 0 0 0 0 0 0 0 0 0 + -40.8620 -3.0170 5.5460 C 0 0 0 0 0 0 0 0 0 0 0 0 + -40.7410 -4.4010 4.8800 C 0 0 1 0 0 0 0 0 0 0 0 0 + -39.0090 -2.8770 0.4690 C 0 0 0 0 0 0 0 0 0 0 0 0 + -40.9940 -2.5110 -1.0020 C 0 0 0 0 0 0 0 0 0 0 0 0 + -42.4570 -2.2000 -0.8760 C 0 0 0 0 0 0 0 0 0 0 0 0 + -43.2690 -2.2150 -2.0050 C 0 0 0 0 0 0 0 0 0 0 0 0 + -44.6380 -2.1050 -1.8270 C 0 0 0 0 0 0 0 0 0 0 0 0 + -45.1360 -1.9630 -0.5540 C 0 0 0 0 0 0 0 0 0 0 0 0 + -44.2610 -1.9200 0.5320 C 0 0 0 0 0 0 0 0 0 0 0 0 + -44.7350 -1.7710 1.9400 C 0 0 0 0 0 0 0 0 0 0 0 0 + -45.6500 -0.7960 2.3210 C 0 0 0 0 0 0 0 0 0 0 0 0 + -46.1530 -0.6720 3.5920 C 0 0 0 0 0 0 0 0 0 0 0 0 + -45.7270 -1.5680 4.5600 C 0 0 0 0 0 0 0 0 0 0 0 0 + -44.8000 -2.5500 4.2510 C 0 0 0 0 0 0 0 0 0 0 0 0 + -44.3300 -2.6200 2.9600 C 0 0 0 0 0 0 0 0 0 0 0 0 + -43.3970 -3.5570 2.6630 F 0 0 0 0 0 0 0 0 0 0 0 0 + -46.0540 0.0830 1.3780 F 0 0 0 0 0 0 0 0 0 0 0 0 + -46.4730 -1.8840 -0.3750 F 0 0 0 0 0 0 0 0 0 0 0 0 + -42.9350 -2.0310 0.3670 N 0 0 0 0 0 0 0 0 0 0 0 0 + -40.5010 -2.8530 -2.0760 O 0 0 0 0 0 0 0 0 0 0 0 0 + -40.3140 -2.4310 0.1650 N 0 0 0 0 0 0 0 0 0 0 0 0 + -37.9850 -2.8640 -0.4850 C 0 0 0 0 0 0 0 0 0 0 0 0 + -36.7290 -3.2430 -0.2500 N 0 0 0 0 0 0 0 0 0 0 0 0 + -36.4530 -3.6660 0.9820 C 0 0 0 0 0 0 0 0 0 0 0 0 + -37.3800 -3.7140 2.0020 C 0 0 0 0 0 0 0 0 0 0 0 0 + -38.6900 -3.3130 1.7630 C 0 0 0 0 0 0 0 0 0 0 0 0 + -39.7190 -3.2860 2.8850 C 0 0 1 0 0 0 0 0 0 0 0 0 + -39.5270 -4.3670 3.9490 C 0 0 0 0 0 0 0 0 0 0 0 0 + -40.5840 -5.4540 5.8860 N 0 3 0 0 0 0 0 0 0 0 0 0 + -39.7900 -1.9210 3.5720 C 0 0 0 0 0 0 0 0 0 0 0 0 + -41.0440 -0.5240 5.2440 C 0 0 0 0 0 0 0 0 0 0 0 0 + -41.8925 -2.0091 3.9647 H 0 0 0 0 0 0 0 0 0 0 0 0 + -41.7626 -3.0106 6.1776 H 0 0 0 0 0 0 0 0 0 0 0 0 + -39.9733 -2.8519 6.1729 H 0 0 0 0 0 0 0 0 0 0 0 0 + -41.6484 -4.6036 4.2922 H 0 0 0 0 0 0 0 0 0 0 0 0 + -42.8421 -2.3102 -2.9969 H 0 0 0 0 0 0 0 0 0 0 0 0 + -45.3071 -2.1305 -2.6795 H 0 0 0 0 0 0 0 0 0 0 0 0 + -46.8663 0.1079 3.8330 H 0 0 0 0 0 0 0 0 0 0 0 0 + -46.1224 -1.4998 5.5670 H 0 0 0 0 0 0 0 0 0 0 0 0 + -44.4539 -3.2454 5.0071 H 0 0 0 0 0 0 0 0 0 0 0 0 + -40.8049 -1.9943 0.9189 H 0 0 0 0 0 0 0 0 0 0 0 0 + -38.2304 -2.5221 -1.4840 H 0 0 0 0 0 0 0 0 0 0 0 0 + -35.4413 -3.9914 1.1956 H 0 0 0 0 0 0 0 0 0 0 0 0 + -37.0887 -4.0629 2.9861 H 0 0 0 0 0 0 0 0 0 0 0 0 + -40.6958 -3.4691 2.4136 H 0 0 0 0 0 0 0 0 0 0 0 0 + -39.4152 -5.3455 3.4591 H 0 0 0 0 0 0 0 0 0 0 0 0 + -38.6233 -4.1452 4.5356 H 0 0 0 0 0 0 0 0 0 0 0 0 + -41.3904 -5.4638 6.4908 H 0 0 0 0 0 0 0 0 0 0 0 0 + -39.7562 -5.2757 6.4327 H 0 0 0 0 0 0 0 0 0 0 0 0 + -40.4975 -6.3472 5.4269 H 0 0 0 0 0 0 0 0 0 0 0 0 + -38.8561 -1.7452 4.1260 H 0 0 0 0 0 0 0 0 0 0 0 0 + -39.9173 -1.1380 2.8099 H 0 0 0 0 0 0 0 0 0 0 0 0 + -41.8994 -0.5193 5.9356 H 0 0 0 0 0 0 0 0 0 0 0 0 + -41.1733 0.2716 4.4955 H 0 0 0 0 0 0 0 0 0 0 0 0 + -40.1157 -0.3497 5.8078 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 31 1 0 0 0 0 + 1 32 1 0 0 0 0 + 1 33 1 6 0 0 0 + 2 1 1 0 0 0 0 + 2 34 1 0 0 0 0 + 2 35 1 0 0 0 0 + 3 2 1 0 0 0 0 + 3 30 1 0 0 0 0 + 3 36 1 6 0 0 0 + 4 22 1 0 0 0 0 + 4 23 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 21 2 0 0 0 0 + 6 7 2 0 0 0 0 + 6 20 1 0 0 0 0 + 7 8 1 0 0 0 0 + 7 37 1 0 0 0 0 + 8 9 2 0 0 0 0 + 8 38 1 0 0 0 0 + 9 19 1 0 0 0 0 + 10 9 1 0 0 0 0 + 10 11 1 0 0 0 0 + 11 12 2 0 0 0 0 + 11 16 1 0 0 0 0 + 12 13 1 0 0 0 0 + 12 18 1 0 0 0 0 + 13 14 2 0 0 0 0 + 13 39 1 0 0 0 0 + 14 40 1 0 0 0 0 + 15 14 1 0 0 0 0 + 15 41 1 0 0 0 0 + 16 15 2 0 0 0 0 + 16 17 1 0 0 0 0 + 20 10 2 0 0 0 0 + 22 5 1 0 0 0 0 + 22 42 1 0 0 0 0 + 23 24 1 0 0 0 0 + 23 43 1 0 0 0 0 + 25 24 2 0 0 0 0 + 25 44 1 0 0 0 0 + 26 25 1 0 0 0 0 + 26 45 1 0 0 0 0 + 27 4 1 0 0 0 0 + 27 26 2 0 0 0 0 + 28 27 1 0 0 0 0 + 28 29 1 0 0 0 0 + 28 31 1 0 0 0 0 + 28 46 1 6 0 0 0 + 29 3 1 0 0 0 0 + 29 47 1 0 0 0 0 + 29 48 1 0 0 0 0 + 30 49 1 0 0 0 0 + 30 50 1 0 0 0 0 + 30 51 1 0 0 0 0 + 31 52 1 0 0 0 0 + 31 53 1 0 0 0 0 + 32 54 1 0 0 0 0 + 32 55 1 0 0 0 0 + 32 56 1 0 0 0 0 +M CHG 1 30 1 +M END +$$$$ diff --git a/tests/test_data/ligands/8a0b_ligand.mol2 b/tests/test_data/ligands/8a0b_ligand.mol2 new file mode 100644 index 0000000..34d1ac4 --- /dev/null +++ b/tests/test_data/ligands/8a0b_ligand.mol2 @@ -0,0 +1,58 @@ +@MOLECULE +1.H + 22 25 0 0 0 +SMALL +GASTEIGER + +@ATOM + 1 C 21.4370 -17.8690 -0.8150 C.2 1 UNL1 0.2832 + 2 C 21.0060 -19.3130 -0.6580 C.3 1 UNL1 0.2211 + 3 C 19.4340 -20.3360 0.9110 C.3 1 UNL1 0.0917 + 4 C 18.6730 -19.6930 -0.2290 C.3 1 UNL1 0.2055 + 5 C 18.7640 -20.2730 2.2750 C.ar 1 UNL1 -0.0163 + 6 C 18.9240 -20.1640 4.6780 C.ar 1 UNL1 -0.0003 + 7 C 17.5600 -20.0910 4.7720 C.ar 1 UNL1 -0.0000 + 8 C 16.7920 -20.1690 3.6530 C.ar 1 UNL1 -0.0003 + 9 C 17.3870 -20.2370 2.3920 C.ar 1 UNL1 -0.0041 + 10 C 23.7710 -18.6170 -0.5230 C.3 1 UNL1 0.1204 + 11 C 25.0230 -17.7860 -0.4980 C.ar 1 UNL1 -0.0047 + 12 C 27.0690 -15.9180 -0.5410 C.ar 1 UNL1 -0.0003 + 13 C 24.7330 -16.4420 -0.6890 C.ar 1 UNL1 -0.0047 + 14 C 23.2520 -16.2330 -0.8420 C.3 1 UNL1 0.1204 + 15 O 20.6140 -16.9880 -1.0130 O.2 1 UNL1 -0.2685 + 16 C 20.7870 -19.6370 0.8320 C.3 1 UNL1 0.0829 + 17 N 19.6900 -19.5550 -1.3180 N.4 1 UNL1 0.4508 + 18 C 19.5220 -20.2330 3.4290 C.ar 1 UNL1 -0.0041 + 19 N 22.7330 -17.5980 -0.7330 N.am 1 UNL1 -0.2666 + 20 C 26.3280 -18.2060 -0.3590 C.ar 1 UNL1 -0.0029 + 21 C 27.3550 -17.2670 -0.3560 C.ar 1 UNL1 -0.0003 + 22 C 25.7530 -15.5020 -0.6940 C.ar 1 UNL1 -0.0029 +@UNITY_ATOM_ATTR +17 1 +charge 1 +@BOND + 1 2 1 1 + 2 1 15 2 + 3 1 19 am + 4 16 2 1 + 5 17 2 1 + 6 3 4 1 + 7 3 5 1 + 8 3 16 1 + 9 4 17 1 + 10 5 9 ar + 11 5 18 ar + 12 6 7 ar + 13 6 18 ar + 14 7 8 ar + 15 8 9 ar + 16 10 11 1 + 17 10 19 1 + 18 11 13 ar + 19 11 20 ar + 20 12 21 ar + 21 12 22 ar + 22 13 14 1 + 23 13 22 ar + 24 14 19 1 + 25 20 21 ar diff --git a/tests/test_data/ligands/8a0b_ligand.pdb b/tests/test_data/ligands/8a0b_ligand.pdb new file mode 100644 index 0000000..f127419 --- /dev/null +++ b/tests/test_data/ligands/8a0b_ligand.pdb @@ -0,0 +1,48 @@ +COMPND 1.H +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +HETATM 1 C UNL 1 21.437 -17.869 -0.815 1.00 0.00 C +HETATM 2 C UNL 1 21.006 -19.313 -0.658 1.00 0.00 C +HETATM 3 C UNL 1 19.434 -20.336 0.911 1.00 0.00 C +HETATM 4 C UNL 1 18.673 -19.693 -0.229 1.00 0.00 C +HETATM 5 C UNL 1 18.764 -20.273 2.275 1.00 0.00 C +HETATM 6 C UNL 1 18.924 -20.164 4.678 1.00 0.00 C +HETATM 7 C UNL 1 17.560 -20.091 4.772 1.00 0.00 C +HETATM 8 C UNL 1 16.792 -20.169 3.653 1.00 0.00 C +HETATM 9 C UNL 1 17.387 -20.237 2.392 1.00 0.00 C +HETATM 10 C UNL 1 23.771 -18.617 -0.523 1.00 0.00 C +HETATM 11 C UNL 1 25.023 -17.786 -0.498 1.00 0.00 C +HETATM 12 C UNL 1 27.069 -15.918 -0.541 1.00 0.00 C +HETATM 13 C UNL 1 24.733 -16.442 -0.689 1.00 0.00 C +HETATM 14 C UNL 1 23.252 -16.233 -0.842 1.00 0.00 C +HETATM 15 O UNL 1 20.614 -16.988 -1.013 1.00 0.00 O +HETATM 16 C UNL 1 20.787 -19.637 0.832 1.00 0.00 C +HETATM 17 N UNL 1 19.690 -19.555 -1.318 1.00 0.00 N1+ +HETATM 18 C UNL 1 19.522 -20.233 3.429 1.00 0.00 C +HETATM 19 N UNL 1 22.733 -17.598 -0.733 1.00 0.00 N +HETATM 20 C UNL 1 26.328 -18.206 -0.359 1.00 0.00 C +HETATM 21 C UNL 1 27.355 -17.267 -0.356 1.00 0.00 C +HETATM 22 C UNL 1 25.753 -15.502 -0.694 1.00 0.00 C +CONECT 1 2 15 15 19 +CONECT 2 1 16 17 +CONECT 3 4 5 16 +CONECT 4 3 17 +CONECT 5 3 9 18 18 +CONECT 6 7 7 18 +CONECT 7 6 6 8 +CONECT 8 7 9 9 +CONECT 9 5 8 8 +CONECT 10 11 19 +CONECT 11 10 13 13 20 +CONECT 12 21 22 22 +CONECT 13 11 11 14 22 +CONECT 14 13 19 +CONECT 15 1 1 +CONECT 16 2 3 +CONECT 17 2 4 +CONECT 18 5 5 6 +CONECT 19 1 10 14 +CONECT 20 11 21 21 +CONECT 21 12 20 20 +CONECT 22 12 12 13 +MASTER 0 0 0 0 0 0 0 0 22 0 22 0 +END diff --git a/tests/test_data/ligands/8a0b_ligand.sdf b/tests/test_data/ligands/8a0b_ligand.sdf new file mode 100644 index 0000000..cbd2b0a --- /dev/null +++ b/tests/test_data/ligands/8a0b_ligand.sdf @@ -0,0 +1,54 @@ +1.H + RDKit 3D + + 22 25 0 0 1 0 0 0 0 0999 V2000 + 21.4370 -17.8690 -0.8150 C 0 0 0 0 0 0 0 0 0 0 0 0 + 21.0060 -19.3130 -0.6580 C 0 0 1 0 0 0 0 0 0 0 0 0 + 19.4340 -20.3360 0.9110 C 0 0 2 0 0 0 0 0 0 0 0 0 + 18.6730 -19.6930 -0.2290 C 0 0 0 0 0 0 0 0 0 0 0 0 + 18.7640 -20.2730 2.2750 C 0 0 0 0 0 0 0 0 0 0 0 0 + 18.9240 -20.1640 4.6780 C 0 0 0 0 0 0 0 0 0 0 0 0 + 17.5600 -20.0910 4.7720 C 0 0 0 0 0 0 0 0 0 0 0 0 + 16.7920 -20.1690 3.6530 C 0 0 0 0 0 0 0 0 0 0 0 0 + 17.3870 -20.2370 2.3920 C 0 0 0 0 0 0 0 0 0 0 0 0 + 23.7710 -18.6170 -0.5230 C 0 0 0 0 0 0 0 0 0 0 0 0 + 25.0230 -17.7860 -0.4980 C 0 0 0 0 0 0 0 0 0 0 0 0 + 27.0690 -15.9180 -0.5410 C 0 0 0 0 0 0 0 0 0 0 0 0 + 24.7330 -16.4420 -0.6890 C 0 0 0 0 0 0 0 0 0 0 0 0 + 23.2520 -16.2330 -0.8420 C 0 0 0 0 0 0 0 0 0 0 0 0 + 20.6140 -16.9880 -1.0130 O 0 0 0 0 0 0 0 0 0 0 0 0 + 20.7870 -19.6370 0.8320 C 0 0 0 0 0 0 0 0 0 0 0 0 + 19.6900 -19.5550 -1.3180 N 0 0 0 0 0 4 0 0 0 0 0 0 + 19.5220 -20.2330 3.4290 C 0 0 0 0 0 0 0 0 0 0 0 0 + 22.7330 -17.5980 -0.7330 N 0 0 0 0 0 0 0 0 0 0 0 0 + 26.3280 -18.2060 -0.3590 C 0 0 0 0 0 0 0 0 0 0 0 0 + 27.3550 -17.2670 -0.3560 C 0 0 0 0 0 0 0 0 0 0 0 0 + 25.7530 -15.5020 -0.6940 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 1 + 1 15 2 0 + 1 19 1 0 + 16 2 1 1 + 17 2 1 1 + 3 4 1 0 + 3 5 1 1 + 3 16 1 0 + 4 17 1 1 + 5 9 1 0 + 5 18 2 0 + 6 7 2 0 + 6 18 1 0 + 7 8 1 0 + 8 9 2 0 + 10 11 1 0 + 10 19 1 1 + 11 13 2 0 + 11 20 1 0 + 12 21 1 0 + 12 22 2 0 + 13 14 1 0 + 13 22 1 0 + 14 19 1 1 + 20 21 2 0 +M CHG 1 17 1 +M END +$$$$ diff --git a/tests/wrapper/rdkit_test.py b/tests/wrapper/rdkit_test.py index 593abb3..0d2eabd 100644 --- a/tests/wrapper/rdkit_test.py +++ b/tests/wrapper/rdkit_test.py @@ -3,7 +3,12 @@ import pytest from rdkit import Chem -from gmol.base.wrapper.rdkit import generate_conformer, smi2mol, write_mols +from gmol.base.wrapper.rdkit import ( + generate_conformer, + read_mols, + smi2mol, + write_mols, +) @pytest.fixture @@ -138,3 +143,135 @@ def test_write_mols_sdf_kekulize(tmp_path: Path, benzene_mol: Chem.Mol): # Kekulized benzene has explicit single/double bonds assert mols[0] is not None assert mols[0].GetNumBonds() == 6 + + +LIGAND_NAMES = ["5dwr_ligand", "8a0b_ligand"] + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_read_mols_sanitize(test_data: Path, ligand: str): + """read_mols with sanitize=True and sanitize=False both return mols.""" + sdf_path = test_data / "ligands" / f"{ligand}.sdf" + mols_sanitize = read_mols(sdf_path, sanitize=True) + mols_no_sanitize = read_mols(sdf_path, sanitize=False) + assert len(mols_sanitize) == len(mols_no_sanitize) + assert len(mols_sanitize) >= 1 + for a, b in zip(mols_sanitize, mols_no_sanitize, strict=True): + assert a.GetNumHeavyAtoms() == b.GetNumHeavyAtoms() + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_read_mols_remove_h(test_data: Path, ligand: str): + """read_mols with remove_h=True returns mols with no more atoms than remove_h=False.""" + sdf_path = test_data / "ligands" / f"{ligand}.sdf" + mols_with_h = read_mols(sdf_path, remove_h=False) + mols_no_h = read_mols(sdf_path, remove_h=True) + assert len(mols_with_h) == len(mols_no_h) + assert len(mols_with_h) >= 1 + for with_h, no_h in zip(mols_with_h, mols_no_h, strict=True): + assert no_h.GetNumHeavyAtoms() == with_h.GetNumHeavyAtoms() + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_write_mols_ligands_sdf(tmp_path: Path, test_data: Path, ligand: str): + """Read SDF from ligands dir, write to tmp SDF, read back and compare count.""" + sdf_path = test_data / "ligands" / f"{ligand}.sdf" + mols = read_mols(sdf_path) + assert len(mols) >= 1 + + out = tmp_path / "out.sdf" + write_mols(out, mols) + assert out.is_file() + roundtrip = list(Chem.SDMolSupplier(str(out))) + roundtrip = [m for m in roundtrip if m is not None] + assert len(roundtrip) == len(mols) + for a, b in zip(mols, roundtrip, strict=True): + assert a.GetNumHeavyAtoms() == b.GetNumHeavyAtoms() + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_write_mols_ligands_multi_sdf( + tmp_path: Path, test_data: Path, ligand: str +): + """Read multi-molecule SDF from ligands dir, write and roundtrip.""" + sdf_path = test_data / "ligands" / f"{ligand}.sdf" + mols = read_mols(sdf_path) + assert len(mols) >= 1 + + out = tmp_path / "out.sdf" + multi_confs = mols * 2 + write_mols(out, multi_confs) + assert out.is_file() + roundtrip = list(Chem.SDMolSupplier(str(out))) + roundtrip = [m for m in roundtrip if m is not None] + assert len(roundtrip) == len(multi_confs) + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_write_mols_ligands_pdb_roundtrip( + tmp_path: Path, test_data: Path, ligand: str +): + """Read PDB from ligands dir, write to tmp PDB, check structure and model count.""" + pdb_path = test_data / "ligands" / f"{ligand}.pdb" + mols = read_mols(pdb_path) + assert len(mols) >= 1 + + out = tmp_path / "out.pdb" + write_mols(out, mols) + assert out.is_file() + content = out.read_text() + assert "MODEL 1" in content + assert "ENDMDL" in content + assert content.count("MODEL") == len(mols) + assert content.count("ENDMDL") == len(mols) + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_write_mols_ligands_from_mol2_to_sdf( + tmp_path: Path, test_data: Path, ligand: str +): + """Read MOL2 from ligands dir, write as SDF, verify mol count.""" + mol2_path = test_data / "ligands" / f"{ligand}.mol2" + mols = read_mols(mol2_path) + assert len(mols) >= 1 + + out = tmp_path / "out.sdf" + write_mols(out, mols) + assert out.is_file() + written = list(Chem.SDMolSupplier(str(out))) + written = [m for m in written if m is not None] + assert len(written) == len(mols) + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_write_mols_ligands_from_mol2_to_pdb( + tmp_path: Path, test_data: Path, ligand: str +): + """Read MOL2 from ligands dir, write as PDB, verify model count.""" + mol2_path = test_data / "ligands" / f"{ligand}.mol2" + mols = read_mols(mol2_path) + assert len(mols) >= 1 + + out = tmp_path / "out.pdb" + write_mols(out, mols) + assert out.is_file() + content = out.read_text() + assert content.count("MODEL") == len(mols) + assert content.count("ENDMDL") == len(mols) + + +@pytest.mark.parametrize("ligand", LIGAND_NAMES) +def test_write_mols_ligands_skips_none( + tmp_path: Path, test_data: Path, ligand: str +): + """write_mols skips None when list is built from ligand data.""" + sdf_path = test_data / "ligands" / f"{ligand}.sdf" + mols = read_mols(sdf_path) + assert len(mols) >= 1 + mixed = [mols[0], None, mols[0]] + + out = tmp_path / "out.sdf" + write_mols(out, mixed) + written = list(Chem.SDMolSupplier(str(out))) + written = [m for m in written if m is not None] + assert len(written) == 2 From 87c22d755216f70c9a9bbcd4fd13cd8b58c8e8b9 Mon Sep 17 00:00:00 2001 From: bbh-pharm Date: Wed, 18 Feb 2026 12:52:49 +0900 Subject: [PATCH 03/13] fix(wrapper/rdkit): update error message for unsupported file extensions --- src/gmol/base/wrapper/rdkit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index fe6af7b..7779ef4 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -151,7 +151,7 @@ def read_mols( mols = [mol] if mol is not None else [] else: raise ValueError( - f"Unsupported file extension '{ext}'. Supported formats: .mol2, .sdf, .pdbqt, .pdb." + f"Unsupported file extension '{ext}'. Supported formats: .mol2, .sdf, .pdb." ) if sanitize: From 59f9758e347bfafac82792613af7340a072d5398 Mon Sep 17 00:00:00 2001 From: bbh-pharm Date: Wed, 18 Feb 2026 12:54:43 +0900 Subject: [PATCH 04/13] fix(wrapper/rdkit): add validation for supported file extensions in read_mols function --- src/gmol/base/wrapper/rdkit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index 7779ef4..7bee026 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -126,6 +126,10 @@ def read_mols( raise FileNotFoundError(f"File not found: {file_path}") ext = file_path.suffix.lower() + if ext not in (".mol2", ".sdf", ".pdb"): + raise ValueError( + f"Unsupported file extension '{ext}'. Supported formats: .mol2, .sdf, .pdb." + ) try: if ext == ".mol2": @@ -149,10 +153,6 @@ def read_mols( ), ) mols = [mol] if mol is not None else [] - else: - raise ValueError( - f"Unsupported file extension '{ext}'. Supported formats: .mol2, .sdf, .pdb." - ) if sanitize: for mol in mols: From a1a5fc405ff3306f8f04f0d9e686d239360a90d7 Mon Sep 17 00:00:00 2001 From: bbh-pharm Date: Wed, 18 Feb 2026 12:57:55 +0900 Subject: [PATCH 05/13] fix(test/rdkit): update assertion in mol comparison in test_read_mols_remove_h --- tests/wrapper/rdkit_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wrapper/rdkit_test.py b/tests/wrapper/rdkit_test.py index 0d2eabd..e25ac65 100644 --- a/tests/wrapper/rdkit_test.py +++ b/tests/wrapper/rdkit_test.py @@ -169,7 +169,7 @@ def test_read_mols_remove_h(test_data: Path, ligand: str): assert len(mols_with_h) == len(mols_no_h) assert len(mols_with_h) >= 1 for with_h, no_h in zip(mols_with_h, mols_no_h, strict=True): - assert no_h.GetNumHeavyAtoms() == with_h.GetNumHeavyAtoms() + assert no_h.GetNumAtoms() <= with_h.GetNumAtoms() @pytest.mark.parametrize("ligand", LIGAND_NAMES) From ffc12df5b54888cd7933784e6a3617ef4e16b22c Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Wed, 18 Feb 2026 17:49:17 +0900 Subject: [PATCH 06/13] refactor(wrapper/rdkit): don't wrap exception unnecessarily --- src/gmol/base/wrapper/rdkit.py | 54 ++++++++++++++++------------------ 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index 7bee026..facd7dc 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -131,38 +131,34 @@ def read_mols( f"Unsupported file extension '{ext}'. Supported formats: .mol2, .sdf, .pdb." ) - try: - if ext == ".mol2": - mol = cast( - Chem.Mol | None, - Chem.MolFromMol2File( - str(file_path), sanitize=False, removeHs=False - ), - ) - mols = [mol] if mol is not None else [] - elif ext == ".sdf": - with Chem.SDMolSupplier( + if ext == ".mol2": + mol = cast( + Chem.Mol | None, + Chem.MolFromMol2File( str(file_path), sanitize=False, removeHs=False - ) as suppl: - mols = [m for m in suppl if m is not None] # pyright: ignore[reportUnnecessaryComparison] - elif ext == ".pdb": - mol = cast( - Chem.Mol | None, - Chem.MolFromPDBFile( - str(file_path), sanitize=False, removeHs=False - ), - ) - mols = [mol] if mol is not None else [] - - if sanitize: - for mol in mols: - Chem.SanitizeMol(mol) + ), + ) + mols = [mol] if mol is not None else [] + elif ext == ".sdf": + with Chem.SDMolSupplier( + str(file_path), sanitize=False, removeHs=False + ) as suppl: + mols = [m for m in suppl if m is not None] # pyright: ignore[reportUnnecessaryComparison] + elif ext == ".pdb": + mol = cast( + Chem.Mol | None, + Chem.MolFromPDBFile( + str(file_path), sanitize=False, removeHs=False + ), + ) + mols = [mol] if mol is not None else [] - if remove_h: - mols = [Chem.RemoveHs(mol, sanitize=sanitize) for mol in mols] + if sanitize: + for mol in mols: + Chem.SanitizeMol(mol) - except Exception as exc: - raise ValueError(f"Error processing molecule: {exc}") from exc + if remove_h: + mols = [Chem.RemoveHs(mol, sanitize=sanitize) for mol in mols] return mols From 3e79381286a74673ecf486ba2eed43f6d41435cd Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Wed, 18 Feb 2026 17:50:27 +0900 Subject: [PATCH 07/13] feat(wrapper/rdkit): support reading multiple molecules from mol2 file --- src/gmol/base/wrapper/rdkit.py | 38 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index facd7dc..9501674 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -106,6 +106,36 @@ def generate_conformer( raise +def _split_read_mol2(path: str): + + def _read_mol(block: list[str]): + mol2 = "".join(block) + mol = Chem.MolFromMol2Block( + mol2, + sanitize=False, + removeHs=False, + ) + return cast(Chem.Mol | None, mol) + + with open(path) as f: + block: list[str] = [] + + for line in f: + if line.startswith("@MOLECULE") and block: + mol = _read_mol(block) + if mol is not None: + yield mol + + block.clear() + + block.append(line) + + if block: + mol = _read_mol(block) + if mol is not None: + yield mol + + def read_mols( file_path: Path | str, sanitize: bool = True, @@ -132,13 +162,7 @@ def read_mols( ) if ext == ".mol2": - mol = cast( - Chem.Mol | None, - Chem.MolFromMol2File( - str(file_path), sanitize=False, removeHs=False - ), - ) - mols = [mol] if mol is not None else [] + mols = list(_split_read_mol2(str(file_path))) elif ext == ".sdf": with Chem.SDMolSupplier( str(file_path), sanitize=False, removeHs=False From 34c5cfcf975edf860f10073e7ada84d5d68e1030 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Wed, 18 Feb 2026 17:50:49 +0900 Subject: [PATCH 08/13] refactor(wrapper/rdkit): cast instead ignore --- src/gmol/base/wrapper/rdkit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index 9501674..a549057 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -167,7 +167,7 @@ def read_mols( with Chem.SDMolSupplier( str(file_path), sanitize=False, removeHs=False ) as suppl: - mols = [m for m in suppl if m is not None] # pyright: ignore[reportUnnecessaryComparison] + mols = [m for m in suppl if cast(Chem.Mol | None, m) is not None] elif ext == ".pdb": mol = cast( Chem.Mol | None, From 9221b4f99a64579d0452889242bb4c203d4aec5f Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Wed, 18 Feb 2026 17:54:52 +0900 Subject: [PATCH 09/13] test(wrapper/rdkit): test multiple molecule read from mol2 file --- tests/test_data/ligands/ligands.mol2 | 201 +++++++++++++++++++++++++++ tests/wrapper/rdkit_test.py | 12 ++ 2 files changed, 213 insertions(+) create mode 100644 tests/test_data/ligands/ligands.mol2 diff --git a/tests/test_data/ligands/ligands.mol2 b/tests/test_data/ligands/ligands.mol2 new file mode 100644 index 0000000..54d2bda --- /dev/null +++ b/tests/test_data/ligands/ligands.mol2 @@ -0,0 +1,201 @@ +# Name: 5dwr_ligand +# Creating user name: suminyi +# Creation time: Sun Sep 30 06:48:38 2018 + +# Modifying user name: suminyi +# Modification time: Sun Sep 30 06:48:38 2018 + +@MOLECULE +5dwr_ligand + 56 59 1 1 0 +SMALL +MMFF94_CHARGES + + +@ATOM + 1 C1 -40.9660 -1.8740 4.5420 C.3 1 MOL 0.0000 + 2 C2 -40.8620 -3.0170 5.5460 C.3 1 MOL 0.0000 + 3 C3 -40.7410 -4.4010 4.8800 C.3 1 MOL 0.5030 + 4 C11 -39.0090 -2.8770 0.4690 C.ar 1 MOL 0.1170 + 5 C12 -40.9940 -2.5110 -1.0020 C.2 1 MOL 0.5438 + 6 C13 -42.4570 -2.2000 -0.8760 C.ar 1 MOL 0.3962 + 7 C14 -43.2690 -2.2150 -2.0050 C.ar 1 MOL -0.1500 + 8 C15 -44.6380 -2.1050 -1.8270 C.ar 1 MOL -0.1500 + 9 C16 -45.1360 -1.9630 -0.5540 C.ar 1 MOL 0.1900 + 10 C17 -44.2610 -1.9200 0.5320 C.ar 1 MOL 0.3100 + 11 C18 -44.7350 -1.7710 1.9400 C.ar 1 MOL 0.0000 + 12 C19 -45.6500 -0.7960 2.3210 C.ar 1 MOL 0.1900 + 13 C20 -46.1530 -0.6720 3.5920 C.ar 1 MOL -0.1500 + 14 C21 -45.7270 -1.5680 4.5600 C.ar 1 MOL -0.1500 + 15 C22 -44.8000 -2.5500 4.2510 C.ar 1 MOL -0.1500 + 16 C23 -44.3300 -2.6200 2.9600 C.ar 1 MOL 0.1900 + 17 F2 -43.3970 -3.5570 2.6630 F 1 MOL -0.1900 + 18 F1 -46.0540 0.0830 1.3780 F 1 MOL -0.1900 + 19 F -46.4730 -1.8840 -0.3750 F 1 MOL -0.1900 + 20 N3 -42.9350 -2.0310 0.3670 N.ar 1 MOL -0.6200 + 21 O -40.5010 -2.8530 -2.0760 O.2 1 MOL -0.5700 + 22 N2 -40.3140 -2.4310 0.1650 N.am 1 MOL -0.5470 + 23 C10 -37.9850 -2.8640 -0.4850 C.ar 1 MOL 0.1600 + 24 N1 -36.7290 -3.2430 -0.2500 N.ar 1 MOL -0.6200 + 25 C9 -36.4530 -3.6660 0.9820 C.ar 1 MOL 0.1600 + 26 C8 -37.3800 -3.7140 2.0020 C.ar 1 MOL -0.1500 + 27 C7 -38.6900 -3.3130 1.7630 C.ar 1 MOL -0.1435 + 28 C5 -39.7190 -3.2860 2.8850 C.3 1 MOL 0.1435 + 29 C4 -39.5270 -4.3670 3.9490 C.3 1 MOL 0.0000 + 30 N -40.5840 -5.4540 5.8860 N.4 1 MOL -0.8530 + 31 C6 -39.7900 -1.9210 3.5720 C.3 1 MOL 0.0000 + 32 C -41.0440 -0.5240 5.2440 C.3 1 MOL 0.0000 + 33 H1 -41.8925 -2.0091 3.9647 H 1 MOL 0.0000 + 34 H2 -41.7626 -3.0106 6.1776 H 1 MOL 0.0000 + 35 H3 -39.9733 -2.8519 6.1729 H 1 MOL 0.0000 + 36 H4 -41.6484 -4.6036 4.2922 H 1 MOL 0.0000 + 37 H5 -42.8421 -2.3102 -2.9969 H 1 MOL 0.1500 + 38 H6 -45.3071 -2.1305 -2.6795 H 1 MOL 0.1500 + 39 H7 -46.8663 0.1079 3.8330 H 1 MOL 0.1500 + 40 H8 -46.1224 -1.4998 5.5670 H 1 MOL 0.1500 + 41 H9 -44.4539 -3.2454 5.0071 H 1 MOL 0.1500 + 42 H10 -40.8049 -1.9943 0.9189 H 1 MOL 0.3700 + 43 H11 -38.2304 -2.5221 -1.4840 H 1 MOL 0.1500 + 44 H12 -35.4413 -3.9914 1.1956 H 1 MOL 0.1500 + 45 H13 -37.0887 -4.0629 2.9861 H 1 MOL 0.1500 + 46 H14 -40.6958 -3.4691 2.4136 H 1 MOL 0.0000 + 47 H15 -39.4152 -5.3455 3.4591 H 1 MOL 0.0000 + 48 H16 -38.6233 -4.1452 4.5356 H 1 MOL 0.0000 + 49 H17 -41.3904 -5.4638 6.4908 H 1 MOL 0.4500 + 50 H18 -39.7562 -5.2757 6.4327 H 1 MOL 0.4500 + 51 H19 -40.4975 -6.3472 5.4269 H 1 MOL 0.4500 + 52 H20 -38.8561 -1.7452 4.1260 H 1 MOL 0.0000 + 53 H21 -39.9173 -1.1380 2.8099 H 1 MOL 0.0000 + 54 H22 -41.8994 -0.5193 5.9356 H 1 MOL 0.0000 + 55 H23 -41.1733 0.2716 4.4955 H 1 MOL 0.0000 + 56 H24 -40.1157 -0.3497 5.8078 H 1 MOL 0.0000 +@BOND + 1 2 1 1 + 2 1 31 1 + 3 1 32 1 + 4 3 2 1 + 5 29 3 1 + 6 3 30 1 + 7 4 22 1 + 8 4 23 ar + 9 27 4 ar + 10 5 6 1 + 11 5 21 2 + 12 22 5 am + 13 6 7 ar + 14 6 20 ar + 15 7 8 ar + 16 8 9 ar + 17 10 9 ar + 18 9 19 1 + 19 10 11 1 + 20 20 10 ar + 21 11 12 ar + 22 11 16 ar + 23 12 13 ar + 24 12 18 1 + 25 13 14 ar + 26 15 14 ar + 27 16 15 ar + 28 16 17 1 + 29 23 24 ar + 30 25 24 ar + 31 26 25 ar + 32 27 26 ar + 33 28 27 1 + 34 28 29 1 + 35 28 31 1 + 36 1 33 1 + 37 2 34 1 + 38 2 35 1 + 39 3 36 1 + 40 7 37 1 + 41 8 38 1 + 42 13 39 1 + 43 14 40 1 + 44 15 41 1 + 45 22 42 1 + 46 23 43 1 + 47 25 44 1 + 48 26 45 1 + 49 28 46 1 + 50 29 47 1 + 51 29 48 1 + 52 30 49 1 + 53 30 50 1 + 54 30 51 1 + 55 31 52 1 + 56 31 53 1 + 57 32 54 1 + 58 32 55 1 + 59 32 56 1 +@SUBSTRUCTURE + 1 MOL 1 PERM 0 **** **** 0 ROOT +@NORMAL +@ALT_TYPE +MMFF94_ALT_TYPE_SET +MMFF94 1 CR 2 CR 3 CR 28 CR 29 CR 31 CR 32 CR 4 CB 6 CB 7 CB 8 CB 9 CB \ +10 CB 11 CB 12 CB 13 CB 14 CB 15 CB 16 CB 23 CB 25 CB \ +26 CB 27 CB 5 C=ON 33 HC 34 HC 35 HC 36 HC 37 HC 38 HC \ +39 HC 40 HC 41 HC 43 HC 44 HC 45 HC 46 HC 47 HC 48 HC \ +52 HC 53 HC 54 HC 55 HC 56 HC 42 HNCC 49 HNR+ 50 HNR+ \ +51 HNR+ 21 O=CN 20 NPYD 24 NPYD 30 NR+ 22 NC=O 17 F \ +18 F 19 F +@MOLECULE +1.H + 22 25 0 0 0 +SMALL +GASTEIGER + +@ATOM + 1 C 21.4370 -17.8690 -0.8150 C.2 1 UNL1 0.2832 + 2 C 21.0060 -19.3130 -0.6580 C.3 1 UNL1 0.2211 + 3 C 19.4340 -20.3360 0.9110 C.3 1 UNL1 0.0917 + 4 C 18.6730 -19.6930 -0.2290 C.3 1 UNL1 0.2055 + 5 C 18.7640 -20.2730 2.2750 C.ar 1 UNL1 -0.0163 + 6 C 18.9240 -20.1640 4.6780 C.ar 1 UNL1 -0.0003 + 7 C 17.5600 -20.0910 4.7720 C.ar 1 UNL1 -0.0000 + 8 C 16.7920 -20.1690 3.6530 C.ar 1 UNL1 -0.0003 + 9 C 17.3870 -20.2370 2.3920 C.ar 1 UNL1 -0.0041 + 10 C 23.7710 -18.6170 -0.5230 C.3 1 UNL1 0.1204 + 11 C 25.0230 -17.7860 -0.4980 C.ar 1 UNL1 -0.0047 + 12 C 27.0690 -15.9180 -0.5410 C.ar 1 UNL1 -0.0003 + 13 C 24.7330 -16.4420 -0.6890 C.ar 1 UNL1 -0.0047 + 14 C 23.2520 -16.2330 -0.8420 C.3 1 UNL1 0.1204 + 15 O 20.6140 -16.9880 -1.0130 O.2 1 UNL1 -0.2685 + 16 C 20.7870 -19.6370 0.8320 C.3 1 UNL1 0.0829 + 17 N 19.6900 -19.5550 -1.3180 N.4 1 UNL1 0.4508 + 18 C 19.5220 -20.2330 3.4290 C.ar 1 UNL1 -0.0041 + 19 N 22.7330 -17.5980 -0.7330 N.am 1 UNL1 -0.2666 + 20 C 26.3280 -18.2060 -0.3590 C.ar 1 UNL1 -0.0029 + 21 C 27.3550 -17.2670 -0.3560 C.ar 1 UNL1 -0.0003 + 22 C 25.7530 -15.5020 -0.6940 C.ar 1 UNL1 -0.0029 +@UNITY_ATOM_ATTR +17 1 +charge 1 +@BOND + 1 2 1 1 + 2 1 15 2 + 3 1 19 am + 4 16 2 1 + 5 17 2 1 + 6 3 4 1 + 7 3 5 1 + 8 3 16 1 + 9 4 17 1 + 10 5 9 ar + 11 5 18 ar + 12 6 7 ar + 13 6 18 ar + 14 7 8 ar + 15 8 9 ar + 16 10 11 1 + 17 10 19 1 + 18 11 13 ar + 19 11 20 ar + 20 12 21 ar + 21 12 22 ar + 22 13 14 1 + 23 13 22 ar + 24 14 19 1 + 25 20 21 ar diff --git a/tests/wrapper/rdkit_test.py b/tests/wrapper/rdkit_test.py index e25ac65..1ddb575 100644 --- a/tests/wrapper/rdkit_test.py +++ b/tests/wrapper/rdkit_test.py @@ -172,6 +172,18 @@ def test_read_mols_remove_h(test_data: Path, ligand: str): assert no_h.GetNumAtoms() <= with_h.GetNumAtoms() +def test_read_mols_multiple_mol2(test_data: Path): + mol2_path = test_data / "ligands" / "ligands.mol2" + mols = read_mols(mol2_path) + assert len(mols) == 2 + + assert mols[0].GetNumAtoms() == 56 + assert mols[0].GetNumBonds() == 59 + + assert mols[1].GetNumAtoms() == 22 + assert mols[1].GetNumBonds() == 25 + + @pytest.mark.parametrize("ligand", LIGAND_NAMES) def test_write_mols_ligands_sdf(tmp_path: Path, test_data: Path, ligand: str): """Read SDF from ligands dir, write to tmp SDF, read back and compare count.""" From 541b3d0f09364acf87e9b3d1c81002fc74d22ed5 Mon Sep 17 00:00:00 2001 From: bbh-pharm Date: Wed, 18 Feb 2026 18:06:19 +0900 Subject: [PATCH 10/13] fix(wrapper/rdkit): add support for reading multiple models from PDB files --- src/gmol/base/wrapper/rdkit.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index a549057..fb89e74 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -136,6 +136,22 @@ def _read_mol(block: list[str]): yield mol +_model_re = re.compile(r"^MODEL\s+\d+\s*$", re.MULTILINE) +_endmdl_re = re.compile(r"^ENDMDL\s*$", re.MULTILINE) + + +def _split_pdb_models(pdb_text: str) -> list[str]: + starts = [m.start() for m in _model_re.finditer(pdb_text)] + ends = [m.end() for m in _endmdl_re.finditer(pdb_text)] + if not starts or not ends or len(starts) != len(ends): + return [pdb_text] + + blocks: list[str] = [] + for s, e in zip(starts, ends, strict=True): + blocks.append(pdb_text[s:e] + "\nEND\n") + return blocks + + def read_mols( file_path: Path | str, sanitize: bool = True, @@ -169,13 +185,16 @@ def read_mols( ) as suppl: mols = [m for m in suppl if cast(Chem.Mol | None, m) is not None] elif ext == ".pdb": - mol = cast( - Chem.Mol | None, - Chem.MolFromPDBFile( - str(file_path), sanitize=False, removeHs=False - ), - ) - mols = [mol] if mol is not None else [] + pdb_text = file_path.read_text() + blocks = _split_pdb_models(pdb_text) + mols = [] + for blk in blocks: + mol = cast( + Chem.Mol | None, + Chem.MolFromPDBBlock(blk, sanitize=False, removeHs=False), + ) + if mol is not None: + mols.append(mol) if sanitize: for mol in mols: From 254be1426336f0e58d170eb7d722d0bec5ee8c15 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Wed, 18 Feb 2026 21:37:45 +0900 Subject: [PATCH 11/13] refactor(wrapper/rdkit): avoid whole-file read --- src/gmol/base/wrapper/rdkit.py | 62 ++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index fb89e74..b635119 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -136,20 +136,49 @@ def _read_mol(block: list[str]): yield mol -_model_re = re.compile(r"^MODEL\s+\d+\s*$", re.MULTILINE) -_endmdl_re = re.compile(r"^ENDMDL\s*$", re.MULTILINE) +def _split_read_pdb(path: str): + def _read_mol(block: list[str]): + pdb = "".join(block) + mol = Chem.MolFromPDBBlock( + pdb, + sanitize=False, + removeHs=False, + ) + return cast(Chem.Mol | None, mol) + + with open(path) as f: + block: list[str] = [] + + for line in f: + if line.startswith("MODEL"): + if not block: + continue + + _logger.warning( + ( + "Multiple MODEL entries found in PDB file without " + "ENDMDL. Splitting anyway after MODEL record." + ) + ) + line = "ENDMDL" + + if line.startswith("ENDMDL"): + mol = _read_mol(block) + if mol is not None: + yield mol -def _split_pdb_models(pdb_text: str) -> list[str]: - starts = [m.start() for m in _model_re.finditer(pdb_text)] - ends = [m.end() for m in _endmdl_re.finditer(pdb_text)] - if not starts or not ends or len(starts) != len(ends): - return [pdb_text] + block.clear() + continue + + block.append(line) - blocks: list[str] = [] - for s, e in zip(starts, ends, strict=True): - blocks.append(pdb_text[s:e] + "\nEND\n") - return blocks + if block: + # If no "MODEL/ENDMDL" is found, the whole file is interpreted as a + # single molecule. + mol = _read_mol(block) + if mol is not None: + yield mol def read_mols( @@ -185,16 +214,7 @@ def read_mols( ) as suppl: mols = [m for m in suppl if cast(Chem.Mol | None, m) is not None] elif ext == ".pdb": - pdb_text = file_path.read_text() - blocks = _split_pdb_models(pdb_text) - mols = [] - for blk in blocks: - mol = cast( - Chem.Mol | None, - Chem.MolFromPDBBlock(blk, sanitize=False, removeHs=False), - ) - if mol is not None: - mols.append(mol) + mols = list(_split_read_pdb(str(file_path))) if sanitize: for mol in mols: From 094cc42da43c9a1204dc115720b273743c48bc0f Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Wed, 18 Feb 2026 21:40:16 +0900 Subject: [PATCH 12/13] test(wrapper/rdkit): test reading multiple molecules in PDB --- tests/test_data/ligands/ligands.pdb | 165 ++++++++++++++++++++++++++++ tests/wrapper/rdkit_test.py | 12 ++ 2 files changed, 177 insertions(+) create mode 100644 tests/test_data/ligands/ligands.pdb diff --git a/tests/test_data/ligands/ligands.pdb b/tests/test_data/ligands/ligands.pdb new file mode 100644 index 0000000..6c830f9 --- /dev/null +++ b/tests/test_data/ligands/ligands.pdb @@ -0,0 +1,165 @@ +MODEL 1 +COMPND 5dwr_ligand +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +ATOM 1 C1 MOL A 1 -40.966 -1.874 4.542 1.00 0.00 C +ATOM 2 C2 MOL A 1 -40.862 -3.017 5.546 1.00 0.00 C +ATOM 3 C3 MOL A 1 -40.741 -4.401 4.880 1.00 0.00 C +ATOM 4 C11 MOL A 1 -39.009 -2.877 0.469 1.00 0.00 C +ATOM 5 C12 MOL A 1 -40.994 -2.511 -1.002 1.00 0.00 C +ATOM 6 C13 MOL A 1 -42.457 -2.200 -0.876 1.00 0.00 C +ATOM 7 C14 MOL A 1 -43.269 -2.215 -2.005 1.00 0.00 C +ATOM 8 C15 MOL A 1 -44.638 -2.105 -1.827 1.00 0.00 C +ATOM 9 C16 MOL A 1 -45.136 -1.963 -0.554 1.00 0.00 C +ATOM 10 C17 MOL A 1 -44.261 -1.920 0.532 1.00 0.00 C +ATOM 11 C18 MOL A 1 -44.735 -1.771 1.940 1.00 0.00 C +ATOM 12 C19 MOL A 1 -45.650 -0.796 2.321 1.00 0.00 C +ATOM 13 C20 MOL A 1 -46.153 -0.672 3.592 1.00 0.00 C +ATOM 14 C21 MOL A 1 -45.727 -1.568 4.560 1.00 0.00 C +ATOM 15 C22 MOL A 1 -44.800 -2.550 4.251 1.00 0.00 C +ATOM 16 C23 MOL A 1 -44.330 -2.620 2.960 1.00 0.00 C +ATOM 17 F2 MOL A 1 -43.397 -3.557 2.663 1.00 0.00 F +ATOM 18 F1 MOL A 1 -46.054 0.083 1.378 1.00 0.00 F +ATOM 19 F MOL A 1 -46.473 -1.884 -0.375 1.00 0.00 F +ATOM 20 N3 MOL A 1 -42.935 -2.031 0.367 1.00 0.00 N +ATOM 21 O MOL A 1 -40.501 -2.853 -2.076 1.00 0.00 O +ATOM 22 N2 MOL A 1 -40.314 -2.431 0.165 1.00 0.00 N +ATOM 23 C10 MOL A 1 -37.985 -2.864 -0.485 1.00 0.00 C +ATOM 24 N1 MOL A 1 -36.729 -3.243 -0.250 1.00 0.00 N +ATOM 25 C9 MOL A 1 -36.453 -3.666 0.982 1.00 0.00 C +ATOM 26 C8 MOL A 1 -37.380 -3.714 2.002 1.00 0.00 C +ATOM 27 C7 MOL A 1 -38.690 -3.313 1.763 1.00 0.00 C +ATOM 28 C5 MOL A 1 -39.719 -3.286 2.885 1.00 0.00 C +ATOM 29 C4 MOL A 1 -39.527 -4.367 3.949 1.00 0.00 C +ATOM 30 N MOL A 1 -40.584 -5.454 5.886 1.00 0.00 N1+ +ATOM 31 C6 MOL A 1 -39.790 -1.921 3.572 1.00 0.00 C +ATOM 32 C MOL A 1 -41.044 -0.524 5.244 1.00 0.00 C +ATOM 33 H1 MOL A 1 -41.892 -2.009 3.965 1.00 0.00 H +ATOM 34 H2 MOL A 1 -41.763 -3.011 6.178 1.00 0.00 H +ATOM 35 H3 MOL A 1 -39.973 -2.852 6.173 1.00 0.00 H +ATOM 36 H4 MOL A 1 -41.648 -4.604 4.292 1.00 0.00 H +ATOM 37 H5 MOL A 1 -42.842 -2.310 -2.997 1.00 0.00 H +ATOM 38 H6 MOL A 1 -45.307 -2.131 -2.679 1.00 0.00 H +ATOM 39 H7 MOL A 1 -46.866 0.108 3.833 1.00 0.00 H +ATOM 40 H8 MOL A 1 -46.122 -1.500 5.567 1.00 0.00 H +ATOM 41 H9 MOL A 1 -44.454 -3.245 5.007 1.00 0.00 H +ATOM 42 H10 MOL A 1 -40.805 -1.994 0.919 1.00 0.00 H +ATOM 43 H11 MOL A 1 -38.230 -2.522 -1.484 1.00 0.00 H +ATOM 44 H12 MOL A 1 -35.441 -3.991 1.196 1.00 0.00 H +ATOM 45 H13 MOL A 1 -37.089 -4.063 2.986 1.00 0.00 H +ATOM 46 H14 MOL A 1 -40.696 -3.469 2.414 1.00 0.00 H +ATOM 47 H15 MOL A 1 -39.415 -5.346 3.459 1.00 0.00 H +ATOM 48 H16 MOL A 1 -38.623 -4.145 4.536 1.00 0.00 H +ATOM 49 H17 MOL A 1 -41.390 -5.464 6.491 1.00 0.00 H +ATOM 50 H18 MOL A 1 -39.756 -5.276 6.433 1.00 0.00 H +ATOM 51 H19 MOL A 1 -40.498 -6.347 5.427 1.00 0.00 H +ATOM 52 H20 MOL A 1 -38.856 -1.745 4.126 1.00 0.00 H +ATOM 53 H21 MOL A 1 -39.917 -1.138 2.810 1.00 0.00 H +ATOM 54 H22 MOL A 1 -41.899 -0.519 5.936 1.00 0.00 H +ATOM 55 H23 MOL A 1 -41.173 0.272 4.495 1.00 0.00 H +ATOM 56 H24 MOL A 1 -40.116 -0.350 5.808 1.00 0.00 H +CONECT 1 2 31 32 33 +CONECT 2 1 3 34 35 +CONECT 3 2 29 30 36 +CONECT 4 22 23 23 27 +CONECT 5 6 21 21 22 +CONECT 6 5 7 7 20 +CONECT 7 6 6 8 37 +CONECT 8 7 9 9 38 +CONECT 9 8 8 10 19 +CONECT 10 9 11 20 20 +CONECT 11 10 12 12 16 +CONECT 12 11 11 13 18 +CONECT 13 12 14 14 39 +CONECT 14 13 13 15 40 +CONECT 15 14 16 16 41 +CONECT 16 11 15 15 17 +CONECT 17 16 +CONECT 18 12 +CONECT 19 9 +CONECT 20 6 10 10 +CONECT 21 5 5 +CONECT 22 4 5 42 +CONECT 23 4 4 24 43 +CONECT 24 23 25 25 +CONECT 25 24 24 26 44 +CONECT 26 25 27 27 45 +CONECT 27 4 26 26 28 +CONECT 28 27 29 31 46 +CONECT 29 3 28 47 48 +CONECT 30 3 49 50 51 +CONECT 31 1 28 52 53 +CONECT 32 1 54 55 56 +CONECT 33 1 +CONECT 34 2 +CONECT 35 2 +CONECT 36 3 +CONECT 37 7 +CONECT 38 8 +CONECT 39 13 +CONECT 40 14 +CONECT 41 15 +CONECT 42 22 +CONECT 43 23 +CONECT 44 25 +CONECT 45 26 +CONECT 46 28 +CONECT 47 29 +CONECT 48 29 +CONECT 49 30 +CONECT 50 30 +CONECT 51 30 +CONECT 52 31 +CONECT 53 31 +CONECT 54 32 +CONECT 55 32 +CONECT 56 32 +ENDMDL +MODEL 2 +COMPND 1.H +AUTHOR GENERATED BY OPEN BABEL 3.1.0 +HETATM 1 C UNL 1 21.437 -17.869 -0.815 1.00 0.00 C +HETATM 2 C UNL 1 21.006 -19.313 -0.658 1.00 0.00 C +HETATM 3 C UNL 1 19.434 -20.336 0.911 1.00 0.00 C +HETATM 4 C UNL 1 18.673 -19.693 -0.229 1.00 0.00 C +HETATM 5 C UNL 1 18.764 -20.273 2.275 1.00 0.00 C +HETATM 6 C UNL 1 18.924 -20.164 4.678 1.00 0.00 C +HETATM 7 C UNL 1 17.560 -20.091 4.772 1.00 0.00 C +HETATM 8 C UNL 1 16.792 -20.169 3.653 1.00 0.00 C +HETATM 9 C UNL 1 17.387 -20.237 2.392 1.00 0.00 C +HETATM 10 C UNL 1 23.771 -18.617 -0.523 1.00 0.00 C +HETATM 11 C UNL 1 25.023 -17.786 -0.498 1.00 0.00 C +HETATM 12 C UNL 1 27.069 -15.918 -0.541 1.00 0.00 C +HETATM 13 C UNL 1 24.733 -16.442 -0.689 1.00 0.00 C +HETATM 14 C UNL 1 23.252 -16.233 -0.842 1.00 0.00 C +HETATM 15 O UNL 1 20.614 -16.988 -1.013 1.00 0.00 O +HETATM 16 C UNL 1 20.787 -19.637 0.832 1.00 0.00 C +HETATM 17 N UNL 1 19.690 -19.555 -1.318 1.00 0.00 N1+ +HETATM 18 C UNL 1 19.522 -20.233 3.429 1.00 0.00 C +HETATM 19 N UNL 1 22.733 -17.598 -0.733 1.00 0.00 N +HETATM 20 C UNL 1 26.328 -18.206 -0.359 1.00 0.00 C +HETATM 21 C UNL 1 27.355 -17.267 -0.356 1.00 0.00 C +HETATM 22 C UNL 1 25.753 -15.502 -0.694 1.00 0.00 C +CONECT 1 2 15 15 19 +CONECT 2 1 16 17 +CONECT 3 4 5 16 +CONECT 4 3 17 +CONECT 5 3 9 18 18 +CONECT 6 7 7 18 +CONECT 7 6 6 8 +CONECT 8 7 9 9 +CONECT 9 5 8 8 +CONECT 10 11 19 +CONECT 11 10 13 13 20 +CONECT 12 21 22 22 +CONECT 13 11 11 14 22 +CONECT 14 13 19 +CONECT 15 1 1 +CONECT 16 2 3 +CONECT 17 2 4 +CONECT 18 5 5 6 +CONECT 19 1 10 14 +CONECT 20 11 21 21 +CONECT 21 12 20 20 +CONECT 22 12 12 13 +ENDMDL +END diff --git a/tests/wrapper/rdkit_test.py b/tests/wrapper/rdkit_test.py index 1ddb575..d2b113c 100644 --- a/tests/wrapper/rdkit_test.py +++ b/tests/wrapper/rdkit_test.py @@ -184,6 +184,18 @@ def test_read_mols_multiple_mol2(test_data: Path): assert mols[1].GetNumBonds() == 25 +def test_read_mols_multiple_pdb(test_data: Path): + pdb_path = test_data / "ligands" / "ligands.pdb" + mols = read_mols(pdb_path) + assert len(mols) == 2 + + assert mols[0].GetNumAtoms() == 56 + assert mols[0].GetNumBonds() == 59 + + assert mols[1].GetNumAtoms() == 22 + assert mols[1].GetNumBonds() == 25 + + @pytest.mark.parametrize("ligand", LIGAND_NAMES) def test_write_mols_ligands_sdf(tmp_path: Path, test_data: Path, ligand: str): """Read SDF from ligands dir, write to tmp SDF, read back and compare count.""" From 53239bcd83ba3f6451b64cdb40884384c96315ee Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Thu, 19 Feb 2026 09:36:55 +0900 Subject: [PATCH 13/13] fix(wrapper/rdkit): ignore until first model --- src/gmol/base/wrapper/rdkit.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/gmol/base/wrapper/rdkit.py b/src/gmol/base/wrapper/rdkit.py index b635119..b432221 100644 --- a/src/gmol/base/wrapper/rdkit.py +++ b/src/gmol/base/wrapper/rdkit.py @@ -152,16 +152,8 @@ def _read_mol(block: list[str]): for line in f: if line.startswith("MODEL"): - if not block: - continue - - _logger.warning( - ( - "Multiple MODEL entries found in PDB file without " - "ENDMDL. Splitting anyway after MODEL record." - ) - ) - line = "ENDMDL" + block.clear() + continue if line.startswith("ENDMDL"): mol = _read_mol(block)