diff --git a/.gitignore b/.gitignore
index 378daab..3868f4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
*.pyc
.DS_Store
+*.sdf
profile.json
.vscode
diff --git a/.isort.cfg b/.isort.cfg
index 241e17d..b81b1fe 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -1,9 +1,9 @@
[settings]
-py_version = 312
+skip=.bzr,.direnv,.eggs,.git,.hg,.mypy_cache,.nox,.pants.d,.svn,.tox,.venv,__pypackages__,_build,buck-out,build,dist,node_modules,venv,.pixi,__init__.py
line_length = 88
known_typing = typing,types,typing_extensions,mypy,mypy_extensions
sections = FUTURE,TYPING,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
profile = black
include_trailing_comma = true
multi_line_output = 3
-indent = 4
+indent = 4
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5121912..3f24614 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
### Changed
-- Refactored codebase to use pixi as our development environment and make this package pip installable.
+- Refactored codebase to use pixi as our development environment and make this package pip installable.
## [1.2.1]
diff --git a/README.md b/README.md
index dc2cdd1..b47a4b3 100644
--- a/README.md
+++ b/README.md
@@ -45,72 +45,72 @@ pip install https://github.com/durrantlab/gypsum_dl.git
Gypsum-DL accepts the following command-line parameters:
```text
- -h, --help show this help message and exit
- --json param.json, -j param.json
- Name of a json file containing all parameters.
- Overrides all other arguments specified at the
- commandline.
- --source input.smi, -s input.smi
- Name of the source file (e.g., input.smi). Note:
- support for SMI (SMILES) files is better than support
- for SDF files, though Gypsum-DL can handle both.
- --output_folder OUTPUT_FOLDER, -o OUTPUT_FOLDER
- The path to an existing folder where the Gypsum-DL
- output file(s) will be saved.
- --job_manager {mpi,multiprocessing,serial}
- Determine what style of multiprocessing to use: mpi,
- multiprocessing, or serial. Serial will override the
- num_processors flag, forcing it to be one. MPI mode
- requires mpi4py 2.1.0 or higher and should be executed
- as: mpirun -n $NTASKS python -m mpi4py
- run_gypsum_dl.py ...-settings...
- --num_processors N, -p N
- Number of processors to use for parallel calculations.
- --max_variants_per_compound V, -m V
- The maximum number of variants to create per input
- molecule.
- --thoroughness THOROUGHNESS, -t THOROUGHNESS
- How widely to search for low-energy conformers. Larger
- values increase run times but can produce better
- results.
- --separate_output_files
- Indicates that the outputs should be split between
- files. If true, each output .sdf file will correspond
- to a single input file, but different 3D conformers
- will still be stored in the same file.
- --add_pdb_output Indicates that the outputs should also be written in
- the .pdb format. Creates one PDB file for each
- molecular variant.
- --add_html_output Indicates that the outputs should also be written in
- the .html format, for debugging. Attempts to open a
- browser for viewing.
- --min_ph MIN Minimum pH to consider.
- --max_ph MAX Maximum pH to consider.
- --pka_precision D Size of pH substructure ranges. See Dimorphite-DL
- publication for details.
- --skip_optimize_geometry
- Skips the optimization step.
- --skip_alternate_ring_conformations
- Skips the non-aromatic ring-conformation generation
- step.
- --skip_adding_hydrogen
- Skips the ionization step.
- --skip_making_tautomers
- Skips tautomer-generation step.
- --skip_enumerate_chiral_mol
- Skips the ennumeration of unspecified chiral centers.
- --skip_enumerate_double_bonds
- Skips the ennumeration of double bonds.
- --let_tautomers_change_chirality
- Allow tautomers that change the total number of chiral
- centers (see README.md for further explanation).
- --use_durrant_lab_filters
- Use substructure filters to remove molecular variants
- that, though technically possible, were judged
- improbable by members of the Durrant lab. See
- README.md for more details.
- --2d_output_only Skips the generate-3D-models step.
- --cache_prerun, -c Run this before running Gypsum-DL in mpi mode.
+-h, --help show this help message and exit
+--json param.json, -j param.json
+ Name of a json file containing all parameters.
+ Overrides all other arguments specified at the
+ commandline.
+--source input.smi, -s input.smi
+ Name of the source file (e.g., input.smi). Note:
+ support for SMI (SMILES) files is better than support
+ for SDF files, though Gypsum-DL can handle both.
+--output_folder OUTPUT_FOLDER, -o OUTPUT_FOLDER
+ The path to an existing folder where the Gypsum-DL
+ output file(s) will be saved.
+--job_manager {mpi,multiprocessing,serial}
+ Determine what style of multiprocessing to use: mpi,
+ multiprocessing, or serial. Serial will override the
+ num_processors flag, forcing it to be one. MPI mode
+ requires mpi4py 2.1.0 or higher and should be executed
+ as: mpirun -n $NTASKS python -m mpi4py
+ run_gypsum_dl.py ...-settings...
+--num_processors N, -p N
+ Number of processors to use for parallel calculations.
+--max_variants_per_compound V, -m V
+ The maximum number of variants to create per input
+ molecule.
+--thoroughness THOROUGHNESS, -t THOROUGHNESS
+ How widely to search for low-energy conformers. Larger
+ values increase run times but can produce better
+ results.
+--separate_output_files
+ Indicates that the outputs should be split between
+ files. If true, each output .sdf file will correspond
+ to a single input file, but different 3D conformers
+ will still be stored in the same file.
+--add_pdb_output Indicates that the outputs should also be written in
+ the .pdb format. Creates one PDB file for each
+ molecular variant.
+--add_html_output Indicates that the outputs should also be written in
+ the .html format, for debugging. Attempts to open a
+ browser for viewing.
+--min_ph MIN Minimum pH to consider.
+--max_ph MAX Maximum pH to consider.
+--pka_precision D Size of pH substructure ranges. See Dimorphite-DL
+ publication for details.
+--skip_optimize_geometry
+ Skips the optimization step.
+--skip_alternate_ring_conformations
+ Skips the non-aromatic ring-conformation generation
+ step.
+--skip_adding_hydrogen
+ Skips the ionization step.
+--skip_making_tautomers
+ Skips tautomer-generation step.
+--skip_enumerate_chiral_mol
+ Skips the ennumeration of unspecified chiral centers.
+--skip_enumerate_double_bonds
+ Skips the ennumeration of double bonds.
+--let_tautomers_change_chirality
+ Allow tautomers that change the total number of chiral
+ centers (see README.md for further explanation).
+--use_durrant_lab_filters
+ Use substructure filters to remove molecular variants
+ that, though technically possible, were judged
+ improbable by members of the Durrant lab. See
+ README.md for more details.
+--2d_output_only Skips the generate-3D-models step.
+--cache_prerun, -c Run this before running Gypsum-DL in mpi mode.
```
### Examples
@@ -119,13 +119,13 @@ Prepare a virtual library and save all 3D models to a single SDF file in the
present directory:
```bash
-gypsum-dl --source ./examples/sample_molecules.smi
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi
```
Instead save all 3D models to a different, existing folder:
```bash
-gypsum-dl --source ./examples/sample_molecules.smi \
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi \
--output_folder /my/folder/
```
@@ -133,7 +133,7 @@ Additionally save the models associated with each input molecule to separate
files:
```bash
-gypsum-dl --source ./examples/sample_molecules.smi \
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi \
--output_folder /my/folder/ --separate_output_files
```
@@ -141,42 +141,42 @@ In addition to saving a 3D SDF file, also save 3D PDB files and an HTML file
with 2D structures (for debugging).
```bash
-gypsum-dl --source ./examples/sample_molecules.smi \
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi \
--output_folder /my/folder/ --add_pdb_output --add_html_output
```
Save at most two variants per input molecule:
```bash
-gypsum-dl --source ./examples/sample_molecules.smi \
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi \
--output_folder /my/folder/ --max_variants_per_compound 2
```
Control how Gypsum-DL ionizes the input molecules:
```bash
-gypsum-dl --source ./examples/sample_molecules.smi \
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi \
--output_folder /my/folder/ --min_ph 12 --max_ph 14 --pka_precision 1
```
Run Gypsum-DL in serial mode (using only one processor):
```bash
-gypsum-dl --source ./examples/sample_molecules.smi \
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi \
--job_manager serial
```
Run Gypsum-DL in multiprocessing mode, using 4 processors:
```bash
-gypsum-dl --source ./examples/sample_molecules.smi \
+gypsum-dl --source ./tests/files/sample/sample_molecules.smi \
--job_manager multiprocessing --num_processors 4
```
Run Gypsum-DL in mpi mode using all available processors:
```bash
-mpirun -n $NTASKS python -m mpi4py run_gypsum_dl.py --source ./examples/sample_molecules.smi \
+mpirun -n $NTASKS python -m mpi4py run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \
--job_manager mpi --num_processors -1
```
@@ -190,7 +190,7 @@ Where `myparams.json` might look like:
```json
{
- "source": "./examples/sample_molecules.smi",
+ "source": "./tests/files/sample/sample_molecules.smi",
"separate_output_files": true,
"job_manager": "multiprocessing",
"output_folder": "/my/folder/",
@@ -225,17 +225,17 @@ As always, be sure to examine the structures that Gypsum-DL outputs to ensure th
In looking over many Gypsum-DL-generated variants, we have identified a number of substructures that, though technically possible, strike us as improbable or otherwise poorly suited for virtual screening.
Here are some examples:
-- `C=[N-]`
-- `[N-]C=[N+]`
-- `[nH+]c[n-]`
-- `[#7+]~[#7+]`
-- `[#7-]~[#7-]`
-- `[!#7]~[#7+]~[#7-]~[!#7]`
-- `[#5]` (boron)
-- `O=[PH](=O)([#8])([#8])`
-- `N=c1cc[#7]c[#7]1`
-- `[$([NX2H1]),$([NX3H2])]=C[$([OH]),$([O-])]`
-- Metals
+- `C=[N-]`
+- `[N-]C=[N+]`
+- `[nH+]c[n-]`
+- `[#7+]~[#7+]`
+- `[#7-]~[#7-]`
+- `[!#7]~[#7+]~[#7-]~[!#7]`
+- `[#5]` (boron)
+- `O=[PH](=O)([#8])([#8])`
+- `N=c1cc[#7]c[#7]1`
+- `[$([NX2H1]),$([NX3H2])]=C[$([OH]),$([O-])]`
+- Metals
If you'd like to discard molecular variants with substructures such as these, use the `--use_durrant_lab_filters` flag.
@@ -255,15 +255,12 @@ To correct the problem, either increase the available memory, or divide your lib
Gypsum-DL aims to enumerate many possible variant forms, including forms that are not necessarily probable.
Beyond applying Durrant-Lab filters, several methods allow users to exclude other potentially problematic forms:
-1. Identify the steps Gypsum-DL takes to generate a given problematic form (see the "Genealogy" field of every output SDF file).
+1. Identify the steps Gypsum-DL takes to generate a given problematic form (see the "Genealogy" field of every output SDF file).
Then use parameters such as `--skip_optimize_geometry`, `--skip_alternate_ring_conformations`, `--skip_adding_hydrogen`, `--skip_making_tautomers`, `--skip_enumerate_chiral_mol`, or `--skip_enumerate_double_bonds` to skip the problem-causing step.
This fix is easy, but it may unexpectedly impact unrelated compounds.
2. Consider adjusting the `--min_ph`, `--max_ph`, or `--pka_precision` parameters if Gypsum-DL is producing compounds with undesired protonation states.
- Alternatively, you can delete specific protonation rules by modifying the
- `gypsum_dl/Steps/SMILES/dimorphite_dl/site_substructures.smarts` file.
3. Add to the Durrant-Lab filters if there is a specific substructure you would like to avoid (e.g., imidic acid due to amide/imidic-acid tautomerization).
- Simplify modify the `gypsum_dl/Steps/SMILES/DurrantLabFilter.py` file.
-
+ Simply modify the `gypsum_dl/Steps/smiles/dl_filter.py` file.
## Citation
diff --git a/gypsum_dl/MolContainer.py b/gypsum_dl/MolContainer.py
deleted file mode 100644
index 9f91d33..0000000
--- a/gypsum_dl/MolContainer.py
+++ /dev/null
@@ -1,270 +0,0 @@
-"""
-This module describes the MolContainer, which contains different MyMol.MyMol
-objects. Each object in this container is derived from the same input molecule
-(so they are variants). Note that conformers (3D coordinate sets) live inside
-MyMol.MyMol. So, just to clarify:
-
-MolContainer.MolContainer > MyMol.MyMol > MyMol.MyConformers
-"""
-
-from gypsum_dl import MyMol, chem_utils, utils
-
-
-class MolContainer:
- """The molecucle container class. It stores all the molecules (tautomers,
- etc.) associated with a single input SMILES entry."""
-
- def __init__(self, smiles, name, index, properties):
- """The constructor.
-
- :param smiles: A list of SMILES strings.
- :type smiles: str
- :param name: The name of the molecule.
- :type name: str
- :param index: The index of this MolContainer in the main MolContainer
- list.
- :type index: int
- :param properties: A dictionary of properties from the sdf.
- :type properties: dict
- """
-
- # Set some variables are set on the container level (not the MyMol
- # level)
- self.contnr_idx = index
- self.contnr_idx_orig = index # Because if some circumstances (mpi),
- # might be reset. But good to have
- # original for filename output.
- self.orig_smi = smiles
- self.orig_smi_deslt = smiles # initial assumption
- self.mols = []
- self.name = name
- self.properties = properties
- self.mol_orig_frm_inp_smi = MyMol.MyMol(smiles, name)
- self.mol_orig_frm_inp_smi.contnr_idx = self.contnr_idx
- self.frgs = "" # For caching.
-
- # Save the original canonical smiles
- self.orig_smi_canonical = self.mol_orig_frm_inp_smi.smiles()
-
- # Get the number of nonaromatic rings
- self.num_nonaro_rngs = len(
- self.mol_orig_frm_inp_smi.get_idxs_of_nonaro_rng_atms()
- )
-
- # Get the number of chiral centers, assigned
- self.num_specif_chiral_cntrs = len(
- self.mol_orig_frm_inp_smi.chiral_cntrs_only_asignd()
- )
-
- # Also get the number of chiral centers, unassigned
- self.num_unspecif_chiral_cntrs = len(
- self.mol_orig_frm_inp_smi.chiral_cntrs_w_unasignd()
- )
-
- # Get the non-acidic carbon-hydrogen footprint.
- self.carbon_hydrogen_count = self.mol_orig_frm_inp_smi.count_hyd_bnd_to_carb()
-
- def mol_with_smiles_is_in_contnr(self, smiles):
- """Checks whether or not a given smiles string is already in this
- container.
-
- :param smiles: The smiles string to check.
- :type smiles: str
- :return: True if it is present, otherwise a new MyMol.MyMol object
- corresponding to that smiles.
- :rtype: bool or MyMol.MyMol
- """
-
- # Checks all the mols in this container to see if a given smiles is
- # already present. Returns a new MyMol object if it isn't, True
- # otherwise.
-
- # First, get the set of all cannonical smiles.
- # TODO: Probably shouldn't be generating this on the fly every time
- # you use it!
- can_smi_in_this_container = {m.smiles() for m in self.mols}
-
- # Determine whether it is already in the container, and act
- # accordingly.
- amol = MyMol.MyMol(smiles)
- return True if amol.smiles() in can_smi_in_this_container else amol
-
- def add_smiles(self, smiles):
- """Adds smiles strings to this container. SMILES are always isomeric
- and always unique (canonical).
-
- :param smiles: A list of SMILES strings. If it's a string, it is
- converted into a list.
- :type smiles: str
- """
-
- # Convert it into a list if it comes in as a string.
- if isinstance(smiles, str):
- smiles = [smiles]
-
- # Keep only the mols with smiles that are not already present.
- for s in smiles:
- result = self.mol_with_smiles_is_in_contnr(s)
- if result != True:
- # Much of the contnr info should be passed to each molecule,
- # too, for convenience.
- result.name = self.name
- result.name = self.orig_smi
- result.orig_smi_canonical = self.orig_smi_canonical
- result.orig_smi_deslt = self.orig_smi_deslt
- result.contnr_idx = self.contnr_idx
-
- self.mols.append(result)
-
- def add_mol(self, mol):
- """Adds a molecule to this container. Does NOT check for uniqueness.
-
- :param mol: The MyMol.MyMol object to add.
- :type mol: MyMol.MyMol
- """
-
- self.mols.append(mol)
-
- def all_can_noh_smiles(self):
- """Gets a list of all the noh canonical smiles in this container.
-
- :return: The canonical, noh smiles string.
- :rtype: str
- """
-
- # True means noh
- return [m.smiles(True) for m in self.mols if m.rdkit_mol is not None]
-
- def get_frags_of_orig_smi(self):
- """Gets a list of the fragments found in the original smiles string
- passed to this container.
-
- :return: A list of the fragments, as rdkit.Mol objects. Also saves to
- self.frgs.
- :rtype: list
- """
-
- if self.frgs != "":
- return self.frgs
-
- frags = self.mol_orig_frm_inp_smi.get_frags_of_orig_smi()
- self.frgs = frags
- return frags
-
- def update_orig_smi(self, orig_smi):
- """Updates the orig_smi string. Used by desalter (to replace with
- largest fragment).
-
- :param orig_smi: The replacement smiles string.
- :type orig_smi: str
- """
-
- # Update the MolContainer object
- self.orig_smi = orig_smi
- self.orig_smi_deslt = orig_smi
- self.mol_orig_frm_inp_smi = MyMol.MyMol(self.orig_smi, self.name)
- self.frgs = ""
- self.orig_smi_canonical = self.mol_orig_frm_inp_smi.smiles()
- self.num_nonaro_rngs = len(
- self.mol_orig_frm_inp_smi.get_idxs_of_nonaro_rng_atms()
- )
- self.num_specif_chiral_cntrs = len(
- self.mol_orig_frm_inp_smi.chiral_cntrs_only_asignd()
- )
- self.num_unspecif_chiral_cntrs = len(
- self.mol_orig_frm_inp_smi.chiral_cntrs_w_unasignd()
- )
-
- # None of the mols derived to date, if present, are accurate.
- self.mols = []
-
- def add_container_properties(self):
- """Adds all properties from the container to the molecules. Used when
- saving final files, to keep a record in the file itself."""
-
- for mol in self.mols:
- mol.mol_props.update(self.properties)
- mol.set_all_rdkit_mol_props()
-
- def remove_identical_mols_from_contnr(self):
- """Removes itentical molecules from this container."""
-
- # For reasons I don't understand, the following doesn't give unique
- # canonical smiles:
-
- # Chem.MolToSmiles(self.mols[0].rdkit_mol, isomericSmiles=True,
- # canonical=True)
-
- # # This block for debugging. JDD: Needs attention?
- # all_can_noh_smiles = [m.smiles() for m in self.mols] # Get all the smiles as stored.
-
- # wrong_cannonical_smiles = [
- # Chem.MolToSmiles(
- # m.rdkit_mol, # Using the RdKit mol stored in MyMol
- # isomericSmiles=True,
- # canonical=True
- # ) for m in self.mols
- # ]
-
- # right_cannonical_smiles = [
- # Chem.MolToSmiles(
- # Chem.MolFromSmiles( # Regenerating the RdKit mol from the smiles string stored in MyMol
- # m.smiles()
- # ),
- # isomericSmiles=True,
- # canonical=True
- # ) for m in self.mols]
-
- # if len(set(wrong_cannonical_smiles)) != len(set(right_cannonical_smiles)):
- # utils.log("ERROR!")
- # utils.log("Stored smiles string in this container:")
- # utils.log("\n".join(all_can_noh_smiles))
- # utils.log("")
- # utils.log("""Supposedly cannonical smiles strings generated from stored
- # RDKit Mols in this container:""")
- # utils.log("\n".join(wrong_cannonical_smiles))
- # utils.log("""But if you plop these into chemdraw, you'll see some of them
- # represent identical structures.""")
- # utils.log("")
- # utils.log("""Cannonical smiles strings generated from RDKit mols that
- # were generated from the stored smiles string in this container:""")
- # utils.log("\n".join(right_cannonical_smiles))
- # utils.log("""Now you see the identical molecules. But why didn't the previous
- # method catch them?""")
- # utils.log("")
-
- # utils.log("""Note that the third method identifies duplicates that the second
- # method doesn't.""")
- # utils.log("")
- # utils.log("=" * 20)
-
- # # You need to make new molecules to get it to work.
- # new_smiles = [m.smiles() for m in self.mols]
- # new_mols = [Chem.MolFromSmiles(smi) for smi in new_smiles]
- # new_can_smiles = [Chem.MolToSmiles(new_mol, isomericSmiles=True, canonical=True) for new_mol in new_mols]
-
- # can_smiles_already_set = set([])
- # for i, new_can_smile in enumerate(new_can_smiles):
- # if not new_can_smile in can_smiles_already_set:
- # # Never seen before
- # can_smiles_already_set.add(new_can_smile)
- # else:
- # # Seen before. Delete!
- # self.mols[i] = None
-
- # while None in self.mols:
- # self.mols.remove(None)
-
- self.mols = chem_utils.uniq_mols_in_list(self.mols)
-
- def update_idx(self, new_idx):
- """Updates the index of this container.
-
- :param new_idx: The new index.
- :type new_idx: int
- """
-
- if type(new_idx) != int:
- utils.exception("New idx value must be an int.")
- self.contnr_idx = new_idx
- self.mol_orig_frm_inp_smi.contnr_idx = self.contnr_idx
diff --git a/gypsum_dl/MyMol.py b/gypsum_dl/MyMol.py
deleted file mode 100644
index dfbedea..0000000
--- a/gypsum_dl/MyMol.py
+++ /dev/null
@@ -1,881 +0,0 @@
-"""
-This module contains classes and functions for processing individual molecules
-(variants). All variants of the same input molecule are grouped together in
-the same MolContainer.MolContainer object. Each MyMol.MyMol is also associated
-with conformers described here (3D coordinate sets).
-
-So just to clarify: MolContainer.MolContainer > MyMol.MyMol >
-MyMol.MyConformer
-"""
-
-import contextlib
-import copy
-import operator
-import sys
-
-from molvs import standardize_smiles as ssmiles
-
-# Disable the unnecessary RDKit warnings
-from rdkit import Chem, RDLogger
-from rdkit.Chem import AllChem
-from rdkit.Chem.rdchem import BondStereo
-
-import gypsum_dl.MolObjectHandling as MOH
-from gypsum_dl import utils
-
-RDLogger.DisableLog("rdApp.*")
-
-
-class MyMol:
- """
- A class that wraps around a rdkit.Mol object. Includes additional data and
- functions.
- """
-
- def __init__(self, starter, name=""):
- """Initialize the MyMol object.
-
- :param starter: The object (smiles or rdkit.Mol) on which to build this
- class.
- :type starter: str or rdkit.Mol
- :param name: An optional string, the name of this molecule. Defaults to "".
- :param name: str, optional
- """
-
- if isinstance(starter, str):
- # It's a SMILES string.
- self.rdkit_mol = ""
- self.can_smi = ""
- smiles = starter
- else:
- # So it's an rdkit mol object.
- self.rdkit_mol = (
- starter # No need to regenerate this, since already provided.
- )
-
- # Get the smiles too from the rdkit mol object.
- try:
- smiles = Chem.MolToSmiles(
- self.rdkit_mol, isomericSmiles=True, canonical=True
- )
-
- # In this case you know it's cannonical.
- self.can_smi = smiles
- except Exception:
- # Sometimes this conversion just can't happen. Happened once
- # with this beast, for example:
- # CC(=O)NC1=CC(=C=[N+]([O-])O)C=C1O
- self.can_smi = False
- id_to_print = name if name != "" else str(starter)
- utils.log(
- "\tERROR: Could not generate one of the structures "
- + "for ("
- + id_to_print
- + ")."
- )
-
- self.can_smi_noh = ""
- self.orig_smi = smiles
-
- # Default assumption is that they are the same.
- self.orig_smi_deslt = smiles
- self.name = name
- self.conformers = []
- self.nonaro_ring_atom_idx = ""
- self.chiral_cntrs_only_assigned = ""
- self.chiral_cntrs_include_unasignd = ""
- self.bizarre_substruct = ""
- self.enrgy = {} # different energies for different conformers.
- self.minimized_enrgy = {}
- self.contnr_idx = ""
- self.frgs = ""
- self.stdrd_smiles = ""
- self.mol_props = {}
- self.idxs_low_energy_confs_no_opt = {}
- self.idxs_of_confs_to_min = set([])
- self.genealogy = [] # Keep track of how the molecule came to be.
-
- # Makes the molecule if a smiles was provided. Sanitizes the molecule
- # regardless.
- self.make_mol_frm_smiles_sanitze()
-
- def standardize_smiles(self):
- """Standardize the smiles string if you can."""
-
- if self.stdrd_smiles != "":
- return self.stdrd_smiles
-
- try:
- self.stdrd_smiles = ssmiles(self.smiles())
- except Exception:
- utils.log("\tCould not standardize " + self.smiles(True) + ". Skipping.")
- self.stdrd_smiles = self.smiles()
-
- return self.stdrd_smiles
-
- def __hash__(self):
- """Allows you to compare MyMol.MyMol objects.
-
- :return: The hashed canonical smiles.
- :rtype: str
- """
-
- can_smi = self.smiles()
-
- # So it hashes based on the cannonical smiles.
- return hash(can_smi)
-
- def __eq__(self, other):
- """Allows you to compare MyMol.MyMol objects.
-
- :param other: The other molecule.
- :type other: MyMol.MyMol
- :return: Whether the other molecule is the same as this one.
- :rtype: bool
- """
-
- return False if other is None else self.__hash__() == other.__hash__()
-
- def __ne__(self, other):
- """Allows you to compare MyMol.MyMol objects.
-
- :param other: The other molecule.
- :type other: MyMol.MyMol
- :return: Whether the other molecule is different from this one.
- :rtype: bool
- """
-
- return not self.__eq__(other)
-
- def __lt__(self, other):
- """Is this MyMol less than another one? Gypsum-DL often sorts
- molecules by sorting tuples of the form (energy, MyMol). On rare
- occasions, the energies are identical, and the sorting algorithm
- attempts to compare MyMol directly.
-
- :param other: The other molecule.
- :type other: MyMol.MyMol
- :return: True or False, if less than or not.
- :rtype: boolean
- """
-
- return self.__hash__() < other.__hash__()
-
- def __le__(self, other):
- """Is this MyMol less than or equal to another one? Gypsum-DL often
- sorts molecules by sorting tuples of the form (energy, MyMol). On rare
- occasions, the energies are identical, and the sorting algorithm
- attempts to compare MyMol directly.
-
- :param other: The other molecule.
- :type other: MyMol.MyMol
- :return: True or False, if less than or equal to, or not.
- :rtype: boolean
- """
-
- return self.__hash__() <= other.__hash__()
-
- def __gt__(self, other):
- """Is this MyMol greater than another one? Gypsum-DL often sorts
- molecules by sorting tuples of the form (energy, MyMol). On rare
- occasions, the energies are identical, and the sorting algorithm
- attempts to compare MyMol directly.
-
- :param other: The other molecule.
- :type other: MyMol.MyMol
- :return: True or False, if greater than or not.
- :rtype: boolean
- """
-
- return self.__hash__() > other.__hash__()
-
- def __ge__(self, other):
- """Is this MyMol greater than or equal to another one? Gypsum-DL often
- sorts molecules by sorting tuples of the form (energy, MyMol). On rare
- occasions, the energies are identical, and the sorting algorithm
- attempts to compare MyMol directly.
-
- :param other: The other molecule.
- :type other: MyMol.MyMol
- :return: True or False, if greater than or equal to, or not.
- :rtype: boolean
- """
-
- return self.__hash__() >= other.__hash__()
-
- def make_mol_frm_smiles_sanitze(self):
- """Construct a rdkit.mol for this object, in case you only received
- the smiles. Also, sanitize the molecule regardless.
-
- :return: Returns the rdkit.mol object, though it's also stored in
- self.rdkit_mol.
- :rtype: rdkit.mol object.
- """
-
- # If given a SMILES string.
- if self.rdkit_mol == "":
- try:
- # sanitize = False makes it respect double-bond stereochemistry
- m = Chem.MolFromSmiles(self.orig_smi_deslt, sanitize=False)
- except Exception:
- m = None
- else: # If given a RDKit Mol Obj
- m = self.rdkit_mol
-
- if m is not None:
- # Sanitize and hopefully correct errors in the smiles string such
- # as incorrect nitrogen charges.
- m = MOH.check_sanitization(m)
- self.rdkit_mol = m
- return m
-
- def make_first_3d_conf_no_min(self):
- """Makes the associated rdkit.mol object 3D by adding the first
- conformer. This also adds hydrogen atoms to the associated rdkit.mol
- object. Note that it does not perform a minimization, so it is not
- too expensive."""
-
- # Set the first 3D conformer
- if len(self.conformers) > 0:
- # It's already been done.
- return
-
- # Add hydrogens. This adds explicit hydrogens, while respecting
- # Dimorphite-DL protonation states.
- self.rdkit_mol = MOH.try_reprotanation(self.rdkit_mol)
-
- # Add a single conformer. RMSD cutoff very small so all conformers
- # will be accepted. And not minimizing (False).
- self.add_conformers(1, 1e60, False)
-
- def smiles(self, noh=False):
- """Get the desalted, canonical smiles string associated with this
- object. (Not the input smiles!)
-
- :param noh: Whether or not hydrogen atoms should be included in the
- canonical smiles string., defaults to False
- :param noh: bool, optional
- :return: The canonical smiles string, or None if it cannot be
- determined.
- :rtype: str or None
- """
-
- # See if it's already been calculated. They want the hydrogen atoms.
- if noh == False:
- if self.can_smi != "":
- # Return previously determined canonical SMILES.
- return self.can_smi
-
- # Need to determine canonical SMILES.
- try:
- can_smi = Chem.MolToSmiles(
- self.rdkit_mol, isomericSmiles=True, canonical=True
- )
- except Exception:
- utils.log(
- f"Warning: Couldn't put {self.orig_smi} ({self.name}) in canonical form. Got this error: {str(sys.exc_info()[0])}. This molecule will be discarded."
- )
- self.can_smi = None
- return None
-
- self.can_smi = can_smi
- return can_smi
- else:
- # They don't want the hydrogen atoms.
- if self.can_smi_noh != "":
- # Return previously determined string.
- return self.can_smi_noh
-
- # So remove hydrogens. Note that this assumes you will have called
- # this function previously with noh = False
- amol = copy.copy(self.rdkit_mol)
- amol = MOH.try_deprotanation(amol)
- self.can_smi_noh = Chem.MolToSmiles(
- amol, isomericSmiles=True, canonical=True
- )
- return self.can_smi_noh
-
- def get_idxs_of_nonaro_rng_atms(self):
- """Identifies which rings in a given molecule are nonaromatic, if any.
-
- :return: A [[int, int, int]]. A list of lists, where each inner list is
- a list of the atom indecies of the members of a non-aromatic ring.
- Also saved to self.nonaro_ring_atom_idx.
- :rtype: list
- """
-
- if self.nonaro_ring_atom_idx != "":
- # Already determined...
- return self.nonaro_ring_atom_idx
-
- # There are no rings if the molecule is None.
- if self.rdkit_mol is None:
- return []
-
- # Get the number of symmetric smallest set of rings
- ssr = Chem.GetSymmSSSR(self.rdkit_mol)
-
- # Get the rings
- ring_indecies = [list(ssr[i]) for i in range(len(ssr))]
-
- # Are the atoms in any of those rings nonaromatic?
- nonaro_rngs = []
- for rng_indx_set in ring_indecies:
- for atm_idx in rng_indx_set:
- if self.rdkit_mol.GetAtomWithIdx(atm_idx).GetIsAromatic() == False:
- # One of the ring atoms is not aromatic! Let's keep it.
- nonaro_rngs.append(rng_indx_set)
- break
- self.nonaro_ring_atom_idx = nonaro_rngs
- return nonaro_rngs
-
- def chiral_cntrs_w_unasignd(self):
- """Get the chiral centers that haven't been assigned.
-
- :return: The chiral centers. Also saved to
- self.chiral_cntrs_include_unasignd. Looks like [(10, '?')]
- :rtype: list
- """
-
- # No chiral centers if the molecule is None.
- if self.rdkit_mol is None:
- return []
-
- if self.chiral_cntrs_include_unasignd != "":
- # Already been determined...
- return self.chiral_cntrs_include_unasignd
-
- # Get the chiral centers that are not defined.
- ccs = Chem.FindMolChiralCenters(self.rdkit_mol, includeUnassigned=True)
- self.chiral_cntrs_include_unasignd = ccs
- return ccs
-
- def chiral_cntrs_only_asignd(self):
- """Get the chiral centers that have been assigned.
-
- :return: The chiral centers. Also saved to self.chiral_cntrs_only_assigned.
- :rtype: list
- """
-
- if self.chiral_cntrs_only_assigned != "":
- return self.chiral_cntrs_only_assigned
-
- if self.rdkit_mol is None:
- return []
-
- ccs = Chem.FindMolChiralCenters(self.rdkit_mol, includeUnassigned=False)
- self.chiral_cntrs_only_assigned = ccs
- return ccs
-
- def get_double_bonds_without_stereochemistry(self):
- """Get the double bonds that don't have specified stereochemistry.
-
- :return: The unasignd double bonds (indexes). Looks like this:
- [2, 4, 7]
- :rtype: list
- """
-
- if self.rdkit_mol is None:
- return []
-
- return [
- b.GetIdx()
- for b in self.rdkit_mol.GetBonds()
- if b.GetBondTypeAsDouble() == 2 and b.GetStereo() is BondStereo.STEREONONE
- ]
-
- def remove_bizarre_substruc(self):
- """Removes molecules with improbable substuctures, likely generated
- from the tautomerization process. Used to find artifacts.
-
- :return: Boolean, whether or not there are impossible substructures.
- Also saves to self.bizarre_substruct.
- :rtype: bool
- """
-
- if self.bizarre_substruct != "":
- # Already been determined.
- return self.bizarre_substruct
-
- if self.rdkit_mol is None:
- # It is bizarre to have a molecule with no atoms in it.
- return True
-
- # These are substrutures that can't be easily corrected using
- # fix_common_errors() below.
- # , "[C+]", "[C-]", "[c+]", "[c-]", "[n-]", "[N-]"] # ,
- # "[*@@H]1(~[*][*]~2)~[*]~[*]~[*@@H]2~[*]~[*]~1",
- # "[*@@H]1~2~*~*~[*@@H](~*~*2)~*1",
- # "[*@@H]1~2~*~*~*~[*@@H](~*~*2)~*1",
- # "[*@@H]1~2~*~*~*~*~[*@@H](~*~*2)~*1",
- # "[*@@H]1~2~*~[*@@H](~*~*2)~*1", "[*@@H]~1~2~*~*~*~[*@H]1O2",
- # "[*@@H]~1~2~*~*~*~*~[*@H]1O2"]
-
- # Note that C(O)=N, C and N mean they are aliphatic. Does not match
- # c(O)n, when aromatic. So this form is acceptable if in aromatic
- # structure.
- prohibited_substructures = ["O(=*)-*"] # , "C(O)=N"]
-
- # Enol forms with terminal alkenes are unlikely.
- prohibited_substructures.append("C(=[CH2])[OH]")
-
- # Enol forms with terminal alkenes are unlikely.
- prohibited_substructures.append("C(=[CH2])[O-]")
- # A geminal vinyl diol is not a tautomer of a carboxylate group.
- prohibited_substructures.append("C=C([OH])[OH]")
-
- # A geminal vinyl diol is not a tautomer of a carboxylate group.
- prohibited_substructures.append("C=C([O-])[OH]")
-
- # A geminal vinyl diol is not a tautomer of a carboxylate group.
- prohibited_substructures.append("C=C([O-])[O-]")
- prohibited_substructures.append("[C-]") # No carbanions.
- prohibited_substructures.append("[c-]") # No carbanions.
-
- for s in prohibited_substructures:
- # First just match strings... could be faster, but not 100%
- # accurate.
- if s in self.orig_smi or s in self.orig_smi_deslt or s in self.can_smi:
- utils.log("\tDetected unusual substructure: " + s)
- self.bizarre_substruct = True
- return True
-
- # Now do actual substructure matching
- for s in prohibited_substructures:
- pattrn = Chem.MolFromSmarts(s)
- if self.rdkit_mol.HasSubstructMatch(pattrn):
- # utils.log("\tRemoving a molecule because it has an odd
- # substructure: " + s)
- utils.log("\tDetected unusual substructure: " + s)
- self.bizarre_substruct = True
- return True
-
- # Now certin patterns that are more complex.
- # TODO in the future?
-
- self.bizarre_substruct = False
- return False
-
- def get_frags_of_orig_smi(self):
- """Divide the current molecule into fragments.
-
- :return: A list of the fragments, as rdkit.Mol objects.
- :rtype: list
- """
-
- if self.frgs != "":
- # Already been determined...
- return self.frgs
-
- if "." not in self.orig_smi:
- # There are no fragments. Just return this object.
- self.frgs = [self]
- return [self]
-
- # Get the fragments.
- frags = Chem.GetMolFrags(self.rdkit_mol, asMols=True)
- self.frgs = frags
- return frags
-
- def inherit_contnr_props(self, other):
- """Copies a few key properties from a different MyMol.MyMol object to
- this one.
-
- :param other: The other MyMol.MyMol object to copy these properties to.
- :type other: MyMol.MyMol
- """
-
- # other can be a contnr or MyMol.MyMol object. These are properties
- # that should be the same for every MyMol.MyMol object in this
- # MolContainer.
- self.contnr_idx = other.contnr_idx
- self.orig_smi = other.orig_smi
- self.orig_smi_deslt = other.orig_smi_deslt # initial assumption
- self.name = other.name
-
- def set_rdkit_mol_prop(self, key, val):
- """Set a molecular property.
-
- :param key: The name of the molecular property.
- :type key: str
- :param val: The value of that property.
- :type val: str
- """
-
- val = str(val)
- self.rdkit_mol.SetProp(key, val)
- self.rdkit_mol.SetProp(key, val)
-
- with contextlib.suppress(Exception):
- self.rdkit_mol.SetProp(key, val)
-
- def set_all_rdkit_mol_props(self):
- """Set all the stored molecular properties. Copies ones from the
- MyMol.MyMol object to the MyMol.rdkit_mol object."""
-
- self.set_rdkit_mol_prop("SMILES", self.smiles(True))
- # self.set_rdkit_mol_prop("SOURCE_SMILES", self.orig_smi)
- for prop in list(self.mol_props.keys()):
- self.set_rdkit_mol_prop(prop, self.mol_props[prop])
- genealogy = "\n".join(self.genealogy)
- self.set_rdkit_mol_prop("Genealogy", genealogy)
- self.set_rdkit_mol_prop("_Name", self.name)
-
- def add_conformers(self, num, rmsd_cutoff=0.1, minimize=True):
- """Add conformers to this molecule.
-
- :param num: The total number of conformers to generate, including ones
- that have been generated previously.
- :type num: int
- :param rmsd_cutoff: Don't keep conformers that come within this rms
- distance of other conformers. Defaults to 0.1
- :param rmsd_cutoff: float, optional
- :param minimize: Whether or not to minimize the geometry of all these
- conformers. Defaults to True.
- :param minimize: bool, optional
- """
-
- # First, do you need to add new conformers? Some might have already
- # been added. Just add enough to meet the requested amount.
- num_new_confs = max(0, num - len(self.conformers))
- for _ in range(num_new_confs):
- if len(self.conformers) == 0:
- # For the first one, don't start from random coordinates.
- new_conf = MyConformer(self)
- else:
- # For all subsequent ones, do start from random coordinates.
- new_conf = MyConformer(self, None, False, True)
-
- if new_conf.mol is not False:
- self.conformers.append(new_conf)
-
- # Are the current ones minimized if necessary?
- if minimize == True:
- for conf in self.conformers:
- conf.minimize() # Won't reminimize if it's already been done.
-
- # Automatically sort by the energy.
- self.conformers.sort(key=operator.attrgetter("energy"))
-
- # print(len(self.conformers))
-
- # Print the coordinates of the atoms of each conformer
- # for conf in self.conformers:
- # print(conf.coords())
- # print("")
-
- # Save all the conformers to separate PDB files
- # for i, conf in enumerate(self.conformers):
- # conf.write_pdb_file("test_" + str(i) + ".pdb")
- # print(conf.get_energy())
-
- # Remove ones that are very structurally similar.
- self.eliminate_structurally_similar_conformers(rmsd_cutoff)
-
- def eliminate_structurally_similar_conformers(self, rmsd_cutoff=0.1):
- """Eliminates conformers that are very geometrically similar.
-
- :param rmsd_cutoff: The RMSD cutoff to use. Defaults to 0.1
- :param rmsd_cutoff: float, optional
- """
-
- # Eliminate redundant ones.
- for i1 in range(len(self.conformers) - 1):
- if self.conformers[i1] is not None:
- for i2 in range(i1 + 1, len(self.conformers)):
- if self.conformers[i2] is not None:
- # Align them.
- self.conformers[i2] = self.conformers[i1].align_to_me(
- self.conformers[i2]
- )
-
- # Calculate the RMSD.
- rmsd = self.conformers[i1].rmsd_to_me(self.conformers[i2])
-
- # Replace the second one with None if it's too similar
- # to the first.
- if rmsd <= rmsd_cutoff:
- self.conformers[i2] = None
-
- # Remove all the None entries.
- while None in self.conformers:
- self.conformers.remove(None)
-
- # Those that remains are only the distinct conformers.
-
- def count_hyd_bnd_to_carb(self):
- """Count the number of Hydrogens bound to carbons."""
-
- if self.rdkit_mol is None:
- # Doesn't have any atoms at all.
- return 0
-
- total_hydrogens_counted = 0
- for atom in self.rdkit_mol.GetAtoms():
- if atom.GetSymbol() == "C":
- total_hydrogens_counted = total_hydrogens_counted + atom.GetTotalNumHs(
- includeNeighbors=True
- )
-
- return total_hydrogens_counted
-
- def load_conformers_into_rdkit_mol(self):
- """Load the conformers stored as MyConformers objects (in
- self.conformers) into the rdkit Mol object."""
-
- self.rdkit_mol.RemoveAllConformers()
- for conformer in self.conformers:
- self.rdkit_mol.AddConformer(conformer.conformer())
-
-
-class MyConformer:
- """A wrapper around a rdkit Conformer object. Allows me to associate extra
- values with conformers. These are 3D coordinate sets for a given
- MyMol.MyMol object (different molecule conformations).
- """
-
- def __init__(
- self, mol, conformer=None, second_embed=False, use_random_coordinates=False
- ):
- """Create a MyConformer objects.
-
- :param mol: The MyMol.MyMol associated with this conformer.
- :type mol: MyMol.MyMol
- :param conformer: An optional variable specifying the conformer to use.
- If not specified, it will create a new conformer. Defaults to None.
- :type conformer: rdkit.Conformer, optional
- :param second_embed: Whether to try to generate 3D coordinates using an
- older algorithm if the better (default) algorithm fails. This can add
- run time, but sometimes converts certain molecules that would
- otherwise fail. Defaults to False.
- :type second_embed: bool, optional
- :param use_random_coordinates: The first conformer should not start
- from random coordinates, but rather the eigenvalues-based
- coordinates rdkit defaults to. But Gypsum-DL generates subsequent
- conformers to try to consider alternate geometries. So they should
- start from random coordinates. Defaults to False.
- :type use_random_coordinates: bool, optional
- """
-
- # Save some values to the object.
- self.mol = copy.deepcopy(mol.rdkit_mol)
- self.smiles = mol.smiles()
-
- # Remove any previous conformers.
- self.mol.RemoveAllConformers()
-
- if conformer is None:
- # The user is providing no conformer. So we must generate it.
-
- # Note that I have confirmed that the below respects chirality.
- # params is a list of ETKDGv2 parameters generated by this command
- # Description of these parameters can be found at
- # help(AllChem.EmbedMolecule)
-
- try:
- # Newest version
- # print("HERE")
- params = AllChem.ETKDGv3()
- except Exception:
- try:
- # Try to use ETKDGv2, but it is only present in the python 3.6
- # version of RDKit.
- params = AllChem.ETKDGv2()
- except Exception:
- # Use the original version of ETKDG if python 2.7 RDKit. This
- # may be resolved in next RDKit update so we encased this in a
- # try statement.
- params = AllChem.ETKDG()
-
- # The default, but just a sanity check.
- params.enforceChirality = True
-
- # Set a max number of times it will try to calculate the 3D
- # coordinates. Will save a little time. This should be the default
- # (0) but lets set it anyway
- params.maxIterations = 0
-
- # Also set whether to start from random coordinates.
- params.useRandomCoords = use_random_coordinates
- # params.randomSeed = random.randint(0, 1000000000)
-
- # AllChem.EmbedMolecule uses geometry to create inital molecule
- # coordinates. This sometimes takes a very long time.
- AllChem.EmbedMolecule(self.mol, params)
-
- # On rare occasions, the new conformer generating algorithm fails
- # because params.useRandomCoords = False. So if it fails, try
- # again with True.
- if self.mol.GetNumConformers() == 0 and use_random_coordinates == False:
- params.useRandomCoords = True
- AllChem.EmbedMolecule(self.mol, params)
-
- # On very rare occasions, the new conformer generating algorithm
- # fails. For example, COC(=O)c1cc(C)nc2c(C)cc3[nH]c4ccccc4c3c12 .
- # In this case, the old one still works. So if no coordinates are
- # assigned, try that one. Parameters must have second_embed set to
- # True for this to happen.
- if second_embed == True and self.mol.GetNumConformers() == 0:
- AllChem.EmbedMolecule(self.mol, useRandomCoords=use_random_coordinates)
-
- # On rare occasions, both methods fail. For example,
- # O=c1cccc2[C@H]3C[NH2+]C[C@@H](C3)Cn21 Another example:
- # COc1cccc2c1[C@H](CO)[N@H+]1[C@@H](C#N)[C@@H]3C[C@@H](C(=O)[O-])[C@H]([C@H]1C2)[N@H+]3C
- if self.mol.GetNumConformers() == 0:
- self.mol = False
- else:
- # The user has provided a conformer. Just add it.
- conformer.SetId(0)
- self.mol.AddConformer(conformer, assignId=True)
-
- # Calculate some energies, other housekeeping.
- if self.mol is not False:
- try:
- ff = AllChem.UFFGetMoleculeForceField(self.mol)
- self.energy = ff.CalcEnergy()
- except Exception:
- utils.log(
- "Warning: Could not calculate energy for molecule "
- + Chem.MolToSmiles(self.mol)
- )
- # Example of smiles that cause problem here without try...catch:
- # NC1=NC2=C(N[C@@H]3[C@H](N2)O[C@@H](COP(O)(O)=O)C2=C3S[Mo](S)(=O)(=O)S2)C(=O)N1
- self.energy = 9999
- self.minimized = False
- self.ids_hvy_atms = [
- a.GetIdx() for a in self.mol.GetAtoms() if a.GetAtomicNum() != 1
- ]
-
- def conformer(self, conf=None):
- """Get or set the conformer. An optional variable can specify the
- conformer to set. If not specified, this function acts as a get for
- the conformer.
-
- :param conf: The conformer to set, defaults to None
- :param conf: rdkit.Conformer, optional
- :return: An rdkit.Conformer object, if conf is not specified.
- :rtype: rdkit.Conformer
- """
-
- if conf is None:
- return self.mol.GetConformers()[0]
-
- self.mol.RemoveAllConformers()
- self.mol.AddConformer(conf)
-
- def minimize(self):
- """Minimize (optimize) the geometry of the current conformer if it
- hasn't already been optimized."""
-
- if self.minimized == True:
- # Already minimized. Don't do it again.
- return
-
- # Perform the minimization, and save the energy.
- try:
- ff = AllChem.UFFGetMoleculeForceField(self.mol)
- ff.Minimize()
- self.energy = ff.CalcEnergy()
- except Exception:
- utils.log(
- "Warning: Could not calculate energy for molecule "
- + Chem.MolToSmiles(self.mol)
- )
- self.energy = 9999
- self.minimized = True
-
- def align_to_me(self, other_conf):
- """Align another conformer to this one.
-
- :param other_conf: The other conformer to align.
- :type other_conf: MyConformer
- :return: The aligned MyConformer object.
- :rtype: MyConformer
- """
-
- # Add the conformer of the other MyConformer object.
- self.mol.AddConformer(other_conf.conformer(), assignId=True)
-
- # Align them.
- AllChem.AlignMolConformers(self.mol, atomIds=self.ids_hvy_atms)
-
- # Reset the conformer of the other MyConformer object.
- last_conf = self.mol.GetConformers()[-1]
- other_conf.conformer(last_conf)
-
- # Remove the added conformer.
- self.mol.RemoveConformer(last_conf.GetId())
-
- # Return that other object.
- return other_conf
-
- def MolToMolBlock(self):
- """Prints out the first 500 letters of the molblock version of this
- conformer. Good for debugging."""
-
- mol_copy = copy.deepcopy(self.mol_copy) # Use it as a template.
- mol_copy.RemoveAllConformers()
- mol_copy.AddConformer(self.conformer)
- utils.log(Chem.MolToMolBlock(mol_copy)[:500])
-
- def rmsd_to_me(self, other_conf):
- """Calculate the rms distance between this conformer and another one.
-
- :param other_conf: The other conformer to align.
- :type other_conf: MyConformer
- :return: The rmsd, a float.
- :rtype: float
- """
-
- # Make a new molecule.
- amol = Chem.MolFromSmiles(self.smiles, sanitize=False)
- amol = MOH.check_sanitization(amol)
- amol = MOH.try_reprotanation(amol)
-
- # Add the conformer of the other MyConformer object.
- amol.AddConformer(self.conformer(), assignId=True)
- amol.AddConformer(other_conf.conformer(), assignId=True)
-
- # Get the two confs.
- # first_conf = amol.GetConformers()[0]
- # last_conf = amol.GetConformers()[-1]
-
- # Return the RMSD.
- amol = MOH.try_deprotanation(amol)
- return AllChem.GetConformerRMS(amol, 0, 1, prealigned=True)
-
- def coords(self):
- """Get the coordinates of this conformer. For debugging.
-
- :return: A list of coordinates.
- :rtype: list
- """
-
- return self.conformer().GetPositions()
-
- def write_pdb_file(self, filename: str):
- """Write this conformer to a PDB file. For debugging.
-
- :param filename: The name of the file to write.
- :type filename: str
- """
-
- # Make a new molecule.
- mol = copy.deepcopy(self.mol)
- mol.RemoveAllConformers()
-
- # Add the conformer of the other MyConformer object.
- mol.AddConformer(self.conformer(), assignId=True)
-
- # Write the PDB file.
- AllChem.MolToPDBFile(mol, filename)
-
- def get_energy(self) -> float:
- """Get the energy of this conformer. For debugging.
-
- :return: The energy.
- :rtype: float
- """
-
- ff = AllChem.UFFGetMoleculeForceField(self.mol)
- return ff.CalcEnergy()
diff --git a/gypsum_dl/__init__.py b/gypsum_dl/__init__.py
index 058c508..f51becd 100644
--- a/gypsum_dl/__init__.py
+++ b/gypsum_dl/__init__.py
@@ -13,7 +13,7 @@
LOG_FORMAT = (
"{time:HH:mm:ss} | "
"{level: <8} | "
- "{name}:{function}:{line} - {message}"
+ "{message}"
)
diff --git a/gypsum_dl/chem_utils.py b/gypsum_dl/chem_utils.py
index 13d1407..59b4dbf 100644
--- a/gypsum_dl/chem_utils.py
+++ b/gypsum_dl/chem_utils.py
@@ -1,29 +1,36 @@
"""The module includes definitions to manipulate the molecules."""
+from typing import TYPE_CHECKING
+
+from loguru import logger
from rdkit import Chem
from gypsum_dl import utils
+if TYPE_CHECKING:
+ from gypsum_dl.models import Molecule, MoleculeContainer
+
-def pick_lowest_enrgy_mols(mol_lst, num, thoroughness):
+def pick_lowest_enrgy_mols(
+ mol_lst: list["Molecule"], num: int, thoroughness: int
+) -> list["Molecule"]:
"""Pick molecules with low energies. If necessary, the definition also
makes a conformer without minimization (so not too computationally
expensive).
- :param mol_lst: The list of MyMol.MyMol objects.
- :type mol_lst: list
- :param num: The number of the lowest-energy ones to keep.
- :type num: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :return: Returns a list of MyMol.MyMol, the best ones.
- :rtype: list
+ Args:
+ mol_lst: The list of Molecule objects.
+ num: The number of the lowest-energy ones to keep.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (max_variants_per_compound = 5). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so thoroughness = 2). Using thoroughness > 1 increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+
+ Returns:
+ A list of the best (lowest-energy) Molecule objects.
"""
# Remove identical entries.
@@ -34,7 +41,7 @@ def pick_lowest_enrgy_mols(mol_lst, num, thoroughness):
return mol_lst
# First, generate 3D structures. How many? num * thoroughness. mols_3d is
- # a list of Gypsum-DL MyMol.MyMol objects.
+ # a list of Gypsum-DL Molecule objects.
mols_3d = utils.random_sample(mol_lst, num * thoroughness, "")
# Now get the energies
@@ -55,13 +62,14 @@ def pick_lowest_enrgy_mols(mol_lst, num, thoroughness):
return [mol_lst[d[1]] for d in data]
-def remove_highly_charged_molecules(mol_lst):
+def remove_highly_charged_molecules(mol_lst: list["Molecule"]) -> list["Molecule"]:
"""Remove molecules that are highly charged.
- :param mol_lst: The list of molecules to consider.
- :type mol_lst: list
- :return: A list of molecules that are not too charged.
- :rtype: list
+ Args:
+ mol_lst: The list of molecules to consider.
+
+ Returns:
+ A list of molecules that are not too charged.
"""
# First, find the molecule that is closest to being neutral.
@@ -78,46 +86,40 @@ def remove_highly_charged_molecules(mol_lst):
if abs(charge - charge_closest_to_neutral) <= 4:
new_mol_lst.append(mol_lst[i])
else:
- utils.log(
- "\tWARNING: Discarding highly charged form: "
- + mol_lst[i].smiles()
- + "."
+ logger.warning(
+ "Discarding highly charged form: " + mol_lst[i].smiles() + "."
)
return new_mol_lst
def bst_for_each_contnr_no_opt(
- contnrs,
- mol_lst,
- max_variants_per_compound,
- thoroughness,
- crry_ovr_frm_lst_step_if_no_fnd=True,
-):
+ contnrs: list["MoleculeContainer"],
+ mol_lst: list["Molecule"],
+ max_variants_per_compound: int,
+ thoroughness: int,
+ crry_ovr_frm_lst_step_if_no_fnd: bool = True,
+) -> None:
"""Keep only the top few compound variants in each container, to prevent a
- combinatorial explosion. This is run periodically on the growing
- containers to keep them in check.
-
- :param contnrs: A list of containers (MolContainer.MolContainer).
- :type contnrs: list
- :param mol_lst: The list of MyMol.MyMol objects.
- :type mol_lst: list
- :param max_variants_per_compound: To control the combinatorial explosion,
- only this number of variants (molecules) will be advanced to the next
- step.
- :type max_variants_per_compound: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :param crry_ovr_frm_lst_step_if_no_fnd: If it can't find any low-energy
- conformers, determines whether to just keep the old ones. Defaults to
- True.
- :param crry_ovr_frm_lst_step_if_no_fnd: bool, optional
+ combinatorial explosion. This is run periodically on the growing
+ containers to keep them in check.
+
+ Args:
+ contnrs: A list of containers (container.MoleculeContainer).
+ mol_lst: The list of Molecule objects.
+ max_variants_per_compound: To control the combinatorial explosion,
+ only this number of variants (molecules) will be advanced to the next
+ step.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (max_variants_per_compound = 5). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so thoroughness = 2). Using thoroughness > 1 increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+ crry_ovr_frm_lst_step_if_no_fnd: If it can't find any low-energy
+ conformers, determines whether to just keep the old ones. Defaults to
+ True.
"""
# Remove duplicate ligands from each container.
@@ -162,8 +164,8 @@ def bst_for_each_contnr_no_opt(
if none_generated:
if crry_ovr_frm_lst_step_if_no_fnd:
# Just use previous ones.
- utils.log(
- "\tWARNING: Unable to find low-energy conformations: "
+ logger.warning(
+ "Unable to find low-energy conformations: "
+ contnr.orig_smi_deslt
+ " ("
+ contnr.name
@@ -172,8 +174,8 @@ def bst_for_each_contnr_no_opt(
)
else:
# Discard the conformation.
- utils.log(
- "\tWARNING: Unable to find low-energy conformations: "
+ logger.warning(
+ "Unable to find low-energy conformations: "
+ contnr.orig_smi_deslt
+ " ("
+ contnr.name
@@ -182,7 +184,7 @@ def bst_for_each_contnr_no_opt(
contnr.mols = []
-def uniq_mols_in_list(mol_lst):
+def uniq_mols_in_list(mol_lst: list[Chem.Mol]) -> list[str]:
# You need to make new molecules to get it to work.
# new_smiles = [m.smiles() for m in self.mols]
# new_mols = [Chem.MolFromSmiles(smi) for smi in new_smiles]
@@ -191,7 +193,7 @@ def uniq_mols_in_list(mol_lst):
can_smiles_already_set = set([])
uniq_mols = []
for m in mol_lst:
- smi = m.smiles()
+ smi: str = m.smiles()
if smi not in can_smiles_already_set:
uniq_mols.append(m)
can_smiles_already_set.add(smi)
diff --git a/gypsum_dl/config/__init__.py b/gypsum_dl/config/__init__.py
new file mode 100644
index 0000000..b6ab6a6
--- /dev/null
+++ b/gypsum_dl/config/__init__.py
@@ -0,0 +1,3 @@
+from .core import GypsumConfig
+
+__all__: list[str] = ["GypsumConfig"]
diff --git a/gypsum_dl/config/core.py b/gypsum_dl/config/core.py
new file mode 100644
index 0000000..67e9a7c
--- /dev/null
+++ b/gypsum_dl/config/core.py
@@ -0,0 +1,9 @@
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from gypsum_dl.config.utils import Render, YamlIO
+
+
+class GypsumConfig(BaseModel, YamlIO, Render):
+ """Configuration for gypsum."""
diff --git a/gypsum_dl/config/utils/__init__.py b/gypsum_dl/config/utils/__init__.py
new file mode 100644
index 0000000..d078a2c
--- /dev/null
+++ b/gypsum_dl/config/utils/__init__.py
@@ -0,0 +1,4 @@
+from .io import YamlIO
+from .render import Render
+
+__all__ = ["YamlIO", "Render"]
diff --git a/gypsum_dl/config/utils/io.py b/gypsum_dl/config/utils/io.py
new file mode 100644
index 0000000..e4f3bde
--- /dev/null
+++ b/gypsum_dl/config/utils/io.py
@@ -0,0 +1,66 @@
+from typing import Any
+
+from abc import ABC
+
+import yaml
+
+
+class YamlIO(ABC):
+ """Handles YAML inputs and outputs."""
+
+ def update(self, data: dict[str, Any]) -> None:
+ """Iteratively update pydantic model.
+
+ Args:
+ data: Key-value mapping to update attributes with.
+
+ Notes:
+ Many of our pydantic models have fields that need to be specified by
+ instantiating other objects or models. In order to instantiate these
+ objects, you can use the `import` root key to specify which class to use.
+ For example, if you need to specify the
+ [`AmberTopoGen`][simulation.amber.topo.AmberTopoGen] as the
+ simlify configuration `engine`, you can add this to your YAML file.
+
+ ```yaml
+ import:
+ engine: simlify.simulation.amber.topo.AmberTopoGen
+ ```
+
+ This will call [`get_obj_from_string`][utils.get_obj_from_string] and
+ set the [`engine`][config.SimlifyConfig.engine] attribute to
+ [`AmberTopoGen`][simulation.amber.topo.AmberTopoGen].
+ We handle these imports before any other field is handled.
+ """
+ for key, value in data.items():
+ if key in self.model_fields: # type: ignore # pylint: disable=no-member
+ setattr(self, key, value)
+
+ def from_yaml(self, yaml_paths: str | list[str]) -> None:
+ """Update the instance's attributes from one or more YAML files.
+
+ Args:
+ yaml_paths: A sequence of YAML file paths or a single YAML file path.
+
+ Raises:
+ FileNotFoundError: If any of the YAML files cannot be found.
+ """
+ if isinstance(yaml_paths, str):
+ yaml_paths = [yaml_paths]
+ for yaml_path in yaml_paths:
+ with open(yaml_path, "r", encoding="utf-8") as f:
+ yaml_data = yaml.safe_load(f)
+ self.update(yaml_data)
+
+ def to_yaml(self, file_path: str) -> None:
+ """Serialize a Pydantic BaseModel instance to a YAML file.
+
+ Args:
+ file_path: Path to the YAML file to write the serialized data to.
+
+ Raises:
+ OSError: If the file cannot be opened or written to.
+ """
+ config_dict = self.model_dump() # type: ignore # pylint: disable=no-member
+ with open(file_path, "w", encoding="utf-8") as f:
+ yaml.dump(config_dict, f, default_flow_style=False)
diff --git a/gypsum_dl/config/utils/render.py b/gypsum_dl/config/utils/render.py
new file mode 100644
index 0000000..b278ac8
--- /dev/null
+++ b/gypsum_dl/config/utils/render.py
@@ -0,0 +1,40 @@
+from abc import ABC
+
+from pydantic import Field
+
+
+class Render(ABC):
+ """Handles rendering files."""
+
+ dir_work: str = Field(default=".", exclude=True)
+ """
+ Working directory to write files.
+ """
+
+ dir_input: str = Field(default=".", exclude=True)
+ """
+ Path to a directory relative to
+ [`dir_work`][configs.render.Render.dir_work] that will contain
+ input files.
+ """
+
+ dir_output: str = Field(default=".", exclude=True)
+ """
+ Path to a directory relative to
+ [`dir_work`][configs.render.Render.dir_work] that the simulation will
+ store output files.
+ """
+
+ def render(self, with_newlines: bool = False) -> list[str]:
+ """Prepare input lines by rendering templates or combining input configuration."""
+ raise NotImplementedError
+
+ def write_render(self, file_path: str) -> None:
+ """Thin wrapper to write lines from the `render` function.
+
+ Args:
+ file_path: Path to write file.
+ """
+ lines = self.render()
+ with open(file_path, "w", encoding="utf-8") as f:
+ f.write("\n".join(lines))
diff --git a/gypsum_dl/MolObjectHandling.py b/gypsum_dl/handlers.py
similarity index 73%
rename from gypsum_dl/MolObjectHandling.py
rename to gypsum_dl/handlers.py
index 4418535..6f14538 100644
--- a/gypsum_dl/MolObjectHandling.py
+++ b/gypsum_dl/handlers.py
@@ -1,27 +1,25 @@
-##### MolObjectHandling.py
-
-# Disable the unnecessary RDKit warnings
from rdkit import Chem, RDLogger
-RDLogger.DisableLog("rdApp.*")
+RDLogger.DisableLog("rdApp.*") # type: ignore
-def check_sanitization(mol):
+def check_sanitization(mol: Chem.Mol) -> Chem.Mol | None:
"""
Given a Chem.rdchem.Mol this script will sanitize the molecule.
It will be done using a series of try/except statements so that if it fails it will return a None
rather than causing the outer script to fail.
Nitrogen Fixing step occurs here to correct for a common RDKit valence error in which Nitrogens with
- with 4 bonds have the wrong formal charge by setting it to -1.
- This can be a place to add additional correcting features for any discovered common sanitation failures.
+ 4 bonds have the wrong formal charge by setting it to -1.
+ This can be a place to add additional correcting features for any discovered common sanitation failures.
Handled here so there are no problems later.
- Inputs:
- :param Chem.rdchem.Mol mol: an rdkit molecule to be sanitized
+ Args:
+ mol: an rdkit molecule to be sanitized
+
Returns:
- :returns: Chem.rdchem.Mol mol: A sanitized rdkit molecule or None if it failed.
+ A sanitized rdkit molecule or None if it failed.
"""
if mol is None:
return None
@@ -67,7 +65,7 @@ def check_sanitization(mol):
return None if sanitize_string.name != "SANITIZE_NONE" else mol
-def handleHs(mol, protanate_step):
+def handleHs(mol: Chem.Mol, protanate_step: bool):
"""
Given a Chem.rdchem.Mol this script will sanitize the molecule, remove all non-explicit H's
and add back on all implicit H's. This is to control for any discrepencies in the smiles strings or presence and
@@ -75,13 +73,13 @@ def handleHs(mol, protanate_step):
If it fails it will return a None rather than causing the outer script to fail. Handled here so there are no problems later.
Inputs:
- :param Chem.rdchem.Mol sanitized_deprotanated_mol: an rdkit molecule already sanitized and deprotanated.
- :param bol protanate_step: True if mol needs to be protanated; False if deprotanated
- -Note if Protanated, SmilesMerge takes up to 10times longer
+ mol: an rdkit molecule already sanitized and deprotanated.
+ protanate_step: True if mol needs to be protanated; False if deprotanated
+ Note: if protanated, SmilesMerge takes up to 10 times longer
Returns:
- :returns: Chem.rdchem.Mol mol: an rdkit molecule with H's handled (either added or removed) and sanitized.
- it returns None if H's can't be added or if sanitation fails
+ An rdkit molecule with H's handled (either added or removed) and sanitized.
+ it returns None if H's can't be added or if sanitation fails
"""
mol = check_sanitization(mol)
if mol is None:
@@ -103,19 +101,17 @@ def handleHs(mol, protanate_step):
return mol
-#
-
-
-def try_deprotanation(sanitized_mol):
+def try_deprotanation(sanitized_mol: Chem.Mol) -> Chem.Mol | None:
"""
Given an already sanitize Chem.rdchem.Mol object, we will try to deprotanate the mol of all non-explicit
Hs. If it fails it will return a None rather than causing the outer script to fail.
- Inputs:
- :param Chem.rdchem.Mol mol: an rdkit molecule already sanitized.
+ Args:
+ sanitized_mol: an rdkit molecule already sanitized.
+
Returns:
- :returns: Chem.rdchem.Mol mol_sanitized: an rdkit molecule with H's removed and sanitized.
- it returns None if H's can't be added or if sanitation fails
+ An rdkit molecule with H's removed and sanitized.
+ it returns None if H's can't be removed or if sanitization fails.
"""
try:
mol = Chem.RemoveHs(sanitized_mol, sanitize=False)
@@ -125,16 +121,17 @@ def try_deprotanation(sanitized_mol):
return check_sanitization(mol)
-def try_reprotanation(sanitized_deprotanated_mol):
+def try_reprotanation(sanitized_deprotanated_mol: Chem.Mol) -> Chem.Mol | None:
"""
Given an already sanitize and deprotanate Chem.rdchem.Mol object, we will try to reprotanate the mol with
implicit Hs. If it fails it will return a None rather than causing the outer script to fail.
- Inputs:
- :param Chem.rdchem.Mol sanitized_deprotanated_mol: an rdkit molecule already sanitized and deprotanated.
+ Args:
+ sanitized_deprotanated_mol: an rdkit molecule already sanitized and deprotanated.
+
Returns:
- :returns: Chem.rdchem.Mol mol_sanitized: an rdkit molecule with H's added and sanitized.
- it returns None if H's can't be added or if sanitation fails
+ An rdkit molecule with H's added and sanitized.
+ Returns None if H's can't be added or if sanitization fails.
"""
if sanitized_deprotanated_mol is None:
@@ -148,23 +145,20 @@ def try_reprotanation(sanitized_deprotanated_mol):
return check_sanitization(mol)
-#
-
-
-def remove_atoms(mol, list_of_idx_to_remove):
+def remove_atoms(mol: Chem.Mol, list_of_idx_to_remove: list[int]) -> Chem.Mol | None:
"""
This function removes atoms from an rdkit mol based on
a provided list. The RemoveAtom function in Rdkit requires
converting the mol to an more editable version of the rdkit mol
object (Chem.EditableMol).
- Inputs:
- :param Chem.rdchem.Mol mol: any rdkit mol
- :param list list_of_idx_to_remove: a list of idx values to remove
- from mol
+ Args:
+ mol: any rdkit mol
+ list_of_idx_to_remove: a list of idx values to remove
+ from mol
Returns:
- :returns: Chem.rdchem.Mol new_mol: the rdkit mol as input but with
- the atoms from the list removed
+ the rdkit mol as input but with
+ the atoms from the list removed
"""
if mol is None:
@@ -186,16 +180,13 @@ def remove_atoms(mol, list_of_idx_to_remove):
return None
-#
-
-
-def Nitrogen_charge_adjustment(mol):
+def Nitrogen_charge_adjustment(mol: Chem.Mol) -> Chem.Mol | None:
"""
When importing ligands with sanitation turned off, one can successfully import
import a SMILES in which a Nitrogen (N) can have 4 bonds, but no positive charge.
Any 4-bonded N lacking a positive charge will fail a sanitiation check.
- -This could be an issue with importing improper SMILES, reactions, or crossing a nuetral nitrogen
- with a side chain which adds an extra bond, but doesn't add the extra positive charge.
+ This could be an issue with importing improper SMILES, reactions, or crossing a neutral nitrogen
+ with a side chain which adds an extra bond, but doesn't add the extra positive charge.
To correct for this, this function will find all N atoms with a summed bond count of 4
(ie. 4 single bonds;2 double bonds; a single and a triple bond; two single and a double bond)
@@ -204,10 +195,11 @@ def Nitrogen_charge_adjustment(mol):
RDkit treats aromatic bonds as a bond count of 1.5. But we will not try to correct for
Nitrogens labeled as Aromatic. As precaution, any N which is aromatic is skipped in this function.
- Inputs:
- :param Chem.rdchem.Mol mol: any rdkit mol
+ Args:
+ mol: any rdkit mol
+
Returns:
- :returns: Chem.rdchem.Mol mol: the same rdkit mol with the N's adjusted
+ the same rdkit mol with the N's adjusted
"""
if mol is None:
return None
@@ -233,10 +225,7 @@ def Nitrogen_charge_adjustment(mol):
return mol
-#
-
-
-def check_for_unassigned_atom(mol):
+def check_for_unassigned_atom(mol: Chem.Mol) -> Chem.Mol | None:
"""
Check there isn't a missing atom group ie. '*'
A '*' in a SMILES string is an atom with an atomic num of 0
@@ -255,10 +244,7 @@ def check_for_unassigned_atom(mol):
return mol
-#
-
-
-def handle_frag_check(mol):
+def handle_frag_check(mol: Chem.Mol) -> Chem.Mol | None:
"""
This will take a RDKit Mol object. It will check if it is fragmented.
If it has fragments it will return the largest of the two fragments.
@@ -295,6 +281,3 @@ def handle_frag_check(mol):
largest_frag_idx = frag_info_list[0][0]
largest_frag = frags[largest_frag_idx]
return largest_frag
-
-
-#
diff --git a/gypsum_dl/models/__init__.py b/gypsum_dl/models/__init__.py
new file mode 100644
index 0000000..2c0e7cd
--- /dev/null
+++ b/gypsum_dl/models/__init__.py
@@ -0,0 +1,5 @@
+from .molecule import Molecule
+from .conformers import Conformer, ConformerSet
+from .container import MoleculeContainer
+
+__all__: list[str] = ["Molecule", "MoleculeContainer", "Conformer", "ConformerSet"]
diff --git a/gypsum_dl/models/conformers.py b/gypsum_dl/models/conformers.py
new file mode 100644
index 0000000..4d8d1ec
--- /dev/null
+++ b/gypsum_dl/models/conformers.py
@@ -0,0 +1,238 @@
+import copy
+import operator
+
+from loguru import logger
+from rdkit import Chem
+from rdkit.Chem import AllChem
+
+from gypsum_dl import handlers
+
+
+class Conformer:
+ """
+ Encapsulates a single RDKit Conformer object and its associated energy.
+ """
+
+ def __init__(
+ self,
+ mol: Chem.Mol,
+ rdkit_conformer: Chem.Conformer,
+ energy: float,
+ minimized: bool = False,
+ ):
+ self._mol = mol # Keep a reference to the parent RDKit Mol for atom access
+ self._rdkit_conformer = rdkit_conformer
+ self._energy = energy
+ self._minimized = minimized
+ self._heavy_atom_ids = [
+ a.GetIdx() for a in self._mol.GetAtoms() if a.GetAtomicNum() != 1
+ ]
+
+ @property
+ def rdkit_conformer(self) -> Chem.Conformer:
+ return self._rdkit_conformer
+
+ @property
+ def energy(self) -> float:
+ return self._energy
+
+ @property
+ def is_minimized(self) -> bool:
+ return self._minimized
+
+ def minimize(self) -> None:
+ """
+ Minimizes the geometry of the current conformer if it hasn't already been optimized.
+ """
+ if self._minimized:
+ return
+
+ try:
+ # Create a temporary Mol just for minimization of this conformer
+ temp_mol = Chem.Mol(self._mol)
+ temp_mol.RemoveAllConformers()
+ temp_mol.AddConformer(self._rdkit_conformer, assignId=True)
+
+ ff = AllChem.UFFGetMoleculeForceField(temp_mol) # type: ignore
+ ff.Minimize()
+ self._energy = ff.CalcEnergy()
+ self._rdkit_conformer = temp_mol.GetConformers()[
+ 0
+ ] # Update with minimized conformer
+ self._minimized = True
+ logger.info(f"Conformer minimized to energy: {self._energy:.2f}")
+ except Exception as e:
+ logger.warning(f"Could not minimize conformer. Error: {e}")
+ self._energy = 9999.0 # Assign a high energy to mark as problematic
+
+ def align_to_me(self, other_conformer: "Conformer") -> None:
+ """
+ Aligns another conformer to this conformer in place.
+ """
+ # Create a temporary mol with both conformers to align
+ temp_mol = Chem.Mol(self._mol)
+ temp_mol.RemoveAllConformers()
+ temp_mol.AddConformer(self.rdkit_conformer, assignId=True)
+ temp_mol.AddConformer(other_conformer.rdkit_conformer, assignId=True)
+
+ AllChem.AlignMolConformers(temp_mol, atomIds=self._heavy_atom_ids) # type: ignore
+
+ # Update the other conformer's RDKit Conformer object
+ other_conformer._rdkit_conformer = temp_mol.GetConformers()[1]
+
+ def rmsd_to_me(self, other_conformer: "Conformer") -> float:
+ """
+ Calculates the RMSD between this conformer and another one.
+ Assumes both conformers belong to the same parent molecule structure.
+ """
+ # A new mol is created to ensure atom indices match and for RMSD calculation
+ temp_mol = Chem.Mol(self._mol)
+ temp_mol.RemoveAllConformers()
+ temp_mol.AddConformer(self.rdkit_conformer, assignId=True)
+ temp_mol.AddConformer(other_conformer.rdkit_conformer, assignId=True)
+
+ # NOTE: handlers.try_deprotanation could be problematic if it changes atom order.
+ # For an RMSD calculation, atom indexing must be consistent between the two
+ # conformers being compared; if the mol object itself is modified, the RMSD
+ # calculation can break. We assume here that the mol passed to Conformer and
+ # used by rmsd_to_me always has the same atom order for both conformers.
+ mol_for_rmsd = handlers.try_deprotanation(temp_mol)
+ if mol_for_rmsd is None:
+ raise RuntimeError(
+ "Failed to prepare molecule for RMSD calculation (deprotonation failed)."
+ )
+ return AllChem.GetConformerRMS(mol_for_rmsd, 0, 1, prealigned=True)
+
+ def get_positions(self) -> list[list[float]]:
+ """Returns the 3D coordinates of the conformer."""
+ return self._rdkit_conformer.GetPositions().tolist()
+
+
+class ConformerSet:
+ """
+ Manages a collection of conformers for a specific molecule.
+ """
+
+ def __init__(self, molecule: Chem.Mol):
+ self._molecule_template = (
+ molecule # Store a template Mol for generating new conformers
+ )
+ self._conformers: list[Conformer] = []
+
+ def generate_conformers(
+ self,
+ num_to_generate: int,
+ use_random_coordinates: bool = False,
+ embed_second_try: bool = False,
+ ) -> None:
+ """
+ Generates new conformers for the associated molecule.
+ """
+ initial_count = len(self._conformers)
+ num_needed = num_to_generate - initial_count
+
+ for _ in range(num_needed):
+ mol_copy = Chem.Mol(self._molecule_template) # Work on a copy
+ mol_copy.RemoveAllConformers() # Ensure no existing conformers
+
+ params = None
+ try:
+ params = AllChem.ETKDGv3() # type: ignore
+ except AttributeError: # Fallback for older RDKit versions
+ try:
+ params = AllChem.ETKDGv2() # type: ignore
+ except AttributeError:
+ params = AllChem.ETKDG() # type: ignore
+
+ params.enforceChirality = True
+ params.maxIterations = 0
+ params.useRandomCoords = use_random_coordinates
+
+ try:
+ AllChem.EmbedMolecule(mol_copy, params) # type: ignore
+ except Exception as e:
+ logger.warning(
+ f"Initial conformer embedding failed for {Chem.MolToSmiles(mol_copy)}. Error: {e}"
+ )
+ if mol_copy.GetNumConformers() == 0 and embed_second_try:
+ try:
+ AllChem.EmbedMolecule( # type: ignore
+ mol_copy, useRandomCoords=use_random_coordinates
+ )
+ except Exception as e_second:
+ logger.warning(
+ f"Second conformer embedding failed. Error: {e_second}"
+ )
+
+ if mol_copy.GetNumConformers() > 0:
+ rdkit_conf = mol_copy.GetConformers()[0]
+ try:
+ ff = AllChem.UFFGetMoleculeForceField(mol_copy) # type: ignore
+ energy = ff.CalcEnergy()
+ except Exception as e:
+ logger.warning(
+ f"Could not calculate energy for generated conformer. Error: {e}"
+ )
+ energy = 9999.0
+ self._conformers.append(
+ Conformer(self._molecule_template, rdkit_conf, energy)
+ )
+ else:
+ logger.warning("Failed to generate a conformer.")
+
+ self.sort_conformers()
+
+ def minimize_all(self) -> None:
+ """Minimizes all conformers in the set."""
+ for conf in self._conformers:
+ conf.minimize()
+ self.sort_conformers()
+
+ def eliminate_structurally_similar(self, rmsd_cutoff: float = 0.1) -> None:
+ """
+ Eliminates conformers that are very geometrically similar based on RMSD.
+ Assumes conformers are already sorted by energy for better selection.
+ """
+ if not self._conformers:
+ return
+
+ unique_conformers: list[Conformer] = []
+ if self._conformers:
+ unique_conformers.append(self._conformers[0])
+
+ for i in range(1, len(self._conformers)):
+ is_unique = True
+ current_conf = self._conformers[i]
+ for unique_conf in unique_conformers:
+ # Align current_conf to unique_conf before calculating RMSD
+ aligned_current_conf = copy.deepcopy(
+ current_conf
+ ) # Avoid modifying original during check
+ unique_conf.align_to_me(aligned_current_conf)
+ rmsd = unique_conf.rmsd_to_me(aligned_current_conf)
+ if rmsd <= rmsd_cutoff:
+ is_unique = False
+ break
+ if is_unique:
+ unique_conformers.append(current_conf)
+
+ self._conformers = unique_conformers
+ logger.info(
+ f"Reduced to {len(self._conformers)} unique conformers after RMSD cutoff of {rmsd_cutoff}."
+ )
+ self.sort_conformers()
+
+ def sort_conformers(self) -> None:
+ """Sorts conformers by energy."""
+ self._conformers.sort(key=operator.attrgetter("energy"))
+
+ def get_conformers(self) -> list[Conformer]:
+ """Returns the list of managed Conformer objects."""
+ return self._conformers
+
+ def load_into_rdkit_mol(self, target_mol: Chem.Mol) -> None:
+ """Loads all stored conformers into a given RDKit Mol object."""
+ target_mol.RemoveAllConformers()
+ for conf_obj in self._conformers:
+ target_mol.AddConformer(conf_obj.rdkit_conformer, assignId=True)
+ logger.info(f"Loaded {len(self._conformers)} conformers into RDKit Mol object.")
diff --git a/gypsum_dl/models/container.py b/gypsum_dl/models/container.py
new file mode 100644
index 0000000..32f4c21
--- /dev/null
+++ b/gypsum_dl/models/container.py
@@ -0,0 +1,111 @@
+from loguru import logger
+
+from gypsum_dl.models.conformers import ConformerSet
+from gypsum_dl.models.molecule import Molecule
+
+
+class MoleculeContainer:
+ """
+ A container to group related Molecule objects (e.g., tautomers, protonation states)
+ and manage their preparation workflow, including conformer generation.
+ """
+
+ def __init__(self, initial_molecule: Molecule, container_id: int | None = None):
+ self._initial_molecule = (
+ initial_molecule # The original input molecule (e.g., desalted)
+ )
+ self._variants: list[Molecule] = [
+ initial_molecule
+ ] # Different tautomers, stereoisomers etc.
+ self._container_id = container_id
+ # Using dict[str, ConformerSet] for type hinting
+ self._conformer_sets: dict[
+ str, ConformerSet
+ ] = {} # Key: unique identifier for a variant (e.g., canonical SMILES)
+
+ # Initialize a conformer set for the initial molecule
+ initial_mol_key = initial_molecule.canonical_smiles(True)
+ self.name: str = initial_molecule.name
+ if initial_mol_key is None:
+ logger.warning(
+ f"Initial molecule {initial_molecule.name} has no canonical SMILES, cannot manage conformers effectively."
+ )
+ else:
+ self._conformer_sets[initial_mol_key] = ConformerSet(
+ initial_molecule.rdkit_mol
+ )
+
+ @property
+ def initial_molecule(self) -> Molecule:
+ return self._initial_molecule
+
+ @property
+ def container_id(self) -> int | None:
+ return self._container_id
+
+ def add_variant(self, molecule_variant: Molecule) -> None:
+ """Adds a new molecular variant to this container."""
+ if molecule_variant not in self._variants:
+ self._variants.append(molecule_variant)
+ # Each variant might have its own conformers
+ variant_key = molecule_variant.canonical_smiles(True)
+ if variant_key is None:
+ logger.warning(
+ f"Variant {molecule_variant.name} has no canonical SMILES, cannot manage conformers effectively."
+ )
+ else:
+ self._conformer_sets[variant_key] = ConformerSet(
+ molecule_variant.rdkit_mol
+ )
+ logger.info(
+ f"Added variant {molecule_variant.name or molecule_variant.canonical_smiles(True)} to container."
+ )
+
+ def get_variants(self) -> list[Molecule]:
+ """Returns all molecular variants in this container."""
+ return self._variants
+
+ def get_conformer_set(self, molecule_variant: Molecule) -> ConformerSet | None:
+ """Retrieves the ConformerSet for a specific molecular variant."""
+ return self._conformer_sets.get(molecule_variant.canonical_smiles(True) or "")
+
+ def generate_all_conformers(
+ self,
+ num_conformers: int,
+ rmsd_cutoff: float = 0.1,
+ minimize: bool = True,
+ use_random_coords: bool = False,
+ ) -> None:
+ """
+ Generates, minimizes, and filters conformers for all variants in the container.
+ """
+ for variant in self._variants:
+ conformer_set = self.get_conformer_set(variant)
+ if conformer_set:
+ logger.info(
+ f"Generating conformers for variant: {variant.name or variant.canonical_smiles(True)}"
+ )
+ conformer_set.generate_conformers(
+ num_conformers, use_random_coordinates=use_random_coords
+ )
+ if minimize:
+ conformer_set.minimize_all()
+ conformer_set.eliminate_structurally_similar(rmsd_cutoff)
+ logger.info(
+ f"Finished conformer generation for {variant.name or variant.canonical_smiles(True)}. Found {len(conformer_set.get_conformers())} unique conformers."
+ )
+ else:
+ logger.warning(
+ f"No ConformerSet found for variant: {variant.name or variant.canonical_smiles(True)}. Skipping conformer generation for this variant."
+ )
+
+ def load_conformers_into_all_rdkit_mols(self) -> None:
+ """Loads the generated conformers back into the RDKit Mol objects of all variants."""
+ for variant in self._variants:
+ conformer_set = self.get_conformer_set(variant)
+ if conformer_set and conformer_set.get_conformers():
+ conformer_set.load_into_rdkit_mol(variant.rdkit_mol)
+ else:
+ logger.info(
+ f"No conformers to load for variant: {variant.name or variant.canonical_smiles(True)}"
+ )
diff --git a/gypsum_dl/models/molecule.py b/gypsum_dl/models/molecule.py
new file mode 100644
index 0000000..d8086c4
--- /dev/null
+++ b/gypsum_dl/models/molecule.py
@@ -0,0 +1,347 @@
+"""
+This module contains classes and functions for processing individual molecules
+(variants).
+"""
+
+from typing import Any
+
+import copy
+from collections.abc import Container
+
+from loguru import logger
+from molvs import standardize_smiles as ssmiles
+from rdkit import Chem, RDLogger
+from rdkit.Chem import BondStereo
+
+from gypsum_dl import handlers
+
+RDLogger.DisableLog("rdApp.*") # type: ignore
+
+
+class Molecule:
+ """
+ A class that wraps around an RDKit Mol object, focusing on its core
+ structural and chemical properties.
+ """
+
+ def __init__(self, rdkit_mol: Chem.Mol, name: str = ""):
+ """
+ Initialize the Molecule object with an RDKit Mol object.
+ Sanitization is performed upon initialization.
+ """
+ if rdkit_mol is None:
+ raise ValueError("RDKit Mol object cannot be None.")
+
+ # Ensure the RDKit Mol is properly sanitized
+ self._rdkit_mol = handlers.check_sanitization(rdkit_mol)
+ if self._rdkit_mol is None:
+ raise ValueError("Could not sanitize the provided RDKit Mol.")
+
+ self._name = name
+ self._orig_smiles = Chem.MolToSmiles(
+ self._rdkit_mol, isomericSmiles=True, canonical=False
+ )
+ self._can_smiles: str | None = None
+ self._can_smiles_noh: str | None = None
+ self._standardized_smiles: str | None = None
+ self._mol_props: dict[str, Any] = {}
+ self._genealogy: list[str] = []
+
+ # Cached properties
+ self._nonaro_ring_atom_idx: list[list[int]] | None = None
+ self._chiral_centers_assigned: list[Any] | None = None
+ self._chiral_centers_unassigned: list[Any] | None = None
+ self._has_bizarre_substructure: bool | None = None
+ self._fragments: Container[Chem.Mol] | None = None
+
+ @classmethod
+ def from_smiles(cls, smiles: str, name: str = "") -> "Molecule":
+ """Factory method to create a Molecule from a SMILES string."""
+ try:
+ # sanitize=False to respect double-bond stereochemistry initially
+ mol = Chem.MolFromSmiles(smiles, sanitize=False)
+ if mol is None:
+ raise ValueError(f"Could not create RDKit Mol from SMILES: {smiles}")
+ return cls(mol, name)
+ except Exception as e:
+ logger.error(f"Failed to create Molecule from SMILES '{smiles}': {e}")
+ raise
+
+ @classmethod
+ def from_rdkit_mol(cls, rdkit_mol: Chem.Mol, name: str = "") -> "Molecule":
+ """Factory method to create a Molecule from an existing RDKit Mol object."""
+ return cls(rdkit_mol, name)
+
+ @property
+ def rdkit_mol(self) -> Chem.Mol:
+ return self._rdkit_mol
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @name.setter
+ def name(self, new_name: str) -> None:
+ self._name = new_name
+
+ @property
+ def original_smiles(self) -> str:
+ return self._orig_smiles
+
+ def canonical_smiles(self, include_hydrogens: bool = True) -> str | None:
+ """
+ Get the canonical SMILES string associated with this object.
+ Caches the result.
+ """
+ if include_hydrogens:
+ if self._can_smiles is None:
+ try:
+ self._can_smiles = Chem.MolToSmiles(
+ self._rdkit_mol, isomericSmiles=True, canonical=True
+ )
+ except Exception:
+ logger.warning(
+ f"Couldn't generate canonical SMILES for {self._orig_smiles} ({self._name})."
+ )
+ self._can_smiles = None
+ return self._can_smiles
+ else:
+ if self._can_smiles_noh is None:
+ # Temporarily deprotonate to get SMILES without explicit hydrogens
+ deprotonated_mol = handlers.try_deprotanation(
+ copy.deepcopy(self._rdkit_mol)
+ )
+ if deprotonated_mol is not None:
+ self._can_smiles_noh = Chem.MolToSmiles(
+ deprotonated_mol, isomericSmiles=True, canonical=True
+ )
+ else:
+ logger.warning(
+ f"Could not deprotonate molecule for non-H canonical SMILES: {self._orig_smiles}"
+ )
+ self._can_smiles_noh = None
+ return self._can_smiles_noh
+
+ @property
+ def standardized_smiles(self) -> str | None:
+ """Returns the standardized SMILES string, computing and caching if necessary."""
+ if self._standardized_smiles is None:
+ # It's important to pass a string to ssmiles.
+ can_smi = self.canonical_smiles(include_hydrogens=True)
+ if can_smi is None:
+ logger.warning(
+ f"Cannot standardize SMILES for {self.original_smiles} because canonical SMILES is None."
+ )
+ self._standardized_smiles = None
+ else:
+ try:
+ self._standardized_smiles = ssmiles(can_smi) # type: ignore
+ except Exception:
+ logger.info(
+ f"Could not standardize SMILES for {can_smi}. Skipping."
+ )
+ self._standardized_smiles = (
+ can_smi # Fallback to canonical if standardization fails
+ )
+ return self._standardized_smiles
+
+ def add_to_genealogy(self, step_description: str) -> None:
+ """Adds a step to the molecule's genealogy (preparation history)."""
+ self._genealogy.append(step_description)
+
+ @property
+ def genealogy(self) -> list[str]:
+ return self._genealogy
+
+ def set_property(self, key: str, value: Any) -> None:
+ """Set a custom molecular property."""
+ self._mol_props[key] = value
+ self.update_rdkit_mol_properties() # Update RDKit Mol too
+
+ def get_property(self, key: str) -> Any:
+ """Get a custom molecular property."""
+ return self._mol_props.get(key)
+
+ def update_rdkit_mol_properties(self) -> None:
+ """Copies all stored properties to the RDKit Mol object."""
+ self._rdkit_mol.SetProp("SMILES", self.canonical_smiles(True) or "N/A")
+ if self.original_smiles:
+ self._rdkit_mol.SetProp("ORIGINAL_SMILES", self.original_smiles)
+ if self._name:
+ self._rdkit_mol.SetProp("_Name", self._name)
+ if self.genealogy:
+ self._rdkit_mol.SetProp("Genealogy", "\n".join(self.genealogy))
+ for prop_key, prop_val in self._mol_props.items():
+ try:
+ self._rdkit_mol.SetProp(prop_key, str(prop_val))
+ except Exception as e:
+ logger.warning(
+ f"Could not set RDKit property '{prop_key}' with value '{prop_val}': {e}"
+ )
+
+ def add_explicit_hydrogens(self) -> None:
+ """Adds explicit hydrogen atoms to the RDKit Mol object."""
+ new_mol = handlers.try_reprotanation(self._rdkit_mol)
+ if new_mol is None:
+ raise RuntimeError("Failed to add explicit hydrogens to molecule.")
+ self._rdkit_mol = new_mol
+ self._can_smiles = None # Invalidate cached SMILES
+ self._can_smiles_noh = None
+
+ def remove_explicit_hydrogens(self) -> None:
+ """Removes explicit hydrogen atoms from the RDKit Mol object."""
+ new_mol = handlers.try_deprotanation(self._rdkit_mol)
+ if new_mol is None:
+ raise RuntimeError("Failed to remove explicit hydrogens from molecule.")
+ self._rdkit_mol = new_mol
+ self._can_smiles = None # Invalidate cached SMILES
+ self._can_smiles_noh = None
+
+ @property
+ def non_aromatic_ring_atom_indices(self) -> list[list[int]]:
+ """Identifies and returns indices of atoms in non-aromatic rings, caching the result."""
+ if self._nonaro_ring_atom_idx is None:
+ if self._rdkit_mol is None:
+ self._nonaro_ring_atom_idx = []
+ else:
+ ssr = Chem.GetSymmSSSR(self._rdkit_mol)
+ ring_indices = [list(ssr[i]) for i in range(len(ssr))]
+ non_aromatic_rings = []
+ for ring_idx_set in ring_indices:
+ if any(
+ not self._rdkit_mol.GetAtomWithIdx(atm_idx).GetIsAromatic()
+ for atm_idx in ring_idx_set
+ ):
+ non_aromatic_rings.append(ring_idx_set)
+ self._nonaro_ring_atom_idx = non_aromatic_rings
+ return self._nonaro_ring_atom_idx
+
+ @property
+ def chiral_centers_assigned(self) -> list[Any]:
+ """Returns a list of assigned chiral centers, caching the result."""
+ if self._chiral_centers_assigned is None:
+ if self._rdkit_mol is None:
+ self._chiral_centers_assigned = []
+ else:
+ self._chiral_centers_assigned = Chem.FindMolChiralCenters(
+ self._rdkit_mol, includeUnassigned=False
+ )
+ return self._chiral_centers_assigned
+
+ @property
+ def chiral_centers_unassigned(self) -> list[Any]:
+ """Returns a list of chiral centers including unassigned ones, caching the result."""
+ if self._chiral_centers_unassigned is None:
+ if self._rdkit_mol is None:
+ self._chiral_centers_unassigned = []
+ else:
+ self._chiral_centers_unassigned = Chem.FindMolChiralCenters(
+ self._rdkit_mol, includeUnassigned=True
+ )
+ return self._chiral_centers_unassigned
+
+ @property
+ def double_bonds_without_stereochemistry(self) -> list[int]:
+ """Returns indices of double bonds without specified stereochemistry."""
+ if self._rdkit_mol is None:
+ return []
+ return [
+ b.GetIdx()
+ for b in self._rdkit_mol.GetBonds()
+ if b.GetBondTypeAsDouble() == 2 and b.GetStereo() is BondStereo.STEREONONE
+ ]
+
+ @property
+ def has_bizarre_substructure(self) -> bool:
+ """
+ Checks for and caches whether the molecule contains improbable substructures.
+ """
+ if self._has_bizarre_substructure is None:
+ if self._rdkit_mol is None:
+ self._has_bizarre_substructure = True # No mol is bizarre
+ return True
+
+ prohibited_substructures = [
+ "O(=*)-*",
+ "C(=[CH2])[OH]",
+ "C(=[CH2])[O-]",
+ "C=C([OH])[OH]",
+ "C=C([O-])[OH]",
+ "C=C([O-])[O-]",
+ "[C-]",
+ "[c-]",
+ ]
+
+ for s in prohibited_substructures:
+ pattrn = Chem.MolFromSmarts(s)
+ if pattrn is not None and self._rdkit_mol.HasSubstructMatch(pattrn):
+ logger.info(
+ f"Detected unusual substructure: {s} in {self.canonical_smiles(True)}"
+ )
+ self._has_bizarre_substructure = True
+ return True
+ self._has_bizarre_substructure = False
+ return self._has_bizarre_substructure
+
+ @property
+ def fragments(self) -> Container[Chem.Mol]:
+ """Divides the molecule into fragments if disconnected, caching the result."""
+ if self._fragments is None:
+ if "." not in self._orig_smiles:
+ self._fragments = [self.rdkit_mol] # A list containing only itself
+ else:
+ self._fragments = Chem.GetMolFrags(self._rdkit_mol, asMols=True)
+ return self._fragments
+
+ def count_hydrogens_bound_to_carbons(self) -> int:
+ """Count the number of Hydrogens bound to carbons."""
+ if self._rdkit_mol is None:
+ return 0
+
+ total_hydrogens_counted = 0
+ for atom in self._rdkit_mol.GetAtoms():
+ if atom.GetSymbol() == "C":
+ total_hydrogens_counted += atom.GetTotalNumHs(includeNeighbors=True)
+ return total_hydrogens_counted
+
+ def __hash__(self) -> int:
+ """Allows hashing based on the canonical SMILES."""
+ can_smi = self.canonical_smiles(include_hydrogens=True)
+ return hash(can_smi) if can_smi else 0
+
+ def __eq__(self, other: object) -> bool:
+ """Compares Molecule objects based on canonical SMILES."""
+ if not isinstance(other, Molecule):
+ return NotImplemented
+ return self.canonical_smiles(True) == other.canonical_smiles(True)
+
+ def __ne__(self, other: object) -> bool:
+ return not self.__eq__(other)
+
+ def __lt__(self, other: "Molecule") -> bool:
+ if not isinstance(other, Molecule):
+ return NotImplemented
+ return (self.canonical_smiles(True) or "") < (
+ other.canonical_smiles(True) or ""
+ )
+
+ def __le__(self, other: "Molecule") -> bool:
+ if not isinstance(other, Molecule):
+ return NotImplemented
+ return (self.canonical_smiles(True) or "") <= (
+ other.canonical_smiles(True) or ""
+ )
+
+ def __gt__(self, other: "Molecule") -> bool:
+ if not isinstance(other, Molecule):
+ return NotImplemented
+ return (self.canonical_smiles(True) or "") > (
+ other.canonical_smiles(True) or ""
+ )
+
+ def __ge__(self, other: "Molecule") -> bool:
+ if not isinstance(other, Molecule):
+ return NotImplemented
+ return (self.canonical_smiles(True) or "") >= (
+ other.canonical_smiles(True) or ""
+ )
diff --git a/gypsum_dl/parallelizer.py b/gypsum_dl/parallelizer.py
index bd585cb..bb824cc 100644
--- a/gypsum_dl/parallelizer.py
+++ b/gypsum_dl/parallelizer.py
@@ -1,6 +1,4 @@
"""
-Parallelizer.py
-
Abstract parallel computation utility.
The "parallelizer" object exposes a simple map interface that takes a function
@@ -17,13 +15,14 @@
import multiprocessing
import sys
+from collections.abc import Collection
try:
import mpi4py
- MPI_installed = True
-except Exception:
- MPI_installed = False
+ HAS_MPI = True
+except ImportError:
+ HAS_MPI = False
class Parallelizer(object):
@@ -128,35 +127,40 @@ def __init__(
else:
self.num_procs = self.compute_nodes()
- def test_import_MPI(self, mode, flag_for_low_level=False):
+ def test_import_MPI(self, mode: str, flag_for_low_level: bool = False) -> bool:
"""
This tests for the ability of importing the MPI sublibrary from mpi4py.
- This import is problematic when run inside a program which was already mpi parallelized (ie a program run inside an mpi program)
- - for some reason from mpi4py import MPI is problematic in this sub-program structuring.
- To prevent these errors we do a quick check outside the class with a Try statement to import mpi4py
- - if it can't do the import mpi4py than the API isn't installed and we can't run MPI, so we won't even attempt from mpi4py import MPI
+ This import is problematic when run inside a program which was already MPI
+ parallelized (i.e., a program run inside an MPI program).
+ For some reason, `from mpi4py import MPI` is problematic in this sub-program
+ structuring.
+
+ To prevent these errors, we do a quick check outside the class with a
+ try statement to `import mpi4py`. If that import fails, then the API isn't
+ installed and we can't run MPI, so we won't even attempt
+ `from mpi4py import MPI`.
it then checks if the mode has been already establish or if there is a low level flag.
If the user explicitly or implicitly asks for mpi (ie mode=None or mode="mpi") without flags and mpi4py is installed, then we will
run the from mpi4py import MPI check. if it passes then we will return a True and run mpi mode; if not we return False and run multiprocess
- Inputs:
- :param str mode: the multiprocess mode to be used, ie) serial, multiprocessing, mpi, or None:
- if None then we will try to pick a possible multiprocessing choice. This should only be used for
- top level coding. It is best practice to specify which multiprocessing choice to use.
- if you have smaller programs used by a larger program, with both mpi enabled there will be problems, so specify multiprocessing is important.
- :param int num_procs: the number of processors or nodes that will be used. If None than we will use all available nodes/processors
- This will be overriden and fixed to a single processor if mode==serial
- :param bol flag_for_low_level: this will override mode and number of processors and set it to a multiprocess as serial. This is useful because
- a low-level program in mpi mode referenced by a top level program in mpi mode will have terrible problems. This means you can't mpi-multiprocess inside an mpi-multiprocess.
+ Args:
+ mode: the multiprocess mode to be used, i.e., serial, multiprocessing, mpi, or None.
+ If None, then we will try to pick a possible multiprocessing choice. This should only be used for
+ top level coding. It is best practice to specify which multiprocessing choice to use.
+ If you have smaller programs used by a larger program, with both mpi enabled, there will be problems, so specifying the multiprocessing mode is important.
+ num_procs: the number of processors or nodes that will be used. If None, then we will use all available nodes/processors.
+ This will be overridden and fixed to a single processor if mode==serial.
+ flag_for_low_level: this will override mode and number of processors and set it to a multiprocess as serial. This is useful because
+ a low-level program in mpi mode referenced by a top level program in mpi mode will have terrible problems. This means you can't mpi-multiprocess inside an mpi-multiprocess.
Returns:
- :returns: bol bol: Returns True if MPI can be run and there aren't any flags against running mpi mode
- Returns False if it cannot or should not run mpi mode.
+ Returns True if MPI can be run and there aren't any flags against running mpi mode
+ Returns False if it cannot or should not run mpi mode.
"""
- if MPI_installed == False:
+ if HAS_MPI == False:
# mpi4py isn't installed and we will need to multiprocess
return False
@@ -204,7 +208,7 @@ def check_mpi_available(self):
return False
return True
- def start(self, mode: str | None = None):
+ def start(self, mode: str | None = None) -> "ParallelMPI | None":
"""
One must call this method before `run()` in order to configure MPI parallelization
@@ -219,11 +223,12 @@ def start(self, mode: str | None = None):
if None then we will try to pick a possible multiprocessing choice. This should only be used for
top level coding. It is best practice to specify which multiprocessing choice to use.
if you have smaller programs used by a larger program, with both mpi enabled there will be problems, so specify multiprocessing is important.
+
Returns:
- :returns: class parallel_obj: This is the obstantiated object of the class of parallizations.
- ie) if self.mode=='mpi' self.parallel_obj will be an instance of the mpi class
- This style of retained parallel_obj to be used later is important because this is the object which controls the work nodes and maintains the mpi universe
- self.parallel_obj will be set to None for simpler parallization methods like serial
+ This is the instantiated object of the parallelization class.
+ ie) if self.mode=='mpi' self.parallel_obj will be an instance of the mpi class
+ This style of retained parallel_obj to be used later is important because this is the object which controls the work nodes and maintains the mpi universe
+ self.parallel_obj will be set to None for simpler parallelization methods like serial
"""
if mode is None:
@@ -240,16 +245,15 @@ def start(self, mode: str | None = None):
return None
- def end(self, mode=None):
+ def end(self, mode: str | None = None) -> None:
"""
Call this method before exit to terminate MPI workers
-
- Inputs:
- :param str mode: the multiprocess mode to be used, ie) serial, multiprocessing, mpi, or None:
- if None then we will try to pick a possible multiprocessing choice. This should only be used for
- top level coding. It is best practice to specify which multiprocessing choice to use.
- if you have smaller programs used by a larger program, with both mpi enabled there will be problems, so specify multiprocessing is important.
+ Args:
+ mode: the multiprocess mode to be used, ie) serial, multiprocessing, mpi, or None:
+ if None then we will try to pick a possible multiprocessing choice. This should only be used for
+ top level coding. It is best practice to specify which multiprocessing choice to use.
+ if you have smaller programs used by a larger program, with both mpi enabled there will be problems, so specifying the multiprocessing mode is important.
"""
if mode is None:
@@ -262,7 +266,13 @@ def end(self, mode=None):
else:
raise Exception("mpi4py package must be available to use mpi mode")
- def run(self, args, func, num_procs=None, mode=None):
+ def run(
+ self,
+ args: Collection[Any],
+ func: Any,
+ num_procs: int | None = None,
+ mode: str | None = None,
+ ) -> list[Any]:
"""
Run a task in parallel across the system.
@@ -278,18 +288,19 @@ def run(self, args, func, num_procs=None, mode=None):
func = foo The namespace of foo
- Inputs:
- :param python_obj func: This is the object of the function which will be used.
- :param list args: a list of lists/tuples, each sublist/tuple must contain all information required by the function for a single object which will be multiprocessed
- :param int num_procs: (Primarily for Developers) the number of processors or nodes that will be used. If None than we will use all available nodes/processors
- This will be overriden and fixed to a single processor if mode==serial
- :param str mode: (Primarily for Developers) the multiprocess mode to be used, ie) serial, multiprocessing, mpi, or None:
- if None then we will try to pick a possible multiprocessing choice. This should only be used for
- top level coding. It is best practice to specify which multiprocessing choice to use.
- if you have smaller programs used by a larger program, with both mpi enabled there will be problems, so specify multiprocessing is important.
- BEST TO LEAVE THIS BLANK
+ Args:
+ args: a list of lists/tuples, each sublist/tuple must contain all information required by the function for a single object which will be multiprocessed
+ func: This is the object of the function which will be used.
+ num_procs: (Primarily for Developers) the number of processors or nodes that will be used. If None then we will use all available nodes/processors
+ This will be overridden and fixed to a single processor if mode==serial
+ mode: (Primarily for Developers) the multiprocess mode to be used, ie) serial, multiprocessing, mpi, or None:
+ if None then we will try to pick a possible multiprocessing choice. This should only be used for
+ top level coding. It is best practice to specify which multiprocessing choice to use.
+ if you have smaller programs used by a larger program, with both mpi enabled there will be problems, so specifying the multiprocessing mode is important.
+ BEST TO LEAVE THIS BLANK
+
Returns:
- :returns: list results: A list containing all the results from the multiprocess
+ A list containing all the results from the multiprocess
"""
# determine the mode
@@ -356,29 +367,26 @@ def pick_mode(self):
# default to multiprocessing
return "multiprocessing"
- def return_mode(self):
+ def return_mode(self) -> str:
"""
- Returns the mode chosen for the parallelization cababilities of the system and returns one
+ Returns the mode chosen for the parallelization capabilities of the system and returns one
of the following modes depending on the configuration:
- :param str mode: the multiprocess mode to be used, ie) serial, multiprocessing, mpi, or None:
- if None then we will try to pick a possible multiprocessing choice. This should only be used for
- top level coding. It is best practice to specify which multiprocessing choice to use.
- if you have smaller programs used by a larger program, with both mpi enabled there will be problems, so specify multiprocessing is important.
- BEST TO LEAVE THIS BLANK
+
Returns:
- :returns: str mode: the mode which is to be used 'mpi', 'multiprocessing', 'serial'
+ the mode which is to be used 'mpi', 'multiprocessing', 'serial'
"""
return self.mode
- def compute_nodes(self, mode=None):
+ def compute_nodes(self, mode: str | None = None) -> int:
"""
Computes the number of "compute nodes" according to the selected mode.
For mpi, this is the universe size
For multiprocessing this is the number of available cores
For serial, this value is 1
+
Returns:
- :returns: int num_procs: the number of nodes/processors which is to be used
+ the number of nodes/processors which is to be used
"""
if mode is None:
mode = self.mode
@@ -392,15 +400,16 @@ def compute_nodes(self, mode=None):
else:
return 1
- def return_node(self):
+ def return_node(self) -> int:
"""
Returns the number of "compute nodes" according to the selected mode.
For mpi, this is the universe size
For multiprocessing this is the number of available cores
For serial, this value is 1
+
Returns:
- :returns: int num_procs: the number of nodes/processors which is to be used
+ the number of nodes/processors which is to be used
"""
return self.num_procs
@@ -410,7 +419,7 @@ class ParallelMPI(object):
Utility code for running tasks in parallel across an MPI cluster.
"""
- def __init__(self):
+ def __init__(self) -> None:
"""
Default num_procs is all the processesors possible
"""
@@ -419,7 +428,7 @@ def __init__(self):
self.Empty_object = Empty_obj()
- def start(self):
+ def start(self) -> None:
"""
Call this method at the beginning of program execution to put non-root processors
into worker mode.
@@ -432,14 +441,14 @@ def start(self):
else:
worker = self._worker()
- def end(self):
+ def end(self) -> None:
"""
Call this method to terminate worker processes
"""
self.COMM.bcast(None, root=0)
- def _worker(self):
+ def _worker(self) -> None:
"""
Worker processors wait in this function to receive new jobs
"""
@@ -543,7 +552,6 @@ def check_and_format_args(self, args):
if type(args[i]) == item_type:
continue
printout = "all items within args must be the same type and must be either a list or tuple"
- print(printout)
raise Exception(printout)
if item_type == list:
return args
@@ -559,8 +567,8 @@ def run(self, func, args):
"""
Run a function in parallel across the current MPI cluster.
- * func is a pure function of type (A)->(B)
- * args is a list of type list(A)
+ - func is a pure function of type (A)->(B)
+ - args is a list of type list(A)
This method batches the computation across the MPI cluster and returns
the result of type list(B) where result[i] = func(args[i]).
@@ -583,7 +591,7 @@ def run(self, func, args):
# scatter argument chunks to workers
args_chunk = self.COMM.scatter(args_chunk, root=0)
- if type(args_chunk) != list:
+ if not isinstance(args_chunk, list):
raise Exception("args_chunk needs to be a list")
# perform the calculation and get results
@@ -592,7 +600,7 @@ def run(self, func, args):
result_chunk = self.COMM.gather(result_chunk, root=0)
- if type(result_chunk) != list:
+ if not isinstance(result_chunk, list):
raise Exception("result_chunk needs to be a list")
# group results
@@ -602,7 +610,7 @@ def run(self, func, args):
results = [x for x in results if type(x) != type(self.Empty_object)]
results = flatten_list(results)
- if type(results) != list:
+ if not isinstance(results, list):
raise Exception("results needs to be a list")
results = [x for x in results if type(x) != type(self.Empty_object)]
@@ -621,16 +629,6 @@ class Empty_obj(object):
pass
-#
-
-
-"""
-Run commands on multiple processors in python.
-
-Adapted from examples on https://docs.python.org/2/library/multiprocessing.html
-"""
-
-
def MultiThreading(inputs, num_procs, task_name):
"""Initialize this object.
@@ -671,11 +669,6 @@ def MultiThreading(inputs, num_procs, task_name):
return results
-###
-# Worker function
-###
-
-
def worker(input, output):
for seq, job in iter(input.get, "STOP"):
func, args = job
@@ -710,15 +703,17 @@ def check_and_format_inputs_to_list_of_tuples(args):
raise Exception(printout)
-def count_processors(num_inputs, num_procs):
+def count_processors(num_inputs: int, num_procs: int) -> int:
"""
Checks processors available and returns a safe number of them to
utilize.
- :param int num_inputs: The number of inputs.
- :param int num_procs: The number of desired processors.
+ Args:
+ num_inputs: The number of inputs.
+ num_procs: The number of desired processors.
- :returns: The number of processors to use.
+ Returns:
+ The number of processors to use.
"""
# first, if num_procs <= 0, determine the number of processors to
# use programatically
@@ -761,7 +756,7 @@ def start_processes(inputs, num_procs):
return [item[1] for item in map(list, results)]
-def flatten_list(tier_list: list) -> list:
+def flatten_list(tier_list: list[Any]) -> list[Any]:
"""
Given a list of lists, this returns a flat list of all items.
@@ -787,9 +782,11 @@ def strip_none(none_list: list[Any]) -> list[Any]:
Given a list that might contain None items, this returns a list with no
None items.
- :params list none_list: A list that may contain None items.
+ Args:
+ none_list: A list that may contain None items.
- :returns: A list stripped of None items.
+ Returns:
+ A list stripped of None items.
"""
return [] if none_list is None else [x for x in none_list if x is not None]
diff --git a/gypsum_dl/run.py b/gypsum_dl/run.py
index 76fed23..930e39e 100644
--- a/gypsum_dl/run.py
+++ b/gypsum_dl/run.py
@@ -1,7 +1,8 @@
import argparse
import copy
-from gypsum_dl import utils
+from loguru import logger
+
from gypsum_dl.start import prepare_molecules
@@ -19,49 +20,49 @@ def main():
1. Prepare a virtual library and save all 3D models to a single SDF file in the
present directory:
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi
2. Instead save all 3D models to a different, existing folder:
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \\
--output_folder /my/folder/
3. Additionally save the models associated with each input molecule to
separate files:
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \\
--output_folder /my/folder/ --separate_output_files
4. In addition to saving a 3D SDF file, also save 3D PDB files and an HTML file
with 2D structures (for debugging).
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \\
--output_folder /my/folder/ --add_pdb_output --add_html_output
5. Save at most two variants per input molecule:
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \\
--output_folder /my/folder/ --max_variants_per_compound 2
6. Control how Gypsum-DL ionizes the input molecules:
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \\
--output_folder /my/folder/ --min_ph 12 --max_ph 14 --pka_precision 1
7. Run Gypsum-DL in serial mode (using only one processor):
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \\
--job_manager serial
8. Run Gypsum-DL in multiprocessing mode, using 4 processors:
- python run_gypsum_dl.py --source ./examples/sample_molecules.smi \\
+ python run_gypsum_dl.py --source ./tests/files/sample/sample_molecules.smi \\
--job_manager multiprocessing --num_processors 4
9. Run Gypsum-DL in mpi mode using all available processors:
mpirun -n $NTASKS python -m mpi4py run_gypsum_dl.py \\
- --source ./examples/sample_molecules.smi \\
+ --source ./tests/files/sample/sample_molecules.smi \\
--job_manager mpi --num_processors -1
10. Gypsum-DL can also take parameters from a JSON file:
@@ -71,7 +72,7 @@ def main():
Where myparams.json might look like:
{
- "source": "./examples/sample_molecules.smi",
+ "source": "./tests/files/sample/sample_molecules.smi",
"separate_output_files": true,
"job_manager": "multiprocessing",
"output_folder": "/my/folder/",
@@ -243,4 +244,4 @@ def main():
if v is None:
del INPUTS[k]
prepare_molecules(INPUTS)
- utils.log("Finished Gypsum-DL")
+ logger.info("Finished Gypsum-DL")
diff --git a/gypsum_dl/start.py b/gypsum_dl/start.py
index 1d4da22..94f8e23 100644
--- a/gypsum_dl/start.py
+++ b/gypsum_dl/start.py
@@ -11,15 +11,16 @@
from collections import OrderedDict
from datetime import datetime
+from loguru import logger
from rdkit import Chem
from gypsum_dl import utils
-from gypsum_dl.MolContainer import MolContainer
+from gypsum_dl.models import Molecule, MoleculeContainer
from gypsum_dl.parallelizer import Parallelizer
-from gypsum_dl.steps.conf.PrepareThreeD import prepare_3d
-from gypsum_dl.steps.io.LoadFiles import load_sdf_file, load_smiles_file
-from gypsum_dl.steps.io.ProcessOutput import proccess_output
-from gypsum_dl.steps.smiles.PrepareSmiles import prepare_smiles
+from gypsum_dl.steps.conf.prepare import prepare_3d
+from gypsum_dl.steps.io.load import load_sdf_file, load_smiles_file
+from gypsum_dl.steps.io.output import proccess_output
+from gypsum_dl.steps.smiles.prepare import prepare_smiles
# see http://www.rdkit.org/docs/GettingStartedInPython.html#working-with-3d-molecules
@@ -55,9 +56,9 @@ def prepare_molecules(args: dict[str, Any]) -> None:
if "json" in args:
# "json" is one of the parameters, so we'll be ignoring the rest.
try:
- params = json.load(open(args["json"]))
- except:
- utils.exception("Is your input json file properly formed?")
+ params = json.load(open(args["json"], encoding="utf-8"))
+ except Exception as e:
+ raise ValueError("Is your input json file properly formed?") from e
params = set_parameters(params)
if [i for i in json_warning_list if i in list(args.keys())]:
@@ -70,7 +71,7 @@ def prepare_molecules(args: dict[str, Any]) -> None:
# If running in serial mode, make sure only one processor is used.
if params["job_manager"] == "serial":
if params["num_processors"] != 1:
- utils.log(
+ logger.warning(
"Because --job_manager was set to serial, this will be run on a single processor."
)
params["num_processors"] = 1
@@ -82,36 +83,13 @@ def prepare_molecules(args: dict[str, Any]) -> None:
sys_modules = sys.modules
if "runpy" not in sys_modules.keys():
printout = "\nTo run in mpi mode you must run with -m flag. ie) mpirun -n $NTASKS python -m mpi4py run_gypsum_dl.py\n"
- print(printout)
- utils.exception(printout)
-
- # Check mpi4py import
- try:
- import mpi4py
- except Exception:
- printout = "\nmpi4py not installed but --job_manager is set to mpi. \n Either install mpi4py or switch job_manager to multiprocessing or serial.\n"
- print(printout)
- utils.exception(printout)
-
- # Check mpi4py import version. This must be at version 2.1.0 and higher
- mpi4py_version = mpi4py.__version__
- mpi4py_version = [int(x) for x in mpi4py_version.split(".")]
-
- if mpi4py_version[0] == 2:
- if mpi4py_version[1] < 1:
- printout = "\nmpi4py version 2.1.0 or higher is required. Use the 'python -m mpi4py' flag to run in mpi mode.\nPlease update mpi4py to a newer version, or switch job_manager to multiprocessing or serial.\n"
- print(printout)
- utils.exception(printout)
- elif mpi4py_version[0] < 2:
- printout = "\nmpi4py version 2.1.0 or higher is required. Use the 'python -m mpi4py' flag to run in mpi mode.\nPlease update mpi4py to a newer version, or switch job_manager to multiprocessing or serial.\n"
- print(printout)
- utils.exception(printout)
+ raise ValueError(printout)
# Throw a message if running on windows. Windows doesn't deal with with
# multiple processors, so use only 1.
if sys.platform == "win32":
- utils.log(
- "WARNING: Multiprocessing is not supported on Windows. Tasks will be run in Serial mode."
+ logger.warning(
+ "Multiprocessing is not supported on Windows. Tasks will be run in Serial mode."
)
params["num_processors"] = 1
params["job_manager"] = "serial"
@@ -132,27 +110,27 @@ def prepare_molecules(args: dict[str, Any]) -> None:
# Let the user know that their command-line parameters will be ignored, if
# they have specified a json file.
- if need_to_print_override_warning == True:
- utils.log("WARNING: Using the --json flag overrides all other flags.")
+ if need_to_print_override_warning:
+ logger.warning("Using the --json flag overrides all other flags.")
# If running in mpi mode, separate_output_files must be set to true.
- if params["job_manager"] == "mpi" and params["separate_output_files"] == False:
- utils.log(
- "WARNING: Running in mpi mode, but separate_output_files is not set to True. Setting separate_output_files to True anyway."
+ if params["job_manager"] == "mpi" and not params["separate_output_files"]:
+ logger.warning(
+ "Running in mpi mode, but separate_output_files is not set to True. Setting separate_output_files to True anyway."
)
params["separate_output_files"] = True
# Outputing HTML files not supported in mpi mode.
- if params["job_manager"] == "mpi" and params["add_html_output"] == True:
- utils.log(
- "WARNING: Running in mpi mode, but add_html_output is set to True. HTML output is not supported in mpi mode."
+ if params["job_manager"] == "mpi" and params["add_html_output"]:
+ logger.warning(
+ "Running in mpi mode, but add_html_output is set to True. HTML output is not supported in mpi mode."
)
params["add_html_output"] = False
# Warn the user if he or she is not using the Durrant lab filters.
- if params["use_durrant_lab_filters"] == -False:
- utils.log(
- "WARNING: Running Gypsum-DL without the Durrant-lab filters. In looking over many Gypsum-DL-generated "
+ if not params["use_durrant_lab_filters"]:
+ logger.warning(
+ "Running Gypsum-DL without the Durrant-lab filters. In looking over many Gypsum-DL-generated "
+ "variants, we have identified a number of substructures that, though technically possible, strike us "
+ "as improbable or otherwise poorly suited for virtual screening. We strongly recommend removing these "
+ "by running Gypsum-DL with the --use_durrant_lab_filters option.",
@@ -161,7 +139,7 @@ def prepare_molecules(args: dict[str, Any]) -> None:
# Load SMILES data
if isinstance(params["source"], str):
- utils.log(
+ logger.debug(
"Loading molecules from " + os.path.basename(params["source"]) + "..."
)
@@ -179,10 +157,10 @@ def prepare_molecules(args: dict[str, Any]) -> None:
pass # It's already in the required format.
# Make the output directory if necessary.
- if os.path.exists(params["output_folder"]) == False:
+ if not os.path.exists(params["output_folder"]):
os.mkdir(params["output_folder"])
- if os.path.exists(params["output_folder"]) == False:
- utils.exception("Output folder directory couldn't be found or created.")
+ if not os.path.exists(params["output_folder"]):
+ raise RuntimeError("Output folder directory couldn't be found or created.")
# For Debugging
# print("")
@@ -200,36 +178,26 @@ def prepare_molecules(args: dict[str, Any]) -> None:
for i in range(0, len(smiles_data)):
try:
smiles, name, props = smiles_data[i]
- except Exception:
- msg = 'Unexpected error. Does your "source" parameter specify a '
- msg = msg + "filename that ends in a .can, .smi, or .sdf extension?"
- utils.exception(msg)
+ except Exception as e:
+ logger.exception(
+ 'Unexpected error. Does your "source" parameter specify a filename that ends in a .can, .smi, or .sdf extension?'
+ )
+ raise e
if detect_unassigned_bonds(smiles) is None:
- utils.log(
- "WARNING: Throwing out SMILES because of unassigned bonds: " + smiles
- )
+ logger.warning("Throwing out SMILES because of unassigned bonds: " + smiles)
continue
- new_contnr = MolContainer(smiles, name, idx_counter, props)
- if (
- new_contnr.orig_smi_canonical == None
- or type(new_contnr.orig_smi_canonical) != str
- ):
- utils.log(
- "WARNING: Throwing out SMILES because of it couldn't convert to mol: "
- + smiles
+ new_mol = Molecule.from_smiles(smiles, name)
+ new_smiles = new_mol.canonical_smiles()
+ if new_smiles is None or not isinstance(new_smiles, str):
+ logger.warning(
+ "Throwing out SMILES because of it couldn't convert to mol: " + smiles
)
continue
-
- contnrs.append(new_contnr)
+ contnrs.append(MoleculeContainer(new_mol, idx_counter))
idx_counter += 1
- # Remove None types from failed conversion
- contnrs = [x for x in contnrs if x.orig_smi_canonical != None]
- if len(contnrs) != idx_counter:
- utils.exception("There is a corrupted container")
-
# In multiprocessing mode, Gypsum-DL parallelizes each small-molecule
# preparation step separately. But this scheme is inefficient in MPI mode
# because it increases the amount of communication required between nodes.
@@ -263,9 +231,9 @@ def prepare_molecules(args: dict[str, Any]) -> None:
params["end_time"] = str(end_time)
params["run_time"] = str(run_time)
- utils.log("\nStart time at: " + str(start_time))
- utils.log("End time at: " + str(end_time))
- utils.log("Total time at: " + str(run_time))
+ logger.info("Start time at: " + str(start_time))
+ logger.info("End time at: " + str(end_time))
+ logger.info("Total time at: " + str(run_time))
# Kill mpi workers if necessary.
params["Parallelizer"].end(params["job_manager"])
@@ -398,10 +366,10 @@ def merge_parameters(default: dict[str, Any], params: dict[str, Any]) -> None:
for param in params:
# Throw an error if there's an unrecognized parameter.
if param not in default:
- utils.log(f'Parameter "{str(param)}" not recognized!')
- utils.log("Here are the options:")
- utils.log(" ".join(sorted(list(default.keys()))))
- utils.exception(f"Unrecognized parameter: {str(param)}")
+ logger.warning(f'Parameter "{str(param)}" not recognized!')
+ logger.warning("Here are the options:")
+ logger.warning(" ".join(sorted(list(default.keys()))))
+ raise ValueError(f"Unrecognized parameter: {str(param)}")
# Throw an error if the input parameter has a different type than
# the default one.
@@ -411,7 +379,7 @@ def merge_parameters(default: dict[str, Any], params: dict[str, Any]) -> None:
params[param] = float(params[param])
else:
# Seems to be a type mismatch.
- utils.exception(
+ raise TypeError(
'The parameter "'
+ param
+ '" must be of '
@@ -449,8 +417,8 @@ def make_type_dict(dictionary: dict[str, Any]) -> dict[str, Any]:
# The value ha san unacceptable type. Throw an error.
if key not in type_dict:
- utils.exception(
- "ERROR: There appears to be an error in your parameter "
+ raise TypeError(
+ "There appears to be an error in your parameter "
+ "JSON file. No value can have type "
+ str(type(val))
+ "."
@@ -471,7 +439,7 @@ def finalize_params(params: dict[str, Any]) -> dict[str, Any]:
# Throw an error if there's a missing parameter.
if params["source"] == "":
- utils.exception(
+ raise RuntimeError(
'Missing parameter "source". You need to specify '
+ "the source of the input molecules (probably a SMI or SDF "
+ "file)."
@@ -487,25 +455,18 @@ def finalize_params(params: dict[str, Any]) -> dict[str, Any]:
# Check some required variables.
try:
params["source"] = os.path.abspath(params["source"])
- except Exception:
- utils.exception("Source file doesn't exist.")
+ except Exception as e:
+ raise RuntimeError("Source file doesn't exist.") from e
source_dir = params["source"].strip(os.path.basename(params["source"]))
if params["output_folder"] == "" and params["source"] != "":
params["output_folder"] = f"{source_dir}output{str(os.sep)}"
- if params["add_pdb_output"] == True and params["output_folder"] == "":
- utils.exception("To output files as .pdbs, specify the output_folder.")
-
- if params["separate_output_files"] == True and params["output_folder"] == "":
- utils.exception("For separate_output_files, specify the output_folder.")
+ if params["add_pdb_output"] and params["output_folder"] == "":
+ raise RuntimeError("To output files as .pdbs, specify the output_folder.")
- # if not os.path.exists(params["output_folder"]) or not os.path.isdir(params["output_folder"]):
- # utils.exception(
- # "The specified \"output_folder\", " + params["output_folder"] +
- # ", either does not exist or is a file rather than a folder. " +
- # "Please provide the path to an existing folder instead."
- # )
+ if params["separate_output_files"] and params["output_folder"] == "":
+ raise RuntimeError("For separate_output_files, specify the output_folder.")
# Make sure job_manager is always lower case.
params["job_manager"] = params["job_manager"].lower()
@@ -513,12 +474,12 @@ def finalize_params(params: dict[str, Any]) -> dict[str, Any]:
return params
-def add_mol_id_props(contnrs: list[MolContainer]) -> None:
+def add_mol_id_props(contnrs: list[MoleculeContainer]) -> None:
"""Once all molecules have been generated, go through each and add the
name and a unique id (for writing to the SDF file, for example).
Args:
- contnrs: A list of containers (MolContainer.MolContainer).
+ contnrs: A list of containers (container.MoleculeContainer).
"""
cont_id = 0
@@ -530,12 +491,12 @@ def add_mol_id_props(contnrs: list[MolContainer]) -> None:
def deal_with_failed_molecules(
- contnrs: list[MolContainer], params: dict[str, Any]
+ contnrs: list[MoleculeContainer], params: dict[str, Any]
) -> None:
"""Removes and logs failed molecules.
Args:
- contnrs: A list of containers (MolContainer.MolContainer).
+ contnrs: A list of containers (container.MoleculeContainer).
params: The parameters, used to determine the filename that will
contain the failed molecules.
"""
@@ -548,12 +509,13 @@ def deal_with_failed_molecules(
]
# Let the user know if there's more than one failed molecule.
if failed_ones:
- utils.log("\n3D models could not be generated for the following entries:")
- utils.log("\n".join(failed_ones))
- utils.log("\n")
+ logger.warning("\n3D models could not be generated for the following entries:")
+ logger.warning("\n".join(failed_ones))
# Write the failures to an smi file.
with open(
- params["output_folder"] + os.sep + "gypsum_dl_failed.smi", "w"
+ params["output_folder"] + os.sep + "gypsum_dl_failed.smi",
+ "w",
+ encoding="utf-8",
) as outfile:
outfile.write("\n".join(failed_ones))
diff --git a/gypsum_dl/steps/conf/Minimize3D.py b/gypsum_dl/steps/conf/Minimize3D.py
deleted file mode 100644
index 87fccd0..0000000
--- a/gypsum_dl/steps/conf/Minimize3D.py
+++ /dev/null
@@ -1,175 +0,0 @@
-"""
-This module performs a final 3D minimization to improve the small-molecule
-geometry.
-"""
-
-import __future__
-
-import copy
-
-from gypsum_dl import chem_utils, utils
-from gypsum_dl.MyMol import MyConformer
-
-
-def minimize_3d(
- contnrs,
- max_variants_per_compound,
- thoroughness,
- num_procs,
- second_embed,
- job_manager,
- parallelizer_obj,
-):
- """This function minimizes a 3D molecular conformation. In an attempt to
- not get trapped in a local minimum, it actually generates a number of
- conformers, minimizes the best ones, and then saves the best of the
- best.
-
- :param contnrs: A list of containers (MolContainer.MolContainer).
- :type contnrs: list
- :param max_variants_per_compound: To control the combinatorial explosion,
- only this number of variants (molecules) will be advanced to the next
- step.
- :type max_variants_per_compound: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :param num_procs: The number of processors to use.
- :type num_procs: int
- :param second_embed: Whether to try to generate 3D coordinates using an
- older algorithm if the better (default) algorithm fails. This can add
- run time, but sometimes converts certain molecules that would
- otherwise fail.
- :type second_embed: bool
- :param job_manager: The multithred mode to use.
- :type job_manager: string
- :param parallelizer_obj: The Parallelizer object.
- :type parallelizer_obj: Parallelizer.Parallelizer
- """
-
- # Let the user know you're on this step.
- utils.log("Minimizing all 3D molecular structures...")
-
- # Create the parameters (inputs) for the parallelizer.
- params = []
- ones_without_nonaro_rngs = set([])
- for contnr in contnrs:
- if contnr.num_nonaro_rngs == 0:
- # Because ones with nonaromatic rings have already been minimized,
- # so they can be skipped here.
- for mol in contnr.mols:
- ones_without_nonaro_rngs.add(mol.contnr_idx)
- params.append(
- (mol, max_variants_per_compound, thoroughness, second_embed)
- )
- params = tuple(params)
-
- # Run the inputs through the parallelizer.
- tmp = []
- if parallelizer_obj is None:
- tmp.extend(parallel_minit(i[0], i[1], i[2], i[3]) for i in params)
- else:
- tmp = parallelizer_obj.run(params, parallel_minit, num_procs, job_manager)
-
- # Save energy into MyMol object, and get a list of just those objects.
- contnr_list_not_empty = set([]) # To keep track of which container lists
- # are not empty. These are the ones
- # you'll be repopulating with better
- # optimized structures.
- results = [] # Will contain MyMol.MyMol objects, with the saved energies
- # inside.
- for mol in tmp:
- mol.mol_props["Energy"] = mol.conformers[0].energy
- results.append(mol)
- contnr_list_not_empty.add(mol.contnr_idx)
-
- # Go through each of the containers that are not empty and remove current
- # ones. Because you'll be replacing them with optimized versions.
- for i in contnr_list_not_empty:
- contnrs[i].mols = []
-
- # Go through each of the minimized mols, and populate containers they
- # belong to.
- for mol in results:
- contnrs[mol.contnr_idx].add_mol(mol)
-
- # Alert the user to any errors.
- for contnr in contnrs:
- for mol in contnr.mols:
- if mol.rdkit_mol == "":
- mol.genealogy.append("(WARNING: Could not optimize 3D geometry)")
- mol.conformers = []
-
-
-def parallel_minit(mol, max_variants_per_compound, thoroughness, second_embed):
- """Minimizes the geometries of a MyMol.MyMol object. Meant to be run
- within parallelizer.
-
- :param mol: The molecule to minimize.
- :type mol: MyMol.MyMol
- :param max_variants_per_compound: To control the combinatorial explosion,
- only this number of variants (molecules) will be advanced to the next
- step.
- :type max_variants_per_compound: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :param second_embed: Whether to try to generate 3D coordinates using an
- older algorithm if the better (default) algorithm fails. This can add
- run time, but sometimes converts certain molecules that would
- otherwise fail.
- :type second_embed: bool
- :return: A molecule with the minimized conformers inside it.
- :rtype: MyMol.MyMol
- """
-
- # Not minimizing. Just adding the conformers.
- mol.add_conformers(thoroughness * max_variants_per_compound, 0.1, False)
-
- if len(mol.conformers) > 0:
- # Because it is possible to find a molecule that has no
- # acceptable conformers (i.e., is not possible geometrically).
- # Consider this:
- # O=C([C@@]1([C@@H]2O[C@@H]([C@@]1(C3=O)C)CC2)C)N3c4sccn4
-
- # Further minimize the unoptimized conformers that were among the best
- # scoring.
- max_vars_per_cmpd = max_variants_per_compound
- for i in range(len(mol.conformers[:max_vars_per_cmpd])):
- mol.conformers[i].minimize()
-
- # Remove similar conformers
- # mol.eliminate_structurally_similar_conformers()
-
- # Get the best scoring (lowest energy) of these minimized conformers
- new_mol = copy.deepcopy(mol)
- c = MyConformer(new_mol, mol.conformers[0].conformer(), second_embed)
- new_mol.conformers = [c]
- best_energy = c.energy
-
- # Save to the genealogy record.
- new_mol.genealogy = mol.genealogy[:]
- new_mol.genealogy.append(
- new_mol.smiles(True)
- + " (optimized conformer: "
- + str(best_energy)
- + " kcal/mol)"
- )
-
- # Save best conformation. For some reason molecular properties
- # attached to mol are lost when returning from multiple
- # processors. So save the separately so they can be readded to
- # the molecule in a bit.
- # JDD: Still any issue?
-
- return new_mol
diff --git a/gypsum_dl/steps/conf/Convert2DTo3D.py b/gypsum_dl/steps/conf/convert.py
similarity index 51%
rename from gypsum_dl/steps/conf/Convert2DTo3D.py
rename to gypsum_dl/steps/conf/convert.py
index 74d4956..d44a15f 100644
--- a/gypsum_dl/steps/conf/Convert2DTo3D.py
+++ b/gypsum_dl/steps/conf/convert.py
@@ -1,52 +1,47 @@
"""
A module to so the 2D to 3D conversion, though the actual code for that
-conversion is in MyMol.MyMol.make_first_3d_conf_no_min()
+conversion is in Molecule.make_first_3d_conf_no_min()
"""
-from gypsum_dl import chem_utils, utils
+from typing import TYPE_CHECKING
-from ... import parallelizer as Parallelizer
+from loguru import logger
-try:
- from rdkit import Chem
- from rdkit.Chem import AllChem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+import gypsum_dl.parallelizer as Parallelizer
+from gypsum_dl import chem_utils
+
+if TYPE_CHECKING:
+ from gypsum_dl.models import Molecule, MoleculeContainer
def convert_2d_to_3d(
- contnrs,
- max_variants_per_compound,
- thoroughness,
- num_procs,
- job_manager,
- parallelizer_obj,
-):
+ contnrs: list["MoleculeContainer"],
+ max_variants_per_compound: int,
+ thoroughness: int,
+ num_procs: int,
+ job_manager: str,
+ parallelizer_obj: object,
+) -> None:
"""Converts the 1D smiles strings into 3D small-molecule models.
- :param contnrs: A list of containers (MolContainer.MolContainer).
- :type contnrs: list
- :param max_variants_per_compound: To control the combinatorial explosion,
- only this number of variants (molecules) will be advanced to the next
- step.
- :type max_variants_per_compound: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :param num_procs: The number of processors to use.
- :type num_procs: int
- :param job_manager: The multithred mode to use.
- :type job_manager: string
- :param parallelizer_obj: The Parallelizer object.
- :type parallelizer_obj: Parallelizer.Parallelizer
+ Args:
+ contnrs: A list of containers (container.MoleculeContainer).
+ max_variants_per_compound: To control the combinatorial explosion,
+ only this number of variants (molecules) will be advanced to the next
+ step.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (max_variants_per_compound = 5). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so thoroughness = 2). Using thoroughness > 1 increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+ num_procs: The number of processors to use.
+ job_manager: The multithreaded mode to use.
+ parallelizer_obj: The Parallelizer object.
"""
- utils.log("Converting all molecules to 3D structures.")
+ logger.info("Converting all molecules to 3D structures.")
# Make the inputs to pass to the parallelizer.
params = []
@@ -70,14 +65,15 @@ def convert_2d_to_3d(
)
-def parallel_make_3d(mol):
+def parallel_make_3d(mol: "Molecule") -> "Molecule | None":
"""Does the 2D to 3D conversion. Meant to run within parallelizer.
- :param mol: The molecule to be converted.
- :type mol: MyMol.MyMol
- :return: A MyMol.MyMol object with the 3D coordinates inside, or None if
- it fails.
- :rtype: MyMol.MyMol | None
+ Args:
+ mol: The molecule to be converted.
+
+ Returns:
+ A Molecule object with the 3D coordinates inside, or None if
+ it fails.
"""
# Initially assume you won't show an error message.
@@ -87,7 +83,7 @@ def parallel_make_3d(mol):
# The rdkit mol is None. Something's gone wrong. Show an error
# message.
show_error_msg = True
- elif mol.remove_bizarre_substruc() == False:
+ elif not mol.remove_bizarre_substruc():
# Check if it has strange substructures.
# Perform the conversion.
@@ -104,8 +100,8 @@ def parallel_make_3d(mol):
if show_error_msg:
# Something's gone wrong, so show this error.
- utils.log(
- "\tWARNING: Could not generate 3D geometry for "
+ logger.warning(
+ "Could not generate 3D geometry for "
+ str(mol.smiles())
+ " ("
+ mol.name
diff --git a/gypsum_dl/steps/conf/minimize.py b/gypsum_dl/steps/conf/minimize.py
new file mode 100644
index 0000000..13e9c21
--- /dev/null
+++ b/gypsum_dl/steps/conf/minimize.py
@@ -0,0 +1,173 @@
+"""
+This module performs a final 3D minimization to improve the small-molecule geometry.
+"""
+
+from typing import TYPE_CHECKING
+
+import copy
+
+from loguru import logger
+
+from gypsum_dl.models import Conformer
+
+if TYPE_CHECKING:
+ import gypsum_dl.parallelizer as Parallelizer
+ from gypsum_dl.models import Molecule, MoleculeContainer
+
+
+def minimize_3d(
+ contnrs: list["MoleculeContainer"],
+ max_variants_per_compound: int,
+ thoroughness: int,
+ num_procs: int,
+ second_embed: bool,
+ job_manager: str,
+ parallelizer_obj: "Parallelizer.Parallelizer",
+):
+ """This function minimizes a 3D molecular conformation. In an attempt to
+ not get trapped in a local minimum, it actually generates a number of
+ conformers, minimizes the best ones, and then saves the best of the
+ best.
+
+ Args:
+ contnrs: A list of molecule containers.
+ max_variants_per_compound: To control the combinatorial explosion,
+ only this number of variants (molecules) will be advanced to the next
+ step.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (`max_variants_per_compound = 5`). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so `thoroughness = 2`). Using `thoroughness > 1` increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+ num_procs: The number of processors to use.
+ second_embed: Whether to try to generate 3D coordinates using an
+ older algorithm if the better (default) algorithm fails. This can add
+ run time, but sometimes converts certain molecules that would
+ otherwise fail.
+ job_manager: The multithreaded mode to use.
+ parallelizer_obj: The Parallelizer object.
+ """
+ logger.info("Minimizing all 3D molecular structures...")
+
+ # Create the parameters (inputs) for the parallelizer.
+ params = []
+ ones_without_nonaro_rngs = set([])
+ for contnr in contnrs:
+ if contnr.num_nonaro_rngs == 0:
+ # Containers with nonaromatic rings have already been minimized, so
+ # only containers without such rings are processed here.
+ for mol in contnr.mols:
+ ones_without_nonaro_rngs.add(mol.contnr_idx)
+ params.append(
+ (mol, max_variants_per_compound, thoroughness, second_embed)
+ )
+ params = tuple(params)
+
+ # Run the inputs through the parallelizer.
+ tmp = []
+ if parallelizer_obj is None:
+ tmp.extend(parallel_minit(i[0], i[1], i[2], i[3]) for i in params)
+ else:
+ tmp = parallelizer_obj.run(params, parallel_minit, num_procs, job_manager)
+
+ # Save energy into Molecule object, and get a list of just those objects.
+ contnr_list_not_empty = set([]) # To keep track of which container lists
+ # are not empty. These are the ones
+ # you'll be repopulating with better
+ # optimized structures.
+ results = [] # Will contain Molecule objects, with the saved energies
+ # inside.
+ for mol in tmp:
+ mol.mol_props["Energy"] = mol.conformers[0].energy
+ results.append(mol)
+ contnr_list_not_empty.add(mol.contnr_idx)
+
+ # Go through each of the containers that are not empty and remove current
+ # ones. Because you'll be replacing them with optimized versions.
+ for i in contnr_list_not_empty:
+ contnrs[i].mols = []
+
+ # Go through each of the minimized mols, and populate containers they
+ # belong to.
+ for mol in results:
+ contnrs[mol.contnr_idx].add_mol(mol)
+
+ # Alert the user to any errors.
+ for contnr in contnrs:
+ for mol in contnr.mols:
+ if mol.rdkit_mol == "":
+ mol.genealogy.append("(WARNING: Could not optimize 3D geometry)")
+ mol.conformers = []
+
+
+def parallel_minit(
+ mol: "Molecule",
+ max_variants_per_compound: int,
+ thoroughness: int,
+ second_embed: bool,
+) -> "Molecule | None":
+ """Minimizes the geometries of a Molecule object. Meant to be run
+ within parallelizer.
+
+ Args:
+ mol: The molecule to minimize.
+ max_variants_per_compound: To control the combinatorial explosion,
+ only this number of variants (molecules) will be advanced to the next
+ step.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (max_variants_per_compound = 5). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so thoroughness = 2). Using thoroughness > 1 increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+ second_embed: Whether to try to generate 3D coordinates using an
+ older algorithm if the better (default) algorithm fails. This can add
+ run time, but sometimes converts certain molecules that would
+ otherwise fail.
+
+ Returns:
+ A molecule with the minimized conformers inside it.
+ """
+
+ # Not minimizing. Just adding the conformers.
+ mol.add_conformers(thoroughness * max_variants_per_compound, 0.1, False)
+
+ if len(mol.conformers) > 0:
+ # Because it is possible to find a molecule that has no
+ # acceptable conformers (i.e., is not possible geometrically).
+ # Consider this:
+ # O=C([C@@]1([C@@H]2O[C@@H]([C@@]1(C3=O)C)CC2)C)N3c4sccn4
+
+ # Further minimize the unoptimized conformers that were among the best
+ # scoring.
+ max_vars_per_cmpd = max_variants_per_compound
+ for i in range(len(mol.conformers[:max_vars_per_cmpd])):
+ mol.conformers[i].minimize()
+
+ # Remove similar conformers
+ # mol.eliminate_structurally_similar_conformers()
+
+ # Get the best scoring (lowest energy) of these minimized conformers
+ new_mol = copy.deepcopy(mol)
+ c = Conformer(new_mol, mol.conformers[0].conformer(), second_embed)
+ new_mol.conformers = [c]
+ best_energy = c.energy
+
+ # Save to the genealogy record.
+ new_mol.genealogy = mol.genealogy[:]
+ smiles: str | None = new_mol.smiles(noh=True)
+ if smiles is not None:
+ new_mol.genealogy.append(
+ smiles + " (optimized conformer: " + str(best_energy) + " kcal/mol)"
+ )
+
+ # Save best conformation. For some reason molecular properties
+ # attached to mol are lost when returning from multiple
+ # processors. So save them separately so they can be re-added to
+ # the molecule in a bit.
+ # JDD: Still any issue?
+
+ return new_mol
diff --git a/gypsum_dl/steps/conf/PrepareThreeD.py b/gypsum_dl/steps/conf/prepare.py
similarity index 74%
rename from gypsum_dl/steps/conf/PrepareThreeD.py
rename to gypsum_dl/steps/conf/prepare.py
index 7fb0386..86d5c0a 100644
--- a/gypsum_dl/steps/conf/PrepareThreeD.py
+++ b/gypsum_dl/steps/conf/prepare.py
@@ -2,22 +2,22 @@
Runs the 3D preparation process.
"""
-import __future__
+from typing import TYPE_CHECKING, Any
-from gypsum_dl.steps.conf.Convert2DTo3D import convert_2d_to_3d
-from gypsum_dl.steps.conf.GenerateAlternate3DNonaromaticRingConfs import (
- generate_alternate_3d_nonaromatic_ring_confs,
-)
-from gypsum_dl.steps.conf.Minimize3D import minimize_3d
+from gypsum_dl.steps.conf.convert import convert_2d_to_3d
+from gypsum_dl.steps.conf.minimize import minimize_3d
+from gypsum_dl.steps.conf.rings import generate_alternate_3d_nonaromatic_ring_confs
+if TYPE_CHECKING:
+ from gypsum_dl.models import MoleculeContainer
-def prepare_3d(contnrs, params):
+
+def prepare_3d(contnrs: list["MoleculeContainer"], params: dict[str, Any]) -> None:
"""Runs the pipeline for generating the 3D small-molecule models.
- :param contnrs: A list of containers (MolContainer.MolContainer).
- :type contnrs: list
- :param params: The parameters.
- :type params: dict
+ Args:
+ contnrs: A list of containers (container.MoleculeContainer).
+ params: The parameters.
"""
# Do the 2d to 3d conversion, if requested.
diff --git a/gypsum_dl/steps/conf/GenerateAlternate3DNonaromaticRingConfs.py b/gypsum_dl/steps/conf/rings.py
similarity index 62%
rename from gypsum_dl/steps/conf/GenerateAlternate3DNonaromaticRingConfs.py
rename to gypsum_dl/steps/conf/rings.py
index 82e6dc4..ca3902e 100644
--- a/gypsum_dl/steps/conf/GenerateAlternate3DNonaromaticRingConfs.py
+++ b/gypsum_dl/steps/conf/rings.py
@@ -4,79 +4,66 @@
conformations.
"""
-import __future__
+from typing import TYPE_CHECKING
import copy
import warnings
-import gypsum_dl.parallelizer as Parallelizer
-from gypsum_dl import chem_utils, utils
-from gypsum_dl.MyMol import MyConformer
-
-try:
- from rdkit import Chem
- from rdkit.Chem import AllChem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+import numpy
+from loguru import logger
+from rdkit import Chem
+from rdkit.Chem import AllChem
+from scipy.cluster.vq import kmeans2
-try:
- import numpy
-except Exception:
- utils.exception("You need to install numpy and its dependencies.")
+import gypsum_dl.parallelizer as Parallelizer
+from gypsum_dl.models import Conformer
-try:
- from scipy.cluster.vq import kmeans2
-except Exception:
- utils.exception("You need to install scipy and its dependencies.")
+if TYPE_CHECKING:
+ from gypsum_dl.models import Molecule, MoleculeContainer
def generate_alternate_3d_nonaromatic_ring_confs(
- contnrs,
- max_variants_per_compound,
- thoroughness,
- num_procs,
- second_embed,
- job_manager,
- parallelizer_obj,
+ contnrs: list["MoleculeContainer"],
+ max_variants_per_compound: int,
+ thoroughness: int,
+ num_procs: int,
+ second_embed: bool,
+ job_manager: str,
+ parallelizer_obj: Parallelizer.Parallelizer,
):
"""Docking programs like Vina rotate chemical moieties around their
- rotatable bonds, so it's not necessary to generate a larger rotomer
- library for each molecule. The one exception to this rule is
- non-aromatic rings, which can assume multiple conformations (boat vs.
- chair, etc.). This function generates a few low-energy ring structures
- for each molecule with a non-aromatic ring(s).
-
- :param contnrs: A list of containers (MolContainer.MolContainer).
- :type contnrs: list
- :param max_variants_per_compound: To control the combinatorial explosion,
- only this number of variants (molecules) will be advanced to the next
- step.
- :type max_variants_per_compound: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :param num_procs: The number of processors to use.
- :type num_procs: int
- :param second_embed: Whether to try to generate 3D coordinates using an
- older algorithm if the better (default) algorithm fails. This can add
- run time, but sometimes converts certain molecules that would
- otherwise fail.
- :type second_embed: bool
- :param job_manager: The multiprocess mode.
- :type job_manager: string
- :param parallelizer_obj: The Parallelizer object.
- :type parallelizer_obj: Parallelizer.Parallelizer
- :return: Returns None if no ring conformers are generated
- :rtype: None
+ rotatable bonds, so it's not necessary to generate a larger rotomer
+ library for each molecule. The one exception to this rule is
+ non-aromatic rings, which can assume multiple conformations (boat vs.
+ chair, etc.). This function generates a few low-energy ring structures
+ for each molecule with a non-aromatic ring(s).
+
+ Args:
+ contnrs: A list of containers (container.MoleculeContainer).
+ max_variants_per_compound: To control the combinatorial explosion,
+ only this number of variants (molecules) will be advanced to the next
+ step.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (max_variants_per_compound = 5). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so thoroughness = 2). Using thoroughness > 1 increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+ num_procs: The number of processors to use.
+ second_embed: Whether to try to generate 3D coordinates using an
+ older algorithm if the better (default) algorithm fails. This can add
+ run time, but sometimes converts certain molecules that would
+ otherwise fail.
+ job_manager: The multiprocess mode.
+ parallelizer_obj: The Parallelizer object.
+
+ Returns:
+ Returns None if no ring conformers are generated
"""
# Let the user know you've started this step.
- utils.log(
+ logger.info(
"Generating several conformers of molecules with non-aromatic "
+ "rings (boat vs. chair, etc.)..."
)
@@ -149,30 +136,33 @@ def generate_alternate_3d_nonaromatic_ring_confs(
)
-def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness, second_embed):
+def parallel_get_ring_confs(
+ mol: "Molecule",
+ max_variants_per_compound: int,
+ thoroughness: int,
+ second_embed: bool,
+) -> list["Molecule"] | None:
"""Gets alternate ring conformations. Meant to run with the parallelizer class.
- :param mol: The molecule to process (with non-aromatic ring(s)).
- :type mol: MyMol.MyMol
- :param max_variants_per_compound: To control the combinatorial explosion,
- only this number of variants (molecules) will be advanced to the next
- step.
- :type max_variants_per_compound: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :param second_embed: Whether to try to generate 3D coordinates using an
- older algorithm if the better (default) algorithm fails. This can add
- run time, but sometimes converts certain molecules that would
- otherwise fail.
- :type second_embed: bool
- :return: A list of MyMol.MyMol objects, with alternate ring conformations.
- :rtype: list
+ Args:
+ mol: The molecule to process (with non-aromatic ring(s)).
+ max_variants_per_compound: To control the combinatorial explosion,
+ only this number of variants (molecules) will be advanced to the next
+ step.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (max_variants_per_compound = 5). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so thoroughness = 2). Using thoroughness > 1 increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+ second_embed: Whether to try to generate 3D coordinates using an
+ older algorithm if the better (default) algorithm fails. This can add
+ run time, but sometimes converts certain molecules that would
+ otherwise fail.
+
+ Returns:
+ A list of Molecule objects, with alternate ring conformations.
"""
# Make it easier to access the container index.
@@ -227,7 +217,7 @@ def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness, second
list_of_rmslists = [[]] * len(ring_mols)
for k in range(len(ring_mols)):
list_of_rmslists[k] = []
- AllChem.AlignMolConformers(ring_mols[k], RMSlist=list_of_rmslists[k])
+ AllChem.AlignMolConformers(ring_mols[k], RMSlist=list_of_rmslists[k]) # type: ignore
# Get points for each conformer (rmsd_ring1, rmsd_ring2, rmsd_ring3)
pts = numpy.array(list_of_rmslists).T
@@ -241,7 +231,7 @@ def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness, second
# When kmeans2 runs on insufficient clusters, it can sometimes throw an
# error about empty clusters. This is not necessary to throw for the
- # user and so we have supressed it here.
+ # user and so we have suppressed it here.
with warnings.catch_warnings():
warnings.simplefilter("ignore")
groups = kmeans2(pts, num_clusters, minit="points")[1]
@@ -251,7 +241,7 @@ def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness, second
# contribute similar conformations. In the end, you'll be selecting from
# all these together, so similar ones could end up together.
- # Key is group id from kmeans (int). Values are the MyMol.MyConformers
+ # Key is group id from kmeans (int). Values are the Conformer
# objects.
best_conf_per_group = {}
@@ -259,29 +249,30 @@ def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness, second
for k, grp in enumerate(groups):
if grp not in list(best_conf_per_group.keys()):
best_conf_per_group[grp] = mol.conformers[k]
- # best_confs has the MyMol.MyConformers objects.
+ # best_confs has the Conformer objects.
best_confs = best_conf_per_group.values()
- # Convert rdkit mols to MyMol.MyMol and save those MyMol.MyMol objects
+ # Convert rdkit mols to Molecule and save those Molecule objects
# for returning.
results = []
for conf in best_confs:
new_mol = copy.deepcopy(mol)
- c = MyConformer(new_mol, conf.conformer(), second_embed)
+ c = Conformer(new_mol, conf.conformer(), second_embed)
new_mol.conformers = [c]
energy = c.energy
new_mol.genealogy = mol.genealogy[:]
- new_mol.genealogy.append(
- new_mol.smiles(True)
- + " (nonaromatic ring conformer: "
- + str(energy)
- + " kcal/mol)"
- )
+ smiles: str | None = new_mol.smiles(noh=True)
+ if smiles is not None:
+ new_mol.genealogy.append(
+ smiles
+ + " (nonaromatic ring conformer: "
+ + str(energy)
+ + " kcal/mol)"
+ )
- results.append(new_mol) # i is mol index
+ results.append(new_mol)
return results
- # If you get here, something went wrong.
return None
diff --git a/gypsum_dl/steps/io/LoadFiles.py b/gypsum_dl/steps/io/load.py
similarity index 68%
rename from gypsum_dl/steps/io/LoadFiles.py
rename to gypsum_dl/steps/io/load.py
index d007b4b..5b0c329 100644
--- a/gypsum_dl/steps/io/LoadFiles.py
+++ b/gypsum_dl/steps/io/load.py
@@ -2,21 +2,18 @@
A module for loading in files.
"""
-from gypsum_dl import utils
+from loguru import logger
+from rdkit import Chem
-try:
- from rdkit import Chem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
-
-def load_smiles_file(filename):
+def load_smiles_file(filename: str) -> list[tuple[Chem.Mol, str]]:
"""Loads a smiles file.
- :param filename: The filename.
- :type filename: str
- :return: A list of tuples, (SMILES, Name).
- :rtype: list
+ Args:
+ filename: The filename.
+
+ Returns:
+ A list of tuples, (SMILES, Name).
"""
# A smiles file contains one molecule on each line. Each line is a string,
@@ -37,9 +34,9 @@ def load_smiles_file(filename):
# Handle unnamed ligands.
if not name:
name = f"untitled_line_{line_counter + 1}"
- utils.log(
+ logger.info(
(
- "\tUntitled ligand on line {}. Naming that ligand "
+ "Untitled ligand on line {}. Naming that ligand "
+ "{}. All associated files will be referred to with "
+ "this name."
).format(line_counter + 1, name)
@@ -53,11 +50,11 @@ def load_smiles_file(filename):
else:
duplicate_names[name] = 2
new_name = f"{name}_copy_{duplicate_names[name]}"
- utils.log(f"\nMultiple entries with the ligand name: {name}")
- utils.log(
- f"\tThe version of the ligand on line {line_counter} will be retitled {new_name}"
+ logger.info(f"Multiple entries with the ligand name: {name}")
+ logger.info(
+ f"The version of the ligand on line {line_counter} will be retitled {new_name}"
)
- utils.log("\tAll associated files will be referred to with this name")
+ logger.info("All associated files will be referred to with this name")
name = new_name
# Save the data for this line and advance.
name_list.append(name)
@@ -68,13 +65,14 @@ def load_smiles_file(filename):
return data
-def load_sdf_file(filename):
+def load_sdf_file(filename: str) -> list[tuple[Chem.Mol, str]]:
"""Loads an sdf file.
- :param filename: The filename.
- :type filename: str
- :return: A list of tuples, (SMILES, Name).
- :rtype: list
+ Args:
+ filename: The filename.
+
+ Returns:
+ A list of tuples, (SMILES, Name).
"""
suppl = Chem.SDMolSupplier(filename)
@@ -89,8 +87,8 @@ def load_sdf_file(filename):
if mol:
smiles = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
else:
- utils.log(
- "\tWarning: Could not convert some SDF-formatted files to SMILES. Consider using an SMI (SMILES) file instead."
+ logger.warning(
+ "Could not convert some SDF-formatted files to SMILES. Consider using an SMI (SMILES) file instead."
)
continue
@@ -101,17 +99,17 @@ def load_sdf_file(filename):
# Handle unnamed ligands
if not name:
- utils.log(
- f"\tUntitled ligand for the {mol_obj_counter} molecule in the input SDF"
+ logger.info(
+ f"Untitled ligand for the {mol_obj_counter} molecule in the input SDF"
)
name = f"untitled_{missing_name_counter}_molnum_{mol_obj_counter}"
- utils.log(f"\tNaming that ligand {name}")
- utils.log("\tAll associated files will be referred to with this name")
+ logger.info(f"Naming that ligand {name}")
+ logger.info("All associated files will be referred to with this name")
missing_name_counter += 1
# Handle duplicate ligands in same list.
if name in name_list:
- utils.log(f"\nMultiple entries with the ligand name: {name}")
+ logger.info(f"Multiple entries with the ligand name: {name}")
# If multiple names.
if name in list(duplicate_names.keys()):
duplicate_names[name] = duplicate_names[name] + 1
@@ -120,10 +118,10 @@ def load_sdf_file(filename):
duplicate_names[name] = 2
new_name = f"{name}_copy_{duplicate_names[name]}"
name = new_name
- utils.log(
- f"\tThe version of the ligand for the {mol_obj_counter} molecule in the SDF file will be retitled {name}"
+ logger.info(
+ f"The version of the ligand for the {mol_obj_counter} molecule in the SDF file will be retitled {name}"
)
- utils.log("\tAll associated files will be referred to with this name")
+ logger.info("All associated files will be referred to with this name")
mol_obj_counter += 1
name_list.append(name)
diff --git a/gypsum_dl/steps/io/ProcessOutput.py b/gypsum_dl/steps/io/output.py
similarity index 56%
rename from gypsum_dl/steps/io/ProcessOutput.py
rename to gypsum_dl/steps/io/output.py
index 58b5cb1..0613639 100644
--- a/gypsum_dl/steps/io/ProcessOutput.py
+++ b/gypsum_dl/steps/io/output.py
@@ -3,23 +3,22 @@
disk (output).
"""
-import __future__
+from loguru import logger
-from gypsum_dl import utils
-from gypsum_dl.steps.io.SaveToPDB import convert_sdfs_to_PDBs
-from gypsum_dl.steps.io.SaveToSDF import save_to_sdf
-from gypsum_dl.steps.io.Web2DOutput import web_2d_output
+from gypsum_dl.steps.io.to_html import web_2d_output
+from gypsum_dl.steps.io.to_pdb import convert_sdfs_to_PDBs
+from gypsum_dl.steps.io.to_sdf import save_to_sdf
def proccess_output(contnrs, params):
- """Proccess the molecular models in preparation for writing them to the
+ """Process the molecular models in preparation for writing them to the
disk."""
# Unpack some variables.
separate_output_files = params["separate_output_files"]
output_folder = params["output_folder"]
- if params["add_html_output"] == True:
+ if params["add_html_output"]:
# Write to an HTML file.
web_2d_output(contnrs, output_folder)
@@ -27,6 +26,6 @@ def proccess_output(contnrs, params):
save_to_sdf(contnrs, params, separate_output_files, output_folder)
# Also write to PDB files, if requested.
- if params["add_pdb_output"] == True:
- utils.log("\nMaking PDB output files\n")
+ if params["add_pdb_output"]:
+ logger.info("Making PDB output files")
convert_sdfs_to_PDBs(contnrs, output_folder)
diff --git a/gypsum_dl/steps/io/Web2DOutput.py b/gypsum_dl/steps/io/to_html.py
similarity index 80%
rename from gypsum_dl/steps/io/Web2DOutput.py
rename to gypsum_dl/steps/io/to_html.py
index d82ee29..91e9d71 100644
--- a/gypsum_dl/steps/io/Web2DOutput.py
+++ b/gypsum_dl/steps/io/to_html.py
@@ -3,17 +3,12 @@
debugging.
"""
-# import webbrowser
import os
-from gypsum_dl import chem_utils, utils
-
-try:
- from rdkit import Chem
- from rdkit.Chem import rdDepictor
- from rdkit.Chem.Draw import PrepareMolForDrawing, rdMolDraw2D
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+from loguru import logger
+from rdkit import Chem
+from rdkit.Chem import rdDepictor
+from rdkit.Chem.Draw import PrepareMolForDrawing, rdMolDraw2D
def web_2d_output(contnrs, output_folder):
@@ -21,13 +16,13 @@ def web_2d_output(contnrs, output_folder):
a browser. Then opens a browser automatically to view them. This is mostly
for debugging."""
- utils.log("Saving html image of molecules associated with...")
+ logger.info("Saving html image of molecules associated with...")
# Let's not parallelize it for now. This will rarely be used.
html_file = output_folder + os.sep + "gypsum_dl_success.html"
with open(html_file, "w") as f:
for contnr in contnrs:
- utils.log("\t" + contnr.orig_smi)
+ logger.info("\t" + contnr.orig_smi)
for mol in contnr.mols:
# See
# http://rdkit.org/docs/source/rdkit.Chem.rdmolops.html#rdkit.Chem.rdmolops.RemoveHs
@@ -60,6 +55,3 @@ def web_2d_output(contnrs, output_folder):
+ ""
+ ""
)
-
- # Open the browser to show the file.
- # webbrowser.open("file://" + os.path.abspath(html_file))
diff --git a/gypsum_dl/steps/io/SaveToPDB.py b/gypsum_dl/steps/io/to_pdb.py
similarity index 87%
rename from gypsum_dl/steps/io/SaveToPDB.py
rename to gypsum_dl/steps/io/to_pdb.py
index f3b0089..5b4bcee 100644
--- a/gypsum_dl/steps/io/SaveToPDB.py
+++ b/gypsum_dl/steps/io/to_pdb.py
@@ -2,20 +2,15 @@
Contains the function for saving the output to PDB files.
"""
-import __future__
-
-import glob
import os
import sys
-from os.path import basename
-import rdkit
import rdkit.Chem as Chem
+from rdkit import RDLogger
from gypsum_dl import utils
-# Disable the unnecessary RDKit warnings
-rdkit.RDLogger.DisableLog("rdApp.*")
+RDLogger.DisableLog("rdApp.*")
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), "gypsum_dl"))
@@ -24,7 +19,7 @@ def convert_sdfs_to_PDBs(contnrs, output_folder):
"""This will convert every conformer into a PDB file, which is saved in
the output_folder.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
:param output_folder: The name of the output folder.
:type output_folder: str
diff --git a/gypsum_dl/steps/io/SaveToSDF.py b/gypsum_dl/steps/io/to_sdf.py
similarity index 86%
rename from gypsum_dl/steps/io/SaveToSDF.py
rename to gypsum_dl/steps/io/to_sdf.py
index d65a7ab..a6598c5 100644
--- a/gypsum_dl/steps/io/SaveToSDF.py
+++ b/gypsum_dl/steps/io/to_sdf.py
@@ -2,22 +2,18 @@
Saves output files to SDF.
"""
-import __future__
-
import os
-from gypsum_dl import utils
+from loguru import logger
+from rdkit import Chem
-try:
- from rdkit import Chem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+from gypsum_dl import utils
def save_to_sdf(contnrs, params, separate_output_files, output_folder):
"""Saves the 3D models to the disk as an SDF file.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
:param params: The parameters.
:type params: dict
@@ -45,14 +41,14 @@ def save_to_sdf(contnrs, params, separate_output_files, output_folder):
w.close()
# Also save the file or files containing the output molecules.
- utils.log("Saving molecules associated with...")
+ logger.info("Saving molecules associated with...")
for i, contnr in enumerate(contnrs):
# Add the container properties to the rdkit_mol object so they get
# written to the SDF file.
contnr.add_container_properties()
# Let the user know which molecule you're on.
- utils.log("\t" + contnr.orig_smi)
+ logger.info("\t" + contnr.orig_smi)
# Save the file(s).
if separate_output_files == True:
diff --git a/gypsum_dl/steps/smiles/EnumerateDoubleBonds.py b/gypsum_dl/steps/smiles/bonds.py
similarity index 84%
rename from gypsum_dl/steps/smiles/EnumerateDoubleBonds.py
rename to gypsum_dl/steps/smiles/bonds.py
index 3813094..2f0b4f3 100644
--- a/gypsum_dl/steps/smiles/EnumerateDoubleBonds.py
+++ b/gypsum_dl/steps/smiles/bonds.py
@@ -1,60 +1,57 @@
"""Module for enumerating unspecified double bonds (cis vs. trans)."""
-import __future__
+from typing import TYPE_CHECKING
import copy
import itertools
import math
import random
+from loguru import logger
+from rdkit import Chem
+
import gypsum_dl.parallelizer as Parallelizer
-from gypsum_dl import MyMol, chem_utils, utils
+from gypsum_dl import chem_utils, utils
+from gypsum_dl.models import Molecule
-try:
- from rdkit import Chem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+if TYPE_CHECKING:
+ from gypsum_dl.models import MoleculeContainer
def enumerate_double_bonds(
- contnrs,
- max_variants_per_compound,
- thoroughness,
- num_procs,
- job_manager,
- parallelizer_obj,
-):
+ contnrs: list["MoleculeContainer"],
+ max_variants_per_compound: int,
+ thoroughness: int,
+ num_procs: int,
+ job_manager: str,
+ parallelizer_obj: object,
+) -> None:
"""Enumerates all possible cis-trans isomers. If the stereochemistry of a
double bond is specified, it is not varied. All unspecified double bonds
are varied.
- :param contnrs: A list of containers (MolContainer.MolContainer).
- :type contnrs: A list.
- :param max_variants_per_compound: To control the combinatorial explosion,
- only this number of variants (molecules) will be advanced to the next
- step.
- :type max_variants_per_compound: int
- :param thoroughness: How many molecules to generate per variant (molecule)
- retained, for evaluation. For example, perhaps you want to advance five
- molecules (max_variants_per_compound = 5). You could just generate five
- and advance them all. Or you could generate ten and advance the best
- five (so thoroughness = 2). Using thoroughness > 1 increases the
- computational expense, but it also increases the chances of finding good
- molecules.
- :type thoroughness: int
- :param num_procs: The number of processors to use.
- :type num_procs: int
- :param job_manager: The multithred mode to use.
- :type job_manager: string
- :param parallelizer_obj: The Parallelizer object.
- :type parallelizer_obj: Parallelizer.Parallelizer
+ Args:
+ contnrs: A list of containers (container.MoleculeContainer).
+ max_variants_per_compound: To control the combinatorial explosion,
+ only this number of variants (molecules) will be advanced to the next
+ step.
+ thoroughness: How many molecules to generate per variant (molecule)
+ retained, for evaluation. For example, perhaps you want to advance five
+ molecules (max_variants_per_compound = 5). You could just generate five
+ and advance them all. Or you could generate ten and advance the best
+ five (so thoroughness = 2). Using thoroughness > 1 increases the
+ computational expense, but it also increases the chances of finding good
+ molecules.
+ num_procs: The number of processors to use.
+ job_manager: The multiprocessing mode to use (mpi, multiprocessing, or serial).
+ parallelizer_obj: The Parallelizer object.
"""
# No need to continue if none are requested.
if max_variants_per_compound == 0:
return
- utils.log("Enumerating all possible cis-trans isomers for all molecules...")
+ logger.debug("Enumerating all possible cis-trans isomers for all molecules...")
# Group the molecule containers so they can be passed to the parallelizer.
params = []
@@ -84,7 +81,7 @@ def enumerate_double_bonds(
# Go through the missing ones and throw a message.
for miss_indx in contnr_idxs_of_failed:
- utils.log(
+ logger.warning(
"\tCould not generate valid double-bond variant for "
+ contnrs[miss_indx].orig_smi
+ " ("
@@ -109,7 +106,7 @@ def parallel_get_double_bonded(mol, max_variants_per_compound, thoroughness):
"""A parallelizable function for enumerating double bonds.
:param mol: The molecule with a potentially unspecified double bond.
- :type mol: MyMol.MyMol
+ :type mol: Molecule
:param max_variants_per_compound: To control the combinatorial explosion,
only this number of variants (molecules) will be advanced to the next
step.
@@ -214,9 +211,8 @@ def parallel_get_double_bonded(mol, max_variants_per_compound, thoroughness):
# Let the user know.
if dbl_bnd_count > 0:
- utils.log(
- "\t"
- + mol.smiles(True)
+ logger.info(
+ mol.smiles(True)
+ " has "
# + str(dbl_bnd_count)
+ str(
@@ -274,8 +270,8 @@ def parallel_get_double_bonded(mol, max_variants_per_compound, thoroughness):
]
results = []
for smile_to_consider in smiles_to_consider:
- # Make a new MyMol.MyMol object with the specified smiles.
- new_mol = MyMol.MyMol(smile_to_consider)
+ # Make a new Molecule object with the specified smiles.
+ new_mol = Molecule(smile_to_consider)
# Sometimes you get an error if there's a bad structure otherwise. Add
# the new molecule to the list of results, if it does not have a bizarre
diff --git a/gypsum_dl/steps/smiles/EnumerateChiralMols.py b/gypsum_dl/steps/smiles/chiral.py
similarity index 91%
rename from gypsum_dl/steps/smiles/EnumerateChiralMols.py
rename to gypsum_dl/steps/smiles/chiral.py
index 961bc96..21bbe52 100644
--- a/gypsum_dl/steps/smiles/EnumerateChiralMols.py
+++ b/gypsum_dl/steps/smiles/chiral.py
@@ -2,19 +2,15 @@
A module for generating alternate chiralities.
"""
-import __future__
-
import copy
import itertools
-import random
-import gypsum_dl.parallelizer as Parallelizer
-from gypsum_dl import MyMol, chem_utils, utils
+from loguru import logger
+from rdkit import Chem
-try:
- from rdkit import Chem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+import gypsum_dl.parallelizer as Parallelizer
+from gypsum_dl import chem_utils, utils
+from gypsum_dl.models import Molecule
def enumerate_chiral_molecules(
@@ -29,7 +25,7 @@ def enumerate_chiral_molecules(
an atom is given, that chiral center is not varied. Only the chirality
of unspecified chiral centers is varied.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
:param max_variants_per_compound: To control the combinatorial explosion,
only this number of variants (molecules) will be advanced to the next
@@ -55,7 +51,7 @@ def enumerate_chiral_molecules(
if max_variants_per_compound == 0:
return
- utils.log("Enumerating all possible enantiomers for all molecules...")
+ logger.info("Enumerating all possible enantiomers for all molecules...")
# Group the molecules so you can feed them to parallelizer.
params = []
@@ -83,8 +79,8 @@ def enumerate_chiral_molecules(
# Go through the missing ones and throw a message.
for miss_indx in contnr_idxs_of_failed:
- utils.log(
- "\tCould not generate valid enantiomers for "
+ logger.warning(
+ "Could not generate valid enantiomers for "
+ contnrs[miss_indx].orig_smi
+ " ("
+ contnrs[miss_indx].name
@@ -106,7 +102,7 @@ def parallel_get_chiral(mol, max_variants_per_compound, thoroughness):
"""A parallelizable function for enumerating chiralities.
:param mol: The input molecule.
- :type mol: MyMol.MyMol
+ :type mol: Molecule
:param max_variants_per_compound: To control the combinatorial explosion,
only this number of variants (molecules) will be advanced to the next
step.
@@ -119,7 +115,7 @@ def parallel_get_chiral(mol, max_variants_per_compound, thoroughness):
computational expense, but it also increases the chances of finding good
molecules.
:type thoroughness: int
- :return: A list of MyMol.MyMol objects.
+ :return: A list of Molecule objects.
:rtype: list
"""
@@ -154,9 +150,8 @@ def parallel_get_chiral(mol, max_variants_per_compound, thoroughness):
options = [list(itertools.chain(c[0], c[1])) for c in options]
# Let the user know the number of chiral centers.
- utils.log(
- "\t"
- + mol.smiles(True)
+ logger.info(
+ mol.smiles(True)
+ " ("
+ mol.name
+ ") has "
@@ -188,8 +183,8 @@ def parallel_get_chiral(mol, max_variants_per_compound, thoroughness):
Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW
)
- # Make a new MyMol.MyMol object from that rdkit molecule.
- new_mol = MyMol.MyMol(a_rd_mol)
+ # Make a new Molecule object from that rdkit molecule.
+ new_mol = Molecule(a_rd_mol)
# Add the new molecule to the list of results, if it does not have a
# bizarre substructure.
diff --git a/gypsum_dl/steps/smiles/DeSaltOrigSmiles.py b/gypsum_dl/steps/smiles/desalt.py
similarity index 71%
rename from gypsum_dl/steps/smiles/DeSaltOrigSmiles.py
rename to gypsum_dl/steps/smiles/desalt.py
index 14aeec8..7096d5f 100644
--- a/gypsum_dl/steps/smiles/DeSaltOrigSmiles.py
+++ b/gypsum_dl/steps/smiles/desalt.py
@@ -3,24 +3,24 @@
molecule, keep the larger one.
"""
-import __future__
+from loguru import logger
import gypsum_dl.parallelizer as Parallelizer
-from gypsum_dl import MyMol, chem_utils, utils
-
-try:
- from rdkit import Chem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+from gypsum_dl.models import MoleculeContainer
+from gypsum_dl.models import Molecule
def desalt_orig_smi(
- contnrs, num_procs, job_manager, parallelizer_obj, durrant_lab_filters=False
+ contnrs: list[MoleculeContainer],
+ num_procs: int,
+ job_manager: str,
+ parallelizer_obj,
+ durrant_lab_filters: bool = False,
):
"""If an input molecule has multiple unconnected fragments, this removes
all but the largest fragment.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
:param num_procs: The number of processors to use.
:type num_procs: int
@@ -30,7 +30,7 @@ def desalt_orig_smi(
:type parallelizer_obj: Parallelizer.Parallelizer
"""
- utils.log("Desalting all molecules (i.e., keeping only largest fragment).")
+ logger.info("Desalting all molecules (i.e., keeping only largest fragment).")
# Desalt each of the molecule containers. This step is very fast, so let's
# just run it on a single processor always.
@@ -51,24 +51,28 @@ def desalt_orig_smi(
cont.add_mol(desalt_mol)
-def desalter(contnr):
+def desalter(contnr: MoleculeContainer):
"""Desalts molecules in a molecule container.
:param contnr: The molecule container.
- :type contnr: MolContainer.MolContainer
+ :type contnr: container.MoleculeContainer
:return: A molecule object.
- :rtype: MyMol.MyMol
+ :rtype: Molecule
"""
# Split it into fragments
- frags = contnr.get_frags_of_orig_smi()
+ frags = contnr.initial_molecule.fragments
if len(frags) == 1:
# It's only got one fragment, so default assumption that
# orig_smi = orig_smi_deslt is correct.
return contnr.mol_orig_frm_inp_smi
- utils.log(
- "\tMultiple fragments found in " + contnr.orig_smi + " (" + contnr.name + ")"
+ logger.debug(
+ "Multiple fragments found in "
+ + contnr.name
+ + " ("
+ + contnr.initial_molecule.name
+ + ")"
)
# Find the biggest fragment
@@ -83,9 +87,9 @@ def desalter(contnr):
biggest_frag = num_heavy_atoms_to_frag[max(num_heavy_atoms)]
# Return info about that biggest fragment.
- new_mol = MyMol.MyMol(biggest_frag)
- new_mol.contnr_idx = contnr.contnr_idx
+ new_mol = Molecule(biggest_frag)
+ new_mol.contnr_idx = contnr.container_id
new_mol.name = contnr.name
- new_mol.genealogy = contnr.mol_orig_frm_inp_smi.genealogy
+ new_mol.genealogy = contnr
new_mol.make_mol_frm_smiles_sanitze() # Need to update the mol.
return new_mol
diff --git a/gypsum_dl/steps/smiles/DurrantLabFilter.py b/gypsum_dl/steps/smiles/dl_filter.py
similarity index 91%
rename from gypsum_dl/steps/smiles/DurrantLabFilter.py
rename to gypsum_dl/steps/smiles/dl_filter.py
index 0918460..93e2d62 100644
--- a/gypsum_dl/steps/smiles/DurrantLabFilter.py
+++ b/gypsum_dl/steps/smiles/dl_filter.py
@@ -3,15 +3,11 @@
filters.
"""
-import __future__
+from loguru import logger
+from rdkit import Chem
import gypsum_dl.parallelizer as Parallelizer
-from gypsum_dl import chem_utils, utils
-
-try:
- from rdkit import Chem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
+from gypsum_dl import chem_utils
# Get the substructures you won't permit (per substructure matching, not
# substring matching)
@@ -77,7 +73,7 @@ def durrant_lab_filters(contnrs, num_procs, job_manager, parallelizer_obj):
"""Removes any molecules that contain prohibited substructures, per the
durrant-lab filters.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: A list.
:param num_procs: The number of processors to use.
:type num_procs: int
@@ -87,7 +83,7 @@ def durrant_lab_filters(contnrs, num_procs, job_manager, parallelizer_obj):
:type parallelizer_obj: Parallelizer.Parallelizer
"""
- utils.log("Applying Durrant-lab filters to all molecules...")
+ logger.info("Applying Durrant-lab filters to all molecules...")
prohibited_substructs = [
Chem.MolFromSmarts(s) for s in prohibited_smi_substrs_for_substruc
@@ -141,12 +137,12 @@ def parallel_durrant_lab_filter(contnr, prohibited_substructs):
break any nonaromatic rings present in the original object.
:param contnr: The molecule container.
- :type contnr: MolContainer.MolContainer
+ :type contnr: container.MoleculeContainer
:param prohibited_substructs: A list of the prohibited substructures.
:type prohibited_substructs: list
:return: Either the container with bad molecules removed, or a None
object.
- :rtype: MolContainer.MolContainer | None
+ :rtype: container.MoleculeContainer | None
"""
# Replace any molecules that have prohibited substructure with None.
@@ -155,9 +151,8 @@ def parallel_durrant_lab_filter(contnr, prohibited_substructs):
if durrant_lab_contains_bad_substr(
m.orig_smi_deslt
) or m.rdkit_mol.HasSubstructMatch(pattrn):
- utils.log(
- "\t"
- + m.smiles(True)
+ logger.warning(
+ m.smiles(True)
+ ", a variant generated "
+ "from "
+ contnr.orig_smi
diff --git a/gypsum_dl/steps/smiles/AddHydrogens.py b/gypsum_dl/steps/smiles/hydrogens.py
similarity index 87%
rename from gypsum_dl/steps/smiles/AddHydrogens.py
rename to gypsum_dl/steps/smiles/hydrogens.py
index ac472b7..bd83f01 100644
--- a/gypsum_dl/steps/smiles/AddHydrogens.py
+++ b/gypsum_dl/steps/smiles/hydrogens.py
@@ -4,10 +4,12 @@
"""
from dimorphite_dl import protonate_smiles
+from loguru import logger
from rdkit import Chem
import gypsum_dl.parallelizer as Parallelizer
-from gypsum_dl import MyMol, chem_utils, utils
+from gypsum_dl import chem_utils, utils
+from gypsum_dl.models import Molecule
def add_hydrogens(
@@ -24,7 +26,7 @@ def add_hydrogens(
"""Adds hydrogen atoms to molecule containers, as appropriate for a given
pH.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: A list.
:param min_pH: The minimum pH to consider.
:type min_pH: float
@@ -52,7 +54,7 @@ def add_hydrogens(
:type parallelizer_obj: Parallelizer.Parallelizer
"""
- utils.log("Ionizing all molecules...")
+ logger.info("Ionizing all molecules...")
# Make a simple directory with the ionization parameters.
protonation_settings = {
@@ -84,8 +86,8 @@ def add_hydrogens(
# For those molecules, just use the original SMILES string, with hydrogen
# atoms added using RDKit.
for miss_indx in contnr_idxs_of_failed:
- utils.log(
- "\tWARNING: Gypsum-DL produced no valid ionization states for "
+ logger.warning(
+ "Gypsum-DL produced no valid ionization states for "
+ contnrs[miss_indx].orig_smi
+ " ("
+ contnrs[miss_indx].name
@@ -119,7 +121,7 @@ def parallel_add_H(contnr, protonation_settings):
This is the function that gets fed into the parallelizer.
:param contnr: The molecule container.
- :type contnr: MolContainer.MolContainer
+ :type contnr: container.MoleculeContainer
:param protonation_settings: Protonation settings to pass to Dimorphite-DL.
:type protonation_settings: dict
:return: [description]
@@ -128,11 +130,11 @@ def parallel_add_H(contnr, protonation_settings):
# Make sure the canonical SMILES is actually a string.
if not isinstance(contnr.orig_smi_canonical, str):
- utils.log(f"container.orig_smi_canonical: {contnr.orig_smi_canonical}")
- utils.log(
+ logger.error(f"container.orig_smi_canonical: {contnr.orig_smi_canonical}")
+ logger.error(
f"type container.orig_smi_canonical: {str(type(contnr.orig_smi_canonical))}"
)
- utils.exception(f"container.orig_smi_canonical: {contnr.orig_smi_canonical}")
+ raise ValueError(f"container.orig_smi_canonical: {contnr.orig_smi_canonical}")
# Add the SMILES string to the protonation parameters.
protonation_settings["smiles_input"] = contnr.orig_smi_canonical
@@ -144,14 +146,14 @@ def parallel_add_H(contnr, protonation_settings):
# objects.
rdkit_mols = [Chem.MolFromSmiles(smi.strip()) for smi in smis]
- # Convert from rdkit mols to MyMol.MyMol.
- addH_mols = [MyMol.MyMol(mol) for mol in rdkit_mols if mol is not None]
+ # Convert from rdkit mols to Molecule.
+ addH_mols = [Molecule(mol) for mol in rdkit_mols if mol is not None]
- # Remove MyMols with odd substructures.
+ # Remove Molecule objects with odd substructures.
addH_mols = [mol for mol in addH_mols if mol.remove_bizarre_substruc() is False]
# I once saw it add a "C+"" here. So do a secondary check at this point to
- # make sure it's valid. Recreate the list, moving new MyMol.MyMol objects
+ # make sure it's valid. Recreate the list, moving new Molecule objects
# into the return_values list.
return_values = []
diff --git a/gypsum_dl/steps/smiles/PrepareSmiles.py b/gypsum_dl/steps/smiles/prepare.py
similarity index 73%
rename from gypsum_dl/steps/smiles/PrepareSmiles.py
rename to gypsum_dl/steps/smiles/prepare.py
index 76dc323..14d0fc8 100644
--- a/gypsum_dl/steps/smiles/PrepareSmiles.py
+++ b/gypsum_dl/steps/smiles/prepare.py
@@ -3,24 +3,25 @@
tautomeric, chiral forms, etc.
"""
-import __future__
+from loguru import logger
from gypsum_dl import utils
-from gypsum_dl.steps.smiles.AddHydrogens import add_hydrogens
-from gypsum_dl.steps.smiles.DeSaltOrigSmiles import desalt_orig_smi
-from gypsum_dl.steps.smiles.DurrantLabFilter import (
+from gypsum_dl.steps.smiles.bonds import enumerate_double_bonds
+from gypsum_dl.steps.smiles.chiral import enumerate_chiral_molecules
+from gypsum_dl.steps.smiles.desalt import desalt_orig_smi
+from gypsum_dl.steps.smiles.dl_filter import (
durrant_lab_contains_bad_substr,
durrant_lab_filters,
)
-from gypsum_dl.steps.smiles.EnumerateChiralMols import enumerate_chiral_molecules
-from gypsum_dl.steps.smiles.EnumerateDoubleBonds import enumerate_double_bonds
-from gypsum_dl.steps.smiles.MakeTautomers import make_tauts
+from gypsum_dl.steps.smiles.hydrogens import add_hydrogens
+from gypsum_dl.steps.smiles.tautomers import make_tauts
+from gypsum_dl.models import MoleculeContainer
-def prepare_smiles(contnrs, params):
+def prepare_smiles(contnrs: list[MoleculeContainer], params: dict):
"""Runs the appropriate steps for processing the SMILES strings.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
:param params: The user parameters.
:type params: dict
@@ -41,14 +42,13 @@ def prepare_smiles(contnrs, params):
# Desalt the molecules. Note that the program always desalts (can't turn it
# off).
- # utils.log("Begin Desaltings")
+ logger.debug("Begin Desaltings")
desalt_orig_smi(contnrs, num_procs, job_manager, parallelizer_obj)
- # utils.log("Done with Desalting")
# Filter the containers to remove ones that have bad substrings (metal,
# etc.) in the desalted smiles, assuming durrant lab filter turned on. Note
# that some compounds aren't filtered until later.
- if params["use_durrant_lab_filters"] == True:
+ if params["use_durrant_lab_filters"]:
contnrs = [
c for c in contnrs if not durrant_lab_contains_bad_substr(c.orig_smi_deslt)
]
@@ -58,7 +58,6 @@ def prepare_smiles(contnrs, params):
# Add hydrogens for user-specified pH, if requested.
if not params["skip_adding_hydrogen"]:
- # utils.log("Ionizing Molecules")
add_hydrogens(
contnrs,
min_ph,
@@ -70,9 +69,8 @@ def prepare_smiles(contnrs, params):
job_manager,
parallelizer_obj,
)
- # utils.log("Done with Ionization")
else:
- utils.log("Skipping ionization")
+ logger.info("Skipping ionization")
wrap_molecules(contnrs)
if debug:
@@ -80,7 +78,6 @@ def prepare_smiles(contnrs, params):
# Make alternate tautomeric forms, if requested.
if not params["skip_making_tautomers"]:
- # utils.log("Tautomerizing Molecules")
make_tauts(
contnrs,
max_variants_per_compound,
@@ -90,27 +87,23 @@ def prepare_smiles(contnrs, params):
let_tautomers_change_chirality,
parallelizer_obj,
)
- # utils.log("Done with Tautomerization")
else:
- utils.log("Skipping tautomerization")
+ logger.info("Skipping tautomerization")
if debug:
utils.print_current_smiles(contnrs)
# Apply Durrant-lab filters if requested
if params["use_durrant_lab_filters"]:
- # utils.log("Applying Durrant-Lab Filters")
durrant_lab_filters(contnrs, num_procs, job_manager, parallelizer_obj)
- # utils.log("Done Applying Durrant-Lab Filters")
else:
- utils.log("Not applying Durrant-lab filters")
+ logger.info("Not applying Durrant-lab filters")
if debug:
utils.print_current_smiles(contnrs)
# Make alternate chiral forms, if requested.
if not params["skip_enumerate_chiral_mol"]:
- # utils.log("Enumerating Chirality")
enumerate_chiral_molecules(
contnrs,
max_variants_per_compound,
@@ -119,16 +112,14 @@ def prepare_smiles(contnrs, params):
job_manager,
parallelizer_obj,
)
- # utils.log("Done with Chirality Enumeration")
else:
- utils.log("Skipping chirality enumeration")
+ logger.info("Skipping chirality enumeration")
if debug:
utils.print_current_smiles(contnrs)
# Make alternate double-bond isomers, if requested.
if not params["skip_enumerate_double_bonds"]:
- # utils.log("Enumerating Double Bonds")
enumerate_double_bonds(
contnrs,
max_variants_per_compound,
@@ -137,9 +128,8 @@ def prepare_smiles(contnrs, params):
job_manager,
parallelizer_obj,
)
- # utils.log("Done with Double Bond Enumeration")
else:
- utils.log("Skipping double bond enumeration")
+ logger.info("Skipping double bond enumeration")
if debug:
utils.print_current_smiles(contnrs)
@@ -159,7 +149,7 @@ def wrap_molecules(contnrs):
In this case, the one SMILES needs to be converted to a RDKit mol object
for subsequent steps to work. Let's do that here.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
"""
diff --git a/gypsum_dl/steps/smiles/MakeTautomers.py b/gypsum_dl/steps/smiles/tautomers.py
similarity index 84%
rename from gypsum_dl/steps/smiles/MakeTautomers.py
rename to gypsum_dl/steps/smiles/tautomers.py
index e7f20bd..4e33c06 100644
--- a/gypsum_dl/steps/smiles/MakeTautomers.py
+++ b/gypsum_dl/steps/smiles/tautomers.py
@@ -1,22 +1,12 @@
"""This module makes alternate tautomeric states, using MolVS."""
-import __future__
+from loguru import logger
+from molvs import tautomer
+from rdkit import Chem
-import random
-
-import gypsum_dl.MolObjectHandling as MOH
import gypsum_dl.parallelizer as Parallelizer
-from gypsum_dl import MyMol, chem_utils, utils
-
-try:
- from rdkit import Chem
-except Exception:
- utils.exception("You need to install rdkit and its dependencies.")
-
-try:
- from molvs import tautomer
-except Exception:
- utils.exception("You need to install molvs and its dependencies.")
+from gypsum_dl import chem_utils, handlers
+from gypsum_dl.models import Molecule
def make_tauts(
@@ -31,10 +21,10 @@ def make_tauts(
"""Generates tautomers of the molecules. Note that some of the generated
tautomers are not realistic. If you find a certain improbable
substructure keeps popping up, add it to the list in the
- `prohibited_substructures` definition found with MyMol.py, in the function
+ `prohibited_substructures` definition of the Molecule class, in the method
remove_bizarre_substruc().
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: A list.
:param max_variants_per_compound: To control the combinatorial explosion,
only this number of variants (molecules) will be advanced to the next
@@ -54,7 +44,7 @@ def make_tauts(
change the total number of chiral centers.
:type let_tautomers_change_chirality: bool
:param job_manager: The multithred mode to use.
- :type job_manager: string
+ :type job_manager: string
:param parallelizer_obj: The Parallelizer object.
:type parallelizer_obj: Parallelizer.Parallelizer
"""
@@ -63,7 +53,7 @@ def make_tauts(
if max_variants_per_compound == 0:
return
- utils.log("Generating tautomers for all molecules...")
+ logger.info("Generating tautomers for all molecules...")
# Create the parameters to feed into the parallelizer object.
params = []
@@ -111,42 +101,40 @@ def parallel_make_taut(contnr, mol_index, max_variants_per_compound):
function that gets fed into the parallelizer.
:param contnr: The molecule container.
- :type contnr: MolContainer.MolContainer
+ :type contnr: container.MoleculeContainer
:param mol_index: The molecule index.
:type mol_index: int
:param max_variants_per_compound: To control the combinatorial explosion,
only this number of variants (molecules) will be advanced to the next
step.
:type max_variants_per_compound: int
- :return: A list of MyMol.MyMol objects, containing the alternate
+ :return: A list of Molecule objects, containing the alternate
tautomeric forms.
:rtype: list
"""
- # Get the MyMol.MyMol within the molecule container corresponding to the
+ # Get the Molecule within the molecule container corresponding to the
# given molecule index.
mol = contnr.mols[mol_index]
# Create a temporary RDKit mol object, since that's what MolVS works with.
# TODO: There should be a copy function
- m = MyMol.MyMol(mol.smiles()).rdkit_mol
+ m = Molecule(mol.smiles()).rdkit_mol
# For tautomers to work, you need to not have any explicit hydrogens.
m = Chem.RemoveHs(m)
# Make sure it's not None.
if m is None:
- utils.log(
- "\tCould not generate tautomers for "
- + contnr.orig_smi
- + ". I'm deleting it."
+ logger.warning(
+ "Could not generate tautomers for " + contnr.orig_smi + ". I'm deleting it."
)
return
# Molecules should be kekulized already, but let's double check that.
# Because MolVS requires kekulized input.
Chem.Kekulize(m)
- m = MOH.check_sanitization(m)
+ m = handlers.check_sanitization(m)
if m is None:
return None
@@ -157,15 +145,15 @@ def parallel_make_taut(contnr, mol_index, max_variants_per_compound):
enum = tautomer.TautomerEnumerator(max_tautomers=max_variants_per_compound)
tauts_rdkit_mols = enum.enumerate(m)
- # Make all those tautomers into MyMol objects.
- tauts_mols = [MyMol.MyMol(m) for m in tauts_rdkit_mols]
+ # Make all those tautomers into Molecule objects.
+ tauts_mols = [Molecule(m) for m in tauts_rdkit_mols]
# Keep only those that have reasonable substructures.
tauts_mols = [t for t in tauts_mols if t.remove_bizarre_substruc() == False]
# If there's more than one, let the user know that.
if len(tauts_mols) > 1:
- utils.log("\t" + mol.smiles(True) + " has tautomers.")
+ logger.info(mol.smiles(True) + " has tautomers.")
# Now collect the final results.
results = []
@@ -190,9 +178,9 @@ def tauts_no_break_arom_rngs(
regardless of tautization, ionization, etc. Any taut that breaks
aromaticity is unlikely to be worth pursuing. So remove it.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: A list.
- :param taut_data: A list of MyMol.MyMol objects.
+ :param taut_data: A list of Molecule objects.
:type taut_data: list
:param num_procs: The number of processors to use.
:type num_procs: int
@@ -200,7 +188,7 @@ def tauts_no_break_arom_rngs(
:type job_manager: string
:param parallelizer_obj: The Parallelizer object.
:type parallelizer_obj: Parallelizer.Parallelizer
- :return: A list of MyMol.MyMol objects, with certain bad ones removed.
+ :return: A list of Molecule objects, with certain bad ones removed.
:rtype: list
"""
@@ -236,9 +224,9 @@ def tauts_no_elim_chiral(contnrs, taut_data, num_procs, job_manager, parallelize
for the error in other folk's code, let's just require that the number
of chiral centers remain unchanged with isomerization.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
- :param taut_data: A list of MyMol.MyMol objects.
+ :param taut_data: A list of Molecule objects.
:type taut_data: list
:param num_procs: The number of processors to use.
:type num_procs: int
@@ -246,7 +234,7 @@ def tauts_no_elim_chiral(contnrs, taut_data, num_procs, job_manager, parallelize
:type job_manager: string
:param parallelizer_obj: The Parallelizer object.
:type parallelizer_obj: Parallelizer.Parallelizer
- :return: A list of MyMol.MyMol objects, with certain bad ones removed.
+ :return: A list of Molecule objects, with certain bad ones removed.
:rtype: list
"""
@@ -283,9 +271,9 @@ def tauts_no_change_hs_to_cs_unless_alpha_to_carbnyl(
tautomer-generating code you use makes these inappropriate tautomers.
Remove them here.
- :param contnrs: A list of containers (MolContainer.MolContainer).
+ :param contnrs: A list of containers (container.MoleculeContainer).
:type contnrs: list
- :param taut_data: A list of MyMol.MyMol objects.
+ :param taut_data: A list of Molecule objects.
:type taut_data: list
:param num_procs: The number of processors to use.
:type num_procs: int
@@ -293,7 +281,7 @@ def tauts_no_change_hs_to_cs_unless_alpha_to_carbnyl(
:type job_manager: string
:param parallelizer_obj: The Parallelizer object.
:type parallelizer_obj: Parallelizer.Parallelizer
- :return: A list of MyMol.MyMol objects, with certain bad ones removed.
+ :return: A list of Molecule objects, with certain bad ones removed.
:rtype: list
"""
@@ -317,11 +305,11 @@ def parallel_check_nonarom_rings(taut, contnr):
break any nonaromatic rings present in the original object.
:param taut: The tautomer to evaluate.
- :type taut: MyMol.MyMol
+ :type taut: Molecule
:param contnr: The original molecule container.
- :type contnr: MolContainer.MolContainer
+ :type contnr: container.MoleculeContainer
:return: Either the tautomer or a None object.
- :rtype: MyMol.MyMol | None
+ :rtype: Molecule | None
"""
# How many nonaromatic rings in the original smiles?
@@ -334,9 +322,8 @@ def parallel_check_nonarom_rings(taut, contnr):
# good ones.
return taut
else:
- utils.log(
- "\t"
- + taut.smiles(True)
+ logger.warning(
+ taut.smiles(True)
+ ", a tautomer generated "
+ "from "
+ contnr.orig_smi
@@ -351,11 +338,11 @@ def parallel_check_chiral_centers(taut, contnr):
any chiral centers in the original molecule.
:param taut: The tautomer to evaluate.
- :type taut: MyMol.MyMol
+ :type taut: Molecule
:param contnr: The original molecule container.
- :type contnr: MolContainer.MolContainer
+ :type contnr: container.MoleculeContainer
:return: Either the tautomer or a None object.
- :rtype: MyMol.MyMol | None
+ :rtype: Molecule | None
"""
# How many chiral centers in the original smiles?
@@ -373,9 +360,8 @@ def parallel_check_chiral_centers(taut, contnr):
# one.
return taut
else:
- utils.log(
- "\t"
- + contnr.orig_smi
+ logger.warning(
+ contnr.orig_smi
+ " ==> "
+ taut.smiles(True)
+ " (tautomer transformation on "
@@ -395,11 +381,11 @@ def parallel_check_carbon_hydrogens(taut, contnr):
change the hydrogens on inappropriate carbons.
:param taut: The tautomer to evaluate.
- :type taut: MyMol.MyMol
+ :type taut: Molecule
:param contnr: The original molecule container.
- :type contnr: MolContainer.MolContainer
+ :type contnr: container.MoleculeContainer
:return: Either the tautomer or a None object.
- :rtype: MyMol.MyMol | None
+ :rtype: Molecule | None
"""
# What's the carbon-hydrogen fingerprint of the original smiles?
@@ -412,9 +398,8 @@ def parallel_check_carbon_hydrogens(taut, contnr):
if orig_carbon_hydrogen_count == this_carbon_hydrogen_count:
return taut
else:
- utils.log(
- "\t"
- + contnr.orig_smi
+ logger.warning(
+ contnr.orig_smi
+ " ==> "
+ taut.smiles(True)
+ " (tautomer transformation on "
diff --git a/gypsum_dl/utils.py b/gypsum_dl/utils.py
index 71722eb..269b5db 100644
--- a/gypsum_dl/utils.py
+++ b/gypsum_dl/utils.py
@@ -1,25 +1,31 @@
"""Some helpful utility definitions used throughout the code."""
+from typing import TYPE_CHECKING, Any
+
import contextlib
import random
import string
-import textwrap
-from gypsum_dl import MolContainer, MyMol
+from loguru import logger
+
+if TYPE_CHECKING:
+ from gypsum_dl.models import Molecule, MoleculeContainer
-def group_mols_by_container_index(mol_lst):
- """Take a list of MyMol.MyMol objects, and place them in lists according to
+def group_mols_by_container_index(
+ mol_lst: list["Molecule"],
+) -> dict[Any, list["Molecule"]]:
+ """Take a list of Molecule objects, and place them in lists according to
their associated contnr_idx values. These lists are accessed via
a dictionary, where they keys are the contnr_idx values
themselves.
Args:
- mol_lst: The list of MyMol.MyMol objects.
+ mol_lst: The list of Molecule objects.
Returns:
A dictionary, where keys are `contnr_idx` values and values are lists of
- MyMol.MyMol objects.
+ Molecule objects.
"""
# Make the dictionary.
@@ -42,7 +48,7 @@ def group_mols_by_container_index(mol_lst):
return grouped_results
-def random_sample(lst: list, num: int, msg_if_cut: str = ""):
+def random_sample(lst: list, num: int, msg_if_cut: str = "") -> list:
"""Randomly selects elements from a list.
Args:
@@ -56,7 +62,7 @@ def random_sample(lst: list, num: int, msg_if_cut: str = ""):
"""
with contextlib.suppress(Exception):
- # Remove redundancies. Supress because someitems lst element may be
+ # Remove redundancies. Suppress because sometimes lst element may be
# unhashable.
lst = list(set(lst))
@@ -66,40 +72,19 @@ def random_sample(lst: list, num: int, msg_if_cut: str = ""):
# Keep the top ones.
lst = lst[:num]
if msg_if_cut != "":
- log(msg_if_cut)
+ logger.debug(msg_if_cut)
return lst
-def log(txt: str, trailing_whitespace: str = "") -> None:
- """Prints a message to the screen.
-
- Args:
- txt: The message to print.
- trailing_whitespace: White space to add to the end of the
- message, after the trim. "" by default.
- """
-
- whitespace_before = txt[: len(txt) - len(txt.lstrip())].replace("\t", " ")
- print(
- (
- textwrap.fill(
- txt.strip(),
- width=80,
- initial_indent=whitespace_before,
- subsequent_indent=f"{whitespace_before} ",
- )
- + trailing_whitespace
- )
- )
-
-
-def fnd_contnrs_not_represntd(contnrs: list[MolContainer], results: list) -> list:
+def fnd_contnrs_not_represntd(
+ contnrs: list["MoleculeContainer"], results: list
+) -> list:
"""Identify containers that have no representative elements in results.
Something likely failed for the containers with no results.
Args:
- contnrs: A list of containers (MolContainer.MolContainer).
- results: A list of MyMol.MyMol objects.
+ contnrs: A list of containers (container.MoleculeContainer).
+ results: A list of Molecule objects.
Returns:
A list of integers, the indecies of the contnrs that have no
@@ -115,7 +100,6 @@ def fnd_contnrs_not_represntd(contnrs: list[MolContainer], results: list) -> lis
# smiles.
idx_to_smi = {}
for idx in range(len(contnrs)):
- contnr = contnrs[idx]
if idx not in idx_to_smi:
idx_to_smi[idx] = contnrs[idx].orig_smi_deslt
@@ -130,35 +114,17 @@ def fnd_contnrs_not_represntd(contnrs: list[MolContainer], results: list) -> lis
return list(idx_to_smi.keys())
-def print_current_smiles(contnrs: list[MolContainer]) -> None:
+def print_current_smiles(contnrs: list["MoleculeContainer"]) -> None:
"""Prints the smiles of the current containers. Helpful for debugging.
Args:
- contnrs: A list of containers (MolContainer.MolContainer).
+ contnrs: A list of containers (container.MoleculeContainer).
"""
-
- # For debugging.
- log(" Contents of MolContainers")
+ logger.debug("Contents of MoleculeContainers")
for i, mol_cont in enumerate(contnrs):
- log("\t\tMolContainer #" + str(i) + " (" + mol_cont.name + ")")
+ logger.debug("MoleculeContainer #" + str(i) + " (" + mol_cont.name + ")")
for i, s in enumerate(mol_cont.all_can_noh_smiles()):
- log("\t\t\tMol #" + str(i) + ": " + s)
-
-
-def exception(msg: str) -> None:
- """Prints an error to the screen and raises an exception.
-
- Args:
- msg: The error message.
- """
-
- log(msg)
- log("\n" + "=" * 79)
- log("For help with usage:")
- log("\tpython run_gypsum_dl.py --help")
- log("=" * 79)
- log("")
- raise Exception(msg)
+ logger.debug("Mol #" + str(i) + ": " + s)
def slug(strng: str) -> str:
diff --git a/pixi.lock b/pixi.lock
index 154c092..f185a94 100644
--- a/pixi.lock
+++ b/pixi.lock
@@ -8,6 +8,7 @@ environments:
packages:
linux-64:
- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-3_kmp_llvm.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb9d3cd8_3.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb9d3cd8_3.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda
@@ -91,6 +92,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/linux-64/pixman-0.46.2-h29eaf8c_0.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/pycairo-1.28.0-py313h926f637_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.33.2-py313h4b2b08d_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.13.5-hec9711d_102_cp313.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda
@@ -107,6 +110,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/linux-64/sqlalchemy-2.0.41-py313h536fd9c_0.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_hd72426e_102.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda
@@ -119,8 +123,10 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
+ - pypi: https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
- pypi: ./
osx-64:
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h6e16a3a_3.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h6e16a3a_3.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-hfdf4475_7.conda
@@ -202,6 +208,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-64/pixman-0.46.2-h1fd1274_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-h00291cd_1002.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/pycairo-1.28.0-py313hdb0f19b_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/osx-64/pydantic-core-2.33.2-py313hb35714d_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/python-3.13.5-hc3a4c56_102_cp313.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda
@@ -218,6 +226,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-64/sqlalchemy-2.0.41-py313h63b0ddb_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.13-hf689a15_2.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.12-h6e16a3a_0.conda
@@ -225,8 +234,10 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.7-h8210216_2.conda
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
+ - pypi: https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl
- pypi: ./
osx-arm64:
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-1.1.0-h5505292_3.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-bin-1.1.0-h5505292_3.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h99b78c6_7.conda
@@ -310,6 +321,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pixman-0.46.2-h2f9eb0b_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pthread-stubs-0.4-hd74edd7_1002.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pycairo-1.28.0-py313h4109515_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.33.2-py313hf3ab51e_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.13.5-hf3f3da0_102_cp313.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda
@@ -326,6 +339,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/sqlalchemy-2.0.41-py313h90d716c_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h892fb3f_2.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/xorg-libxau-1.0.12-h5505292_0.conda
@@ -333,8 +347,10 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstd-1.5.7-h6491c7d_2.conda
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
+ - pypi: https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl
- pypi: ./
win-64:
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-h2466b09_3.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-h2466b09_3.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h2466b09_7.conda
@@ -414,6 +430,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-h2466b09_4.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/pycairo-1.28.0-py313h4cbbc28_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/win-64/pydantic-core-2.33.2-py313ha8a9a3c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/python-3.13.5-h7de537c_102_cp313.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda
@@ -430,6 +448,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/win-64/tbb-2021.13.0-h62715c5_1.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h2c6b04d_2.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_1.conda
@@ -442,6 +461,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-hbeecb71_2.conda
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
+ - pypi: https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl
- pypi: ./
dev:
channels:
@@ -453,6 +473,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/noarch/backports-1.0-pyhd8ed1ab_5.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/backports.tarfile-1.2.0-pyhd8ed1ab_1.conda
@@ -595,6 +616,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/pycairo-1.28.0-py312hac037c1_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.33.2-py312h680f630_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pynvml-11.4.1-pyhd8ed1ab_0.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -631,6 +654,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/twine-5.1.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/types-pyyaml-6.0.12.20250516-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/ucx-1.18.1-h1369271_0.conda
@@ -652,10 +676,12 @@ environments:
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
- pypi: https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl
+ - pypi: https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/ab/ac/8f96ba9b4cfe3e4ea201f23f4f97165862395e9331a424ed325ae37024a8/setuptools_scm-8.3.1-py3-none-any.whl
- pypi: ./
osx-64:
- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/backports-1.0-pyhd8ed1ab_5.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/backports.tarfile-1.2.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/basedpyright-1.29.4-pyhe01879c_0.conda
@@ -780,6 +806,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-h00291cd_1002.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/pycairo-1.28.0-py312h4c91fdb_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/osx-64/pydantic-core-2.33.2-py312haba3716_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pynvml-11.4.1-pyhd8ed1ab_0.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -814,6 +842,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/twine-5.1.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/types-pyyaml-6.0.12.20250516-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/unicodedata2-16.0.0-py312h01d7ebd_0.conda
@@ -829,10 +858,12 @@ environments:
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
- pypi: https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl
+ - pypi: https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/ab/ac/8f96ba9b4cfe3e4ea201f23f4f97165862395e9331a424ed325ae37024a8/setuptools_scm-8.3.1-py3-none-any.whl
- pypi: ./
osx-arm64:
- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/backports-1.0-pyhd8ed1ab_5.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/backports.tarfile-1.2.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/basedpyright-1.29.4-pyhe01879c_0.conda
@@ -959,6 +990,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pthread-stubs-0.4-hd74edd7_1002.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pycairo-1.28.0-py312h0437987_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.33.2-py312hd3c0895_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pynvml-11.4.1-pyhd8ed1ab_0.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -993,6 +1026,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/twine-5.1.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/types-pyyaml-6.0.12.20250516-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/unicodedata2-16.0.0-py312hea69d52_0.conda
@@ -1008,10 +1042,12 @@ environments:
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
- pypi: https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl
+ - pypi: https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl
- pypi: https://files.pythonhosted.org/packages/ab/ac/8f96ba9b4cfe3e4ea201f23f4f97165862395e9331a424ed325ae37024a8/setuptools_scm-8.3.1-py3-none-any.whl
- pypi: ./
win-64:
- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/backports-1.0-pyhd8ed1ab_5.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/backports.tarfile-1.2.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/basedpyright-1.29.4-pyhe01879c_0.conda
@@ -1132,6 +1168,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-h2466b09_4.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/pycairo-1.28.0-py312h0cc4484_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/win-64/pydantic-core-2.33.2-py312h8422cdd_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pynvml-11.4.1-pyhd8ed1ab_0.tar.bz2
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -1167,6 +1205,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/twine-5.1.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/types-pyyaml-6.0.12.20250516-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_1.conda
@@ -1187,6 +1226,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/87/35/cc6fb4acf8a68c4e75aeb4556eaebe5165ebe8d4c1860d7eb643a14b7c13/dimorphite_dl-2.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/08/dc/d948e83b97f2c420cb6c7e2143ae349560d3b5b061945f1b2a4eefb0231c/MolVS-0.1.1.tar.gz
- pypi: https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl
+ - pypi: https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl
- pypi: https://files.pythonhosted.org/packages/ab/ac/8f96ba9b4cfe3e4ea201f23f4f97165862395e9331a424ed325ae37024a8/setuptools_scm-8.3.1-py3-none-any.whl
- pypi: ./
docs:
@@ -1197,6 +1237,7 @@ environments:
packages:
linux-64:
- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-3_kmp_llvm.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.3.0-pyh71513ae_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.17.0-pyhd8ed1ab_0.conda
@@ -1363,6 +1404,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/linux-64/pycairo-1.28.0-py313h926f637_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.33.2-py313h4b2b08d_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.16-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -1400,6 +1443,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/linux-64/tornado-6.5.1-py313h536fd9c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.5.0-pyhd8ed1ab_0.conda
@@ -1427,6 +1471,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/eb/d8/0d1d2e9d3fabcf5d6840362adcf05f8cf3cd06a73358140c3a97189238ae/wcmatch-10.1-py3-none-any.whl
- pypi: ./
osx-64:
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/appnope-0.1.4-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.3.0-pyh71513ae_0.conda
@@ -1592,6 +1637,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/osx-64/pycairo-1.28.0-py313hdb0f19b_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/osx-64/pydantic-core-2.33.2-py313hb35714d_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.16-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -1629,6 +1676,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-64/tornado-6.5.1-py313h63b0ddb_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.5.0-pyhd8ed1ab_0.conda
@@ -1651,6 +1699,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/eb/d8/0d1d2e9d3fabcf5d6840362adcf05f8cf3cd06a73358140c3a97189238ae/wcmatch-10.1-py3-none-any.whl
- pypi: ./
osx-arm64:
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/appnope-0.1.4-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.3.0-pyh71513ae_0.conda
@@ -1818,6 +1867,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pycairo-1.28.0-py313h4109515_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.33.2-py313hf3ab51e_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.16-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -1855,6 +1906,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/tornado-6.5.1-py313h90d716c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.5.0-pyhd8ed1ab_0.conda
@@ -1877,6 +1929,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/eb/d8/0d1d2e9d3fabcf5d6840362adcf05f8cf3cd06a73358140c3a97189238ae/wcmatch-10.1-py3-none-any.whl
- pypi: ./
win-64:
+ - conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.3.0-pyh71513ae_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.17.0-pyhd8ed1ab_0.conda
@@ -2037,6 +2090,8 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/pycairo-1.28.0-py313h4cbbc28_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyh29332c3_1.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/win-64/pydantic-core-2.33.2-py313ha8a9a3c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pymdown-extensions-10.16-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.2.3-pyhd8ed1ab_1.conda
@@ -2075,6 +2130,7 @@ environments:
- conda: https://conda.anaconda.org/conda-forge/win-64/tornado-6.5.1-py313ha7868ed_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.14.0-h32cad80_0.conda
+ - conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
- conda: https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_1.conda
@@ -2146,6 +2202,18 @@ packages:
purls: []
size: 8191
timestamp: 1744137672556
+- conda: https://conda.anaconda.org/conda-forge/noarch/annotated-types-0.7.0-pyhd8ed1ab_1.conda
+ sha256: e0ea1ba78fbb64f17062601edda82097fcf815012cf52bb704150a2668110d48
+ md5: 2934f256a8acfe48f6ebb4fce6cde29c
+ depends:
+ - python >=3.9
+ - typing-extensions >=4.0.0
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/annotated-types?source=hash-mapping
+ size: 18074
+ timestamp: 1733247158254
- conda: https://conda.anaconda.org/conda-forge/noarch/appnope-0.1.4-pyhd8ed1ab_1.conda
sha256: 8f032b140ea4159806e4969a68b4a3c0a7cab1ad936eb958a2b5ffe5335e19bf
md5: 54898d0f524c9dee622d44bbb081a8ab
@@ -3928,8 +3996,8 @@ packages:
timestamp: 1745436578592
- pypi: ./
name: gypsum-dl
- version: 1.0.1.dev4+g441bece
- sha256: 4942dec03a78254da84781c5236ca91e82023ce806a26b87f4aec53b3f04b492
+ version: 1.0.1.dev16+g1107099.d20250712013936
+ sha256: d909faa3498b773d58dbf024524f9b0ec8d3efbed308d0aa300010775b3aa6c0
requires_dist:
- loguru>=0.7.2,<1
- numpy>=2.0.0,<3
@@ -3938,6 +4006,8 @@ packages:
- mpi4py>=4.0.1,<5
- molvs>=0.1.1,<1
- dimorphite-dl>=2.0.1,<3
+ - pydantic>=2.0.0,<3
+ - pyyaml>=6.0.0,<7
requires_python: '>=3.10'
editable: true
- conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.2.0-pyhd8ed1ab_0.conda
@@ -9758,6 +9828,160 @@ packages:
purls: []
size: 110100
timestamp: 1733195786147
+- conda: https://conda.anaconda.org/conda-forge/noarch/pydantic-2.11.7-pyh3cfb1c2_0.conda
+ sha256: ee7823e8bc227f804307169870905ce062531d36c1dcf3d431acd65c6e0bd674
+ md5: 1b337e3d378cde62889bb735c024b7a2
+ depends:
+ - annotated-types >=0.6.0
+ - pydantic-core 2.33.2
+ - python >=3.9
+ - typing-extensions >=4.6.1
+ - typing-inspection >=0.4.0
+ - typing_extensions >=4.12.2
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic?source=compressed-mapping
+ size: 307333
+ timestamp: 1749927245525
+- conda: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.33.2-py312h680f630_0.conda
+ sha256: 4d14d7634c8f351ff1e63d733f6bb15cba9a0ec77e468b0de9102014a4ddc103
+ md5: cfbd96e5a0182dfb4110fc42dda63e57
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - __glibc >=2.17,<3.0.a0
+ - libgcc >=13
+ - python_abi 3.12.* *_cp312
+ constrains:
+ - __glibc >=2.17
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1890081
+ timestamp: 1746625309715
+- conda: https://conda.anaconda.org/conda-forge/linux-64/pydantic-core-2.33.2-py313h4b2b08d_0.conda
+ sha256: 754e3739e4b2a8856573e75829a1cccc0d16ee59dbee6ad594a70728a90e2854
+ md5: 04b21004fe9316e29c92aa3accd528e5
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - libgcc >=13
+ - __glibc >=2.17,<3.0.a0
+ - python_abi 3.13.* *_cp313
+ constrains:
+ - __glibc >=2.17
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1894157
+ timestamp: 1746625309269
+- conda: https://conda.anaconda.org/conda-forge/osx-64/pydantic-core-2.33.2-py312haba3716_0.conda
+ sha256: 2bd1ff91077790b93141f6a718840626c6fe12eddd6de8441da6d211aa74999a
+ md5: ef5b500de254557bd376a64ef2d76c9a
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - __osx >=10.13
+ - python_abi 3.12.* *_cp312
+ constrains:
+ - __osx >=10.13
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1861583
+ timestamp: 1746625308090
+- conda: https://conda.anaconda.org/conda-forge/osx-64/pydantic-core-2.33.2-py313hb35714d_0.conda
+ sha256: 84b5d39c74f8578722b0fc40b6c0a862cff590549ff74abfe88210f98526fa62
+ md5: d005389707c7f9ccc4f86933b4649708
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - __osx >=10.13
+ - python_abi 3.13.* *_cp313
+ constrains:
+ - __osx >=10.13
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1867059
+ timestamp: 1746625317183
+- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.33.2-py312hd3c0895_0.conda
+ sha256: 4e583aab0854a3a9c88e3e5c55348f568a1fddce43952a74892e490537327522
+ md5: affb6b478c21735be55304d47bfe1c63
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - python 3.12.* *_cpython
+ - __osx >=11.0
+ - python_abi 3.12.* *_cp312
+ constrains:
+ - __osx >=11.0
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1715338
+ timestamp: 1746625327204
+- conda: https://conda.anaconda.org/conda-forge/osx-arm64/pydantic-core-2.33.2-py313hf3ab51e_0.conda
+ sha256: a70d31e04b81df4c98821668d87089279284d2dbcc70413f791eaa60b28f42fd
+ md5: 0d5685f410c4234af909cde6fac63cb0
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - python 3.13.* *_cp313
+ - __osx >=11.0
+ - python_abi 3.13.* *_cp313
+ constrains:
+ - __osx >=11.0
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1720344
+ timestamp: 1746625313921
+- conda: https://conda.anaconda.org/conda-forge/win-64/pydantic-core-2.33.2-py312h8422cdd_0.conda
+ sha256: f377214abd06f1870011a6068b10c9e23dc62065d4c2de13b2f0a6014636e0ae
+ md5: c61e3f191da309117e0b0478b49f6e91
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - vc >=14.2,<15
+ - vc14_runtime >=14.29.30139
+ - ucrt >=10.0.20348.0
+ - vc >=14.2,<15
+ - vc14_runtime >=14.29.30139
+ - ucrt >=10.0.20348.0
+ - python_abi 3.12.* *_cp312
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1900306
+ timestamp: 1746625389678
+- conda: https://conda.anaconda.org/conda-forge/win-64/pydantic-core-2.33.2-py313ha8a9a3c_0.conda
+ sha256: 14dc654f3bb8e5a489da6632cf91b421a32e0d1c521d4f0b64a6910ae51d5c8f
+ md5: b3a8def3a1d2e94644e2a9c0b8717f4a
+ depends:
+ - python
+ - typing-extensions >=4.6.0,!=4.7.0
+ - vc >=14.2,<15
+ - vc14_runtime >=14.29.30139
+ - ucrt >=10.0.20348.0
+ - vc >=14.2,<15
+ - vc14_runtime >=14.29.30139
+ - ucrt >=10.0.20348.0
+ - python_abi 3.13.* *_cp313
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/pydantic-core?source=hash-mapping
+ size: 1905166
+ timestamp: 1746625395940
- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda
sha256: 5577623b9f6685ece2697c6eb7511b4c9ac5fb607c9babc2646c811b428fd46a
md5: 6b6ece66ebcae2d5f326c77ef2c5a066
@@ -10164,6 +10388,46 @@ packages:
- pkg:pypi/pywin32-ctypes?source=hash-mapping
size: 57449
timestamp: 1727282288065
+- pypi: https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: 70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5
+ requires_python: '>=3.8'
+- pypi: https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: 7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8
+ requires_python: '>=3.8'
+- pypi: https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: 50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1
+ requires_python: '>=3.8'
+- pypi: https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab
+ requires_python: '>=3.8'
+- pypi: https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725
+ requires_python: '>=3.8'
+- pypi: https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: 80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476
+ requires_python: '>=3.8'
+- pypi: https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba
+ requires_python: '>=3.8'
+- pypi: https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl
+ name: pyyaml
+ version: 6.0.2
+ sha256: 8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563
+ requires_python: '>=3.8'
- conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.2-py313h8060acc_2.conda
sha256: 6826217690cfe92d6d49cdeedb6d63ab32f51107105d6a459d30052a467037a0
md5: 50992ba61a8a1f8c2d346168ae1c86df
@@ -11719,6 +11983,18 @@ packages:
purls: []
size: 90310
timestamp: 1748959427551
+- conda: https://conda.anaconda.org/conda-forge/noarch/typing-inspection-0.4.1-pyhd8ed1ab_0.conda
+ sha256: 4259a7502aea516c762ca8f3b8291b0d4114e094bdb3baae3171ccc0900e722f
+ md5: e0c3cd765dc15751ee2f0b03cd015712
+ depends:
+ - python >=3.9
+ - typing_extensions >=4.12.0
+ license: MIT
+ license_family: MIT
+ purls:
+ - pkg:pypi/typing-inspection?source=compressed-mapping
+ size: 18809
+ timestamp: 1747870776989
- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.14.0-pyhe01879c_0.conda
sha256: 8561db52f278c5716b436da6d4ee5521712a49e8f3c70fcae5350f5ebb4be41c
md5: 2adcd9bb86f656d3d43bf84af59a1faf
diff --git a/pixi.toml b/pixi.toml
index b8ce569..97bf57e 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -7,18 +7,34 @@ platforms = ["linux-64", "osx-64", "win-64", "osx-arm64"]
[pypi-dependencies]
gypsum_dl = { path = ".", editable = true }
+<<<<<<< Updated upstream
molvs = ">=0.1.1, <1"
dimorphite-dl = ">=2.0.1, <3"
+pyyaml = ">=6.0.2, <7"
+=======
+molvs = ">=0.1.1, <0.2"
+dimorphite-dl = ">=2.0.2, <3"
+>>>>>>> Stashed changes
[tasks]
[dependencies]
-python = ">=3.10,<3.14"
+<<<<<<< Updated upstream
+python = ">=3.10,<3.15"
loguru = ">=0.7.2,<1"
numpy = ">=2.0.0,<3"
scipy = ">=1.15.1,<2"
rdkit = ">=2024.9.4,<2027"
mpi4py = ">=4.0.1,<5"
+pydantic = ">=2.11.7,<3"
+=======
+python = ">=3.13.9,<3.15"
+loguru = ">=0.7.3,<0.8"
+numpy = ">=2.3.4,<3"
+scipy = ">=1.16.3,<2"
+rdkit = ">=2025.9.1,<2026"
+mpi4py = ">=4.1.1,<5"
+>>>>>>> Stashed changes
[environments]
dev = ["dev"]
@@ -29,7 +45,7 @@ docs = ["docs"]
ruff = ">=0.7.2,<1"
isort = ">=5.13.2,<6"
mypy = ">=1.13.0,<2"
-twine = ">=5.1.1,<6"
+twine = ">=6.2,<7"
pytest = ">=8.3.3,<9"
pytest-cov = ">=6.0.0,<7"
coverage = ">=7.6.4,<8"
@@ -40,7 +56,7 @@ scalene = ">=1.5.41,<2"
basedpyright = ">=1.29.1,<2"
[feature.dev.tasks]
-mdlint = { cmd = ["markdownlint-cli2", '"**/*.{md,markdown}"', "--fix", "--config", ".markdownlint.yaml", "||", "true"] }
+mdlint = { cmd = ["markdownlint-cli2", '"docs/*.{md,markdown}"', "--fix", "--config", ".markdownlint.yaml", "||", "true"] }
isort = { cmd = ["isort", "--settings-path", ".isort.cfg", "./gypsum_dl", "./tests", "||", "true"] }
ruff = { cmd = ["ruff", "format", "--config", ".ruff.toml", "./gypsum_dl", "./tests", "||", "true"] }
format = { depends-on = ["mdlint", "isort", "ruff"] }
diff --git a/pyproject.toml b/pyproject.toml
index 1d38b0f..c310c8f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,6 +7,18 @@ authors = [
]
readme = "README.md"
requires-python = ">=3.10"
+classifiers = [
+ "Development Status :: 3 - Alpha",
+ "Intended Audience :: Science/Research",
+ "Natural Language :: English",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Topic :: Scientific/Engineering :: Chemistry",
+]
# TODO: Keep this here until pixi releases building capabilities
dependencies = [
@@ -17,8 +29,16 @@ dependencies = [
"mpi4py>=4.0.1,<5",
"molvs>=0.1.1,<1",
"dimorphite-dl>=2.0.1,<3",
+ "pydantic>=2.0.0,<3",
+ "pyyaml>=6.0.0,<7",
]
+[project.urls]
+Repository = "https://github.com/durrantlab/gypsum_dl"
+Issues = "https://github.com/durrantlab/gypsum_dl/issues"
+Changelog = "https://github.com/durrantlab/gypsum_dl/blob/main/CHANGELOG.md"
+
+
[project.scripts]
gypsum-dl = "gypsum_dl.run:main"
diff --git a/tests/test_sample.py b/tests/test_sample.py
index 59f1982..266242e 100644
--- a/tests/test_sample.py
+++ b/tests/test_sample.py
@@ -7,7 +7,6 @@
import os
import shutil
-from gypsum_dl import utils
from gypsum_dl.start import prepare_molecules
@@ -39,19 +38,11 @@ def test_samples(test_dir):
# Prepare the molecules.
prepare_molecules(params)
- utils.log("")
- utils.log("TEST RESULTS")
- utils.log("============")
# Get the output sdf files.
sdf_files = glob.glob(f"{output_folder}/*")
- # There should be seven sdf files.
- msg = f"Expected 15 output files, got {len(sdf_files)}."
- if len(sdf_files) != 15:
- utils.exception(f"FAILED. {msg}")
- else:
- utils.log(f"PASSED. {msg}")
+ assert len(sdf_files) == 15, f"Expected 15 output files, got {len(sdf_files)}."
# Get all the smiles from the files.
all_smiles = set([])
@@ -140,13 +131,6 @@ def test_samples(test_dir):
# Azides can have adjacent +/- nitrogens.
target_smiles |= {"CN=[N+]=[N-]", "CN=[N+]=N"}
- # msg = "Expected " + str(len(target_smiles)) + " total SMILES, got " + \
- # str(len(all_smiles)) + "."
- # if len(all_smiles) != len(target_smiles):
- # utils.exception("FAILED. " + msg)
- # else:
- # utils.log("PASSED. " + msg)
-
# Python3 gives some smiles that are different than those obtained with
# Python2. But they are just different representations of the same thing.
# Let's make the switch to the Python2 form for this test.
diff --git a/ty.toml b/ty.toml
new file mode 100644
index 0000000..286a66a
--- /dev/null
+++ b/ty.toml
@@ -0,0 +1,2 @@
+[environment]
+python = ".pixi/envs/dev"
\ No newline at end of file