Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions scripts/batch_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def to_number(value):
result[k] = value
return result

def prepare_one(pdbid, data_dir=None, input_dir=None, force=False, remove_ligands=False, implicit_solvent=False):
def prepare_one(box_size, pdbid, data_dir=None, input_dir=None, force=False, remove_ligands=False, implicit_solvent=False):
if data_dir:
function.set_data_dir(data_dir)

Expand Down Expand Up @@ -91,7 +91,7 @@ def prepare_one(pdbid, data_dir=None, input_dir=None, force=False, remove_ligand
prepare_input = os.path.join(input_dir, pdbid + ".pdb")
else:
prepare_input = pdbid
preprocess.prepare_protein(prepare_input, remove_ligands=remove_ligands, implicit_solvent=implicit_solvent)
preprocess.prepare_protein(box_size, prepare_input, remove_ligands=remove_ligands, implicit_solvent=implicit_solvent)
except Exception as e:
ok = False
traceback.print_tb(e.__traceback__)
Expand All @@ -114,9 +114,8 @@ def prepare_one(pdbid, data_dir=None, input_dir=None, force=False, remove_ligand

return ok

def simulate_one(pdbid, data_dir=None, input_dir=None, steps=10000, report_steps=1, prepare=False, remove_ligands=False,
def simulate_one(box_size, pdbid, data_dir=None, input_dir=None, steps=10000, report_steps=1, prepare=False, remove_ligands=False,
prepare_implicit=False, force=False, timeout=None, integrator_params=None):
# print("simulate_one:", pdbid, data_dir, steps, report_steps, prepare, force, timeout)
interrupt_callback = None
if timeout:
interrupt_callback = lambda timeout=timeout : timeout > time.time()
Expand All @@ -129,7 +128,7 @@ def simulate_one(pdbid, data_dir=None, input_dir=None, steps=10000, report_steps

if prepare:
#TODO: Split force prepare / force simulate into separate flags?
if not prepare_one(pdbid, data_dir, input_dir, force, remove_ligands=remove_ligands, implicit_solvent=prepare_implicit):
if not prepare_one(box_size, pdbid, data_dir, input_dir, force, remove_ligands=remove_ligands, implicit_solvent=prepare_implicit):
return

finished_file_path = function.get_data_path(f'{pdbid}/simulation/finished.txt')
Expand Down Expand Up @@ -221,6 +220,9 @@ def main():
parser.add_argument("--input-dir", default=None, type=str, help="Input data directory, if set PDB files will be copied from here instead of download from RCSB")
parser.add_argument("--gpus", default=None, type=str, help="A comma delimited lists of GPUs to use e.g. '0,1,2,3'")
parser.add_argument("--timeout", default=None, type=float, help="The maximum time to run in hours (e.g. 0.5 = 30 minutes)")
parser.add_argument("--box-size", default=None, type=float, nargs="?", const=0.0,
help="""Set an explicit box size, or use the box size by not providing a value
listed in the pdb file (if no box size listed in pdb file or flag not used, uses openmm)""")

args = parser.parse_args()
print(args)
Expand Down Expand Up @@ -293,7 +295,7 @@ def main():
"force":args.force, "timeout":timeout,
"integrator_params":integrator_params}
pending_results += [pool.apply_async(simulate_one,
(pdbid,), kwargs_dict)]
(args.box_size, pdbid,), kwargs_dict)]

while pending_results:
pending_results = [i for i in pending_results if not i.ready()]
Expand Down
20 changes: 15 additions & 5 deletions scripts/module/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import openmm as mm
import openmm.app as app
from openmm import unit
from openmm import Vec3
import pdbfixer
import requests
import json
Expand All @@ -9,19 +10,18 @@
from module import ligands
from module import function


def prepare_protein(pdbid=str, remove_ligands=False, implicit_solvent=False):
def prepare_protein(box_size=None, pdbid=str, remove_ligands=False, implicit_solvent=False):
"""
Preprocesses a protein by downloading the PDB file, fixing missing residues and atoms,
adding missing hydrogens, adding solvent, and writing the processed PDB file.

Args:
pdbid (str): The PDB ID of the protein.
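box_size (float): Controls the solvent box when explicit solvent is added. If 0.0, uses the unit cell dimensions
reported by the fixer topology when they are available, otherwise falls back to padding = 1 nm. If > 0, builds a
cubic box with that edge length, given in Angstroms (converted to nanometers internally). If negative, adds water
with padding = 1 nm and lets OpenMM decide the box size.
NOTE(review): the default None is not handled by the `box_size > 0.0` comparison (TypeError in Python 3) - confirm
callers always pass a number or add an explicit None check.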

Comment on lines +20 to 21
Copy link

Copilot AI Apr 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Clarify the expected units and behavior of the box_size parameter in the docstring. For example, specify that a positive value is expected in Angstroms (which is converted to nanometers inside the function) and that negative values fall back to using a padding of 1 nm.

Suggested change
box_size(float): If 0.0 adds water solvent with padding = 1 Angstroms. If negative, attempts to match box size, defaulting back to padding = 1 Angstroms. If > 0, uses that value as a box size.
box_size (float, optional): The size of the simulation box. Positive values are interpreted as the box size in Angstroms
and are converted to nanometers internally. A value of 0.0 adds water solvent with a padding of 1 nm. Negative values
fall back to using a default padding of 1 nm. Defaults to None.

Copilot uses AI. Check for mistakes.
Returns:
None
"""

local_input_pdb_path = None
if pdbid.endswith(".pdb"):
local_input_pdb_path = pdbid
Expand Down Expand Up @@ -124,7 +124,17 @@ def prepare_protein(pdbid=str, remove_ligands=False, implicit_solvent=False):

# Add the water molecules if we're using explicit solvent
if not implicit_solvent:
modeller.addSolvent(forcefield, padding=1.0 * unit.nanometers, ionicStrength=0.15 * unit.molar)
# Attempt to match box size
Copy link

Copilot AI Apr 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct the spelling error in the comment ('Attmpt' should be 'Attempt').

Suggested change
# Attmpt to match box size
# Attempt to match box size

Copilot uses AI. Check for mistakes.
if box_size == 0.0 and fixer.topology.getUnitCellDimensions():
modeller.addSolvent(forcefield, boxSize=fixer.topology.getUnitCellDimensions() * unit.nanometers, ionicStrength=0.15 * unit.molar)
# Explicit box size
elif box_size > 0.0:
# convert box size from angstroms to nm
box_size /= 10
modeller.addSolvent(forcefield, boxSize=Vec3(box_size,box_size,box_size)*unit.nanometers, ionicStrength=0.15 * unit.molar)
# Adds water with padding = 1 and lets openmm decide appropriate box size
else:
modeller.addSolvent(forcefield, padding=1.0 * unit.nanometers, ionicStrength=0.15 * unit.molar)

# write the processed pdb file & ligand templates
top = modeller.getTopology()
Expand All @@ -139,4 +149,4 @@ def prepare_protein(pdbid=str, remove_ligands=False, implicit_solvent=False):

top_bonds = [len([*i.bonds()]) for i in top.residues() if i.name == 'UNK']
pdb_bonds = [len([*i.bonds()]) for i in pdb.topology.residues() if i.name == 'UNK']
assert pdb_bonds == top_bonds
assert pdb_bonds == top_bonds