diff --git a/presto/__init__.py b/presto/__init__.py index 9957951..2b646e8 100644 --- a/presto/__init__.py +++ b/presto/__init__.py @@ -1,4 +1,4 @@ -from presto.build import build +import presto.build import presto.config import presto.constants import presto.calculators diff --git a/presto/build.py b/presto/build.py index 8ccc32f..2daeccc 100644 --- a/presto/build.py +++ b/presto/build.py @@ -1,4 +1,4 @@ -import configparser, os, re, pathlib, yaml, cctk, h5py, logging +import os, re, yaml, cctk, h5py, logging import numpy as np import presto diff --git a/presto/config.py b/presto/config.py index 158e7fa..22278bd 100644 --- a/presto/config.py +++ b/presto/config.py @@ -1,5 +1,4 @@ -import configparser, os, re, pathlib, yaml, cctk, h5py, logging, shutil -import numpy as np +import configparser, os, re, pathlib, logging import presto logger = logging.getLogger(__name__) @@ -86,4 +85,4 @@ def check_exec(executable): # backwards-compatibility with v0.2.4 and before def build(*args, **kwargs): - return presto.build(*args, **kwargs) + return presto.build.build(*args, **kwargs) diff --git a/presto/config.py.rej b/presto/config.py.rej new file mode 100644 index 0000000..c593a39 --- /dev/null +++ b/presto/config.py.rej @@ -0,0 +1,30 @@ +diff a/presto/config.py b/presto/config.py (rejected hunks) +@@ -237,23 +237,18 @@ def build_bath_scheduler(settings): + + delta = settings["start_temp"] - settings["target_temp"] + +- def sched(time): +- if time > settings["target_time"]: +- return settings["target_temp"] +- else: +- return settings["target_temp"] + delta * (1 - time / settings["target_time"]) ++ if time > settings["target_time"]: ++ return settings["target_temp"] ++ else: ++ return settings["target_temp"] + delta * (1 - time / settings["target_time"]) + +- return sched + + elif settings["type"].lower() == "constant": + assert "target_temp" in settings, "Need `target_temp` for this bath scheduler!" + assert isinstance(settings["target_temp"], (float, int)), "`target_temp` must be numeric!" + assert settings["target_temp"] > 0, "`target_temp` must be positive!" + +- def sched(time): +- return settings["target_temp"] +- +- return sched ++ return settings["target_temp"] + + else: + raise ValueError(f"Unknown bath scheduler type {settings['type']}! Allowed options are `linear` or `constant`.") diff --git a/presto/replica_exchange.py b/presto/replica_exchange.py index aa392f4..a942e94 100644 --- a/presto/replica_exchange.py +++ b/presto/replica_exchange.py @@ -1,13 +1,25 @@ -import numpy as np -import math, copy, cctk, os, re, logging, time, yaml, random -import multiprocessing as mp +# presto/presto/replica_exchange.py +# Jul 2021 +import numpy as np +import logging +import random +import sys +import subprocess import presto +import dill +import re +import os +import yaml +import multiprocessing as mp logger = logging.getLogger(__name__) MIN_CHECKPOINT_INTERVAL = 50 +class MissingSbatchFlags(Exception): + pass + class ReplicaExchange(): """ Runs several trajectories and manages interconversions between them. @@ -23,15 +35,15 @@ class ReplicaExchange(): finished (bool): current_idx (int): """ - def __init__(self, trajectories, checkpoint_filename="remd.chk", swap_interval=10): - temps = np.zeros(shape=len(trajectories)) - for idx, traj in enumerate(trajectories): - assert isinstance(traj, presto.trajectory.Trajectory), "all trajectories must be EquilibrationTrajectories" + def __init__(self, trajectories, checkpoint_filename="remd.chk", swap_interval=100): + for traj in trajectories: + assert isinstance(traj, presto.trajectory.EquilibrationTrajectory), "all trajectories must be EquilibrationTrajectories" assert traj.timestep == trajectories[0].timestep, "all trajectories must have same ``timestep``" assert traj.stop_time == trajectories[0].stop_time, "all trajectories must have same ``stop_time``" assert np.array_equal(traj.high_atoms, trajectories[0].high_atoms), "all trajectories must have same ``high_atoms``" assert np.array_equal(traj.active_atoms, trajectories[0].active_atoms), "all trajectories must have same ``active_atoms``" + # sort by temp self.trajectories = sorted(trajectories, key=lambda x: x.bath_scheduler(0)) self.stop_time = trajectories[0].stop_time @@ -45,13 +57,15 @@ def __init__(self, trajectories, checkpoint_filename="remd.chk", swap_interval=1 self.finished = False self.current_idx = 0 - self.load() + # prevent parallel class from loading + if not isinstance(self, ReplicaExchangeParallel): + self.load() def __str__(self): - return f"ReplicaExchange({len(trajectories)} trajectories, swap_interval={swap_interval} fs, checkpoint_file={checkpoint_filename})" + return f"ReplicaExchange({len(self.trajectories)} trajectories, swap_interval={self.swap_interval} fs, checkpoint_file={self.checkpoint_filename})" def __repr__(self): - return f"ReplicaExchange({len(trajectories)} trajectories, swap_interval={swap_interval} fs, checkpoint_file={checkpoint_filename})" + return f"ReplicaExchange({len(self.trajectories)} trajectories, swap_interval={self.swap_interval} fs, checkpoint_file={self.checkpoint_filename})" def save(self): file_dict = { @@ -86,12 +100,16 @@ def run(self): processes = [None]*len(self.trajectories) start_idx = self.current_idx - # we break the runs up into small chunks + # we break the runs up into small chunks of size swap_interval + # each trajectory is basically run in parallel for time swap_interval in each iter + # we make as many mp.Process objects as there are trajectories for current_idx in range(start_idx, int(self.stop_time/self.swap_interval)): next_idx = current_idx + 1 target_time = next_idx * self.swap_interval + for idx, traj in enumerate(self.trajectories): # how long does each traj need to run for? + # the last iter may have time_remaining \neq swap_interval time_remaining = max(0, target_time - traj.last_time_run()) if time_remaining: @@ -100,24 +118,30 @@ def run(self): "checkpoint_interval": min(MIN_CHECKPOINT_INTERVAL, self.swap_interval), }) processes[idx].start() - + for process in processes: if process is not None: process.join() + # all the chunks in this iter are done + # run saves frames to chk file, so this ensures that run is done and saved properly for traj in self.trajectories: traj.load_from_checkpoint() assert traj.last_time_run() == target_time - self.exchange(next_idx*self.swap_interval) self.current_idx = next_idx + self.exchange() # TODO: make sure calling exchange without time is ok self.save() self.finished = True return self - def exchange(self, time): + def exchange(self): + """ time is from 0 + """ + time = self.current_idx * self.swap_interval kB = presto.constants.BOLTZMANN_CONSTANT - + + # make one pass from low to high T for i in range(len(self.trajectories)-1): j = i+1 @@ -132,6 +156,7 @@ def exchange(self, time): logger.info(f"E{i} & E{j}\tp={p}") if p > random.random(): + # or momenta scaling v_i_scaling = np.sqrt(T_j/T_i) v_j_scaling = np.sqrt(T_i/T_j) @@ -148,6 +173,7 @@ def exchange(self, time): def report(self): counts = np.zeros(shape=len(self.trajectories)) + # len of counts is no of temps possible = self.current_idx * (len(self.trajectories) - 1) for swap in self.swaps: @@ -159,3 +185,151 @@ def report(self): text += f"\tReplica {i} <=> Replica {i+1} \t{counts[i]/self.current_idx:.2%}\n" return text + +class ReplicaExchangeParallel(ReplicaExchange): + """ + Runs several trajectories and manages interconversions between them. Slurm-parallelized version. + + Based on: + http://www.math.pitt.edu/~cbsg/Materials/Earl_ParallelTempering.pdf + + Attributes: + stop_time (int): + swaps (list of dict): + swap_interval (int): + checkpoint_filename (int): + finished (bool): + current_idx (int): + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def update_trajs(self): + """ + Updates all trajectories from respective chk files on newly unpickled remd object + Not run in the first iteration + Does not modify any files + """ + self.current_idx += 1 + + for traj in self.trajectories: + traj.load_from_checkpoint(frames=slice(-2, None, None)) + # need last two frames to do exchange + # ensure all the trajectories are run up to the same time + assert traj.last_time_run() == self.current_idx * self.swap_interval, "ensure that trajectories all ended at the same time" + + if self.current_idx * self.swap_interval == self.stop_time: + self.finished = True + + def run(self, slurm=True): + """ generates a (slurm) job array for trajectories in each segment and submits it + """ + + # always self.update_trajs before self.run + if self.finished: + logger.info(f"Replica exchange already finished!") + return self + + next_idx = self.current_idx + 1 + target_time = next_idx * self.swap_interval + + if isinstance(self.trajectories[0].calculator, presto.calculators.XTBCalculator): + n_threads = self.trajectories[0].calculator.parallel + elif isinstance(self.trajectories[0].calculator, presto.calculators.GaussianCalculator): + n_threads = int( + self.trajectories[0].calculator.link0["nprocshared"]) + + if slurm: + slurm_script = 'traj_array.sh' + + assert os.path.exists(slurm_script), "The Slurm submit script traj_array.sh is required but not found" + + # dict to ensure that (some) necessary sbatch options are specified + sbatch_flags = {'-J': False, '-c': False, '--array': False} + with open(slurm_script, 'r') as f: + source = f.read().splitlines() # no newline in source + + with open(slurm_script, 'w') as f: + for line in source: + if "job-name" in line or '-J ' in line: + # modify job name appropriately + line = re.sub('array_(.*?)fs', f'array_{target_time}fs', line) + sbatch_flags['-J'] = True + elif "cpus-per-task" in line or '#SBATCH -c' in line: + # check if cpus-per-task is equal to n_threads + sbatch_flags['-c'] = True + thread_match = re.search('(\d+)$', line) + input_threads = int(thread_match.group()) + if input_threads != n_threads: + line = re.sub('(\d+)$', str(n_threads), line) + logger.warning( + "--cpus-per-task in traj_array.sh is not equal to number of processors specified in config file, automatically changing") + elif "--array" in line: + # check if array size is equal to num of trajs + input_trajs = int(re.search('(\d+)$', line).group()) + sbatch_flags['--array'] = True + if input_trajs != len(self.trajectories) - 1: + line = re.sub('(\d+)$', str(len(self.trajectories) - 1), line) + logger.warning( + "--array size in traj_array.sh is not equal to number of trajectories specified when calling remd_par_manager.py, automatically changing") + + f.write(line+'\n') + + for k, v in sbatch_flags.items(): + if not v: + logger.error( + f"did not specify sbatch option {k} in traj_array.sh, exiting now") + sys.exit(1) + + try: + sbatch_msg = subprocess.check_output( + ['sbatch', slurm_script]) + if 'Submitted' not in sbatch_msg.decode('utf-8'): + raise subprocess.CalledProcessError + + except subprocess.CalledProcessError as e: + logger.error("Could not submit slurm job array script") + sys.exit(1) + logger.info("Wrote and submitted traj_array.sh") + return int(sbatch_msg.split()[-1]) # slurm jobid of the + + def exchange(self): + """ time is from 0 + Saves updates trajectories to trajectory chkfiles + """ + super().exchange() + + for traj in self.trajectories: + traj.save() + + def save(self): + """ + Saves ReplicaExchange object to chkfile (pickle), keeping only the last frame of each trajectory + + """ + for traj in self.trajectories: + traj.frames = [traj.frames[-1]] + + with (open(self.checkpoint_filename, "wb")) as f: + dill.dump(self, f, protocol=-1) # HIGHEST_PROTOCOL + + logger.info(f"Saved ReplicaExchange to {self.checkpoint_filename}.") + + @classmethod + def load(cls, checkpoint): + """ Loads ReplicaExchange object from chkfile + + Returns: + ReplicaExchange object + """ + + try: + with (open(checkpoint, "rb")) as f: + remd = dill.load(f) + except FileNotFoundError: + print( + "checkpoint file not found, do not call this script with --spawn on the first instance") + logger.info(f"Loaded ReplicaExchange from {checkpoint}.") + + return remd diff --git a/presto/test/300k.yaml b/presto/test/300k.yaml new file mode 100644 index 0000000..d4ce2ea --- /dev/null +++ b/presto/test/300k.yaml @@ -0,0 +1,24 @@ +type: equilibration + +timestep: 1 +high_atoms: 1-8 +stop_time: 250 + +anchor: 1 + +integrator: + type: langevin + viscosity: 0.0001 + +potential: + type: spherical_harmonic + radius: 8 + +calculator: + type: xtb + gfn : 2 + parallel: 8 + +bath_scheduler: + type: constant + target_temp: 300.00 diff --git a/presto/test/dce.xyz b/presto/test/dce.xyz new file mode 100644 index 0000000..6643077 --- /dev/null +++ b/presto/test/dce.xyz @@ -0,0 +1,10 @@ +8 +C2H4Cl2 +C -1.259345218 0.000000000 -1.676453798 +H -0.376267218 -0.509845000 -2.075709798 +H -2.142423218 -0.509845000 -2.075709798 +C -1.259345218 0.000000000 -0.146465798 +H -2.142423218 0.509845000 0.252790202 +H -0.376267218 0.509845000 0.252790202 +Cl -1.259345218 -1.648164277 0.498867074 +Cl -1.259345218 1.648164277 -2.321786671 diff --git a/presto/test/save_empty_traj.py b/presto/test/save_empty_traj.py new file mode 100644 index 0000000..9add969 --- /dev/null +++ b/presto/test/save_empty_traj.py @@ -0,0 +1,6 @@ +import presto + +traj = presto.config.build("300k.yaml", checkpoint="300k.chk", geometry="dce.xyz") +traj.save() + +traj2 = presto.trajectory.Trajectory.new_from_checkpoint("300k.chk", -1) diff --git a/presto/test/test.sh b/presto/test/test.sh new file mode 100644 index 0000000..11780e2 --- /dev/null +++ b/presto/test/test.sh @@ -0,0 +1,16 @@ +#!/bin/bash +#SBATCH --job-name=test +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --partition=day +#SBATCH -t 1:00:00 +#SBATCH --mem-per-cpu=6000mb +#SBATCH --mail-type=END,FAIL + +module purge +module load miniconda +source activate presto-d + +python slurm_submit.py --spawn + + diff --git a/presto/trajectory.py b/presto/trajectory.py index a7ae585..fac44f4 100644 --- a/presto/trajectory.py +++ b/presto/trajectory.py @@ -125,7 +125,8 @@ def __init__( self.set_inactive_atoms(kwargs["inactive_atoms"]) else: # assume all atoms are active - self.set_inactive_atoms(np.ndarray([])) + self.set_inactive_atoms(None) + if not hasattr(self, "masses"): self.masses = cctk.OneIndexedArray([float(cctk.helper_functions.draw_isotopologue(z)) for z in atomic_numbers]) @@ -176,14 +177,17 @@ def __repr__(self): def set_inactive_atoms(self, inactive_atoms): """ Since sometimes it's easier to specify the inactive atoms than the inactive atoms, this method updates ``self.active_atoms`` with the complement of ``inactive_atoms``. + + Args: + inactive_atoms (None or np.ndarray) """ - assert isinstance(inactive_atoms, (list, np.ndarray)), "Need list of atoms!" active_atoms = list(range(1, len(self.atomic_numbers)+1)) - if len(inactive_atoms): + if inactive_atoms is not None: + assert isinstance(inactive_atoms, (list, np.ndarray)), "Need list of atoms!" for atom in inactive_atoms: active_atoms.remove(atom) - active_atoms = np.array(active_atoms) - self.active_atoms = active_atoms + + self.active_atoms = np.array(active_atoms) def run(self, keep_all=False, time=None, **kwargs): """ @@ -332,15 +336,7 @@ def load_from_checkpoint(self, frames="all"): all_velocities= h5.get("all_velocities")[frames] all_accels = h5.get("all_accelerations")[frames] temperatures = h5.get("bath_temperatures")[frames] - - # v0.2.2 - provisionally removing this -# all_times = None -# try: all_times = h5.get("all_times")[frames] -# except Exception as e: -# all_times = np.arange(0, self.timestep*len(all_energies)*self.save_interval, self.timestep*self.save_interval) -# # this was added recently, so may be some backwards compatibility issues. -# pass if isinstance(all_energies, np.ndarray): assert len(all_positions) == len(all_energies) @@ -492,8 +488,16 @@ def write_movie(self, filename, solvents="all", idxs=None): """ # what do we make a movie of? - if idxs is not None: - assert isinstance(idxs, list), "idxs must be list of indices or ``None``!" + if idxs: + if isinstance(idxs, str): + if idxs == "high": + idxs = self.high_atoms + elif idxs == "all": + idxs = None + else: + raise ValueError(f"unknown idxs keyword {idxs} -- must be 'high' or 'all'") + else: + raise ValueError(f"unknown idxs keyword {idxs} -- must be 'high' or 'all'") else: if isinstance(solvents, str): if solvents == "high": @@ -509,7 +513,7 @@ def write_movie(self, filename, solvents="all", idxs=None): raise ValueError("``solvents`` must be int, 'high', or 'all'!") ensemble = self.as_ensemble(idxs) - logger.info("Writing trajectory to {filename}") + logger.info(f"Writing trajectory to {filename}") if re.search("pdb$", filename): cctk.PDBFile.write_ensemble_to_trajectory(filename, ensemble) elif re.search("mol2$", filename): @@ -523,10 +527,32 @@ def write_movie(self, filename, solvents="all", idxs=None): def as_ensemble(self): ensemble = cctk.ConformationalEnsemble() - for frame in self.frames[:-1]: + # for frame in self.frames[:-1]: # why is this up to only the second last frame? + for frame in self.frames: ensemble.add_molecule(frame.molecule(idxs), {"bath_temperature": frame.bath_temperature, "energy": frame.energy}) return ensemble + @classmethod + def new_from_checkpoint(cls, checkpoint, frame=slice(None)): + """ + Creates new trajectory from the given checkpoint file. + + Args: + checkpoint (str): path to checkpoint file + frame (int): the index of the desired frame + + Returns: + new ``Trajectory`` object + """ + assert isinstance(frame, slice), "frame needs to be a Slice object" + + new_traj = cls(checkpoint_filename=checkpoint, stop_time=10000, save_interval=1) + # added defaults here to avoid errors when creating new trajectory object + new_traj.load_from_checkpoint(frames=frame) + + #assert len(new_traj.frames) == 1, "got too many frames!" + return new_traj + def initialize_lock(self): """ Create hidden lockfile to accompany ``.chk`` file. diff --git a/scripts/remd_demux.py b/scripts/remd_demux.py new file mode 100644 index 0000000..78893c0 --- /dev/null +++ b/scripts/remd_demux.py @@ -0,0 +1,129 @@ +# presto/scripts/remd_demux.py +# Marcus Sak, Jun 2021 +# Script to demux remd replicas + +import argparse +import sys +import numpy as np +import presto +import logging +import copy +from tqdm import tqdm + +logging.basicConfig(level=logging.INFO, filename=f"demux.log", filemode='a', + format='%(asctime)s %(name)-12s %(message)s', datefmt='%m-%d %H:%M') + +logger = logging.getLogger(__name__) + + +def exchange(array, i, j): + array[[i, j]] = array[[j, i]] + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + prog="remd_demux.py", description="Given a REMD run, reconstructs the continuous trajectories of each replica and outputs trajectory checkfiles and the temperature evolution for each replica. Run $ python remd_demux.py --help for options.") + parser.add_argument("--checkpoint_filename", "-c", type=str, default="remd.chk", + help="path to checkpoint file (usually ends in .chk)") + parser.add_argument("--frames", "-t", type=int, default=-1, + help="length of run the beginning (in fs) to demux and analyze") + + args = vars(parser.parse_args(sys.argv[1:])) + chkfile = args["checkpoint_filename"] + + remd = presto.replica_exchange.ReplicaExchangeParallel.load(chkfile) + # assert remd.finished, "REMD run from this checkpoint file is unfinished!" + if not remd.finished: + remd.update_trajs() # load each traj from chkfile and add 1 to current index + remd.exchange() # pass self.current_idx * self.swap_interval as current time + + trajs = remd.trajectories + swap_int = int(remd.swap_interval) + run_time = int(remd.current_idx * remd.swap_interval) # frames that have been run, in fs + + assert trajs[0].last_time_run() == run_time, "End time recorded in trajectory checkpoint file does not agree with end time recorded in REMD checkpoint file." + + if args['frames'] == -1: # all frames + end_time = run_time + else: + end_time = args['frames'] + assert end_time <= run_time, f"Only {run_time} fs have been run, but {end_time} fs requested" + assert end_time % swap_int == 0, f"Requested length must be multiple of swap interval ({swap_int} fs)." + + # list of lists of temp history + traj_hists = np.zeros((len(trajs), int(run_time/swap_int) + 1), dtype=int) + # add 1 to shape[1] if we don't want to ignore the last swap + + for i, hist in enumerate(traj_hists): + hist[0] = i # starting arrangement + + curr_time = 0 + curr_arrangement = np.arange(len(trajs)) # one col of traj_hists + + for i, swap in enumerate(remd.swaps): + while int(swap["time"]) != curr_time: + # finished swapping or no swaps for this time, write final arrangement for curr_time + traj_hists[:, int(curr_time/swap_int)] = curr_arrangement + curr_time += swap_int + exchange(curr_arrangement, swap["i"], swap["j"]) + + #account for any final intervals without swaps + while curr_time <= run_time: + traj_hists[:, int(curr_time/swap_int)] = curr_arrangement + + if curr_time != run_time: + curr_time += swap_int + else: + break + + logger.info(f"{run_time} fs have been run, {end_time} fs will be demuxed, and the swap interval is {swap_int} fs.") + assert curr_time/swap_int == traj_hists.shape[1] - 1 # last column + + demux_hists = np.zeros((len(trajs), int(run_time/swap_int) + 1), dtype=int) # same shape as traj_hists + + # actually demuxing + for i,j in np.ndindex(traj_hists.shape): + demux_hists[traj_hists[i,j], j] = i + + demux_trajs = copy.deepcopy(trajs) + for traj in demux_trajs: + traj.frames = [] # empty frames, to be filled in demuxed order + + n_frames = int(end_time / int(trajs[0].timestep * trajs[0].save_interval)) + 1 # make sure traj.timestep is needed + # include zeroth frame + + print("loading trajectories...") + for traj in tqdm(trajs): + traj.load_from_checkpoint(frames=slice(0, n_frames, 1)) + assert len( + traj.frames) == n_frames, "expected number of frames should be equal to number of frames in trajectory" + + #hist = np.repeat(hist, swap_int / traj.save_interval) + + + print("demuxing trajectories...") + for i, hist in tqdm(enumerate(demux_hists)): # each trajectory + for j in np.arange(n_frames, dtype=int): # each timestep + demux_trajs[i].frames.append(trajs[demux_hists[i, int(j/swap_int)]].frames[j]) + + print("writing demuxed trajectories to PDB and .chk files:") + for i, traj in tqdm(enumerate(demux_trajs)): + traj.checkpoint_filename = f"replica{i}.chk" + pdb_filename = f'replica{i}.pdb' + traj.write_movie(pdb_filename) + logger.info(f"Wrote movie to {pdb_filename}.") + + traj.save() + + times = np.arange(0, end_time + 1, swap_int) + + print("writing temperature evolutions to CSV files...") + for i, hist in tqdm(enumerate(demux_hists)): + #time_hist = np.array([times, hist[:n_frames]]).astype(int) + time_hist = np.stack((times, hist[:len(times)])).astype(int) + + csv_filename = f"replica{i}.csv" + np.savetxt(csv_filename, time_hist.transpose(), delimiter=',', header='tim e(fs), temperature index') + logger.info(f"Wrote temperature evolution to {csv_filename}") + diff --git a/scripts/remd_par_manager.py b/scripts/remd_par_manager.py new file mode 100644 index 0000000..dbaed11 --- /dev/null +++ b/scripts/remd_par_manager.py @@ -0,0 +1,97 @@ +# presto/scripts/remd_par_manager.py +# Marcus Sak, Jun 2021 +# Manager script to run slurm-parallelized remd + +import argparse +import logging +import os +import sys +import numpy as np +import presto +import subprocess + +logging.basicConfig(level=logging.INFO, filename=f"remd.log", filemode='a', + format='%(asctime)s %(name)-12s %(message)s', datefmt='%m-%d %H:%M') + +logger = logging.getLogger(__name__) + + +def sbatch_self(slurm_script, dependency): + with open(slurm_script, 'r') as f: + source = f.read().splitlines() # no newline in source + + with open(slurm_script, 'w') as f: + for line in source: + if "python" in line and "--spawn" not in line: + line += " --spawn" + f.write(line+'\n') + + try: + subprocess.run( + ['sbatch', f'--dependency=afterok:{dependency}', slurm_script]) + logger.info( + f"Self-spawned with dependency on slurm job array {dependency}") + except subprocess.CalledProcessError as e: + logger.error("Could not submit slurm script") + sys.exit(1) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + prog="remd_par_manager.py", description="Runs replica exchange MD for accelerated sampling of conformational space. Outputs chkfiles for all trajectories. Run > python replica_exchange.py --help for options.") + parser.add_argument("--checkpoint_filename", type=str, default="remd.chk", + help="path to checkpoint file (usually ends in .chk)") + parser.add_argument("--input", default=None, type=str, + help="path of input geometry (.xyz)") + parser.add_argument("--template", "-t", type=str, default="template.yaml", + help="path to template file (usually ends in .yaml)") + parser.add_argument("--mintemp", "-a", default=100, + type=int, help="minimum trajectory temperature (K)") + parser.add_argument("--maxtemp", "-z", default=800, + type=int, help="maximum trajectory temperature (K)") + parser.add_argument("--trajs", "-n", default=8, type=int, + help="number of trajectories") + parser.add_argument("--swap", "-s", default=2000, type=int, + help="time interval between swaps (fs)") + parser.add_argument('--spawn', action='store_true', + help="add this flag when recursively called from remd_par_manager.py") + + args = vars(parser.parse_args(sys.argv[1:])) + chkfile = args["checkpoint_filename"] + + if not args["spawn"]: # new remd run + all_trajs = [] + temps = np.geomspace( + args["mintemp"], args["maxtemp"], num=args["trajs"]) + for temp in temps: + name = f"{int(temp)}k" + with open(args["template"], 'r') as file: + filedata = file.read() + filedata = filedata.replace("", f"{temp:.2f}") + with open(f"{name}.yaml", 'w') as file: + file.write(filedata) + traj = presto.build.build( + f"{name}.yaml", f"{name}.chk", geometry=args["input"], checkpoint_interval=min(50, args["swap"])) + all_trajs.append(traj) + remd = presto.replica_exchange.ReplicaExchangeParallel(trajectories=all_trajs, checkpoint_filename=args["checkpoint_filename"], swap_interval=args["swap"]) + else: + remd = presto.replica_exchange.ReplicaExchangeParallel.load( + args["checkpoint_filename"]) + remd.update_trajs() # load each traj from chkfile and add 1 to current index + remd.exchange() # pass self.current_idx * self.swap_interval as current time + # exchange will save trajectories into chkfile + if remd.finished: + remd.save() + logger.info(f"Replica exchange completed.") + logger.info("\n\n----------------REPORT----------------\n") + logger.info(remd.report()) + sys.exit(0) + + remd.save() + # nonblocking, needs to return slurm jobID of submitted array + trajs_slurm_job_id = remd.run() + + slurm_script = f'remd_{os.path.splitext(args["input"])[0]}.sh' + sbatch_self(slurm_script, trajs_slurm_job_id) + sys.exit(0) diff --git a/scripts/remd_par_readme.md b/scripts/remd_par_readme.md new file mode 100644 index 0000000..3d0d4b7 --- /dev/null +++ b/scripts/remd_par_readme.md @@ -0,0 +1,179 @@ +# Parallelized replica exchange molecular dynamics (REMD) in _presto_ + +## Table of Contents + +1. [Overview](#overview) +2. [Input and Output](#input-and-output) +3. [Instructions](#instructions) +4. [Parallelizing REMD with Slurm](#parallelizing-remd-with-slurm) +5. [Analyzing the REMD run](#analyzing-the-remd-run) + +## Overview + +A challenge in conventional molecular dynamics (MD) is the propensity for the system to be trapped in local potential energy minima. This issue is even more pronounced in the context of _ab initio_ MD, because the cost of simulating each timestep is higher. [Replica exchange molecular dynamics](http://www.math.pitt.edu/~cbsg/Materials/Earl_ParallelTempering.pdf) (REMD), also known as parallel tempering, is an established approach to accelerate exploration of the potential energy surface. + +In _presto_, REMD is implemented as a loop: + +1. Run _n\_trajs_ trajectories with temperatures uniformly spanning _mintemp_ and _maxtemp_, for time _swap\_interval_. +2. Collect all the trajectories and make a pass through them, exchanging configurations between adjacent trajectories based on the Metropolis criterion. +3. Repeat until _stop\_time_. + +The bottleneck in this loop is step (1), which is fortunately embarrassingly parallel. Huge gains in computation time can be realized by running the trajectories in parallel, subject to computational resources. More details about the parallel implementation can be found [below](#parallelizing-remd-with-slurm). + +## Input and Output + +This implementation of parallelized REMD requires the [Slurm](https://Slurm.schedmd.com/documentation.html) scheduling system, and assumes that jobs are submitted using `sbatch `. + +### Provided files + +1. `scripts/remd_par_manager.py`: manager script that handles the main REMD loop + - `--checkpoint_filename`: path to checkpoint file (default remd.chk) + - `--input`: path of input geometry (.xyz) + - `--template`: path to [config file](https://github.com/corinwagen/presto/blob/master/CONFIG.md) (default template.yaml) + - `--mintemp`: minimum trajectory temperature in K + - `--maxtemp`: maximum trajectory temperature in K + - `--trajs`: number of trajectories + - `--swap`: length of swap interval in fs (default 2000) + - `--spawn`: initiate REMD run from REMD and trajectory checkpoint files + + Run `$ python run_remd_par.py --help` for more details and options. +2. `scripts/run_traj_temp.py`: auxiliary script to run individual trajectories, not intended to be called alone +3. `presto/replica_exchange_par.py`: contains the ReplicaExchange class + +### User-supplied files + +1. a template for the .yaml config file, with the `target_temp` field filled with \. The intended runtime of the trajectories is supplied here in the `stop_time` field, as with a usual single-trajectory run. +2. the geometry input file, in `.xyz` format +3. `remd_.sh`: Slurm submit script for `remd_par_manager.py`. Example: + + ```sh + #!/bin/bash + #SBATCH --job-name=remd_par_manager + #SBATCH --output=slurm-remd_par_manager.out + #SBATCH --open-mode=append + #SBATCH --ntasks=1 + #SBATCH --cpus-per-task=1 + #SBATCH --partition=day + #SBATCH -t 00:10:00 + #SBATCH --mem-per-cpu=8000mb + #SBATCH --mail-type=FAIL + + # add the path to run_remd_par.py if necessary. Remove --spawn whenever calling manually + python run_remd_par.py --input geom.xyz --mintemp 300 --maxtemp 400 --trajs 3 --swap 1000 --template template.yaml + ``` + +4. `traj_array.sh`: Slurm submit script for `scripts/run_traj_temp.py`, not intended to be called by the user. Example: + + ``` sh + #!/bin/bash + ### this file is not intended to be sbatch'ed on its own + #SBATCH --job-name=remd_traj_array_fs + #SBATCH --output=slurm-traj_arrays.out + #SBATCH --open-mode=append + #SBATCH --ntasks=1 + ### do not modify above this line + ### set array indices from 0 to (number of trajectories - 1) + #SBATCH --array=0-2 + ### each trajectory is one slurm task, adjust accordingly below: + #SBATCH --cpus-per-task=4 + #SBATCH --mem-per-cpu=8000mb + #SBATCH -t 00:15:00 + #SBATCH --partition=day + + # add the path to run_traj_temp.py and the name of your checkfile after -c + python run_traj_temp.py -i $SLURM_ARRAY_TASK_ID -c remd.chk + + ``` + +### Output files + +1. `k.chk`: checkpoint file for a trajectory at \ K. Examine the trajectory with `scripts/analyze.py`. +2. `remd_par.log`: log file for all running modules. Use this file to monitor trajectory progress and check for warnings. +3. `remd.chk`: checkpoint file for `ReplicaExchange` object. Contains a pickled `ReplicaExchange` object that is loadable with `presto.replica_exchange_par.ReplicaExchange.load()`. +4. `slurm-remd_par_manager.out`: Slurm output file from manager script. Contains tracebacks of critical unhandled exceptions (look at this file if the job unexpectedly terminates). +5. `slurm-traj_arrays.out`: Slurm output file from run_traj_temp.py submit script. Rarely useful. + +## Instructions + +1. If necessary, load the requisite software modules (Anaconda, Gaussian, xTB), e.g. `$ module load Gaussian`. Activate the presto virtual environment, e.g. `$ conda activate presto`. + +2. After setting up all the necessary files, initiate the run using `$ sbatch remd_.sh`. + + The only practical resource limitation is that the runtime of each trajectory for the swap interval (e.g. 50 fs) on one node cannot exceed the walltime of the cluster/partition. The total REMD runtime (`stop_time` in the config file) is restricted only by `sys.maxint` and the available storage for the trajectory checkpoint files; the number of trajectories is additionally restricted by the system-dependent Slurm `MaxArraySize` configuration parameter (usually 4E6). + +3. This slurm script will start a job named `remd_par_manager`, which will initiate a Slurm job array named `remd_traj_array_fs`. + +4. The manager script will spawn another instance of itself (`remd_par_manager`), which will start running when the job array is completed. + +5. When the REMD run ends, each trajectory can be examined by running `scripts/analyze.py` on its .chk file. + +6. To prematurely cancel the run, execute `$ scancel ` on the `remd_par_manager` job. + +7. If the REMD run was prematurely stopped/failed during execution of the manager script, the run may be resumed by running the manager script with the additional flag `--spawn`, i.e. in `remd_.sh`, change the final line to `python run_remd_par.py --spawn`. + +## Parallelizing REMD with Slurm + +_This is meant as a developer's note, i.e. not required reading._ + +This implementation leverages the [Slurm job array](https://Slurm.schedmd.com/job_array.html) feature to run trajectories as individual Slurm jobs within a Slurm job array. + +In summary, for every _swap\_interval_ chunk, + +1. The manager script creates a `ReplicaExchange` object or loads one from the checkpoint file. +2. ReplicaExchange.run() is called, which writes a Slurm submit script for the job array consisting of _n\_trajs_ independent calls to the auxiliary script. +3. The manager script submits itself as a new job to the Slurm scheduler, with a dependency on completion of the job array. +4. Each instance of the auxiliary script runs one trajectory (`presto.trajectory.run()`) and saves the trajectory to its own checkpoint file (e.g. 300k.chk) +5. The new manager script instance is run, which assembles the newly completed trajectories into the `ReplicaExchange` object and exchanges the configurations, and saves the `ReplicaExchange` object to the checkpoint file. + +This approach + +- parallelizes the trajectory runs +- sidesteps limitations on the total REMD runtime due to cluster walltime limits +- sidesteps limitations on the number of trajectories due to the maximum number of nodes available + +The one notable drawback occurs in cases where each _swap\_interval_ chunk takes a short time to run, because the trajectory runs are resubmitted as new jobs after each _swap\_interval_—this incurs the usual cost of scheduling and waiting for new resources to become available. In any reasonable use case, this drawback pales in comparison to the computation time of serialized REMD, or in an MPI implementation, the time required for a large number of nodes and cores to become simultaneously available. + +## Analyzing the REMD run + +### Reconstructing the temperature evolution of each replica + +In REMD, efficient diffusion of configurations between temperatures is desired. _presto_ keeps tracks of the swaps performed after each _swap\_interval_ in the REMD checkpoint file, and provides a script (`scripts/remd_demux.py`) to demultiplex (or "demux") the trajectories, enabling the user to examine the (random) walks taken by each replica through temperature space. + +**Required files**: + +1. checkpoint file to a *completed* REMD run. +2. checkpoint files for all the trajectories (e.g. `300k.chk`). +3. `scripts/remd_demux.py` + +To use, execute `$ python remd_demux.py --checkpoint_filename `. + +**Output files**: + +1. checkpoint files for the demuxed trajectories (e.g. `300k_demux.chk`) +2. PDB files containing the demuxed trajectories (e.g. `300k_demux.pdb`) +3. CSV files containing the temperature evolutions of each trajectory (e.g. `traj0.csv`). The first column contains the _index_ of the temperature, not the temperature in K. These data can be visualized in your favorite program. + +### Cluster analysis + +We often want to visualize the (relative) distributions of internal coordinates throughout an MD trajectory, e.g. _phi_ and _psi_ angles in peptides. A script `scripts/traj_cluster.py` is provided to plot the kernel density-estimated distributions of up to two internal coordinates (distance, length, dihedral) given a trajectory checkpoint file. + +The `seaborn` package (`$ conda install seaborn`) is used for visualization. + +**Required files**: + +1. `scripts/traj_cluster.py` + - `--checkpoint_filename`: path to trajectory checkpoint file (e.g. `300k.chk`) + - `--distance`: a pair of atom indices (1-indexed) + - `--angle`: three atom indices (1-indexed) + - `--dihedral`: four atom indices (1-indexed) + + Specify up to two internal coordinates. Examples of acceptable combinations are: + - `$ python traj_cluster.py -c 300k.chk --distance 3 4` + - `$ python traj_cluster.py -c 300k.chk --distance 3 4 --angle 8 1 3` + - `$ python traj_cluster.py -c 300k.chk --dihedral 17 20 8 7 --angle 1 2 5` + +2. checkpoint file for the trajectory + +**Output files**: + +1. a PNG file containing the single distribution plot or joint distribution plot for the specified internal coordinates. diff --git a/scripts/solvate.py b/scripts/solvate.py index 845d212..cee6a65 100644 --- a/scripts/solvate.py +++ b/scripts/solvate.py @@ -51,7 +51,7 @@ for s, n in zip(args["solvent"], args["num"]): with pkg_resources.path(solvents, f"{s}.xyz") as file: f = cctk.XYZFile.read_file(file) - title_dict = {x.split("=")[0]: x.split("=")[1] for x in f.title.split(" ")} + title_dict = {x.split("=")[0]: x.split("=")[1] for x in f.titles[0].split(" ")} assert "mw" in title_dict.keys(), f"need mw=__ in title of {s}.xyz!" assert "density" in title_dict.keys(), f"need density=__ in title of {s}.xyz!" diff --git a/scripts/traj_array.py b/scripts/traj_array.py new file mode 100644 index 0000000..6404b7a --- /dev/null +++ b/scripts/traj_array.py @@ -0,0 +1,28 @@ +# presto/scripts/run_traj_temp.py +# Marcus Sak, Jun 2021 +# Script to run one trajectory as part of slurm job array + +import sys +import presto +import logging +import argparse + +logging.basicConfig(level=logging.INFO, filename=f"remd.log", filemode='a', + format='%(asctime)s %(name)-12s %(message)s', datefmt='%m-%d %H:%M') + +logger = logging.getLogger(__name__) + +parser = argparse.ArgumentParser( + prog="run_traj_temp.py", description="Script to run single trajectory at specified temperature, usually called by replica_exchange_par.py") +parser.add_argument("--checkpoint_filename", "-c", type=str, + help="path to remd checkpoint file") +parser.add_argument("--index", "-i", type=int, help="index of trajectory") + +args = vars(parser.parse_args(sys.argv[1:])) +remd = presto.replica_exchange.ReplicaExchangeParallel.load(args["checkpoint_filename"]) + +traj = remd.trajectories[args["index"]] +traj.run(time=remd.swap_interval) +traj.save() + +sys.exit(0) \ No newline at end of file diff --git a/scripts/traj_cluster.py b/scripts/traj_cluster.py new file mode 100644 index 0000000..a4c030f --- /dev/null +++ b/scripts/traj_cluster.py @@ -0,0 +1,72 @@ +# presto/scripts/traj_cluster.py +# Marcus Sak, Jun 2021 +# Script for cluster analysis + +import argparse +import sys +import numpy as np +import presto +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="traj_cluster.py", description="Given checkpoint file for trajectory and up to two desired internal coordinates, plots their distribution over the frames of the trajectory. Atom indices start from 1.") + + parser.add_argument("--checkpoint_filename", "-c", type=str, + help="path to trajectory checkpoint file (usually ends in .chk) ", required=True) + parser.add_argument("--distance", "-l", type=int, nargs=2, action='append', + help="a pair of atom indices to track distance", required=False) + parser.add_argument("--angle", "-a", type=int, nargs=3, action='append', + help="three atom indices to track angle", required=False) + parser.add_argument("--dihedral", "-d", type=int, nargs=4, action='append', + help="four atom indices to track dihedral", required=False) + + args = vars(parser.parse_args(sys.argv[1:])) + chkfile = args['checkpoint_filename'] + del args['checkpoint_filename'] + + int_coords = [] + for v in args.values(): + while v: + int_coords.append(v.pop(0)) + + assert len(int_coords) > 0 and len( + int_coords) < 3, "only one or two internal coordinates should be specified" + + traj = presto.trajectory.Trajectory.new_from_checkpoint(chkfile) + frames = traj.as_ensemble().molecules + + coord_values = [] # 2D array of internal coordinate values for whole trajectory + + for coord in int_coords: + value = [] # array of int coord values for all frames + if len(coord) == 2: + for frame in frames: + value.append(frame.get_distance(atoms=coord)) + elif len(coord) == 3: + for frame in frames: + value.append(frame.get_angle(atoms=coord)) + elif len(coord) == 4: + for frame in frames: + value.append(frame.get_dihedral(atoms=coord)) + coord_values.append(value) + + label = {2: 'distance', 3: 'angle', 4: 'dihedral'} + + if len(coord_values) == 1: # only one internal coordinate + label1 = f"{label[len(int_coords[0])]} {int_coords[0]}" + df = pd.DataFrame(coord_values[0], columns=[label1]) + sns.displot(data=df, x=label1, kind="kde") + plt.savefig('cluster1d.png', dpi=300) + plt.show() + else: # two internal coordinates + label1 = f"{label[len(int_coords[0])]} {int_coords[0]}" + label2 = f"{label[len(int_coords[1])]} {int_coords[1]}" + df = pd.DataFrame(np.transpose(coord_values), columns=[label1, label2]) + #sns.jointplot(data=df, x=label1, y=label2) + sns.jointplot(data=df, x=label1, y=label2, kind="kde") + + plt.savefig('cluster2d.png', dpi=300) + plt.show() diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..cd1eb7e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[metadata] +name = presto +version = 0.2.2-dev + +[options] +packages = presto \ No newline at end of file diff --git a/setup.py b/setup.py index ad78e7c..207f8af 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ long_description_content_type="text/markdown", url="https://github.com/cwagen/presto", # packages=setuptools.find_packages(), - install_requires=["cctk", "tqdm", "h5py", "pyyaml", "matplotlib", "asciichartpy", "fasteners"], + install_requires=["cctk", "tqdm", "h5py", "pyyaml", "matplotlib", "asciichartpy", "fasteners", "dill", "pandas", "seaborn"], classifiers=[ "Programming Language :: Python :: 3", "Development Status :: 3 - Alpha", diff --git a/tutorial/notes b/tutorial/notes new file mode 100644 index 0000000..5eda46f --- /dev/null +++ b/tutorial/notes @@ -0,0 +1,9 @@ +Notes for use of presto: + +1. preequilibration of solvent (preequil.yaml) +2. equilibration of system (equil.yaml) + +(1) and (2) are run with run.py + +3. reaction simulation (rxn.yaml) +(3) is run with run_ts.py \ No newline at end of file diff --git a/tutorial/tutorial00/analyze.py b/tutorial/tutorial00/analyze.py new file mode 100644 index 0000000..cf31da9 --- /dev/null +++ b/tutorial/tutorial00/analyze.py @@ -0,0 +1,39 @@ + +import argparse, math, sys, os, subprocess, glob, re, shutil, logging, presto +import numpy as np +from asciichartpy import plot + +parser = argparse.ArgumentParser(prog="analyze.py", description="Analyze a single presto job -- prints temperature and energy as a function of time, and can write a movie.") +parser.add_argument("--cutoff", "-c", default=0, type=int, help="index of first frame to analyze, defaults to 0 (i.e. analyzing all frames)") +parser.add_argument("--movie", "-m", default=False, help="which atoms to include in movie, either 'high' or 'all'. if blank, no movie will be written.") +parser.add_argument("config_filename", type=str, help="path to config file (usually ends in .yaml)") +parser.add_argument("checkpoint_filename", type=str, help="path to checkpoint file (usually ends in .chk)") + +args = vars(parser.parse_args(sys.argv[1:])) + +traj = presto.config.build(args["config_filename"], args["checkpoint_filename"], load_frames=slice(args["cutoff"], None, None)) +print(f"{len(traj.frames)} frames loaded from {args['checkpoint_filename']} ({traj.num_frames()} total)") +if traj.finished: + print("trajectory finished!") + +temps = np.array([f.temperature() for f in traj.frames]) +energies = np.array([f.energy for f in traj.frames][:-1]) +rel_energies = energies - np.min(energies) +rel_energies = energies * 627.509 + +max_width = 100 +scale = math.ceil(len(energies) / max_width) +if scale < 1: + scale == 1 + +height = 16 + +print(f"TEMPERATURE:\t\t{np.mean(temps):.2f} K (± {np.std(temps):.2f})") +print(plot(np.mean(temps[:(len(temps)//scale)*scale].reshape(-1,scale), axis=1), {"height": height})) +print(f"ENERGY:\t\t\t{np.mean(energies):.2f} (± {np.std(energies)*627.509:.2f} kcal/mol)") +print(plot(np.mean(rel_energies[:(len(rel_energies)//scale)*scale].reshape(-1,scale), axis=1), {"height":height})) + +if args["movie"]: + movie_path = re.sub("chk$", "pdb", args["checkpoint_filename"]) + print(f"writing movie to {movie_path}...") + traj.write_movie(movie_path, idxs=args["movie"]) diff --git a/tutorial/tutorial00/presto_tut_00.sh b/tutorial/tutorial00/presto_tut_00.sh new file mode 100644 index 0000000..836dd35 --- /dev/null +++ b/tutorial/tutorial00/presto_tut_00.sh @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=presto_tut_00 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=8 +#SBATCH --partition=day +#SBATCH -t 00:05:00 +#SBATCH --mem-per-cpu=8000mb +#SBATCH --mail-type=END,FAIL + +module load miniconda +conda activate presto-d + +python test_presto.py + + diff --git a/tutorial/tutorial01/mine/AcCl_NaN3.xyz b/tutorial/tutorial01/mine/AcCl_NaN3.xyz new file mode 100644 index 0000000..60fac94 --- /dev/null +++ b/tutorial/tutorial01/mine/AcCl_NaN3.xyz @@ -0,0 +1,13 @@ +11 +close to TS + C -1.08358600 -0.26735500 0.52360200 + O -0.22813100 -1.05036100 0.86101600 + C -1.70614000 0.77957400 1.39400000 + H -1.02897400 1.64083300 1.33607900 + H -2.69423200 1.07404900 1.04220300 + H -1.74041800 0.41054300 2.42232800 + Cl -1.73177300 -0.35921400 -1.14209900 + N 1.57453200 1.21832200 -0.27449400 + N 0.66337300 1.95262400 -0.23529500 + N 2.50446800 0.46198300 -0.31822300 + Na 1.84277800 -1.55633900 0.18340100 diff --git a/tutorial/tutorial01/mine/solvated.xyz b/tutorial/tutorial01/mine/solvated.xyz new file mode 100644 index 0000000..ef225fe --- /dev/null +++ b/tutorial/tutorial01/mine/solvated.xyz @@ -0,0 +1,313 @@ + 311 + Built with Packmol + C -0.753758 -0.658688 -0.002991 + O 0.101697 -1.441694 0.334423 + C -1.376312 0.388241 0.867407 + H -0.699146 1.249500 0.809486 + H -2.364404 0.682716 0.515610 + H -1.410590 0.019210 1.895735 + Cl -1.401945 -0.750547 -1.668692 + N 1.904360 0.826989 -0.801087 + N 0.993201 1.561291 -0.761888 + N 2.834296 0.070650 -0.844816 + Na 2.172606 -1.947672 -0.343192 + C 0.327934 -2.453899 -8.984994 + H 0.277427 -2.900961 -8.014177 + H 0.928236 -1.569326 -8.939521 + H 0.764544 -3.146200 -9.674190 + C -1.091750 -2.087193 -9.455763 + N -2.161122 -1.810972 -9.810369 + C -5.205138 -6.586187 1.956134 + H -4.451093 -6.778046 2.690645 + H -6.147176 -6.951573 2.308210 + H -5.273688 -5.532659 1.782122 + C -4.835907 -7.300461 0.642711 + N -4.557785 -7.838486 -0.346621 + C -2.567489 -3.702633 5.530558 + H -2.793906 -4.700232 5.216819 + H -3.320161 -3.363303 6.211175 + H -1.614562 -3.689352 6.017029 + C -2.529835 -2.774334 4.302371 + N -2.501472 -2.075096 3.377243 + C 0.978981 2.554762 9.199888 + H 0.157182 3.054428 9.668802 + H 1.330789 1.771007 9.837743 + H 0.658564 2.139024 8.267475 + C 2.116576 3.561988 8.948947 + N 2.973466 4.320678 8.759927 + C -8.881401 -1.433952 -0.132785 + H -9.886845 -1.796926 -0.180031 + H -8.306353 -1.872811 -0.921216 + H -8.881046 -0.369440 -0.241017 + C -8.262463 -1.812012 1.225739 + N -7.796250 -2.096785 2.249042 + C 5.180780 -2.547945 6.041074 + H 4.646032 -2.242412 5.166092 + H 4.715841 -2.127461 6.908211 + H 5.168411 -3.615519 6.112012 + C 6.637384 -2.056356 5.950269 + N 7.734566 -1.686068 5.881869 + C 4.995053 -4.983618 1.879699 + H 4.441182 -4.079643 1.734936 + H 4.571293 -5.761538 1.279560 + H 6.014107 -4.827261 1.593374 + C 4.935437 -5.390077 3.363895 + N 4.890532 -5.696242 4.481860 + C 6.741755 4.115500 3.695467 + H 7.715847 3.998612 3.268405 + H 6.347085 5.073075 3.426791 + H 6.095639 3.346916 3.325721 + C 6.837744 4.011728 5.228965 + N 6.910048 3.933562 6.384068 + C 2.196605 4.959932 1.494640 + H 2.660715 5.682741 0.856637 + H 1.216794 4.736710 1.127159 + H 2.784705 4.066143 1.508024 + C 2.092402 5.527287 2.922523 + N 2.013912 5.954645 3.998071 + C -2.774953 -5.680957 -6.037438 + H -2.597342 -4.797261 -5.460864 + H -2.327073 -6.521263 -5.549393 + H -3.828904 -5.841501 -6.128610 + C -2.158290 -5.512339 -7.438471 + N -1.693791 -5.385328 -8.493795 + C 6.477149 1.305912 -4.506183 + H 7.041989 2.063902 -5.007488 + H 7.143795 0.646754 -3.990437 + H 5.812447 1.762762 -3.803076 + C 5.661404 0.506146 -5.538914 + N 5.046947 -0.096275 -6.316816 + C -8.723344 -2.058586 -3.305789 + H -9.409259 -1.257679 -3.124217 + H -9.014522 -2.914279 -2.733158 + H -8.731745 -2.306304 -4.346685 + C -7.304969 -1.623209 -2.893165 + N -6.236583 -1.295262 -2.582357 + C -4.115322 2.074959 -7.497615 + H -4.252817 1.898527 -8.543974 + H -3.992953 1.140272 -6.991368 + H -3.243890 2.677814 -7.349050 + C -5.347763 2.806478 -6.934082 + N -6.276095 3.357493 -6.509603 + C 2.542265 -8.151151 -3.656131 + H 2.845875 -7.175629 -3.974057 + H 1.918717 -8.592467 -4.405349 + H 3.407344 -8.762813 -3.506440 + C 1.757668 -8.039673 -2.335683 + N 1.166674 -7.955702 -1.341061 + C -5.817602 -4.061581 4.995508 + H -6.786164 -4.509843 4.919039 + H -5.801759 -3.385969 5.825084 + H -5.082078 -4.824601 5.142835 + C -5.505003 -3.290617 3.699554 + N -5.269538 -2.709890 2.723381 + C -1.087836 -5.838515 7.443334 + H -1.688338 -5.001370 7.732273 + H -0.266715 -5.939587 8.121895 + H -1.681548 -6.728378 7.466819 + C -0.550862 -5.617174 6.017056 + N -0.146389 -5.450450 4.942717 + C -2.706064 -6.231979 -2.715133 + H -2.771964 -5.430956 -2.008790 + H -3.630502 -6.315018 -3.247504 + H -1.913221 -6.034081 -3.405883 + C -2.421821 -7.550162 -1.971360 + N -2.207716 -8.543079 -1.411115 + C 3.885038 4.948419 5.467694 + H 3.254436 5.747063 5.798464 + H 4.835505 5.019660 5.953943 + H 3.429868 4.010846 5.709926 + C 4.079776 5.045840 3.943166 + N 4.226462 5.119221 2.794820 + C 0.605901 -0.430194 8.900450 + H 0.913967 -1.052293 9.714692 + H 0.134296 -1.031033 8.151129 + H -0.085584 0.305237 9.255259 + C 1.836496 0.271449 8.296354 + N 2.763438 0.799959 7.841321 + C 6.468548 0.437619 3.946502 + H 6.031517 -0.246078 3.249034 + H 7.260490 -0.050092 4.475525 + H 6.859152 1.283257 3.419980 + C 5.395562 0.906486 4.946736 + N 4.587339 1.259658 5.700158 + C 4.631204 5.034690 -6.382117 + H 5.194958 5.939449 -6.474274 + H 4.576882 4.751693 -5.351650 + H 3.643003 5.189943 -6.761924 + C 5.320276 3.916368 -7.185943 + N 5.839318 3.073996 -7.791422 + C 2.925362 -0.179218 3.752872 + H 2.394731 -0.278688 4.676688 + H 3.941029 -0.486629 3.890056 + H 2.464035 -0.794118 3.008576 + C 2.891240 1.291382 3.297053 + N 2.865537 2.399107 2.953709 + C 8.089380 -4.812923 -1.256707 + H 8.374114 -4.922480 -2.282292 + H 7.084663 -4.448924 -1.202419 + H 8.152039 -5.761626 -0.765855 + C 9.035437 -3.813707 -0.565229 + N 9.748052 -3.061051 -0.044374 + C -3.356845 4.172349 -2.277226 + H -3.085625 3.409794 -2.977119 + H -3.400535 3.754192 -1.293287 + H -2.626460 4.953948 -2.300492 + C -4.735527 4.746774 -2.652557 + N -5.774014 5.179458 -2.935273 + C 3.626378 -8.773593 0.662062 + H 4.594016 -8.989093 0.259399 + H 3.575065 -9.121388 1.672659 + H 3.457831 -7.717193 0.639431 + C 2.550139 -9.483294 -0.180335 + N 1.739465 -10.017874 -0.814868 + C 6.160966 3.411476 -0.199189 + H 7.186331 3.569140 0.062856 + H 6.106244 2.943514 -1.159875 + H 5.698984 2.780959 0.531510 + C 5.428873 4.765548 -0.245329 + N 4.877426 5.785498 -0.280084 + C -0.218266 9.330792 -0.007311 + H 0.742316 9.226609 -0.467016 + H -0.101392 9.740430 0.974237 + H -0.826641 9.984671 -0.596567 + C -0.893390 7.950070 0.089712 + N -1.401926 6.910046 0.162794 + C 4.001452 -2.392478 -2.475394 + H 3.247037 -1.646319 -2.613255 + H 4.422813 -2.297056 -1.496492 + H 3.566155 -3.363970 -2.583183 + C 5.107302 -2.205501 -3.530730 + N 5.940281 -2.064661 -4.325659 + C -4.653904 5.762199 2.863304 + H -5.651217 5.442157 3.082029 + H -4.329122 6.454227 3.611979 + H -4.001199 4.914383 2.854077 + C -4.625369 6.447036 1.484253 + N -4.603876 6.962888 0.445487 + C -3.333356 8.886154 1.356336 + H -2.842602 9.556492 2.030656 + H -2.917909 7.905837 1.462559 + H -4.379119 8.856595 1.580848 + C -3.132489 9.374834 -0.090193 + N -2.981187 9.742931 -1.179785 + C -6.659511 -0.742087 -5.754528 + H -6.777826 -1.580374 -5.100176 + H -5.717189 -0.271944 -5.565069 + H -6.693976 -1.075224 -6.770762 + C -7.795862 0.267231 -5.506366 + N -8.651814 1.027497 -5.319440 + C 5.809883 5.650046 -2.893630 + H 5.329277 6.571111 -3.149674 + H 6.362686 5.776398 -1.986247 + H 5.071162 4.887938 -2.758046 + C 6.769176 5.239419 -4.026211 + N 7.491761 4.930114 -4.879324 + C 3.373993 8.083031 3.821581 + H 3.827971 8.862385 3.245897 + H 3.734171 7.133865 3.483563 + H 3.622387 8.211558 4.854383 + C 1.844717 8.142450 3.650163 + N 0.692795 8.187207 3.521043 + C -0.335897 8.319363 -3.885632 + H -0.229975 8.225868 -2.825001 + H -1.287340 8.753337 -4.112176 + H 0.442263 8.946462 -4.267883 + C -0.238945 6.926775 -4.535939 + N -0.165917 5.877812 -5.025780 + C -7.550695 0.611065 5.073460 + H -6.661643 0.188406 4.654121 + H -8.000578 -0.094220 5.740602 + H -7.302684 1.502403 5.610962 + C -8.539719 0.951602 3.943210 + N -9.284698 1.208110 3.091853 + C 5.082623 -6.979130 -2.380441 + H 5.175808 -7.952870 -1.946797 + H 5.899983 -6.366823 -2.061219 + H 5.093841 -7.061213 -3.447229 + C 3.755974 -6.340798 -1.928629 + N 2.756679 -5.859977 -1.588302 + C 0.212336 -5.387201 -4.957044 + H -0.119437 -6.391063 -4.792467 + H 0.350100 -5.224469 -6.005586 + H -0.520918 -4.703424 -4.583297 + C 1.546903 -5.160729 -4.222714 + N 2.552161 -4.990139 -3.669581 + C 4.509319 -4.454962 -5.217535 + H 3.742637 -3.750734 -4.970223 + H 5.281381 -4.416153 -4.477729 + H 4.092722 -5.440092 -5.246952 + C 5.101161 -4.106527 -6.595910 + N 5.546964 -3.844069 -7.634166 + C 8.017368 -1.451798 -1.378369 + H 7.531136 -0.498658 -1.377230 + H 7.813583 -1.955050 -2.300382 + H 9.073194 -1.313703 -1.273192 + C 7.490879 -2.298052 -0.204374 + N 7.094303 -2.935491 0.679933 + C 2.037972 -3.853900 6.438487 + H 1.001133 -3.776561 6.185738 + H 2.580333 -3.061817 5.965888 + H 2.152761 -3.781064 7.499815 + C 2.584443 -5.210049 5.954925 + N 2.996070 -6.231563 5.590683 + C 2.521501 6.613542 -2.080576 + H 3.405115 7.214657 -2.027778 + H 2.588292 5.817921 -1.368237 + H 1.665730 7.217384 -1.861643 + C 2.385300 6.024405 -3.496897 + N 2.282707 5.580640 -4.563737 + C -1.024845 1.699689 -6.501495 + H -0.320449 1.024286 -6.062683 + H -1.851548 1.835045 -5.835806 + H -0.551720 2.642847 -6.678997 + C -1.529762 1.119513 -7.835682 + N -1.910090 0.682498 -8.840654 + C -3.520269 5.425990 -6.229570 + H -2.765962 5.155555 -5.520496 + H -4.458916 5.010336 -5.927786 + H -3.600590 6.491969 -6.275785 + C -3.139356 4.879231 -7.617936 + N -2.852434 4.467386 -8.663718 + C -6.620404 3.880267 0.362509 + H -6.316968 3.156787 1.090110 + H -7.637344 4.160972 0.541217 + H -5.993700 4.744418 0.435899 + C -6.495475 3.273801 -1.047523 + N -6.401373 2.816982 -2.109625 + C -0.008140 -5.261710 -0.054245 + H -0.560678 -4.820040 -0.857071 + H 0.885541 -5.706979 -0.438914 + H 0.248126 -4.507186 0.659842 + C -0.867962 -6.342481 0.627106 + N -1.515621 -7.156569 1.140332 + C -7.710787 -4.901754 -0.134369 + H -8.049290 -3.890699 -0.224276 + H -7.159130 -5.013599 0.775612 + H -8.553878 -5.560527 -0.123618 + C -6.804149 -5.247802 -1.330136 + N -6.121227 -5.508461 -2.230843 + C 0.123242 5.338726 6.766326 + H 0.038330 5.519845 7.817461 + H -0.853619 5.294824 6.331911 + H 0.680460 6.131044 6.311748 + C 0.849423 4.000892 6.532962 + N 1.396417 2.993172 6.357181 + C -1.136750 5.712290 3.387717 + H -0.165679 6.023661 3.711708 + H -1.510499 4.961197 4.051848 + H -1.799739 6.552136 3.389790 + C -1.042235 5.136411 1.962575 + N -0.971043 4.702632 0.889091 + C 2.782878 1.009091 -3.522089 + H 2.161094 0.171092 -3.285355 + H 2.471462 1.857420 -2.949151 + H 3.800616 0.775262 -3.288782 + C 2.661203 1.330760 -5.023196 + N 2.569552 1.573057 -6.153901 + C 8.305648 -0.509922 1.529778 + H 7.360002 -0.990124 1.671410 + H 8.968272 -1.172444 1.013160 + H 8.170423 0.380889 0.952655 + C 8.907610 -0.147355 2.900101 + N 9.361036 0.125747 3.932293 diff --git a/tutorial/tutorial01/presto_tut_01_equil.sh b/tutorial/tutorial01/presto_tut_01_equil.sh new file mode 100644 index 0000000..d0d5df3 --- /dev/null +++ b/tutorial/tutorial01/presto_tut_01_equil.sh @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --job-name=presto_tut_00 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=16 +#SBATCH --partition=day +#SBATCH -t 24:00:00 +#SBATCH --mem-per-cpu=9000mb +#SBATCH --mail-type=END,FAIL + +module load miniconda +conda activate presto + +python run.py + + diff --git a/tutorial/tutorialxx/300k_1ns_demux_cluster.png b/tutorial/tutorialxx/300k_1ns_demux_cluster.png new file mode 100644 index 0000000..709c8a0 Binary files /dev/null and b/tutorial/tutorialxx/300k_1ns_demux_cluster.png differ diff --git a/tutorial/tutorialxx/300k_cluster_1ns.png b/tutorial/tutorialxx/300k_cluster_1ns.png new file mode 100644 index 0000000..58668bf Binary files /dev/null and b/tutorial/tutorialxx/300k_cluster_1ns.png differ diff --git a/tutorial/tutorialxx/README.md b/tutorial/tutorialxx/README.md new file mode 100644 index 0000000..cd486f6 --- /dev/null +++ b/tutorial/tutorialxx/README.md @@ -0,0 +1,195 @@ +# Tutorial xx: Replica Exchange Molecular Dynamics + +Building on your familiarity with running equilibration trajectories, this tutorial will introduce you to running replica exchange molecular dynamics (REMD) in *presto*. + +## Introduction + +As a bare-bones example, we will use replica exchange molecular dynamics to explore part of the potential energy surface of alanine dipeptide (_N_-acetyl-L-alanine-_N_'-methylamide) in the gas phase. We will perform calculations at the [GFN-FF](https://xtb-docs.readthedocs.io/en/latest/gfnff.html) [level](https://onlinelibrary.wiley.com/doi/full/10.1002/anie.202004239) so that the REMD run can be completed in a short time (1–2 days), keeping in mind that we realistically use *presto* for explicitly solvated molecules at the *ab initio* level. + +Here, we will perform a REMD simulation with alanine dipeptide using four replicas from 300 K to 1000 K. *presto* will distribute the temperatures of the replicas according to a geometric progression based on the specified minimum and maximum temperatures. Each MD frame will last 1 fs and we will perform the exchange every 2 ps (i.e. 2000 frames). We will target a total runtime of 1 ns. + +## Input and Output Files + +REMD in *presto* requires the [Slurm](https://Slurm.schedmd.com/documentation.html) scheduling system, and assumes that jobs are submitted using `$ sbatch `. + +We first activate the presto virtual environment if applicable, then copy the two scripts needed to run REMD from the `scripts/` folder. As its name implies, the first script manages the REMD run and is the only script the user needs to call. For each script, all the available command line options can be viewed by running `$ python