diff --git a/.gitignore b/.gitignore index c3e4b29..94ac255 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ pkg/* !pkg/.keep + +__pycache__ \ No newline at end of file diff --git a/README b/README index db7d916..b136aa7 100644 --- a/README +++ b/README @@ -1,6 +1,10 @@ +Note: Updated and works (at least for me) in Python 3 + + PBS Tools Copyright 2005-2019, Ohio Supercomputer Center Copyright 2008-2015, University of Tennessee +Copyright 2021-2025, Chen Chen Website: http://www.osc.edu/~troy/pbstools/ Git repo: https://github.com/tabaer/pbstools @@ -60,9 +64,9 @@ Database/web tools: Note: these all assume a MySQL DB back end running on the same server as pbs_server and PHP-enabled httpd. -etc/create-tables.sql -- Create "pbsacct" database, table, and two +etc/create-tables.mysql -- Create "pbsacct" database, table, and two user accounts (pbsacct and webapp) used by the other DB tools. To -initialize, run "mysql -u root -p passwd 0 last = t[0] lasti = i = 1 - while i << n: + while i < n: if t[i] != last: t[lasti] = last = t[i] lasti += 1 @@ -143,35 +141,40 @@ def unique(s): if x not in u: u.append(x) return u -### end code from Python Cookbook + + +# end code from Python Cookbook + def job_exists(jobid): """ OVERVIEW: finds if a job is in the queue - + PARAMETERS: jobid, the job ID to find - + RETURNS: True, if job is found False, if job is not found """ - if ( jobid is not None ): + if jobid is not None: user_name = getpass.getuser() - cmd = "qselect -s RHQ -u "+user_name+" | egrep -c "+ jobid - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + cmd = "qselect -s RHQ -u " + user_name + " | egrep -c " + jobid + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout pin.close() - stderr = perr.readlines() perr.close() stdout = pout.readlines() pout.close() p.wait() - if len(stdout)>0: + if len(stdout) > 0: num_jobid = int(stdout[0]) if num_jobid > 0: return True @@ -191,34 +194,35 @@ class Category(object): _max, the maximum number of jobs in queue or running _job_nodes , list of job nodes in category """ - def __init__(self,name): + + def __init__(self, name): """ Creates a new category with _name, name sets _max to infinity and _job_nodes to an empty list """ self._name = name - self._max = float('infinity') + self._max = float("infinity") self._job_nodes = [] - + def get_name(self): """ Returns private attribute _name """ return self._name - + def get_max(self): """ Returns private attribute _max """ return self._max - - def set_max(self,maximum): + + def set_max(self, maximum): """ Sets _max to maximum """ self._max = maximum - def add_job_node(self,node): + def add_job_node(self, node): """ Adds a node to the _job_nodes list """ @@ -230,11 +234,13 @@ class Category(object): """ return self._job_nodes + class JobNode(object): """ Defines a job node in the DAG """ - def __init__(self,name,script): + + def __init__(self, name, script): self._name = name self._script = script self._queue = None @@ -256,33 +262,33 @@ class JobNode(object): self._abortreturn = dict() self._noretrystatus = 0 self._category = None - if ( not os.path.exists(script) ): - raise IOError, "File "+script+" not found" + if not os.path.exists(script): + raise IOError("File " + script + " not found") - def __cmp__(self, other): - return cmp(other._priority, self._priority) + def __lt__(self, 
other): + return self._priority < other._priority - def addChild(self,child): - if ( self._children is None ): + def addChild(self, child): + if self._children is None: self._children = [child] else: self._children.append(child) - def addParent(self,parent): - if ( self._parents is None ): + def addParent(self, parent): + if self._parents is None: self._parents = [parent] else: self._parents.append(parent) - def addVar(self,var): - if ( self._vars is None ): + def addVar(self, var): + if self._vars is None: self._vars = [var] else: self._vars.append(var) - def addAbortStatus(self,abort,returncode=None): + def addAbortStatus(self, abort, returncode=None): self._abortstatus.append(abort) - if ( returncode is None ): + if returncode is None: self._abortreturn[abort] = abort else: self._abortreturn[abort] = returncode @@ -328,7 +334,7 @@ class JobNode(object): def category(self): return self._category - + def noRetryStatus(self): return self._noretrystatus @@ -347,50 +353,50 @@ class JobNode(object): def exitCode(self): return self._exitcode - def setRetries(self,retries): + def setRetries(self, retries): self._retries = int(retries) - def setPriority(self,priority): + def setPriority(self, priority): self._priority = priority - def setQueue(self,queue): + def setQueue(self, queue): self._queue = queue def setScript(self, script): self._script = self._script + script - def setPreScript(self,prescript): + def setPreScript(self, prescript): self._prescript = prescript - def setPreScriptHasRun(self,flag): + def setPreScriptHasRun(self, flag): self._prescript_run = flag - def setPostScript(self,postscript): + def setPostScript(self, postscript): self._postscript = postscript - def setPostScriptHasRun(self,flag): + def setPostScriptHasRun(self, flag): self._postscript_run = flag - def setDone(self,done): + def setDone(self, done): self._done = done - def setSubmitted(self,submitted): + def setSubmitted(self, submitted): self._submitted = submitted - def setExitCode(self,exitcode): + def setExitCode(self, exitcode): self._exitcode = exitcode - def setNoRetryStatus(self,noretry): + def setNoRetryStatus(self, noretry): self._noretrystatus = noretry - - def set_category(self,category): + + def set_category(self, category): self._category = category def isSick(self): self._isHealthy = False - def childrenAreSick(self,dag): - if ( self._children is not None ): + def childrenAreSick(self, dag): + if self._children is not None: for child in self._children: childnode = dag.getNode(child) childnode.isSick() @@ -399,126 +405,210 @@ class JobNode(object): """ OVERVIEW: returns the status of the job node. Writes long messages to log if verbose. - + MODIFIES: self._done , if job is done self._exitCode, if job is done - + OUTCOMES: returns normally if job is completed, not found, or in progress. 
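Note on the __cmp__ removal above: it is required for Python 3, but the comparison direction changed. The old cmp(other._priority, self._priority) made higher-priority nodes sort first, while the new "self._priority < other._priority" sorts them last, so node_list.sort() in _presubmit()/_postsubmit() would now submit low-priority siblings first. A minimal order-preserving sketch (class name hypothetical), using functools.total_ordering so the other rich comparisons come for free:

    import functools

    @functools.total_ordering
    class PrioritizedNode:
        # Toy stand-in for JobNode: higher priority should sort first,
        # matching the removed Python 2 __cmp__(other, self) ordering.
        def __init__(self, name, priority=0):
            self.name = name
            self._priority = priority

        def __eq__(self, other):
            return self._priority == other._priority

        def __lt__(self, other):
            # Deliberately inverted: "less than" means "submit earlier",
            # i.e. higher numeric priority.
            return self._priority > other._priority

    nodes = [PrioritizedNode("a", 1), PrioritizedNode("b", 5), PrioritizedNode("c", 3)]
    print([n.name for n in sorted(nodes)])  # ['b', 'c', 'a']

If the ascending order in this patch is intentional, disregard; otherwise flipping the comparison restores the old behavior.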
exits program if job has failed - + RETURNS: SUCCESS = 0 job has completed successfully FAIL = -1 job has failed NOT_FOUND = 1 job is not found, status of job unknown - IN_PROGRESS = 2 job is currently in queue + IN_PROGRESS = 2 job is currently in queue """ - if ( self.jobId() is not None ): - if ( verbose ): - log.write(str(datetime.datetime.now())+": checking status of job "+ - self.name()+", jobid "+self.jobId()+"\n") + if self.jobId() is not None: + if verbose: + log.write( + str(datetime.datetime.now()) + + ": checking status of job " + + self.name() + + ", jobid " + + self.jobId() + + "\n" + ) log.flush() - cmd = "qstat -f "+str(self.jobId()) - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + cmd = "qstat -f " + str(self.jobId()) + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout pin.close() stderr = perr.readlines() perr.close() - if ( verbose and len(stderr)>0 ): + if verbose and len(stderr) > 0: for line in stderr: - log.write(str(datetime.datetime.now())+": "+line) + log.write(str(datetime.datetime.now()) + ": " + line) log.flush() stdout = pout.readlines() pout.close() p.wait() - if ( p.returncode!=0 ): - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " jobid "+self.jobId()+" not found\n") + if p.returncode != 0: + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " jobid " + + self.jobId() + + " not found\n" + ) log.flush() return NOT_FOUND - if( len(stdout)==0 ): - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " jobid "+self.jobId()+" not found\n") + if len(stdout) == 0: + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " jobid " + + self.jobId() + + " not found\n" + ) log.flush() return NOT_FOUND for line in stdout: - if( line.find('exit_status')!=-1 ): + if line.find("exit_status") != -1: exit_status = int(line.split(" = ")[1]) self.setExitCode(exit_status) - if( exit_status==0 ): - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " jobid "+self.jobId()+" has completed\n") + if exit_status == 0: + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " jobid " + + self.jobId() + + " has completed\n" + ) log.flush() self.setDone(True) return SUCCESS else: - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " jobid "+self.jobId()+" has failed\n") + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " jobid " + + self.jobId() + + " has failed\n" + ) log.flush() - if ( exit_status in self._abortstatus ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " jobid "+self.jobId()+" had fatal exit status "+ - str(exit_status)+", aborting\n") + if exit_status in self._abortstatus: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " jobid " + + self.jobId() + + " had fatal exit status " + + str(exit_status) + + ", aborting\n" + ) log.flush() sys.exit(self._abortreturn[exit_status]) return FAIL - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " jobid "+self.jobId()+" is in progress\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " jobid " + + self.jobId() + + " is in progress\n" + ) log.flush() 
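jobStatus() shells out to "qstat -f <jobid>" and greps the full job record for exit_status; everything downstream hinges on the four return codes. The same classification, sketched with subprocess.run (Python 3.7+) to show how much of the Popen/pipe bookkeeping this patch could shed; jobid is a hypothetical argument and the logging is omitted:

    import subprocess

    SUCCESS, FAIL, NOT_FOUND, IN_PROGRESS = 0, -1, 1, 2

    def qstat_exit_status(jobid):
        # Same classification as JobNode.jobStatus(), minus logging.
        # Passing an argument list (no shell=True) also avoids quoting
        # problems if the jobid ever contains shell metacharacters.
        result = subprocess.run(["qstat", "-f", str(jobid)],
                                capture_output=True, text=True)
        if result.returncode != 0 or not result.stdout:
            return NOT_FOUND  # flushed from the server, or never queued
        for line in result.stdout.splitlines():
            if "exit_status" in line:
                return SUCCESS if int(line.split(" = ")[1]) == 0 else FAIL
        return IN_PROGRESS  # qstat knows the job but it has not exited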
return IN_PROGRESS else: - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+" not found\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " not found\n" + ) log.flush() return NOT_FOUND - + def retry(self, cmd, dag, log): tries = 0 condition = self.jobStatus(log) - while ( (condition==IN_PROGRESS or condition==FAIL or condition==NOT_FOUND) - and (tries<=self.retries()) ): - # There are two possibilities where a job is not found. + while ( + condition == IN_PROGRESS or condition == FAIL or condition == NOT_FOUND + ) and (tries <= self.retries()): + # There are two possibilities where a job is not found. # 1. The job is in 'C' state for more than the length of # time that the server keeps completed jobs, so the job has # been flushed out. # 2. The job is somehow deleted in 'Q' or 'H' state(s). # Implementation: - # We assume that "NOT_FOUND" is same as "FAIL", and we will resubmit the job + # We assume that "NOT_FOUND" is same as "FAIL", and we will resubmit the job # if "NOT_FOUND" is detected. - if ( tries>=self.retries() and (condition==FAIL or condition==NOT_FOUND) ): - log.write(str(datetime.datetime.now())+": job "+self.name()+" retries exhausted\n") + if tries >= self.retries() and ( + condition == FAIL or condition == NOT_FOUND + ): + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " retries exhausted\n" + ) log.flush() return tries - elif ( self.exitCode() is not None and self.exitCode()==self.noRetryStatus() ): - log.write(str(datetime.datetime.now())+": job "+self.name()+" failed with exit status "+ - str(self.noRetryStatus())+", not retrying\n") + elif ( + self.exitCode() is not None and self.exitCode() == self.noRetryStatus() + ): + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " failed with exit status " + + str(self.noRetryStatus()) + + ", not retrying\n" + ) log.flush() return tries else: - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " has "+str(self.retries()-tries)+" retries left\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " has " + + str(self.retries() - tries) + + " retries left\n" + ) log.flush() - if ( condition==FAIL or condition==NOT_FOUND ): + if condition == FAIL or condition == NOT_FOUND: self.setJobId(None) - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+" retry "+str(tries)+ - " being submitted using command \""+cmd+"\"\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " retry " + + str(tries) + + ' being submitted using command "' + + cmd + + '"\n' + ) log.flush() - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout @@ -526,118 +616,152 @@ class JobNode(object): jobid = "" try: stdout = pout.readlines() - if ( len(stdout)>0 ): + if len(stdout) > 0: jobid = stdout[0] - if ( jobid[-1]=="\n" ): + if jobid[-1] == "\n": jobid = jobid[:-1] stderr = perr.readlines() for line in stderr: - log.write(str(datetime.datetime.now())+": "+line) + log.write(str(datetime.datetime.now()) + ": " + line) log.flush() pout.close() - except IndexError, err: - log.write(str(datetime.datetime.now())+": "+str(err)+"\n") + except 
IndexError as err: + log.write(str(datetime.datetime.now()) + ": " + str(err) + "\n") log.flush() stderr = perr.readlines() perr.close() for line in stderr: - log.write(str(datetime.datetime.now())+": "+line) + log.write(str(datetime.datetime.now()) + ": " + line) log.flush() p.wait() - tries += 1 - if ( p.returncode!=0 or jobid is None or jobid=="" ): + tries += 1 + if p.returncode != 0 or jobid is None or jobid == "": self.setJobId(None) - log.write(str(datetime.datetime.now())+": submission of retry "+str(tries)+ - " (max "+str(self.retries())+") for job "+self.name()+" failed\n") + log.write( + str(datetime.datetime.now()) + + ": submission of retry " + + str(tries) + + " (max " + + str(self.retries()) + + ") for job " + + self.name() + + " failed\n" + ) log.flush() else: self.setJobId(jobid) - log.write(str(datetime.datetime.now())+": submitted job "+self.name()+ - " retry "+str(tries)+" (max "+str(self.retries())+") as jobid "+ - str(self.jobId())+"\n") + log.write( + str(datetime.datetime.now()) + + ": submitted job " + + self.name() + + " retry " + + str(tries) + + " (max " + + str(self.retries()) + + ") as jobid " + + str(self.jobId()) + + "\n" + ) log.flush() - if ( verbose ): - log.write(str(datetime.datetime.now())+": sleeping for "+str(timeout)+" seconds\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": sleeping for " + + str(timeout) + + " seconds\n" + ) log.flush() dag.writeRescueDAG(log) time.sleep(timeout) condition = self.jobStatus(log) return tries - def _presubmit(self,dag,log,categories): + def _presubmit(self, dag, log, categories): # make sure all parents are submitted, if necessary parentjobs = None - if ( self.parents() is not None ): + if self.parents() is not None: node_list = [] for parent in self.parents(): - if (len(node_list) == 0): + if len(node_list) == 0: node_list = [dag.getNode(parent)] else: node_list.append(dag.getNode(parent)) node_list.sort() for parentnode in node_list: - success = parentnode.submit(dag,log,categories) - if ( not success ): + success = parentnode.submit(dag, log, categories) + if not success: return success else: - if ( parentjobs is None and parentnode.jobId() is not None): + if parentjobs is None and parentnode.jobId() is not None: parentjobs = [parentnode.jobId()] - elif ( parentnode.jobId() is not None ): + elif parentnode.jobId() is not None: parentjobs.append(parentnode.jobId()) - + # do prologue, if needed - if ( self.preScript() is not None and not self.preScriptHasRun() ): + if self.preScript() is not None and not self.preScriptHasRun(): cmd = self.preScript()[0] - if ( len(self.preScript())>1 ): + if len(self.preScript()) > 1: for arg in self.preScript()[1:]: - cmd = cmd+" "+arg - log.write(str(datetime.datetime.now())+": running PRE script for job "+ - self.name()+" -- \""+cmd+"\"\n") + cmd = cmd + " " + arg + log.write( + str(datetime.datetime.now()) + + ": running PRE script for job " + + self.name() + + ' -- "' + + cmd + + '"\n' + ) log.flush() exit_status = os.system(cmd) - if ( exit_status!=0 ): - raise RuntimeError,self.name()+": preScript failed" + if exit_status != 0: + raise RuntimeError(self.name() + ": preScript failed") self.setPreScriptHasRun(True) return True - def _postsubmit(self,dag,log,categories): + def _postsubmit(self, dag, log, categories): # do epilogue, if needed - if ( self.postScript() is not None and not self.postScriptHasRun() ): + if self.postScript() is not None and not self.postScriptHasRun(): cmd = self.postScript()[0] - if ( len(self.postScript())>1 ): + if 
len(self.postScript()) > 1: for arg in self.postScript()[1:]: - cmd = cmd+" "+arg - log.write(str(datetime.datetime.now())+": running POST script for job "+ - self.name()+" -- \""+cmd+"\"\n") + cmd = cmd + " " + arg + log.write( + str(datetime.datetime.now()) + + ": running POST script for job " + + self.name() + + ' -- "' + + cmd + + '"\n' + ) log.flush() exit_status = os.system(cmd) - if ( exit_status!=0 ): - raise RuntimeError,self.name()+": postScript failed" + if exit_status != 0: + raise RuntimeError(self.name() + ": postScript failed") self.setPostScriptHasRun(True) - + # dump out a rescue DAG just in case dag.writeRescueDAG(log) - + # make sure all children are submitted, if necessary - if ( self.children() is not None ): + if self.children() is not None: node_list = [] for child in self.children(): - if( len(node_list)==0 ): + if len(node_list) == 0: node_list = [dag.getNode(child)] else: node_list.append(dag.getNode(child)) node_list.sort() for childnode in node_list: - success = childnode.submit(dag,log,categories) - if ( not success ): + success = childnode.submit(dag, log, categories) + if not success: return success return True - def submit(self,dag,log,categories,deptype="afterok"): + def submit(self, dag, log, categories, deptype="afterok"): """ - OVERVIEW: Submits job for self with dependencies + OVERVIEW: Submits job for self with dependencies OUTCOMES: 1) if job is submitted successfully postsubmission script is run and @@ -650,76 +774,124 @@ class JobNode(object): RETURNS: self._postsubmit(dag,log,categories) """ - if ( self.isSubmitted() ): + if self.isSubmitted(): return self.isSubmitted() - elif ( self.isDone() ): + elif self.isDone(): return self.isDone() # do generic pre-submission stuff - success = self._presubmit(dag,log,categories) - if ( not success ): + success = self._presubmit(dag, log, categories) + if not success: return success - + # do actual job submission - if ( not self.isSubmitted() ): - if ( not self.isDone() ): - #check number of jobs in category and wait with user specified wait time + if not self.isSubmitted(): + if not self.isDone(): + # check number of jobs in category and wait with user specified wait time if self.category() is not None: - while self.num_category(categories, dag.getNodes()) >= categories[self.category()].get_max(): + while ( + self.num_category(categories, dag.getNodes()) + >= categories[self.category()].get_max() + ): dag.writeRescueDAG(log) - log.write(str(datetime.datetime.now())+": waiting "+str(maxJobWait)+ - " sec for CATEGORY "+self.category()+"\n") + log.write( + str(datetime.datetime.now()) + + ": waiting " + + str(maxJobWait) + + " sec for CATEGORY " + + self.category() + + "\n" + ) time.sleep(maxJobWait) dependencies = None parents = self.parents() - if ( parents is not None and len(parents)>0 ): + if parents is not None and len(parents) > 0: for parent in parents: jobid = dag.getNode(parent).jobId() - if ( jobid is not None ): + if jobid is not None: # Need to check if jobids still exist before adding them # as a dependency - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " checking for existence of parent job "+jobid+"\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " checking for existence of parent job " + + jobid + + "\n" + ) log.flush() - #if ( os.system("qstat "+jobid+" 2>/dev/null 1>/dev/null")==0 ): + # if ( os.system("qstat "+jobid+" 2>/dev/null 1>/dev/null")==0 ): if job_exists(jobid): - if ( verbose ): - 
log.write(str(datetime.datetime.now())+": job "+self.name()+ - " parent "+jobid+" found, adding to dependency list\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " parent " + + jobid + + " found, adding to dependency list\n" + ) log.flush() - if ( dependencies is None ): + if dependencies is None: dependencies = [jobid] else: dependencies.append(jobid) else: - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " parent "+jobid+" not found, ignoring\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " parent " + + jobid + + " not found, ignoring\n" + ) log.flush() dependency = "" - if ( dependencies is not None ): + if dependencies is not None: # Filter dependencies to get the unique values; otherwise PBS gets confused. dependencies = unique(dependencies) dependencies.sort() - #dependencies.reverse() - dependency = "-W depend=%s:%s" % (deptype,":".join(dependencies)) + # dependencies.reverse() + dependency = "-W depend=%s:%s" % (deptype, ":".join(dependencies)) vars = "" - if ( self.vars() is not None ): + if self.vars() is not None: vars = "-v %s" % ",".join(self.vars()) queue = "" - if ( self.queue() is not None ): - queue = "-q "+self.queue() - cmd = jobsubmit+" -N "+self.name()+" "+queue+" "+dependency+" "+vars+" "+self.script() - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " being submitted using command \""+cmd+"\"\n") + if self.queue() is not None: + queue = "-q " + self.queue() + cmd = ( + jobsubmit + + " -N " + + self.name() + + " " + + queue + + " " + + dependency + + " " + + vars + + " " + + self.script() + ) + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + ' being submitted using command "' + + cmd + + '"\n' + ) log.flush() - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout @@ -727,56 +899,68 @@ class JobNode(object): jobid = "" try: stdout = pout.readlines() - if ( len(stdout)>0 ): + if len(stdout) > 0: jobid = stdout[0] - if ( jobid[-1]=="\n" ): + if jobid[-1] == "\n": jobid = jobid[:-1] for line in perr.readlines(): - log.write(str(datetime.datetime.now())+": "+line) + log.write(str(datetime.datetime.now()) + ": " + line) log.flush() pout.close() - except IndexError, err: - if(verbose): - sys.stderr.write(str(err) + "\n") + except IndexError as err: + if verbose: + sys.stderr.write(str(err) + "\n") perr.close() p.wait() - if ( p.returncode!=0 or jobid is None or jobid=="" ): - log.write(str(datetime.datetime.now())+": job submission failed for job "+ - self.name()+", aborting\n") + if p.returncode != 0 or jobid is None or jobid == "": + log.write( + str(datetime.datetime.now()) + + ": job submission failed for job " + + self.name() + + ", aborting\n" + ) log.flush() sys.exit(-1) self.setJobId(jobid) - log.write(str(datetime.datetime.now())+": submitted job "+self.name()+ - " as jobid "+self.jobId()+"\n") + log.write( + str(datetime.datetime.now()) + + ": submitted job " + + self.name() + + " as jobid " + + self.jobId() + + "\n" + ) log.flush() - + # if RETRY is defined or there is a POST script to run, call retry() - if ( self.retries()>0 or (self.postScript() is not None and not self.postScriptHasRun()) ): - 
self.retry(cmd,dag,log) - + if self.retries() > 0 or ( + self.postScript() is not None and not self.postScriptHasRun() + ): + self.retry(cmd, dag, log) + self.setSubmitted(True) # if nothing failed, do generic post-submission stuff - return self._postsubmit(dag,log,categories) - + return self._postsubmit(dag, log, categories) + def num_category(self, categories, nodes): """ - OVERVIEW: finds the number of jobs submitted or running in + OVERVIEW: finds the number of jobs submitted or running in self._category PARAMETERS:categories, dictionary of all categories nodes, dictionary of all job nodes - RETURNS: the number of jobs submitted or running in self._category + RETURNS: the number of jobs submitted or running in self._category """ num_in_category = 0 - + if self._category is not None: - #initialize regular expression + # initialize regular expression reg_ex = "" - #get user name + # get user name user_name = getpass.getuser() - #get names of job nodes in my category and build regular expression of the jobIDs + # get names of job nodes in my category and build regular expression of the jobIDs cat_siblings = categories[self._category].get_job_nodes() for x in cat_siblings: if nodes[x].jobId() is not None: @@ -784,25 +968,27 @@ class JobNode(object): reg_ex = str(nodes[x].jobId()) else: reg_ex = reg_ex + "\|" + str(nodes[x].jobId()) - cmd = "qselect -s RHQ -u "+user_name+" | egrep -c "+ reg_ex - #create subprocess to search number of jobs in category - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + cmd = "qselect -s RHQ -u " + user_name + " | egrep -c " + reg_ex + # create subprocess to search number of jobs in category + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout pin.close() - stderr = perr.readlines() perr.close() stdout = pout.readlines() pout.close() p.wait() - if len(stdout)>0: + if len(stdout) > 0: num_in_category = int(stdout[0]) - + return num_in_category @@ -811,204 +997,273 @@ class SubDAGNode(JobNode): Extension of JobNode class for "jobs" that are in fact DAGs themselves """ - def __init__(self,name,script): - super(SubDAGNode,self).__init__(name,script) - def submit(self,dag,log,categories): + def __init__(self, name, script): + super(SubDAGNode, self).__init__(name, script) + + def submit(self, dag, log, categories): """ OVERVIEW: Submits job for self with dependencies - + OUTCOMES: 1) if subDAGis executed successfully, postsubmission script is run and child nodes are submitted using self._postsubmit 2) if job submission fails message is printed to log and program exits - + MODIFIES: self._submitted self._jobid RETURNS: self._postsubmit(dag,log,categories) """ - if ( self.isSubmitted() ): + if self.isSubmitted(): return self.isSubmitted() - elif ( self.isDone() ): + elif self.isDone(): return self.isDone() - + # do generic pre-submission stuff - success = self._presubmit(dag,log,categories) - if ( not success ): + success = self._presubmit(dag, log, categories) + if not success: return success - # "submit job" by running running sys.argv[0] synchronously + # "submit job" by running running sys.argv[0] synchronously # on the script (actually DAG) file - if ( not self.isSubmitted() ): - args = [sys.argv[0],"-no_fork"] + if not self.isSubmitted(): + args = [sys.argv[0], "-no_fork"] for arg in sys.argv[1:-1]: - if ( arg in 
["-force","-no_submit","-v","-verbose"] ): + if arg in ["-force", "-no_submit", "-v", "-verbose"]: args.append(arg) args.append(self.script()) cmd = " ".join(args) - log.write(str(datetime.datetime.now())+": executing subDAG "+self.name()+"\n") - if ( verbose ): - log.write(str(datetime.datetime.now())+": \""+cmd+"\"\n") + log.write( + str(datetime.datetime.now()) + + ": executing subDAG " + + self.name() + + "\n" + ) + if verbose: + log.write(str(datetime.datetime.now()) + ': "' + cmd + '"\n') exitcode = os.system(cmd) self.setJobId(None) - self.setSubmitted(True) + self.setSubmitted(True) self.setDone(True) self.setExitCode(exitcode) - if ( self.exitCode()==0 and verbose ): - log.write(str(datetime.datetime.now())+": subDAG "+self.name()+" has completed\n") + if self.exitCode() == 0 and verbose: + log.write( + str(datetime.datetime.now()) + + ": subDAG " + + self.name() + + " has completed\n" + ) log.flush() - elif ( verbose ): - log.write(str(datetime.datetime.now())+": subDAG "+self.name()+" has failed\n") + elif verbose: + log.write( + str(datetime.datetime.now()) + + ": subDAG " + + self.name() + + " has failed\n" + ) log.flush() # if nothing failed, do generic post-submission stuff - return self._postsubmit(dag,log, categories) + return self._postsubmit(dag, log, categories) - def jobStatus(self,log): + def jobStatus(self, log): """ OVERVIEW: returns the status of the job node. Writes long messages to log if verbose. - + OUTCOMES: returns normally if job is completed, not found, or in progress. exits program if job has failed - + RETURNS: SUCCESS = 0 job has completed successfully FAIL = -1 job has failed NOT_FOUND = 1 job is not found, status of job unknown - IN_PROGRESS = 2 job is currently in queue + IN_PROGRESS = 2 job is currently in queue """ - if ( not self.isSubmitted() ): + if not self.isSubmitted(): return NOT_FOUND - elif ( self.isSubmitted() and not self.isDone() ): + elif self.isSubmitted() and not self.isDone(): return IN_PROGRESS - elif ( self.exitCode()!=0 or not self.isHealthy() ): + elif self.exitCode() != 0 or not self.isHealthy(): return FAIL else: return SUCCESS - + class DataJobNode(JobNode): """ Extension of JobNode class for jobs that need data movers """ - def __init__(self,name,script): - super(DataJobNode,self).__init__(name,script) + + def __init__(self, name, script): + super(DataJobNode, self).__init__(name, script) self._format = None self._mover = None - def setFormat(self,format): + def setFormat(self, format): self._format = format def format(self): return self._format - def setMover(self,mover): + def setMover(self, mover): self._mover = mover def mover(self): - return + return - def submit(self,dag,log,categories): + def submit(self, dag, log, categories): """ OVERVIEW: Submits job for self with dependencies - + OUTCOMES: 1) if job is submitted successfully postsubmission script is run and child nodes are submitted using self._postsubmit 2) if job submission fails message is printed to log and program exits - + MODIFIES: self._submitted self._jobid RETURNS: self._postsubmit(dag,log,categories) """ - - if ( self.isSubmitted() ): + + if self.isSubmitted(): return self.isSubmitted() - elif ( self.isDone() ): + elif self.isDone(): return self.isDone() - + # do generic pre-submission stuff - success = self._presubmit(dag,log,categories) - if ( not success ): + success = self._presubmit(dag, log, categories) + if not success: return success # do actual job submission - if ( not self.isSubmitted() ): - if ( not self.isDone() ): - #check number of jobs in 
category and wait with user specified wait time + if not self.isSubmitted(): + if not self.isDone(): + # check number of jobs in category and wait with user specified wait time if self.category() is not None: - while self.num_category(categories, dag.getNodes()) >= categories[self.category()].get_max(): + while ( + self.num_category(categories, dag.getNodes()) + >= categories[self.category()].get_max() + ): dag.writeRescueDAG(log) - log.write(str(datetime.datetime.now())+": waiting "+str(maxJobWait)+ - " sec for CATEGORY "+self.category()+"\n") + log.write( + str(datetime.datetime.now()) + + ": waiting " + + str(maxJobWait) + + " sec for CATEGORY " + + self.category() + + "\n" + ) log.flush() - time.sleep(maxJobWait) + time.sleep(maxJobWait) dependencies = None parents = self.parents() - if ( parents is not None and len(parents)>0 ): + if parents is not None and len(parents) > 0: for parent in self.parents(): jobid = dag.getNode(parent).jobId() - if ( jobid is not None ): + if jobid is not None: # Need to check if jobids still exist before adding them # as a dependency - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " checking for existence of parent job "+jobid+"\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " checking for existence of parent job " + + jobid + + "\n" + ) log.flush() if job_exists(jobid): - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " parent "+jobid+" found, adding to dependency list\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " parent " + + jobid + + " found, adding to dependency list\n" + ) log.flush() - if ( dependencies is None ): + if dependencies is None: dependencies = [jobid] else: dependencies.append(jobid) else: - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " parent "+jobid+" not found, ignoring\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + " parent " + + jobid + + " not found, ignoring\n" + ) log.flush() dependency = "" - if ( dependencies is not None ): + if dependencies is not None: # Filter dependencies to get the unique values; otherwise PBS gets confused. 
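Both submit() implementations funnel the surviving parent jobids through the same dedupe-sort-join step that follows; only the flag differs (qsub takes -W depend=afterok:..., dmsub takes -D). A compact sketch of the qsub form (inputs hypothetical):

    def depend_flag(parent_jobids, deptype="afterok"):
        # Dedupe (PBS gets confused by repeated jobids in a dependency
        # list), sort for reproducible command lines, then colon-join.
        deps = sorted(set(parent_jobids))
        return "-W depend=%s:%s" % (deptype, ":".join(deps)) if deps else ""

    print(depend_flag(["123.server", "120.server", "123.server"]))
    # -W depend=afterok:120.server:123.server

set() does the same job as the Cookbook unique() here, since the list is re-sorted immediately afterwards.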
dependencies = unique(dependencies) dependencies.sort() - #dependencies.reverse() - dependency = "-D "+dependencies[0] - if ( len(dependencies)>1 ): + # dependencies.reverse() + dependency = "-D " + dependencies[0] + if len(dependencies) > 1: for dep in dependencies[1:]: dependency += ":" + dep vars = "" - if ( self.vars() is not None ): - vars = "-v "+self.vars()[0] - if ( len(self.vars())>1 ): + if self.vars() is not None: + vars = "-v " + self.vars()[0] + if len(self.vars()) > 1: for var in self.vars()[1:]: - vars = vars+","+var + vars = vars + "," + var queue = "" - if ( self.queue() is not None ): - queue = "-q "+self.queue() + if self.queue() is not None: + queue = "-q " + self.queue() format = "" - if ( self.format() is not None ): - format = "-F "+self.format() + if self.format() is not None: + format = "-F " + self.format() mover = "" - if ( self.mover() is not None ): - queue = "-M "+self.mover() - cmd = (datasubmit+" -j oe -N "+self.name()+" "+queue+" "+dependency+" "+ - " "+format+" "+mover+" "+vars+" "+self.script()) - if ( verbose ): - log.write(str(datetime.datetime.now())+": job "+self.name()+ - " being submitted using command \""+cmd+"\"\n") + if self.mover() is not None: + queue = "-M " + self.mover() + cmd = ( + datasubmit + + " -j oe -N " + + self.name() + + " " + + queue + + " " + + dependency + + " " + + " " + + format + + " " + + mover + + " " + + vars + + " " + + self.script() + ) + if verbose: + log.write( + str(datetime.datetime.now()) + + ": job " + + self.name() + + ' being submitted using command "' + + cmd + + '"\n' + ) log.flush() - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout @@ -1016,50 +1271,62 @@ class DataJobNode(JobNode): jobid = "" try: stdout = pout.readlines() - if ( len(stdout)>0 ): + if len(stdout) > 0: jobid = stdout[0] - if ( jobid[-1]=="\n" ): + if jobid[-1] == "\n": jobid = jobid[:-1] stderr = perr.readlines() for line in stderr: - log.write(str(datetime.datetime.now())+": "+line) + log.write(str(datetime.datetime.now()) + ": " + line) log.flush() - except IndexError, err: - if(verbose): - sys.stderr.write(str(err) + "\n") + except IndexError as err: + if verbose: + sys.stderr.write(str(err) + "\n") pout.close() perr.close() p.wait() - if ( p.returncode!=0 or jobid is None or jobid=="" ): - log.write(str(datetime.datetime.now())+": job submission failed for job "+ - self.name()+", aborting\n") + if p.returncode != 0 or jobid is None or jobid == "": + log.write( + str(datetime.datetime.now()) + + ": job submission failed for job " + + self.name() + + ", aborting\n" + ) log.flush() sys.exit(-1) self.setJobId(jobid) - log.write(str(datetime.datetime.now())+": submitted job "+self.name()+ - " as jobid "+self.jobId()+"\n") + log.write( + str(datetime.datetime.now()) + + ": submitted job " + + self.name() + + " as jobid " + + self.jobId() + + "\n" + ) log.flush() - # if RETRY is defined or there is a POST script, call retry() - if ( self.retries()>0 or (self.postScript() is not None and not self.postScriptHasRun()) ): - self.retry(cmd,dag,log) - + if self.retries() > 0 or ( + self.postScript() is not None and not self.postScriptHasRun() + ): + self.retry(cmd, dag, log) + self.setSubmitted(True) # if nothing failed, do generic post-submission stuff - return 
self._postsubmit(dag,log, categories) + return self._postsubmit(dag, log, categories) class DAG(object): """ The directed acyclic graph of JobNodes """ - def __init__(self,file=None): + + def __init__(self, file=None): self._nodes = dict() self._filename = file - def setFileName(self,file): + def setFileName(self, file): self._filename = file def fileName(self): @@ -1068,12 +1335,12 @@ class DAG(object): def getNodes(self): return self._nodes - def addNode(self,node): + def addNode(self, node): nodename = node.name() self._nodes[nodename] = node - def getNode(self,name): - if ( name in self._nodes ): + def getNode(self, name): + if name in self._nodes: return self._nodes[name] else: return None @@ -1085,8 +1352,8 @@ class DAG(object): roots = None for name in self._nodes.keys(): node = self.getNode(name) - if ( node.parents() is None ): - if ( roots is None ): + if node.parents() is None: + if roots is None: roots = [name] else: roots.append(name) @@ -1096,37 +1363,44 @@ class DAG(object): root = self.getRootNodes() return True - def _writeDotSubtree(self,rootnode,fd,iswritten,forceoutput): + def _writeDotSubtree(self, rootnode, fd, iswritten, forceoutput): rootname = rootnode.name() - if ( rootnode.children() is not None and ( rootnode.isDone() or forceoutput ) ): + if rootnode.children() is not None and (rootnode.isDone() or forceoutput): for childname in rootnode.children(): childnode = self.getNode(childname) - if( childnode.isDone() or forceoutput ): - fd.write("\t"+rootname+" -> "+childname+";\n") + if childnode.isDone() or forceoutput: + fd.write("\t" + rootname + " -> " + childname + ";\n") childnode = self.getNode(childname) - if ( not iswritten[childname] ): - self._writeDotSubtree(childnode,fd,iswritten,forceoutput) + if not iswritten[childname]: + self._writeDotSubtree(childnode, fd, iswritten, forceoutput) iswritten[childname] = True - - def writeDotFile(self,filename,graphname,log,forceoutput): + + def writeDotFile(self, filename, graphname, log, forceoutput): """ Writes graph to filename using the .dot format associated with graphviz """ - if ( verbose ): - log.write(str(datetime.datetime.now())+": writing DOT file "+dotFile+"\n") + if verbose: + log.write( + str(datetime.datetime.now()) + ": writing DOT file " + dotFile + "\n" + ) log.flush() iswritten = dict() for nodename in self._nodes.keys(): iswritten[nodename] = False - dotfile = open(filename,"w") - dotfile.write("digraph "+graphname+" {\n") + dotfile = open(filename, "w") + dotfile.write("digraph " + graphname + " {\n") for root in self.getRootNodes(): rootnode = self.getNode(root) - self._writeDotSubtree(rootnode,dotfile,iswritten,forceoutput) + self._writeDotSubtree(rootnode, dotfile, iswritten, forceoutput) dotfile.write("}\n") dotfile.close() - if ( verbose ): - log.write(str(datetime.datetime.now())+": done writing DOT file "+dotFile+"\n") + if verbose: + log.write( + str(datetime.datetime.now()) + + ": done writing DOT file " + + dotFile + + "\n" + ) log.flush() def writeRescueDAG(self, log): @@ -1134,119 +1408,165 @@ class DAG(object): Rewrites .rescue which provides information about the current stat of the DAG """ - ## After all jobs are submitted, check if self.Done() is true. - ## If so, write "DONE" to the file. - if ( verbose ): - log.write(str(datetime.datetime.now())+": writing rescue DAG\n") + # After all jobs are submitted, check if self.Done() is true. + # If so, write "DONE" to the file. 
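writeRescueDAG() re-emits the DAG file with DONE appended to every JOB/DATA/SUBDAG line whose node has finished, bracketed by a .rescue.lck sentinel so that a crash mid-write leaves evidence behind. Just the marking step, sketched below (done_nodes is a hypothetical set of finished node names; the real code interrogates each JobNode via jobStatus()):

    import os

    def mark_done_lines(dag_path, done_nodes):
        rescue, lock = dag_path + ".rescue", dag_path + ".rescue.lck"
        with open(dag_path) as f:
            lines = f.readlines()
        open(lock, "w").close()  # sentinel: present == write in progress
        with open(rescue, "w") as out:
            for line in lines:
                token = line.split()
                if (token and token[0] in ("JOB", "DATA", "SUBDAG")
                        and "DONE" not in line):
                    # SUBDAG lines carry the node name in the third field
                    name = token[2] if token[0] == "SUBDAG" else token[1]
                    if name in done_nodes:
                        line = line.rstrip("\n") + " DONE\n"
                out.write(line)
        os.remove(lock)  # clean exit: the rescue file can be trusted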
+ if verbose: + log.write(str(datetime.datetime.now()) + ": writing rescue DAG\n") log.flush() file = self.fileName() - if ( file=="-" or file=="--" ): - raise RuntimeException,"Cannot reread stdin to create rescue DAG" - if ( os.path.exists(file+".rescue") and not os.path.exists(file+".rescue.lck") and - os.stat(file).st_mtimeos.stat(file+".rescue").st_mtime ): - log.write(str(datetime.datetime.now())+": ignoring "+file+ - ".rescue because it is older than "+file+"\n") + if os.path.exists(file + ".rescue") and os.path.exists( + file + ".rescue.lck" + ): + log.write( + str(datetime.datetime.now()) + + ": ignoring " + + file + + ".rescue due to possible corruption -- " + + file + + ".rescue.lck present\n" + ) + elif ( + os.path.exists(file + ".rescue") + and os.stat(file).st_mtime > os.stat(file + ".rescue").st_mtime + ): + log.write( + str(datetime.datetime.now()) + + ": ignoring " + + file + + ".rescue because it is older than " + + file + + "\n" + ) log.flush() - oldRescueDAG = open(file, 'r') + oldRescueDAG = open(file, "r") buffer = oldRescueDAG.readlines() oldRescueDAG.close() - newRescueDAG = open(file+".rescue", 'w') + newRescueDAG = open(file + ".rescue", "w") # create "lock" file - open(file+".rescue.lck", 'w').close() + open(file + ".rescue.lck", "w").close() for line in buffer: - if ( ( line.startswith("JOB") or line.startswith('DATA') or line.startswith("SUBDAG") ) and - ( 'DONE' not in line ) ): + if ( + line.startswith("JOB") + or line.startswith("DATA") + or line.startswith("SUBDAG") + ) and ("DONE" not in line): try: nodename = None - if ( line.startswith("JOB") or line.startswith('DATA') ): + if line.startswith("JOB") or line.startswith("DATA"): nodename = line.split()[1] - elif ( line.startswith("SUBDAG") ): + elif line.startswith("SUBDAG"): nodename = line.split()[2] node = self.getNode(nodename) - if ( node is not None ): + if node is not None: cond = node.jobStatus(log) - if ( not node.isHealthy() ): - if ( verbose ): - log.write(str(datetime.datetime.now())+": "+node.name()+" is unhealthy\n") + if not node.isHealthy(): + if verbose: + log.write( + str(datetime.datetime.now()) + + ": " + + node.name() + + " is unhealthy\n" + ) log.flush() newRescueDAG.write(line) - elif ( cond==NOT_FOUND ): + elif cond == NOT_FOUND: newRescueDAG.write(line) - elif ( cond==FAIL ): + elif cond == FAIL: node.childrenAreSick(self) newRescueDAG.write(line) - elif ( cond==IN_PROGRESS ): + elif cond == IN_PROGRESS: newRescueDAG.write(line) - elif ( node.isDone() ): - if ( verbose ): - log.write(str(datetime.datetime.now())+": "+node.name()+" is done\n") + elif node.isDone(): + if verbose: + log.write( + str(datetime.datetime.now()) + + ": " + + node.name() + + " is done\n" + ) log.flush() - if(line[-2].isspace()): + if line[-2].isspace(): newRescueDAG.write(line[:-1] + "DONE\n") else: newRescueDAG.write(line[:-1] + " DONE\n") else: - log.write(str(datetime.datetime.now())+": node "+nodename+ - " is None while writing rescue DAG\n") + log.write( + str(datetime.datetime.now()) + + ": node " + + nodename + + " is None while writing rescue DAG\n" + ) log.flush() - except AttributeError, err: - log.write(str(datetime.datetime.now())+": "+str(err)+'\n') + except AttributeError as err: + log.write(str(datetime.datetime.now()) + ": " + str(err) + "\n") log.flush() else: newRescueDAG.write(line) # destroy "lock" file - if ( os.path.exists(file+".rescue.lck") ): - os.remove(file+".rescue.lck") - newRescueDAG.close() - if ( verbose ): - log.write(str(datetime.datetime.now())+": done writing rescue 
DAG\n") + if os.path.exists(file + ".rescue.lck"): + os.remove(file + ".rescue.lck") + newRescueDAG.close() + if verbose: + log.write(str(datetime.datetime.now()) + ": done writing rescue DAG\n") log.flush() def _subtreeIsComplete(self, rootnode, log): done = False - if ( rootnode is None ): + if rootnode is None: done = True else: rootstatus = rootnode.jobStatus(log) - if ( rootstatus in [FAIL,NOT_FOUND] ): + if rootstatus in [FAIL, NOT_FOUND]: done = True - elif ( rootstatus in [IN_PROGRESS] ): + elif rootstatus in [IN_PROGRESS]: done = False - elif ( not rootnode.isHealthy() ): + elif not rootnode.isHealthy(): done = True else: done = rootnode.isDone() - if ( done and rootnode.isHealthy() and rootnode.children() is not None ): + if done and rootnode.isHealthy() and rootnode.children() is not None: for childname in rootnode.children(): childnode = self.getNode(childname) - if ( done ): - done = self._subtreeIsComplete(childnode,log) + if done: + done = self._subtreeIsComplete(childnode, log) return done def isComplete(self, log): finish = True for root in self.getRootNodes(): rootnode = self.getNode(root) - if ( finish ): - finish = self._subtreeIsComplete(rootnode,log) - if ( verbose ): - log.write(str(datetime.datetime.now())+": DAG completed == "+str(finish)+"\n") + if finish: + finish = self._subtreeIsComplete(rootnode, log) + if verbose: + log.write( + str(datetime.datetime.now()) + + ": DAG completed == " + + str(finish) + + "\n" + ) log.flush() return finish - + + def usage(): """ Prints usage to the screen """ sys.stderr.write("Usage: dagsub [options] file [file]\n") - sys.stderr.write("Options:\n\t-help\n\t-force\n\t-no_fork\n\t-no_submit\n\t-verbose\n\t-log "+ - "logfile\n\t-maxidle NumberOfJobs\n\t-maxjobs NumberOfJobs\n\t-maxjobwait TimeToWait\n") + sys.stderr.write( + "Options:\n\t-help\n\t-force\n\t-no_fork\n\t-no_submit\n\t-verbose\n\t-log " + + "logfile\n\t-maxidle NumberOfJobs\n\t-maxjobs NumberOfJobs\n\t-maxjobwait TimeToWait\n" + ) + verbose = False doFork = True @@ -1260,7 +1580,8 @@ timeout = 30 jobsubmit = "qsub" datasubmit = "dmsub" maxJobWait = 30 - + + def main(): """ main() does the following: @@ -1281,7 +1602,7 @@ def main(): global timeout global jobsubmit global maxJobWait - + try: # The convention is to use single dash for short command (i.e. -h), and double dashes # (i.e. --no_submit) for long command. 
In condor submit dag, user uses single dash @@ -1290,167 +1611,178 @@ def main(): cmdline = sys.argv[1:] pcmd = [] for each in cmdline: - if ( each.startswith("-") and not each.startswith("--") ): - pcmd.append('-' + each) + if each.startswith("-") and not each.startswith("--"): + pcmd.append("-" + each) else: pcmd.append(each) # Command line argument processing - opts, args = getopt.getopt(pcmd,"hv", - ["allowversionmismatch", - "DumpRescue", - "force", - "help", - "no_fork", - "no_recurse", - "no_submit", - "update_submit", - "usedagdir", - "verbose", - "append=", - "autorescue=", - "config=", - "dorescuefrom", - "insert_sub_file", - "log=", - "maxidle=", - "maxjobs=", - "notification=", - "oldrescue", - "outfile_dir=", - "maxjobwait=", - "timeout="]) - except getopt.GetoptError, err: - sys.stderr.write(str(err)+"\n") + opts, args = getopt.getopt( + pcmd, + "hv", + [ + "allowversionmismatch", + "DumpRescue", + "force", + "help", + "no_fork", + "no_recurse", + "no_submit", + "update_submit", + "usedagdir", + "verbose", + "append=", + "autorescue=", + "config=", + "dorescuefrom", + "insert_sub_file", + "log=", + "maxidle=", + "maxjobs=", + "notification=", + "oldrescue", + "outfile_dir=", + "maxjobwait=", + "timeout=", + ], + ) + except getopt.GetoptError as err: + sys.stderr.write(str(err) + "\n") usage() sys.exit(1) try: for opt, value in opts: - if ( opt=="--force" ): + if opt == "--force": overwrite = True - if ( opt in ("-h", "--help") ): + if opt in ("-h", "--help"): usage() sys.exit(0) - if ( opt=="--DumpRescue" ): + if opt == "--DumpRescue": dumpRescueAndExit = False - if ( opt=="--log" ): + if opt == "--log": logFile = value - if (opt=="--maxidle"): + if opt == "--maxidle": maxIdle = value - if (opt=="--maxjobs"): + if opt == "--maxjobs": maxJobs = value - if (opt=="--notification"): + if opt == "--notification": notification = value - if ( opt=="--no_fork" ): + if opt == "--no_fork": doFork = False - if ( opt=="--no_submit" ): + if opt == "--no_submit": doSubmit = False - if ( opt in ("-v", "--verbose") ): + if opt in ("-v", "--verbose"): verbose = True - if ( opt=="--maxjobwait" ): + if opt == "--maxjobwait": try: maxJobWait = int(value) - except: - sys.stderr.warn("Unable to interpret maxjobwait value \"%s\", ignoring.\n" % str(value)) - if ( opt=="--timeout" ): + except Exception: + sys.stderr.warn( + 'Unable to interpret maxjobwait value "%s", ignoring.\n' + % str(value) + ) + if opt == "--timeout": try: timeout = int(value) - except: - sys.stderr.warn("Unable to interpret timeout value \"%s\", ignoring.\n" % str(value)) - except NameError, err: - sys.stderr.write(str(err)+"\n") + except Exception: + sys.stderr.warn( + 'Unable to interpret timeout value "%s", ignoring.\n' + % str(value) + ) + except NameError as err: + sys.stderr.write(str(err) + "\n") usage() sys.exit(0) - + if args == []: usage() sys.exit(0) - + for file in args: # We fork off a separate process for each DAG and leave them running # in the background. Otherwise, it's virtually impossible to do # things like retries without running an additional daemon/service. rescue_file = file + ".rescue" - if (os.path.exists(rescue_file)): - print "Rescue Dag found ..." - print "Please do ONE of the following: " - print "1. Remove "+ rescue_file + " and submit using "+ file - print "\t\t OR" - print "2. Submit using " + rescue_file - print "Exiting" + if os.path.exists(rescue_file): + print("Rescue Dag found ...") + print("Please do ONE of the following: ") + print("1. 
Remove " + rescue_file + " and submit using " + file) + print("\t\t OR") + print("2. Submit using " + rescue_file) + print("Exiting") sys.exit(1) - - if ( doFork ): + + if doFork: child = os.fork() else: child = 0 dag = None categories = {} - if( child==0 ): + if child == 0: pid = os.getpid() dag = DAG() - if ( not os.path.exists(file) ): - sys.stderr.write(file+" does not exist, ignoring\n") - elif ( not os.path.isfile(file) ): - sys.stderr.write(file+" is not a file, ignoring\n") + if not os.path.exists(file): + sys.stderr.write(file + " does not exist, ignoring\n") + elif not os.path.isfile(file): + sys.stderr.write(file + " is not a file, ignoring\n") else: dag.setFileName(file) - if ( file=="-" ): + if file == "-": fd = sys.stdin else: - fd = open(file,"r") + fd = open(file, "r") lines = fd.readlines() for line in lines: token = line.split() # parse DAG file - if ( token!=[] and token[0].find("#")!=0 ): - if ( token[0]=="DOT" ): + if token != [] and token[0].find("#") != 0: + if token[0] == "DOT": generateDotFile = True dotFile = token[1] - elif ( token[0]=="JOB" ): - node = JobNode(token[1],token[2]) + elif token[0] == "JOB": + node = JobNode(token[1], token[2]) try: - if(token[3] == "DONE"): + if token[3] == "DONE": node.setDone(True) - elif(token[3] == "DIR"): + elif token[3] == "DIR": node.setScript(" -d " + token[4]) - if(token[5] == "DONE"): + if token[5] == "DONE": node.setDone(True) - except IndexError, err: + except IndexError: pass dag.addNode(node) - elif ( token[0]=="DATA" ): - node = DataJobNode(token[1],token[2]) + elif token[0] == "DATA": + node = DataJobNode(token[1], token[2]) try: - for i in range(3,len(token),2): - if ( token[i]=="DONE" ): + for i in range(3, len(token), 2): + if token[i] == "DONE": node.setDone(True) - elif ( token[i]=="FORMAT" ): - node.setFormat(token[i+1]) - elif ( token[i]=="MOVER" ): - node.setMover(token[i+1]) - elif ( token[i]=="QUEUE" ): - node.setQueue(token[i+1]) - except IndexError, err: + elif token[i] == "FORMAT": + node.setFormat(token[i + 1]) + elif token[i] == "MOVER": + node.setMover(token[i + 1]) + elif token[i] == "QUEUE": + node.setQueue(token[i + 1]) + except IndexError: pass dag.addNode(node) - elif ( token[0]=="SUBDAG" ): - node = SubDAGNode(token[2],token[3]) + elif token[0] == "SUBDAG": + node = SubDAGNode(token[2], token[3]) try: - if ( len(token)>4 and token[4]=="DONE" ): + if len(token) > 4 and token[4] == "DONE": node.setDone(True) - except IndexError, err: + except IndexError: pass dag.addNode(node) - elif ( token[0]=="PARENT" ): + elif token[0] == "PARENT": foundchild = False parents = [token[1]] children = [] for name in token[2:]: - if ( name=="CHILD" ): + if name == "CHILD": foundchild = True - elif ( foundchild ): + elif foundchild: children.append(name) else: parents.append(name) @@ -1460,103 +1792,128 @@ def main(): parentnode.addChild(child) childnode = dag.getNode(child) childnode.addParent(parent) - elif ( token[0]=="RETRY" ): + elif token[0] == "RETRY": node = dag.getNode(token[1]) node.setRetries(token[2]) - if ( len(token)==5 ): + if len(token) == 5: node.setNoRetryStatus(int(token[4])) - elif ( token[0]=="SCRIPT" ): + elif token[0] == "SCRIPT": node = dag.getNode(token[2]) script = token[3:] - if ( token[1]=="PRE" ): + if token[1] == "PRE": node.setPreScript(script) - elif ( token[1]=="POST" ): + elif token[1] == "POST": node.setPostScript(script) - elif ( token[0]=="VARS" ): + elif token[0] == "VARS": node = dag.getNode(token[1]) for var in token[2:]: node.addVar(var) - elif ( token[0]=="PRIORITY" ): + 
elif token[0] == "PRIORITY": node = dag.getNode(token[1]) - if ( node is not None ): + if node is not None: node.setPriority(token[2]) else: - sys.stderr.write("%s: Cannot set priority on undefined node %s" % (file,token[1])) - elif ( token[0]=="ABORT-DAG-ON" ): + sys.stderr.write( + "%s: Cannot set priority on undefined node %s" + % (file, token[1]) + ) + elif token[0] == "ABORT-DAG-ON": node = dag.getNode(token[1]) - if ( len(token)==3 ): + if len(token) == 3: node.addAbortStatus(int(token[2])) - elif ( len(token)==5 ): - node.addAbortStatus(int(token[2]),int(token[4])) - elif ( token[0]=="CATEGORY" ): + elif len(token) == 5: + node.addAbortStatus(int(token[2]), int(token[4])) + elif token[0] == "CATEGORY": node = dag.getNode(token[1]) if not token[2] in categories.keys(): categories[token[2]] = Category(token[2]) node.set_category(token[2]) categories[token[2]].add_job_node(token[1]) # sys.stderr.write(file+": CATEGORY not supported, ignoring\n") - elif ( token[0]=="MAXJOBS" ): + elif token[0] == "MAXJOBS": if not token[1] in categories.keys(): - sys.stderr.write(file+": CATEGORY = "+token[1]+" not defined, ignoring MAXJOBS\n") + sys.stderr.write( + file + + ": CATEGORY = " + + token[1] + + " not defined, ignoring MAXJOBS\n" + ) else: categories[token[1]].set_max(int(token[2])) - elif ( token[0]=="CONFIG" ): - sys.stderr.write(file+": CONFIG not supported, ignoring\n") + elif token[0] == "CONFIG": + sys.stderr.write( + file + ": CONFIG not supported, ignoring\n" + ) else: - sys.stderr.write(file+": unknown keyword "+token[0]+", abort\n") + sys.stderr.write( + file + ": unknown keyword " + token[0] + ", abort\n" + ) sys.exit(1) fd.close() base = file.split(".")[0] - + log = None - if ( logFile is None ): - log = open(base+".log","w") - elif ( logFile=="-" or logFile=="--" ): + if logFile is None: + log = open(base + ".log", "w") + elif logFile == "-" or logFile == "--": log = sys.stdout else: - log = open(logFile,"w") - - if ( verbose ): - log.write(str(datetime.datetime.now())+": "+file+" being processed by pid "+str(pid)+"\n") + log = open(logFile, "w") + + if verbose: + log.write( + str(datetime.datetime.now()) + + ": " + + file + + " being processed by pid " + + str(pid) + + "\n" + ) log.flush() - if ( dag.getRootNodes() is not None ): + if dag.getRootNodes() is not None: # Not sure how to handle log name, if multiple input files are present? - # Current implementation: + # Current implementation: # Only allow the custom logname, if single input file submitted. 
- if ( dumpRescueAndExit ): + if dumpRescueAndExit: dag.writeRescueDAG(log) - dag.writeDotFile(dotFile,base,log,True) - elif ( doSubmit ): - if ( dotFile is not None ): - dag.writeDotFile(dotFile,base,log,False) + dag.writeDotFile(dotFile, base, log, True) + elif doSubmit: + if dotFile is not None: + dag.writeDotFile(dotFile, base, log, False) finish = False for root in dag.getRootNodes(): rootnode = dag.getNode(root) - rootnode.submit(dag,log,categories) + rootnode.submit(dag, log, categories) finish = dag.writeRescueDAG(log) - if ( dotFile is not None ): - dag.writeDotFile(dotFile,base,log,False) + if dotFile is not None: + dag.writeDotFile(dotFile, base, log, False) # Monitor the DAG and write out rescue DAGs periodically - while ( not dag.isComplete(log) ): + while not dag.isComplete(log): time.sleep(timeout) dag.writeRescueDAG(log) - if ( dotFile is not None ): - dag.writeDotFile(dotFile,base,log,False) - + if dotFile is not None: + dag.writeDotFile(dotFile, base, log, False) + # last-ditch effort to finish the .DOT file up before exiting, if specified - if ( dotFile is not None ): - dag.writeDotFile(dotFile,base,log,True) + if dotFile is not None: + dag.writeDotFile(dotFile, base, log, True) else: usage() - - log.write(str(datetime.datetime.now())+": processing of "+file+" complete.\n") + + log.write( + str(datetime.datetime.now()) + + ": processing of " + + file + + " complete.\n" + ) log.flush() sys.exit(0) # if we get here, we're the parent process, so exit sys.exit(0) + if __name__ == "__main__": main() diff --git a/bin/dmsub b/bin/dmsub index 313db7f..fdd995b 100755 --- a/bin/dmsub +++ b/bin/dmsub @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # # dmsub: Submit a data movement job using a variety of methods. # Copyright 2009,2014 University of Tennessee @@ -10,7 +10,6 @@ # $Date$ import getopt -import datetime import os import shutil import subprocess @@ -24,9 +23,10 @@ blksize = 1048576 streams = 1 stripesize = 268787456 + class TransferList(object): - def __init__(self,destlist=None,options=None): - if ( destlist is not None ): + def __init__(self, destlist=None, options=None): + if destlist is not None: self._destlist = destlist else: self._destlist = None @@ -44,126 +44,136 @@ class TransferList(object): self._destsubj = None self._cleanup = False self._retries = 0 - if ( options is not None ): + if options is not None: self.setOptions(options) - def setOptions(self,options): - if ( isdict(options) ): + def setOptions(self, options): + if isinstance(options, dict): for option in options.keys(): - self.setOption(option,options[option]) + self.setOption(option, options[option]) - def setOption(self,keyword,value): - if ( keyword=="x509proxy" ): + def setOption(self, keyword, value): + if keyword == "x509proxy": self._x509proxy = value - elif ( keyword=="alt_protocols" ): + elif keyword == "alt_protocols": self._altproto = value - elif ( keyword=="binary" ): - if ( value=="false" or value=="False" ): + elif keyword == "binary": + if value == "false" or value == "False": value = False else: value = True self._binary = value - elif ( keyword=="block_size" ): + elif keyword == "block_size": self._blksize = value - elif ( keyword=="stripe_size" ): + elif keyword == "stripe_size": self._stripsize = value - elif ( keyword=="tcp_buffer" ): + elif keyword == "tcp_buffer": self._tcpbuf = value - elif ( keyword=="no_tpt" ): - if ( value=="false" or value=="False" ): + elif keyword == "no_tpt": + if value == "false" or value == "False": value = False else: value = True 
self._allow3rdparty = not value - elif ( keyword=="dcau" ): - if ( value=="false" or value=="False" ): + elif keyword == "dcau": + if value == "false" or value == "False": value = False else: value = True self._dcau = value - elif ( keyword=="streams" ): + elif keyword == "streams": self._nstreams = value - elif ( keyword=="concurrency" ): + elif keyword == "concurrency": self._nsimfiles = value - elif ( keyword=="src_subject" ): + elif keyword == "src_subject": self._srcsubj = value - elif ( keyword=="dest_subject" ): + elif keyword == "dest_subject": self._destsubj = value - elif ( keyword=="cleanup" ): - if ( value=="false" or value=="False" ): + elif keyword == "cleanup": + if value == "false" or value == "False": value = False else: value = True self._cleanup = value - elif ( keyword=="retries" ): + elif keyword == "retries": self._retries = value - def getOption(self,keyword): - if ( keyword=="x509proxy" ): + def getOption(self, keyword): + if keyword == "x509proxy": return self._x509proxy - elif ( keyword=="alt_protocols" ): + elif keyword == "alt_protocols": return self._altproto - elif ( keyword=="binary" ): + elif keyword == "binary": return self._binary - elif ( keyword=="block_size" ): + elif keyword == "block_size": return self._blksize - elif ( keyword=="stripe_size" ): + elif keyword == "stripe_size": return self._stripesize - elif ( keyword=="tcp_buffer" ): + elif keyword == "tcp_buffer": return self._tcpbuf - elif ( keyword=="no_tpt" ): + elif keyword == "no_tpt": return self._allow3rdparty - elif ( keyword=="dcau" ): + elif keyword == "dcau": return self._dcau - elif ( keyword=="streams" ): + elif keyword == "streams": return self._streams - elif ( keyword=="concurrency" ): + elif keyword == "concurrency": return self._nsimfiles - elif ( keyword=="src_subject" ): + elif keyword == "src_subject": return self._srcsubj - elif ( keyword=="dest_subject" ): + elif keyword == "dest_subject": return self._destsubj - elif ( keyword=="cleanup" ): + elif keyword == "cleanup": return self._cleanup - elif ( keyword=="retries" ): + elif keyword == "retries": return self._retries else: return None - def addURLpair(self,src,dest): - if ( self._destlist is None ): + def addURLpair(self, src, dest): + if self._destlist is None: self._destlist = dict() self._destlist[src] = dest - def getSrcURL(self,dest): + def getSrcURL(self, dest): src = None - if ( self._destlist is not None and (dest in self._destlist.values()) ): - for (mysrc,mydest) in self._destlist.items(): - if ( dest==mydest ): + if self._destlist is not None and (dest in self._destlist.values()): + for mysrc, mydest in self._destlist.items(): + if dest == mydest: src = mysrc break return src - - def getDestURL(self,src): + + def getDestURL(self, src): dest = None - if ( self._destlist is not None and self._destlist.has_key(src) ): + if self._destlist is not None and src in self._destlist: dest = self._destlist[src] return dest def getURLdict(self): - if ( self._destlist is not None ): + if self._destlist is not None: return self._destlist else: return dict() def getAllSrcURLs(self): - if ( self._destlist is not None ): + if self._destlist is not None: return self._destlist.keys() else: return () + class ScheduledDataTransfer(object): - def __init__(self,filename=None,desctype="urlpair",movertype="supermover",batchtype="pbs",server=None,port=None,credcache=False): + def __init__( + self, + filename=None, + desctype="urlpair", + movertype="supermover", + batchtype="pbs", + server=None, + port=None, + credcache=False, + ): 
self.setName("dmsub") self.setWalltime("1:00:00") self.setQueue(None) @@ -182,98 +192,99 @@ class ScheduledDataTransfer(object): self._dep = None self._vars = None - def setFileName(self,filename): + def setFileName(self, filename): self._file = filename self._needupdate = True def getFileName(self): return self._file - def setName(self,name): + def setName(self, name): self._name = name def getName(self): return self._name - def setWalltime(self,walltime): + def setWalltime(self, walltime): self._walltime = walltime def getWalltime(self): return self._walltime - def setQueue(self,queue): + def setQueue(self, queue): self._queue = queue def getQueue(self): return self._queue - def setErrLog(self,errlog): + def setErrLog(self, errlog): self._errlog = errlog def getErrLog(self): return self._errlog - def setOutLog(self,outlog): + def setOutLog(self, outlog): self._outlog = outlog def getOutLog(self): return self._outlog - def setJoinLogs(self,joinlogs): + def setJoinLogs(self, joinlogs): self._joinlogs = joinlogs def getJoinLogs(self): return self._joinlogs - def setDescriptionType(self,type): - if ( type=="urlpair" or - type=="dmover" or - type=="rft" or - type=="stork" ): + def setDescriptionType(self, type): + if type == "urlpair" or type == "dmover" or type == "rft" or type == "stork": self._desctype = type else: - raise RuntimeError("invalid description type \""+type+"\"") + raise RuntimeError('invalid description type "' + type + '"') self._needupdate = True def getDescriptionType(self): return self._desctype - def setMoverType(self,type): - if ( type=="supermover" or - type=="guc" or - type=="globus-url-copy" or - type=="dmover" or - type=="rft" or - type=="stork" ): + def setMoverType(self, type): + if ( + type == "supermover" + or type == "guc" + or type == "globus-url-copy" + or type == "dmover" + or type == "rft" + or type == "stork" + ): self._movertype = type else: - raise RuntimeError("invalid mover type \""+type+"\"") + raise RuntimeError('invalid mover type "' + type + '"') def getMoverType(self): return self._movertype - def setBatchType(self,type): - if ( type=="pbs" or - type=="torque" or - type=="dmover" or - type=="rft" or - type=="stork" or - type=="condor" ): + def setBatchType(self, type): + if ( + type == "pbs" + or type == "torque" + or type == "dmover" + or type == "rft" + or type == "stork" + or type == "condor" + ): self._batchtype = type else: - raise RuntimeError("invalid batch type \""+type+"\"") + raise RuntimeError('invalid batch type "' + type + '"') def getBatchType(self): return self._batchtype - def setDependency(self,dep): + def setDependency(self, dep): self._dep = dep def getDependency(self): return self._dep - def setVars(self,vars): - if ( self._vars is None ): + def setVars(self, vars): + if self._vars is None: self._vars = [] varlist = vars.split(",") for var in varlist: @@ -282,154 +293,158 @@ class ScheduledDataTransfer(object): def getVars(self): return self._vars - def setServer(self,server): + def setServer(self, server): self._server = server def getServer(self): return self._server - def setPort(self,Port): + def setPort(self, Port): self._Port = Port def getPort(self): return self._Port - def setCredCache(self,credcache): + def setCredCache(self, credcache): self._credcache = credcache def getCredCache(self): return self._credcache def transferList(self): - if ( not self._needupdate and self._cachedlist is not None ): - if ( debug ): + if not self._needupdate and self._cachedlist is not None: + if debug: sys.stderr.write("using cached 
version of transfer list\n") return self._cachedlist - if ( debug ): + if debug: sys.stderr.write("generating new version of transfer list\n") list = None try: - fd = open(self._file,'r') - if ( self._desctype=="urlpair" or self._desctype=="dmover" ): + fd = open(self._file, "r") + if self._desctype == "urlpair" or self._desctype == "dmover": list = self._readSimpleXferList(fd) - elif ( self._desctype=="rft" ): + elif self._desctype == "rft": list = self._readRFTXferList(fd) - elif ( self._desctype=="stork" ): + elif self._desctype == "stork": list = self._readStorkXferList(fd) fd.close() - except IOError,msg: - raise IOError,msg + except IOError as msg: + raise IOError(msg) self._needupdate = False self._cachedlist = list return list - def _readSimpleXferList(self,fd): + def _readSimpleXferList(self, fd): list = TransferList() lines = fd.readlines() for line in lines: - if ( not line.startswith("#") ): + if not line.startswith("#"): token = line.split() - if ( len(token)>1 ): - list.addURLpair(token[0],token[1]) + if len(token) > 1: + list.addURLpair(token[0], token[1]) return list - def _readRFTXferList(self,fd): + def _readRFTXferList(self, fd): # RFT format is basically a bunch of options in a specific order # (values only, no keywords for some unknown reason) followed by # a simple URL pair list (on separate lines, again for some unknown # reason). list = TransferList() line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("binary",line.rstrip('\n')) + list.setOption("binary", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("block_size",line.rstrip('\n')) + list.setOption("block_size", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("tcp_buffer",line.rstrip('\n')) + list.setOption("tcp_buffer", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("no_tpt",line.rstrip('\n')) + list.setOption("no_tpt", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("streams",line.rstrip('\n')) + list.setOption("streams", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("dcau",line.rstrip('\n')) + list.setOption("dcau", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("concurrency",line.rstrip('\n')) + list.setOption("concurrency", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("src_subject",line.rstrip('\n')) + list.setOption("src_subject", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("dest_subject",line.rstrip('\n')) + list.setOption("dest_subject", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - list.setOption("cleanup",line.rstrip('\n')) + list.setOption("cleanup", line.rstrip("\n")) line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - 
list.setOption("retries",line.rstrip('\n')) - while ( True ): + list.setOption("retries", line.rstrip("\n")) + while True: line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - if ( len(line)==0 ): + if len(line) == 0: break - src = line.rstrip('\n') + src = line.rstrip("\n") line = fd.readline() - while ( line.startswith("#") ): + while line.startswith("#"): line = fd.readline() - if ( len(line)==0 ): + if len(line) == 0: break - dest = line.rstrip('\n') - list.addURLpair(src,dest) + dest = line.rstrip("\n") + list.addURLpair(src, dest) return list - def _readStorkXferList(self,fd): + def _readStorkXferList(self, fd): list = TransferList() src = None dest = None lines = fd.readlines() for line in lines: - if ( not line.startswith("//") ): - token = line.split(); - if ( token[0]=="src_url" ): - src = token[2].rstrip(';').rstrip('"').lstrip('"') - elif ( token[0]=="dest_url" ): - dest = token[2].rstrip(';').rstrip('"').lstrip('"') - elif ( token[0]=="x509proxy" ): - list.setOption("x509proxy",token[2].rstrip(';').rstrip('"').lstrip('"')) - elif ( token[0]=="alt_protocols" ): - list.setOption("alt_protocols",token[2].rstrip(';').rstrip('"').lstrip('"')) - list.addURLpair(src,dest) + if not line.startswith("//"): + token = line.split() + if token[0] == "src_url": + src = token[2].rstrip(";").rstrip('"').lstrip('"') + elif token[0] == "dest_url": + dest = token[2].rstrip(";").rstrip('"').lstrip('"') + elif token[0] == "x509proxy": + list.setOption( + "x509proxy", token[2].rstrip(";").rstrip('"').lstrip('"') + ) + elif token[0] == "alt_protocols": + list.setOption( + "alt_protocols", token[2].rstrip(";").rstrip('"').lstrip('"') + ) + list.addURLpair(src, dest) return list - def writeDescription(self,filename=None,desctype="urlpair"): + def writeDescription(self, filename=None, desctype="urlpair"): list = self.transferList() - if ( filename is None ): - fd = open(self.getName()+"."+desctype) + if filename is None: + fd = open(self.getName() + "." 
+ desctype, "w")
        else:
-            fd = open(filename,'w')
-        if ( desctype=="urlpair" or desctype=="dmover" ):
+            fd = open(filename, "w")
+        if desctype == "urlpair" or desctype == "dmover":
            fd.write("# automatically generated by dmsub\n")
            fd.write("# urlpair/dmover format\n")
            for src in list.getAllSrcURLs():
-                fd.write(src+" "+list.getDestURL(src)+"\n")
-        elif ( desctype=="rft" ):
+                fd.write(src + " " + list.getDestURL(src) + "\n")
+        elif desctype == "rft":
            # RFT format is basically a bunch of options in a specific order
            # (values only, no keywords for some unknown reason) followed by
            # a simple URL pair list (on separate lines, again for some unknown
@@ -437,95 +452,110 @@ class ScheduledDataTransfer(object):
            fd.write("# automatically generated by dmsub\n")
            fd.write("# RFT format\n")
            fd.write("# binary?\n")
-            fd.write(str(list.getOption("binary"))+"\n")
+            fd.write(str(list.getOption("binary")) + "\n")
            fd.write("# block size in bytes\n")
-            fd.write(str(list.getOption("block_size"))+"\n")
+            fd.write(str(list.getOption("block_size")) + "\n")
            fd.write("# TCP buffer size in bytes\n")
-            fd.write(str(list.getOption("tcp_buffer"))+"\n")
+            fd.write(str(list.getOption("tcp_buffer")) + "\n")
            fd.write("# No 3rd-party transfers?\n")
-            fd.write(str(not list.getOption("no_tpt"))+"\n")
+            fd.write(str(not list.getOption("no_tpt")) + "\n")
            fd.write("# parallel streams\n")
-            fd.write(str(list.getOption("streams"))+"\n")
+            fd.write(str(list.getOption("streams")) + "\n")
            fd.write("# Data Channel Authentication (DCAU)\n")
-            fd.write(str(list.getOption("dcau"))+"\n")
+            fd.write(str(list.getOption("dcau")) + "\n")
            fd.write("# concurrency\n")
-            fd.write(str(list.getOption("concurrency"))+"\n")
+            fd.write(str(list.getOption("concurrency")) + "\n")
            fd.write("# grid subject of src server\n")
-            fd.write(str(list.getOption("src_subject"))+"\n")
+            fd.write(str(list.getOption("src_subject")) + "\n")
            fd.write("# grid subject of dest server\n")
-            fd.write(str(list.getOption("dest_subject"))+"\n")
+            fd.write(str(list.getOption("dest_subject")) + "\n")
            fd.write("# clean up all transfers on failure of any\n")
-            fd.write(str(list.getOption("cleanup"))+"\n")
+            fd.write(str(list.getOption("cleanup")) + "\n")
            fd.write("# max retries\n")
-            fd.write(str(list.getOption("retries"))+"\n")
+            fd.write(str(list.getOption("retries")) + "\n")
            fd.write("# src/dest URL pairs\n")
            for src in list.getAllSrcURLs():
-                fd.write(src+"\n")
-                fd.write(list.getDestURL(src)+"\n")
-        elif ( desctype=="stork" ):
+                fd.write(src + "\n")
+                fd.write(list.getDestURL(src) + "\n")
+        elif desctype == "stork":
            fd.write("// automatically generated by dmsub\n")
            fd.write("// stork format\n")
            for src in list.getAllSrcURLs():
                fd.write("[\n")
                fd.write("  dap_type = transfer;\n")
-                fd.write("  src_url = \""+str(src)+"\";\n")
-                fd.write("  dest_url = \""+str(list.getDestURL(src))+"\";\n")
-                if ( list.getOption("x509proxy")!="default" ):
-                    fd.write("  x509proxy = \""+str(list.getOption("x509proxy"))+"\";\n")
-                elif ( "X509_USER_PROXY" in os.environ ):
-                    fd.write("  x509proxy = \""+os.environ["X509_USER_PROXY"]+"\";\n")
-                if ( list.getOption("alt_protocols") is not None ):
-                    fd.write("  alt_protocols = \""+str(list.getOption("alt_protocols"))+"\";\n")
+                fd.write('  src_url = "' + str(src) + '";\n')
+                fd.write('  dest_url = "' + str(list.getDestURL(src)) + '";\n')
+                if list.getOption("x509proxy") != "default":
+                    fd.write(
+                        '  x509proxy = "' + str(list.getOption("x509proxy")) + '";\n'
+                    )
+                elif "X509_USER_PROXY" in os.environ:
+                    fd.write(
+                        '  x509proxy = "' + os.environ["X509_USER_PROXY"] + '";\n'
+                    )
+                if list.getOption("alt_protocols") is not None:
+                    fd.write(
+                        '  alt_protocols = "'
+                        + str(list.getOption("alt_protocols"))
+                        + '";\n'
+                    )
                fd.write("]\n")
        else:
-            raise RuntimeError,"invalid description type \""+type+"\""
+            raise RuntimeError('invalid description type "' + desctype + '"')
        fd.close()

-    def _doCredCache(self,proxy):
-        if ( self.getCredCache() ):
+    def _doCredCache(self, proxy):
+        if self.getCredCache():
            # the following assumes that $HOME is shared with the
            # data mover nodes on which the job will run
-            cachedir = os.environ["HOME"]+"/.credcache"
-            if ( not os.path.exists(cachedir) ):
-                os.mkdir(cachedir,0700)
-            newcred = cachedir+"/"+str(uuid.uuid1())
-            shutil.copyfile(os.environ["X509_USER_PROXY"],newcred)
-            os.chmod(newcred,0600)
-            if ( debug ):
-                sys.stderr.write("caching credential "+proxy+" as "+newcred+"\n")
+            cachedir = os.environ["HOME"] + "/.credcache"
+            if not os.path.exists(cachedir):
+                os.mkdir(cachedir, 0o700)
+            newcred = cachedir + "/" + str(uuid.uuid1())
+            shutil.copyfile(os.environ["X509_USER_PROXY"], newcred)
+            os.chmod(newcred, 0o600)
+            if debug:
+                sys.stderr.write(
+                    "caching credential " + proxy + " as " + newcred + "\n"
+                )
            return newcred
        else:
            return proxy
-    
-    def submit(self,dosubmit):
+
+    def submit(self, dosubmit):
        jobid = None
-        if ( self.getBatchType()=="pbs" or self.getBatchType()=="torque" ):
+        if self.getBatchType() == "pbs" or self.getBatchType() == "torque":
            jobid = self._submitPBS(dosubmit)
-        elif ( self.getBatchType()=="dmover" ):
+        elif self.getBatchType() == "dmover":
            jobid = self._submitDMover(dosubmit)
-        elif ( self.getBatchType()=="rft" ):
+        elif self.getBatchType() == "rft":
            jobid = self._submitRFT(dosubmit)
-        elif ( self.getBatchType()=="stork" or self.getBatchType()=="condor" ):
+        elif self.getBatchType() == "stork" or self.getBatchType() == "condor":
            jobid = self._submitStork(dosubmit)
-        if ( jobid is not None ):
-            sys.stdout.write(jobid+"\n")
+        if jobid is not None:
+            sys.stdout.write(jobid + "\n")
        return jobid

-    def _submitPBS(self,dosubmit):
+    def _submitPBS(self, dosubmit):
        jobid = None
        epilogue = ""
-        if ( self.getMoverType()=="stork" or self.getMoverType()=="rft" ):
-            raise RuntimeError, "mover type \""+self.getMoverType()+"\" is incompatible with PBS"
-        if ( debug ):
-            sys.stderr.write("mover="+self.getMoverType()+"\n")
-        if ( dosubmit ):
+        if self.getMoverType() == "stork" or self.getMoverType() == "rft":
+            raise RuntimeError(
+                'mover type "' + self.getMoverType() + '" is incompatible with PBS'
+            )
+        if debug:
+            sys.stderr.write("mover=" + self.getMoverType() + "\n")
+        if dosubmit:
            cmd = "qsub"
-            #pin,pout,perr = os.popen3(cmd)
-            p = subprocess.Popen(cmd, shell=True,
-                                 stdin=subprocess.PIPE,
-                                 stdout=subprocess.PIPE,
-                                 stderr=subprocess.PIPE,
-                                 close_fds=True)
+            # pin,pout,perr = os.popen3(cmd)
+            p = subprocess.Popen(
+                cmd,
+                shell=True,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                close_fds=True,
+            )
            pin = p.stdin
            perr = p.stderr
            pout = p.stdout
@@ -533,170 +563,218 @@ class ScheduledDataTransfer(object):
            pin = sys.stdout
            pout = open("/dev/null")
            perr = open("/dev/null")
-        pin.write("#PBS -N "+self.getName()+"\n")
+        pin.write("#PBS -N " + self.getName() + "\n")
        pin.write("#PBS -S /bin/bash\n")
-        if ( self.getErrLog() is not None ):
-            pin.write("#PBS -e "+self.getErrLog()+"\n")
-        if ( self.getOutLog() is not None ):
-            pin.write("#PBS -o "+self.getOutLog()+"\n")
-        if ( self.getJoinLogs() is not None ):
-            pin.write("#PBS -j "+self.getJoinLogs()+"\n")
-        pin.write("#PBS -l 
walltime="+self.getWalltime()+"\n") - if ( self.getQueue() is not None ): - pin.write("#PBS -q "+self.getQueue()+"\n") - if ( self.getDependency() is not None ): - pin.write("#PBS -W depend=afterok:"+(",afterok:".join(self.getDependency().split(":")))+"\n") - if ( hold ): + if self.getErrLog() is not None: + pin.write("#PBS -e " + self.getErrLog() + "\n") + if self.getOutLog() is not None: + pin.write("#PBS -o " + self.getOutLog() + "\n") + if self.getJoinLogs() is not None: + pin.write("#PBS -j " + self.getJoinLogs() + "\n") + pin.write("#PBS -l walltime=" + self.getWalltime() + "\n") + if self.getQueue() is not None: + pin.write("#PBS -q " + self.getQueue() + "\n") + if self.getDependency() is not None: + pin.write( + "#PBS -W depend=afterok:" + + (",afterok:".join(self.getDependency().split(":"))) + + "\n" + ) + if hold: pin.write("#PBS -h\n") - if ( self.getVars() is not None ): + if self.getVars() is not None: myvars = self.getVars() - pin.write("#PBS -v "+myvars[0]) - if ( len(myvars)>1 ): + pin.write("#PBS -v " + myvars[0]) + if len(myvars) > 1: for var in myvars[1:]: - pin.write(","+var) + pin.write("," + var) pin.write("\n") - if ( pbs_prologue is not None ): + if pbs_prologue is not None: lines = pbs_prologue.split("\\n") for line in lines: - pin.write(line+"\n") + pin.write(line + "\n") list = self.transferList() - if ( list.getOption("x509proxy")!="default" ): - if ( debug ): - sys.stderr.write("using x509 proxy "+list.getOption("x509proxy")+"\n") - if ( self.getCredCache() ): + if list.getOption("x509proxy") != "default": + if debug: + sys.stderr.write( + "using x509 proxy " + list.getOption("x509proxy") + "\n" + ) + if self.getCredCache(): newcred = self._doCredCache(list.getOption("x509proxy")) - pin.write("export X509_USER_PROXY="+newcred+"\n") - epilogue = epilogue+"rm -f "+newcred+"\n" + pin.write("export X509_USER_PROXY=" + newcred + "\n") + epilogue = epilogue + "rm -f " + newcred + "\n" else: - pin.write("export X509_USER_PROXY="+list.getOption("x509proxy")+"\n") - elif ( "X509_USER_PROXY" in os.environ and os.path.exists(os.environ["X509_USER_PROXY"]) ): - if ( debug ): - sys.stderr.write("using x509 proxy "+os.environ["X509_USER_PROXY"]+"\n") - if ( self.getCredCache() ): + pin.write( + "export X509_USER_PROXY=" + list.getOption("x509proxy") + "\n" + ) + elif "X509_USER_PROXY" in os.environ and os.path.exists( + os.environ["X509_USER_PROXY"] + ): + if debug: + sys.stderr.write( + "using x509 proxy " + os.environ["X509_USER_PROXY"] + "\n" + ) + if self.getCredCache(): newcred = self._doCredCache(os.environ["X509_USER_PROXY"]) - pin.write("export X509_USER_PROXY="+newcred+"\n") - epilogue = epilogue+"rm -f "+newcred+"\n" - if ( self.getMoverType()=="guc" or self.getMoverType()=="globus-url-copy" ): - pin.write("export GUC_OPTIONS=\"-q -cd -r -stripe -fast -tcp-bs "+str(list.getOption("tcp_buffer"))+" -bs "+str(list.getOption("block_size"))) - if ( not list.getOption("binary") ): + pin.write("export X509_USER_PROXY=" + newcred + "\n") + epilogue = epilogue + "rm -f " + newcred + "\n" + if self.getMoverType() == "guc" or self.getMoverType() == "globus-url-copy": + pin.write( + 'export GUC_OPTIONS="-q -cd -r -stripe -fast -tcp-bs ' + + str(list.getOption("tcp_buffer")) + + " -bs " + + str(list.getOption("block_size")) + ) + if not list.getOption("binary"): pin.write(" -ascii") - if ( not list.getOption("dcau") ): + if not list.getOption("dcau"): pin.write(" -nodcau") - if ( list.getOption("src_subject") is not None ): - pin.write(" -ss 
'"+list.getOption("src_subject")+"'") - if ( list.getOption("dest_subject") is not None ): - pin.write(" -ds '"+list.getOption("src_subject")+"'") - if ( not list.getOption("allow3rdparty") ): + if list.getOption("src_subject") is not None: + pin.write(" -ss '" + list.getOption("src_subject") + "'") + if list.getOption("dest_subject") is not None: + pin.write(" -ds '" + list.getOption("src_subject") + "'") + if not list.getOption("allow3rdparty"): pin.write(" -notpt") - if ( list.getOption("streams")>1 ): - pin.write(" -parallel "+str(list.getOption("streams"))+" -sbs "+str(list.getOption("stripe_size"))) - if ( not list.getOption("cleanup") ): + if list.getOption("streams") > 1: + pin.write( + " -parallel " + + str(list.getOption("streams")) + + " -sbs " + + str(list.getOption("stripe_size")) + ) + if not list.getOption("cleanup"): pin.write(" -continue-on-error") - if ( list.getOption("retries")>0 ): - pin.write(" -restart -rst-retries "+str(list.getOption("retries"))) - pin.write("\"\n") + if list.getOption("retries") > 0: + pin.write(" -restart -rst-retries " + str(list.getOption("retries"))) + pin.write('"\n') pin.flush() for src in list.getAllSrcURLs(): - if ( debug ): - sys.stderr.write("src_url="+src+"\n") - if ( self.getMoverType()=="supermover" ): - pin.write("supermover "+src+" "+list.getDestURL(src)+"\n") - elif ( self.getMoverType()=="guc" or self.getMoverType()=="globus-url-copy" ): - pin.write("globus-url-copy $GUC_OPTIONS \""+src+"\" \""+list.getDestURL(src)+"\"\n") + if debug: + sys.stderr.write("src_url=" + src + "\n") + if self.getMoverType() == "supermover": + pin.write("supermover " + src + " " + list.getDestURL(src) + "\n") + elif ( + self.getMoverType() == "guc" or self.getMoverType() == "globus-url-copy" + ): + pin.write( + 'globus-url-copy $GUC_OPTIONS "' + + src + + '" "' + + list.getDestURL(src) + + '"\n' + ) pin.flush() - if ( debug ): - sys.stderr.write("done writing url list\n") - if ( epilogue!="" ): - if ( debug ): + if debug: + sys.stderr.write("done writing url list\n") + if epilogue != "": + if debug: sys.stderr.write("appending epilogue to script\n") pin.write(epilogue) pin.close() try: - if ( debug ): + if debug: sys.stderr.write("parsing qsub stdout\n") stdout = pout.readlines() - if ( len(stdout)>0 ): - jobid = stdout[0].rstrip('\n') - if ( debug ): + if len(stdout) > 0: + jobid = stdout[0].rstrip("\n") + if debug: sys.stderr.write("parsing qsub stderr\n") stderr = perr.readlines() - if ( len(stderr)>0 ): + if len(stderr) > 0: for line in stderr: sys.stderr.write(line) sys.stderr.flush() - except IndexError, err: + except IndexError as err: sys.stderr.write(str(err) + "\n") sys.stderr.flush() pout.close() perr.close() - if ( jobid is None or jobid=="" ): + if jobid is None or jobid == "": sys.stderr.write("No job id, assuming failure\n") sys.exit(-1) return jobid def _tmpDir(self): tmpdir = "/tmp" - if ( ("TMPDIR" in os.environ) and os.path.exists(os.environ["TMPDIR"]) ): + if ("TMPDIR" in os.environ) and os.path.exists(os.environ["TMPDIR"]): tmpdir = os.environ["TMPDIR"] - elif ( ("TG_NODE_SCRATCH" in os.environ) and os.path.exists(os.environ["TG_NODE_SCRATCH"]) ): + elif ("TG_NODE_SCRATCH" in os.environ) and os.path.exists( + os.environ["TG_NODE_SCRATCH"] + ): tmpdir = os.environ["TG_NODE_SCRATCH"] - if ( debug ): - sys.stderr.write("using tmp dir "+tmpdir+"\n") + if debug: + sys.stderr.write("using tmp dir " + tmpdir + "\n") return tmpdir - def _submitDMover(self,dosubmit): - if ( self.getMoverType()!="dmover" ): - raise RuntimeError("mover type 
\""+self.getMoverType()+"\" is incompatible with dmover") + def _submitDMover(self, dosubmit): + if self.getMoverType() != "dmover": + raise RuntimeError( + 'mover type "' + self.getMoverType() + '" is incompatible with dmover' + ) jobid = None list = self.transferList() proxy = list.getOption("x509proxy") - if ( proxy is not None and proxy!="default" and os.path.exists(proxy) ): - if ( self.getCredCache() ): + if proxy is not None and proxy != "default" and os.path.exists(proxy): + if self.getCredCache(): newcred = self._doCredCache(proxy) os.environ["X509_PROXY"] = newcred else: os.environ["X509_PROXY"] = proxy - if ( debug ): - sys.stderr.write("using x5090 proxy "+os.environ["X509_PROXY"]+"\n") - tmpfile = self._tmpDir()+"/u"+str(os.getuid())+"_p"+str(os.getpid())+".dmover" - self.writeDescription(tmpfile,"dmover") + if debug: + sys.stderr.write("using x5090 proxy " + os.environ["X509_PROXY"] + "\n") + tmpfile = ( + self._tmpDir() + + "/u" + + str(os.getuid()) + + "_p" + + str(os.getpid()) + + ".dmover" + ) + self.writeDescription(tmpfile, "dmover") cmd = "dsub" - if ( os.environ["X509_PROXY"] is not None and os.environ["X509_PROXY"]!="default" and os.path.exists(os.environ["X509_PROXY"]) ): - cmd += " -c "+os.environ["X509_PROXY"] - if ( self.getQueue() is not None ): - cmd += " -q "+self.getQueue() - if ( self.getDependency() is not None ): - cmd += " -D "+self.getDependency() - if ( self.getOutLog() is not None ): - cmd += " -o "+self.getOutLog() - if ( self.getWalltime() is not None ): - cmd += " -w "+self.getWalltime() - if ( list.getOption("retries")>0 ): - cmd += " -R "+str(list.getOption("retries")) - if ( list.getOption("streams")>1 ): - cmd += " -s "+str(list.getOption("streams")) - if ( hold ): + if ( + os.environ["X509_PROXY"] is not None + and os.environ["X509_PROXY"] != "default" + and os.path.exists(os.environ["X509_PROXY"]) + ): + cmd += " -c " + os.environ["X509_PROXY"] + if self.getQueue() is not None: + cmd += " -q " + self.getQueue() + if self.getDependency() is not None: + cmd += " -D " + self.getDependency() + if self.getOutLog() is not None: + cmd += " -o " + self.getOutLog() + if self.getWalltime() is not None: + cmd += " -w " + self.getWalltime() + if list.getOption("retries") > 0: + cmd += " -R " + str(list.getOption("retries")) + if list.getOption("streams") > 1: + cmd += " -s " + str(list.getOption("streams")) + if hold: cmd += " -H" - cmd += " -f "+tmpfile - if ( debug ): - sys.stderr.write("executing command \""+cmd+"\"\n") - if ( dosubmit ): - #pin,pout,perr = os.popen3(cmd) - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + cmd += " -f " + tmpfile + if debug: + sys.stderr.write('executing command "' + cmd + '"\n') + if dosubmit: + # pin,pout,perr = os.popen3(cmd) + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout pin.close() stdout = pout.readlines() for line in stdout: - if ( line.startswith("Executing qsub:") ): + if line.startswith("Executing qsub:"): token = line.split() - if ( len(token)>2 ): - jobid = token[2].rstrip('\n') + if len(token) > 2: + jobid = token[2].rstrip("\n") sys.stderr.write(line) pout.close() stderr = perr.readlines() @@ -704,101 +782,120 @@ class ScheduledDataTransfer(object): sys.stderr.write(line) perr.close() else: - sys.stderr.write(cmd+"\n") - os.system("cat "+tmpfile) + sys.stderr.write(cmd + "\n") + 
os.system("cat " + tmpfile) os.unlink(tmpfile) return jobid - def _submitRFT(self,dosubmit): - if ( self.getMoverType()!="rft" ): - raise RuntimeError("mover type \""+self.getMoverType()+"\" is incompatible with RFT") - if ( self.getServer() is None ): + def _submitRFT(self, dosubmit): + if self.getMoverType() != "rft": + raise RuntimeError( + 'mover type "' + self.getMoverType() + '" is incompatible with RFT' + ) + if self.getServer() is None: raise RuntimeError("RFT server not set") jobid = None proxy = self.transferList().getOption("x509proxy") - if ( proxy is not None and proxy!="default" and os.path.exists(proxy) ): - if ( self.getCredCache() ): + if proxy is not None and proxy != "default" and os.path.exists(proxy): + if self.getCredCache(): newcred = self._doCredCache(proxy) os.environ["X509_PROXY"] = newcred else: os.environ["X509_PROXY"] = proxy - tmpfile = self._tmpDir()+"/u"+str(os.getuid())+"_p"+str(os.getpid())+".rft" - self.writeDescription(tmpfile,"rft") - cmd = "rft -h "+self.getServer() - if ( self.getPort() is not None ): - cmd += " -r "+self.getPort() - cmd += " -f "+tmpfile - if ( debug ): - sys.stderr.write("executing command \""+cmd+"\"\n") - if ( dosubmit ): - #pin,pout,perr = os.popen3(cmd) - p = subprocess.Popen(cmd, shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) + tmpfile = ( + self._tmpDir() + "/u" + str(os.getuid()) + "_p" + str(os.getpid()) + ".rft" + ) + self.writeDescription(tmpfile, "rft") + cmd = "rft -h " + self.getServer() + if self.getPort() is not None: + cmd += " -r " + self.getPort() + cmd += " -f " + tmpfile + if debug: + sys.stderr.write('executing command "' + cmd + '"\n') + if dosubmit: + # pin,pout,perr = os.popen3(cmd) + p = subprocess.Popen( + cmd, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, + ) pin = p.stdin perr = p.stderr pout = p.stdout pin.close() stdout = pout.readlines() for line in stdout: - sys.stderr.write(line) + sys.stderr.write(line) pout.close() stderr = perr.readlines() for line in stderr: sys.stderr.write(line) perr.close() else: - sys.stderr.write(cmd+"\n") - os.system("cat "+tmpfile) + sys.stderr.write(cmd + "\n") + os.system("cat " + tmpfile) os.unlink(tmpfile) return jobid - - def _submitStork(self,dosubmit): - if ( self.getMoverType()!="stork" ): - raise RuntimeError("mover type \""+self.getMoverType()+"\" is incompatible with Stork") + + def _submitStork(self, dosubmit): + if self.getMoverType() != "stork": + raise RuntimeError( + 'mover type "' + self.getMoverType() + '" is incompatible with Stork' + ) proxy = self.transferList().getOption("x509proxy") - if ( proxy is not None and proxy!="default" and os.path.exists(proxy) ): - if ( self.getCredCache() ): + if proxy is not None and proxy != "default" and os.path.exists(proxy): + if self.getCredCache(): newcred = self._doCredCache(proxy) os.environ["X509_PROXY"] = newcred else: os.environ["X509_PROXY"] = proxy - elif ( ("X509_PROXY" in os.environ.keys()) and self.getCredCache() ): + elif ("X509_PROXY" in os.environ.keys()) and self.getCredCache(): newcred = self._doCredCache(os.environ["X509_PROXY"]) os.environ["X509_PROXY"] = newcred jobid = None - tmpfile = self._tmpDir()+"/u"+str(os.getuid())+"_p"+str(os.getpid())+".stork" - self.writeDescription(tmpfile,"stork") + tmpfile = ( + self._tmpDir() + + "/u" + + str(os.getuid()) + + "_p" + + str(os.getpid()) + + ".stork" + ) + self.writeDescription(tmpfile, "stork") cmd = "stork_submit" - if ( 
self.getServer() is not None ):
-            cmd += " -name "+self.getServer()
-            if ( self.getPort() is not None ):
-                cmd += ":"+str(self.getPort())
-        cmd += " "+tmpfile
-        if ( debug ):
-            sys.stderr.write("executing command \""+cmd+"\"\n")
-        if ( dosubmit ):
-            #pin,pout,perr = os.popen3(cmd)
-            p = subprocess.Popen(cmd, shell=True,
-                                 stdin=subprocess.PIPE,
-                                 stdout=subprocess.PIPE,
-                                 stderr=subprocess.PIPE,
-                                 close_fds=True)
+        if self.getServer() is not None:
+            cmd += " -name " + self.getServer()
+            if self.getPort() is not None:
+                cmd += ":" + str(self.getPort())
+        cmd += " " + tmpfile
+        if debug:
+            sys.stderr.write('executing command "' + cmd + '"\n')
+        if dosubmit:
+            # pin,pout,perr = os.popen3(cmd)
+            p = subprocess.Popen(
+                cmd,
+                shell=True,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                close_fds=True,
+            )
            pin = p.stdin
            perr = p.stderr
            pout = p.stdout
            pin.close()
-            if ( len(stdout)>0 ):
-                jobid = stdout[0].rstrip('\n')
+            stdout = pout.readlines()
+            if len(stdout) > 0:
+                jobid = stdout[0].rstrip("\n")
            pout.close()
-            for line in stderr:
+            stderr = perr.readlines()
+            for line in stderr:
                sys.stderr.write(line)
            perr.close()
        else:
-            sys.stderr.write(cmd+"\n")
-            os.system("cat "+tmpfile)
+            sys.stderr.write(cmd + "\n")
+            os.system("cat " + tmpfile)
        os.unlink(tmpfile)
        return jobid

@@ -807,9 +904,9 @@
 informat = "urlpair"
 mover = "globus-url-copy"
 rm = "pbs"
 cfgfile = "/usr/local/etc/dmsub.cfg"
-if ( os.path.exists("/etc/dmsub.cfg") ):
+if os.path.exists("/etc/dmsub.cfg"):
    cfgfile = "/etc/dmsub.cfg"
-if ( "DMSUB_CFG" in os.environ ):
+if "DMSUB_CFG" in os.environ:
    cfgfile = os.environ["DMSUB_CFG"]
 dosubmit = True
 queue = None
@@ -824,45 +921,99 @@
 dependency = None
 vars = None
 pbs_prologue = None

+
 def usage():
-## global informat
-## global rm
-## global mover
-## global queue
-## global walltime
-## global debug
-## global server
-## global port
-## global streams
-## global hold
+    # global informat
+    # global rm
+    # global mover
+    # global queue
+    # global walltime
+    # global debug
+    # global server
+    # global port
+    # global streams
+    # global hold
    sys.stderr.write("Usage: dmsub [arguments] [-f] xfer_file\n\n")
    sys.stderr.write("Arguments:\n")
-    sys.stderr.write(" -b rm, --batch=rm, --rm=rm\n\tUse \"rm\" batch environment (default is "+str(rm)+")\n")
-    sys.stderr.write(" -C cfgfile, --config=cfgfile\n\tRead config file \"cfgfile\"\n")
-    sys.stderr.write(" -c credfile, --credential=credfile\n\tUse file \"credfile\" as GSI credential\n")
-    sys.stderr.write(" -D jobid[:jobid], --dependency=jobid[:jobid]\n\tDo not run data transfer job till after jobid completes\n")
+    sys.stderr.write(
+        ' -b rm, --batch=rm, --rm=rm\n\tUse "rm" batch environment (default is '
+        + str(rm)
+        + ")\n"
+    )
+    sys.stderr.write(' -C cfgfile, --config=cfgfile\n\tRead config file "cfgfile"\n')
+    sys.stderr.write(
+        ' -c credfile, --credential=credfile\n\tUse file "credfile" as GSI credential\n'
+    )
+    sys.stderr.write(
+        " -D jobid[:jobid], --dependency=jobid[:jobid]\n\tDo not run data transfer job till after jobid completes\n"
+    )
    sys.stderr.write(" -d, --debug\n\tEnable debugging output\n")
-    sys.stderr.write(" -e errlog, --errlog=errlog\n\tWrite stderr from data transfer to \"errlog\"\n")
-    sys.stderr.write(" -F fmt, --format=fmt\n\tUse \"fmt\" as data transfer description format (default is "+str(informat)+")\n")
-    sys.stderr.write(" -H, --hold\n\tSubmit job in held state if possible (default is "+str(hold)+")\n")
+    sys.stderr.write(
+        ' -e errlog, --errlog=errlog\n\tWrite stderr from data transfer to "errlog"\n'
+    )
+    sys.stderr.write(
+        ' -F fmt, --format=fmt\n\tUse "fmt" as data transfer description format (default is '
+        + str(informat)
+        + ")\n"
+    )
+    sys.stderr.write(
+        " -H, --hold\n\tSubmit job in held state if possible (default is "
+        + str(hold)
+        + ")\n"
+    )
    sys.stderr.write(" -h, --help\n\tPrint this help message\n")
-    sys.stderr.write(" -j join, --joinlogs=join\n\tJoin stdout and stderr logs (default is "+str(joinlogs)+")\n")
-    sys.stderr.write(" -l rsrc=limit[,rsrc=limit], --limit=rsrc=limit[,rsrc=limit]\n\tSet batch resource limit (e.g. walltime)\n")
-    sys.stderr.write(" -M mvr, --mover=mvr\n\tUse \"mvr\" as the data movement mechanism (default is "+mover+")\n")
-    sys.stderr.write(" -N jobname, --name=jobname\n\tUse \"jobnname\" as name of data movement job\n")
+    sys.stderr.write(
+        " -j join, --joinlogs=join\n\tJoin stdout and stderr logs (default is "
+        + str(joinlogs)
+        + ")\n"
+    )
+    sys.stderr.write(
+        " -l rsrc=limit[,rsrc=limit], --limit=rsrc=limit[,rsrc=limit]\n\tSet batch resource limit (e.g. walltime)\n"
+    )
+    sys.stderr.write(
+        ' -M mvr, --mover=mvr\n\tUse "mvr" as the data movement mechanism (default is '
+        + mover
+        + ")\n"
+    )
+    sys.stderr.write(
+        ' -N jobname, --name=jobname\n\tUse "jobname" as name of data movement job\n'
+    )
    sys.stderr.write(" -n, --nosubmit\n\tDo a dry run without submitting any jobs\n")
-    sys.stderr.write(" -o log, --outlog=log\n\tWrite stdout from data transfer to \"log\"\n")
-    sys.stderr.write(" -P pt, --port=pt\n\tConnect to data transfer service on port \"pt\"\n")
-    sys.stderr.write(" -q dest, --queue=dest\n\tSubmit data transfer job to batch queue \"dest\" (default is "+str(queue)+")\n")
-    sys.stderr.write(" -s n, --streams=n\n\tUse up to n striped data streams whenever possible (default is "+str(streams)+")\n")
-    sys.stderr.write(" -S svr, --server=svr\n\tConnect to data transfer service on server \"svr\"\n")
-    sys.stderr.write(" -X, --cache-credential\n\tCache GSI credential if found\n (default)")
-    sys.stderr.write(" -x, --no-cache-credential\n\tDo not cache GSI credential if found\n")
-    sys.stderr.write("\nSupported batch environments: pbs, torque, condor, stork, rft\n")
+    sys.stderr.write(
+        ' -o log, --outlog=log\n\tWrite stdout from data transfer to "log"\n'
+    )
+    sys.stderr.write(
+        ' -P pt, --port=pt\n\tConnect to data transfer service on port "pt"\n'
+    )
+    sys.stderr.write(
+        ' -q dest, --queue=dest\n\tSubmit data transfer job to batch queue "dest" (default is '
+        + str(queue)
+        + ")\n"
+    )
+    sys.stderr.write(
+        " -s n, --streams=n\n\tUse up to n striped data streams whenever possible (default is "
+        + str(streams)
+        + ")\n"
+    )
+    sys.stderr.write(
+        ' -S svr, --server=svr\n\tConnect to data transfer service on server "svr"\n'
+    )
+    sys.stderr.write(
+        " -X, --cache-credential\n\tCache GSI credential if found (default)\n"
+    )
+    sys.stderr.write(
+        " -x, --no-cache-credential\n\tDo not cache GSI credential if found\n"
+    )
+    sys.stderr.write(
+        "\nSupported batch environments: pbs, torque, condor, stork, rft\n"
+    )
    sys.stderr.write("Supported transfer descriptions: urlpair, dmover, rft, stork\n")
-    sys.stderr.write("Supported data movers: globus-url-copy, supermover, stork, rft\n")
+    sys.stderr.write(
+        "Supported data movers: globus-url-copy, supermover, stork, rft\n"
+    )
    sys.exit(0)

+
 def read_cfg(file):
    global informat
    global rm
@@ -878,205 +1029,211 @@ def read_cfg(file):
    global vars
    global pbs_prologue
    global credcache
-    if ( not os.path.exists(file) ):
-        raise IOError,file+" does not exist"
-    elif ( not os.path.isfile(file) ):
-        raise IOError("Config \"file\" "+file+" is not actually a file")
+    if not os.path.exists(file):
+        raise IOError(file + " does not exist")
+    elif not os.path.isfile(file):
+        raise IOError('Config file "' + file + '" is not actually a file')
    else:
-        if ( debug ):
-            sys.stderr.write("reading config file "+cfgfile+"\n")
-        fd = open(file,"r")
+        if debug:
+            sys.stderr.write("reading config file " + cfgfile + "\n")
+        fd = open(file, "r")
        lines = fd.readlines()
        for line in lines:
-            token = line.split("=",2)
-            if ( token!=[] and len(token)==2 and not token[0].startswith("#") ):
+            token = line.split("=", 2)
+            if token != [] and len(token) == 2 and not token[0].startswith("#"):
                keyword = token[0].strip()
                value = token[1].strip()
-                if ( debug ):
-                    sys.stderr.write(file+": "+keyword+"="+value+"\n")
-                if ( keyword=="batch" or keyword=="rm" ):
+                if debug:
+                    sys.stderr.write(file + ": " + keyword + "=" + value + "\n")
+                if keyword == "batch" or keyword == "rm":
                    rm = value
-                elif ( keyword=="mover" ):
+                elif keyword == "mover":
                    mover = value
-                elif ( keyword=="queue" ):
+                elif keyword == "queue":
                    queue = value
-                elif ( keyword=="walltime" ):
+                elif keyword == "walltime":
                    walltime = value
-                elif ( keyword=="server" ):
+                elif keyword == "server":
                    server = value
-                elif ( keyword=="port" ):
+                elif keyword == "port":
                    port = value
-                elif ( keyword=="format" ):
+                elif keyword == "format":
                    informat = value
-                elif ( keyword=="streams" ):
+                elif keyword == "streams":
                    streams = value
-                elif ( keyword=="stripe_size" ):
+                elif keyword == "stripe_size":
                    stripesize = value
-                elif ( keyword=="block_size" ):
+                elif keyword == "block_size":
                    stripesize = value
-                elif ( keyword=="debug" ):
-                    if ( value=="true" or value=="True" or value=="TRUE" ):
+                elif keyword == "debug":
+                    if value == "true" or value == "True" or value == "TRUE":
                        debug = True
                    else:
                        debug = False
-                elif ( keyword=="pbs_prologue" ):
+                elif keyword == "pbs_prologue":
                    pbs_prologue = value

+
 try:
-    if ( debug ):
+    if debug:
        sys.stderr.write("invoking getopt\n")
-    opts, args = getopt.getopt(sys.argv[1:],
-                               "b:C:c:D:de:F:f:Hhj:l:M:no:N:P:q:S:s:v:w:Xx",
-                               ["batch=",
-                                "credential=",
-                                "cache-credential",
-                                "config=s",
-                                "debug",
-                                "dependency=",
-                                "errlog=",
-                                "format=",
-                                "help",
-                                "hold",
-                                "joinlogs=",
-                                "limit=",
-                                "mover=",
-                                "name=",
-                                "no-cache-credential",
-                                "nosubmit",
-                                "outlog=",
-                                "port=",
-                                "queue=",
-                                "rm=",
-                                "server=",
-                                "streams=",
-                                "vars="])
-except getopt.GetoptError, err:
-    sys.stderr.write(str(err)+"\n")
+    opts, args = getopt.getopt(
+        sys.argv[1:],
+        "b:C:c:D:de:F:f:Hhj:l:M:no:N:P:q:S:s:v:w:Xx",
+        [
+            "batch=",
+            "credential=",
+            "cache-credential",
+            "config=",
+            "debug",
+            "dependency=",
+            "errlog=",
+            "format=",
+            "help",
+            "hold",
+            "joinlogs=",
+            "limit=",
+            "mover=",
+            "name=",
+            "no-cache-credential",
+            "nosubmit",
+            "outlog=",
+            "port=",
+            "queue=",
+            "rm=",
+            "server=",
+            "streams=",
+            "vars=",
+        ],
+    )
+except getopt.GetoptError as err:
+    sys.stderr.write(str(err) + "\n")
    usage()

-if ( os.path.exists(cfgfile) ):
-    if ( debug ):
+if os.path.exists(cfgfile):
+    if debug:
        sys.stderr.write("reading global config file\n")
    read_cfg(cfgfile)
 else:
-    if ( debug ):
-        sys.stderr.write("global config file "+cfgfile+" not found\n")
+    if debug:
+        sys.stderr.write("global config file " + cfgfile + " not found\n")

-if ( debug ):
+if debug:
    sys.stderr.write("parsing command line options\n")
 for opt in opts:
-    if ( opt[0]=="-b" or opt[0]=="--batch " or opt[0]=="--rm" ):
-        if ( debug ):
-            sys.stderr.write("setting rm to "+opt[1]+"\n")
+    if opt[0] == "-b" or opt[0] == "--batch" or 
opt[0] == "--rm": + if debug: + sys.stderr.write("setting rm to " + opt[1] + "\n") rm = opt[1] - if ( opt[0]=="-C" or opt[0]=="--config" ): + if opt[0] == "-C" or opt[0] == "--config": read_cfg(opt[1]) - if ( opt[0]=="-c" or opt[0]=="--credential" ): - if ( os.path.exists(opt[1]) and os.access(opt[1],os.R_OK) ): + if opt[0] == "-c" or opt[0] == "--credential": + if os.path.exists(opt[1]) and os.access(opt[1], os.R_OK): os.environ["X509_USER_PROXY"] = opt[1] - elif ( not os.access(opt[1],os.R_OK) ): - raise IOError(opt[1]+" is not readable") + elif not os.access(opt[1], os.R_OK): + raise IOError(opt[1] + " is not readable") else: - raise IOError(opt[1]+" does not exist") - if ( opt[0]=="-D" or opt[0]=="--dependency" ): - if ( debug ): - sys.stderr.write("setting dependency to "+opt[1]+"\n") + raise IOError(opt[1] + " does not exist") + if opt[0] == "-D" or opt[0] == "--dependency": + if debug: + sys.stderr.write("setting dependency to " + opt[1] + "\n") dependency = opt[1] - if ( opt[0]=="-d" or opt[0]=="--debug" ): + if opt[0] == "-d" or opt[0] == "--debug": debug = True - if ( opt[0]=="-e" or opt[0]=="--errlog" ): - if ( debug ): - sys.stderr.write("setting errlog to "+opt[1]+"\n") + if opt[0] == "-e" or opt[0] == "--errlog": + if debug: + sys.stderr.write("setting errlog to " + opt[1] + "\n") errlog = opt[1] - if ( opt[0]=="-F" or opt[0]=="--format" ): - if ( debug ): - sys.stderr.write("setting format to "+opt[1]+"\n") + if opt[0] == "-F" or opt[0] == "--format": + if debug: + sys.stderr.write("setting format to " + opt[1] + "\n") informat = opt[1] - if ( opt[0]=="-f" ): + if opt[0] == "-f": args.append(opt[1]) - if ( opt[0]=="-H" or opt[0]=="--hold" ): + if opt[0] == "-H" or opt[0] == "--hold": hold = True - if ( opt[0]=="-h" or opt[0]=="--help" ): + if opt[0] == "-h" or opt[0] == "--help": usage() - if ( opt[0]=="-j" or opt[0]=="--joinlogs" ): - if ( debug ): - sys.stderr.write("setting joinlogs to "+opt[1]+"\n") + if opt[0] == "-j" or opt[0] == "--joinlogs": + if debug: + sys.stderr.write("setting joinlogs to " + opt[1] + "\n") joinlogs = opt[1] - if ( opt[0]=="-l" or opt[0]=="--limit" ): - if ( debug ): - sys.stderr.write("setting limits to "+opt[1]+"\n") + if opt[0] == "-l" or opt[0] == "--limit": + if debug: + sys.stderr.write("setting limits to " + opt[1] + "\n") limits = opt[1].split(",") for limit in limits: - (rsrc,value) = limit.split("=",1) - if ( rsrc=="walltime" ): + (rsrc, value) = limit.split("=", 1) + if rsrc == "walltime": walltime = value - if ( opt[0]=="-M" or opt[0]=="--mover" ): - if ( debug ): - sys.stderr.write("setting mover to "+opt[1]+"\n") + if opt[0] == "-M" or opt[0] == "--mover": + if debug: + sys.stderr.write("setting mover to " + opt[1] + "\n") mover = opt[1] - if ( opt[0]=="-N" or opt[0]=="--name" ): - if ( debug ): - sys.stderr.write("setting jobname to "+opt[1]+"\n") + if opt[0] == "-N" or opt[0] == "--name": + if debug: + sys.stderr.write("setting jobname to " + opt[1] + "\n") jobname = opt[1] - if ( opt[0]=="-n" or opt[0]=="--nosubmit" ): + if opt[0] == "-n" or opt[0] == "--nosubmit": dosubmit = False - if ( opt[0]=="-o" or opt[0]=="--outlog" ): - if ( debug ): - sys.stderr.write("setting outlog to "+opt[1]+"\n") + if opt[0] == "-o" or opt[0] == "--outlog": + if debug: + sys.stderr.write("setting outlog to " + opt[1] + "\n") outlog = opt[1] - if ( opt[0]=="-P" or opt[0]=="--port" ): - if ( debug ): - sys.stderr.write("setting port to "+opt[1]+"\n") + if opt[0] == "-P" or opt[0] == "--port": + if debug: + sys.stderr.write("setting port to " + opt[1] 
+ "\n") port = opt[1] - if ( opt[0]=="-q" or opt[0]=="--queue" ): - if ( debug ): - sys.stderr.write("setting queue to "+opt[1]+"\n") + if opt[0] == "-q" or opt[0] == "--queue": + if debug: + sys.stderr.write("setting queue to " + opt[1] + "\n") queue = opt[1] - if ( opt[0]=="-S" or opt[0]=="--server" ): - if ( debug ): - sys.stderr.write("setting server to "+opt[1]+"\n") + if opt[0] == "-S" or opt[0] == "--server": + if debug: + sys.stderr.write("setting server to " + opt[1] + "\n") server = opt[1] - if ( opt[0]=="-s" or opt[0]=="--streams" ): - if ( debug ): - sys.stderr.write("setting streams to "+opt[1]+"\n") + if opt[0] == "-s" or opt[0] == "--streams": + if debug: + sys.stderr.write("setting streams to " + opt[1] + "\n") streams = opt[1] - if ( opt[0]=="-v" ): + if opt[0] == "-v": vars = opt[1] - if ( opt[0]=="-w" ): - walltime = opt[1]+":00" - if ( opt[0]=="-X" or opt[0]=="--cache-credential" ): + if opt[0] == "-w": + walltime = opt[1] + ":00" + if opt[0] == "-X" or opt[0] == "--cache-credential": credcache = True - if ( opt[0]=="-x" or opt[0]=="--no-cache-credential" ): + if opt[0] == "-x" or opt[0] == "--no-cache-credential": credcache = False # What to do if credcache is set but no cred is specified? # Look for default and use that... -if ( credcache and not ( "X509_USER_PROXY" in os.environ.keys() ) ): - defproxy = "/tmp/x509up_u"+str(os.getuid()) - if ( os.path.exists(defproxy) and os.access(defproxy,os.R_OK) ): +if credcache and not ("X509_USER_PROXY" in os.environ.keys()): + defproxy = "/tmp/x509up_u" + str(os.getuid()) + if os.path.exists(defproxy) and os.access(defproxy, os.R_OK): os.environ["X509_USER_PROXY"] = defproxy - + for file in args: - if ( debug ): - sys.stderr.write("creating job from file "+file+"\n") - job = ScheduledDataTransfer(file,informat,mover,rm,server,port,credcache) - if ( jobname is not None ): + if debug: + sys.stderr.write("creating job from file " + file + "\n") + job = ScheduledDataTransfer(file, informat, mover, rm, server, port, credcache) + if jobname is not None: job.setName(jobname) - if ( queue is not None ): + if queue is not None: job.setQueue(queue) - if ( walltime is not None ): + if walltime is not None: job.setWalltime(walltime) - if ( outlog is not None ): + if outlog is not None: job.setOutLog(outlog) - if ( errlog is not None ): + if errlog is not None: job.setErrLog(errlog) - if ( joinlogs is not None ): + if joinlogs is not None: job.setJoinLogs(joinlogs) - if ( dependency is not None ): + if dependency is not None: job.setDependency(dependency) - if ( vars is not None ): + if vars is not None: job.setVars(vars) - if ( debug ): - sys.stderr.write("submitting job from file "+file+" as job "+job.getName()+"\n") + if debug: + sys.stderr.write( + "submitting job from file " + file + " as job " + job.getName() + "\n" + ) jobid = job.submit(dosubmit) - diff --git a/bin/job-vm-launch b/bin/job-vm-launch index 6efea5a..177b3e6 100644 --- a/bin/job-vm-launch +++ b/bin/job-vm-launch @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # # job-vm-launch: Launch a VM image inside a PBS job # Copyright 2013, 2014, University of Tennessee @@ -42,62 +42,69 @@ global vmname def connect(hypervisor): conn = libvirt.open(hypervisor) - if ( conn is None ): - log.write("Failed to connect to hypervisor "+hypervisor+"\n") + if conn is None: + log.write("Failed to connect to hypervisor " + hypervisor + "\n") sys.exit(-1) return conn -def cleanup(signal,frame): +def cleanup(signal, frame): global conn - if ( vmname ): - if ( conn is None ): + if vmname: 
+ if conn is None: conn = connect(hypervisor) try: guest = conn.lookupByName(vmname) - except: - log.write("cleanup: Could not find VM "+vmname+"\n") + except Exception: + log.write("cleanup: Could not find VM " + vmname + "\n") sys.exit(-1) try: - if ( guest.isActive() ): - log.write("Shutting down guest "+vmname+"\n") + if guest.isActive(): + log.write("Shutting down guest " + vmname + "\n") guest.shutdown() guest.destroy() - if ( keepguest ): - log.write("Keeping guest "+vmname+"\n") + if keepguest: + log.write("Keeping guest " + vmname + "\n") else: - log.write("Removing guest "+vmname+"\n") + log.write("Removing guest " + vmname + "\n") guest.undefine() - except: + except Exception: # if we got here, libvirtd probably already cleaned up the guest # before we could; do nothing pass - if ( cloneimg and not keepclone and os.path.exists(vmimage) ): - log.write("Deleting clone image "+vmimage+"\n") + if cloneimg and not keepclone and os.path.exists(vmimage): + log.write("Deleting clone image " + vmimage + "\n") os.unlink(vmimage) - elif ( cloneimg and keepclone and os.path.exists(vmimage) ): - log.write("Keeping clone image "+vmimage+"\n") + elif cloneimg and keepclone and os.path.exists(vmimage): + log.write("Keeping clone image " + vmimage + "\n") exit(exitcode) # from http://www.centos.org/docs/5/html/5.2/Virtualization/sect-Virtualization-Tips_and_tricks-Generating_a_new_unique_MAC_address.html def randomMAC(): - mac = [ 0xde, 0xad, 0xbe, 0xef, - random.randint(0x00, 0xff), - random.randint(0x00, 0xff) ] - macaddr = ':'.join(map(lambda x: "%02x" % x, mac)) - log.write("Using randomly generated MAC address "+macaddr+"\n") - return macaddr + mac = [ + 0xDE, + 0xAD, + 0xBE, + 0xEF, + random.randint(0x00, 0xFF), + random.randint(0x00, 0xFF), + ] + macaddr = ":".join(map(lambda x: "%02x" % x, mac)) + log.write("Using randomly generated MAC address " + macaddr + "\n") + return macaddr # From http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python def which(program): def is_exe(fpath): return os.path.exists(fpath) and os.access(fpath, os.X_OK) + def ext_candidates(fpath): yield fpath for ext in os.environ.get("PATHEXT", "").split(os.pathsep): yield fpath + ext + fpath, fname = os.path.split(program) if fpath: if is_exe(program): @@ -121,7 +128,7 @@ def usage(exitcode): # misc default settings log = sys.stdout show_usage = False -exitcode=0 +exitcode = 0 hypervisor = "qemu:///session" vmtype = "hvm" conn = None @@ -149,19 +156,19 @@ validatexml = False # find cpuset/cgroup root, if any cpusetroot = None -if ( os.path.exists("/dev/cpuset") ): +if os.path.exists("/dev/cpuset"): cpusetroot = "/dev/cpuset" -elif ( os.path.exists("/sys/fs/cgroup/cpuset") ): +elif os.path.exists("/sys/fs/cgroup/cpuset"): cpusetroot = "/sys/fs/cgroup/cpuset" # find cpuset, if any cpuset = None -if ( os.path.exists("/proc/self/cpuset") ): +if os.path.exists("/proc/self/cpuset"): cpuset = open("/proc/self/cpuset").read()[:-1] hostname = socket.gethostname() jobid = None -if ( "PBS_JOBID" in os.environ ): +if "PBS_JOBID" in os.environ: jobid = os.environ["PBS_JOBID"] else: sys.stderr.write("Not in a PBS job, exiting!\n") @@ -169,7 +176,7 @@ else: # find the qemu-kvm executable... 
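+# A quick illustration of the helper above (paths are hypothetical):
+#   which("qemu-kvm") -> "/usr/bin/qemu-kvm" if an executable is on $PATH
+#   which("qemu-kvm") -> None otherwise, hence the /usr/libexec fallback below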
qemukvm = which("qemu-kvm") -if ( which("qemu-kvm") is None and os.path.exists("/usr/libexec/qemu-kvm") ): +if which("qemu-kvm") is None and os.path.exists("/usr/libexec/qemu-kvm"): # ...which may not be in $PATH, especially on RHEL/CentOS qemukvm = "/usr/libexec/qemu-kvm" else: @@ -179,55 +186,63 @@ else: # default to 1, just in case nothing else is set vcores = 0 # look at cpuset/cgroup, if available -if ( cpusetroot is not None and cpuset is not None and - ( os.path.exists(cpusetroot+"/"+cpuset+"/cpus") or - os.path.exists(cpusetroot+"/"+cpuset+"/cpuset.cpus") ) ): +if ( + cpusetroot is not None + and cpuset is not None + and ( + os.path.exists(cpusetroot + "/" + cpuset + "/cpus") + or os.path.exists(cpusetroot + "/" + cpuset + "/cpuset.cpus") + ) +): fd = None - if ( os.path.exists(cpusetroot+"/"+cpuset+"/cpus") ): - fd = open(cpusetroot+"/"+cpuset+"/cpus") - elif ( os.path.exists(cpusetroot+"/"+cpuset+"/cpuset.cpus") ): - fd = open(cpusetroot+"/"+cpuset+"/cpuset.cpus") - if ( fd is not None ): + if os.path.exists(cpusetroot + "/" + cpuset + "/cpus"): + fd = open(cpusetroot + "/" + cpuset + "/cpus") + elif os.path.exists(cpusetroot + "/" + cpuset + "/cpuset.cpus"): + fd = open(cpusetroot + "/" + cpuset + "/cpuset.cpus") + if fd is not None: cpus = fd.read().rstrip() elts = cpus.split(",") for elt in elts: - if ( "-" in elt ): - [start,end] = elt.split("-") - vcores += int(end)-int(start)+1 + if "-" in elt: + [start, end] = elt.split("-") + vcores += int(end) - int(start) + 1 else: vcores += 1 fd.close() - if ( vcores==0 ): + if vcores == 0: vcores = 1 # if not, check PBS environment -elif ( "PBS_NUM_PPN" in os.environ.keys() ): +elif "PBS_NUM_PPN" in os.environ.keys(): vcores = int(os.environ["PBS_NUM_PPN"]) # figure out my memory limit (in MB) # default to 1GB, just in case nothing else is set memlimitmb = 1024 # look for cpuset/cgroup mem limit -if ( cpusetroot is not None and cpuset is not None and - os.path.exists(cpusetroot+"/"+cpuset+"/memory.limit_in_bytes") ): - fd = open(cpusetroot+"/"+cpuset+"/memory.limit_in_bytes") +if ( + cpusetroot is not None + and cpuset is not None + and os.path.exists(cpusetroot + "/" + cpuset + "/memory.limit_in_bytes") +): + fd = open(cpusetroot + "/" + cpuset + "/memory.limit_in_bytes") cgmemlimit = int(str(fd.read()).rstrip()) fd.close() # cgroup mem is in bytes - memlimitmb = cgmemlimit/(1024*1024) -elif ( which("qstat") is not None ): -# if the cpuset/cgroup is no help, see if the job has a mem limit set -# also look for a core count if there's not one already set + memlimitmb = cgmemlimit / (1024 * 1024) +elif which("qstat") is not None: + # if the cpuset/cgroup is no help, see if the job has a mem limit set + # also look for a core count if there's not one already set qmlimit = None - qstatf = "qstat -f "+os.environ['PBS_JOBID'] + qstatf = "qstat -f " + os.environ["PBS_JOBID"] fd = os.popen(qstatf) for line in fd.readlines(): - if ( "Resource_List.mem" in line ): + if "Resource_List.mem" in line: elt = (line.rstrip()).split() - if ( len(elt)>=2 ): + if len(elt) >= 2: qmlimit = elt[2] - elif ( "Resource_List.nodes" in line and vcores==0 ): + elif "Resource_List.nodes" in line and vcores == 0: elt = (line.rstrip()).split() - if ( len(elt)>=2 ): + if len(elt) >= 2: # handle PBS nodes={#,host)[:ppn=#][:gpus=#][:feature][+...] 
                 # syntax
                 nodes = elt[2].split("+")
@@ -235,244 +250,291 @@ elif ( which("qstat") is not None ):
                 for node in nodes:
                     nelts = node.split(":")
                     for nelt in nelts[1:]:
-                        if ( "ppn=" in nelt ):
-                            if ( nelts[0] in hostname ):
+                        if "ppn=" in nelt:
+                            if nelts[0] in hostname:
                                 myvcores = int((nelt.split("="))[1])
                                 break
                             break
                         else:
-                            myvcores = max(myvcores,int((nelt.split("="))[1]))
-                if ( myvcores>vcores ):
+                            myvcores = max(myvcores, int((nelt.split("="))[1]))
+                if myvcores > vcores:
                     vcores = myvcores
     fd.close()
-    if ( qmlimit is not None ):
+    if qmlimit is not None:
         # convert PBS' ####[kMGT][BW] syntax to MB
-        m = re.match("^(\d+)([KkMmGgTt]{0,1})([BbWw])$",qmlimit)
+        m = re.match(r"^(\d+)([KkMmGgTt]{0,1})([BbWw])$", qmlimit)
         base = int(m.group(1))
-        mult = 1.0/(1024*1024)
-        if ( m.group(2) and ( m.group(2)=="K" or m.group(2)=="k" ) ):
-            mult = 1.0/1024
-        elif ( m.group(2) and ( m.group(2)=="M" or m.group(2)=="m" ) ):
+        mult = 1.0 / (1024 * 1024)
+        if m.group(2) and (m.group(2) == "K" or m.group(2) == "k"):
+            mult = 1.0 / 1024
+        elif m.group(2) and (m.group(2) == "M" or m.group(2) == "m"):
             mult = 1
-        elif ( m.group(2) and ( m.group(2)=="G" or m.group(2)=="g" ) ):
+        elif m.group(2) and (m.group(2) == "G" or m.group(2) == "g"):
             mult = 1024
-        elif ( m.group(2) and ( m.group(2)=="T" or m.group(2)=="t" ) ):
-            mult = 1024*1024
+        elif m.group(2) and (m.group(2) == "T" or m.group(2) == "t"):
+            mult = 1024 * 1024
         unit = 1
-        if ( m.group(3)=="W" or m.group(3)=="w" ):
+        if m.group(3) == "W" or m.group(3) == "w":
             unit = 8
-        memlimitmb = int(base*mult*unit)
+        memlimitmb = int(base * mult * unit)

 # command line argument handling
 try:
-    opts, args = getopt.getopt(sys.argv[1:],
-                               "B:Cc:dF:f:hL:l:m:Nn:o:r:t:w:",
-                               ["arch=","boot=","cdrom=","clone-image","connect=","cpu=","debug","disk=","disk-bus=s","disk-format=","file=","floppy=","graphics=","help","image=","keep-clone","keep-guest","location=","log=","mac=","name=","network=","no-block","os-type=","os-variant=","ram=","validate-xml","vcpus=","vm-type="])
-except getopt.GetoptError, err:
-    sys.stderr.write(str(err)+"\n\n")
+    opts, args = getopt.getopt(
+        sys.argv[1:],
+        "B:Cc:dF:f:hL:l:m:Nn:o:r:t:w:",
+        [
+            "arch=",
+            "boot=",
+            "cdrom=",
+            "clone-image",
+            "connect=",
+            "cpu=",
+            "debug",
+            "disk=",
+            "disk-bus=",
+            "disk-format=",
+            "file=",
+            "floppy=",
+            "graphics=",
+            "help",
+            "image=",
+            "keep-clone",
+            "keep-guest",
+            "location=",
+            "log=",
+            "mac=",
+            "name=",
+            "network=",
+            "no-block",
+            "os-type=",
+            "os-variant=",
+            "ram=",
+            "validate-xml",
+            "vcpus=",
+            "vm-type=",
+        ],
+    )
+except getopt.GetoptError as err:
+    sys.stderr.write(str(err) + "\n\n")
     usage(-1)
 if opts == [] and args == []:
     show_usage = True
-if ( not show_usage ):
+if not show_usage:
     for opt in opts:
-        if ( opt[0]=="-B" or opt[0]=="--disk-bus" ):
+        if opt[0] == "-B" or opt[0] == "--disk-bus":
             diskbus = opt[1]
-        if ( opt[0]=="-C" or opt[0]=="--clone-image" ):
+        if opt[0] == "-C" or opt[0] == "--clone-image":
             cloneimg = True
-        if ( opt[0]=="-c" or opt[0]=="--cdrom" ):
+        if opt[0] == "-c" or opt[0] == "--cdrom":
             cdrom = opt[1]
-        if ( opt[0]=="-d" or opt[0]=="--debug" ):
+        if opt[0] == "-d" or opt[0] == "--debug":
             debug = True
-        if ( opt[0]=="-F" or opt[0]=="--disk-format" ):
+        if opt[0] == "-F" or opt[0] == "--disk-format":
             diskformat = opt[1]
-        if ( opt[0]=="-f" or opt[0]=="--file" or opt[0]=="--image" ):
+        if opt[0] == "-f" or opt[0] == "--file" or opt[0] == "--image":
             vmimage = opt[1]
-        if ( opt[0]=="-h" or opt[0]=="--help" ):
+        if opt[0] == "-h" or opt[0] == "--help":
             show_usage = True
-        if ( opt[0]=="-L" or opt[0]=="--log" ):
+        if opt[0] == "-L" or opt[0] == "--log":
             logfile = opt[1]
             try:
-                log = open(logfile,'w')
-            except IOError, (errno, strerror):
-                sys.stderr.write("Can't open "+logfile+" for writing: "+strerror+" (errno="+str(errno)+")\n")
+                log = open(logfile, "w")
+            except IOError as e:
+                errno, strerror = e.args
+                sys.stderr.write(
+                    "Can't open "
+                    + logfile
+                    + " for writing: "
+                    + strerror
+                    + " (errno="
+                    + str(errno)
+                    + ")\n"
+                )
                 pass
-        if ( opt[0]=="-l" or opt[0]=="--location" ):
+        if opt[0] == "-l" or opt[0] == "--location":
             location = opt[1]
-        if ( opt[0]=="-m" or opt[0]=="--mac" ):
+        if opt[0] == "-m" or opt[0] == "--mac":
             macaddress = opt[1]
-        if ( opt[0]=="-N" or opt[0]=="--no-block" ):
+        if opt[0] == "-N" or opt[0] == "--no-block":
             block = False
-        if ( opt[0]=="-n" or opt[0]=="--name" ):
+        if opt[0] == "-n" or opt[0] == "--name":
             vmname = opt[1]
-        if ( opt[0]=="-o" or opt[0]=="--os-type" ):
+        if opt[0] == "-o" or opt[0] == "--os-type":
             ostype = opt[1]
-        if ( opt[0]=="-r" or opt[0]=="--ram" ):
+        if opt[0] == "-r" or opt[0] == "--ram":
             memlimitmb = int(opt[1])
-        if ( opt[0]=="-t" or opt[0]=="--vm-type" ):
+        if opt[0] == "-t" or opt[0] == "--vm-type":
             vmtype = opt[1]
-        if ( opt[0]=="-w" or opt[0]=="--network" ):
+        if opt[0] == "-w" or opt[0] == "--network":
             networkopts = opt[1]
-        if ( opt[0]=="--arch" ):
+        if opt[0] == "--arch":
             arch = opt[1]
-        if ( opt[0]=="--boot" ):
+        if opt[0] == "--boot":
             bootopts = opt[1]
-        if ( opt[0]=="--connect" ):
+        if opt[0] == "--connect":
             hypervisor = opt[1]
-        if ( opt[0]=="--cpu" ):
+        if opt[0] == "--cpu":
             cputype = opt[1]
-        if ( opt[0]=="--disk" ):
+        if opt[0] == "--disk":
             otherdisks.append(opt[1])
-        if ( opt[0]=="--floppy" ):
+        if opt[0] == "--floppy":
             floppy = opt[1]
-        if ( opt[0]=="--graphics" ):
+        if opt[0] == "--graphics":
             graphics = opt[1]
-        if ( opt[0]=="--keep-clone" ):
+        if opt[0] == "--keep-clone":
             keepclone = True
-        if ( opt[0]=="--keep-guest" ):
+        if opt[0] == "--keep-guest":
             keepguest = True
-        if ( opt[0]=="--network" ):
+        if opt[0] == "--network":
             networks.append(opt[1])
-        if ( opt[0]=="--os-variant" ):
+        if opt[0] == "--os-variant":
             osvariant = opt[1]
-        if ( opt[0]=="--validate-xml" ):
+        if opt[0] == "--validate-xml":
             validatexml = True
-        if ( opt[0]=="--vcpus" ):
+        if opt[0] == "--vcpus":
             vcores = int(opt[1])
-if ( show_usage ):
+if show_usage:
     usage(exitcode)
 log.flush()

 # at this point, there had better only be at most one element in args, and
 # it had better be a VM image...
-if ( len(args)>1 ):
-    sys.stderr.write("Too many arguments in \""+" ".join(sys.argv)+"\"\n")
+if len(args) > 1:
+    sys.stderr.write('Too many arguments in "' + " ".join(sys.argv) + '"\n')
     usage(-2)
-if ( vmimage is None ):
+if vmimage is None:
     vmimage = args[0]
-if ( not os.path.exists(vmimage) ):
-    sys.stderr.write("VM image not found: "+vmimage+"\n")
+if not os.path.exists(vmimage):
+    sys.stderr.write("VM image not found: " + vmimage + "\n")
     sys.exit(-4)

 # clone the VM image, if needed
-if ( cloneimg ):
-    newimg = "/tmp/"+str(uuid.uuid3(uuid.NAMESPACE_DNS,vmname))+".img"
-    log.write("Cloning "+vmimage+" into "+newimg+"\n")
-    os.system("dd if="+vmimage+" of="+newimg+" bs=1M")
+if cloneimg:
+    newimg = "/tmp/" + str(uuid.uuid3(uuid.NAMESPACE_DNS, vmname)) + ".img"
+    log.write("Cloning " + vmimage + " into " + newimg + "\n")
+    os.system("dd if=" + vmimage + " of=" + newimg + " bs=1M")
     vmimage = newimg

 # connect to the hypervisor
-if ( conn is None ):
+if conn is None:
     conn = connect(hypervisor)
-    if ( conn is None and hypervisor=="qemu:///session" ):
+    if conn is None and hypervisor == "qemu:///session":
         # work around a bug in some older libvirtd versions WRT qemu:///session
         # by starting libvirtd as user manually and then retrying connection
         libvirtdpid = os.fork()
-        if ( libvirtdpid==0 ):
-            os.environ['PATH'] = "/sbin:/usr/sbin:"+os.environ['PATH']
-            if ( which("libvirtd") is not None ):
+        if libvirtdpid == 0:
+            os.environ["PATH"] = "/sbin:/usr/sbin:" + os.environ["PATH"]
+            if which("libvirtd") is not None:
                 cmd = "libvirtd"
                 args = [cmd]
-                os.execvp(cmd,args)
+                os.execvp(cmd, args)
             else:
-                log.write("Could not find libvirtd in PATH="+os.environ['PATH']+"\n")
+                log.write(
+                    "Could not find libvirtd in PATH=" + os.environ["PATH"] + "\n"
+                )
                 sys.exit(-8)
         time.sleep(5)
         conn = connect(hypervisor)
-        if ( conn is None ):
-            log.write("Unable to connect to hypervisor "+hypervisor+"\n")
+        if conn is None:
+            log.write("Unable to connect to hypervisor " + hypervisor + "\n")
             sys.exit(-8)

 # figure out a guest name, if one isn't already set
-if ( vmname is None ):
+if vmname is None:
     vmnum = 0
     for vmid in conn.listDomainsID():
         thisvm = conn.lookupByID(vmid).name()
-        if ( jobid+"-"+hostname+"-vm" in thisvm ):
-            m = re.match("^"+jobid+"-"+hostname+"-vm(\d+)",thisvm)
+        if jobid + "-" + hostname + "-vm" in thisvm:
+            m = re.match("^" + jobid + "-" + hostname + r"-vm(\d+)", thisvm)
             thisvmnum = int(m.group(1))
-            if ( thisvmnum>=vmnum ):
-                vmnum=thisvmnum+1
-    vmname = jobid+"-"+hostname+"-vm"+str(vmnum)
+            if thisvmnum >= vmnum:
+                vmnum = thisvmnum + 1
+    vmname = jobid + "-" + hostname + "-vm" + str(vmnum)

 # fork and start VM in child
-childpid = os.fork()
-if ( childpid==0 ):
+childpid = os.fork()
+if childpid == 0:
     # if there's no MAC address set yet, see if there's one set in VM image
-    if ( macaddress is None ):
+    if macaddress is None:
         try:
             # from http://libguestfs.org/guestfs-python.3.html
             import guestfs
+
             g = guestfs.GuestFS(python_return_dict=True)
-            g.add_drive_opts(vmimage,format=diskformat,readonly=1)
+            g.add_drive_opts(vmimage, format=diskformat, readonly=1)
             g.launch()
             roots = g.inspect_os()
-            if ( len(roots)>0 ):
+            if len(roots) > 0:
                 for root in roots:
                     # figure out distro
                     distro = g.inspect_get_distro(root)
                     # "mount" file systems
                     mps = g.inspect_get_mountpoints(root)
-                    def compare(a, b): return len(a) - len(b)
-                    for device in sorted (mps.keys(),compare):
+
+                    # sort mount points shortest-first so parents are mounted
+                    # before their children; Python 3's sorted() takes a key
+                    # function rather than a cmp function
+                    for device in sorted(mps.keys(), key=len):
                         try:
-                            g.mount_ro(mps[device],device)
-                        except RuntimeError as msg:
+                            g.mount_ro(mps[device], device)
+                        except RuntimeError:
                             pass
                     iffile = None
-                    if ( distro in ["ubuntu"] ):
+                    if distro in ["ubuntu"]:
                         iffile = "/etc/network/interfaces"
-                    elif ( distro in ["redhat","centos","fedora"] ):
-                        if ( g.exists("/etc/sysconfig/network-scripts/ifcfg-em1") ):
+                    elif distro in ["redhat", "centos", "fedora"]:
+                        if g.exists("/etc/sysconfig/network-scripts/ifcfg-em1"):
                             iffile = "/etc/sysconfig/network-scripts/ifcfg-em1"
                         else:
                             iffile = "/etc/sysconfig/network-scripts/ifcfg-eth0"
-                    elif ( distro in ["suse","sles"] ):
+                    elif distro in ["suse", "sles"]:
                         iffile = "/etc/sysconfig/network/ifcfg-eth0"
-                    if ( distro in ["ubuntu"] ):
+                    if distro in ["ubuntu"]:
                         # not sure how to do this for ubuntu
                         pass
                     else:
-                        for line in g.egrep("^HWADDR=",iffile):
+                        for line in g.egrep("^HWADDR=", iffile):
                             macaddress = line.split("=")[1]
                             # strip out quotes, if there are any
-                            macaddress = macaddress.replace('"','').lower()
+                            macaddress = macaddress.replace('"', "").lower()
                 # unmount everything
                 g.umount_all()
             g.close()
-        except:
+        except Exception:
             pass
     # generate a default MAC addr if we don't have one at this point
-    if ( macaddress is None ):
+    if macaddress is None:
         macaddress = randomMAC()
-    
+
     # generate XML description of VM
     xml = minidom.Document()
     domnode = xml.createElement("domain")
-    domnode.setAttribute("type","kvm")
+    domnode.setAttribute("type", "kvm")
     namenode = xml.createElement("name")
     namenode.appendChild(xml.createTextNode(vmname))
     domnode.appendChild(namenode)
     memnode = xml.createElement("memory")
-    memnode.appendChild(xml.createTextNode(str(1024*memlimitmb)))
+    memnode.appendChild(xml.createTextNode(str(1024 * memlimitmb)))
     domnode.appendChild(memnode)
     vcpunode = xml.createElement("vcpu")
     vcpunode.appendChild(xml.createTextNode(str(vcores)))
     domnode.appendChild(vcpunode)
     osnode = xml.createElement("os")
-    ostypenode = xml.createElement("type")
-    ostypenode.setAttribute("arch",arch)
+    ostypenode = xml.createElement("type")
+    ostypenode.setAttribute("arch", arch)
     ostypenode.appendChild(xml.createTextNode(vmtype))
     osnode.appendChild(ostypenode)
     osbootnode = xml.createElement("boot")
-    if ( bootopts is None ):
-        osbootnode.setAttribute("dev","hd")
+    if bootopts is None:
+        osbootnode.setAttribute("dev", "hd")
     else:
-        osbootnode.setAttribute("dev",bootopts.split(",")[0])
+        osbootnode.setAttribute("dev", bootopts.split(",")[0])
     osnode.appendChild(osbootnode)
-    if ( bootopts is not None and "," in bootopts ):
+    if bootopts is not None and "," in bootopts:
         for bootopt in bootopts.split(",")[1:]:
             thisbootnode = xml.createElement("boot")
-            thisbootnode.setAttribute("dev",bootopt)
+            thisbootnode.setAttribute("dev", bootopt)
             osnode.appendChild(thisbootnode)
     domnode.appendChild(osnode)
     featurenode = xml.createElement("features")
@@ -481,50 +543,50 @@ if ( childpid==0 ):
     featurenode.appendChild(xml.createElement("pae"))
     domnode.appendChild(featurenode)
     cpunode = xml.createElement("cpu")
-    if ( cputype=="host" ):
-        cpunode.setAttribute("mode","host-model")
-    elif ( cputype is not None ):
+    if cputype == "host":
+        cpunode.setAttribute("mode", "host-model")
+    elif cputype is not None:
         elt = cputype.split(",")
         cpumodel = elt[0]
-        if ( len(elt)>1 ):
-            cpufeatures = elt[1:]
-        cpunode.setAttribute("match","exact")
+        # elt[1:] is simply an empty list when no features were given, so
+        # cpufeatures is always bound before the loop below
+        cpufeatures = elt[1:]
+        cpunode.setAttribute("match", "exact")
         cpumodelnode = xml.createElement("model")
         cpumodelnode.appendChild(xml.createTextNode(cpumodel))
         cpunode.appendChild(cpumodelnode)
         for cpufeature in cpufeatures:
-            if ( "vendor=" in cpufeature ):
+            if "vendor=" in cpufeature:
                 vendor = cpufeature.split("=")[1]
-                if ( vendor is not None ):
+                if vendor is not None:
                     vendornode = xml.createElement("vendor")
                     vendornode.appendChild(xml.createTextNode(vendor))
                     cpunode.appendChild(vendornode)
-            elif ( "match=" in cpufeature ):
+            elif "match=" in cpufeature:
                 match = cpufeature.split("=")[1]
-                if ( match is not None ):
-                    cpunode.setAttribute("match",match)
-            elif ( "+" in cpufeature or "-" in cpufeature ):
-                if ( "+" in cpufeature ):
+                if match is not None:
+                    cpunode.setAttribute("match", match)
+            elif "+" in cpufeature or "-" in cpufeature:
+                if "+" in cpufeature:
                     policy = "force"
                     feature = cpufeature.split("+")[1]
-                elif ( "-" in cpufeature ):
+                elif "-" in cpufeature:
                     policy = "disable"
                     feature = cpufeature.split("-")[1]
-                if ( feature is not None ):
+                if feature is not None:
                     featurenode = xml.createElement("feature")
-                    featurenode.setAttribute("policy",policy)
-                    featurenode.setAttribute("name",feature)
+                    featurenode.setAttribute("policy", policy)
+                    featurenode.setAttribute("name", feature)
                     cpunode.appendChild(featurenode)
             else:
-                [policy,feature] = cpufeature.split("=")
-                if ( policy is not None and feature is not None ):
+                [policy, feature] = cpufeature.split("=")
+                if policy is not None and feature is not None:
                     featurenode = xml.createElement("feature")
-                    featurenode.setAttribute("policy",policy)
-                    featurenode.setAttribute("name",feature)
+                    featurenode.setAttribute("policy", policy)
+                    featurenode.setAttribute("name", feature)
                     cpunode.appendChild(featurenode)
         domnode.appendChild(cpunode)
     clocknode = xml.createElement("clock")
-    clocknode.setAttribute("offset","utc")
+    clocknode.setAttribute("offset", "utc")
     domnode.appendChild(clocknode)
     offnode = xml.createElement("on_poweroff")
     offnode.appendChild(xml.createTextNode("destroy"))
@@ -539,89 +601,89 @@ if ( childpid==0 ):
     emunode = xml.createElement("emulator")
     emunode.appendChild(xml.createTextNode(qemukvm))
     devnode.appendChild(emunode)
-    if ( floppy is not None ):
+    if floppy is not None:
         floppynode = xml.createElement("disk")
-        floppynode.setAttribute("type","file")
-        floppynode.setAttribute("device","floppy")
+        floppynode.setAttribute("type", "file")
+        floppynode.setAttribute("device", "floppy")
         fdrvnode = xml.createElement("driver")
-        fdrvnode.setAttribute("name","qemu")
+        fdrvnode.setAttribute("name", "qemu")
         floppynode.appendChild(fdrvnode)
         fsrcnode = xml.createElement("source")
-        fsrcnode.setAttribute("file",floppy)
+        fsrcnode.setAttribute("file", floppy)
         floppynode.appendChild(fsrcnode)
         ftgtnode = xml.createElement("target")
-        ftgtnode.setAttribute("dev","fda")
-        ftgtnode.setAttribute("bus","fdc")
+        ftgtnode.setAttribute("dev", "fda")
+        ftgtnode.setAttribute("bus", "fdc")
         floppynode.appendChild(ftgtnode)
         devnode.appendChild(floppynode)
     disknode = xml.createElement("disk")
-    disknode.setAttribute("type","file")
-    disknode.setAttribute("device","disk")
+    disknode.setAttribute("type", "file")
+    disknode.setAttribute("device", "disk")
     ddrvnode = xml.createElement("driver")
-    ddrvnode.setAttribute("name","qemu")
-    ddrvnode.setAttribute("type",diskformat)
+    ddrvnode.setAttribute("name", "qemu")
+    ddrvnode.setAttribute("type", diskformat)
     disknode.appendChild(ddrvnode)
     dsrcnode = xml.createElement("source")
     # libvirtd needs an absolute path to the image file
-    if ( vmimage.startswith("/") ):
-        dsrcnode.setAttribute("file",vmimage)
+    if vmimage.startswith("/"):
+        dsrcnode.setAttribute("file", vmimage)
     else:
         cwd = os.getcwd()
-        dsrcnode.setAttribute("file",cwd+"/"+vmimage)
+        dsrcnode.setAttribute("file", cwd + "/" + vmimage)
     disknode.appendChild(dsrcnode)
     dtgtnode = xml.createElement("target")
-    dtgtnode.setAttribute("bus",diskbus)
-    if ( diskbus=="virtio" ):
-        dtgtnode.setAttribute("dev","vda")
-    elif ( diskbus in ["scsi","usb"] ):
-        dtgtnode.setAttribute("dev","sda")
-    elif ( diskbus in ["ide","sata"] ):
-        dtgtnode.setAttribute("dev","hda")
+    dtgtnode.setAttribute("bus", diskbus)
+    if diskbus == "virtio":
+        dtgtnode.setAttribute("dev", "vda")
+    elif diskbus in ["scsi", "usb"]:
+        dtgtnode.setAttribute("dev", "sda")
+    elif diskbus in ["ide", "sata"]:
+        dtgtnode.setAttribute("dev", "hda")
     disknode.appendChild(dtgtnode)
     devnode.appendChild(disknode)
-    if ( cdrom is not None ):
+    if cdrom is not None:
         cdromnode = xml.createElement("disk")
-        cdromnode.setAttribute("type","file")
-        cdromnode.setAttribute("device","cdrom")
+        cdromnode.setAttribute("type", "file")
+        cdromnode.setAttribute("device", "cdrom")
         cdrvnode = xml.createElement("driver")
-        cdrvnode.setAttribute("name","qemu")
+        cdrvnode.setAttribute("name", "qemu")
         cdromnode.appendChild(cdrvnode)
         csrcnode = xml.createElement("source")
-        csrcnode.setAttribute("file",cdrom)
+        csrcnode.setAttribute("file", cdrom)
         cdromnode.appendChild(csrcnode)
         ctgtnode = xml.createElement("target")
-        ctgtnode.setAttribute("dev","hdc")
-        ctgtnode.setAttribute("bus","ide")
+        ctgtnode.setAttribute("dev", "hdc")
+        ctgtnode.setAttribute("bus", "ide")
         cdromnode.appendChild(ctgtnode)
         crdonlynode = xml.createElement("readonly")
         cdromnode.appendChild(crdonlynode)
         devnode.appendChild(cdromnode)
     # how to handle other disk devices?
     # how to handle MAC addresses and other network settings?
-    if ( len(networks)==0 ):
+    if len(networks) == 0:
         ifnode = xml.createElement("interface")
-        ifnode.setAttribute("type","user")
+        ifnode.setAttribute("type", "user")
         macnode = xml.createElement("mac")
-        macnode.setAttribute("address",macaddress)
+        macnode.setAttribute("address", macaddress)
         ifnode.appendChild(macnode)
         devnode.appendChild(ifnode)
     inputnode = xml.createElement("input")
-    inputnode.setAttribute("type","mouse")
-    inputnode.setAttribute("bus","ps2")
+    inputnode.setAttribute("type", "mouse")
+    inputnode.setAttribute("bus", "ps2")
     devnode.appendChild(inputnode)
     graphicsnode = xml.createElement("graphics")
-    if ( graphics is not None ):
-        graphicsnode.setAttribute("type",graphics)
+    if graphics is not None:
+        graphicsnode.setAttribute("type", graphics)
     else:
-        graphicsnode.setAttribute("type","vnc")
-    graphicsnode.setAttribute("port","-1")
+        graphicsnode.setAttribute("type", "vnc")
+    graphicsnode.setAttribute("port", "-1")
     devnode.appendChild(graphicsnode)
     consolenode = xml.createElement("console")
-    consolenode.setAttribute("type","pty")
+    consolenode.setAttribute("type", "pty")
     devnode.appendChild(consolenode)
     videonode = xml.createElement("video")
     modelnode = xml.createElement("model")
-    modelnode.setAttribute("type","cirrus")
+    modelnode.setAttribute("type", "cirrus")
     videonode.appendChild(modelnode)
     devnode.appendChild(videonode)
     domnode.appendChild(devnode)
@@ -629,88 +691,119 @@ if ( childpid==0 ):

     # save XML, if possible
     try:
-        #xml.writexml(open("/tmp/"+vmname+".xml","w"),""," ","\n")
-        xml.writexml(open("/tmp/"+vmname+".xml","w"))
-        log.write("Saved guest description as /tmp/"+vmname+".xml\n")
-    except:
-        log.write("Failed to save guest description as /tmp/"+vmname+".xml\n")
+        # xml.writexml(open("/tmp/"+vmname+".xml","w"),""," ","\n")
+        xml.writexml(open("/tmp/" + vmname + ".xml", "w"))
+        log.write("Saved guest description as /tmp/" + vmname + ".xml\n")
vmname + ".xml\n") + except Exception: + log.write("Failed to save guest description as /tmp/" + vmname + ".xml\n") # 'twould be nice to validate the XML here... - if ( validatexml and - os.path.exists("/tmp/"+vmname+".xml") and - which("virt-xml-validate") is not None ): + if ( + validatexml + and os.path.exists("/tmp/" + vmname + ".xml") + and which("virt-xml-validate") is not None + ): try: - exitcode = os.system("virt-xml-validate /tmp/"+vmname+".xml") - except: - raise RuntimeError("virt-xml-validate /tmp/"+vmname+".xml failed") - if ( exitcode!=0 ): + exitcode = os.system("virt-xml-validate /tmp/" + vmname + ".xml") + except Exception: + raise RuntimeError("virt-xml-validate /tmp/" + vmname + ".xml failed") + if exitcode != 0: sys.exit(exitcode) # start VM try: - guestvm = conn.createXML(xml.toxml(),0) - log.write("Domain "+vmname+" created from /tmp/"+vmname+".xml\n") - except libvirt.libvirtError as e: - log.write("Failed to start guest "+vmname+" in hypervisor "+hypervisor+", trying alternate method.\n") + guestvm = conn.createXML(xml.toxml(), 0) + log.write("Domain " + vmname + " created from /tmp/" + vmname + ".xml\n") + except libvirt.libvirtError: + log.write( + "Failed to start guest " + + vmname + + " in hypervisor " + + hypervisor + + ", trying alternate method.\n" + ) try: - os.system("virsh -c "+hypervisor+" create /tmp/"+vmname+".xml") - except: + os.system("virsh -c " + hypervisor + " create /tmp/" + vmname + ".xml") + except Exception: log.write("Alternate guest startup method failed, exiting.\n") sys.exit(-1) - log.write("To connect to VM text console, run \"virsh --connect="+hypervisor+" console "+vmname+"\"\n") - if ( "DISPLAY" in os.environ.keys() ): - log.write("To connect to VM graphical console, run \"virt-viewer --connect="+hypervisor+" --wait "+vmname+"\"\n") - log.write("To terminate VM, run \"virsh --connect="+hypervisor+" destroy "+vmname+"\"\n") + log.write( + 'To connect to VM text console, run "virsh --connect=' + + hypervisor + + " console " + + vmname + + '"\n' + ) + if "DISPLAY" in os.environ.keys(): + log.write( + 'To connect to VM graphical console, run "virt-viewer --connect=' + + hypervisor + + " --wait " + + vmname + + '"\n' + ) + log.write( + 'To terminate VM, run "virsh --connect=' + + hypervisor + + " destroy " + + vmname + + '"\n' + ) # set signal handler in case the job ends before the VM shuts down # or user hits ^C - signal.signal(signal.SIGINT,cleanup) - signal.signal(signal.SIGTERM,cleanup) + signal.signal(signal.SIGINT, cleanup) + signal.signal(signal.SIGTERM, cleanup) # give the VM some time for its network interface to come up time.sleep(15) # give job owner some hints about how to connect to VM - if ( macaddress is not None ): + if macaddress is not None: ipaddr = None - arp = "arp | grep -i "+macaddress+" | awk '{print $1}' | head -1" + arp = "arp | grep -i " + macaddress + " | awk '{print $1}' | head -1" fd = os.popen(arp) for line in fd.readlines(): ipaddr = line[:-1] fd.close() - if ( ipaddr is not None ): - log.write("VM MAC address "+macaddress+" has IP address "+ipaddr+"\n") + if ipaddr is not None: + log.write( + "VM MAC address " + macaddress + " has IP address " + ipaddr + "\n" + ) else: - log.write("VM MAC address "+macaddress+" does not appear in host ARP table (normal for hypervisor=qemu:///session)\n") + log.write( + "VM MAC address " + + macaddress + + " does not appear in host ARP table (normal for hypervisor=qemu:///session)\n" + ) else: log.write("No MAC address set\n") log.flush() # wait for VM to end - if ( block 
-    if ( block ):
+    if block:
         try:
-            while ( guestvm.isActive() ):
+            while guestvm.isActive():
                 time.sleep(1)
-        except:
+        except Exception:
             # the guest probably went away while we were sleeping;
             # do nothing
             pass
     sys.exit(0)

 # put resulting process in cpuset, if possible
-if ( os.path.exists(cpusetroot+"/"+cpuset+"/tasks") and
-     os.access(cpusetroot+"/"+cpuset+"/tasks",os.W_OK) ):
-    fd = open(cpusetroot+"/"+cpuset+"/tasks",'w')
+# (guard against cpusetroot/cpuset being None on hosts with no cpuset mounted,
+# which would otherwise raise a TypeError on the string concatenation)
+if (
+    cpusetroot is not None
+    and cpuset is not None
+    and os.path.exists(cpusetroot + "/" + cpuset + "/tasks")
+    and os.access(cpusetroot + "/" + cpuset + "/tasks", os.W_OK)
+):
+    fd = open(cpusetroot + "/" + cpuset + "/tasks", "w")
     fd.write(str(childpid))
     fd.close()
-if ( block ):
+if block:
     # wait on child
-    status = os.waitpid(childpid,0)
+    status = os.waitpid(childpid, 0)
     exit(status[0])
 else:
     # wait on guest network to come up before exiting
     time.sleep(17)
     exit(exitcode)
-
-
diff --git a/bin/jobarray-to-pcp b/bin/jobarray-to-pcp
index 139fa4e..545e575 100755
--- a/bin/jobarray-to-pcp
+++ b/bin/jobarray-to-pcp
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 #
 # jobarray-to-pcp: Run the equivalent of a TORQUE job array using
 # parallel-command-processor.
@@ -16,109 +16,124 @@ import re
 import subprocess
 import sys

-class request():
-    def __init__(self):
-        self.values = {}
-        self.set('shell',"/bin/bash")
-        if ( "SHELL" in os.environ.keys() ):
-            self.set('shell',os.environ["SHELL"])
-        self.set('qsub_args',[])
-        self.set('ranges',[])
-        self.set('jobname',None)
-        self.set('outfile',None)
-        self.set('errfile',None)
-        self.set('join',None)
-        self.set('pbs_header',"PBS")
-        self.set('tpn',None)
-        self.set('test_mode',False)
-    def set(self,key,value):
-        self.values[key] = value
-    def get(self,key):
-        if ( key not in self.values.keys() ):
-            raise ValueError("Unknown key '"+key+"'")
-        return self.values[key]
-    def keys(self):
-        return self.values.keys()
-    def add_range(self,thisrange):
-        self.values['ranges'].append(thisrange)
-    def add_qsub_arg(self,arg):
-        self.values['qsub_args'].append(arg)
-    def merge(self,request2):
-        # copy unique values from request2
-        for key in request2.values.keys():
-            if ( key not in self.values.keys() or self.get(key) is None ):
-                self.set(key,request2.get(key))
-        # merge the qsub_args values, starting with those from request2,
-        # as the later values will take precedence
-        merged_args = []
-        for arg in request2.values['qsub_args']:
-            merged_args.append(arg)
-        for arg in self.values['qsub_args']:
-            merged_args.append(arg)
-        self.set('qsub_args',merged_args)
+
+class request:
+    def __init__(self):
+        self.values = {}
+        self.set("shell", "/bin/bash")
+        if "SHELL" in os.environ.keys():
+            self.set("shell", os.environ["SHELL"])
+        self.set("qsub_args", [])
+        self.set("ranges", [])
+        self.set("jobname", None)
+        self.set("outfile", None)
+        self.set("errfile", None)
+        self.set("join", None)
+        self.set("pbs_header", "PBS")
+        self.set("tpn", None)
+        self.set("test_mode", False)
+
+    def set(self, key, value):
+        self.values[key] = value
+
+    def get(self, key):
+        if key not in self.values.keys():
+            raise ValueError("Unknown key '" + key + "'")
+        return self.values[key]
+
+    def keys(self):
+        return self.values.keys()
+
+    def add_range(self, thisrange):
+        self.values["ranges"].append(thisrange)
+
+    def add_qsub_arg(self, arg):
+        self.values["qsub_args"].append(arg)
+
+    def merge(self, request2):
+        # copy unique values from request2
+        for key in request2.values.keys():
+            if key not in self.values.keys() or self.get(key) is None:
+                self.set(key, request2.get(key))
+        # merge the qsub_args values, starting with those from request2,
+        # as the later values will take precedence
+        merged_args = []
+        for arg in request2.values["qsub_args"]:
+            merged_args.append(arg)
+        for arg in self.values["qsub_args"]:
+            merged_args.append(arg)
+        self.set("qsub_args", merged_args)


 def usage():
-    sys.stderr.write("jobarray-to-pcp:\tRun the equivalent of a TORQUE job array using\n\t\t\tparallel-command-processor (PCP).\n\n")
-    sys.stderr.write("Usage: jobarray-to-pcp [args] [qsub args] <script>\n\n")
-    sys.stderr.write("Arguments:\n")
-    sys.stderr.write("\t--help\t\t\t\tPrint this help message.\n")
-    sys.stderr.write("\t--tpn N, --tasks-per-node=N\tRun N instances of PCP per node.\n\t\t\t\t\t(default is 1 per core)\n")
-    sys.stderr.write("\t--test\t\t\t\tPrint job script to stdout rather than\n\t\t\t\t\tsubmit it.\n")
-    sys.exit(1)
-
-
-def parse_args(arglist,request):
-    # command line/script argument processing
-    try:
-        opts, args = getopt.getopt(arglist,
-            "A:C:IM:N:P:S:T:VW:Xa:b:c:d:e:fhj:k:l:m:no:p:q:r:t:u:v:w:xz",
-            ["help","tasks-per-node=","test","tpn="])
-    except getopt.GetoptError, err:
-        sys.stderr.write(str(err)+"\n")
-        usage()
-    for opt in opts:
-        if ( opt[0]=="-C" ):
-            request.set('pbs_header',opt[1])
-            request.add_qsub_arg(opt[0])
-            request.add_qsub_arg(opt[1])
-        elif ( opt[0]=="-N" ):
-            request.set('jobname',opt[1])
-            request.add_qsub_arg(opt[0])
-            request.add_qsub_arg(opt[1])
-        elif ( opt[0]=="-e" ):
-            request.set('errfile',opt[1])
-            request.add_qsub_arg(opt[0])
-            request.add_qsub_arg(opt[1])
-        elif ( opt[0]=="-j" ):
-            request.set("join",opt[1])
-            request.add_qsub_arg(opt[0])
-            request.add_qsub_arg(opt[1])
-        elif ( opt[0]=="-o" ):
-            request.set('outfile',opt[1])
-            request.add_qsub_arg(opt[0])
-            request.add_qsub_arg(opt[1])
-        elif ( opt[0]=="-t" ):
-            for thisrange in opt[1].split(","):
-                if ( "-" in thisrange ):
-                    (taskstart,taskend) = thisrange.split("-",1)
-                    taskstart = int(taskstart)
-                    taskend = int(taskend)
+    sys.stderr.write(
+        "jobarray-to-pcp:\tRun the equivalent of a TORQUE job array using\n\t\t\tparallel-command-processor (PCP).\n\n"
+    )
+    sys.stderr.write("Usage: jobarray-to-pcp [args] [qsub args] <script>\n\n")
+    sys.stderr.write("Arguments:\n")
+    sys.stderr.write("\t--help\t\t\t\tPrint this help message.\n")
+    sys.stderr.write(
+        "\t--tpn N, --tasks-per-node=N\tRun N instances of PCP per node.\n\t\t\t\t\t(default is 1 per core)\n"
+    )
+    sys.stderr.write(
+        "\t--test\t\t\t\tPrint job script to stdout rather than\n\t\t\t\t\tsubmit it.\n"
+    )
+    sys.exit(1)


+def parse_args(arglist, request):
+    # command line/script argument processing
+    try:
+        opts, args = getopt.getopt(
+            arglist,
+            "A:C:IM:N:P:S:T:VW:Xa:b:c:d:e:fhj:k:l:m:no:p:q:r:t:u:v:w:xz",
+            ["help", "tasks-per-node=", "test", "tpn="],
+        )
+    except getopt.GetoptError as err:
+        sys.stderr.write(str(err) + "\n")
+        usage()
+    for opt in opts:
+        if opt[0] == "-C":
+            request.set("pbs_header", opt[1])
+            request.add_qsub_arg(opt[0])
+            request.add_qsub_arg(opt[1])
+        elif opt[0] == "-N":
+            request.set("jobname", opt[1])
+            request.add_qsub_arg(opt[0])
+            request.add_qsub_arg(opt[1])
+        elif opt[0] == "-e":
+            request.set("errfile", opt[1])
+            request.add_qsub_arg(opt[0])
+            request.add_qsub_arg(opt[1])
+        elif opt[0] == "-j":
+            request.set("join", opt[1])
+            request.add_qsub_arg(opt[0])
+            request.add_qsub_arg(opt[1])
+        elif opt[0] == "-o":
+            request.set("outfile", opt[1])
+            request.add_qsub_arg(opt[0])
+            request.add_qsub_arg(opt[1])
+        elif opt[0] == "-t":
+            for thisrange in opt[1].split(","):
+                if "-" in thisrange:
+                    (taskstart, taskend) = thisrange.split("-", 1)
+                    taskstart = int(taskstart)
+                    taskend = int(taskend)
+                else:
+                    taskstart = int(thisrange)
+                    taskend = int(taskstart)
+                request.add_range([taskstart, taskend])
+        elif opt[0] == "--tasks-per-node" or opt[0] == "--tpn":
+            request.set("tpn", int(opt[1]))
+        elif opt[0] == "--test":
+            request.set("test_mode", True)
+        elif opt[0] == "--help":
+            usage()
         else:
-            taskstart = int(thisrange)
-            taskend = int(taskstart)
-        request.add_range([taskstart,taskend])
-        elif ( opt[0]=="--tasks-per-node" or opt[0]=="--tpn" ):
-            request.set('tpn',int(opt[1]))
-        elif ( opt[0]=="--test" ):
-            request.set('test_mode',True)
-        elif ( opt[0]=="--help" ):
-            usage()
-        else:
-            request.add_qsub_arg(opt[0])
-            if ( len(opt)>1 ):
-                request.add_qsub_arg(opt[1])
-    return args
+            request.add_qsub_arg(opt[0])
+            if len(opt) > 1:
+                request.add_qsub_arg(opt[1])
+    return args

 # Main program starts here
@@ -127,7 +142,9 @@ def parse_args(arglist,request):
 # * getting parallel-command-processor into $PATH
 # * setting $jobid (just the numeric jobid, possibly needed for output
 #   file naming)
-jobstart = "cd $PBS_O_WORKDIR\nmodule load pcp\njobid=`echo $PBS_JOBID | sed 's/\..*$//'`\n"
+jobstart = (
+    "cd $PBS_O_WORKDIR\nmodule load pcp\njobid=`echo $PBS_JOBID | sed 's/\\..*$//'`\n"
+)
 jobend = "\n"

 # site specific MPI launcher settings:
 mpiexec = "mpiexec"
 mpiexec_tpn_arg = "-ppn"
@@ -138,15 +155,15 @@ mpiexec_tpn_arg = "-ppn"

 # command line argument processing
 req = request()
-args = parse_args(sys.argv[1:],req)
+args = parse_args(sys.argv[1:], req)

 # make sure jobarray script exists
-if ( len(args)>0 ):
-    jobarrayscript = args[0]
-    if ( not (os.path.exists(jobarrayscript) ) ):
-        raise IOError(jobarrayscript+": file not found")
+if len(args) > 0:
+    jobarrayscript = args[0]
+    if not (os.path.exists(jobarrayscript)):
+        raise IOError(jobarrayscript + ": file not found")
 else:
-    raise RuntimeError("No job array script specified")
+    raise RuntimeError("No job array script specified")

 # grovel options out of job array script
 script_args = []
@@ -154,20 +171,20 @@ fp = open(jobarrayscript)
 lines = fp.readlines()
 fp.close()
 for line in lines:
-    if ( line.startswith("#"+req.get('pbs_header')) ):
-        for elt in line.rstrip("\n").split()[1:]:
-            if ( elt.startswith("#") ):
+    if line.startswith("#" + req.get("pbs_header")):
+        for elt in line.rstrip("\n").split()[1:]:
+            if elt.startswith("#"):
+                break
+            else:
+                script_args.append(elt)
+    # emulate qsub behavior of only reading PBS headers up to the first
+    # non-comment, non-whitespace line
+    elif not line.startswith("#") and not (re.match(r"^\s*$", line)):
         break
-            else:
-                script_args.append(elt)
-    # emulate qsub behavior of only reading PBS headers up to the first
-    # non-comment, non-whitespace line
-    elif ( not line.startswith("#") and not ( re.match('^\s*$',line) ) ):
-        break

 # parse script arguments
 script_req = request()
-parse_args(script_args,script_req)
+parse_args(script_args, script_req)

 # combine script request with the command line request
 req.merge(script_req)
@@ -176,63 +193,73 @@ req.merge(script_req)
 # WRT stdout and stderr files
 # basically "PBS_ARRAYID=# $SHELL $JOBARRAYSCRIPT"
 pcp_cfg = ""
-if ( req.get('jobname') is None ):
-    req.set('jobname',os.path.basename(jobarrayscript))
-jobname = req.get('jobname')
-join = req.get('join')
-outfile = req.get('outfile')
-errfile = req.get('errfile')
-shell = req.get('shell')
-for thisrange in req.get('ranges'):
-    for taskid in range(thisrange[0],thisrange[1]+1):
-        stdout = None
-        stderr = None
-        if ( join is None or join=="n" ):
-            # seperate stdout and stderr
-            if ( outfile is None ):
">"+jobname+".o${jobid}-"+str(taskid) - else: - stdout = ">"+outfile+"-"+str(taskid) - if ( errfile is None ): - stderr = "2>"+jobname+".e${jobid}-"+str(taskid) - else: - stderr = "2>"+errfile+"-"+str(taskid) - elif ( join=="oe" ): - # combine into stdout - if ( outfile is None ): - stdout = ">"+jobname+".o${jobid}-"+str(taskid) - else: - stdout = ">"+outfile+"-"+str(taskid) - stderr = "2>&1" - elif ( join=="eo" ): - # combine into stderr - if ( errfile is None ): - stdout = ">"+jobname+".e${jobid}-"+str(taskid) - else: - stdout = ">"+errfile+"-"+str(taskid) - stderr = "2>&1" - pcp_cfg += "PBS_ARRAYID="+str(taskid)+" "+shell+" "+jobarrayscript+" "+stdout+" "+stderr+"\n" +if req.get("jobname") is None: + req.set("jobname", os.path.basename(jobarrayscript)) +jobname = req.get("jobname") +join = req.get("join") +outfile = req.get("outfile") +errfile = req.get("errfile") +shell = req.get("shell") +for thisrange in req.get("ranges"): + for taskid in range(thisrange[0], thisrange[1] + 1): + stdout = None + stderr = None + if join is None or join == "n": + # seperate stdout and stderr + if outfile is None: + stdout = ">" + jobname + ".o${jobid}-" + str(taskid) + else: + stdout = ">" + outfile + "-" + str(taskid) + if errfile is None: + stderr = "2>" + jobname + ".e${jobid}-" + str(taskid) + else: + stderr = "2>" + errfile + "-" + str(taskid) + elif join == "oe": + # combine into stdout + if outfile is None: + stdout = ">" + jobname + ".o${jobid}-" + str(taskid) + else: + stdout = ">" + outfile + "-" + str(taskid) + stderr = "2>&1" + elif join == "eo": + # combine into stderr + if errfile is None: + stdout = ">" + jobname + ".e${jobid}-" + str(taskid) + else: + stdout = ">" + errfile + "-" + str(taskid) + stderr = "2>&1" + pcp_cfg += ( + "PBS_ARRAYID=" + + str(taskid) + + " " + + shell + + " " + + jobarrayscript + + " " + + stdout + + " " + + stderr + + "\n" + ) # generate job script jobscript = jobstart jobscript += mpiexec -if ( "tpn" in req.keys() and req.get('tpn') is not None ): - jobscript += " "+mpiexec_tpn_arg+" "+str(req.get('tpn')) +if "tpn" in req.keys() and req.get("tpn") is not None: + jobscript += " " + mpiexec_tpn_arg + " " + str(req.get("tpn")) jobscript += " parallel-command-processor <0 ): + if len(results) > 0: for result in results: - if ( result[0] is not None ): - #sys.stdout.write("script for jobid %s\n---------------------------------\n" % jobid) + if result[0] is not None: + # sys.stdout.write("script for jobid %s\n---------------------------------\n" % jobid) sys.stdout.write(str(result[0])) - #sys.stdout.write("\n---------------------------------\n") + # sys.stdout.write("\n---------------------------------\n") else: sys.stderr.write("No script found for jobid %s\n" % jobid) else: diff --git a/bin/pbs-spark-submit b/bin/pbs-spark-submit index 20f3156..8f3c9af 100755 --- a/bin/pbs-spark-submit +++ b/bin/pbs-spark-submit @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # # pbs-spark-submit: Run an Apache Spark "job" (including optionally # starting the Spark services) inside a PBS job. 
@@ -12,7 +12,6 @@
 # $Date$
 import argparse
 import fcntl
-import getopt
 import glob
 import os
 import platform
@@ -20,191 +19,230 @@ import socket
 import struct
 import sys
 import time
-import warnings
+

 #
 # ways to launch workers
 #
 class Launcher:
-    def launch(self,cmdline,env,propagate_env=False,
-               prop_env_list=["SPARK_CONF_DIR","SPARK_LOG_DIR","SPARK_LOCAL_DIRS"],
-               wpn=1,worker_on_mother_superior=True):
+    def launch(
+        self,
+        cmdline,
+        env,
+        propagate_env=False,
+        prop_env_list=["SPARK_CONF_DIR", "SPARK_LOG_DIR", "SPARK_LOCAL_DIRS"],
+        wpn=1,
+        worker_on_mother_superior=True,
+    ):
         raise NotImplementedError
+
     def sleep(self):
         sleeptime = 5
-        if ( "PBS_NUM_NODES" in os.environ ):
-            sleeptime += 2*int(os.environ["PBS_NUM_NODES"])
-        elif ( "SLURM_JOB_NUM_NODES" in os.environ ):
-            sleeptime += 2*int(os.environ["SLURM_JOB_NUM_NODES"])
-        elif ( "SLURM_NNODES" in os.environ ):
-            sleeptime += 2*int(os.environ["SLURM_NNODES"])
+        if "PBS_NUM_NODES" in os.environ:
+            sleeptime += 2 * int(os.environ["PBS_NUM_NODES"])
+        elif "SLURM_JOB_NUM_NODES" in os.environ:
+            sleeptime += 2 * int(os.environ["SLURM_JOB_NUM_NODES"])
+        elif "SLURM_NNODES" in os.environ:
+            sleeptime += 2 * int(os.environ["SLURM_NNODES"])
         time.sleep(sleeptime)
-    def env_list(env,prop_env_list):
+
+    # @staticmethod keeps self from being passed implicitly when these
+    # helpers are called as self.env_list(...) from the launch methods
+    @staticmethod
+    def env_list(env, prop_env_list):
         # since we can't rely on ssh_config and sshd_config having
         # the appropriate SendEnv/AcceptEnv settings
         argv = []
         for var in prop_env_list:
-            if ( var in env.keys() ):
-                argv.append(var+"="+env[var])
+            if var in env.keys():
+                argv.append(var + "=" + env[var])
         return argv
-    def env_string(env,prop_env_list):
-        return " ".join(self.env_list(env,prop_env_list))
+
+    @staticmethod
+    def env_string(env, prop_env_list):
+        return " ".join(Launcher.env_list(env, prop_env_list))


 class ExecLauncher(Launcher):
-    def launch(self,cmdline,env,propagate_env=False,
-               prop_env_list=["SPARK_CONF_DIR","SPARK_LOG_DIR","SPARK_LOCAL_DIRS"],
-               wpn=1,worker_on_mother_superior=True):
+    def launch(
+        self,
+        cmdline,
+        env,
+        propagate_env=False,
+        prop_env_list=["SPARK_CONF_DIR", "SPARK_LOG_DIR", "SPARK_LOCAL_DIRS"],
+        wpn=1,
+        worker_on_mother_superior=True,
+    ):
         time.sleep(1)
         # sanity check
-        if ( not worker_on_mother_superior ):
-            raise RuntimeError("Cannot use --no-worker-on-mother-superior with Exec launcher")
-
+        if not worker_on_mother_superior:
+            raise RuntimeError(
+                "Cannot use --no-worker-on-mother-superior with Exec launcher"
+            )
+
         # lots of squick to try to limit the number of cores used on big
         # SMP/NUMA systems that are likely shared with other users
         cpuset = None
         cpusetroot = None
         cpus = 0
-        if ( os.path.exists("/proc/self/cpuset") ):
+        if os.path.exists("/proc/self/cpuset"):
             cpusetfile = open("/proc/self/cpuset")
             cpuset = cpusetfile.read().rstrip("\n")
             cpusetfile.close()
-        if ( os.path.exists("/dev/cpuset") ):
+        if os.path.exists("/dev/cpuset"):
             cpusetroot = "/dev/cpuset"
-        elif ( os.path.exists("/sys/fs/cgroup/cpuset") ):
+        elif os.path.exists("/sys/fs/cgroup/cpuset"):
             cpusetroot = "/sys/fs/cgroup/cpuset"
-        if ( cpusetroot is not None and cpuset is not None ):
+        if cpusetroot is not None and cpuset is not None:
             cpusfile = None
-            if ( os.path.exists(cpusetroot+cpuset+"/cpus") ):
-                cpusfile = open(cpusetroot+cpuset+"/cpus")
-            elif ( os.path.exists(cpusetroot+cpuset+"/cpuset.cpus") ):
-                cpusfile = open(cpusetroot+cpuset+"/cpuset.cpus")
-            if ( cpusfile is not None ):
+            if os.path.exists(cpusetroot + cpuset + "/cpus"):
+                cpusfile = open(cpusetroot + cpuset + "/cpus")
+            elif os.path.exists(cpusetroot + cpuset + "/cpuset.cpus"):
+                cpusfile = open(cpusetroot + cpuset + "/cpuset.cpus")
+            if cpusfile is not None:
                 allcpus = cpusfile.read()
                 cpusfile.close()
                 for cgroup in allcpus.split(","):
                     cpurange = cgroup.split("-")
-                    if ( len(cpurange)==1 ):
+                    if len(cpurange) == 1:
                         cpus += 1
-                    elif ( len(cpurange)==2 ):
-                        cpus += int(cpurange[1])-int(cpurange[0])+1
-        if ( cpus==0 and "PBS_NP" in os.environ.keys() ):
+                    elif len(cpurange) == 2:
+                        cpus += int(cpurange[1]) - int(cpurange[0]) + 1
+        if cpus == 0 and "PBS_NP" in os.environ.keys():
             try:
                 cpus = int(os.environ["PBS_NP"])
-            except e,Exception:
+            except Exception:
                 pass
-        elif ( cpus==0 and "PBS_NUM_PPN" in os.environ.keys() ):
+        elif cpus == 0 and "PBS_NUM_PPN" in os.environ.keys():
             try:
                 cpus = int(os.environ["PBS_NUM_PPN"])
-            except e,Exception:
+            except Exception:
                 pass
-        if ( cpus>0 ):
+        if cpus > 0:
             os.environ["SPARK_WORKER_CORES"] = str(cpus)
             env["SPARK_WORKER_CORES"] = str(cpus)
         # need to do the equivalent shenanigans for memory at some point...

         # base functionality
         argv = cmdline.split()
-        if ( propagate_env ):
-            for arg in self.env_list(env,prop_arg_list):
+        if propagate_env:
+            for arg in self.env_list(env, prop_env_list):
                 argv.append(arg)
         child_pid = os.fork()
-        if ( child_pid==0 ):
-            os.execvpe(argv[0],argv,env)
+        if child_pid == 0:
+            os.execvpe(argv[0], argv, env)
         self.sleep()


 class PBSDSHLauncher(Launcher):
-    def launch(self,cmdline,env,propagate_env=False,
-               prop_env_list=["SPARK_CONF_DIR","SPARK_LOG_DIR","SPARK_LOCAL_DIRS"],
-               wpn=1,worker_on_mother_superior=True):
+    def launch(
+        self,
+        cmdline,
+        env,
+        propagate_env=False,
+        prop_env_list=["SPARK_CONF_DIR", "SPARK_LOG_DIR", "SPARK_LOCAL_DIRS"],
+        wpn=1,
+        worker_on_mother_superior=True,
+    ):
         time.sleep(1)
         cmd = cmdline
-        if ( propagate_env ):
-            cmd = self.env_string(env,prop_env_list)+" "+cmdline
-        if ( wpn is None ):
-            os.system("pbsdsh "+cmdline+" &")
+        if propagate_env:
+            cmd = self.env_string(env, prop_env_list) + " " + cmdline
+        if wpn is None:
+            os.system("pbsdsh " + cmd + " &")
         else:
             nodes = nodelist(unique=True)
             for node in nodes:
-                if ( worker_on_mother_superior or
-                     not ( node in platform.node() ) ):
+                if worker_on_mother_superior or not (node in platform.node()):
                     for i in range(int(wpn)):
-                        os.system("pbsdsh -h "+node+" "+cmdline+" &")
+                        os.system("pbsdsh -h " + node + " " + cmd + " &")
         self.sleep()


 class SRunLauncher(Launcher):
-    def launch(self,cmdline,env,propagate_env=False,
-               prop_env_list=["SPARK_CONF_DIR","SPARK_LOG_DIR","SPARK_LOCAL_DIRS"],
-               wpn=1,worker_on_mother_superior=True):
+    def launch(
+        self,
+        cmdline,
+        env,
+        propagate_env=False,
+        prop_env_list=["SPARK_CONF_DIR", "SPARK_LOG_DIR", "SPARK_LOCAL_DIRS"],
+        wpn=1,
+        worker_on_mother_superior=True,
+    ):
         time.sleep(1)
         cmd = cmdline
-        if ( propagate_env ):
-            cmd = self.env_string(env,prop_env_list)+" "+cmdline
-        if ( wpn is None ):
+        if propagate_env:
+            cmd = self.env_string(env, prop_env_list) + " " + cmdline
+        if wpn is None:
             os.system("srun %s &" % cmd)
         else:
             # srun is a bit dense and has to be told the total number of tasks
             # with --ntasks-per-node
-            if ( "SLURM_JOB_NUM_NODES" in os.environ ):
+            if "SLURM_JOB_NUM_NODES" in os.environ:
                 nnodes = int(os.environ["SLURM_JOB_NUM_NODES"])
-            elif ( "SLURM_NNODES" in os.environ ):
+            elif "SLURM_NNODES" in os.environ:
                 nnodes = int(os.environ["SLURM_NNODES"])
             else:
                 raise RuntimeError("SRunLauncher: Unable to determine number of nodes")
             srun = "srun"
-            if ( not worker_on_mother_superior ):
+            if not worker_on_mother_superior:
                 nnodes -= 1
                 srun = "srun --exclude=%s" % platform.node().split(".")[0]
-            ntasks = nnodes*wpn
-            os.system("%s --nodes=%d --ntasks=%d --ntasks-per-node=%d %s &" % (srun,nnodes,ntasks,wpn,cmd))
+            ntasks = nnodes * wpn
+            os.system(
+                "%s --nodes=%d --ntasks=%d --ntasks-per-node=%d %s &"
+                % (srun, nnodes, ntasks, wpn, cmd)
+            )
         self.sleep()


 class SSHLauncher(Launcher):
-    def launch(self,cmdline,env,propagate_env=False,
-               prop_env_list=["SPARK_CONF_DIR","SPARK_LOG_DIR","SPARK_LOCAL_DIRS"],
-               wpn=1,worker_on_mother_superior=True):
+    def launch(
+        self,
+        cmdline,
+        env,
+        propagate_env=False,
+        prop_env_list=["SPARK_CONF_DIR", "SPARK_LOG_DIR", "SPARK_LOCAL_DIRS"],
+        wpn=1,
+        worker_on_mother_superior=True,
+    ):
         time.sleep(1)
-        if ( "PBS_NODEFILE" in os.environ.keys() ):
-            if ( wpn is None ):
+        if "PBS_NODEFILE" in os.environ.keys():
+            if wpn is None:
                 nodes = nodelist()
             else:
                 nodes = nodelist(unique=True)
             for node in nodes:
-                if ( worker_on_mother_superior or
-                     not ( node in platform.node() ) ):
+                if worker_on_mother_superior or not (node in platform.node()):
                     argv = cmdline.split()
                     ssh = "ssh"
-                    if ( "SPARK_SSH" in env.keys() ):
-                        ssh=env["SPARK_SSH"]
-                    argv.insert(0,ssh)
-                    argv.insert(1,node)
-                    if ( propagate_env ):
-                        for arg in self.env_list(env,prop_env_list):
-                            argv.insert(2,arg)
-                    sys.stderr.write(" ".join(argv)+"\n")
-                    if ( wpn is None ):
+                    if "SPARK_SSH" in env.keys():
+                        ssh = env["SPARK_SSH"]
+                    argv.insert(0, ssh)
+                    argv.insert(1, node)
+                    if propagate_env:
+                        for arg in self.env_list(env, prop_env_list):
+                            argv.insert(2, arg)
+                    sys.stderr.write(" ".join(argv) + "\n")
+                    if wpn is None:
                         nforks = 1
                     else:
                         nforks = int(wpn)
                     for i in range(nforks):
                         child_pid = os.fork()
-                        if ( child_pid==0 ):
-                            os.execvpe(argv[0],argv,env)
+                        if child_pid == 0:
+                            os.execvpe(argv[0], argv, env)
             self.sleep()
         else:
             raise EnvironmentError("PBS_NODEFILE undefined")


 #
 # functions to help with PBS node file
 #
 def nodelist(unique=False):
     nodes = []
-    if ( "PBS_NODEFILE" in os.environ.keys() ):
+    if "PBS_NODEFILE" in os.environ.keys():
         nodefile = open(os.environ["PBS_NODEFILE"])
         for line in nodefile.readlines():
             node = line.rstrip("\n")
-            if ( not unique or not ( node in nodes ) ):
+            if not unique or not (node in nodes):
                 nodes.append(node)
     return nodes

@@ -215,32 +253,35 @@ def nodelist(unique=False):
 def propsToCmdLine(proplist):
     result = []
     for prop in proplist.keys():
-        result.append("-D"+prop+"=\""+proplist[prop]+"\"")
+        result.append("-D" + prop + '="' + proplist[prop] + '"')
     return " ".join(result)


 def propsFromFile(filename):
-    if ( not os.path.exists(filename) ):
-        raise IOError(filename+" not found")
+    if not os.path.exists(filename):
+        raise IOError(filename + " not found")
     proplist = {}
     fd = open(filename)
     for line in fd.readlines():
-        if ( not line.startswith("#") ):
-            keyval = (line.rstrip("\n")).split("=",1)
-            if ( len(keyval)==2 ):
+        if not line.startswith("#"):
+            keyval = (line.rstrip("\n")).split("=", 1)
+            if len(keyval) == 2:
                 proplist[keyval[0]] = keyval[1]
     return proplist


 #
 # get IP address of network interface
 # borrowed from http://code.activestate.com/recipes/439094-get-the-ip-address-associated-with-a-network-inter/
 #
 def get_ip_address(ifname):
     s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-    return socket.inet_ntoa(fcntl.ioctl(
-        s.fileno(),
-        0x8915,  # SIOCGIFADDR
-        struct.pack('256s', ifname[:15])
-        )[20:24])
+    return socket.inet_ntoa(
+        fcntl.ioctl(
+            # 0x8915 is SIOCGIFADDR; struct.pack needs bytes under Python 3,
+            # hence the encode() on the interface name
+            s.fileno(), 0x8915, struct.pack("256s", ifname[:15].encode())
+        )[20:24]
+    )


 #
 # main program begins here
@@ -249,344 +290,438 @@ def get_ip_address(ifname):
 # set up default environment
 properties = {}
 launcher = SSHLauncher()
-if ( "SPARK_LAUNCHER" in os.environ.keys() ):
-    if ( os.environ["SPARK_LAUNCHER"] in ("exec","EXEC") ):
+if "SPARK_LAUNCHER" in os.environ.keys():
+    if os.environ["SPARK_LAUNCHER"] in ("exec", "EXEC"):
         launcher = ExecLauncher()
-    if ( os.environ["SPARK_LAUNCHER"] in ("pbsdsh","PBSDSH") ):
+    if os.environ["SPARK_LAUNCHER"] in ("pbsdsh", "PBSDSH"):
         launcher = PBSDSHLauncher()
-    if ( os.environ["SPARK_LAUNCHER"] in ("ssh","SSH") ):
+    if os.environ["SPARK_LAUNCHER"] in ("ssh", "SSH"):
         launcher = SSHLauncher()
-    if ( os.environ["SPARK_LAUNCHER"] in ("srun","SRUN") ):
+    if os.environ["SPARK_LAUNCHER"] in ("srun", "SRUN"):
         launcher = SRunLauncher()
-elif ( "SLURM_JOB_ID" in os.environ ):
+elif "SLURM_JOB_ID" in os.environ:
     launcher = SRunLauncher()
-elif ( "PBS_JOBID" in os.environ ):
+elif "PBS_JOBID" in os.environ:
     launcher = PBSDSHLauncher()
-if ( not ( "SPARK_CONF_DIR" in os.environ.keys() ) ):
-    os.environ["SPARK_CONF_DIR"] = os.getcwd()+"/conf"
-if ( not ( "SPARK_LOG_DIR" in os.environ.keys() ) ):
+if not ("SPARK_CONF_DIR" in os.environ.keys()):
+    os.environ["SPARK_CONF_DIR"] = os.getcwd() + "/conf"
+if not ("SPARK_LOG_DIR" in os.environ.keys()):
     os.environ["SPARK_LOG_DIR"] = os.getcwd()

 # manage scratch directories
 # **ASSUMPTION**: work directory is on a shared file system
 workdir = os.getcwd()
-if ( "SCRATCHDIR" in os.environ.keys() ):
-    if ( "SLURM_JOB_ID" in os.environ ):
-        workdir = os.environ["SCRATCHDIR"]+"/spark-"+os.environ["SLURM_JOB_ID"]
-    elif ( "PBS_JOBID" in os.environ ):
-        workdir = os.environ["SCRATCHDIR"]+"/spark-"+os.environ["PBS_JOBID"]
+if "SCRATCHDIR" in os.environ.keys():
+    if "SLURM_JOB_ID" in os.environ:
+        workdir = os.environ["SCRATCHDIR"] + "/spark-" + os.environ["SLURM_JOB_ID"]
+    elif "PBS_JOBID" in os.environ:
+        workdir = os.environ["SCRATCHDIR"] + "/spark-" + os.environ["PBS_JOBID"]
 # SPARK_LOCAL_DIRS should be node-local
-if ( ( "TMPDIR" in os.environ.keys() ) and
-     not ( "SPARK_LOCAL_DIRS" in os.environ.keys() ) ):
+if ("TMPDIR" in os.environ.keys()) and not ("SPARK_LOCAL_DIRS" in os.environ.keys()):
     os.environ["SPARK_LOCAL_DIRS"] = os.environ["TMPDIR"]
-elif ( not ( "SPARK_LOCAL_DIRS" in os.environ.keys() ) ):
+elif not ("SPARK_LOCAL_DIRS" in os.environ.keys()):
     os.environ["SPARK_LOCAL_DIRS"] = "/tmp"

 # command line argument handling
 parser = argparse.ArgumentParser()
 initargs = parser.add_mutually_exclusive_group()
-initargs.add_argument("--init",
-                      help="Initialize Spark master/worker services (default).",
-                      action="store_true",
-                      dest="init_svcs",
-                      default=True)
-initargs.add_argument("--no-init",
-                      help="Do not initialize Spark master/worker services.",
-                      dest="init_svcs",
-                      action="store_false")
+initargs.add_argument(
+    "--init",
+    help="Initialize Spark master/worker services (default).",
+    action="store_true",
+    dest="init_svcs",
+    default=True,
+)
+initargs.add_argument(
+    "--no-init",
+    help="Do not initialize Spark master/worker services.",
+    dest="init_svcs",
+    action="store_false",
+)
 launcherargs = parser.add_mutually_exclusive_group()
-launcherargs.add_argument("--pbsdsh",
-                          help="Use the pbsdsh process launcher.",
-                          action="store_const",
-                          dest="launcher",
-                          const="pbsdsh",
-                          default=None)
-launcherargs.add_argument("--exec",
-                          help="Use the exec process launcher.",
-                          action="store_const",
-                          dest="launcher",
-                          const="exec")
-launcherargs.add_argument("--srun",
-                          help="Use the srun process launcher.",
-                          action="store_const",
-                          dest="launcher",
-                          const="srun")
-launcherargs.add_argument("--ssh",
-                          help="Use the ssh process launcher.",
-                          action="store_const",
-                          dest="launcher",
-                          const="ssh")
+launcherargs.add_argument(
+    "--pbsdsh",
+    help="Use the pbsdsh process launcher.",
+    action="store_const",
+    dest="launcher",
+    const="pbsdsh",
+    default=None,
+)
+launcherargs.add_argument(
+    "--exec",
+    help="Use the exec process launcher.",
+    action="store_const",
+    dest="launcher",
+    const="exec",
+)
+launcherargs.add_argument(
+    "--srun",
+    help="Use the srun process launcher.",
+    action="store_const",
+    dest="launcher",
+    const="srun",
+)
+launcherargs.add_argument(
+    "--ssh",
+    help="Use the ssh process launcher.",
+    action="store_const",
+    dest="launcher",
+    const="ssh",
+)
 msargs = parser.add_mutually_exclusive_group()
-msargs.add_argument("--worker-on-mother-superior",
-                    help="Run a worker on the mother superior node as well as the driver program (default).",
-                    action="store_true",
-                    dest="worker_on_mother_superior",
-                    default=True)
-msargs.add_argument("-N","--no-worker-on-mother-superior",
-                    help="Do not run a worker on the mother superior node, only the driver program.",
-                    action="store_false",
-                    dest="worker_on_mother_superior")
-parser.add_argument("-M","--master-interface",
                    help="Have Spark master listen on network interface <IF> rather than the default.",
-                    metavar="IF",
-                    dest="iface",
-                    type=str,
-                    default=None)
-parser.add_argument("-C","--conf-dir",
-                    help="Look in <dir> for Java properties files.",
-                    dest="spark_conf_dir",
-                    type=str,
-                    default=None)
-parser.add_argument("-L","--log-dir",
-                    help="Place logs in <dir>.",
-                    dest="spark_log_dir",
-                    type=str,
-                    default=None)
-parser.add_argument("-l","--log4j-properties",
-                    help="Read log4j properties from <file>.",
-                    dest="log4j_props",
-                    type=str,
-                    default=None)
-parser.add_argument("-d","--work-dir",
-                    help="Use <workdir> as Spark program's working directory.",
-                    metavar="workdir",
-                    dest="workdir",
-                    type=str,
-                    default=workdir)
-parser.add_argument("-W","--wpn","--workers-per-node",
-                    help="Launch <N> worker tasks per node instead of the default (1).",
-                    metavar="N",
-                    dest="wpn",
-                    type=int,
-                    default=1)
-parser.add_argument("-w","--wc","--worker-cores",
-                    help="Use <N> cores per worker (default all available).",
-                    metavar="N",
-                    dest="worker_cores",
-                    type=int,
-                    default=None)
-parser.add_argument("-m","--wm","--worker-memory",
-                    help="Set per-worker memory limit.",
-                    metavar="MEM",
-                    dest="worker_mem",
-                    type=str,
-                    default=None)
-parser.add_argument("-p","--pausetime",
-                    help="Pause <N> seconds between startup stages (default 5).",
-                    metavar="N",
-                    type=int,
-                    default=5)
-parser.add_argument("-D","--conf",
-                    help="Set the Java property <key> to <value>.",
-                    metavar="key=value",
-                    dest="properties",
-                    nargs='+',
-                    action="append")
-parser.add_argument("-P","--properties-file",
-                    help="Read Java properties from <propfile>.",
-                    metavar="propfile",
-                    dest="propfile",
-                    type=str,
-                    default=None)
-parser.add_argument("--class",
-                    help="Application's main class (for Java/Scala apps).",
-                    dest="classname",
-                    type=str,
-                    default=None)
-parser.add_argument("--name",
-                    help="The name of your application.",
-                    type=str,
-                    default=None)
-parser.add_argument("--jars",
-                    help="Comma-separated list of local jars to include on the driver and executor classpaths.",
-                    metavar="jarlist",
-                    type=str,
-                    default=None)
-parser.add_argument("--packages",
-                    help="Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. Will search the local maven repo, then maven central and any additional remote repositories given by --repositories. The format for the coordinates should be groupId:artifactId:version.",
-                    metavar="pkglist",
-                    dest="pkgs",
-                    type=str,
-                    default=None)
-parser.add_argument("--exclude-packages",
-                    help="Comma-separated list of groupId:artifactId to exclude while resolving the dependencies provided in --packages to avoid dependency conflicts.",
-                    metavar="pkglist",
-                    dest="excl_pkgs",
-                    type=str,
-                    default=None)
-parser.add_argument("--repositories",
-                    help="",
-                    metavar="repolist",
-                    dest="repos",
-                    type=str,
-                    default=None)
-parser.add_argument("--py-files",
-                    help="Comma-separated list of .zip, .egg, or .py files to place on PYTHONPATH for Python apps.",
-                    metavar="filelist",
-                    dest="pyfiles",
-                    type=str,
-                    default=None)
-parser.add_argument("--files",
-                    help="Comma-separated list of files to be placed in the working directory of each executor.",
-                    type=str,
-                    default=None)
-parser.add_argument("--driver-memory",
-                    help="Memory for driver (e.g. 1000M, 2G; default is 1024M).",
-                    metavar="mem",
-                    dest="driver_mem",
-                    type=str,
-                    default=None)
-parser.add_argument("--driver-java-options",
-                    help="Extra Java options to pass to the driver.",
-                    metavar="opts",
-                    dest="driver_java_opts",
-                    type=str,
-                    default=None)
-parser.add_argument("--driver-library-path",
-                    help="Extra library path entries to pass to the driver.",
-                    metavar="libpath",
-                    dest="driver_lib_path",
-                    type=str,
-                    default=None)
-parser.add_argument("--driver-class-path",
-                    help="Extra class path entries to pass to the driver. Note that jars added with --jars are automatically included in the classpath.",
-                    metavar="classpath",
-                    dest="driver_class_path",
-                    type=str,
-                    default=None)
-parser.add_argument("--executor-cores",
-                    help="# cores per executor (e.g. 1; default is all available).",
-                    metavar="cores",
-                    dest="exec_cores",
-                    type=int,
-                    default=None)
1000M, 2G; default is 1G).",
-                    metavar="mem",
-                    dest="exec_mem",
-                    type=str,
-                    default=None)
-parser.add_argument("app",
-                    nargs=argparse.REMAINDER,
-                    help="Spark application and arguments")
+msargs.add_argument(
+    "--worker-on-mother-superior",
+    help="Run a worker on the mother superior node as well as the driver program (default).",
+    action="store_true",
+    dest="worker_on_mother_superior",
+    default=True,
+)
+msargs.add_argument(
+    "-N",
+    "--no-worker-on-mother-superior",
+    help="Do not run a worker on the mother superior node, only the driver program.",
+    action="store_false",
+    dest="worker_on_mother_superior",
+)
+parser.add_argument(
+    "-M",
+    "--master-interface",
+    help="Have Spark master listen on network interface <IF> rather than the default.",
+    metavar="IF",
+    dest="iface",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "-C",
+    "--conf-dir",
+    help="Look in <dir> for Java properties files.",
+    dest="spark_conf_dir",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "-L",
+    "--log-dir",
+    help="Place logs in <dir>.",
+    dest="spark_log_dir",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "-l",
+    "--log4j-properties",
+    help="Read log4j properties from <propfile>.",
+    dest="log4j_props",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "-d",
+    "--work-dir",
+    help="Use <workdir> as Spark program's working directory.",
+    metavar="workdir",
+    dest="workdir",
+    type=str,
+    default=workdir,
+)
+parser.add_argument(
+    "-W",
+    "--wpn",
+    "--workers-per-node",
+    help="Launch <N> worker tasks per node instead of the default (1).",
+    metavar="N",
+    dest="wpn",
+    type=int,
+    default=1,
+)
+parser.add_argument(
+    "-w",
+    "--wc",
+    "--worker-cores",
+    help="Use <N> cores per worker (default all available).",
+    metavar="N",
+    dest="worker_cores",
+    type=int,
+    default=None,
+)
+parser.add_argument(
+    "-m",
+    "--wm",
+    "--worker-memory",
+    help="Set per-worker memory limit.",
+    metavar="MEM",
+    dest="worker_mem",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "-p",
+    "--pausetime",
+    help="Pause <N> seconds between startup stages (default 5).",
+    metavar="N",
+    type=int,
+    default=5,
+)
+parser.add_argument(
+    "-D",
+    "--conf",
+    help="Set the Java property <key> to <value>.",
+    metavar="key=value",
+    dest="properties",
+    nargs="+",
+    action="append",
+)
+parser.add_argument(
+    "-P",
+    "--properties-file",
+    help="Read Java properties from <propfile>.",
+    metavar="propfile",
+    dest="propfile",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "--class",
+    help="Application's main class (for Java/Scala apps).",
+    dest="classname",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "--name", help="The name of your application.", type=str, default=None
+)
+parser.add_argument(
+    "--jars",
+    help="Comma-separated list of local jars to include on the driver and executor classpaths.",
+    metavar="jarlist",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "--packages",
+    help="Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. Will search the local maven repo, then maven central and any additional remote repositories given by --repositories.
The format for the coordinates should be groupId:artifactId:version.", + metavar="pkglist", + dest="pkgs", + type=str, + default=None, +) +parser.add_argument( + "--exclude-packages", + help="Comma-separated list of groupId:artifactId to exclude while resolving the dependencies provided in --packages to avoid dependency conflicts.", + metavar="pkglist", + dest="excl_pkgs", + type=str, + default=None, +) +parser.add_argument( + "--repositories", help="", metavar="repolist", dest="repos", type=str, default=None +) +parser.add_argument( + "--py-files", + help="Comma-separated list of .zip, .egg, or .py files to place on PYTHONPATH for Python apps.", + metavar="filelist", + dest="pyfiles", + type=str, + default=None, +) +parser.add_argument( + "--files", + help="Comma-separated list of files to be placed in the working directory of each executor.", + type=str, + default=None, +) +parser.add_argument( + "--driver-memory", + help="Memory for driver (e.g. 1000M, 2G; default is 1024M).", + metavar="mem", + dest="driver_mem", + type=str, + default=None, +) +parser.add_argument( + "--driver-java-options", + help="Extra Java options to pass to the driver.", + metavar="opts", + dest="driver_java_opts", + type=str, + default=None, +) +parser.add_argument( + "--driver-library-path", + help="Extra library path entries to pass to the driver.", + metavar="libpath", + dest="driver_lib_path", + type=str, + default=None, +) +parser.add_argument( + "--driver-class-path", + help="Extra class path entries to pass to the driver. Note that jars added with --jars are automatically included in the classpath.", + metavar="classpath", + dest="driver_class_path", + type=str, + default=None, +) +parser.add_argument( + "--executor-cores", + help="# cores per executor (e.g. 1; default is all available).", + metavar="cores", + dest="exec_cores", + type=int, + default=None, +) +parser.add_argument( + "--executor-memory", + help="Memory per executor (e.g. 
1000M, 2G; default is 1G).",
+    metavar="mem",
+    dest="exec_mem",
+    type=str,
+    default=None,
+)
+parser.add_argument(
+    "app", nargs=argparse.REMAINDER, help="Spark application and arguments"
+)
 
 args = parser.parse_args()
 
 # post arg parsing setup
-if ( args.launcher is not None ):
-    if ( args.launcher=="exec" ):
+if args.launcher is not None:
+    if args.launcher == "exec":
         launcher = ExecLauncher()
-    elif ( args.launcher=="pbsdsh" ):
+    elif args.launcher == "pbsdsh":
         launcher = PBSDSHLauncher()
-    elif ( args.launcher=="srun" ):
+    elif args.launcher == "srun":
         launcher = SRunLauncher()
-    elif ( args.launcher=="ssh" ):
+    elif args.launcher == "ssh":
         launcher = SSHLauncher()
-if ( args.properties is not None ):
+if args.properties is not None:
     for propertylist in args.properties:
         for property in propertylist:
             try:
-                (key,value) = property.split("=")
+                # split only on the first "=" so values may themselves contain "="
+                (key, value) = property.split("=", 1)
                 properties[key] = value
-            except:
-                sys.stdin.write("Ignoring malformed property \"%s\"\n" % property)
+            except ValueError:
+                # warnings belong on stderr, not stdin
+                sys.stderr.write('Ignoring malformed property "%s"\n' % property)
 
 # environment sanity checks
-if ( not "PBS_JOBID" in os.environ and not "SLURM_JOB_ID" in os.environ ):
+if "PBS_JOBID" not in os.environ and "SLURM_JOB_ID" not in os.environ:
     raise EnvironmentError("Not in a PBS or Slurm job")
-if ( not ( "SPARK_HOME" in os.environ.keys() ) ):
+if "SPARK_HOME" not in os.environ:
     raise EnvironmentError("SPARK_HOME not defined")
 
 # read any properties files in the conf directory
-for propfile in glob.glob(os.environ["SPARK_CONF_DIR"]+"/*.properties"):
-    if ( os.path.exists(propfile) ):
+# SPARK_CONF_DIR may be unset; Spark's convention is $SPARK_HOME/conf
+spark_conf_dir = os.environ.get("SPARK_CONF_DIR", os.environ["SPARK_HOME"] + "/conf")
+for propfile in glob.glob(spark_conf_dir + "/*.properties"):
+    if os.path.exists(propfile):
         props = propsFromFile(propfile)
         for key in props.keys():
-            if ( not ( key in properties.keys() ) ):
+            if key not in properties:
                 properties[key] = props[key]
 
 # make sure the work dir actually exists
-if ( args.workdir is not None and not os.path.exists(args.workdir) ):
+if args.workdir is not None and not os.path.exists(args.workdir):
     os.mkdir(args.workdir)
 
 # **ASSUMPTION**: master runs on mother superior node
-if ( args.iface is None ):
+if args.iface is None:
     os.environ["SPARK_MASTER_IP"] = platform.node()
     os.environ["SPARK_MASTER_HOST"] = platform.node()
 else:
     os.environ["SPARK_MASTER_IP"] = get_ip_address(args.iface)
     os.environ["SPARK_MASTER_HOST"] = get_ip_address(args.iface)
-if ( not ( "SPARK_MASTER_PORT" in os.environ.keys() ) ):
+if "SPARK_MASTER_PORT" not in os.environ:
     os.environ["SPARK_MASTER_PORT"] = "7077"
-spark_master = "spark://"+os.environ["SPARK_MASTER_IP"]+":"+str(os.environ["SPARK_MASTER_PORT"])
-#sys.stderr.write("Spark master = "+spark_master+"\n")
+spark_master = (
+    "spark://"
+    + os.environ["SPARK_MASTER_IP"]
+    + ":"
+    + str(os.environ["SPARK_MASTER_PORT"])
+)
+# sys.stderr.write("Spark master = "+spark_master+"\n")
 
-if ( args.init_svcs ):
+if args.init_svcs:
     # stick any properties in the appropriate environment variable
-    if ( len(properties)>0 ):
-        if ( "SPARK_DAEMON_JAVA_OPTS" in os.environ.keys() ):
-            os.environ["SPARK_DAEMON_JAVA_OPTS"] += " "+propsToCmdLine(properties)
+    if len(properties) > 0:
+        if "SPARK_DAEMON_JAVA_OPTS" in os.environ:
+            os.environ["SPARK_DAEMON_JAVA_OPTS"] += " " + propsToCmdLine(properties)
         else:
            os.environ["SPARK_DAEMON_JAVA_OPTS"] = propsToCmdLine(properties)
     # launch master on mother superior
-    cmdline = os.environ["SPARK_HOME"]+"/sbin/start-master.sh"
-    os.system(cmdline+" &")
-    sys.stderr.write(cmdline+"\n")
-
sys.stdout.write("SPARK_MASTER_HOST="+os.environ["SPARK_MASTER_HOST"]+"\n") - sys.stdout.write("SPARK_MASTER_PORT="+os.environ["SPARK_MASTER_PORT"]+"\n") + cmdline = os.environ["SPARK_HOME"] + "/sbin/start-master.sh" + os.system(cmdline + " &") + sys.stderr.write(cmdline + "\n") + sys.stdout.write("SPARK_MASTER_HOST=" + os.environ["SPARK_MASTER_HOST"] + "\n") + sys.stdout.write("SPARK_MASTER_PORT=" + os.environ["SPARK_MASTER_PORT"] + "\n") time.sleep(args.pausetime) # launch workers - cmdline = os.environ["SPARK_HOME"]+"/bin/spark-class org.apache.spark.deploy.worker.Worker" - if ( args.worker_cores is not None ): - cmdline += " --cores "+str(args.worker_cores) - if ( args.worker_mem is not None ): - cmdline += " --memory "+args.worker_mem - if ( args.workdir is not None ): - cmdline += " --work-dir "+args.workdir - cmdline += " "+spark_master - sys.stderr.write(cmdline+"\n") - launcher.launch(cmdline,os.environ,wpn=args.wpn, - worker_on_mother_superior=args.worker_on_mother_superior) + cmdline = ( + os.environ["SPARK_HOME"] + + "/bin/spark-class org.apache.spark.deploy.worker.Worker" + ) + if args.worker_cores is not None: + cmdline += " --cores " + str(args.worker_cores) + if args.worker_mem is not None: + cmdline += " --memory " + args.worker_mem + if args.workdir is not None: + cmdline += " --work-dir " + args.workdir + cmdline += " " + spark_master + sys.stderr.write(cmdline + "\n") + launcher.launch( + cmdline, + os.environ, + wpn=args.wpn, + worker_on_mother_superior=args.worker_on_mother_superior, + ) time.sleep(args.pausetime) # run the user's Spark "job", if one is given -if ( len(args.app)>0 ): - cmdline = os.environ["SPARK_HOME"]+"/bin/spark-submit --master "+spark_master - if ( args.classname is not None ): - cmdline += " --class "+args.classname - if ( args.name is not None ): - cmdline += " --name "+args.name - if ( args.jars is not None ): - cmdline += " --jars "+args.jars - if ( args.pkgs is not None ): - cmdline += " --packages "+args.pkgs - if ( args.excl_pkgs is not None ): - cmdline += " --exclude-packages "+args.excl_pkgs - if ( args.repos is not None ): - cmdline += " --repositories "+args.repos - if ( args.pyfiles is not None ): - cmdline += " --py-files "+args.pyfiles - if ( args.files is not None ): - cmdline += " --files "+args.files - if ( args.log4j_props is not None and args.driver_java_opts is None ): - cmdline += " --driver-java-options \"-Dlog4j.configuration=file:"+args.log4j_props+"\"" - elif ( args.log4j_props is None and args.driver_java_opts is not None ): - cmdline += " --driver-java-options \""+args.driver_java_opts+"\"" - elif ( args.log4j_props is not None and args.driver_java_opts is not None ): - cmdline += " --driver-java-options \"-Dlog4j.configuration=file:"+args.log4j_props+" "+args.driver_java_opts+"\"" - if ( args.driver_mem is not None ): - cmdline += " --driver-memory "+args.driver_mem - if ( args.driver_lib_path is not None ): - cmdline += " --driver-library-path "+args.driver_lib_path - if ( args.driver_class_path is not None ): - cmdline += " --driver-class-path "+args.driver_class_path - if ( args.exec_cores is not None ): - cmdline += " --executor-cores "+args.exec_cores - if ( args.exec_mem is not None ): - cmdline += " --executor-memory "+args.exec_mem +if len(args.app) > 0: + cmdline = os.environ["SPARK_HOME"] + "/bin/spark-submit --master " + spark_master + if args.classname is not None: + cmdline += " --class " + args.classname + if args.name is not None: + cmdline += " --name " + args.name + if args.jars is not None: + cmdline 
+= " --jars " + args.jars + if args.pkgs is not None: + cmdline += " --packages " + args.pkgs + if args.excl_pkgs is not None: + cmdline += " --exclude-packages " + args.excl_pkgs + if args.repos is not None: + cmdline += " --repositories " + args.repos + if args.pyfiles is not None: + cmdline += " --py-files " + args.pyfiles + if args.files is not None: + cmdline += " --files " + args.files + if args.log4j_props is not None and args.driver_java_opts is None: + cmdline += ( + ' --driver-java-options "-Dlog4j.configuration=file:' + + args.log4j_props + + '"' + ) + elif args.log4j_props is None and args.driver_java_opts is not None: + cmdline += ' --driver-java-options "' + args.driver_java_opts + '"' + elif args.log4j_props is not None and args.driver_java_opts is not None: + cmdline += ( + ' --driver-java-options "-Dlog4j.configuration=file:' + + args.log4j_props + + " " + + args.driver_java_opts + + '"' + ) + if args.driver_mem is not None: + cmdline += " --driver-memory " + args.driver_mem + if args.driver_lib_path is not None: + cmdline += " --driver-library-path " + args.driver_lib_path + if args.driver_class_path is not None: + cmdline += " --driver-class-path " + args.driver_class_path + if args.exec_cores is not None: + cmdline += " --executor-cores " + args.exec_cores + if args.exec_mem is not None: + cmdline += " --executor-memory " + args.exec_mem for key in properties.keys(): - cmdline += " --conf \""+str(key)+"="+str(properties[key])+"\"" - cmdline += " "+" ".join(args.app) + cmdline += ' --conf "' + str(key) + "=" + str(properties[key]) + '"' + cmdline += " " + " ".join(args.app) os.system(cmdline) diff --git a/bin/supermover b/bin/supermover index ff48531..3899f8f 100755 --- a/bin/supermover +++ b/bin/supermover @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # # supermover -- wrapper around a number of data movement tools # Copyright 2009, 2011, 2013 University of Tennessee @@ -73,139 +73,184 @@ def get_methods(file): global debug global proto_list methods = {} - if ( not os.path.exists(file) ): - sys.stderr.write("Config file "+file+" does not exist.\n") - elif ( not os.access(file,os.R_OK) ): - sys.stderr.write("Config file "+file+" is not readable.\n") + if not os.path.exists(file): + sys.stderr.write("Config file " + file + " does not exist.\n") + elif not os.access(file, os.R_OK): + sys.stderr.write("Config file " + file + " is not readable.\n") else: - fd = open(file,"r") + fd = open(file, "r") lines = fd.readlines() for line in lines: - token = line.split("=",1) - if ( token!=[] and len(token)==2 and not token[0].startswith("#") ): + token = line.split("=", 1) + if token != [] and len(token) == 2 and not token[0].startswith("#"): protopair = token[0].strip() method = token[1].strip() methods[protopair] = method protos = protopair.split(":") for proto in protos: - if ( proto not in proto_list ): + if proto not in proto_list: proto_list.append(proto) fd.close() return methods -def do_xfer(src,dest,methods,log): +def do_xfer(src, dest, methods, log): global debug global successes global failures global proto_list - src_url = urlparse(src,"file") + src_url = urlparse(src, "file") src_proto = src_url[0] - if ( src_proto=="" ): + if src_proto == "": src_proto = "file" - src_hostport = src_url[1].split(":",2) + src_hostport = src_url[1].split(":", 2) src_host = src_hostport[0] - if ( len(src_hostport)>1 ): + if len(src_hostport) > 1: src_port = src_hostport[1] else: src_port = None src_path = src_url[2] - if ( (src_proto in proto_list) and - src_path.startswith("//") 
diff --git a/bin/supermover b/bin/supermover
index ff48531..3899f8f 100755
--- a/bin/supermover
+++ b/bin/supermover
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 #
 # supermover -- wrapper around a number of data movement tools
 # Copyright 2009, 2011, 2013 University of Tennessee
@@ -73,139 +73,184 @@ def get_methods(file):
     global debug
     global proto_list
     methods = {}
-    if ( not os.path.exists(file) ):
-        sys.stderr.write("Config file "+file+" does not exist.\n")
-    elif ( not os.access(file,os.R_OK) ):
-        sys.stderr.write("Config file "+file+" is not readable.\n")
+    if not os.path.exists(file):
+        sys.stderr.write("Config file " + file + " does not exist.\n")
+    elif not os.access(file, os.R_OK):
+        sys.stderr.write("Config file " + file + " is not readable.\n")
     else:
-        fd = open(file,"r")
+        fd = open(file, "r")
         lines = fd.readlines()
         for line in lines:
-            token = line.split("=",1)
-            if ( token!=[] and len(token)==2 and not token[0].startswith("#") ):
+            token = line.split("=", 1)
+            if token != [] and len(token) == 2 and not token[0].startswith("#"):
                 protopair = token[0].strip()
                 method = token[1].strip()
                 methods[protopair] = method
                 protos = protopair.split(":")
                 for proto in protos:
-                    if ( proto not in proto_list ):
+                    if proto not in proto_list:
                         proto_list.append(proto)
         fd.close()
     return methods
 
 
-def do_xfer(src,dest,methods,log):
+def do_xfer(src, dest, methods, log):
     global debug
     global successes
     global failures
     global proto_list
-    src_url = urlparse(src,"file")
+    src_url = urlparse(src, "file")
     src_proto = src_url[0]
-    if ( src_proto=="" ):
+    if src_proto == "":
         src_proto = "file"
-    src_hostport = src_url[1].split(":",2)
+    src_hostport = src_url[1].split(":", 2)
     src_host = src_hostport[0]
-    if ( len(src_hostport)>1 ):
+    if len(src_hostport) > 1:
         src_port = src_hostport[1]
     else:
         src_port = None
     src_path = src_url[2]
-    if ( (src_proto in proto_list) and
-         src_path.startswith("//") and src_host=='' ):
+    if (src_proto in proto_list) and src_path.startswith("//") and src_host == "":
         # if we get here, urlparse has screwed the pooch on
         # parsing the URL, so try some heuristics to recover
-        new_hostpath = (src_path[2:]).split("/",2)
-        src_hostport = new_hostpath[0].split(":",2)
+        new_hostpath = (src_path[2:]).split("/", 2)
+        src_hostport = new_hostpath[0].split(":", 2)
         src_host = src_hostport[0]
-        if ( len(src_hostport)>1 ):
+        if len(src_hostport) > 1:
             src_port = src_hostport[1]
         else:
             src_port = None
         src_path = src_url[2].split(src_host)[1]
     dest_url = urlparse(dest)
     dest_proto = dest_url[0]
-    if ( dest_proto=="" ):
+    if dest_proto == "":
         dest_proto = "file"
-    dest_hostport = dest_url[1].split(":",2)
+    dest_hostport = dest_url[1].split(":", 2)
     dest_host = dest_hostport[0]
-    if ( len(dest_hostport)>1 ):
+    if len(dest_hostport) > 1:
         dest_port = dest_hostport[1]
     else:
         dest_port = None
     dest_path = dest_url[2]
-    if ( (dest_proto in proto_list) and
-         dest_path.startswith("//") and dest_host=='' ):
+    if (dest_proto in proto_list) and dest_path.startswith("//") and dest_host == "":
         # if we get here, urlparse has screwed the pooch on
         # parsing the URL, so try some heuristics to recover
-        new_hostpath = (dest_path[2:]).split("/",2)
-        dest_hostport = new_hostpath[0].split(":",2)
+        new_hostpath = (dest_path[2:]).split("/", 2)
+        dest_hostport = new_hostpath[0].split(":", 2)
         dest_host = dest_hostport[0]
-        if ( len(dest_hostport)>1 ):
+        if len(dest_hostport) > 1:
             dest_port = dest_hostport[1]
         else:
             dest_port = None
         dest_path = dest_url[2].split(dest_host)[1]
-    if ( src_proto.startswith("gsi") or dest_proto.startswith("gsi") ):
+    if src_proto.startswith("gsi") or dest_proto.startswith("gsi"):
         uid = os.getuid()
         if (
-            # KRB5
-            ( not os.path.exists("/tmp/krb5cc_"+str(os.getuid())) ) and
-            ( "KRB5CCNAME" in os.environ and not os.path.exists(os.environ["KRB5CCNAME"]) ) and
-            # KRB4
-            ( not os.path.exists("/tmp/tkt"+str(os.getuid())) ) and
-            # Globus/x.509
-            ( not os.path.exists("/tmp/x509up_u"+str(os.getuid())) ) and
-            ( not os.path.exists(os.path.expanduser("~")+"/.globus/x509up_u"+str(os.getuid())) ) and
-            ( "X509_USER_PROXY" in os.environ and not os.path.exists(os.environ["X509_USER_PROXY"]) )
-            ):
-            failures = failures+1
-            raise RuntimeException("GSI protocol requested but no GSI proxy credential found; please run myproxy-logon, grid-proxy-init, or kinit")
-    method = methods[src_proto+":"+dest_proto]
-    if ( method is not None ):
+            # KRB5 (an unset credential env var counts as "no credential")
+            (not os.path.exists("/tmp/krb5cc_" + str(uid)))
+            and (
+                "KRB5CCNAME" not in os.environ
+                or not os.path.exists(os.environ["KRB5CCNAME"])
+            )
+            and
+            # KRB4
+            (not os.path.exists("/tmp/tkt" + str(uid)))
+            and
+            # Globus/x.509
+            (not os.path.exists("/tmp/x509up_u" + str(uid)))
+            and (
+                not os.path.exists(
+                    os.path.expanduser("~") + "/.globus/x509up_u" + str(uid)
+                )
+            )
+            and (
+                "X509_USER_PROXY" not in os.environ
+                or not os.path.exists(os.environ["X509_USER_PROXY"])
+            )
+        ):
+            failures = failures + 1
+            raise RuntimeError(
+                "GSI protocol requested but no GSI proxy credential found; please run myproxy-logon, grid-proxy-init, or kinit"
+            )
+    method = methods[src_proto + ":" + dest_proto]
+    if method is not None:
         cmd = method
-        cmd = cmd.replace("%u",src)
-        cmd = cmd.replace("%h",src_host)
-        if ( src_port is not None ):
-            cmd = cmd.replace("%p",src_port)
-        cmd = cmd.replace("%f",src_path)
-        cmd = cmd.replace("%U",dest)
-        cmd = cmd.replace("%H",dest_host)
-        if ( dest_port is not None ):
-            cmd = cmd.replace("%P",dest_port)
-        cmd = cmd.replace("%F",dest_path)
-        log.write(str(datetime.datetime.now())+": initiating transfer of "+src+" to "+dest+"\n")
-        log.write(str(datetime.datetime.now())+": "+cmd+"\n")
+        cmd = cmd.replace("%u", src)
+        cmd = cmd.replace("%h", src_host)
+        if src_port is not None:
+            cmd = cmd.replace("%p", src_port)
+        cmd = cmd.replace("%f", src_path)
+        cmd = cmd.replace("%U", dest)
+        cmd = cmd.replace("%H", dest_host)
+        if dest_port is not None:
+            cmd = cmd.replace("%P", dest_port)
+        cmd = cmd.replace("%F", dest_path)
+        log.write(
+            str(datetime.datetime.now())
+            + ": initiating transfer of "
+            + src
+            + " to "
+            + dest
+            + "\n"
+        )
+        log.write(str(datetime.datetime.now()) + ": " + cmd + "\n")
         log.flush()
-        if ( debug is False ):
-            pipe = os.popen(cmd+" 2>1")
+        if debug is False:
+            # 2>&1 merges stderr into the log stream ("2>1" would write a file named 1)
+            pipe = os.popen(cmd + " 2>&1")
             while True:
                 line = pipe.readline()
-                if ( not line ):
+                if not line:
                     break
                 else:
-                    log.write(str(datetime.datetime.now())+": "+line)
+                    log.write(str(datetime.datetime.now()) + ": " + line)
             returncode = pipe.close()
-            if ( returncode is None or returncode==0 ):
-                log.write(str(datetime.datetime.now())+": completed transfer of "+src+" to "+dest+"\n")
+            if returncode is None or returncode == 0:
+                log.write(
+                    str(datetime.datetime.now())
+                    + ": completed transfer of "
+                    + src
+                    + " to "
+                    + dest
+                    + "\n"
+                )
                 log.flush()
                 successes += 1
             else:
-                log.write(str(datetime.datetime.now())+": failed transfer of "+src+" to "+dest+", return code "+str(returncode)+"\n")
+                log.write(
+                    str(datetime.datetime.now())
+                    + ": failed transfer of "
+                    + src
+                    + " to "
+                    + dest
+                    + ", return code "
+                    + str(returncode)
+                    + "\n"
+                )
                 log.flush()
                 failures += 1
     else:
-        raise RuntimeException("No method defined for protocol pair "+src_proto+":"+dest_proto)
+        raise RuntimeError(
+            "No method defined for protocol pair " + src_proto + ":" + dest_proto
+        )
 
 
 def usage(methods):
     sys.stderr.write("Usage: supermover [args] srcURL destURL\n")
     sys.stderr.write("   or: supermover [args] -f urlPairList\n\n")
     sys.stderr.write("Arguments:\n")
-    sys.stderr.write("  -c cfgfile, --config=cfgfile\n\tRead config file \"cfgfile\" (default is "+cfgfile+")\n")
+    sys.stderr.write(
+        '  -c cfgfile, --config=cfgfile\n\tRead config file "cfgfile" (default is '
+        + cfgfile
+        + ")\n"
+    )
     sys.stderr.write("  -d, --debug\n\tEnable extra logging for debugging\n")
-    sys.stderr.write("  -f urlPairList, --file=urlPairList\n\tRead src/edest URL pairs from file urlPairList rather than\n\tcommand line arguments\n")
+    sys.stderr.write(
+        "  -f urlPairList, --file=urlPairList\n\tRead src/dest URL pairs from file urlPairList rather than\n\tcommand line arguments\n"
+    )
     sys.stderr.write("  -h, --help\n\tPrint this help message\n")
-    sys.stderr.write("  -l logfile, --log=logfile\n\tWrite log messages to file \"logfile\" (default is stdout)\n")
+    sys.stderr.write(
+        '  -l logfile, --log=logfile\n\tWrite log messages to file "logfile" (default is stdout)\n'
+    )
     sys.stderr.write("\nSupported protocols:\n\t")
     sys.stderr.write("\n\t".join(proto_list))
     sys.stderr.write("\n")
@@ -216,79 +261,95 @@ def usage(methods):
 
 cfgfile = "/usr/local/etc/supermover.cfg"
-if ( os.path.exists("/etc/supermover.cfg") ):
+if os.path.exists("/etc/supermover.cfg"):
     cfgfile = "/etc/supermover.cfg"
-if ( "SUPERMOVER_CFG" in os.environ ):
+if "SUPERMOVER_CFG" in os.environ:
     cfgfile = os.environ["SUPERMOVER_CFG"]
 log = sys.stdout
 urllist = None
 show_usage = False
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:],
-                               "c:df:hl:",
-                               ["config=s","debug","file=s","help","log=s"])
-except getopt.GetoptError, err:
-    sys.stderr.write(str(err)+"\n\n")
+    opts, args = getopt.getopt(
+        # long options that take a value end in "=" ("config=s" is Perl style)
+        sys.argv[1:], "c:df:hl:", ["config=", "debug", "file=", "help", "log="]
+    )
+except getopt.GetoptError as err:
+    sys.stderr.write(str(err) + "\n\n")
     methods = get_methods(cfgfile)
     usage(methods)
     sys.exit(-1)
-
+
 if opts == [] and args == []:
     show_usage = True
-
+
-if ( not show_usage ):
+if not show_usage:
     for opt in opts:
-        if ( opt[0]=="-h" or opt[0]=="--help" ):
+        if opt[0] == "-h" or opt[0] == "--help":
            show_usage = True
-        if ( opt[0]=="-d" or opt[0]=="--debug" ):
+        if opt[0] == "-d" or opt[0] == "--debug":
            debug = True
-        if ( opt[0]=="-c" or opt[0]=="--config" ):
+        if opt[0] == "-c" or opt[0] == "--config":
            cfgfile = opt[1]
-        if ( opt[0]=="-f" or opt[0]=="--file" ):
+        if opt[0] == "-f" or opt[0] == "--file":
            urllist = opt[1]
-        if ( opt[0]=="-l" or opt[0]=="--log" ):
+        if opt[0] == "-l" or opt[0] == "--log":
            logfile = opt[1]
            try:
-                log = open(logfile,'w')
-            except IOError, (errno, strerror):
-                sys.stderr.write("Can't open "+logfile+" for writing: "+strerror+" (errno="+str(errno)+")\n")
-                pass
+                log = open(logfile, "w")
+            except IOError as e:
+                errno, strerror = e.args
+                sys.stderr.write(
+                    "Can't open "
+                    + logfile
+                    + " for writing: "
+                    + strerror
+                    + " (errno="
+                    + str(errno)
+                    + ")\n"
+                )
+                pass
 
 methods = get_methods(cfgfile)
-if ( debug is True ):
+if debug is True:
     for method in methods.keys():
-        log.write("method["+method+"] = "+methods[method]+"\n")
-if ( show_usage ):
+        log.write("method[" + method + "] = " + methods[method] + "\n")
+if show_usage:
     usage(methods)
     sys.exit(0)
 log.flush()
 
-if ( urllist is not None ):
-    if ( not os.path.exists(urllist) ):
-        raise IOError("URL list file "+urllist+" does not exist")
-    elif ( not os.path.isfile(urllist) ):
-        raise IOError("URL list \"file\" "+urllist+" is not actually a file")
+if urllist is not None:
+    if not os.path.exists(urllist):
+        raise IOError("URL list file " + urllist + " does not exist")
+    elif not os.path.isfile(urllist):
+        raise IOError('URL list "file" ' + urllist + " is not actually a file")
     else:
-        fd = open(urllist,"r")
+        fd = open(urllist, "r")
         lines = fd.readlines()
         for line in lines:
            token = line.split()
-            if ( token!=[] and len(token)==2 and not token[0].startswith("#") ):
+            if token != [] and len(token) == 2 and not token[0].startswith("#"):
                src = token[0]
                dest = token[1]
-                do_xfer(src,dest,methods,log)
+                do_xfer(src, dest, methods, log)
         fd.close()
 else:
-    if ( len(args)!=2 ):
-        usage()
+    if len(args) != 2:
+        # usage() requires the methods mapping for its protocol listing
+        usage(methods)
     else:
         src = args[0]
         dest = args[1]
-        do_xfer(src,dest,methods,log)
+        do_xfer(src, dest, methods, log)
 
-log.write(str(datetime.datetime.now())+": "+str(successes)+" successes, "+str(failures)+" failures\n")
-if ( failures>0 ):
+log.write(
+    str(datetime.datetime.now())
+    + ": "
+    + str(successes)
+    + " successes, "
+    + str(failures)
+    + " failures\n"
+)
+if failures > 0:
     sys.exit(-failures)
 else:
     sys.exit(0)
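
The %-tokens replaced in do_xfer() above come from the method templates in supermover.cfg (e.g. a hypothetical line "file:ssh = scp %f %H:%F"). A condensed sketch of that expansion, using the same token meanings as the cmd.replace() calls (%u/%h/%p/%f are the source URL, host, port, and path; %U/%H/%P/%F the destination's):

def expand_method(template, src, dest):
    """src and dest are dicts with "url", "host", "port", and "path" keys."""
    tokens = {"%u": src["url"], "%h": src["host"], "%p": src["port"],
              "%f": src["path"], "%U": dest["url"], "%H": dest["host"],
              "%P": dest["port"], "%F": dest["path"]}
    cmd = template
    for token, value in tokens.items():
        if value is not None:  # ports may be absent, as in do_xfer()
            cmd = cmd.replace(token, value)
    return cmd

# expand_method("scp %f %H:%F", src, dest) -> e.g. "scp /tmp/in db2:/tmp/out"
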
diff --git a/etc/create-tables.mysql b/etc/create-tables.mysql
index 725a716..16d9fa7 100644
--- a/etc/create-tables.mysql
+++ b/etc/create-tables.mysql
@@ -57,7 +57,7 @@ CREATE INDEX submit_ts_jobs ON Jobs (submit_ts);
 CREATE INDEX submit_jobs ON Jobs (submit_date);
 CREATE INDEX eligible_ts_jobs ON Jobs (eligible_ts);
 CREATE INDEX eligible_jobs ON Jobs (eligible_date);
-CREATE INDEX start_ts_jobs ON Jobs (start_ts);C
+CREATE INDEX start_ts_jobs ON Jobs (start_ts);
 CREATE INDEX start_jobs ON Jobs (start_date);
 CREATE INDEX end_ts_jobs ON Jobs (end_ts);
 CREATE INDEX end_jobs ON Jobs (end_date);
diff --git a/pbstools.spec b/pbstools.spec
index
19d1991..2d4c949 100644 --- a/pbstools.spec +++ b/pbstools.spec @@ -23,7 +23,7 @@ OpenPBS, PBS Pro, and TORQUE). %package ja Summary: PBStools Job Accounting Group: System Environment/Base -Requires: python +Requires: python3 %description ja ja provides job accounting within a PBS job, similar to the command of the same name in NQE. @@ -48,7 +48,7 @@ qexec is a PBS workalike for the SGE qlogin, qrsh, and qsh commands. %package -n supermover Summary: Supermover Group: System Environment/Base -Requires: python +Requires: python3 %description -n supermover supermover is a highly configurable wrapper around other data transfer utilities such as scp, globus-url-copy, and hsi. @@ -57,7 +57,7 @@ utilities such as scp, globus-url-copy, and hsi. %package dmsub Summary: PBStools Data Movement Job Submission Group: System Environment/Base -Requires: python,supermover +Requires: python3,supermover %description dmsub dmsub is a tool for submitting data movement jobs. It understands data transfer descriptions in the formats of DMOVER, RFT, and Stork; it can @@ -67,7 +67,7 @@ also use several different data movement tools, including supermover. %package dagsub Summary: PBStools Directed Acyclic Graph Job Submission Group: System Environment/Base -Requires: python,dmsub +Requires: python3,dmsub %description dagsub dagsub is a workalike for condor_submit_dag. This allows the submission of large, complex sets of dependent jobs using a relatively @@ -77,7 +77,7 @@ simple syntax. It relies on dmsub for data movement. %package job-vm-launch Summary: PBStools VM Launcher Group: System Environment/Base -Requires: python,libvirt-python +Requires: python3,libvirt-python %description job-vm-launch job-vm-launch launches a virtual machine instance within a TORQUE job using KVM. It should be installed on compute nodes running pbs_mom @@ -86,14 +86,14 @@ and libvirtd/qemu-kvm. %package jobarray-to-pcp Summary: PBStools Job Array to PCP Group: System Environment/Base -Requires: python +Requires: python3 %description jobarray-to-pcp Run the equivalent of a TORQUE job array using parallel-command-processor. %package pbs-spark-submit Summary: PBStools Spark Launcher Group: System Environment/Base -Requires: python +Requires: python3 %description pbs-spark-submit pbs-spark-submit launches an Apache Spark program within a TORQUE job, including starting the Spark master and worker processes in standalone @@ -112,8 +112,8 @@ which have not been allocated jobs on that host. %package -n pbsacct-python Summary: pbsacct Python library Group: System Environment/Base -Requires: python -BuildRequires: python-setuptools +Requires: python3 +BuildRequires: python3-setuptools %description -n pbsacct-python pbsacct.py is a module for parsing and processing PBS accounting log files. @@ -121,7 +121,7 @@ pbsacct.py is a module for parsing and processing PBS accounting log files. %package -n pbsacct-collector Summary: pbsacct Data Collector Group: System Environment/Base -Requires: perl,perl-DBD-MySQL,python,pbsacct-python +Requires: perl,perl-DBD-MySQL,python3,python3-mysqlclient,pbsacct-python %description -n pbsacct-collector pbsacct-collector is the data collection core of the pbsacct workload analysis system. 
It should be installed on the same host as a @@ -157,7 +157,11 @@ External PHP ods library for pbsacct-php %package -n pbsacct-db Summary: pbsacct Database Backend Group: System Environment/Base +%if 0%{?rhel}>=7 +Requires: mariadb-server +%else Requires: mysql-server +%endif %description -n pbsacct-db pbsacct-db is the database backend for the pbsacct workload analysis system. It should be installed on a database server running MySQL. @@ -240,10 +244,9 @@ make PREFIX=%{buildroot}/%{_prefix} WEBPREFIX=%{buildroot}/var/www/html/pbsacct %doc %{_mandir}/man8/reaver.8.gz %files -n pbsacct-python -%{python_sitelib}/pbsacct-* -%{python_sitelib}/pbsacct/__init__.py -%{python_sitelib}/pbsacct/__init__.pyc -%{python_sitelib}/pbsacct/__init__.pyo +%{python3_sitelib}/pbsacct-* +%{python3_sitelib}/pbsacct/__init__.py +%{python3_sitelib}/pbsacct/__pycache__/ %{_sbindir}/transform-accounting-log %files -n pbsacct-collector diff --git a/sbin/job-db-update b/sbin/job-db-update index 63f9623..0fed227 100755 --- a/sbin/job-db-update +++ b/sbin/job-db-update @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # # job-db-update -- Insert job records into a MySQL DB # Python rewrite @@ -24,9 +24,9 @@ import getopt import logging import os import pbsacct -import re import sys + def usage(): sys.stderr.write("Usage:\tjob-db-update\t[-h system] [-s dbhost] [-D dbtype]\n") sys.stderr.write("\t\t\t[-d dbname] [-u dbuser] [-p dbpasswd] [-q]\n") @@ -52,70 +52,68 @@ noop = False syslog = False append_to_jobid_in_db = None -db = pbsacct.pbsacctDB(host=dbhost,db=dbname,dbuser=dbuser,dbpasswd=dbpasswd) -if ( not os.environ.has_key("PBSTOOLS_DIR") ): +db = pbsacct.pbsacctDB(host=dbhost, db=dbname, dbuser=dbuser, dbpasswd=dbpasswd) +if "PBSTOOLS_DIR" not in os.environ: os.environ["PBSTOOLS_DIR"] = "/usr/local" -cfgfile = os.environ["PBSTOOLS_DIR"]+"/etc/pbsacctdb.cfg" -if ( os.path.exists(cfgfile) ): +cfgfile = os.environ["PBSTOOLS_DIR"] + "/etc/pbsacctdb.cfg" +if os.path.exists(cfgfile): db.readConfigFile(cfgfile) # command line processing try: - opts, filelist = getopt.getopt(sys.argv[1:], - "C:D:LTa:c:d:h:lp:qs:t:u:w:", - []) -except getopt.GetoptError, err: - sys.stderr.write(str(err)+"\n") + opts, filelist = getopt.getopt(sys.argv[1:], "C:D:LTa:c:d:h:lp:qs:t:u:w:", []) +except getopt.GetoptError as err: + sys.stderr.write(str(err) + "\n") usage() for opt in opts: - if ( opt[0]=="-C" ): + if opt[0] == "-C": db.setConfigTable(opt[1]) - elif ( opt[0]=="-D" ): + elif opt[0] == "-D": db.setType(opt[1]) - elif ( opt[0]=="-L" ): + elif opt[0] == "-L": syslog = True - elif ( opt[0]=="-T" ): + elif opt[0] == "-T": noop = True - elif ( opt[0]=="-a" ): + elif opt[0] == "-a": append_to_jobid_in_db = opt[1] - elif ( opt[0]=="-c" ): + elif opt[0] == "-c": db.readConfigFile(opt[1]) - elif ( opt[0]=="-d" ): + elif opt[0] == "-d": db.setName(opt[1]) - elif ( opt[0]=="-h" ): + elif opt[0] == "-h": db.setSystem(opt[1]) - elif ( opt[0]=="-l" ): + elif opt[0] == "-l": syslog = False - elif ( opt[0]=="-p" ): + elif opt[0] == "-p": db.setPassword(opt[1]) - elif ( opt[0]=="-q" ): + elif opt[0] == "-q": query = True - elif ( opt[0]=="-s" ): + elif opt[0] == "-s": db.setServerName(opt[1]) - elif ( opt[0]=="-t" ): + elif opt[0] == "-t": db.setJobsTable(opt[1]) - elif ( opt[0]=="-u" ): + elif opt[0] == "-u": db.setUser(opt[1]) - elif ( opt[0]=="-w" ): + elif opt[0] == "-w": db.setSoftwareTable(opt[1]) -if ( system is None and db.getSystem() is not None ): +if system is None and db.getSystem() is not None: system = db.getSystem() # configure 
logging try: logger = pbsacct.getLogger() -except: +except Exception: logger = logging.getLogger(sys.argv[0]) -if ( noop ): +if noop: logger.setLevel(logging.INFO) else: logger.setLevel(logging.WARNING) lh = logging.StreamHandler(sys.stderr) -if ( syslog ): - lh = logging.handlers.SysLogHandler(address='/dev/log') -lh.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s')) +if syslog: + lh = logging.handlers.SysLogHandler(address="/dev/log") +lh.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s")) logger.addHandler(lh) # connect to DB @@ -123,63 +121,170 @@ dbconn = db.connect() cursor = dbconn.cursor() # ingest job data from accounting logs -if ( len(filelist)>0 ): - jobs = pbsacct.jobs_from_files(filelist,system) - - for jobid in sorted(jobs,key=lambda jobid:jobs[jobid].end_ts): +if len(filelist) > 0: + jobs = pbsacct.jobs_from_files(filelist, system) + + for jobid in sorted(jobs, key=lambda jobid: jobs[jobid].end_ts()): # in some versions of TORQUE, jobs that are deleted before they start # will be reported as in the E state with an end time (when it was # qdel'ed) but no start time, so just checking for the E state is not # enough to avoid false positives here - if ( jobs[jobid].get_state() in ["S","E","A"] and - jobs[jobid].start_ts()>0 ): - try: - if ( query ): - dbjob = None - if ( db.job_exists(jobid,append_to_jobid=append_to_jobid_in_db) ): - dbjob = db.get_job(jobid,append_to_jobid=append_to_jobid_in_db) - if ( dbjob is None ): - logger.info("Jobid %s does not exist in DB" % jobid) - elif ( dbjob!=jobs[jobid] ): - if ( jobid!=dbjob.jobid() ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, "jobid", str(dbjob.jobid(), str(jobid)))) - if ( jobs[jobid].user()!=dbjob.user() ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, "user", str(dbjob.user()), str(jobs[jobid].user()))) - if ( jobs[jobid].group()!=dbjob.group() ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, "group", str(dbjob.group()), str(jobs[jobid].group()))) - if ( jobs[jobid].account()!=dbjob.account() ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, "account", str(dbjob.account()), str(jobs[jobid].account()))) - if ( jobs[jobid].queue()!=dbjob.queue() ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, "queue", str(dbjob.queue()), str(jobs[jobid].queue()))) - for column in list( set(jobs[jobid].get_resource_keys()) | set(dbjob.get_resource_keys()) ): - dbvalue = dbjob.get_resource(column) - logvalue = jobs[jobid].get_resource(column) - ignorecolumns = ["qtime", - "Resource_List.pmem", - "resources_used.energy_used", - "session", - "total_execution_slots", - "unique_node_count"] - if ( column not in ignorecolumns ): - if ( str(dbvalue)!=str(logvalue) ): - if ( column in ["ctime","etime","start","end"] and ( dbvalue is not None and dbvalue!=0 ) and ( logvalue is not None and logvalue!=0 ) ): - logger.info("%s %s: dbvalue=%s, logvalue=%s, difference=%d" % (jobid, column, str(dbvalue), str(logvalue), int(dbvalue)-int(logvalue))) - # resources_used.cput and resources_used.walltime are weird because sometimes - # they're reported in hh:mm:ss format and sometimes they're just in seconds, - # depending on which version of TORQUE you're using. 
- elif ( column=="resources_used.cput" and dbvalue is not None and logvalue is not None and pbsacct.time_to_sec(dbvalue)==int(logvalue) ): - next - elif ( column=="resources_used.cput" and dbvalue is not None and logvalue is not None and pbsacct.time_to_sec(dbvalue)!=int(logvalue) ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, column, str(dbvalue), pbsacct.sec_to_time(int(logvalue)))) - elif ( column=="resources_used.walltime" and dbvalue is not None and logvalue is not None and pbsacct.time_to_sec(dbvalue)==int(logvalue) ): - next - elif ( column=="resources_used.walltime" and dbvalue is not None and logvalue is not None and pbsacct.time_to_sec(dbvalue)!=int(logvalue) ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, column, str(dbvalue), pbsacct.sec_to_time(int(logvalue)))) - elif ( ( dbvalue is not None and dbvalue!=0 ) and ( logvalue is not None and logvalue!=0 ) ): - logger.info("%s %s: dbvalue=%s, logvalue=%s" % (jobid, column, str(dbvalue), str(logvalue))) - else: - db.insert_or_update_job(jobs[jobid],system=system,noop=noop,append_to_jobid=append_to_jobid_in_db) - except: - pass + if jobs[jobid].get_state() in ["S", "E", "A"] and jobs[jobid].start_ts() > 0: + try: + if query: + dbjob = None + if db.job_exists(jobid, append_to_jobid=append_to_jobid_in_db): + dbjob = db.get_job(jobid, append_to_jobid=append_to_jobid_in_db) + if dbjob is None: + logger.info("Jobid %s does not exist in DB" % jobid) + elif dbjob != jobs[jobid]: + if jobid != dbjob.jobid(): + logger.info( + "%s %s: dbvalue=%s, logvalue=%s" + % (jobid, "jobid", str(dbjob.jobid()), str(jobid)) + ) + if jobs[jobid].user() != dbjob.user(): + logger.info( + "%s %s: dbvalue=%s, logvalue=%s" + % ( + jobid, + "user", + str(dbjob.user()), + str(jobs[jobid].user()), + ) + ) + if jobs[jobid].group() != dbjob.group(): + logger.info( + "%s %s: dbvalue=%s, logvalue=%s" + % ( + jobid, + "group", + str(dbjob.group()), + str(jobs[jobid].group()), + ) + ) + if jobs[jobid].account() != dbjob.account(): + logger.info( + "%s %s: dbvalue=%s, logvalue=%s" + % ( + jobid, + "account", + str(dbjob.account()), + str(jobs[jobid].account()), + ) + ) + if jobs[jobid].queue() != dbjob.queue(): + logger.info( + "%s %s: dbvalue=%s, logvalue=%s" + % ( + jobid, + "queue", + str(dbjob.queue()), + str(jobs[jobid].queue()), + ) + ) + for column in list( + set(jobs[jobid].get_resource_keys()) + | set(dbjob.get_resource_keys()) + ): + dbvalue = dbjob.get_resource(column) + logvalue = jobs[jobid].get_resource(column) + ignorecolumns = [ + "qtime", + "Resource_List.pmem", + "resources_used.energy_used", + "session", + "total_execution_slots", + "unique_node_count", + ] + if column not in ignorecolumns: + if str(dbvalue) != str(logvalue): + if ( + column in ["ctime", "etime", "start", "end"] + and (dbvalue is not None and dbvalue != 0) + and (logvalue is not None and logvalue != 0) + ): + logger.info( + "%s %s: dbvalue=%s, logvalue=%s, difference=%d" + % ( + jobid, + column, + str(dbvalue), + str(logvalue), + int(dbvalue) - int(logvalue), + ) + ) + # resources_used.cput and resources_used.walltime are weird because sometimes + # they're reported in hh:mm:ss format and sometimes they're just in seconds, + # depending on which version of TORQUE you're using. 
+                                elif (
+                                    column == "resources_used.cput"
+                                    and dbvalue is not None
+                                    and logvalue is not None
+                                    and pbsacct.time_to_sec(dbvalue)
+                                    == int(logvalue)
+                                ):
+                                    # values agree once normalized to seconds;
+                                    # bare "next" was a Perl-ism that Python
+                                    # evaluates as a harmless no-op
+                                    pass
+                                elif (
+                                    column == "resources_used.cput"
+                                    and dbvalue is not None
+                                    and logvalue is not None
+                                    and pbsacct.time_to_sec(dbvalue)
+                                    != int(logvalue)
+                                ):
+                                    logger.info(
+                                        "%s %s: dbvalue=%s, logvalue=%s"
+                                        % (
+                                            jobid,
+                                            column,
+                                            str(dbvalue),
+                                            pbsacct.sec_to_time(int(logvalue)),
+                                        )
+                                    )
+                                elif (
+                                    column == "resources_used.walltime"
+                                    and dbvalue is not None
+                                    and logvalue is not None
+                                    and pbsacct.time_to_sec(dbvalue)
+                                    == int(logvalue)
+                                ):
+                                    pass
+                                elif (
+                                    column == "resources_used.walltime"
+                                    and dbvalue is not None
+                                    and logvalue is not None
+                                    and pbsacct.time_to_sec(dbvalue)
+                                    != int(logvalue)
+                                ):
+                                    logger.info(
+                                        "%s %s: dbvalue=%s, logvalue=%s"
+                                        % (
+                                            jobid,
+                                            column,
+                                            str(dbvalue),
+                                            pbsacct.sec_to_time(int(logvalue)),
+                                        )
+                                    )
+                                elif (dbvalue is not None and dbvalue != 0) and (
+                                    logvalue is not None and logvalue != 0
+                                ):
+                                    logger.info(
+                                        "%s %s: dbvalue=%s, logvalue=%s"
+                                        % (
+                                            jobid,
+                                            column,
+                                            str(dbvalue),
+                                            str(logvalue),
+                                        )
+                                    )
+            else:
+                db.insert_or_update_job(
+                    jobs[jobid],
+                    system=system,
+                    noop=noop,
+                    append_to_jobid=append_to_jobid_in_db,
+                )
+        except Exception:
+            # skip records that cannot be compared or ingested
+            pass
 
 dbconn.close()
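
The long elif chain above exists because TORQUE reports resources_used.cput and resources_used.walltime either as "hh:mm:ss" strings or as raw seconds, depending on version. A condensed sketch of the comparison it performs, assuming pbsacct.time_to_sec() behaves as used in this script:

import pbsacct

def times_agree(dbvalue, logvalue):
    """True when a DB value (possibly hh:mm:ss) matches a log value in seconds."""
    if dbvalue is None or logvalue is None:
        return False
    return pbsacct.time_to_sec(dbvalue) == int(logvalue)
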
diff --git a/sbin/jobscript-to-db b/sbin/jobscript-to-db
index 6206c5e..e78dfba 100755
--- a/sbin/jobscript-to-db
+++ b/sbin/jobscript-to-db
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # jobscript-to-db -- copy newly spooled job scripts into database
 # Python rewrite
@@ -16,9 +16,9 @@ import getopt
 import logging
+import logging.handlers
 import os
 import pbsacct
-import re
 import sys
 
+
 def usage():
     sys.stderr.write("Usage:\tjobscript-to-db\t[-h system] [-s dbhost] [-D dbtype]\n")
     sys.stderr.write("\t\t\t[-d dbname] [-u dbuser] [-p dbpasswd]\n")
@@ -42,52 +42,50 @@ noop = False
 append_to_jobid_in_db = None
 syslog = False
 
-db = pbsacct.pbsacctDB(host=dbhost,db=dbname,dbuser=dbuser,dbpasswd=dbpasswd)
-if ( not os.environ.has_key("PBSTOOLS_DIR") ):
+db = pbsacct.pbsacctDB(host=dbhost, db=dbname, dbuser=dbuser, dbpasswd=dbpasswd)
+if "PBSTOOLS_DIR" not in os.environ:
     os.environ["PBSTOOLS_DIR"] = "/usr/local"
-cfgfile = os.environ["PBSTOOLS_DIR"]+"/etc/pbsacctdb.cfg"
-if ( os.path.exists(cfgfile) ):
+cfgfile = os.environ["PBSTOOLS_DIR"] + "/etc/pbsacctdb.cfg"
+if os.path.exists(cfgfile):
     db.readConfigFile(cfgfile)
 
 # command line processing
 try:
-    opts, filelist = getopt.getopt(sys.argv[1:],
-                                   "C:D:LTa:c:d:h:lp:s:t:u:w:",
-                                   [])
-except getopt.GetoptError, err:
-    sys.stderr.write(str(err)+"\n")
+    opts, filelist = getopt.getopt(sys.argv[1:], "C:D:LTa:c:d:h:lp:s:t:u:w:", [])
+except getopt.GetoptError as err:
+    sys.stderr.write(str(err) + "\n")
     usage()
 
 for opt in opts:
-    if ( opt[0]=="-C" ):
+    if opt[0] == "-C":
         db.setConfigTable(opt[1])
-    elif ( opt[0]=="-D" ):
+    elif opt[0] == "-D":
         db.setType(opt[1])
-    elif ( opt[0]=="-L" ):
+    elif opt[0] == "-L":
         syslog = True
-    elif ( opt[0]=="-T" ):
+    elif opt[0] == "-T":
         noop = True
-    elif ( opt[0]=="-a" ):
+    elif opt[0] == "-a":
         append_to_jobid_in_db = opt[1]
-    elif ( opt[0]=="-c" ):
+    elif opt[0] == "-c":
         db.readConfigFile(opt[1])
-    elif ( opt[0]=="-d" ):
+    elif opt[0] == "-d":
         db.setName(opt[1])
-    elif ( opt[0]=="-h" ):
+    elif opt[0] == "-h":
         db.setSystem(opt[1])
-    elif ( opt[0]=="-l" ):
+    elif opt[0] == "-l":
         syslog = False
-    elif ( opt[0]=="-p" ):
+    elif opt[0] == "-p":
         db.setPassword(opt[1])
-    elif ( opt[0]=="-s" ):
+    elif opt[0] == "-s":
         db.setServerName(opt[1])
-    elif ( opt[0]=="-t" ):
+    elif opt[0] == "-t":
         db.setJobsTable(opt[1])
-    elif ( opt[0]=="-u" ):
+    elif opt[0] == "-u":
         db.setUser(opt[1])
-    elif ( opt[0]=="-w" ):
+    elif opt[0] == "-w":
         db.setSoftwareTable(opt[1])
 
-if ( system is None and db.getSystem() is not None ):
+if system is None and db.getSystem() is not None:
     system = db.getSystem()
 else:
     system = "%"
@@ -95,14 +93,14 @@ else:
 
 # configure logging
 logger = pbsacct.getLogger()
-if ( noop ):
+if noop:
     logger.setLevel(logging.INFO)
 else:
     logger.setLevel(logging.WARNING)
 lh = logging.StreamHandler(sys.stderr)
-if ( syslog ):
-    lh = logging.handlers.SysLogHandler(address='/dev/log')
-lh.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s'))
+if syslog:
+    lh = logging.handlers.SysLogHandler(address="/dev/log")
+lh.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
 logger.addHandler(lh)
 
 # connect to DB
@@ -110,24 +108,31 @@ dbconn = db.connect()
 cursor = dbconn.cursor()
 
 for scriptfile in filelist:
-    if ( os.path.exists(scriptfile) and os.access(scriptfile,os.R_OK) ):
-        jobid = os.path.basename(scriptfile).rstrip(".SC")
-        if ( append_to_jobid_in_db is not None ):
-            jobid = jobid+append_to_jobid_in_db
+    if os.path.exists(scriptfile) and os.access(scriptfile, os.R_OK):
+        # rstrip(".SC") strips a *character set*, not a suffix; trim explicitly
+        jobid = os.path.basename(scriptfile)
+        if jobid.endswith(".SC"):
+            jobid = jobid[:-3]
+        if append_to_jobid_in_db is not None:
+            jobid = jobid + append_to_jobid_in_db
         scriptfp = open(scriptfile)
         script = "".join(scriptfp.readlines())
         scriptfp.close()
-        if ( db.job_exists(jobid) ):
-            sql = "UPDATE "+db.getJobsTable()+" SET script=%s WHERE system LIKE %s AND jobid=%s;"
+        if db.job_exists(jobid):
+            sql = (
+                "UPDATE "
+                + db.getJobsTable()
+                + " SET script=%s WHERE system LIKE %s AND jobid=%s;"
+            )
         else:
-            sql = "INSERT INTO "+db.getJobsTable()+" (script,system,jobid) VALUES (%s,%s,%s);"
-        if ( noop ):
-            logger.info("%s (%s,%s,%s)" % (sql,script,system,jobid))
+            sql = (
+                "INSERT INTO "
+                + db.getJobsTable()
+                + " (script,system,jobid) VALUES (%s,%s,%s);"
+            )
+        if noop:
+            logger.info("%s (%s,%s,%s)" % (sql, script, system, jobid))
         else:
             try:
-                cursor.execute(sql,(script,system,jobid))
+                cursor.execute(sql, (script, system, jobid))
                 dbconn.commit()
             except Exception as e:
                 logger.debug("%s" % sql)
                 logger.error(str(e))
-
diff --git a/sbin/pbsacct-dump b/sbin/pbsacct-dump
index ecd88ec..c7f956f 100755
--- a/sbin/pbsacct-dump
+++ b/sbin/pbsacct-dump
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 #
 # pbsacct-dump -- Dump records from pbsacct DB into CSV format
 # Copyright 2016 Ohio Supercomputer Center
@@ -16,10 +16,24 @@
 # $Date$
 
 import getopt
-import MySQLdb
-import os
 import sys
 
+try:
+    import MySQLdb
+except ImportError:
+    try:
+        import pymysql
+
+        pymysql.install_as_MySQLdb()
+        import MySQLdb
+    except ImportError:
+        print(
+            "Error: MySQL Python library not found. Install python3-mysqlclient or PyMySQL",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+
 def usage():
     sys.stderr.write("Usage: pbsacct-dump [-s dbserver] [-d dbname] [-t dbtable ]\n")
     sys.stderr.write("       [-u dbuser] [-p dbpasswd]\n")
@@ -28,6 +42,7 @@ def usage():
     sys.stderr.write("Default system is '%' (i.e.
all).\n") sys.exit(1) + # # Main program starts here # @@ -42,67 +57,65 @@ end_date = None # command line processing try: - opts, args = getopt.getopt(sys.argv[1:], - "E:S:d:h:p:s:t:u:", - []) -except getopt.GetoptError, err: - sys.stderr.write(str(err)+"\n") + opts, args = getopt.getopt(sys.argv[1:], "E:S:d:h:p:s:t:u:", []) +except getopt.GetoptError as err: + sys.stderr.write(str(err) + "\n") usage() for opt in opts: - if ( opt[0]=="-E" ): + if opt[0] == "-E": end_date = opt[1] - elif ( opt[0]=="-S" ): + elif opt[0] == "-S": start_date = opt[1] - elif ( opt[0]=="-d" ): + elif opt[0] == "-d": dbname = opt[1] - elif ( opt[0]=="-h" ): + elif opt[0] == "-h": hostname = opt[1] - elif ( opt[0]=="-p" ): + elif opt[0] == "-p": dbpasswd = opt[1] - elif ( opt[0]=="-s" ): + elif opt[0] == "-s": dbhost = opt[1] - elif ( opt[0]=="-t" ): + elif opt[0] == "-t": dbtable = opt[1] - elif ( opt[0]=="-u" ): + elif opt[0] == "-u": dbuser = opt[1] # connect to DB -db = MySQLdb.connect(host=dbhost,db=dbname,user=dbuser,passwd=dbpasswd) +db = MySQLdb.connect(host=dbhost, db=dbname, user=dbuser, passwd=dbpasswd) cursor = db.cursor() # dump job records -sql = "SELECT * FROM Jobs WHERE system LIKE '"+hostname+"'" -if ( start_date is not None ): - sql += " AND submit_date>='"+start_date+"'" -if ( end_date is not None ): - sql += " AND submit_date<='"+end_date+"'" -#sys.stderr.write(sql+"\n") +sql = "SELECT * FROM Jobs WHERE system LIKE '" + hostname + "'" +if start_date is not None: + sql += " AND submit_date>='" + start_date + "'" +if end_date is not None: + sql += " AND submit_date<='" + end_date + "'" +# sys.stderr.write(sql+"\n") records = 0 try: cursor.execute(sql) results = cursor.fetchone() - while ( results!=None ): - records = records+1 + while results is not None: + records = records + 1 output = "" # because of the mixed types, you can't just do ",".join(results) here for result in results: - if ( isinstance(result,str) ): + if isinstance(result, str): # transliterate '\n' into '\\n' - result = "'"+result.replace("\n","\\n")+"'" - elif ( result is None ): + result = "'" + result.replace("\n", "\\n") + "'" + elif result is None: result = "NULL" - if ( output=="" ): + if output == "": output = str(result) else: - output = output+","+str(result) - sys.stdout.write(output+"\n") - results=cursor.fetchone() + output = output + "," + str(result) + sys.stdout.write(output + "\n") + results = cursor.fetchone() except KeyboardInterrupt: raise except Exception as e: - sys.stderr.write("ERROR: unable to fetch results -- "+(str(e))+"\n") + sys.stderr.write("ERROR: unable to fetch results -- " + (str(e)) + "\n") else: - sys.stderr.write("Read "+str(records)+" records\n") + sys.stderr.write("Read " + str(records) + " records\n") # close DB connection db.close() diff --git a/sbin/spool-jobscripts b/sbin/spool-jobscripts index be48626..f5a4a9d 100755 --- a/sbin/spool-jobscripts +++ b/sbin/spool-jobscripts @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 # # spool-jobscripts -- copy newly created job scripts to a spool directory # Python rewrite @@ -17,9 +17,11 @@ import getopt import os import sys + def usage(): sys.exit(1) + # # main program begins here # @@ -32,34 +34,32 @@ noop = False # command line argument processing try: - opts, args = getopt.getopt(sys.argv[1:], - "Ta:d:hs:", - []) -except getopt.GetoptError, err: - sys.stderr.write(str(err)+"\n") + opts, args = getopt.getopt(sys.argv[1:], "Ta:d:hs:", []) +except getopt.GetoptError as err: + sys.stderr.write(str(err) + "\n") usage() for opt in opts: - if ( 
opt[0] in ["-T"] ): + if opt[0] in ["-T"]: noop = True - elif ( opt[0] in ["-a"] ): + elif opt[0] in ["-a"]: append_to_jobid_in_db = opt[1] - elif ( opt[0] in ["-d"] ): + elif opt[0] in ["-d"]: spooldir = opt[1] - elif ( opt[0] in ["-h"] ): + elif opt[0] in ["-h"]: usage() - elif ( opt[0] in ["-s"] ): + elif opt[0] in ["-s"]: jobsdir = opt[1] -if ( len(args)>0 ): +if len(args) > 0: jobsdir = args[0] -if ( not os.path.exists(jobsdir) ): +if not os.path.exists(jobsdir): raise IOError("Jobs directory %s does not exist" % jobsdir) else: os.chdir(jobsdir) -if ( not os.path.exists(spooldir) ): - os.mkdir(spooldir,0700) +if not os.path.exists(spooldir): + os.mkdir(spooldir, 0o700) # get list of script files srcfiles = [] @@ -67,42 +67,43 @@ destfiles = [] for file in os.listdir(jobsdir): # for each script file, copy it to the spool directory if it's not already # there - srcfile = jobsdir+"/"+file - destfile = spooldir+"/"+file - if ( file.endswith(".SC") and - os.path.exists(srcfile) and - not os.path.exists(destfile) ): + srcfile = jobsdir + "/" + file + destfile = spooldir + "/" + file + if ( + file.endswith(".SC") + and os.path.exists(srcfile) + and not os.path.exists(destfile) + ): srcfiles.append(srcfile) destfiles.append(destfile) # copy files to spool dir -if ( len(srcfiles)>0 ): - cmd = "/bin/cp %s %s" % (" ".join(srcfiles),spooldir) - if ( noop ): +if len(srcfiles) > 0: + cmd = "/bin/cp %s %s" % (" ".join(srcfiles), spooldir) + if noop: sys.stderr.write("%s\n" % cmd) else: os.system(cmd) # fork/exec jobscript-to-db; don't wait for completion -if ( len(destfiles)>0 ): +if len(destfiles) > 0: pid = os.fork() - if ( pid==0 ): + if pid == 0: cmd = "/usr/local/sbin/jobscript-to-db" args = [cmd] - if ( noop ): + if noop: args.append("-T") - if ( append_to_jobid_in_db is not None ): + if append_to_jobid_in_db is not None: args.append("-a") args.append(append_to_jobid_in_db) for destfile in destfiles: args.append(destfile) - if ( noop ): + if noop: sys.stderr.write(" ".join(args)) try: - os.execv(cmd,args) + os.execv(cmd, args) except Exception as e: sys.stderr.write("%s\n" % str(e)) sys.exit(0) sys.exit(0) - diff --git a/sbin/sw_app-cache b/sbin/sw_app-cache index 038c7d9..3a0dc44 100755 --- a/sbin/sw_app-cache +++ b/sbin/sw_app-cache @@ -1,13 +1,15 @@ -#!/usr/bin/python +#!/usr/bin/python3 import getopt import logging import pbsacct import os import sys + def usage(): sys.exit(-1) + # # Main program starts here # @@ -22,68 +24,66 @@ noop = False indexhack = False syslog = False -db = pbsacct.pbsacctDB(host=dbhost,db=dbname,dbuser=dbuser,dbpasswd=dbpasswd) -if ( not os.environ.has_key("PBSTOOLS_DIR") ): +db = pbsacct.pbsacctDB(host=dbhost, db=dbname, dbuser=dbuser, dbpasswd=dbpasswd) +if "PBSTOOLS_DIR" not in os.environ: os.environ["PBSTOOLS_DIR"] = "/usr/local" -cfgfile = os.environ["PBSTOOLS_DIR"]+"/etc/pbsacctdb.cfg" -if ( os.path.exists(cfgfile) ): +cfgfile = os.environ["PBSTOOLS_DIR"] + "/etc/pbsacctdb.cfg" +if os.path.exists(cfgfile): db.readConfigFile(cfgfile) # command line processing try: - opts, args = getopt.getopt(sys.argv[1:], - "E:ILS:Tc:d:h:lp:s:t:u:w:", - []) -except getopt.GetoptError, err: - sys.stderr.write(str(err)+"\n") + opts, args = getopt.getopt(sys.argv[1:], "E:ILS:Tc:d:h:lp:s:t:u:w:", []) +except getopt.GetoptError as err: + sys.stderr.write(str(err) + "\n") usage() for opt in opts: - if ( opt[0]=="-E" ): + if opt[0] == "-E": end_date = opt[1] - elif ( opt[0]=="-I" ): + elif opt[0] == "-I": indexhack = True - elif ( opt[0]=="-L" ): + elif opt[0] == "-L": syslog = True 
-    elif ( opt[0]=="-S" ):
+    elif opt[0] == "-S":
        start_date = opt[1]
-    elif ( opt[0]=="-T" ):
+    elif opt[0] == "-T":
        noop = True
-    elif ( opt[0]=="-c" ):
+    elif opt[0] == "-c":
        db.readConfigFile(opt[1])
-    elif ( opt[0]=="-d" ):
+    elif opt[0] == "-d":
        db.setName(opt[1])
-    elif ( opt[0]=="-h" ):
+    elif opt[0] == "-h":
        db.setSystem(opt[1])
-    elif ( opt[0]=="-l" ):
+    elif opt[0] == "-l":
        syslog = False
-    elif ( opt[0]=="-p" ):
+    elif opt[0] == "-p":
        db.setPassword(opt[1])
-    elif ( opt[0]=="-s" ):
+    elif opt[0] == "-s":
        db.setServerName(opt[1])
-    elif ( opt[0]=="-t" ):
+    elif opt[0] == "-t":
        db.setJobsTable(opt[1])
-    elif ( opt[0]=="-u" ):
+    elif opt[0] == "-u":
        db.setUser(opt[1])
-    elif ( opt[0]=="-w" ):
+    elif opt[0] == "-w":
        db.setSoftwareTable(opt[1])
 
 # configure logging
 logger = pbsacct.getLogger()
-if ( noop ):
+if noop:
     logger.setLevel(logging.INFO)
 else:
     logger.setLevel(logging.WARNING)
 lh = logging.StreamHandler(sys.stderr)
-if ( syslog ):
-    lh = logging.handlers.SysLogHandler(address='/dev/log')
-lh.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s'))
+if syslog:
+    # logging.handlers is not imported by a bare "import logging"
+    import logging.handlers
+    lh = logging.handlers.SysLogHandler(address="/dev/log")
+lh.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
 logger.addHandler(lh)
 
 use_index = ""
-if ( indexhack ):
+if indexhack:
     use_index = "USE INDEX (sw_app_jobs)"
-    if ( start_date is not None or end_date is not None ):
+    if start_date is not None or end_date is not None:
         use_index = "USE INDEX (system_start_jobs)"
 
 # connect to DB
@@ -91,48 +91,58 @@ dbconn = db.connect()
 cursor = dbconn.cursor()
 
 packages_in_jobs = []
-select = "SELECT DISTINCT(sw_app) AS package FROM "+db.getJobsTable()+" "+use_index+" WHERE sw_app IS NOT NULL"
-if ( db.getSystem() is not None ):
-    select += " AND system='"+db.getSystem()+"'"
-if ( start_date is not None ):
-    select += " AND start_date>='"+start_date+"'"
-if ( end_date is not None ):
-    select += " AND start_date<='"+end_date+"'"
+select = (
+    "SELECT DISTINCT(sw_app) AS package FROM "
+    + db.getJobsTable()
+    + " "
+    + use_index
+    + " WHERE sw_app IS NOT NULL"
+)
+if db.getSystem() is not None:
+    select += " AND system='" + db.getSystem() + "'"
+if start_date is not None:
+    select += " AND start_date>='" + start_date + "'"
+if end_date is not None:
+    select += " AND start_date<='" + end_date + "'"
 try:
     cursor.execute(select)
     for row in cursor.fetchall():
         packages_in_jobs.append(row[0])
 except KeyboardInterrupt:
     raise
-except Error, e:
-    log.error("Package select from "+db.getJobsTable()+": "+str(e))
+except Exception as e:
+    logger.error("Package select from " + db.getJobsTable() + ": " + str(e))
     sys.exit(2)
-if ( noop ):
+if noop:
     logger.info("Packages in jobs: [%s]" % ",".join(packages_in_jobs))
 
 packages_in_cache = []
-select = "SELECT package FROM "+db.getSoftwareTable()
+select = "SELECT package FROM " + db.getSoftwareTable()
 try:
     cursor.execute(select)
     for row in cursor.fetchall():
         packages_in_cache.append(row[0])
 except KeyboardInterrupt:
     raise
-except MySQLdb.Error, e:
-    logger.error("MySQL error %d: %s" % (e.args[0],e.args[1]))
-    sys.exit(1)
-except Error, e:
-    logger.error("Unknown error in package select from "+db.getSoftwareTable()+": "+str(e))
+# a second identical "except Exception" clause would be unreachable, so the
+# old MySQLdb.Error and generic handlers are collapsed into one
+except Exception as e:
+    logger.error(
+        "Error in package select from " + db.getSoftwareTable() + ": " + str(e)
+    )
     sys.exit(2)
-if ( noop ):
+if noop:
     logger.info("Packages in cache: [%s]" % ",".join(packages_in_cache))
 
 for pkg in packages_in_jobs:
-    if ( pkg not
in packages_in_cache ): + if pkg not in packages_in_cache: # use the IGNORE keyword here in case there are more than one of these # hitting the DB at once - insert = "INSERT IGNORE INTO "+db.getSoftwareTable()+" SET package='"+pkg+"'" - if ( noop ): + insert = ( + "INSERT IGNORE INTO " + db.getSoftwareTable() + " SET package='" + pkg + "'" + ) + if noop: logger.info("%s" % insert) else: try: @@ -140,10 +150,17 @@ for pkg in packages_in_jobs: dbconn.commit() except KeyboardInterrupt: raise - except Error, e: + except Exception as e: if dbconn: dbconn.rollback() - logger.error("Error in insert of "+pkg+" into "+db.getSoftwareTable()+": "+str(e)) + logger.error( + "Error in insert of " + + pkg + + " into " + + db.getSoftwareTable() + + ": " + + str(e) + ) sys.exit(2) - + db.close() diff --git a/sbin/sw_app-index b/sbin/sw_app-index index fb01c99..1a2fad8 100755 --- a/sbin/sw_app-index +++ b/sbin/sw_app-index @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # sw_app-index -- Update values for sw_app indexing # Python rewrite @@ -29,11 +29,14 @@ import logging import logging.handlers import os import pbsacct -import re import sys + def usage(): - sys.stderr.write("Usage:\t%s\t[-h system] [-s dbhost] [-D dbtype]\n" % os.path.basename(sys.argv[0])) + sys.stderr.write( + "Usage:\t%s\t[-h system] [-s dbhost] [-D dbtype]\n" + % os.path.basename(sys.argv[0]) + ) sys.stderr.write("\t\t\t[-d dbname] [-u dbuser] [-p dbpasswd] [-q]\n") sys.stderr.write("\t\t\t[-t jobstable] [-C config_table] [-w sw_table]\n") sys.stderr.write("\t\t\t[-c cfgfile] [-F] [-L|-M|-T]\n") @@ -189,7 +192,7 @@ def software_list(): "dasquad", "ddmax", "decomp", - #"decypher", + # "decypher", "delphi", "delta5d", "desmond", @@ -395,7 +398,7 @@ def software_list(): "megahit", "meme", "meta", - "mexe", + "mexe", "metgrid", "mfc", "mhd_1", @@ -422,7 +425,7 @@ def software_list(): "mpi-multi", "mpiasm", "mpiblast", - "MPT_PDECOMP", + "MPT_PDECOMP", "mrbayes", "mrobb6dipzz", "msa", @@ -434,7 +437,7 @@ def software_list(): "mykim9dgt", "myq", "mysql", - #"nag", + # "nag", "namd", "nb", "ncbi", @@ -464,7 +467,7 @@ def software_list(): "omen", "omgred", "onepartm", - #"openeye", + # "openeye", "opt_exe", "optics", "orca", @@ -476,7 +479,7 @@ def software_list(): "padc", "parallelEAM", "parallelqp", - #"param", + # "param", "paratec", "paraview", "parflow", @@ -570,7 +573,7 @@ def software_list(): "s-param", "s3d", "s4kt", - #"sable", + # "sable", "sailfish", "sam_adv_um5", "samtools", @@ -626,7 +629,7 @@ def software_list(): "swh1b", "swift", "THREEDIM_SOL_JET", - "THREEDHBL_hybridsol", + "THREEDHBL_hybridsol", "tabix", "tacoma", "tantalus", @@ -658,7 +661,7 @@ def software_list(): "turbomole", "turbsim", "TWODLDC", - "TWODHBLhybridroe_sol", + "TWODHBLhybridroe_sol", "two_phase", "ukh2d", "upc", @@ -699,223 +702,879 @@ def software_list(): "zeo++", "zeus", "zk3", - "zNtoM" + "zNtoM", ] + def software_match_list(): pkgmatch = {} - + # default to "( script LIKE '%pkgname%' OR ( software IS NOT NULL AND software LIKE 'pkgname%' ) )" for pkg in software_list(): - pkgmatch[pkg] = "( script LIKE '%%%s%%' OR ( software IS NOT NULL AND software LIKE '%%%s%%' ) )" % (pkg,pkg) + pkgmatch[pkg] = ( + "( script LIKE '%%%s%%' OR ( software IS NOT NULL AND software LIKE '%%%s%%' ) )" + % (pkg, pkg) + ) # exceptions # REGEXP match is ***MUCH*** slower than regular LIKE matching # in MySQL and not supported in other DBs, so don't use REGEXP # unless you REALLY need it. 
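[Editor's illustration: SQL LIKE has only two wildcards, '%' for any run of characters and '_' for any single character, which is why the rules below stay fast and portable where REGEXP would not. A rule can therefore be prototyped against a sample job script without a database round trip. The helper below is a hypothetical sketch, not part of sw_app-index, and assumes plain LIKE syntax with no ESCAPE clause:

    import re

    def like_to_regex(pattern):
        # Translate a SQL LIKE pattern into an anchored Python regex:
        # '%' becomes '.*' (any run of characters, including newlines via
        # DOTALL), '_' becomes '.', everything else is matched literally.
        parts = []
        for ch in pattern:
            if ch == "%":
                parts.append(".*")
            elif ch == "_":
                parts.append(".")
            else:
                parts.append(re.escape(ch))
        return re.compile("".join(parts), re.DOTALL)

    def script_matches(script, pattern):
        # LIKE compares against the whole value, hence fullmatch().
        return like_to_regex(pattern).fullmatch(script) is not None

    if __name__ == "__main__":
        print(script_matches("#PBS -N t\n./3dh input\n", "%./3dh%"))  # True
        print(script_matches("mpirun vasp_std\n", "%./3dh%"))         # False

Note that '_' is itself a wildcard in LIKE, so a pattern such as '%ash_1%' matches more than the literal underscore.]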
- pkgmatch['3dh'] = "script LIKE '%./3dh%'" - pkgmatch['55_x'] = "script LIKE '%55.x%'" - pkgmatch['aims'] = "( script LIKE '%aims%' AND NOT ( script LIKE '%aims/vasp%' ) )" - pkgmatch['a_out'] = "( script LIKE '%a.out %' OR script LIKE '%a.out\n%' )" - pkgmatch['abinit'] = "( script LIKE '%abinit%' OR script LIKE '%abinis%' OR script LIKE '%abinip%' )" - pkgmatch['aces2'] = "script LIKE '%xaces2%'" - pkgmatch['adda'] = "( script LIKE '%adda%' AND NOT ( script LIKE '%FindRadDat%' ) )" - pkgmatch['adf'] = "( script LIKE '%adf%' AND NOT ( script LIKE '%radfile%' ) AND NOT ( script LIKE '%adfs%' ) )" - pkgmatch['AliEn'] = "( script LIKE '%aliroot%' OR script LIKE '%agent.startup%' )" - pkgmatch['amdat'] = "script LIKE '%/AMDAT %'" - pkgmatch['arts'] = "( script LIKE '%arts%' AND script NOT LIKE '%starts%' )" - pkgmatch['ash'] = "( script LIKE '%ash_1%' OR script LIKE '%ash_2%' OR script LIKE '%ash_fd%' )" - pkgmatch['athena'] = "script LIKE '%/athena %'" - pkgmatch['bioperl'] = "( script LIKE '%bioperl%' OR script LIKE '%phylogenetic_pipe%' )" - pkgmatch['blat'] = "script LIKE '%blat %'" - pkgmatch['boltztran'] = "(script LIKE '%boltzpar%')" - pkgmatch['boots'] = "( script LIKE '%boots%' AND NOT ( script LIKE '%bootstrap%' ) )" - pkgmatch['cbl'] = "( script LIKE '% cbl%' OR script LIKE '%pcbl%' OR script LIKE '%biolib%' )" - pkgmatch['cdo'] = "( script LIKE '%cdo%' AND NOT ( script LIKE '%cdock%' ) )" - pkgmatch['charles'] = "script like '%charles.exe%'" - pkgmatch['chemshell'] = "script LIKE '%chemsh%'" - pkgmatch['chg'] = "script LIKE '%/chg %'" - pkgmatch['cluster'] = "script LIKE '%/cluster %'" - pkgmatch['converge'] = "script LIKE '%/bin/converge%'" - pkgmatch['crystal'] = "script LIKE '%Pcrystal%'" - pkgmatch['cube'] = "( script LIKE '%/cube %' OR script LIKE '%/intelcube %' )" - pkgmatch['dam'] = "script LIKE '%/dam %'" - pkgmatch['decomp'] = "script LIKE '%/decomp %'" - #pkgmatch['decypher'] = "script REGEXP '(decypher|dc_(target|make|blast|phrap)|TimeLogic)'" - pkgmatch['desmond'] = "( script LIKE '%desmond%' AND NOT ( username LIKE '%desmond%' ) )" - pkgmatch['dicyl_exe'] = "script LIKE '%dicyl.exe%'" - pkgmatch['dipole'] = "script LIKE '%.cxx.op%'" - pkgmatch['dissens'] = "script LIKE '%dissens.x%'" - pkgmatch['dns2d'] = "( script LIKE '%DNS2d.x%' OR script LIKE '%DNS2d_%.x%' OR script LIKE '%code2.x%' OR script LIKE '%spcal2d.x%' )" - pkgmatch['dock'] = "( script LIKE '%dock5%' OR script LIKE '%dock6%' OR script LIKE '%sphgen%' OR script LIKE '%mopac%' )" - pkgmatch['esp'] = "script LIKE '%/esp %'" - pkgmatch['evp'] = "script LIKE '%/evp %'" - pkgmatch['ex_e'] = "script LIKE '%ex.e%'" - pkgmatch['fastsimcoal'] = "( script LIKE '%FASTSIMCOAL%' OR script LIKE '%fsc25%' )" - pkgmatch['fit_exe'] = "script LIKE '%fit.exe%'" - pkgmatch['fld2d'] = "( script LIKE '%fld2d%' OR script LIKE '%/compact.%.exe%' )" - pkgmatch['fluent'] = "( script LIKE '%fluent%' OR ( software IS NOT NULL AND software LIKE '%fluent%' ) )" - pkgmatch['fsweep'] = "( script LIKE '%fsweep.exe%' OR script LIKE '%fsweep2.exe%' )" - pkgmatch['gabe'] = "script LIKE '%./gabe %'" - pkgmatch['gamess'] = "( script LIKE '%gamess%' OR script LIKE '%rungms%' OR script LIKE '%rungmx%' )" - pkgmatch['gc'] = "script LIKE '%kland_gc%'" - pkgmatch['gemc_x'] = "script LIKE '%gemc.x%'" - pkgmatch['glast'] = "( script LIKE '%glast%' OR script LIKE '%gp run%' )" - pkgmatch['gphocs'] = "script LIKE '%/G-PhoCS%'" - pkgmatch['gzip'] = "( script LIKE '%gzip%' OR script LIKE '%gunzip%' )" - pkgmatch['harness'] = "script LIKE 
'%test_harness_driver.py%'" - pkgmatch['harris'] = "script LIKE '%harris.cxx.op%'" - pkgmatch['hd'] = "script LIKE '%/HD %'" - pkgmatch['hf'] = "script LIKE '%hf/hf%'" - pkgmatch['hf2'] = "script LIKE '%/hf2%'" - pkgmatch['hmmer'] = "( script LIKE '%hmmer%' OR script LIKE '%hmmp%' )" - pkgmatch['hpl'] = "script LIKE '%xhpl%'" - pkgmatch['hydro'] = "script LIKE '%./hydro %'" - pkgmatch['idl']="( script LIKE '%module load idl%' OR script LIKE '%module add idl%' OR script LIKE '%\nidl%' OR ( software IS NOT NULL AND software LIKE '%idl%' ) )" - pkgmatch['hsi'] = "( script LIKE '%hsi%' OR script LIKE '%htar%' OR queue='hpss' )" - pkgmatch['ice'] = "script LIKE '%IceModule%'" - pkgmatch['iedg_mix'] = "( script LIKE '%iedg_mix%' OR script LIKE '%iedg_ext%' )" - pkgmatch['imb'] = "script LIKE '%IMB-%'" - pkgmatch['imc'] = "script LIKE '%/imc.x%'" - pkgmatch['lammps'] = "( script LIKE '%lammps%' OR script LIKE '% lmp_%' OR script LIKE '%/lmp_%' )" - pkgmatch['liso'] = "script LIKE '%/liso %'" - pkgmatch['lsdyna'] = "( script LIKE '%lsdyna%' OR script LIKE '%ls-dyna%' OR script LIKE '%mpp-dyna%' OR script LIKE '%mpp971%' OR ( software IS NOT NULL AND software LIKE '%lsdyna%' ) )" - pkgmatch['lsp'] = "( script LIKE '%/lsp.%' OR script LIKE '%/lsp_%' OR script LIKE '%/lsp %' )" - pkgmatch['madness'] = "( script LIKE '%m-a-d-n-e-s-s%' OR script LIKE '%slda%' )" - pkgmatch['md_xx'] = "script LIKE '%md.xx%'" - pkgmatch['meta'] = "( script LIKE '%anti.meta%' OR script LIKE '%para.meta%' OR script LIKE '%xray.meta%' )" - pkgmatch['mhd_1'] = "( script LIKE '%mhd_1%' OR script LIKE '%mhd_2%' OR script LIKE '%mhd_3%' OR script LIKE '%mhd_4%' OR script LIKE '%rmhd%' OR script LIKE '% mhd %' )" - pkgmatch['mhd_vec'] = "( script LIKE '%mhd_vec%' OR script LIKE '%mhd_pvec%' )" - pkgmatch['mm5'] = "( script LIKE '%mm5%' AND NOT SCRIPT LIKE '%womm5%' )" - pkgmatch['mrbayes'] = "( script LIKE '%mrbayes%' OR script LIKE '%mb-parallel%' )" - pkgmatch['msa'] = "script LIKE '%/msa %'" - pkgmatch['nb'] = "script LIKE '%NB/CODES%'" - pkgmatch['ncbi'] = "( script LIKE '%ncbi%' OR script LIKE '%blastall%' OR script LIKE '%blastn%' OR script LIKE '%blastpgp%' OR script LIKE '%blastp %' OR script LIKE '%blastx%' OR script LIKE '%fastacmd%' OR script LIKE '%formatdb%' OR script LIKE '%makeblastdb%' OR script LIKE '%rpsblast%' OR script LIKE '%seqtest%' )" - pkgmatch['nga_fb'] = "( script LIKE '%nga_fb%' OR script LIKE '%nga_cfb%' )" - pkgmatch['omega'] = "script LIKE '%omega.exe%'" - pkgmatch['openeye'] = "( script LIKE '%babel3%' OR script LIKE '%checkcff%' OR script LIKE '%chunker%' OR script LIKE '%fred2%' OR script LIKE '%fredPA%' OR script LIKE '%ligand_info%' OR script LIKE '%makefraglib%' OR script LIKE '%makerocsdb%' OR script LIKE '%nam2mol%' OR script LIKE '%omega2%' OR script LIKE '%szybki%' )" - pkgmatch['opt_exe'] = "( script LIKE '%opt_exe%' OR script LIKE '%scriptLaunchAll%' )" - pkgmatch['os2z'] = "( script LIKE '%/oS2x_%' OR script LIKE '%/oS2z_%' )" - pkgmatch['paraview'] = "( script LIKE '%paraview%' OR script LIKE '%pvserver%' )" - pkgmatch['phase'] = "script LIKE '%/PHASE %'" - pkgmatch['postH'] = "script LIKE '%/postH %'" - pkgmatch['pse'] = "( script LIKE '%/PSE\n' OR script LIKE '%/PSE2\n' )" - pkgmatch['psi4'] = "( script LIKE '%/psi4 %' OR script LIKE '%\npsi4%' )" - pkgmatch['pwscf'] = "( script LIKE '%pwscf%' OR script LIKE '%/pw.x %' OR script LIKE '%/ph.x %' OR script LIKE '%module load espresso%' )" - pkgmatch['RR_xxM'] = "( script REGEXP 'RR_[0-9]+(M|end)' )" - pkgmatch['r_out'] = "( script 
LIKE '%/r.out %' OR script LIKE '%/r.out\n%' )" - pkgmatch['radhyd'] = "( script LIKE '%radhyd%' OR script LIKE '%rhd_hyb%' OR script LIKE '%orion2%' )" - pkgmatch['raflesclean'] = "( script LIKE '%RAFLESCLEAN%' OR script LIKE '%RAFLES_CLEAN%' )" - pkgmatch['raspa'] = "( script LIKE '%RASPA_Binary%' OR script LIKE '%RASPA_DIR=%' OR script LIKE '%RASPA-%' )" - pkgmatch['real_exe'] = "script LIKE '%real.exe%'" - pkgmatch['reduce'] = "( script LIKE '%reduce_1%' OR script LIKE '%reduce_eta%' )" - pkgmatch['reflect'] = "script LIKE '%/reflect\n%'" - pkgmatch['root'] = "script LIKE '%\nroot -q%'" - pkgmatch['rosetta'] = "( script LIKE '%rosetta.%' OR script LIKE '% rosetta %' OR script LIKE '%/rr %' OR script LIKE '%rosetta_scripts%' OR script LIKE '%module load rosetta%' )" - pkgmatch['roth'] = "script LIKE '%/ROTH%'" - pkgmatch['rtp'] = "( script LIKE '%rtp%' AND NOT ( script like '%RestartP%' ) AND NOT ( script LIKE '%addpertpath%' ) )" - pkgmatch['sable'] = "( script LIKE '%sable%' AND script NOT LIKE '%DISABLE%' )" - pkgmatch['sas'] = "( script LIKE '%\nsas%' OR ( software IS NOT NULL AND software LIKE '%sas%' ) OR queue LIKE '%sas%' )" - pkgmatch['scattering'] = "script LIKE '%/scattering %'" - pkgmatch['simu_pow'] = "( script LIKE '%simu_pow%' OR script LIKE '%simu_thres%' )" - pkgmatch['simulate'] = "script LIKE '%/Binary/simulate%'" - pkgmatch['spark'] = "script LIKE '%spark-submit%'" - pkgmatch['sratoolkit'] = "( script LIKE '%sratoolkit%' OR script LIKE '%fastq-dump%' )" - pkgmatch['star'] = "( script LIKE '%/STAR --runMode%' OR script LIKE '%\nSTAR --runMode%' )" - pkgmatch['tbms'] = "( script LIKE '%tbms%dvm%' OR script LIKE '%distr%dvm%' OR script LIKE '%jac%dvm%' OR script LIKE '%mt%dvm%' )" - pkgmatch['testharness'] = "( username LIKE 'usertest%' )" - pkgmatch['tsutil'] = "( script LIKE '%tsutil%' OR script LIKE '%app_cmd%' )" - pkgmatch['track'] = "script LIKE '%TRACKdir%'" - pkgmatch['trial'] = "script LIKE '%/trial %'" - pkgmatch['turbo'] = "script LIKE '%pturbo.x%'" - pkgmatch['upc'] = "script LIKE '%upcrun%'" - pkgmatch['v2'] = "script LIKE '%/v2_%'" - pkgmatch['vasp'] = "script LIKE '%vasp%'" - pkgmatch['vina'] = "script LIKE '%/vina %'" - pkgmatch['visit'] = "( script LIKE '%visit%' AND script NOT LIKE '%revisit%' )" - pkgmatch['vnc'] = "script LIKE '%vncserver%'" - pkgmatch['vpic'] = "( script LIKE '%npic%' OR script LIKE '%open.cxx.op%' )" - pkgmatch['xfdtd'] = "( script LIKE '%xfdtd%' OR script LIKE '%xfsolver%' )" - pkgmatch['xtest'] = "script LIKE '%/xtest%'" - pkgmatch['xx'] = "script LIKE '%./xx\n%'" - pkgmatch['zeus'] = "( script LIKE '%/zeus%' OR script LIKE '%/pglobal%' )" - pkgmatch['zNtoM'] = "( script LIKE '%z1to3%' OR script LIKE '%z4to6%' OR script LIKE '%z7to9%' OR script LIKE '%z10to12%' OR script LIKE '%z13to15%' )" - + pkgmatch["3dh"] = "script LIKE '%./3dh%'" + pkgmatch["55_x"] = "script LIKE '%55.x%'" + pkgmatch["aims"] = "( script LIKE '%aims%' AND NOT ( script LIKE '%aims/vasp%' ) )" + pkgmatch["a_out"] = "( script LIKE '%a.out %' OR script LIKE '%a.out\n%' )" + pkgmatch["abinit"] = ( + "( script LIKE '%abinit%' OR script LIKE '%abinis%' OR script LIKE '%abinip%' )" + ) + pkgmatch["aces2"] = "script LIKE '%xaces2%'" + pkgmatch["adda"] = "( script LIKE '%adda%' AND NOT ( script LIKE '%FindRadDat%' ) )" + pkgmatch["adf"] = ( + "( script LIKE '%adf%' AND NOT ( script LIKE '%radfile%' ) AND NOT ( script LIKE '%adfs%' ) )" + ) + pkgmatch["AliEn"] = "( script LIKE '%aliroot%' OR script LIKE '%agent.startup%' )" + pkgmatch["amdat"] = "script LIKE '%/AMDAT 
%'" + pkgmatch["arts"] = "( script LIKE '%arts%' AND script NOT LIKE '%starts%' )" + pkgmatch["ash"] = ( + "( script LIKE '%ash_1%' OR script LIKE '%ash_2%' OR script LIKE '%ash_fd%' )" + ) + pkgmatch["athena"] = "script LIKE '%/athena %'" + pkgmatch["bioperl"] = ( + "( script LIKE '%bioperl%' OR script LIKE '%phylogenetic_pipe%' )" + ) + pkgmatch["blat"] = "script LIKE '%blat %'" + pkgmatch["boltztran"] = "(script LIKE '%boltzpar%')" + pkgmatch["boots"] = ( + "( script LIKE '%boots%' AND NOT ( script LIKE '%bootstrap%' ) )" + ) + pkgmatch["cbl"] = ( + "( script LIKE '% cbl%' OR script LIKE '%pcbl%' OR script LIKE '%biolib%' )" + ) + pkgmatch["cdo"] = "( script LIKE '%cdo%' AND NOT ( script LIKE '%cdock%' ) )" + pkgmatch["charles"] = "script like '%charles.exe%'" + pkgmatch["chemshell"] = "script LIKE '%chemsh%'" + pkgmatch["chg"] = "script LIKE '%/chg %'" + pkgmatch["cluster"] = "script LIKE '%/cluster %'" + pkgmatch["converge"] = "script LIKE '%/bin/converge%'" + pkgmatch["crystal"] = "script LIKE '%Pcrystal%'" + pkgmatch["cube"] = "( script LIKE '%/cube %' OR script LIKE '%/intelcube %' )" + pkgmatch["dam"] = "script LIKE '%/dam %'" + pkgmatch["decomp"] = "script LIKE '%/decomp %'" + # pkgmatch['decypher'] = "script REGEXP '(decypher|dc_(target|make|blast|phrap)|TimeLogic)'" + pkgmatch["desmond"] = ( + "( script LIKE '%desmond%' AND NOT ( username LIKE '%desmond%' ) )" + ) + pkgmatch["dicyl_exe"] = "script LIKE '%dicyl.exe%'" + pkgmatch["dipole"] = "script LIKE '%.cxx.op%'" + pkgmatch["dissens"] = "script LIKE '%dissens.x%'" + pkgmatch["dns2d"] = ( + "( script LIKE '%DNS2d.x%' OR script LIKE '%DNS2d_%.x%' OR script LIKE '%code2.x%' OR script LIKE '%spcal2d.x%' )" + ) + pkgmatch["dock"] = ( + "( script LIKE '%dock5%' OR script LIKE '%dock6%' OR script LIKE '%sphgen%' OR script LIKE '%mopac%' )" + ) + pkgmatch["esp"] = "script LIKE '%/esp %'" + pkgmatch["evp"] = "script LIKE '%/evp %'" + pkgmatch["ex_e"] = "script LIKE '%ex.e%'" + pkgmatch["fastsimcoal"] = "( script LIKE '%FASTSIMCOAL%' OR script LIKE '%fsc25%' )" + pkgmatch["fit_exe"] = "script LIKE '%fit.exe%'" + pkgmatch["fld2d"] = "( script LIKE '%fld2d%' OR script LIKE '%/compact.%.exe%' )" + pkgmatch["fluent"] = ( + "( script LIKE '%fluent%' OR ( software IS NOT NULL AND software LIKE '%fluent%' ) )" + ) + pkgmatch["fsweep"] = "( script LIKE '%fsweep.exe%' OR script LIKE '%fsweep2.exe%' )" + pkgmatch["gabe"] = "script LIKE '%./gabe %'" + pkgmatch["gamess"] = ( + "( script LIKE '%gamess%' OR script LIKE '%rungms%' OR script LIKE '%rungmx%' )" + ) + pkgmatch["gc"] = "script LIKE '%kland_gc%'" + pkgmatch["gemc_x"] = "script LIKE '%gemc.x%'" + pkgmatch["glast"] = "( script LIKE '%glast%' OR script LIKE '%gp run%' )" + pkgmatch["gphocs"] = "script LIKE '%/G-PhoCS%'" + pkgmatch["gzip"] = "( script LIKE '%gzip%' OR script LIKE '%gunzip%' )" + pkgmatch["harness"] = "script LIKE '%test_harness_driver.py%'" + pkgmatch["harris"] = "script LIKE '%harris.cxx.op%'" + pkgmatch["hd"] = "script LIKE '%/HD %'" + pkgmatch["hf"] = "script LIKE '%hf/hf%'" + pkgmatch["hf2"] = "script LIKE '%/hf2%'" + pkgmatch["hmmer"] = "( script LIKE '%hmmer%' OR script LIKE '%hmmp%' )" + pkgmatch["hpl"] = "script LIKE '%xhpl%'" + pkgmatch["hydro"] = "script LIKE '%./hydro %'" + pkgmatch["idl"] = ( + "( script LIKE '%module load idl%' OR script LIKE '%module add idl%' OR script LIKE '%\nidl%' OR ( software IS NOT NULL AND software LIKE '%idl%' ) )" + ) + pkgmatch["hsi"] = "( script LIKE '%hsi%' OR script LIKE '%htar%' OR queue='hpss' )" + pkgmatch["ice"] = "script LIKE 
'%IceModule%'" + pkgmatch["iedg_mix"] = "( script LIKE '%iedg_mix%' OR script LIKE '%iedg_ext%' )" + pkgmatch["imb"] = "script LIKE '%IMB-%'" + pkgmatch["imc"] = "script LIKE '%/imc.x%'" + pkgmatch["lammps"] = ( + "( script LIKE '%lammps%' OR script LIKE '% lmp_%' OR script LIKE '%/lmp_%' )" + ) + pkgmatch["liso"] = "script LIKE '%/liso %'" + pkgmatch["lsdyna"] = ( + "( script LIKE '%lsdyna%' OR script LIKE '%ls-dyna%' OR script LIKE '%mpp-dyna%' OR script LIKE '%mpp971%' OR ( software IS NOT NULL AND software LIKE '%lsdyna%' ) )" + ) + pkgmatch["lsp"] = ( + "( script LIKE '%/lsp.%' OR script LIKE '%/lsp_%' OR script LIKE '%/lsp %' )" + ) + pkgmatch["madness"] = "( script LIKE '%m-a-d-n-e-s-s%' OR script LIKE '%slda%' )" + pkgmatch["md_xx"] = "script LIKE '%md.xx%'" + pkgmatch["meta"] = ( + "( script LIKE '%anti.meta%' OR script LIKE '%para.meta%' OR script LIKE '%xray.meta%' )" + ) + pkgmatch["mhd_1"] = ( + "( script LIKE '%mhd_1%' OR script LIKE '%mhd_2%' OR script LIKE '%mhd_3%' OR script LIKE '%mhd_4%' OR script LIKE '%rmhd%' OR script LIKE '% mhd %' )" + ) + pkgmatch["mhd_vec"] = "( script LIKE '%mhd_vec%' OR script LIKE '%mhd_pvec%' )" + pkgmatch["mm5"] = "( script LIKE '%mm5%' AND NOT SCRIPT LIKE '%womm5%' )" + pkgmatch["mrbayes"] = "( script LIKE '%mrbayes%' OR script LIKE '%mb-parallel%' )" + pkgmatch["msa"] = "script LIKE '%/msa %'" + pkgmatch["nb"] = "script LIKE '%NB/CODES%'" + pkgmatch["ncbi"] = ( + "( script LIKE '%ncbi%' OR script LIKE '%blastall%' OR script LIKE '%blastn%' OR script LIKE '%blastpgp%' OR script LIKE '%blastp %' OR script LIKE '%blastx%' OR script LIKE '%fastacmd%' OR script LIKE '%formatdb%' OR script LIKE '%makeblastdb%' OR script LIKE '%rpsblast%' OR script LIKE '%seqtest%' )" + ) + pkgmatch["nga_fb"] = "( script LIKE '%nga_fb%' OR script LIKE '%nga_cfb%' )" + pkgmatch["omega"] = "script LIKE '%omega.exe%'" + pkgmatch["openeye"] = ( + "( script LIKE '%babel3%' OR script LIKE '%checkcff%' OR script LIKE '%chunker%' OR script LIKE '%fred2%' OR script LIKE '%fredPA%' OR script LIKE '%ligand_info%' OR script LIKE '%makefraglib%' OR script LIKE '%makerocsdb%' OR script LIKE '%nam2mol%' OR script LIKE '%omega2%' OR script LIKE '%szybki%' )" + ) + pkgmatch["opt_exe"] = ( + "( script LIKE '%opt_exe%' OR script LIKE '%scriptLaunchAll%' )" + ) + pkgmatch["os2z"] = "( script LIKE '%/oS2x_%' OR script LIKE '%/oS2z_%' )" + pkgmatch["paraview"] = "( script LIKE '%paraview%' OR script LIKE '%pvserver%' )" + pkgmatch["phase"] = "script LIKE '%/PHASE %'" + pkgmatch["postH"] = "script LIKE '%/postH %'" + pkgmatch["pse"] = "( script LIKE '%/PSE\n' OR script LIKE '%/PSE2\n' )" + pkgmatch["psi4"] = "( script LIKE '%/psi4 %' OR script LIKE '%\npsi4%' )" + pkgmatch["pwscf"] = ( + "( script LIKE '%pwscf%' OR script LIKE '%/pw.x %' OR script LIKE '%/ph.x %' OR script LIKE '%module load espresso%' )" + ) + pkgmatch["RR_xxM"] = "( script REGEXP 'RR_[0-9]+(M|end)' )" + pkgmatch["r_out"] = "( script LIKE '%/r.out %' OR script LIKE '%/r.out\n%' )" + pkgmatch["radhyd"] = ( + "( script LIKE '%radhyd%' OR script LIKE '%rhd_hyb%' OR script LIKE '%orion2%' )" + ) + pkgmatch["raflesclean"] = ( + "( script LIKE '%RAFLESCLEAN%' OR script LIKE '%RAFLES_CLEAN%' )" + ) + pkgmatch["raspa"] = ( + "( script LIKE '%RASPA_Binary%' OR script LIKE '%RASPA_DIR=%' OR script LIKE '%RASPA-%' )" + ) + pkgmatch["real_exe"] = "script LIKE '%real.exe%'" + pkgmatch["reduce"] = "( script LIKE '%reduce_1%' OR script LIKE '%reduce_eta%' )" + pkgmatch["reflect"] = "script LIKE '%/reflect\n%'" + pkgmatch["root"] = 
"script LIKE '%\nroot -q%'" + pkgmatch["rosetta"] = ( + "( script LIKE '%rosetta.%' OR script LIKE '% rosetta %' OR script LIKE '%/rr %' OR script LIKE '%rosetta_scripts%' OR script LIKE '%module load rosetta%' )" + ) + pkgmatch["roth"] = "script LIKE '%/ROTH%'" + pkgmatch["rtp"] = ( + "( script LIKE '%rtp%' AND NOT ( script like '%RestartP%' ) AND NOT ( script LIKE '%addpertpath%' ) )" + ) + pkgmatch["sable"] = "( script LIKE '%sable%' AND script NOT LIKE '%DISABLE%' )" + pkgmatch["sas"] = ( + "( script LIKE '%\nsas%' OR ( software IS NOT NULL AND software LIKE '%sas%' ) OR queue LIKE '%sas%' )" + ) + pkgmatch["scattering"] = "script LIKE '%/scattering %'" + pkgmatch["simu_pow"] = "( script LIKE '%simu_pow%' OR script LIKE '%simu_thres%' )" + pkgmatch["simulate"] = "script LIKE '%/Binary/simulate%'" + pkgmatch["spark"] = "script LIKE '%spark-submit%'" + pkgmatch["sratoolkit"] = ( + "( script LIKE '%sratoolkit%' OR script LIKE '%fastq-dump%' )" + ) + pkgmatch["star"] = ( + "( script LIKE '%/STAR --runMode%' OR script LIKE '%\nSTAR --runMode%' )" + ) + pkgmatch["tbms"] = ( + "( script LIKE '%tbms%dvm%' OR script LIKE '%distr%dvm%' OR script LIKE '%jac%dvm%' OR script LIKE '%mt%dvm%' )" + ) + pkgmatch["testharness"] = "( username LIKE 'usertest%' )" + pkgmatch["tsutil"] = "( script LIKE '%tsutil%' OR script LIKE '%app_cmd%' )" + pkgmatch["track"] = "script LIKE '%TRACKdir%'" + pkgmatch["trial"] = "script LIKE '%/trial %'" + pkgmatch["turbo"] = "script LIKE '%pturbo.x%'" + pkgmatch["upc"] = "script LIKE '%upcrun%'" + pkgmatch["v2"] = "script LIKE '%/v2_%'" + pkgmatch["vasp"] = "script LIKE '%vasp%'" + pkgmatch["vina"] = "script LIKE '%/vina %'" + pkgmatch["visit"] = "( script LIKE '%visit%' AND script NOT LIKE '%revisit%' )" + pkgmatch["vnc"] = "script LIKE '%vncserver%'" + pkgmatch["vpic"] = "( script LIKE '%npic%' OR script LIKE '%open.cxx.op%' )" + pkgmatch["xfdtd"] = "( script LIKE '%xfdtd%' OR script LIKE '%xfsolver%' )" + pkgmatch["xtest"] = "script LIKE '%/xtest%'" + pkgmatch["xx"] = "script LIKE '%./xx\n%'" + pkgmatch["zeus"] = "( script LIKE '%/zeus%' OR script LIKE '%/pglobal%' )" + pkgmatch["zNtoM"] = ( + "( script LIKE '%z1to3%' OR script LIKE '%z4to6%' OR script LIKE '%z7to9%' OR script LIKE '%z10to12%' OR script LIKE '%z13to15%' )" + ) + # package matches with dependencies on other package matches - pkgmatch['agk'] = "( script LIKE '%agk%' AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['mhd_1']+" ) )" - pkgmatch['ansys'] = "( ( script LIKE '%ansys' OR ( software IS NOT NULL AND software LIKE '%ansys%' ) ) AND NOT ( "+pkgmatch['fluent']+" ) )" - pkgmatch['amber'] = "( ( script LIKE '%amber%' OR script LIKE '%sander%' OR script LIKE '%pmemd%' OR script LIKE '%sviol%' OR script LIKE '%SingleJob%' OR script LIKE '%MINJob%' OR script LIKE '%run_md_mpi.csh%' ) AND NOT ( "+pkgmatch['cctm']+" ) AND NOT ( "+pkgmatch['cvm']+" ) AND NOT ( "+pkgmatch['idl']+" ) AND NOT ( "+pkgmatch['qmc']+" ) AND NOT ( "+pkgmatch['sigma']+" ) AND NOT ( "+pkgmatch['tantalus']+" ) AND NOT ( "+pkgmatch['tfe']+" ) )" - pkgmatch['arps'] = "( script LIKE '%arps%' AND NOT ( "+pkgmatch['adf']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['matlab']+" ) )" - pkgmatch['bam'] = "( script LIKE '%bam%' AND NOT ( "+pkgmatch['bismark']+" ) AND NOT ( "+pkgmatch['cufflinks']+" ) AND NOT ( "+pkgmatch['samtools']+" ) )" - pkgmatch['bismark'] = "( script LIKE '%bismark%' AND NOT ( "+pkgmatch['samtools']+" ) )" - pkgmatch['bugget'] = "( script LIKE '%bugget%' AND NOT ( "+pkgmatch['halo']+" ) AND NOT ( 
"+pkgmatch['simpleio']+" ) )" - pkgmatch['cactus'] = "( script LIKE '%cactus%' AND NOT ( "+pkgmatch['cdo']+" ) AND NOT ( "+pkgmatch['simfactory']+" ) )" - pkgmatch['cam'] = "( script LIKE '%cam%' AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['grads']+" ) AND NOT ( "+pkgmatch['hsi']+" ) )" - pkgmatch['ccsm'] = "( ( script LIKE '%ccsm%' OR script LIKE '%cpl%csim%clm%pop%cam%' ) AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['mm5']+" ) AND NOT ( "+pkgmatch['swift']+" ) )" - pkgmatch['charmm'] = "( script LIKE '%charmm%' AND NOT ( "+pkgmatch['chemshell']+" ) )" - pkgmatch['cpmd'] = "( script LIKE '%cpmd%' AND NOT ( "+pkgmatch['a_out']+" ) AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['inca']+" ) AND NOT ( "+pkgmatch['vasp']+" ) )" - pkgmatch['cvm'] = "( script LIKE '%cvm%' AND NOT ( "+pkgmatch['cpmd']+" ) AND NOT ( "+pkgmatch['psolve']+" ) )" - pkgmatch['ddmax'] = "( script LIKE '%DDMAX%' AND NOT ( "+pkgmatch['maxsolve']+" ) )" - pkgmatch['eden'] = "( script LIKE '%eden%' AND NOT ( "+pkgmatch['matlab']+" ) )" - pkgmatch['enzo'] = "( script LIKE '%enzo%' AND NOT ( "+pkgmatch['rtp']+" ) )" - pkgmatch['f-plane'] = "( script LIKE '%f-plane%' AND NOT ( "+pkgmatch['hsi']+" ) )" - pkgmatch['foam'] = "( script LIKE '%foam%' AND NOT ( "+pkgmatch['lsdyna']+" ) )" - pkgmatch['gadget'] = "( script LIKE '%gadget%' AND NOT ( "+pkgmatch['hsi']+" ) )" - pkgmatch['gdl'] = "( script LIKE '%gdl%' AND NOT ( "+pkgmatch['rotbouss']+" ) )" - pkgmatch['grib'] = "( script LIKE '%grib%' AND NOT ( "+pkgmatch['cdo']+" ) AND NOT ( "+pkgmatch['mm5']+" ) AND NOT ( "+pkgmatch['sgf']+" ) AND NOT ( "+pkgmatch['sigma']+" ) )" - pkgmatch['gromacs'] = "( ( script LIKE '%gromacs%' OR script LIKE '%grompp%' OR script LIKE '%mdrun%' OR script LIKE '%rgmx%' ) AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['cpmd']+" ) AND NOT ( "+pkgmatch['sigma']+" ) AND NOT ( "+pkgmatch['tantalus']+" ) )" - pkgmatch['gtc'] = "( ( script LIKE '%gtc%' OR script LIKE '%gts%' ) AND NOT ( "+pkgmatch['cctm']+" ) AND NOT ( "+pkgmatch['cutadapt']+" ) AND NOT ( "+pkgmatch['pmcl3d']+" ) )" - pkgmatch['halo'] = "( script LIKE '%halo%' AND NOT ( "+pkgmatch['enzo']+" ) AND NOT ( "+pkgmatch['gadget']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['simpleio']+" ) AND NOT ( "+pkgmatch['yt']+" ) )" - pkgmatch['hfb'] = "( script LIKE '%hfb%' AND NOT ( "+pkgmatch['vbc']+" ) )" - pkgmatch['hfodd'] = "( script LIKE '%hfodd%' AND NOT ( "+pkgmatch['cdo']+" ) )" - pkgmatch['hmc'] = "( script LIKE '%hmc%' AND NOT ( "+pkgmatch['chroma']+" ) AND NOT ( "+pkgmatch['gadget']+" ) AND NOT ( "+pkgmatch['nplqcd']+" ) AND NOT ( "+pkgmatch['tantalus']+" ) AND NOT ( "+pkgmatch['terachem']+" ) )" - pkgmatch['hsphere'] = "( script LIKE '%hsphere%' AND NOT ( "+pkgmatch['lfm']+" ) )" - pkgmatch['hy3s'] = "( ( script LIKE '%SSA%' OR script LIKE '%HyJCMSS-%' ) AND NOT ( "+pkgmatch['arps']+" ) AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['bugget']+" ) AND NOT ( "+pkgmatch['cactus']+" ) AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['ccsm']+" ) AND NOT ( "+pkgmatch['cdo']+" ) AND NOT ( "+pkgmatch['charmm']+" ) AND NOT ( "+pkgmatch['energyplus']+" ) AND NOT ( "+pkgmatch['enzo']+" ) AND NOT ( "+pkgmatch['grmhd']+" ) AND NOT ( "+pkgmatch['halo']+" ) AND NOT ( "+pkgmatch['hchbm']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['milc']+" ) AND NOT ( "+pkgmatch['ncl']+" ) AND NOT ( "+pkgmatch['nwchem']+" ) AND NOT ( "+pkgmatch['simpleio']+" ) AND NOT ( "+pkgmatch['sses']+" ) AND NOT ( 
"+pkgmatch['tfe']+" ) )" - pkgmatch['ifs'] = "( script LIKE '%ifsMASTER%' AND NOT ( "+pkgmatch['cdp']+" ) AND NOT ( "+pkgmatch['hsi']+" ) )" - pkgmatch['inca'] = "( script LIKE '%inca%' AND NOT ( "+pkgmatch['vasp']+" ) )" - pkgmatch['ior'] = "( script LIKE '%ior%' AND NOT ( username LIKE '%ior%' ) AND NOT ( script LIKE '%prior%' ) AND NOT ( "+pkgmatch['a_out']+" ) AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['lammps']+" ) AND NOT ( "+pkgmatch['swift']+" ) )" - pkgmatch['meep'] = "( script LIKE '%meep%' AND NOT ( "+pkgmatch['sigma']+" ) )" - pkgmatch['milc'] = "( ( script LIKE '%milc%' OR script LIKE '%su3_%' OR script LIKE '%switch%.csh%' ) AND NOT ( "+pkgmatch['nicam']+" ) AND NOT ( "+pkgmatch['hmc']+" ) )" - pkgmatch['measurements'] = " ( script LIKE '%measurements%' ) AND NOT ( "+pkgmatch['milc']+" )" - pkgmatch['nag'] = "( script LIKE '%nag%' AND NOT ( "+pkgmatch['cctm']+" ) AND NOT ( "+pkgmatch['mpi_helium']+" ) )" - pkgmatch['namd'] = "( ( script LIKE '%namd%' OR script LIKE '%md.sh%' OR SCRIPT LIKE '%rem_mono_npt4.sh%') AND NOT ( "+pkgmatch['a_out']+" ) AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['cactus']+" ) AND NOT ( "+pkgmatch['charmm']+" ) AND NOT ( "+pkgmatch['cdo']+" ) AND NOT ( "+pkgmatch['gromacs']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['hmc']+" ) AND NOT ( "+pkgmatch['hy3s']+" ) AND NOT ( "+pkgmatch['ior']+" ) )" - pkgmatch['ncl'] = "( script LIKE '%ncl%' AND NOT ( script LIKE '%include%' ) AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['gen.v4']+" ) AND NOT ( "+pkgmatch['grmhd']+" ) AND NOT ( "+pkgmatch['swift']+" ) )" - pkgmatch['nested'] = "( script LIKE '%nested%' AND NOT ( "+pkgmatch['enzo']+" ) AND NOT ( "+pkgmatch['grib']+" ) )" - pkgmatch['nicam'] = "( script LIKE '%nicam%' AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['grads']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['hy3s']+" ) )" - pkgmatch['npb'] = "( script LIKE '%npb%' AND NOT ( script LIKE '%npbs.%' ) AND NOT ( script LIKE '%snsnpb%' ) AND NOT ( "+pkgmatch['milc']+" ) AND NOT ( "+pkgmatch['vorpal']+" ) )" - pkgmatch['omen'] = "( script LIKE '%omen%' AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['GreenSolver']+" ) AND NOT ( "+pkgmatch['milc']+" ) )" - pkgmatch['overlap']="( script LIKE '%overlap_%' AND NOT ( "+pkgmatch['grib']+" ) AND NOT ( "+pkgmatch['hfb']+" ) AND NOT ( "+pkgmatch['nicam']+" ) AND NOT ( "+pkgmatch['simfactory']+" ) )" - pkgmatch['paratec'] = "( script LIKE '%paratec%' AND NOT ( "+pkgmatch['sigma']+" ) )" - pkgmatch['pcg'] = "( script LIKE '%pcg%' AND script NOT LIKE '%request%' AND NOT ( "+pkgmatch['gen.v4']+" ) )" - pkgmatch['pop'] = "( script LIKE '%pop%' AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['ccsm']+" ) AND NOT ( "+pkgmatch['cp2k']+" ) AND NOT ( "+pkgmatch['charmm']+" ) AND NOT ( "+pkgmatch['grib']+" ) AND NOT ( "+pkgmatch['gromacs']+" ) AND NOT ( "+pkgmatch['hmc']+" ) AND NOT ( "+pkgmatch['namd']+" ) AND NOT ( "+pkgmatch['nwchem']+" ) AND NOT ( "+pkgmatch['run_im']+" ) AND NOT ( "+pkgmatch['sses']+" ) )" - pkgmatch['propagators'] = "( script LIKE '%propagators%' AND NOT ( "+pkgmatch['milc']+" ) )" - pkgmatch['python'] = "( ( script LIKE '%python%' OR script LIKE '%jython%' ) AND NOT ( "+pkgmatch['hoomd']+" ) )" - pkgmatch['qb'] = "( script LIKE '%qb%' AND NOT ( "+pkgmatch['hfb']+" ) AND NOT ( "+pkgmatch['milc']+" ) AND NOT ( "+pkgmatch['amber']+" ) )" - pkgmatch['qrpacc'] = "( script LIKE '%qrpacc%' AND NOT ( "+pkgmatch['vbc']+" ) )" - pkgmatch['quest'] = "( script 
LIKE '%quest%' AND script NOT LIKE '%request%' AND NOT ( "+pkgmatch['gen.v4']+" ) )" - pkgmatch['radhyd'] = "( script LIKE '%radhyd%' AND NOT ( "+pkgmatch['chimera']+" ) )" - pkgmatch['run_im'] = "( script LIKE '%run_im%' AND NOT ( "+pkgmatch['aims']+" ) AND NOT ( "+pkgmatch['flash4']+" ) AND NOT ( "+pkgmatch['ncl']+" ) AND NOT ( "+pkgmatch['wrf']+" ) )" - pkgmatch['run_xyzvort'] = "( ( script LIKE '%run_xvort%' OR script LIKE '%run_yvort%' OR script LIKE '%run_zvort%' OR script LIKE '%run_thpert%' OR script LIKE '%run_u%' OR script LIKE '%run_v%' OR script LIKE '%run_w%' OR script LIKE '%run_dBZ%' ) AND NOT ( "+pkgmatch['beopest']+" ) AND NOT ( "+pkgmatch['lobster']+" ) AND NOT ( "+pkgmatch['paraview']+" ) AND NOT ( "+pkgmatch['vasp']+" ) AND NOT ( "+pkgmatch['vnc']+" ) AND NOT ( "+pkgmatch['wrf']+" ) )" - pkgmatch['s3d'] = "( script LIKE '%s3d%' AND NOT ( "+pkgmatch['adf']+" ) AND NOT ( "+pkgmatch['arps']+" ) AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['cctm']+" ) )" - pkgmatch['scalapack'] = "( script LIKE '%scalapack%' AND NOT ( script LIKE '%#module load scalapack%' ) AND NOT ( "+pkgmatch['mustem']+" ) AND NOT ( "+pkgmatch['siesta']+" ) AND NOT ( "+pkgmatch['vasp']+" ) )" - pkgmatch['sgf'] = "( script LIKE '%sgf%' AND NOT ( "+pkgmatch['lsdyna']+" ) AND NOT ( "+pkgmatch['sigma']+" ) )" - pkgmatch['sord'] = "( script LIKE '%sord%' AND NOT ( "+pkgmatch['namd']+" ) )" - pkgmatch['spades'] = "( script LIKE '%spades%' AND NOT ( "+pkgmatch['ncbi']+" ) )" - pkgmatch['sses'] = "( script LIKE '%sses%' AND NOT ( script LIKE '%subprocess%' ) AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['arps']+" ) AND NOT ( "+pkgmatch['cactus']+" ) AND NOT ( "+pkgmatch['cdo']+" ) AND NOT ( "+pkgmatch['enzo']+" ) AND NOT ( "+pkgmatch['grib']+" ) AND NOT ( "+pkgmatch['milc']+" ) AND NOT ( "+pkgmatch['namd']+" ) AND NOT ( "+pkgmatch['python']+" ) AND NOT ( "+pkgmatch['qb']+" ) AND NOT ( "+pkgmatch['vasp']+" ) AND NOT ( "+pkgmatch['vbc']+" ) )" - pkgmatch['sus'] = "( script LIKE '%sus%' AND NOT ( "+pkgmatch['cam']+" ) AND NOT ( "+pkgmatch['consensus']+" ) AND NOT ( "+pkgmatch['stata']+" ) )" - pkgmatch['tabix'] = "( script LIKE '%tabix%' AND NOT ( "+pkgmatch['vep']+" ) )" - pkgmatch['tsc'] = "( script LIKE '%tsc%' AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['arps']+" ) AND NOT ( "+pkgmatch['cactus']+" ) AND NOT ( "+pkgmatch['foam']+" ) AND NOT ( "+pkgmatch['simfactory']+" ) AND NOT ( "+pkgmatch['swift']+" ) )" - pkgmatch['turbomole'] = "( script LIKE '%turbomole%' AND NOT ( "+pkgmatch['chemshell']+" ) )" - pkgmatch['ukh2d'] = "( ( script LIKE '%ukh2d%' OR script LIKE '%ukh.cxx.op%' ) AND NOT ( "+pkgmatch['h3d']+" ) )" - pkgmatch['wrf'] = "( script LIKE '%wrf%' AND NOT ( "+pkgmatch['arps']+" ) AND NOT ( "+pkgmatch['ccsm']+" ) AND NOT ( "+pkgmatch['grib']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['hy3s']+" ) AND NOT ( "+pkgmatch['milc']+" ) AND NOT ( "+pkgmatch['mm5']+" ) AND NOT ( "+pkgmatch['sgf']+" ) AND NOT ( "+pkgmatch['sigma']+" ) )" - pkgmatch['vmd'] = "( script LIKE '%vmd%' AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( "+pkgmatch['cpmd']+" ) AND NOT ( "+pkgmatch['cvm']+" ) AND NOT ( "+pkgmatch['gromacs']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['hmc']+" ) AND NOT ( "+pkgmatch['namd']+" ) AND NOT ( "+pkgmatch['pop']+" ) )" - pkgmatch['xgc'] = "( script LIKE '%xgc%' AND NOT ( "+pkgmatch['agk']+" ) AND NOT ( "+pkgmatch['hsi']+" ) )" - pkgmatch['yt'] = "( script LIKE '%yt%' AND NOT ( script LIKE '%ything%' ) AND NOT ( "+pkgmatch['amber']+" ) AND NOT ( 
"+pkgmatch['cactus']+" ) AND NOT ( "+pkgmatch['cdo']+" ) AND NOT ( "+pkgmatch['gen.v4']+" ) AND NOT ( "+pkgmatch['grib']+" ) AND NOT ( "+pkgmatch['grmhd']+" ) AND NOT ( "+pkgmatch['hoomd']+" ) AND NOT ( "+pkgmatch['hsi']+" ) AND NOT ( "+pkgmatch['hy3s']+" ) AND NOT ( "+pkgmatch['lammps']+" ) AND NOT ( "+pkgmatch['lfm']+" ) AND NOT ( "+pkgmatch['matlab']+" ) AND NOT ( "+pkgmatch['namd']+" ) AND NOT ( "+pkgmatch['nwchem']+" ) AND NOT ( "+pkgmatch['pop']+" ) AND NOT ( "+pkgmatch['python']+" ) AND NOT ( "+pkgmatch['stata']+" ) AND NOT ( "+pkgmatch['sses']+" ) AND NOT ( "+pkgmatch['sord']+" ) AND NOT ( "+pkgmatch['swift']+" ) AND NOT ( "+pkgmatch['sus']+" ) AND NOT ( "+pkgmatch['vasp']+" ) AND NOT ( "+pkgmatch['vorpal']+" ) )" + pkgmatch["agk"] = ( + "( script LIKE '%agk%' AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["mhd_1"] + + " ) )" + ) + pkgmatch["ansys"] = ( + "( ( script LIKE '%ansys' OR ( software IS NOT NULL AND software LIKE '%ansys%' ) ) AND NOT ( " + + pkgmatch["fluent"] + + " ) )" + ) + pkgmatch["amber"] = ( + "( ( script LIKE '%amber%' OR script LIKE '%sander%' OR script LIKE '%pmemd%' OR script LIKE '%sviol%' OR script LIKE '%SingleJob%' OR script LIKE '%MINJob%' OR script LIKE '%run_md_mpi.csh%' ) AND NOT ( " + + pkgmatch["cctm"] + + " ) AND NOT ( " + + pkgmatch["cvm"] + + " ) AND NOT ( " + + pkgmatch["idl"] + + " ) AND NOT ( " + + pkgmatch["qmc"] + + " ) AND NOT ( " + + pkgmatch["sigma"] + + " ) AND NOT ( " + + pkgmatch["tantalus"] + + " ) AND NOT ( " + + pkgmatch["tfe"] + + " ) )" + ) + pkgmatch["arps"] = ( + "( script LIKE '%arps%' AND NOT ( " + + pkgmatch["adf"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["matlab"] + + " ) )" + ) + pkgmatch["bam"] = ( + "( script LIKE '%bam%' AND NOT ( " + + pkgmatch["bismark"] + + " ) AND NOT ( " + + pkgmatch["cufflinks"] + + " ) AND NOT ( " + + pkgmatch["samtools"] + + " ) )" + ) + pkgmatch["bismark"] = ( + "( script LIKE '%bismark%' AND NOT ( " + pkgmatch["samtools"] + " ) )" + ) + pkgmatch["bugget"] = ( + "( script LIKE '%bugget%' AND NOT ( " + + pkgmatch["halo"] + + " ) AND NOT ( " + + pkgmatch["simpleio"] + + " ) )" + ) + pkgmatch["cactus"] = ( + "( script LIKE '%cactus%' AND NOT ( " + + pkgmatch["cdo"] + + " ) AND NOT ( " + + pkgmatch["simfactory"] + + " ) )" + ) + pkgmatch["cam"] = ( + "( script LIKE '%cam%' AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["grads"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) )" + ) + pkgmatch["ccsm"] = ( + "( ( script LIKE '%ccsm%' OR script LIKE '%cpl%csim%clm%pop%cam%' ) AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["mm5"] + + " ) AND NOT ( " + + pkgmatch["swift"] + + " ) )" + ) + pkgmatch["charmm"] = ( + "( script LIKE '%charmm%' AND NOT ( " + pkgmatch["chemshell"] + " ) )" + ) + pkgmatch["cpmd"] = ( + "( script LIKE '%cpmd%' AND NOT ( " + + pkgmatch["a_out"] + + " ) AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["inca"] + + " ) AND NOT ( " + + pkgmatch["vasp"] + + " ) )" + ) + pkgmatch["cvm"] = ( + "( script LIKE '%cvm%' AND NOT ( " + + pkgmatch["cpmd"] + + " ) AND NOT ( " + + pkgmatch["psolve"] + + " ) )" + ) + pkgmatch["ddmax"] = ( + "( script LIKE '%DDMAX%' AND NOT ( " + pkgmatch["maxsolve"] + " ) )" + ) + pkgmatch["eden"] = "( script LIKE '%eden%' AND NOT ( " + pkgmatch["matlab"] + " ) )" + pkgmatch["enzo"] = "( script LIKE '%enzo%' AND NOT ( " + pkgmatch["rtp"] + " ) )" + pkgmatch["f-plane"] = ( + "( script LIKE '%f-plane%' AND NOT ( " + 
pkgmatch["hsi"] + " ) )" + ) + pkgmatch["foam"] = "( script LIKE '%foam%' AND NOT ( " + pkgmatch["lsdyna"] + " ) )" + pkgmatch["gadget"] = ( + "( script LIKE '%gadget%' AND NOT ( " + pkgmatch["hsi"] + " ) )" + ) + pkgmatch["gdl"] = "( script LIKE '%gdl%' AND NOT ( " + pkgmatch["rotbouss"] + " ) )" + pkgmatch["grib"] = ( + "( script LIKE '%grib%' AND NOT ( " + + pkgmatch["cdo"] + + " ) AND NOT ( " + + pkgmatch["mm5"] + + " ) AND NOT ( " + + pkgmatch["sgf"] + + " ) AND NOT ( " + + pkgmatch["sigma"] + + " ) )" + ) + pkgmatch["gromacs"] = ( + "( ( script LIKE '%gromacs%' OR script LIKE '%grompp%' OR script LIKE '%mdrun%' OR script LIKE '%rgmx%' ) AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["cpmd"] + + " ) AND NOT ( " + + pkgmatch["sigma"] + + " ) AND NOT ( " + + pkgmatch["tantalus"] + + " ) )" + ) + pkgmatch["gtc"] = ( + "( ( script LIKE '%gtc%' OR script LIKE '%gts%' ) AND NOT ( " + + pkgmatch["cctm"] + + " ) AND NOT ( " + + pkgmatch["cutadapt"] + + " ) AND NOT ( " + + pkgmatch["pmcl3d"] + + " ) )" + ) + pkgmatch["halo"] = ( + "( script LIKE '%halo%' AND NOT ( " + + pkgmatch["enzo"] + + " ) AND NOT ( " + + pkgmatch["gadget"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["simpleio"] + + " ) AND NOT ( " + + pkgmatch["yt"] + + " ) )" + ) + pkgmatch["hfb"] = "( script LIKE '%hfb%' AND NOT ( " + pkgmatch["vbc"] + " ) )" + pkgmatch["hfodd"] = "( script LIKE '%hfodd%' AND NOT ( " + pkgmatch["cdo"] + " ) )" + pkgmatch["hmc"] = ( + "( script LIKE '%hmc%' AND NOT ( " + + pkgmatch["chroma"] + + " ) AND NOT ( " + + pkgmatch["gadget"] + + " ) AND NOT ( " + + pkgmatch["nplqcd"] + + " ) AND NOT ( " + + pkgmatch["tantalus"] + + " ) AND NOT ( " + + pkgmatch["terachem"] + + " ) )" + ) + pkgmatch["hsphere"] = ( + "( script LIKE '%hsphere%' AND NOT ( " + pkgmatch["lfm"] + " ) )" + ) + pkgmatch["hy3s"] = ( + "( ( script LIKE '%SSA%' OR script LIKE '%HyJCMSS-%' ) AND NOT ( " + + pkgmatch["arps"] + + " ) AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["bugget"] + + " ) AND NOT ( " + + pkgmatch["cactus"] + + " ) AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["ccsm"] + + " ) AND NOT ( " + + pkgmatch["cdo"] + + " ) AND NOT ( " + + pkgmatch["charmm"] + + " ) AND NOT ( " + + pkgmatch["energyplus"] + + " ) AND NOT ( " + + pkgmatch["enzo"] + + " ) AND NOT ( " + + pkgmatch["grmhd"] + + " ) AND NOT ( " + + pkgmatch["halo"] + + " ) AND NOT ( " + + pkgmatch["hchbm"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["milc"] + + " ) AND NOT ( " + + pkgmatch["ncl"] + + " ) AND NOT ( " + + pkgmatch["nwchem"] + + " ) AND NOT ( " + + pkgmatch["simpleio"] + + " ) AND NOT ( " + + pkgmatch["sses"] + + " ) AND NOT ( " + + pkgmatch["tfe"] + + " ) )" + ) + pkgmatch["ifs"] = ( + "( script LIKE '%ifsMASTER%' AND NOT ( " + + pkgmatch["cdp"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) )" + ) + pkgmatch["inca"] = "( script LIKE '%inca%' AND NOT ( " + pkgmatch["vasp"] + " ) )" + pkgmatch["ior"] = ( + "( script LIKE '%ior%' AND NOT ( username LIKE '%ior%' ) AND NOT ( script LIKE '%prior%' ) AND NOT ( " + + pkgmatch["a_out"] + + " ) AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["lammps"] + + " ) AND NOT ( " + + pkgmatch["swift"] + + " ) )" + ) + pkgmatch["meep"] = "( script LIKE '%meep%' AND NOT ( " + pkgmatch["sigma"] + " ) )" + pkgmatch["milc"] = ( + "( ( script LIKE '%milc%' OR script LIKE '%su3_%' OR script LIKE '%switch%.csh%' ) AND NOT ( " + + pkgmatch["nicam"] + + " ) AND NOT ( " + + pkgmatch["hmc"] + + " ) 
)" + ) + pkgmatch["measurements"] = ( + " ( script LIKE '%measurements%' ) AND NOT ( " + pkgmatch["milc"] + " )" + ) + pkgmatch["nag"] = ( + "( script LIKE '%nag%' AND NOT ( " + + pkgmatch["cctm"] + + " ) AND NOT ( " + + pkgmatch["mpi_helium"] + + " ) )" + ) + pkgmatch["namd"] = ( + "( ( script LIKE '%namd%' OR script LIKE '%md.sh%' OR SCRIPT LIKE '%rem_mono_npt4.sh%') AND NOT ( " + + pkgmatch["a_out"] + + " ) AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["cactus"] + + " ) AND NOT ( " + + pkgmatch["charmm"] + + " ) AND NOT ( " + + pkgmatch["cdo"] + + " ) AND NOT ( " + + pkgmatch["gromacs"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["hmc"] + + " ) AND NOT ( " + + pkgmatch["hy3s"] + + " ) AND NOT ( " + + pkgmatch["ior"] + + " ) )" + ) + pkgmatch["ncl"] = ( + "( script LIKE '%ncl%' AND NOT ( script LIKE '%include%' ) AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["gen.v4"] + + " ) AND NOT ( " + + pkgmatch["grmhd"] + + " ) AND NOT ( " + + pkgmatch["swift"] + + " ) )" + ) + pkgmatch["nested"] = ( + "( script LIKE '%nested%' AND NOT ( " + + pkgmatch["enzo"] + + " ) AND NOT ( " + + pkgmatch["grib"] + + " ) )" + ) + pkgmatch["nicam"] = ( + "( script LIKE '%nicam%' AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["grads"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["hy3s"] + + " ) )" + ) + pkgmatch["npb"] = ( + "( script LIKE '%npb%' AND NOT ( script LIKE '%npbs.%' ) AND NOT ( script LIKE '%snsnpb%' ) AND NOT ( " + + pkgmatch["milc"] + + " ) AND NOT ( " + + pkgmatch["vorpal"] + + " ) )" + ) + pkgmatch["omen"] = ( + "( script LIKE '%omen%' AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["GreenSolver"] + + " ) AND NOT ( " + + pkgmatch["milc"] + + " ) )" + ) + pkgmatch["overlap"] = ( + "( script LIKE '%overlap_%' AND NOT ( " + + pkgmatch["grib"] + + " ) AND NOT ( " + + pkgmatch["hfb"] + + " ) AND NOT ( " + + pkgmatch["nicam"] + + " ) AND NOT ( " + + pkgmatch["simfactory"] + + " ) )" + ) + pkgmatch["paratec"] = ( + "( script LIKE '%paratec%' AND NOT ( " + pkgmatch["sigma"] + " ) )" + ) + pkgmatch["pcg"] = ( + "( script LIKE '%pcg%' AND script NOT LIKE '%request%' AND NOT ( " + + pkgmatch["gen.v4"] + + " ) )" + ) + pkgmatch["pop"] = ( + "( script LIKE '%pop%' AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["ccsm"] + + " ) AND NOT ( " + + pkgmatch["cp2k"] + + " ) AND NOT ( " + + pkgmatch["charmm"] + + " ) AND NOT ( " + + pkgmatch["grib"] + + " ) AND NOT ( " + + pkgmatch["gromacs"] + + " ) AND NOT ( " + + pkgmatch["hmc"] + + " ) AND NOT ( " + + pkgmatch["namd"] + + " ) AND NOT ( " + + pkgmatch["nwchem"] + + " ) AND NOT ( " + + pkgmatch["run_im"] + + " ) AND NOT ( " + + pkgmatch["sses"] + + " ) )" + ) + pkgmatch["propagators"] = ( + "( script LIKE '%propagators%' AND NOT ( " + pkgmatch["milc"] + " ) )" + ) + pkgmatch["python"] = ( + "( ( script LIKE '%python%' OR script LIKE '%jython%' ) AND NOT ( " + + pkgmatch["hoomd"] + + " ) )" + ) + pkgmatch["qb"] = ( + "( script LIKE '%qb%' AND NOT ( " + + pkgmatch["hfb"] + + " ) AND NOT ( " + + pkgmatch["milc"] + + " ) AND NOT ( " + + pkgmatch["amber"] + + " ) )" + ) + pkgmatch["qrpacc"] = ( + "( script LIKE '%qrpacc%' AND NOT ( " + pkgmatch["vbc"] + " ) )" + ) + pkgmatch["quest"] = ( + "( script LIKE '%quest%' AND script NOT LIKE '%request%' AND NOT ( " + + pkgmatch["gen.v4"] + + " ) )" + ) + pkgmatch["radhyd"] = ( + "( script LIKE '%radhyd%' AND NOT ( " + pkgmatch["chimera"] + 
" ) )" + ) + pkgmatch["run_im"] = ( + "( script LIKE '%run_im%' AND NOT ( " + + pkgmatch["aims"] + + " ) AND NOT ( " + + pkgmatch["flash4"] + + " ) AND NOT ( " + + pkgmatch["ncl"] + + " ) AND NOT ( " + + pkgmatch["wrf"] + + " ) )" + ) + pkgmatch["run_xyzvort"] = ( + "( ( script LIKE '%run_xvort%' OR script LIKE '%run_yvort%' OR script LIKE '%run_zvort%' OR script LIKE '%run_thpert%' OR script LIKE '%run_u%' OR script LIKE '%run_v%' OR script LIKE '%run_w%' OR script LIKE '%run_dBZ%' ) AND NOT ( " + + pkgmatch["beopest"] + + " ) AND NOT ( " + + pkgmatch["lobster"] + + " ) AND NOT ( " + + pkgmatch["paraview"] + + " ) AND NOT ( " + + pkgmatch["vasp"] + + " ) AND NOT ( " + + pkgmatch["vnc"] + + " ) AND NOT ( " + + pkgmatch["wrf"] + + " ) )" + ) + pkgmatch["s3d"] = ( + "( script LIKE '%s3d%' AND NOT ( " + + pkgmatch["adf"] + + " ) AND NOT ( " + + pkgmatch["arps"] + + " ) AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["cctm"] + + " ) )" + ) + pkgmatch["scalapack"] = ( + "( script LIKE '%scalapack%' AND NOT ( script LIKE '%#module load scalapack%' ) AND NOT ( " + + pkgmatch["mustem"] + + " ) AND NOT ( " + + pkgmatch["siesta"] + + " ) AND NOT ( " + + pkgmatch["vasp"] + + " ) )" + ) + pkgmatch["sgf"] = ( + "( script LIKE '%sgf%' AND NOT ( " + + pkgmatch["lsdyna"] + + " ) AND NOT ( " + + pkgmatch["sigma"] + + " ) )" + ) + pkgmatch["sord"] = "( script LIKE '%sord%' AND NOT ( " + pkgmatch["namd"] + " ) )" + pkgmatch["spades"] = ( + "( script LIKE '%spades%' AND NOT ( " + pkgmatch["ncbi"] + " ) )" + ) + pkgmatch["sses"] = ( + "( script LIKE '%sses%' AND NOT ( script LIKE '%subprocess%' ) AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["arps"] + + " ) AND NOT ( " + + pkgmatch["cactus"] + + " ) AND NOT ( " + + pkgmatch["cdo"] + + " ) AND NOT ( " + + pkgmatch["enzo"] + + " ) AND NOT ( " + + pkgmatch["grib"] + + " ) AND NOT ( " + + pkgmatch["milc"] + + " ) AND NOT ( " + + pkgmatch["namd"] + + " ) AND NOT ( " + + pkgmatch["python"] + + " ) AND NOT ( " + + pkgmatch["qb"] + + " ) AND NOT ( " + + pkgmatch["vasp"] + + " ) AND NOT ( " + + pkgmatch["vbc"] + + " ) )" + ) + pkgmatch["sus"] = ( + "( script LIKE '%sus%' AND NOT ( " + + pkgmatch["cam"] + + " ) AND NOT ( " + + pkgmatch["consensus"] + + " ) AND NOT ( " + + pkgmatch["stata"] + + " ) )" + ) + pkgmatch["tabix"] = "( script LIKE '%tabix%' AND NOT ( " + pkgmatch["vep"] + " ) )" + pkgmatch["tsc"] = ( + "( script LIKE '%tsc%' AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["arps"] + + " ) AND NOT ( " + + pkgmatch["cactus"] + + " ) AND NOT ( " + + pkgmatch["foam"] + + " ) AND NOT ( " + + pkgmatch["simfactory"] + + " ) AND NOT ( " + + pkgmatch["swift"] + + " ) )" + ) + pkgmatch["turbomole"] = ( + "( script LIKE '%turbomole%' AND NOT ( " + pkgmatch["chemshell"] + " ) )" + ) + pkgmatch["ukh2d"] = ( + "( ( script LIKE '%ukh2d%' OR script LIKE '%ukh.cxx.op%' ) AND NOT ( " + + pkgmatch["h3d"] + + " ) )" + ) + pkgmatch["wrf"] = ( + "( script LIKE '%wrf%' AND NOT ( " + + pkgmatch["arps"] + + " ) AND NOT ( " + + pkgmatch["ccsm"] + + " ) AND NOT ( " + + pkgmatch["grib"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["hy3s"] + + " ) AND NOT ( " + + pkgmatch["milc"] + + " ) AND NOT ( " + + pkgmatch["mm5"] + + " ) AND NOT ( " + + pkgmatch["sgf"] + + " ) AND NOT ( " + + pkgmatch["sigma"] + + " ) )" + ) + pkgmatch["vmd"] = ( + "( script LIKE '%vmd%' AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["cpmd"] + + " ) AND NOT ( " + + pkgmatch["cvm"] + + " ) AND NOT ( " + + 
pkgmatch["gromacs"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["hmc"] + + " ) AND NOT ( " + + pkgmatch["namd"] + + " ) AND NOT ( " + + pkgmatch["pop"] + + " ) )" + ) + pkgmatch["xgc"] = ( + "( script LIKE '%xgc%' AND NOT ( " + + pkgmatch["agk"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) )" + ) + pkgmatch["yt"] = ( + "( script LIKE '%yt%' AND NOT ( script LIKE '%ything%' ) AND NOT ( " + + pkgmatch["amber"] + + " ) AND NOT ( " + + pkgmatch["cactus"] + + " ) AND NOT ( " + + pkgmatch["cdo"] + + " ) AND NOT ( " + + pkgmatch["gen.v4"] + + " ) AND NOT ( " + + pkgmatch["grib"] + + " ) AND NOT ( " + + pkgmatch["grmhd"] + + " ) AND NOT ( " + + pkgmatch["hoomd"] + + " ) AND NOT ( " + + pkgmatch["hsi"] + + " ) AND NOT ( " + + pkgmatch["hy3s"] + + " ) AND NOT ( " + + pkgmatch["lammps"] + + " ) AND NOT ( " + + pkgmatch["lfm"] + + " ) AND NOT ( " + + pkgmatch["matlab"] + + " ) AND NOT ( " + + pkgmatch["namd"] + + " ) AND NOT ( " + + pkgmatch["nwchem"] + + " ) AND NOT ( " + + pkgmatch["pop"] + + " ) AND NOT ( " + + pkgmatch["python"] + + " ) AND NOT ( " + + pkgmatch["stata"] + + " ) AND NOT ( " + + pkgmatch["sses"] + + " ) AND NOT ( " + + pkgmatch["sord"] + + " ) AND NOT ( " + + pkgmatch["swift"] + + " ) AND NOT ( " + + pkgmatch["sus"] + + " ) AND NOT ( " + + pkgmatch["vasp"] + + " ) AND NOT ( " + + pkgmatch["vorpal"] + + " ) )" + ) # packages with dependencies on packages that have dependencies... - pkgmatch['gaussian'] = "( script LIKE '%module load gaussian%' OR ( script LIKE '%g16%' AND NOT ( ( "+pkgmatch['adda']+" ) OR ( "+pkgmatch['adf']+" ) OR ( "+pkgmatch['dirac-pilot']+" ) OR ( "+pkgmatch['lammps']+" ) OR ( "+pkgmatch['starccm']+" ) ) ) OR script LIKE '%g09%' OR script LIKE '%g03%' OR ( script LIKE '%g98%' AND NOT ( ( "+pkgmatch['dirac-pilot']+" ) OR ( "+pkgmatch['python']+" ) OR ( "+pkgmatch['raxml']+" ) OR ( "+pkgmatch['samtools']+" ) OR ( "+pkgmatch['starccm']+" ) OR ( "+pkgmatch['tsutil']+" ) OR ( "+pkgmatch['vnc']+" ) OR script LIKE '%config98.WS%' ) ) )" - pkgmatch['R'] = "( ( script LIKE '%\nR %' OR script LIKE '%Rscript %' OR script LIKE '%RMPI%' OR script LIKE '%module load R\n%' OR script LIKE '%module load R/%' OR script LIKE '%ml R/%' ) AND NOT ( "+pkgmatch['gaussian']+" ) AND NOT ( "+pkgmatch['adf']+" ) )" - pkgmatch['res'] = "( script LIKE '%/res_%' AND NOT ( "+pkgmatch['enzo']+" ) AND NOT ( "+pkgmatch['grib']+" ) AND NOT ( "+pkgmatch['lammps']+" ) AND NOT ( "+pkgmatch['spheres3']+" ) AND NOT ( "+pkgmatch['tsutil']+" ) )" + pkgmatch["gaussian"] = ( + "( script LIKE '%module load gaussian%' OR ( script LIKE '%g16%' AND NOT ( ( " + + pkgmatch["adda"] + + " ) OR ( " + + pkgmatch["adf"] + + " ) OR ( " + + pkgmatch["dirac-pilot"] + + " ) OR ( " + + pkgmatch["lammps"] + + " ) OR ( " + + pkgmatch["starccm"] + + " ) ) ) OR script LIKE '%g09%' OR script LIKE '%g03%' OR ( script LIKE '%g98%' AND NOT ( ( " + + pkgmatch["dirac-pilot"] + + " ) OR ( " + + pkgmatch["python"] + + " ) OR ( " + + pkgmatch["raxml"] + + " ) OR ( " + + pkgmatch["samtools"] + + " ) OR ( " + + pkgmatch["starccm"] + + " ) OR ( " + + pkgmatch["tsutil"] + + " ) OR ( " + + pkgmatch["vnc"] + + " ) OR script LIKE '%config98.WS%' ) ) )" + ) + pkgmatch["R"] = ( + "( ( script LIKE '%\nR %' OR script LIKE '%Rscript %' OR script LIKE '%RMPI%' OR script LIKE '%module load R\n%' OR script LIKE '%module load R/%' OR script LIKE '%ml R/%' ) AND NOT ( " + + pkgmatch["gaussian"] + + " ) AND NOT ( " + + pkgmatch["adf"] + + " ) )" + ) + pkgmatch["res"] = ( + "( script LIKE '%/res_%' AND NOT ( " + + 
pkgmatch["enzo"] + + " ) AND NOT ( " + + pkgmatch["grib"] + + " ) AND NOT ( " + + pkgmatch["lammps"] + + " ) AND NOT ( " + + pkgmatch["spheres3"] + + " ) AND NOT ( " + + pkgmatch["tsutil"] + + " ) )" + ) return pkgmatch @@ -940,69 +1599,68 @@ singlepkg = None indexhack = False syslog = False -db = pbsacct.pbsacctDB(host=dbhost,db=dbname,dbuser=dbuser,dbpasswd=dbpasswd) -if ( not os.environ.has_key("PBSTOOLS_DIR") ): +db = pbsacct.pbsacctDB(host=dbhost, db=dbname, dbuser=dbuser, dbpasswd=dbpasswd) +if "PBSTOOLS_DIR" not in os.environ: os.environ["PBSTOOLS_DIR"] = "/usr/local" -cfgfile = os.environ["PBSTOOLS_DIR"]+"/etc/pbsacctdb.cfg" -if ( os.path.exists(cfgfile) ): +cfgfile = os.environ["PBSTOOLS_DIR"] + "/etc/pbsacctdb.cfg" +if os.path.exists(cfgfile): db.readConfigFile(cfgfile) # command line processing try: - opts, filelist = getopt.getopt(sys.argv[1:], - "C:D:E:FHILMTP:S::a:c:d:h:lp:s:t:u:w:", - ["help", - "list"]) -except getopt.GetoptError, err: - sys.stderr.write(str(err)+"\n") + opts, filelist = getopt.getopt( + sys.argv[1:], "C:D:E:FHILMTP:S::a:c:d:h:lp:s:t:u:w:", ["help", "list"] + ) +except getopt.GetoptError as err: + sys.stderr.write(str(err) + "\n") usage() for opt in opts: - if ( opt[0]=="-C" ): + if opt[0] == "-C": db.setConfigTable(opt[1]) - elif ( opt[0]=="-D" ): + elif opt[0] == "-D": db.setType(opt[1]) - elif ( opt[0]=="-E" ): + elif opt[0] == "-E": enddate = opt[1] - elif ( opt[0]=="-F" ): + elif opt[0] == "-F": fullreindex = True - elif ( opt[0] in ["-H","--help"] ): + elif opt[0] in ["-H", "--help"]: usage() - elif ( opt[0] in ["-I"] ): + elif opt[0] in ["-I"]: indexhack = True - elif ( opt[0]=="-L" ): + elif opt[0] == "-L": syslog = True - elif ( opt[0] in ["--list"] ): + elif opt[0] in ["--list"]: listmode = True - elif ( opt[0]=="-M" ): + elif opt[0] == "-M": domultimatch = True - elif ( opt[0]=="-P" ): + elif opt[0] == "-P": singlepkg = opt[1] - elif ( opt[0]=="-S" ): + elif opt[0] == "-S": startdate = opt[1] - elif ( opt[0]=="-T" ): + elif opt[0] == "-T": syntaxcheck = True - elif ( opt[0]=="-a" ): + elif opt[0] == "-a": append_to_jobid_in_db = opt[1] - elif ( opt[0]=="-c" ): + elif opt[0] == "-c": db.readConfigFile(opt[1]) - elif ( opt[0]=="-d" ): + elif opt[0] == "-d": db.setName(opt[1]) - elif ( opt[0]=="-h" ): + elif opt[0] == "-h": db.setSystem(opt[1]) - elif ( opt[0]=="-l" ): + elif opt[0] == "-l": syslog = False - elif ( opt[0]=="-p" ): + elif opt[0] == "-p": db.setPassword(opt[1]) - elif ( opt[0]=="-s" ): + elif opt[0] == "-s": db.setServerName(opt[1]) - elif ( opt[0]=="-t" ): + elif opt[0] == "-t": db.setJobsTable(opt[1]) - elif ( opt[0]=="-u" ): + elif opt[0] == "-u": db.setUser(opt[1]) - elif ( opt[0]=="-w" ): + elif opt[0] == "-w": db.setSoftwareTable(opt[1]) -if ( system is None and db.getSystem() is not None ): +if system is None and db.getSystem() is not None: system = db.getSystem() # configure logging @@ -1010,24 +1668,24 @@ logger = pbsacct.getLogger() logger.setLevel(logging.WARNING) lh = logging.StreamHandler(sys.stderr) -if ( syslog ): - lh = logging.handlers.SysLogHandler(address='/dev/log') -lh.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s')) +if syslog: + lh = logging.handlers.SysLogHandler(address="/dev/log") +lh.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s")) logger.addHandler(lh) # get lists of sw pkg patterns pkglist = software_list() -if ( singlepkg is not None and singlepkg not in ['*','%'] ): +if singlepkg is not None and singlepkg not in ["*", "%"]: pkglist = 
singlepkg.split(",") pkgmatch = software_match_list() -if ( listmode ): +if listmode: # list all the package rules and exit for pkg in pkglist: - if ( pkg in pkgmatch ): - print "%s = \"%s\"" % (pkg,pkgmatch[pkg]) + if pkg in pkgmatch: + print('%s = "%s"' % (pkg, pkgmatch[pkg])) else: - print "%s = UNKNOWN" % (pkg) + print("%s = UNKNOWN" % (pkg)) sys.exit(0) # connect to DB @@ -1035,53 +1693,69 @@ dbconn = db.connect() cursor = dbconn.cursor() use_index = "" -if ( indexhack ): +if indexhack: use_index = "USE INDEX (sw_app_jobs)" - if ( startdate is not None or enddate is not None ): + if startdate is not None or enddate is not None: use_index = "USE INDEX (system_start_jobs)" -if ( domultimatch ): +if domultimatch: # do multi-match analysis done = {} for pkg1 in pkglist: for pkg2 in pkglist: - if ( pkg1!=pkg2 and pkg1+":"+pkg2 not in done ): - sql = "SELECT '%s' AS pkg1, '%s' AS pkg2, COUNT(jobid) AS jobs FROM Jobs %s WHERE ( ( sw_app='%s' AND %s ) OR ( sw_app='%s' AND %s ) )" % (pkg1,pkg2,use_index,pkg1,pkgmatch[pkg2],pkg2,pkgmatch[pkg1]) - if ( system is not None ): + if pkg1 != pkg2 and pkg1 + ":" + pkg2 not in done: + sql = ( + "SELECT '%s' AS pkg1, '%s' AS pkg2, COUNT(jobid) AS jobs FROM Jobs %s WHERE ( ( sw_app='%s' AND %s ) OR ( sw_app='%s' AND %s ) )" + % ( + pkg1, + pkg2, + use_index, + pkg1, + pkgmatch[pkg2], + pkg2, + pkgmatch[pkg1], + ) + ) + if system is not None: sql += " AND system='%s'" % system - if ( startdate is not None ): + if startdate is not None: sql += " AND ( start_date>='%s' )" % startdate - if ( enddate is not None ): + if enddate is not None: sql += " AND ( start_date<='%s' )" % enddate - #print sql + # print sql cursor.execute(sql) results = cursor.fetchall() for result in results: - if ( result[2]>0 ): - print "%-12s\t%-12s\t%d" % result - done[pkg1+":"+pkg2] = True - done[pkg2+":"+pkg1] = True + if result[2] > 0: + print("%-12s\t%-12s\t%d" % result) + done[pkg1 + ":" + pkg2] = True + done[pkg2 + ":" + pkg1] = True else: for pkg in pkglist: - if ( syntaxcheck ): - sql = "SELECT COUNT(jobid) FROM Jobs %s WHERE script IS NOT NULL" % use_index + if syntaxcheck: + sql = ( + "SELECT COUNT(jobid) FROM Jobs %s WHERE script IS NOT NULL" % use_index + ) else: - sql = "UPDATE Jobs %s SET sw_app='%s' WHERE script IS NOT NULL" % (use_index,pkg) - if ( not fullreindex ): + sql = "UPDATE Jobs %s SET sw_app='%s' WHERE script IS NOT NULL" % ( + use_index, + pkg, + ) + if not fullreindex: sql += " AND sw_app IS NULL" - if ( system is not None ): + if system is not None: sql += " AND system LIKE '%s'" % system - if ( startdate is not None ): + if startdate is not None: sql += " AND ( start_date>='%s' )" % startdate - if ( enddate is not None ): + if enddate is not None: sql += " AND ( start_date<='%s' )" % enddate - sql += " AND "+pkgmatch[pkg] + sql += " AND " + pkgmatch[pkg] try: cursor.execute(sql) - if ( not syntaxcheck ): + if not syntaxcheck: db.commit() except Exception as e: - logger.error("package \"%s\": %s" % (pkg,str(e))) + logger.error('package "%s": %s' % (pkg, str(e))) logger.debug("%s" % sql) dbconn.close() diff --git a/sbin/transform-accounting-log b/sbin/transform-accounting-log index edde331..b947d0c 100755 --- a/sbin/transform-accounting-log +++ b/sbin/transform-accounting-log @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Usage: transform-accounting-log [options] import datetime @@ -11,7 +11,9 @@ import sys def usage(): - sys.stderr.write("Usage: transform-accounting-log [options] \n") + sys.stderr.write( + "Usage: 
transform-accounting-log [options] \n" + ) sys.exit(1) @@ -34,79 +36,83 @@ syslog = False # command line processing try: - opts, args = getopt.getopt(sys.argv[1:], - "A:DG:H:JLN:RU:Wdj:lt:", - ["account-map-file=", - "debug", - "deidentify", - "group-map-file=", - "host-transform=", - "jobid-adjust=", - "jobid-autorenumber", - "no-syslog", - "node-transform=", - "read-map-files", - "stderr", - "syslog", - "timestamp-adjust=", - "user-map-file=", - "write-map-files"]) -except getopt.GetoptError, err: - sys.stderr.write(str(err)+"\n") + opts, args = getopt.getopt( + sys.argv[1:], + "A:DG:H:JLN:RU:Wdj:lt:", + [ + "account-map-file=", + "debug", + "deidentify", + "group-map-file=", + "host-transform=", + "jobid-adjust=", + "jobid-autorenumber", + "no-syslog", + "node-transform=", + "read-map-files", + "stderr", + "syslog", + "timestamp-adjust=", + "user-map-file=", + "write-map-files", + ], + ) +except getopt.GetoptError as err: + sys.stderr.write(str(err) + "\n") usage() for opt in opts: - if ( opt[0] in ["-A","--account-map-file"] ): + if opt[0] in ["-A", "--account-map-file"]: acctmapfile = opt[1] - elif ( opt[0] in ["-D","--deidentify"] ): + elif opt[0] in ["-D", "--deidentify"]: deidentify = True - elif ( opt[0] in ["-G","--group-map-file"] ): + elif opt[0] in ["-G", "--group-map-file"]: groupmapfile = opt[1] - elif ( opt[0] in ["-H","--host-transform"] ): + elif opt[0] in ["-H", "--host-transform"]: # expect a sed style "s/old/new/" string elt = opt[1].split("/") - hostxform = [elt[1],elt[2]] - elif ( opt[0] in ["-J","--jobid-autorenumber"] ): + hostxform = [elt[1], elt[2]] + elif opt[0] in ["-J", "--jobid-autorenumber"]: autorenumber = True - elif ( opt[0] in ["-L","--syslog"] ): + elif opt[0] in ["-L", "--syslog"]: syslog = True - elif ( opt[0] in ["-N","--node-transform"] ): + elif opt[0] in ["-N", "--node-transform"]: # expect a sed style "s/old/new/" string elt = opt[1].split("/") - nodexform = [elt[1],elt[2]] - elif ( opt[0] in ["-R","--read-map-files"] ): + nodexform = [elt[1], elt[2]] + elif opt[0] in ["-R", "--read-map-files"]: readmaps = True - elif ( opt[0] in ["-U","--user-map-file"] ): + elif opt[0] in ["-U", "--user-map-file"]: usermapfile = opt[1] - elif ( opt[0] in ["-W","--write-map-files"] ): + elif opt[0] in ["-W", "--write-map-files"]: writemaps = True - elif ( opt[0] in ["-d","--debug"] ): + elif opt[0] in ["-d", "--debug"]: debug = True - elif ( opt[0] in ["-j","--jobid-adjust"] ): + elif opt[0] in ["-j", "--jobid-adjust"]: jobiddelta = int(opt[1]) - elif ( opt[0] in ["-l","--no-syslog","--stderr"] ): + elif opt[0] in ["-l", "--no-syslog", "--stderr"]: syslog = False - elif ( opt[0] in ["-t","--timestamp-adjust"] ): + elif opt[0] in ["-t", "--timestamp-adjust"]: tsdelta = int(opt[1]) -if ( len(args)!=2 ): +if len(args) != 2: usage() # configure logging logger = pbsacct.getLogger() -if ( debug ): +if debug: logger.setLevel(logging.INFO) else: logger.setLevel(logging.WARNING) lh = logging.StreamHandler(sys.stderr) -if ( syslog ): - lh = logging.handlers.SysLogHandler(address='/dev/log') -lh.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s')) +if syslog: + lh = logging.handlers.SysLogHandler(address="/dev/log") +lh.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s")) logger.addHandler(lh) # Read raw data from original file original = args[0] -if ( not os.path.exists(original) ): +if not os.path.exists(original): raise IOError("%s: File not found" % original) rawdata = pbsacct.raw_data_from_file(original) @@ -116,176 
+122,183 @@ grouplist = {} acctlist = {} # Read maps, if requested -if ( readmaps ): - if ( usermapfile is not None ): - if ( os.path.exists(usermapfile) ): - if ( debug ): +if readmaps: + if usermapfile is not None: + if os.path.exists(usermapfile): + if debug: logger.info("Reading usermap file %s" % usermapfile) usermap = open(usermapfile) for line in usermap.readlines(): - if ( not line.startswith("#") ): - (key,value) = line.rstrip('\n').split("=",1) + if not line.startswith("#"): + (key, value) = line.rstrip("\n").split("=", 1) userlist[key] = value usermap.close() - if ( debug ): - logger.info("Done reading usermap file %" % usermapfile) - elif ( debug ): + if debug: + logger.info("Done reading usermap file %s" % usermapfile) + elif debug: logger.info("Skipping nonexistent groupmap file %s\n" % usermapfile) - if ( groupmapfile is not None ): - if ( os.path.exists(groupmapfile) ): - if ( debug ): + if groupmapfile is not None: + if os.path.exists(groupmapfile): + if debug: logger.info("Reading groupmap file %s" % groupmapfile) groupmap = open(groupmapfile) for line in groupmap.readlines(): - if ( not line.startswith("#") ): - (key,value) = line.rstrip('\n').split("=",1) + if not line.startswith("#"): + (key, value) = line.rstrip("\n").split("=", 1) grouplist[key] = value groupmap.close() - if ( debug ): + if debug: logger.info("Done reading groupmap file %s" % groupmapfile) - elif ( debug ): + elif debug: logger.info("Skipping nonexistent groupmap file %s" % groupmapfile) - if ( acctmapfile is not None ): - if ( os.path.exists(acctmapfile) ): - if ( debug ): + if acctmapfile is not None: + if os.path.exists(acctmapfile): + if debug: logger.info("Reading acctmap file %s" % acctmapfile) acctmap = open(acctmapfile) for line in acctmap.readlines(): - if ( not line.startswith("#") ): - (key,value) = line.rstrip('\n').split("=",1) + if not line.startswith("#"): + (key, value) = line.rstrip("\n").split("=", 1) acctlist[key] = value acctmap.close() - if ( debug ): + if debug: logger.info("Done reading acctmap file %s" % acctmapfile) - elif ( debug ): - logger.info("Skipping nonexistent acctmap file %s" % acctmapfile) + elif debug: + logger.info("Skipping nonexistent acctmap file %s" % acctmapfile) nuser = len(userlist.keys()) ngroup = len(grouplist.keys()) nacct = len(acctlist.keys()) minjobid = -1 -if ( deidentify or autorenumber ): +if deidentify or autorenumber: for record in rawdata: - if ( deidentify ): + if deidentify: for key in record[3].keys(): - if ( key=="user" and record[3][key] not in userlist ): + if key == "user" and record[3][key] not in userlist: newuser = "usr%05d" % nuser userlist[record[3][key]] = newuser - nuser = nuser+1 - elif ( key=="group" and record[3][key] not in grouplist ): + nuser = nuser + 1 + elif key == "group" and record[3][key] not in grouplist: newgroup = "grp%05d" % ngroup grouplist[record[3][key]] = newgroup - ngroup = ngroup+1 - elif ( key=="account" and record[3][key] not in acctlist ): + ngroup = ngroup + 1 + elif key == "account" and record[3][key] not in acctlist: newacct = "act%05d" % nacct acctlist[record[3][key]] = newacct - nacct = nacct+1 - elif ( key in ["owner","requestor"] ): + nacct = nacct + 1 + elif key in ["owner", "requestor"]: user = record[3][key].split("@")[0] - if ( user not in userlist ): + if user not in userlist: newuser = "usr%05d" % nuser userlist[user] = newuser - nuser = nuser+1 - if ( autorenumber ): - njobid = int(re.sub("\[[0-9]*\]","",record[0].split(".")[0])) - if ( minjobid==-1 or njobid0 ): - output += "\tctime = %s 
(%d)\n" % (str(self.ctime()),self.ctime_ts()) - if ( self.qtime_ts()>0 ): - output += "\tqtime = %s (%d)\n" % (str(self.qtime()),self.qtime_ts()) - if ( self.etime_ts()>0 ): - output += "\tetime = %s (%d)\n" % (str(self.etime()),self.etime_ts()) - if ( self.start_ts()>0 ): - output += "\tstart = %s (%d)\n" % (str(self.start()),self.start_ts()) - if ( self.end_ts()>0 ): - output += "\tend = %s (%d)\n" % (str(self.end()),self.end_ts()) - if ( self.start_count()>0 ): + if self.ctime_ts() > 0: + output += "\tctime = %s (%d)\n" % (str(self.ctime()), self.ctime_ts()) + if self.qtime_ts() > 0: + output += "\tqtime = %s (%d)\n" % (str(self.qtime()), self.qtime_ts()) + if self.etime_ts() > 0: + output += "\tetime = %s (%d)\n" % (str(self.etime()), self.etime_ts()) + if self.start_ts() > 0: + output += "\tstart = %s (%d)\n" % (str(self.start()), self.start_ts()) + if self.end_ts() > 0: + output += "\tend = %s (%d)\n" % (str(self.end()), self.end_ts()) + if self.start_count() > 0: output += "\tstart_count = %d\n" % self.start_count() output += "\tnproc = %d\n" % self.num_processors() - if ( self.nodes() is not None ): + if self.nodes() is not None: output += "\tnodes = %s\n" % self.nodes() output += "\tnodect = %d\n" % self.num_nodes() output += "\tnodes_used = %s\n" % str(self.nodes_used()) - if ( self.num_gpus()>0 ): + if self.num_gpus() > 0: output += "\tngpus = %d\n" % self.num_gpus() - if ( self.feature() is not None ): + if self.feature() is not None: output += "\tfeature = %s\n" % self.feature() - if ( self.gattr() is not None ): + if self.gattr() is not None: output += "\tgattr = %s\n" % self.gattr() - if ( self.gres() is not None ): + if self.gres() is not None: output += "\tgres = %s\n" % self.gres() - if ( self.software() is not None ): + if self.software() is not None: output += "\tsoftware = %s\n" % self.software() - if ( self.other() is not None ): + if self.other() is not None: output += "\tother = %s\n" % self.other() - if ( self.mem_used_kb()>0 ): + if self.mem_used_kb() > 0: output += "\tmem_used (kb) = %d\n" % self.mem_used_kb() output += "\tmem_limit (kb) = %d\n" % self.mem_limit_kb() - if ( self.vmem_used_kb()>0 ): + if self.vmem_used_kb() > 0: output += "\tvmem_used (kb) = %d\n" % self.vmem_used_kb() output += "\tvmem_limit (kb) = %d\n" % self.vmem_limit_kb() - if ( self.walltime_used_sec()>0 ): - output += "\twalltime_used = %s (%d)\n" % (sec_to_time(self.walltime_used_sec()),self.walltime_used_sec()) - output += "\twalltime_limit = %s (%d)\n" % (sec_to_time(self.walltime_limit_sec()),self.walltime_limit_sec()) - if ( self.cput_used_sec()>0 ): - output += "\tcput_used = %s (%d)\n" % (sec_to_time(self.cput_used_sec()),self.cput_used_sec()) - output += "\tcput_limit = %s (%d)\n" % (sec_to_time(self.cput_limit_sec()),self.cput_limit_sec()) - if ( self.exit_status() is not None ): + if self.walltime_used_sec() > 0: + output += "\twalltime_used = %s (%d)\n" % ( + sec_to_time(self.walltime_used_sec()), + self.walltime_used_sec(), + ) + output += "\twalltime_limit = %s (%d)\n" % ( + sec_to_time(self.walltime_limit_sec()), + self.walltime_limit_sec(), + ) + if self.cput_used_sec() > 0: + output += "\tcput_used = %s (%d)\n" % ( + sec_to_time(self.cput_used_sec()), + self.cput_used_sec(), + ) + output += "\tcput_limit = %s (%d)\n" % ( + sec_to_time(self.cput_limit_sec()), + self.cput_limit_sec(), + ) + if self.exit_status() is not None: output += "\texit_status = %d\n" % self.exit_status() output += "}" return output @@ -138,13 +168,13 @@ def get_update_time(self): def 
get_update_time_ts(self): return int(self._updatetime.strftime("%s")) - def set_update_time(self,update_time): - self._updatetime = datetime.datetime.strptime(update_time,self._updatetimefmt) + def set_update_time(self, update_time): + self._updatetime = datetime.datetime.strptime(update_time, self._updatetimefmt) def get_state(self): return self._state - def set_state(self,state): + def set_state(self, state): self._state = state def get_resources(self): @@ -153,33 +183,32 @@ def get_resources(self): def get_resource_keys(self): return self._resources.keys() - def get_resource(self,key): - if ( self.has_resource(key) ): + def get_resource(self, key): + if self.has_resource(key): return self._resources[key] else: return None - def set_resource(self,key,value): + def set_resource(self, key, value): self._resources[key] = value - def unset_resource(self,key): - if ( key in self._resources ): + def unset_resource(self, key): + if key in self._resources: del self._resources[key] - def has_resource(self,key): + def has_resource(self, key): return key in self._resources - def add_to_resource(self,key,value): - supported_time_resources = ["resources_used.cput","resources_used.walltime"] - if ( key in supported_time_resources and - not self.has_resource(key) ): + def add_to_resource(self, key, value): + supported_time_resources = ["resources_used.cput", "resources_used.walltime"] + if key in supported_time_resources and not self.has_resource(key): self._resources[key] = value - elif ( key in supported_time_resources ): + elif key in supported_time_resources: oldval = time_to_sec(self._resources[key]) incr = time_to_sec(value) - self._resources[key] = sec_to_time(oldval+incr) + self._resources[key] = sec_to_time(oldval + incr) else: - raise ValueError("Resource \""+key+"\" not supported for addition") + raise ValueError('Resource "' + key + '" not supported for addition') def jobid(self): return self._jobid @@ -187,13 +216,13 @@ def jobid(self): def numeric_jobid(self): """ Returns the numeric job id (i.e. 
without the hostname, if any) - + Input is of the form: 6072125.oak-batch.osc.edu Output is of the form: 6072125 """ - if ( '[' in self._jobid ): + if "[" in self._jobid: # this is an array job (###[#]), so return the "master" jobid - return int((self._jobid.split(".")[0])[0:self._jobid.index('[')]) + return int((self._jobid.split(".")[0])[0 : self._jobid.index("[")]) else: return int(self._jobid.split(".")[0]) @@ -220,75 +249,83 @@ def owner(self): def submithost(self): owner = self.owner() - if ( owner is None or - '@' not in owner or - len(owner.split('@'))>2 ): + if owner is None or "@" not in owner or len(owner.split("@")) > 2: return None else: - return owner.split('@',1)[1] + return owner.split("@", 1)[1] def ctime(self): - if ( self.has_resource("ctime") ): + if self.has_resource("ctime"): return datetime.datetime.fromtimestamp(self.ctime_ts()) else: - raise RuntimeError("Job "+self._jobid+" has no ctime set (aborted before started?)") + raise RuntimeError( + "Job " + self._jobid + " has no ctime set (aborted before started?)" + ) def qtime(self): - if ( self.has_resource("qtime") ): + if self.has_resource("qtime"): return datetime.datetime.fromtimestamp(self.qtime_ts()) else: - raise RuntimeError("Job "+self._jobid+" has no qtime set (aborted before started?)") + raise RuntimeError( + "Job " + self._jobid + " has no qtime set (aborted before started?)" + ) def etime(self): - if ( self.has_resource("etime") ): + if self.has_resource("etime"): return datetime.datetime.fromtimestamp(self.etime_ts()) else: - raise RuntimeError("Job "+self._jobid+" has no etime set (aborted before started?)") + raise RuntimeError( + "Job " + self._jobid + " has no etime set (aborted before started?)" + ) def start(self): - if ( self.has_resource("start") ): + if self.has_resource("start"): return datetime.datetime.fromtimestamp(self.start_ts()) else: - raise RuntimeError("Job "+self._jobid+" has no start time set (aborted before started?)") + raise RuntimeError( + "Job " + + self._jobid + + " has no start time set (aborted before started?)" + ) def end(self): - if ( self.has_resource("end") ): + if self.has_resource("end"): return datetime.datetime.fromtimestamp(self.end_ts()) else: - raise RuntimeError("Job "+self._jobid+" has no end time set") + raise RuntimeError("Job " + self._jobid + " has no end time set") def ctime_ts(self): - if ( self.has_resource("ctime") ): + if self.has_resource("ctime"): return int(self.get_resource("ctime")) else: return 0 def qtime_ts(self): - if ( self.has_resource("qtime") ): + if self.has_resource("qtime"): return int(self.get_resource("qtime")) else: return 0 def etime_ts(self): - if ( self.has_resource("etime") ): + if self.has_resource("etime"): return int(self.get_resource("etime")) else: return 0 def start_ts(self): - if ( self.has_resource("start") ): + if self.has_resource("start"): return int(self.get_resource("start")) else: return 0 def end_ts(self): - if ( self.has_resource("end") ): + if self.has_resource("end"): return int(self.get_resource("end")) else: return 0 def start_count(self): - if ( self.has_resource("start_count") ): + if self.has_resource("start_count"): return int(self.get_resource("start_count")) else: return 0 @@ -301,116 +338,120 @@ def tasks(self): def nodes_used(self): nodes = [] - if ( self.has_resource("exec_host") ): + if self.has_resource("exec_host"): for node_and_procs in self.get_resource("exec_host").split("+"): - (node,procs) = node_and_procs.split("/") - if ( node not in nodes ): + (node, procs) = node_and_procs.split("/") + if 
node not in nodes: nodes.append(node) return nodes def num_nodes(self): nnodes = 0 - if ( self.has_resource("unique_node_count")): + if self.has_resource("unique_node_count"): # Added in TORQUE 4.2.9 nnodes = int(self.get_resource("unique_node_count")) - elif ( self.has_resource("Resource_List.nodect")): + elif self.has_resource("Resource_List.nodect"): nnodes = int(self.get_resource("Resource_List.nodect")) - elif ( len(self.nodes_used())>0 ): + elif len(self.nodes_used()) > 0: nnodes = len(self.nodes_used()) - elif ( self.has_resource("Resource_List.nodes") ): + elif self.has_resource("Resource_List.nodes"): for node in self.get_resource("Resource_List.nodes").split("+"): nodes_and_ppn = node.split(":") try: n = int(nodes_and_ppn[0]) - except: + except Exception: n = 1 - nnodes = nnodes+n - elif ( self.has_resource("Resource_List.neednodes") ): + nnodes = nnodes + n + elif self.has_resource("Resource_List.neednodes"): for node in self.get_resource("Resource_List.neednodes").split("+"): nodes_and_ppn = node.split(":") try: - n = int(nodes_ppn_prop[0]) + n = int(nodes_and_ppn[0]) except ValueError: n = 1 - nnodes = nnodes+n + nnodes = nnodes + n return nnodes def num_processors(self): - """ Returns the total number of processors the job requires """ + """Returns the total number of processors the job requires""" processors = 0 - if ( self.has_resource("total_execution_slots") ): + if self.has_resource("total_execution_slots"): # Added in TORQUE 4.2.9 processors = int(self.get_resource("total_execution_slots")) - elif ( self.has_resource("Resource_List.nodes") ): + elif self.has_resource("Resource_List.nodes"): # Compute the nodes requested and the processors per node for nodelist in self.get_resource("Resource_List.nodes").split("+"): nodes_and_ppn = nodelist.split(":") try: nodes = int(nodes_and_ppn[0]) - except: + except Exception: # Handles malformed log values nodes = 1 - if ( len(nodes_and_ppn)>=2 ): + if len(nodes_and_ppn) >= 2: try: ppn = int(re.search("ppn=(\d+)", nodes_and_ppn[1]).group(1)) except AttributeError: ppn = 1 else: ppn = 1 - nodes = max(1,nodes) - ppn = max(1,ppn) - processors = processors + nodes*ppn + nodes = max(1, nodes) + ppn = max(1, ppn) + processors = processors + nodes * ppn return processors ncpus = 0 - if ( self.has_resource("Resource_List.ncpus") ): - ncpus = max(ncpus,int(self.get_resource("Resource_List.ncpus"))) - if ( self.has_resource("resources_used.ncpus") ): - ncpus = max(ncpus,int(self.get_resource("resources_used.ncpus"))) - if ( self.has_resource("Resource_List.mppssp") or - self.has_resource("resources_used.mppssp") or - self.has_resource("Resource_List.mppe") or - self.has_resource("resources_used.mppe") ): + if self.has_resource("Resource_List.ncpus"): + ncpus = max(ncpus, int(self.get_resource("Resource_List.ncpus"))) + if self.has_resource("resources_used.ncpus"): + ncpus = max(ncpus, int(self.get_resource("resources_used.ncpus"))) + if ( + self.has_resource("Resource_List.mppssp") + or self.has_resource("resources_used.mppssp") + or self.has_resource("Resource_List.mppe") + or self.has_resource("resources_used.mppe") + ): # Cray SV1/X1 specific code # These systems could gang together 4 individual processors (SSPs) # in a virtual processor (MSPs). # This is admittedly rather weird and only of historical interest. 
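            # Worked example (illustrative values, not from the patch): a job
            # requesting mppe=2 MSPs and mppssp=3 SSPs counts as
            # 3 + 4*2 = 11 SSP-equivalents, so ncpus becomes max(ncpus, 11).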
ssps = 0 - if ( self.has_resource("Resource_List.mppssp") ): + if self.has_resource("Resource_List.mppssp"): ssps = ssps + int(self.get_resource("Resource_List.mppssp")) - elif ( self.has_resource("resources_used.mppssp") ): + elif self.has_resource("resources_used.mppssp"): ssps = ssps + int(self.get_resource("resources_used.mppssp")) - if ( self.has_resource("Resource_List.mppe") ): - ssps = ssps + 4*int(self.get_resource("Resource_List.mppe")) - elif ( self.has_resource("resources_used.mppe") ): - ssps = ssps + 4*int(self.get_resource("resources_used.mppe")) - ncpus = max(ncpus,ssps) - if ( self.has_resource("Resource_List.size") ): - ncpus = max(ncpus,int(self.get_resource("Resource_List.size"))) + if self.has_resource("Resource_List.mppe"): + ssps = ssps + 4 * int(self.get_resource("Resource_List.mppe")) + elif self.has_resource("resources_used.mppe"): + ssps = ssps + 4 * int(self.get_resource("resources_used.mppe")) + ncpus = max(ncpus, ssps) + if self.has_resource("Resource_List.size"): + ncpus = max(ncpus, int(self.get_resource("Resource_List.size"))) # Return the larger of the two computed values - return max(processors,ncpus) + return max(processors, ncpus) def num_gpus(self): ngpus = 0 # sadly, there doesn't appear to be a more elegant way to do this - if ( self.nodes() is not None and "gpus=" in self.nodes() ): + if self.nodes() is not None and "gpus=" in self.nodes(): # Compute the nodes requested and the processors per node for nodelist in self.nodes().split("+"): nodes_and_props = nodelist.split(":") try: nodes = int(nodes_and_props[0]) - except: + except Exception: # Handles malformed log values nodes = 1 gpn = 0 - if ( len(nodes_and_props)>=2 ): + if len(nodes_and_props) >= 2: for nodeprop in nodes_and_props[1:]: - if ( re.match("^gpus=(\d+)$", nodeprop) ): + if re.match("^gpus=(\d+)$", nodeprop): gpn = int(re.search("^gpus=(\d+)$", nodeprop).group(1)) - nodes = max(1,nodes) - gpn = max(0,gpn) - ngpus = ngpus + nodes*gpn - elif ( self.gres() is not None and "gpus:" in self.gres() ): - ngpus = int(re.search("gpus:(\d+)",self.gres()).group(1)) + nodes = max(1, nodes) + gpn = max(0, gpn) + ngpus = ngpus + nodes * gpn + elif self.gres() is not None and "gpus:" in self.gres(): + ngpus = int(re.search("gpus:(\d+)", self.gres()).group(1)) + if self.has_resource("Resource_List.ngpus"): + ngpus = max(ngpus, int(self.get_resource("Resource_List.ngpus"))) return ngpus def feature(self): @@ -432,83 +473,83 @@ def software(self): return self.get_resource("Resource_List.software") def mem_used_kb(self): - """ Return the amount of memory (in kb) used by the job """ - if ( self.has_resource("resources_used.mem") ): + """Return the amount of memory (in kb) used by the job""" + if self.has_resource("resources_used.mem"): return mem_to_kb(self.get_resource("resources_used.mem")) else: return 0 def vmem_used_kb(self): - """ Return the amount of virtual memory (in kb) used by the job """ - if ( self.has_resource("resources_used.vmem") ): + """Return the amount of virtual memory (in kb) used by the job""" + if self.has_resource("resources_used.vmem"): return mem_to_kb(self.get_resource("resources_used.vmem")) else: return 0 def mem_limit(self): - if ( self.has_resource("Resource_List.mem") ): + if self.has_resource("Resource_List.mem"): return self.get_resource("Resource_List.mem") else: return None def vmem_limit(self): - if ( self.has_resource("Resource_List.vmem") ): + if self.has_resource("Resource_List.vmem"): return self.get_resource("Resource_List.vmem") else: return None def 
mem_limit_kb(self): - if ( self.has_resource("Resource_List.mem") ): + if self.has_resource("Resource_List.mem"): return mem_to_kb(self.get_resource("Resource_List.mem")) else: return 0 def vmem_limit_kb(self): - if ( self.has_resource("Resource_List.vmem") ): + if self.has_resource("Resource_List.vmem"): return mem_to_kb(self.get_resource("Resource_List.vmem")) else: return 0 def walltime_used_sec(self): - if ( self.has_resource("resources_used.walltime") ): - if ( time_to_sec(self.get_resource("resources_used.walltime"))>=0 ): + if self.has_resource("resources_used.walltime"): + if time_to_sec(self.get_resource("resources_used.walltime")) >= 0: return time_to_sec(self.get_resource("resources_used.walltime")) else: - return self.end_ts()-self.start_ts() + return self.end_ts() - self.start_ts() else: return 0 def walltime_limit_sec(self): - if ( self.has_resource("Resource_List.walltime") ): + if self.has_resource("Resource_List.walltime"): return time_to_sec(self.get_resource("Resource_List.walltime")) else: return 0 - + def cput_used_sec(self): - if ( self.has_resource("resources_used.cput") ): + if self.has_resource("resources_used.cput"): return time_to_sec(self.get_resource("resources_used.cput")) else: return 0 def cput_limit_sec(self): - if ( self.has_resource("Resource_List.cput") ): + if self.has_resource("Resource_List.cput"): return time_to_sec(self.get_resource("Resource_List.cput")) else: return 0 def energy_used(self): - if ( self.has_resource("resources_used.energy_used") ): + if self.has_resource("resources_used.energy_used"): return int(self.get_resource("resources_used.energy_used")) else: return 0 def exit_status(self): - if ( self.has_resource("Exit_status") ): + if self.has_resource("Exit_status"): return int(self.get_resource("Exit_status")) else: return None - def write_last_accounting_record(self,fd): + def write_last_accounting_record(self, fd): """ Write a the equivalent of the last accounting record for this job in the "standard" PBS format """ @@ -516,197 +557,236 @@ def write_last_accounting_record(self,fd): state = self.get_state() jobid = self.jobid() resources = self.get_resources() - record = (jobid,datestamp,state,resources) - write_record_to_accounting_log(record,fd) + record = (jobid, datestamp, state, resources) + write_record_to_accounting_log(record, fd) + class jobinfoTestCase(unittest.TestCase): - def __init__(self,methodName='runTest'): - super(jobinfoTestCase,self).__init__(methodName) + def __init__(self, methodName="runTest"): + super(jobinfoTestCase, self).__init__(methodName) # don't replicate our fake test job in every test method - self.testjob = jobinfo('123456.fakehost.lan', - '02/13/2009 18:31:30', - 'E', - {'user': 'foo', - 'group': 'bar', - 'owner': 'foo@login1.fakehost.lan', - 'jobname': 'job', - 'ctime': '1234567890', - 'qtime': '1234567890', - 'etime': '1234567890', - 'start': '1234567890', - 'end': '1234567890', - 'start_count': '1', - 'queue': 'batch', - 'Resource_List.nodes': '2:ppn=4', - 'Resource_List.cput': '2:00:00', - 'Resource_List.walltime': '1:00:00', - 'Resource_List.mem': '1GB', - 'Resource_List.vmem': '1GB', - 'resources_used.cput': '00:00:02', - 'resources_used.walltime': '00:00:01', - 'resources_used.mem': '1024kb', - 'resources_used.vmem': '2048kb', - 'exec_host': 'node01/1+node02/2', - 'exit_status': '0'}) + self.testjob = jobinfo( + "123456.fakehost.lan", + "02/13/2009 18:31:30", + "E", + { + "user": "foo", + "group": "bar", + "owner": "foo@login1.fakehost.lan", + "jobname": "job", + "ctime": "1234567890", + 
"qtime": "1234567890", + "etime": "1234567890", + "start": "1234567890", + "end": "1234567890", + "start_count": "1", + "queue": "batch", + "Resource_List.nodes": "2:ppn=4", + "Resource_List.cput": "2:00:00", + "Resource_List.walltime": "1:00:00", + "Resource_List.mem": "1GB", + "Resource_List.vmem": "1GB", + "resources_used.cput": "00:00:02", + "resources_used.walltime": "00:00:01", + "resources_used.mem": "1024kb", + "resources_used.vmem": "2048kb", + "exec_host": "node01/1+node02/2", + "exit_status": "0", + }, + ) + def test_eq(self): j1 = copy.deepcopy(self.testjob) j2 = copy.deepcopy(self.testjob) - self.assertEqual(j1==j2,True) + self.assertEqual(j1 == j2, True) j3 = copy.deepcopy(j1) - j3.set_resource("exit_status","-1") - self.assertEqual(j1==j3,False) + j3.set_resource("exit_status", "-1") + self.assertEqual(j1 == j3, False) + def test_numeric_jobid(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.numeric_jobid(),123456) + self.assertEqual(j1.numeric_jobid(), 123456) + def test_user(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.user(),"foo") + self.assertEqual(j1.user(), "foo") + def test_group(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.group(),"bar") + self.assertEqual(j1.group(), "bar") + def test_account(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.account(),None) + self.assertEqual(j1.account(), None) j2 = copy.deepcopy(self.testjob) - j2.set_resource("account","fnord") - self.assertEqual(j2.account(),"fnord") + j2.set_resource("account", "fnord") + self.assertEqual(j2.account(), "fnord") + def test_submithost(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.submithost(),"login1.fakehost.lan") + self.assertEqual(j1.submithost(), "login1.fakehost.lan") + def test_queue(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.queue(),"batch") + self.assertEqual(j1.queue(), "batch") + def test_start_count(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.start_count(),1) + self.assertEqual(j1.start_count(), 1) + def test_nodes(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.nodes(),'2:ppn=4') + self.assertEqual(j1.nodes(), "2:ppn=4") + def test_tasks(self): acctdata = "10/25/2018 17:02:14;S;13512.pitzer-batch.ten.osc.edu;user=troy group=PZS0708 account=PZS0708 jobname=ht-test-allowthreads-wholenode queue=newsyntax ctime=1540501291 qtime=1540501291 etime=1540501291 start_count=1 start=1540501334 owner=troy@pitzer-login01.hpc.osc.edu exec_host=p0221/0-79 Resource_List.walltime=01:00:00 Resource_List.gattr=sysp Resource_List.advres=ht-test.224004 Resource_Request_2.0=-L tasks=1:lprocs=80:memory=100GB:allowthreads \n" acctdata += "10/25/2018 17:02:48;E;13512.pitzer-batch.ten.osc.edu;user=troy group=PZS0708 account=PZS0708 jobname=ht-test-allowthreads-wholenode queue=newsyntax ctime=1540501291 qtime=1540501291 etime=1540501291 start_count=1 start=1540501334 owner=troy@pitzer-login01.hpc.osc.edu exec_host=p0221/0-79 Resource_List.walltime=01:00:00 Resource_List.gattr=sysp Resource_List.advres=ht-test.224004 Resource_Request_2.0=-L tasks=1:lprocs=80:memory=100GB:allowthreads session=149510 total_execution_slots=80 unique_node_count=1 end=1540501368 Exit_status=0 resources_used.cput=253 resources_used.vmem=159171128kb resources_used.walltime=00:00:33 resources_used.mem=15448300kb resources_used.energy_used=0\n" acctdata += "11/02/2018 10:04:27;S;16057.pitzer-batch.ten.osc.edu;user=troy group=PZS0708 account=PZS0708 jobname=twotasks queue=newsyntax ctime=1541167460 qtime=1541167460 
etime=1541167460 start_count=1 start=1541167467 owner=troy@pitzer-login01.hpc.osc.edu exec_host=p0034/0+p0070/0 Resource_List.walltime=01:00:00 Resource_List.gattr=sysp Resource_Request_2.0=-L tasks=1:lprocs=1:usecores:memory=4GB -L tasks=1:lprocs=1:usecores:memory=4GB \n" acctdata += "11/02/2018 10:05:58;E;16057.pitzer-batch.ten.osc.edu;user=troy group=PZS0708 account=PZS0708 jobname=twotasks queue=newsyntax ctime=1541167460 qtime=1541167460 etime=1541167460 start_count=1 start=1541167467 owner=troy@pitzer-login01.hpc.osc.edu exec_host=p0034/0+p0070/0 Resource_List.walltime=01:00:00 Resource_List.gattr=sysp Resource_Request_2.0=-L tasks=1:lprocs=1:usecores:memory=4GB -L tasks=1:lprocs=1:usecores:memory=4GB session=94063 total_execution_slots=2 unique_node_count=2 end=1541167558 Exit_status=271 resources_used.cput=0 resources_used.energy_used=0 resources_used.mem=1776kb resources_used.vmem=292048kb resources_used.walltime=00:01:24 \n" from tempfile import mkstemp - (tmpfd,tmpfile) = mkstemp() - tmpfh = os.fdopen(tmpfd,'w') + + (tmpfd, tmpfile) = mkstemp() + tmpfh = os.fdopen(tmpfd, "w") tmpfh.write(acctdata) tmpfh.flush() tmpfh.close() jobs = jobs_from_file(tmpfile) - if ( "13512.pitzer-batch.ten.osc.edu" not in jobs ): + if "13512.pitzer-batch.ten.osc.edu" not in jobs: self.fail() else: j1 = jobs["13512.pitzer-batch.ten.osc.edu"] - self.assertEqual(j1.tasks(),"1:lprocs=80:memory=100GB:allowthreads") - if ( "16057.pitzer-batch.ten.osc.edu" not in jobs ): + self.assertEqual(j1.tasks(), "1:lprocs=80:memory=100GB:allowthreads") + if "16057.pitzer-batch.ten.osc.edu" not in jobs: self.fail() else: j2 = jobs["16057.pitzer-batch.ten.osc.edu"] - self.assertEqual(j2.tasks(),"1:lprocs=1:usecores:memory=4GB+1:lprocs=1:usecores:memory=4GB") + self.assertEqual( + j2.tasks(), + "1:lprocs=1:usecores:memory=4GB+1:lprocs=1:usecores:memory=4GB", + ) try: os.unlink(tmpfile) - except: - self.fail() + except Exception: + self.fail() + def test_nodes_used(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.nodes_used(),['node01','node02']) + self.assertEqual(j1.nodes_used(), ["node01", "node02"]) + def test_num_nodes(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.num_nodes(),2) + self.assertEqual(j1.num_nodes(), 2) j2 = copy.deepcopy(self.testjob) - j2.set_resource("Resource_List.nodes","4:ppn=28") - j2.set_resource("Resource_List.neednodes","4:ppn=28") - j2.set_resource("exec_host","o0799/0-27+o0797/0-27+o0786/0-27+o0795/0-27") - self.assertEqual(j2.num_nodes(),4) + j2.set_resource("Resource_List.nodes", "4:ppn=28") + j2.set_resource("Resource_List.neednodes", "4:ppn=28") + j2.set_resource("exec_host", "o0799/0-27+o0797/0-27+o0786/0-27+o0795/0-27") + self.assertEqual(j2.num_nodes(), 4) j3 = copy.deepcopy(self.testjob) - j3.set_resource("Resource_List.nodes","3:ppn=28") - j3.set_resource("Resource_List.neednodes","3:ppn=28") - j3.set_resource("Resource_List.nodect","3") + j3.set_resource("Resource_List.nodes", "3:ppn=28") + j3.set_resource("Resource_List.neednodes", "3:ppn=28") + j3.set_resource("Resource_List.nodect", "3") j3.unset_resource("exec_host") - self.assertEqual(j3.num_nodes(),3) + self.assertEqual(j3.num_nodes(), 3) j4 = copy.deepcopy(self.testjob) - j4.set_resource("Resource_List.nodes","5:ppn=28") - j4.set_resource("Resource_List.neednodes","5:ppn=28") - j4.set_resource("unique_node_count","5") + j4.set_resource("Resource_List.nodes", "5:ppn=28") + j4.set_resource("Resource_List.neednodes", "5:ppn=28") + j4.set_resource("unique_node_count", "5") 
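        # Illustrative note: unique_node_count (added in TORQUE 4.2.9) is
        # checked before the other node-count sources in num_nodes(), so j4
        # still reports 5 nodes after exec_host is unset.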
j4.unset_resource("exec_host") - self.assertEqual(j4.num_nodes(),5) + self.assertEqual(j4.num_nodes(), 5) j5 = copy.deepcopy(self.testjob) - j5.set_resource("Resource_List.nodes","2:ppn=28+2:ppn=1") - j5.set_resource("Resource_List.neednodes","2:ppn=28+2:ppn=1") + j5.set_resource("Resource_List.nodes", "2:ppn=28+2:ppn=1") + j5.set_resource("Resource_List.neednodes", "2:ppn=28+2:ppn=1") j5.unset_resource("exec_host") - self.assertEqual(j5.num_nodes(),4) + self.assertEqual(j5.num_nodes(), 4) + def test_num_processors(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.num_processors(),8) + self.assertEqual(j1.num_processors(), 8) j2 = copy.deepcopy(self.testjob) - j2.set_resource('Resource_List.nodes','2:ppn=4+4:ppn=1') - self.assertEqual(j2.num_processors(),12) + j2.set_resource("Resource_List.nodes", "2:ppn=4+4:ppn=1") + self.assertEqual(j2.num_processors(), 12) j3 = copy.deepcopy(self.testjob) - j3.set_resource("Resource_List.nodes","3:ppn=28") - j3.set_resource("Resource_List.neednodes","3:ppn=28") - j3.set_resource("total_execution_slots","84") - self.assertEqual(j3.num_processors(),84) + j3.set_resource("Resource_List.nodes", "3:ppn=28") + j3.set_resource("Resource_List.neednodes", "3:ppn=28") + j3.set_resource("total_execution_slots", "84") + self.assertEqual(j3.num_processors(), 84) + def test_num_gpus(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.num_gpus(),0) + self.assertEqual(j1.num_gpus(), 0) j2 = copy.deepcopy(self.testjob) - j2.set_resource('Resource_List.nodes','2:ppn=4:gpus=2') - self.assertEqual(j2.num_gpus(),4) + j2.set_resource("Resource_List.nodes", "2:ppn=4:gpus=2") + self.assertEqual(j2.num_gpus(), 4) j3 = copy.deepcopy(self.testjob) - j3.set_resource('Resource_List.nodes','2:ppn=4:gpus=2+4:ppn=1:gpus=1') - self.assertEqual(j3.num_gpus(),8) + j3.set_resource("Resource_List.nodes", "2:ppn=4:gpus=2+4:ppn=1:gpus=1") + self.assertEqual(j3.num_gpus(), 8) + def test_mem_limit_kb(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.mem_limit_kb(),1024*1024) + self.assertEqual(j1.mem_limit_kb(), 1024 * 1024) + def test_mem_used_kb(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.mem_used_kb(),1024) + self.assertEqual(j1.mem_used_kb(), 1024) + def test_vmem_limit_kb(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.vmem_limit_kb(),1024*1024) + self.assertEqual(j1.vmem_limit_kb(), 1024 * 1024) + def test_vmem_used_kb(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.vmem_used_kb(),2048) + self.assertEqual(j1.vmem_used_kb(), 2048) + def test_cput_limit_sec(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.cput_limit_sec(),7200) + self.assertEqual(j1.cput_limit_sec(), 7200) + def test_cput_used_sec(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.cput_used_sec(),2) + self.assertEqual(j1.cput_used_sec(), 2) + def test_walltime_limit_sec(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.walltime_limit_sec(),3600) + self.assertEqual(j1.walltime_limit_sec(), 3600) + def test_walltime_used_sec(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.walltime_used_sec(),1) + self.assertEqual(j1.walltime_used_sec(), 1) # bogus time value, should use end-start (in this case 0) j2 = copy.deepcopy(self.testjob) - j2.set_resource("resources_used.walltime","-342359:-42:-21") - self.assertEqual(j2.walltime_used_sec(),0) + j2.set_resource("resources_used.walltime", "-342359:-42:-21") + self.assertEqual(j2.walltime_used_sec(), 0) + def test_software(self): j1 = 
copy.deepcopy(self.testjob) - self.assertEqual(j1.software(),None) + self.assertEqual(j1.software(), None) j2 = copy.deepcopy(self.testjob) - j2.set_resource('Resource_List.software','abaqus+2') - self.assertEqual(j2.software(),"abaqus+2") + j2.set_resource("Resource_List.software", "abaqus+2") + self.assertEqual(j2.software(), "abaqus+2") + def test_system(self): j1 = copy.deepcopy(self.testjob) - self.assertEqual(j1.system(),None) + self.assertEqual(j1.system(), None) j2 = copy.deepcopy(self.testjob) - j2.set_resource('system','fakehost') - self.assertEqual(j2.system(),"fakehost") + j2.set_resource("system", "fakehost") + self.assertEqual(j2.system(), "fakehost") + def test_write_last_accounting_record(self): # hack to handle class namespace change in python 3.x - if ( sys.version_info<(3,0) ): + if sys.version_info < (3, 0): import StringIO + fd1 = StringIO.StringIO() else: import io + fd1 = io.StringIO() j1 = copy.deepcopy(self.testjob) j1.write_last_accounting_record(fd1) - self.assertEqual(fd1.getvalue(),"02/13/2009 18:31:30;E;123456.fakehost.lan;Resource_List.cput=2:00:00 Resource_List.mem=1GB Resource_List.nodes=2:ppn=4 Resource_List.vmem=1GB Resource_List.walltime=1:00:00 ctime=1234567890 end=1234567890 etime=1234567890 exec_host=node01/1+node02/2 exit_status=0 group=bar jobname=job owner=foo@login1.fakehost.lan qtime=1234567890 queue=batch resources_used.cput=00:00:02 resources_used.mem=1024kb resources_used.vmem=2048kb resources_used.walltime=00:00:01 start=1234567890 start_count=1 system=None user=foo\n") + self.assertEqual( + fd1.getvalue(), + "02/13/2009 18:31:30;E;123456.fakehost.lan;Resource_List.cput=2:00:00 Resource_List.mem=1GB Resource_List.nodes=2:ppn=4 Resource_List.vmem=1GB Resource_List.walltime=1:00:00 ctime=1234567890 end=1234567890 etime=1234567890 exec_host=node01/1+node02/2 exit_status=0 group=bar jobname=job owner=foo@login1.fakehost.lan qtime=1234567890 queue=batch resources_used.cput=00:00:02 resources_used.mem=1024kb resources_used.vmem=2048kb resources_used.walltime=00:00:01 start=1234567890 start_count=1 system=None user=foo\n", + ) fd1.close() @@ -717,7 +797,7 @@ def raw_data_from_file(filename): (jobid, time, record_type, resources) - Resources are returned in a dictionary containing entries for each + Resources are returned in a dictionary containing entries for each resource name and corresponding value """ try: @@ -730,14 +810,21 @@ def raw_data_from_file(filename): return [] output = [] for line in acct_data: - + # Get the fields from the log entry try: - time, record_type, jobid, resources = line.rstrip('\n').split(";",3) + time, record_type, jobid, resources = line.rstrip("\n").split(";", 3) except ValueError: - print("ERROR: Invalid number of fields (requires 4). Unable to parse entry: %s" % (str(line.split(";",3)))) + print( + "ERROR: Invalid number of fields (requires 4). Unable to parse entry: %s" + % (str(line.split(";", 3))) + ) continue - + + # skip license line + if jobid == "license": + continue + # Create a dict for the various resources resources_dict = dict() for resource in resources.split(" "): @@ -753,46 +840,49 @@ def raw_data_from_file(filename): # way the new NUMA-aware syntax implements multi-req jobs. So, if there's # more than one tasks=[...] entry, compound them with pluses (similar to # how compound nodes= requests are done). 
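                # Illustrative example, as exercised by test_tasks above: a
                # record carrying two "-L tasks=1:lprocs=1:usecores:memory=4GB"
                # requests ends up with resources_dict["tasks"] ==
                # "1:lprocs=1:usecores:memory=4GB+1:lprocs=1:usecores:memory=4GB".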
- if ( key=="tasks" and "tasks" in resources_dict.keys() ): - resources_dict[key] = resources_dict[key]+"+"+value + if key == "tasks" and "tasks" in resources_dict.keys(): + resources_dict[key] = resources_dict[key] + "+" + value else: resources_dict[key] = value - elif ( resource=="-L" ): + elif resource == "-L": # This is detritus from how multiple -L tasks=[...] are included in accounting logs, # so ignore it. pass - elif ( resource!="" ): - logger.warn("filename=%s, jobid=%s: Malformed resource \"%s\"" % (filename,jobid,resource)) - + elif resource != "": + logger.warn( + 'filename=%s, jobid=%s: Malformed resource "%s"' + % (filename, jobid, resource) + ) + # Store the data in the output output.append((jobid, time, record_type, resources_dict)) - #break + # break acct_data.close() return output -def raw_data_from_files(filelist,warn_missing=False): +def raw_data_from_files(filelist, warn_missing=False): """ Parses a list of files containing multiple PBS accounting log entries. Returns a list of tuples containing the following information: (jobid, time, record_type, resources) - Resources are returned in a dictionary containing entries for each + Resources are returned in a dictionary containing entries for each resource name and corresponding value """ rawdata = [] for filename in filelist: - if ( os.path.exists(filename) ): + if os.path.exists(filename): for record in raw_data_from_file(filename): rawdata.append(record) - elif ( warn_missing ): + elif warn_missing: logger.warn("%s does not exist" % filename) continue return rawdata -def records_to_jobs(rawdata,system=None): +def records_to_jobs(rawdata, system=None): """ Processes an array containing multiple PBS accounting log entries. Returns a hash of lightly postprocessed data (i.e. one entry per jobid rather @@ -804,18 +894,19 @@ def records_to_jobs(rawdata,system=None): update_time = record[1] record_type = record[2] resources = record[3] - if ( jobid not in output ): - output[jobid] = jobinfo(jobid,update_time,record_type,resources,system) + if jobid not in output: + output[jobid] = jobinfo(jobid, update_time, record_type, resources, system) # may need an extra case here for jobs with multiple S and E # records (e.g. preemption) else: output[jobid].set_update_time(update_time) output[jobid].set_state(record_type) for key in resources.keys(): - output[jobid].set_resource(key,resources[key]) + output[jobid].set_resource(key, resources[key]) return output -def write_record_to_accounting_log(record,fd): + +def write_record_to_accounting_log(record, fd): """ Write a raw accounting record in the "standard" PBS format """ @@ -825,125 +916,129 @@ def write_record_to_accounting_log(record,fd): resources = record[3] resourcestring = "" for key in sorted(resources.keys()): - if ( resourcestring=="" ): - resourcestring = "%s=%s" % (key,resources[key]) + if resourcestring == "": + resourcestring = "%s=%s" % (key, resources[key]) else: - resourcestring += " %s=%s" % (key,resources[key]) + resourcestring += " %s=%s" % (key, resources[key]) # format is datestamp;state;jobid;resources_separated_by_whitespace - fd.write("%s;%s;%s;%s\n" % (datestamp,state,jobid,resourcestring)) + fd.write("%s;%s;%s;%s\n" % (datestamp, state, jobid, resourcestring)) -def jobs_from_file(filename,system=None,warn_missing=False): +def jobs_from_file(filename, system=None, warn_missing=False): """ Parses a file containing multiple PBS accounting log entries. Returns a hash of lightly postprocessed data (i.e. one entry per jobid rather than one per record). 
""" - return jobs_from_files([filename],system,warn_missing) + return jobs_from_files([filename], system, warn_missing) -def jobs_from_files(filelist,system=None,warn_missing=False): +def jobs_from_files(filelist, system=None, warn_missing=False): """ Parses a list of files containing multiple PBS accounting log entries. Returns a hash of lightly postprocessed data (i.e. one entry per jobid rather than one per record). """ - return records_to_jobs(raw_data_from_files(filelist,warn_missing),system) + return records_to_jobs(raw_data_from_files(filelist, warn_missing), system) -def jobinfo_from_epilogue(jobid,reqlist="",usedlist="",queue=None,account=None,exit_status=0,system=None): +def jobinfo_from_epilogue( + jobid, reqlist="", usedlist="", queue=None, account=None, exit_status=0, system=None +): """ Create jobinfo from the information provided to the TORQUE epilogue. """ - update_time = datetime.datetime.strftime(datetime.datetime.now(),"%m/%d/%Y %H:%M:%S") + update_time = datetime.datetime.strftime( + datetime.datetime.now(), "%m/%d/%Y %H:%M:%S" + ) rsrc = {} rsrc["exit_status"] = exit_status - if ( queue is not None ): + if queue is not None: rsrc["queue"] = queue - if ( account is not None ): + if account is not None: rsrc["account"] = account - for req in reqlist.split(','): + for req in reqlist.split(","): try: - (key,value) = req.split('=',1) - rsrc["Resource_List."+key] = value - except: + (key, value) = req.split("=", 1) + rsrc["Resource_List." + key] = value + except Exception: pass - for used in usedlist.split(','): + for used in usedlist.split(","): try: - (key,value) = used.split('=',1) - rsrc["resources_used."+key] = value - except: + (key, value) = used.split("=", 1) + rsrc["resources_used." + key] = value + except Exception: pass - return jobinfo(jobid,update_time,"E",rsrc,system) + return jobinfo(jobid, update_time, "E", rsrc, system) def time_to_sec(timestr): """ Convert string time into seconds. 
""" - if ( timestr is None ): + if timestr is None: return 0 - elif ( isinstance(timestr,int) ): + elif isinstance(timestr, int): return timestr - if ( not re.match("[\d\-:]+",timestr) ): - raise ValueError("Malformed time \""+timestr+"\"") + if not re.match("[\d\-:]+", timestr): + raise ValueError('Malformed time "' + timestr + '"') sec = 0 elt = timestr.split(":") - if ( len(elt)==1 ): - # raw seconds -- TORQUE 5.1.2 did this on walltime and cput + if len(elt) == 1: + # raw seconds -- TORQUE 5.1.2 did this on walltime and cput # for some reason sec = int(elt[0]) - elif ( len(elt)==2 ): + elif len(elt) == 2: # mm:ss -- should be rare to nonexistent in TORQUE - sec = 60*int(elt[0])+int(elt[1]) - elif ( len(elt)==3 ): + sec = 60 * int(elt[0]) + int(elt[1]) + elif len(elt) == 3: # hh:mm:ss -- most common case - sec = 3600*int(elt[0])+60*int(elt[1])+int(elt[2]) - elif ( len(elt)==4 ): + sec = 3600 * int(elt[0]) + 60 * int(elt[1]) + int(elt[2]) + elif len(elt) == 4: # dd:hh:mm:ss -- not used in TORQUE, occasionally appears in Moab # output - sec = 3600*(24*int(elt[0])+int(elt[1]))+60*int(elt[2])+int(elt[2]) + sec = 3600 * (24 * int(elt[0]) + int(elt[1])) + 60 * int(elt[2]) + int(elt[2]) else: - raise ValueError("Malformed time \""+timestr+"\"") + raise ValueError('Malformed time "' + timestr + '"') return sec def sec_to_time(seconds): - if ( isinstance(seconds,str) ): + if isinstance(seconds, str): return seconds # from https://stackoverflow.com/questions/775049/python-time-seconds-to-hms m, s = divmod(seconds, 60) h, m = divmod(m, 60) - return "%02d:%02d:%02d" % (h,m,s) + return "%02d:%02d:%02d" % (h, m, s) def mem_to_kb(memstr): - match = re.match("^(\d+)([TtGgMmKk])([BbWw])$",memstr) - if ( match is not None and len(match.groups())==3 ): + match = re.match("^(\d+)([TtGgMmKk])([BbWw])$", memstr) + if match is not None and len(match.groups()) == 3: number = int(match.group(1)) multiplier = 1 numbytes = 1 factor = match.group(2) - if ( factor in ["T","t"] ): - multiplier = 1024*1024*1024 - elif ( factor in ["G","g"] ): - multiplier = 1024*1024 - elif ( factor in ["M","m"] ): + if factor in ["T", "t"]: + multiplier = 1024 * 1024 * 1024 + elif factor in ["G", "g"]: + multiplier = 1024 * 1024 + elif factor in ["M", "m"]: multiplier = 1024 units = match.group(3) - if ( units in ["W","w"] ): + if units in ["W", "w"]: numbytes = 8 - return number*multiplier*numbytes - elif ( re.match("^(\d+)([BbWw])$",memstr) ): - match = re.match("^(\d+)([BbWw])$",memstr) + return number * multiplier * numbytes + elif re.match("^(\d+)([BbWw])$", memstr): + match = re.match("^(\d+)([BbWw])$", memstr) number = int(match.group(1)) numbytes = 1 units = match.group(2) - if ( units in ["W","w"] ): + if units in ["W", "w"]: numbytes = 8 - return number*numbytes/1024 + return number * numbytes / 1024 else: - raise ValueError("Invalid memory expression \""+memstr+"\"") + raise ValueError('Invalid memory expression "' + memstr + '"') class pbsacctTestCase(unittest.TestCase): @@ -953,57 +1048,74 @@ class pbsacctTestCase(unittest.TestCase): # def test_jobs_from_file(self): # def test_jobs_from_files(self): def test_jobinfo_from_epilogue(self): - j1 = jobinfo_from_epilogue("123456.testfakehost.lan","cput=2:00:00,mem=1GB,nodes=2:ppn=4,vmem=1GB,walltime=1:00:00,neednodes=2:ppn=4","cput=00:00:02,mem=1024kb,vmem=2048kb,walltime=00:00:01","batch") - self.assertEqual(j1.numeric_jobid(),123456) - self.assertEqual(j1.account(),None) - self.assertEqual(j1.queue(),"batch") - self.assertEqual(j1.num_nodes(),2) - 
self.assertEqual(j1.num_processors(),8) - self.assertEqual(j1.num_gpus(),0) - self.assertEqual(j1.cput_limit_sec(),7200) - self.assertEqual(j1.cput_used_sec(),2) - self.assertEqual(j1.walltime_limit_sec(),3600) - self.assertEqual(j1.walltime_used_sec(),1) - self.assertEqual(j1.mem_limit_kb(),1024*1024) - self.assertEqual(j1.mem_used_kb(),1024) - self.assertEqual(j1.vmem_limit_kb(),1024*1024) - self.assertEqual(j1.vmem_used_kb(),2048) - self.assertEqual(j1.software(),None) + j1 = jobinfo_from_epilogue( + "123456.testfakehost.lan", + "cput=2:00:00,mem=1GB,nodes=2:ppn=4,vmem=1GB,walltime=1:00:00,neednodes=2:ppn=4", + "cput=00:00:02,mem=1024kb,vmem=2048kb,walltime=00:00:01", + "batch", + ) + self.assertEqual(j1.numeric_jobid(), 123456) + self.assertEqual(j1.account(), None) + self.assertEqual(j1.queue(), "batch") + self.assertEqual(j1.num_nodes(), 2) + self.assertEqual(j1.num_processors(), 8) + self.assertEqual(j1.num_gpus(), 0) + self.assertEqual(j1.cput_limit_sec(), 7200) + self.assertEqual(j1.cput_used_sec(), 2) + self.assertEqual(j1.walltime_limit_sec(), 3600) + self.assertEqual(j1.walltime_used_sec(), 1) + self.assertEqual(j1.mem_limit_kb(), 1024 * 1024) + self.assertEqual(j1.mem_used_kb(), 1024) + self.assertEqual(j1.vmem_limit_kb(), 1024 * 1024) + self.assertEqual(j1.vmem_used_kb(), 2048) + self.assertEqual(j1.software(), None) + def test_mem_to_kb(self): - self.assertEqual(mem_to_kb('1000kb'),1000) - self.assertEqual(mem_to_kb('1000mb'),1000*1024) - self.assertEqual(mem_to_kb('1000gb'),1000*1024*1024) - self.assertEqual(mem_to_kb('1000kw'),8*1000) - self.assertEqual(mem_to_kb('1000mw'),8*1000*1024) - self.assertEqual(mem_to_kb('1000gw'),8*1000*1024*1024) + self.assertEqual(mem_to_kb("1000kb"), 1000) + self.assertEqual(mem_to_kb("1000mb"), 1000 * 1024) + self.assertEqual(mem_to_kb("1000gb"), 1000 * 1024 * 1024) + self.assertEqual(mem_to_kb("1000kw"), 8 * 1000) + self.assertEqual(mem_to_kb("1000mw"), 8 * 1000 * 1024) + self.assertEqual(mem_to_kb("1000gw"), 8 * 1000 * 1024 * 1024) + def test_sec_to_time(self): - self.assertEqual(sec_to_time(1),'00:00:01') - self.assertEqual(sec_to_time(2),'00:00:02') - self.assertEqual(sec_to_time(10),'00:00:10') - self.assertEqual(sec_to_time(60),'00:01:00') - self.assertEqual(sec_to_time(3600),'01:00:00') - self.assertEqual(sec_to_time(2*3600),'02:00:00') - self.assertEqual(sec_to_time(10*3600),'10:00:00') - self.assertEqual(sec_to_time(24*3600),'24:00:00') - self.assertEqual(sec_to_time(7*24*3600),'168:00:00') + self.assertEqual(sec_to_time(1), "00:00:01") + self.assertEqual(sec_to_time(2), "00:00:02") + self.assertEqual(sec_to_time(10), "00:00:10") + self.assertEqual(sec_to_time(60), "00:01:00") + self.assertEqual(sec_to_time(3600), "01:00:00") + self.assertEqual(sec_to_time(2 * 3600), "02:00:00") + self.assertEqual(sec_to_time(10 * 3600), "10:00:00") + self.assertEqual(sec_to_time(24 * 3600), "24:00:00") + self.assertEqual(sec_to_time(7 * 24 * 3600), "168:00:00") + def test_time_to_sec(self): - self.assertEqual(1,time_to_sec('00:00:01')) - self.assertEqual(2,time_to_sec('00:00:02')) - self.assertEqual(10,time_to_sec('00:00:10')) - self.assertEqual(60,time_to_sec('00:01:00')) - self.assertEqual(3600,time_to_sec('01:00:00')) - self.assertEqual(2*3600,time_to_sec('02:00:00')) - self.assertEqual(10*3600,time_to_sec('10:00:00')) - self.assertEqual(1*24*3600,time_to_sec('1:00:00:00')) - self.assertEqual(7*24*3600,time_to_sec('7:00:00:00')) - self.assertEqual(-1232494941,time_to_sec('-342359:-42:-21')) + self.assertEqual(1, time_to_sec("00:00:01")) + 
self.assertEqual(2, time_to_sec("00:00:02")) + self.assertEqual(10, time_to_sec("00:00:10")) + self.assertEqual(60, time_to_sec("00:01:00")) + self.assertEqual(3600, time_to_sec("01:00:00")) + self.assertEqual(2 * 3600, time_to_sec("02:00:00")) + self.assertEqual(10 * 3600, time_to_sec("10:00:00")) + self.assertEqual(1 * 24 * 3600, time_to_sec("1:00:00:00")) + self.assertEqual(7 * 24 * 3600, time_to_sec("7:00:00:00")) + self.assertEqual(-1232494941, time_to_sec("-342359:-42:-21")) class pbsacctDB: - def __init__(self, host=None, dbtype="mysql", - db="pbsacct", dbuser=None, dbpasswd=None, - jobs_table="Jobs", config_table="Config", - sw_table="Software",system=None,sqlitefile=None): + def __init__( + self, + host=None, + dbtype="mysql", + db="pbsacct", + dbuser=None, + dbpasswd=None, + jobs_table="Jobs", + config_table="Config", + sw_table="Software", + system=None, + sqlitefile=None, + ): self.setServerName(host) self.setType(dbtype) self.setName(db) @@ -1024,11 +1136,11 @@ def getServerName(self): return self._dbhost def setType(self, dbtype): - supported_dbs = ["mysql","pgsql","sqlite2","sqlite3"] - if ( dbtype in supported_dbs ): + supported_dbs = ["mysql", "pgsql", "sqlite2", "sqlite3"] + if dbtype in supported_dbs: self._dbtype = dbtype else: - raise RuntimeError("Requested unimplemented database type \"%s\"" % dbtype) + raise RuntimeError('Requested unimplemented database type "%s"' % dbtype) def getType(self): return self._dbtype @@ -1059,7 +1171,7 @@ def setConfigTable(self, config_table): def getConfigTable(self): return self._cfgtable - + def setSoftwareTable(self, sw_table): self._swtable = sw_table @@ -1072,77 +1184,95 @@ def setSystem(self, system): def getSystem(self): return self._system - def setSQLiteFile(self,filename): + def setSQLiteFile(self, filename): self._sqlitefile = filename def getSQLiteFile(self): return self._sqlitefile - + def readConfigFile(self, cfgfilename): - if ( not os.path.exists(cfgfilename) ): + if not os.path.exists(cfgfilename): raise IOError("%s does not exist" % cfgfilename) cfgfile = open(cfgfilename) for line in cfgfile.readlines(): - if ( not line.startswith("#") and not re.match('^\s*$',line) ): + if not line.startswith("#") and not re.match("^\s*$", line): try: - (keyword,value) = line.rstrip('\n').split("=") - if ( keyword=="dbhost" ): + (keyword, value) = line.rstrip("\n").split("=") + if keyword == "dbhost": self.setServerName(value) - elif ( keyword=="dbtype" ): + elif keyword == "dbtype": self.setType(value) - elif ( keyword=="dbname" ): + elif keyword == "dbname": self.setName(value) - elif ( keyword=="dbuser" ): + elif keyword == "dbuser": self.setUser(value) - elif ( keyword=="dbpasswd" ): + elif keyword == "dbpasswd": self.setPassword(value) - elif ( keyword=="jobstable" ): + elif keyword == "jobstable": self.setJobsTable(value) - elif ( keyword=="configtable" ): + elif keyword == "configtable": self.setConfigTable(value) - elif ( keyword=="sqlitefile" ): + elif keyword == "sqlitefile": self.setSQLiteFile(value) - elif ( keyword=="softwaretable" ): + elif keyword == "softwaretable": self.setSoftwareTable(value) - elif ( keyword=="system" ): + elif keyword == "system": self.setSystem(value) else: - raise RuntimeError("Unknown keyword \"%s\"" % keyword) + raise RuntimeError('Unknown keyword "%s"' % keyword) except Exception as e: logger.warn(str(e)) pass def connect(self): - if ( self._dbhandle is not None ): + if self._dbhandle is not None: return self._dbhandle - if ( self.getType()=="mysql" ): - import MySQLdb - self._dbhandle = 
MySQLdb.connect(host=self._dbhost,
-                                         db=self._dbname,
-                                         user=self._dbuser,
-                                         passwd=self._dbpasswd)
+        if self.getType() == "mysql":
+            try:
+                import MySQLdb
+            except ImportError:
+                try:
+                    import pymysql
+
+                    pymysql.install_as_MySQLdb()
+                    import MySQLdb
+                except ImportError:
+                    raise ImportError(
+                        "MySQL Python library not found. Install python3-mysqlclient or PyMySQL"
+                    )
+            self._dbhandle = MySQLdb.connect(
+                host=self._dbhost,
+                db=self._dbname,
+                user=self._dbuser,
+                passwd=self._dbpasswd,
+            )
             return self._dbhandle
-        elif ( self.getType()=="pgsql" ):
+        elif self.getType() == "pgsql":
             import psycopg2
-            self._dbhandle = psycopg2.connect(host=self._dbhost,
-                                              db=self._dbname,
-                                              user=self._dbuser,
-                                              passwd=self._dbpasswd)
+
+            # psycopg2 expects dbname= and password= keyword arguments
+            self._dbhandle = psycopg2.connect(
+                host=self._dbhost,
+                dbname=self._dbname,
+                user=self._dbuser,
+                password=self._dbpasswd,
+            )
             return self._dbhandle
-        elif ( self.getType()=="sqlite2" ):
+        elif self.getType() == "sqlite2":
             import pysqlite2.dbapi2 as sqlite
-            if ( self.getSQLiteFile() is None ):
+
+            if self.getSQLiteFile() is None:
                 raise RuntimeError("No SQLite database file specified")
             self._dbhandle = sqlite.connect(self.getSQLiteFile())
             return self._dbhandle
-        elif ( self.getType()=="sqlite3" ):
+        elif self.getType() == "sqlite3":
            import sqlite3 as sqlite
-            if ( self.getSQLiteFile() is None ):
+
+            if self.getSQLiteFile() is None:
                 raise RuntimeError("No SQLite database file specified")
             self._dbhandle = sqlite.connect(self.getSQLiteFile())
             return self._dbhandle
         else:
-            raise RuntimeError("Unimplemented database type \"%s\"" % self.getType())
+            raise RuntimeError('Unimplemented database type "%s"' % self.getType())

     def close(self):
         self.connect().close()

@@ -1153,189 +1283,252 @@ def commit(self):
         self.connect().commit()

     def cursor(self):
-        if ( self._cursor is None ):
+        if self._cursor is None:
             self._cursor = self.connect().cursor()
         return self._cursor

     def rollback(self):
         self.connect().rollback()

-    def job_exists(self,jobid,append_to_jobid=None):
+    def job_exists(self, jobid, append_to_jobid=None):
         myjobid = jobid
-        if ( append_to_jobid is not None ):
-            myjobid = jobid+append_to_jobid
-        sql = "SELECT jobid FROM %s WHERE jobid='%s'" % (self.getJobsTable(),myjobid)
+        if append_to_jobid is not None:
+            myjobid = jobid + append_to_jobid
+        sql = "SELECT jobid FROM %s WHERE jobid='%s'" % (self.getJobsTable(), myjobid)
         self.cursor().execute(sql)
         results = self.cursor().fetchall()
-        if ( len(results)==0 ):
+        if len(results) == 0:
             return False
-        elif ( len(results)==1 ):
+        elif len(results) == 1:
             return True
         else:
-            raise RuntimeError("More than one result for jobid %s (should not be possible)" % jobid)
+            raise RuntimeError(
+                "More than one result for jobid %s (should not be possible)" % jobid
+            )

-    def _timestamp_to_date(self,ts):
-        if ( self.getType() in ["mysql"] ):
+    def _timestamp_to_date(self, ts):
+        if self.getType() in ["mysql"]:
             return "DATE(FROM_UNIXTIME('%d'))" % ts
-        elif ( self.getType() in ["pgsql"] ):
+        elif self.getType() in ["pgsql"]:
             return "DATE(TIMESTAMP 'epoch' + %d * INTERVAL '1 second')" % ts
-        elif ( self.getType() in ["sqlite2","sqlite3"] ):
+        elif self.getType() in ["sqlite2", "sqlite3"]:
             return "DATE('%d','UNIXEPOCH')" % ts
         else:
-            raise RuntimeError("Unable to determine ts->date conversion for database type \"%s\"" % self.getType())
-
-    def _job_set_fields(self,job,system=None,oldjob=None,append_to_jobid=None):
-        if ( not isinstance(job,jobinfo) ):
-            raise TypeError("\"job\" object is of wrong type: %s" % str(job))
-        if ( oldjob is not None and not 
isinstance(oldjob,jobinfo) ): - raise TypeError("\"oldjob\" object is of wrong type: %s" % str(oldjob)) + raise RuntimeError( + 'Unable to determine ts->date conversion for database type "%s"' + % self.getType() + ) + + def _job_set_fields(self, job, system=None, oldjob=None, append_to_jobid=None): + if not isinstance(job, jobinfo): + raise TypeError('"job" object is of wrong type: %s' % str(job)) + if oldjob is not None and not isinstance(oldjob, jobinfo): + raise TypeError('"oldjob" object is of wrong type: %s' % str(oldjob)) myjobid = job.jobid() - if ( append_to_jobid is not None ): - myjobid = job.jobid()+append_to_jobid + if append_to_jobid is not None: + myjobid = job.jobid() + append_to_jobid fields_to_set = {} - if ( oldjob is None ): - fields_to_set["jobid"] = "'%s'" % myjobid - if ( system is not None and - ( oldjob is None or job.system()!=oldjob.system() ) ): - fields_to_set["system"] = "'%s'" % system - if ( job.user() is not None and - ( oldjob is None or job.user()!=oldjob.user() ) ): - fields_to_set["username"] = "'%s'" % job.user() - if ( job.group() is not None and - ( oldjob is None or job.group()!=oldjob.group() ) ): - fields_to_set["groupname"] = "'%s'" % job.group() - if ( job.submithost() is not None and - ( oldjob is None or job.submithost()!=oldjob.submithost() ) ): - fields_to_set["submithost"] = "'%s'" % job.submithost() - if ( job.name() is not None and - ( oldjob is None or job.name()!=oldjob.name() ) ): - fields_to_set["jobname"] = "'%s'" % job.name() - if ( job.num_processors()>0 and - ( oldjob is None or job.num_processors()!=oldjob.num_processors()) ): - fields_to_set["nproc"] = "'%d'" % job.num_processors() - if ( job.num_nodes()>0 and - ( oldjob is None or job.num_nodes()!=oldjob.num_nodes()) ): - fields_to_set["nodect"] = "'%d'" % job.num_nodes() - if ( job.nodes() is not None and - ( oldjob is None or job.nodes()!=oldjob.nodes() ) ): - fields_to_set["nodes"] = "'%s'" % job.nodes() + if oldjob is None: + fields_to_set["jobid"] = "'%s'" % myjobid + if system is not None and (oldjob is None or job.system() != oldjob.system()): + fields_to_set["system"] = "'%s'" % system + if job.user() is not None and (oldjob is None or job.user() != oldjob.user()): + fields_to_set["username"] = "'%s'" % job.user() + if job.group() is not None and ( + oldjob is None or job.group() != oldjob.group() + ): + fields_to_set["groupname"] = "'%s'" % job.group() + if job.submithost() is not None and ( + oldjob is None or job.submithost() != oldjob.submithost() + ): + fields_to_set["submithost"] = "'%s'" % job.submithost() + if job.name() is not None and (oldjob is None or job.name() != oldjob.name()): + fields_to_set["jobname"] = "'%s'" % job.name() + if job.num_processors() > 0 and ( + oldjob is None or job.num_processors() != oldjob.num_processors() + ): + fields_to_set["nproc"] = "'%d'" % job.num_processors() + if job.num_nodes() > 0 and ( + oldjob is None or job.num_nodes() != oldjob.num_nodes() + ): + fields_to_set["nodect"] = "'%d'" % job.num_nodes() + if job.nodes() is not None and ( + oldjob is None or job.nodes() != oldjob.nodes() + ): + fields_to_set["nodes"] = "'%s'" % job.nodes() # This is a bit hackish, but otherwise ngpus never gets set in the DB for some reason - #if ( job.num_gpus()>0 and + # if ( job.num_gpus()>0 and # ( oldjob is None or job.num_gpus()!=oldjob.num_gpus()) ): - if ( job.num_gpus()>0 ): - fields_to_set["ngpus"] = "'%d'" % job.num_gpus() - if ( job.feature() is not None and - ( oldjob is None or job.feature()!=oldjob.feature() ) ): - 
fields_to_set["feature"] = "'%s'" % job.feature() - if ( job.gattr() is not None and - ( oldjob is None or job.gattr()!=oldjob.gattr() ) ): - fields_to_set["gattr"] = "'%s'" % job.gattr() - if ( job.gres() is not None and - ( oldjob is None or job.gres()!=oldjob.gres() ) ): - fields_to_set["gres"] = "'%s'" % job.gres() - if ( job.queue() is not None and - ( oldjob is None or job.queue()!=oldjob.queue() ) ): - fields_to_set["queue"] = "'%s'" % job.queue() - if ( job.qos() is not None and - ( oldjob is None or job.qos()!=oldjob.qos() ) ): - fields_to_set["qos"] = "'%s'" % job.qos() - if ( job.qtime_ts()>0 and - ( oldjob is None or job.qtime_ts()!=oldjob.qtime_ts() ) ): - fields_to_set["submit_ts"] = "'%d'" % job.qtime_ts() - fields_to_set["submit_date"] = self._timestamp_to_date(job.qtime_ts()) - if ( job.etime_ts()>0 and - ( oldjob is None or job.etime_ts()!=oldjob.etime_ts() ) ): - fields_to_set["eligible_ts"] = "'%d'" % job.etime_ts() - fields_to_set["eligible_date"] = self._timestamp_to_date(job.etime_ts()) - if ( job.start_ts()>0 and - ( oldjob is None or job.start_ts()!=oldjob.start_ts() ) ): - fields_to_set["start_ts"] = "'%d'" % job.start_ts() - fields_to_set["start_date"] = self._timestamp_to_date(job.start_ts()) - if ( job.end_ts()>0 and - ( oldjob is None or job.end_ts()!=oldjob.end_ts() ) ): - fields_to_set["end_ts"] = "'%d'" % job.end_ts() - fields_to_set["end_date"] = self._timestamp_to_date(job.end_ts()) - if ( job.start_count()>0 and - ( oldjob is None or job.start_count()!=oldjob.start_count() ) ): + if job.num_gpus() > 0: + fields_to_set["ngpus"] = "'%d'" % job.num_gpus() + if job.feature() is not None and ( + oldjob is None or job.feature() != oldjob.feature() + ): + fields_to_set["feature"] = "'%s'" % job.feature() + if job.gattr() is not None and ( + oldjob is None or job.gattr() != oldjob.gattr() + ): + fields_to_set["gattr"] = "'%s'" % job.gattr() + if job.gres() is not None and (oldjob is None or job.gres() != oldjob.gres()): + fields_to_set["gres"] = "'%s'" % job.gres() + if job.queue() is not None and ( + oldjob is None or job.queue() != oldjob.queue() + ): + fields_to_set["queue"] = "'%s'" % job.queue() + if job.qos() is not None and (oldjob is None or job.qos() != oldjob.qos()): + fields_to_set["qos"] = "'%s'" % job.qos() + if job.qtime_ts() > 0 and ( + oldjob is None or job.qtime_ts() != oldjob.qtime_ts() + ): + fields_to_set["submit_ts"] = "'%d'" % job.qtime_ts() + fields_to_set["submit_date"] = self._timestamp_to_date(job.qtime_ts()) + if job.etime_ts() > 0 and ( + oldjob is None or job.etime_ts() != oldjob.etime_ts() + ): + fields_to_set["eligible_ts"] = "'%d'" % job.etime_ts() + fields_to_set["eligible_date"] = self._timestamp_to_date(job.etime_ts()) + if job.start_ts() > 0 and ( + oldjob is None or job.start_ts() != oldjob.start_ts() + ): + fields_to_set["start_ts"] = "'%d'" % job.start_ts() + fields_to_set["start_date"] = self._timestamp_to_date(job.start_ts()) + if job.end_ts() > 0 and (oldjob is None or job.end_ts() != oldjob.end_ts()): + fields_to_set["end_ts"] = "'%d'" % job.end_ts() + fields_to_set["end_date"] = self._timestamp_to_date(job.end_ts()) + if job.start_count() > 0 and ( + oldjob is None or job.start_count() != oldjob.start_count() + ): fields_to_set["start_count"] = "'%d'" % job.start_count() - if ( job.cput_limit_sec()>0 and - ( oldjob is None or job.cput_limit_sec()!=oldjob.cput_limit_sec() ) ): - fields_to_set["cput_req"] = "'%s'" % sec_to_time(job.cput_limit_sec()) - fields_to_set["cput_req_sec"] = "'%d'" % job.cput_limit_sec() - if ( 
job.cput_used_sec()>0 and - ( oldjob is None or job.cput_used_sec()!=oldjob.cput_used_sec() ) ): - fields_to_set["cput"] = "'%s'" % sec_to_time(job.cput_used_sec()) - fields_to_set["cput_sec"] = "'%d'" % job.cput_used_sec() - if ( job.walltime_limit_sec()>0 and - ( oldjob is None or job.walltime_limit_sec()!=oldjob.walltime_limit_sec() ) ): - fields_to_set["walltime_req"] = "'%s'" % sec_to_time(job.walltime_limit_sec()) - fields_to_set["walltime_req_sec"] = "'%d'" % job.walltime_limit_sec() - if ( job.walltime_used_sec()>0 and - ( oldjob is None or job.walltime_used_sec()!=oldjob.walltime_used_sec() ) ): - fields_to_set["walltime"] = "'%s'" % sec_to_time(job.walltime_used_sec()) - fields_to_set["walltime_sec"] = "'%d'" % job.walltime_used_sec() - elif ( job.walltime_used_sec()<0 and - ( oldjob is None or job.walltime_used_sec()!=oldjob.walltime_used_sec() ) ): - delta = int(job.end_ts())-int(job.start_ts()) - fields_to_set["walltime"] = "'%s'" % sec_to_time(delta) - fields_to_set["walltime_sec"] = "'%d'" % delta - if ( job.mem_limit() is not None and - ( oldjob is None or job.mem_limit()!=oldjob.mem_limit()) ): - fields_to_set["mem_req"] = "'%s'" % job.mem_limit() - if ( job.mem_used_kb()>0 and - ( oldjob is None or job.mem_used_kb()!=oldjob.mem_used_kb()) ): - fields_to_set["mem_kb"] = "'%d'" % job.mem_used_kb() - if ( job.vmem_limit() is not None and - ( oldjob is None or job.vmem_limit()!=oldjob.vmem_limit()) ): - fields_to_set["vmem_req"] = "'%s'" % job.vmem_limit() - if ( job.vmem_used_kb()>0 and - ( oldjob is None or job.vmem_used_kb()!=oldjob.vmem_used_kb()) ): - fields_to_set["vmem_kb"] = "'%d'" % job.vmem_used_kb() - if ( ( job.has_resource("Resource_List.mppe") or job.has_resource("resources_used.mppe") ) and - ( oldjob is None or - ( job.get_resource("Resource_List.mppe")!=oldjob.get_resource("Resource_List.mppe") or - job.get_resource("resources_used.mppe")!=oldjob.get_resource("resources_used.mppe") ) ) ): - fields_to_set["mppe"] = "'%d'" % max(int(job.get_resource("Resource_List.mppe")),int(job.get_resource("resources_used.mppe"))) - if ( ( job.has_resource("Resource_List.mppssp") or job.has_resource("resources_used.mppssp") ) and - ( oldjob is None or - ( job.get_resource("Resource_List.mppssp")!=oldjob.get_resource("Resource_List.mppssp") or - job.get_resource("resources_used.mppssp")!=oldjob.get_resource("resources_used.mppssp") ) ) ): - fields_to_set["mppssp"] = "'%d'" % max(int(job.get_resource("Resource_List.mppssp")),int(job.get_resource("resources_used.mppssp"))) - if ( job.has_resource("exec_host") and - ( oldjob is None or job.get_resource("exec_host")!=oldjob.get_resource("exec_host") ) ): - fields_to_set["hostlist"] = "'%s'" % job.get_resource("exec_host") - if ( job.exit_status() is not None and - ( oldjob is None or job.get_resource("Exit_status")!=oldjob.get_resource("Exit_status") ) ): - fields_to_set["exit_status"] = "'%d'" % int(job.get_resource("Exit_status")) - if ( job.software() is not None and - ( oldjob is None or job.software()!=oldjob.software() ) ): - fields_to_set["software"] = "'%s'" % job.software() - if ( job.account() is not None and - ( oldjob is None or job.account()!=oldjob.account() ) ): - fields_to_set["account"] = "'%s'" % job.account() + if job.cput_limit_sec() > 0 and ( + oldjob is None or job.cput_limit_sec() != oldjob.cput_limit_sec() + ): + fields_to_set["cput_req"] = "'%s'" % sec_to_time(job.cput_limit_sec()) + fields_to_set["cput_req_sec"] = "'%d'" % job.cput_limit_sec() + if job.cput_used_sec() > 0 and ( + oldjob is None or 
job.cput_used_sec() != oldjob.cput_used_sec() + ): + fields_to_set["cput"] = "'%s'" % sec_to_time(job.cput_used_sec()) + fields_to_set["cput_sec"] = "'%d'" % job.cput_used_sec() + if job.walltime_limit_sec() > 0 and ( + oldjob is None or job.walltime_limit_sec() != oldjob.walltime_limit_sec() + ): + fields_to_set["walltime_req"] = "'%s'" % sec_to_time( + job.walltime_limit_sec() + ) + fields_to_set["walltime_req_sec"] = "'%d'" % job.walltime_limit_sec() + if job.walltime_used_sec() > 0 and ( + oldjob is None or job.walltime_used_sec() != oldjob.walltime_used_sec() + ): + fields_to_set["walltime"] = "'%s'" % sec_to_time(job.walltime_used_sec()) + fields_to_set["walltime_sec"] = "'%d'" % job.walltime_used_sec() + elif job.walltime_used_sec() < 0 and ( + oldjob is None or job.walltime_used_sec() != oldjob.walltime_used_sec() + ): + delta = int(job.end_ts()) - int(job.start_ts()) + fields_to_set["walltime"] = "'%s'" % sec_to_time(delta) + fields_to_set["walltime_sec"] = "'%d'" % delta + if job.mem_limit() is not None and ( + oldjob is None or job.mem_limit() != oldjob.mem_limit() + ): + fields_to_set["mem_req"] = "'%s'" % job.mem_limit() + if job.mem_used_kb() > 0 and ( + oldjob is None or job.mem_used_kb() != oldjob.mem_used_kb() + ): + fields_to_set["mem_kb"] = "'%d'" % job.mem_used_kb() + if job.vmem_limit() is not None and ( + oldjob is None or job.vmem_limit() != oldjob.vmem_limit() + ): + fields_to_set["vmem_req"] = "'%s'" % job.vmem_limit() + if job.vmem_used_kb() > 0 and ( + oldjob is None or job.vmem_used_kb() != oldjob.vmem_used_kb() + ): + fields_to_set["vmem_kb"] = "'%d'" % job.vmem_used_kb() + if ( + job.has_resource("Resource_List.mppe") + or job.has_resource("resources_used.mppe") + ) and ( + oldjob is None + or ( + job.get_resource("Resource_List.mppe") + != oldjob.get_resource("Resource_List.mppe") + or job.get_resource("resources_used.mppe") + != oldjob.get_resource("resources_used.mppe") + ) + ): + fields_to_set["mppe"] = "'%d'" % max( + int(job.get_resource("Resource_List.mppe")), + int(job.get_resource("resources_used.mppe")), + ) + if ( + job.has_resource("Resource_List.mppssp") + or job.has_resource("resources_used.mppssp") + ) and ( + oldjob is None + or ( + job.get_resource("Resource_List.mppssp") + != oldjob.get_resource("Resource_List.mppssp") + or job.get_resource("resources_used.mppssp") + != oldjob.get_resource("resources_used.mppssp") + ) + ): + fields_to_set["mppssp"] = "'%d'" % max( + int(job.get_resource("Resource_List.mppssp")), + int(job.get_resource("resources_used.mppssp")), + ) + if job.has_resource("exec_host") and ( + oldjob is None + or job.get_resource("exec_host") != oldjob.get_resource("exec_host") + ): + fields_to_set["hostlist"] = "'%s'" % job.get_resource("exec_host") + if job.exit_status() is not None and ( + oldjob is None + or job.get_resource("Exit_status") != oldjob.get_resource("Exit_status") + ): + fields_to_set["exit_status"] = "'%d'" % int(job.get_resource("Exit_status")) + if job.software() is not None and ( + oldjob is None or job.software() != oldjob.software() + ): + fields_to_set["software"] = "'%s'" % job.software() + if job.account() is not None and ( + oldjob is None or job.account() != oldjob.account() + ): + fields_to_set["account"] = "'%s'" % job.account() # work around MySQL time field limitation -- cannot handle times >= 839:00:00 - if ( self.getType()=="mysql" ): - for timefield in ["cput","cput_req","walltime","walltime_req"]: - if ( timefield in fields_to_set and time_to_sec(fields_to_set[timefield].strip("'"))>=839*3600 
): + if self.getType() == "mysql": + for timefield in ["cput", "cput_req", "walltime", "walltime_req"]: + if ( + timefield in fields_to_set + and time_to_sec(fields_to_set[timefield].strip("'")) >= 839 * 3600 + ): fields_to_set[timefield] = "'838:59:59'" - if ( len(fields_to_set)>0 ): + if len(fields_to_set) > 0: return fields_to_set else: return None - def insert_job(self,job,system=None,check_existance=True,noop=False,append_to_jobid=None): - if ( not isinstance(job,jobinfo) ): - raise TypeError("\"job\" object is of wrong type: %s" % str(job)) - if ( check_existance and self.job_exists(job.jobid(),append_to_jobid=append_to_jobid) ): - raise RuntimeError("Job %s already exists in database, cannot insert" % job.jobid()) - delta = self._job_set_fields(job,system,append_to_jobid=append_to_jobid) - if ( delta is not None ): + def insert_job( + self, job, system=None, check_existance=True, noop=False, append_to_jobid=None + ): + if not isinstance(job, jobinfo): + raise TypeError('"job" object is of wrong type: %s' % str(job)) + if check_existance and self.job_exists( + job.jobid(), append_to_jobid=append_to_jobid + ): + raise RuntimeError( + "Job %s already exists in database, cannot insert" % job.jobid() + ) + delta = self._job_set_fields(job, system, append_to_jobid=append_to_jobid) + if delta is not None: deltakeys = sorted(delta.keys()) deltavalues = [] for key in deltakeys: deltavalues.append(delta[key]) - sql = "INSERT INTO %s ( %s ) VALUES ( %s )" % (self.getJobsTable(),",".join(deltakeys),",".join(deltavalues)) - if ( noop ): + sql = "INSERT INTO %s ( %s ) VALUES ( %s )" % ( + self.getJobsTable(), + ",".join(deltakeys), + ",".join(deltavalues), + ) + if noop: logger.debug("%s" % sql) else: try: @@ -1345,23 +1538,35 @@ def insert_job(self,job,system=None,check_existance=True,noop=False,append_to_jo logger.debug("%s" % sql) logger.error(str(e)) - def update_job(self,job,system=None,check_existance=True,noop=False,append_to_jobid=None): - if ( not isinstance(job,jobinfo) ): - raise TypeError("\"job\" object is of wrong type: %s" % str(job)) - if ( check_existance and not self.job_exists(job.jobid(),append_to_jobid=append_to_jobid) ): - raise RuntimeError("Job %s does not exist in database, cannot update" % job.jobid()) + def update_job( + self, job, system=None, check_existance=True, noop=False, append_to_jobid=None + ): + if not isinstance(job, jobinfo): + raise TypeError('"job" object is of wrong type: %s' % str(job)) + if check_existance and not self.job_exists( + job.jobid(), append_to_jobid=append_to_jobid + ): + raise RuntimeError( + "Job %s does not exist in database, cannot update" % job.jobid() + ) myjobid = job.jobid() - if ( append_to_jobid is not None ): - myjobid = job.jobid()+append_to_jobid - oldjob = self.get_job(job.jobid(),append_to_jobid=append_to_jobid) - if ( job!=oldjob ): - delta = self._job_set_fields(job,system,oldjob,append_to_jobid=append_to_jobid) - if ( delta is not None ): + if append_to_jobid is not None: + myjobid = job.jobid() + append_to_jobid + oldjob = self.get_job(job.jobid(), append_to_jobid=append_to_jobid) + if job != oldjob: + delta = self._job_set_fields( + job, system, oldjob, append_to_jobid=append_to_jobid + ) + if delta is not None: deltalist = [] for key in sorted(delta.keys()): - deltalist.append("%s=%s" % (key,delta[key])) - sql = "UPDATE %s SET %s WHERE jobid='%s'" % (self.getJobsTable(),", ".join(deltalist),myjobid) - if ( noop ): + deltalist.append("%s=%s" % (key, delta[key])) + sql = "UPDATE %s SET %s WHERE jobid='%s'" % ( + 
self.getJobsTable(), + ", ".join(deltalist), + myjobid, + ) + if noop: logger.debug("%s" % sql) else: try: @@ -1371,203 +1576,237 @@ def update_job(self,job,system=None,check_existance=True,noop=False,append_to_jo logger.debug("%s" % sql) logger.error(str(e)) - def insert_or_update_job(self,job,system=None,noop=False,append_to_jobid=None): - if ( not isinstance(job,jobinfo) ): - raise TypeError("\"job\" object is of wrong type: %s" % str(job)) - if ( self.job_exists(job.jobid(),append_to_jobid=append_to_jobid) ): - self.update_job(job,system,check_existance=False,noop=noop,append_to_jobid=append_to_jobid) + def insert_or_update_job(self, job, system=None, noop=False, append_to_jobid=None): + if not isinstance(job, jobinfo): + raise TypeError('"job" object is of wrong type: %s' % str(job)) + if self.job_exists(job.jobid(), append_to_jobid=append_to_jobid): + self.update_job( + job, + system, + check_existance=False, + noop=noop, + append_to_jobid=append_to_jobid, + ) else: - self.insert_job(job,system,check_existance=False,noop=noop,append_to_jobid=append_to_jobid) - - def get_job(self,jobid,noop=False,append_to_jobid=None): + self.insert_job( + job, + system, + check_existance=False, + noop=noop, + append_to_jobid=append_to_jobid, + ) + + def get_job(self, jobid, noop=False, append_to_jobid=None): myjobid = jobid - if ( append_to_jobid is not None ): - myjobid = jobid+append_to_jobid - if ( self.job_exists(myjobid) ): - sql = "SELECT * FROM %s WHERE jobid='%s'" % (self.getJobsTable(),myjobid) - if ( noop ): + if append_to_jobid is not None: + myjobid = jobid + append_to_jobid + if self.job_exists(myjobid): + sql = "SELECT * FROM %s WHERE jobid='%s'" % (self.getJobsTable(), myjobid) + if noop: logger.debug("%s" % sql) return None else: self.cursor().execute(sql) results = self.cursor().fetchall() - if ( len(results)==0 ): + if len(results) == 0: return None - elif ( len(results)==1 ): + elif len(results) == 1: columns = [] for desc in self.cursor().description: columns.append(desc[0]) resources = {} - ngpus = 0 result = list(results[0]) for i in range(len(result)): - if ( columns[i] in ["account","jobname","queue","system"] and - result[i] is not None ): + if ( + columns[i] in ["account", "jobname", "queue", "system"] + and result[i] is not None + ): resources[columns[i]] = str(result[i]) - elif ( columns[i]=="username" and - result[i] is not None ): + elif columns[i] == "username" and result[i] is not None: resources["user"] = str(result[i]) - elif ( columns[i]=="groupname" and - result[i] is not None ): + elif columns[i] == "groupname" and result[i] is not None: resources["group"] = str(result[i]) - elif ( columns[i]=="submithost" and - result[i] is not None ): - if ( "user" in resources ): - resources["owner"] = resources["user"]+"@"+str(result[i]) - elif ( columns[i]=="submit_ts" and - result[i]>0 ): + elif columns[i] == "submithost" and result[i] is not None: + if "user" in resources: + resources["owner"] = ( + resources["user"] + "@" + str(result[i]) + ) + elif columns[i] == "submit_ts" and result[i] > 0: resources["ctime"] = str(result[i]) resources["qtime"] = str(result[i]) - elif ( columns[i]=="eligible_ts" and - result[i]>0 ): + elif columns[i] == "eligible_ts" and result[i] > 0: resources["etime"] = str(result[i]) - elif ( columns[i]=="start_ts" and - result[i]>0 ): + elif columns[i] == "start_ts" and result[i] > 0: resources["start"] = str(result[i]) - elif ( columns[i]=="end_ts" and - result[i]>0 ): + elif columns[i] == "end_ts" and result[i] > 0: resources["end"] = 
str(result[i])
-                        elif ( columns[i]=="start_count" and
-                               result[i]>0 ):
+                        elif columns[i] == "start_count" and result[i] > 0:
                             resources["start_count"] = str(result[i])
-                        elif ( columns[i]=="hostlist" and
-                               result[i] is not None ):
+                        elif columns[i] == "hostlist" and result[i] is not None:
                             resources["exec_host"] = str(result[i])
-                        elif ( columns[i]=="exit_status" and
-                               result[i] is not None ):
+                        elif columns[i] == "exit_status" and result[i] is not None:
                             resources["Exit_status"] = str(result[i])
-                        elif ( columns[i]=="cput_req" ):
-                            if ( isinstance(result[i],datetime.timedelta) ):
-                                if ( result[i].days>0 and result[i].seconds>0 ):
-                                    resources["Resource_List.cput"] = sec_to_time(24*3600*result[i].days+result[i].seconds)
+                        elif columns[i] == "cput_req":
+                            if isinstance(result[i], datetime.timedelta):
+                                if result[i].days > 0 and result[i].seconds > 0:
+                                    resources["Resource_List.cput"] = sec_to_time(
+                                        24 * 3600 * result[i].days + result[i].seconds
+                                    )
                                 else:
                                     resources["Resource_List.cput"] = None
-                            elif ( isinstance(result[i],int) ):
-                                if ( result[i]>0 ):
-                                    resources["Resource_List.cput"] = sec_to_time(result[i])
+                            elif isinstance(result[i], int):
+                                if result[i] > 0:
+                                    resources["Resource_List.cput"] = sec_to_time(
+                                        result[i]
+                                    )
                                 else:
                                     resources["Resource_List.cput"] = None
                             else:
                                 resources["Resource_List.cput"] = str(result[i])
-                        elif ( columns[i]=="cput_req_sec" ):
-                            if ( "Resource_List.cput" not in resources or
-                                 ( "Resource_List.cput" in resources and
-                                   result[i] is not None and
-                                   int(result[i])>time_to_sec(resources["Resource_List.cput"]) ) ):
+                        elif columns[i] == "cput_req_sec":
+                            if "Resource_List.cput" not in resources or (
+                                "Resource_List.cput" in resources
+                                and result[i] is not None
+                                and int(result[i])
+                                > time_to_sec(resources["Resource_List.cput"])
+                            ):
                                 resources["Resource_List.cput"] = sec_to_time(result[i])
-                        elif ( columns[i]=="feature" and
-                               result[i] is not None ):
+                        elif columns[i] == "feature" and result[i] is not None:
                             resources["Resource_List.feature"] = str(result[i])
-                        elif ( columns[i]=="gattr" and
-                               result[i] is not None ):
+                        elif columns[i] == "gattr" and result[i] is not None:
                             resources["Resource_List.gattr"] = str(result[i])
-                        elif ( columns[i]=="gres" and
-                               result[i] is not None ):
+                        elif columns[i] == "gres" and result[i] is not None:
                             resources["Resource_List.gres"] = str(result[i])
-                        elif ( columns[i]=="gres" and
-                               result[i] is not None ):
-                            resources["Resource_List.gres"] = str(result[i])
-                        elif ( columns[i]=="mem_req" and
-                               result[i] is not None ):
+                        elif columns[i] == "mem_req" and result[i] is not None:
                             resources["Resource_List.mem"] = str(result[i])
-                        elif ( columns[i]=="mppe" and
-                               result[i] is not None ):
+                        elif columns[i] == "mppe" and result[i] is not None:
                             resources["Resource_List.mppe"] = str(result[i])
                             resources["resources_used.mppe"] = str(result[i])
-                        elif ( columns[i]=="mppssp" and
-                               result[i] is not None ):
+                        elif columns[i] == "mppssp" and result[i] is not None:
                             resources["Resource_List.mppssp"] = str(result[i])
                             resources["resources_used.mppssp"] = str(result[i])
-                        elif ( columns[i]=="nodes" and
-                               result[i] is not None ):
+                        elif columns[i] == "nodes" and result[i] is not None:
                             resources["Resource_List.nodes"] = str(result[i])
                             resources["Resource_List.neednodes"] = str(result[i])
-                        elif ( columns[i]=="nodect" and
-                               result[i]>0 ):
+                        elif columns[i] == "nodect" and result[i] > 0:
                             resources["Resource_List.nodect"] = str(result[i])
                             resources["unique_node_count"] = str(result[i])
-                        elif (
columns[i]=="qos" and - result[i] is not None ): + elif columns[i] == "qos" and result[i] is not None: resources["Resource_List.qos"] = str(result[i]) - elif ( columns[i]=="vmem_req" and - result[i] is not None ): + elif columns[i] == "vmem_req" and result[i] is not None: resources["Resource_List.vmem"] = str(result[i]) - elif ( columns[i]=="walltime_req" ): - if ( "Resource_List.walltime" not in resources ): - if ( isinstance(result[i],datetime.timedelta) ): - resources["Resource_List.walltime"] = sec_to_time(24*3600*result[i].days+result[i].seconds) - elif ( isinstance(result[i],int) ): - resources["Resource_List.walltime"] = sec_to_time(result[i]) + elif columns[i] == "walltime_req": + if "Resource_List.walltime" not in resources: + if isinstance(result[i], datetime.timedelta): + resources["Resource_List.walltime"] = sec_to_time( + 24 * 3600 * result[i].days + result[i].seconds + ) + elif isinstance(result[i], int): + resources["Resource_List.walltime"] = sec_to_time( + result[i] + ) else: resources["Resource_List.walltime"] = str(result[i]) - elif ( columns[i]=="walltime_req_sec" ): - if ( "Resource_List.walltime" not in resources or - ( "Resource_List.walltime" in resources and - result[i] is not None and - int(result[i])>time_to_sec(resources["Resource_List.walltime"]) ) ): - resources["Resource_List.walltime"] = sec_to_time(result[i]) - - elif ( columns[i]=="cput" ): - if ( "resources_used.cput" not in resources ): - if ( isinstance(result[i],datetime.timedelta) ): - resources["resources_used.cput"] = sec_to_time(24*3600*result[i].days+result[i].seconds) - elif ( isinstance(result[i],int) ): - resources["resources_used.cput"] = sec_to_time(result[i]) + elif columns[i] == "walltime_req_sec": + if "Resource_List.walltime" not in resources or ( + "Resource_List.walltime" in resources + and result[i] is not None + and int(result[i]) + > time_to_sec(resources["Resource_List.walltime"]) + ): + resources["Resource_List.walltime"] = sec_to_time( + result[i] + ) + + elif columns[i] == "cput": + if "resources_used.cput" not in resources: + if isinstance(result[i], datetime.timedelta): + resources["resources_used.cput"] = sec_to_time( + 24 * 3600 * result[i].days + result[i].seconds + ) + elif isinstance(result[i], int): + resources["resources_used.cput"] = sec_to_time( + result[i] + ) else: resources["resources_used.cput"] = str(result[i]) - elif ( columns[i]=="cput_sec" ): - if ( "resources_used.cput" not in resources or - ( "resources_used.cput" in resources and - result[i] is not None and - int(result[i])>time_to_sec(resources["resources_used.cput"]) ) ): - resources["resources_used.cput"] = sec_to_time(result[i]) - elif ( columns[i]=="energy" ): + elif columns[i] == "cput_sec": + if "resources_used.cput" not in resources or ( + "resources_used.cput" in resources + and result[i] is not None + and int(result[i]) + > time_to_sec(resources["resources_used.cput"]) + ): + resources["resources_used.cput"] = sec_to_time( + result[i] + ) + elif columns[i] == "energy": resources["resources_used.energy_used"] = str(result[i]) - elif ( columns[i]=="mem_kb" and - result[i] is not None ): - resources["resources_used.mem"] = str(result[i])+"kb" - elif ( columns[i]=="vmem_kb" and - result[i] is not None ): - resources["resources_used.vmem"] = str(result[i])+"kb" - elif ( columns[i]=="walltime" ): - if ( "resources_used.walltime" not in resources ): - if ( isinstance(result[i],datetime.timedelta) ): - resources["resources_used.walltime"] = sec_to_time(24*3600*result[i].days+result[i].seconds) - elif ( 
isinstance(result[i],int) ):
-                                    resources["resources_used.walltime"] = sec_to_time(result[i])
+                        elif columns[i] == "mem_kb" and result[i] is not None:
+                            resources["resources_used.mem"] = str(result[i]) + "kb"
+                        elif columns[i] == "vmem_kb" and result[i] is not None:
+                            resources["resources_used.vmem"] = str(result[i]) + "kb"
+                        elif columns[i] == "walltime":
+                            if "resources_used.walltime" not in resources:
+                                if isinstance(result[i], datetime.timedelta):
+                                    resources["resources_used.walltime"] = sec_to_time(
+                                        24 * 3600 * result[i].days + result[i].seconds
+                                    )
+                                elif isinstance(result[i], int):
+                                    resources["resources_used.walltime"] = sec_to_time(
+                                        result[i]
+                                    )
                                 else:
-                                    resources["resources_used.walltime"] = str(result[i])
-                        elif ( columns[i]=="walltime_sec" ):
-                            if ( "resources_used.walltime" not in resources or
-                                 ( "resources_used.walltime" in resources and
-                                   result[i] is not None and
-                                   int(result[i])>time_to_sec(resources["resources_used.walltime"]) ) ):
-                                resources["resources_used.walltime"] = sec_to_time(result[i])
-                        elif ( columns[i]=="nproc" ):
+                                    resources["resources_used.walltime"] = str(
+                                        result[i]
+                                    )
+                        elif columns[i] == "walltime_sec":
+                            if "resources_used.walltime" not in resources or (
+                                "resources_used.walltime" in resources
+                                and result[i] is not None
+                                and int(result[i])
+                                > time_to_sec(resources["resources_used.walltime"])
+                            ):
+                                resources["resources_used.walltime"] = sec_to_time(
+                                    result[i]
+                                )
+                        elif columns[i] == "nproc":
                             resources["total_execution_slots"] = str(result[i])
-                        elif ( columns[i]=="ngpus" ):
+                        elif columns[i] == "ngpus":
                             resources["ngpus"] = int(result[i])
-                        elif ( columns[i]=="software" and result[i] is not None ):
+                        elif columns[i] == "software" and result[i] is not None:
                             resources["Resource_List.software"] = str(result[i])
-                        elif ( columns[i]=="script" and result[i] is not None ):
+                        elif columns[i] == "script" and result[i] is not None:
                             resources["script"] = str(result[i])
-                    if ( "ctime" in resources ):
+                    if "ctime" in resources:
                         updatetime = int(resources["ctime"])
                     else:
                         updatetime = 0
                     state = "Q"
-                    if ( "start" in resources ):
-                        if ( int(resources["start"])>updatetime ):
-                            updatetime = resources["start"]
+                    if "start" in resources:
+                        if int(resources["start"]) > updatetime:
+                            # int(), or the "end" comparison below breaks on Py3
+                            updatetime = int(resources["start"])
                             state = "S"
-                    if ( "end" in resources ):
-                        if ( int(resources["end"])>updatetime ):
+                    if "end" in resources:
+                        if int(resources["end"]) > updatetime:
                             updatetime = int(resources["end"])
                             state = "E"
-                    job = jobinfo(myjobid,datetime.datetime.fromtimestamp(float(updatetime)).strftime("%m/%d/%Y %H:%M:%S"),state,resources)
+                    job = jobinfo(
+                        myjobid,
+                        datetime.datetime.fromtimestamp(float(updatetime)).strftime(
+                            "%m/%d/%Y %H:%M:%S"
+                        ),
+                        state,
+                        resources,
+                    )
                     return job
                 else:
-                    raise RuntimeError("More than one result for jobid %s (should not be possible)" % jobid)
+                    raise RuntimeError(
+                        "More than one result for jobid %s (should not be possible)"
+                        % jobid
+                    )
 # class pbsacctDBTestCase(unittest.TestCase):
@@ -1579,5 +1818,5 @@ def get_job(self,jobid,noop=False,append_to_jobid=None):

 if __name__ == "__main__":
     unittest.main()
-    #import glob
-    #print str(jobs_from_files(glob.glob("/users/sysp/amaharry/acct-data/201603*")))
+    # import glob
+    # print str(jobs_from_files(glob.glob("/users/sysp/amaharry/acct-data/201603*")))
diff --git a/src/python/setup.py b/src/python/setup.py
index 1fa02d4..8233ebc 100644
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -1,30 +1,38 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

 from setuptools import setup

 # From
https://packaging.python.org/guides/making-a-pypi-friendly-readme/ from os import path + this_directory = path.abspath(path.dirname(__file__)) -with open(path.join(this_directory, 'README.rst')) as f: +with open(path.join(this_directory, "README.rst")) as f: long_description = f.read() -setup (name = "pbsacct", - version = "3.4.6", - description = "Python library for parsing PBS accounting logs", - long_description = long_description, - long_description_content_type = 'text/x-rst', - author = "Troy Baer", - author_email = "tabaer@gmail.com", - url = "https://github.com/tabaer/pbstools", - packages = ['pbsacct'], - zip_safe = False, - license = "GPL v2", - classifiers = [ - 'Development Status :: 4 - Beta', - 'Intended Audience :: System Administrators', - 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)', - 'Operating System :: POSIX', - 'Topic :: Scientific/Engineering :: Information Analysis', - 'Topic :: Software Development :: Libraries :: Python Modules' - ] - ) +setup( + name="pbsacct", + version="3.4.6", + description="Python library for parsing PBS accounting logs", + long_description=long_description, + long_description_content_type="text/x-rst", + author="Troy Baer", + author_email="tabaer@gmail.com", + url="https://github.com/tabaer/pbstools", + packages=["pbsacct"], + zip_safe=False, + license="GPL v2", + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", + "Operating System :: POSIX", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + python_requires=">=3.6", +)
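
For a quick end-to-end check of the ported pbsacctDB class, something like the
sketch below should work (assuming the pbsacct package built by this setup.py
is installed and exports pbsacctDB; the config path and jobid are illustrative,
not shipped defaults):

    # Minimal smoke test for the Python 3 pbsacctDB path (a sketch).
    # /etc/pbsacct.cfg and the jobid below are hypothetical.
    from pbsacct import pbsacctDB

    db = pbsacctDB()
    db.readConfigFile("/etc/pbsacct.cfg")  # keyword=value file, parsed above
    db.connect()
    jobid = "12345.pbs-server.example.org"  # illustrative jobid
    if db.job_exists(jobid):
        job = db.get_job(jobid)
        print(job.jobid(), job.get_resource("resources_used.walltime"))
    db.close()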
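One caveat this patch does not change: job_exists(), insert_job(), update_job(),
and get_job() still build SQL by string interpolation. That is workable for
trusted accounting logs, but if jobids could ever carry untrusted input, DB-API
placeholders are the safer pattern. A hedged sketch of what that could look
like (not what the code above does; placeholder syntax is %s for
MySQLdb/psycopg2 and ? for sqlite3):

    def job_exists_param(db, jobid):
        # Only values can be parameterized; the table name (an identifier)
        # still has to be interpolated.
        mark = "?" if db.getType() in ("sqlite2", "sqlite3") else "%s"
        sql = "SELECT jobid FROM %s WHERE jobid=%s" % (db.getJobsTable(), mark)
        db.cursor().execute(sql, (jobid,))
        return len(db.cursor().fetchall()) > 0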
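The 838:59:59 clamp in _job_set_fields() exists because MySQL's TIME type
saturates at that value. Given the unit tests at the top of this patch, the
behavior of the time helpers can be sanity-checked interactively like this
(assumes time_to_sec and sec_to_time are importable from pbsacct and that
sec_to_time emits the HH:MM:SS form the Jobs table stores):

    from pbsacct import time_to_sec, sec_to_time  # assumed module-level helpers

    assert time_to_sec("7:00:00:00") == 7 * 24 * 3600  # dd:hh:mm:ss form
    assert time_to_sec("838:59:59") == 838 * 3600 + 59 * 60 + 59
    assert sec_to_time(time_to_sec("838:59:59")) == "838:59:59"  # MySQL TIME max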