From ed18c753be09106e83113ba7d4debdec7f16b383 Mon Sep 17 00:00:00 2001 From: Chunfang Zheng Date: Tue, 14 Nov 2017 19:52:02 +0000 Subject: [PATCH 1/3] add a feature for a user-defined config folder --- src/assemble.py | 4 ++-- src/preprocess.py | 5 +++-- src/runPipeline.py | 15 ++++++++++----- src/utils.py | 23 +++++++++++++++++++++-- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/assemble.py b/src/assemble.py index 5a79ee9..e10937c 100644 --- a/src/assemble.py +++ b/src/assemble.py @@ -514,8 +514,8 @@ def Assemble(input,output): matedString = "-mean %d -stddev %d -m %s/Preprocess/out/lib%d.seq.mates"%(lib.mean, lib.stdev, _settings.rundir, lib.id) run_process(_settings, "%s/convert-fasta-to-v2.pl -l %s %s -s %s/Preprocess/out/lib%d.seq -q %s/Preprocess/out/lib%d.seq.qual > %s/Preprocess/out/lib%d.frg"%(_settings.CA, lib.sid, matedString, _settings.rundir, lib.id, _settings.rundir, lib.id, _settings.rundir, lib.id),"Assemble") frglist += "%s/Preprocess/out/lib%d.frg "%(_settings.rundir, lib.id) - - run_process(_settings, "%s/runCA -p %s -d %s/Assemble/out/ -s %s/config/asm.spec %s %s"%(_settings.CA,_settings.PREFIX,_settings.rundir,_settings.METAMOS_UTILS,"stopAfter=terminator" if _settings.doscaffolding else "stopAfter=utgcns", frglist),"Assemble") + asm_path=getProgramParamsFile(_settings.METAMOS_UTILS, 'asm.spec') + run_process(_settings, "%s/runCA -p %s -d %s/Assemble/out/ -s %s %s %s"%(_settings.CA,_settings.PREFIX,_settings.rundir,asm_path,"stopAfter=terminator" if _settings.doscaffolding else "stopAfter=utgcns", frglist),"Assemble") #convert CA to AMOS run_process(_settings, "%s/gatekeeper -dumpfrg -allreads %s.gkpStore > %s.frg"%(_settings.CA, _settings.PREFIX, _settings.PREFIX),"Assemble") if _settings.doscaffolding: diff --git a/src/preprocess.py b/src/preprocess.py index 4efe427..ce06f6a 100644 --- a/src/preprocess.py +++ b/src/preprocess.py @@ -531,6 +531,7 @@ def Preprocess(input,output): reads = rf.read().split(">")[1:] quals = 
rq.read().split(">")[1:] + readcnt = 1 for currIndex, rd in enumerate(reads): @@ -684,8 +685,8 @@ def Preprocess(input,output): oldPath = os.environ["PATH"] os.environ["PATH"] = _settings.AMOS + os.pathsep + _settings.BLASR + os.pathsep + oldPath - - run_process(_settings, "%s/pacBioToCA -l lib%d -s %s/config/pacbio.blasr.spec -t %d -partitions 100 fastqFile=%s genomeSize=%s longReads=1 ovlThreads=%d merylThreads=%d cnsConcurrency=%d merylMemory=%s ovlStoreMemory=%s"%(_settings.CA, lib.id, _settings.METAMOS_UTILS, _settings.threads, lib.f1.path, genomeSize, _settings.threads, _settings.threads, _settings.threads, availableMem, availableMem), "Preprocess") + pacbio_path = getProgramParamsFile(_settings.METAMOS_UTILS, 'pacbio.blasr.spec') + run_process(_settings, "%s/pacBioToCA -l lib%d -s %s -t %d -partitions 100 fastqFile=%s genomeSize=%s longReads=1 ovlThreads=%d merylThreads=%d cnsConcurrency=%d merylMemory=%s ovlStoreMemory=%s"%(_settings.CA, lib.id, pacbio_path, _settings.threads, lib.f1.path, genomeSize, _settings.threads, _settings.threads, _settings.threads, availableMem, availableMem), "Preprocess") # subset longest 25X? 
# update library format and file names lib.f1.path = "%s/Preprocess/out/lib%d.fastq"%(_settings.rundir, lib.id) diff --git a/src/runPipeline.py b/src/runPipeline.py index 60ade55..1d054f5 100644 --- a/src/runPipeline.py +++ b/src/runPipeline.py @@ -198,7 +198,7 @@ def printConfiguration(fileName=None): conf.write(''.join(configurationText)) conf.close() -shortOptions = "hM:IR:rjwbd:s:e:o:k:c:a:n:p:qt:f:vm:4g:iu1l:x:yz:LBVX:S:" +shortOptions = "hM:IR:rjwbd:s:e:o:k:c:a:n:p:qt:f:vm:4g:iu1l:x:yz:LBVX:S:U:" longOptions = ["help", \ "multialigner",\ "isolate",\ @@ -234,7 +234,7 @@ def printConfiguration(fileName=None): "noblastdb",\ "version",\ "validator",\ - "asmscore"] + "asmscore", "user_config_dir"] try: opts, args = getopt.getopt(sys.argv[1:], shortOptions, longOptions) except getopt.GetoptError, err: @@ -252,14 +252,14 @@ def printConfiguration(fileName=None): supported_genecallers = ["fraggenescan","metagenemark","glimmermg"] supported_assemblers = ["newbler", "soapdenovo","soapdenovo2","ca","velvet","velvet-sc","metavelvet",\ "metaidba","sparseassembler","minimus"] -supported_assemblers.extend(generic.getSupportedList(utils.INITIAL_UTILS, utils.STEP_NAMES.ASSEMBLE)) +#supported_assemblers.extend(generic.getSupportedList(utils.INITIAL_UTILS, utils.STEP_NAMES.ASSEMBLE)) supported_mappers = ["bowtie","bowtie2"] supported_abundance = ["metaphyler"] supported_aligners = ["mgcat"] supported_classifiers = ["fcp","phylosift","phmmer","blast",\ "metaphyler", "phymm"] -supported_classifiers.extend(generic.getSupportedList(utils.INITIAL_UTILS, utils.STEP_NAMES.ANNOTATE)) +#supported_classifiers.extend(generic.getSupportedList(utils.INITIAL_UTILS, utils.STEP_NAMES.ANNOTATE)) supported_validators = ["reapr", "orf", "lap", "ale", "quast", "frcbam", "freebayes", "cgal", "n50"] supported_fannotate = ["blast"] supported_scaffolders = ["bambus2"] @@ -343,7 +343,10 @@ def printConfiguration(fileName=None): print "project dir %s does not exist!"%(settings.rundir) usage() 
sys.exit(1) - + elif o in ("-U","--user_config_dir"): + utils.user_config_dir = a +supported_assemblers.extend(generic.getSupportedList(utils.INITIAL_UTILS, utils.STEP_NAMES.ASSEMBLE)) +supported_classifiers.extend(generic.getSupportedList(utils.INITIAL_UTILS, utils.STEP_NAMES.ANNOTATE)) if not os.path.exists(settings.rundir) or settings.rundir == "": print "project dir %s does not exist!"%(settings.rundir) usage() @@ -684,6 +687,8 @@ def printConfiguration(fileName=None): savebtidx = True elif o in ("-L", "--localKrona"): utils.Settings.local_krona = True + elif o in ("-U","--user_config_dir"): + utils.user_config_dir = a else: assert False, "unhandled option" diff --git a/src/utils.py b/src/utils.py index 032b7fc..6d0c8b7 100644 --- a/src/utils.py +++ b/src/utils.py @@ -20,7 +20,7 @@ def resource_path(relative_path): base_path = os.path.abspath(".") return os.path.join(base_path, relative_path) - +user_config_dir ="" application_path = "" if getattr(sys, 'frozen', False): application_path = os.path.dirname(sys.executable) @@ -1228,7 +1228,7 @@ def getProgramParams(configDir, fileName, module="", prefix="", comment="#", sep # second: user home directory # third: metAMOS directory # a parameter specifeid in the current directory takes priority over all others, and so on down the line - dirs = [configDir + os.sep + "config", os.path.expanduser('~') + os.sep + ".metAMOS", os.getcwd()] + dirs = [configDir + os.sep + "config", os.path.expanduser('~') + os.sep + ".metAMOS", os.getcwd(), user_config_dir] optDict = {} cmdOptions = "" @@ -1424,3 +1424,22 @@ def translateToSRAURL(settings, name): if oldDyLD != "": os.environ["DYLD_FALLBACK_LIBRARY_PATH"] = oldDyLD return result + +def getProgramParamsFile(configDir, fileName): + # we process parameters in the following priority: + # first: current directory + # second: user home directory + # third: metAMOS directory + # a parameter specifeid in the current directory takes priority over all others, and so on down the line + 
#dirs = ["/home/chz001/config", configDir + os.sep + "config", os.path.expanduser('~') + os.sep + ".metAMOS", os.getcwd()] + dirs = [configDir + os.sep + "config", os.path.expanduser('~') + os.sep + ".metAMOS", os.getcwd(), user_config_dir] + + result_path = '' + for curDir in dirs: + curFile = curDir + os.sep + filename + print curFile + if os.path.exists(curFile): + result_path = curFile + + return result_path + From 62d4590cad93e2cd4453c893e4b76343cc6361df Mon Sep 17 00:00:00 2001 From: Chunfang Zheng Date: Tue, 14 Nov 2017 20:32:25 +0000 Subject: [PATCH 2/3] modify the comments for methods getProgramParams and getProgramParamsFile --- src/utils.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/utils.py b/src/utils.py index 6d0c8b7..f0de372 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1224,10 +1224,11 @@ def getProgramCitations(settings, programName, comment="#"): def getProgramParams(configDir, fileName, module="", prefix="", comment="#", separator=""): # we process parameters in the following priority: - # first: current directory - # second: user home directory - # third: metAMOS directory - # a parameter specifeid in the current directory takes priority over all others, and so on down the line + # first: user_config_dir + # second: current directory + # third: user home directory + # fourth: metAMOS directory + # a parameter specifeid in user_config_dir takes priority over all others, and so on down the line dirs = [configDir + os.sep + "config", os.path.expanduser('~') + os.sep + ".metAMOS", os.getcwd(), user_config_dir] optDict = {} @@ -1427,11 +1428,12 @@ def translateToSRAURL(settings, name): def getProgramParamsFile(configDir, fileName): # we process parameters in the following priority: - # first: current directory - # second: user home directory - # third: metAMOS directory - # a parameter specifeid in the current directory takes priority over all others, and so on down the line - #dirs = 
["/home/chz001/config", configDir + os.sep + "config", os.path.expanduser('~') + os.sep + ".metAMOS", os.getcwd()] + # first: user_config_dir + # second: current directory + # third: user home directory + # fourth: metAMOS directory + # a parameter specified in user_config_dir takes priority over all others, and so on down the line + dirs = [configDir + os.sep + "config", os.path.expanduser('~') + os.sep + ".metAMOS", os.getcwd(), user_config_dir] result_path = '' From 0ab5909c86c5c297286f90a0f25fd1fc7decc0e1 Mon Sep 17 00:00:00 2001 From: Chunfang Zheng Date: Mon, 20 Nov 2017 16:55:11 +0000 Subject: [PATCH 3/3] fix a bug caused by a typo --- src/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.py b/src/utils.py index f0de372..a94bbf2 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1438,7 +1438,7 @@ def getProgramParamsFile(configDir, fileName): result_path = '' for curDir in dirs: - curFile = curDir + os.sep + filename + curFile = curDir + os.sep + fileName print curFile if os.path.exists(curFile): result_path = curFile