From 6f4946ecad2610a50a409e6def84e3a33189639a Mon Sep 17 00:00:00 2001 From: Mariano Olivera Fedi Date: Wed, 31 May 2023 08:03:27 +0100 Subject: [PATCH 1/4] chore: changed tab per space and add requirements --- cromwell_configuration/slurm.conf | 8 ++++---- reat.yml | 2 ++ setup.py | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cromwell_configuration/slurm.conf b/cromwell_configuration/slurm.conf index 26ec05a..0f0bda4 100644 --- a/cromwell_configuration/slurm.conf +++ b/cromwell_configuration/slurm.conf @@ -78,15 +78,15 @@ backend { submit = """ if [ "" == "${queue}" ] then - sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ - -p ei-medium \ + sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ + -p ei-medium \ ${"-c " + cpu} \ --mem ${memory_mb} \ --wrap "/bin/bash ${script}" else - sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ - -p ${queue} \ + sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ + -p ${queue} \ ${"-c " + cpu} \ --mem ${memory_mb} \ --wrap "/bin/bash diff --git a/reat.yml b/reat.yml index d1c438f..40423b9 100644 --- a/reat.yml +++ b/reat.yml @@ -31,4 +31,6 @@ dependencies: - snap=2013_11_29 - codingquarry=2.0 - augustus=3.4.0 + - helixer=0.3.1 - evidencemodeler=1.1.1 + diff --git a/setup.py b/setup.py index 54e1bb1..125280b 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,8 @@ "Programming Language :: Python :: 3.8", ], license="MIT", - author="Luis Yanes, Gemy Kaithakottil", - author_email="luis.yanes@earlham.ac.uk, gemy.kaithakottil@earlham.ac.uk", + author="Luis Yanes, Gemy Kaithakottil, Mariano Olivera Fedi", + author_email="luis.yanes@earlham.ac.uk, gemy.kaithakottil@earlham.ac.uk, mariano.olivera-fedi@earlham.ac.uk", description="Robust Eukaryotic Annotation Toolkit", zip_safe=False, 
keywords="gene annotation WDL pipeline workflow cromwell transcriptome homology", From 41eddd0b1895dc8bc2e09ded57a5f83de6b1e444 Mon Sep 17 00:00:00 2001 From: Mariano Olivera Fedi Date: Wed, 31 May 2023 08:04:46 +0100 Subject: [PATCH 2/4] chore: add gpu specifications to config --- cromwell_configuration/slurm.conf | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cromwell_configuration/slurm.conf b/cromwell_configuration/slurm.conf index 0f0bda4..42ba91e 100644 --- a/cromwell_configuration/slurm.conf +++ b/cromwell_configuration/slurm.conf @@ -73,20 +73,30 @@ backend { Int memory_mb = 8000 String? constraints String? queue = "ei-medium" + Int? gpu """ submit = """ if [ "" == "${queue}" ] + then + sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ + -p ei-medium \ + ${"-c " + cpu} \ + --mem ${memory_mb} \ + --wrap "/bin/bash + ${script}" + elif [ "${queue}" == "ei-a100" ] || [ "${queue}" == "ei-gpu" ] #gpu specific nodes in the cluster then sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ - -p ei-medium \ + -p "${queue}" \ ${"-c " + cpu} \ --mem ${memory_mb} \ + ${"--gres=gpu:" + gpu} \ --wrap "/bin/bash ${script}" else - sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ - -p ${queue} \ + sbatch -J ${job_name} --constraint="${constraints}" -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} \ + -p ${queue} \ ${"-c " + cpu} \ --mem ${memory_mb} \ --wrap "/bin/bash From 6f1af99cffe72e08514b04727f92213d450f6e72 Mon Sep 17 00:00:00 2001 From: Mariano Olivera Fedi Date: Wed, 31 May 2023 10:40:42 +0100 Subject: [PATCH 3/4] feat: add helixer to prediction module --- .bumpversion.cfg | 2 +- annotation/__init__.py | 2 +- annotation/prediction.py | 26 ++++++++- annotation/prediction_module/main.wdl | 79 ++++++++++++++++++++++++++- setup.py | 2 +- singularity/build_image.sh | 2 +- 6
files changed, 106 insertions(+), 7 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 85d41c6..7e84097 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.1 +current_version = 0.7.1 commit = True tag = True diff --git a/annotation/__init__.py b/annotation/__init__.py index c161714..916b518 100644 --- a/annotation/__init__.py +++ b/annotation/__init__.py @@ -7,7 +7,7 @@ from Mikado.parsers import parser_factory from Mikado.transcripts import Gene, Transcript -VERSION = '0.6.1' +VERSION = '0.7.1' RUN_METADATA = "run_details.json" UTR_SELECTION_OPTIONS = ('augustus', 'gold', 'silver', 'bronze', 'all', 'hq_assembly', 'lq_assembly') diff --git a/annotation/prediction.py b/annotation/prediction.py index d965b39..d7efc96 100644 --- a/annotation/prediction.py +++ b/annotation/prediction.py @@ -59,6 +59,9 @@ def combine_arguments_prediction(cli_arguments): if cli_arguments.repeats: cromwell_inputs['ei_prediction.repeats_gff'] = cli_arguments.repeats.name + if cli_arguments.helixer_model: + cromwell_inputs['ei_prediction.helixer_model'] = cli_arguments.helixer_model + if cli_arguments.force_train: cromwell_inputs['ei_prediction.force_train'] = cli_arguments.force_train @@ -89,6 +92,9 @@ def combine_arguments_prediction(cli_arguments): if cli_arguments.mikado_utr_files: cromwell_inputs['ei_prediction.mikado_utr_files'] = ' '.join(cli_arguments.mikado_utr_files) + if cli_arguments.temp_dir: + cromwell_inputs['ei_prediction.temp_dir'] = cli_arguments.temp_dir + if cli_arguments.do_glimmer: cromwell_inputs['ei_prediction.do_glimmer'] = 'true' if cli_arguments.do_glimmer is not True and os.access(cli_arguments.do_glimmer, os.R_OK): @@ -103,7 +109,10 @@ def combine_arguments_prediction(cli_arguments): cromwell_inputs['ei_prediction.do_codingquarry'] = 'true' if cli_arguments.do_codingquarry is not True and os.access(cli_arguments.do_codingquarry, os.R_OK): cromwell_inputs['ei_prediction.codingquarry_training'] 
= cli_arguments.do_codingquarry - + + if cli_arguments.do_helixer: + cromwell_inputs['ei_prediction.do_helixer'] = 'true' + if cli_arguments.no_augustus and cli_arguments.no_augustus is False: cromwell_inputs['ei_prediction.do_augustus'] = 'false' @@ -222,8 +231,11 @@ def combine_arguments_prediction(cli_arguments): cromwell_inputs['ei_prediction.snap_extra_params'] = cli_arguments.snap_extra_params if cli_arguments.augustus_extra_params: cromwell_inputs['ei_prediction.augustus_extra_params'] = cli_arguments.augustus_extra_params + if cli_arguments.helixer_extra_params: + cromwell_inputs['ei_prediction.helixer_extra_params'] = cli_arguments.helixer_extra_params if cli_arguments.evm_extra_params: cromwell_inputs['ei_prediction.evm_extra_params'] = cli_arguments.evm_extra_params + if cli_arguments.mikado_config: cromwell_inputs['ei_prediction.mikado_config'] = cli_arguments.mikado_config.name @@ -246,7 +258,8 @@ def collect_prediction_output(run_metadata): if outputs['ei_prediction.glimmer']: symlink(outputs_path, outputs['ei_prediction.glimmer']) - + if outputs['ei_prediction.helixer']: + symlink(outputs_path, outputs['ei_prediction.helixer']) if outputs['ei_prediction.snap']: symlink(outputs_path, outputs['ei_prediction.snap']) if outputs['ei_prediction.codingquarry']: @@ -271,6 +284,7 @@ def collect_prediction_output(run_metadata): snap_prediction_path = os.path.join(predictions_path, "SNAP") codingquarry_prediction_path = os.path.join(predictions_path, "CodingQuarry") augustus_prediction_path = os.path.join(predictions_path, "Augustus") + helixer_prediction_path = os.path.join(predictions_path, "Helixer") if not os.path.exists(predictions_path): os.mkdir(predictions_path) @@ -303,6 +317,14 @@ def collect_prediction_output(run_metadata): else: if os.path.exists(glimmer_prediction_path): shutil.rmtree(glimmer_prediction_path) + + if outputs.get('ei_prediction.predictions_helixer'): # .get(): this series adds no such workflow output, so the key may be absent + if not os.path.exists(helixer_prediction_path): + os.mkdir(helixer_prediction_path) +
symlink(helixer_prediction_path, outputs['ei_prediction.predictions_helixer']) + else: + if os.path.exists(helixer_prediction_path): + shutil.rmtree(helixer_prediction_path) if outputs['ei_prediction.predictions_augustus']: if not os.path.exists(augustus_prediction_path): diff --git a/annotation/prediction_module/main.wdl b/annotation/prediction_module/main.wdl index 9598891..6db6d4d 100644 --- a/annotation/prediction_module/main.wdl +++ b/annotation/prediction_module/main.wdl @@ -15,7 +15,10 @@ workflow ei_prediction { Boolean do_glimmer = false Directory? glimmer_training Boolean do_snap = false + Boolean do_helixer = false File? snap_training + String? temp_dir + Boolean do_augustus = true Array[File]? transcriptome_models # Classify and divide into Gold, Silver and Bronze Array[File]? homology_models # Take as is @@ -31,6 +34,8 @@ workflow ei_prediction { File? repeats_gff # These are passed through to augustus File? extra_training_models # These models are taken as-is directly as results from the training model selection + File? helixer_model #model used for helixer gene model prediction + Int flank = 200 Int kfold = 8 @@ -51,7 +56,7 @@ workflow ei_prediction { String? snap_extra_params String? augustus_extra_params String? evm_extra_params - + String? 
helixer_extra_params RuntimeAttr augustus_resources } @@ -262,6 +267,18 @@ workflow ei_prediction { Boolean train_utr = select_first([train_utr_, false]) Int total_models = select_first([num_models, 0]) + # Run Helixer predictions when do_helixer is set + if (do_helixer) { + call Helixer { + input: + genome = def_reference_genome, + model = select_first([helixer_model]), + temp_dir = temp_dir, + species = species, + extra_params = helixer_extra_params + } + } + # Generate CodingQuarry predictions # Considers lowercase as masked by default if (total_models > 1500 && do_codingquarry) { @@ -421,6 +438,7 @@ workflow ei_prediction { augustus_predictions = def_augustus_predictions, snap_predictions = SNAP.predictions, glimmer_predictions = GlimmerHMM.predictions, + helixer_predictions = Helixer.predictions, codingquarry_predictions = CodingQuarry.predictions, codingquarry_fresh_predictions = CodingQuarryFresh.predictions, hq_protein_alignments = hq_protein.processed_gff, @@ -514,6 +532,7 @@ workflow ei_prediction { File? codingquarry = EVM.formatted_codingquarry_predictions File? codingquarry_fresh = EVM.formatted_codingquarry_fresh_predictions File? augustus_abinitio = EVM.formatted_augustus_abinitio_predictions + File? helixer = EVM.formatted_helixer_predictions Array[File]? augustus = EVM.formatted_augustus_runs_predictions File evm_predictions = CombineEVM.predictions File mikado_loci = MikadoPick.loci @@ -1157,6 +1176,56 @@ task GlimmerHMM { >>> } + +task Helixer { + input { + IndexedReference genome + File model + String? extra_params + String species + String? temp_dir + RuntimeAttr? 
resources + } + + RuntimeAttr default_attr = object { + constraints: "avx|avx2|sse4", + cpu_cores: 8, + mem_gb: 64, + max_retries: 1, + queue: "ei-a100,ei-gpu" + } + + RuntimeAttr runtime_attr = select_first([resources, default_attr]) + + Int cpus = select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + Int gpus = select_first([runtime_attr.gpu, default_attr.gpu, 1]) + + runtime { + cpu: cpus + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GB" + constraints: select_first([runtime_attr.constraints, default_attr.constraints]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + queue: select_first([runtime_attr.queue, default_attr.queue]) + gpu: gpus + } + + + output { + File predictions = "helixer.predictions.gff3" + } + + command <<< + set -euxo pipefail + ln -s ~{genome.fasta} + Helixer.py --model-filepath ~{model} \ + ~{"--temporary-dir " + temp_dir} \ + --species ~{species} \ + --fasta-path ~{genome.fasta} \ + ~{extra_params} \ + --gff-output-path helixer.predictions.gff3 + >>> +} + task GenerateModelProteins { input { IndexedReference genome @@ -1393,6 +1462,7 @@ task EVM { File? augustus_abinitio File? snap_predictions File? glimmer_predictions + File? helixer_predictions File? codingquarry_predictions File? codingquarry_fresh_predictions File? hq_protein_alignments @@ -1426,6 +1496,7 @@ task EVM { File? formatted_codingquarry_fresh_predictions = "codingquarry_fresh.predictions.gff" File? formatted_augustus_abinitio_predictions = "augustus_abinitio.predictions.gff" Array[File]? formatted_augustus_runs_predictions = glob("augustus_*.predictions.gff") + File? formatted_helixer_predictions = "helixer.predictions.gff" File? formatted_snap_predictions_stats = "snap.predictions.stats" File? formatted_glimmer_predictions_stats = "glimmer.predictions.stats" @@ -1433,6 +1504,7 @@ task EVM { File? formatted_codingquarry_fresh_predictions_stats = "codingquarry_fresh.predictions.stats" File? 
formatted_augustus_abinitio_predictions_stats = "augustus_abinitio.predictions.stats" Array[File]? formatted_augustus_runs_predictions_stats = glob("augustus_*.predictions.stats") + File? formatted_helixer_predictions_stats = "helixer.predictions.stats" } command <<< @@ -1487,6 +1559,11 @@ task EVM { mikado util stats augustus_abinitio.predictions.gff augustus_abinitio.predictions.stats fi + if [ "~{helixer_predictions}" != "" ]; then + cat ~{helixer_predictions} | gff_to_evm glimmer | tee helixer.predictions.gff >> predictions.gff + mikado util stats helixer.predictions.gff helixer.predictions.stats + fi + augustus_run=0 for i in ~{sep=" " augustus_predictions}; do ((augustus_run++)) diff --git a/setup.py b/setup.py index 125280b..8ed8915 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="reat", - version="0.6.1", + version="0.7.1", packages=find_packages(".", exclude=["tests"]), url="https://github.com/ei-corebioinformatics/reat", classifiers=[ diff --git a/singularity/build_image.sh b/singularity/build_image.sh index e52908f..97a78e9 100644 --- a/singularity/build_image.sh +++ b/singularity/build_image.sh @@ -1,5 +1,5 @@ set -euxo -version=0.6.1 +version=0.7.1 rundir=$(dirname "$(realpath "$0")") cd "$(mktemp -d)" cp "${rundir}"/reat_singularity.def reat.def From 412cc34801b18df0358fe1772bf2f5946f11a5d6 Mon Sep 17 00:00:00 2001 From: Mariano Olivera Fedi Date: Wed, 31 May 2023 12:05:33 +0100 Subject: [PATCH 4/4] fix: remove helixer from pip requirements --- reat.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/reat.yml b/reat.yml index 40423b9..fe4358b 100644 --- a/reat.yml +++ b/reat.yml @@ -31,6 +31,5 @@ dependencies: - snap=2013_11_29 - codingquarry=2.0 - augustus=3.4.0 - - helixer=0.3.1 - evidencemodeler=1.1.1