From f773e106283e0cae44a8803aa11d93d323c422b1 Mon Sep 17 00:00:00 2001
From: thepatientwait
Date: Mon, 14 Oct 2024 15:52:01 +1000
Subject: [PATCH] Update gather_tool_databases.smk to download metabuli_db from sharepoint automatically.

Add download_metabuli and extract_metabuli rules, re-enable 'metabuli' in
the tools list, and drop the manual download instructions from the README.
---
Reviewer notes (ignored by git am): the line breaks and indentation of this
patch were reconstructed from a copy in which they had been collapsed. Check
the context-line whitespace and the index hashes against the tree before
applying; `git apply --ignore-whitespace` may be needed.

 README.md                 |  6 ------
 gather_tool_databases.smk | 32 +++++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index d0eaf3d..12979ed 100644
--- a/README.md
+++ b/README.md
@@ -33,12 +33,6 @@ First, download the reference databases for each tool
 snakemake --snakefile gather_tool_databases.smk --use-conda -c 8
 ```
 
-The Metabuli R207 database is downloaded separately. Download the tar.gz file from https://connectqutedu.sharepoint.com/:u:/s/metabuli_gtdb_207/EYk7N71mp-NAtET5_X_fBDABM6AC_DCbxGiDc2rdVVlNiw?e=Ra5rVZ and put it into a new folder `tool_reference_data/metabuli`. Then extract it with
-
-```bash
-tar -xvf metabuli.tar.gz
-```
-
 Then run the benchmarking, for instance #1
 
 ```bash
diff --git a/gather_tool_databases.smk b/gather_tool_databases.smk
index 02f2300..888e8d0 100644
--- a/gather_tool_databases.smk
+++ b/gather_tool_databases.smk
@@ -33,9 +33,9 @@ map2b_db = os.path.join(map2b_checkout_dir, 'database/GTDB')
 # metabuli_db_dir = join(output_directory, 'metabuli')
 # metabuli_db = join(metabuli_db_dir, 'gtdb')
 
-# tools = ['singlem', 'metaphlan', 'motus', 'kraken', 'sourmash', 'kaiju', 'map2b', 'metabuli']
-## metabuli download is not scripted because it is via sharepoint, which gives an indirect link.
-tools = ['singlem', 'metaphlan', 'motus', 'kraken', 'sourmash', 'kaiju', 'map2b']
+## metabuli is downloaded from sharepoint by the download_metabuli rule (direct link via ?download=1).
+tools = ['singlem', 'metaphlan', 'motus', 'kraken', 'sourmash', 'kaiju', 'map2b', 'metabuli']
+# tools = ['singlem', 'metaphlan', 'motus', 'kraken', 'sourmash', 'kaiju', 'map2b']  # without metabuli
 
 rule all:
     input:
@@ -385,3 +385,29 @@ rule bench2_genomes_extract:
         """
         cd 2_phylogenetic_novelty && tar -xzf bench2_genomes.tar.gz &> ../{log}
         """
+
+# Download the Metabuli R207 database tarball from sharepoint into <output_directory>/metabuli.
+# The URL is quoted so the shell never treats its '?' as a glob character.
+rule download_metabuli:
+    output:
+        metabuli_tar = join(output_directory, 'metabuli', 'metabuli.tar.gz'),
+    log:
+        join(output_directory, 'metabuli.log')
+    shell:
+        """
+        mkdir -p {output_directory}/metabuli
+        wget "https://connectqutedu.sharepoint.com/:u:/s/metabuli_gtdb_207/EYk7N71mp-NAtET5_X_fBDABM6AC_DCbxGiDc2rdVVlNiw?download=1" -O {output.metabuli_tar} &> {log}
+        """
+
+# Unpack the downloaded tarball in place and touch metabuli.done once tar succeeds.
+rule extract_metabuli:
+    input:
+        metabuli_tar = join(output_directory, 'metabuli', 'metabuli.tar.gz'),
+    params:
+        output_directory = join(output_directory, 'metabuli'),
+    output:
+        done=touch(join(output_directory, 'metabuli.done')),
+    log:
+        join(output_directory, 'metabuli-extract.log')
+    shell:
+        'tar -xzf {input.metabuli_tar} -C {params.output_directory} &> {log}'