From 40cb58ff1cd2860369166ea1471e7ff0e0e55d0a Mon Sep 17 00:00:00 2001 From: Kenneth Acosta Date: Tue, 3 May 2022 21:50:39 -0400 Subject: [PATCH 1/3] name BLAST database as /data/VB12_2020 --- VB12Path_FunctionProfiler.PL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VB12Path_FunctionProfiler.PL b/VB12Path_FunctionProfiler.PL index 8b73755..8ddff66 100644 --- a/VB12Path_FunctionProfiler.PL +++ b/VB12Path_FunctionProfiler.PL @@ -105,7 +105,7 @@ if ($method eq "diamond") { die "Only fasta files are supported by blast program!"; } @files = glob("$workdir/*$filetype"); - system("$formatdb -i ./data/VB12_2020.faa -p T"); + system("$formatdb -i ./data/VB12_2020.faa -p T -n ./data/VB12_2020"); foreach my $file (@files) { my $out = $file; $out =~ s/$filetype/blast/; From 3552d4e0febb1c1fd1103411f6fbb3ca89e56bad Mon Sep 17 00:00:00 2001 From: Kenneth Acosta Date: Fri, 6 May 2022 21:58:29 -0400 Subject: [PATCH 2/3] truncate target sequence labels from usearch --- VB12Path_FunctionProfiler.PL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VB12Path_FunctionProfiler.PL b/VB12Path_FunctionProfiler.PL index 8ddff66..6f68029 100644 --- a/VB12Path_FunctionProfiler.PL +++ b/VB12Path_FunctionProfiler.PL @@ -13,7 +13,7 @@ my $diamond = "./bin/diamond"; my $diamond_parameters = "-k 1 -e 1e-4 -p 20 --sensitive"; ##usearch https://www.drive5.com/usearch/download.html my $usearch = "./bin/usearch8.1.1861_i86linux32"; -my $usearch_parameters = "-id 0.3"; ##identity cutoff for search_global +my $usearch_parameters = "-id 0.3 -trunclabels"; ##identity cutoff for search_global my ($workdir, $method, $outfile, $seqtype, $filetype, $sampleinfo, $randomsampling); From 0154097abccc98f85a98876eeebd44a960cc15d1 Mon Sep 17 00:00:00 2001 From: Kenneth Acosta Date: Fri, 6 May 2022 22:11:46 -0400 Subject: [PATCH 3/3] add faa filetype --- VB12Path_FunctionProfiler.PL | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/VB12Path_FunctionProfiler.PL b/VB12Path_FunctionProfiler.PL index 6f68029..16bae64 100644 --- a/VB12Path_FunctionProfiler.PL +++ b/VB12Path_FunctionProfiler.PL @@ -36,7 +36,7 @@ if ( !defined $workdir || !defined $seqtype || !defined $sampleinfo || $method !~ /^diamond|usearch|blast$/ - || $filetype !~ /^fastq|fastq.gz|fasta|fasta.gz|fq|fq.gz|fa|fa.gz$/) + || $filetype !~ /^fastq|fastq.gz|fasta|fasta.gz|fq|fq.gz|fa|fa.gz|faa$/) { &PrintHelp(); die; @@ -246,7 +246,7 @@ sub PrintHelp() { print "perl VB12Path_FunctionProfiler.PL -d -m -f -s -si -rs -o \n"; print "-m diamond|usearch|blast\n"; - print "-f fastq, fastq.gz, fasta,fasta.gz, fq, fq.gz, fa, fa.gz\n"; + print "-f fastq, fastq.gz, fasta, fasta.gz, fq, fq.gz, fa, fa.gz, faa\n"; print "-s sequence type, nucl or prot \n"; print "-si tab delimited file for sequence number in each file\n"; print "-rs random sampling size\n";