diff --git a/data/count_ATs.sh b/data/count_ATs.sh index 6c0096a..8d9419e 100644 --- a/data/count_ATs.sh +++ b/data/count_ATs.sh @@ -8,7 +8,7 @@ fi export file=$1 -fasta_stats $file | \ +fasta_stats.py $file | \ grep 'unit:AT' | \ grep 'dinucleotide' | \ wc diff --git a/data/fasta_stats.py b/data/fasta_stats.py index 54847da..ba13353 100755 --- a/data/fasta_stats.py +++ b/data/fasta_stats.py @@ -4,67 +4,67 @@ import re if len(sys.argv) != 2: - print("Usage: fasta_dna_stats ") - print("This script is for informational purposes only, and requires that the input file be a DNA (As, Ts, Cs, and Gs) FASTA-formatted file.") - quit() + print("Usage: fasta_dna_stats ") + print("This script is for informational purposes only, and requires that the input file be a DNA (As, Ts, Cs, and Gs) FASTA-formatted file.") + quit() ####### Parse input def fasta_hash_from_file(filename: str) -> dict: - handle = open(filename, "r") - seqs_dict = dict() - current_id = "" - current_seq_list = list() - - for line in handle: - line_list = re.split(r"\s+", line.strip()) - if re.match(r"^>", line_list[0]): - if current_id != "": - seqs_dict[current_id] = "".join(current_seq_list) - current_id = re.subn(r"^>", "", line_list[0])[0] - current_seq_list = list() - else: - current_seq_list.append(line_list[0]) - - seqs_dict[current_id] = "".join(current_seq_list) - handle.close() - return seqs_dict + handle = open(filename, "r") + seqs_dict = dict() + current_id = "" + current_seq_list = list() + + for line in handle: + line_list = re.split(r"\s+", line.strip()) + if re.match(r"^>", line_list[0]): + if current_id != "": + seqs_dict[current_id] = "".join(current_seq_list) + current_id = re.subn(r"^>", "", line_list[0])[0] + current_seq_list = list() + else: + current_seq_list.append(line_list[0]) + + seqs_dict[current_id] = "".join(current_seq_list) + handle.close() + return seqs_dict def longest_perfect_repeat(seq: str) -> list: - result = re.finditer(r"(.{1,10}?)\1{1,}", seq) - max = 1 - maxrep = 
seq[0] - maxunit = seq[0] - for match in result: - unit = match.group(1) - if len(match.group(0)) > max and len(set(unit)) > 1: - max = len(match.group(0)) - maxrep = match.group(0) - maxunit = match.group(1) - - return [max, maxrep, maxunit] + result = re.finditer(r"(.{1,10}?)\1{1,}", seq) + max = 1 + maxrep = seq[0] + maxunit = seq[0] + for match in result: + unit = match.group(1) + if len(match.group(0)) > max and len(set(unit)) > 1: + max = len(match.group(0)) + maxrep = match.group(0) + maxunit = match.group(1) + + return [max, maxrep, maxunit] def gc_content(seq: str) -> float: - gc_count = seq.count("G") + seq.count("g") + seq.count("C") + seq.count("c") - gc = gc_count/len(seq) - return(round(gc,3)) + gc_count = seq.count("G") + seq.count("g") + seq.count("C") + seq.count("c") + gc = gc_count/len(seq) + return(round(gc,3)) def most_common_fivemer(seq: str) -> list: - mers = dict() - for index in range(0, len(seq) - 5 + 1): - fivemer = seq[index:index+5] - if fivemer in mers: - mers[fivemer] = mers[fivemer] + 1 - else: - mers[fivemer] = 1 - - max = 0 - maxmer = "" - for fivemer in mers: - if mers[fivemer] > max: - max = mers[fivemer] - maxmer = fivemer - - return [maxmer, max] + mers = dict() + for index in range(0, len(seq) - 5 + 1): + fivemer = seq[index:index+5] + if fivemer in mers: + mers[fivemer] = mers[fivemer] + 1 + else: + mers[fivemer] = 1 + + max = 0 + maxmer = "" + for fivemer in mers: + if mers[fivemer] > max: + max = mers[fivemer] + maxmer = fivemer + + return [maxmer, max] filename = sys.argv[1] seqs_dict = fasta_hash_from_file(filename) @@ -91,13 +91,13 @@ def most_common_fivemer(seq: str) -> list: types[10] = "decanucleotide" for id in seqs_dict: - print(f"Processing sequence ID {id}",file=sys.stderr) + print(f"Processing sequence ID {id}",file=sys.stderr) - seq = seqs_dict[id] - print(f"{id}\t{gc_content(seq)}\t{len(seq)}", end="\t") + seq = seqs_dict[id] + print(f"{id}\t{gc_content(seq)}\t{len(seq)}", end="\t") - maxmer = 
most_common_fivemer(seq) - print(f"{maxmer[0]}\t{maxmer[1]}", end="\t") - - longest_rep_list = longest_perfect_repeat(seq) - print(f"unit: {longest_rep_list[2]}\t{longest_rep_list[0]}\t{types[len(longest_rep_list[2])]}") \ No newline at end of file + maxmer = most_common_fivemer(seq) + print(f"{maxmer[0]}\t{maxmer[1]}", end="\t") + + longest_rep_list = longest_perfect_repeat(seq) + print(f"unit:{longest_rep_list[2]}\t{longest_rep_list[0]}\t{types[len(longest_rep_list[2])]}") \ No newline at end of file diff --git a/data/pz_blastx_yeast_top1.txt b/data/pz_blastx_yeast_top1.txt deleted file mode 100644 index 7197647..0000000 --- a/data/pz_blastx_yeast_top1.txt +++ /dev/null @@ -1,24 +0,0 @@ -PZ7180000000004_TY YKL081W 31.07 338 197 8 13 993 1 313 1e-32 124 -PZ1082_AB YHR104W 44.92 118 62 3 4 348 196 313 1e-26 100 -PZ11_FX YLR406C 53.01 83 38 1 290 42 25 106 7e-15 65.9 -PZ7180000036154 YNL245C 36.27 102 60 3 105 395 1 102 3e-07 46.2 -PZ605962 YKR079C 29.57 115 66 4 429 121 479 590 3e-11 59.3 -PZ856513 YKL215C 48.39 155 73 3 3 452 109 261 3e-37 135 -PZ7180000024771 YFL037W 75.73 103 25 0 849 541 323 425 5e-52 177 -PZ7180000021475 YFR050C 36.40 250 140 6 123 839 17 258 8e-45 154 -PZ7180000032158 YPL198W 68.83 77 24 0 232 2 61 137 6e-30 109 -PZ7180000024778 YKL088W 48.62 109 54 2 539 219 389 497 2e-26 106 -PZ7180000033595 YDR018C 25.27 273 163 9 847 125 113 376 1e-16 79.3 -PZ7180000036896 YKL095W 28.95 114 76 3 219 545 41 154 2e-08 52.8 -PZ789897_ATR YBR019C 33.09 136 77 7 379 5 14 146 1e-09 54.3 -PZ488295 YIL106W 35.00 140 87 3 410 3 93 232 1e-22 90.1 -PZ7180000023623 YMR234W 36.42 162 83 7 1055 624 188 347 5e-14 70.9 -PZ545_OM YPL131W 43.75 64 31 1 201 392 194 252 8e-09 51.6 -PZ7180000000106_L YNL202W 28.35 254 171 4 55 804 21 267 2e-21 90.1 -PZ7180000031008 YKL060C 55.81 129 56 1 395 9 176 303 1e-49 162 -PZ7180000028269_AFW YHR013C 62.22 45 17 0 247 113 148 192 1e-14 64.7 -PZ7180000000124_L YKL055C 27.75 173 95 5 538 77 114 275 2e-10 58.2 -PZ7180000000157_L YKR009C 
26.54 260 153 8 746 6 10 244 3e-18 82.8 -PZ7180000000646_B YHR007C 31.01 129 86 2 398 21 281 409 2e-13 66.2 -PZ7180000000117_K YOR113W 39.25 107 63 1 481 167 591 697 9e-24 97.1 -PZ7180000037033_ADR YBR198C 26.07 211 109 7 219 842 523 689 8e-11 61.2 \ No newline at end of file diff --git a/data/pz_blastx_yeast_top10.txt b/data/pz_blastx_yeast_top10.txt index 0856239..1a1c1c7 100644 --- a/data/pz_blastx_yeast_top10.txt +++ b/data/pz_blastx_yeast_top10.txt @@ -1,1975 +1,1983 @@ -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000004_TX nReads=26 cov=9.436 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031590 nReads=3 cov=2.59465 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027934 nReads=5 cov=2.32231 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ456916 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037718 nReads=9 cov=6.26448 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000004_TY nReads=86 cov=36.4238 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 3 hits found -PZ7180000000004_TY YKL081W 31.07 338 197 8 13 993 1 313 1e-32 124 -PZ7180000000004_TY YPL048W 30.70 342 196 9 13 993 1 316 1e-28 112 -PZ7180000000004_TY YGR201C 24.02 204 136 4 13 582 1 199 3e-12 63.5 -# BLASTX 2.2.30+ +PZ7180000000004_TY YKL081W 31.065 338 197 8 13 993 1 313 4.46e-33 124 +PZ7180000000004_TY YPL048W 30.702 342 196 9 13 993 1 316 9.26e-29 112 +PZ7180000000004_TY YGR201C 24.020 204 136 4 13 582 1 199 2.68e-12 63.5 +# BLASTX 2.14.0+ # Query: PZ7180000000067_AF nReads=16 cov=12.0608 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031591 nReads=4 cov=3.26022 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024036 nReads=14 cov=5.86079 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ15501_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ800059 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037719 nReads=4 cov=2.62469 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000004_TZ nReads=61 cov=31.2595 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031592 nReads=4 cov=3.17598 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027936 nReads=2 cov=1.60073 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024037 nReads=26 cov=6.96922 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ1082_AB nReads=1 cov=1 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 6 hits found -PZ1082_AB YHR104W 44.92 118 62 3 4 348 196 313 1e-26 100 -PZ1082_AB YDR368W 39.00 100 50 1 1 300 196 284 2e-17 75.1 -PZ1082_AB YOR120W 36.00 100 53 1 1 300 196 284 7e-15 67.8 -PZ1082_AB YDL124W 35.16 128 70 7 7 366 184 306 1e-11 58.9 -PZ1082_AB YBR149W 29.29 99 59 1 4 300 218 305 2e-07 46.2 -PZ1082_AB YJR096W 25.25 99 68 3 7 300 166 259 6e-07 45.1 -# BLASTX 2.2.30+ +PZ1082_AB YHR104W 44.915 118 62 3 4 348 196 313 2.90e-27 100 +PZ1082_AB YDR368W 39.000 100 50 1 1 300 196 284 9.72e-18 75.1 +PZ1082_AB YOR120W 36.000 100 53 1 1 300 196 284 4.46e-15 67.8 +PZ1082_AB YDL124W 35.156 128 70 7 7 366 184 306 7.58e-12 58.9 +PZ1082_AB YBR149W 29.293 99 59 1 4 300 218 305 1.73e-07 46.2 +PZ1082_AB YJR096W 25.253 99 68 3 7 300 166 259 4.89e-07 45.1 +# BLASTX 2.14.0+ # Query: PZ7180000031593 nReads=5 cov=3.27273 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ456919 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ693347_EBM nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ342_JQ nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024039 nReads=2 cov=2.0023 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024719_APV nReads=9 cov=3.43203 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031595 nReads=2 cov=1.3588 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027939 nReads=2 cov=1.37482 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17540_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000447_A nReads=2 cov=1.52986 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000447_B nReads=7 cov=3.98616 # 
Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031596 nReads=2 cov=1.57426 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ11_FX nReads=1 cov=1 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 2 hits found -PZ11_FX YLR406C 53.01 83 38 1 290 42 25 106 7e-15 65.9 -PZ11_FX YDL075W 53.01 83 38 1 290 42 25 106 9e-15 65.5 -# BLASTX 2.2.30+ +PZ11_FX YLR406C 53.012 83 38 1 290 42 25 106 1.42e-19 65.9 +PZ11_FX YDL075W 53.012 83 38 1 290 42 25 106 1.71e-19 65.5 +# BLASTX 2.14.0+ # Query: PZ25894_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031597 nReads=2 cov=1.79653 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025470 nReads=12 cov=5.96825 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ31_GH nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031598 nReads=2 cov=1.97531 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ14988_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025471 nReads=4 cov=1.57106 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ13865_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031599 nReads=2 cov=1.375 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036150 nReads=3 cov=2.12525 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025472 nReads=18 
cov=7.143 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ640524 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000600_B nReads=9 cov=3.7459 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ24242_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036151 nReads=6 cov=2.66141 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025473 nReads=7 cov=2.38076 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025474 nReads=7 cov=3.86364 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ601258 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025475 nReads=2 cov=1.55918 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028772 nReads=2 cov=1.40778 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ15399_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ35181_B nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000633_B nReads=5 cov=3.34205 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ25398_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ23512_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ662049 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036154 nReads=4 cov=3.32031 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 1 hits found -PZ7180000036154 YNL245C 36.27 102 60 3 105 395 1 102 3e-07 46.2 -# BLASTX 2.2.30+ +PZ7180000036154 YNL245C 36.275 102 60 3 105 395 1 102 3.46e-07 46.2 +# BLASTX 2.14.0+ # Query: PZ7180000025476 nReads=4 cov=1.84059 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022520 nReads=24 cov=6.4313 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036155 nReads=17 cov=4.42362 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025477 nReads=13 cov=5.1815 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ851952 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ434291 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036156 nReads=2 cov=1.51107 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025478 nReads=9 cov=4.37636 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028775 nReads=5 cov=3.61232 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ732126 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028457_BMW nReads=5 cov=3.06176 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022522 nReads=6 cov=3.58447 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025479 nReads=8 cov=2.64862 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028776 nReads=4 cov=2.71399 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: 
PZ15552_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ750331 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ333_CO nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022523 nReads=28 cov=7.67314 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ801814 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028777 nReads=3 cov=1.47983 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ830890 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17053_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ22783_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022524 nReads=36 cov=9.20119 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ463243 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028778 nReads=9 cov=4.15691 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ525403 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022525 nReads=11 cov=2.73147 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028779 nReads=3 cov=1.58095 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ605962 nReads=1 cov=1 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 1 hits found -PZ605962 YKR079C 29.57 115 66 4 429 121 479 590 3e-11 59.3 -# BLASTX 2.2.30+ +PZ605962 YKR079C 29.565 115 66 4 429 121 479 590 1.13e-11 59.3 +# BLASTX 2.14.0+ # Query: PZ728940 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030412 nReads=6 cov=3.56202 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ625652_CJK nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000067_AY nReads=14 cov=5.1865 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ22254_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ20930_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022527 nReads=11 cov=5.04943 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ841572 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022528 nReads=8 cov=3.47335 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030414 nReads=2 cov=1.76887 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ783221_AOC nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ532963 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ674165 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022529 nReads=21 cov=5.55482 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019700 nReads=3 cov=2.25532 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ462889 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ 
# Query: PZ797650 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019701 nReads=40 cov=22.5442 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030416 nReads=2 cov=1.86179 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ856513 nReads=1 cov=1 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 1 hits found -PZ856513 YKL215C 48.39 155 73 3 3 452 109 261 3e-37 135 -# BLASTX 2.2.30+ +PZ856513 YKL215C 48.387 155 73 3 3 452 109 261 2.90e-38 135 +# BLASTX 2.14.0+ # Query: PZ725649 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ803611 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019702 nReads=12 cov=8.68396 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030417 nReads=2 cov=1.76136 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ16936_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019703 nReads=7 cov=4.15682 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030418 nReads=2 cov=1.70149 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ3202_E nReads=1 cov=1 # Database: orf_trans -# 0 hits found -# BLASTX 2.2.30+ +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 1 hits found +PZ3202_E YBR247C 64.000 25 9 0 344 418 373 397 3.00e-09 40.4 +# BLASTX 2.14.0+ # Query: PZ729309 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ835452 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ19882_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ621267 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019704 nReads=5 cov=3.39701 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030419 nReads=2 cov=1.99686 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ817248 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ527201 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ20795_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ565168 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019705 nReads=10 cov=4.33742 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ4990_P nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ467980 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019706 nReads=4 cov=2.83127 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ701633 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019707 nReads=10 cov=8.47059 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ21389_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: 
PZ699332 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ598313 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ852490_ETW nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ22908_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17500_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036719_ARD nReads=7 cov=4.1479 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ457464_ZN nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17539_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ821824 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ19001_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000146_L nReads=30 cov=20.4413 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ25854_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ20981_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ614788 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ922_U nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021471 nReads=48 cov=10.1479 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ439397 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ1028_K nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032150 nReads=2 
cov=1.91414 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ861581 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ770345 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ794706 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021473 nReads=44 cov=8.81014 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024770 nReads=23 cov=7.9627 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ13077_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032152 nReads=3 cov=2.07776 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021474 nReads=46 cov=11.7995 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024771 nReads=12 cov=4.81316 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 3 hits found -PZ7180000024771 YFL037W 75.73 103 25 0 849 541 323 425 5e-52 177 -PZ7180000024771 YML085C 34.58 107 62 1 846 550 327 433 5e-18 81.6 -PZ7180000024771 YML124C 34.58 107 62 1 846 550 327 433 5e-18 81.6 -# BLASTX 2.2.30+ +PZ7180000024771 YFL037W 75.728 103 25 0 849 541 323 425 4.20e-53 177 +PZ7180000024771 YML085C 34.579 107 62 1 846 550 327 433 4.22e-18 81.6 +PZ7180000024771 YML124C 34.579 107 62 1 846 550 327 433 4.36e-18 81.6 +# BLASTX 2.14.0+ # Query: PZ524382 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ2143_BP nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032153 nReads=2 cov=1.96026 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021475 nReads=101 cov=41.3457 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 1 hits found -PZ7180000021475 YFR050C 36.40 250 140 6 123 839 17 258 8e-45 154 -# BLASTX 2.2.30+ +PZ7180000021475 YFR050C 36.400 250 140 6 123 839 17 258 6.22e-46 154 +# BLASTX 2.14.0+ # Query: PZ800795 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ735272 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036480_AIJ nReads=11 cov=4.61416 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032154 nReads=2 cov=1.43258 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024773 nReads=2 cov=1.57051 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ18488_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032155 nReads=3 cov=2.1542 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021477 nReads=17 cov=8.17316 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024774 nReads=3 cov=1.74725 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ796092 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021478 nReads=32 cov=11.3823 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ742703 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032157 nReads=2 cov=1.86353 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021479 nReads=45 cov=8.50979 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024776 nReads=8 cov=3.44402 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ759636 nReads=1 cov=1 # Database: orf_trans # 0 
hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026030 nReads=9 cov=4.08691 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ799391 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000626_B nReads=2 cov=2.00401 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032158 nReads=2 cov=1.90271 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 3 hits found -PZ7180000032158 YPL198W 68.83 77 24 0 232 2 61 137 6e-30 109 -PZ7180000032158 YGL076C 68.83 77 24 0 232 2 61 137 6e-30 109 -PZ7180000032158 YNL002C 49.12 57 27 1 166 2 141 197 8e-09 51.2 -# BLASTX 2.2.30+ +PZ7180000032158 YPL198W 68.831 77 24 0 232 2 61 137 7.38e-31 109 +PZ7180000032158 YGL076C 68.831 77 24 0 232 2 61 137 7.38e-31 109 +PZ7180000032158 YNL002C 49.123 57 27 1 166 2 141 197 6.87e-09 51.2 +# BLASTX 2.14.0+ # Query: PZ7180000024777 nReads=6 cov=2.38934 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ19581_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026031 nReads=6 cov=2.98716 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ24989_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032159 nReads=2 cov=1.65815 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024778 nReads=8 cov=4.29654 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 3 hits found -PZ7180000024778 YKL088W 48.62 109 54 2 539 219 389 497 2e-26 106 -PZ7180000024778 YKR072C 46.09 115 59 2 557 222 370 484 2e-25 103 -PZ7180000024778 YOR054C 44.07 118 55 3 554 222 452 565 2e-24 100 -# BLASTX 2.2.30+ +PZ7180000024778 YKL088W 48.624 109 54 2 539 219 389 497 8.53e-32 106 +PZ7180000024778 YKR072C 46.087 115 59 2 557 222 370 484 1.08e-25 103 +PZ7180000024778 YOR054C 44.068 118 55 3 554 222 452 565 8.91e-25 100 +# BLASTX 2.14.0+ # Query: PZ782460 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026032 nReads=7 cov=2.43676 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ448818_ACV nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ20494_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024779 nReads=11 cov=4.86829 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ542594 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ483605 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ670161 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026034 nReads=6 cov=2.34378 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ23899_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000029332 nReads=3 cov=1.77778 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ510757 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ655256 nReads=1 cov=1 # 
Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ483608 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026036 nReads=13 cov=7.56672 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000033592 nReads=7 cov=3.2 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000029334 nReads=2 cov=1.65775 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000033593 nReads=18 cov=6.02895 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037250 nReads=8 cov=4.15104 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000029335 nReads=2 cov=1.69853 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ858766 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000033594 nReads=15 cov=4.74728 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ740242 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037251 nReads=24 cov=13.5712 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026039 nReads=12 cov=5.73953 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000029336 nReads=2 cov=1.91885 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ805359 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030970_AAC nReads=3 cov=1.81164 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000033595 nReads=29 cov=8.84946 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 2 hits found -PZ7180000033595 YDR018C 25.27 273 163 9 847 125 113 376 1e-16 79.3 -PZ7180000033595 YBR042C 24.05 291 176 9 844 101 109 397 7e-14 71.2 -# BLASTX 2.2.30+ +PZ7180000033595 YDR018C 25.275 273 163 9 847 125 113 376 1.21e-16 79.3 +PZ7180000033595 YBR042C 24.055 291 176 9 844 101 109 397 6.34e-14 71.2 +# BLASTX 2.14.0+ # Query: PZ7180000036892 nReads=2 cov=1.73175 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000029337 nReads=3 cov=1.97485 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000033596 nReads=2 cov=1.40179 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036893 nReads=4 cov=2.73298 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ649135 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037253 nReads=5 cov=3.52734 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ492422 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000029338 nReads=5 cov=4.05486 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ858769 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ448536 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000024876_AYM nReads=6 cov=2.00192 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ662789 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037254 nReads=6 cov=3.82863 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: 
PZ7180000029339 nReads=2 cov=1.81875 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031000 nReads=2 cov=1.89327 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ25024_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ21878_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036895 nReads=23 cov=18.3988 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ583660 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037255 nReads=11 cov=4.49815 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17031_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031001 nReads=9 cov=5.1711 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17593_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036896 nReads=9 cov=3.66603 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 1 hits found -PZ7180000036896 YKL095W 28.95 114 76 3 219 545 41 154 2e-08 52.8 -# BLASTX 2.2.30+ +PZ7180000036896 YKL095W 28.947 114 76 3 219 545 41 154 5.77e-11 52.8 +# BLASTX 2.14.0+ # Query: PZ7180000037256 nReads=11 cov=6.35108 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031002 nReads=2 cov=1.79947 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ789897_ATR nReads=1 cov=1 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 1 hits found -PZ789897_ATR YBR019C 33.09 136 77 7 379 5 14 146 1e-09 54.3 -# BLASTX 2.2.30+ +PZ789897_ATR YBR019C 33.088 136 77 7 379 5 14 146 5.47e-10 54.3 +# BLASTX 2.14.0+ # Query: PZ7180000000620_B nReads=2 cov=1.33182 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037257 nReads=15 cov=7.2512 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ453158 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031003 nReads=2 cov=1.6375 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023622 nReads=3 cov=2.0131 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ21349_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036898 nReads=15 cov=5.75025 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ488295 nReads=1 cov=1 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 2 hits found -PZ488295 YIL106W 35.00 140 87 3 410 3 93 232 1e-22 90.1 -PZ488295 YFL034C-B 26.02 123 91 0 371 3 78 200 7e-17 73.6 -# BLASTX 2.2.30+ +PZ488295 YIL106W 35.000 140 87 3 410 3 93 232 5.54e-23 90.1 +PZ488295 YFL034C-B 26.016 123 91 0 371 3 78 200 5.29e-17 73.6 +# BLASTX 2.14.0+ # Query: PZ732868 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ442821 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ779644 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17064_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031004 nReads=6 cov=4.99628 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000020326 nReads=6 cov=4.36897 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023623 nReads=27 cov=7.58566 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 1 hits found -PZ7180000023623 YMR234W 36.42 162 83 7 1055 624 188 347 5e-14 70.9 -# BLASTX 2.2.30+ +PZ7180000023623 YMR234W 36.420 162 83 7 1055 624 188 347 3.96e-14 70.9 +# BLASTX 2.14.0+ # Query: PZ7180000020436_DPJ nReads=4 cov=3.1009 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036899 nReads=2 cov=1.27059 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031005 nReads=2 cov=2 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000020327 nReads=4 cov=3.39468 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023624 nReads=2 cov=1.54247 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000653_B nReads=677 cov=197.683 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ24295_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031006 nReads=3 cov=2.35421 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023625 nReads=7 cov=6.30243 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ545_OM nReads=1 cov=1 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 1 hits found -PZ545_OM YPL131W 43.75 64 31 1 201 392 194 252 8e-09 51.6 -# BLASTX 2.2.30+ +PZ545_OM YPL131W 43.750 64 31 1 201 392 194 252 7.63e-09 51.6 +# BLASTX 2.14.0+ # Query: PZ7180000000106_K nReads=3 cov=1.52771 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ653712 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ614444 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ667347 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031007 nReads=5 cov=3.13828 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ859196_FBK nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000106_L nReads=50 cov=20.8824 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 9 hits found -PZ7180000000106_L YNL202W 28.35 254 171 4 55 804 21 267 2e-21 90.1 -PZ7180000000106_L YMR226C 31.47 251 150 8 67 789 14 252 1e-20 87.4 -PZ7180000000106_L YIR035C 29.25 212 132 5 67 672 3 206 3e-16 74.7 -PZ7180000000106_L YIL124W 32.09 187 121 5 67 621 10 192 7e-14 68.9 -PZ7180000000106_L YIR036C 29.06 203 132 4 67 654 3 200 4e-13 65.9 -PZ7180000000106_L YBR159W 27.50 240 148 7 61 726 61 292 1e-10 59.3 -PZ7180000000106_L YKL055C 22.74 277 162 8 70 765 6 275 5e-10 57.4 -PZ7180000000106_L YOR246C 26.70 221 121 10 67 618 17 233 1e-09 56.2 -PZ7180000000106_L YKR009C 25.37 201 129 4 274 858 387 572 1e-09 57.0 -# BLASTX 2.2.30+ +PZ7180000000106_L YNL202W 28.346 254 171 4 55 804 21 267 1.88e-21 90.1 +PZ7180000000106_L YMR226C 31.474 251 150 8 67 789 14 252 1.07e-20 87.4 +PZ7180000000106_L YIR035C 29.245 212 132 5 67 672 3 206 3.72e-16 74.7 +PZ7180000000106_L YKR009C 32.143 196 114 5 58 609 7 195 8.57e-16 75.5 +PZ7180000000106_L YIL124W 32.086 187 121 5 67 621 10 192 6.42e-14 68.9 +PZ7180000000106_L YIR036C 29.064 203 132 4 67 654 3 200 4.25e-13 65.9 +PZ7180000000106_L YBR159W 27.500 240 148 7 61 726 61 292 1.31e-10 59.3 +PZ7180000000106_L YKL055C 22.744 277 162 8 70 765 6 275 4.28e-10 57.4 +PZ7180000000106_L YOR246C 26.697 221 121 10 67 618 17 233 1.26e-09 56.2 +# BLASTX 2.14.0+ # Query: PZ13004_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031008 nReads=2 cov=1.83586 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 1 hits found -PZ7180000031008 YKL060C 55.81 129 56 1 395 9 176 303 1e-49 162 -# BLASTX 2.2.30+ +PZ7180000031008 YKL060C 55.814 129 56 1 395 9 176 303 9.31e-51 162 +# BLASTX 2.14.0+ # Query: PZ7180000020329 nReads=7 cov=3.04407 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023627 nReads=3 cov=2.01274 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000686_B nReads=332 cov=190.836 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ23003_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ22442_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ653714 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ726746 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031009 nReads=5 cov=4.2027 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023628 nReads=43 cov=14.7064 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000106_N nReads=9 cov=4.46205 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ790706 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ744951 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000020795_DWY nReads=2 cov=1.48972 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ13037_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ580713 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023629 nReads=22 cov=10.7028 # Database: orf_trans # 0 hits found -# BLASTX 
2.2.30+ +# BLASTX 2.14.0+ # Query: PZ14482_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ449977 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ24481_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ777183 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ836045 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035710 nReads=5 cov=3.49212 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ667690_CHO nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ455784_CBD nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000106_S nReads=3 cov=2.80237 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ527941 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035711 nReads=19 cov=9.8867 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ710112 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ752337 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000106_T nReads=23 cov=8.1159 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ560014 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035712 nReads=41 cov=16.5557 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ828668 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035713 nReads=21 cov=12.5486 # Database: orf_trans # 0 hits found -# 
BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ567164 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022060 nReads=7 cov=3.52941 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ535325 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035714 nReads=24 cov=10.9168 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028269_AFW nReads=3 cov=2.97992 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 1 hits found -PZ7180000028269_AFW YHR013C 62.22 45 17 0 247 113 148 192 1e-14 64.7 -# BLASTX 2.2.30+ +PZ7180000028269_AFW YHR013C 62.222 45 17 0 247 113 148 192 7.06e-15 64.7 +# BLASTX 2.14.0+ # Query: PZ7180000000124_K nReads=2 cov=1.99288 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ24949_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ792095 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ651253 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035715 nReads=3 cov=2.16468 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000124_L nReads=12 cov=5.33621 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score -# 5 hits found -PZ7180000000124_L YKL055C 27.75 173 95 5 538 77 114 275 2e-10 58.2 -PZ7180000000124_L YIR035C 30.77 169 110 4 706 206 22 185 2e-10 57.8 -PZ7180000000124_L YMR226C 31.21 141 86 4 613 206 71 205 3e-10 57.8 -PZ7180000000124_L YIL124W 33.58 137 84 4 607 206 62 194 5e-10 57.0 -PZ7180000000124_L YIR036C 28.18 181 117 6 601 92 59 237 2e-09 55.1 -# BLASTX 2.2.30+ +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 7 hits found +PZ7180000000124_L YNL202W 26.840 231 139 8 682 38 51 267 1.40e-11 61.6 +PZ7180000000124_L YKL055C 27.746 173 95 5 538 77 114 275 1.92e-10 58.2 +PZ7180000000124_L YIR035C 30.769 169 110 4 706 206 22 185 2.13e-10 57.8 +PZ7180000000124_L YMR226C 31.206 141 86 4 613 206 71 205 2.61e-10 57.8 +PZ7180000000124_L YIL124W 33.577 137 84 4 607 206 62 194 4.54e-10 57.0 +PZ7180000000124_L YIR036C 28.177 181 117 6 601 92 59 237 1.92e-09 55.1 +PZ7180000000124_L YKR009C 28.926 121 76 4 577 227 81 195 6.69e-07 48.1 +# BLASTX 2.14.0+ # Query: PZ21577_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ20454_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022062 nReads=3 cov=2.16056 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ535327 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035716 nReads=13 cov=5.65193 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000030879_DFJ nReads=6 cov=3.75406 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022063 nReads=2 cov=1.37143 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ808910 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # 
Query: PZ760258 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ19012_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000157_L nReads=30 cov=14.6842 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score -# 3 hits found -PZ7180000000157_L YKR009C 26.54 260 153 8 746 6 10 244 3e-18 82.8 -PZ7180000000157_L YNL202W 29.41 255 167 5 758 9 21 267 1e-16 75.9 -PZ7180000000157_L YDL114W 26.96 115 76 2 473 150 118 231 7e-08 50.8 -# BLASTX 2.2.30+ +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 4 hits found +PZ7180000000157_L YKR009C 26.538 260 153 8 746 6 10 244 2.27e-18 82.8 +PZ7180000000157_L YNL202W 29.412 255 167 5 758 9 21 267 1.75e-16 75.9 +PZ7180000000157_L YKL055C 27.675 271 150 10 743 57 6 272 8.48e-11 59.3 +PZ7180000000157_L YDL114W 26.957 115 76 2 473 150 118 231 6.36e-08 50.8 +# BLASTX 2.14.0+ # Query: PZ7180000035718 nReads=6 cov=4.15929 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ13055_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022066 nReads=13 cov=9.16058 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000035719 nReads=10 cov=5.99667 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ768110 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022067 nReads=4 cov=2.24584 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ538629 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027517_ADM nReads=10 cov=7.20455 # Database: orf_trans 
# 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022068 nReads=2 cov=1.92658 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ13088_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ503931 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ856051 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000022069 nReads=9 cov=6.87952 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ470217 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ18643_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ856052 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ775640 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ16108_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ803151 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019242 nReads=456 cov=113.356 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ18676_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ596919_AEA nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ789277 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ555871_APA nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027130 nReads=5 cov=2.9668 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023665_ATQ nReads=2 cov=1.70472 # Database: orf_trans # 0 hits 
found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ19592_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ17024_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000019245 nReads=4 cov=2.6625 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026772 nReads=4 cov=2.50095 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027132 nReads=5 cov=2.35828 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ663374 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ853101 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000032899 nReads=151 cov=98.8562 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036376_BVQ nReads=17 cov=6.99178 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000613_B nReads=4 cov=2.17132 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ25378_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026773 nReads=4 cov=1.88667 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027133 nReads=6 cov=3.32907 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ564758 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026774 nReads=14 cov=9.37331 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027134 nReads=3 cov=1.69639 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ13709_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: 
PZ558633 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026775 nReads=4 cov=2.28205 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ797198 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ773_MP nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000646_B nReads=3 cov=2.56126 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 2 hits found -PZ7180000000646_B YHR007C 31.01 129 86 2 398 21 281 409 2e-13 66.2 -PZ7180000000646_B YMR015C 29.63 108 69 3 350 48 321 428 2e-07 47.8 -# BLASTX 2.2.30+ +PZ7180000000646_B YHR007C 31.008 129 86 2 398 21 281 409 8.52e-14 66.2 +PZ7180000000646_B YMR015C 29.630 108 69 3 350 48 321 428 1.65e-07 47.8 +# BLASTX 2.14.0+ # Query: PZ23525_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ733424 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027136 nReads=4 cov=3.3192 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ663378 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ15532_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027137 nReads=3 cov=1.70282 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023820 nReads=32 cov=13.4486 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000034692 nReads=12 cov=5.9462 # Database: orf_trans # 0 hits found -# BLASTX 
2.2.30+ +# BLASTX 2.14.0+ # Query: PZ33524_B nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027138 nReads=9 cov=3.68491 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023821 nReads=18 cov=5.11978 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037990 nReads=5 cov=3.28261 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ173_DV nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ24119_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ821366 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000026779 nReads=4 cov=2.56593 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000027139 nReads=8 cov=3.7925 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ15565_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ16688_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037991 nReads=6 cov=3.57916 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025499_BEG nReads=7 cov=2.23689 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ19072_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ708737 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ453893 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023823 nReads=26 cov=12.1673 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000034695 nReads=42 cov=18.4356 # 
Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000021111_BJN nReads=60 cov=23.2971 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ639490 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023824 nReads=11 cov=4.24772 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037993 nReads=4 cov=3.20534 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ824666 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ16159_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037994 nReads=7 cov=3.38238 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031741 nReads=3 cov=1.5929 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ773185 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000034698 nReads=50 cov=29.0801 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037995 nReads=9 cov=4.58665 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ21504_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ832049 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031742 nReads=4 cov=3.36879 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000034699 nReads=27 cov=12.3133 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037996 nReads=9 cov=5.39332 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031743 nReads=3 cov=2.1412 # Database: orf_trans # 0 hits found -# BLASTX 
2.2.30+ +# BLASTX 2.14.0+ # Query: PZ788124 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023827 nReads=20 cov=5.14369 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ509932 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031744 nReads=2 cov=1.82792 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ21537_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031745 nReads=2 cov=1.85 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000023829 nReads=6 cov=3.99115 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037999 nReads=8 cov=4.86677 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000664_B nReads=73 cov=38.4985 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ22982_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ22420_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ482783 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ721182 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000033000 nReads=45 cov=25.9408 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000000117_K nReads=6 cov=4.05979 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score # 10 hits found -PZ7180000000117_K YOR113W 39.25 107 63 1 481 167 591 697 9e-24 97.1 -PZ7180000000117_K YJL056C 40.19 107 62 1 481 161 766 870 4e-21 89.4 -PZ7180000000117_K YNL027W 37.84 111 50 2 475 143 569 660 7e-16 73.6 -PZ7180000000117_K YPR186C 34.91 106 64 3 376 68 56 159 3e-13 65.1 -PZ7180000000117_K YMR037C 41.82 55 31 1 484 323 644 698 6e-10 55.5 -PZ7180000000117_K YDR216W 38.55 83 44 2 289 44 87 163 1e-09 54.7 -PZ7180000000117_K YJR127C 40.74 54 32 0 229 68 149 202 1e-09 54.7 -PZ7180000000117_K YML081W 31.40 86 51 1 322 65 36 113 1e-09 54.3 -PZ7180000000117_K YKL062W 37.04 54 33 1 481 323 571 624 2e-09 53.5 -PZ7180000000117_K YGL209W 48.28 58 28 1 481 314 15 72 1e-08 51.2 -# BLASTX 2.2.30+ +PZ7180000000117_K YOR113W 39.252 107 63 1 481 167 591 697 1.50e-24 97.1 +PZ7180000000117_K YJL056C 40.187 107 62 1 481 161 766 870 7.68e-22 89.4 +PZ7180000000117_K YNL027W 37.838 111 50 2 475 143 569 660 2.04e-16 73.6 +PZ7180000000117_K YPR186C 34.906 106 64 3 376 68 56 159 1.43e-13 65.1 +PZ7180000000117_K YMR037C 41.818 55 31 1 484 323 644 698 4.24e-10 55.5 +PZ7180000000117_K YDR216W 38.554 83 44 2 289 44 87 163 8.23e-10 54.7 +PZ7180000000117_K YJR127C 40.741 54 32 0 229 68 149 202 8.58e-10 54.7 +PZ7180000000117_K YML081W 31.395 86 51 1 322 65 36 113 9.73e-10 54.3 +PZ7180000000117_K YKL062W 37.037 54 33 1 481 323 571 624 1.76e-09 53.5 +PZ7180000000117_K YGL209W 48.276 58 28 1 481 314 15 72 9.34e-09 51.2 +# BLASTX 2.14.0+ # Query: PZ7180000025620 nReads=9 cov=3.50955 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ506638 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ734865 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ625862 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000033001 nReads=137 cov=58.1597 # Database: orf_trans # 0 hits found -# BLASTX 
2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000037033_ADR nReads=7 cov=2.99701 # Database: orf_trans -# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score -# 3 hits found -PZ7180000037033_ADR YBR198C 26.07 211 109 7 219 842 523 689 8e-11 61.2 -PZ7180000037033_ADR YNL253W 28.46 123 86 1 465 827 206 328 7e-09 54.7 -PZ7180000037033_ADR YPR178W 24.76 206 144 5 369 953 181 386 9e-08 51.6 -# BLASTX 2.2.30+ +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 4 hits found +PZ7180000037033_ADR YBR198C 26.066 211 109 7 219 842 523 689 7.70e-11 61.2 +PZ7180000037033_ADR YNL253W 28.455 123 86 1 465 827 206 328 6.98e-09 54.7 +PZ7180000037033_ADR YCR072C 32.432 74 48 2 213 431 440 512 2.20e-08 45.8 +PZ7180000037033_ADR YPR178W 24.757 206 144 5 369 953 181 386 8.39e-08 51.6 +# BLASTX 2.14.0+ # Query: PZ7180000000117_L nReads=18 cov=7.786 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ21008_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025621 nReads=4 cov=2.91786 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ14138_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000031747 nReads=3 cov=2.40482 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ753430 nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ4305_H nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ22453_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025622 nReads=6 cov=3.08579 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: 
PZ7180000031748 nReads=3 cov=2.81962 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ15583_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036300 nReads=3 cov=2.2153 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ25077_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000025623 nReads=8 cov=3.60964 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028920 nReads=2 cov=1.46573 # Database: orf_trans -# 0 hits found -# BLASTX 2.2.30+ +# Fields: query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 1 hits found +PZ7180000028920 YNL243W 24.138 116 83 1 37 384 102 212 4.59e-10 44.7 +# BLASTX 2.14.0+ # Query: PZ7180000031749 nReads=4 cov=2.6841 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036301 nReads=20 cov=4.63444 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028921 nReads=3 cov=2.11052 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036302 nReads=5 cov=3.8 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028922 nReads=2 cov=1.67442 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ18603_A nReads=1 cov=1 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000036303 nReads=12 cov=4.41569 # Database: orf_trans # 0 hits found -# BLASTX 2.2.30+ +# BLASTX 2.14.0+ # Query: PZ7180000028923 nReads=3 cov=2.07642 # Database: orf_trans # 0 hits found -# BLAST processed 471 queries \ No newline at end of file +# BLAST processed 471 queries diff --git a/data/pz_blastx_yeast_top10_v1.txt b/data/pz_blastx_yeast_top10_v1.txt new file 
mode 100644 index 0000000..0856239 --- /dev/null +++ b/data/pz_blastx_yeast_top10_v1.txt @@ -0,0 +1,1975 @@ +# BLASTX 2.2.30+ +# Query: PZ7180000000004_TX nReads=26 cov=9.436 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031590 nReads=3 cov=2.59465 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027934 nReads=5 cov=2.32231 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ456916 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037718 nReads=9 cov=6.26448 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000004_TY nReads=86 cov=36.4238 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 3 hits found +PZ7180000000004_TY YKL081W 31.07 338 197 8 13 993 1 313 1e-32 124 +PZ7180000000004_TY YPL048W 30.70 342 196 9 13 993 1 316 1e-28 112 +PZ7180000000004_TY YGR201C 24.02 204 136 4 13 582 1 199 3e-12 63.5 +# BLASTX 2.2.30+ +# Query: PZ7180000000067_AF nReads=16 cov=12.0608 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031591 nReads=4 cov=3.26022 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024036 nReads=14 cov=5.86079 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ15501_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ800059 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037719 nReads=4 cov=2.62469 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000004_TZ nReads=61 cov=31.2595 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031592 nReads=4 cov=3.17598 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027936 nReads=2 cov=1.60073 +# Database: orf_trans +# 0 hits found +# BLASTX 
2.2.30+ +# Query: PZ7180000024037 nReads=26 cov=6.96922 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ1082_AB nReads=1 cov=1 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 6 hits found +PZ1082_AB YHR104W 44.92 118 62 3 4 348 196 313 1e-26 100 +PZ1082_AB YDR368W 39.00 100 50 1 1 300 196 284 2e-17 75.1 +PZ1082_AB YOR120W 36.00 100 53 1 1 300 196 284 7e-15 67.8 +PZ1082_AB YDL124W 35.16 128 70 7 7 366 184 306 1e-11 58.9 +PZ1082_AB YBR149W 29.29 99 59 1 4 300 218 305 2e-07 46.2 +PZ1082_AB YJR096W 25.25 99 68 3 7 300 166 259 6e-07 45.1 +# BLASTX 2.2.30+ +# Query: PZ7180000031593 nReads=5 cov=3.27273 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ456919 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ693347_EBM nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ342_JQ nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024039 nReads=2 cov=2.0023 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024719_APV nReads=9 cov=3.43203 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031595 nReads=2 cov=1.3588 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027939 nReads=2 cov=1.37482 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17540_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000447_A nReads=2 cov=1.52986 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000447_B nReads=7 cov=3.98616 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031596 nReads=2 cov=1.57426 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ11_FX nReads=1 cov=1 +# Database: orf_trans +# Fields: query id, subject id, % 
identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 2 hits found +PZ11_FX YLR406C 53.01 83 38 1 290 42 25 106 7e-15 65.9 +PZ11_FX YDL075W 53.01 83 38 1 290 42 25 106 9e-15 65.5 +# BLASTX 2.2.30+ +# Query: PZ25894_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031597 nReads=2 cov=1.79653 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025470 nReads=12 cov=5.96825 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ31_GH nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031598 nReads=2 cov=1.97531 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ14988_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025471 nReads=4 cov=1.57106 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ13865_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031599 nReads=2 cov=1.375 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036150 nReads=3 cov=2.12525 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025472 nReads=18 cov=7.143 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ640524 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000600_B nReads=9 cov=3.7459 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ24242_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036151 nReads=6 cov=2.66141 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025473 nReads=7 cov=2.38076 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025474 nReads=7 cov=3.86364 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ601258 nReads=1 cov=1 +# Database: orf_trans 
+# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025475 nReads=2 cov=1.55918 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028772 nReads=2 cov=1.40778 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ15399_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ35181_B nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000633_B nReads=5 cov=3.34205 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ25398_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ23512_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ662049 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036154 nReads=4 cov=3.32031 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 1 hits found +PZ7180000036154 YNL245C 36.27 102 60 3 105 395 1 102 3e-07 46.2 +# BLASTX 2.2.30+ +# Query: PZ7180000025476 nReads=4 cov=1.84059 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022520 nReads=24 cov=6.4313 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036155 nReads=17 cov=4.42362 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025477 nReads=13 cov=5.1815 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ851952 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ434291 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036156 nReads=2 cov=1.51107 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025478 nReads=9 cov=4.37636 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028775 nReads=5 cov=3.61232 +# Database: orf_trans +# 0 
hits found +# BLASTX 2.2.30+ +# Query: PZ732126 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028457_BMW nReads=5 cov=3.06176 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022522 nReads=6 cov=3.58447 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025479 nReads=8 cov=2.64862 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028776 nReads=4 cov=2.71399 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ15552_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ750331 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ333_CO nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022523 nReads=28 cov=7.67314 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ801814 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028777 nReads=3 cov=1.47983 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ830890 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17053_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ22783_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022524 nReads=36 cov=9.20119 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ463243 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028778 nReads=9 cov=4.15691 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ525403 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022525 nReads=11 cov=2.73147 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028779 nReads=3 cov=1.58095 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: 
PZ605962 nReads=1 cov=1 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 1 hits found +PZ605962 YKR079C 29.57 115 66 4 429 121 479 590 3e-11 59.3 +# BLASTX 2.2.30+ +# Query: PZ728940 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030412 nReads=6 cov=3.56202 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ625652_CJK nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000067_AY nReads=14 cov=5.1865 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ22254_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ20930_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022527 nReads=11 cov=5.04943 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ841572 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022528 nReads=8 cov=3.47335 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030414 nReads=2 cov=1.76887 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ783221_AOC nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ532963 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ674165 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022529 nReads=21 cov=5.55482 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019700 nReads=3 cov=2.25532 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ462889 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ797650 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019701 nReads=40 cov=22.5442 +# Database: 
orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030416 nReads=2 cov=1.86179 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ856513 nReads=1 cov=1 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 1 hits found +PZ856513 YKL215C 48.39 155 73 3 3 452 109 261 3e-37 135 +# BLASTX 2.2.30+ +# Query: PZ725649 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ803611 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019702 nReads=12 cov=8.68396 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030417 nReads=2 cov=1.76136 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ16936_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019703 nReads=7 cov=4.15682 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030418 nReads=2 cov=1.70149 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ3202_E nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ729309 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ835452 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ19882_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ621267 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019704 nReads=5 cov=3.39701 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030419 nReads=2 cov=1.99686 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ817248 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ527201 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ20795_A 
nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ565168 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019705 nReads=10 cov=4.33742 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ4990_P nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ467980 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019706 nReads=4 cov=2.83127 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ701633 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019707 nReads=10 cov=8.47059 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ21389_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ699332 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ598313 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ852490_ETW nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ22908_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17500_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036719_ARD nReads=7 cov=4.1479 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ457464_ZN nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17539_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ821824 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ19001_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000146_L nReads=30 cov=20.4413 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ25854_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ20981_A 
nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ614788 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ922_U nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021471 nReads=48 cov=10.1479 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ439397 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ1028_K nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032150 nReads=2 cov=1.91414 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ861581 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ770345 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ794706 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021473 nReads=44 cov=8.81014 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024770 nReads=23 cov=7.9627 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ13077_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032152 nReads=3 cov=2.07776 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021474 nReads=46 cov=11.7995 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024771 nReads=12 cov=4.81316 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 3 hits found +PZ7180000024771 YFL037W 75.73 103 25 0 849 541 323 425 5e-52 177 +PZ7180000024771 YML085C 34.58 107 62 1 846 550 327 433 5e-18 81.6 +PZ7180000024771 YML124C 34.58 107 62 1 846 550 327 433 5e-18 81.6 +# BLASTX 2.2.30+ +# Query: PZ524382 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ2143_BP nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032153 nReads=2 cov=1.96026 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021475 nReads=101 cov=41.3457 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 1 hits found +PZ7180000021475 YFR050C 36.40 250 140 6 123 839 17 258 8e-45 154 +# BLASTX 2.2.30+ +# Query: PZ800795 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ735272 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036480_AIJ nReads=11 cov=4.61416 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032154 nReads=2 cov=1.43258 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024773 nReads=2 cov=1.57051 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ18488_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032155 nReads=3 cov=2.1542 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021477 nReads=17 cov=8.17316 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024774 nReads=3 cov=1.74725 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ796092 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021478 nReads=32 cov=11.3823 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ742703 nReads=1 cov=1 +# 
Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032157 nReads=2 cov=1.86353 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021479 nReads=45 cov=8.50979 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024776 nReads=8 cov=3.44402 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ759636 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026030 nReads=9 cov=4.08691 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ799391 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000626_B nReads=2 cov=2.00401 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032158 nReads=2 cov=1.90271 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 3 hits found +PZ7180000032158 YPL198W 68.83 77 24 0 232 2 61 137 6e-30 109 +PZ7180000032158 YGL076C 68.83 77 24 0 232 2 61 137 6e-30 109 +PZ7180000032158 YNL002C 49.12 57 27 1 166 2 141 197 8e-09 51.2 +# BLASTX 2.2.30+ +# Query: PZ7180000024777 nReads=6 cov=2.38934 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ19581_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026031 nReads=6 cov=2.98716 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ24989_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032159 nReads=2 cov=1.65815 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024778 nReads=8 cov=4.29654 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 3 hits found +PZ7180000024778 YKL088W 48.62 109 54 2 539 219 389 497 2e-26 106 +PZ7180000024778 YKR072C 46.09 115 59 2 557 222 370 484 2e-25 103 +PZ7180000024778 YOR054C 44.07 118 55 3 554 222 452 565 2e-24 100 +# BLASTX 2.2.30+ +# Query: PZ782460 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026032 nReads=7 cov=2.43676 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ448818_ACV nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ20494_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024779 nReads=11 cov=4.86829 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ542594 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ483605 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ670161 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026034 nReads=6 cov=2.34378 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ23899_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000029332 nReads=3 cov=1.77778 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ510757 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ655256 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ483608 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026036 nReads=13 cov=7.56672 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000033592 nReads=7 cov=3.2 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000029334 nReads=2 cov=1.65775 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000033593 nReads=18 cov=6.02895 +# Database: orf_trans +# 0 hits found +# BLASTX 
2.2.30+ +# Query: PZ7180000037250 nReads=8 cov=4.15104 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000029335 nReads=2 cov=1.69853 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ858766 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000033594 nReads=15 cov=4.74728 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ740242 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037251 nReads=24 cov=13.5712 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026039 nReads=12 cov=5.73953 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000029336 nReads=2 cov=1.91885 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ805359 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030970_AAC nReads=3 cov=1.81164 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000033595 nReads=29 cov=8.84946 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 2 hits found +PZ7180000033595 YDR018C 25.27 273 163 9 847 125 113 376 1e-16 79.3 +PZ7180000033595 YBR042C 24.05 291 176 9 844 101 109 397 7e-14 71.2 +# BLASTX 2.2.30+ +# Query: PZ7180000036892 nReads=2 cov=1.73175 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000029337 nReads=3 cov=1.97485 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000033596 nReads=2 cov=1.40179 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036893 nReads=4 cov=2.73298 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ649135 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037253 nReads=5 cov=3.52734 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ492422 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000029338 nReads=5 cov=4.05486 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ858769 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ448536 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000024876_AYM nReads=6 cov=2.00192 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ662789 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037254 nReads=6 cov=3.82863 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000029339 nReads=2 cov=1.81875 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031000 nReads=2 cov=1.89327 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ25024_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ21878_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036895 nReads=23 cov=18.3988 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ583660 
nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037255 nReads=11 cov=4.49815 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17031_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031001 nReads=9 cov=5.1711 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17593_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036896 nReads=9 cov=3.66603 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 1 hits found +PZ7180000036896 YKL095W 28.95 114 76 3 219 545 41 154 2e-08 52.8 +# BLASTX 2.2.30+ +# Query: PZ7180000037256 nReads=11 cov=6.35108 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031002 nReads=2 cov=1.79947 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ789897_ATR nReads=1 cov=1 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 1 hits found +PZ789897_ATR YBR019C 33.09 136 77 7 379 5 14 146 1e-09 54.3 +# BLASTX 2.2.30+ +# Query: PZ7180000000620_B nReads=2 cov=1.33182 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037257 nReads=15 cov=7.2512 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ453158 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031003 nReads=2 cov=1.6375 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023622 nReads=3 cov=2.0131 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ21349_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036898 nReads=15 cov=5.75025 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ488295 nReads=1 cov=1 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 2 hits found +PZ488295 YIL106W 35.00 140 87 3 410 3 93 232 1e-22 90.1 +PZ488295 YFL034C-B 26.02 123 91 0 371 3 78 200 7e-17 73.6 +# BLASTX 2.2.30+ +# Query: PZ732868 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ442821 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ779644 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17064_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031004 nReads=6 cov=4.99628 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000020326 nReads=6 cov=4.36897 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023623 nReads=27 cov=7.58566 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 1 hits found +PZ7180000023623 YMR234W 36.42 162 83 7 1055 624 188 347 5e-14 70.9 +# BLASTX 2.2.30+ +# Query: PZ7180000020436_DPJ nReads=4 cov=3.1009 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036899 nReads=2 cov=1.27059 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031005 nReads=2 cov=2 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000020327 nReads=4 cov=3.39468 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023624 nReads=2 cov=1.54247 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000653_B nReads=677 cov=197.683 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ24295_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031006 nReads=3 cov=2.35421 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023625 nReads=7 cov=6.30243 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ545_OM nReads=1 cov=1 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 1 hits found +PZ545_OM YPL131W 43.75 64 31 1 201 392 194 252 8e-09 51.6 +# BLASTX 2.2.30+ +# Query: PZ7180000000106_K nReads=3 cov=1.52771 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ653712 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ614444 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ667347 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031007 nReads=5 cov=3.13828 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ859196_FBK nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000106_L nReads=50 cov=20.8824 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 9 hits found +PZ7180000000106_L YNL202W 28.35 254 171 4 55 804 21 267 2e-21 90.1 +PZ7180000000106_L YMR226C 31.47 251 150 8 67 789 14 252 1e-20 87.4 +PZ7180000000106_L YIR035C 29.25 212 132 5 67 672 3 206 3e-16 74.7 +PZ7180000000106_L YIL124W 32.09 187 121 5 67 621 10 192 7e-14 68.9 +PZ7180000000106_L YIR036C 29.06 203 132 4 67 654 3 200 4e-13 65.9 +PZ7180000000106_L YBR159W 27.50 240 148 7 61 726 61 292 1e-10 59.3 +PZ7180000000106_L YKL055C 22.74 277 162 8 70 765 6 275 5e-10 57.4 +PZ7180000000106_L YOR246C 26.70 221 121 10 67 618 17 233 1e-09 56.2 +PZ7180000000106_L YKR009C 25.37 201 129 4 274 858 387 572 1e-09 57.0 +# BLASTX 2.2.30+ +# Query: PZ13004_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031008 nReads=2 cov=1.83586 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 1 hits found +PZ7180000031008 YKL060C 55.81 129 56 1 395 9 176 303 1e-49 162 +# BLASTX 2.2.30+ +# Query: PZ7180000020329 nReads=7 cov=3.04407 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023627 nReads=3 cov=2.01274 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000686_B nReads=332 cov=190.836 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ23003_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ22442_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ653714 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ726746 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031009 nReads=5 cov=4.2027 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023628 nReads=43 cov=14.7064 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000106_N nReads=9 cov=4.46205 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ790706 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ744951 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000020795_DWY nReads=2 cov=1.48972 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ13037_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ580713 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023629 nReads=22 cov=10.7028 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ14482_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ449977 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ24481_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ777183 nReads=1 
cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ836045 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035710 nReads=5 cov=3.49212 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ667690_CHO nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ455784_CBD nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000106_S nReads=3 cov=2.80237 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ527941 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035711 nReads=19 cov=9.8867 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ710112 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ752337 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000106_T nReads=23 cov=8.1159 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ560014 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035712 nReads=41 cov=16.5557 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ828668 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035713 nReads=21 cov=12.5486 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ567164 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022060 nReads=7 cov=3.52941 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ535325 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035714 nReads=24 cov=10.9168 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028269_AFW nReads=3 cov=2.97992 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. 
end, s. start, s. end, evalue, bit score +# 1 hits found +PZ7180000028269_AFW YHR013C 62.22 45 17 0 247 113 148 192 1e-14 64.7 +# BLASTX 2.2.30+ +# Query: PZ7180000000124_K nReads=2 cov=1.99288 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ24949_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ792095 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ651253 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035715 nReads=3 cov=2.16468 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000124_L nReads=12 cov=5.33621 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 5 hits found +PZ7180000000124_L YKL055C 27.75 173 95 5 538 77 114 275 2e-10 58.2 +PZ7180000000124_L YIR035C 30.77 169 110 4 706 206 22 185 2e-10 57.8 +PZ7180000000124_L YMR226C 31.21 141 86 4 613 206 71 205 3e-10 57.8 +PZ7180000000124_L YIL124W 33.58 137 84 4 607 206 62 194 5e-10 57.0 +PZ7180000000124_L YIR036C 28.18 181 117 6 601 92 59 237 2e-09 55.1 +# BLASTX 2.2.30+ +# Query: PZ21577_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ20454_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022062 nReads=3 cov=2.16056 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ535327 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035716 nReads=13 cov=5.65193 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000030879_DFJ nReads=6 cov=3.75406 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022063 nReads=2 cov=1.37143 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ808910 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 
2.2.30+ +# Query: PZ760258 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ19012_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000157_L nReads=30 cov=14.6842 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score +# 3 hits found +PZ7180000000157_L YKR009C 26.54 260 153 8 746 6 10 244 3e-18 82.8 +PZ7180000000157_L YNL202W 29.41 255 167 5 758 9 21 267 1e-16 75.9 +PZ7180000000157_L YDL114W 26.96 115 76 2 473 150 118 231 7e-08 50.8 +# BLASTX 2.2.30+ +# Query: PZ7180000035718 nReads=6 cov=4.15929 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ13055_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022066 nReads=13 cov=9.16058 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000035719 nReads=10 cov=5.99667 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ768110 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022067 nReads=4 cov=2.24584 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ538629 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027517_ADM nReads=10 cov=7.20455 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022068 nReads=2 cov=1.92658 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ13088_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ503931 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ856051 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000022069 nReads=9 cov=6.87952 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ470217 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# 
BLASTX 2.2.30+ +# Query: PZ18643_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ856052 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ775640 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ16108_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ803151 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019242 nReads=456 cov=113.356 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ18676_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ596919_AEA nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ789277 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ555871_APA nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027130 nReads=5 cov=2.9668 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023665_ATQ nReads=2 cov=1.70472 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ19592_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ17024_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000019245 nReads=4 cov=2.6625 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026772 nReads=4 cov=2.50095 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027132 nReads=5 cov=2.35828 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ663374 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ853101 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000032899 nReads=151 cov=98.8562 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036376_BVQ nReads=17 cov=6.99178 +# 
Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000613_B nReads=4 cov=2.17132 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ25378_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026773 nReads=4 cov=1.88667 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027133 nReads=6 cov=3.32907 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ564758 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026774 nReads=14 cov=9.37331 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027134 nReads=3 cov=1.69639 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ13709_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ558633 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026775 nReads=4 cov=2.28205 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ797198 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ773_MP nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000646_B nReads=3 cov=2.56126 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 2 hits found +PZ7180000000646_B YHR007C 31.01 129 86 2 398 21 281 409 2e-13 66.2 +PZ7180000000646_B YMR015C 29.63 108 69 3 350 48 321 428 2e-07 47.8 +# BLASTX 2.2.30+ +# Query: PZ23525_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ733424 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027136 nReads=4 cov=3.3192 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ663378 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ15532_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027137 nReads=3 cov=1.70282 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023820 nReads=32 cov=13.4486 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000034692 nReads=12 cov=5.9462 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ33524_B nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027138 nReads=9 cov=3.68491 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023821 nReads=18 cov=5.11978 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037990 nReads=5 cov=3.28261 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ173_DV nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ24119_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ821366 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000026779 nReads=4 cov=2.56593 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000027139 nReads=8 cov=3.7925 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ15565_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ16688_A nReads=1 cov=1 +# 
Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037991 nReads=6 cov=3.57916 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025499_BEG nReads=7 cov=2.23689 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ19072_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ708737 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ453893 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023823 nReads=26 cov=12.1673 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000034695 nReads=42 cov=18.4356 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000021111_BJN nReads=60 cov=23.2971 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ639490 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023824 nReads=11 cov=4.24772 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037993 nReads=4 cov=3.20534 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ824666 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ16159_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037994 nReads=7 cov=3.38238 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031741 nReads=3 cov=1.5929 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ773185 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000034698 nReads=50 cov=29.0801 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037995 nReads=9 cov=4.58665 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ21504_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ832049 nReads=1 cov=1 +# Database: orf_trans 
+# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031742 nReads=4 cov=3.36879 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000034699 nReads=27 cov=12.3133 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037996 nReads=9 cov=5.39332 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031743 nReads=3 cov=2.1412 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ788124 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023827 nReads=20 cov=5.14369 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ509932 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031744 nReads=2 cov=1.82792 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ21537_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031745 nReads=2 cov=1.85 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000023829 nReads=6 cov=3.99115 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037999 nReads=8 cov=4.86677 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000664_B nReads=73 cov=38.4985 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ22982_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ22420_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ482783 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ721182 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000033000 nReads=45 cov=25.9408 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000000117_K nReads=6 cov=4.05979 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. 
start, s. end, evalue, bit score +# 10 hits found +PZ7180000000117_K YOR113W 39.25 107 63 1 481 167 591 697 9e-24 97.1 +PZ7180000000117_K YJL056C 40.19 107 62 1 481 161 766 870 4e-21 89.4 +PZ7180000000117_K YNL027W 37.84 111 50 2 475 143 569 660 7e-16 73.6 +PZ7180000000117_K YPR186C 34.91 106 64 3 376 68 56 159 3e-13 65.1 +PZ7180000000117_K YMR037C 41.82 55 31 1 484 323 644 698 6e-10 55.5 +PZ7180000000117_K YDR216W 38.55 83 44 2 289 44 87 163 1e-09 54.7 +PZ7180000000117_K YJR127C 40.74 54 32 0 229 68 149 202 1e-09 54.7 +PZ7180000000117_K YML081W 31.40 86 51 1 322 65 36 113 1e-09 54.3 +PZ7180000000117_K YKL062W 37.04 54 33 1 481 323 571 624 2e-09 53.5 +PZ7180000000117_K YGL209W 48.28 58 28 1 481 314 15 72 1e-08 51.2 +# BLASTX 2.2.30+ +# Query: PZ7180000025620 nReads=9 cov=3.50955 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ506638 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ734865 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ625862 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000033001 nReads=137 cov=58.1597 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000037033_ADR nReads=7 cov=2.99701 +# Database: orf_trans +# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score +# 3 hits found +PZ7180000037033_ADR YBR198C 26.07 211 109 7 219 842 523 689 8e-11 61.2 +PZ7180000037033_ADR YNL253W 28.46 123 86 1 465 827 206 328 7e-09 54.7 +PZ7180000037033_ADR YPR178W 24.76 206 144 5 369 953 181 386 9e-08 51.6 +# BLASTX 2.2.30+ +# Query: PZ7180000000117_L nReads=18 cov=7.786 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ21008_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025621 nReads=4 cov=2.91786 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ14138_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031747 nReads=3 cov=2.40482 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ753430 nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ4305_H nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ22453_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025622 nReads=6 cov=3.08579 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031748 nReads=3 cov=2.81962 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ15583_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036300 nReads=3 cov=2.2153 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ25077_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000025623 nReads=8 cov=3.60964 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028920 nReads=2 cov=1.46573 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000031749 nReads=4 cov=2.6841 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036301 nReads=20 cov=4.63444 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028921 nReads=3 
cov=2.11052 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036302 nReads=5 cov=3.8 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028922 nReads=2 cov=1.67442 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ18603_A nReads=1 cov=1 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000036303 nReads=12 cov=4.41569 +# Database: orf_trans +# 0 hits found +# BLASTX 2.2.30+ +# Query: PZ7180000028923 nReads=3 cov=2.07642 +# Database: orf_trans +# 0 hits found +# BLAST processed 471 queries \ No newline at end of file diff --git a/images/part1-09-the-standard-streams.Rmd.images/I.9_20_unix_112_unit_AT_script.png b/images/part1-09-the-standard-streams.Rmd.images/I.9_20_unix_112_unit_AT_script.png index e9370ea..00c0194 100644 Binary files a/images/part1-09-the-standard-streams.Rmd.images/I.9_20_unix_112_unit_AT_script.png and b/images/part1-09-the-standard-streams.Rmd.images/I.9_20_unix_112_unit_AT_script.png differ diff --git a/part1-09-the-standard-streams.Rmd b/part1-09-the-standard-streams.Rmd index 0c0111d..84494f5 100644 --- a/part1-09-the-standard-streams.Rmd +++ b/part1-09-the-standard-streams.Rmd @@ -1,6 +1,6 @@ # The Standard Streams -In previous chapters, we learned that programs may produce output not only by writing to files, but also by printing them on the [standard output](#standard_output) stream. To further illustrate this feature, we’ve created a simple program called [`fasta_stats.py`](data/fasta_stats.py) that, given a FASTA file name as its first parameter, produces statistics on each sequence. We’ll also look at the file [`pz_cDNAs.fasta`](data/pz_cDNAs.fasta), which contains a set of 471 de novo assembled transcript sequences from *Papilio zelicaon*, and [`pz_cDNAs_sample.fasta`](data/pz_cDNAs_sample.fasta), which contains only the first two. 
+In previous chapters, we learned that programs may produce output not only by writing to files, but also by printing it on the [standard output](#standard_output) stream. To further illustrate this feature, we've created a simple program called [`fasta_stats.py`](data/fasta_stats.py) that, given a FASTA file name as its first parameter, produces statistics on each sequence. We'll also look at the file [`pz_cDNAs.fasta`](data/pz_cDNAs.fasta), which contains a set of 471 *de novo* assembled transcript sequences from *Papilio zelicaon*, and [`pz_cDNAs_sample.fasta`](data/pz_cDNAs_sample.fasta), which contains only the first two.
fasta_stats.py  pz_cDNAs.fasta  pz_cDNAs_sample.fasta
+fasta_stats.py  pz_cDNAs.fasta  pz_cDNAs_sample.fasta
 
-We can run the `fasta_stats.py` program (after making it executable) with `./fasta_stats.py pz_cDNAs_sample.fasta`. +We can run the `fasta_stats.py` program (after making it executable with `chmod`) with `./fasta_stats.py pz_cDNAs_sample.fasta`. When we run this command, however, we see that even though the output file has been created, text is still printed to the terminal! If we use `less -S` to view the `pz_sample_stats.txt` file, we see that some of the output has gone to the file. @@ -75,18 +75,18 @@ When we run this command, however, we see that even though the output file has b # Column 6: Repeat unit of longest simple perfect repeat (2 to 10 chars) # Column 7: Length of repeat (in characters) # Column 8: Repeat type (dinucleotide, trinucleotide, etc.) -PZ7180000031590 0.378   486     ACAAA   5       unit:ATTTA      10      pentanuc -PZ7180000000004_TX      0.279   1000    AAATA   12      unit:TAA        12       +PZ7180000000004_TX 0.279 1000 AAATA 12 unit:TAA 12 trinucleotide +PZ7180000031590 0.379 486 ACAAA 5 unit:ATTTA 10 pentanucleotide pz_sample_stats.txt (END) ###### {- #standard_error} -So what is going on? It turns out that programs can produce output (other than writing to files) on *two* streams. We are already familiar with the first, [standard output](#standard_output), which is by default printed to the terminal but can be redirected to a file with >. The second, called *standard error*, is also by default printed to the terminal but is not redirected with `>`. +So what is going on? It turns out that programs can produce output (other than writing to files) on *two* streams. We are already familiar with the first, [standard output](#standard_output), which is by default printed to the terminal but can be redirected to a file with `>`. The second, called *standard error*, is also by default printed to the terminal but is not redirected with `>`. 
-By default, like standard output, standard error (also known as “standard err” or “stderr”) is printed to the terminal. +By default, like standard output, standard error (also known as "standard err" or "stderr") is printed to the terminal. -Because standard error usually contains diagnostic information, we may not be interested in capturing it into its own file. Still, if we wish, `bash` can redirect the standard error to a file by using the `2>` redirect.^[The `tcsh` and `csh` shells unfortunately cannot natively separately redirect stdout and stderr to files. A potential workaround looks like: `( ./fasta_stats pz_cDNAs_sample.fasta > pz_sample_stats.txt ) > & pz_sample_stats.err.txt`. This command runs two independent redirects; using parentheses causes the redirect of stdout to happen first, then the further redirect of stderr can occur next. How `bash`-compatible shells handle standard output and standard error is one of the primary reasons they are preferred over the older `csh`-compatible shells.] +Because standard error usually contains diagnostic information, we may not be interested in capturing it into its own file. Still, if we wish, `bash` can redirect the standard error to a file by using the `2>` redirect.^[The `tcsh` and `csh` shells unfortunately cannot natively separately redirect stdout and stderr to files. A potential workaround looks like: `( ./fasta_stats.py pz_cDNAs_sample.fasta > pz_sample_stats.txt ) > & pz_sample_stats.err.txt`. This command runs two independent redirects; using parentheses causes the redirect of stdout to happen first, then the further redirect of stderr can occur next. How `bash`-compatible shells handle standard output and standard error is one of the primary reasons they are preferred over the older `csh`-compatible shells.]
fasta_stats.py  pz_cDNAs.fasta  pz_cDNAs_sample.fasta  pz_sample_stats.txt  pz_sample_stats.err.txt
+fasta_stats.py  pz_cDNAs_sample.fasta    pz_sample_stats.txt
+pz_cDNAs.fasta  pz_sample_stats.err.txt
 
We might pictorially represent programs and their output as alternative information flows: @@ -108,7 +109,7 @@ We might pictorially represent programs and their output as alternative informat ###### {- #grep} -It can often be useful to extract lines from a file based on a pattern. For example, the `pz_sample_stats.txt` file contains information on what each column describes, as well as the data itself. If we want to extract all the lines that match a particular pattern, say, `unit:`, we can use the tool `grep` (for Global search for Regular Expression and Print), which prints to [standard output](#standard_output) lines that match a given pattern (or don’t match a given pattern, if using the `-v` flag): `grep '<pattern>' <file>`. To illustrate, we’ll first run fasta_stats on the full input file, redirecting the standard output to a file called `pz_stats.txt`. +It can often be useful to extract lines from a file based on a pattern. For example, the `pz_sample_stats.txt` file contains information on what each column describes, as well as the data itself. If we want to extract all the lines that match a particular pattern, say, `unit:`, we can use the tool `grep` (for Global search for Regular Expression and Print), which prints to [standard output](#standard_output) lines that match a given pattern (or don't match a given pattern, if using the `-v` flag): `grep '<pattern>' <file>`. To illustrate, we'll first run `fasta_stats.py` on the full input file, redirecting the standard output to a file called `pz_stats.txt`.
 
-PZ832049        0.321   218     CTTAA   4       unit:CGT        6       trinucle
-PZ21878_A       0.162   172     ATTAA   8       unit:ATT        6       trinucle
-PZ439397        0.153   111     TTAAT   5       unit:GAAAT      10      pentanuc
-PZ16108_A       0.157   191     ATTAA   7       unit:ATT        6       trinucle
-PZ21537_A       0.158   82      TTATT   3       unit:ATT        6       trinucle
-PZ535325        0.108   120     AATTA   6       unit:TA 6       dinucleotide
+PZ7180000000004_TX      0.279   1000    AAATA   12      unit:TAA        12      trinu
+PZ7180000031590 0.379   486     ACAAA   5       unit:ATTTA      10      pentanucleoti
+PZ7180000027934 0.379   937     ATTTT   7       unit:GTTTGAC    14      heptanucleoti
+PZ456916        0.167   162     TAATT   6       unit:TAAT       8       tetranucleoti
+PZ7180000037718 0.299   518     TTTTT   15      unit:GTTGT      10      pentanucleoti
+PZ7180000000004_TY      0.578   998     AGAAG   8       unit:GCGAG      10      penta
 ...
 
Rather than viewing the file with `less`, we can also count the number of lines present in the file with the `wc` tool, which counts the number of lines, words, and characters of input: `wc <file>`. -Working with the cleaned data table reveals that our program produced 21,131 characters broken into 3,297 words among 471 lines of data output. +Working with the cleaned data table reveals that our program produced 27,723 characters broken into 3,768 words among 471 lines of data output.
 
 [oneils@mbp ~/apcb/intro/fasta_stats]$ wc pz_stats.table
-  471  3297 21131 pz_stats.table
+  471  3768 27723 pz_stats.table
 
###### {- #standard_input} -This sort of command-line-driven analysis can be quite powerful, particularly because many of these programs—like `less`, `grep`, and `wc`—can both print their results on standard output and read input from *standard input* rather than from a file. Standard input is the secondary input mechanism for command-line programs (other than reading from files directly). By default, standard input, or “stdin”, is unused. - -How can we get input to a program on its standard input? It turns out that the easiest way to do so is to *redirect* the standard output of another program to it using the `|`, also known as the “pipe,” redirect (found above the Enter key on most keyboards). In this case, the data come in “on the left”: +This sort of command-line-driven analysis can be quite powerful, particularly because many of these programs — like `less`, `grep`, and `wc` — can both print their results on standard output and read input from *standard input* rather than from a file. Standard input is the secondary input mechanism for command-line programs (other than reading from files directly). By default, standard input, or "stdin", is unused. +How can we get input to a program on its standard input? It turns out that the easiest way to do so is to *redirect* the standard output of another program to it using the `|`, also known as the "pipe," redirect (found above the Enter key on most keyboards). In this case, the data come in "on the left":
-To drive this home, we’ll first remove our `pz_stats.table` file, and then rerun our `grep` for `unit:` on the `pz_stats.txt` file, but rather than send the result of `grep` to a file with the `>` redirect, we’ll direct it straight to the standard input of `wc` with a `|` redirect. +To drive this home, we'll first remove our `pz_stats.table` file, and then rerun our `grep` for `unit:` on the `pz_stats.txt` file, but rather than send the result of `grep` to a file with the `>` redirect, we'll direct it straight to the standard input of `wc` with a `|` redirect.
 
+[oneils@mbp ~/apcb/intro/fasta_stats]$ rm pz_stats.table
+fasta_stats.py  pz_cDNAs_sample.fasta    pz_sample_stats.txt
+pz_cDNAs.fasta  pz_sample_stats.err.txt  pz_stats.txt
 [oneils@mbp ~/apcb/intro/fasta_stats]$ grep 'unit:' pz_stats.txt | wc
-    471    3297   21131
+    471    3768   27723
 
-In this example, we’ve neither created a new file nor specified a file for `wc` to read from; the data are stored in a temporary buffer that is handled automatically by the shell and operating system. The `less` program can also read from standard input, so if we wanted to see the contents of the `grep` without creating a new file, we could run `grep 'unit:' pz_stats.txt | less -S`. +In this example, we've neither created a new file nor specified a file for `wc` to read from; the data are stored in a temporary buffer that is handled automatically by the shell and operating system. The `less` program can also read from standard input, so if we wanted to see the contents of the `grep` without creating a new file, we could run `grep 'unit:' pz_stats.txt | less -S`. Recall that the `fasta_stats` program wrote its output to standard out, and because `grep` can read from standard input as well, we can process the entire FASTA file without needing to create any new files by using multiple such buffers: @@ -212,38 +215,38 @@ Recall that the `fasta_stats` program wrote its output to standard out, and beca line-numbers linkable-line-numbers"> -[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats pz_cDNAs.fasta | grep 'unit:' | wc +[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats.py pz_cDNAs.fasta | grep 'unit:' | wc -When this command runs, the results printed by `fasta_stats` on standard error will still be printed to the terminal (as that is the default and we didn’t redirect standard error), but the standard output results will be filtered through `grep` and then filtered through `wc`, producing the eventual output of 471 lines. +When this command runs, the results printed by `fasta_stats` on standard error will still be printed to the terminal (as that is the default and we didn't redirect standard error), but the standard output results will be filtered through `grep` and then filtered through `wc`, producing the eventual output of 471 lines. 
-At this point, the longish nature of the commands and the fact that our terminal window is only so wide are making it difficult to read the commands we are producing. So, we’ll start breaking the commands over multiple lines by ending partial commands with backslashes. Just as in the shell scripts we wrote at the end of chapter 7, “[Using (Bioinformatics) Software](using-bioinformatics-software.html)”, using backslashes will let the shell know that we aren’t finished entering the command. However, the `bash` shell indicates that a command spans multiple lines by showing us a `>`, which shouldn’t be confused with the redirect character that we might type ourselves. The following example shows the exact same command in a more readable form broken over multiple lines, but the highlighted characters have not been typed. +At this point, the longish nature of the commands and the fact that our terminal window is only so wide are making it difficult to read the commands we are producing. So, we'll start breaking the commands over multiple lines by ending partial commands with backslashes. Just as in the shell scripts we wrote at the end of chapter 7, "[Using (Bioinformatics) Software](#using-bioinformatics-software)", using backslashes will let the shell know that we aren't finished entering the command. However, the `bash` shell indicates that a command spans multiple lines by showing us a `>`, which shouldn't be confused with the redirect character that we might type ourselves. The following example shows the exact same command in a more readable form broken over multiple lines, but the highlighted characters have not been typed.
 
-[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats pz_cDNAs.fasta | \ 
-> grep 'unit:' | \ 
+[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats.py pz_cDNAs.fasta | \
+> grep 'unit:' | \
 > wc
 
###### {- #pipeline} -A chain of commands like the above, separated by pipe characters, is often called a “pipeline.” More generally, though, a pipeline can describe any series of steps from input data to output data (as in the Muscle/HMMER series covered in chapter 7). +A chain of commands like the above, separated by pipe characters, is often called a "pipeline." More generally, though, a pipeline can describe any series of steps from input data to output data (as in the Muscle/HMMER series covered in chapter 7). ### Counting Simple AT Repeats {-} -Let’s expand on the small pipeline above to inspect just the “simple” AT repeats, that is, those that are the string “AT” repeated one or more times. We can start with what we have, but rather than just searching for `unit:`, we’ll modify the pattern to find `unit:AT`, and see what we get: +Let's expand on the small pipeline above to inspect just the "simple" AT repeats, that is, those that are the string "AT" repeated one or more times. We can start with what we have, but rather than just searching for `unit:`, we'll modify the pattern to find `unit:AT`, and see what we get:
 
-[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats pz_cDNAs.fasta | \ 
-> grep 'unit:AT' | \ 
+[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats.py pz_cDNAs.fasta | \
+> grep 'unit:AT' | \
 > less -S
 
@@ -254,12 +257,12 @@ The resulting output is close to what we hoped for, but not quite complete, as t line-numbers linkable-line-numbers"> -PZ21878_A       0.162   172     ATTAA   8       unit:ATT        6       trinucle -PZ16108_A       0.157   191     ATTAA   7       unit:ATT        6       trinucle -PZ21537_A       0.158   82      TTATT   3       unit:ATT        6       trinucle -PZ7180000031590 0.378   486     ACAAA   5       unit:ATTTA      10      pentanuc -PZ7180000031597 0.287   403     ATTAT   6       unit:ATTTTG     12      hexanucl -PZ7180000025478 0.516   829     TGATG   18      unit:ATG        18      trinucle +PZ22254_A 0.132 68 ATTAA 5 unit:AT 4 dinucleotide +PZ20930_A 0.229 375 ATTAA 10 unit:ATT 6 trinucleotide +PZ841572 0.285 411 TTTTT 7 unit:ATGTGA 12 hexanucleotid +PZ7180000019701 0.254 678 TTTTT 12 unit:ATAAA 10 pentanucleoti +PZ729309 0.269 535 ATATT 11 unit:ATTAA 10 pentanucleoti +PZ835452 0.238 311 AAAAA 6 unit:ATG 6 trinucleotide ... @@ -270,9 +273,9 @@ We probably want to further filter the output, but based on what pattern? 
In thi line-numbers linkable-line-numbers"> -[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats pz_cDNAs.fasta | \ -> grep 'unit:AT' | \ -> grep 'dinucleotide' | \ +[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats.py pz_cDNAs.fasta | \ +> grep 'unit:AT' | \ +> grep 'dinucleotide' | \ > less -S @@ -283,12 +286,12 @@ This command results in the output we want: line-numbers linkable-line-numbers"> -PZ7180000031598 0.209   81      AATAT   5       unit:AT 6       dinucleotide -PZ463243        0.226   97      TTGTA   3       unit:AT 4       dinucleotide -PZ7180000000106_T       0.246   1044    AAAAA   22      unit:AT 10      dinucleo -PZ17593_A       0.157   76      ATTAA   5       unit:AT 4       dinucleotide -PZ492422        0.144   90      ATTAA   5       unit:AT 4       dinucleotide -PZ22453_A       0.267   269     ATTAA   8       unit:AT 4       dinucleotide +PZ7180000031598 0.21 81 AATAT 5 unit:AT 6 dinucleotide +PZ23512_A 0.281 153 TTAAA 3 unit:AT 6 dinucleotide +PZ851952 0.399 338 TATAT 12 unit:AT 24 dinucleotide +PZ17053_A 0.21 119 ATTAA 6 unit:AT 4 dinucleotide +PZ463243 0.227 97 TTGTA 3 unit:AT 4 dinucleotide +PZ22254_A 0.132 68 ATTAA 5 unit:AT 4 dinucleotide ... @@ -296,22 +299,22 @@ PZ22453_A       0.267   269     ATTAA   8       unit:AT 4       din Rather than run the results through `less -S`, we could instead use `wc` to count the simple (dinucleotide) AT repeats. There is an important concept at play here, that of *iterative development*, the idea that as we get closer to a solution, we inspect the results and repeat as necessary. Iterative development is a good strategy for many areas in life, but it is essential and pervasive in computing. -Once we’ve decided that we like the small computational process we have created, we might decide to encapsulate it and make it repeatable as a shell [script](#script), perhaps called [`count_ATs.sh`](data/count_ATs.sh). 
+Once we've decided that we like the small computational process we have created, we might decide to encapsulate it and make it repeatable as a shell [script](#script), perhaps called [`count_ATs.sh`](data/count_ATs.sh).
-The above script will need to be made executable and placed in a location referenced by `$PATH`, as will the `fasta_stats` program. +The above script will need to be made executable and placed in a location referenced by `$PATH`, as will the `fasta_stats.py` program.
#### Exercises {-} 1. Use `grep` and `wc` to determine how many sequences are in the file [`orf_trans.fasta`](data/orf_trans.fasta) without creating any temporary files. -2. How many sequence headers in the file [`orf_trans.fasta`](data/orf_trans.fasta) have the term “polymerase”? +2. How many sequence headers in the file [`orf_trans.fasta`](data/orf_trans.fasta) have the term "polymerase"? -3. Some of the sequence headers in [`orf_trans.fasta`](data/orf_trans.fasta) have the phrase “Verified ORF” to indicate that the open reading frame has been verified experimentally. Some also have the term “reverse complement” to indicate that the ORF (open reading frame) is present on the reverse complement sequence of the canonical genome description. How many sequences are verified ORFs *and* are not on the reverse complement? +3. Some of the sequence headers in [`orf_trans.fasta`](data/orf_trans.fasta) have the phrase "Verified ORF" to indicate that the open reading frame has been verified experimentally. Some also have the term "reverse complement" to indicate that the ORF (open reading frame) is present on the reverse complement sequence of the canonical genome description. How many sequences are verified ORFs *and* are not on the reverse complement? 4. The sequence headers in [`orf_trans.fasta`](data/orf_trans.fasta) have information on the chromosome from which they originate, such as `Chr I` or `Chr II`. How many sequences are present on chromosome I?
\ No newline at end of file diff --git a/part1-10-sorting-first-last.Rmd b/part1-10-sorting-first-last.Rmd index 76a2408..9b38d4b 100644 --- a/part1-10-sorting-first-last.Rmd +++ b/part1-10-sorting-first-last.Rmd @@ -1,13 +1,13 @@ # Sorting, First and Last Lines -Continuing with the `fasta_stats` examples from chapter 9, [“The Standard Streams”](the-standard-streams.html), the seventh column of the output contains the length of the longest perfect repeat, in characters. +Continuing with the `fasta_stats.py` examples from chapter 9, ["The Standard Streams"](#the-standard-streams), the seventh column of the output contains the length of the longest perfect repeat, in characters.
 
-[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats pz_cDNAs_sample.fasta
+[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats.py pz_cDNAs_sample.fasta
 # Column 1: Sequence ID
 # Column 2: GC content
 # Column 3: Length
@@ -16,23 +16,23 @@ Continuing with the `fasta_stats` examples from chapter 9, [“The Standard Stre
 # Column 6: Repeat unit of longest simple perfect repeat (2 to 10 chars)
 # Column 7: Length of repeat (in characters)
 # Column 8: Repeat type (dinucleotide, trinucleotide, etc.)
-Processing sequence ID PZ7180000031590
-PZ7180000031590 0.378 486 ACAAA 5 unit:ATTTA 10 pentanucleotide
 Processing sequence ID PZ7180000000004_TX
-PZ7180000000004_TX 0.279 1000 AAATA 12 unit:TAA 12 trinucleotide
+PZ7180000000004_TX	0.279	1000	AAATA	12	unit:TAA	12	trinucleotide
+Processing sequence ID PZ7180000031590
+PZ7180000031590	0.379	486	ACAAA	5	unit:ATTTA	10	pentanucleotide
 
-Which sequence contains the longest perfect repeat? To answer this question, we could consider sorting the lines according to this seventh column. (First, we’ll have to remove the header lines themselves, which we can accomplish by filtering out lines matching `#` using the `-v` flag of [grep](#grep), or by grepping for `unit:`, as in chapter 9.) Enter `sort`, which sorts lines from a text file (or from [standard input](#standard_input)) by specified columns: `sort <file>` or `... | sort`. +Which sequence contains the longest perfect repeat? To answer this question, we could consider sorting the lines according to this seventh column. (First, we'll have to remove the header lines themselves, which we can accomplish by filtering out lines matching `#` using the `-v` flag of [grep](#grep), or by grepping for `unit:`, as in chapter 9.) Enter `sort`, which sorts lines from a text file (or from [standard input](#standard_input)) by specified columns: `sort <file>` or `... | sort`. -By default, `sort` sorts by all columns, by comparing the entire lines in “dictionary,” or [lexicographic order](#lexicographic_order). To sort by specific columns, we need to use a rather sophisticated syntax. We’ll illustrate with a figure. +By default, `sort` sorts by all columns, by comparing the entire lines in "dictionary," or [lexicographic order](#lexicographic_order). To sort by specific columns, we need to use a rather sophisticated syntax. We'll illustrate with a figure.
-The `sort` utility takes many potential parameters, though the most important are the `-k` parameters that specify the columns by which to sort and how that sorting should be done, and occasionally the `-u` flag. The `-k` (key) parameters are considered in order; the above specifies that the sorting should be done on columns 2 through 4 (conglomerated into a single “column”), considering them in dictionary order, and sorting them in reverse. In the case of ties, only the first column is considered in normal dictionary order, and in the case of further ties, the fifth column is considered in numeric order.^[A word of caution: if a column contains an entry that cannot be interpreted as an integer or general number, it will be treated as `0` in the sorting order.] (The difference between `n` and `g` ordering is that `g` can handle entries in scientific notation like `1e-6`, but generally `n` is preferred because it is faster and not subject to small rounding errors.) +The `sort` utility takes many potential parameters, though the most important are the `-k` parameters that specify the columns by which to sort and how that sorting should be done, and occasionally the `-u` flag. The `-k` (key) parameters are considered in order; the above specifies that the sorting should be done on columns 2 through 4 (conglomerated into a single "column"), considering them in dictionary order, and sorting them in reverse. In the case of ties, only the first column is considered in normal dictionary order, and in the case of further ties, the fifth column is considered in numeric order.^[A word of caution: if a column contains an entry that cannot be interpreted as an integer or general number, it will be treated as `0` in the sorting order.] (The difference between `n` and `g` ordering is that `g` can handle entries in scientific notation like `1e-6`, but generally `n` is preferred because it is faster and not subject to small rounding errors.) 
-The optional `-u` flag (which may be specified before or after the keys, or even mixed in) specifies that after all the keys are considered, if there are still any ties between rows, then only the first row should be output. It outputs only “unique” lines according to the overall sorting order. +The optional `-u` flag (which may be specified before or after the keys, or even mixed in) specifies that after all the keys are considered, if there are still any ties between rows, then only the first row should be output. It outputs only "unique" lines according to the overall sorting order. By default, `sort` uses [whitespace](#whitespace) as the column separator, though it can be changed (run `man sort` for more information). To view information about the longest perfect repeat, we will use `sort -k7,7nr`, indicating that we wish to sort on the seventh column only, in reverse numeric order. @@ -41,31 +41,31 @@ By default, `sort` uses [whitespace](#whitespace) as the column separator, thoug line-numbers linkable-line-numbers"> -[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats pz_cDNAs.fasta | \ +[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats.py pz_cDNAs.fasta | \ > grep -v '#' | \ > sort -k7,7nr | \ > less -S -The first few lines of output indicate that the longest perfect repeat is 94 bases long and occurs in sequence `PZ805359` (this sequence’s GC content is `0`, because it’s composed entirely of a long AT repeat). +The first few lines of output indicate that the longest perfect repeat is 94 bases long and occurs in sequence `PZ805359` (this sequence's GC content is `0`, because it's composed entirely of a long AT repeat).
 
-PZ805359        0.0     101     ATATA   47      unit:AT 94      dinucleotide
-PZ796092        0.365   361     TACGT   9       unit:GTACGT     48      hexanucl
-PZ7180000019700 0.375   564     GAGTG   12      unit:GAGTG      30      pentanuc
-PZ7180000028921 0.31    561     TGTAA   8       unit:CTGTG      30      pentanuc
-PZ851952        0.399   338     TATAT   12      unit:AT 24      dinucleotide
-PZ7180000000664_B       0.3     652     TTTTT   18      unit:TAAAATTAT  18      
-PZ7180000023622 0.31    687     TTAAT   9       unit:TGA        18      trinucle
-PZ7180000023665_ATQ     0.401   508     ACTGA   5       unit:TGACACTGA  18      
-PZ7180000025478 0.516   829     TGATG   18      unit:ATG        18      trinucle
-PZ7180000030412 0.461   258     TGATG   8       unit:ATG        18      trinucle
-PZ7180000036892 0.268   548     AATAA   16      unit:TAA        18      trinucle
-PZ801814        0.262   255     TTACA   5       unit:TATTTACAT  18      enneanuc
+PZ805359        0.0     101     TATAT   47      unit:AT 94      dinucleotide
+PZ796092        0.366   361     TACGT   9       unit:GTACGT     48      hexanucleotid
+PZ7180000019700 0.376   564     GAGTG   12      unit:GAGTG      30      pentanucleoti
+PZ7180000028921 0.31    561     TTTTT   8       unit:CTGTG      30      pentanucleoti
+PZ851952        0.399   338     TATAT   12      unit:AT 24      dinucleotide
+PZ7180000000664_B       0.301   652     TTTTT   18      unit:TAAAATTAT  18      ennea
+PZ7180000023622 0.31    687     TTAAT   9       unit:TGA        18      trinucleotide
+PZ7180000023665_ATQ     0.402   508     ACTGA   5       unit:TGACACTGA  18      ennea
+PZ7180000025478 0.516   829     TGATG   18      unit:ATG        18      trinucleotide
+PZ7180000030412 0.461   258     TGATG   8       unit:ATG        18      trinucleotide
+PZ7180000036892 0.268   548     AATAA   16      unit:TAA        18      trinucleotide
+PZ801814        0.263   255     TTACA   5       unit:TATTTACAT  18      enneanucleoti
 ...
 
@@ -78,7 +78,7 @@ A useful trick is to perform two sorts: one that initially sorts the data on wha line-numbers linkable-line-numbers"> -[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats pz_cDNAs.fasta | \ +[oneils@mbp ~/apcb/intro/fasta_stats]$ ./fasta_stats.py pz_cDNAs.fasta | \ > grep -v '#' | \ > sort -k7,7nr -k2,2gr | \ > sort -k7,7nr -u | \ @@ -92,20 +92,20 @@ Output: line-numbers linkable-line-numbers"> -PZ805359        0.0     101     ATATA   47      unit:AT 94      dinucleotide -PZ796092        0.365   361     TACGT   9       unit:GTACGT     48      hexanucl -PZ7180000019700 0.375   564     GAGTG   12      unit:GAGTG      30      pentanuc -PZ851952        0.399   338     TATAT   12      unit:AT 24      dinucleotide -PZ7180000025478 0.516   829     TGATG   18      unit:ATG        18      trinucle -PZ7180000000447_B       0.484   578     ATCCA   7       unit:TCCA       16       +PZ805359 0.0 101 TATAT 47 unit:AT 94 dinucleotide +PZ796092 0.366 361 TACGT 9 unit:GTACGT 48 hexanucleotide +PZ7180000019700 0.376 564 GAGTG 12 unit:GAGTG 30 pent +PZ851952 0.399 338 TATAT 12 unit:AT 24 dinucleotide +PZ7180000025478 0.516 829 TGATG 18 unit:ATG 18 trinucleo +PZ7180000000447_B 0.484 578 ATCCA 7 unit:TCCA 16 tetranucl ... -There is one small concern, however: how can we be sure that our careful ordering by GC content wasn’t undone in the second sort? After all, the second sort would technically be free to reorder ties according to the seventh column, resulting in incorrect output. There is an additional flag for sort, the `-s` flag, indicating that stable sorting should be used. Stable sorting means that, in the case of ties, elements are left in their original order. So, to be safe, we could use a secondary sort of sort `-k7,7nr -u -s`, though a careful reading of the documentation for `sort` indicates that on most systems the `-u` flag implies the `-s` flag. 
+There is one small concern, however: how can we be sure that our careful ordering by GC content wasn't undone in the second sort? After all, the second sort would technically be free to reorder ties according to the seventh column, resulting in incorrect output. There is an additional flag for sort, the `-s` flag, indicating that stable sorting should be used. Stable sorting means that, in the case of ties, elements are left in their original order. So, to be safe, we could use a secondary sort of sort `-k7,7nr -u -s`, though a careful reading of the documentation for `sort` indicates that on most systems the `-u` flag implies the `-s` flag. ### First and Last Lines {-} -Often we wish to extract from a file (or from [standard input(#standard_input)]) the first or last few lines. The tools `head` and `tail` are designed to do exactly this, and in combination with other tools are surprisingly handy. The `head` tool extracts the first lines of a file or standard input: `head -n ` or `... | head -n `. The tail tool extracts the last lines of a file or standard input: `tail -n ` or `... | tail -n `. +Often we wish to extract from a file (or from [standard input](#standard_input)) the first or last few lines. The tools `head` and `tail` are designed to do exactly this, and in combination with other tools are surprisingly handy. The `head` tool extracts the first lines of a file or standard input: `head -n ` or `... | head -n `. The tail tool extracts the last lines of a file or standard input: `tail -n ` or `... | tail -n `. The `head` and `tail` utilities, like the others covered previously, write their output to [standard output](#standard_output), and so they can be used within pipelines. They are often employed to inspect the beginning or end of a file (to check results or formatting). They also commonly extract test data sets. 
For example, `head -n 40000 input.fastq > test.fastq` would extract the first 10,000 sequence records from `input.fastq` and produce `test.fastq` (because every four lines of a FASTQ sequence file represents information for a single sequence). @@ -129,9 +129,9 @@ BCCDFFEFHHHHHJJJJJIJIJJIJJJJJIIGIIIDFIIJIJIFHIJGGIGIJJJIJHJIGHGDIJCEEGHIGIGCACEF ... -The above shows the first 12 lines of a FASTQ file generated on an Illumina HiSeq 2000. The first line in each set of four represents an identifier for the sequence read, the second line contains the sequence itself, the third line is often unused (containing only a `+`, though it may be followed by the identifier and other optional data), and the fourth line contains the “quality” of each base in the sequence encoded as a character. (The encoding has varied in the past, but in recent years, sequencing companies have standardized the encoding used by Sanger sequencing machines.) +The above shows the first 12 lines of a FASTQ file generated on an Illumina HiSeq 2000. The first line in each set of four represents an identifier for the sequence read, the second line contains the sequence itself, the third line is often unused (containing only a `+`, though it may be followed by the identifier and other optional data), and the fourth line contains the "quality" of each base in the sequence encoded as a character. (The encoding has varied in the past, but in recent years, sequencing companies have standardized the encoding used by Sanger sequencing machines.) -With a bit of modified syntax, `tail` can also be used to extract all lines of a file starting at a given line. As an example, `tail -n +5 input.fastq > test.fastq` would result in `test.fastq` having all but the first sequence record of `input.fastq` (i.e., starting at the fifth line). 
This feature is especially useful for stripping off header lines of output or files before further processing, as in `./fasta_stats pz_cDNAs.fasta | tail -n +9`, rather than using g`rep -v '#'` above. +With a bit of modified syntax, `tail` can also be used to extract all lines of a file starting at a given line. As an example, `tail -n +5 input.fastq > test.fastq` would result in `test.fastq` having all but the first sequence record of `input.fastq` (i.e., starting at the fifth line). This feature is especially useful for stripping off header lines of output or files before further processing, as in `./fasta_stats.py pz_cDNAs.fasta | tail -n +9`, rather than using `grep -v '#'` above.
#### Exercises {-} @@ -140,7 +140,7 @@ With a bit of modified syntax, `tail` can also be used to extract all lines of a 2. Running [fasta_stats.py](data/fasta_stats.py) on [pz_cDNAs.fasta](data/pz_cDNAs.fasta), how many different perfect repeat units (column six) are found? -3. The file [pz_blastx_yeast_top10.txt](data/pz_blastx_yeast_top10.txt) is the result of running `blastx -query ../fasta_stats/pz_cDNAs.fasta -db orf_trans -evalue 1e-6 -max_target_seqs 10 -max_hsps 1 -outfmt 7 -out pz_blastx_yeast_top1.txt`. Aside from the “comment” lines that start with `#`, the first column is the query ID, the second the target (yeast) ID, the third the percentage identity of the HSP, the eleventh the E value, and the twelfth the “bitscore.” Which query ID had the largest bitscore? How many different query sequences (entries in the first column) had one or more HSPs against the database? +3. The file [pz_blastx_yeast_top10.txt](data/pz_blastx_yeast_top10.txt) is the result of running `blastx -query ../fasta_stats/pz_cDNAs.fasta -db orf_trans -evalue 1e-6 -max_target_seqs 10 -max_hsps 1 -outfmt 7 -out pz_blastx_yeast_top10.txt`. Aside from the "comment" lines that start with `#`, the first column is the query ID, the second the target (yeast) ID, the third the percentage identity of the HSP, the eleventh the E value, and the twelfth the "bitscore." Which query ID had the largest bitscore? How many different query sequences (entries in the first column) had one or more HSPs against the database? 4. Extract from [pz_blastx_yeast_top10.txt](data/pz_blastx_yeast_top10.txt) a file called `pz_blastx_yeast_top1.txt` containing only the smallest *E*-valued HSP line per query ID. (You may remove comment lines starting with `#` altogether.)
\ No newline at end of file diff --git a/part1-11-rows-and-columns.Rmd b/part1-11-rows-and-columns.Rmd index 8e2e92e..dc4cae7 100644 --- a/part1-11-rows-and-columns.Rmd +++ b/part1-11-rows-and-columns.Rmd @@ -1,33 +1,34 @@ # Rows and columns -Let’s return to the output of the yeast proteins versus yeast proteins self-BLAST we performed previously, from the file [`yeast_blastp_yeast_top2.txt`](data/yeast_blastp_yeast_top2.txt). +Let's return to the output of the yeast proteins versus yeast proteins self-BLAST we performed previously, from the file [`yeast_blastp_yeast_top2.txt`](data/yeast_blastp_yeast_top2.txt).
 
-# BLASTP 2.2.30+
-# Query: YAL003W EFB1 SGDID:S000000003, Chr I from 142174-142253,142620-143160, 
+# BLASTP 2.14.0+
+# Query: YAL068C PAU8 SGDID:S000002142, Chr I from 2169-1807, Genome Release 64-5-1,
 # Database: orf_trans
-# Fields: query id, subject id, alignment length, query length, subject length, 
-# 1 hits found
-YAL003W YAL003W 207     207     207     1       207     1       207     2e-148
-# BLASTP 2.2.30+
-# Query: YAL005C SSA1 SGDID:S000000004, Chr I from 141431-139503, Genome Release
+# Fields: query id, subject id, alignment length, query length, subject length, q. 
+# 2 hits found
+YAL068C       YGL261C       121   121    121     1      121   1      121    3.27e-86
+YAL068C       YAL068C       121   121    121     1      121   1      121    3.27e-86
+# BLASTP 2.14.0+
+# Query: YAL067W-A YAL067W-A SGDID:S000028593, Chr I from 2480-2707, Genome Release 
 # Database: orf_trans
-# Fields: query id, subject id, alignment length, query length, subject length, 
+# Fields: query id, subject id, alignment length, query length, subject length, q. s
 # 2 hits found
-YAL005C YAL005C 643     643     643     1       643     1       643     0.0
-YAL005C YLL024C 643     643     640     1       643     1       640     0.0
+YAL067W-A     YAL067W-A     76    76      76     1      76    1      76     1.48e-50
+YAL067W-A     YIL174W       76    76      76     1      76    1      76     3.49e-43
 ...
 
-Let’s ask the following: how many sequences in this data set had a match to some other sequence? To start with, we would probably use a `grep -v '#'` to remove all of the comment lines, but then what? We could try using `wc` to count the lines, but only after also removing the self-hits, where the ID in the first column is equal to the ID in the second column. None of the utilities we’ve seen so far—`grep`, `sort`, `head`, or `tail`—can perform this task. We need a new tool, `awk`, which is a line-by-line and column-by-column processing tool for text files: `awk '' ` or `... | awk ''`. +Let's ask the following: how many sequences in this data set had a match to some other sequence? To start with, we would probably use a `grep -v '#'` to remove all of the comment lines, but then what? We could try using `wc` to count the lines, but only after also removing the self-hits, where the ID in the first column is equal to the ID in the second column. None of the utilities we've seen so far — `grep`, `sort`, `head`, or `tail` — can perform this task. We need a new tool, `awk`, which is a line-by-line and column-by-column processing tool for text files: `awk '' ` or `... | awk ''`. Written in the late 1970s and named after its authors (Alfred Aho, Peter Weinberger, and Brian Kernighan), `awk` provides a sophisticated programming language that makes it easy to parse tabular data like the BLAST results above. The syntax for `awk` can be fairly complex, but much of the complexity can be ignored in regular use. -First, let’s answer our specific question, of how many sequences had matches to other sequences, and then we’ll look at some `awk` syntax more generally. The `awk` command that we want, printing only those lines where the first two columns are not equal, is `awk '{if($1 != $2) {print $0}}'`. +First, let's answer our specific question, of how many sequences had matches to other sequences, and then we'll look at some `awk` syntax more generally. 
The `awk` command that we want, printing only those lines where the first two columns are not equal, is `awk '{if($1 != $2) {print $0}}'`.
 
-YDR534C YOR383C 134     529     205     1       129     1       134     7e-33
-YDR536W YEL069C 525     570     565     21      532     50      553     6e-59
-YDR541C YOL151W 342     345     343     4       345     2       343     6e-150
-YDR542W YGL261C 121     121     121     1       121     1       121     3e-83
-YDR545W YOR396W 1797    1797    1797    1       1797    1       1797    0.0
-YDR545W YLR467W 1797    1797    1797    1       1797    1       1797    0.0
-YEL006W YIL006W 330     336     374     15      330     41      365     4e-130
+...
+YDR541C   YOL151W  342     345     343     4       345     2       343     1.02e-151
+YDR542W   YGL261C  121     121     121     1       121     1       121     7.47e-85
+YDR545W   YOR396W  1797    1797    1797    1       1797    1       1797    0.0
+YDR545W   YLR467W  1797    1797    1797    1       1797    1       1797    0.0
+YEL077C   YLR466W  1276    1278    1383    3       1278    164     1383    0.0
+YEL076C-A YLR464W  217     217     217     1       217     1       217     3.09e-163
+YEL076C   YLR464W  171     217     217     1       171     1       171     1.60e-127
+YEL075C   YER189W  123     123     123     1       123     1       123     3.47e-87
 ...
 
-In theory, at this point we should be able to replace the `less -S` with a `wc` to count the lines, and thus counting the number of sequences that had matches to other sequences. Unfortunately, in this case, theory doesn’t align with reality: inspecting the output above reveals that ID `YDR545W` is still represented by two lines, so this sequence would be counted twice. +In theory, at this point we should be able to replace the `less -S` with a `wc` to count the lines, and thus count the number of sequences that had matches to other sequences. Unfortunately, in this case, theory doesn't align with reality: inspecting the output above reveals that ID `YDR545W` is still represented by two lines, so this sequence would be counted twice. -Why? In the BLAST command, we requested the top two HSPs per query with `-max_target_seqs 2` and `-max_hsps 1`, so we expected that the best HSP would be to the sequence itself, with the second best (if it existed) to be to a non-self-hit. But in this case, `blastx` decided to report two non-self-hits. In fact, if we were to inspect `YDR545W`, `YOR396W`, and `YLR467W`, we’d find that their sequences are identical, and so BLAST needed to pick two HSPs out of the three-way tie. +Why? In the BLAST command, we requested the top two HSPs per query with `-max_target_seqs 2` and `-max_hsps 1`, so we expected that the best HSP would be to the sequence itself, with the second best (if it existed) to be to a non-self-hit. But in this case, `blastp` decided to report two non-self-hits. In fact, if we were to inspect `YDR545W`, `YOR396W`, and `YLR467W`, we'd find that their sequences are identical, and so BLAST needed to pick two HSPs out of the three-way tie. In order to get the correct number of sequences that had matches to others, we need to remove any duplicates that might still be found in the first column. We can do this by adding a `sort -k1,1d -u`, for a final answer of 2,989. 
@@ -73,23 +76,22 @@ In order to get the correct number of sequences that had matches to others, we n > awk '{if($1 != $2) {print $0}}' | \ > sort -k1,1d -u | \ > wc
-   2884   28840  138048 + 2989 29890 149721 -For any sufficiently complex data set, it is a good idea to check as many assumptions about it as possible when performing an analysis. In the above, using `wc` on the lines to count the number of sequences that had hits to others implied that in the first column no ID was listed twice. In this case, comparing the counts with and without the `sort -k1,1d -u` would serve to verify or reject this assumption. In later chapters, we’ll learn more techniques for this kind of “sanity checking.” +For any sufficiently complex data set, it is a good idea to check as many assumptions about it as possible when performing an analysis. In the above, using `wc` on the lines to count the number of sequences that had hits to others implied that in the first column no ID was listed twice. In this case, comparing the counts with and without the `sort -k1,1d -u` would serve to verify or reject this assumption. In later chapters, we'll learn more techniques for this kind of "sanity checking." ### Basic Syntax for awk {-} -Although `awk` can be used as a full-featured programming language (complete with loops, arrays, and so on), for sophisticated programming needs, other languages like Python and R are usually better suited. Let’s take a look at a practical subset of its syntax. +Although `awk` can be used as a full-featured programming language (complete with loops, arrays, and so on), for sophisticated programming needs, other languages like Python and R are usually better suited. Let's take a look at a practical subset of its syntax.
-Statements in the `BEGIN` block are executed before any line of the input is processed, statements in the unadorned middle block are executed for every line, and statements in the `END` block are executed after the last line is processed. Each of these blocks is optional, though the middle “for each line” block is rarely omitted in practice. - -When processing a line, a number of variables are available for use. Although many of them start with a `$`, they are not environment variables (because we’ve placed the “program” inside single quotes, they are sent to `awk` as unaltered strings, `$` signs intact). +Statements in the `BEGIN` block are executed before any line of the input is processed, statements in the unadorned middle block are executed for every line, and statements in the `END` block are executed after the last line is processed. Each of these blocks is optional, though the middle "for each line" block is rarely omitted in practice. +When processing a line, a number of variables are available for use. Although many of them start with a `$`, they are not environment variables (because we've placed the "program" inside single quotes, they are sent to `awk` as unaltered strings, `$` signs intact).
  • `$0`
  • @@ -114,9 +116,9 @@ This command prints only the first two columns of the table, separated by a spac [oneils@mbp ~/apcb/intro/blast]$ cat yeast_blastp_yeast_top2.txt | \ > grep -v '#' | \ > awk '{print $1,$2}' -YAL001C YAL001C -YAL002W YAL002W -YAL003W YAL003W +YAL068C YGL261C +YAL068C YAL068C +YAL067W-A YAL067W-A ... @@ -130,13 +132,13 @@ Instead of separating the two output columns by a space, we can instead separate [oneils@mbp ~/apcb/intro/blast]$ cat yeast_blastp_yeast_top2.txt | \ > grep -v '#' | \ > awk '{print $1":::"$2}' -YAL001C:::YAL001C -YAL002W:::YAL002W -YAL003W:::YAL003W +YAL068C:::YGL261C +YAL068C:::YAL068C +YAL067W-A:::YAL067W-A ... -If we’d like to add a new first column that simply contains the line number, we can use the `NR` variable in conjunction with the `$0` variable: +If we'd like to add a new first column that simply contains the line number, we can use the `NR` variable in conjunction with the `$0` variable:
    -Notice that the organization of the curly brackets produces a nested block structure; although for this simple case the inside set of brackets could be omitted, it’s usually best practice to include them, as they illustrate exactly which statement is controlled by the preceding `if`.^[This nested construct, a controlled block inside of another block that is executed for each element of a set (in this case, for each line), is one of our first examples of programming! One hint for reducing confusion when producing such structures is to fill in their structure from the outside in, adding pairs of symbols and then “backing up” into them as needed. In this example, we might have started with `awk ''`, and then added the curly brackets to produce `awk '{}'`, next` awk '{if() {}}'`, and finally filled in the logic with `awk '{if($10 < 1e-10) {print $0}}'`.] +If we want to get a bit more complex, `awk` allows us to use if-statements. The syntax is `{if(logical expression) {statements to execute} else if(logical expression) {statements to execute} else {statements to execute}}`. Notice that the organization of the curly brackets produces a nested block structure; although for simple cases of a single if-statement the inside set of brackets could be omitted, it's usually best practice to include them, as they illustrate exactly which statement is controlled by the preceding `if`.^[This nested construct, a controlled block inside of another block that is executed for each element of a set (in this case, for each line), is one of our first examples of programming! One hint for reducing confusion when producing such structures is to fill in their structure from the outside in, adding pairs of symbols and then "backing up" into them as needed. In this example, we might have started with `awk ''`, and then added the curly brackets to produce `awk '{}'`, next `awk '{if() {}}'`, and finally filled in the logic with `awk '{if($10 < 1e-10) {print $0}}'`.] 
-If-statements can control multiple conditions, and sometimes it helps to break `awk` programs over multiple lines to help with their readability, especially when they are included in executable scripts. This sophisticated statement adds a new first column that categorizes each HSP as either “great,” “good,” or “ok,” depending on the E value, printing only the two IDs and the E value (columns 1, 2, and 10): +If-statements can control multiple conditions, and sometimes it helps to break `awk` programs over multiple lines to help with their readability, especially when they are included in executable scripts. This sophisticated statement adds a new first column that categorizes each HSP as either "great," "good," or "ok," depending on the *E* value, printing only the two IDs and the *E* value (columns 1, 2, and 10):
    -YAL054C YAL054C 714     714     714     1       714     1       714     0.0
    -YAL054C YLR153C 645     714     684     73      712     37      674     0.0
    +> awk '$1 == "YAL054C" {print $0}'
    +YAL054C	YAL054C	714	714	714	1	714	1	714	0.0
    +YAL054C	YLR153C	645	714	684	73	712	37	674	0.0
     
    Mathematical computations are a nice feature of `awk`. For example, columns 4 and 5 contain the total length of the query sequence and subject sequence, respectively, so we might wish to print the ratio of these two as an additional column at the end. @@ -223,17 +225,18 @@ Mathematical computations are a nice feature of `awk`. For example, columns 4 an ... YAL017W YAL017W 1 YAL017W YOL045W 1.2314 -YAL018C YAL018C 1 -YAL018C YOL048C 0.950437 -YAL019W YAL019W 1 -YAL019W YOR290C 0.664319 -YAL020C YAL020C 1 +YAL016W YAL016W 1 +YAL015C YAL015C 1 +YAL015C YOL043C 1.04987 +YAL014C YAL014C 1 +YAL013W YAL013W 1 ... -We could then pipe the result to a `sort -k3,3g | tail -n 5` to see the five HSPs with the largest ratios. Beware, however, that when performing mathematical operations or comparisons with columns, any contents that can’t be parsed as a number (`1.5` can be, as can `2` and `4e-4`, but not `i5` or `NA`) may be truncated (e.g., `10x1` is treated as just `10`) or treated as `0`. Using sort on columns with `-g` can reveal such potential problems, as the same underlying method is used for parsing. +We could then pipe the result to a `sort -k3,3g | tail -n 5` to see the five HSPs with the largest ratios. Beware, however, that when performing mathematical operations or comparisons with columns, any contents that can't be parsed as a number (`1.5` can be, as can `2` and `4e-4`, but not `i5` or `NA`) may be truncated (e.g., `10x1` is treated as just `10`) or treated as `0`. Using sort on columns with `-g` can reveal such potential problems, as the same underlying method is used for parsing. + +There are a variety of mathematical functions built into `awk`. Here's a sample: -There are a variety of mathematical functions built into `awk`. Here’s a sample:
    • `log()`
      • Returns the natural logarithm of its argument, as in `print $10 * log($3 * $4)` for printing the log of the multiplication of the third and fourth columns times the tenth column.^[If you are concerned about where spaces are allowed in `awk` statements, try not to be: for the most part, they are allowed anywhere, and you may feel free to use them to enhance the readability of your statements. They are not allowed in keywords and variables: `i f($ 1 > $2) {print N R}` would be an invalid expression because `i f`, `$ 1`, and `N R` have erroneous spaces.]
      @@ -244,11 +247,11 @@ There are a variety of mathematical functions built into `awk`. Here’s a sampl
    • `%`
      • The modulus operator, returning the remainder after dividing the left-hand side by the right-hand side. For example, `NR%4` will be 1 on the first line, 2 on the second, 3 on the third, 0 on the fourth, 1 on the fifth, and so on.
    • `exp()`
    • -
      • This function returns its argument raised to the natural power *e*. For example, `log(exp($1))` returns the first column’s value.
      +
      • This function returns *e* (the base of the natural logarithm) raised to the power of its argument. For example, `log(exp($1))` returns the first column's value.
    • `int()`
      • Returns the integer portion of its argument. For example, `int(6.8)` returns `6`, and `int(-3.6)` returns `-3`.
    • `rand()`
    • -
      • When given no arguments, returns a random number between 0 (inclusive) and 1 (exclusive). By default, every time `awk` is run, the series of random numbers produced by multiple calls to `rand()` is the same. To get a “random” random series, run `srand()` (which “seeds” the random number generator) in the `BEGIN` block, as in `BEGIN{srand()}{print rand(),$0}`.
      +
      • When given no arguments, returns a random number between 0 (inclusive) and 1 (exclusive). By default, every time `awk` is run, the series of random numbers produced by multiple calls to `rand()` is the same. To get a "random" random series, run `srand()` (which "seeds" the random number generator) in the `BEGIN` block, as in `BEGIN{srand()}{print rand(),$0}`.
Logical expressions may be combined with Boolean operators, including `&&` for "and" and `||` for "or" (which produces true if either or both sides are true), and grouping can be accomplished with parentheses. For instance, we might wish to print only those lines where the first column is not equal to the second, and either the tenth column is less than `1e-30` or the second column is `YAL044C`. @@ -257,7 +260,7 @@ Logical expressions may be combined with Boolean operators, including `&&` for " -Thus far, we haven’t made much use of the `BEGIN` or `END` blocks, which are especially handy when we define and update our own variables. We can accomplish this task with an `=` assignment (not to be confused with the `==` comparison). This command prints the average E values in our example BLAST result file. +Thus far, we haven't made much use of the `BEGIN` or `END` blocks, which are especially handy when we define and update our own variables. We can accomplish this task with an `=` assignment (not to be confused with the `==` comparison). This command prints the average *E* values in our example BLAST result file.
 awk 'BEGIN{sumeval = 0; count = 0} \
 > {if($1 != $2){sumeval = sumeval + $10; count = count + 1}} \
 > END{print sumeval/count}'
-9.06228e-09
+8.96057e-09
 
As before, some IDs are still present more than one time in the first column with this solution, so it may make better sense to first filter the desired lines by using the `awk` and `sort -k1,1d -u` solution from above, and then use another `awk` for the average computation. @@ -292,15 +295,16 @@ As before, some IDs are still present more than one time in the first column wit
#### Exercises {-} -1. In the file [pz_blastx_yeast_top10.txt](data/pz_blastx_yeast_top10.txt), how many HSPs (lines) have an E value that is less than 1e-30 *or* have an identity value of greater than 50%? Use `awk`, `wc`, and `grep` if needed to compute the answer. +1. In the file [pz_blastx_yeast_top10.txt](data/pz_blastx_yeast_top10.txt), how many HSPs (lines) have an *E* value that is less than 1e-30 *or* have an identity value of greater than 50%? Use `awk`, `wc`, and `grep` if needed to compute the answer. -2. The file [contig_stats.txt](data/contig_stats.txt) describes statistics for contigs from a de novo genome assembly (a contig is an assembled “piece” of the genome). The fourth column describes the GC content of the various contigs. Other analyses indicate that the majority of correctly assembled genes have an average coverage (second column) of between 80.0 and 150.0. Use `awk` to determine the average GC content for contigs with coverage between 80.0 and 150.0. Then use another invocation of `awk` to determine the average GC content for all other contigs. (Do not count the header line in the file in your computations.) +2. The file [contig_stats.txt](data/contig_stats.txt) describes statistics for contigs from a *de novo* genome assembly (a contig is an assembled "piece" of the genome). The fourth column describes the GC content of the various contigs. Other analyses indicate that the majority of correctly assembled genes have an average coverage (second column) of between 80.0 and 150.0. Use `awk` to determine the average GC content for contigs with coverage between 80.0 and 150.0. Then use another invocation of `awk` to determine the average GC content for all other contigs. (Do not count the header line in the file in your computations.) 3. The file [PZ.annot.txt](data/PZ.annot.txt) is the result of a gene ontology (GO) analysis for the full set of assembled *Papilio zelicaon* cDNA sequences. 
Owing to the incomplete nature of the annotation process, not all sequences were assigned GO terms. How many different sequence IDs are represented in this file? -4. Some versions of the sort program can sort lines in “random” order by using the `-R` flag. Rather than using this flag, however, use `grep`, `awk` (with the `rand()` feature), `sort` (without the `-R` flag), and `head` to select five random IDs from [pz_cDNAs.fasta](data/pz_cDNAs.fasta). An example output might look like:
+4. Some versions of the sort program can sort lines in "random" order by using the `-R` flag. Rather than using this flag, however, use `grep`, `awk` (with the `rand()` feature), `sort` (without the `-R` flag), and `head` to select five random IDs from [pz_cDNAs.fasta](data/pz_cDNAs.fasta). An example output might look like:
-
The same command should produce a different list of five IDs each time it is run. + + The same command should produce a different list of five IDs each time it is run.
\ No newline at end of file diff --git a/part1-12-patterns.Rmd b/part1-12-patterns.Rmd index 3e6e507..b30d9f9 100644 --- a/part1-12-patterns.Rmd +++ b/part1-12-patterns.Rmd @@ -2,7 +2,7 @@ In previous chapters, we used a simple [`fasta_stats`](data/fasta_stats.py) program to perform basic analyses on a FASTA file called [`pz_cDNAs.fasta`](data/pz_cDNAs.fasta), mostly as an excuse to learn about the standard streams and tools like `grep` and `sort`. It turns out that the information in the [`pz_cDNAs.fasta`](data/pz_cDNAs.fasta) file provides us with many potential questions to ponder. -The sequences in this file are actually a subset of putative transcripts, produced from a de novo transcriptome assembly for the butterfly *Papilio zelicaon*. Each sequence header line encodes a variety of information: the second and third columns of the header lines reveal the number of reads contributing to each assembled sequence and the average coverage of the sequence (defined as the total number of bases contributed by reads, divided by the assembled sequence length). Even the sequence IDs encode some information: they all start with a pseudorandom identifier, but some have a suffix like `_TY`. +The sequences in this file are actually a subset of putative transcripts, produced from a *de novo* transcriptome assembly for the butterfly *Papilio zelicaon*. Each sequence header line encodes a variety of information: the second and third columns of the header lines reveal the number of reads contributing to each assembled sequence and the average coverage of the sequence (defined as the total number of bases contributed by reads, divided by the assembled sequence length). Even the sequence IDs encode some information: they all start with a pseudorandom identifier, but some have a suffix like `_TY`.
@@ -10,7 +10,7 @@ The sequences in this file are actually a subset of putative transcripts, produc Groups of sequences that share the same `_` suffix were previously identified as having shared matches using a self-BLAST. Sequence IDs without such a suffix had no matches. We might ask: how many sequences are in such a group? This could be easily answered by first using `grep` to extract lines that match `>` (the header lines), and then using another `grep` to extract those with the pattern `_` (those in a group) before sending the result to `wc`. -A more complex question would be to ask how many different groups are represented in the file. If the group information was stored in a separate column (say, the second column), this question could be answered with the same process as above, followed by a `sort -k2,2d -u` to remove duplicate group identifiers. But how can we coerce the group information into its own column? We could do this by *substituting* instances of `_` with spaces. The `sed` (Stream EDitor) tool can help us. Here’s the overall pipeline we’ll use: +A more complex question would be to ask how many different groups are represented in the file. If the group information was stored in a separate column (say, the second column), this question could be answered with the same process as above, followed by a `sort -k2,2d -u` to remove duplicate group identifiers. But how can we coerce the group information into its own column? We could do this by *substituting* instances of `_` with a tab character. The `sed` (**S**tream **ED**itor) tool can help us. Here's the overall pipeline we'll use:
> sed -r 's/_/ /' | \
+> sed -r 's/_/\t/' | \
 > sort -k2,2d -u | \
 > less -S
 
@@ -33,10 +33,10 @@ And here is some of the output, where only sequences in groups are represented a linkable-line-numbers"> ... ->PZ7180000000067 AF nReads=16 cov=12.0608 ->PZ7180000028269 AFW nReads=3 cov=2.97992 ->PZ7180000036480 AIJ nReads=11 cov=4.61416 ->PZ783221 AOC nReads=1 cov=1 +>PZ7180000000067 AF nReads=16 cov=12.0608 +>PZ7180000028269 AFW nReads=3 cov=2.97992 +>PZ7180000036480 AIJ nReads=11 cov=4.61416 +>PZ783221 AOC nReads=1 cov=1 ... @@ -44,23 +44,23 @@ And here is some of the output, where only sequences in groups are represented a The `sed` tool is a sophisticated program for modifying input (either from a file or standard input) and printing the results to standard output: `sed '' ` or` ... | sed ''`. -Like `awk`, `sed` hails from the 1970s and provides a huge variety of powerful features and syntax, only a tiny fraction of which we’ll cover here. In particular, we’ll focus on the `s`, or substitution, operation. +Like `awk`, `sed` hails from the 1970s and provides a huge variety of powerful features and syntax, only a tiny fraction of which we'll cover here. In particular, we'll focus on the `s`, or substitution, operation.
-The `-r` option that we’ve used lets `sed` know that we want our pattern to be specified by “POSIX extended regular expression” syntax.^[POSIX, short for Portable Operating System Interface, defines a base set of standards for programs and operating systems so that different Unix-like operating systems may interoperate.] The general pattern of the program for substitution is `s///g`, where the `g` specifies that, for each line, each instance of the pattern should be replaced. We can alternatively use `1` in this spot to indicate that only the first instance should be replaced, `2` to indicate only the second, and so on. Often, `s///` is used, as it has the same meaning as `s///1`.^[We should also note that the `/` delimiters are the most commonly used, but most characters can be used instead; for example, `s|||g` may make it easier to replace `/` characters. Alternate delimiters are also often used when we end up with a command that looks like a "Picket Fence", for example: `s\/usr/local/bin\/bin\g` becomes `s|/usr/local/bin|/bin|g`] +The `-r` option that we've used lets `sed` know that we want our pattern to be specified by "POSIX extended regular expression" syntax.^[POSIX, short for Portable Operating System Interface, defines a base set of standards for programs and operating systems so that different Unix-like operating systems may interoperate.] The general pattern of the program for substitution is `s///g`, where the `g` specifies that, for each line, each instance of the pattern should be replaced. We can alternatively use `1` in this spot to indicate that only the first instance should be replaced, `2` to indicate only the second, and so on. Often, `s///` is used, as it has the same meaning as `s///1`.^[We should also note that the `/` delimiters are the most commonly used, but most characters can be used instead; for example, `s|||g` may make it easier to replace `/` characters. 
Alternate delimiters are also often used when we end up with a command that looks like a "Picket Fence", for example: `s/\/usr\/local\/bin/\/bin/g` becomes `s|/usr/local/bin|/bin|g`] ### Regular Expressions {-} -The true power of `sed` comes not from its ability to replace text, but from its utility in replacing text based on “patterns” or, more formally, *regular expressions*. A regular expression is a syntax for describing pattern matching in strings. Regular expressions are described by the individual characters that make up the pattern to search for, and “meta-operators” that modify parts of the pattern for flexibility. In `[ch]at`, for example, the brackets function as a meta-operator meaning “one of these characters,” and this pattern matches both `cat` and `hat`, but not `chat`. Regular expressions are often built by chaining smaller expressions, as in `[ch]at on the [mh]at`, matching `cat on the hat`, `cat on the mat`, `hat on the hat`, and `hat on the mat`. +The true power of `sed` comes not from its ability to replace text, but from its utility in replacing text based on "patterns" or, more formally, *regular expressions*. A regular expression is a syntax for describing pattern matching in strings. Regular expressions are described by the individual characters that make up the pattern to search for, and "meta-operators" that modify parts of the pattern for flexibility. In `[ch]at`, for example, the brackets function as a meta-operator meaning "one of these characters," and this pattern matches both `cat` and `hat`, but not `chat`. Regular expressions are often built by chaining smaller expressions, as in `[ch]at on the [mh]at`, matching `cat on the hat`, `cat on the mat`, `hat on the hat`, and `hat on the mat`. -In the example above, the entire pattern was specified by `_`, which is not a meta-operator of any kind, and so each instance of `_` was replaced by the replacement (a space character). 
The meta-operators that are supported by regular expressions are many and varied, but here’s a basic list along with some biologically inspired examples: +In the example above, the entire pattern was specified by `_`, which is not a meta-operator of any kind, and so the first instance of `_` on each line was replaced by the replacement (a tab character). The meta-operators that are supported by regular expressions are many and varied, but here's a basic list along with some biologically inspired examples:
  • non-meta-operator characters or strings
  • -
    • Most characters that don’t operate in a meta-fashion are simply matched. For example, `_` matches `_`, `A` matches `A`, and `ATG` matches a start codon. (In fact, `ATG` is three individual patterns specified in a row.) When in doubt, it is usually safe to escape a character (by prefixing it with a backslash) to ensure it is interpreted literally. For example, `\[_\]` matches the literal string `[_]`, rather than making use of the brackets as meta-operators.
    +
    • Most characters that don't operate in a meta-fashion are simply matched. For example, `_` matches `_`, `A` matches `A`, and `ATG` matches a start codon. (In fact, `ATG` is three individual patterns specified in a row.) When in doubt, it is usually safe to escape a character (by prefixing it with a backslash) to ensure it is interpreted literally. For example, `\[_\]` matches the literal string `[_]`, rather than making use of the brackets as meta-operators.
  • `.`
    • A period matches any single character. For example, `CC.` matches any P codon (`CCA`, `CCT`, `CCG`, `CCC`), but also strings like `CCX` and `CC%`.
  • `[]`
  • @@ -73,27 +73,29 @@ In the example above, the entire pattern was specified by `_`, which is not a me
    • Similar to `^`, but `$` matches the end of the string or line. So, `sed -r 's/ATG$/XXX/g'` replaces all start codons that exist at the end of their respective lines.
-So far our patterns aren’t really all that flexible, because most of the pieces covered to this point match a single character. The next five meta-operators resolve that limitation. +So far our patterns aren't really all that flexible, because most of the pieces covered to this point match a single character. The next five meta-operators resolve that limitation.
  • `{x,y}`
  • -
    • Modifies the preceding pattern so that it matches if it occurs between `x` and `y` times in a row, inclusive. For example, `[GC]{4,8}` matches any string of C’s and/or G’s that is four to eight characters long (shooting for eight characters, if possible). So, `sed -r 's/[GC]{4,8}/_X_/g'` would result in the following substitutions:
      +
      • Modifies the preceding pattern so that it matches if it occurs between `x` and `y` times in a row, inclusive. For example, `[GC]{4,8}` matches any string of C's and/or G's that is four to eight characters long (shooting for eight characters, if possible). So, `sed -r 's/[GC]{4,8}/_X_/g'` would result in the following substitutions:
        `ATCCGTCT` to `ATCCGTCT` (no replacement)
        `ATCCGCGGCTC` to `AT_X_TC`
        `ATCGCGCGGCCCGTTCGGGCCT` to `AT_X_CCGTT_X_T`
        -Using `{0,1}` has the effect of making what it follows optional in the pattern, and `{x,}` has the effect of allowing the pattern to match x or more times with no upper limit.
      +Using `{0,1}` has the effect of making what it follows optional in the pattern, and `{x,}` has the effect of allowing the pattern to match x or more times with no upper limit. + +Note that when `x` is equal to `y`, it is redundant to specify both, so we can simplify, i.e., instead of `{3,3}`, we can simply use `{3}`.
  • `*`
    • An asterisk modifies the preceding pattern so that it matches if it occurs zero or more times; thus it is equivalent to `{0,}`.

      The usage of `*` deserves a detailed example. Consider the pattern `ATG[ATGC]*TGA`, where `ATG` is the pattern for a start codon, `[ATGC]*` indicates zero or more DNA bases in a row, and `TGA` is one of the canonical stop codons. This pattern matches `ATGTACCTTGA`, and also matches `ATGTGA` (where the middle part has been matched zero times).
  • `+`
    • The most prominent repetition modifier, a plus sign modifies the preceding pattern so that it is matched one or more times; it is equivalent to `{1,}`. In contrast with the example above, `ATG[ATGC]+TGA` matches `ATGTACCTTGA` and `ATGCTGA`, but not `ATGTGA`.
  • `()`
  • -
    • Parentheses may be used to group an expression or series of expressions into a single unit so that they may be operated on together. Because `AT` is the pattern `A` followed by `T`, for example, `AT+` matches `AT`, `ATT`, `ATTT`, and so on. If we wanted to instead match `AT` repeats, we might wish to specify a pattern like `(AT)+`, which matches `AT`, `ATAT`, `ATATAT`, and so on. Parentheses also “save” the string that was matched within them for later use. This is known as back-referencing, discussed below.
    +
    • Parentheses may be used to group an expression or series of expressions into a single unit so that they may be operated on together. Because `AT` is the pattern `A` followed by `T`, for example, `AT+` matches `AT`, `ATT`, `ATTT`, and so on. If we wanted to instead match `AT` repeats, we might wish to specify a pattern like `(AT)+`, which matches `AT`, `ATAT`, `ATATAT`, and so on. Parentheses also "save" the string that was matched within them for later use. This is known as back-referencing, discussed below.
  • `|`
  • -
    • Match either the pattern `` or the pattern` `. Multiple such patterns or operations can be chained; for example, `TAA|TAG|TGA` matches any one of the three canonical stop codons. This example is a bit ambiguous, though: does this pattern read “TA (A or T) A (G or T) GA,” or “TAA or TAG or TGA”? To make it concrete, we’d probably want to specify it as `((TAA)|(TAG)|(TGA))`.
    +
    • Match either the pattern `<pattern x>` or the pattern `<pattern y>`. Multiple such patterns or operations can be chained; for example, `TAA|TAG|TGA` matches any one of the three canonical stop codons. This example is a bit ambiguous, though: does this pattern read "TA (A or T) A (G or T) GA," or "TAA or TAG or TGA"? To make it concrete, we'd probably want to specify it as `((TAA)|(TAG)|(TGA))`.
-Using these pieces, we can put together a regular expression that serves as a simple (and not actually useful in practice) open reading frame finder. For prokaryotic sequences (where introns are not a consideration), we’ll define these as a start codon `ATG`, followed by one or more codons, followed by one of the three canonical stop codons `TAA`, `TAG`, or `TGA`. The pattern for the start is `ATG`, and we’ve seen how we can encode a stop above, with `((TAA)|(TAG)|(TGA))`. How about “one or more codons?” Well, “one or more” is embodied in the `+` operator, and a codon is any three A’s, T’s, C’s, or G’s. So, “one or more codons” is encoded as (`[ACTG]{3,3})+`. Thus the regular expression for our simple open reading frame finder is: +Using these pieces, we can put together a regular expression that serves as a simple (and not actually useful in practice) open reading frame finder. For prokaryotic sequences (where introns are not a consideration), we'll define these as a start codon `ATG`, followed by one or more codons, followed by one of the three canonical stop codons `TAA`, `TAG`, or `TGA`. The pattern for the start is `ATG`, and we've seen how we can encode a stop above, with `((TAA)|(TAG)|(TGA))`. How about "one or more codons"? Well, "one or more" is embodied in the `+` operator, and a codon is any three A's, T's, C's, or G's. So, "one or more codons" is encoded as `([ACTG]{3,3})+`. Thus the regular expression for our simple open reading frame finder is:
@@ -119,13 +121,13 @@ Notice that the string `TAG` is both a type of codon in general (`[ACTG]{3,3}`) T_ORF_CTTTTAG -The regular expression syntax used by `sed` is similar to the syntax used in languages such as Perl, Python, and R. In fact, all of the examples we’ve seen so far would work the same in those languages (though they are applied by their own specific functions rather than a call to `sed`). One helpful feature provided by more modern regular expression engines like these is that operators like `*` and `+` can be made nongreedy (though I prefer the clearer term “reluctant”) by following them with a question mark. In Python, the regular expression `ATG([ACTG]{3,3})+?((TAA)|(TAG)|(TGA))` would match the second option. (When not following a `*`, or `+`, it makes the previous optional; thus `TG(T)?CC` is equivalent to `TG(T){0,1}CC`.) More sophisticated features allow the user to access all the matches of a pattern, even if they overlap, so that the most satisfying one can be pulled out by some secondary criteria. Unfortunately, `sed` does not support nongreedy matching and several other advanced regular expression features. +The regular expression syntax used by `sed` is similar to the syntax used in languages such as Perl, Python, and R. In fact, all of the examples we've seen so far would work the same in those languages (though they are applied by their own specific functions rather than a call to `sed`). One helpful feature provided by more modern regular expression engines like these is that operators like `*` and `+` can be made nongreedy (though I prefer the clearer term "reluctant") by following them with a question mark. In Python, the regular expression `ATG([ACTG]{3,3})+?((TAA)|(TAG)|(TGA))` would match the second option. (When not following a `*` or `+`, a question mark makes the preceding pattern optional; thus `TG(T)?CC` is equivalent to `TG(T){0,1}CC`.) 
More sophisticated features allow the user to access all the matches of a pattern, even if they overlap, so that the most satisfying one can be pulled out by some secondary criteria. Unfortunately, `sed` does not support nongreedy matching and several other advanced regular expression features. ### Character Classes and Regular Expressions in Other Tools {-} -We often wish to use charset brackets to match any one of a “class” of characters; for example, `[0123456789]` matches any single digit. Most regular expression syntaxes (including that used by `sed`) allow a shorthand version `[0-9]` (if we wanted to match only a 0, 9, or -, we could use `[09-]`). Similarly, `[a-z]` matches any single lowercase letter, and `[A-Z]` any uppercase letter. These can even be combined: `[A-Za-z0-9]` matches any digit or letter. In the POSIX extended syntax used by `sed`, `0-9` can also be specified as `[:digit:]`. Notice the lack of brackets in the former—to actually match any single digit, the regular expression is `[[:digit:]]` (which, yes, is annoying). To match any nondigit, we can negate the bracketed set as `[^[:digit:]]`. +We often wish to use charset brackets to match any one of a "class" of characters; for example, `[0123456789]` matches any single digit. Most regular expression syntaxes (including that used by `sed`) allow a shorthand version `[0-9]` (if we wanted to match only a 0, 9, or -, we could use `[09-]`). Similarly, `[a-z]` matches any single lowercase letter, and `[A-Z]` any uppercase letter. These can even be combined: `[A-Za-z0-9]` matches any digit or letter. In the POSIX extended syntax used by `sed`, `0-9` can also be specified as `[:digit:]`. Notice the lack of brackets in the former — to actually match any single digit, the regular expression is `[[:digit:]]` (which, yes, is annoying). To match any nondigit, we can negate the bracketed set as `[^[:digit:]]`. 
-These POSIX character classes are especially useful when we want to match character types that are difficult to type or enumerate. In particular, `[[:space:]]` matches one of any [whitespace](#whitespace) character (spaces, tabs, newlines), and `[[:punct:]]` matches any “punctuation” character, of which there are quite a few. The `[[:space:]]` character class is particularly helpful when you are reformatting data stored in rows and columns but are not sure whether the column separators are spaces, tabs, or some combination. +These POSIX character classes are especially useful when we want to match character types that are difficult to type or enumerate. In particular, `[[:space:]]` matches one of any [whitespace](#whitespace) character (spaces, tabs, newlines), and `[[:punct:]]` matches any "punctuation" character, of which there are quite a few. The `[[:space:]]` character class is particularly helpful when you are reformatting data stored in rows and columns but are not sure whether the column separators are spaces, tabs, or some combination. In many regular expression syntaxes (including those used by Perl, Python, R, and some versions of `sed`), even shorter shortcuts for character classes are available. In these, `\d` is equivalent to `[[:digit:]]`, `\D` is equivalent to `[^[:digit:]]`, `\s` for `[[:space:]]`, `\S` for `[^[:space:]]`, among others. @@ -164,7 +166,7 @@ If we send these results through `wc`, we see that this file contains 471 header     471    1413   15279 -All of the headers matched the pattern we expected. What if they hadn’t? We could inspect which ones didn’t by using a `grep -v -E` to print the lines that didn’t match the pattern. +All of the headers matched the pattern we expected. What if they hadn't? We could inspect which ones didn't by using a `grep -v -E` to print the lines that didn't match the pattern. 
Now, hypothetically, suppose a (stubborn, and more senior) colleague has identified a list of important gene IDs, and has sent them along in a simple text file. @@ -182,11 +184,11 @@ PZ15501-gene_A ... -Unfortunately, it looks like our colleague has decided to use a slightly altered naming scheme, appending `-gene` to the end of each pseudorandom identifier, before the `_`, if it is present. In order to continue with peaceful collaborations, it might behoove us to modify our sequence file so that it corresponds to this scheme. We can do this with `sed`, but it will be a challenge, primarily because we want to perform an insertion, rather than a substitution. Actually, we’ll be performing a substitution, but we’ll be substituting matches with contents from themselves! +Unfortunately, it looks like our colleague has decided to use a slightly altered naming scheme, appending `-gene` to the end of each pseudorandom identifier, before the `_`, if it is present. In order to continue with peaceful collaborations, it might behoove us to modify our sequence file so that it corresponds to this scheme. We can do this with `sed`, but it will be a challenge, primarily because we want to perform an insertion, rather than a substitution. Actually, we'll be performing a substitution, but we'll be substituting matches with contents from themselves! Regarding back-references, in a regular expression, matches or submatches that are grouped and enclosed in parentheses have their matching strings saved in variables `\1`, `\2`, and so on. The contents of the first pair of parentheses is saved in `\1`, the second in `\2` (some experimentation may be needed to identify where nested parentheses matches are saved). The entire expression match is saved in `\0`. -To complete the example, we’ll modify the pattern used in the grep to capture both relevant parts of the pattern, replacing them with `\1-gene\2`. 
+To complete the example, we'll modify the pattern used in the grep to capture both relevant parts of the pattern, replacing them with `\1-gene\2`.
@@ -219,25 +221,25 @@ TGAAGAGTTCCTGAAAAAATTCCCTGCGGGCAAAGTACCGGCTTATGAAAGTGCTGATGGAAAAGTGGTG ... -Back-references may be used within the pattern itself as well. For example, a `sed -r 's/([A-Za-z]+) \1/\1/g'` would replace “doubled” words `([A-Za-z]+) \1` with a single copy of the word `\1`, as in `I like sed very very much`, resulting in `I like sed very much`. But beware if you are thinking of using substitution of this kind as a grammar checker, because this syntax does not search across line boundaries (although more complex `sed` programs can). This example would not modify the following pair of lines (where the word `the` appears twice): +Back-references may be used within the pattern itself as well. For example, a `sed -r 's/([A-Za-z]+) \1/\1/g'` would replace "doubled" words `([A-Za-z]+) \1` with a single copy of the word `\1`, as in `I like sed very very much`, resulting in `I like sed very much`. But beware if you are thinking of using substitution of this kind as a grammar checker, because this syntax does not search across line boundaries (although more complex `sed` programs can). This example would not modify the following pair of lines (where the word `the` appears twice):
The quick sed regex modifies the
the lazy awk output.
A few final notes about sed and regular expressions in general will help conclude this chapter. -
  1. Regular expressions, while powerful, lend themselves to mistakes. Work incrementally, and regularly check the results.
  2. -
  3. It is often easier to use multiple invocations of regular expressions (e.g., with multiple `sed` commands) rather than attempt to create a single complex expression.
  4. -
  5. Use regular expressions where appropriate, but know that they are not always appropriate. Many problems that might seem a natural fit for regular expressions are also naturally fit by other strategies, which should be taken into consideration given the complexity that regular expressions can add to a given command or piece of code.
+1. Regular expressions, while powerful, lend themselves to mistakes. Work incrementally, and regularly check the results. +2. It is often easier to use multiple invocations of regular expressions (e.g., with multiple `sed` commands) rather than attempt to create a single complex expression. +3. Use regular expressions where appropriate, but know that they are not always appropriate. Many problems that might seem a natural fit for regular expressions are also naturally fit by other strategies, which should be taken into consideration given the complexity that regular expressions can add to a given command or piece of code. -

Some people, when confronted with a problem, think, “I know, I’ll use regular expressions.” Now they have two problems.

+

Some people, when confronted with a problem, think, "I know, I'll use regular expressions." Now they have two problems.

~Jamie Zawinski

#### Exercises {-} -1. In the de novo assembly statistics file [`contig_stats.txt`](data/contig_stats.txt), the contig IDs are named as `NODE_1`, `NODE_2`, and so on. We’d prefer them to be named `contig1`, `contig2`, and the like. Produce `contig_stats_renamed.txt` with these changes performed. +1. In the *de novo* assembly statistics file [`contig_stats.txt`](data/contig_stats.txt), the contig IDs are named as `NODE_1`, `NODE_2`, and so on. We'd prefer them to be named `contig1`, `contig2`, and the like. Produce `contig_stats_renamed.txt` with these changes performed. 2. How many sequences in the file [`pz_cDNAs.fasta`](data/pz_cDNAs.fasta) are composed of only one read? You will likely need to use both `awk` and `sed` here, and be sure to carefully check the results of your pipeline with `less`. diff --git a/part3-12-plotting-data-and-ggplot2.Rmd b/part3-12-plotting-data-and-ggplot2.Rmd index ae4ad03..a3b2747 100644 --- a/part3-12-plotting-data-and-ggplot2.Rmd +++ b/part3-12-plotting-data-and-ggplot2.Rmd @@ -511,7 +511,7 @@ The `cut()` can take a `labels =` parameter as well, for cases when the default Each aesthetic used in a plot is associated with a scale, whether it be the `x` and `y` or even `color` or `size`. For faceted plots, usually any scale adjustments are shared across facets, and for each layer in a plot, all scale properties must be shared as well. The different types of scales — continuous for `x` and `y`, color values for `color`, sizes for `size` — can all be modified to change the scale name, range, locations of breaks (or tick-marks) and labels of the breaks. There's more to say about scales than can be covered in this book, but we'll try to hit the high points. -Rather than continue plotting the diamonds data set, let's load up a data frame more biologically relevant. This table, stored in a file called [`contig_stats.txt`](data/contig_stats.txt), summarizes sequence statistics from a de novo genome assembly. 
Each row represents a "contig" from the assembly (a single sequence meant to represent a contiguous piece of the genome sequence produced from many overlapping sequenced pieces). +Rather than continue plotting the diamonds data set, let's load up a data frame more biologically relevant. This table, stored in a file called [`contig_stats.txt`](data/contig_stats.txt), summarizes sequence statistics from a *de novo* genome assembly. Each row represents a "contig" from the assembly (a single sequence meant to represent a contiguous piece of the genome sequence produced from many overlapping sequenced pieces).