From 18ad30dbffbc7db174f48e77b73dd70c3e13c189 Mon Sep 17 00:00:00 2001 From: rakesh-nori <20rakeshn@students.harker.org> Date: Tue, 23 Jul 2019 19:06:49 -0400 Subject: [PATCH] Changes to get rid of canonicalization This is a rough way to get rid of canonicalization in squeakr; I suggest that you instead add an option for non-canonical counting that, when chosen, skips the reverse-complement steps in count.cc. --- src/count.cc | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/count.cc b/src/count.cc index a4d5288..e8f55ca 100644 --- a/src/count.cc +++ b/src/count.cc @@ -96,7 +96,6 @@ bool reads_to_kmers(chunk &c, flush_object *obj) goto next_read; { __int128_t first = 0; - __int128_t first_rev = 0; __int128_t item = 0; for(uint32_t i = 0; i < obj->ksize; i++) { //First kmer uint8_t curr = Kmer::map_base(read[i]); @@ -111,12 +110,8 @@ bool reads_to_kmers(chunk &c, flush_object *obj) first = first << 2; } first = first >> 2; - first_rev = Kmer::reverse_complement(first, obj->ksize); - if (Kmer::compare_kmers(first, first_rev)) - item = first; - else - item = first_rev; + item = first; /* * first try and insert in the main QF. @@ -140,7 +135,6 @@ bool reads_to_kmers(chunk &c, flush_object *obj) } uint64_t next = (first << 2) & BITMASK(2 * obj->ksize); - uint64_t next_rev = first_rev >> 2; for(uint32_t i = obj->ksize; i < read.length(); i++) { //next kmers uint8_t curr = Kmer::map_base(read[i]); @@ -152,13 +146,7 @@ bool reads_to_kmers(chunk &c, flush_object *obj) goto start_read; } next |= curr; - uint64_t tmp = Kmer::reverse_complement_base(curr); - tmp <<= (obj->ksize * 2 - 2); - next_rev = next_rev | tmp; - if (Kmer::compare_kmers(next, next_rev)) - item = next; - else - item = next_rev; + item = next; /* * first try and insert in the main QF. @@ -182,7 +170,6 @@ bool reads_to_kmers(chunk &c, flush_object *obj) } next = (next << 2) & BITMASK(2*obj->ksize); - next_rev = next_rev >> 2; } }