diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c0b0208..2daeb6f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -31,4 +31,14 @@ jobs: - name: Build run: cargo build --verbose --features serde - name: Run tests - run: cargo test --verbose --features serde \ No newline at end of file + run: cargo test --verbose --features serde + + docs: + runs-on: ubuntu-latest + env: + RUSTFLAGS: -C target-cpu=x86-64 + RUSTDOCFLAGS: -C target-cpu=x86-64 + steps: + - uses: actions/checkout@v4 + - name: Docs + run: cargo doc --verbose --all-features \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 371da6e..7798dd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vers-vecs" -version = "1.7.0" +version = "2.0.0" edition = "2021" authors = ["Johannes \"Cydhra\" Hengstler"] description = "A collection of succinct data structures supported by fast implementations of rank and select queries." diff --git a/benches/bp.rs b/benches/bp.rs index c278694..e88c0de 100644 --- a/benches/bp.rs +++ b/benches/bp.rs @@ -11,7 +11,7 @@ use vers_vecs::trees::{Tree, TreeBuilder}; mod common; -const BLOCK_SIZE: usize = 1024; +const BLOCK_SIZE: u64 = 1024; // TODO this function has nlogn runtime, which is a bit too much for the largest trees fn generate_tree<R: Rng>(rng: &mut R, nodes: u64) -> BpTree<BLOCK_SIZE> { @@ -107,7 +107,7 @@ fn bench_navigation(b: &mut Criterion) { let mut rng = StdRng::from_seed([0; 32]); let bp = generate_tree(&mut rng, l as u64); - let node_handles = (0..l).map(|i| bp.node_handle(i)).collect::<Vec<_>>(); + let node_handles = (0..l as u64).map(|i| bp.node_handle(i)).collect::<Vec<_>>(); group.bench_with_input(BenchmarkId::new("parent", l), &l, |b, _| { b.iter_batched( diff --git a/benches/elias_fano_iterator.rs b/benches/elias_fano_iterator.rs index 774ec87..ad939ae 100644 --- a/benches/elias_fano_iterator.rs +++ b/benches/elias_fano_iterator.rs @@ -29,7 +29,7 @@ fn bench_ef(b: &mut Criterion) { let start = Instant::now(); while i < iters { - black_box(ef_vec.get_unchecked(i as usize % l)); + black_box(ef_vec.get_unchecked(i % l as u64)); i += 1; } time += start.elapsed(); diff --git a/benches/rmq.rs b/benches/rmq.rs index a9506c6..d70da24 100644 --- a/benches/rmq.rs +++ b/benches/rmq.rs @@ -1,7 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; use rand::distributions::{Distribution, Uniform}; use rand::Rng; -use vers_vecs::rmq::fast_rmq::FastRmq; +use vers_vecs::rmq::small::SmallRmq; mod common; @@ -12,7 +12,7 @@ fn bench_rmq(b: &mut Criterion) { group.plot_config(common::plot_config()); for l in common::SIZES { - let rmq = FastRmq::from_vec(common::fill_random_vec(&mut rng, l)); + let rmq = SmallRmq::from_vec(common::fill_random_vec(&mut rng, l)); let sample = Uniform::new(0, rmq.len()); group.bench_with_input(BenchmarkId::new("range_min", l), &l, |b, _| { b.iter_batched( diff --git a/benches/select_adversarial.rs b/benches/select_adversarial.rs index f70be47..070e90e 100644 --- a/benches/select_adversarial.rs +++ b/benches/select_adversarial.rs @@ -35,7 +35,7 @@ fn select_worst_case(b: &mut Criterion) { // construct a vector with only one select block and put its last one bit at the end // of the vector - let mut bit_vec = BitVec::with_capacity(length / 64); + let mut bit_vec = BitVec::with_capacity(length as u64 / 64); for _ in 0..(1usize << 13) / 64 - 1 { bit_vec.append_word(u64::MAX); } diff --git a/benches/select_iter.rs b/benches/select_iter.rs index 73be7d7..595838e 100644
--- a/benches/select_iter.rs +++ b/benches/select_iter.rs @@ -15,11 +15,11 @@ fn bench_select_iter(b: &mut Criterion) { group.bench_with_input(BenchmarkId::new("select queries", l), &l, |b, _| { b.iter_custom(|iters| { let mut time = Duration::new(0, 0); - let mut i = 0usize; + let mut i = 0; let rank1 = bit_vec.rank1(bit_vec.len()); let start = Instant::now(); - while (i as u64) < iters { + while i < iters { black_box(bit_vec.select1(i % rank1)); i += 1; } diff --git a/benches/sparse_equals.rs b/benches/sparse_equals.rs index 9119652..7438fc1 100644 --- a/benches/sparse_equals.rs +++ b/benches/sparse_equals.rs @@ -22,14 +22,14 @@ pub const SIZES: [usize; 7] = [ const FILL_FACTORS: [f64; 6] = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]; /// Generate a bitvector with `fill_factors` percent ones at random positions -fn generate_vector_with_fill(rng: &mut ThreadRng, len: usize, fill_factor: f64) -> BitVec { +fn generate_vector_with_fill(rng: &mut ThreadRng, len: u64, fill_factor: f64) -> BitVec { let mut bit_vec1 = BitVec::from_zeros(len); // flip exactly fill-factor * len bits so the equality check is not trivial - sample(rng, len, (fill_factor * len as f64) as usize) + sample(rng, len as usize, (fill_factor * len as f64) as usize) .iter() .for_each(|i| { - bit_vec1.flip_bit(i); + bit_vec1.flip_bit(i as u64); }); bit_vec1 @@ -39,6 +39,7 @@ fn bench(b: &mut Criterion) { let mut rng = rand::thread_rng(); for len in SIZES { + let len = len as u64; let mut group = b.benchmark_group(format!("Equals Benchmark: {}", len)); group.plot_config(common::plot_config()); diff --git a/migrate.md b/migrate.md new file mode 100644 index 0000000..e9b1160 --- /dev/null +++ b/migrate.md @@ -0,0 +1,29 @@ +# Migration Guide from 1.X to 2.0 +The following guide explains the changes from the 1.X versions to the 2.0 release and points out which changes are necessary +in downstream crates. + +## Renamed Members +The following structures and functions were renamed: +- `BitVec::from_bit_vector` to `BitVec::from_bit_vec` +- `SparseRSVec` to `SparseRsVec` +- `FastRmq` to `SmallRmq` +- `BinaryRmq` to `SparseRmq` +- `BitVec::from_bits` to `BitVec::from_bits_u8` +- module `fast_rs_vec` to `rs` +- module `elias_fano` to `ef` +- module `fast_rmq` to `small` +- module `binary_rmq` to `sparse` + +## Changed Index Type +All vector types that operate on bits or sub-byte words are now indexed by `u64` instead of `usize`, +allowing full utilization of the available memory on 32-bit architectures. +This affects `BitVec`, `RsVec`, `EliasFano`, `SparseRsVec`, `BpTree`, and `WaveletMatrix`. +This changes the parameter and return types of various functions on the affected types from `usize` to `u64`. +The only adverse effect is that `len()` and `count()` of iterators over these data structures may panic if the +iterator has more than `usize::MAX` elements. + +## Changed Backing Structures +`RsVec`, `SparseRmq`, and `SmallRmq` now use `Box<[_]>` instead of `Vec<_>` as their backing structures, which reduces the stack +footprint. +This breaks the serde-compatibility with already serialized data. +It also changes the `Deref` implementation of the RMQ structs, which previously dereferenced to `Vec<_>`. \ No newline at end of file diff --git a/readme.md b/readme.md index ea64ed1..d21edac 100644 --- a/readme.md +++ b/readme.md @@ -31,8 +31,6 @@ since the intrinsics speed up both `rank` and `select` operations by a factor of - `simd`: Enables the use of SIMD instructions for rank and select operations. This feature requires AVX-512 support and uses unsafe code.
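To make the renames and the `usize` → `u64` index change from the migration guide above concrete, here is a minimal before/after sketch of affected downstream code; the call sites are illustrative and grounded only in the signatures visible in this diff:

```rust
use vers_vecs::BitVec;

fn main() {
    // 1.x: `BitVec::from_bits(&[u8])`; 2.0: the constructor is `from_bits_u8`
    let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1]);

    // len() now returns u64, so the loop counter is u64 without casts
    for i in 0..bv.len() {
        let _bit = bv.get(i);
    }

    // positions passed to mutating methods are u64 as well
    bv.flip_bit(2);
}
```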
It also enables a special iterator for the rank/select bit vector that uses vectorized operations. -The feature only works on nightly Rust. -Enabling it on stable Rust is a no-op, because the required CPU features are not available there. - `serde`: Enables serialization and deserialization of the data structures using the `serde` crate. - `u16_lookup` Enables a larger lookup table for BP tree queries. The larger table requires 128 KiB instead of 4 KiB. diff --git a/src/bit_vec/mask.rs b/src/bit_vec/mask.rs index a146b24..a8be369 100644 --- a/src/bit_vec/mask.rs +++ b/src/bit_vec/mask.rs @@ -48,7 +48,7 @@ where /// If the position is larger than the length of the vector, None is returned. #[inline] #[must_use] - pub fn get(&self, pos: usize) -> Option<u64> { + pub fn get(&self, pos: u64) -> Option<u64> { if pos >= self.vec.len { None } else { @@ -67,10 +67,10 @@ where /// [`get`]: MaskedBitVec::get #[inline] #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { + pub fn get_unchecked(&self, pos: u64) -> u64 { ((self.bin_op)( - self.vec.data[pos / WORD_SIZE], - self.mask.data[pos / WORD_SIZE], + self.vec.data[(pos / WORD_SIZE) as usize], + self.mask.data[(pos / WORD_SIZE) as usize], ) >> (pos % WORD_SIZE)) & 1 } @@ -79,7 +79,7 @@ where /// If the position is larger than the length of the vector, None is returned. #[inline] #[must_use] - pub fn is_bit_set(&self, pos: usize) -> Option<bool> { + pub fn is_bit_set(&self, pos: u64) -> Option<bool> { if pos >= self.vec.len { None } else { @@ -97,7 +97,7 @@ where /// [`is_bit_set`]: MaskedBitVec::is_bit_set #[inline] #[must_use] - pub fn is_bit_set_unchecked(&self, pos: usize) -> bool { + pub fn is_bit_set_unchecked(&self, pos: u64) -> bool { self.get_unchecked(pos) != 0 } @@ -108,7 +108,7 @@ where /// If the length of the query is larger than 64, None is returned. #[inline] #[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> { + pub fn get_bits(&self, pos: u64, len: u64) -> Option<u64> { if len > WORD_SIZE || len == 0 { return None; } @@ -138,12 +138,13 @@ where #[must_use] #[allow(clippy::inline_always)] #[allow(clippy::comparison_chain)] // rust-clippy #5354 + #[allow(clippy::cast_possible_truncation)] // safe due to the division #[inline] - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); let partial_word = (self.bin_op)( - self.vec.data[pos / WORD_SIZE], - self.mask.data[pos / WORD_SIZE], + self.vec.data[(pos / WORD_SIZE) as usize], + self.mask.data[(pos / WORD_SIZE) as usize], ) >> (pos % WORD_SIZE); if pos % WORD_SIZE + len == WORD_SIZE { @@ -152,8 +153,8 @@ where partial_word & ((1 << (len % WORD_SIZE)) - 1) } else { let next_half = (self.bin_op)( - self.vec.data[pos / WORD_SIZE + 1], - self.mask.data[pos / WORD_SIZE + 1], + self.vec.data[(pos / WORD_SIZE + 1) as usize], + self.mask.data[(pos / WORD_SIZE + 1) as usize], ) << (WORD_SIZE - pos % WORD_SIZE); (partial_word | next_half) & ((1 << (len % WORD_SIZE)) - 1) @@ -167,7 +168,7 @@ where #[inline] #[must_use] pub fn count_zeros(&self) -> u64 { - self.vec.len as u64 - self.count_ones() + self.vec.len - self.count_ones() } /// Return the number of ones in the masked bit vector.
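The `MaskedBitVec` accessors reworked in this hunk evaluate the binary operation lazily, limb by limb, instead of materializing a third bit vector. A small usage sketch; the constructor itself is outside this hunk, so the `mask_or` name and its `Result` return type are assumptions:

```rust
use vers_vecs::BitVec;

fn main() {
    let a = BitVec::from_bits_u8(&[1, 0, 0, 1]);
    let b = BitVec::from_bits_u8(&[0, 1, 0, 1]);

    // assumed entry point for the lazy bitwise-or mask mentioned later in this diff
    let masked = a.mask_or(&b).unwrap();

    // each query applies the operation to the two backing limbs on the fly,
    // so no third vector is ever allocated
    assert_eq!(masked.get(0), Some(1)); // 1 | 0
    assert_eq!(masked.count_ones(), 3);
}
```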
@@ -177,10 +178,10 @@ where pub fn count_ones(&self) -> u64 { let mut ones = self .iter_limbs() - .take(self.vec.len / WORD_SIZE) + .take((self.vec.len / WORD_SIZE) as usize) .map(|limb| u64::from(limb.count_ones())) .sum(); - if self.vec.len % WORD_SIZE > 0 { + if !self.vec.len.is_multiple_of(WORD_SIZE) { ones += u64::from( ((self.bin_op)( *self.vec.data.last().unwrap(), diff --git a/src/bit_vec/mod.rs b/src/bit_vec/mod.rs index 056091e..9c7fa73 100644 --- a/src/bit_vec/mod.rs +++ b/src/bit_vec/mod.rs @@ -7,14 +7,14 @@ use std::cmp::min; use std::hash::{Hash, Hasher}; use std::mem::size_of; -pub mod fast_rs_vec; +pub mod rs; pub mod sparse; pub mod mask; /// Size of a word in bitvectors. All vectors operate on 64-bit words. -const WORD_SIZE: usize = 64; +const WORD_SIZE: u64 = 64; /// Type alias for masked bitvectors that implement a simple bitwise binary operation. /// The first lifetime is for the bit vector that is being masked, the second lifetime is for the @@ -29,7 +29,7 @@ pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>; /// The bit vector has a wide range of constructors that allow for easy creation from various /// sources. /// Among them are constructors for creating an empty vector ([`BitVec::new`]), -/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations), +/// creating one from single bits of various integer types ([`BitVec::from_bits_u8`] and variations), /// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations), /// or packing a sequence of numerical values into a dense bit sequence /// ([`BitVec::pack_sequence_u64`] and variations). @@ -60,7 +60,7 @@ pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BitVec { data: Vec<u64>, - len: usize, + len: u64, } impl BitVec { @@ -75,9 +75,10 @@ impl BitVec { /// The bit vector will be able to hold at least `capacity` bits without reallocating. /// More memory may be allocated according to the underlying allocation strategy. #[must_use] - pub fn with_capacity(capacity: usize) -> Self { + pub fn with_capacity(capacity: u64) -> Self { Self { - data: Vec::with_capacity(capacity / WORD_SIZE + 1), + #[allow(clippy::cast_possible_truncation)] // safe due to the division + data: Vec::with_capacity((capacity / WORD_SIZE + 1) as usize), len: 0, } } @@ -85,22 +86,19 @@ impl BitVec { /// Create a new bit vector with all zeros and the given length. /// The length is measured in bits. #[must_use] - pub fn from_zeros(len: usize) -> Self { - let mut data = vec![0; len / WORD_SIZE]; - if len % WORD_SIZE != 0 { - data.push(0); - } + pub fn from_zeros(len: u64) -> Self { + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let data = vec![0; len.div_ceil(WORD_SIZE) as usize]; Self { data, len } } /// Create a new bit vector with all ones and the given length. /// The length is measured in bits.
#[must_use] - pub fn from_ones(len: usize) -> Self { - let mut data = vec![u64::MAX; len / WORD_SIZE]; - if len % WORD_SIZE != 0 { - data.push((1 << (len % WORD_SIZE)) - 1); - } + pub fn from_ones(len: u64) -> Self { + // the unused bits in the last limb are allowed to hold junk data + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let data = vec![u64::MAX; len.div_ceil(WORD_SIZE) as usize]; Self { data, len } } @@ -116,7 +114,7 @@ impl BitVec { /// use vers_vecs::BitVec; /// /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1]; - /// let bv = BitVec::from_bits(&bits); + /// let bv = BitVec::from_bits_u8(&bits); /// /// assert_eq!(bv.len(), 6); /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64)); /// ``` /// /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] - pub fn from_bits(bits: &[u8]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + pub fn from_bits_u8(bits: &[u8]) -> Self { + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit(b.into())); bv } @@ -138,15 +136,15 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u16(bits: &[u16]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit_u16(b)); bv } @@ -156,15 +154,15 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u32(bits: &[u32]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit_u32(b)); bv } @@ -174,15 +172,15 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u64(bits: &[u64]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit(b)); bv } @@ -193,7 +191,7 @@ impl BitVec { /// All other bits are ignored. /// The iterator must yield values that can be converted into u64 values.
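As a quick illustration of the renamed constructor family: only the least significant bit of each element is used, so the following constructions are equivalent (a sketch based solely on the signatures shown in this diff):

```rust
use vers_vecs::BitVec;

fn main() {
    // only the lowest bit of each element is taken over
    let from_bytes = BitVec::from_bits_u8(&[1, 0, 1, 1]);
    let from_words = BitVec::from_bits_u64(&[1, 0, 1, 1]);
    assert_eq!(from_bytes, from_words);

    // the iterator variant accepts anything convertible into u64
    let from_iter = BitVec::from_bits_iter([1u8, 0, 1, 1]);
    assert_eq!(from_bytes, from_iter);
}
```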
/// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`] /// /// # Example /// ```rust @@ -210,7 +208,7 @@ impl BitVec { /// assert_eq!(bv, bv2); /// ``` /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_u64`]: BitVec::from_bits_u64 @@ -221,7 +219,7 @@ impl BitVec { I: IntoIterator<Item = T>, { let iter = iter.into_iter(); - let mut bv = Self::with_capacity(iter.size_hint().0); + let mut bv = Self::with_capacity(iter.size_hint().0 as u64); for bit in iter { bv.append_bit(bit.into()); } @@ -253,7 +251,7 @@ impl BitVec { /// [`from_limbs_iter`]: BitVec::from_limbs_iter #[must_use] pub fn from_limbs(words: &[u64]) -> Self { - let len = words.len() * WORD_SIZE; + let len = words.len() as u64 * WORD_SIZE; Self { data: words.to_vec(), len, } @@ -318,15 +316,15 @@ impl BitVec { /// [`from_limbs_iter`]: BitVec::from_limbs_iter #[must_use] pub fn from_vec(data: Vec<u64>) -> Self { - let len = data.len() * WORD_SIZE; + let len = data.len() as u64 * WORD_SIZE; Self { data, len } } - fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self + fn pack_bits<T, const MAX_BITS: u64>(sequence: &[T], bits_per_element: u64) -> Self where T: Into<u64> + Copy, { - let mut bv = Self::with_capacity(sequence.len() * bits_per_element); + let mut bv = Self::with_capacity(sequence.len() as u64 * bits_per_element); for &word in sequence { if bits_per_element <= MAX_BITS { bv.append_bits(word.into(), bits_per_element); } @@ -372,7 +370,7 @@ impl BitVec { /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self { + pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: u64) -> Self { Self::pack_bits::<_, 64>(sequence, bits_per_element) } @@ -406,7 +404,7 @@ impl BitVec { /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self { + pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: u64) -> Self { Self::pack_bits::<_, 32>(sequence, bits_per_element) } @@ -440,7 +438,7 @@ impl BitVec { /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self { + pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: u64) -> Self { Self::pack_bits::<_, 16>(sequence, bits_per_element) } @@ -474,7 +472,7 @@ impl BitVec { /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 #[must_use] - pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self { + pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: u64) -> Self { Self::pack_bits::<_, 8>(sequence, bits_per_element) } @@ -500,13 +498,13 @@ impl BitVec { /// [`append_bit_u8`]: BitVec::append_bit_u8 /// [`append_word`]: BitVec::append_word pub fn append(&mut self, bit: bool) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(0); } if bit { - self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= 1 << (self.len % WORD_SIZE); } else { - self.data[self.len / WORD_SIZE] &= !(1 <<
(self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] &= !(1 << (self.len % WORD_SIZE)); } self.len += 1; } @@ -519,7 +517,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.drop_last(3); /// /// assert_eq!(bv.len(), 3); @@ -529,14 +527,15 @@ impl BitVec { /// /// assert!(bv.is_empty()); /// ``` - pub fn drop_last(&mut self, n: usize) { + pub fn drop_last(&mut self, n: u64) { if n > self.len { self.data.clear(); self.len = 0; return; } - let new_limb_count = (self.len - n).div_ceil(WORD_SIZE); + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let new_limb_count = (self.len - n).div_ceil(WORD_SIZE) as usize; // cut off limbs that we no longer need if new_limb_count < self.data.len() { @@ -574,13 +573,13 @@ impl BitVec { /// [`append_bit_u8`]: BitVec::append_bit_u8 /// [`append_word`]: BitVec::append_word pub fn append_bit(&mut self, bit: u64) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(0); } if bit % 2 == 1 { - self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= 1 << (self.len % WORD_SIZE); } else { - self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] &= !(1 << (self.len % WORD_SIZE)); } self.len += 1; @@ -653,12 +652,12 @@ impl BitVec { /// [`append_bit_u16`]: BitVec::append_bit_u16 /// [`append_bit_u8`]: BitVec::append_bit_u8 pub fn append_word(&mut self, word: u64) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(word); } else { // zero out the unused bits before or-ing the new one, to ensure no garbage data remains - self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE)); - self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] &= !(u64::MAX << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] |= word << (self.len % WORD_SIZE); self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE)); } @@ -685,15 +684,15 @@ impl BitVec { /// /// # Panics /// Panics if `len` is larger than 64. 
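A short sketch of the append/truncate API with the new `u64` lengths, using only methods whose signatures appear in this diff:

```rust
use vers_vecs::BitVec;

fn main() {
    let mut bv = BitVec::new();

    // append_bits stores the `len` low-order bits of the word, LSB first
    bv.append_bits(0b1011, 4);
    bv.append_bit(1);
    assert_eq!(bv.len(), 5);

    // drop_last removes bits from the end of the vector
    bv.drop_last(2);
    assert_eq!(bv.len(), 3);
    assert_eq!(bv.get_bits(0, 3), Some(0b011));
}
```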
- pub fn append_bits(&mut self, bits: u64, len: usize) { + pub fn append_bits(&mut self, bits: u64, len: u64) { assert!(len <= 64, "Cannot append more than 64 bits"); - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(bits); } else { // zero out the unused bits before or-ing the new one, to ensure no garbage data remains - self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE)); - self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] &= !(u64::MAX << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] |= bits << (self.len % WORD_SIZE); if self.len % WORD_SIZE + len > WORD_SIZE { self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE)); @@ -724,11 +723,11 @@ impl BitVec { /// /// [`append_bits`]: BitVec::append_bits /// [`drop_last`]: BitVec::drop_last - pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) { - if self.len % WORD_SIZE == 0 { + pub fn append_bits_unchecked(&mut self, bits: u64, len: u64) { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(bits); } else { - self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= bits << (self.len % WORD_SIZE); if self.len % WORD_SIZE + len > WORD_SIZE { self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE)); @@ -743,10 +742,11 @@ impl BitVec { /// This function is guaranteed to reallocate the underlying vector at most once. pub fn extend_bitvec(&mut self, other: &Self) { // reserve space for the new bits, ensuring at most one re-allocation + #[allow(clippy::cast_possible_truncation)] // safe due to the division self.data - .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len()); + .reserve((self.len + other.len).div_ceil(WORD_SIZE) as usize - self.data.len()); - let full_limbs = other.len() / WORD_SIZE; + let full_limbs = (other.len() / WORD_SIZE) as usize; for i in 0..full_limbs { self.append_bits(other.data[i], WORD_SIZE); } @@ -759,7 +759,7 @@ impl BitVec { /// Return the length of the bit vector. The length is measured in bits. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -776,7 +776,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.flip_bit(1); /// /// assert_eq!(bv.len(), 6); @@ -785,7 +785,7 @@ impl BitVec { /// /// # Panics /// If the position is larger than the length of the vector, the function panics. - pub fn flip_bit(&mut self, pos: usize) { + pub fn flip_bit(&mut self, pos: u64) { assert!(pos < self.len, "Index out of bounds"); self.flip_bit_unchecked(pos); } @@ -800,8 +800,8 @@ impl BitVec { /// This will not corrupt memory. /// /// [`flip_bit`]: BitVec::flip_bit - pub fn flip_bit_unchecked(&mut self, pos: usize) { - self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE); + pub fn flip_bit_unchecked(&mut self, pos: u64) { + self.data[(pos / WORD_SIZE) as usize] ^= 1 << (pos % WORD_SIZE); } /// Return the bit at the given position. 
@@ -815,13 +815,15 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// /// assert_eq!(bv.get(1), Some(0)); /// assert_eq!(bv.get(2), Some(1)); /// ``` + /// + /// [`get_unchecked`]: Self::get_unchecked #[must_use] - pub fn get(&self, pos: usize) -> Option<u64> { + pub fn get(&self, pos: u64) -> Option<u64> { if pos >= self.len { None } else { @@ -839,8 +841,8 @@ impl BitVec { /// /// [`get`]: BitVec::get #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { - (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1 + pub fn get_unchecked(&self, pos: u64) -> u64 { + (self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE)) & 1 } /// Set the bit at the given position. @@ -853,7 +855,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.set(1, 1).unwrap(); /// /// assert_eq!(bv.len(), 6); @@ -865,7 +867,7 @@ impl BitVec { /// otherwise it will return an empty `Ok`. /// /// [`set_unchecked`]: BitVec::set_unchecked - pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> { + pub fn set(&mut self, pos: u64, value: u64) -> Result<(), &str> { if pos >= self.len { Err("out of range") } else { @@ -883,8 +885,9 @@ impl BitVec { /// Use [`set`] to properly handle this case with a `Result`. /// /// [`set`]: BitVec::set - pub fn set_unchecked(&mut self, pos: usize, value: u64) { - self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE))) + pub fn set_unchecked(&mut self, pos: u64, value: u64) { + self.data[(pos / WORD_SIZE) as usize] = (self.data[(pos / WORD_SIZE) as usize] + & !(0x1 << (pos % WORD_SIZE))) | ((value & 0x1) << (pos % WORD_SIZE)); } @@ -898,7 +901,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// /// assert!(!bv.is_bit_set(1).unwrap()); /// assert!(bv.is_bit_set(2).unwrap()); @@ -906,7 +909,7 @@ impl BitVec { /// /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked #[must_use] - pub fn is_bit_set(&self, pos: usize) -> Option<bool> { + pub fn is_bit_set(&self, pos: u64) -> Option<bool> { if pos >= self.len { None } else { @@ -923,7 +926,7 @@ impl BitVec { /// /// [`is_bit_set`]: BitVec::is_bit_set #[must_use] - pub fn is_bit_set_unchecked(&self, pos: usize) -> bool { + pub fn is_bit_set_unchecked(&self, pos: u64) -> bool { self.get_unchecked(pos) != 0 } @@ -937,7 +940,7 @@ impl BitVec { /// The first bit at `pos` is the most significant bit of the return value /// limited to `len` bits.
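The checked accessors above pair with their `_unchecked` variants; a sketch of the checked side as shown in this hunk:

```rust
use vers_vecs::BitVec;

fn main() {
    let mut bv = BitVec::from_zeros(10);

    // set() reports out-of-range positions instead of panicking
    assert!(bv.set(3, 1).is_ok());
    assert!(bv.set(10, 1).is_err());

    // get() and is_bit_set() return None past the end
    assert_eq!(bv.get(3), Some(1));
    assert_eq!(bv.get(10), None);
    assert_eq!(bv.is_bit_set(3), Some(true));
}
```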
#[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> { + pub fn get_bits(&self, pos: u64, len: u64) -> Option<u64> { if len > WORD_SIZE || len == 0 { return None; } @@ -969,13 +972,14 @@ impl BitVec { #[allow(clippy::comparison_chain)] // readability #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); - let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE); + let partial_word = self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE); if pos % WORD_SIZE + len <= WORD_SIZE { partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } else { - (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE))) + (partial_word + | (self.data[(pos / WORD_SIZE + 1) as usize] << (WORD_SIZE - pos % WORD_SIZE))) & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } } @@ -1006,7 +1010,7 @@ impl BitVec { #[must_use] #[allow(clippy::inline_always)] #[inline(always)] // to gain optimization if n is constant - pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> { + pub fn unpack_element(&self, index: u64, n: u64) -> Option<u64> { self.get_bits(index * n, n) } @@ -1028,7 +1032,7 @@ impl BitVec { #[must_use] #[allow(clippy::inline_always)] #[inline(always)] // to gain optimization if n is constant - pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 { + pub fn unpack_element_unchecked(&self, index: u64, n: u64) -> u64 { self.get_bits_unchecked(index * n, n) } @@ -1039,11 +1043,11 @@ impl BitVec { #[must_use] #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check pub fn count_ones(&self) -> u64 { - let mut ones: u64 = self.data[0..self.len / WORD_SIZE] + let mut ones: u64 = self.data[0..(self.len / WORD_SIZE) as usize] .iter() .map(|limb| u64::from(limb.count_ones())) .sum(); - if self.len % WORD_SIZE > 0 { + if !self.len.is_multiple_of(WORD_SIZE) { ones += u64::from( (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(), ); @@ -1059,7 +1063,7 @@ impl BitVec { /// [`count_ones`]: BitVec::count_ones #[must_use] pub fn count_zeros(&self) -> u64 { - self.len as u64 - self.count_ones() + self.len - self.count_ones() } /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily @@ -1226,7 +1230,9 @@ impl BitVec { /// containing the original vector. /// /// See also: [`split_at_unchecked`] - pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> { + /// + /// [`split_at_unchecked`]: Self::split_at_unchecked + pub fn split_at(self, at: u64) -> Result<(Self, Self), Self> { if at > self.len { Err(self) } else { @@ -1241,8 +1247,8 @@ impl BitVec { /// If the index is larger than the length of the vector the function will panic or run /// out of memory. /// Use [`split_at`] to properly handle this case.
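Since `unpack_element(index, n)` shown above is defined as `get_bits(index * n, n)`, it inverts the `pack_sequence_*` constructors; a quick round-trip sketch:

```rust
use vers_vecs::BitVec;

fn main() {
    // pack four 5-bit values into a 20-bit vector
    let values = [3u64, 17, 9, 30];
    let bv = BitVec::pack_sequence_u64(&values, 5);

    // unpack_element(i, n) reads back get_bits(i * n, n)
    for (i, &v) in values.iter().enumerate() {
        assert_eq!(bv.unpack_element(i as u64, 5), Some(v));
    }
}
```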
+ /// + /// [`split_at`]: Self::split_at #[must_use] - pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) { + pub fn split_at_unchecked(mut self, at: u64) -> (Self, Self) { let other_len = self.len - at; let mut other = Self::with_capacity(other_len); @@ -1250,8 +1258,8 @@ impl BitVec { return (self, other); } - let first_limb = at / WORD_SIZE; - let last_limb = self.len / WORD_SIZE; + let first_limb = (at / WORD_SIZE) as usize; + let last_limb = (self.len / WORD_SIZE) as usize; // First, we figure out the number of bits from the first limb to retain in this vector: let leading_partial = at % WORD_SIZE; @@ -1322,7 +1330,7 @@ impl From<Vec<u64>> for BitVec { impl Extend<BitVec> for BitVec { fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) { for v in iter { - self.extend_bitvec(&v) + self.extend_bitvec(&v); } } } @@ -1330,7 +1338,7 @@ impl Extend<BitVec> for BitVec { impl<'t> Extend<&'t BitVec> for BitVec { fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) { for v in iter { - self.extend_bitvec(v) + self.extend_bitvec(v); } } } @@ -1377,7 +1385,7 @@ impl Eq for BitVec {} impl Hash for BitVec { fn hash<H: Hasher>(&self, state: &mut H) { - state.write_usize(self.len); + state.write_u64(self.len); if self.len > 0 { self.data[0..self.data.len() - 1] .iter() diff --git a/src/bit_vec/fast_rs_vec/bitset.rs b/src/bit_vec/rs/bitset.rs similarity index 92% rename from src/bit_vec/fast_rs_vec/bitset.rs rename to src/bit_vec/rs/bitset.rs index 00cb5e0..2f98d11 100644 --- a/src/bit_vec/fast_rs_vec/bitset.rs +++ b/src/bit_vec/rs/bitset.rs @@ -7,7 +7,7 @@ use crate::RsVec; use std::mem::size_of; /// The number of bits in a RsVec that can be processed by AVX instructions at once. -const VECTOR_SIZE: usize = 16; +const VECTOR_SIZE: u64 = 16; // add iterator functions to RsVec impl RsVec { @@ -73,20 +73,22 @@ impl RsVec { /// [`bit_set_iter0`]: RsVec::bit_set_iter0 /// [`bit_set_iter1`]: RsVec::bit_set_iter1 /// [`SelectIter`]: super::SelectIter +#[allow(clippy::cast_possible_truncation)] pub struct BitSetIter<'a, const ZERO: bool> { vec: &'a RsVec, - base: usize, - offsets: [u32; VECTOR_SIZE], + base: u64, + offsets: [u32; VECTOR_SIZE as usize], content_len: u8, cursor: u8, } impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { pub(super) fn new(vec: &'a RsVec) -> Self { + #[allow(clippy::cast_possible_truncation)] let mut iter = Self { vec, base: 0, - offsets: [0; VECTOR_SIZE], + offsets: [0; VECTOR_SIZE as usize], content_len: 0, cursor: 0, }; @@ -103,7 +105,10 @@ impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { unsafe { let offsets = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - assert!(VECTOR_SIZE <= size_of::<__mmask16>() * 8, "change data types"); + assert!( + VECTOR_SIZE <= size_of::<__mmask16>() as u64 * 8, + "change data types" + ); let mut mask = __mmask16::from(data); if ZERO { mask = !mask; } @@ -129,7 +134,7 @@ impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { } impl<const ZERO: bool> Iterator for BitSetIter<'_, ZERO> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option<Self::Item> { if self.base >= self.vec.len() { @@ -159,6 +164,6 @@ impl<const ZERO: bool> Iterator for BitSetIter<'_, ZERO> { let offset = self.offsets[self.cursor as usize]; self.cursor += 1; - Some(self.base + offset as usize) + Some(self.base + offset as u64) } } diff --git a/src/bit_vec/fast_rs_vec/iter.rs b/src/bit_vec/rs/iter.rs similarity index 80% rename from src/bit_vec/fast_rs_vec/iter.rs rename to src/bit_vec/rs/iter.rs index e0d4fcf..5a43a9d 100644 --- a/src/bit_vec/fast_rs_vec/iter.rs +++ b/src/bit_vec/rs/iter.rs @@ -1,4 +1,4 @@ -use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE,
SUPER_BLOCK_SIZE}; +use crate::bit_vec::rs::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; use crate::RsVec; use std::iter::FusedIterator; use std::num::NonZeroUsize; @@ -13,6 +13,10 @@ impl RsVec { /// the linear access pattern. /// /// This method has convenience methods `iter0` and `iter1`. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn select_iter<const ZERO: bool>(&self) -> SelectIter<'_, ZERO> { SelectIter::new(self) } @@ -26,6 +30,10 @@ impl RsVec { /// the linear access pattern. /// /// This method has convenience methods `into_iter0` and `into_iter1`. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_select_iter<const ZERO: bool>(self) -> SelectIntoIter<ZERO> { SelectIntoIter::new(self) } @@ -36,6 +44,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn iter0(&self) -> SelectIter<'_, true> { self.select_iter() } @@ -46,6 +58,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn iter1(&self) -> SelectIter<'_, false> { self.select_iter() } @@ -56,6 +72,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIntoIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_iter0(self) -> SelectIntoIter<true> { self.into_select_iter() } @@ -66,6 +86,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIntoIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_iter1(self) -> SelectIntoIter<false> { self.into_select_iter() } @@ -106,18 +130,18 @@ macro_rules! gen_iter_impl { } /// Same implementation like select0, but uses cached indices of last query to speed up search - fn select_next_0(&mut self) -> Option<usize> { + fn select_next_0(&mut self) -> Option<u64> { let mut rank = self.next_rank; if rank >= self.vec.rank0 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; let mut block_index = 0; if self.vec.super_blocks.len() > (self.last_super_block + 1) - && self.vec.super_blocks[self.last_super_block + 1].zeros > rank + && self.vec.super_blocks[self.last_super_block + 1].zeros as u64 > rank { // instantly jump to the last searched position super_block = self.last_super_block; @@ -127,13 +151,13 @@ macro_rules!
gen_iter_impl { // check if current block contains the one and if yes, we don't need to search // this is true IF the last_block is either the last block in a super block, // in which case it must be this block, because we know the rank is within the super block, // OR if the next block has a rank higher than the current rank - if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 + if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize == 15 || self.vec.blocks.len() > self.last_block + 1 - && self.vec.blocks[self.last_block + 1].zeros as usize > rank + && self.vec.blocks[self.last_block + 1].zeros as u64 > rank { // instantly jump to the last searched position block_index = self.last_block; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block0(super_block, rank); @@ -143,11 +167,11 @@ gen_iter_impl { // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { - block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.vec.search_block0(rank, &mut block_index); self.last_block = block_index; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } self.next_rank += 1; @@ -155,17 +179,17 @@ gen_iter_impl { } /// Same implementation like ``select_next_0``, but backwards - fn select_next_0_back(&mut self) -> Option<usize> { + fn select_next_0_back(&mut self) -> Option<u64> { let mut rank = self.next_rank_back?; if self.next_rank_back.is_none() || rank < self.next_rank { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; let mut block_index = 0; - if self.vec.super_blocks[self.last_super_block_back].zeros < rank + if (self.vec.super_blocks[self.last_super_block_back].zeros as u64) < rank { // instantly jump to the last searched position super_block = self.last_super_block_back; @@ -174,11 +198,11 @@ gen_iter_impl { // check if current block contains the one and if yes, we don't need to search // this is true IF the zeros before the last block are less than the rank, // since the block before then can't contain it - if self.vec.blocks[self.last_block_back].zeros as usize <= rank + if self.vec.blocks[self.last_block_back].zeros as u64 <= rank { // instantly jump to the last searched position block_index = self.last_block_back; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block0(super_block, rank); @@ -188,11 +212,11 @@ gen_iter_impl { // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { - block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.vec.search_block0(rank, &mut block_index); self.last_block_back = block_index; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None }); @@ -201,62 +225,62 @@
gen_iter_impl { #[must_use] #[allow(clippy::assertions_on_constants)] - fn select_next_1(&mut self) -> Option<usize> { + fn select_next_1(&mut self) -> Option<u64> { let mut rank = self.next_rank; if rank >= self.vec.rank1 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; let mut block_index = 0; // check if the last super block still contains the rank, and if yes, we don't need to search if self.vec.super_blocks.len() > (self.last_super_block + 1) - && (self.last_super_block + 1) * SUPER_BLOCK_SIZE - - self.vec.super_blocks[self.last_super_block + 1].zeros + && (self.last_super_block + 1) as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[self.last_super_block + 1].zeros as u64 > rank { // instantly jump to the last searched position super_block = self.last_super_block; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; // check if current block contains the one and if yes, we don't need to search // this is true IF the last_block is either the last block in a super block, // in which case it must be this block, because we know the rank is within the super block, // OR if the next block has a rank higher than the current rank - if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 + if self.last_block as u64 % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 || self.vec.blocks.len() > self.last_block + 1 - && (self.last_block + 1 - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[self.last_block + 1].zeros as usize + && (self.last_block + 1 - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[self.last_block + 1].zeros as u64 > rank { // instantly jump to the last searched position block_index = self.last_block; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block1(super_block, rank); self.last_super_block = super_block; - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; } // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; block_index = block_at_super_block; self.vec .search_block1(rank, block_at_super_block, &mut block_index); self.last_block = block_index; - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 *
BLOCK_SIZE + - self.vec.blocks[block_index].zeros as u64; } self.next_rank += 1; @@ -265,101 +289,109 @@ gen_iter_impl { #[must_use] #[allow(clippy::assertions_on_constants)] - fn select_next_1_back(&mut self) -> Option<usize> { + fn select_next_1_back(&mut self) -> Option<u64> { let mut rank = self.next_rank_back?; if self.next_rank_back.is_none() || rank < self.next_rank { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; let mut block_index = 0; // check if the last super block still contains the rank, and if yes, we don't need to search - if (self.last_super_block_back) * SUPER_BLOCK_SIZE - - self.vec.super_blocks[self.last_super_block_back].zeros + if self.last_super_block_back as u64 * SUPER_BLOCK_SIZE - (self.vec.super_blocks[self.last_super_block_back].zeros as u64) < rank { // instantly jump to the last searched position super_block = self.last_super_block_back; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; // check if current block contains the one and if yes, we don't need to search // this is true IF the ones before the last block are less than the rank, // since the block before then can't contain it - if (self.last_block_back - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[self.last_block_back].zeros as usize + if (self.last_block_back - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[self.last_block_back].zeros as u64 <= rank { // instantly jump to the last searched position block_index = self.last_block_back; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block1(super_block, rank); self.last_super_block_back = super_block; - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; } // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; block_index = block_at_super_block; self.vec .search_block1(rank, block_at_super_block, &mut block_index); self.last_block_back = block_index; - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[block_index].zeros as u64; } self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None }); Some(self.vec.search_word_in_block1(rank, block_index)) } - ///
Advances the iterator by `n` elements. Returns an error if the iterator does not have - /// enough elements left. Does not call `next` internally. + /// Advances the iterator by `n` elements. + /// Does not call `next` internally. /// This method is currently being added to the iterator trait, see /// [this issue](https://github.com/rust-lang/rust/issues/77404). /// As soon as it is stabilized, this method will be removed and replaced with a custom /// implementation in the iterator impl. - pub(super) fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + /// + /// # Errors + /// If the iterator holds fewer than `n` elements, + /// all remaining elements are skipped, and an error reporting the number of missing elements is returned. + pub fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { if self.len() >= n { - self.next_rank += n; + self.next_rank += n as u64; Ok(()) } else { let len = self.len(); - self.next_rank += len; + self.next_rank += len as u64; Err(NonZeroUsize::new(n - len).unwrap()) } } - /// Advances the iterator back by `n` elements. Returns an error if the iterator does not have - /// enough elements left. Does not call `next_back` internally. + /// Advances the iterator back by `n` elements. + /// Does not call `next_back` internally. /// This method is currently being added to the iterator trait, see /// [this issue](https://github.com/rust-lang/rust/issues/77404). /// As soon as it is stabilized, this method will be removed and replaced with a custom /// implementation in the double ended iterator impl. - pub(super) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + /// + /// # Errors + /// If the iterator holds fewer than `n` elements, + /// all remaining elements are skipped, and an error reporting the number of missing elements is returned. + pub fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { if self.len() >= n { - self.next_rank_back = self.next_rank_back.map(|x| x - n); + self.next_rank_back = self.next_rank_back.map(|x| x - n as u64); Ok(()) } else { let len = self.len(); - self.next_rank_back = self.next_rank_back.map(|x| x - len); + self.next_rank_back = self.next_rank_back.map(|x| x - len as u64); Err(NonZeroUsize::new(n - len).unwrap()) } } } impl<$($life,)? const ZERO: bool> Iterator for $name<$($life,)? ZERO> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option<Self::Item> { if ZERO { @@ -373,6 +405,12 @@ macro_rules! gen_iter_impl { (self.len(), Some(self.len())) } + /// Returns the exact number of elements that this iterator would iterate over. Does not + /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic. fn count(self) -> usize where Self: Sized, @@ -423,8 +461,16 @@ macro_rules! gen_iter_impl { impl<$($life,)? const ZERO: bool> FusedIterator for $name<$($life,)? ZERO> {} impl<$($life,)? const ZERO: bool> ExactSizeIterator for $name<$($life,)?
ZERO> { + // the explicit check below guarantees a panic instead of a silent truncation + #[allow(clippy::cast_possible_truncation)] fn len(&self) -> usize { - self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) + // this check is hopefully eliminated on 64-bit architectures + if self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) > usize::MAX as u64 { + panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden"); + } + + self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) as usize } } } @@ -461,11 +507,11 @@ macro_rules! gen_iter_impl { #[must_use] pub struct SelectIter<'a, const ZERO: bool> { pub(crate) vec: &'a RsVec, - next_rank: usize, + next_rank: u64, // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - next_rank_back: Option<usize>, + next_rank_back: Option<u64>, /// the last index in the super block structure where we found a bit last_super_block: usize, @@ -514,11 +560,11 @@ gen_iter_impl!('a, SelectIter); // this owning iterator became necessary pub struct SelectIntoIter<const ZERO: bool> { pub(crate) vec: RsVec, - next_rank: usize, + next_rank: u64, // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - next_rank_back: Option<usize>, + next_rank_back: Option<u64>, /// the last index in the super block structure where we found a bit last_super_block: usize, diff --git a/src/bit_vec/fast_rs_vec/mod.rs b/src/bit_vec/rs/mod.rs similarity index 82% rename from src/bit_vec/fast_rs_vec/mod.rs rename to src/bit_vec/rs/mod.rs index 2c35643..840d4d3 100644 --- a/src/bit_vec/fast_rs_vec/mod.rs +++ b/src/bit_vec/rs/mod.rs @@ -20,7 +20,7 @@ use crate::BitVec; use super::WORD_SIZE; /// Size of a block in the bitvector. -const BLOCK_SIZE: usize = 512; +const BLOCK_SIZE: u64 = 512; /// Size of a super block in the bitvector. Super-blocks exist to decrease the memory overhead /// of block descriptors. @@ -30,12 +30,12 @@ const BLOCK_SIZE: usize = 512; /// impact on the performance of select queries. The larger the super block size, the deeper will /// a binary search be. We found 2^13 to be a good compromise between memory overhead and /// performance. -const SUPER_BLOCK_SIZE: usize = 1 << 13; +const SUPER_BLOCK_SIZE: u64 = 1 << 13; /// Size of a select block. The select block is used to speed up select queries. The select block /// contains the indices of every `SELECT_BLOCK_SIZE`'th 1-bit and 0-bit in the bitvector. /// The smaller this block-size, the faster are select queries, but the more memory is used. -const SELECT_BLOCK_SIZE: usize = 1 << 13; +const SELECT_BLOCK_SIZE: u64 = 1 << 13; /// Meta-data for a block. The `zeros` field stores the number of zeros up to the block, /// beginning from the last super-block boundary. This means the first block in a super-block @@ -53,7 +53,7 @@ struct BlockDescriptor { #[derive(Clone, Copy, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] struct SuperBlockDescriptor { - zeros: usize, + zeros: u64, } /// Meta-data for the select query.
Each entry i in the select vector contains the indices to find @@ -85,13 +85,13 @@ struct SelectSuperBlockDescriptor { #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct RsVec { - data: Vec<u64>, - len: usize, - blocks: Vec<BlockDescriptor>, - super_blocks: Vec<SuperBlockDescriptor>, - select_blocks: Vec<SelectSuperBlockDescriptor>, - pub(crate) rank0: usize, - pub(crate) rank1: usize, + data: Box<[u64]>, + len: u64, + blocks: Box<[BlockDescriptor]>, + super_blocks: Box<[SuperBlockDescriptor]>, + select_blocks: Box<[SelectSuperBlockDescriptor]>, + pub(crate) rank0: u64, + pub(crate) rank1: u64, } impl RsVec { @@ -106,8 +106,8 @@ impl RsVec { pub fn from_bit_vec(vec: BitVec) -> RsVec { // Construct the block descriptor meta data. Each block descriptor contains the number of // zeros in the super-block, up to but excluding the block. - let mut blocks = Vec::with_capacity(vec.len() / BLOCK_SIZE + 1); - let mut super_blocks = Vec::with_capacity(vec.len() / SUPER_BLOCK_SIZE + 1); + let mut blocks = Vec::with_capacity((vec.len() / BLOCK_SIZE) as usize + 1); + let mut super_blocks = Vec::with_capacity((vec.len() / SUPER_BLOCK_SIZE) as usize + 1); let mut select_blocks = Vec::new(); // sentinel value select_blocks.push(SelectSuperBlockDescriptor { index_0: 0, index_1: 0, }); - let mut total_zeros: usize = 0; - let mut current_zeros: usize = 0; + let mut total_zeros: u64 = 0; + let mut current_zeros: u64 = 0; let mut last_zero_select_block: usize = 0; let mut last_one_select_block: usize = 0; - for (idx, &word) in vec.data.iter().enumerate() { + for (word_idx, &word) in vec.data.iter().enumerate() { // if we moved past a block boundary, append the block information for the previous // block and reset the counter if we moved past a super-block boundary. - if idx % (BLOCK_SIZE / WORD_SIZE) == 0 { - if idx % (SUPER_BLOCK_SIZE / WORD_SIZE) == 0 { + if (word_idx as u64).is_multiple_of(BLOCK_SIZE / WORD_SIZE) { + if (word_idx as u64).is_multiple_of(SUPER_BLOCK_SIZE / WORD_SIZE) { total_zeros += current_zeros; current_zeros = 0; super_blocks.push(SuperBlockDescriptor { zeros: total_zeros }); @@ -141,40 +141,42 @@ impl RsVec { // count the zeros in the current word and add them to the counter // the last word may contain padding zeros, which should not be counted, // but since we do not append the last block descriptor, this is not a problem - let mut new_zeros = word.count_zeros() as usize; + let mut new_zeros = word.count_zeros() as u64; // in the last block, remove remaining zeros of limb that aren't part of the vector - if idx == vec.data.len() - 1 && vec.len % WORD_SIZE > 0 { + if word_idx == vec.data.len() - 1 && !vec.len.is_multiple_of(WORD_SIZE) { let mask = (1 << (vec.len % WORD_SIZE)) - 1; - new_zeros -= (word | mask).count_zeros() as usize; + new_zeros -= (word | mask).count_zeros() as u64; } let all_zeros = total_zeros + current_zeros + new_zeros; if all_zeros / SELECT_BLOCK_SIZE > (total_zeros + current_zeros) / SELECT_BLOCK_SIZE { - if all_zeros / SELECT_BLOCK_SIZE == select_blocks.len() { + if (all_zeros / SELECT_BLOCK_SIZE) as usize == select_blocks.len() { select_blocks.push(SelectSuperBlockDescriptor { index_0: super_blocks.len() - 1, index_1: 0, }); } else { - select_blocks[all_zeros / SELECT_BLOCK_SIZE].index_0 = super_blocks.len() - 1; + select_blocks[(all_zeros / SELECT_BLOCK_SIZE) as usize].index_0 = super_blocks.len() - 1; } last_zero_select_block += 1; } - let total_bits = (idx + 1) * WORD_SIZE; + let total_bits = (word_idx as u64 + 1) * WORD_SIZE; let all_ones = total_bits - all_zeros; if all_ones /
SELECT_BLOCK_SIZE - > (idx * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE + > (word_idx as u64 * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE { - if all_ones / SELECT_BLOCK_SIZE == select_blocks.len() { + if (all_ones / SELECT_BLOCK_SIZE) as usize == select_blocks.len() { select_blocks.push(SelectSuperBlockDescriptor { index_0: 0, index_1: super_blocks.len() - 1, }); } else { - select_blocks[all_ones / SELECT_BLOCK_SIZE].index_1 = super_blocks.len() - 1; + select_blocks[(all_ones / SELECT_BLOCK_SIZE) as usize].index_1 = + super_blocks.len() - 1; } last_one_select_block += 1; @@ -212,11 +214,11 @@ impl RsVec { total_zeros += current_zeros; RsVec { - data: vec.data, + data: vec.data.into_boxed_slice(), len: vec.len, - blocks, - super_blocks, - select_blocks, + blocks: blocks.into_boxed_slice(), + super_blocks: super_blocks.into_boxed_slice(), + select_blocks: select_blocks.into_boxed_slice(), rank0: total_zeros, rank1: vec.len - total_zeros, } @@ -230,7 +232,7 @@ impl RsVec { /// # Parameters /// - `pos`: The position of the bit to return the rank of. #[must_use] - pub fn rank0(&self, pos: usize) -> usize { + pub fn rank0(&self, pos: u64) -> u64 { self.rank(true, pos) } @@ -242,7 +244,7 @@ impl RsVec { /// # Parameters /// - `pos`: The position of the bit to return the rank of. #[must_use] - pub fn rank1(&self, pos: usize) -> usize { + pub fn rank1(&self, pos: u64) -> u64 { self.rank(false, pos) } @@ -250,7 +252,7 @@ impl RsVec { // branch elimination profits alone should make it worth it. #[allow(clippy::inline_always)] #[inline(always)] - fn rank(&self, zero: bool, pos: usize) -> usize { + fn rank(&self, zero: bool, pos: u64) -> u64 { #[allow(clippy::collapsible_else_if)] // readability and more obvious where dead branch elimination happens if zero { @@ -263,39 +265,40 @@ impl RsVec { } } - let index = pos / WORD_SIZE; - let block_index = pos / BLOCK_SIZE; - let super_block_index = pos / SUPER_BLOCK_SIZE; + let index = (pos / WORD_SIZE) as usize; + let block_index = (pos / BLOCK_SIZE) as usize; + let super_block_index = (pos / SUPER_BLOCK_SIZE) as usize; let mut rank = 0; // at first add the number of zeros/ones before the current super block rank += if zero { self.super_blocks[super_block_index].zeros } else { - (super_block_index * SUPER_BLOCK_SIZE) - self.super_blocks[super_block_index].zeros + (super_block_index as u64 * SUPER_BLOCK_SIZE) + - self.super_blocks[super_block_index].zeros }; // then add the number of zeros/ones before the current block rank += if zero { - self.blocks[block_index].zeros as usize + self.blocks[block_index].zeros as u64 } else { - ((block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE) - - self.blocks[block_index].zeros as usize + ((block_index as u64 % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE) + - self.blocks[block_index].zeros as u64 }; // naive popcount of blocks - for &i in &self.data[(block_index * BLOCK_SIZE) / WORD_SIZE..index] { + for &i in &self.data[((block_index as u64 * BLOCK_SIZE) / WORD_SIZE) as usize..index] { rank += if zero { - i.count_zeros() as usize + i.count_zeros() as u64 } else { - i.count_ones() as usize + i.count_ones() as u64 }; } rank += if zero { - (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize + (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as u64 } else { - (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize + (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as u64 }; rank @@ -303,7 +306,7 @@ impl 
RsVec { /// Return the length of the vector, i.e. the number of bits it contains. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -317,7 +320,7 @@ impl RsVec { /// bit of the returned u64 word. /// If the position is larger than the length of the vector, `None` is returned. #[must_use] - pub fn get(&self, pos: usize) -> Option { + pub fn get(&self, pos: u64) -> Option { if pos >= self.len() { None } else { @@ -331,8 +334,8 @@ impl RsVec { /// # Panics /// This function may panic if `pos >= self.len()` (alternatively, it may return garbage). #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { - (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1 + pub fn get_unchecked(&self, pos: u64) -> u64 { + (self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE)) & 1 } /// Return multiple bits at the given position. The number of bits to return is given by `len`. @@ -341,7 +344,7 @@ impl RsVec { /// None is returned (even if the query partially overlaps with the vector). /// If the length of the query is larger than 64, None is returned. #[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option { + pub fn get_bits(&self, pos: u64, len: u64) -> Option { if len > WORD_SIZE { return None; } @@ -370,13 +373,14 @@ impl RsVec { #[must_use] #[allow(clippy::comparison_chain)] // readability #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); - let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE); + let partial_word = self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE); if pos % WORD_SIZE + len <= WORD_SIZE { partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } else { - (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE))) + (partial_word + | (self.data[(pos / WORD_SIZE + 1) as usize] << (WORD_SIZE - pos % WORD_SIZE))) & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } } @@ -405,7 +409,7 @@ impl RsVec { #[must_use] pub fn into_bit_vec(self) -> BitVec { BitVec { - data: self.data, + data: self.data.into_vec(), len: self.len, } } @@ -437,7 +441,11 @@ impl RsVec { let iter: SelectIter = self.select_iter(); - for (rank, bit_index) in iter.enumerate() { + let len = if ZERO { self.rank0 } else { self.rank1 }; + + // we need to manually enumerate() the iter, because the number of set bits could exceed + // the size of usize. 
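+        // (illustrative note: Iterator::enumerate counts with usize, so on a
+        // 32-bit target a vector with more than usize::MAX set bits would
+        // overflow the counter, while (0..len).zip(iter) counts in u64)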
+ for (rank, bit_index) in (0..len).zip(iter) { // since rank is inlined, we get dead code elimination depending on ZERO if (other.get_unchecked(bit_index) == 0) != ZERO || other.rank(ZERO, bit_index) != rank { @@ -468,18 +476,19 @@ impl RsVec { return false; } - if self.data[..self.len / 64] + if self.data[..(self.len / WORD_SIZE) as usize] .iter() - .zip(other.data[..other.len / 64].iter()) + .zip(other.data[..(other.len / 64) as usize].iter()) .any(|(a, b)| a != b) { return false; } // if last incomplete block exists, test it without junk data - if self.len % 64 > 0 - && self.data[self.len / 64] & ((1 << (self.len % 64)) - 1) - != other.data[self.len / 64] & ((1 << (other.len % 64)) - 1) + if !self.len.is_multiple_of(WORD_SIZE) + && self.data[(self.len / WORD_SIZE) as usize] & ((1 << (self.len % WORD_SIZE)) - 1) + != other.data[(self.len / WORD_SIZE) as usize] + & ((1 << (other.len % WORD_SIZE)) - 1) { return false; } diff --git a/src/bit_vec/fast_rs_vec/select.rs b/src/bit_vec/rs/select.rs similarity index 80% rename from src/bit_vec/fast_rs_vec/select.rs rename to src/bit_vec/rs/select.rs index b8721d7..9e7ae85 100644 --- a/src/bit_vec/fast_rs_vec/select.rs +++ b/src/bit_vec/rs/select.rs @@ -1,13 +1,13 @@ // Select code is in here to keep it more organized. -use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; +use crate::bit_vec::rs::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; use crate::bit_vec::WORD_SIZE; use crate::util::pdep::Pdep; use crate::util::unroll; /// A safety constant for assertions to make sure that the block size doesn't change without /// adjusting the code. -const BLOCKS_PER_SUPERBLOCK: usize = 16; +const BLOCKS_PER_SUPERBLOCK: u64 = 16; impl super::RsVec { /// Return the position of the 0-bit with the given rank. See `rank0`. @@ -17,12 +17,12 @@ impl super::RsVec { /// If the rank is larger than the number of 0-bits in the vector, the vector length is returned. 
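    ///
+    /// A minimal usage sketch (illustrative values, using the `u64` API from this change):
+    /// ```
+    /// use vers_vecs::{BitVec, RsVec};
+    ///
+    /// let rs = RsVec::from_bit_vec(BitVec::from_bits_u8(&[1, 0, 1, 0]));
+    /// assert_eq!(rs.select0(0), 1); // the first 0-bit is at position 1
+    /// assert_eq!(rs.select0(1), 3);
+    /// assert_eq!(rs.select0(2), 4); // rank out of range: vector length
+    /// ```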
#[must_use] #[allow(clippy::assertions_on_constants)] - pub fn select0(&self, mut rank: usize) -> usize { + pub fn select0(&self, mut rank: u64) -> u64 { if rank >= self.rank0 { return self.len; } - let mut super_block = self.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; if self.super_blocks.len() > (super_block + 1) && self.super_blocks[super_block + 1].zeros <= rank @@ -32,10 +32,10 @@ impl super::RsVec { rank -= self.super_blocks[super_block].zeros; - let mut block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let mut block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.search_block0(rank, &mut block_index); - rank -= self.blocks[block_index].zeros as usize; + rank -= self.blocks[block_index].zeros as u64; self.search_word_in_block0(rank, block_index) } @@ -56,10 +56,10 @@ impl super::RsVec { target_feature = "avx512bw", ))] #[inline(always)] - pub(super) fn search_block0(&self, rank: usize, block_index: &mut usize) { + pub(super) fn search_block0(&self, rank: u64, block_index: &mut usize) { use std::arch::x86_64::{_mm256_cmpgt_epu16_mask, _mm256_loadu_epi16, _mm256_set1_epi16}; - if self.blocks.len() > *block_index + (SUPER_BLOCK_SIZE / BLOCK_SIZE) { + if self.blocks.len() > *block_index + (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize { debug_assert!( SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", @@ -93,25 +93,25 @@ impl super::RsVec { target_feature = "avx512bw", )))] #[inline(always)] - pub(super) fn search_block0(&self, rank: usize, block_index: &mut usize) { + pub(super) fn search_block0(&self, rank: u64, block_index: &mut usize) { self.search_block0_naive(rank, block_index); } #[inline(always)] - fn search_block0_naive(&self, rank: usize, block_index: &mut usize) { + fn search_block0_naive(&self, rank: u64, block_index: &mut usize) { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance // this code relies on the fact that BLOCKS_PER_SUPERBLOCK blocks are in one superblock debug_assert!( - SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, + (SUPER_BLOCK_SIZE / BLOCK_SIZE) == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", 64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1 ); unroll!(4, - |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) / 2}| + |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize / 2}| // do not use select_unpredictable here, it degrades performance - if self.blocks.len() > *block_index + boundary && rank >= self.blocks[*block_index + boundary].zeros as usize { + if self.blocks.len() > *block_index + boundary && rank >= self.blocks[*block_index + boundary].zeros as u64 { *block_index += boundary; }, boundary /= 2); @@ -126,7 +126,7 @@ impl super::RsVec { /// * `block_index` - the index of the block to search in, this is the block in the blocks /// vector that contains the rank #[inline(always)] - pub(super) fn search_word_in_block0(&self, mut rank: usize, block_index: usize) -> usize { + pub(super) fn search_word_in_block0(&self, mut rank: u64, block_index: usize) -> u64 { // linear search for word that contains the rank. Binary search is not possible here, // because we don't have accumulated popcounts for the words. 
We use pdep to find the // position of the rank-th zero bit in the word, if the word contains enough zeros, otherwise @@ -134,24 +134,24 @@ impl super::RsVec { let mut index_counter = 0; debug_assert!(BLOCK_SIZE / WORD_SIZE == 8, "change unroll constant"); unroll!(7, |n = {0}| { - let word = self.data[block_index * BLOCK_SIZE / WORD_SIZE + n]; - if (word.count_zeros() as usize) <= rank { - rank -= word.count_zeros() as usize; + let word = self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + n]; + if (word.count_zeros() as u64) <= rank { + rank -= word.count_zeros() as u64; index_counter += WORD_SIZE; } else { - return block_index * BLOCK_SIZE + return block_index as u64 * BLOCK_SIZE + index_counter - + (1 << rank).pdep(!word).trailing_zeros() as usize; + + (1 << rank).pdep(!word).trailing_zeros() as u64; } }, n += 1); // the last word must contain the rank-th zero bit, otherwise the rank is outside the // block, and thus outside the bitvector - block_index * BLOCK_SIZE + block_index as u64 * BLOCK_SIZE + index_counter + (1 << rank) - .pdep(!self.data[block_index * BLOCK_SIZE / WORD_SIZE + 7]) - .trailing_zeros() as usize + .pdep(!self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + 7]) + .trailing_zeros() as u64 } /// Search for the superblock that contains the rank. @@ -162,8 +162,9 @@ impl super::RsVec { /// superblock in the ``select_blocks`` vector that contains the rank /// * `rank` - the rank to search for #[inline(always)] - pub(super) fn search_super_block0(&self, mut super_block: usize, rank: usize) -> usize { - let mut upper_bound = self.select_blocks[rank / SELECT_BLOCK_SIZE + 1].index_0; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + pub(super) fn search_super_block0(&self, mut super_block: usize, rank: u64) -> usize { + let mut upper_bound = self.select_blocks[(rank / SELECT_BLOCK_SIZE + 1) as usize].index_0; while upper_bound - super_block > 8 { let middle = super_block + ((upper_bound - super_block) >> 1); @@ -192,31 +193,31 @@ impl super::RsVec { /// If the rank is larger than the number of 1-bits in the bit-vector, the vector length is returned. 
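    ///
+    /// A matching sketch for the 1-bit case (illustrative values):
+    /// ```
+    /// use vers_vecs::{BitVec, RsVec};
+    ///
+    /// let rs = RsVec::from_bit_vec(BitVec::from_bits_u8(&[1, 0, 1, 0]));
+    /// assert_eq!(rs.select1(0), 0);
+    /// assert_eq!(rs.select1(1), 2);
+    /// assert_eq!(rs.select1(2), 4); // rank out of range: vector length
+    /// ```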
#[must_use] #[allow(clippy::assertions_on_constants)] - pub fn select1(&self, mut rank: usize) -> usize { + pub fn select1(&self, mut rank: u64) -> u64 { if rank >= self.rank1 { return self.len; } - let mut super_block = - self.select_blocks[rank / crate::bit_vec::fast_rs_vec::SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; if self.super_blocks.len() > (super_block + 1) - && ((super_block + 1) * SUPER_BLOCK_SIZE - self.super_blocks[super_block + 1].zeros) + && ((super_block + 1) as u64 * SUPER_BLOCK_SIZE + - self.super_blocks[super_block + 1].zeros) <= rank { super_block = self.search_super_block1(super_block, rank); } - rank -= (super_block) * SUPER_BLOCK_SIZE - self.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.super_blocks[super_block].zeros; // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; let mut block_index = block_at_super_block; self.search_block1(rank, block_at_super_block, &mut block_index); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE + - self.blocks[block_index].zeros as u64; self.search_word_in_block1(rank, block_index) } @@ -240,7 +241,7 @@ impl super::RsVec { #[inline(always)] pub(super) fn search_block1( &self, - rank: usize, + rank: u64, block_at_super_block: usize, block_index: &mut usize, ) { @@ -249,7 +250,7 @@ impl super::RsVec { _mm256_sub_epi16, }; - if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK { + if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK as usize { debug_assert!( SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", @@ -257,6 +258,7 @@ impl super::RsVec { ); unsafe { + #[allow(clippy::cast_possible_truncation)] // false positive because constants let bit_nums = _mm256_set_epi16( (15 * BLOCK_SIZE) as i16, (14 * BLOCK_SIZE) as i16, @@ -273,7 +275,7 @@ impl super::RsVec { (3 * BLOCK_SIZE) as i16, (2 * BLOCK_SIZE) as i16, (1 * BLOCK_SIZE) as i16, - (0 * BLOCK_SIZE) as i16, + 0i16, ); let blocks = _mm256_loadu_epi16(self.blocks[*block_index..].as_ptr() as *const i16); @@ -307,7 +309,7 @@ impl super::RsVec { #[inline(always)] pub(super) fn search_block1( &self, - rank: usize, + rank: u64, block_at_super_block: usize, block_index: &mut usize, ) { @@ -315,25 +317,20 @@ impl super::RsVec { } #[inline(always)] - fn search_block1_naive( - &self, - rank: usize, - block_at_super_block: usize, - block_index: &mut usize, - ) { + fn search_block1_naive(&self, rank: u64, block_at_super_block: usize, block_index: &mut usize) { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance // this code relies on the fact that BLOCKS_PER_SUPERBLOCK blocks are in one superblock debug_assert!( - SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, + (SUPER_BLOCK_SIZE / BLOCK_SIZE) == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", 64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1 ); unroll!(4, - |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) / 2}| + |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize / 2}| // 
do not use select_unpredictable here, it degrades performance
-            if self.blocks.len() > *block_index + boundary && rank >= (*block_index + boundary - block_at_super_block) * BLOCK_SIZE - self.blocks[*block_index + boundary].zeros as usize {
+            if self.blocks.len() > *block_index + boundary && rank >= (*block_index + boundary - block_at_super_block) as u64 * BLOCK_SIZE - self.blocks[*block_index + boundary].zeros as u64 {
                *block_index += boundary;
            },
            boundary /= 2);
@@ -348,7 +345,7 @@ impl super::RsVec {
     /// * `block_index` - the index of the block to search in, this is the block in the blocks
     /// vector that contains the rank
     #[inline(always)]
-    pub(super) fn search_word_in_block1(&self, mut rank: usize, block_index: usize) -> usize {
+    pub(super) fn search_word_in_block1(&self, mut rank: u64, block_index: usize) -> u64 {
         // linear search for word that contains the rank. Binary search is not possible here,
         // because we don't have accumulated popcounts for the words. We use pdep to find the
         // position of the rank-th one bit in the word, if the word contains enough ones, otherwise
@@ -356,24 +353,24 @@ impl super::RsVec {
         let mut index_counter = 0;
         debug_assert!(BLOCK_SIZE / WORD_SIZE == 8, "change unroll constant");
         unroll!(7, |n = {0}| {
-            let word = self.data[block_index * BLOCK_SIZE / WORD_SIZE + n];
-            if (word.count_ones() as usize) <= rank {
-                rank -= word.count_ones() as usize;
+            let word = self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + n];
+            if (word.count_ones() as u64) <= rank {
+                rank -= word.count_ones() as u64;
                 index_counter += WORD_SIZE;
             } else {
-                return block_index * BLOCK_SIZE
+                return block_index as u64 * BLOCK_SIZE
                     + index_counter
-                    + (1 << rank).pdep(word).trailing_zeros() as usize;
+                    + (1 << rank).pdep(word).trailing_zeros() as u64;
            }
        }, n += 1);

         // the last word must contain the rank-th one bit, otherwise the rank is outside of the
         // block, and thus outside of the bitvector
-        block_index * BLOCK_SIZE
+        block_index as u64 * BLOCK_SIZE
             + index_counter
             + (1 << rank)
-                .pdep(self.data[block_index * BLOCK_SIZE / WORD_SIZE + 7])
-                .trailing_zeros() as usize
+                .pdep(self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + 7])
+                .trailing_zeros() as u64
     }

     /// Search for the superblock that contains the rank.
@@ -384,14 +381,15 @@ impl super::RsVec { /// superblock in the ``select_blocks`` vector that contains the rank /// * `rank` - the rank to search for #[inline(always)] - pub(super) fn search_super_block1(&self, mut super_block: usize, rank: usize) -> usize { - let mut upper_bound = self.select_blocks[rank / SELECT_BLOCK_SIZE + 1].index_1; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + pub(super) fn search_super_block1(&self, mut super_block: usize, rank: u64) -> usize { + let mut upper_bound = self.select_blocks[(rank / SELECT_BLOCK_SIZE + 1) as usize].index_1; // binary search for superblock that contains the rank while upper_bound - super_block > 8 { let middle = super_block + ((upper_bound - super_block) >> 1); // using select_unpredictable does nothing here, likely because the search isn't hot - if ((middle + 1) * SUPER_BLOCK_SIZE - self.super_blocks[middle].zeros) <= rank { + if ((middle + 1) as u64 * SUPER_BLOCK_SIZE - self.super_blocks[middle].zeros) <= rank { super_block = middle; } else { upper_bound = middle; @@ -399,7 +397,8 @@ impl super::RsVec { } // linear search for superblock that contains the rank while self.super_blocks.len() > (super_block + 1) - && ((super_block + 1) * SUPER_BLOCK_SIZE - self.super_blocks[super_block + 1].zeros) + && ((super_block + 1) as u64 * SUPER_BLOCK_SIZE + - self.super_blocks[super_block + 1].zeros) <= rank { super_block += 1; diff --git a/src/bit_vec/fast_rs_vec/tests.rs b/src/bit_vec/rs/tests.rs similarity index 97% rename from src/bit_vec/fast_rs_vec/tests.rs rename to src/bit_vec/rs/tests.rs index 1858248..c8537b2 100644 --- a/src/bit_vec/fast_rs_vec/tests.rs +++ b/src/bit_vec/rs/tests.rs @@ -23,7 +23,7 @@ fn test_random_data_rank() { 6, 7, ]); let sample = Uniform::new(0, 2); - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; for _ in 0..LENGTH { bv.append_bit(sample.sample(&mut rng)); @@ -42,17 +42,17 @@ fn test_random_data_rank() { let mut expected_rank1 = 0; let mut expected_rank0 = 0; - let data_index = rnd_index / WORD_SIZE; + let data_index = (rnd_index / WORD_SIZE) as usize; let bit_index = rnd_index % WORD_SIZE; for v in data.iter().take(data_index) { - expected_rank1 += v.count_ones() as usize; - expected_rank0 += v.count_zeros() as usize; + expected_rank1 += v.count_ones() as u64; + expected_rank0 += v.count_zeros() as u64; } if bit_index > 0 { - expected_rank1 += (data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize; - expected_rank0 += (!data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize; + expected_rank1 += (data[data_index] & ((1 << bit_index) - 1)).count_ones() as u64; + expected_rank0 += (!data[data_index] & ((1 << bit_index) - 1)).count_ones() as u64; } assert_eq!(actual_rank1, expected_rank1); @@ -205,13 +205,13 @@ fn test_only_ones_select() { #[test] fn random_data_select0() { + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; let mut bv = BitVec::with_capacity(LENGTH); let mut rng = StdRng::from_seed([ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, ]); let sample = Uniform::new(0, 2); - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; for _ in 0..LENGTH { bv.append_bit_u8(sample.sample(&mut rng) as u8); @@ -231,7 +231,7 @@ fn random_data_select0() { let mut index = 0; loop { - let zeros = data[index].count_zeros() as usize; + let zeros = data[index].count_zeros() as u64; if rank_counter + zeros > rnd_rank0 { break; } else { @@ -260,13 +260,13 @@ fn random_data_select0() { #[test] fn 
random_data_select1() {
+    static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE;
     let mut bv = BitVec::with_capacity(LENGTH);
     let mut rng = StdRng::from_seed([
         0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
         6, 7,
     ]);
     let sample = Uniform::new(0, 2);
-    static LENGTH: usize = 4 * SUPER_BLOCK_SIZE;

     for _ in 0..LENGTH {
         bv.append_bit_u8(sample.sample(&mut rng) as u8);
@@ -286,7 +286,7 @@ fn random_data_select1() {
         let mut index = 0;
         loop {
-            let ones = data[index].count_ones() as usize;
+            let ones = data[index].count_ones() as u64;
             if rank_counter + ones > rnd_rank1 {
                 break;
             } else {
@@ -1211,8 +1211,8 @@ fn test_random_data_iter_both_ends() {
     }
     let bv = RsVec::from_bit_vec(bv);

-    let mut zeros = Vec::with_capacity(bv.rank0);
-    let mut ones = Vec::with_capacity(bv.rank1);
+    let mut zeros = Vec::with_capacity(bv.rank0 as usize);
+    let mut ones = Vec::with_capacity(bv.rank1 as usize);
     let mut iter0 = bv.iter0();
     let mut iter1 = bv.iter1();
@@ -1226,7 +1226,7 @@ fn test_random_data_iter_both_ends() {
     }
     zeros.sort();
     zeros.dedup();
-    assert_eq!(zeros.len(), bv.rank0);
+    assert_eq!(zeros.len() as u64, bv.rank0);

     for _ in 0..bv.rank1 {
         ones.push(if sample.sample(&mut rng) < 50 {
@@ -1237,7 +1237,7 @@ fn test_random_data_iter_both_ends() {
     }
     ones.sort();
     ones.dedup();
-    assert_eq!(ones.len(), bv.rank1);
+    assert_eq!(ones.len() as u64, bv.rank1);

     for idx in ones {
         assert_eq!(bv.get(idx), Some(1), "bit {} is not 1", idx);
@@ -1254,7 +1254,7 @@ fn test_random_data_iter_both_ends() {
 // test a randomly generated bit vector for correct values in blocks
 #[test]
 fn test_block_layout() {
-    static LENGTH: usize = 4 * SUPER_BLOCK_SIZE;
+    static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE;
     let mut bv = BitVec::with_capacity(LENGTH);
     let mut rng = StdRng::from_seed([
         0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
@@ -1271,7 +1271,7 @@ fn test_block_layout() {
     let mut zero_counter = 0u32;
     for (block_index, block) in bv.blocks.iter().enumerate() {
-        if block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 0 {
+        if block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize == 0 {
             zero_counter = 0;
         }
         assert_eq!(
@@ -1281,9 +1281,9 @@ fn test_block_layout() {
             block_index,
             bv.blocks.len()
         );
-        for word in bv.data[block_index * BLOCK_SIZE / WORD_SIZE..]
+        for word in bv.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize..]
             .iter()
-            .take(BLOCK_SIZE / WORD_SIZE)
+            .take((BLOCK_SIZE / WORD_SIZE) as usize)
         {
             zero_counter += word.count_zeros();
         }
@@ -1293,7 +1293,7 @@
 // Github issue https://github.com/Cydhra/vers/issues/6 regression test
 #[test]
 fn test_iter1_regression_i6() {
-    static LENGTH: usize = 4 * SUPER_BLOCK_SIZE;
+    static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE;
     let mut bv = BitVec::with_capacity(LENGTH);
     let mut rng = StdRng::from_seed([
         0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
@@ -1319,7 +1319,7 @@ fn test_iter1_regression_i6() {
     let mut all_bits: Vec<_> = bv.iter0().chain(bv.iter1()).collect();
     all_bits.sort();

-    assert_eq!(all_bits.len(), LENGTH);
+    assert_eq!(all_bits.len() as u64, LENGTH);
 }

 // Github issue https://github.com/Cydhra/vers/issues/8 regression test
diff --git a/src/bit_vec/sparse.rs b/src/bit_vec/sparse.rs
index bc0dbe5..b422489 100644
--- a/src/bit_vec/sparse.rs
+++ b/src/bit_vec/sparse.rs
@@ -2,7 +2,7 @@
//! The vector requires `O(n log u/n) + 2n + o(n)` bits of space, where `n` is the number of 1-bits in the vector
//! and `u` is the length of the vector.
//!
The vector is constructed from a sorted list of indices of 1-bits, or from an existing
-//! [`BitVec`](crate::BitVec).
+//! [`BitVec`].

 use crate::{BitVec, EliasFanoVec};

@@ -15,9 +15,9 @@ use crate::{BitVec, EliasFanoVec};
 ///
 /// # Examples
 /// ```
-/// use vers_vecs::SparseRSVec;
+/// use vers_vecs::SparseRsVec;
 ///
-/// let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
+/// let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12);
 /// assert_eq!(sparse.get(5), Some(1));
 /// assert_eq!(sparse.get(11), Some(0));
 /// assert_eq!(sparse.get(12), None);
@@ -28,14 +28,14 @@ use crate::{BitVec, EliasFanoVec};
 ///
 /// It can also be constructed from a `BitVec` directly:
 /// ```
-/// use vers_vecs::SparseRSVec;
+/// use vers_vecs::SparseRsVec;
 /// use vers_vecs::BitVec;
 ///
 /// let mut bv = BitVec::from_zeros(12);
 /// bv.flip_bit(6);
 /// bv.flip_bit(7);
 ///
-/// let sparse = SparseRSVec::from_bitvec(&bv);
+/// let sparse = SparseRsVec::from_bitvec(&bv);
 /// assert_eq!(sparse.rank1(5), 0);
 /// assert_eq!(sparse.select1(0), 6);
 /// ```
@@ -44,12 +44,12 @@ use crate::{BitVec, EliasFanoVec};
 /// [`from_bitvec_inverted`]: #method.from_bitvec_inverted
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct SparseRSVec {
+pub struct SparseRsVec {
     vec: EliasFanoVec,
     len: u64,
 }

-impl SparseRSVec {
+impl SparseRsVec {
     /// Creates a new `SparseRSVec` from a sequence of set bits represented as indices.
     /// The input must be sorted in ascending order and free of duplicates.
     ///
@@ -80,7 +80,7 @@ impl SparseRSVec {
     /// - `input`: The input `BitVec` to compress.
     #[must_use]
     pub fn from_bitvec(input: &BitVec) -> Self {
-        let len = input.len() as u64;
+        let len = input.len();
         Self::new(
             input
                 .iter()
@@ -109,7 +109,7 @@ impl SparseRSVec {
     ///
     /// # Example
     /// ```
-    /// use vers_vecs::SparseRSVec;
+    /// use vers_vecs::SparseRsVec;
     /// use vers_vecs::BitVec;
     ///
     /// let mut bv = BitVec::from_ones(12);
     /// bv.flip_bit(6);
     /// bv.flip_bit(7);
    ///
-    /// let sparse = SparseRSVec::from_bitvec_inverted(&bv);
+    /// let sparse = SparseRsVec::from_bitvec_inverted(&bv);
     /// // now select1 gives the position of 0-bits
     /// assert_eq!(sparse.select1(1), 7);
     /// ```
@@ -127,7 +127,7 @@ impl SparseRSVec {
     /// [`get`]: #method.get
     #[must_use]
     pub fn from_bitvec_inverted(input: &BitVec) -> Self {
-        let len = input.len() as u64;
+        let len = input.len();
         Self::new(
             input
                 .iter()
@@ -170,6 +170,8 @@ impl SparseRSVec {
     /// # Panics
     /// If `i` is out of bounds the function might panic or produce incorrect results.
     /// Use [`get`] for a checked version.
+    ///
+    /// [`get`]: Self::get
     #[must_use]
     pub fn get_unchecked(&self, i: u64) -> u64 {
         self.is_set_unchecked(i).into()
     }
@@ -188,7 +190,7 @@ impl SparseRSVec {
     ///
     /// If the rank is larger than the number of sparse bits in the vector, the vector length is returned.
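    ///
+    /// A small sketch (same positions as the constructor example above):
+    /// ```
+    /// use vers_vecs::SparseRsVec;
+    ///
+    /// let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12);
+    /// assert_eq!(sparse.select1(0), 1);
+    /// assert_eq!(sparse.select1(5), 12); // rank out of range: vector length
+    /// ```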
#[must_use] - pub fn select1(&self, i: usize) -> u64 { + pub fn select1(&self, i: u64) -> u64 { self.vec.get(i).unwrap_or(self.len) } @@ -238,13 +240,13 @@ impl SparseRSVec { } } -impl From for SparseRSVec { +impl From for SparseRsVec { fn from(input: BitVec) -> Self { Self::from_bitvec_inverted(&input) } } -impl<'a> From<&'a BitVec> for SparseRSVec { +impl<'a> From<&'a BitVec> for SparseRsVec { fn from(input: &'a BitVec) -> Self { Self::from_bitvec_inverted(input) } @@ -252,14 +254,14 @@ impl<'a> From<&'a BitVec> for SparseRSVec { #[cfg(test)] mod tests { - use super::SparseRSVec; + use super::SparseRsVec; use crate::BitVec; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; #[test] fn test_sparse_rank() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(2), 1); @@ -278,7 +280,7 @@ mod tests { #[test] fn test_sparse_select() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.select1(0), 1); assert_eq!(sparse.select1(1), 3); assert_eq!(sparse.select1(2), 5); @@ -290,7 +292,7 @@ mod tests { #[test] fn test_sparse_rank0() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.rank0(0), 0); assert_eq!(sparse.rank0(1), 1); assert_eq!(sparse.rank0(2), 1); @@ -309,7 +311,7 @@ mod tests { #[test] fn test_empty_sparse() { - let sparse = SparseRSVec::new(&[], 0); + let sparse = SparseRsVec::new(&[], 0); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(999), 0); @@ -325,7 +327,7 @@ mod tests { #[test] fn test_sparse_get() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.get(0), Some(0)); assert_eq!(sparse.get(1), Some(1)); assert_eq!(sparse.get(2), Some(0)); @@ -348,7 +350,7 @@ mod tests { bv.flip_bit(6); bv.flip_bit(7); - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 1); assert_eq!(sparse.rank1(2), 2); @@ -357,7 +359,7 @@ mod tests { assert_eq!(sparse.rank1(9), 7); assert_eq!(sparse.rank1(12), 10); - let sparse = SparseRSVec::from_bitvec_inverted(&bv); + let sparse = SparseRsVec::from_bitvec_inverted(&bv); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(2), 0); @@ -370,7 +372,7 @@ mod tests { #[test] fn test_large_block() { // test that the implementation works correctly if the search triggers a binary search - let sparse = SparseRSVec::new( + let sparse = SparseRsVec::new( &[ 1, 100_000, 100_001, 100_002, 100_003, 100_004, 100_005, 100_006, 100_007, 100_008, 100_009, 100_010, 1_000_000, @@ -383,7 +385,7 @@ mod tests { #[test] fn test_fuzzy() { - const L: usize = 100_000; + const L: u64 = 100_000; let mut bv = BitVec::from_zeros(L); let mut rng = StdRng::from_seed([0; 32]); @@ -391,15 +393,15 @@ mod tests { bv.flip_bit(rng.gen_range(0..L)); } - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); let mut ones = 0; for i in 0..L { - assert_eq!(bv.get(i), sparse.get(i as u64)); - assert_eq!(ones, sparse.rank1(i as u64)); - assert_eq!(i as u64 - ones, sparse.rank0(i as u64)); + assert_eq!(bv.get(i), sparse.get(i)); + assert_eq!(ones, sparse.rank1(i)); + assert_eq!(i - ones, sparse.rank0(i)); if 
bv.get(i) == Some(1) { - assert_eq!(i, sparse.select1(ones as usize).try_into().unwrap()); + assert_eq!(i, sparse.select1(ones).try_into().unwrap()); ones += 1; } } @@ -416,7 +418,7 @@ mod tests { bv.append_bit(0); bv.drop_last(1); - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); assert_eq!(sparse.len(), 2); assert_eq!(sparse.get(0), Some(1)); assert_eq!(sparse.get(1), Some(0)); diff --git a/src/bit_vec/tests.rs b/src/bit_vec/tests.rs index 0fa3f6d..345906c 100644 --- a/src/bit_vec/tests.rs +++ b/src/bit_vec/tests.rs @@ -475,7 +475,7 @@ fn test_apply_masks() { #[test] fn test_from_bits() { - let bv = BitVec::from_bits(&[1, 0, 1]); + let bv = BitVec::from_bits_u8(&[1, 0, 1]); assert_eq!(bv.len, 3); assert_eq!(bv.get_bits(0, 3), Some(0b101)); @@ -619,8 +619,8 @@ fn test_unpack() { let bv = BitVec::pack_sequence_u64(&sequence, 10); for (i, &val) in sequence.iter().enumerate() { - assert_eq!(bv.unpack_element(i, 10), Some(val)); - assert_eq!(bv.unpack_element_unchecked(i, 10), val); + assert_eq!(bv.unpack_element(i as u64, 10), Some(val)); + assert_eq!(bv.unpack_element_unchecked(i as u64, 10), val); } assert_eq!(bv.unpack_element(8, 10), None); diff --git a/src/elias_fano/mod.rs b/src/ef/mod.rs similarity index 88% rename from src/elias_fano/mod.rs rename to src/ef/mod.rs index 75b009c..28aa4c6 100644 --- a/src/elias_fano/mod.rs +++ b/src/ef/mod.rs @@ -17,7 +17,7 @@ use std::cmp::max; /// friendly. But for large clusters this takes too long, so we switch to binary search. /// We use 4 because benchmarks suggested that this was the best trade-off between speed for average /// case and for worst case. -const BIN_SEARCH_THRESHOLD: usize = 4; +const BIN_SEARCH_THRESHOLD: u64 = 4; /// An Elias-Fano encoded vector of u64 values. The vector is immutable, which is exploited by /// limiting the word length of elements to the minimum required to represent all elements. 
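+///
+/// A quick orientation sketch (values illustrative; `from_slice` and the `u64`-based
+/// `get` and `len` appear later in this diff):
+/// ```
+/// use vers_vecs::EliasFanoVec;
+///
+/// let ef = EliasFanoVec::from_slice(&[0, 3, 7, 10]);
+/// assert_eq!(ef.len(), 4);
+/// assert_eq!(ef.get(2), Some(7));
+/// ```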
@@ -61,8 +61,8 @@ pub struct EliasFanoVec { lower_vec: BitVec, universe_zero: u64, universe_max: u64, - lower_len: usize, - len: usize, + lower_len: u64, + len: u64, } impl EliasFanoVec { @@ -102,23 +102,23 @@ impl EliasFanoVec { let universe_zero = data[0]; let universe_bound = data[data.len() - 1] - universe_zero; - let log_n = ((data.len() + 2) as f64).log2().ceil() as usize; - let bits_per_number = (max(universe_bound, 2) as f64).log2().ceil() as usize; - let bits_for_upper_values = (max(data.len(), 2) as f64).log2().ceil() as usize; + let log_n = ((data.len() + 2) as f64).log2().ceil() as u64; + let bits_per_number = (max(universe_bound, 2) as f64).log2().ceil() as u64; + let bits_for_upper_values = (max(data.len(), 2) as f64).log2().ceil() as u64; let lower_width = max(bits_per_number, log_n) - bits_for_upper_values; assert!(lower_width < 64); let mut upper_vec = - BitVec::from_zeros(2 + data.len() + (universe_bound >> lower_width) as usize); - let mut lower_vec = BitVec::with_capacity(data.len() * lower_width); + BitVec::from_zeros(2 + data.len() as u64 + (universe_bound >> lower_width)); + let mut lower_vec = BitVec::with_capacity(data.len() as u64 * lower_width); for (i, &word) in data.iter().enumerate() { let word = word - universe_zero; - let upper = (word >> lower_width) as usize; + let upper = word >> lower_width; let lower = word & ((1 << lower_width) - 1); - upper_vec.flip_bit_unchecked(upper + i + 1); + upper_vec.flip_bit_unchecked(upper + i as u64 + 1); lower_vec.append_bits_unchecked(lower, lower_width); } @@ -128,13 +128,13 @@ impl EliasFanoVec { universe_zero, universe_max: data[data.len() - 1], lower_len: lower_width, - len: data.len(), + len: data.len() as u64, } } /// Returns the number of elements in the vector. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -147,7 +147,7 @@ impl EliasFanoVec { /// Returns the element at the given index, or `None` if the index exceeds the length of the /// vector. #[must_use] - pub fn get(&self, index: usize) -> Option { + pub fn get(&self, index: u64) -> Option { if index >= self.len() { return None; } @@ -164,8 +164,10 @@ impl EliasFanoVec { /// /// Note, that select in bit-vectors returns an index, while select in Elias-Fano returns the /// element at the given rank. + /// + /// [`get`]: Self::get #[must_use] - pub fn select(&self, rank: usize) -> Option { + pub fn select(&self, rank: u64) -> Option { self.get(rank) } @@ -178,12 +180,12 @@ impl EliasFanoVec { /// [`get`]: EliasFanoVec::get #[must_use] #[allow(clippy::cast_possible_truncation)] - pub fn get_unchecked(&self, index: usize) -> u64 { + pub fn get_unchecked(&self, index: u64) -> u64 { let upper = self.upper_vec.select1(index) - index - 1; let lower = self .lower_vec .get_bits_unchecked(index * self.lower_len, self.lower_len); - ((upper << self.lower_len) as u64 | lower) + self.universe_zero + ((upper << self.lower_len) | lower) + self.universe_zero } /// Returns the largest element that is smaller than or equal to the query. 
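    ///
+    /// Sketch of the semantics (illustrative values; `predecessor_unchecked` is the
+    /// variant exercised by the tests further down in this diff):
+    /// ```
+    /// use vers_vecs::EliasFanoVec;
+    ///
+    /// let ef = EliasFanoVec::from_slice(&[0, 3, 7, 10]);
+    /// assert_eq!(ef.predecessor_unchecked(8), 7); // largest element <= 8
+    /// ```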
@@ -214,15 +216,15 @@ impl EliasFanoVec { #[allow(clippy::cast_possible_truncation)] // we will fix this in a breaking update fn search_element_in_block( &self, - start_index_upper: usize, - start_index_lower: usize, + start_index_upper: u64, + start_index_lower: u64, query: u64, query_upper: u64, query_lower: u64, query_masked_upper: u64, ) -> u64 { // the direction in which we search for the element, dependent on the UPWARD flag - let direction: isize = if UPWARD { 1 } else { -1 }; + let direction: i64 = if UPWARD { 1 } else { -1 }; // the function to check if the current candidate no longer fulfills the query // criterion @@ -246,12 +248,12 @@ impl EliasFanoVec { // last element. if self .upper_vec - .get_unchecked((start_index_upper as isize + direction) as usize) + .get_unchecked((start_index_upper as i64 + direction) as u64) > 0 { // get the first value from the lower vector that corresponds to the query prefix let mut lower_candidate = self.lower_vec.get_bits_unchecked( - (start_index_lower as isize) as usize * self.lower_len, + (start_index_lower as i64) as u64 * self.lower_len, self.lower_len, ); @@ -263,11 +265,11 @@ impl EliasFanoVec { let mut cursor = direction; while self .upper_vec - .get_unchecked((start_index_upper as isize + cursor + direction) as usize) + .get_unchecked((start_index_upper as i64 + cursor + direction) as u64) > 0 { let next_candidate = self.lower_vec.get_bits_unchecked( - (start_index_lower as isize + cursor) as usize * self.lower_len, + (start_index_lower as i64 + cursor) as u64 * self.lower_len, self.lower_len, ); @@ -277,13 +279,13 @@ impl EliasFanoVec { || (!UPWARD && next_candidate < query_lower) { return if INDEX { - start_index_lower as u64 + cursor as u64 + start_index_lower + cursor as u64 } else { (query_masked_upper | lower_candidate) + self.universe_zero }; } else if next_candidate == query_lower { return if INDEX { - start_index_lower as u64 + cursor as u64 + start_index_lower + cursor as u64 } else { (query_masked_upper | next_candidate) + self.universe_zero }; @@ -297,23 +299,20 @@ impl EliasFanoVec { #[allow(clippy::comparison_chain)] // readability if cursor.unsigned_abs() == BIN_SEARCH_THRESHOLD { let block_end = if UPWARD { - self.upper_vec.select0((query_upper as isize + 1) as usize) - - query_upper as usize + self.upper_vec.select0((query_upper as i64 + 1) as u64) + - query_upper - 2 } else { - self.upper_vec.select0((query_upper as isize) as usize) - - query_upper as usize + self.upper_vec.select0((query_upper as i64) as u64) - query_upper }; let mut upper_bound; let mut lower_bound; if UPWARD { upper_bound = block_end; - lower_bound = - (start_index_lower as isize + cursor - direction) as usize; + lower_bound = (start_index_lower as i64 + cursor - direction) as u64; } else { - upper_bound = - (start_index_lower as isize + cursor - direction) as usize; + upper_bound = (start_index_lower as i64 + cursor - direction) as u64; lower_bound = block_end; } @@ -332,10 +331,10 @@ impl EliasFanoVec { upper_bound = middle; } else if middle_candidate == query_lower { return if INDEX { - cursor = middle as isize; + cursor = middle as i64; // while the element at cursor - 1 is equal, reduce cursor while self.lower_vec.get_bits_unchecked( - (cursor - direction) as usize * self.lower_len, + (cursor - direction) as u64 * self.lower_len, self.lower_len, ) == query_lower { @@ -362,7 +361,7 @@ impl EliasFanoVec { || (!UPWARD && final_bound > block_end) { let check_candidate = self.lower_vec.get_bits_unchecked( - (final_bound as isize + direction) as 
usize * self.lower_len,
+                            (final_bound as i64 + direction) as u64 * self.lower_len,
                             self.lower_len,
                         );
@@ -371,7 +370,7 @@ impl EliasFanoVec {
                         // if the element at lower_bound + 1 is smaller than the query, we include it
                         // in the count, so we return lower_bound + 1 + 1, as all elements in the
                         // 1-block are smaller than the query
-                        (final_bound as isize + direction + 1) as u64
+                        (final_bound as i64 + direction + 1) as u64
                     } else {
                         (query_masked_upper | check_candidate) + self.universe_zero
                     };
@@ -380,7 +379,7 @@ impl EliasFanoVec {
                     // update the cursor because we use it for the final index calculation
                     if INDEX {
-                        cursor = final_bound as isize + direction;
+                        cursor = final_bound as i64 + direction;
                     }
                     break;
                 }
@@ -390,7 +389,7 @@ impl EliasFanoVec {
                 // the loop ended because the element at cursor has a larger upper index,
                 // so we return the previous element count
                 // (element at cursor - 1, +1 because count is not 0 based)
-                start_index_lower as u64 + cursor as u64
+                start_index_lower + cursor as u64
             } else {
                 (query_masked_upper | lower_candidate) + self.universe_zero
             };
@@ -401,9 +400,9 @@ impl EliasFanoVec {
                 // all elements in the 1-block are larger than the query,
                 // so we return the last element count
                 // (start_index_lower - 1, +1 because count is not 0 based)
-                start_index_lower as u64
+                start_index_lower
            } else {
-                self.get_unchecked((start_index_lower as isize - direction) as usize)
+                self.get_unchecked((start_index_lower as i64 - direction) as u64)
            }
     }
@@ -427,7 +426,7 @@ impl EliasFanoVec {
         let n = n - self.universe_zero;

         // split the query into the upper and lower part
-        let upper_query = (n >> self.lower_len) as usize;
+        let upper_query = n >> self.lower_len;
         let lower_query = n & ((1 << self.lower_len) - 1);

         // calculate the lower bound within the lower vector where our predecessor can be found. Since
@@ -439,13 +438,13 @@ impl EliasFanoVec {
         // calculate the upper part of the result. This only works if the next value in the upper
         // vector is set, otherwise there is no value in the entire vector with this bit-prefix,
         // and we need to search the largest prefix smaller than the query.
-        let result_upper = (upper_query << self.lower_len) as u64;
+        let result_upper = upper_query << self.lower_len;

         self.search_element_in_block::(
            lower_bound_upper_index,
            lower_bound_lower_index,
            n,
-            upper_query as u64,
+            upper_query,
            lower_query,
            result_upper,
        )
@@ -488,7 +487,7 @@ impl EliasFanoVec {
         let n = n - self.universe_zero;

         // split the query into the upper and lower part
-        let upper_query = (n >> self.lower_len) as usize;
+        let upper_query = n >> self.lower_len;
         let lower_query = n & ((1 << self.lower_len) - 1);

         // calculate the upper bound within the lower vector where our successor can be found. Since
@@ -500,13 +499,13 @@ impl EliasFanoVec {
         // calculate the upper part of the result. This only works if the next value in the upper
         // vector is set, otherwise there is no value in the entire vector with this bit-prefix,
         // and we need to search the largest prefix smaller than the query.
- let result_upper = (upper_query << self.lower_len) as u64; + let result_upper = upper_query << self.lower_len; self.search_element_in_block::( upper_bound_upper_index, upper_bound_lower_index, n, - upper_query as u64, + upper_query, lower_query, result_upper, ) @@ -531,7 +530,7 @@ impl EliasFanoVec { /// assert_eq!(elias_fano_vec.delta(3), Some(80)); /// ``` #[must_use] - pub fn delta(&self, index: usize) -> Option { + pub fn delta(&self, index: u64) -> Option { if index >= self.len() { return None; } @@ -549,7 +548,7 @@ impl EliasFanoVec { ) } else { let query_upper_part = (upper_index - index - 1) << self.lower_len; - let query_number = query_upper_part as u64 + let query_number = query_upper_part | self .lower_vec .get_bits_unchecked(index * self.lower_len, self.lower_len); @@ -561,7 +560,7 @@ impl EliasFanoVec { let lower_element_upper_index = self.upper_vec.select1(index - 1); let lower_element_upper = lower_element_upper_index - (index - 1) - 1; - let lower_elem = ((lower_element_upper as u64) << self.lower_len as u64) + let lower_elem = (lower_element_upper << self.lower_len) | self .lower_vec .get_bits_unchecked((index - 1) * self.lower_len, self.lower_len); @@ -572,10 +571,9 @@ impl EliasFanoVec { /// Return how many elements strictly smaller than the query element are present in the vector. #[must_use] - #[allow(clippy::cast_possible_truncation)] // we will fix this in a breaking update pub fn rank(&self, value: u64) -> u64 { if value > self.universe_max || self.is_empty() { - return self.len() as u64; + return self.len(); } if value < self.universe_zero { @@ -585,12 +583,12 @@ impl EliasFanoVec { let value = value - self.universe_zero; let upper = value >> self.lower_len; let lower = value & ((1 << self.lower_len) - 1); - let query_begin = self.upper_vec.select0(upper as usize); - let lower_index = query_begin as u64 - upper; + let query_begin = self.upper_vec.select0(upper); + let lower_index = query_begin - upper; self.search_element_in_block::( query_begin, - lower_index as usize, + lower_index, value, upper, lower, diff --git a/src/elias_fano/tests.rs b/src/ef/tests.rs similarity index 98% rename from src/elias_fano/tests.rs rename to src/ef/tests.rs index b7b0d42..6457006 100644 --- a/src/elias_fano/tests.rs +++ b/src/ef/tests.rs @@ -62,10 +62,10 @@ fn test_randomized_elias_fano() { let ef = EliasFanoVec::from_slice(&seq); - assert_eq!(ef.len(), seq.len()); + assert_eq!(ef.len(), seq.len() as u64); for (i, &v) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), v); + assert_eq!(ef.get_unchecked(i as u64), v); } for _ in 0..1000 { @@ -110,7 +110,7 @@ fn test_clustered_ef() { let ef = EliasFanoVec::from_slice(&seq); for (i, &x) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), x, "expected {:b}", x); + assert_eq!(ef.get_unchecked(i as u64), x, "expected {:b}", x); assert_eq!(ef.predecessor_unchecked(x), x); assert_eq!(ef.successor_unchecked(x), x); } @@ -398,10 +398,10 @@ fn test_randomized_elias_fano_successor() { let ef = EliasFanoVec::from_slice(&seq); - assert_eq!(ef.len(), seq.len()); + assert_eq!(ef.len(), seq.len() as u64); for (i, &v) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), v); + assert_eq!(ef.get_unchecked(i as u64), v); } for _ in 0..1000 { diff --git a/src/lib.rs b/src/lib.rs index 2e1c297..29e9152 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,19 +1,9 @@ -#![cfg_attr( - all( - feature = "simd", - target_arch = "x86_64", - target_feature = "avx", - target_feature = "avx2", - target_feature = "avx512f", - target_feature 
= "avx512bw", - ), - feature(stdarch_x86_avx512) -)] #![warn(missing_docs)] -#![allow(clippy::module_name_repetitions)] #![allow(clippy::assertions_on_constants)] // for asserts warning about incompatible constant values #![allow(clippy::inline_always)] // we actually measure performance increases with most of these -#![cfg_attr(docsrs, feature(doc_cfg), feature(doc_auto_cfg))] // for conditional compilation in docs +#![allow(clippy::cast_lossless)] // it is often more readable to use `as u64` instead of `u64::from(..)` +#![allow(clippy::needless_for_each)] // readability of one-liners +#![cfg_attr(docsrs, feature(doc_cfg))] // for conditional compilation in docs //! This crate provides a collection of data structures supported by fast implementations of //! rank and select queries. The data structures are static, meaning that they cannot be modified @@ -64,12 +54,12 @@ //! - `bp_u16_lookup` (disabled by default): Uses a 16-bit lookup table for the balanced parenthesis //! tree data structure. This is faster, but requires 128 KiB instead of 4 KiB. -pub use bit_vec::fast_rs_vec::RsVec; -pub use bit_vec::sparse::SparseRSVec; +pub use bit_vec::rs::RsVec; +pub use bit_vec::sparse::SparseRsVec; pub use bit_vec::BitVec; -pub use elias_fano::EliasFanoVec; -pub use rmq::binary_rmq::BinaryRmq; -pub use rmq::fast_rmq::FastRmq; +pub use ef::EliasFanoVec; +pub use rmq::small::SmallRmq; +pub use rmq::sparse::SparseRmq; pub use trees::bp::{BpBuilder, BpTree}; pub use trees::{IsAncestor, LevelTree, SubtreeSize, Tree, TreeBuilder}; pub use wavelet::WaveletMatrix; @@ -77,7 +67,7 @@ pub use wavelet::WaveletMatrix; pub mod bit_vec; #[forbid(unsafe_code)] -pub mod elias_fano; +pub mod ef; #[forbid(unsafe_code)] pub mod rmq; diff --git a/src/rmq/mod.rs b/src/rmq/mod.rs index e7064ce..196c901 100644 --- a/src/rmq/mod.rs +++ b/src/rmq/mod.rs @@ -1,7 +1,7 @@ //! Range minimum query data structures. These data structures allow for the calculation of the index of the //! minimum element in a range of a static array in constant-time. The implementations are located -//! in the [`binary_rmq`] and [`fast_rmq`] modules. +//! in the [`sparse`] and [`small`] modules. -pub mod fast_rmq; +pub mod small; -pub mod binary_rmq; +pub mod sparse; diff --git a/src/rmq/fast_rmq/mod.rs b/src/rmq/small/mod.rs similarity index 92% rename from src/rmq/fast_rmq/mod.rs rename to src/rmq/small/mod.rs index c451ce3..246c9b4 100644 --- a/src/rmq/fast_rmq/mod.rs +++ b/src/rmq/small/mod.rs @@ -7,7 +7,7 @@ use std::cmp::min_by; use std::mem::size_of; use std::ops::{Bound, Deref, RangeBounds}; -use crate::rmq::binary_rmq::BinaryRmq; +use crate::rmq::sparse::SparseRmq; use crate::util::pdep::Pdep; /// Size of the blocks the data is split into. One block is indexable with a u8, hence its size. 
@@ -66,10 +66,10 @@ struct Block {
 ///
 /// # Example
 /// ```rust
-/// use vers_vecs::FastRmq;
+/// use vers_vecs::SmallRmq;
 ///
 /// let data = vec![4, 10, 3, 11, 2, 12];
-/// let rmq = FastRmq::from_vec(data);
+/// let rmq = SmallRmq::from_vec(data);
 ///
 /// assert_eq!(rmq.range_min(0, 1), 0);
 /// assert_eq!(rmq.range_min(0, 2), 2);
 /// ```
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct FastRmq {
-    data: Vec<u64>,
-    block_minima: BinaryRmq,
-    block_min_indices: Vec<u8>,
-    blocks: Vec<Block>,
+pub struct SmallRmq {
+    data: Box<[u64]>,
+    block_minima: SparseRmq,
+    block_min_indices: Box<[u8]>,
+    blocks: Box<[Block]>,
 }

-impl FastRmq {
+impl SmallRmq {
     /// Creates a new range minimum query data structure from the given data. Creation time is
     /// O(n log n) and space overhead is O(n log n) with a fractional constant factor
-    /// (see [`FastRmq`])
+    /// (see [`SmallRmq`])
     ///
     /// # Panics
     /// This function will panic if the input is larger than 2^40 elements.
@@ -141,21 +141,21 @@ impl FastRmq {
         });

         Self {
-            data,
-            block_minima: BinaryRmq::from_vec(block_minima),
-            block_min_indices,
-            blocks,
+            data: data.into_boxed_slice(),
+            block_minima: SparseRmq::from_vec(block_minima),
+            block_min_indices: block_min_indices.into_boxed_slice(),
+            blocks: blocks.into_boxed_slice(),
         }
     }

-    /// Convenience function for [`FastRmq::range_min`] for using range operators.
+    /// Convenience function for [`SmallRmq::range_min`] for using range operators.
     /// The range is clamped to the length of the data structure, so this function will not panic,
     /// unless called on an empty data structure, because that does not have a valid index.
     ///
     /// # Example
     /// ```rust
-    /// use vers_vecs::FastRmq;
-    /// let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]);
+    /// use vers_vecs::SmallRmq;
+    /// let rmq = SmallRmq::from_vec(vec![5, 4, 3, 2, 1]);
     /// assert_eq!(rmq.range_min_with_range(0..3), 2);
     /// assert_eq!(rmq.range_min_with_range(0..=3), 3);
     /// ```
@@ -290,15 +290,15 @@ impl FastRmq {
 /// Implements Deref to delegate to the underlying data structure. This allows the user to use
 /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators,
 /// etc.
-impl Deref for FastRmq {
-    type Target = Vec<u64>;
+impl Deref for SmallRmq {
+    type Target = Box<[u64]>;

     fn deref(&self) -> &Self::Target {
         &self.data
     }
 }

-impl From<Vec<u64>> for FastRmq {
+impl From<Vec<u64>> for SmallRmq {
     fn from(data: Vec<u64>) -> Self {
         Self::from_vec(data)
     }
 }
@@ -309,8 +309,8 @@ impl From<Vec<u64>> for FastRmq {
 ///
 /// See [`FastRmq::from_vec`] for more information.
/// -/// [`FastRmq::from_vec`]: FastRmq::from_vec -impl FromIterator for FastRmq { +/// [`FastRmq::from_vec`]: SmallRmq::from_vec +impl FromIterator for SmallRmq { fn from_iter>(iter: T) -> Self { Self::from_vec(iter.into_iter().collect()) } diff --git a/src/rmq/fast_rmq/tests.rs b/src/rmq/small/tests.rs similarity index 90% rename from src/rmq/fast_rmq/tests.rs rename to src/rmq/small/tests.rs index f6e1bc9..aac8bdf 100644 --- a/src/rmq/fast_rmq/tests.rs +++ b/src/rmq/small/tests.rs @@ -45,7 +45,7 @@ fn test_fast_rmq() { numbers_vec.push(i as u64); } - let rmq = FastRmq::from_vec(numbers_vec.clone()); + let rmq = SmallRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -70,7 +70,7 @@ fn test_fast_rmq_unsorted() { numbers_vec.push(rng.next_u64()); } - let rmq = FastRmq::from_vec(numbers_vec.clone()); + let rmq = SmallRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -88,7 +88,7 @@ fn test_fast_rmq_unsorted() { #[test] fn test_iter() { - let rmq = FastRmq::from_vec(vec![1, 2, 3, 4, 5]); + let rmq = SmallRmq::from_vec(vec![1, 2, 3, 4, 5]); let mut iter = rmq.iter(); assert_eq!(iter.next(), Some(&1)); assert_eq!(iter.next(), Some(&2)); @@ -100,7 +100,7 @@ fn test_iter() { #[test] fn test_range_operators() { - let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]); + let rmq = SmallRmq::from_vec(vec![5, 4, 3, 2, 1]); assert_eq!(rmq.range_min(0, 3), 3); assert_eq!(rmq.range_min_with_range(0..3), 2); assert_eq!(rmq.range_min_with_range(0..=3), 3); @@ -108,7 +108,7 @@ fn test_range_operators() { #[test] fn test_empty_rmq() { - let _rmq = FastRmq::from_vec(Vec::::new()); + let _rmq = SmallRmq::from_vec(Vec::::new()); // calling functions on an empty rmq will panic because the upper bound is inclusive, but there // is no valid index in an empty array, so we can't test anything else } diff --git a/src/rmq/binary_rmq/mod.rs b/src/rmq/sparse/mod.rs similarity index 92% rename from src/rmq/binary_rmq/mod.rs rename to src/rmq/sparse/mod.rs index 84962ca..3bb1743 100644 --- a/src/rmq/binary_rmq/mod.rs +++ b/src/rmq/sparse/mod.rs @@ -17,10 +17,10 @@ use std::ops::{Deref, RangeBounds}; /// /// # Example /// ```rust -/// use vers_vecs::BinaryRmq; +/// use vers_vecs::SparseRmq; /// /// let data = vec![4, 10, 3, 11, 2, 12]; -/// let rmq = BinaryRmq::from_vec(data); +/// let rmq = SparseRmq::from_vec(data); /// /// assert_eq!(rmq.range_min(0, 1), 0); /// assert_eq!(rmq.range_min(0, 2), 2); @@ -28,16 +28,16 @@ use std::ops::{Deref, RangeBounds}; /// ``` #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct BinaryRmq { - data: Vec, +pub struct SparseRmq { + data: Box<[u64]>, // store indices relative to start of range. There is no way to have ranges exceeding 2^32 bits // but since we have fast_rmq for larger inputs, which does not have any downsides at that point, // we can just use u32 here (which gains cache efficiency for both implementations). - results: Vec, + results: Box<[u32]>, } -impl BinaryRmq { +impl SparseRmq { /// Create a new RMQ data structure for the given data. This uses O(n log n) space and /// precalculates the minimum element in intervals 2^k for all k for all elements. /// @@ -102,17 +102,20 @@ impl BinaryRmq { } } - Self { data, results } + Self { + data: data.into_boxed_slice(), + results: results.into_boxed_slice(), + } } - /// Convenience function for [`BinaryRmq::range_min`] for using range operators. + /// Convenience function for [`SparseRmq::range_min`] for using range operators. 
/// The range is clamped to the length of the data structure, so this function will not panic, /// unless called on an empty data structure, because that does not have a valid index. /// /// # Example /// ```rust - /// use vers_vecs::BinaryRmq; - /// let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]); + /// use vers_vecs::SparseRmq; + /// let rmq = SparseRmq::from_vec(vec![5, 4, 3, 2, 1]); /// assert_eq!(rmq.range_min_with_range(0..3), 2); /// assert_eq!(rmq.range_min_with_range(0..=3), 3); /// ``` @@ -169,15 +172,15 @@ impl BinaryRmq { /// Implements Deref to delegate to the underlying data structure. This allows the user to use /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators, /// etc. -impl Deref for BinaryRmq { - type Target = Vec; +impl Deref for SparseRmq { + type Target = Box<[u64]>; fn deref(&self) -> &Self::Target { &self.data } } -impl From> for BinaryRmq { +impl From> for SparseRmq { fn from(data: Vec) -> Self { Self::from_vec(data) } @@ -188,8 +191,8 @@ impl From> for BinaryRmq { /// /// See [`BinaryRmq::from_vec`] for more information. /// -/// [`BinaryRmq::from_vec`]: BinaryRmq::from_vec -impl FromIterator for BinaryRmq { +/// [`BinaryRmq::from_vec`]: SparseRmq::from_vec +impl FromIterator for SparseRmq { fn from_iter>(iter: T) -> Self { Self::from_vec(iter.into_iter().collect()) } diff --git a/src/rmq/binary_rmq/tests.rs b/src/rmq/sparse/tests.rs similarity index 83% rename from src/rmq/binary_rmq/tests.rs rename to src/rmq/sparse/tests.rs index 9e5fa63..05c8cd0 100644 --- a/src/rmq/binary_rmq/tests.rs +++ b/src/rmq/sparse/tests.rs @@ -1,9 +1,9 @@ -use crate::rmq::binary_rmq::BinaryRmq; +use crate::rmq::sparse::SparseRmq; use rand::RngCore; #[test] fn small_test() { - let rmq = BinaryRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]); + let rmq = SparseRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]); assert_eq!(rmq.range_min(0, 0), 0); assert_eq!(rmq.range_min(0, 1), 1); @@ -25,7 +25,7 @@ fn randomized_test() { numbers_vec.push(rng.next_u64()); } - let rmq = BinaryRmq::from_vec(numbers_vec.clone()); + let rmq = SparseRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -43,7 +43,7 @@ fn randomized_test() { #[test] fn test_iter() { - let rmq = BinaryRmq::from_vec(vec![1, 2, 3, 4, 5]); + let rmq = SparseRmq::from_vec(vec![1, 2, 3, 4, 5]); let mut iter = rmq.iter(); assert_eq!(iter.next(), Some(&1)); assert_eq!(iter.next(), Some(&2)); @@ -55,7 +55,7 @@ fn test_iter() { #[test] fn test_range_operators() { - let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]); + let rmq = SparseRmq::from_vec(vec![5, 4, 3, 2, 1]); assert_eq!(rmq.range_min(0, 3), 3); assert_eq!(rmq.range_min_with_range(0..3), 2); assert_eq!(rmq.range_min_with_range(0..=3), 3); @@ -63,7 +63,7 @@ fn test_range_operators() { #[test] fn test_empty_rmq() { - let rmq = BinaryRmq::from_vec(Vec::::new()); + let rmq = SparseRmq::from_vec(Vec::::new()); assert!(rmq.is_empty()); // calling functions on an empty rmq will panic because the upper bound is inclusive, but there // is no valid index in an empty array, so we can't test anything else diff --git a/src/trees/bp/builder.rs b/src/trees/bp/builder.rs index 753600c..7553925 100644 --- a/src/trees/bp/builder.rs +++ b/src/trees/bp/builder.rs @@ -5,13 +5,14 @@ use crate::BitVec; /// A builder for [`BpTrees`] using depth-first traversal of the tree. See the documentation of /// [`TreeBuilder`]. 
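The `From` and `FromIterator` impls carried over here mean that `collect()` and `into()` keep working across the rename; a short sketch (again assuming the crate-root re-export of `SparseRmq`):

```rust
use vers_vecs::SparseRmq; // formerly vers_vecs::BinaryRmq

// FromIterator<u64>: collect directly into the RMQ structure
let rmq: SparseRmq = (0..64u64).rev().collect();
// the data is descending, so the minimum of any range is at its right end
assert_eq!(rmq.range_min(0, 10), 10);

// From<Vec<u64>> delegates to from_vec
let rmq = SparseRmq::from(vec![3u64, 1, 2]);
assert_eq!(rmq.range_min(0, 2), 1);
```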
/// -/// [`BpTree`]: BpTree -pub struct BpBuilder { +/// [`BpTrees`]: BpTree +/// [`TreeBuilder`]: TreeBuilder +pub struct BpBuilder { excess: i64, bit_vec: BitVec, } -impl BpBuilder { +impl BpBuilder { /// Create new empty `DfsTreeBuilder` #[must_use] pub fn new() -> Self { @@ -26,18 +27,18 @@ impl BpBuilder { pub fn with_capacity(capacity: u64) -> Self { Self { excess: 0, - bit_vec: BitVec::with_capacity((capacity * 2) as usize), + bit_vec: BitVec::with_capacity(capacity * 2), } } } -impl Default for BpBuilder { +impl Default for BpBuilder { fn default() -> Self { Self::new() } } -impl TreeBuilder for BpBuilder { +impl TreeBuilder for BpBuilder { type Tree = BpTree; fn enter_node(&mut self) { @@ -54,7 +55,7 @@ impl TreeBuilder for BpBuilder { if self.excess != 0 { Err(self.excess) } else { - Ok(BpTree::from_bit_vector(self.bit_vec)) + Ok(BpTree::from_bit_vec(self.bit_vec)) } } } diff --git a/src/trees/bp/lookup.rs b/src/trees/bp/lookup.rs index 2c8fc8b..0a44c3d 100644 --- a/src/trees/bp/lookup.rs +++ b/src/trees/bp/lookup.rs @@ -1,3 +1,6 @@ +#![allow(clippy::cast_sign_loss)] // sign loss cannot happen on correctly formed BP trees +#![allow(clippy::cast_possible_wrap)] // ditto + //! This module provides the lookup table and lookup functionality to answer excess queries //! for 8-bit and 16-bit blocks in the tree vector. //! Note that the 8-bit version is unused, since this whole module gets replaced with @@ -52,6 +55,7 @@ const PAREN_BLOCK_LOOKUP: [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] = calculate /// Offset to add to encoded excess values, so negative numbers are stored as positive integers, reducing /// encoding complexity +#[allow(clippy::cast_possible_truncation)] // false positive const ENCODING_OFFSET: i32 = LOOKUP_BLOCK_SIZE as i32; /// Bitmask for one of the lookup values. @@ -66,6 +70,7 @@ const MINIMUM_EXCESS_POSITION: usize = 6; #[cfg(not(feature = "bp_u16_lookup"))] const MINIMUM_EXCESS_POSITION: usize = 5; +#[allow(clippy::cast_possible_truncation)] // all values are in range const fn calculate_lookup_table() -> [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] { // initial sentinel values during excess computation const MORE_THAN_MAX: SignedLookupBlockType = (LOOKUP_BLOCK_SIZE + 1) as SignedLookupBlockType; @@ -114,12 +119,14 @@ const fn get_maximum_excess(value: EncodedTableType) -> i64 { } /// Branchless const minimum computation for values that cannot overflow +#[allow(clippy::cast_possible_truncation)] // all values are in range const fn min(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { b + ((a - b) & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) } /// Branchless const maximum computation for values that cannot overflow +#[allow(clippy::cast_possible_truncation)] // all values are in range const fn max(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { a - ((a - b) & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) diff --git a/src/trees/bp/mod.rs b/src/trees/bp/mod.rs index 6b9e89c..321e1e0 100644 --- a/src/trees/bp/mod.rs +++ b/src/trees/bp/mod.rs @@ -3,7 +3,7 @@ //! time, as well as subtree size, level-order, and ancestor queries in `O(log n)` time. //! The tree is succinct (ideally sublinear space overhead) and pointer-less. 
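To make the builder change above concrete, here is a depth-first construction through the `TreeBuilder` interface; `build()` hands back the leftover excess as the error on unbalanced input. This is a sketch: the import path of `BpBuilder` is an assumption, and the block size is spelled out explicitly rather than relying on a default:

```rust
use vers_vecs::trees::{Tree, TreeBuilder};
use vers_vecs::trees::bp::BpBuilder; // import path assumed

// build the three-node tree (()()) in depth-first order
let mut builder = BpBuilder::<512>::with_capacity(3);
builder.enter_node(); // root: appends an opening parenthesis
builder.enter_node(); // first leaf
builder.leave_node();
builder.enter_node(); // second leaf
builder.leave_node();
builder.leave_node(); // closing the root balances the expression

// an unbalanced traversal would yield Err(excess) instead
let tree = builder.build().expect("traversal is balanced");
assert_eq!(tree.size(), 3);
```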
-use crate::bit_vec::fast_rs_vec::SelectIntoIter; +use crate::bit_vec::rs::SelectIntoIter; use crate::trees::mmt::MinMaxTree; use crate::trees::{IsAncestor, LevelTree, SubtreeSize, Tree}; use crate::{BitVec, RsVec}; @@ -11,7 +11,7 @@ use std::cmp::{max, min}; use std::iter::FusedIterator; /// The default block size for the tree, used in several const generics -const DEFAULT_BLOCK_SIZE: usize = 512; +const DEFAULT_BLOCK_SIZE: u64 = 512; const OPEN_PAREN: u64 = 1; const CLOSE_PAREN: u64 = 0; @@ -122,7 +122,7 @@ use lookup_query::{process_block_bwd, process_block_fwd, LOOKUP_BLOCK_SIZE}; /// # #![allow(long_running_const_eval)] /// use vers_vecs::{BitVec, BpTree, Tree}; /// let bv = BitVec::pack_sequence_u8(&[0b1101_0111, 0b0010_0100], 8); -/// let tree = BpTree::<4>::from_bit_vector(bv); +/// let tree = BpTree::<4>::from_bit_vec(bv); /// /// let nodes = tree.dfs_iter().collect::>(); /// assert_eq!(nodes, vec![0, 1, 2, 4, 6, 7, 10, 13]); @@ -139,15 +139,15 @@ use lookup_query::{process_block_bwd, process_block_fwd, LOOKUP_BLOCK_SIZE}; /// [`BitVec`]: BitVec #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct BpTree { +pub struct BpTree { vec: RsVec, min_max_tree: MinMaxTree, } -impl BpTree { +impl BpTree { /// Construct a new `BpTree` from a given bit vector. #[must_use] - pub fn from_bit_vector(bv: BitVec) -> Self { + pub fn from_bit_vec(bv: BitVec) -> Self { let min_max_tree = MinMaxTree::excess_tree(&bv, BLOCK_SIZE); let vec = bv.into(); Self { vec, min_max_tree } @@ -161,14 +161,15 @@ impl BpTree { /// # Arguments /// - `index`: The starting index. /// - `relative_excess`: The desired relative excess value. - pub fn fwd_search(&self, index: usize, mut relative_excess: i64) -> Option { + pub fn fwd_search(&self, index: u64, mut relative_excess: i64) -> Option { // check for greater than or equal length minus one, because the last element // won't ever have a result from fwd_search if index >= (self.vec.len() - 1) { return None; } - let block_index = (index + 1) / BLOCK_SIZE; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let block_index = ((index + 1) / BLOCK_SIZE) as usize; self.fwd_search_block(index, block_index, &mut relative_excess) .map_or_else( |()| { @@ -177,8 +178,12 @@ impl BpTree { // check the result block for the exact position block.and_then(|(block, mut relative_excess)| { - self.fwd_search_block(block * BLOCK_SIZE - 1, block, &mut relative_excess) - .ok() + self.fwd_search_block( + block as u64 * BLOCK_SIZE - 1, + block, + &mut relative_excess, + ) + .ok() }) }, Some, @@ -194,15 +199,15 @@ impl BpTree { #[inline(always)] fn fwd_search_block( &self, - start_index: usize, + start_index: u64, block_index: usize, relative_excess: &mut i64, - ) -> Result { - let block_boundary = min((block_index + 1) * BLOCK_SIZE, self.vec.len()); + ) -> Result { + let block_boundary = min((block_index as u64 + 1) * BLOCK_SIZE, self.vec.len()); // the boundary at which we can start with table lookups let lookup_boundary = min( - (start_index + 1).div_ceil(LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + (start_index + 1).div_ceil(LOOKUP_BLOCK_SIZE) * LOOKUP_BLOCK_SIZE, block_boundary, ); for i in start_index + 1..lookup_boundary { @@ -217,18 +222,20 @@ impl BpTree { // the boundary up to which we can use table lookups let upper_lookup_boundary = max( lookup_boundary, - (block_boundary / LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + (block_boundary / LOOKUP_BLOCK_SIZE) * 
LOOKUP_BLOCK_SIZE, ); + // LOOKUP_BLOCK_SIZE as usize is a false positive for the lint: https://github.com/rust-lang/rust-clippy/issues/9613 + #[allow(clippy::cast_possible_truncation)] for i in (lookup_boundary..upper_lookup_boundary).step_by(LOOKUP_BLOCK_SIZE as usize) { if let Ok(idx) = process_block_fwd( self.vec - .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE as usize) + .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE) .try_into() .unwrap(), relative_excess, ) { - return Ok(i + idx as usize); + return Ok(i + idx); } } @@ -254,7 +261,7 @@ impl BpTree { /// # Arguments /// - `index`: The starting index. /// - `relative_excess`: The desired relative excess value. - pub fn bwd_search(&self, index: usize, mut relative_excess: i64) -> Option { + pub fn bwd_search(&self, index: u64, mut relative_excess: i64) -> Option { if index >= self.vec.len() { return None; } @@ -267,7 +274,8 @@ impl BpTree { // calculate the block we start searching in. It starts at index - 1, so we don't accidentally // search the mM tree and immediately find `index` as the position - let block_index = (index - 1) / BLOCK_SIZE; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let block_index = ((index - 1) / BLOCK_SIZE) as usize; // check the current block self.bwd_search_block(index, block_index, &mut relative_excess) @@ -278,8 +286,12 @@ impl BpTree { // check the result block for the exact position block.and_then(|(block, mut relative_excess)| { - self.bwd_search_block((block + 1) * BLOCK_SIZE, block, &mut relative_excess) - .ok() + self.bwd_search_block( + (block as u64 + 1) * BLOCK_SIZE, + block, + &mut relative_excess, + ) + .ok() }) }, Some, @@ -295,15 +307,15 @@ impl BpTree { #[inline(always)] fn bwd_search_block( &self, - start_index: usize, + start_index: u64, block_index: usize, relative_excess: &mut i64, - ) -> Result { - let block_boundary = min(block_index * BLOCK_SIZE, self.vec.len()); + ) -> Result { + let block_boundary = min(block_index as u64 * BLOCK_SIZE, self.vec.len()); // the boundary at which we can start with table lookups let lookup_boundary = max( - ((start_index - 1) / LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + ((start_index - 1) / LOOKUP_BLOCK_SIZE) * LOOKUP_BLOCK_SIZE, block_boundary, ); for i in (lookup_boundary..start_index).rev() { @@ -315,18 +327,22 @@ impl BpTree { } } - for i in (block_boundary..lookup_boundary) + // lookup_boundary - block_boundary is smaller than a block, so casting to usize cannot + // truncate + // and LOOKUP_BLOCK_SIZE as usize is a false positive for the lint: https://github.com/rust-lang/rust-clippy/issues/9613 + #[allow(clippy::cast_possible_truncation)] + for i in (0..(lookup_boundary - block_boundary) as usize) .step_by(LOOKUP_BLOCK_SIZE as usize) .rev() { if let Ok(idx) = process_block_bwd( self.vec - .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE as usize) + .get_bits_unchecked(block_boundary + i as u64, LOOKUP_BLOCK_SIZE) .try_into() .unwrap(), relative_excess, ) { - return Ok(i + idx as usize); + return Ok(block_boundary + i as u64 + idx); } } @@ -337,7 +353,7 @@ impl BpTree { /// If the bit at `index` is not an opening parenthesis, the result is meaningless. /// If there is no matching closing parenthesis, `None` is returned. #[must_use] - pub fn close(&self, index: usize) -> Option { + pub fn close(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -349,7 +365,7 @@ impl BpTree { /// If the bit at `index` is not a closing parenthesis, the result is meaningless. 
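As a concrete reading of these two searches: finding a matching closing parenthesis is a forward search for relative excess -1, which is how `close` behaves on the public API. A small illustration (the internal delegation is an assumption; the asserted results follow directly from the excess definition):

```rust
use vers_vecs::{BitVec, BpTree};

// the parenthesis expression ( ( ) ( ) )
let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]);
let tree = BpTree::<4>::from_bit_vec(bv);

// the match of the parenthesis at 0 is the first position where the
// excess drops one below its value at position 0
assert_eq!(tree.fwd_search(0, -1), Some(5));
assert_eq!(tree.close(0), Some(5));
assert_eq!(tree.close(1), Some(2));
```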
/// If there is no matching opening parenthesis, `None` is returned. #[must_use] - pub fn open(&self, index: usize) -> Option { + pub fn open(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -361,7 +377,7 @@ impl BpTree { /// This works regardless of whether the bit at `index` is an opening or closing parenthesis. /// If there is no enclosing parenthesis, `None` is returned. #[must_use] - pub fn enclose(&self, index: usize) -> Option { + pub fn enclose(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -380,7 +396,8 @@ impl BpTree { /// The excess is the number of open parentheses minus the number of closing parentheses. /// If `index` is out of bounds, the total excess of the parentheses expression is returned. #[must_use] - pub fn excess(&self, index: usize) -> i64 { + #[allow(clippy::cast_possible_wrap)] // only happens if the tree is unbalanced and has more than 2^62 nodes + pub fn excess(&self, index: u64) -> i64 { debug_assert!(index < self.vec.len(), "Index out of bounds"); self.vec.rank1(index + 1) as i64 - self.vec.rank0(index + 1) as i64 } @@ -426,8 +443,14 @@ impl BpTree { /// Iterate over a subtree rooted at `node` in depth-first (pre-)order. /// The iteration starts with the node itself. /// + /// # Limitations + /// When called on an architecture where `usize` is smaller than 64 bits, on a tree with more + /// than 2^31 nodes, this method may produce an iterator over an unspecified subset of nodes. + /// + /// # Panics /// Calling this method on an invalid node handle, or an unbalanced parenthesis expression, - /// will produce an iterator over an unspecified subset of nodes. + /// will produce an iterator over an unspecified subset of nodes, or panic either during + /// construction or iteration. pub fn subtree_iter( &self, node: as Tree>::NodeHandle, @@ -437,17 +460,44 @@ impl BpTree { "Node handle is invalid" ); - let index = self.vec.rank1(node); + let mut index = self.vec.rank1(node); let close = self.close(node).unwrap_or(node); let subtree_size = self.vec.rank1(close) - index; - self.vec.iter1().skip(index).take(subtree_size) + let mut iterator = self.vec.iter1(); + + // since index and subtree_size can exceed usize::MAX, we need some special casing. + // This should be optimized away on 64-bit architectures + + // skip `index` elements + while index > usize::MAX as u64 { + index -= usize::MAX as u64; + iterator.advance_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_by(index as usize).unwrap(); + + // limit to `subtree_size` elements by consuming the back of the iterator + let mut remaining_bits = self.vec.rank1 - index - subtree_size; + while remaining_bits > usize::MAX as u64 { + remaining_bits -= usize::MAX as u64; + iterator.advance_back_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_back_by(remaining_bits as usize).unwrap(); + + iterator } /// Iterate over a subtree rooted at `node` in depth-first (post-)order. /// This is slower than the pre-order iteration. /// The iteration ends with the node itself. /// + /// # Limitations + /// When called on an architecture where `usize` is smaller than 64 bits, on a tree with more + /// than 2^31 nodes, the iterator may return an unspecified number of nodes starting at an + /// unspecified node.
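The skip loops above exist because `advance_by` takes a `usize` count; the same chunking pattern on a plain iterator, written with stable `nth` (which consumes `k + 1` elements), looks like this sketch:

```rust
/// Advance `iter` by `n` elements even if `n` exceeds `usize::MAX`,
/// mirroring the chunked skip in `subtree_iter`.
fn skip_u64<I: Iterator>(iter: &mut I, mut n: u64) {
    while n > usize::MAX as u64 {
        iter.nth(usize::MAX - 1); // nth(k) consumes exactly k + 1 elements
        n -= usize::MAX as u64;
    }
    if n > 0 {
        // the loop above guarantees that n now fits into usize
        iter.nth(n as usize - 1);
    }
}

let mut it = 0..100u64;
skip_u64(&mut it, 10);
assert_eq!(it.next(), Some(10));
```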
+ /// /// # Panics /// Calling this method on an invalid node handle, or an unbalanced parenthesis expression, /// will produce an iterator over an unspecified subset of nodes, or panic either during /// construction or iteration. @@ -461,15 +511,33 @@ impl BpTree { "Node handle is invalid" ); - let index = self.vec.rank0(node); + let mut index = self.vec.rank0(node); let close = self.close(node).unwrap_or(node); let subtree_size = self.vec.rank0(close) + 1 - index; - self.vec - .iter0() - .skip(index) - .take(subtree_size) - .map(|n| self.open(n).unwrap()) + let mut iterator = self.vec.iter0(); + + // since index and subtree_size can exceed usize::MAX, we need some special casing. + // This should be optimized away on 64-bit architectures + + // skip `index` elements + while index > usize::MAX as u64 { + index -= usize::MAX as u64; + iterator.advance_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_by(index as usize).unwrap(); + + // limit to `subtree_size` elements by consuming the back of the iterator + let mut remaining_bits = self.vec.rank0 - index - subtree_size; + while remaining_bits > usize::MAX as u64 { + remaining_bits -= usize::MAX as u64; + iterator.advance_back_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_back_by(remaining_bits as usize).unwrap(); + + iterator.map(|n| self.open(n).unwrap()) } /// Iterate over the children of a node in the tree. @@ -520,7 +588,7 @@ impl BpTree { /// use vers_vecs::{BitVec, RsVec, BpTree, Tree}; /// /// let bv = BitVec::pack_sequence_u8(&[0b1101_0111, 0b0010_0100], 8); - /// let tree = BpTree::<4>::from_bit_vector(bv); + /// let tree = BpTree::<4>::from_bit_vec(bv); /// assert_eq!(tree.size(), 8); /// /// let rs_vec = tree.into_parentheses_vec(); /// /// bv.flip_bit(15); /// bv.append_bits(0, 2); - /// let tree = BpTree::<4>::from_bit_vector(bv); + /// let tree = BpTree::<4>::from_bit_vec(bv); /// assert_eq!(tree.size(), 9); /// ``` #[must_use] @@ -544,8 +612,8 @@ } } -impl Tree for BpTree { - type NodeHandle = usize; +impl Tree for BpTree { + type NodeHandle = u64; fn root(&self) -> Option { if self.vec.is_empty() { @@ -627,7 +695,7 @@ impl Tree for BpTree { }) } - fn node_index(&self, node: Self::NodeHandle) -> usize { + fn node_index(&self, node: Self::NodeHandle) -> u64 { debug_assert!( self.vec.get(node) == Some(OPEN_PAREN),
"Node handle is invalid" @@ -734,7 +802,7 @@ impl SubtreeSize for BpTree { } } -impl IntoIterator for BpTree { +impl IntoIterator for BpTree { type Item = as Tree>::NodeHandle; type IntoIter = SelectIntoIter; @@ -743,19 +811,19 @@ impl IntoIterator for BpTree { } } -impl From for BpTree { +impl From for BpTree { fn from(bv: BitVec) -> Self { - Self::from_bit_vector(bv) + Self::from_bit_vec(bv) } } -impl From> for BitVec { +impl From> for BitVec { fn from(value: BpTree) -> Self { value.into_parentheses_vec().into_bit_vec() } } -impl From> for RsVec { +impl From> for RsVec { fn from(value: BpTree) -> Self { value.into_parentheses_vec() } @@ -764,13 +832,13 @@ impl From> for RsVec { /// An iterator over the children of a node. /// Calls to `next` return the next child node handle in the order they appear in the parenthesis /// expression. -struct ChildrenIter<'a, const BLOCK_SIZE: usize, const FORWARD: bool> { +struct ChildrenIter<'a, const BLOCK_SIZE: u64, const FORWARD: bool> { tree: &'a BpTree, - current_sibling: Option, + current_sibling: Option, } -impl<'a, const BLOCK_SIZE: usize, const FORWARD: bool> ChildrenIter<'a, BLOCK_SIZE, FORWARD> { - fn new(tree: &'a BpTree, node: usize) -> Self { +impl<'a, const BLOCK_SIZE: u64, const FORWARD: bool> ChildrenIter<'a, BLOCK_SIZE, FORWARD> { + fn new(tree: &'a BpTree, node: u64) -> Self { Self { tree, current_sibling: if FORWARD { @@ -782,10 +850,10 @@ impl<'a, const BLOCK_SIZE: usize, const FORWARD: bool> ChildrenIter<'a, BLOCK_SI } } -impl Iterator +impl Iterator for ChildrenIter<'_, BLOCK_SIZE, FORWARD> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option { let current = self.current_sibling?; @@ -799,7 +867,7 @@ impl Iterator } } -impl FusedIterator +impl FusedIterator for ChildrenIter<'_, BLOCK_SIZE, FORWARD> { } diff --git a/src/trees/bp/tests.rs b/src/trees/bp/tests.rs index a7da6b2..c6b389d 100644 --- a/src/trees/bp/tests.rs +++ b/src/trees/bp/tests.rs @@ -6,13 +6,13 @@ use rand::{RngCore, SeedableRng}; #[test] fn test_fwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<8>::from_bit_vector(bv); + let bp_tree = BpTree::<8>::from_bit_vec(bv); // search within block assert_eq!(bp_tree.fwd_search(3, -1), Some(4)); @@ -33,13 +33,13 @@ fn test_fwd_search() { #[test] fn test_fwd_single_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<512>::from_bit_vector(bv); + let bp_tree = BpTree::<512>::from_bit_vec(bv); assert_eq!(bp_tree.fwd_search(3, -1), Some(4)); assert_eq!(bp_tree.fwd_search(2, -1), Some(5)); @@ -55,13 +55,13 @@ fn test_fwd_single_block() { #[test] fn test_fwd_illegal_queries() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.fwd_search(24, 0), None); assert_eq!(tree.fwd_search(25, 0), None); @@ -69,7 +69,7 @@ fn test_fwd_illegal_queries() { assert_eq!(tree.fwd_search(0, -2), None); assert_eq!(tree.fwd_search(22, 1), None); - let tree = BpTree::<64>::from_bit_vector(bv); + let tree = BpTree::<64>::from_bit_vec(bv); assert_eq!(tree.fwd_search(24, 0), None); assert_eq!(tree.fwd_search(25, 0), None); 
@@ -82,13 +82,13 @@ fn test_fwd_illegal_queries() { fn test_fwd_unbalanced_expression() { // test whether forward search works with unbalanced parenthesis expressions #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.fwd_search(0, -1), Some(13)); assert_eq!(tree.fwd_search(1, -1), Some(12)); @@ -99,8 +99,8 @@ fn test_fwd_unbalanced_expression() { #[test] fn test_fwd_block_boundary() { - let bv = BitVec::from_bits(&[1, 1, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); // test if a query returns the correct result if the result is the first bit in a block // and not in the initial block @@ -113,8 +113,8 @@ fn test_fwd_block_boundary() { #[test] fn test_fwd_negative_block() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<2>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<2>::from_bit_vec(bv); // regression: test if a query correctly returns none (instead of crashing) if the following // block has a negative maximum excess (as a previous bug clamped it to 0). @@ -127,39 +127,39 @@ fn test_fwd_last_element() { // the binary mM tree right of it may be uninitialized, and so not ending the query early // may yield invalid results or break assertions #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, ]); - let tree = BpTree::<4>::from_bit_vector(bv); + let tree = BpTree::<4>::from_bit_vec(bv); assert!(tree.fwd_search(23, 0).is_none()); } #[test] fn test_lookup_extreme_pop() { // test whether a table lookup works if the bit pattern is only ones or only zeros - let bv = BitVec::from_bits(&[1; 64]); - let tree = BpTree::<512>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1; 64]); + let tree = BpTree::<512>::from_bit_vec(bv); for excess in 1..64 { - assert_eq!(tree.fwd_search(0, excess), Some(excess as usize)); + assert_eq!(tree.fwd_search(0, excess), Some(excess as u64)); } - let bv = BitVec::from_bits(&[0; 64]); - let tree = BpTree::<512>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0; 64]); + let tree = BpTree::<512>::from_bit_vec(bv); for excess in 1..64 { - assert_eq!(tree.fwd_search(0, -excess), Some(excess as usize)); + assert_eq!(tree.fwd_search(0, -excess), Some(excess as u64)); } } #[test] fn test_fwd_fuzzy() { // we're fuzzing forward search a bit - const L: usize = 1000; - const L_BITS: usize = L * size_of::() * 8; + const L: u64 = 1000; + const L_BITS: u64 = L * size_of::() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -170,7 +170,7 @@ fn test_fwd_fuzzy() { } // pre-calculate all absolute excess values - let mut excess_values = vec![0i16; L_BITS]; + let mut excess_values = vec![0i16; L_BITS as usize]; let mut excess = 0; for (idx, bit) in bit_vec.iter().enumerate() { if bit == 1 { @@ -182,16 +182,16 @@ fn test_fwd_fuzzy() { } } - let bp = BpTree::<128>::from_bit_vector(bit_vec); + let bp = BpTree::<128>::from_bit_vec(bit_vec); // test any query from valid nodes with the given relative excess values for relative_excess in [-3, -2, -1, 0, 1, 2, 3] 
{ for node_handle in bp.vec.iter1() { let absolute_excess = bp.excess(node_handle) + relative_excess; - let expected = excess_values[node_handle + 1..] + let expected = excess_values[(node_handle + 1) as usize..] .iter() .position(|&excess| excess as i64 == absolute_excess) - .map(|i| i + node_handle + 1); + .map(|i| i as u64 + node_handle + 1); let actual = bp.fwd_search(node_handle, relative_excess); assert_eq!( expected, @@ -209,13 +209,13 @@ fn test_fwd_fuzzy() { #[test] fn test_bwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<8>::from_bit_vector(bv); + let bp_tree = BpTree::<8>::from_bit_vec(bv); // search within block assert_eq!(bp_tree.bwd_search(4, -1), Some(3)); @@ -236,13 +236,13 @@ fn test_bwd_search() { #[test] fn test_bwd_single_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<512>::from_bit_vector(bv); + let bp_tree = BpTree::<512>::from_bit_vec(bv); assert_eq!(bp_tree.bwd_search(4, -1), Some(3)); assert_eq!(bp_tree.bwd_search(5, -1), Some(2)); @@ -258,13 +258,13 @@ fn test_bwd_single_block() { #[test] fn test_bwd_illegal_queries() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.bwd_search(0, 0), None); assert_eq!(tree.bwd_search(1, 0), None); @@ -272,7 +272,7 @@ fn test_bwd_illegal_queries() { assert_eq!(tree.bwd_search(23, -2), None); assert_eq!(tree.bwd_search(22, -3), None); - let tree = BpTree::<64>::from_bit_vector(bv); + let tree = BpTree::<64>::from_bit_vec(bv); assert_eq!(tree.bwd_search(0, 0), None); assert_eq!(tree.bwd_search(1, 0), None); @@ -285,8 +285,8 @@ fn test_bwd_illegal_queries() { fn test_bwd_left_block_boundary() { // test if a query returns the correct result if the result is the first bit after // a block boundary (the left-most one even for backward search) - let bv = BitVec::from_bits(&[1, 1, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.bwd_search(5, 0), Some(3)); } @@ -294,12 +294,12 @@ fn test_bwd_left_block_boundary() { #[test] fn test_bwd_right_block_boundary() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, ]); - let bp_tree = BpTree::<4>::from_bit_vector(bv); + let bp_tree = BpTree::<4>::from_bit_vec(bv); // test the correct result is returned if result is exactly at a right block boundary assert_eq!(bp_tree.bwd_search(11, -1), Some(4)); @@ -307,8 +307,8 @@ fn test_bwd_right_block_boundary() { #[test] fn test_bwd_block_traversal() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); // if we request excess 0 backwards at a block boundary // we test if that actually traverses the vector instead of reporting @@ -320,8 +320,8 @@ fn test_bwd_block_traversal() { #[test] fn test_bwd_fuzzy() { // we're fuzzing forward search a bit - const L: usize = 1000; - const L_BITS: usize = L * 
size_of::() * 8; + const L: u64 = 1000; + const L_BITS: u64 = L * size_of::() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -332,7 +332,7 @@ fn test_bwd_fuzzy() { } // pre-calculate all absolute excess values - let mut excess_values = vec![0i16; L_BITS + 1]; + let mut excess_values = vec![0i16; (L_BITS + 1) as usize]; let mut excess = 0; for (idx, bit) in bit_vec.iter().enumerate() { if bit == 1 { @@ -344,7 +344,7 @@ fn test_bwd_fuzzy() { } } - let bp = BpTree::<128>::from_bit_vector(bit_vec); + let bp = BpTree::<128>::from_bit_vec(bit_vec); // test any query from valid nodes with the given relative excess values for relative_excess in [-3, -2, -1, 0, 1, 2, 3] { @@ -354,9 +354,10 @@ fn test_bwd_fuzzy() { } else { bp.excess(node_handle - 1) + relative_excess }; - let expected = excess_values[..node_handle] + let expected = excess_values[..node_handle as usize] .iter() - .rposition(|&excess| excess as i64 == absolute_excess); + .rposition(|&excess| excess as i64 == absolute_excess) + .map(|idx| idx as u64); let actual = bp.bwd_search(node_handle, relative_excess); assert_eq!( @@ -374,12 +375,12 @@ fn test_bwd_fuzzy() { #[test] fn test_close() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 0..24 { assert_eq!(tree.close(i), Some(47 - i)); @@ -390,12 +391,12 @@ fn test_close() { #[test] fn test_open() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 24..48 { assert_eq!(tree.open(i), Some(47 - i)); @@ -406,12 +407,12 @@ fn test_open() { #[test] fn test_enclose() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 1..24 { assert_eq!(tree.enclose(i), Some(i - 1)); @@ -430,11 +431,11 @@ fn test_enclose() { #[test] fn test_parent() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.excess(27), 0, "tree is not balanced"); @@ -443,13 +444,13 @@ fn test_parent() { for (idx, bit) in bv.iter().enumerate() { if bit == 1 { assert_eq!( - tree.parent(idx), + tree.parent(idx as u64), head, "parent of node {} is incorrect", idx ); stack.push(head); - head = Some(idx); + head = Some(idx as u64); } else { head = stack.pop().expect("stack underflow despite balanced tree"); } @@ -458,9 +459,9 @@ fn test_parent() { #[test] fn test_children() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = 
BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.excess(17), 0, "tree is not balanced"); assert_eq!(tree.first_child(0), Some(1)); @@ -491,25 +492,25 @@ fn test_children() { fn test_contiguous_index() { // test whether `node_index` and `node_handle` return correct indices / node handles. - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv.clone()); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv.clone()); let rs: RsVec = bv.into(); for (rank, index_in_bv) in rs.iter1().enumerate() { - assert_eq!(tree.node_index(index_in_bv), rank); - assert_eq!(tree.node_handle(rank), index_in_bv); + assert_eq!(tree.node_index(index_in_bv), rank as u64); + assert_eq!(tree.node_handle(rank as u64), index_in_bv); } } #[test] fn test_depth() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); let mut depth = 0; - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); for i in 0..24 { if bv.get(i) == Some(1) { assert_eq!(tree.depth(i), depth); @@ -525,17 +526,17 @@ fn test_is_leaf() { let bits = vec![ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, ]; - let bv = BitVec::from_bits(&bits); + let bv = BitVec::from_bits_u8(&bits); let leaves = bits[..] .windows(2) .map(|window| window[0] == 1 && window[1] == 0) .collect::>(); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); for (idx, is_leaf) in leaves.iter().enumerate() { // if the bit is 1, check if that node is a leaf. If it's 0, it's not a valid node handle. 
if bits[idx] == 1 { - assert_eq!(tree.is_leaf(idx), *is_leaf); + assert_eq!(tree.is_leaf(idx as u64), *is_leaf); } } } @@ -545,8 +546,8 @@ fn test_is_ancestor() { // (()((())())) // ab cde f let bits = vec![1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0]; - let bv = BitVec::from_bits(&bits); - let tree = BpTree::<8>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&bits); + let tree = BpTree::<8>::from_bit_vec(bv); let a = tree.root().unwrap(); let b = tree.first_child(a).unwrap(); let c = tree.next_sibling(b).unwrap(); @@ -574,22 +575,22 @@ fn test_is_ancestor() { #[test] fn test_root() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.root(), Some(0)); assert_eq!(tree.previous_sibling(0), None); assert_eq!(tree.next_sibling(0), None); - let tree = BpTree::<16>::from_bit_vector(BitVec::new()); + let tree = BpTree::<16>::from_bit_vec(BitVec::new()); assert_eq!(tree.root(), None); } #[test] fn test_level_ancestor() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_ancestor(2, 0), Some(2)); assert_eq!(tree.level_ancestor(2, 1), Some(1)); @@ -603,10 +604,10 @@ fn test_level_ancestor() { #[test] fn test_level_next() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, // intentionally unbalanced ]); - let tree = BpTree::<4>::from_bit_vector(bv); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_next(0), None); // unbalanced query assert_eq!(tree.level_next(1), Some(5)); @@ -618,8 +619,8 @@ fn test_level_next() { #[test] fn test_level_prev() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_prev(0), None); assert_eq!(tree.level_prev(1), None); @@ -634,8 +635,8 @@ fn test_level_prev() { #[test] fn test_level_leftmost() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_leftmost(0), Some(0)); assert_eq!(tree.level_leftmost(1), Some(1)); @@ -647,8 +648,8 @@ fn test_level_leftmost() { #[test] fn test_level_rightmost() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_rightmost(0), Some(0)); assert_eq!(tree.level_rightmost(1), Some(11)); @@ -660,8 +661,8 @@ fn test_level_rightmost() { #[test] fn test_subtree_size() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.subtree_size(0), Some(9)); assert_eq!(tree.subtree_size(1), 
Some(2)); @@ -681,8 +682,8 @@ fn test_malformed_tree_positive() { // for further queries in a consistent state. // the tree has not enough closing brackets - let bv = BitVec::from_bits(&[1, 1, 1, 0, 1, 1, 0, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 1, 1, 0, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); test_all_functions(&tree); } @@ -694,8 +695,8 @@ fn test_malformed_tree_negative() { // for further queries in a consistent state. // the tree has too many closing brackets - let bv = BitVec::from_bits(&[0, 0, 1, 1, 1, 0, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0, 0, 1, 1, 1, 0, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); test_all_functions(&tree); } @@ -706,8 +707,8 @@ fn test_negative_depth() { // most results are meaningless, but we don't want to panic and leave the data structure // for further queries in a consistent state. - let bv = BitVec::from_bits(&[0, 0, 0, 0, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0, 0, 0, 0, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.depth(4), 0); } @@ -756,8 +757,8 @@ fn fuzz_tree_navigation() { // fuzzing the tree navigation operations on an unbalanced tree // because those are easier to generate uniformly. - const L: usize = 1 << 14; - const L_BITS: usize = L * size_of::() * 8; + const L: u64 = 1 << 14; + const L_BITS: u64 = L * size_of::() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -767,7 +768,7 @@ fn fuzz_tree_navigation() { bit_vec.append_word(rng.next_u64()); } - let tree = BpTree::<32>::from_bit_vector(bit_vec.clone()); + let tree = BpTree::<32>::from_bit_vec(bit_vec.clone()); let mut parent_stack = Vec::new(); // keep track of last sibling for each node @@ -778,6 +779,7 @@ fn fuzz_tree_navigation() { let mut sibling_count_stack = Vec::new(); tree.vec.iter().enumerate().for_each(|(idx, bit)| { + let idx = idx as u64; if bit == OPEN_PAREN { assert_eq!(tree.parent(idx), parent_stack.last().copied()); assert_eq!( @@ -831,7 +833,7 @@ fn fuzz_tree_navigation() { #[test] fn test_dfs_iterators() { - let tree = BpTree::<32>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<32>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -844,7 +846,7 @@ fn test_dfs_iterators() { #[test] fn test_subtree_iterators() { - let tree = BpTree::<4>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<4>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -875,7 +877,7 @@ fn test_subtree_iterators() { #[test] fn test_children_iterator() { - let tree = BpTree::<4>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<4>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -918,7 +920,7 @@ fn test_from_padded_bitvec() { bv.append_bit(0); bv.drop_last(1); - let tree = BpTree::<64>::from_bit_vector(bv.clone()); + let tree = BpTree::<64>::from_bit_vec(bv.clone()); assert_eq!(tree.root(), Some(0)); assert_eq!(tree.size(), 1); assert_eq!(tree.fwd_search(0, 2), None); diff --git a/src/trees/mmt.rs b/src/trees/mmt.rs index e66aa09..289383d 100644 --- a/src/trees/mmt.rs +++ b/src/trees/mmt.rs @@ -41,12 +41,16 @@ pub(crate) struct MinMaxTree { } impl MinMaxTree { - pub(crate) fn 
excess_tree(bit_vec: &BitVec, block_size: usize) -> Self { + pub(crate) fn excess_tree(bit_vec: &BitVec, block_size: u64) -> Self { if bit_vec.is_empty() { return Self::default(); } - let num_leaves = bit_vec.len().div_ceil(block_size); + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let num_leaves = bit_vec.len().div_ceil(block_size) as usize; + #[allow(clippy::cast_possible_truncation)] // only happens if available memory already exceeded + #[allow(clippy::cast_sign_loss)] + #[allow(clippy::cast_precision_loss)] let num_internal_nodes = max(1, (1 << (num_leaves as f64).log2().ceil() as usize) - 1); let mut nodes = vec![ExcessNode::default(); num_leaves + num_internal_nodes]; @@ -56,8 +60,9 @@ impl MinMaxTree { // bottom up construction for i in 0..bit_vec.len() { + #[allow(clippy::cast_possible_truncation)] // safe due to the division if i > 0 && i % block_size == 0 { - nodes[num_internal_nodes + i / block_size - 1] = ExcessNode { + nodes[num_internal_nodes + (i / block_size) as usize - 1] = ExcessNode { total: total_excess, min: min_excess, max: max_excess, @@ -170,7 +175,7 @@ impl MinMaxTree { /// Get the index of the left sibling of the node at `index` if it exists #[allow(clippy::unused_self)] // self is used for consistency with other methods pub(crate) fn left_sibling(&self, index: NonZeroUsize) -> Option { - if index.get() % 2 == 0 { + if index.get().is_multiple_of(2) { // index is at least 2 NonZeroUsize::new(index.get() - 1) } else { @@ -447,7 +452,7 @@ mod tests { #[test] fn test_simple_excess_tree() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, @@ -505,7 +510,7 @@ mod tests { // 3 4 5 6 // /\/\/\/\ // 7 8 9 10 11 12 - - - let bv = BitVec::from_bits(&[0; 48]); + let bv = BitVec::from_bits_u8(&[0; 48]); let tree = MinMaxTree::excess_tree(&bv, 8); assert_eq!(tree.nodes.len(), 13); // 6 leaves + 7 internal nodes @@ -586,7 +591,7 @@ mod tests { #[test] fn test_simple_fwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -625,7 +630,7 @@ mod tests { #[test] fn test_fwd_search_with_multiple_blocks() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, @@ -651,7 +656,7 @@ mod tests { #[test] fn test_fwd_search_relative_offsets() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 1, 1, // excess 2 1, 0, 1, 0, // min excess 0, max excess 1 @@ -670,7 +675,7 @@ mod tests { #[test] fn test_simple_bwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -709,7 +714,7 @@ mod tests { #[test] fn test_bwd_search_with_multiple_blocks() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, @@ -735,7 +740,7 @@ mod tests { #[test] fn test_bwd_search_relative_offsets() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 1, 1, // excess 2 1, 0, 1, 0, // min excess 0, max excess 1 @@ -752,7 +757,7 @@ mod tests { #[test] fn test_incomplete_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = 
BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 ]); @@ -772,7 +777,7 @@ mod tests { #[test] fn test_single_block() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 0]); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 0]); let tree = MinMaxTree::excess_tree(&bv, 8); @@ -782,12 +787,12 @@ mod tests { #[test] fn test_leaf_calculation() { // test small tree - let bv = BitVec::from_bits(&vec![0; 1000]); + let bv = BitVec::from_bits_u8(&vec![0; 1000]); let tree = MinMaxTree::excess_tree(&bv, 1200); assert_eq!(tree.first_leaf(), 1); // test very large tree - let bv = BitVec::from_bits(&vec![0; 1000]); + let bv = BitVec::from_bits_u8(&vec![0; 1000]); let tree = MinMaxTree::excess_tree(&bv, 4); assert_eq!(tree.first_leaf(), 255) @@ -797,7 +802,7 @@ mod tests { fn test_relative_excess() { // test a tree with 3 layers and different downwards traversals #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/src/trees/mod.rs b/src/trees/mod.rs index 3e2f0eb..4faec67 100644 --- a/src/trees/mod.rs +++ b/src/trees/mod.rs @@ -41,14 +41,14 @@ pub trait Tree { /// Convert a node handle into a contiguous index, allowing associated data to be stored in a vector. /// If `node` is not a valid node handle, the result is meaningless. - fn node_index(&self, node: Self::NodeHandle) -> usize; + fn node_index(&self, node: Self::NodeHandle) -> u64; /// Convert a contiguous index that enumerates all nodes into a node handle. /// This operation is the inverse of `node_index`. /// The index must be in the range `0..self.size()`. /// /// If the index is out of bounds, the behavior is unspecified. - fn node_handle(&self, index: usize) -> Self::NodeHandle; + fn node_handle(&self, index: u64) -> Self::NodeHandle; /// Returns true if the node is a leaf. /// If `node` is not a valid node handle, the result is meaningless. @@ -63,7 +63,7 @@ pub trait Tree { fn depth(&self, node: Self::NodeHandle) -> u64; /// Returns the number of nodes in the tree. - fn size(&self) -> usize; + fn size(&self) -> u64; /// Returns true, if the tree has no nodes. fn is_empty(&self) -> bool { @@ -81,7 +81,7 @@ pub trait SubtreeSize: Tree { /// /// Returns `None` if the `node` has no closing parenthesis (in an unbalanced parenthesis /// expression). - fn subtree_size(&self, node: Self::NodeHandle) -> Option; + fn subtree_size(&self, node: Self::NodeHandle) -> Option; } /// A trait for succinct tree data structures that support [`is_ancestor`] queries. @@ -122,6 +122,10 @@ pub trait LevelTree: Tree { /// /// Once the full tree has been visited, the caller must call [`build`] to create an instance of the /// implementing tree type. +/// +/// [`enter_node`]: TreeBuilder::enter_node +/// [`leave_node`]: TreeBuilder::leave_node +/// [`build`]: TreeBuilder::build pub trait TreeBuilder { /// The tree type constructed with this interface type Tree; @@ -139,5 +143,8 @@ pub trait TreeBuilder { /// (i.e. there are nodes for which [`leave_node`] has not been called, /// or there are more calls to `leave_node` than to [`enter_node`]; /// the number of extraneous calls to `enter_node` is returned in the error). + /// + /// [`leave_node`]: Self::leave_node + /// [`enter_node`]: Self::enter_node fn build(self) -> Result; } diff --git a/src/util/elias_fano_iter.rs b/src/util/elias_fano_iter.rs index 646d3f4..cd651e4 100644 --- a/src/util/elias_fano_iter.rs +++ b/src/util/elias_fano_iter.rs @@ -12,14 +12,17 @@ macro_rules! 
gen_ef_iter_impl { return Ok(()); } - if Some(self.index + n - 1) > self.back_index { + if Some(self.index + n as u64 - 1) > self.back_index { if Some(self.index) > self.back_index { Err(std::num::NonZeroUsize::new(n).unwrap()) } else { - Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) + // the following is limited in size by n, and `back_index` is `None` only if the vector is + // empty, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&u64::MAX).wrapping_sub(self.index).wrapping_add(1)) as usize).unwrap()) } } else { - self.index += n; + self.index += n as u64; if n > 0 { // since advance_by is not stable yet, we need to call nth - 1. self.upper_iter.nth(n - 1).expect("upper iterator should not be exhausted"); @@ -46,10 +49,12 @@ macro_rules! gen_ef_iter_impl { // since the cursors point to unconsumed items, we need to add 1 let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; - if remaining < n { - return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); + if remaining < n as u64 { + // the following is limited in size by n, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + return Err(std::num::NonZeroUsize::new(n - remaining as usize).unwrap()); } - self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; + self.back_index = if self.back_index >= Some(n as u64) { self.back_index.map(|b| b - n as u64) } else { None }; if n > 0 { // since advance_by is not stable yet, we need to call nth - 1. self.upper_iter.nth_back(n - 1).expect("upper iterator should not be exhausted"); @@ -87,6 +92,10 @@ macro_rules! gen_ef_iter_impl { /// Returns the exact number of elements that this iterator would iterate over. Does not /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic. fn count(self) -> usize where Self: Sized, @@ -139,9 +148,17 @@ macro_rules! gen_ef_iter_impl { } impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? { + // the check and panic guarantees panic on truncation + #[allow(clippy::cast_possible_truncation)] fn len(&self) -> usize { + // this check is hopefully eliminated on 64-bit architectures + if (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) + > usize::MAX as u64 { + panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden"); + } + // intentionally overflowing calculations to avoid branches on empty iterator - (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1) + (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) as usize } } @@ -180,19 +197,19 @@ macro_rules! impl_ef_iterator { #[doc = concat!("This struct is created by the `into_iter` trait implementation of `", stringify!($type), "`.")] #[derive(Clone, Debug)] pub struct $own { - upper_iter: crate::bit_vec::fast_rs_vec::SelectIntoIter, + upper_iter: crate::bit_vec::rs::SelectIntoIter, vec: crate::bit_vec::BitVec, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) 
even if the iterator is empty - back_index: Option, - lower_len: usize, + back_index: Option, + lower_len: u64, universe_zero: u64, } impl $own { #[must_use] - fn new(vec: crate::elias_fano::EliasFanoVec) -> Self { + fn new(vec: crate::ef::EliasFanoVec) -> Self { if vec.is_empty() { return Self { upper_iter: vec.upper_vec.into_iter1(), @@ -218,6 +235,7 @@ macro_rules! impl_ef_iterator { impl EliasFanoVec { #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")] + #[doc = "Note: if the iterator length exceeds `usize::MAX`, calling `len()` on it will panic."] #[must_use] pub fn iter(&self) -> $bor<'_> { $bor::new(self) @@ -228,19 +246,19 @@ macro_rules! impl_ef_iterator { #[doc = concat!("This struct is created by the `iter` method of `", stringify!($type), "`.")] #[derive(Clone, Debug)] pub struct $bor<'a> { - upper_iter: crate::bit_vec::fast_rs_vec::SelectIter<'a, false>, + upper_iter: crate::bit_vec::rs::SelectIter<'a, false>, vec: &'a crate::bit_vec::BitVec, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - back_index: Option, - lower_len: usize, + back_index: Option, + lower_len: u64, universe_zero: u64, } impl<'a> $bor<'a> { #[must_use] - fn new(vec: &'a crate::elias_fano::EliasFanoVec) -> Self { + fn new(vec: &'a crate::ef::EliasFanoVec) -> Self { if vec.is_empty() { return Self { upper_iter: vec.upper_vec.iter1(), diff --git a/src/util/general_iter.rs b/src/util/general_iter.rs index afc4c73..37dca0c 100644 --- a/src/util/general_iter.rs +++ b/src/util/general_iter.rs @@ -32,14 +32,17 @@ macro_rules! gen_vector_iter_impl { return Ok(()); } - if Some(self.index + n - 1) > self.back_index { + if Some(self.index + n as u64 - 1) > self.back_index { if Some(self.index) > self.back_index { Err(std::num::NonZeroUsize::new(n).unwrap()) } else { - Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) + // the following is limited in size by n, and `back_index` is `None` only if the vector is + // empty, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&u64::MAX).wrapping_sub(self.index).wrapping_add(1)) as usize).unwrap()) } } else { - self.index += n; + self.index += n as u64; Ok(()) } } @@ -62,10 +65,12 @@ macro_rules! gen_vector_iter_impl { // since the cursors point to unconsumed items, we need to add 1 let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; - if remaining < n { - return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); + if remaining < n as u64 { + // the following is limited in size by n, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + return Err(std::num::NonZeroUsize::new(n - remaining as usize).unwrap()); } - self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; + self.back_index = if self.back_index >= Some(n as u64) { self.back_index.map(|b| b - n as u64) } else { None }; Ok(()) } @@ -96,6 +101,10 @@ macro_rules! gen_vector_iter_impl { /// Returns the exact number of elements that this iterator would iterate over. Does not /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic.
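The new panic path in `len()` and `count()` only triggers when the remaining element count exceeds `usize::MAX`, which cannot happen on 64-bit targets, so ordinary usage is unaffected. A small illustration (using `BitVec::from_zeros`, which appears elsewhere in this diff):

```rust
use vers_vecs::BitVec;

let bv = BitVec::from_zeros(100);
let iter = bv.iter();
// well within usize::MAX remaining elements, so len() is safe here;
// only an iterator with more elements left than usize::MAX would panic
assert_eq!(iter.len(), 100);
assert_eq!(bv.iter().count(), 100);
```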
            fn count(self) -> usize
            where
                Self: Sized,
@@ -124,9 +133,17 @@ macro_rules! gen_vector_iter_impl {
         }
 
         impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? {
+            // the length check below guarantees a panic instead of a silent truncation
+            #[allow(clippy::cast_possible_truncation)]
             fn len(&self) -> usize {
+                // on 64-bit architectures this check can never be true and should be eliminated by the compiler
+                if (self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1)
+                    > usize::MAX as u64 {
+                    panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden");
+                }
+
                 // intentionally overflowing calculations to avoid branches on empty iterator
-                (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1)
+                (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) as usize
             }
         }
@@ -236,20 +253,20 @@ macro_rules! impl_vector_iterator {
         #[derive(Clone, Debug)]
         pub struct $own {
             vec: $type,
-            index: usize,
+            index: u64,
             // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by
             // a call to next_back()). It can be Some(..) even if the iterator is empty
-            back_index: Option<usize>,
+            back_index: Option<u64>,
         }
 
         #[doc = concat!("A borrowing iterator for `", stringify!($type), "`.")]
         #[derive(Clone, Debug)]
         pub struct $bor<'a> {
             vec: &'a $type,
-            index: usize,
+            index: u64,
             // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by
             // a call to next_back()). It can be Some(..) even if the iterator is empty
-            back_index: Option<usize>,
+            back_index: Option<u64>,
         }
 
         crate::util::gen_vector_iter_impl!($own, $type, $return_type, $get_unchecked, $get);
@@ -262,6 +279,8 @@ macro_rules! impl_vector_iterator {
         impl $type {
             #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")]
             #[doc = concat!("The iterator returns `", stringify!($return_type), "` elements.")]
+            #[doc = "Note: if the iterator's index type is wider than `usize`, calling `len()` on the \
+                iterator will panic if the iterator length exceeds `usize::MAX`."]
             #[must_use]
             pub fn iter(&self) -> $bor<'_> {
                 $bor::new(self)
diff --git a/src/wavelet/mod.rs b/src/wavelet/mod.rs
index 3d08602..16e13c2 100644
--- a/src/wavelet/mod.rs
+++ b/src/wavelet/mod.rs
@@ -62,6 +62,10 @@ use std::ops::Range;
 /// ```
 ///
 /// [`RsVec`]: RsVec
+/// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
+/// [`from_slice`]: WaveletMatrix::from_slice
+/// [`from_bit_vec_pc`]: WaveletMatrix::from_bit_vec_pc
+/// [`from_slice_pc`]: WaveletMatrix::from_slice_pc
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct WaveletMatrix {
@@ -77,28 +81,31 @@ impl WaveletMatrix {
    /// - `num_elements`: The number of elements in the sequence.
    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
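    ///
    /// For instance, [`from_slice`] forwards to this function with a bit-extracting
    /// closure over the slice (shape taken from the implementation further below):
    /// ```ignore
    /// Self::permutation_sorting(bits_per_element, sequence.len() as u64, |element, bit| {
    ///     (sequence[element as usize] >> bit) & 1
    /// })
    /// ```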
#[inline(always)] // should get rid of closures in favor of static calls - fn permutation_sorting u64>( + fn permutation_sorting u64>( bits_per_element: u16, - num_elements: usize, + num_elements: u64, bit_lookup: LOOKUP, ) -> Self { - let element_len = bits_per_element as usize; + let element_len = bits_per_element as u64; - let mut data = vec![BitVec::from_zeros(num_elements); element_len]; + #[allow(clippy::cast_possible_truncation)] + let mut data = vec![BitVec::from_zeros(num_elements); element_len as usize]; // insert the first bit of each word into the first bit vector // for each following level, insert the next bit of each word into the next bit vector // sorted stably by the previous bit vector let mut permutation = (0..num_elements).collect::>(); - let mut next_permutation = vec![0; num_elements]; + #[allow(clippy::cast_possible_truncation)] + let mut next_permutation = vec![0; num_elements as usize]; for (level, data) in data.iter_mut().enumerate() { + let level = level as u64; let mut total_zeros = 0; for (i, p) in permutation.iter().enumerate() { if bit_lookup(*p, element_len - level - 1) == 0 { total_zeros += 1; } else { - data.set(i, 1).unwrap(); + data.set(i as u64, 1).unwrap(); } } @@ -108,7 +115,7 @@ impl WaveletMatrix { let mut zero_boundary = 0; let mut one_boundary = total_zeros; for (i, p) in permutation.iter().enumerate() { - if data.get_unchecked(i) == 0 { + if data.get_unchecked(i as u64) == 0 { next_permutation[zero_boundary] = *p; zero_boundary += 1; } else { @@ -139,10 +146,10 @@ impl WaveletMatrix { /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element. #[must_use] pub fn from_bit_vec(bit_vec: &BitVec, bits_per_element: u16) -> Self { - assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); - let num_elements = bit_vec.len() / bits_per_element as usize; + assert_eq!(bit_vec.len() % bits_per_element as u64, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); + let num_elements = bit_vec.len() / bits_per_element as u64; Self::permutation_sorting(bits_per_element, num_elements, |element, bit| { - bit_vec.get_unchecked(element * bits_per_element as usize + bit) + bit_vec.get_unchecked(element * bits_per_element as u64 + bit) }) } @@ -161,8 +168,10 @@ impl WaveletMatrix { bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); - Self::permutation_sorting(bits_per_element, sequence.len(), |element, bit| { - (sequence[element] >> bit) & 1 + #[allow(clippy::cast_possible_truncation)] + // safe because the closure is only called with indices of `sequence` + Self::permutation_sorting(bits_per_element, sequence.len() as u64, |element, bit| { + (sequence[element as usize] >> bit) & 1 }) } @@ -176,17 +185,19 @@ impl WaveletMatrix { /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word. /// - `element_lookup`: A closure that returns the `element`-th word. 
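    ///
    /// For instance, [`from_slice_pc`] supplies both closures over the slice
    /// (shape taken from the implementation further below):
    /// ```ignore
    /// Self::prefix_counting(
    ///     bits_per_element,
    ///     sequence.len() as u64,
    ///     |element, bit| (sequence[element as usize] >> bit) & 1,
    ///     |element| sequence[element as usize],
    /// )
    /// ```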
#[inline(always)] // should get rid of closures in favor of static calls - fn prefix_counting u64, ELEMENT: Fn(usize) -> u64>( + fn prefix_counting u64, ELEMENT: Fn(u64) -> u64>( bits_per_element: u16, - num_elements: usize, + num_elements: u64, bit_lookup: LOOKUP, element_lookup: ELEMENT, ) -> Self { - let element_len = bits_per_element as usize; - let mut histogram = vec![0usize; 1 << bits_per_element]; - let mut borders = vec![0usize; 1 << bits_per_element]; - let mut data = vec![BitVec::from_zeros(num_elements); element_len]; + let element_len = bits_per_element as u64; + let mut histogram = vec![0u64; 1 << bits_per_element]; + let mut borders = vec![0u64; 1 << bits_per_element]; + #[allow(clippy::cast_possible_truncation)] + let mut data = vec![BitVec::from_zeros(num_elements); element_len as usize]; + #[allow(clippy::cast_possible_truncation)] // element_lookup only returns small values for i in 0..num_elements { histogram[element_lookup(i) as usize] += 1; data[0].set_unchecked(i, bit_lookup(i, element_len - 1)); @@ -207,9 +218,10 @@ impl WaveletMatrix { borders[h_minus_1] + histogram[h_minus_1]; } + #[allow(clippy::cast_possible_truncation)] // element_lookup only returns small values for i in 0..num_elements { let bit = bit_lookup(i, element_len - level - 1); - data[level].set_unchecked( + data[level as usize].set_unchecked( borders[element_lookup(i) as usize >> (element_len - level)], bit, ); @@ -242,21 +254,19 @@ impl WaveletMatrix { /// [`from_slice`]: WaveletMatrix::from_slice #[must_use] pub fn from_bit_vec_pc(bit_vec: &BitVec, bits_per_element: u16) -> Self { - assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); + assert_eq!(bit_vec.len() % bits_per_element as u64, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); assert!( bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); - let num_elements = bit_vec.len() / bits_per_element as usize; + let num_elements = bit_vec.len() / bits_per_element as u64; Self::prefix_counting( bits_per_element, num_elements, - |element, bit| bit_vec.get_unchecked(element * bits_per_element as usize + bit), + |element, bit| bit_vec.get_unchecked(element * bits_per_element as u64 + bit), |element| { - bit_vec.get_bits_unchecked( - element * bits_per_element as usize, - bits_per_element as usize, - ) + bit_vec + .get_bits_unchecked(element * bits_per_element as u64, bits_per_element as u64) }, ) } @@ -282,18 +292,20 @@ impl WaveletMatrix { bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); + #[allow(clippy::cast_possible_truncation)] + // safe because the closures are called only with indices of `sequence` Self::prefix_counting( bits_per_element, - sequence.len(), - |element, bit| (sequence[element] >> bit) & 1, - |element| sequence[element], + sequence.len() as u64, + |element, bit| (sequence[element as usize] >> bit) & 1, + |element| sequence[element as usize], ) } /// Generic function to read a value from the wavelet matrix and consume it with a closure. /// The function is used by the `get_value` and `get_u64` functions, deduplicating code. 
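    ///
    /// Sketch of the closure protocol: bits arrive from the most significant level
    /// downwards, so `get_u64_unchecked` folds them into an integer roughly like this
    /// (the `|=` line reconstructs a part of the body elided in this diff):
    /// ```ignore
    /// let mut value = 0;
    /// self.reconstruct_value_unchecked(i, |bit| {
    ///     value <<= 1;
    ///     value |= bit;
    /// });
    /// ```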
#[inline(always)] - fn reconstruct_value_unchecked(&self, mut i: usize, mut target_func: F) { + fn reconstruct_value_unchecked(&self, mut i: u64, mut target_func: F) { for level in 0..self.bits_per_element() { let bit = self.data[level].get_unchecked(i); target_func(bit); @@ -323,7 +335,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_value(100), None); /// ``` #[must_use] - pub fn get_value(&self, i: usize) -> Option { + pub fn get_value(&self, i: u64) -> Option { if self.data.is_empty() || i >= self.data[0].len() { None } else { @@ -342,11 +354,11 @@ impl WaveletMatrix { /// /// [`get_value`]: WaveletMatrix::get_value #[must_use] - pub fn get_value_unchecked(&self, i: usize) -> BitVec { - let mut value = BitVec::from_zeros(self.bits_per_element()); + pub fn get_value_unchecked(&self, i: u64) -> BitVec { + let mut value = BitVec::from_zeros(self.bits_per_element() as u64); let mut level = self.bits_per_element() - 1; self.reconstruct_value_unchecked(i, |bit| { - value.set_unchecked(level, bit); + value.set_unchecked(level as u64, bit); level = level.saturating_sub(1); }); value @@ -369,7 +381,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_u64(100), None); /// ``` #[must_use] - pub fn get_u64(&self, i: usize) -> Option { + pub fn get_u64(&self, i: u64) -> Option { if self.bits_per_element() > 64 || self.data.is_empty() || i >= self.data[0].len() { None } else { @@ -388,7 +400,7 @@ impl WaveletMatrix { /// /// [`get_u64`]: WaveletMatrix::get_u64 #[must_use] - pub fn get_u64_unchecked(&self, i: usize) -> u64 { + pub fn get_u64_unchecked(&self, i: u64) -> u64 { let mut value = 0; self.reconstruct_value_unchecked(i, |bit| { value <<= 1; @@ -414,9 +426,9 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank_range`]: WaveletMatrix::rank_range #[must_use] - pub fn rank_range_unchecked(&self, mut range: Range, symbol: &BitVec) -> usize { + pub fn rank_range_unchecked(&self, mut range: Range, symbol: &BitVec) -> u64 { for (level, data) in self.data.iter().enumerate() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range.start = data.rank0(range.start); range.end = data.rank0(range.end); } else { @@ -450,10 +462,10 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank_range(&self, range: Range, symbol: &BitVec) -> Option { + pub fn rank_range(&self, range: Range, symbol: &BitVec) -> Option { if range.start >= self.len() || range.end > self.len() - || symbol.len() != self.bits_per_element() + || symbol.len() != self.bits_per_element() as u64 { None } else { @@ -478,7 +490,7 @@ impl WaveletMatrix { /// /// [`rank_range_u64`]: WaveletMatrix::rank_range_u64 #[must_use] - pub fn rank_range_u64_unchecked(&self, mut range: Range, symbol: u64) -> usize { + pub fn rank_range_u64_unchecked(&self, mut range: Range, symbol: u64) -> u64 { for (level, data) in self.data.iter().enumerate() { if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 { range.start = data.rank0(range.start); @@ -512,7 +524,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_range_u64(2..4, 4), Some(1)); /// ``` #[must_use] - pub fn rank_range_u64(&self, range: Range, symbol: u64) -> Option { + pub fn rank_range_u64(&self, range: Range, symbol: u64) -> Option { if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 { None } else { @@ -543,7 +555,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank_offset`]: 
WaveletMatrix::rank_offset #[must_use] - pub fn rank_offset_unchecked(&self, offset: usize, i: usize, symbol: &BitVec) -> usize { + pub fn rank_offset_unchecked(&self, offset: u64, i: u64, symbol: &BitVec) -> u64 { self.rank_range_unchecked(offset..i, symbol) } @@ -577,11 +589,11 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank_offset(&self, offset: usize, i: usize, symbol: &BitVec) -> Option { + pub fn rank_offset(&self, offset: u64, i: u64, symbol: &BitVec) -> Option { if offset > i || offset >= self.len() || i > self.len() - || symbol.len() != self.bits_per_element() + || symbol.len() != self.bits_per_element() as u64 { None } else { @@ -610,7 +622,7 @@ impl WaveletMatrix { /// /// [`rank_offset_u64`]: WaveletMatrix::rank_offset_u64 #[must_use] - pub fn rank_offset_u64_unchecked(&self, offset: usize, i: usize, symbol: u64) -> usize { + pub fn rank_offset_u64_unchecked(&self, offset: u64, i: u64, symbol: u64) -> u64 { self.rank_range_u64_unchecked(offset..i, symbol) } @@ -640,7 +652,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_offset_u64(2, 4, 4), Some(1)); /// ``` #[must_use] - pub fn rank_offset_u64(&self, offset: usize, i: usize, symbol: u64) -> Option { + pub fn rank_offset_u64(&self, offset: u64, i: u64, symbol: u64) -> Option { if offset > i || offset >= self.len() || i > self.len() || self.bits_per_element() > 64 { None } else { @@ -666,7 +678,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank`]: WaveletMatrix::rank #[must_use] - pub fn rank_unchecked(&self, i: usize, symbol: &BitVec) -> usize { + pub fn rank_unchecked(&self, i: u64, symbol: &BitVec) -> u64 { self.rank_range_unchecked(0..i, symbol) } @@ -693,8 +705,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank(&self, i: usize, symbol: &BitVec) -> Option { - if i > self.len() || symbol.len() != self.bits_per_element() { + pub fn rank(&self, i: u64, symbol: &BitVec) -> Option { + if i > self.len() || symbol.len() != self.bits_per_element() as u64 { None } else { Some(self.rank_range_unchecked(0..i, symbol)) @@ -717,7 +729,7 @@ impl WaveletMatrix { /// /// [`rank_u64`]: WaveletMatrix::rank_u64 #[must_use] - pub fn rank_u64_unchecked(&self, i: usize, symbol: u64) -> usize { + pub fn rank_u64_unchecked(&self, i: u64, symbol: u64) -> u64 { self.rank_range_u64_unchecked(0..i, symbol) } @@ -741,7 +753,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1)); /// ``` #[must_use] - pub fn rank_u64(&self, i: usize, symbol: u64) -> Option { + pub fn rank_u64(&self, i: u64, symbol: u64) -> Option { if i > self.len() || self.bits_per_element() > 64 { None } else { @@ -770,11 +782,11 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`select_offset`]: WaveletMatrix::select_offset #[must_use] - pub fn select_offset_unchecked(&self, offset: usize, rank: usize, symbol: &BitVec) -> usize { + pub fn select_offset_unchecked(&self, offset: u64, rank: u64, symbol: &BitVec) -> u64 { let mut range_start = offset; for (level, data) in self.data.iter().enumerate() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range_start = data.rank0(range_start); } else { range_start = data.rank0 + data.rank1(range_start); @@ -784,7 +796,7 @@ impl WaveletMatrix { let mut range_end = range_start + rank; for (level, data) in self.data.iter().enumerate().rev() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if 
symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range_end = data.select0(range_end); } else { range_end = data.select1(range_end - data.rank0); @@ -818,8 +830,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn select_offset(&self, offset: usize, rank: usize, symbol: &BitVec) -> Option { - if offset >= self.len() || symbol.len() != self.bits_per_element() { + pub fn select_offset(&self, offset: u64, rank: u64, symbol: &BitVec) -> Option { + if offset >= self.len() || symbol.len() != self.bits_per_element() as u64 { None } else { let idx = self.select_offset_unchecked(offset, rank, symbol); @@ -850,7 +862,7 @@ impl WaveletMatrix { /// /// [`select_offset_u64`]: WaveletMatrix::select_offset_u64 #[must_use] - pub fn select_offset_u64_unchecked(&self, offset: usize, rank: usize, symbol: u64) -> usize { + pub fn select_offset_u64_unchecked(&self, offset: u64, rank: u64, symbol: u64) -> u64 { let mut range_start = offset; for (level, data) in self.data.iter().enumerate() { @@ -895,7 +907,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.select_offset_u64(2, 1, 4), None); /// ``` #[must_use] - pub fn select_offset_u64(&self, offset: usize, rank: usize, symbol: u64) -> Option { + pub fn select_offset_u64(&self, offset: u64, rank: u64, symbol: u64) -> Option { if offset >= self.len() || self.bits_per_element() > 64 { None } else { @@ -927,7 +939,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`select`]: WaveletMatrix::select #[must_use] - pub fn select_unchecked(&self, rank: usize, symbol: &BitVec) -> usize { + pub fn select_unchecked(&self, rank: u64, symbol: &BitVec) -> u64 { self.select_offset_unchecked(0, rank, symbol) } @@ -952,8 +964,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn select(&self, rank: usize, symbol: &BitVec) -> Option { - if symbol.len() == self.bits_per_element() { + pub fn select(&self, rank: u64, symbol: &BitVec) -> Option { + if symbol.len() == self.bits_per_element() as u64 { let idx = self.select_unchecked(rank, symbol); if idx < self.len() { Some(idx) @@ -982,7 +994,7 @@ impl WaveletMatrix { /// /// [`select_u64`]: WaveletMatrix::select_u64 #[must_use] - pub fn select_u64_unchecked(&self, rank: usize, symbol: u64) -> usize { + pub fn select_u64_unchecked(&self, rank: u64, symbol: u64) -> u64 { self.select_offset_u64_unchecked(0, rank, symbol) } @@ -1004,7 +1016,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.select_u64(1, 4), Some(2)); /// ``` #[must_use] - pub fn select_u64(&self, rank: usize, symbol: u64) -> Option { + pub fn select_u64(&self, rank: u64, symbol: u64) -> Option { if self.bits_per_element() > 64 { None } else { @@ -1032,8 +1044,8 @@ impl WaveletMatrix { /// /// [`quantile`]: WaveletMatrix::quantile #[must_use] - pub fn quantile_unchecked(&self, range: Range, k: usize) -> BitVec { - let result = BitVec::from_zeros(self.bits_per_element()); + pub fn quantile_unchecked(&self, range: Range, k: u64) -> BitVec { + let result = BitVec::from_zeros(self.bits_per_element() as u64); self.partial_quantile_search_unchecked(range, k, 0, result) } @@ -1046,12 +1058,12 @@ impl WaveletMatrix { #[inline(always)] fn partial_quantile_search_unchecked( &self, - mut range: Range, - mut k: usize, + mut range: Range, + mut k: u64, start_level: usize, mut prefix: BitVec, ) -> BitVec { - debug_assert!(prefix.len() == self.bits_per_element()); + debug_assert!(prefix.len() == self.bits_per_element() as u64); debug_assert!(!range.is_empty()); debug_assert!(range.end <= self.len()); @@ 
-1067,7 +1079,7 @@ impl WaveletMatrix { } else { // the element is among the ones, so we set the bit to 1, and move the range // into the 1-partition of the next level - prefix.set_unchecked((self.bits_per_element() - 1) - level, 1); + prefix.set_unchecked(((self.bits_per_element() - 1) - level) as u64, 1); k -= zeros; range.start = data.rank0 + (range.start - zeros_start); // range.start - zeros_start is the rank1 of range.start range.end = data.rank0 + (range.end - zeros_end); // same here @@ -1080,7 +1092,7 @@ impl WaveletMatrix { /// Get the `k`-th smallest element in the encoded sequence in the specified `range`, /// where `k = 0` returns the smallest element. /// The `range` is a half-open interval, meaning that the `end` index is exclusive. - /// The `k`-th smallest element is returned as a `BitVec`, + /// The `k`-th smallest element is returned as a [`BitVec`], /// where the least significant bit is the first element. /// /// Returns `None` if the `range` is out of bounds, or if `k` is greater than the size of the range. @@ -1097,7 +1109,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.quantile(1..4, 0), Some(BitVec::pack_sequence_u8(&[1], 3))); /// ``` #[must_use] - pub fn quantile(&self, range: Range, k: usize) -> Option { + pub fn quantile(&self, range: Range, k: u64) -> Option { if range.start >= self.len() || range.end > self.len() || k >= range.end - range.start { None } else { @@ -1114,8 +1126,10 @@ impl WaveletMatrix { /// /// # Panics /// May panic if the `i` is out of bounds, or returns an empty bit vector. + /// + /// [`get_sorted`]: Self::get_sorted #[must_use] - pub fn get_sorted_unchecked(&self, i: usize) -> BitVec { + pub fn get_sorted_unchecked(&self, i: u64) -> BitVec { self.quantile_unchecked(0..self.len(), i) } @@ -1138,7 +1152,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_sorted(2), Some(BitVec::pack_sequence_u8(&[2], 3))); /// ``` #[must_use] - pub fn get_sorted(&self, i: usize) -> Option { + pub fn get_sorted(&self, i: u64) -> Option { if i >= self.len() { None } else { @@ -1162,7 +1176,7 @@ impl WaveletMatrix { /// /// [`quantile_u64`]: WaveletMatrix::quantile_u64 #[must_use] - pub fn quantile_u64_unchecked(&self, range: Range, k: usize) -> u64 { + pub fn quantile_u64_unchecked(&self, range: Range, k: u64) -> u64 { self.partial_quantile_search_u64_unchecked(range, k, 0, 0) } @@ -1175,8 +1189,8 @@ impl WaveletMatrix { #[inline(always)] fn partial_quantile_search_u64_unchecked( &self, - mut range: Range, - mut k: usize, + mut range: Range, + mut k: u64, start_level: usize, mut prefix: u64, ) -> u64 { @@ -1224,7 +1238,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.quantile_u64(1..4, 0), Some(1)); /// ``` #[must_use] - pub fn quantile_u64(&self, range: Range, k: usize) -> Option { + pub fn quantile_u64(&self, range: Range, k: u64) -> Option { if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 @@ -1249,7 +1263,7 @@ impl WaveletMatrix { /// /// [`get_sorted_u64`]: WaveletMatrix::get_sorted_u64 #[must_use] - pub fn get_sorted_u64_unchecked(&self, i: usize) -> u64 { + pub fn get_sorted_u64_unchecked(&self, i: u64) -> u64 { self.quantile_u64_unchecked(0..self.len(), i) } @@ -1270,7 +1284,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_sorted_u64(2), Some(2)); /// ``` #[must_use] - pub fn get_sorted_u64(&self, i: usize) -> Option { + pub fn get_sorted_u64(&self, i: u64) -> Option { if i >= self.len() || self.bits_per_element() > 64 { None } else { @@ -1291,7 +1305,7 @@ impl WaveletMatrix { 
/// /// [`range_min`]: WaveletMatrix::range_min #[must_use] - pub fn range_min_unchecked(&self, range: Range) -> BitVec { + pub fn range_min_unchecked(&self, range: Range) -> BitVec { self.quantile_unchecked(range, 0) } @@ -1313,7 +1327,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_min(1..3), Some(BitVec::pack_sequence_u8(&[4], 3))); /// ``` #[must_use] - pub fn range_min(&self, range: Range) -> Option { + pub fn range_min(&self, range: Range) -> Option { self.quantile(range, 0) } @@ -1331,7 +1345,7 @@ impl WaveletMatrix { /// /// [`range_min_u64`]: WaveletMatrix::range_min_u64 #[must_use] - pub fn range_min_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_min_u64_unchecked(&self, range: Range) -> u64 { self.quantile_u64_unchecked(range, 0) } @@ -1354,7 +1368,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_min_u64(1..3), Some(4)); /// ``` #[must_use] - pub fn range_min_u64(&self, range: Range) -> Option { + pub fn range_min_u64(&self, range: Range) -> Option { self.quantile_u64(range, 0) } @@ -1372,7 +1386,7 @@ impl WaveletMatrix { /// /// [`range_max`]: WaveletMatrix::range_max #[must_use] - pub fn range_max_unchecked(&self, range: Range) -> BitVec { + pub fn range_max_unchecked(&self, range: Range) -> BitVec { let k = range.end - range.start - 1; self.quantile_unchecked(range, k) } @@ -1395,7 +1409,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_max(3..6), Some(BitVec::pack_sequence_u8(&[7], 3))); /// ``` #[must_use] - pub fn range_max(&self, range: Range) -> Option { + pub fn range_max(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1418,7 +1432,7 @@ impl WaveletMatrix { /// /// [`range_max_u64`]: WaveletMatrix::range_max_u64 #[must_use] - pub fn range_max_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_max_u64_unchecked(&self, range: Range) -> u64 { let k = range.end - range.start - 1; self.quantile_u64_unchecked(range, k) } @@ -1441,7 +1455,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_max_u64(3..6), Some(7)); /// ``` #[must_use] - pub fn range_max_u64(&self, range: Range) -> Option { + pub fn range_max_u64(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1466,7 +1480,7 @@ impl WaveletMatrix { /// /// [`range_median`]: WaveletMatrix::range_median #[must_use] - pub fn range_median_unchecked(&self, range: Range) -> BitVec { + pub fn range_median_unchecked(&self, range: Range) -> BitVec { let k = (range.end - 1 - range.start) / 2; self.quantile_unchecked(range, k) } @@ -1492,7 +1506,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_median(0..6), Some(BitVec::pack_sequence_u8(&[2], 3))); /// ``` #[must_use] - pub fn range_median(&self, range: Range) -> Option { + pub fn range_median(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1517,7 +1531,7 @@ impl WaveletMatrix { /// /// [`range_median_u64`]: WaveletMatrix::range_median_u64 #[must_use] - pub fn range_median_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_median_u64_unchecked(&self, range: Range) -> u64 { let k = (range.end - 1 - range.start) / 2; self.quantile_u64_unchecked(range, k) } @@ -1543,7 +1557,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_median_u64(0..6), Some(2)); /// ``` #[must_use] - pub fn range_median_u64(&self, range: Range) -> Option { + pub fn range_median_u64(&self, range: Range) -> Option { if range.is_empty() || self.bits_per_element() > 64 || range.end > self.len() { None } else { @@ -1561,10 +1575,10 @@ impl 
WaveletMatrix { T: Clone, Reader: Fn(usize, &T) -> u64, Writer: Fn(u64, usize, &mut T), - Quantile: Fn(&Self, Range, usize, usize, T) -> T, + Quantile: Fn(&Self, Range, u64, usize, T) -> T, >( &self, - mut range: Range, + mut range: Range, symbol: &T, mut result_value: T, bit_reader: Reader, @@ -1577,7 +1591,7 @@ impl WaveletMatrix { // the level of the last node where we could go to an interval with smaller elements let mut last_one_level: Option = None; // the range of the last node where we could go to an interval with smaller elements - let mut next_smaller_range: Option> = None; + let mut next_smaller_range: Option> = None; for (level, data) in self.data.iter().enumerate() { let query_bit = bit_reader(level, symbol); @@ -1672,8 +1686,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn predecessor(&self, range: Range, symbol: &BitVec) -> Option { - if symbol.len() != self.bits_per_element() + pub fn predecessor(&self, range: Range, symbol: &BitVec) -> Option { + if symbol.len() != self.bits_per_element() as u64 || range.is_empty() || self.is_empty() || range.end > self.len() @@ -1684,10 +1698,10 @@ impl WaveletMatrix { self.predecessor_generic_unchecked( range, symbol, - BitVec::from_zeros(self.bits_per_element()), - |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level), + BitVec::from_zeros(self.bits_per_element() as u64), + |level, symbol| symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64), |bit, level, result| { - result.set_unchecked((self.bits_per_element() - 1) - level, bit); + result.set_unchecked(((self.bits_per_element() - 1) - level) as u64, bit); }, Self::partial_quantile_search_unchecked, ) @@ -1716,7 +1730,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 7), Some(7)); /// ``` #[must_use] - pub fn predecessor_u64(&self, range: Range, symbol: u64) -> Option { + pub fn predecessor_u64(&self, range: Range, symbol: u64) -> Option { if self.bits_per_element() > 64 || range.is_empty() || self.is_empty() @@ -1745,10 +1759,10 @@ impl WaveletMatrix { T: Clone, Reader: Fn(usize, &T) -> u64, Writer: Fn(u64, usize, &mut T), - Quantile: Fn(&Self, Range, usize, usize, T) -> T, + Quantile: Fn(&Self, Range, u64, usize, T) -> T, >( &self, - mut range: Range, + mut range: Range, symbol: &T, mut result_value: T, bit_reader: Reader, @@ -1761,7 +1775,7 @@ impl WaveletMatrix { // the level of the last node where we could go to an interval with larger elements let mut last_zero_level: Option = None; // the range of the last node where we could go to an interval with larger elements - let mut next_larger_range: Option> = None; + let mut next_larger_range: Option> = None; for (level, data) in self.data.iter().enumerate() { let query_bit = bit_reader(level, symbol); @@ -1859,8 +1873,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn successor(&self, range: Range, symbol: &BitVec) -> Option { - if symbol.len() != self.bits_per_element() + pub fn successor(&self, range: Range, symbol: &BitVec) -> Option { + if symbol.len() != self.bits_per_element() as u64 || range.is_empty() || self.is_empty() || range.end > self.len() @@ -1871,10 +1885,10 @@ impl WaveletMatrix { self.successor_generic_unchecked( range, symbol, - BitVec::from_zeros(self.bits_per_element()), - |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level), + BitVec::from_zeros(self.bits_per_element() as u64), + |level, symbol| symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64), |bit, level, 
result| { - result.set_unchecked((self.bits_per_element() - 1) - level, bit); + result.set_unchecked(((self.bits_per_element() - 1) - level) as u64, bit); }, Self::partial_quantile_search_unchecked, ) @@ -1903,7 +1917,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.successor_u64(0..6, 2), Some(2)); /// ``` #[must_use] - pub fn successor_u64(&self, range: Range, symbol: u64) -> Option { + pub fn successor_u64(&self, range: Range, symbol: u64) -> Option { if self.bits_per_element() > 64 || range.is_empty() || self.is_empty() @@ -1942,7 +1956,7 @@ impl WaveletMatrix { /// assert_eq!(iter.collect::>(), vec![1, 4, 4, 1, 2, 7]); /// ``` #[must_use] - pub fn iter_u64(&self) -> Option { + pub fn iter_u64(&self) -> Option> { if self.bits_per_element() > 64 { None } else { @@ -1966,8 +1980,10 @@ impl WaveletMatrix { /// The iterator yields `BitVec` elements. /// /// See also [`iter_sorted_u64`] for an iterator that yields `u64` elements. + /// + /// [`iter_sorted_u64`]: Self::iter_sorted_u64 #[must_use] - pub fn iter_sorted(&self) -> WaveletSortedRefIter { + pub fn iter_sorted(&self) -> WaveletSortedRefIter<'_> { WaveletSortedRefIter::new(self) } @@ -1993,7 +2009,7 @@ impl WaveletMatrix { /// assert_eq!(iter.collect::>(), vec![1, 1, 2, 4, 4, 7]); /// ``` #[must_use] - pub fn iter_sorted_u64(&self) -> Option { + pub fn iter_sorted_u64(&self) -> Option> { if self.bits_per_element() > 64 { None } else { @@ -2020,17 +2036,9 @@ impl WaveletMatrix { self.data.len() } - /// Get the number of bits per element in the alphabet of the encoded sequence. - #[must_use] - #[deprecated(since = "1.5.1", note = "please use `bits_per_element` instead")] - #[allow(clippy::cast_possible_truncation)] - pub fn bit_len(&self) -> u16 { - self.bits_per_element() as u16 - } - /// Get the number of elements stored in the encoded sequence. 
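    ///
    /// A sketch using the sequence from the examples above (assumes the crate-root
    /// re-exports used elsewhere in these docs):
    /// ```
    /// use vers_vecs::{BitVec, WaveletMatrix};
    ///
    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3), 3);
    /// let n: u64 = wavelet_matrix.len(); // the length is now `u64` instead of `usize`
    /// assert_eq!(n, 6);
    /// ```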
#[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { if self.data.is_empty() { 0 } else { diff --git a/src/wavelet/tests.rs b/src/wavelet/tests.rs index c4cf4e7..0d2d231 100644 --- a/src/wavelet/tests.rs +++ b/src/wavelet/tests.rs @@ -37,9 +37,10 @@ fn test_wavelet_encoding_randomized() { let wavelet_prefix_counting = WaveletMatrix::from_bit_vec_pc(&BitVec::pack_sequence_u8(&data, 8), 8); - assert_eq!(wavelet.len(), data.len()); + assert_eq!(wavelet.len(), data.len() as u64); for (i, v) in data.iter().enumerate() { + let i = i as u64; assert_eq!(wavelet.get_u64_unchecked(i), *v as u64); assert_eq!(wavelet_from_slice.get_u64_unchecked(i), *v as u64); assert_eq!(wavelet_prefix_counting.get_u64_unchecked(i), *v as u64); @@ -138,7 +139,7 @@ fn test_rank_randomized() { let symbol_bit_vec = BitVec::pack_sequence_u8(&[symbol], 8); let mut rank = 0; for (i, v) in data.iter().enumerate() { - assert_eq!(wavelet.rank_unchecked(i, &symbol_bit_vec), rank); + assert_eq!(wavelet.rank_unchecked(i as u64, &symbol_bit_vec), rank); if *v == symbol { rank += 1; } @@ -230,10 +231,10 @@ fn test_quantile() { for (i, v) in sequence.iter().enumerate() { assert_eq!( - wavelet.quantile(0..10, i), + wavelet.quantile(0..10, i as u64), Some(BitVec::pack_sequence_u8(&[*v as u8], 4)) ); - assert_eq!(wavelet.quantile_u64(0..10, i), Some(*v)); + assert_eq!(wavelet.quantile_u64(0..10, i as u64), Some(*v)); } assert_eq!(wavelet.quantile(0..10, 10), None); @@ -269,8 +270,8 @@ fn test_quantile_randomized() { let wavelet = WaveletMatrix::from_bit_vec(&BitVec::pack_sequence_u8(&data, 8), 8); for _ in 0..1000 { - let range_i = rng.gen_range(0..data.len()); - let range_j = rng.gen_range(0..data.len()); + let range_i = rng.gen_range(0..data.len() as u64); + let range_j = rng.gen_range(0..data.len() as u64); let range = min(range_i, range_j)..max(range_i, range_j); let k = if range.is_empty() { @@ -279,7 +280,7 @@ fn test_quantile_randomized() { rng.gen_range(range.clone()) - range.start }; - let mut range_data = data[range.clone()].to_vec(); + let mut range_data = data[range.start as usize..range.end as usize].to_vec(); range_data.sort_unstable(); assert_eq!( @@ -287,7 +288,7 @@ fn test_quantile_randomized() { if range.is_empty() { None } else { - Some(range_data[k] as u64) + Some(range_data[k as usize] as u64) } ); assert_eq!(