From 237e3f7b691c02f99d56487ebdce0350ec7bb875 Mon Sep 17 00:00:00 2001 From: Johannes Hengstler Date: Sat, 28 Jun 2025 14:43:31 +0200 Subject: [PATCH 1/5] prepare migration guide --- migrate.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 migrate.md diff --git a/migrate.md b/migrate.md new file mode 100644 index 0000000..33cfd51 --- /dev/null +++ b/migrate.md @@ -0,0 +1,3 @@ +# Migration Guide from 1.X to 2.0 +The following guide explains the changes from versions 1.X to the 2.0 release and points out what changes are necessary +in downstream crates. \ No newline at end of file From 406eacdcbc1d0feb4ab44fb4d08ab09a6fe524ed Mon Sep 17 00:00:00 2001 From: Cydhra Date: Sun, 19 Oct 2025 23:22:33 +0200 Subject: [PATCH 2/5] Change usize to u64 for all variables that aren't indices into byte-based structures (#34) This changes all types that index bits instead of words to u64, adjusts the necessary conversions, and suppresses truncation warnings where truncations are impossible. Furthermore, it modifies the usage, documentation, and implementation of iterator types, because various iterator functions are limited to `usize`. We avoid truncation where possible and panic where it cannot be avoided (which only applies to `len()` and `count()`). --- .github/workflows/rust.yml | 12 +- Cargo.toml | 2 +- benches/bp.rs | 4 +- benches/elias_fano_iterator.rs | 2 +- benches/select_adversarial.rs | 2 +- benches/select_iter.rs | 4 +- benches/sparse_equals.rs | 7 +- readme.md | 2 - src/bit_vec/fast_rs_vec/bitset.rs | 19 ++- src/bit_vec/fast_rs_vec/iter.rs | 170 ++++++++++++-------- src/bit_vec/fast_rs_vec/mod.rs | 113 ++++++++------ src/bit_vec/fast_rs_vec/select.rs | 113 +++++++------- src/bit_vec/fast_rs_vec/tests.rs | 40 ++--- src/bit_vec/mask.rs | 31 ++-- src/bit_vec/mod.rs | 158 ++++++++++--------- src/bit_vec/sparse.rs | 20 +-- src/bit_vec/tests.rs | 4 +- src/elias_fano/mod.rs | 108 +++++++------ src/elias_fano/tests.rs | 10 +- src/lib.rs | 15 +- src/trees/bp/builder.rs | 13 +- src/trees/bp/lookup.rs | 7 + src/trees/bp/mod.rs | 184 +++++++++++++++------- src/trees/bp/tests.rs | 40 ++--- src/trees/mmt.rs | 13 +- src/trees/mod.rs | 15 +- src/util/elias_fano_iter.rs | 44 ++++-- src/util/general_iter.rs | 41 +++-- src/wavelet/mod.rs | 252 +++++++++++++++--------------- src/wavelet/tests.rs | 17 +- 30 files changed, 833 insertions(+), 629 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c0b0208..2daeb6f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -31,4 +31,14 @@ jobs: - name: Build run: cargo build --verbose --features serde - name: Run tests - run: cargo test --verbose --features serde \ No newline at end of file + run: cargo test --verbose --features serde + + docs: + runs-on: ubuntu-latest + env: + RUSTFLAGS: -C target-cpu=x86-64 + RUSTDOCFLAGS: -C target-cpu=x86-64 + steps: + - uses: actions/checkout@v4 + - name: Docs + run: cargo doc --verbose --all-features \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 371da6e..7798dd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vers-vecs" -version = "1.7.0" +version = "1.8.1" edition = "2021" authors = ["Johannes \"Cydhra\" Hengstler"] description = "A collection of succinct data structures supported by fast implementations of rank and select queries."
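For downstream crates, the practical effect of this commit is that every bit position, bit length, and rank in the public API is `u64` instead of `usize`. The following is a minimal sketch of an affected call site, not part of this patch; the `rank_prefix` helper and its values are made up for illustration, but the constructors, `rank1`, and `iter1` signatures it uses are the ones introduced by the diffs below:

```rust
use vers_vecs::{BitVec, RsVec};

// Under 1.x this helper would have taken and returned usize;
// under 2.0 the position argument and the returned rank are u64.
fn rank_prefix(rs: &RsVec, pos: usize) -> u64 {
    // positions held as usize must now be widened explicitly
    rs.rank1(pos as u64)
}

fn main() {
    // constructors take u64 lengths as well
    let bits = BitVec::from_ones(1 << 10);
    let rs = RsVec::from_bit_vec(bits);
    assert_eq!(rank_prefix(&rs, 512), 512);

    // iterator items are u64; going back to usize (e.g. for slice
    // indexing) needs a conversion that can fail on 32-bit targets
    let first_one = rs.iter1().next().map(|pos| usize::try_from(pos).unwrap());
    assert_eq!(first_one, Some(0));
}
```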
diff --git a/benches/bp.rs b/benches/bp.rs index c278694..e88c0de 100644 --- a/benches/bp.rs +++ b/benches/bp.rs @@ -11,7 +11,7 @@ use vers_vecs::trees::{Tree, TreeBuilder}; mod common; -const BLOCK_SIZE: usize = 1024; +const BLOCK_SIZE: u64 = 1024; // TODO this function has nlogn runtime, which is a bit too much for the largest trees fn generate_tree<R: Rng>(rng: &mut R, nodes: u64) -> BpTree<BLOCK_SIZE> { @@ -107,7 +107,7 @@ fn bench_navigation(b: &mut Criterion) { let mut rng = StdRng::from_seed([0; 32]); let bp = generate_tree(&mut rng, l as u64); - let node_handles = (0..l).map(|i| bp.node_handle(i)).collect::<Vec<_>>(); + let node_handles = (0..l as u64).map(|i| bp.node_handle(i)).collect::<Vec<_>>(); group.bench_with_input(BenchmarkId::new("parent", l), &l, |b, _| { b.iter_batched( diff --git a/benches/elias_fano_iterator.rs b/benches/elias_fano_iterator.rs index 774ec87..ad939ae 100644 --- a/benches/elias_fano_iterator.rs +++ b/benches/elias_fano_iterator.rs @@ -29,7 +29,7 @@ fn bench_ef(b: &mut Criterion) { let start = Instant::now(); while i < iters { - black_box(ef_vec.get_unchecked(i as usize % l)); + black_box(ef_vec.get_unchecked(i % l as u64)); i += 1; } time += start.elapsed(); diff --git a/benches/select_adversarial.rs b/benches/select_adversarial.rs index f70be47..070e90e 100644 --- a/benches/select_adversarial.rs +++ b/benches/select_adversarial.rs @@ -35,7 +35,7 @@ fn select_worst_case(b: &mut Criterion) { // construct a vector with only one select block and put its last one bit at the end // of the vector - let mut bit_vec = BitVec::with_capacity(length / 64); + let mut bit_vec = BitVec::with_capacity(length as u64 / 64); for _ in 0..(1usize << 13) / 64 - 1 { bit_vec.append_word(u64::MAX); } diff --git a/benches/select_iter.rs b/benches/select_iter.rs index 73be7d7..595838e 100644 --- a/benches/select_iter.rs +++ b/benches/select_iter.rs @@ -15,11 +15,11 @@ fn bench_select_iter(b: &mut Criterion) { group.bench_with_input(BenchmarkId::new("select queries", l), &l, |b, _| { b.iter_custom(|iters| { let mut time = Duration::new(0, 0); - let mut i = 0usize; + let mut i = 0; let rank1 = bit_vec.rank1(bit_vec.len()); let start = Instant::now(); - while (i as u64) < iters { + while i < iters { black_box(bit_vec.select1(i % rank1)); i += 1; } diff --git a/benches/sparse_equals.rs b/benches/sparse_equals.rs index 9119652..7438fc1 100644 --- a/benches/sparse_equals.rs +++ b/benches/sparse_equals.rs @@ -22,14 +22,14 @@ pub const SIZES: [usize; 7] = [ const FILL_FACTORS: [f64; 6] = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]; /// Generate a bitvector with `fill_factors` percent ones at random positions -fn generate_vector_with_fill(rng: &mut ThreadRng, len: usize, fill_factor: f64) -> BitVec { +fn generate_vector_with_fill(rng: &mut ThreadRng, len: u64, fill_factor: f64) -> BitVec { let mut bit_vec1 = BitVec::from_zeros(len); // flip exactly fill-factor * len bits so the equality check is not trivial - sample(rng, len, (fill_factor * len as f64) as usize) + sample(rng, len as usize, (fill_factor * len as f64) as usize) .iter() .for_each(|i| { - bit_vec1.flip_bit(i); + bit_vec1.flip_bit(i as u64); }); bit_vec1 @@ -39,6 +39,7 @@ fn bench(b: &mut Criterion) { let mut rng = rand::thread_rng(); for len in SIZES { + let len = len as u64; let mut group = b.benchmark_group(format!("Equals Benchmark: {}", len)); group.plot_config(common::plot_config()); diff --git a/readme.md b/readme.md index ea64ed1..d21edac 100644 --- a/readme.md +++ b/readme.md @@ -31,8 +31,6 @@ since the intrinsics speed up both `rank` and `select` operations by
a factor of - `simd`: Enables the use of SIMD instructions for rank and select operations. This feature requires AVX-512 support and uses unsafe code. It also enables a special iterator for the rank/select bit vector that uses vectorized operations. -The feature only works on nightly Rust. -Enabling it on stable Rust is a no-op, because the required CPU features are not available there. - `serde`: Enables serialization and deserialization of the data structures using the `serde` crate. - `u16_lookup`: Enables a larger lookup table for BP tree queries. The larger table requires 128 KiB instead of 4 KiB. diff --git a/src/bit_vec/fast_rs_vec/bitset.rs b/src/bit_vec/fast_rs_vec/bitset.rs index 00cb5e0..2f98d11 100644 --- a/src/bit_vec/fast_rs_vec/bitset.rs +++ b/src/bit_vec/fast_rs_vec/bitset.rs @@ -7,7 +7,7 @@ use crate::RsVec; use std::mem::size_of; /// The number of bits in a RsVec that can be processed by AVX instructions at once. -const VECTOR_SIZE: usize = 16; +const VECTOR_SIZE: u64 = 16; // add iterator functions to RsVec impl RsVec { @@ -73,20 +73,22 @@ impl RsVec { /// [`bit_set_iter0`]: RsVec::bit_set_iter0 /// [`bit_set_iter1`]: RsVec::bit_set_iter1 /// [`SelectIter`]: super::SelectIter +#[allow(clippy::cast_possible_truncation)] pub struct BitSetIter<'a, const ZERO: bool> { vec: &'a RsVec, - base: usize, - offsets: [u32; VECTOR_SIZE], + base: u64, + offsets: [u32; VECTOR_SIZE as usize], content_len: u8, cursor: u8, } impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { pub(super) fn new(vec: &'a RsVec) -> Self { + #[allow(clippy::cast_possible_truncation)] let mut iter = Self { vec, base: 0, - offsets: [0; VECTOR_SIZE], + offsets: [0; VECTOR_SIZE as usize], content_len: 0, cursor: 0, }; @@ -103,7 +105,10 @@ impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { unsafe { let offsets = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - assert!(VECTOR_SIZE <= size_of::<u16>() * 8, "change data types"); + assert!( + VECTOR_SIZE <= size_of::<u16>() as u64 * 8, + "change data types" + ); let mut mask = __mmask16::from(data); if ZERO { mask = !mask; @@ -129,7 +134,7 @@ impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { } impl<const ZERO: bool> Iterator for BitSetIter<'_, ZERO> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option<Self::Item> { if self.base >= self.vec.len() { @@ -159,6 +164,6 @@ impl<const ZERO: bool> Iterator for BitSetIter<'_, ZERO> { let offset = self.offsets[self.cursor as usize]; self.cursor += 1; - Some(self.base + offset as usize) + Some(self.base + offset as u64) } } diff --git a/src/bit_vec/fast_rs_vec/iter.rs b/src/bit_vec/fast_rs_vec/iter.rs index e0d4fcf..198af18 100644 --- a/src/bit_vec/fast_rs_vec/iter.rs +++ b/src/bit_vec/fast_rs_vec/iter.rs @@ -13,6 +13,10 @@ impl RsVec { /// the linear access pattern. /// /// This method has convenience methods `iter0` and `iter1`. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn select_iter<const ZERO: bool>(&self) -> SelectIter<'_, ZERO> { SelectIter::new(self) } @@ -26,6 +30,10 @@ impl RsVec { /// the linear access pattern. /// /// This method has convenience methods `into_iter0` and `into_iter1`. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_select_iter<const ZERO: bool>(self) -> SelectIntoIter<ZERO> { SelectIntoIter::new(self) } @@ -36,6 +44,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIter`] for more information.
+ /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn iter0(&self) -> SelectIter<'_, true> { self.select_iter() } @@ -46,6 +58,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn iter1(&self) -> SelectIter<'_, false> { self.select_iter() } @@ -56,6 +72,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIntoIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_iter0(self) -> SelectIntoIter { self.into_select_iter() } @@ -66,6 +86,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIntoIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_iter1(self) -> SelectIntoIter { self.into_select_iter() } @@ -106,18 +130,18 @@ macro_rules! gen_iter_impl { } /// Same implementation like select0, but uses cached indices of last query to speed up search - fn select_next_0(&mut self) -> Option { + fn select_next_0(&mut self) -> Option { let mut rank = self.next_rank; if rank >= self.vec.rank0 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; let mut block_index = 0; if self.vec.super_blocks.len() > (self.last_super_block + 1) - && self.vec.super_blocks[self.last_super_block + 1].zeros > rank + && self.vec.super_blocks[self.last_super_block + 1].zeros as u64 > rank { // instantly jump to the last searched position super_block = self.last_super_block; @@ -127,13 +151,13 @@ macro_rules! gen_iter_impl { // this is true IF the last_block is either the last block in a super block, // in which case it must be this block, because we know the rank is within the super block, // OR if the next block has a rank higher than the current rank - if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 + if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize == 15 || self.vec.blocks.len() > self.last_block + 1 - && self.vec.blocks[self.last_block + 1].zeros as usize > rank + && self.vec.blocks[self.last_block + 1].zeros as u64 > rank { // instantly jump to the last searched position block_index = self.last_block; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block0(super_block, rank); @@ -143,11 +167,11 @@ macro_rules! gen_iter_impl { // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { - block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.vec.search_block0(rank, &mut block_index); self.last_block = block_index; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } self.next_rank += 1; @@ -155,17 +179,17 @@ macro_rules! 
gen_iter_impl { } /// Same implementation like ``select_next_0``, but backwards - fn select_next_0_back(&mut self) -> Option { + fn select_next_0_back(&mut self) -> Option { let mut rank = self.next_rank_back?; if self.next_rank_back.is_none() || rank < self.next_rank { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; let mut block_index = 0; - if self.vec.super_blocks[self.last_super_block_back].zeros < rank + if (self.vec.super_blocks[self.last_super_block_back].zeros as u64) < rank { // instantly jump to the last searched position super_block = self.last_super_block_back; @@ -174,11 +198,11 @@ macro_rules! gen_iter_impl { // check if current block contains the one and if yes, we don't need to search // this is true IF the zeros before the last block are less than the rank, // since the block before then can't contain it - if self.vec.blocks[self.last_block_back].zeros as usize <= rank + if self.vec.blocks[self.last_block_back].zeros as u64 <= rank { // instantly jump to the last searched position block_index = self.last_block_back; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block0(super_block, rank); @@ -188,11 +212,11 @@ macro_rules! gen_iter_impl { // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { - block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.vec.search_block0(rank, &mut block_index); self.last_block_back = block_index; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None }); @@ -201,62 +225,62 @@ macro_rules! 
gen_iter_impl { #[must_use] #[allow(clippy::assertions_on_constants)] - fn select_next_1(&mut self) -> Option { + fn select_next_1(&mut self) -> Option { let mut rank = self.next_rank; if rank >= self.vec.rank1 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; let mut block_index = 0; // check if the last super block still contains the rank, and if yes, we don't need to search if self.vec.super_blocks.len() > (self.last_super_block + 1) - && (self.last_super_block + 1) * SUPER_BLOCK_SIZE - - self.vec.super_blocks[self.last_super_block + 1].zeros + && (self.last_super_block + 1) as u64 * SUPER_BLOCK_SIZE + - self.vec.super_blocks[self.last_super_block + 1].zeros as u64 > rank { // instantly jump to the last searched position super_block = self.last_super_block; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; // check if current block contains the one and if yes, we don't need to search // this is true IF the last_block is either the last block in a super block, // in which case it must be this block, because we know the rank is within the super block, // OR if the next block has a rank higher than the current rank - if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 + if self.last_block as u64 % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 || self.vec.blocks.len() > self.last_block + 1 - && (self.last_block + 1 - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[self.last_block + 1].zeros as usize + && (self.last_block + 1 - block_at_super_block) as u64 * BLOCK_SIZE + - self.vec.blocks[self.last_block + 1].zeros as u64 > rank { // instantly jump to the last searched position block_index = self.last_block; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE + - self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block1(super_block, rank); self.last_super_block = super_block; - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; } // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; block_index = block_at_super_block; self.vec .search_block1(rank, block_at_super_block, &mut block_index); self.last_block = block_index; - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 * 
BLOCK_SIZE + - self.vec.blocks[block_index].zeros as u64; } self.next_rank += 1; @@ -265,101 +289,109 @@ macro_rules! gen_iter_impl { #[must_use] #[allow(clippy::assertions_on_constants)] - fn select_next_1_back(&mut self) -> Option { + fn select_next_1_back(&mut self) -> Option { let mut rank = self.next_rank_back?; if self.next_rank_back.is_none() || rank < self.next_rank { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; let mut block_index = 0; // check if the last super block still contains the rank, and if yes, we don't need to search - if (self.last_super_block_back) * SUPER_BLOCK_SIZE - - self.vec.super_blocks[self.last_super_block_back].zeros + if self.last_super_block_back as u64 * SUPER_BLOCK_SIZE + - (self.vec.super_blocks[self.last_super_block_back].zeros as u64) < rank { // instantly jump to the last searched position super_block = self.last_super_block_back; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; // check if current block contains the one and if yes, we don't need to search // this is true IF the ones before the last block are less than the rank, // since the block before then can't contain it - if (self.last_block_back - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[self.last_block_back].zeros as usize + if (self.last_block_back - block_at_super_block) as u64 * BLOCK_SIZE + - self.vec.blocks[self.last_block_back].zeros as u64 <= rank { // instantly jump to the last searched position block_index = self.last_block_back; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE + - self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block1(super_block, rank); self.last_super_block_back = super_block; - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; } // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; block_index = block_at_super_block; self.vec .search_block1(rank, block_at_super_block, &mut block_index); self.last_block_back = block_index; - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE + - self.vec.blocks[block_index].zeros as u64; } self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None }); Some(self.vec.search_word_in_block1(rank, block_index)) } - /// 
Advances the iterator by `n` elements. Returns an error if the iterator does not have - /// enough elements left. Does not call `next` internally. + /// Advances the iterator by `n` elements. + /// Does not call `next` internally. /// This method is currently being added to the iterator trait, see /// [this issue](https://github.com/rust-lang/rust/issues/77404). /// As soon as it is stabilized, this method will be removed and replaced with a custom /// implementation in the iterator impl. - pub(super) fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + /// + /// # Errors + /// If the iterator does not hold `n` elements, + /// all remaining elements are skipped, and an error with the overflow is returned. + pub fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { if self.len() >= n { - self.next_rank += n; + self.next_rank += n as u64; Ok(()) } else { let len = self.len(); - self.next_rank += len; + self.next_rank += len as u64; Err(NonZeroUsize::new(n - len).unwrap()) } } - /// Advances the iterator back by `n` elements. Returns an error if the iterator does not have - /// enough elements left. Does not call `next_back` internally. + /// Advances the iterator back by `n` elements. + /// Does not call `next_back` internally. /// This method is currently being added to the iterator trait, see /// [this issue](https://github.com/rust-lang/rust/issues/77404). /// As soon as it is stabilized, this method will be removed and replaced with a custom /// implementation in the double ended iterator impl. - pub(super) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + /// + /// # Errors + /// If the iterator does not hold `n` elements, + /// all remaining elements are skipped, and an error with the overflow is returned. + pub fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { if self.len() >= n { - self.next_rank_back = self.next_rank_back.map(|x| x - n); + self.next_rank_back = self.next_rank_back.map(|x| x - n as u64); Ok(()) } else { let len = self.len(); - self.next_rank_back = self.next_rank_back.map(|x| x - len); + self.next_rank_back = self.next_rank_back.map(|x| x - len as u64); Err(NonZeroUsize::new(n - len).unwrap()) } } } impl<$($life,)? const ZERO: bool> Iterator for $name<$($life,)? ZERO> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option { if ZERO { @@ -373,6 +405,12 @@ macro_rules! gen_iter_impl { (self.len(), Some(self.len())) } + /// Returns the exact number of elements that this iterator would iterate over. Does not + /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic. fn count(self) -> usize where Self: Sized, @@ -423,8 +461,16 @@ macro_rules! gen_iter_impl { impl<$($life,)? const ZERO: bool> FusedIterator for $name<$($life,)? ZERO> {} impl<$($life,)? const ZERO: bool> ExactSizeIterator for $name<$($life,)? 
ZERO> { + // the explicit check guarantees a panic instead of a silent truncation + #[allow(clippy::cast_possible_truncation)] fn len(&self) -> usize { - self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) + // this check is hopefully eliminated on 64-bit architectures + if self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) > usize::MAX as u64 { + panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden"); + } + + self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) as usize } } } @@ -461,11 +507,11 @@ macro_rules! gen_iter_impl { #[must_use] pub struct SelectIter<'a, const ZERO: bool> { pub(crate) vec: &'a RsVec, - next_rank: usize, + next_rank: u64, // rank back is None, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - next_rank_back: Option<usize>, + next_rank_back: Option<u64>, /// the last index in the super block structure where we found a bit last_super_block: usize, @@ -514,11 +560,11 @@ gen_iter_impl!('a, SelectIter); // this owning iterator became necessary pub struct SelectIntoIter<const ZERO: bool> { pub(crate) vec: RsVec, - next_rank: usize, + next_rank: u64, // rank back is None, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - next_rank_back: Option<usize>, + next_rank_back: Option<u64>, /// the last index in the super block structure where we found a bit last_super_block: usize, diff --git a/src/bit_vec/fast_rs_vec/mod.rs b/src/bit_vec/fast_rs_vec/mod.rs index 2c35643..3009500 100644 --- a/src/bit_vec/fast_rs_vec/mod.rs +++ b/src/bit_vec/fast_rs_vec/mod.rs @@ -20,7 +20,7 @@ use crate::BitVec; use super::WORD_SIZE; /// Size of a block in the bitvector. -const BLOCK_SIZE: usize = 512; +const BLOCK_SIZE: u64 = 512; /// Size of a super block in the bitvector. Super-blocks exist to decrease the memory overhead /// of block descriptors. @@ -30,12 +30,12 @@ const BLOCK_SIZE: usize = 512; /// impact on the performance of select queries. The larger the super block size, the deeper /// a binary search will be. We found 2^13 to be a good compromise between memory overhead and /// performance. -const SUPER_BLOCK_SIZE: usize = 1 << 13; +const SUPER_BLOCK_SIZE: u64 = 1 << 13; /// Size of a select block. The select block is used to speed up select queries. The select block /// contains the indices of every `SELECT_BLOCK_SIZE`'th 1-bit and 0-bit in the bitvector. /// The smaller this block-size, the faster select queries are, but the more memory is used. -const SELECT_BLOCK_SIZE: usize = 1 << 13; +const SELECT_BLOCK_SIZE: u64 = 1 << 13; /// Meta-data for a block. The `zeros` field stores the number of zeros up to the block, /// beginning from the last super-block boundary. This means the first block in a super-block @@ -53,7 +53,7 @@ struct BlockDescriptor { #[derive(Clone, Copy, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] struct SuperBlockDescriptor { - zeros: usize, + zeros: u64, } /// Meta-data for the select query.
Each entry i in the select vector contains the indices to find @@ -86,12 +86,12 @@ struct SelectSuperBlockDescriptor { #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct RsVec { data: Vec<u64>, - len: usize, + len: u64, blocks: Vec<BlockDescriptor>, super_blocks: Vec<SuperBlockDescriptor>, select_blocks: Vec<SelectSuperBlockDescriptor>, - pub(crate) rank0: usize, - pub(crate) rank1: usize, + pub(crate) rank0: u64, + pub(crate) rank1: u64, } impl RsVec { @@ -106,8 +106,8 @@ impl RsVec { pub fn from_bit_vec(vec: BitVec) -> RsVec { // Construct the block descriptor meta data. Each block descriptor contains the number of // zeros in the super-block, up to but excluding the block. - let mut blocks = Vec::with_capacity(vec.len() / BLOCK_SIZE + 1); - let mut super_blocks = Vec::with_capacity(vec.len() / SUPER_BLOCK_SIZE + 1); + let mut blocks = Vec::with_capacity((vec.len() / BLOCK_SIZE) as usize + 1); + let mut super_blocks = Vec::with_capacity((vec.len() / SUPER_BLOCK_SIZE) as usize + 1); let mut select_blocks = Vec::new(); // sentinel value @@ -116,16 +116,16 @@ impl RsVec { index_1: 0, }); - let mut total_zeros: usize = 0; - let mut current_zeros: usize = 0; + let mut total_zeros: u64 = 0; + let mut current_zeros: u64 = 0; let mut last_zero_select_block: usize = 0; let mut last_one_select_block: usize = 0; - for (idx, &word) in vec.data.iter().enumerate() { + for (word_idx, &word) in vec.data.iter().enumerate() { // if we moved past a block boundary, append the block information for the previous // block and reset the counter if we moved past a super-block boundary. - if idx % (BLOCK_SIZE / WORD_SIZE) == 0 { - if idx % (SUPER_BLOCK_SIZE / WORD_SIZE) == 0 { + if (word_idx as u64).is_multiple_of(BLOCK_SIZE / WORD_SIZE) { + if (word_idx as u64).is_multiple_of(SUPER_BLOCK_SIZE / WORD_SIZE) { total_zeros += current_zeros; current_zeros = 0; super_blocks.push(SuperBlockDescriptor { zeros: total_zeros }); @@ -141,40 +141,42 @@ impl RsVec { // count the zeros in the current word and add them to the counter // the last word may contain padding zeros, which should not be counted, // but since we do not append the last block descriptor, this is not a problem - let mut new_zeros = word.count_zeros() as usize; + let mut new_zeros = word.count_zeros() as u64; // in the last block, remove remaining zeros of limb that aren't part of the vector - if idx == vec.data.len() - 1 && vec.len % WORD_SIZE > 0 { + if word_idx == vec.data.len() - 1 && !vec.len.is_multiple_of(WORD_SIZE) { let mask = (1 << (vec.len % WORD_SIZE)) - 1; - new_zeros -= (word | mask).count_zeros() as usize; + new_zeros -= (word | mask).count_zeros() as u64; } let all_zeros = total_zeros + current_zeros + new_zeros; if all_zeros / SELECT_BLOCK_SIZE > (total_zeros + current_zeros) / SELECT_BLOCK_SIZE { - if all_zeros / SELECT_BLOCK_SIZE == select_blocks.len() { + if (all_zeros / SELECT_BLOCK_SIZE) as usize == select_blocks.len() { select_blocks.push(SelectSuperBlockDescriptor { index_0: super_blocks.len() - 1, index_1: 0, }); } else { - select_blocks[all_zeros / SELECT_BLOCK_SIZE].index_0 = super_blocks.len() - 1; + select_blocks[(all_zeros / SELECT_BLOCK_SIZE) as usize].index_0 = + super_blocks.len() - 1; } last_zero_select_block += 1; } - let total_bits = (idx + 1) * WORD_SIZE; + let total_bits = (word_idx as u64 + 1) * WORD_SIZE; let all_ones = total_bits - all_zeros; if all_ones / SELECT_BLOCK_SIZE - > (idx * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE + > (word_idx as u64 * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE { - if all_ones /
SELECT_BLOCK_SIZE == select_blocks.len() { + if (all_ones / SELECT_BLOCK_SIZE) as usize == select_blocks.len() { select_blocks.push(SelectSuperBlockDescriptor { index_0: 0, index_1: super_blocks.len() - 1, }); } else { - select_blocks[all_ones / SELECT_BLOCK_SIZE].index_1 = super_blocks.len() - 1; + select_blocks[(all_ones / SELECT_BLOCK_SIZE) as usize].index_1 = + super_blocks.len() - 1; } last_one_select_block += 1; @@ -230,7 +232,7 @@ impl RsVec { /// # Parameters /// - `pos`: The position of the bit to return the rank of. #[must_use] - pub fn rank0(&self, pos: usize) -> usize { + pub fn rank0(&self, pos: u64) -> u64 { self.rank(true, pos) } @@ -242,7 +244,7 @@ impl RsVec { /// # Parameters /// - `pos`: The position of the bit to return the rank of. #[must_use] - pub fn rank1(&self, pos: usize) -> usize { + pub fn rank1(&self, pos: u64) -> u64 { self.rank(false, pos) } @@ -250,7 +252,7 @@ impl RsVec { // branch elimination profits alone should make it worth it. #[allow(clippy::inline_always)] #[inline(always)] - fn rank(&self, zero: bool, pos: usize) -> usize { + fn rank(&self, zero: bool, pos: u64) -> u64 { #[allow(clippy::collapsible_else_if)] // readability and more obvious where dead branch elimination happens if zero { @@ -263,39 +265,40 @@ impl RsVec { } } - let index = pos / WORD_SIZE; - let block_index = pos / BLOCK_SIZE; - let super_block_index = pos / SUPER_BLOCK_SIZE; + let index = (pos / WORD_SIZE) as usize; + let block_index = (pos / BLOCK_SIZE) as usize; + let super_block_index = (pos / SUPER_BLOCK_SIZE) as usize; let mut rank = 0; // at first add the number of zeros/ones before the current super block rank += if zero { self.super_blocks[super_block_index].zeros } else { - (super_block_index * SUPER_BLOCK_SIZE) - self.super_blocks[super_block_index].zeros + (super_block_index as u64 * SUPER_BLOCK_SIZE) + - self.super_blocks[super_block_index].zeros }; // then add the number of zeros/ones before the current block rank += if zero { - self.blocks[block_index].zeros as usize + self.blocks[block_index].zeros as u64 } else { - ((block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE) - - self.blocks[block_index].zeros as usize + ((block_index as u64 % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE) + - self.blocks[block_index].zeros as u64 }; // naive popcount of blocks - for &i in &self.data[(block_index * BLOCK_SIZE) / WORD_SIZE..index] { + for &i in &self.data[((block_index as u64 * BLOCK_SIZE) / WORD_SIZE) as usize..index] { rank += if zero { - i.count_zeros() as usize + i.count_zeros() as u64 } else { - i.count_ones() as usize + i.count_ones() as u64 }; } rank += if zero { - (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize + (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as u64 } else { - (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize + (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as u64 }; rank @@ -303,7 +306,7 @@ impl RsVec { /// Return the length of the vector, i.e. the number of bits it contains. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -317,7 +320,7 @@ impl RsVec { /// bit of the returned u64 word. /// If the position is larger than the length of the vector, `None` is returned. 
#[must_use] - pub fn get(&self, pos: usize) -> Option { + pub fn get(&self, pos: u64) -> Option { if pos >= self.len() { None } else { @@ -331,8 +334,8 @@ impl RsVec { /// # Panics /// This function may panic if `pos >= self.len()` (alternatively, it may return garbage). #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { - (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1 + pub fn get_unchecked(&self, pos: u64) -> u64 { + (self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE)) & 1 } /// Return multiple bits at the given position. The number of bits to return is given by `len`. @@ -341,7 +344,7 @@ impl RsVec { /// None is returned (even if the query partially overlaps with the vector). /// If the length of the query is larger than 64, None is returned. #[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option { + pub fn get_bits(&self, pos: u64, len: u64) -> Option { if len > WORD_SIZE { return None; } @@ -370,13 +373,14 @@ impl RsVec { #[must_use] #[allow(clippy::comparison_chain)] // readability #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); - let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE); + let partial_word = self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE); if pos % WORD_SIZE + len <= WORD_SIZE { partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } else { - (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE))) + (partial_word + | (self.data[(pos / WORD_SIZE + 1) as usize] << (WORD_SIZE - pos % WORD_SIZE))) & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } } @@ -437,7 +441,11 @@ impl RsVec { let iter: SelectIter = self.select_iter(); - for (rank, bit_index) in iter.enumerate() { + let len = if ZERO { self.rank0 } else { self.rank1 }; + + // we need to manually enumerate() the iter, because the number of set bits could exceed + // the size of usize. + for (rank, bit_index) in (0..len).zip(iter) { // since rank is inlined, we get dead code elimination depending on ZERO if (other.get_unchecked(bit_index) == 0) != ZERO || other.rank(ZERO, bit_index) != rank { @@ -468,18 +476,19 @@ impl RsVec { return false; } - if self.data[..self.len / 64] + if self.data[..(self.len / WORD_SIZE) as usize] .iter() - .zip(other.data[..other.len / 64].iter()) + .zip(other.data[..(other.len / 64) as usize].iter()) .any(|(a, b)| a != b) { return false; } // if last incomplete block exists, test it without junk data - if self.len % 64 > 0 - && self.data[self.len / 64] & ((1 << (self.len % 64)) - 1) - != other.data[self.len / 64] & ((1 << (other.len % 64)) - 1) + if !self.len.is_multiple_of(WORD_SIZE) + && self.data[(self.len / WORD_SIZE) as usize] & ((1 << (self.len % WORD_SIZE)) - 1) + != other.data[(self.len / WORD_SIZE) as usize] + & ((1 << (other.len % WORD_SIZE)) - 1) { return false; } diff --git a/src/bit_vec/fast_rs_vec/select.rs b/src/bit_vec/fast_rs_vec/select.rs index b8721d7..9d8578e 100644 --- a/src/bit_vec/fast_rs_vec/select.rs +++ b/src/bit_vec/fast_rs_vec/select.rs @@ -7,7 +7,7 @@ use crate::util::unroll; /// A safety constant for assertions to make sure that the block size doesn't change without /// adjusting the code. 
-const BLOCKS_PER_SUPERBLOCK: usize = 16; +const BLOCKS_PER_SUPERBLOCK: u64 = 16; impl super::RsVec { /// Return the position of the 0-bit with the given rank. See `rank0`. @@ -17,12 +17,12 @@ impl super::RsVec { /// If the rank is larger than the number of 0-bits in the vector, the vector length is returned. #[must_use] #[allow(clippy::assertions_on_constants)] - pub fn select0(&self, mut rank: usize) -> usize { + pub fn select0(&self, mut rank: u64) -> u64 { if rank >= self.rank0 { return self.len; } - let mut super_block = self.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; if self.super_blocks.len() > (super_block + 1) && self.super_blocks[super_block + 1].zeros <= rank @@ -32,10 +32,10 @@ impl super::RsVec { rank -= self.super_blocks[super_block].zeros; - let mut block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let mut block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.search_block0(rank, &mut block_index); - rank -= self.blocks[block_index].zeros as usize; + rank -= self.blocks[block_index].zeros as u64; self.search_word_in_block0(rank, block_index) } @@ -56,10 +56,10 @@ impl super::RsVec { target_feature = "avx512bw", ))] #[inline(always)] - pub(super) fn search_block0(&self, rank: usize, block_index: &mut usize) { + pub(super) fn search_block0(&self, rank: u64, block_index: &mut usize) { use std::arch::x86_64::{_mm256_cmpgt_epu16_mask, _mm256_loadu_epi16, _mm256_set1_epi16}; - if self.blocks.len() > *block_index + (SUPER_BLOCK_SIZE / BLOCK_SIZE) { + if self.blocks.len() > *block_index + (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize { debug_assert!( SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", @@ -93,25 +93,25 @@ impl super::RsVec { target_feature = "avx512bw", )))] #[inline(always)] - pub(super) fn search_block0(&self, rank: usize, block_index: &mut usize) { + pub(super) fn search_block0(&self, rank: u64, block_index: &mut usize) { self.search_block0_naive(rank, block_index); } #[inline(always)] - fn search_block0_naive(&self, rank: usize, block_index: &mut usize) { + fn search_block0_naive(&self, rank: u64, block_index: &mut usize) { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance // this code relies on the fact that BLOCKS_PER_SUPERBLOCK blocks are in one superblock debug_assert!( - SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, + (SUPER_BLOCK_SIZE / BLOCK_SIZE) == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", 64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1 ); unroll!(4, - |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) / 2}| + |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize / 2}| // do not use select_unpredictable here, it degrades performance - if self.blocks.len() > *block_index + boundary && rank >= self.blocks[*block_index + boundary].zeros as usize { + if self.blocks.len() > *block_index + boundary && rank >= self.blocks[*block_index + boundary].zeros as u64 { *block_index += boundary; }, boundary /= 2); @@ -126,7 +126,7 @@ impl super::RsVec { /// * `block_index` - the index of the block to search in, this is the block in the blocks /// vector that contains the rank #[inline(always)] - pub(super) fn search_word_in_block0(&self, mut rank: usize, block_index: usize) -> usize { + pub(super) fn search_word_in_block0(&self, mut rank: u64, block_index: usize) -> u64 { // 
linear search for word that contains the rank. Binary search is not possible here, // because we don't have accumulated popcounts for the words. We use pdep to find the // position of the rank-th zero bit in the word, if the word contains enough zeros, otherwise @@ -134,24 +134,24 @@ impl super::RsVec { let mut index_counter = 0; debug_assert!(BLOCK_SIZE / WORD_SIZE == 8, "change unroll constant"); unroll!(7, |n = {0}| { - let word = self.data[block_index * BLOCK_SIZE / WORD_SIZE + n]; - if (word.count_zeros() as usize) <= rank { - rank -= word.count_zeros() as usize; + let word = self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + n]; + if (word.count_zeros() as u64) <= rank { + rank -= word.count_zeros() as u64; index_counter += WORD_SIZE; } else { - return block_index * BLOCK_SIZE + return block_index as u64 * BLOCK_SIZE + index_counter - + (1 << rank).pdep(!word).trailing_zeros() as usize; + + (1 << rank).pdep(!word).trailing_zeros() as u64; } }, n += 1); // the last word must contain the rank-th zero bit, otherwise the rank is outside the // block, and thus outside the bitvector - block_index * BLOCK_SIZE + block_index as u64 * BLOCK_SIZE + index_counter + (1 << rank) - .pdep(!self.data[block_index * BLOCK_SIZE / WORD_SIZE + 7]) - .trailing_zeros() as usize + .pdep(!self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + 7]) + .trailing_zeros() as u64 } /// Search for the superblock that contains the rank. @@ -162,8 +162,9 @@ impl super::RsVec { /// superblock in the ``select_blocks`` vector that contains the rank /// * `rank` - the rank to search for #[inline(always)] - pub(super) fn search_super_block0(&self, mut super_block: usize, rank: usize) -> usize { - let mut upper_bound = self.select_blocks[rank / SELECT_BLOCK_SIZE + 1].index_0; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + pub(super) fn search_super_block0(&self, mut super_block: usize, rank: u64) -> usize { + let mut upper_bound = self.select_blocks[(rank / SELECT_BLOCK_SIZE + 1) as usize].index_0; while upper_bound - super_block > 8 { let middle = super_block + ((upper_bound - super_block) >> 1); @@ -192,31 +193,31 @@ impl super::RsVec { /// If the rank is larger than the number of 1-bits in the bit-vector, the vector length is returned. 
#[must_use] #[allow(clippy::assertions_on_constants)] - pub fn select1(&self, mut rank: usize) -> usize { + pub fn select1(&self, mut rank: u64) -> u64 { if rank >= self.rank1 { return self.len; } - let mut super_block = - self.select_blocks[rank / crate::bit_vec::fast_rs_vec::SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; if self.super_blocks.len() > (super_block + 1) - && ((super_block + 1) * SUPER_BLOCK_SIZE - self.super_blocks[super_block + 1].zeros) + && ((super_block + 1) as u64 * SUPER_BLOCK_SIZE + - self.super_blocks[super_block + 1].zeros) <= rank { super_block = self.search_super_block1(super_block, rank); } - rank -= (super_block) * SUPER_BLOCK_SIZE - self.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.super_blocks[super_block].zeros; // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; let mut block_index = block_at_super_block; self.search_block1(rank, block_at_super_block, &mut block_index); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE + - self.blocks[block_index].zeros as u64; self.search_word_in_block1(rank, block_index) } @@ -240,7 +241,7 @@ impl super::RsVec { #[inline(always)] pub(super) fn search_block1( &self, - rank: usize, + rank: u64, block_at_super_block: usize, block_index: &mut usize, ) { @@ -249,7 +250,7 @@ impl super::RsVec { _mm256_sub_epi16, }; - if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK { + if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK as usize { debug_assert!( SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", @@ -257,6 +258,7 @@ impl super::RsVec { ); unsafe { + #[allow(clippy::cast_possible_truncation)] // false positive because constants let bit_nums = _mm256_set_epi16( (15 * BLOCK_SIZE) as i16, (14 * BLOCK_SIZE) as i16, @@ -273,7 +275,7 @@ impl super::RsVec { (3 * BLOCK_SIZE) as i16, (2 * BLOCK_SIZE) as i16, (1 * BLOCK_SIZE) as i16, - (0 * BLOCK_SIZE) as i16, + 0i16, ); let blocks = _mm256_loadu_epi16(self.blocks[*block_index..].as_ptr() as *const i16); @@ -307,7 +309,7 @@ impl super::RsVec { #[inline(always)] pub(super) fn search_block1( &self, - rank: usize, + rank: u64, block_at_super_block: usize, block_index: &mut usize, ) { @@ -315,25 +317,20 @@ impl super::RsVec { } #[inline(always)] - fn search_block1_naive( - &self, - rank: usize, - block_at_super_block: usize, - block_index: &mut usize, - ) { + fn search_block1_naive(&self, rank: u64, block_at_super_block: usize, block_index: &mut usize) { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance // this code relies on the fact that BLOCKS_PER_SUPERBLOCK blocks are in one superblock debug_assert!( - SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, + (SUPER_BLOCK_SIZE / BLOCK_SIZE) == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", 64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1 ); unroll!(4, - |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) / 2}| + |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize / 2}| // 
do not use select_unpredictable here, it degrades performance - if self.blocks.len() > *block_index + boundary && rank >= (*block_index + boundary - block_at_super_block) * BLOCK_SIZE - self.blocks[*block_index + boundary].zeros as usize { + if self.blocks.len() > *block_index + boundary && rank >= (*block_index + boundary - block_at_super_block) as u64 * BLOCK_SIZE - self.blocks[*block_index + boundary].zeros as u64 { *block_index += boundary; }, boundary /= 2); @@ -348,7 +345,7 @@ impl super::RsVec { /// * `block_index` - the index of the block to search in, this is the block in the blocks /// vector that contains the rank #[inline(always)] - pub(super) fn search_word_in_block1(&self, mut rank: usize, block_index: usize) -> usize { + pub(super) fn search_word_in_block1(&self, mut rank: u64, block_index: usize) -> u64 { // linear search for word that contains the rank. Binary search is not possible here, // because we don't have accumulated popcounts for the words. We use pdep to find the // position of the rank-th zero bit in the word, if the word contains enough zeros, otherwise @@ -356,24 +353,24 @@ impl super::RsVec { let mut index_counter = 0; debug_assert!(BLOCK_SIZE / WORD_SIZE == 8, "change unroll constant"); unroll!(7, |n = {0}| { - let word = self.data[block_index * BLOCK_SIZE / WORD_SIZE + n]; - if (word.count_ones() as usize) <= rank { - rank -= word.count_ones() as usize; + let word = self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + n]; + if (word.count_ones() as u64) <= rank { + rank -= word.count_ones() as u64; index_counter += WORD_SIZE; } else { - return block_index * BLOCK_SIZE + return block_index as u64 * BLOCK_SIZE + index_counter - + (1 << rank).pdep(word).trailing_zeros() as usize; + + (1 << rank).pdep(word).trailing_zeros() as u64; } }, n += 1); // the last word must contain the rank-th zero bit, otherwise the rank is outside of the // block, and thus outside of the bitvector - block_index * BLOCK_SIZE + block_index as u64 * BLOCK_SIZE + index_counter + (1 << rank) - .pdep(self.data[block_index * BLOCK_SIZE / WORD_SIZE + 7]) - .trailing_zeros() as usize + .pdep(self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + 7]) + .trailing_zeros() as u64 } /// Search for the superblock that contains the rank. 
@@ -384,14 +381,15 @@ impl super::RsVec { /// superblock in the ``select_blocks`` vector that contains the rank /// * `rank` - the rank to search for #[inline(always)] - pub(super) fn search_super_block1(&self, mut super_block: usize, rank: usize) -> usize { - let mut upper_bound = self.select_blocks[rank / SELECT_BLOCK_SIZE + 1].index_1; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + pub(super) fn search_super_block1(&self, mut super_block: usize, rank: u64) -> usize { + let mut upper_bound = self.select_blocks[(rank / SELECT_BLOCK_SIZE + 1) as usize].index_1; // binary search for superblock that contains the rank while upper_bound - super_block > 8 { let middle = super_block + ((upper_bound - super_block) >> 1); // using select_unpredictable does nothing here, likely because the search isn't hot - if ((middle + 1) * SUPER_BLOCK_SIZE - self.super_blocks[middle].zeros) <= rank { + if ((middle + 1) as u64 * SUPER_BLOCK_SIZE - self.super_blocks[middle].zeros) <= rank { super_block = middle; } else { upper_bound = middle; @@ -399,7 +397,8 @@ impl super::RsVec { } // linear search for superblock that contains the rank while self.super_blocks.len() > (super_block + 1) - && ((super_block + 1) * SUPER_BLOCK_SIZE - self.super_blocks[super_block + 1].zeros) + && ((super_block + 1) as u64 * SUPER_BLOCK_SIZE + - self.super_blocks[super_block + 1].zeros) <= rank { super_block += 1; diff --git a/src/bit_vec/fast_rs_vec/tests.rs b/src/bit_vec/fast_rs_vec/tests.rs index 1858248..c8537b2 100644 --- a/src/bit_vec/fast_rs_vec/tests.rs +++ b/src/bit_vec/fast_rs_vec/tests.rs @@ -23,7 +23,7 @@ fn test_random_data_rank() { 6, 7, ]); let sample = Uniform::new(0, 2); - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; for _ in 0..LENGTH { bv.append_bit(sample.sample(&mut rng)); @@ -42,17 +42,17 @@ fn test_random_data_rank() { let mut expected_rank1 = 0; let mut expected_rank0 = 0; - let data_index = rnd_index / WORD_SIZE; + let data_index = (rnd_index / WORD_SIZE) as usize; let bit_index = rnd_index % WORD_SIZE; for v in data.iter().take(data_index) { - expected_rank1 += v.count_ones() as usize; - expected_rank0 += v.count_zeros() as usize; + expected_rank1 += v.count_ones() as u64; + expected_rank0 += v.count_zeros() as u64; } if bit_index > 0 { - expected_rank1 += (data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize; - expected_rank0 += (!data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize; + expected_rank1 += (data[data_index] & ((1 << bit_index) - 1)).count_ones() as u64; + expected_rank0 += (!data[data_index] & ((1 << bit_index) - 1)).count_ones() as u64; } assert_eq!(actual_rank1, expected_rank1); @@ -205,13 +205,13 @@ fn test_only_ones_select() { #[test] fn random_data_select0() { + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; let mut bv = BitVec::with_capacity(LENGTH); let mut rng = StdRng::from_seed([ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, ]); let sample = Uniform::new(0, 2); - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; for _ in 0..LENGTH { bv.append_bit_u8(sample.sample(&mut rng) as u8); @@ -231,7 +231,7 @@ fn random_data_select0() { let mut index = 0; loop { - let zeros = data[index].count_zeros() as usize; + let zeros = data[index].count_zeros() as u64; if rank_counter + zeros > rnd_rank0 { break; } else { @@ -260,13 +260,13 @@ fn random_data_select0() { #[test] fn random_data_select1() { + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; let mut bv = 
BitVec::with_capacity(LENGTH); let mut rng = StdRng::from_seed([ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, ]); let sample = Uniform::new(0, 2); - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; for _ in 0..LENGTH { bv.append_bit_u8(sample.sample(&mut rng) as u8); @@ -286,7 +286,7 @@ fn random_data_select1() { let mut index = 0; loop { - let ones = data[index].count_ones() as usize; + let ones = data[index].count_ones() as u64; if rank_counter + ones > rnd_rank1 { break; } else { @@ -1211,8 +1211,8 @@ fn test_random_data_iter_both_ends() { } let bv = RsVec::from_bit_vec(bv); - let mut zeros = Vec::with_capacity(bv.rank0); - let mut ones = Vec::with_capacity(bv.rank1); + let mut zeros = Vec::with_capacity(bv.rank0 as usize); + let mut ones = Vec::with_capacity(bv.rank1 as usize); let mut iter0 = bv.iter0(); let mut iter1 = bv.iter1(); @@ -1226,7 +1226,7 @@ fn test_random_data_iter_both_ends() { } zeros.sort(); zeros.dedup(); - assert_eq!(zeros.len(), bv.rank0); + assert_eq!(zeros.len() as u64, bv.rank0); for _ in 0..bv.rank1 { ones.push(if sample.sample(&mut rng) < 50 { @@ -1237,7 +1237,7 @@ fn test_random_data_iter_both_ends() { } ones.sort(); ones.dedup(); - assert_eq!(ones.len(), bv.rank1); + assert_eq!(ones.len() as u64, bv.rank1); for idx in ones { assert_eq!(bv.get(idx), Some(1), "bit {} is not 1", idx); @@ -1254,7 +1254,7 @@ fn test_random_data_iter_both_ends() { // test a randomly generated bit vector for correct values in blocks #[test] fn test_block_layout() { - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; let mut bv = BitVec::with_capacity(LENGTH); let mut rng = StdRng::from_seed([ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, @@ -1271,7 +1271,7 @@ fn test_block_layout() { let mut zero_counter = 0u32; for (block_index, block) in bv.blocks.iter().enumerate() { - if block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 0 { + if block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize == 0 { zero_counter = 0; } assert_eq!( @@ -1281,9 +1281,9 @@ fn test_block_layout() { block_index, bv.blocks.len() ); - for word in bv.data[block_index * BLOCK_SIZE / WORD_SIZE..] + for word in bv.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize..] .iter() - .take(BLOCK_SIZE / WORD_SIZE) + .take((BLOCK_SIZE / WORD_SIZE) as usize) { zero_counter += word.count_zeros(); } @@ -1293,7 +1293,7 @@ fn test_block_layout() { // Github issue https://github.com/Cydhra/vers/issues/6 regression test #[test] fn test_iter1_regression_i6() { - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; let mut bv = BitVec::with_capacity(LENGTH); let mut rng = StdRng::from_seed([ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, @@ -1319,7 +1319,7 @@ fn test_iter1_regression_i6() { let mut all_bits: Vec<_> = bv.iter0().chain(bv.iter1()).collect(); all_bits.sort(); - assert_eq!(all_bits.len(), LENGTH); + assert_eq!(all_bits.len() as u64, LENGTH); } // Github issue https://github.com/Cydhra/vers/issues/8 regression test diff --git a/src/bit_vec/mask.rs b/src/bit_vec/mask.rs index a146b24..a8be369 100644 --- a/src/bit_vec/mask.rs +++ b/src/bit_vec/mask.rs @@ -48,7 +48,7 @@ where /// If the position is larger than the length of the vector, None is returned. 
#[inline] #[must_use] - pub fn get(&self, pos: usize) -> Option { + pub fn get(&self, pos: u64) -> Option { if pos >= self.vec.len { None } else { @@ -67,10 +67,10 @@ where /// [`get`]: MaskedBitVec::get #[inline] #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { + pub fn get_unchecked(&self, pos: u64) -> u64 { ((self.bin_op)( - self.vec.data[pos / WORD_SIZE], - self.mask.data[pos / WORD_SIZE], + self.vec.data[(pos / WORD_SIZE) as usize], + self.mask.data[(pos / WORD_SIZE) as usize], ) >> (pos % WORD_SIZE)) & 1 } @@ -79,7 +79,7 @@ where /// If the position is larger than the length of the vector, None is returned. #[inline] #[must_use] - pub fn is_bit_set(&self, pos: usize) -> Option { + pub fn is_bit_set(&self, pos: u64) -> Option { if pos >= self.vec.len { None } else { @@ -97,7 +97,7 @@ where /// [`is_bit_set`]: MaskedBitVec::is_bit_set #[inline] #[must_use] - pub fn is_bit_set_unchecked(&self, pos: usize) -> bool { + pub fn is_bit_set_unchecked(&self, pos: u64) -> bool { self.get_unchecked(pos) != 0 } @@ -108,7 +108,7 @@ where /// If the length of the query is larger than 64, None is returned. #[inline] #[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option { + pub fn get_bits(&self, pos: u64, len: u64) -> Option { if len > WORD_SIZE || len == 0 { return None; } @@ -138,12 +138,13 @@ where #[must_use] #[allow(clippy::inline_always)] #[allow(clippy::comparison_chain)] // rust-clippy #5354 + #[allow(clippy::cast_possible_truncation)] // safe due to the division #[inline] - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); let partial_word = (self.bin_op)( - self.vec.data[pos / WORD_SIZE], - self.mask.data[pos / WORD_SIZE], + self.vec.data[(pos / WORD_SIZE) as usize], + self.mask.data[(pos / WORD_SIZE) as usize], ) >> (pos % WORD_SIZE); if pos % WORD_SIZE + len == WORD_SIZE { @@ -152,8 +153,8 @@ where partial_word & ((1 << (len % WORD_SIZE)) - 1) } else { let next_half = (self.bin_op)( - self.vec.data[pos / WORD_SIZE + 1], - self.mask.data[pos / WORD_SIZE + 1], + self.vec.data[(pos / WORD_SIZE + 1) as usize], + self.mask.data[(pos / WORD_SIZE + 1) as usize], ) << (WORD_SIZE - pos % WORD_SIZE); (partial_word | next_half) & ((1 << (len % WORD_SIZE)) - 1) @@ -167,7 +168,7 @@ where #[inline] #[must_use] pub fn count_zeros(&self) -> u64 { - self.vec.len as u64 - self.count_ones() + self.vec.len - self.count_ones() } /// Return the number of ones in the masked bit vector. @@ -177,10 +178,10 @@ where pub fn count_ones(&self) -> u64 { let mut ones = self .iter_limbs() - .take(self.vec.len / WORD_SIZE) + .take((self.vec.len / WORD_SIZE) as usize) .map(|limb| u64::from(limb.count_ones())) .sum(); - if self.vec.len % WORD_SIZE > 0 { + if !self.vec.len.is_multiple_of(WORD_SIZE) { ones += u64::from( ((self.bin_op)( *self.vec.data.last().unwrap(), diff --git a/src/bit_vec/mod.rs b/src/bit_vec/mod.rs index 056091e..2f34134 100644 --- a/src/bit_vec/mod.rs +++ b/src/bit_vec/mod.rs @@ -14,7 +14,7 @@ pub mod sparse; pub mod mask; /// Size of a word in bitvectors. All vectors operate on 64-bit words. -const WORD_SIZE: usize = 64; +const WORD_SIZE: u64 = 64; /// Type alias for masked bitvectors that implement a simple bitwise binary operation. 
/// The first lifetime is for the bit vector that is being masked, the second lifetime is for the @@ -60,7 +60,7 @@ pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BitVec { data: Vec, - len: usize, + len: u64, } impl BitVec { @@ -75,9 +75,10 @@ impl BitVec { /// The bit vector will be able to hold at least `capacity` bits without reallocating. /// More memory may be allocated according to the underlying allocation strategy. #[must_use] - pub fn with_capacity(capacity: usize) -> Self { + pub fn with_capacity(capacity: u64) -> Self { Self { - data: Vec::with_capacity(capacity / WORD_SIZE + 1), + #[allow(clippy::cast_possible_truncation)] // safe due to the division + data: Vec::with_capacity((capacity / WORD_SIZE + 1) as usize), len: 0, } } @@ -85,22 +86,19 @@ impl BitVec { /// Create a new bit vector with all zeros and the given length. /// The length is measured in bits. #[must_use] - pub fn from_zeros(len: usize) -> Self { - let mut data = vec![0; len / WORD_SIZE]; - if len % WORD_SIZE != 0 { - data.push(0); - } + pub fn from_zeros(len: u64) -> Self { + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let data = vec![0; len.div_ceil(WORD_SIZE) as usize]; Self { data, len } } /// Create a new bit vector with all ones and the given length. /// The length is measured in bits. #[must_use] - pub fn from_ones(len: usize) -> Self { - let mut data = vec![u64::MAX; len / WORD_SIZE]; - if len % WORD_SIZE != 0 { - data.push((1 << (len % WORD_SIZE)) - 1); - } + pub fn from_ones(len: u64) -> Self { + // junk data is allowed to be any bit + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let data = vec![u64::MAX; len.div_ceil(WORD_SIZE) as usize]; Self { data, len } } @@ -128,7 +126,7 @@ impl BitVec { /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits(bits: &[u8]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit(b.into())); bv } @@ -146,7 +144,7 @@ impl BitVec { /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u16(bits: &[u16]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit_u16(b)); bv } @@ -164,7 +162,7 @@ impl BitVec { /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u32(bits: &[u32]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit_u32(b)); bv } @@ -182,7 +180,7 @@ impl BitVec { /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u64(bits: &[u64]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit(b)); bv } @@ -221,7 +219,7 @@ impl BitVec { I: IntoIterator, { let iter = iter.into_iter(); - let mut bv = Self::with_capacity(iter.size_hint().0); + let mut bv = Self::with_capacity(iter.size_hint().0 as u64); for bit in iter { bv.append_bit(bit.into()); } @@ -253,7 +251,7 @@ impl BitVec { /// [`from_limbs_iter`]: BitVec::from_limbs_iter #[must_use] pub fn from_limbs(words: &[u64]) -> Self { - let len = words.len() * WORD_SIZE; + let len = words.len() as u64 * WORD_SIZE; Self { data: words.to_vec(), len, @@ -318,15 +316,15 @@ impl BitVec { 
/// [`from_limbs_iter`]: BitVec::from_limbs_iter #[must_use] pub fn from_vec(data: Vec) -> Self { - let len = data.len() * WORD_SIZE; + let len = data.len() as u64 * WORD_SIZE; Self { data, len } } - fn pack_bits(sequence: &[T], bits_per_element: usize) -> Self + fn pack_bits(sequence: &[T], bits_per_element: u64) -> Self where T: Into + Copy, { - let mut bv = Self::with_capacity(sequence.len() * bits_per_element); + let mut bv = Self::with_capacity(sequence.len() as u64 * bits_per_element); for &word in sequence { if bits_per_element <= MAX_BITS { bv.append_bits(word.into(), bits_per_element); @@ -372,7 +370,7 @@ impl BitVec { /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self { + pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: u64) -> Self { Self::pack_bits::<_, 64>(sequence, bits_per_element) } @@ -406,7 +404,7 @@ impl BitVec { /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self { + pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: u64) -> Self { Self::pack_bits::<_, 32>(sequence, bits_per_element) } @@ -440,7 +438,7 @@ impl BitVec { /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self { + pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: u64) -> Self { Self::pack_bits::<_, 16>(sequence, bits_per_element) } @@ -474,7 +472,7 @@ impl BitVec { /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 #[must_use] - pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self { + pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: u64) -> Self { Self::pack_bits::<_, 8>(sequence, bits_per_element) } @@ -500,13 +498,13 @@ impl BitVec { /// [`append_bit_u8`]: BitVec::append_bit_u8 /// [`append_word`]: BitVec::append_word pub fn append(&mut self, bit: bool) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(0); } if bit { - self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= 1 << (self.len % WORD_SIZE); } else { - self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] &= !(1 << (self.len % WORD_SIZE)); } self.len += 1; } @@ -529,14 +527,15 @@ impl BitVec { /// /// assert!(bv.is_empty()); /// ``` - pub fn drop_last(&mut self, n: usize) { + pub fn drop_last(&mut self, n: u64) { if n > self.len { self.data.clear(); self.len = 0; return; } - let new_limb_count = (self.len - n).div_ceil(WORD_SIZE); + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let new_limb_count = (self.len - n).div_ceil(WORD_SIZE) as usize; // cut off limbs that we no longer need if new_limb_count < self.data.len() { @@ -574,13 +573,13 @@ impl BitVec { /// [`append_bit_u8`]: BitVec::append_bit_u8 /// [`append_word`]: BitVec::append_word pub fn append_bit(&mut self, bit: u64) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(0); } if bit % 2 == 1 { - self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= 1 << (self.len % WORD_SIZE); } 
else { - self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] &= !(1 << (self.len % WORD_SIZE)); } self.len += 1; @@ -653,12 +652,12 @@ impl BitVec { /// [`append_bit_u16`]: BitVec::append_bit_u16 /// [`append_bit_u8`]: BitVec::append_bit_u8 pub fn append_word(&mut self, word: u64) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(word); } else { // zero out the unused bits before or-ing the new one, to ensure no garbage data remains - self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE)); - self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] &= !(u64::MAX << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] |= word << (self.len % WORD_SIZE); self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE)); } @@ -685,15 +684,15 @@ impl BitVec { /// /// # Panics /// Panics if `len` is larger than 64. - pub fn append_bits(&mut self, bits: u64, len: usize) { + pub fn append_bits(&mut self, bits: u64, len: u64) { assert!(len <= 64, "Cannot append more than 64 bits"); - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(bits); } else { // zero out the unused bits before or-ing the new one, to ensure no garbage data remains - self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE)); - self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] &= !(u64::MAX << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] |= bits << (self.len % WORD_SIZE); if self.len % WORD_SIZE + len > WORD_SIZE { self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE)); @@ -724,11 +723,11 @@ impl BitVec { /// /// [`append_bits`]: BitVec::append_bits /// [`drop_last`]: BitVec::drop_last - pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) { - if self.len % WORD_SIZE == 0 { + pub fn append_bits_unchecked(&mut self, bits: u64, len: u64) { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(bits); } else { - self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= bits << (self.len % WORD_SIZE); if self.len % WORD_SIZE + len > WORD_SIZE { self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE)); @@ -743,10 +742,11 @@ impl BitVec { /// This function is guaranteed to reallocate the underlying vector at most once. pub fn extend_bitvec(&mut self, other: &Self) { // reserve space for the new bits, ensuring at most one re-allocation + #[allow(clippy::cast_possible_truncation)] // safe due to the division self.data - .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len()); + .reserve((self.len + other.len).div_ceil(WORD_SIZE) as usize - self.data.len()); - let full_limbs = other.len() / WORD_SIZE; + let full_limbs = (other.len() / WORD_SIZE) as usize; for i in 0..full_limbs { self.append_bits(other.data[i], WORD_SIZE); } @@ -759,7 +759,7 @@ impl BitVec { /// Return the length of the bit vector. The length is measured in bits. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -785,7 +785,7 @@ impl BitVec { /// /// # Panics /// If the position is larger than the length of the vector, the function panics. 
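For the migration guide: `BitVec::len()` now returns `u64`, and positional arguments such as the one of `flip_bit` below follow suit, so loops over bit positions iterate `u64` without casts. A small sketch (assuming a 2.0 build; not part of the patch):

```rust
use vers_vecs::BitVec;

fn main() {
    let mut bv = BitVec::from_zeros(100);
    bv.flip_bit(42);
    // positions are u64 now; no cast needed against len()
    for pos in 0..bv.len() {
        assert_eq!(bv.get(pos).unwrap(), u64::from(pos == 42));
    }
}
```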
- pub fn flip_bit(&mut self, pos: usize) { + pub fn flip_bit(&mut self, pos: u64) { assert!(pos < self.len, "Index out of bounds"); self.flip_bit_unchecked(pos); } @@ -800,8 +800,8 @@ impl BitVec { /// This will not corrupt memory. /// /// [`flip_bit`]: BitVec::flip_bit - pub fn flip_bit_unchecked(&mut self, pos: usize) { - self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE); + pub fn flip_bit_unchecked(&mut self, pos: u64) { + self.data[(pos / WORD_SIZE) as usize] ^= 1 << (pos % WORD_SIZE); } /// Return the bit at the given position. @@ -820,8 +820,10 @@ impl BitVec { /// assert_eq!(bv.get(1), Some(0)); /// assert_eq!(bv.get(2), Some(1)); /// ``` + /// + /// [`get_unchecked`]: Self::get_unchecked #[must_use] - pub fn get(&self, pos: usize) -> Option { + pub fn get(&self, pos: u64) -> Option { if pos >= self.len { None } else { @@ -839,8 +841,8 @@ impl BitVec { /// /// [`get`]: BitVec::get #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { - (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1 + pub fn get_unchecked(&self, pos: u64) -> u64 { + (self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE)) & 1 } /// Set the bit at the given position. @@ -865,7 +867,7 @@ impl BitVec { /// otherwise it will return an empty `Ok`. /// /// [`set_unchecked`]: BitVec::set_unchecked - pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> { + pub fn set(&mut self, pos: u64, value: u64) -> Result<(), &str> { if pos >= self.len { Err("out of range") } else { @@ -883,8 +885,9 @@ impl BitVec { /// Use [`set`] to properly handle this case with a `Result`. /// /// [`set`]: BitVec::set - pub fn set_unchecked(&mut self, pos: usize, value: u64) { - self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE))) + pub fn set_unchecked(&mut self, pos: u64, value: u64) { + self.data[(pos / WORD_SIZE) as usize] = (self.data[(pos / WORD_SIZE) as usize] + & !(0x1 << (pos % WORD_SIZE))) | ((value & 0x1) << (pos % WORD_SIZE)); } @@ -906,7 +909,7 @@ impl BitVec { /// /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked #[must_use] - pub fn is_bit_set(&self, pos: usize) -> Option { + pub fn is_bit_set(&self, pos: u64) -> Option { if pos >= self.len { None } else { @@ -923,7 +926,7 @@ impl BitVec { /// /// [`is_bit_set`]: BitVec::is_bit_set #[must_use] - pub fn is_bit_set_unchecked(&self, pos: usize) -> bool { + pub fn is_bit_set_unchecked(&self, pos: u64) -> bool { self.get_unchecked(pos) != 0 } @@ -937,7 +940,7 @@ impl BitVec { /// The first bit at `pos` is the most significant bit of the return value /// limited to `len` bits. 
#[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option { + pub fn get_bits(&self, pos: u64, len: u64) -> Option { if len > WORD_SIZE || len == 0 { return None; } @@ -969,13 +972,14 @@ impl BitVec { #[allow(clippy::comparison_chain)] // readability #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); - let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE); + let partial_word = self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE); if pos % WORD_SIZE + len <= WORD_SIZE { partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } else { - (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE))) + (partial_word + | (self.data[(pos / WORD_SIZE + 1) as usize] << (WORD_SIZE - pos % WORD_SIZE))) & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } } @@ -1006,7 +1010,7 @@ impl BitVec { #[must_use] #[allow(clippy::inline_always)] #[inline(always)] // to gain optimization if n is constant - pub fn unpack_element(&self, index: usize, n: usize) -> Option { + pub fn unpack_element(&self, index: u64, n: u64) -> Option { self.get_bits(index * n, n) } @@ -1028,7 +1032,7 @@ impl BitVec { #[must_use] #[allow(clippy::inline_always)] #[inline(always)] // to gain optimization if n is constant - pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 { + pub fn unpack_element_unchecked(&self, index: u64, n: u64) -> u64 { self.get_bits_unchecked(index * n, n) } @@ -1039,11 +1043,11 @@ impl BitVec { #[must_use] #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check pub fn count_ones(&self) -> u64 { - let mut ones: u64 = self.data[0..self.len / WORD_SIZE] + let mut ones: u64 = self.data[0..(self.len / WORD_SIZE) as usize] .iter() .map(|limb| u64::from(limb.count_ones())) .sum(); - if self.len % WORD_SIZE > 0 { + if !self.len.is_multiple_of(WORD_SIZE) { ones += u64::from( (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(), ); @@ -1059,7 +1063,7 @@ impl BitVec { /// [`count_ones`]: BitVec::count_ones #[must_use] pub fn count_zeros(&self) -> u64 { - self.len as u64 - self.count_ones() + self.len - self.count_ones() } /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily @@ -1226,7 +1230,9 @@ impl BitVec { /// containing the original vector. /// /// See also: [`split_at_unchecked`] - pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> { + /// + /// [`split_at_unchecked`]: Self::split_at_unchecked + pub fn split_at(self, at: u64) -> Result<(Self, Self), Self> { if at > self.len { Err(self) } else { @@ -1241,8 +1247,10 @@ impl BitVec { /// If the index is larger than the length of the vector the function will panic or run /// out of memory. /// Use [`split_at`] to properly handle this case. 
+ /// + /// [`split_at`]: Self::split_at #[must_use] - pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) { + pub fn split_at_unchecked(mut self, at: u64) -> (Self, Self) { let other_len = self.len - at; let mut other = Self::with_capacity(other_len); @@ -1250,8 +1258,8 @@ impl BitVec { return (self, other); } - let first_limb = at / WORD_SIZE; - let last_limb = self.len / WORD_SIZE; + let first_limb = (at / WORD_SIZE) as usize; + let last_limb = (self.len / WORD_SIZE) as usize; // First, we figure out the number of bits from the first limb to retain in this vector: let leading_partial = at % WORD_SIZE; @@ -1322,7 +1330,7 @@ impl From> for BitVec { impl Extend for BitVec { fn extend>(&mut self, iter: T) { for v in iter { - self.extend_bitvec(&v) + self.extend_bitvec(&v); } } } @@ -1330,7 +1338,7 @@ impl Extend for BitVec { impl<'t> Extend<&'t BitVec> for BitVec { fn extend>(&mut self, iter: T) { for v in iter { - self.extend_bitvec(v) + self.extend_bitvec(v); } } } @@ -1377,7 +1385,7 @@ impl Eq for BitVec {} impl Hash for BitVec { fn hash(&self, state: &mut H) { - state.write_usize(self.len); + state.write_u64(self.len); if self.len > 0 { self.data[0..self.data.len() - 1] .iter() diff --git a/src/bit_vec/sparse.rs b/src/bit_vec/sparse.rs index bc0dbe5..261df70 100644 --- a/src/bit_vec/sparse.rs +++ b/src/bit_vec/sparse.rs @@ -2,7 +2,7 @@ //! The vector requires `O(n log u/n) + 2n + o(n)` bits of space, where `n` is the number of bits in the vector //! and `u` is the number of 1-bits. //! The vector is constructed from a sorted list of indices of 1-bits, or from an existing -//! [`BitVec`](crate::BitVec). +//! [`BitVec`]. use crate::{BitVec, EliasFanoVec}; @@ -80,7 +80,7 @@ impl SparseRSVec { /// - `input`: The input `BitVec` to compress. #[must_use] pub fn from_bitvec(input: &BitVec) -> Self { - let len = input.len() as u64; + let len = input.len(); Self::new( input .iter() @@ -127,7 +127,7 @@ impl SparseRSVec { /// [`get`]: #method.get #[must_use] pub fn from_bitvec_inverted(input: &BitVec) -> Self { - let len = input.len() as u64; + let len = input.len(); Self::new( input .iter() @@ -170,6 +170,8 @@ impl SparseRSVec { /// # Panics /// If `i` is out of bounds the function might panic or produce incorrect results. /// Use [`get`] for a checked version. + /// + /// [`get`]: Self::get #[must_use] pub fn get_unchecked(&self, i: u64) -> u64 { self.is_set_unchecked(i).into() @@ -188,7 +190,7 @@ impl SparseRSVec { /// /// If the rank is larger than the number of sparse bits in the vector, the vector length is returned. 
#[must_use] - pub fn select1(&self, i: usize) -> u64 { + pub fn select1(&self, i: u64) -> u64 { self.vec.get(i).unwrap_or(self.len) } @@ -383,7 +385,7 @@ mod tests { #[test] fn test_fuzzy() { - const L: usize = 100_000; + const L: u64 = 100_000; let mut bv = BitVec::from_zeros(L); let mut rng = StdRng::from_seed([0; 32]); @@ -395,11 +397,11 @@ mod tests { let mut ones = 0; for i in 0..L { - assert_eq!(bv.get(i), sparse.get(i as u64)); - assert_eq!(ones, sparse.rank1(i as u64)); - assert_eq!(i as u64 - ones, sparse.rank0(i as u64)); + assert_eq!(bv.get(i), sparse.get(i)); + assert_eq!(ones, sparse.rank1(i)); + assert_eq!(i - ones, sparse.rank0(i)); if bv.get(i) == Some(1) { - assert_eq!(i, sparse.select1(ones as usize).try_into().unwrap()); + assert_eq!(i, sparse.select1(ones).try_into().unwrap()); ones += 1; } } diff --git a/src/bit_vec/tests.rs b/src/bit_vec/tests.rs index 0fa3f6d..0e01c2c 100644 --- a/src/bit_vec/tests.rs +++ b/src/bit_vec/tests.rs @@ -619,8 +619,8 @@ fn test_unpack() { let bv = BitVec::pack_sequence_u64(&sequence, 10); for (i, &val) in sequence.iter().enumerate() { - assert_eq!(bv.unpack_element(i, 10), Some(val)); - assert_eq!(bv.unpack_element_unchecked(i, 10), val); + assert_eq!(bv.unpack_element(i as u64, 10), Some(val)); + assert_eq!(bv.unpack_element_unchecked(i as u64, 10), val); } assert_eq!(bv.unpack_element(8, 10), None); diff --git a/src/elias_fano/mod.rs b/src/elias_fano/mod.rs index 75b009c..28aa4c6 100644 --- a/src/elias_fano/mod.rs +++ b/src/elias_fano/mod.rs @@ -17,7 +17,7 @@ use std::cmp::max; /// friendly. But for large clusters this takes too long, so we switch to binary search. /// We use 4 because benchmarks suggested that this was the best trade-off between speed for average /// case and for worst case. -const BIN_SEARCH_THRESHOLD: usize = 4; +const BIN_SEARCH_THRESHOLD: u64 = 4; /// An Elias-Fano encoded vector of u64 values. The vector is immutable, which is exploited by /// limiting the word length of elements to the minimum required to represent all elements. 
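For the migration guide: the following hunks switch `EliasFanoVec`'s length and index types to `u64` as well. A short sketch of downstream usage (assuming a 2.0 build; not part of the patch):

```rust
use vers_vecs::EliasFanoVec;

fn main() {
    let ef = EliasFanoVec::from_slice(&[1, 4, 9, 16, 25]);
    assert_eq!(ef.len(), 5); // len() is u64 now
    // indices are u64, so no casts when driving the loop with len()
    for i in 0..ef.len() {
        assert!(ef.get(i).is_some());
    }
}
```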
@@ -61,8 +61,8 @@ pub struct EliasFanoVec { lower_vec: BitVec, universe_zero: u64, universe_max: u64, - lower_len: usize, - len: usize, + lower_len: u64, + len: u64, } impl EliasFanoVec { @@ -102,23 +102,23 @@ impl EliasFanoVec { let universe_zero = data[0]; let universe_bound = data[data.len() - 1] - universe_zero; - let log_n = ((data.len() + 2) as f64).log2().ceil() as usize; - let bits_per_number = (max(universe_bound, 2) as f64).log2().ceil() as usize; - let bits_for_upper_values = (max(data.len(), 2) as f64).log2().ceil() as usize; + let log_n = ((data.len() + 2) as f64).log2().ceil() as u64; + let bits_per_number = (max(universe_bound, 2) as f64).log2().ceil() as u64; + let bits_for_upper_values = (max(data.len(), 2) as f64).log2().ceil() as u64; let lower_width = max(bits_per_number, log_n) - bits_for_upper_values; assert!(lower_width < 64); let mut upper_vec = - BitVec::from_zeros(2 + data.len() + (universe_bound >> lower_width) as usize); - let mut lower_vec = BitVec::with_capacity(data.len() * lower_width); + BitVec::from_zeros(2 + data.len() as u64 + (universe_bound >> lower_width)); + let mut lower_vec = BitVec::with_capacity(data.len() as u64 * lower_width); for (i, &word) in data.iter().enumerate() { let word = word - universe_zero; - let upper = (word >> lower_width) as usize; + let upper = word >> lower_width; let lower = word & ((1 << lower_width) - 1); - upper_vec.flip_bit_unchecked(upper + i + 1); + upper_vec.flip_bit_unchecked(upper + i as u64 + 1); lower_vec.append_bits_unchecked(lower, lower_width); } @@ -128,13 +128,13 @@ impl EliasFanoVec { universe_zero, universe_max: data[data.len() - 1], lower_len: lower_width, - len: data.len(), + len: data.len() as u64, } } /// Returns the number of elements in the vector. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -147,7 +147,7 @@ impl EliasFanoVec { /// Returns the element at the given index, or `None` if the index exceeds the length of the /// vector. #[must_use] - pub fn get(&self, index: usize) -> Option { + pub fn get(&self, index: u64) -> Option { if index >= self.len() { return None; } @@ -164,8 +164,10 @@ impl EliasFanoVec { /// /// Note, that select in bit-vectors returns an index, while select in Elias-Fano returns the /// element at the given rank. + /// + /// [`get`]: Self::get #[must_use] - pub fn select(&self, rank: usize) -> Option { + pub fn select(&self, rank: u64) -> Option { self.get(rank) } @@ -178,12 +180,12 @@ impl EliasFanoVec { /// [`get`]: EliasFanoVec::get #[must_use] #[allow(clippy::cast_possible_truncation)] - pub fn get_unchecked(&self, index: usize) -> u64 { + pub fn get_unchecked(&self, index: u64) -> u64 { let upper = self.upper_vec.select1(index) - index - 1; let lower = self .lower_vec .get_bits_unchecked(index * self.lower_len, self.lower_len); - ((upper << self.lower_len) as u64 | lower) + self.universe_zero + ((upper << self.lower_len) | lower) + self.universe_zero } /// Returns the largest element that is smaller than or equal to the query. 
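For the migration guide: value queries such as predecessor and successor keep their `u64` value arguments, while `rank` (further below) now returns `u64`. A sketch (assuming a 2.0 build and that the checked `predecessor` keeps returning `Option<u64>` as in 1.x; not part of the patch):

```rust
use vers_vecs::EliasFanoVec;

fn main() {
    let ef = EliasFanoVec::from_slice(&[2, 3, 5, 7, 11]);
    // largest element <= 6
    assert_eq!(ef.predecessor(6), Some(5));
    // number of elements strictly smaller than 6, now as u64
    assert_eq!(ef.rank(6), 3);
}
```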
@@ -214,15 +216,15 @@ impl EliasFanoVec { #[allow(clippy::cast_possible_truncation)] // we will fix this in a breaking update fn search_element_in_block( &self, - start_index_upper: usize, - start_index_lower: usize, + start_index_upper: u64, + start_index_lower: u64, query: u64, query_upper: u64, query_lower: u64, query_masked_upper: u64, ) -> u64 { // the direction in which we search for the element, dependent on the UPWARD flag - let direction: isize = if UPWARD { 1 } else { -1 }; + let direction: i64 = if UPWARD { 1 } else { -1 }; // the function to check if the current candidate no longer fulfills the query // criterion @@ -246,12 +248,12 @@ impl EliasFanoVec { // last element. if self .upper_vec - .get_unchecked((start_index_upper as isize + direction) as usize) + .get_unchecked((start_index_upper as i64 + direction) as u64) > 0 { // get the first value from the lower vector that corresponds to the query prefix let mut lower_candidate = self.lower_vec.get_bits_unchecked( - (start_index_lower as isize) as usize * self.lower_len, + (start_index_lower as i64) as u64 * self.lower_len, self.lower_len, ); @@ -263,11 +265,11 @@ impl EliasFanoVec { let mut cursor = direction; while self .upper_vec - .get_unchecked((start_index_upper as isize + cursor + direction) as usize) + .get_unchecked((start_index_upper as i64 + cursor + direction) as u64) > 0 { let next_candidate = self.lower_vec.get_bits_unchecked( - (start_index_lower as isize + cursor) as usize * self.lower_len, + (start_index_lower as i64 + cursor) as u64 * self.lower_len, self.lower_len, ); @@ -277,13 +279,13 @@ impl EliasFanoVec { || (!UPWARD && next_candidate < query_lower) { return if INDEX { - start_index_lower as u64 + cursor as u64 + start_index_lower + cursor as u64 } else { (query_masked_upper | lower_candidate) + self.universe_zero }; } else if next_candidate == query_lower { return if INDEX { - start_index_lower as u64 + cursor as u64 + start_index_lower + cursor as u64 } else { (query_masked_upper | next_candidate) + self.universe_zero }; @@ -297,23 +299,20 @@ impl EliasFanoVec { #[allow(clippy::comparison_chain)] // readability if cursor.unsigned_abs() == BIN_SEARCH_THRESHOLD { let block_end = if UPWARD { - self.upper_vec.select0((query_upper as isize + 1) as usize) - - query_upper as usize + self.upper_vec.select0((query_upper as i64 + 1) as u64) + - query_upper - 2 } else { - self.upper_vec.select0((query_upper as isize) as usize) - - query_upper as usize + self.upper_vec.select0((query_upper as i64) as u64) - query_upper }; let mut upper_bound; let mut lower_bound; if UPWARD { upper_bound = block_end; - lower_bound = - (start_index_lower as isize + cursor - direction) as usize; + lower_bound = (start_index_lower as i64 + cursor - direction) as u64; } else { - upper_bound = - (start_index_lower as isize + cursor - direction) as usize; + upper_bound = (start_index_lower as i64 + cursor - direction) as u64; lower_bound = block_end; } @@ -332,10 +331,10 @@ impl EliasFanoVec { upper_bound = middle; } else if middle_candidate == query_lower { return if INDEX { - cursor = middle as isize; + cursor = middle as i64; // while the element at cursor - 1 is equal, reduce cursor while self.lower_vec.get_bits_unchecked( - (cursor - direction) as usize * self.lower_len, + (cursor - direction) as u64 * self.lower_len, self.lower_len, ) == query_lower { @@ -362,7 +361,7 @@ impl EliasFanoVec { || (!UPWARD && final_bound > block_end) { let check_candidate = self.lower_vec.get_bits_unchecked( - (final_bound as isize + direction) as 
usize * self.lower_len,
+                        (final_bound as i64 + direction) as u64 * self.lower_len,
                         self.lower_len,
                     );
@@ -371,7 +370,7 @@ impl EliasFanoVec {
                             // if the element at lower_bound + 1 is smaller than the query, we include it
                             // in the count, so we return lower_bound + 1 + 1, as all elements in the
                             // 1-block are smaller than the query
-                            (final_bound as isize + direction + 1) as u64
+                            (final_bound as i64 + direction + 1) as u64
                         } else {
                             (query_masked_upper | check_candidate) + self.universe_zero
                         };
@@ -380,7 +379,7 @@ impl EliasFanoVec {

                     // update the cursor because we use it for the final index calculation
                     if INDEX {
-                        cursor = final_bound as isize + direction;
+                        cursor = final_bound as i64 + direction;
                     }
                     break;
                 }
@@ -390,7 +389,7 @@ impl EliasFanoVec {
                 // the loop ended because the element at cursor has a larger upper index,
                 // so we return the previous element count
                 // (element at cursor - 1, +1 because count is not 0 based)
-                start_index_lower as u64 + cursor as u64
+                start_index_lower + cursor as u64
             } else {
                 (query_masked_upper | lower_candidate) + self.universe_zero
             };
@@ -401,9 +400,9 @@ impl EliasFanoVec {
                 // all elements in the 1-block are larger than the query,
                 // so we return the last element count
                 // (start_index_lower - 1, +1 because count is not 0 based)
-                start_index_lower as u64
+                start_index_lower
             } else {
-                self.get_unchecked((start_index_lower as isize - direction) as usize)
+                self.get_unchecked((start_index_lower as i64 - direction) as u64)
             }
         }
@@ -427,7 +426,7 @@ impl EliasFanoVec {
         let n = n - self.universe_zero;

         // split the query into the upper and lower part
-        let upper_query = (n >> self.lower_len) as usize;
+        let upper_query = n >> self.lower_len;
         let lower_query = n & ((1 << self.lower_len) - 1);

         // calculate the lower bound within the lower vector where our predecessor can be found. Since
@@ -439,13 +438,13 @@ impl EliasFanoVec {
         // calculate the upper part of the result. This only works if the next value in the upper
         // vector is set, otherwise there is no value in the entire vector with this bit-prefix,
         // and we need to search the largest prefix smaller than the query.
-        let result_upper = (upper_query << self.lower_len) as u64;
+        let result_upper = upper_query << self.lower_len;

         self.search_element_in_block::(
             lower_bound_upper_index,
             lower_bound_lower_index,
             n,
-            upper_query as u64,
+            upper_query,
             lower_query,
             result_upper,
         )
@@ -488,7 +487,7 @@ impl EliasFanoVec {
         let n = n - self.universe_zero;

         // split the query into the upper and lower part
-        let upper_query = (n >> self.lower_len) as usize;
+        let upper_query = n >> self.lower_len;
         let lower_query = n & ((1 << self.lower_len) - 1);

         // calculate the upper bound within the lower vector where our successor can be found. Since
@@ -500,13 +499,13 @@ impl EliasFanoVec {
         // calculate the upper part of the result. This only works if the next value in the upper
         // vector is set, otherwise there is no value in the entire vector with this bit-prefix,
         // and we need to search the largest prefix smaller than the query.
- let result_upper = (upper_query << self.lower_len) as u64; + let result_upper = upper_query << self.lower_len; self.search_element_in_block::( upper_bound_upper_index, upper_bound_lower_index, n, - upper_query as u64, + upper_query, lower_query, result_upper, ) @@ -531,7 +530,7 @@ impl EliasFanoVec { /// assert_eq!(elias_fano_vec.delta(3), Some(80)); /// ``` #[must_use] - pub fn delta(&self, index: usize) -> Option { + pub fn delta(&self, index: u64) -> Option { if index >= self.len() { return None; } @@ -549,7 +548,7 @@ impl EliasFanoVec { ) } else { let query_upper_part = (upper_index - index - 1) << self.lower_len; - let query_number = query_upper_part as u64 + let query_number = query_upper_part | self .lower_vec .get_bits_unchecked(index * self.lower_len, self.lower_len); @@ -561,7 +560,7 @@ impl EliasFanoVec { let lower_element_upper_index = self.upper_vec.select1(index - 1); let lower_element_upper = lower_element_upper_index - (index - 1) - 1; - let lower_elem = ((lower_element_upper as u64) << self.lower_len as u64) + let lower_elem = (lower_element_upper << self.lower_len) | self .lower_vec .get_bits_unchecked((index - 1) * self.lower_len, self.lower_len); @@ -572,10 +571,9 @@ impl EliasFanoVec { /// Return how many elements strictly smaller than the query element are present in the vector. #[must_use] - #[allow(clippy::cast_possible_truncation)] // we will fix this in a breaking update pub fn rank(&self, value: u64) -> u64 { if value > self.universe_max || self.is_empty() { - return self.len() as u64; + return self.len(); } if value < self.universe_zero { @@ -585,12 +583,12 @@ impl EliasFanoVec { let value = value - self.universe_zero; let upper = value >> self.lower_len; let lower = value & ((1 << self.lower_len) - 1); - let query_begin = self.upper_vec.select0(upper as usize); - let lower_index = query_begin as u64 - upper; + let query_begin = self.upper_vec.select0(upper); + let lower_index = query_begin - upper; self.search_element_in_block::( query_begin, - lower_index as usize, + lower_index, value, upper, lower, diff --git a/src/elias_fano/tests.rs b/src/elias_fano/tests.rs index b7b0d42..6457006 100644 --- a/src/elias_fano/tests.rs +++ b/src/elias_fano/tests.rs @@ -62,10 +62,10 @@ fn test_randomized_elias_fano() { let ef = EliasFanoVec::from_slice(&seq); - assert_eq!(ef.len(), seq.len()); + assert_eq!(ef.len(), seq.len() as u64); for (i, &v) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), v); + assert_eq!(ef.get_unchecked(i as u64), v); } for _ in 0..1000 { @@ -110,7 +110,7 @@ fn test_clustered_ef() { let ef = EliasFanoVec::from_slice(&seq); for (i, &x) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), x, "expected {:b}", x); + assert_eq!(ef.get_unchecked(i as u64), x, "expected {:b}", x); assert_eq!(ef.predecessor_unchecked(x), x); assert_eq!(ef.successor_unchecked(x), x); } @@ -398,10 +398,10 @@ fn test_randomized_elias_fano_successor() { let ef = EliasFanoVec::from_slice(&seq); - assert_eq!(ef.len(), seq.len()); + assert_eq!(ef.len(), seq.len() as u64); for (i, &v) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), v); + assert_eq!(ef.get_unchecked(i as u64), v); } for _ in 0..1000 { diff --git a/src/lib.rs b/src/lib.rs index 2e1c297..20958a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,19 +1,10 @@ -#![cfg_attr( - all( - feature = "simd", - target_arch = "x86_64", - target_feature = "avx", - target_feature = "avx2", - target_feature = "avx512f", - target_feature = "avx512bw", - ), - feature(stdarch_x86_avx512) -)] 
#![warn(missing_docs)]
#![allow(clippy::module_name_repetitions)]
#![allow(clippy::assertions_on_constants)] // for asserts warning about incompatible constant values
#![allow(clippy::inline_always)] // we actually measure performance increases with most of these
-#![cfg_attr(docsrs, feature(doc_cfg), feature(doc_auto_cfg))] // for conditional compilation in docs
+#![allow(clippy::cast_lossless)] // it is often more readable to use `as u64` instead of `u64::from(..)`
+#![allow(clippy::needless_for_each)] // readability of one-liners
+#![cfg_attr(docsrs, feature(doc_cfg))] // for conditional compilation in docs

 //! This crate provides a collection of data structures supported by fast implementations of
 //! rank and select queries. The data structures are static, meaning that they cannot be modified
diff --git a/src/trees/bp/builder.rs b/src/trees/bp/builder.rs
index 753600c..9baa122 100644
--- a/src/trees/bp/builder.rs
+++ b/src/trees/bp/builder.rs
@@ -5,13 +5,14 @@ use crate::BitVec;
 /// A builder for [`BpTrees`] using depth-first traversal of the tree. See the documentation of
 /// [`TreeBuilder`].
 ///
-/// [`BpTree`]: BpTree
-pub struct BpBuilder<const BLOCK_SIZE: usize> {
+/// [`BpTrees`]: BpTree
+/// [`TreeBuilder`]: TreeBuilder
+pub struct BpBuilder<const BLOCK_SIZE: u64> {
     excess: i64,
     bit_vec: BitVec,
 }

-impl<const BLOCK_SIZE: usize> BpBuilder<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> BpBuilder<BLOCK_SIZE> {
     /// Create new empty `DfsTreeBuilder`
     #[must_use]
     pub fn new() -> Self {
@@ -26,18 +27,18 @@
     pub fn with_capacity(capacity: u64) -> Self {
         Self {
             excess: 0,
-            bit_vec: BitVec::with_capacity((capacity * 2) as usize),
+            bit_vec: BitVec::with_capacity(capacity * 2),
         }
     }
 }

-impl<const BLOCK_SIZE: usize> Default for BpBuilder<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> Default for BpBuilder<BLOCK_SIZE> {
     fn default() -> Self {
         Self::new()
     }
 }

-impl<const BLOCK_SIZE: usize> TreeBuilder for BpBuilder<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> TreeBuilder for BpBuilder<BLOCK_SIZE> {
     type Tree = BpTree<BLOCK_SIZE>;

     fn enter_node(&mut self) {
diff --git a/src/trees/bp/lookup.rs b/src/trees/bp/lookup.rs
index 2c8fc8b..0a44c3d 100644
--- a/src/trees/bp/lookup.rs
+++ b/src/trees/bp/lookup.rs
@@ -1,3 +1,6 @@
+#![allow(clippy::cast_sign_loss)] // sign loss cannot happen on correctly formed BP trees
+#![allow(clippy::cast_possible_wrap)] // ditto
+
 //! This module provides the lookup table and lookup functionality to answer excess queries
 //! for 8-bit and 16-bit blocks in the tree vector.
 //! Note that the 8-bit version is unused, since this whole module gets replaced with
@@ -52,6 +55,7 @@ const PAREN_BLOCK_LOOKUP: [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] = calculate
 /// Offset to add to encoded excess values, so negative numbers are stored as positive integers, reducing
 /// encoding complexity
+#[allow(clippy::cast_possible_truncation)] // false positive
 const ENCODING_OFFSET: i32 = LOOKUP_BLOCK_SIZE as i32;

 /// Bitmask for one of the lookup values.
@@ -66,6 +70,7 @@
 const MINIMUM_EXCESS_POSITION: usize = 6;
 #[cfg(not(feature = "bp_u16_lookup"))]
 const MINIMUM_EXCESS_POSITION: usize = 5;

+#[allow(clippy::cast_possible_truncation)] // all values are in range
 const fn calculate_lookup_table() -> [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] {
     // initial sentinel values during excess computation
     const MORE_THAN_MAX: SignedLookupBlockType = (LOOKUP_BLOCK_SIZE + 1) as SignedLookupBlockType;
@@ -114,12 +119,14 @@ const fn get_maximum_excess(value: EncodedTableType) -> i64 {
 }

 /// Branchless const minimum computation for values that cannot overflow
+#[allow(clippy::cast_possible_truncation)] // all values are in range
 const fn min(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType {
     b + ((a - b)
         & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType))
 }

 /// Branchless const maximum computation for values that cannot overflow
+#[allow(clippy::cast_possible_truncation)] // all values are in range
 const fn max(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType {
     a - ((a - b)
         & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType))
diff --git a/src/trees/bp/mod.rs b/src/trees/bp/mod.rs
index 6b9e89c..89b0d82 100644
--- a/src/trees/bp/mod.rs
+++ b/src/trees/bp/mod.rs
@@ -11,7 +11,7 @@ use std::cmp::{max, min};
 use std::iter::FusedIterator;

 /// The default block size for the tree, used in several const generics
-const DEFAULT_BLOCK_SIZE: usize = 512;
+const DEFAULT_BLOCK_SIZE: u64 = 512;

 const OPEN_PAREN: u64 = 1;
 const CLOSE_PAREN: u64 = 0;
@@ -139,12 +139,12 @@ use lookup_query::{process_block_bwd, process_block_fwd, LOOKUP_BLOCK_SIZE};
 /// [`BitVec`]: BitVec
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct BpTree<const BLOCK_SIZE: usize = DEFAULT_BLOCK_SIZE> {
+pub struct BpTree<const BLOCK_SIZE: u64 = DEFAULT_BLOCK_SIZE> {
     vec: RsVec,
     min_max_tree: MinMaxTree,
 }

-impl<const BLOCK_SIZE: usize> BpTree<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> BpTree<BLOCK_SIZE> {
     /// Construct a new `BpTree` from a given bit vector.
     #[must_use]
     pub fn from_bit_vector(bv: BitVec) -> Self {
@@ -161,14 +161,15 @@ impl BpTree {
     /// # Arguments
     /// - `index`: The starting index.
     /// - `relative_excess`: The desired relative excess value.
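For the migration guide: the `BLOCK_SIZE` const generic of `BpTree` is now `u64`, so downstream code that names the parameter explicitly must re-type it, while plain literal arguments keep inferring correctly. A sketch (assuming a 2.0 build, the default block size parameter, and the `vers_vecs::trees::bp` module path; not part of the patch):

```rust
use vers_vecs::trees::bp::BpTree;
use vers_vecs::trees::Tree;
use vers_vecs::BitVec;

fn main() {
    // was `const BS: usize = 512;` against a 1.x build
    const BS: u64 = 512;
    // "()" encodes a single-node tree
    let tree: BpTree<BS> = BpTree::from_bit_vector(BitVec::from_bits(&[1, 0]));
    assert_eq!(tree.size(), 1);
}
```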
- pub fn fwd_search(&self, index: usize, mut relative_excess: i64) -> Option { + pub fn fwd_search(&self, index: u64, mut relative_excess: i64) -> Option { // check for greater than or equal length minus one, because the last element // won't ever have a result from fwd_search if index >= (self.vec.len() - 1) { return None; } - let block_index = (index + 1) / BLOCK_SIZE; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let block_index = ((index + 1) / BLOCK_SIZE) as usize; self.fwd_search_block(index, block_index, &mut relative_excess) .map_or_else( |()| { @@ -177,8 +178,12 @@ impl BpTree { // check the result block for the exact position block.and_then(|(block, mut relative_excess)| { - self.fwd_search_block(block * BLOCK_SIZE - 1, block, &mut relative_excess) - .ok() + self.fwd_search_block( + block as u64 * BLOCK_SIZE - 1, + block, + &mut relative_excess, + ) + .ok() }) }, Some, @@ -194,15 +199,15 @@ impl BpTree { #[inline(always)] fn fwd_search_block( &self, - start_index: usize, + start_index: u64, block_index: usize, relative_excess: &mut i64, - ) -> Result { - let block_boundary = min((block_index + 1) * BLOCK_SIZE, self.vec.len()); + ) -> Result { + let block_boundary = min((block_index as u64 + 1) * BLOCK_SIZE, self.vec.len()); // the boundary at which we can start with table lookups let lookup_boundary = min( - (start_index + 1).div_ceil(LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + (start_index + 1).div_ceil(LOOKUP_BLOCK_SIZE) * LOOKUP_BLOCK_SIZE, block_boundary, ); for i in start_index + 1..lookup_boundary { @@ -217,18 +222,20 @@ impl BpTree { // the boundary up to which we can use table lookups let upper_lookup_boundary = max( lookup_boundary, - (block_boundary / LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + (block_boundary / LOOKUP_BLOCK_SIZE) * LOOKUP_BLOCK_SIZE, ); + // LOOKUP_BLOCK_SIZE as usize is a false positive for the lint: https://github.com/rust-lang/rust-clippy/issues/9613 + #[allow(clippy::cast_possible_truncation)] for i in (lookup_boundary..upper_lookup_boundary).step_by(LOOKUP_BLOCK_SIZE as usize) { if let Ok(idx) = process_block_fwd( self.vec - .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE as usize) + .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE) .try_into() .unwrap(), relative_excess, ) { - return Ok(i + idx as usize); + return Ok(i + idx); } } @@ -254,7 +261,7 @@ impl BpTree { /// # Arguments /// - `index`: The starting index. /// - `relative_excess`: The desired relative excess value. - pub fn bwd_search(&self, index: usize, mut relative_excess: i64) -> Option { + pub fn bwd_search(&self, index: u64, mut relative_excess: i64) -> Option { if index >= self.vec.len() { return None; } @@ -267,7 +274,8 @@ impl BpTree { // calculate the block we start searching in. 
It starts at index - 1, so we don't accidentally // search the mM tree and immediately find `index` as the position - let block_index = (index - 1) / BLOCK_SIZE; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let block_index = ((index - 1) / BLOCK_SIZE) as usize; // check the current block self.bwd_search_block(index, block_index, &mut relative_excess) @@ -278,8 +286,12 @@ impl BpTree { // check the result block for the exact position block.and_then(|(block, mut relative_excess)| { - self.bwd_search_block((block + 1) * BLOCK_SIZE, block, &mut relative_excess) - .ok() + self.bwd_search_block( + (block as u64 + 1) * BLOCK_SIZE, + block, + &mut relative_excess, + ) + .ok() }) }, Some, @@ -295,15 +307,15 @@ impl BpTree { #[inline(always)] fn bwd_search_block( &self, - start_index: usize, + start_index: u64, block_index: usize, relative_excess: &mut i64, - ) -> Result { - let block_boundary = min(block_index * BLOCK_SIZE, self.vec.len()); + ) -> Result { + let block_boundary = min(block_index as u64 * BLOCK_SIZE, self.vec.len()); // the boundary at which we can start with table lookups let lookup_boundary = max( - ((start_index - 1) / LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + ((start_index - 1) / LOOKUP_BLOCK_SIZE) * LOOKUP_BLOCK_SIZE, block_boundary, ); for i in (lookup_boundary..start_index).rev() { @@ -315,18 +327,22 @@ impl BpTree { } } - for i in (block_boundary..lookup_boundary) + // lookup_boundary - block_boundary is smaller than a block, so casting to usize cannot + // truncate + // and LOOKUP_BLOCK_SIZE as usize is a false positive for the lint: https://github.com/rust-lang/rust-clippy/issues/9613 + #[allow(clippy::cast_possible_truncation)] + for i in (0..(lookup_boundary - block_boundary) as usize) .step_by(LOOKUP_BLOCK_SIZE as usize) .rev() { if let Ok(idx) = process_block_bwd( self.vec - .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE as usize) + .get_bits_unchecked(block_boundary + i as u64, LOOKUP_BLOCK_SIZE) .try_into() .unwrap(), relative_excess, ) { - return Ok(i + idx as usize); + return Ok(block_boundary + i as u64 + idx); } } @@ -337,7 +353,7 @@ impl BpTree { /// If the bit at `index` is not an opening parenthesis, the result is meaningless. /// If there is no matching closing parenthesis, `None` is returned. #[must_use] - pub fn close(&self, index: usize) -> Option { + pub fn close(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -349,7 +365,7 @@ impl BpTree { /// If the bit at `index` is not a closing parenthesis, the result is meaningless. /// If there is no matching opening parenthesis, `None` is returned. #[must_use] - pub fn open(&self, index: usize) -> Option { + pub fn open(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -361,7 +377,7 @@ impl BpTree { /// This works regardless of whether the bit at `index` is an opening or closing parenthesis. /// If there is no enclosing parenthesis, `None` is returned. #[must_use] - pub fn enclose(&self, index: usize) -> Option { + pub fn enclose(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -380,7 +396,8 @@ impl BpTree { /// The excess is the number of open parentheses minus the number of closing parentheses. /// If `index` is out of bounds, the total excess of the parentheses expression is returned. 
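For the migration guide: the parenthesis queries `close`, `open`, and `enclose` now take and return `u64` positions. A sketch of the matching behaviour (assuming a 2.0 build and the default block size parameter; not part of the patch):

```rust
use vers_vecs::trees::bp::BpTree;
use vers_vecs::BitVec;

fn main() {
    // "(())" as a parenthesis expression
    let tree: BpTree = BpTree::from_bit_vector(BitVec::from_bits(&[1, 1, 0, 0]));
    assert_eq!(tree.close(0), Some(3)); // positions are u64 now
    assert_eq!(tree.open(3), Some(0));
    assert_eq!(tree.enclose(1), Some(0));
}
```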
#[must_use] - pub fn excess(&self, index: usize) -> i64 { + #[allow(clippy::cast_possible_wrap)] // only happens if the tree is unbalanced and has more than 2^62 nodes + pub fn excess(&self, index: u64) -> i64 { debug_assert!(index < self.vec.len(), "Index out of bounds"); self.vec.rank1(index + 1) as i64 - self.vec.rank0(index + 1) as i64 } @@ -426,8 +443,14 @@ impl BpTree { /// Iterate over a subtree rooted at `node` in depth-first (pre-)order. /// The iteration starts with the node itself. /// + /// # Limitations + /// When called on an architecture where `usize` is smaller than 64 bits, on a tree with more + /// than 2^31 nodes, the iterator may produce an iterator over an unspecified subset of nodes. + /// + /// # Panics /// Calling this method on an invalid node handle, or an unbalanced parenthesis expression, - /// will produce an iterator over an unspecified subset of nodes. + /// will produce an iterator over an unspecified subset of nodes, or panic either during + /// construction or iteration. pub fn subtree_iter( &self, node: as Tree>::NodeHandle, @@ -437,17 +460,44 @@ impl BpTree { "Node handle is invalid" ); - let index = self.vec.rank1(node); + let mut index = self.vec.rank1(node); let close = self.close(node).unwrap_or(node); let subtree_size = self.vec.rank1(close) - index; - self.vec.iter1().skip(index).take(subtree_size) + let mut iterator = self.vec.iter1(); + + // since index and subtree_size can exceed usize::MAX, we need some special casing. + // This should be optimized away on 64-bit architectures + + // skip `index` bytes + while index > usize::MAX as u64 { + index -= usize::MAX as u64; + iterator.advance_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_by(index as usize).unwrap(); + + // limit to `subtree_size` bytes by consuming the back of the iterator + let mut remaining_bits = self.vec.rank1 - index - subtree_size; + while remaining_bits > usize::MAX as u64 { + remaining_bits -= usize::MAX as u64; + iterator.advance_back_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_back_by(remaining_bits as usize).unwrap(); + + iterator } /// Iterate over a subtree rooted at `node` in depth-first (post-)order. /// This is slower than the pre-order iteration. /// The iteration ends with the node itself. /// + /// # Limitations + /// When called on an architecture where `usize` is smaller than 64 bits, on a tree with more + /// than 2^31 nodes, the iterator may return an unspecified number of nodes starting at an + /// unspecified node. + /// /// # Panics /// Calling this method on an invalid node handle, or an unbalanced parenthesis expression, /// will produce an iterator over an unspecified subset of nodes, or panic either during @@ -461,15 +511,33 @@ impl BpTree { "Node handle is invalid" ); - let index = self.vec.rank0(node); + let mut index = self.vec.rank0(node); let close = self.close(node).unwrap_or(node); let subtree_size = self.vec.rank0(close) + 1 - index; - self.vec - .iter0() - .skip(index) - .take(subtree_size) - .map(|n| self.open(n).unwrap()) + let mut iterator = self.vec.iter0(); + + // since index and subtree_size can exceed usize::MAX, we need some special casing. 
+        // This should be optimized away on 64-bit architectures
+
+        // skip `index` elements
+        while index > usize::MAX as u64 {
+            index -= usize::MAX as u64;
+            iterator.advance_by(usize::MAX).unwrap();
+        }
+        #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation
+        iterator.advance_by(index as usize).unwrap();
+
+        // limit to `subtree_size` elements by consuming the back of the iterator
+        let mut remaining_bits = self.vec.rank0 - index - subtree_size;
+        while remaining_bits > usize::MAX as u64 {
+            remaining_bits -= usize::MAX as u64;
+            iterator.advance_back_by(usize::MAX).unwrap();
+        }
+        #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation
+        iterator.advance_back_by(remaining_bits as usize).unwrap();
+
+        iterator.map(|n| self.open(n).unwrap())
     }

     /// Iterate over the children of a node.
@@ -544,8 +612,8 @@ impl BpTree {
     }
 }

-impl<const BLOCK_SIZE: usize> Tree for BpTree<BLOCK_SIZE> {
-    type NodeHandle = usize;
+impl<const BLOCK_SIZE: u64> Tree for BpTree<BLOCK_SIZE> {
+    type NodeHandle = u64;

     fn root(&self) -> Option<Self::NodeHandle> {
         if self.vec.is_empty() {
@@ -627,7 +695,7 @@ impl Tree for BpTree {
         })
     }

-    fn node_index(&self, node: Self::NodeHandle) -> usize {
+    fn node_index(&self, node: Self::NodeHandle) -> u64 {
         debug_assert!(
             self.vec.get(node) == Some(OPEN_PAREN),
             "Node handle is invalid"
         );
@@ -635,7 +703,7 @@ impl Tree for BpTree {
         self.vec.rank1(node)
     }

-    fn node_handle(&self, index: usize) -> Self::NodeHandle {
+    fn node_handle(&self, index: u64) -> Self::NodeHandle {
         self.vec.select1(index)
     }

@@ -656,7 +724,7 @@ impl Tree for BpTree {
         excess.saturating_sub(1)
     }

-    fn size(&self) -> usize {
+    fn size(&self) -> u64 {
         self.vec.rank1(self.vec.len())
     }

@@ -665,7 +733,7 @@ impl Tree for BpTree {
     }
 }

-impl<const BLOCK_SIZE: usize> IsAncestor for BpTree<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> IsAncestor for BpTree<BLOCK_SIZE> {
     fn is_ancestor(
         &self,
         ancestor: Self::NodeHandle,
@@ -685,7 +753,7 @@ impl IsAncestor for BpTree {
     }
 }

-impl<const BLOCK_SIZE: usize> LevelTree for BpTree<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> LevelTree for BpTree<BLOCK_SIZE> {
     fn level_ancestor(&self, node: Self::NodeHandle, level: u64) -> Option<Self::NodeHandle> {
         if level == 0 {
             return Some(node);
@@ -722,8 +790,8 @@ impl LevelTree for BpTree {
     }
 }

-impl<const BLOCK_SIZE: usize> SubtreeSize for BpTree<BLOCK_SIZE> {
-    fn subtree_size(&self, node: Self::NodeHandle) -> Option<usize> {
+impl<const BLOCK_SIZE: u64> SubtreeSize for BpTree<BLOCK_SIZE> {
+    fn subtree_size(&self, node: Self::NodeHandle) -> Option<u64> {
         debug_assert!(
             self.vec.get(node) == Some(OPEN_PAREN),
             "Node handle is invalid"
         );
@@ -734,7 +802,7 @@ impl SubtreeSize for BpTree {
     }
 }

-impl<const BLOCK_SIZE: usize> IntoIterator for BpTree<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> IntoIterator for BpTree<BLOCK_SIZE> {
     type Item = <BpTree<BLOCK_SIZE> as Tree>::NodeHandle;
     type IntoIter = SelectIntoIter;

@@ -743,19 +811,19 @@ impl IntoIterator for BpTree {
     }
 }

-impl<const BLOCK_SIZE: usize> From<BitVec> for BpTree<BLOCK_SIZE> {
+impl<const BLOCK_SIZE: u64> From<BitVec> for BpTree<BLOCK_SIZE> {
     fn from(bv: BitVec) -> Self {
         Self::from_bit_vector(bv)
     }
 }

-impl<const BLOCK_SIZE: usize> From<BpTree<BLOCK_SIZE>> for BitVec {
+impl<const BLOCK_SIZE: u64> From<BpTree<BLOCK_SIZE>> for BitVec {
     fn from(value: BpTree<BLOCK_SIZE>) -> Self {
         value.into_parentheses_vec().into_bit_vec()
     }
 }

-impl<const BLOCK_SIZE: usize> From<BpTree<BLOCK_SIZE>> for RsVec {
+impl<const BLOCK_SIZE: u64> From<BpTree<BLOCK_SIZE>> for RsVec {
     fn from(value: BpTree<BLOCK_SIZE>) -> Self {
         value.into_parentheses_vec()
     }
@@ -764,13 +832,13 @@ impl From> for RsVec {
 /// An iterator over the children of a node.
 /// Calls to `next` return the next child node handle in the order they appear in the parenthesis
 /// expression.
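For the migration guide: `Tree::NodeHandle` for `BpTree` is now `u64`, as are `node_index`, `node_handle`, and `size`, so tables of associated node data keep `usize` only at the indexing boundary. A sketch (assuming a 2.0 build and the default block size parameter; not part of the patch):

```rust
use vers_vecs::trees::bp::BpTree;
use vers_vecs::trees::Tree;
use vers_vecs::BitVec;

fn main() {
    // "(()())": a root with two leaf children
    let tree: BpTree = BpTree::from_bit_vector(BitVec::from_bits(&[1, 1, 0, 1, 0, 0]));
    // Vec allocation still wants usize; the cast moves to the call site
    let mut depths = vec![0u64; tree.size() as usize];
    let root = tree.root().unwrap();
    depths[tree.node_index(root) as usize] = tree.depth(root);
    assert_eq!(depths[0], 0);
}
```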
-struct ChildrenIter<'a, const BLOCK_SIZE: usize, const FORWARD: bool> { +struct ChildrenIter<'a, const BLOCK_SIZE: u64, const FORWARD: bool> { tree: &'a BpTree, - current_sibling: Option, + current_sibling: Option, } -impl<'a, const BLOCK_SIZE: usize, const FORWARD: bool> ChildrenIter<'a, BLOCK_SIZE, FORWARD> { - fn new(tree: &'a BpTree, node: usize) -> Self { +impl<'a, const BLOCK_SIZE: u64, const FORWARD: bool> ChildrenIter<'a, BLOCK_SIZE, FORWARD> { + fn new(tree: &'a BpTree, node: u64) -> Self { Self { tree, current_sibling: if FORWARD { @@ -782,10 +850,10 @@ impl<'a, const BLOCK_SIZE: usize, const FORWARD: bool> ChildrenIter<'a, BLOCK_SI } } -impl Iterator +impl Iterator for ChildrenIter<'_, BLOCK_SIZE, FORWARD> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option { let current = self.current_sibling?; @@ -799,7 +867,7 @@ impl Iterator } } -impl FusedIterator +impl FusedIterator for ChildrenIter<'_, BLOCK_SIZE, FORWARD> { } diff --git a/src/trees/bp/tests.rs b/src/trees/bp/tests.rs index a7da6b2..4d9acdb 100644 --- a/src/trees/bp/tests.rs +++ b/src/trees/bp/tests.rs @@ -144,22 +144,22 @@ fn test_lookup_extreme_pop() { let tree = BpTree::<512>::from_bit_vector(bv); for excess in 1..64 { - assert_eq!(tree.fwd_search(0, excess), Some(excess as usize)); + assert_eq!(tree.fwd_search(0, excess), Some(excess as u64)); } let bv = BitVec::from_bits(&[0; 64]); let tree = BpTree::<512>::from_bit_vector(bv); for excess in 1..64 { - assert_eq!(tree.fwd_search(0, -excess), Some(excess as usize)); + assert_eq!(tree.fwd_search(0, -excess), Some(excess as u64)); } } #[test] fn test_fwd_fuzzy() { // we're fuzzing forward search a bit - const L: usize = 1000; - const L_BITS: usize = L * size_of::() * 8; + const L: u64 = 1000; + const L_BITS: u64 = L * size_of::() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -170,7 +170,7 @@ fn test_fwd_fuzzy() { } // pre-calculate all absolute excess values - let mut excess_values = vec![0i16; L_BITS]; + let mut excess_values = vec![0i16; L_BITS as usize]; let mut excess = 0; for (idx, bit) in bit_vec.iter().enumerate() { if bit == 1 { @@ -188,10 +188,10 @@ fn test_fwd_fuzzy() { for relative_excess in [-3, -2, -1, 0, 1, 2, 3] { for node_handle in bp.vec.iter1() { let absolute_excess = bp.excess(node_handle) + relative_excess; - let expected = excess_values[node_handle + 1..] + let expected = excess_values[(node_handle + 1) as usize..] 
.iter() .position(|&excess| excess as i64 == absolute_excess) - .map(|i| i + node_handle + 1); + .map(|i| i as u64 + node_handle + 1); let actual = bp.fwd_search(node_handle, relative_excess); assert_eq!( expected, @@ -320,8 +320,8 @@ fn test_bwd_block_traversal() { #[test] fn test_bwd_fuzzy() { // we're fuzzing backward search a bit - const L: usize = 1000; - const L_BITS: usize = L * size_of::<u64>() * 8; + const L: u64 = 1000; + const L_BITS: u64 = L * size_of::<u64>() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -332,7 +332,7 @@ fn test_bwd_fuzzy() { } // pre-calculate all absolute excess values - let mut excess_values = vec![0i16; L_BITS + 1]; + let mut excess_values = vec![0i16; (L_BITS + 1) as usize]; let mut excess = 0; for (idx, bit) in bit_vec.iter().enumerate() { if bit == 1 { @@ -354,9 +354,10 @@ fn test_bwd_fuzzy() { } else { bp.excess(node_handle - 1) + relative_excess }; - let expected = excess_values[..node_handle] + let expected = excess_values[..node_handle as usize] .iter() - .rposition(|&excess| excess as i64 == absolute_excess); + .rposition(|&excess| excess as i64 == absolute_excess) + .map(|idx| idx as u64); let actual = bp.bwd_search(node_handle, relative_excess); assert_eq!( @@ -443,13 +444,13 @@ fn test_parent() { for (idx, bit) in bv.iter().enumerate() { if bit == 1 { assert_eq!( - tree.parent(idx), + tree.parent(idx as u64), head, "parent of node {} is incorrect", idx ); stack.push(head); - head = Some(idx); + head = Some(idx as u64); } else { head = stack.pop().expect("stack underflow despite balanced tree"); } @@ -496,8 +497,8 @@ fn test_contiguous_index() { let rs: RsVec = bv.into(); for (rank, index_in_bv) in rs.iter1().enumerate() { - assert_eq!(tree.node_index(index_in_bv), rank); - assert_eq!(tree.node_handle(rank), index_in_bv); + assert_eq!(tree.node_index(index_in_bv), rank as u64); + assert_eq!(tree.node_handle(rank as u64), index_in_bv); } } @@ -535,7 +536,7 @@ fn test_is_leaf() { for (idx, is_leaf) in leaves.iter().enumerate() { // if the bit is 1, check if that node is a leaf. If it's 0, it's not a valid node handle. if bits[idx] == 1 { - assert_eq!(tree.is_leaf(idx), *is_leaf); + assert_eq!(tree.is_leaf(idx as u64), *is_leaf); } } } @@ -756,8 +757,8 @@ fn fuzz_tree_navigation() { // fuzzing the tree navigation operations on an unbalanced tree // because those are easier to generate uniformly.
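The comment above is the reason these fuzz tests work with plain random bit vectors: a uniformly random bit vector is already a uniformly distributed unbalanced parenthesis word, whereas sampling balanced words uniformly would require rejection or ballot-style sampling. The elided generation loop presumably follows the same pattern as the other tests in this file; a sketch, assuming the seeded `StdRng` shown above and the `append_word` API used elsewhere in the crate (imports of `rand::Rng` and `rand::SeedableRng` assumed):

let mut rng = StdRng::from_seed([0; 32]);
let mut bit_vec = BitVec::new();
for _ in 0..L {
    bit_vec.append_word(rng.gen()); // L random u64 limbs, i.e. L_BITS random parentheses
}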
- const L: usize = 1 << 14; - const L_BITS: usize = L * size_of::<u64>() * 8; + const L: u64 = 1 << 14; + const L_BITS: u64 = L * size_of::<u64>() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -778,6 +779,7 @@ fn fuzz_tree_navigation() { let mut sibling_count_stack = Vec::new(); tree.vec.iter().enumerate().for_each(|(idx, bit)| { + let idx = idx as u64; if bit == OPEN_PAREN { assert_eq!(tree.parent(idx), parent_stack.last().copied()); assert_eq!( diff --git a/src/trees/mmt.rs b/src/trees/mmt.rs index e66aa09..4817fb5 100644 --- a/src/trees/mmt.rs +++ b/src/trees/mmt.rs @@ -41,12 +41,16 @@ pub(crate) struct MinMaxTree { } impl MinMaxTree { - pub(crate) fn excess_tree(bit_vec: &BitVec, block_size: usize) -> Self { + pub(crate) fn excess_tree(bit_vec: &BitVec, block_size: u64) -> Self { if bit_vec.is_empty() { return Self::default(); } - let num_leaves = bit_vec.len().div_ceil(block_size); + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let num_leaves = bit_vec.len().div_ceil(block_size) as usize; + #[allow(clippy::cast_possible_truncation)] // only happens if available memory already exceeded + #[allow(clippy::cast_sign_loss)] + #[allow(clippy::cast_precision_loss)] let num_internal_nodes = max(1, (1 << (num_leaves as f64).log2().ceil() as usize) - 1); let mut nodes = vec![ExcessNode::default(); num_leaves + num_internal_nodes]; @@ -56,8 +60,9 @@ impl MinMaxTree { // bottom up construction for i in 0..bit_vec.len() { + #[allow(clippy::cast_possible_truncation)] // safe due to the division if i > 0 && i % block_size == 0 { - nodes[num_internal_nodes + i / block_size - 1] = ExcessNode { + nodes[num_internal_nodes + (i / block_size) as usize - 1] = ExcessNode { total: total_excess, min: min_excess, max: max_excess, @@ -170,7 +175,7 @@ impl MinMaxTree { /// Get the index of the left sibling of the node at `index` if it exists #[allow(clippy::unused_self)] // self is used for consistency with other methods pub(crate) fn left_sibling(&self, index: NonZeroUsize) -> Option<NonZeroUsize> { - if index.get() % 2 == 0 { + if index.get().is_multiple_of(2) { // index is at least 2 NonZeroUsize::new(index.get() - 1) } else { diff --git a/src/trees/mod.rs b/src/trees/mod.rs index 3e2f0eb..4faec67 100644 --- a/src/trees/mod.rs +++ b/src/trees/mod.rs @@ -41,14 +41,14 @@ pub trait Tree { /// Convert a node handle into a contiguous index, allowing associated data to be stored in a vector. /// If `node` is not a valid node handle, the result is meaningless. - fn node_index(&self, node: Self::NodeHandle) -> usize; + fn node_index(&self, node: Self::NodeHandle) -> u64; /// Convert a contiguous index that enumerates all nodes into a node handle. /// This operation is the inverse of `node_index`. /// The index must be in the range `0..self.size()`. /// /// If the index is out of bounds, the behavior is unspecified. - fn node_handle(&self, index: usize) -> Self::NodeHandle; + fn node_handle(&self, index: u64) -> Self::NodeHandle; /// Returns true if the node is a leaf. /// If `node` is not a valid node handle, the result is meaningless. @@ -63,7 +63,7 @@ pub trait Tree { fn depth(&self, node: Self::NodeHandle) -> u64; /// Returns the number of nodes in the tree. - fn size(&self) -> usize; + fn size(&self) -> u64; /// Returns true if the tree has no nodes.
fn is_empty(&self) -> bool { @@ -81,7 +81,7 @@ pub trait SubtreeSize: Tree { /// /// Returns `None` if the `node` has no closing parenthesis (in an unbalanced parenthesis /// expression). - fn subtree_size(&self, node: Self::NodeHandle) -> Option; + fn subtree_size(&self, node: Self::NodeHandle) -> Option; } /// A trait for succinct tree data structures that support [`is_ancestor`] queries. @@ -122,6 +122,10 @@ pub trait LevelTree: Tree { /// /// Once the full tree has been visited, the caller must call [`build`] to create an instance of the /// implementing tree type. +/// +/// [`enter_node`]: TreeBuilder::enter_node +/// [`leave_node`]: TreeBuilder::leave_node +/// [`build`]: TreeBuilder::build pub trait TreeBuilder { /// The tree type constructed with this interface type Tree; @@ -139,5 +143,8 @@ pub trait TreeBuilder { /// (i.e. there are nodes for which [`leave_node`] has not been called, /// or there are more calls to `leave_node` than to [`enter_node`]; /// the number of extraneous calls to `enter_node` is returned in the error). + /// + /// [`leave_node`]: Self::leave_node + /// [`enter_node`]: Self::enter_node fn build(self) -> Result; } diff --git a/src/util/elias_fano_iter.rs b/src/util/elias_fano_iter.rs index 646d3f4..029faed 100644 --- a/src/util/elias_fano_iter.rs +++ b/src/util/elias_fano_iter.rs @@ -12,14 +12,17 @@ macro_rules! gen_ef_iter_impl { return Ok(()); } - if Some(self.index + n - 1) > self.back_index { + if Some(self.index + n as u64 - 1) > self.back_index { if Some(self.index) > self.back_index { Err(std::num::NonZeroUsize::new(n).unwrap()) } else { - Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) + // the following is limited in size by n, and `back_index` is `None` only if the vector is + // empty, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&u64::MAX).wrapping_sub(self.index).wrapping_add(1)) as usize).unwrap()) } } else { - self.index += n; + self.index += n as u64; if n > 0 { // since advance_by is not stable yet, we need to call nth - 1. self.upper_iter.nth(n - 1).expect("upper iterator should not be exhausted"); @@ -46,10 +49,12 @@ macro_rules! gen_ef_iter_impl { // since the cursors point to unconsumed items, we need to add 1 let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; - if remaining < n { - return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); + if remaining < n as u64 { + // the following is limited in size by n, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + return Err(std::num::NonZeroUsize::new(n - remaining as usize).unwrap()); } - self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; + self.back_index = if self.back_index >= Some(n as u64) { self.back_index.map(|b| b - n as u64) } else { None }; if n > 0 { // since advance_by is not stable yet, we need to call nth - 1. self.upper_iter.nth_back(n - 1).expect("upper iterator should not be exhausted"); @@ -87,6 +92,10 @@ macro_rules! gen_ef_iter_impl { /// Returns the exact number of elements that this iterator would iterate over. Does not /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic. fn count(self) -> usize where Self: Sized, @@ -139,9 +148,17 @@ macro_rules! 
gen_ef_iter_impl { } impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? { + // the check and panic guarantee a panic on truncation + #[allow(clippy::cast_possible_truncation)] fn len(&self) -> usize { + // this check is hopefully eliminated on 64-bit architectures + if (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) + > usize::MAX as u64 { + panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden"); + } + // intentionally overflowing calculations to avoid branches on empty iterator - (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1) + (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) as usize } } @@ -182,11 +199,11 @@ macro_rules! impl_ef_iterator { pub struct $own { upper_iter: crate::bit_vec::fast_rs_vec::SelectIntoIter<false>, vec: crate::bit_vec::BitVec, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - back_index: Option<usize>, - lower_len: usize, + back_index: Option<u64>, + lower_len: u64, universe_zero: u64, } @@ -218,6 +235,7 @@ macro_rules! impl_ef_iterator { impl EliasFanoVec { #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")] + #[doc = "Note: if the iterator length exceeds `usize::MAX`, calling `len()` on it will panic."] #[must_use] pub fn iter(&self) -> $bor<'_> { $bor::new(self) } } @@ -230,11 +248,11 @@ macro_rules! impl_ef_iterator { pub struct $bor<'a> { upper_iter: crate::bit_vec::fast_rs_vec::SelectIter<'a, false>, vec: &'a crate::bit_vec::BitVec, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - back_index: Option<usize>, - lower_len: usize, + back_index: Option<u64>, + lower_len: u64, universe_zero: u64, } diff --git a/src/util/general_iter.rs b/src/util/general_iter.rs index afc4c73..37dca0c 100644 --- a/src/util/general_iter.rs +++ b/src/util/general_iter.rs @@ -32,14 +32,17 @@ macro_rules! gen_vector_iter_impl { return Ok(()); } - if Some(self.index + n - 1) > self.back_index { + if Some(self.index + n as u64 - 1) > self.back_index { if Some(self.index) > self.back_index { Err(std::num::NonZeroUsize::new(n).unwrap()) } else { - Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) + // the following is limited in size by n, and `back_index` is `None` only if the vector is + // empty, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&u64::MAX).wrapping_sub(self.index).wrapping_add(1)) as usize).unwrap()) } } else { - self.index += n; + self.index += n as u64; Ok(()) } } @@ -62,10 +65,12 @@ macro_rules!
gen_vector_iter_impl { // since the cursors point to unconsumed items, we need to add 1 let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; - if remaining < n { - return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); + if remaining < n as u64 { + // the following is limited in size by n, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + return Err(std::num::NonZeroUsize::new(n - remaining as usize).unwrap()); } - self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; + self.back_index = if self.back_index >= Some(n as u64) { self.back_index.map(|b| b - n as u64) } else { None }; Ok(()) } @@ -96,6 +101,10 @@ macro_rules! gen_vector_iter_impl { /// Returns the exact number of elements that this iterator would iterate over. Does not /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic. fn count(self) -> usize where Self: Sized, @@ -124,9 +133,17 @@ macro_rules! gen_vector_iter_impl { } impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? { + // the check and panic guarantees panic on truncation + #[allow(clippy::cast_possible_truncation)] fn len(&self) -> usize { + // this check is hopefully eliminated on 64-bit architectures + if (self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) + > usize::MAX as u64 { + panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden"); + } + // intentionally overflowing calculations to avoid branches on empty iterator - (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1) + (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) as usize } } @@ -236,20 +253,20 @@ macro_rules! impl_vector_iterator { #[derive(Clone, Debug)] pub struct $own { vec: $type, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - back_index: Option, + back_index: Option, } #[doc = concat!("A borrowing iterator for `", stringify!($type), "`.")] #[derive(Clone, Debug)] pub struct $bor<'a> { vec: &'a $type, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - back_index: Option, + back_index: Option, } crate::util::gen_vector_iter_impl!($own, $type, $return_type, $get_unchecked, $get); @@ -262,6 +279,8 @@ macro_rules! 
impl_vector_iterator { impl $type { #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")] #[doc = concat!("The iterator returns `", stringify!($return_type), "` elements.")] + #[doc = "Note: if the iterator element type is larger than usize, calling `len()` on the \ + iterator will panic if the iterator length exceeds `usize::MAX`."] #[must_use] pub fn iter(&self) -> $bor<'_> { $bor::new(self) } diff --git a/src/wavelet/mod.rs b/src/wavelet/mod.rs index 3d08602..16e13c2 100644 --- a/src/wavelet/mod.rs +++ b/src/wavelet/mod.rs @@ -62,6 +62,10 @@ use std::ops::Range; /// ``` /// /// [`RsVec`]: RsVec +/// [`from_bit_vec`]: WaveletMatrix::from_bit_vec +/// [`from_slice`]: WaveletMatrix::from_slice +/// [`from_bit_vec_pc`]: WaveletMatrix::from_bit_vec_pc +/// [`from_slice_pc`]: WaveletMatrix::from_slice_pc #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct WaveletMatrix { @@ -77,28 +81,31 @@ impl WaveletMatrix { /// - `num_elements`: The number of elements in the sequence. /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word. #[inline(always)] // should get rid of closures in favor of static calls - fn permutation_sorting<LOOKUP: Fn(usize, usize) -> u64>( + fn permutation_sorting<LOOKUP: Fn(u64, u64) -> u64>( bits_per_element: u16, - num_elements: usize, + num_elements: u64, bit_lookup: LOOKUP, ) -> Self { - let element_len = bits_per_element as usize; + let element_len = bits_per_element as u64; - let mut data = vec![BitVec::from_zeros(num_elements); element_len]; + #[allow(clippy::cast_possible_truncation)] + let mut data = vec![BitVec::from_zeros(num_elements); element_len as usize]; // insert the first bit of each word into the first bit vector // for each following level, insert the next bit of each word into the next bit vector // sorted stably by the previous bit vector let mut permutation = (0..num_elements).collect::<Vec<_>>(); - let mut next_permutation = vec![0; num_elements]; + #[allow(clippy::cast_possible_truncation)] + let mut next_permutation = vec![0; num_elements as usize]; for (level, data) in data.iter_mut().enumerate() { + let level = level as u64; let mut total_zeros = 0; for (i, p) in permutation.iter().enumerate() { if bit_lookup(*p, element_len - level - 1) == 0 { total_zeros += 1; } else { - data.set(i, 1).unwrap(); + data.set(i as u64, 1).unwrap(); } } @@ -108,7 +115,7 @@ impl WaveletMatrix { let mut zero_boundary = 0; let mut one_boundary = total_zeros; for (i, p) in permutation.iter().enumerate() { - if data.get_unchecked(i) == 0 { + if data.get_unchecked(i as u64) == 0 { next_permutation[zero_boundary] = *p; zero_boundary += 1; } else { @@ -139,10 +146,10 @@ impl WaveletMatrix { /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element.
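Since the wavelet matrix constructors above now take and return `u64`-based sizes, a short usage sketch may help orient downstream users; it assumes the same toy sequence the doc tests in this module use elsewhere (six 3-bit symbols):

use vers_vecs::{BitVec, WaveletMatrix};

// pack six 3-bit symbols into a bit vector, then build the matrix from it
let data = BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3);
let wavelet_matrix = WaveletMatrix::from_bit_vec(&data, 3);
assert_eq!(wavelet_matrix.len(), 6); // len() now returns u64
assert_eq!(wavelet_matrix.get_u64(2), Some(4)); // indices are u64 as well
assert_eq!(wavelet_matrix.get_u64(100), None); // out-of-bounds lookups yield None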
#[must_use] pub fn from_bit_vec(bit_vec: &BitVec, bits_per_element: u16) -> Self { - assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); - let num_elements = bit_vec.len() / bits_per_element as usize; + assert_eq!(bit_vec.len() % bits_per_element as u64, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); + let num_elements = bit_vec.len() / bits_per_element as u64; Self::permutation_sorting(bits_per_element, num_elements, |element, bit| { - bit_vec.get_unchecked(element * bits_per_element as usize + bit) + bit_vec.get_unchecked(element * bits_per_element as u64 + bit) }) } @@ -161,8 +168,10 @@ impl WaveletMatrix { bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); - Self::permutation_sorting(bits_per_element, sequence.len(), |element, bit| { - (sequence[element] >> bit) & 1 + #[allow(clippy::cast_possible_truncation)] + // safe because the closure is only called with indices of `sequence` + Self::permutation_sorting(bits_per_element, sequence.len() as u64, |element, bit| { + (sequence[element as usize] >> bit) & 1 }) } @@ -176,17 +185,19 @@ impl WaveletMatrix { /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word. /// - `element_lookup`: A closure that returns the `element`-th word. #[inline(always)] // should get rid of closures in favor of static calls - fn prefix_counting u64, ELEMENT: Fn(usize) -> u64>( + fn prefix_counting u64, ELEMENT: Fn(u64) -> u64>( bits_per_element: u16, - num_elements: usize, + num_elements: u64, bit_lookup: LOOKUP, element_lookup: ELEMENT, ) -> Self { - let element_len = bits_per_element as usize; - let mut histogram = vec![0usize; 1 << bits_per_element]; - let mut borders = vec![0usize; 1 << bits_per_element]; - let mut data = vec![BitVec::from_zeros(num_elements); element_len]; + let element_len = bits_per_element as u64; + let mut histogram = vec![0u64; 1 << bits_per_element]; + let mut borders = vec![0u64; 1 << bits_per_element]; + #[allow(clippy::cast_possible_truncation)] + let mut data = vec![BitVec::from_zeros(num_elements); element_len as usize]; + #[allow(clippy::cast_possible_truncation)] // element_lookup only returns small values for i in 0..num_elements { histogram[element_lookup(i) as usize] += 1; data[0].set_unchecked(i, bit_lookup(i, element_len - 1)); @@ -207,9 +218,10 @@ impl WaveletMatrix { borders[h_minus_1] + histogram[h_minus_1]; } + #[allow(clippy::cast_possible_truncation)] // element_lookup only returns small values for i in 0..num_elements { let bit = bit_lookup(i, element_len - level - 1); - data[level].set_unchecked( + data[level as usize].set_unchecked( borders[element_lookup(i) as usize >> (element_len - level)], bit, ); @@ -242,21 +254,19 @@ impl WaveletMatrix { /// [`from_slice`]: WaveletMatrix::from_slice #[must_use] pub fn from_bit_vec_pc(bit_vec: &BitVec, bits_per_element: u16) -> Self { - assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); + assert_eq!(bit_vec.len() % bits_per_element as u64, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); assert!( bits_per_element <= 64, "The number of bits per element cannot exceed 64." 
); - let num_elements = bit_vec.len() / bits_per_element as usize; + let num_elements = bit_vec.len() / bits_per_element as u64; Self::prefix_counting( bits_per_element, num_elements, - |element, bit| bit_vec.get_unchecked(element * bits_per_element as usize + bit), + |element, bit| bit_vec.get_unchecked(element * bits_per_element as u64 + bit), |element| { - bit_vec.get_bits_unchecked( - element * bits_per_element as usize, - bits_per_element as usize, - ) + bit_vec + .get_bits_unchecked(element * bits_per_element as u64, bits_per_element as u64) }, ) } @@ -282,18 +292,20 @@ impl WaveletMatrix { bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); + #[allow(clippy::cast_possible_truncation)] + // safe because the closures are called only with indices of `sequence` Self::prefix_counting( bits_per_element, - sequence.len(), - |element, bit| (sequence[element] >> bit) & 1, - |element| sequence[element], + sequence.len() as u64, + |element, bit| (sequence[element as usize] >> bit) & 1, + |element| sequence[element as usize], ) } /// Generic function to read a value from the wavelet matrix and consume it with a closure. /// The function is used by the `get_value` and `get_u64` functions, deduplicating code. #[inline(always)] - fn reconstruct_value_unchecked(&self, mut i: usize, mut target_func: F) { + fn reconstruct_value_unchecked(&self, mut i: u64, mut target_func: F) { for level in 0..self.bits_per_element() { let bit = self.data[level].get_unchecked(i); target_func(bit); @@ -323,7 +335,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_value(100), None); /// ``` #[must_use] - pub fn get_value(&self, i: usize) -> Option { + pub fn get_value(&self, i: u64) -> Option { if self.data.is_empty() || i >= self.data[0].len() { None } else { @@ -342,11 +354,11 @@ impl WaveletMatrix { /// /// [`get_value`]: WaveletMatrix::get_value #[must_use] - pub fn get_value_unchecked(&self, i: usize) -> BitVec { - let mut value = BitVec::from_zeros(self.bits_per_element()); + pub fn get_value_unchecked(&self, i: u64) -> BitVec { + let mut value = BitVec::from_zeros(self.bits_per_element() as u64); let mut level = self.bits_per_element() - 1; self.reconstruct_value_unchecked(i, |bit| { - value.set_unchecked(level, bit); + value.set_unchecked(level as u64, bit); level = level.saturating_sub(1); }); value @@ -369,7 +381,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_u64(100), None); /// ``` #[must_use] - pub fn get_u64(&self, i: usize) -> Option { + pub fn get_u64(&self, i: u64) -> Option { if self.bits_per_element() > 64 || self.data.is_empty() || i >= self.data[0].len() { None } else { @@ -388,7 +400,7 @@ impl WaveletMatrix { /// /// [`get_u64`]: WaveletMatrix::get_u64 #[must_use] - pub fn get_u64_unchecked(&self, i: usize) -> u64 { + pub fn get_u64_unchecked(&self, i: u64) -> u64 { let mut value = 0; self.reconstruct_value_unchecked(i, |bit| { value <<= 1; @@ -414,9 +426,9 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank_range`]: WaveletMatrix::rank_range #[must_use] - pub fn rank_range_unchecked(&self, mut range: Range, symbol: &BitVec) -> usize { + pub fn rank_range_unchecked(&self, mut range: Range, symbol: &BitVec) -> u64 { for (level, data) in self.data.iter().enumerate() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range.start = data.rank0(range.start); range.end = data.rank0(range.end); } else { @@ -450,10 +462,10 @@ impl WaveletMatrix { /// 
/// [`BitVec`]: BitVec #[must_use] - pub fn rank_range(&self, range: Range, symbol: &BitVec) -> Option { + pub fn rank_range(&self, range: Range, symbol: &BitVec) -> Option { if range.start >= self.len() || range.end > self.len() - || symbol.len() != self.bits_per_element() + || symbol.len() != self.bits_per_element() as u64 { None } else { @@ -478,7 +490,7 @@ impl WaveletMatrix { /// /// [`rank_range_u64`]: WaveletMatrix::rank_range_u64 #[must_use] - pub fn rank_range_u64_unchecked(&self, mut range: Range, symbol: u64) -> usize { + pub fn rank_range_u64_unchecked(&self, mut range: Range, symbol: u64) -> u64 { for (level, data) in self.data.iter().enumerate() { if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 { range.start = data.rank0(range.start); @@ -512,7 +524,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_range_u64(2..4, 4), Some(1)); /// ``` #[must_use] - pub fn rank_range_u64(&self, range: Range, symbol: u64) -> Option { + pub fn rank_range_u64(&self, range: Range, symbol: u64) -> Option { if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 { None } else { @@ -543,7 +555,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank_offset`]: WaveletMatrix::rank_offset #[must_use] - pub fn rank_offset_unchecked(&self, offset: usize, i: usize, symbol: &BitVec) -> usize { + pub fn rank_offset_unchecked(&self, offset: u64, i: u64, symbol: &BitVec) -> u64 { self.rank_range_unchecked(offset..i, symbol) } @@ -577,11 +589,11 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank_offset(&self, offset: usize, i: usize, symbol: &BitVec) -> Option { + pub fn rank_offset(&self, offset: u64, i: u64, symbol: &BitVec) -> Option { if offset > i || offset >= self.len() || i > self.len() - || symbol.len() != self.bits_per_element() + || symbol.len() != self.bits_per_element() as u64 { None } else { @@ -610,7 +622,7 @@ impl WaveletMatrix { /// /// [`rank_offset_u64`]: WaveletMatrix::rank_offset_u64 #[must_use] - pub fn rank_offset_u64_unchecked(&self, offset: usize, i: usize, symbol: u64) -> usize { + pub fn rank_offset_u64_unchecked(&self, offset: u64, i: u64, symbol: u64) -> u64 { self.rank_range_u64_unchecked(offset..i, symbol) } @@ -640,7 +652,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_offset_u64(2, 4, 4), Some(1)); /// ``` #[must_use] - pub fn rank_offset_u64(&self, offset: usize, i: usize, symbol: u64) -> Option { + pub fn rank_offset_u64(&self, offset: u64, i: u64, symbol: u64) -> Option { if offset > i || offset >= self.len() || i > self.len() || self.bits_per_element() > 64 { None } else { @@ -666,7 +678,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank`]: WaveletMatrix::rank #[must_use] - pub fn rank_unchecked(&self, i: usize, symbol: &BitVec) -> usize { + pub fn rank_unchecked(&self, i: u64, symbol: &BitVec) -> u64 { self.rank_range_unchecked(0..i, symbol) } @@ -693,8 +705,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank(&self, i: usize, symbol: &BitVec) -> Option { - if i > self.len() || symbol.len() != self.bits_per_element() { + pub fn rank(&self, i: u64, symbol: &BitVec) -> Option { + if i > self.len() || symbol.len() != self.bits_per_element() as u64 { None } else { Some(self.rank_range_unchecked(0..i, symbol)) @@ -717,7 +729,7 @@ impl WaveletMatrix { /// /// [`rank_u64`]: WaveletMatrix::rank_u64 #[must_use] - pub fn rank_u64_unchecked(&self, i: usize, symbol: u64) -> usize { + pub fn rank_u64_unchecked(&self, i: u64, symbol: u64) -> u64 { 
self.rank_range_u64_unchecked(0..i, symbol) } @@ -741,7 +753,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1)); /// ``` #[must_use] - pub fn rank_u64(&self, i: usize, symbol: u64) -> Option { + pub fn rank_u64(&self, i: u64, symbol: u64) -> Option { if i > self.len() || self.bits_per_element() > 64 { None } else { @@ -770,11 +782,11 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`select_offset`]: WaveletMatrix::select_offset #[must_use] - pub fn select_offset_unchecked(&self, offset: usize, rank: usize, symbol: &BitVec) -> usize { + pub fn select_offset_unchecked(&self, offset: u64, rank: u64, symbol: &BitVec) -> u64 { let mut range_start = offset; for (level, data) in self.data.iter().enumerate() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range_start = data.rank0(range_start); } else { range_start = data.rank0 + data.rank1(range_start); @@ -784,7 +796,7 @@ impl WaveletMatrix { let mut range_end = range_start + rank; for (level, data) in self.data.iter().enumerate().rev() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range_end = data.select0(range_end); } else { range_end = data.select1(range_end - data.rank0); @@ -818,8 +830,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn select_offset(&self, offset: usize, rank: usize, symbol: &BitVec) -> Option { - if offset >= self.len() || symbol.len() != self.bits_per_element() { + pub fn select_offset(&self, offset: u64, rank: u64, symbol: &BitVec) -> Option { + if offset >= self.len() || symbol.len() != self.bits_per_element() as u64 { None } else { let idx = self.select_offset_unchecked(offset, rank, symbol); @@ -850,7 +862,7 @@ impl WaveletMatrix { /// /// [`select_offset_u64`]: WaveletMatrix::select_offset_u64 #[must_use] - pub fn select_offset_u64_unchecked(&self, offset: usize, rank: usize, symbol: u64) -> usize { + pub fn select_offset_u64_unchecked(&self, offset: u64, rank: u64, symbol: u64) -> u64 { let mut range_start = offset; for (level, data) in self.data.iter().enumerate() { @@ -895,7 +907,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.select_offset_u64(2, 1, 4), None); /// ``` #[must_use] - pub fn select_offset_u64(&self, offset: usize, rank: usize, symbol: u64) -> Option { + pub fn select_offset_u64(&self, offset: u64, rank: u64, symbol: u64) -> Option { if offset >= self.len() || self.bits_per_element() > 64 { None } else { @@ -927,7 +939,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`select`]: WaveletMatrix::select #[must_use] - pub fn select_unchecked(&self, rank: usize, symbol: &BitVec) -> usize { + pub fn select_unchecked(&self, rank: u64, symbol: &BitVec) -> u64 { self.select_offset_unchecked(0, rank, symbol) } @@ -952,8 +964,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn select(&self, rank: usize, symbol: &BitVec) -> Option { - if symbol.len() == self.bits_per_element() { + pub fn select(&self, rank: u64, symbol: &BitVec) -> Option { + if symbol.len() == self.bits_per_element() as u64 { let idx = self.select_unchecked(rank, symbol); if idx < self.len() { Some(idx) @@ -982,7 +994,7 @@ impl WaveletMatrix { /// /// [`select_u64`]: WaveletMatrix::select_u64 #[must_use] - pub fn select_u64_unchecked(&self, rank: usize, symbol: u64) -> usize { + pub fn select_u64_unchecked(&self, rank: u64, symbol: u64) -> u64 { 
self.select_offset_u64_unchecked(0, rank, symbol) } @@ -1004,7 +1016,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.select_u64(1, 4), Some(2)); /// ``` #[must_use] - pub fn select_u64(&self, rank: usize, symbol: u64) -> Option { + pub fn select_u64(&self, rank: u64, symbol: u64) -> Option { if self.bits_per_element() > 64 { None } else { @@ -1032,8 +1044,8 @@ impl WaveletMatrix { /// /// [`quantile`]: WaveletMatrix::quantile #[must_use] - pub fn quantile_unchecked(&self, range: Range, k: usize) -> BitVec { - let result = BitVec::from_zeros(self.bits_per_element()); + pub fn quantile_unchecked(&self, range: Range, k: u64) -> BitVec { + let result = BitVec::from_zeros(self.bits_per_element() as u64); self.partial_quantile_search_unchecked(range, k, 0, result) } @@ -1046,12 +1058,12 @@ impl WaveletMatrix { #[inline(always)] fn partial_quantile_search_unchecked( &self, - mut range: Range, - mut k: usize, + mut range: Range, + mut k: u64, start_level: usize, mut prefix: BitVec, ) -> BitVec { - debug_assert!(prefix.len() == self.bits_per_element()); + debug_assert!(prefix.len() == self.bits_per_element() as u64); debug_assert!(!range.is_empty()); debug_assert!(range.end <= self.len()); @@ -1067,7 +1079,7 @@ impl WaveletMatrix { } else { // the element is among the ones, so we set the bit to 1, and move the range // into the 1-partition of the next level - prefix.set_unchecked((self.bits_per_element() - 1) - level, 1); + prefix.set_unchecked(((self.bits_per_element() - 1) - level) as u64, 1); k -= zeros; range.start = data.rank0 + (range.start - zeros_start); // range.start - zeros_start is the rank1 of range.start range.end = data.rank0 + (range.end - zeros_end); // same here @@ -1080,7 +1092,7 @@ impl WaveletMatrix { /// Get the `k`-th smallest element in the encoded sequence in the specified `range`, /// where `k = 0` returns the smallest element. /// The `range` is a half-open interval, meaning that the `end` index is exclusive. - /// The `k`-th smallest element is returned as a `BitVec`, + /// The `k`-th smallest element is returned as a [`BitVec`], /// where the least significant bit is the first element. /// /// Returns `None` if the `range` is out of bounds, or if `k` is greater than the size of the range. @@ -1097,7 +1109,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.quantile(1..4, 0), Some(BitVec::pack_sequence_u8(&[1], 3))); /// ``` #[must_use] - pub fn quantile(&self, range: Range, k: usize) -> Option { + pub fn quantile(&self, range: Range, k: u64) -> Option { if range.start >= self.len() || range.end > self.len() || k >= range.end - range.start { None } else { @@ -1114,8 +1126,10 @@ impl WaveletMatrix { /// /// # Panics /// May panic if the `i` is out of bounds, or returns an empty bit vector. 
+ /// + /// [`get_sorted`]: Self::get_sorted #[must_use] - pub fn get_sorted_unchecked(&self, i: usize) -> BitVec { + pub fn get_sorted_unchecked(&self, i: u64) -> BitVec { self.quantile_unchecked(0..self.len(), i) } @@ -1138,7 +1152,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_sorted(2), Some(BitVec::pack_sequence_u8(&[2], 3))); /// ``` #[must_use] - pub fn get_sorted(&self, i: usize) -> Option { + pub fn get_sorted(&self, i: u64) -> Option { if i >= self.len() { None } else { @@ -1162,7 +1176,7 @@ impl WaveletMatrix { /// /// [`quantile_u64`]: WaveletMatrix::quantile_u64 #[must_use] - pub fn quantile_u64_unchecked(&self, range: Range, k: usize) -> u64 { + pub fn quantile_u64_unchecked(&self, range: Range, k: u64) -> u64 { self.partial_quantile_search_u64_unchecked(range, k, 0, 0) } @@ -1175,8 +1189,8 @@ impl WaveletMatrix { #[inline(always)] fn partial_quantile_search_u64_unchecked( &self, - mut range: Range, - mut k: usize, + mut range: Range, + mut k: u64, start_level: usize, mut prefix: u64, ) -> u64 { @@ -1224,7 +1238,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.quantile_u64(1..4, 0), Some(1)); /// ``` #[must_use] - pub fn quantile_u64(&self, range: Range, k: usize) -> Option { + pub fn quantile_u64(&self, range: Range, k: u64) -> Option { if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 @@ -1249,7 +1263,7 @@ impl WaveletMatrix { /// /// [`get_sorted_u64`]: WaveletMatrix::get_sorted_u64 #[must_use] - pub fn get_sorted_u64_unchecked(&self, i: usize) -> u64 { + pub fn get_sorted_u64_unchecked(&self, i: u64) -> u64 { self.quantile_u64_unchecked(0..self.len(), i) } @@ -1270,7 +1284,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_sorted_u64(2), Some(2)); /// ``` #[must_use] - pub fn get_sorted_u64(&self, i: usize) -> Option { + pub fn get_sorted_u64(&self, i: u64) -> Option { if i >= self.len() || self.bits_per_element() > 64 { None } else { @@ -1291,7 +1305,7 @@ impl WaveletMatrix { /// /// [`range_min`]: WaveletMatrix::range_min #[must_use] - pub fn range_min_unchecked(&self, range: Range) -> BitVec { + pub fn range_min_unchecked(&self, range: Range) -> BitVec { self.quantile_unchecked(range, 0) } @@ -1313,7 +1327,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_min(1..3), Some(BitVec::pack_sequence_u8(&[4], 3))); /// ``` #[must_use] - pub fn range_min(&self, range: Range) -> Option { + pub fn range_min(&self, range: Range) -> Option { self.quantile(range, 0) } @@ -1331,7 +1345,7 @@ impl WaveletMatrix { /// /// [`range_min_u64`]: WaveletMatrix::range_min_u64 #[must_use] - pub fn range_min_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_min_u64_unchecked(&self, range: Range) -> u64 { self.quantile_u64_unchecked(range, 0) } @@ -1354,7 +1368,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_min_u64(1..3), Some(4)); /// ``` #[must_use] - pub fn range_min_u64(&self, range: Range) -> Option { + pub fn range_min_u64(&self, range: Range) -> Option { self.quantile_u64(range, 0) } @@ -1372,7 +1386,7 @@ impl WaveletMatrix { /// /// [`range_max`]: WaveletMatrix::range_max #[must_use] - pub fn range_max_unchecked(&self, range: Range) -> BitVec { + pub fn range_max_unchecked(&self, range: Range) -> BitVec { let k = range.end - range.start - 1; self.quantile_unchecked(range, k) } @@ -1395,7 +1409,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_max(3..6), Some(BitVec::pack_sequence_u8(&[7], 3))); /// ``` #[must_use] - pub fn range_max(&self, range: Range) -> Option { + pub fn 
range_max(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1418,7 +1432,7 @@ impl WaveletMatrix { /// /// [`range_max_u64`]: WaveletMatrix::range_max_u64 #[must_use] - pub fn range_max_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_max_u64_unchecked(&self, range: Range) -> u64 { let k = range.end - range.start - 1; self.quantile_u64_unchecked(range, k) } @@ -1441,7 +1455,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_max_u64(3..6), Some(7)); /// ``` #[must_use] - pub fn range_max_u64(&self, range: Range) -> Option { + pub fn range_max_u64(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1466,7 +1480,7 @@ impl WaveletMatrix { /// /// [`range_median`]: WaveletMatrix::range_median #[must_use] - pub fn range_median_unchecked(&self, range: Range) -> BitVec { + pub fn range_median_unchecked(&self, range: Range) -> BitVec { let k = (range.end - 1 - range.start) / 2; self.quantile_unchecked(range, k) } @@ -1492,7 +1506,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_median(0..6), Some(BitVec::pack_sequence_u8(&[2], 3))); /// ``` #[must_use] - pub fn range_median(&self, range: Range) -> Option { + pub fn range_median(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1517,7 +1531,7 @@ impl WaveletMatrix { /// /// [`range_median_u64`]: WaveletMatrix::range_median_u64 #[must_use] - pub fn range_median_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_median_u64_unchecked(&self, range: Range) -> u64 { let k = (range.end - 1 - range.start) / 2; self.quantile_u64_unchecked(range, k) } @@ -1543,7 +1557,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_median_u64(0..6), Some(2)); /// ``` #[must_use] - pub fn range_median_u64(&self, range: Range) -> Option { + pub fn range_median_u64(&self, range: Range) -> Option { if range.is_empty() || self.bits_per_element() > 64 || range.end > self.len() { None } else { @@ -1561,10 +1575,10 @@ impl WaveletMatrix { T: Clone, Reader: Fn(usize, &T) -> u64, Writer: Fn(u64, usize, &mut T), - Quantile: Fn(&Self, Range, usize, usize, T) -> T, + Quantile: Fn(&Self, Range, u64, usize, T) -> T, >( &self, - mut range: Range, + mut range: Range, symbol: &T, mut result_value: T, bit_reader: Reader, @@ -1577,7 +1591,7 @@ impl WaveletMatrix { // the level of the last node where we could go to an interval with smaller elements let mut last_one_level: Option = None; // the range of the last node where we could go to an interval with smaller elements - let mut next_smaller_range: Option> = None; + let mut next_smaller_range: Option> = None; for (level, data) in self.data.iter().enumerate() { let query_bit = bit_reader(level, symbol); @@ -1672,8 +1686,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn predecessor(&self, range: Range, symbol: &BitVec) -> Option { - if symbol.len() != self.bits_per_element() + pub fn predecessor(&self, range: Range, symbol: &BitVec) -> Option { + if symbol.len() != self.bits_per_element() as u64 || range.is_empty() || self.is_empty() || range.end > self.len() @@ -1684,10 +1698,10 @@ impl WaveletMatrix { self.predecessor_generic_unchecked( range, symbol, - BitVec::from_zeros(self.bits_per_element()), - |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level), + BitVec::from_zeros(self.bits_per_element() as u64), + |level, symbol| symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64), |bit, level, result| { - result.set_unchecked((self.bits_per_element() - 1) - level, 
bit); + result.set_unchecked(((self.bits_per_element() - 1) - level) as u64, bit); }, Self::partial_quantile_search_unchecked, ) @@ -1716,7 +1730,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 7), Some(7)); /// ``` #[must_use] - pub fn predecessor_u64(&self, range: Range, symbol: u64) -> Option { + pub fn predecessor_u64(&self, range: Range, symbol: u64) -> Option { if self.bits_per_element() > 64 || range.is_empty() || self.is_empty() @@ -1745,10 +1759,10 @@ impl WaveletMatrix { T: Clone, Reader: Fn(usize, &T) -> u64, Writer: Fn(u64, usize, &mut T), - Quantile: Fn(&Self, Range, usize, usize, T) -> T, + Quantile: Fn(&Self, Range, u64, usize, T) -> T, >( &self, - mut range: Range, + mut range: Range, symbol: &T, mut result_value: T, bit_reader: Reader, @@ -1761,7 +1775,7 @@ impl WaveletMatrix { // the level of the last node where we could go to an interval with larger elements let mut last_zero_level: Option = None; // the range of the last node where we could go to an interval with larger elements - let mut next_larger_range: Option> = None; + let mut next_larger_range: Option> = None; for (level, data) in self.data.iter().enumerate() { let query_bit = bit_reader(level, symbol); @@ -1859,8 +1873,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn successor(&self, range: Range, symbol: &BitVec) -> Option { - if symbol.len() != self.bits_per_element() + pub fn successor(&self, range: Range, symbol: &BitVec) -> Option { + if symbol.len() != self.bits_per_element() as u64 || range.is_empty() || self.is_empty() || range.end > self.len() @@ -1871,10 +1885,10 @@ impl WaveletMatrix { self.successor_generic_unchecked( range, symbol, - BitVec::from_zeros(self.bits_per_element()), - |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level), + BitVec::from_zeros(self.bits_per_element() as u64), + |level, symbol| symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64), |bit, level, result| { - result.set_unchecked((self.bits_per_element() - 1) - level, bit); + result.set_unchecked(((self.bits_per_element() - 1) - level) as u64, bit); }, Self::partial_quantile_search_unchecked, ) @@ -1903,7 +1917,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.successor_u64(0..6, 2), Some(2)); /// ``` #[must_use] - pub fn successor_u64(&self, range: Range, symbol: u64) -> Option { + pub fn successor_u64(&self, range: Range, symbol: u64) -> Option { if self.bits_per_element() > 64 || range.is_empty() || self.is_empty() @@ -1942,7 +1956,7 @@ impl WaveletMatrix { /// assert_eq!(iter.collect::>(), vec![1, 4, 4, 1, 2, 7]); /// ``` #[must_use] - pub fn iter_u64(&self) -> Option { + pub fn iter_u64(&self) -> Option> { if self.bits_per_element() > 64 { None } else { @@ -1966,8 +1980,10 @@ impl WaveletMatrix { /// The iterator yields `BitVec` elements. /// /// See also [`iter_sorted_u64`] for an iterator that yields `u64` elements. + /// + /// [`iter_sorted_u64`]: Self::iter_sorted_u64 #[must_use] - pub fn iter_sorted(&self) -> WaveletSortedRefIter { + pub fn iter_sorted(&self) -> WaveletSortedRefIter<'_> { WaveletSortedRefIter::new(self) } @@ -1993,7 +2009,7 @@ impl WaveletMatrix { /// assert_eq!(iter.collect::>(), vec![1, 1, 2, 4, 4, 7]); /// ``` #[must_use] - pub fn iter_sorted_u64(&self) -> Option { + pub fn iter_sorted_u64(&self) -> Option> { if self.bits_per_element() > 64 { None } else { @@ -2020,17 +2036,9 @@ impl WaveletMatrix { self.data.len() } - /// Get the number of bits per element in the alphabet of the encoded sequence. 
- #[must_use] - #[deprecated(since = "1.5.1", note = "please use `bits_per_element` instead")] - #[allow(clippy::cast_possible_truncation)] - pub fn bit_len(&self) -> u16 { - self.bits_per_element() as u16 - } - /// Get the number of elements stored in the encoded sequence. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { if self.data.is_empty() { 0 } else { diff --git a/src/wavelet/tests.rs b/src/wavelet/tests.rs index c4cf4e7..0d2d231 100644 --- a/src/wavelet/tests.rs +++ b/src/wavelet/tests.rs @@ -37,9 +37,10 @@ fn test_wavelet_encoding_randomized() { let wavelet_prefix_counting = WaveletMatrix::from_bit_vec_pc(&BitVec::pack_sequence_u8(&data, 8), 8); - assert_eq!(wavelet.len(), data.len()); + assert_eq!(wavelet.len(), data.len() as u64); for (i, v) in data.iter().enumerate() { + let i = i as u64; assert_eq!(wavelet.get_u64_unchecked(i), *v as u64); assert_eq!(wavelet_from_slice.get_u64_unchecked(i), *v as u64); assert_eq!(wavelet_prefix_counting.get_u64_unchecked(i), *v as u64); @@ -138,7 +139,7 @@ fn test_rank_randomized() { let symbol_bit_vec = BitVec::pack_sequence_u8(&[symbol], 8); let mut rank = 0; for (i, v) in data.iter().enumerate() { - assert_eq!(wavelet.rank_unchecked(i, &symbol_bit_vec), rank); + assert_eq!(wavelet.rank_unchecked(i as u64, &symbol_bit_vec), rank); if *v == symbol { rank += 1; } @@ -230,10 +231,10 @@ fn test_quantile() { for (i, v) in sequence.iter().enumerate() { assert_eq!( - wavelet.quantile(0..10, i), + wavelet.quantile(0..10, i as u64), Some(BitVec::pack_sequence_u8(&[*v as u8], 4)) ); - assert_eq!(wavelet.quantile_u64(0..10, i), Some(*v)); + assert_eq!(wavelet.quantile_u64(0..10, i as u64), Some(*v)); } assert_eq!(wavelet.quantile(0..10, 10), None); @@ -269,8 +270,8 @@ fn test_quantile_randomized() { let wavelet = WaveletMatrix::from_bit_vec(&BitVec::pack_sequence_u8(&data, 8), 8); for _ in 0..1000 { - let range_i = rng.gen_range(0..data.len()); - let range_j = rng.gen_range(0..data.len()); + let range_i = rng.gen_range(0..data.len() as u64); + let range_j = rng.gen_range(0..data.len() as u64); let range = min(range_i, range_j)..max(range_i, range_j); let k = if range.is_empty() { @@ -279,7 +280,7 @@ fn test_quantile_randomized() { rng.gen_range(range.clone()) - range.start }; - let mut range_data = data[range.clone()].to_vec(); + let mut range_data = data[range.start as usize..range.end as usize].to_vec(); range_data.sort_unstable(); assert_eq!( @@ -287,7 +288,7 @@ fn test_quantile_randomized() { if range.is_empty() { None } else { - Some(range_data[k] as u64) + Some(range_data[k as usize] as u64) } ); assert_eq!( From 4890f9d1bd12b40bdd1dc8086a34c238f3fd6cca Mon Sep 17 00:00:00 2001 From: Cydhra Date: Mon, 20 Oct 2025 00:06:51 +0200 Subject: [PATCH 3/5] Fix inconsistent API naming (#35) * renamed BpTree::from_bit_vector to ::from_bit_vec * renamed SparseRSVec to SparseRsVec * renamed FastRmq to SmallRmq * renamed BinaryRmq to SparseRmq * renamed BitVec::from_bits to from_bits_u8 * renamed modules elias_fano and fast_rs_vec to ef and rs * add instructions to migration guide --- benches/rmq.rs | 4 +- migrate.md | 12 +- src/bit_vec/mod.rs | 34 ++--- src/bit_vec/{fast_rs_vec => rs}/bitset.rs | 0 src/bit_vec/{fast_rs_vec => rs}/iter.rs | 2 +- src/bit_vec/{fast_rs_vec => rs}/mod.rs | 0 src/bit_vec/{fast_rs_vec => rs}/select.rs | 2 +- src/bit_vec/{fast_rs_vec => rs}/tests.rs | 0 src/bit_vec/sparse.rs | 42 +++--- src/bit_vec/tests.rs | 2 +- src/{elias_fano => ef}/mod.rs | 0 src/{elias_fano => ef}/tests.rs | 0 src/lib.rs | 12 +- 
src/rmq/binary_rmq/mod.rs | 22 +- src/rmq/binary_rmq/tests.rs | 12 +- src/rmq/fast_rmq/mod.rs | 30 ++--- src/rmq/fast_rmq/tests.rs | 10 +- src/trees/bp/builder.rs | 2 +- src/trees/bp/mod.rs | 12 +- src/trees/bp/tests.rs | 156 +++++++++++----------- src/trees/mmt.rs | 26 ++-- src/util/elias_fano_iter.rs | 8 +- 22 files changed, 199 insertions(+), 189 deletions(-) rename src/bit_vec/{fast_rs_vec => rs}/bitset.rs (100%) rename src/bit_vec/{fast_rs_vec => rs}/iter.rs (99%) rename src/bit_vec/{fast_rs_vec => rs}/mod.rs (100%) rename src/bit_vec/{fast_rs_vec => rs}/select.rs (99%) rename src/bit_vec/{fast_rs_vec => rs}/tests.rs (100%) rename src/{elias_fano => ef}/mod.rs (100%) rename src/{elias_fano => ef}/tests.rs (100%) diff --git a/benches/rmq.rs b/benches/rmq.rs index a9506c6..81fae46 100644 --- a/benches/rmq.rs +++ b/benches/rmq.rs @@ -1,7 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; use rand::distributions::{Distribution, Uniform}; use rand::Rng; -use vers_vecs::rmq::fast_rmq::FastRmq; +use vers_vecs::rmq::fast_rmq::SmallRmq; mod common; @@ -12,7 +12,7 @@ fn bench_rmq(b: &mut Criterion) { group.plot_config(common::plot_config()); for l in common::SIZES { - let rmq = FastRmq::from_vec(common::fill_random_vec(&mut rng, l)); + let rmq = SmallRmq::from_vec(common::fill_random_vec(&mut rng, l)); let sample = Uniform::new(0, rmq.len()); group.bench_with_input(BenchmarkId::new("range_min", l), &l, |b, _| { b.iter_batched( diff --git a/migrate.md b/migrate.md index 33cfd51..f9f58fc 100644 --- a/migrate.md +++ b/migrate.md @@ -1,3 +1,13 @@ # Migration Guide from 1.X to 2.0 The following guide explains the changes from versions 1.X to the 2.0 release and points out what changes are necessary -to downstream crates. \ No newline at end of file +to downstream crates. + +## Renamed Members +The following structures and functions were renamed: +- `BpTree::from_bit_vector` to `BpTree::from_bit_vec` +- `SparseRSVec` to `SparseRsVec` +- `FastRmq` to `SmallRmq` +- `BinaryRmq` to `SparseRmq` +- `BitVec::from_bits` to `BitVec::from_bits_u8` +- module `fast_rs_vec` to `rs` +- module `elias_fano` to `ef` \ No newline at end of file diff --git a/src/bit_vec/mod.rs b/src/bit_vec/mod.rs index 2f34134..9c7fa73 100644 --- a/src/bit_vec/mod.rs +++ b/src/bit_vec/mod.rs @@ -7,7 +7,7 @@ use std::cmp::min; use std::hash::{Hash, Hasher}; use std::mem::size_of; -pub mod fast_rs_vec; +pub mod rs; pub mod sparse; @@ -29,7 +29,7 @@ pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>; /// The bit vector has a wide range of constructors that allow for easy creation from various /// sources. /// Among them are constructors for creating an empty vector ([`BitVec::new`]), -/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations), +/// creating one from single bits of various integer types ([`BitVec::from_bits_u8`] and variations), /// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations), /// or packing a sequence of numerical values into a dense bit sequence /// ([`BitVec::pack_sequence_u64`] and variations).
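To make the rename list above concrete for downstream crates, a minimal before/after sketch (values illustrative only; the `BpTree` import path is assumed from the source layout and may differ via re-exports):

// vers-vecs 1.x
// use vers_vecs::SparseRSVec;
// let bv = BitVec::from_bits(&[1, 1, 0, 0]);
// let tree = BpTree::<512>::from_bit_vector(bv);
// let sparse = SparseRSVec::new(&[1, 3], 8);

// vers-vecs 2.0
use vers_vecs::{BitVec, SparseRsVec};
use vers_vecs::trees::bp::BpTree; // path assumed

let bv = BitVec::from_bits_u8(&[1, 1, 0, 0]);
let tree = BpTree::<512>::from_bit_vec(bv);
let sparse = SparseRsVec::new(&[1, 3], 8);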
@@ -114,7 +114,7 @@ impl BitVec { /// use vers_vecs::BitVec; /// /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1]; - /// let bv = BitVec::from_bits(&bits); + /// let bv = BitVec::from_bits_u8(&bits); /// /// assert_eq!(bv.len(), 6); /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64)); @@ -125,7 +125,7 @@ impl BitVec { /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] - pub fn from_bits(bits: &[u8]) -> Self { + pub fn from_bits_u8(bits: &[u8]) -> Self { let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit(b.into())); bv @@ -136,9 +136,9 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter @@ -154,9 +154,9 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter @@ -172,9 +172,9 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_iter`]: BitVec::from_bits_iter @@ -191,7 +191,7 @@ impl BitVec { /// All other bits are ignored. /// The iterator must yield values that can be converted into u64 values. 
/// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`] /// /// # Example /// ```rust @@ -208,7 +208,7 @@ impl BitVec { /// assert_eq!(bv, bv2); /// ``` /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_u64`]: BitVec::from_bits_u64 @@ -517,7 +517,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.drop_last(3); /// /// assert_eq!(bv.len(), 3); @@ -776,7 +776,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.flip_bit(1); /// /// assert_eq!(bv.len(), 6); @@ -815,7 +815,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// /// assert_eq!(bv.get(1), Some(0)); /// assert_eq!(bv.get(2), Some(1)); @@ -855,7 +855,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.set(1, 1).unwrap(); /// /// assert_eq!(bv.len(), 6); @@ -901,7 +901,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// /// assert!(!bv.is_bit_set(1).unwrap()); /// assert!(bv.is_bit_set(2).unwrap()); diff --git a/src/bit_vec/fast_rs_vec/bitset.rs b/src/bit_vec/rs/bitset.rs similarity index 100% rename from src/bit_vec/fast_rs_vec/bitset.rs rename to src/bit_vec/rs/bitset.rs diff --git a/src/bit_vec/fast_rs_vec/iter.rs b/src/bit_vec/rs/iter.rs similarity index 99% rename from src/bit_vec/fast_rs_vec/iter.rs rename to src/bit_vec/rs/iter.rs index 198af18..5a43a9d 100644 --- a/src/bit_vec/fast_rs_vec/iter.rs +++ b/src/bit_vec/rs/iter.rs @@ -1,4 +1,4 @@ -use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; +use crate::bit_vec::rs::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; use crate::RsVec; use std::iter::FusedIterator; use std::num::NonZeroUsize; diff --git a/src/bit_vec/fast_rs_vec/mod.rs b/src/bit_vec/rs/mod.rs similarity index 100% rename from src/bit_vec/fast_rs_vec/mod.rs rename to src/bit_vec/rs/mod.rs diff --git a/src/bit_vec/fast_rs_vec/select.rs b/src/bit_vec/rs/select.rs similarity index 99% rename from src/bit_vec/fast_rs_vec/select.rs rename to src/bit_vec/rs/select.rs index 9d8578e..9e7ae85 100644 --- a/src/bit_vec/fast_rs_vec/select.rs +++ b/src/bit_vec/rs/select.rs @@ -1,6 +1,6 @@ // Select code is in here to keep it more organized. 
-use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE};
+use crate::bit_vec::rs::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE};
 use crate::bit_vec::WORD_SIZE;
 use crate::util::pdep::Pdep;
 use crate::util::unroll;
diff --git a/src/bit_vec/fast_rs_vec/tests.rs b/src/bit_vec/rs/tests.rs
similarity index 100%
rename from src/bit_vec/fast_rs_vec/tests.rs
rename to src/bit_vec/rs/tests.rs
diff --git a/src/bit_vec/sparse.rs b/src/bit_vec/sparse.rs
index 261df70..b422489 100644
--- a/src/bit_vec/sparse.rs
+++ b/src/bit_vec/sparse.rs
@@ -15,9 +15,9 @@ use crate::{BitVec, EliasFanoVec};
 ///
 /// # Examples
 /// ```
-/// use vers_vecs::SparseRSVec;
+/// use vers_vecs::SparseRsVec;
 ///
-/// let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
+/// let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12);
 /// assert_eq!(sparse.get(5), Some(1));
 /// assert_eq!(sparse.get(11), Some(0));
 /// assert_eq!(sparse.get(12), None);
@@ -28,14 +28,14 @@ use crate::{BitVec, EliasFanoVec};
 ///
 /// It can also be constructed from a `BitVec` directly:
 /// ```
-/// use vers_vecs::SparseRSVec;
+/// use vers_vecs::SparseRsVec;
 /// use vers_vecs::BitVec;
 ///
 /// let mut bv = BitVec::from_zeros(12);
 /// bv.flip_bit(6);
 /// bv.flip_bit(7);
 ///
-/// let sparse = SparseRSVec::from_bitvec(&bv);
+/// let sparse = SparseRsVec::from_bitvec(&bv);
 /// assert_eq!(sparse.rank1(5), 0);
 /// assert_eq!(sparse.select1(0), 6);
 /// ```
@@ -44,12 +44,12 @@ use crate::{BitVec, EliasFanoVec};
 /// [`from_bitvec_inverted`]: #method.from_bitvec_inverted
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct SparseRSVec {
+pub struct SparseRsVec {
     vec: EliasFanoVec,
     len: u64,
 }

-impl SparseRSVec {
+impl SparseRsVec {
     /// Creates a new `SparseRsVec` from a sequence of set bits represented as indices.
     /// The input must be sorted in ascending order and free of duplicates.
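The `SparseRSVec` to `SparseRsVec` rename likewise changes only the name, not the behavior; a short sketch using only calls shown in the doctests above:

```rust
use vers_vecs::SparseRsVec;

// Positions of the set bits, plus the total length of the vector.
let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12);
assert_eq!(sparse.get(5), Some(1));  // bit 5 is set
assert_eq!(sparse.get(11), Some(0)); // bit 11 is an in-bounds zero
assert_eq!(sparse.get(12), None);    // out of bounds
```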
/// @@ -109,7 +109,7 @@ impl SparseRSVec { /// /// # Example /// ``` - /// use vers_vecs::SparseRSVec; + /// use vers_vecs::SparseRsVec; /// use vers_vecs::BitVec; /// /// let mut bv = BitVec::from_ones(12); @@ -117,7 +117,7 @@ impl SparseRSVec { /// bv.flip_bit(6); /// bv.flip_bit(7); /// - /// let sparse = SparseRSVec::from_bitvec_inverted(&bv); + /// let sparse = SparseRsVec::from_bitvec_inverted(&bv); /// // now select1 gives the position of 0-bits /// assert_eq!(sparse.select1(1), 7); /// ``` @@ -240,13 +240,13 @@ impl SparseRSVec { } } -impl From for SparseRSVec { +impl From for SparseRsVec { fn from(input: BitVec) -> Self { Self::from_bitvec_inverted(&input) } } -impl<'a> From<&'a BitVec> for SparseRSVec { +impl<'a> From<&'a BitVec> for SparseRsVec { fn from(input: &'a BitVec) -> Self { Self::from_bitvec_inverted(input) } @@ -254,14 +254,14 @@ impl<'a> From<&'a BitVec> for SparseRSVec { #[cfg(test)] mod tests { - use super::SparseRSVec; + use super::SparseRsVec; use crate::BitVec; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; #[test] fn test_sparse_rank() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(2), 1); @@ -280,7 +280,7 @@ mod tests { #[test] fn test_sparse_select() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.select1(0), 1); assert_eq!(sparse.select1(1), 3); assert_eq!(sparse.select1(2), 5); @@ -292,7 +292,7 @@ mod tests { #[test] fn test_sparse_rank0() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.rank0(0), 0); assert_eq!(sparse.rank0(1), 1); assert_eq!(sparse.rank0(2), 1); @@ -311,7 +311,7 @@ mod tests { #[test] fn test_empty_sparse() { - let sparse = SparseRSVec::new(&[], 0); + let sparse = SparseRsVec::new(&[], 0); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(999), 0); @@ -327,7 +327,7 @@ mod tests { #[test] fn test_sparse_get() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.get(0), Some(0)); assert_eq!(sparse.get(1), Some(1)); assert_eq!(sparse.get(2), Some(0)); @@ -350,7 +350,7 @@ mod tests { bv.flip_bit(6); bv.flip_bit(7); - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 1); assert_eq!(sparse.rank1(2), 2); @@ -359,7 +359,7 @@ mod tests { assert_eq!(sparse.rank1(9), 7); assert_eq!(sparse.rank1(12), 10); - let sparse = SparseRSVec::from_bitvec_inverted(&bv); + let sparse = SparseRsVec::from_bitvec_inverted(&bv); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(2), 0); @@ -372,7 +372,7 @@ mod tests { #[test] fn test_large_block() { // test that the implementation works correctly if the search triggers a binary search - let sparse = SparseRSVec::new( + let sparse = SparseRsVec::new( &[ 1, 100_000, 100_001, 100_002, 100_003, 100_004, 100_005, 100_006, 100_007, 100_008, 100_009, 100_010, 1_000_000, @@ -393,7 +393,7 @@ mod tests { bv.flip_bit(rng.gen_range(0..L)); } - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); let mut ones = 0; for i in 0..L { @@ -418,7 +418,7 @@ mod tests { bv.append_bit(0); bv.drop_last(1); - let sparse = 
SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); assert_eq!(sparse.len(), 2); assert_eq!(sparse.get(0), Some(1)); assert_eq!(sparse.get(1), Some(0)); diff --git a/src/bit_vec/tests.rs b/src/bit_vec/tests.rs index 0e01c2c..345906c 100644 --- a/src/bit_vec/tests.rs +++ b/src/bit_vec/tests.rs @@ -475,7 +475,7 @@ fn test_apply_masks() { #[test] fn test_from_bits() { - let bv = BitVec::from_bits(&[1, 0, 1]); + let bv = BitVec::from_bits_u8(&[1, 0, 1]); assert_eq!(bv.len, 3); assert_eq!(bv.get_bits(0, 3), Some(0b101)); diff --git a/src/elias_fano/mod.rs b/src/ef/mod.rs similarity index 100% rename from src/elias_fano/mod.rs rename to src/ef/mod.rs diff --git a/src/elias_fano/tests.rs b/src/ef/tests.rs similarity index 100% rename from src/elias_fano/tests.rs rename to src/ef/tests.rs diff --git a/src/lib.rs b/src/lib.rs index 20958a7..3ee5f00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,12 +55,12 @@ //! - `bp_u16_lookup` (disabled by default): Uses a 16-bit lookup table for the balanced parenthesis //! tree data structure. This is faster, but requires 128 KiB instead of 4 KiB. -pub use bit_vec::fast_rs_vec::RsVec; -pub use bit_vec::sparse::SparseRSVec; +pub use bit_vec::rs::RsVec; +pub use bit_vec::sparse::SparseRsVec; pub use bit_vec::BitVec; -pub use elias_fano::EliasFanoVec; -pub use rmq::binary_rmq::BinaryRmq; -pub use rmq::fast_rmq::FastRmq; +pub use ef::EliasFanoVec; +pub use rmq::binary_rmq::SparseRmq; +pub use rmq::fast_rmq::SmallRmq; pub use trees::bp::{BpBuilder, BpTree}; pub use trees::{IsAncestor, LevelTree, SubtreeSize, Tree, TreeBuilder}; pub use wavelet::WaveletMatrix; @@ -68,7 +68,7 @@ pub use wavelet::WaveletMatrix; pub mod bit_vec; #[forbid(unsafe_code)] -pub mod elias_fano; +pub mod ef; #[forbid(unsafe_code)] pub mod rmq; diff --git a/src/rmq/binary_rmq/mod.rs b/src/rmq/binary_rmq/mod.rs index 84962ca..9191ca6 100644 --- a/src/rmq/binary_rmq/mod.rs +++ b/src/rmq/binary_rmq/mod.rs @@ -17,10 +17,10 @@ use std::ops::{Deref, RangeBounds}; /// /// # Example /// ```rust -/// use vers_vecs::BinaryRmq; +/// use vers_vecs::SparseRmq; /// /// let data = vec![4, 10, 3, 11, 2, 12]; -/// let rmq = BinaryRmq::from_vec(data); +/// let rmq = SparseRmq::from_vec(data); /// /// assert_eq!(rmq.range_min(0, 1), 0); /// assert_eq!(rmq.range_min(0, 2), 2); @@ -28,7 +28,7 @@ use std::ops::{Deref, RangeBounds}; /// ``` #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct BinaryRmq { +pub struct SparseRmq { data: Vec, // store indices relative to start of range. There is no way to have ranges exceeding 2^32 bits @@ -37,7 +37,7 @@ pub struct BinaryRmq { results: Vec, } -impl BinaryRmq { +impl SparseRmq { /// Create a new RMQ data structure for the given data. This uses O(n log n) space and /// precalculates the minimum element in intervals 2^k for all k for all elements. /// @@ -105,14 +105,14 @@ impl BinaryRmq { Self { data, results } } - /// Convenience function for [`BinaryRmq::range_min`] for using range operators. + /// Convenience function for [`SparseRmq::range_min`] for using range operators. /// The range is clamped to the length of the data structure, so this function will not panic, /// unless called on an empty data structure, because that does not have a valid index. 
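Call sites of the renamed RMQ structure only need the new name; the query API is unchanged. A sketch based on the doctest above:

```rust
use vers_vecs::SparseRmq;

// 1.x: BinaryRmq::from_vec(data)
let rmq = SparseRmq::from_vec(vec![4, 10, 3, 11, 2, 12]);
assert_eq!(rmq.range_min(0, 1), 0); // index of the minimum; bounds are inclusive
assert_eq!(rmq.range_min(0, 2), 2);
```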
/// /// # Example /// ```rust - /// use vers_vecs::BinaryRmq; - /// let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]); + /// use vers_vecs::SparseRmq; + /// let rmq = SparseRmq::from_vec(vec![5, 4, 3, 2, 1]); /// assert_eq!(rmq.range_min_with_range(0..3), 2); /// assert_eq!(rmq.range_min_with_range(0..=3), 3); /// ``` @@ -169,7 +169,7 @@ impl BinaryRmq { /// Implements Deref to delegate to the underlying data structure. This allows the user to use /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators, /// etc. -impl Deref for BinaryRmq { +impl Deref for SparseRmq { type Target = Vec; fn deref(&self) -> &Self::Target { @@ -177,7 +177,7 @@ impl Deref for BinaryRmq { } } -impl From> for BinaryRmq { +impl From> for SparseRmq { fn from(data: Vec) -> Self { Self::from_vec(data) } @@ -188,8 +188,8 @@ impl From> for BinaryRmq { /// /// See [`BinaryRmq::from_vec`] for more information. /// -/// [`BinaryRmq::from_vec`]: BinaryRmq::from_vec -impl FromIterator for BinaryRmq { +/// [`BinaryRmq::from_vec`]: SparseRmq::from_vec +impl FromIterator for SparseRmq { fn from_iter>(iter: T) -> Self { Self::from_vec(iter.into_iter().collect()) } diff --git a/src/rmq/binary_rmq/tests.rs b/src/rmq/binary_rmq/tests.rs index 9e5fa63..8a8731f 100644 --- a/src/rmq/binary_rmq/tests.rs +++ b/src/rmq/binary_rmq/tests.rs @@ -1,9 +1,9 @@ -use crate::rmq::binary_rmq::BinaryRmq; +use crate::rmq::binary_rmq::SparseRmq; use rand::RngCore; #[test] fn small_test() { - let rmq = BinaryRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]); + let rmq = SparseRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]); assert_eq!(rmq.range_min(0, 0), 0); assert_eq!(rmq.range_min(0, 1), 1); @@ -25,7 +25,7 @@ fn randomized_test() { numbers_vec.push(rng.next_u64()); } - let rmq = BinaryRmq::from_vec(numbers_vec.clone()); + let rmq = SparseRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -43,7 +43,7 @@ fn randomized_test() { #[test] fn test_iter() { - let rmq = BinaryRmq::from_vec(vec![1, 2, 3, 4, 5]); + let rmq = SparseRmq::from_vec(vec![1, 2, 3, 4, 5]); let mut iter = rmq.iter(); assert_eq!(iter.next(), Some(&1)); assert_eq!(iter.next(), Some(&2)); @@ -55,7 +55,7 @@ fn test_iter() { #[test] fn test_range_operators() { - let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]); + let rmq = SparseRmq::from_vec(vec![5, 4, 3, 2, 1]); assert_eq!(rmq.range_min(0, 3), 3); assert_eq!(rmq.range_min_with_range(0..3), 2); assert_eq!(rmq.range_min_with_range(0..=3), 3); @@ -63,7 +63,7 @@ fn test_range_operators() { #[test] fn test_empty_rmq() { - let rmq = BinaryRmq::from_vec(Vec::::new()); + let rmq = SparseRmq::from_vec(Vec::::new()); assert!(rmq.is_empty()); // calling functions on an empty rmq will panic because the upper bound is inclusive, but there // is no valid index in an empty array, so we can't test anything else diff --git a/src/rmq/fast_rmq/mod.rs b/src/rmq/fast_rmq/mod.rs index c451ce3..c91ad24 100644 --- a/src/rmq/fast_rmq/mod.rs +++ b/src/rmq/fast_rmq/mod.rs @@ -7,7 +7,7 @@ use std::cmp::min_by; use std::mem::size_of; use std::ops::{Bound, Deref, RangeBounds}; -use crate::rmq::binary_rmq::BinaryRmq; +use crate::rmq::binary_rmq::SparseRmq; use crate::util::pdep::Pdep; /// Size of the blocks the data is split into. One block is indexable with a u8, hence its size. 
@@ -66,10 +66,10 @@ struct Block {
 ///
 /// # Example
 /// ```rust
-/// use vers_vecs::FastRmq;
+/// use vers_vecs::SmallRmq;
 ///
 /// let data = vec![4, 10, 3, 11, 2, 12];
-/// let rmq = FastRmq::from_vec(data);
+/// let rmq = SmallRmq::from_vec(data);
 ///
 /// assert_eq!(rmq.range_min(0, 1), 0);
 /// assert_eq!(rmq.range_min(0, 2), 2);
@@ -77,17 +77,17 @@ struct Block {
 /// ```
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct FastRmq {
+pub struct SmallRmq {
     data: Vec<u64>,
-    block_minima: BinaryRmq,
+    block_minima: SparseRmq,
     block_min_indices: Vec<u8>,
     blocks: Vec<Block>,
 }

-impl FastRmq {
+impl SmallRmq {
     /// Creates a new range minimum query data structure from the given data. Creation time is
     /// O(n log n) and space overhead is O(n log n) with a fractional constant factor
-    /// (see [`FastRmq`])
+    /// (see [`SmallRmq`])
     ///
     /// # Panics
     /// This function will panic if the input is larger than 2^40 elements.
@@ -142,20 +142,20 @@ impl FastRmq {
         Self {
             data,
-            block_minima: BinaryRmq::from_vec(block_minima),
+            block_minima: SparseRmq::from_vec(block_minima),
             block_min_indices,
             blocks,
         }
     }

-    /// Convenience function for [`FastRmq::range_min`] for using range operators.
+    /// Convenience function for [`SmallRmq::range_min`] for using range operators.
     /// The range is clamped to the length of the data structure, so this function will not panic,
     /// unless called on an empty data structure, because that does not have a valid index.
     ///
     /// # Example
     /// ```rust
-    /// use vers_vecs::FastRmq;
-    /// let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]);
+    /// use vers_vecs::SmallRmq;
+    /// let rmq = SmallRmq::from_vec(vec![5, 4, 3, 2, 1]);
     /// assert_eq!(rmq.range_min_with_range(0..3), 2);
     /// assert_eq!(rmq.range_min_with_range(0..=3), 3);
     /// ```
@@ -290,7 +290,7 @@ impl FastRmq {
 /// Implements Deref to delegate to the underlying data structure. This allows the user to use
 /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators,
 /// etc.
-impl Deref for FastRmq {
+impl Deref for SmallRmq {
     type Target = Vec<u64>;

     fn deref(&self) -> &Self::Target {
@@ -298,7 +298,7 @@ impl Deref for FastRmq {
     }
 }

-impl From<Vec<u64>> for FastRmq {
+impl From<Vec<u64>> for SmallRmq {
     fn from(data: Vec<u64>) -> Self {
         Self::from_vec(data)
     }
@@ -309,8 +309,8 @@ impl From<Vec<u64>> for FastRmq {
 ///
 /// See [`FastRmq::from_vec`] for more information.
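The same pattern applies to `FastRmq`, now `SmallRmq`; the range-operator convenience method carries over unchanged, as in the doctest above:

```rust
use vers_vecs::SmallRmq;

// 1.x: FastRmq::from_vec(...)
let rmq = SmallRmq::from_vec(vec![5, 4, 3, 2, 1]);
assert_eq!(rmq.range_min(0, 3), 3);             // inclusive upper bound
assert_eq!(rmq.range_min_with_range(0..3), 2);  // half-open range operator
assert_eq!(rmq.range_min_with_range(0..=3), 3); // inclusive range operator
```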
/// -/// [`FastRmq::from_vec`]: FastRmq::from_vec -impl FromIterator for FastRmq { +/// [`FastRmq::from_vec`]: SmallRmq::from_vec +impl FromIterator for SmallRmq { fn from_iter>(iter: T) -> Self { Self::from_vec(iter.into_iter().collect()) } diff --git a/src/rmq/fast_rmq/tests.rs b/src/rmq/fast_rmq/tests.rs index f6e1bc9..aac8bdf 100644 --- a/src/rmq/fast_rmq/tests.rs +++ b/src/rmq/fast_rmq/tests.rs @@ -45,7 +45,7 @@ fn test_fast_rmq() { numbers_vec.push(i as u64); } - let rmq = FastRmq::from_vec(numbers_vec.clone()); + let rmq = SmallRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -70,7 +70,7 @@ fn test_fast_rmq_unsorted() { numbers_vec.push(rng.next_u64()); } - let rmq = FastRmq::from_vec(numbers_vec.clone()); + let rmq = SmallRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -88,7 +88,7 @@ fn test_fast_rmq_unsorted() { #[test] fn test_iter() { - let rmq = FastRmq::from_vec(vec![1, 2, 3, 4, 5]); + let rmq = SmallRmq::from_vec(vec![1, 2, 3, 4, 5]); let mut iter = rmq.iter(); assert_eq!(iter.next(), Some(&1)); assert_eq!(iter.next(), Some(&2)); @@ -100,7 +100,7 @@ fn test_iter() { #[test] fn test_range_operators() { - let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]); + let rmq = SmallRmq::from_vec(vec![5, 4, 3, 2, 1]); assert_eq!(rmq.range_min(0, 3), 3); assert_eq!(rmq.range_min_with_range(0..3), 2); assert_eq!(rmq.range_min_with_range(0..=3), 3); @@ -108,7 +108,7 @@ fn test_range_operators() { #[test] fn test_empty_rmq() { - let _rmq = FastRmq::from_vec(Vec::::new()); + let _rmq = SmallRmq::from_vec(Vec::::new()); // calling functions on an empty rmq will panic because the upper bound is inclusive, but there // is no valid index in an empty array, so we can't test anything else } diff --git a/src/trees/bp/builder.rs b/src/trees/bp/builder.rs index 9baa122..7553925 100644 --- a/src/trees/bp/builder.rs +++ b/src/trees/bp/builder.rs @@ -55,7 +55,7 @@ impl TreeBuilder for BpBuilder { if self.excess != 0 { Err(self.excess) } else { - Ok(BpTree::from_bit_vector(self.bit_vec)) + Ok(BpTree::from_bit_vec(self.bit_vec)) } } } diff --git a/src/trees/bp/mod.rs b/src/trees/bp/mod.rs index 89b0d82..321e1e0 100644 --- a/src/trees/bp/mod.rs +++ b/src/trees/bp/mod.rs @@ -3,7 +3,7 @@ //! time, as well as subtree size, level-order, and ancestor queries in `O(log n)` time. //! The tree is succinct (ideally sublinear space overhead) and pointer-less. -use crate::bit_vec::fast_rs_vec::SelectIntoIter; +use crate::bit_vec::rs::SelectIntoIter; use crate::trees::mmt::MinMaxTree; use crate::trees::{IsAncestor, LevelTree, SubtreeSize, Tree}; use crate::{BitVec, RsVec}; @@ -122,7 +122,7 @@ use lookup_query::{process_block_bwd, process_block_fwd, LOOKUP_BLOCK_SIZE}; /// # #![allow(long_running_const_eval)] /// use vers_vecs::{BitVec, BpTree, Tree}; /// let bv = BitVec::pack_sequence_u8(&[0b1101_0111, 0b0010_0100], 8); -/// let tree = BpTree::<4>::from_bit_vector(bv); +/// let tree = BpTree::<4>::from_bit_vec(bv); /// /// let nodes = tree.dfs_iter().collect::>(); /// assert_eq!(nodes, vec![0, 1, 2, 4, 6, 7, 10, 13]); @@ -147,7 +147,7 @@ pub struct BpTree { impl BpTree { /// Construct a new `BpTree` from a given bit vector. 
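Constructing a tree through the renamed function, taken from the doctests below; since the `From<BitVec>` impl delegates to it, `BitVec::into` also keeps working:

```rust
use vers_vecs::{BitVec, BpTree, Tree};

// 1.x: BpTree::<4>::from_bit_vector(bv)
let bv = BitVec::pack_sequence_u8(&[0b1101_0111, 0b0010_0100], 8);
let tree = BpTree::<4>::from_bit_vec(bv);
assert_eq!(tree.size(), 8);
```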
#[must_use] - pub fn from_bit_vector(bv: BitVec) -> Self { + pub fn from_bit_vec(bv: BitVec) -> Self { let min_max_tree = MinMaxTree::excess_tree(&bv, BLOCK_SIZE); let vec = bv.into(); Self { vec, min_max_tree } @@ -588,7 +588,7 @@ impl BpTree { /// use vers_vecs::{BitVec, RsVec, BpTree, Tree}; /// /// let bv = BitVec::pack_sequence_u8(&[0b1101_0111, 0b0010_0100], 8); - /// let tree = BpTree::<4>::from_bit_vector(bv); + /// let tree = BpTree::<4>::from_bit_vec(bv); /// assert_eq!(tree.size(), 8); /// /// let rs_vec = tree.into_parentheses_vec(); @@ -596,7 +596,7 @@ impl BpTree { /// /// bv.flip_bit(15); /// bv.append_bits(0, 2); - /// let tree = BpTree::<4>::from_bit_vector(bv); + /// let tree = BpTree::<4>::from_bit_vec(bv); /// assert_eq!(tree.size(), 9); /// ``` #[must_use] @@ -813,7 +813,7 @@ impl IntoIterator for BpTree { impl From for BpTree { fn from(bv: BitVec) -> Self { - Self::from_bit_vector(bv) + Self::from_bit_vec(bv) } } diff --git a/src/trees/bp/tests.rs b/src/trees/bp/tests.rs index 4d9acdb..c6b389d 100644 --- a/src/trees/bp/tests.rs +++ b/src/trees/bp/tests.rs @@ -6,13 +6,13 @@ use rand::{RngCore, SeedableRng}; #[test] fn test_fwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<8>::from_bit_vector(bv); + let bp_tree = BpTree::<8>::from_bit_vec(bv); // search within block assert_eq!(bp_tree.fwd_search(3, -1), Some(4)); @@ -33,13 +33,13 @@ fn test_fwd_search() { #[test] fn test_fwd_single_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<512>::from_bit_vector(bv); + let bp_tree = BpTree::<512>::from_bit_vec(bv); assert_eq!(bp_tree.fwd_search(3, -1), Some(4)); assert_eq!(bp_tree.fwd_search(2, -1), Some(5)); @@ -55,13 +55,13 @@ fn test_fwd_single_block() { #[test] fn test_fwd_illegal_queries() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.fwd_search(24, 0), None); assert_eq!(tree.fwd_search(25, 0), None); @@ -69,7 +69,7 @@ fn test_fwd_illegal_queries() { assert_eq!(tree.fwd_search(0, -2), None); assert_eq!(tree.fwd_search(22, 1), None); - let tree = BpTree::<64>::from_bit_vector(bv); + let tree = BpTree::<64>::from_bit_vec(bv); assert_eq!(tree.fwd_search(24, 0), None); assert_eq!(tree.fwd_search(25, 0), None); @@ -82,13 +82,13 @@ fn test_fwd_illegal_queries() { fn test_fwd_unbalanced_expression() { // test whether forward search works with unbalanced parenthesis expressions #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.fwd_search(0, -1), Some(13)); assert_eq!(tree.fwd_search(1, -1), Some(12)); @@ -99,8 +99,8 @@ fn test_fwd_unbalanced_expression() { #[test] fn test_fwd_block_boundary() { - let bv = BitVec::from_bits(&[1, 1, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); // test if a query returns the correct result if the result 
is the first bit in a block // and not in the initial block @@ -113,8 +113,8 @@ fn test_fwd_block_boundary() { #[test] fn test_fwd_negative_block() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<2>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<2>::from_bit_vec(bv); // regression: test if a query correctly returns none (instead of crashing) if the following // block has a negative maximum excess (as a previous bug clamped it to 0). @@ -127,28 +127,28 @@ fn test_fwd_last_element() { // the binary mM tree right of it may be uninitialized, and so not ending the query early // may yield invalid results or break assertions #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, ]); - let tree = BpTree::<4>::from_bit_vector(bv); + let tree = BpTree::<4>::from_bit_vec(bv); assert!(tree.fwd_search(23, 0).is_none()); } #[test] fn test_lookup_extreme_pop() { // test whether a table lookup works if the bit pattern is only ones or only zeros - let bv = BitVec::from_bits(&[1; 64]); - let tree = BpTree::<512>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1; 64]); + let tree = BpTree::<512>::from_bit_vec(bv); for excess in 1..64 { assert_eq!(tree.fwd_search(0, excess), Some(excess as u64)); } - let bv = BitVec::from_bits(&[0; 64]); - let tree = BpTree::<512>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0; 64]); + let tree = BpTree::<512>::from_bit_vec(bv); for excess in 1..64 { assert_eq!(tree.fwd_search(0, -excess), Some(excess as u64)); @@ -182,7 +182,7 @@ fn test_fwd_fuzzy() { } } - let bp = BpTree::<128>::from_bit_vector(bit_vec); + let bp = BpTree::<128>::from_bit_vec(bit_vec); // test any query from valid nodes with the given relative excess values for relative_excess in [-3, -2, -1, 0, 1, 2, 3] { @@ -209,13 +209,13 @@ fn test_fwd_fuzzy() { #[test] fn test_bwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<8>::from_bit_vector(bv); + let bp_tree = BpTree::<8>::from_bit_vec(bv); // search within block assert_eq!(bp_tree.bwd_search(4, -1), Some(3)); @@ -236,13 +236,13 @@ fn test_bwd_search() { #[test] fn test_bwd_single_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<512>::from_bit_vector(bv); + let bp_tree = BpTree::<512>::from_bit_vec(bv); assert_eq!(bp_tree.bwd_search(4, -1), Some(3)); assert_eq!(bp_tree.bwd_search(5, -1), Some(2)); @@ -258,13 +258,13 @@ fn test_bwd_single_block() { #[test] fn test_bwd_illegal_queries() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.bwd_search(0, 0), None); assert_eq!(tree.bwd_search(1, 0), None); @@ -272,7 +272,7 @@ fn test_bwd_illegal_queries() { assert_eq!(tree.bwd_search(23, -2), None); assert_eq!(tree.bwd_search(22, -3), None); - let tree = BpTree::<64>::from_bit_vector(bv); + let tree = BpTree::<64>::from_bit_vec(bv); assert_eq!(tree.bwd_search(0, 0), None); assert_eq!(tree.bwd_search(1, 0), None); @@ -285,8 +285,8 @@ fn 
test_bwd_illegal_queries() { fn test_bwd_left_block_boundary() { // test if a query returns the correct result if the result is the first bit after // a block boundary (the left-most one even for backward search) - let bv = BitVec::from_bits(&[1, 1, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.bwd_search(5, 0), Some(3)); } @@ -294,12 +294,12 @@ fn test_bwd_left_block_boundary() { #[test] fn test_bwd_right_block_boundary() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, ]); - let bp_tree = BpTree::<4>::from_bit_vector(bv); + let bp_tree = BpTree::<4>::from_bit_vec(bv); // test the correct result is returned if result is exactly at a right block boundary assert_eq!(bp_tree.bwd_search(11, -1), Some(4)); @@ -307,8 +307,8 @@ fn test_bwd_right_block_boundary() { #[test] fn test_bwd_block_traversal() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); // if we request excess 0 backwards at a block boundary // we test if that actually traverses the vector instead of reporting @@ -344,7 +344,7 @@ fn test_bwd_fuzzy() { } } - let bp = BpTree::<128>::from_bit_vector(bit_vec); + let bp = BpTree::<128>::from_bit_vec(bit_vec); // test any query from valid nodes with the given relative excess values for relative_excess in [-3, -2, -1, 0, 1, 2, 3] { @@ -375,12 +375,12 @@ fn test_bwd_fuzzy() { #[test] fn test_close() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 0..24 { assert_eq!(tree.close(i), Some(47 - i)); @@ -391,12 +391,12 @@ fn test_close() { #[test] fn test_open() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 24..48 { assert_eq!(tree.open(i), Some(47 - i)); @@ -407,12 +407,12 @@ fn test_open() { #[test] fn test_enclose() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 1..24 { assert_eq!(tree.enclose(i), Some(i - 1)); @@ -431,11 +431,11 @@ fn test_enclose() { #[test] fn test_parent() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.excess(27), 0, "tree is not balanced"); @@ -459,9 +459,9 @@ fn test_parent() { #[test] fn test_children() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); - let tree = 
BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.excess(17), 0, "tree is not balanced"); assert_eq!(tree.first_child(0), Some(1)); @@ -492,8 +492,8 @@ fn test_children() { fn test_contiguous_index() { // test whether `node_index` and `node_handle` return correct indices / node handles. - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv.clone()); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv.clone()); let rs: RsVec = bv.into(); for (rank, index_in_bv) in rs.iter1().enumerate() { @@ -504,13 +504,13 @@ fn test_contiguous_index() { #[test] fn test_depth() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); let mut depth = 0; - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); for i in 0..24 { if bv.get(i) == Some(1) { assert_eq!(tree.depth(i), depth); @@ -526,12 +526,12 @@ fn test_is_leaf() { let bits = vec![ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, ]; - let bv = BitVec::from_bits(&bits); + let bv = BitVec::from_bits_u8(&bits); let leaves = bits[..] .windows(2) .map(|window| window[0] == 1 && window[1] == 0) .collect::>(); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); for (idx, is_leaf) in leaves.iter().enumerate() { // if the bit is 1, check if that node is a leaf. If it's 0, it's not a valid node handle. @@ -546,8 +546,8 @@ fn test_is_ancestor() { // (()((())())) // ab cde f let bits = vec![1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0]; - let bv = BitVec::from_bits(&bits); - let tree = BpTree::<8>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&bits); + let tree = BpTree::<8>::from_bit_vec(bv); let a = tree.root().unwrap(); let b = tree.first_child(a).unwrap(); let c = tree.next_sibling(b).unwrap(); @@ -575,22 +575,22 @@ fn test_is_ancestor() { #[test] fn test_root() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.root(), Some(0)); assert_eq!(tree.previous_sibling(0), None); assert_eq!(tree.next_sibling(0), None); - let tree = BpTree::<16>::from_bit_vector(BitVec::new()); + let tree = BpTree::<16>::from_bit_vec(BitVec::new()); assert_eq!(tree.root(), None); } #[test] fn test_level_ancestor() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_ancestor(2, 0), Some(2)); assert_eq!(tree.level_ancestor(2, 1), Some(1)); @@ -604,10 +604,10 @@ fn test_level_ancestor() { #[test] fn test_level_next() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, // intentionally unbalanced ]); - let tree = BpTree::<4>::from_bit_vector(bv); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_next(0), None); // unbalanced query assert_eq!(tree.level_next(1), Some(5)); @@ -619,8 +619,8 @@ fn test_level_next() { #[test] fn test_level_prev() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let 
tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_prev(0), None); assert_eq!(tree.level_prev(1), None); @@ -635,8 +635,8 @@ fn test_level_prev() { #[test] fn test_level_leftmost() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_leftmost(0), Some(0)); assert_eq!(tree.level_leftmost(1), Some(1)); @@ -648,8 +648,8 @@ fn test_level_leftmost() { #[test] fn test_level_rightmost() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_rightmost(0), Some(0)); assert_eq!(tree.level_rightmost(1), Some(11)); @@ -661,8 +661,8 @@ fn test_level_rightmost() { #[test] fn test_subtree_size() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.subtree_size(0), Some(9)); assert_eq!(tree.subtree_size(1), Some(2)); @@ -682,8 +682,8 @@ fn test_malformed_tree_positive() { // for further queries in a consistent state. // the tree has not enough closing brackets - let bv = BitVec::from_bits(&[1, 1, 1, 0, 1, 1, 0, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 1, 1, 0, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); test_all_functions(&tree); } @@ -695,8 +695,8 @@ fn test_malformed_tree_negative() { // for further queries in a consistent state. // the tree has too many closing brackets - let bv = BitVec::from_bits(&[0, 0, 1, 1, 1, 0, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0, 0, 1, 1, 1, 0, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); test_all_functions(&tree); } @@ -707,8 +707,8 @@ fn test_negative_depth() { // most results are meaningless, but we don't want to panic and leave the data structure // for further queries in a consistent state. 
- let bv = BitVec::from_bits(&[0, 0, 0, 0, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0, 0, 0, 0, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.depth(4), 0); } @@ -768,7 +768,7 @@ fn fuzz_tree_navigation() { bit_vec.append_word(rng.next_u64()); } - let tree = BpTree::<32>::from_bit_vector(bit_vec.clone()); + let tree = BpTree::<32>::from_bit_vec(bit_vec.clone()); let mut parent_stack = Vec::new(); // keep track of last sibling for each node @@ -833,7 +833,7 @@ fn fuzz_tree_navigation() { #[test] fn test_dfs_iterators() { - let tree = BpTree::<32>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<32>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -846,7 +846,7 @@ fn test_dfs_iterators() { #[test] fn test_subtree_iterators() { - let tree = BpTree::<4>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<4>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -877,7 +877,7 @@ fn test_subtree_iterators() { #[test] fn test_children_iterator() { - let tree = BpTree::<4>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<4>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -920,7 +920,7 @@ fn test_from_padded_bitvec() { bv.append_bit(0); bv.drop_last(1); - let tree = BpTree::<64>::from_bit_vector(bv.clone()); + let tree = BpTree::<64>::from_bit_vec(bv.clone()); assert_eq!(tree.root(), Some(0)); assert_eq!(tree.size(), 1); assert_eq!(tree.fwd_search(0, 2), None); diff --git a/src/trees/mmt.rs b/src/trees/mmt.rs index 4817fb5..289383d 100644 --- a/src/trees/mmt.rs +++ b/src/trees/mmt.rs @@ -452,7 +452,7 @@ mod tests { #[test] fn test_simple_excess_tree() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, @@ -510,7 +510,7 @@ mod tests { // 3 4 5 6 // /\/\/\/\ // 7 8 9 10 11 12 - - - let bv = BitVec::from_bits(&[0; 48]); + let bv = BitVec::from_bits_u8(&[0; 48]); let tree = MinMaxTree::excess_tree(&bv, 8); assert_eq!(tree.nodes.len(), 13); // 6 leaves + 7 internal nodes @@ -591,7 +591,7 @@ mod tests { #[test] fn test_simple_fwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -630,7 +630,7 @@ mod tests { #[test] fn test_fwd_search_with_multiple_blocks() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, @@ -656,7 +656,7 @@ mod tests { #[test] fn test_fwd_search_relative_offsets() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 1, 1, // excess 2 1, 0, 1, 0, // min excess 0, max excess 1 @@ -675,7 +675,7 @@ mod tests { #[test] fn test_simple_bwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -714,7 +714,7 @@ mod tests { #[test] fn test_bwd_search_with_multiple_blocks() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, @@ -740,7 +740,7 @@ mod tests { #[test] fn test_bwd_search_relative_offsets() { #[rustfmt::skip] - let bv = 
BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 1, 1, // excess 2 1, 0, 1, 0, // min excess 0, max excess 1 @@ -757,7 +757,7 @@ mod tests { #[test] fn test_incomplete_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 ]); @@ -777,7 +777,7 @@ mod tests { #[test] fn test_single_block() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 0]); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 0]); let tree = MinMaxTree::excess_tree(&bv, 8); @@ -787,12 +787,12 @@ mod tests { #[test] fn test_leaf_calculation() { // test small tree - let bv = BitVec::from_bits(&vec![0; 1000]); + let bv = BitVec::from_bits_u8(&vec![0; 1000]); let tree = MinMaxTree::excess_tree(&bv, 1200); assert_eq!(tree.first_leaf(), 1); // test very large tree - let bv = BitVec::from_bits(&vec![0; 1000]); + let bv = BitVec::from_bits_u8(&vec![0; 1000]); let tree = MinMaxTree::excess_tree(&bv, 4); assert_eq!(tree.first_leaf(), 255) @@ -802,7 +802,7 @@ mod tests { fn test_relative_excess() { // test a tree with 3 layers and different downwards traversals #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/src/util/elias_fano_iter.rs b/src/util/elias_fano_iter.rs index 029faed..cd651e4 100644 --- a/src/util/elias_fano_iter.rs +++ b/src/util/elias_fano_iter.rs @@ -197,7 +197,7 @@ macro_rules! impl_ef_iterator { #[doc = concat!("This struct is created by the `into_iter` trait implementation of `", stringify!($type), "`.")] #[derive(Clone, Debug)] pub struct $own { - upper_iter: crate::bit_vec::fast_rs_vec::SelectIntoIter, + upper_iter: crate::bit_vec::rs::SelectIntoIter, vec: crate::bit_vec::BitVec, index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by @@ -209,7 +209,7 @@ macro_rules! impl_ef_iterator { impl $own { #[must_use] - fn new(vec: crate::elias_fano::EliasFanoVec) -> Self { + fn new(vec: crate::ef::EliasFanoVec) -> Self { if vec.is_empty() { return Self { upper_iter: vec.upper_vec.into_iter1(), @@ -246,7 +246,7 @@ macro_rules! impl_ef_iterator { #[doc = concat!("This struct is created by the `iter` method of `", stringify!($type), "`.")] #[derive(Clone, Debug)] pub struct $bor<'a> { - upper_iter: crate::bit_vec::fast_rs_vec::SelectIter<'a, false>, + upper_iter: crate::bit_vec::rs::SelectIter<'a, false>, vec: &'a crate::bit_vec::BitVec, index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by @@ -258,7 +258,7 @@ macro_rules! 
impl_ef_iterator {
         impl<'a> $bor<'a> {
             #[must_use]
-            fn new(vec: &'a crate::elias_fano::EliasFanoVec) -> Self {
+            fn new(vec: &'a crate::ef::EliasFanoVec) -> Self {
                 if vec.is_empty() {
                     return Self {
                         upper_iter: vec.upper_vec.iter1(),

From a562cbe2dcb8cb89fd4ce2a83d3bb456538e1946 Mon Sep 17 00:00:00 2001
From: Cydhra
Date: Mon, 20 Oct 2025 00:36:04 +0200
Subject: [PATCH 4/5] Change backing structs of our Data Structures to Box<[_]> (#38)

* change backing types of RsVec RMQ structs to Box<[_]>, and add migration guide
---
 migrate.md                | 16 +++++++++++++++-
 src/bit_vec/rs/mod.rs     | 18 +++++++++---------
 src/rmq/binary_rmq/mod.rs | 11 +++++++----
 src/rmq/fast_rmq/mod.rs   | 14 +++++++-------
 4 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/migrate.md b/migrate.md
index f9f58fc..7b79581 100644
--- a/migrate.md
+++ b/migrate.md
@@ -10,4 +10,18 @@ The following structures and functions were renamed
 - `BinaryRmq` to `SparseRmq`
 - `BitVec::from_bits` to `BitVec::from_bits_u8`
 - module `fast_rs_vec` to `rs`
-- module `elias_fano` to `ef`
\ No newline at end of file
+- module `elias_fano` to `ef`
+
+## Changed Index Type
+All vector types that operate on bits or sub-byte words are now indexed by `u64` instead of `usize`,
+allowing full utilization of the available memory even on 32-bit architectures.
+This affects `BitVec`, `RsVec`, `EliasFanoVec`, `SparseRsVec`, `BpTree`, and `WaveletMatrix`.
+This changes the parameter and return types of various functions on the affected types from `usize` to `u64`.
+The only adverse effect is that `len()` and `count()` of iterators over these data structures may panic if the
+iterator has more than `usize::MAX` elements.
+
+## Changed Backing Structures
+`RsVec`, `SparseRmq`, and `SmallRmq` now use `Box<[_]>` instead of `Vec<_>` as backing structs, which reduces the stack
+footprint.
+This breaks serde compatibility with already serialized data.
+It also changes the `Deref` implementation of the RMQ structs, which previously returned `Vec<_>`.
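To make the index-type change concrete, a sketch of a migrated 1.x call site (which signatures are affected in a given crate depends on the functions it uses), built only from calls that appear elsewhere in this patch series:

```rust
use vers_vecs::{BitVec, RsVec};

// Lengths and bit indices are u64 now, even on 64-bit targets where
// usize and u64 coincide; 1.x call sites passing usize need `as u64` casts.
let len: u64 = 1 << 20;
let mut bv = BitVec::from_zeros(len);
bv.flip_bit(42);
let rs: RsVec = bv.into();
assert_eq!(rs.rank1(rs.len()), 1); // rank/select queries take and return u64
```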
\ No newline at end of file diff --git a/src/bit_vec/rs/mod.rs b/src/bit_vec/rs/mod.rs index 3009500..840d4d3 100644 --- a/src/bit_vec/rs/mod.rs +++ b/src/bit_vec/rs/mod.rs @@ -85,11 +85,11 @@ struct SelectSuperBlockDescriptor { #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct RsVec { - data: Vec, + data: Box<[u64]>, len: u64, - blocks: Vec, - super_blocks: Vec, - select_blocks: Vec, + blocks: Box<[BlockDescriptor]>, + super_blocks: Box<[SuperBlockDescriptor]>, + select_blocks: Box<[SelectSuperBlockDescriptor]>, pub(crate) rank0: u64, pub(crate) rank1: u64, } @@ -214,11 +214,11 @@ impl RsVec { total_zeros += current_zeros; RsVec { - data: vec.data, + data: vec.data.into_boxed_slice(), len: vec.len, - blocks, - super_blocks, - select_blocks, + blocks: blocks.into_boxed_slice(), + super_blocks: super_blocks.into_boxed_slice(), + select_blocks: select_blocks.into_boxed_slice(), rank0: total_zeros, rank1: vec.len - total_zeros, } @@ -409,7 +409,7 @@ impl RsVec { #[must_use] pub fn into_bit_vec(self) -> BitVec { BitVec { - data: self.data, + data: self.data.into_vec(), len: self.len, } } diff --git a/src/rmq/binary_rmq/mod.rs b/src/rmq/binary_rmq/mod.rs index 9191ca6..3bb1743 100644 --- a/src/rmq/binary_rmq/mod.rs +++ b/src/rmq/binary_rmq/mod.rs @@ -29,12 +29,12 @@ use std::ops::{Deref, RangeBounds}; #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SparseRmq { - data: Vec, + data: Box<[u64]>, // store indices relative to start of range. There is no way to have ranges exceeding 2^32 bits // but since we have fast_rmq for larger inputs, which does not have any downsides at that point, // we can just use u32 here (which gains cache efficiency for both implementations). - results: Vec, + results: Box<[u32]>, } impl SparseRmq { @@ -102,7 +102,10 @@ impl SparseRmq { } } - Self { data, results } + Self { + data: data.into_boxed_slice(), + results: results.into_boxed_slice(), + } } /// Convenience function for [`SparseRmq::range_min`] for using range operators. @@ -170,7 +173,7 @@ impl SparseRmq { /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators, /// etc. impl Deref for SparseRmq { - type Target = Vec; + type Target = Box<[u64]>; fn deref(&self) -> &Self::Target { &self.data diff --git a/src/rmq/fast_rmq/mod.rs b/src/rmq/fast_rmq/mod.rs index c91ad24..90fc779 100644 --- a/src/rmq/fast_rmq/mod.rs +++ b/src/rmq/fast_rmq/mod.rs @@ -78,10 +78,10 @@ struct Block { #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SmallRmq { - data: Vec, + data: Box<[u64]>, block_minima: SparseRmq, - block_min_indices: Vec, - blocks: Vec, + block_min_indices: Box<[u8]>, + blocks: Box<[Block]>, } impl SmallRmq { @@ -141,10 +141,10 @@ impl SmallRmq { }); Self { - data, + data: data.into_boxed_slice(), block_minima: SparseRmq::from_vec(block_minima), - block_min_indices, - blocks, + block_min_indices: block_min_indices.into_boxed_slice(), + blocks: blocks.into_boxed_slice(), } } @@ -291,7 +291,7 @@ impl SmallRmq { /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators, /// etc. 
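Because the `Deref` target changed from `Vec<u64>` to `Box<[u64]>`, slice-style access through the RMQ structs should keep compiling, while `Vec`-specific methods do not; a sketch (the `capacity` call is just one illustrative example of `Vec`-only API):

```rust
use vers_vecs::SparseRmq;

let rmq = SparseRmq::from_vec(vec![5, 4, 3, 2, 1]);
// Indexing and iteration still go through Deref as before:
assert_eq!(rmq[2], 3);
assert_eq!(rmq.iter().copied().min(), Some(1));
// But Vec-only API is gone, e.g. `rmq.capacity()` no longer compiles.
```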
impl Deref for SmallRmq { - type Target = Vec; + type Target = Box<[u64]>; fn deref(&self) -> &Self::Target { &self.data From aa0d5b1e036158ec64c5c0489816d943864d0cd7 Mon Sep 17 00:00:00 2001 From: Johannes Hengstler Date: Mon, 20 Oct 2025 00:50:53 +0200 Subject: [PATCH 5/5] renamed rmq modules to match struct names and added migration notes --- benches/rmq.rs | 2 +- migrate.md | 2 ++ src/lib.rs | 5 ++--- src/rmq/mod.rs | 6 +++--- src/rmq/{fast_rmq => small}/mod.rs | 2 +- src/rmq/{fast_rmq => small}/tests.rs | 0 src/rmq/{binary_rmq => sparse}/mod.rs | 0 src/rmq/{binary_rmq => sparse}/tests.rs | 2 +- 8 files changed, 10 insertions(+), 9 deletions(-) rename src/rmq/{fast_rmq => small}/mod.rs (99%) rename src/rmq/{fast_rmq => small}/tests.rs (100%) rename src/rmq/{binary_rmq => sparse}/mod.rs (100%) rename src/rmq/{binary_rmq => sparse}/tests.rs (97%) diff --git a/benches/rmq.rs b/benches/rmq.rs index 81fae46..d70da24 100644 --- a/benches/rmq.rs +++ b/benches/rmq.rs @@ -1,7 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; use rand::distributions::{Distribution, Uniform}; use rand::Rng; -use vers_vecs::rmq::fast_rmq::SmallRmq; +use vers_vecs::rmq::small::SmallRmq; mod common; diff --git a/migrate.md b/migrate.md index 7b79581..e9b1160 100644 --- a/migrate.md +++ b/migrate.md @@ -11,6 +11,8 @@ The following structures and functions were renamed - `BitVec::from_bits` to `BitVec::from_bits_u8` - module `fast_rs_vec` to `rs` - module `elias_fano` to `ef` +- module `fast_rmq` to `small` +- module `binary_rmq` to `sparse` ## Changed Index Type All vector types that operate on bits or sub-byte words are now indexed by `u64` instead of `usize`, diff --git a/src/lib.rs b/src/lib.rs index 3ee5f00..29e9152 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,4 @@ #![warn(missing_docs)] -#![allow(clippy::module_name_repetitions)] #![allow(clippy::assertions_on_constants)] // for asserts warning about incompatible constant values #![allow(clippy::inline_always)] // we actually measure performance increases with most of these #![allow(clippy::cast_lossless)] // it is often more readable to use `as u64` instead of `u64::from(..)` @@ -59,8 +58,8 @@ pub use bit_vec::rs::RsVec; pub use bit_vec::sparse::SparseRsVec; pub use bit_vec::BitVec; pub use ef::EliasFanoVec; -pub use rmq::binary_rmq::SparseRmq; -pub use rmq::fast_rmq::SmallRmq; +pub use rmq::small::SmallRmq; +pub use rmq::sparse::SparseRmq; pub use trees::bp::{BpBuilder, BpTree}; pub use trees::{IsAncestor, LevelTree, SubtreeSize, Tree, TreeBuilder}; pub use wavelet::WaveletMatrix; diff --git a/src/rmq/mod.rs b/src/rmq/mod.rs index e7064ce..196c901 100644 --- a/src/rmq/mod.rs +++ b/src/rmq/mod.rs @@ -1,7 +1,7 @@ //! Range minimum query data structures. These data structures allow for the calculation of the index of the //! minimum element in a range of a static array in constant-time. The implementations are located -//! in the [`binary_rmq`] and [`fast_rmq`] modules. +//! in the [`sparse`] and [`small`] modules. 
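Only imports that spell out the full module paths need edits; a sketch of the path changes introduced by the renames below:

```rust
// 1.x: use vers_vecs::rmq::binary_rmq::BinaryRmq;
// 1.x: use vers_vecs::rmq::fast_rmq::FastRmq;
use vers_vecs::rmq::small::SmallRmq;
use vers_vecs::rmq::sparse::SparseRmq;

// The structs behave as before; only paths and names changed.
let sparse = SparseRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]);
assert_eq!(sparse.range_min(0, 1), 1);
let _small = SmallRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]);
```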
-pub mod fast_rmq; +pub mod small; -pub mod binary_rmq; +pub mod sparse; diff --git a/src/rmq/fast_rmq/mod.rs b/src/rmq/small/mod.rs similarity index 99% rename from src/rmq/fast_rmq/mod.rs rename to src/rmq/small/mod.rs index 90fc779..246c9b4 100644 --- a/src/rmq/fast_rmq/mod.rs +++ b/src/rmq/small/mod.rs @@ -7,7 +7,7 @@ use std::cmp::min_by; use std::mem::size_of; use std::ops::{Bound, Deref, RangeBounds}; -use crate::rmq::binary_rmq::SparseRmq; +use crate::rmq::sparse::SparseRmq; use crate::util::pdep::Pdep; /// Size of the blocks the data is split into. One block is indexable with a u8, hence its size. diff --git a/src/rmq/fast_rmq/tests.rs b/src/rmq/small/tests.rs similarity index 100% rename from src/rmq/fast_rmq/tests.rs rename to src/rmq/small/tests.rs diff --git a/src/rmq/binary_rmq/mod.rs b/src/rmq/sparse/mod.rs similarity index 100% rename from src/rmq/binary_rmq/mod.rs rename to src/rmq/sparse/mod.rs diff --git a/src/rmq/binary_rmq/tests.rs b/src/rmq/sparse/tests.rs similarity index 97% rename from src/rmq/binary_rmq/tests.rs rename to src/rmq/sparse/tests.rs index 8a8731f..05c8cd0 100644 --- a/src/rmq/binary_rmq/tests.rs +++ b/src/rmq/sparse/tests.rs @@ -1,4 +1,4 @@ -use crate::rmq::binary_rmq::SparseRmq; +use crate::rmq::sparse::SparseRmq; use rand::RngCore; #[test]