diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c0b0208..2daeb6f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -31,4 +31,14 @@ jobs: - name: Build run: cargo build --verbose --features serde - name: Run tests - run: cargo test --verbose --features serde \ No newline at end of file + run: cargo test --verbose --features serde + + docs: + runs-on: ubuntu-latest + env: + RUSTFLAGS: -C target-cpu=x86-64 + RUSTDOCFLAGS: -C target-cpu=x86-64 + steps: + - uses: actions/checkout@v4 + - name: Docs + run: cargo doc --verbose --all-features \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 371da6e..7798dd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vers-vecs" -version = "1.7.0" +version = "2.0.0" edition = "2021" authors = ["Johannes \"Cydhra\" Hengstler"] description = "A collection of succinct data structures supported by fast implementations of rank and select queries." diff --git a/benches/bp.rs b/benches/bp.rs index c278694..e88c0de 100644 --- a/benches/bp.rs +++ b/benches/bp.rs @@ -11,7 +11,7 @@ use vers_vecs::trees::{Tree, TreeBuilder}; mod common; -const BLOCK_SIZE: usize = 1024; +const BLOCK_SIZE: u64 = 1024; // TODO this function has nlogn runtime, which is a bit too much for the largest trees fn generate_tree<R: Rng>(rng: &mut R, nodes: u64) -> BpTree<BLOCK_SIZE> { @@ -107,7 +107,7 @@ fn bench_navigation(b: &mut Criterion) { let mut rng = StdRng::from_seed([0; 32]); let bp = generate_tree(&mut rng, l as u64); - let node_handles = (0..l).map(|i| bp.node_handle(i)).collect::<Vec<_>>(); + let node_handles = (0..l as u64).map(|i| bp.node_handle(i)).collect::<Vec<_>>(); group.bench_with_input(BenchmarkId::new("parent", l), &l, |b, _| { b.iter_batched( diff --git a/benches/elias_fano_iterator.rs b/benches/elias_fano_iterator.rs index 774ec87..ad939ae 100644 --- a/benches/elias_fano_iterator.rs +++ b/benches/elias_fano_iterator.rs @@ -29,7 +29,7 @@ fn bench_ef(b: &mut Criterion) { let start = Instant::now(); while i < iters { - black_box(ef_vec.get_unchecked(i as usize % l)); + black_box(ef_vec.get_unchecked(i % l as u64)); i += 1; } time += start.elapsed(); diff --git a/benches/rmq.rs b/benches/rmq.rs index a9506c6..d70da24 100644 --- a/benches/rmq.rs +++ b/benches/rmq.rs @@ -1,7 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; use rand::distributions::{Distribution, Uniform}; use rand::Rng; -use vers_vecs::rmq::fast_rmq::FastRmq; +use vers_vecs::rmq::small::SmallRmq; mod common; @@ -12,7 +12,7 @@ fn bench_rmq(b: &mut Criterion) { group.plot_config(common::plot_config()); for l in common::SIZES { - let rmq = FastRmq::from_vec(common::fill_random_vec(&mut rng, l)); + let rmq = SmallRmq::from_vec(common::fill_random_vec(&mut rng, l)); let sample = Uniform::new(0, rmq.len()); group.bench_with_input(BenchmarkId::new("range_min", l), &l, |b, _| { b.iter_batched( diff --git a/benches/select_adversarial.rs b/benches/select_adversarial.rs index f70be47..070e90e 100644 --- a/benches/select_adversarial.rs +++ b/benches/select_adversarial.rs @@ -35,7 +35,7 @@ fn select_worst_case(b: &mut Criterion) { // construct a vector with only one select block and put its last one bit at the end // of the vector - let mut bit_vec = BitVec::with_capacity(length / 64); + let mut bit_vec = BitVec::with_capacity(length as u64 / 64); for _ in 0..(1usize << 13) / 64 - 1 { bit_vec.append_word(u64::MAX); } diff --git a/benches/select_iter.rs b/benches/select_iter.rs index 73be7d7..595838e 100644
--- a/benches/select_iter.rs +++ b/benches/select_iter.rs @@ -15,11 +15,11 @@ fn bench_select_iter(b: &mut Criterion) { group.bench_with_input(BenchmarkId::new("select queries", l), &l, |b, _| { b.iter_custom(|iters| { let mut time = Duration::new(0, 0); - let mut i = 0usize; + let mut i = 0; let rank1 = bit_vec.rank1(bit_vec.len()); let start = Instant::now(); - while (i as u64) < iters { + while i < iters { black_box(bit_vec.select1(i % rank1)); i += 1; } diff --git a/benches/sparse_equals.rs b/benches/sparse_equals.rs index 9119652..7438fc1 100644 --- a/benches/sparse_equals.rs +++ b/benches/sparse_equals.rs @@ -22,14 +22,14 @@ pub const SIZES: [usize; 7] = [ const FILL_FACTORS: [f64; 6] = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]; /// Generate a bitvector with `fill_factors` percent ones at random positions -fn generate_vector_with_fill(rng: &mut ThreadRng, len: usize, fill_factor: f64) -> BitVec { +fn generate_vector_with_fill(rng: &mut ThreadRng, len: u64, fill_factor: f64) -> BitVec { let mut bit_vec1 = BitVec::from_zeros(len); // flip exactly fill-factor * len bits so the equality check is not trivial - sample(rng, len, (fill_factor * len as f64) as usize) + sample(rng, len as usize, (fill_factor * len as f64) as usize) .iter() .for_each(|i| { - bit_vec1.flip_bit(i); + bit_vec1.flip_bit(i as u64); }); bit_vec1 @@ -39,6 +39,7 @@ fn bench(b: &mut Criterion) { let mut rng = rand::thread_rng(); for len in SIZES { + let len = len as u64; let mut group = b.benchmark_group(format!("Equals Benchmark: {}", len)); group.plot_config(common::plot_config()); diff --git a/migrate.md b/migrate.md new file mode 100644 index 0000000..e9b1160 --- /dev/null +++ b/migrate.md @@ -0,0 +1,29 @@ +# Migration Guide from 1.X to 2.0 +The following guide explains the changes from the 1.X versions to the 2.0 release and points out which changes are necessary +in downstream crates. + +## Renamed Members +The following structures and functions were renamed: +- `BitVec::from_bit_vector` to `BitVec::from_bit_vec` +- `SparseRSVec` to `SparseRsVec` +- `FastRmq` to `SmallRmq` +- `BinaryRmq` to `SparseRmq` +- `BitVec::from_bits` to `BitVec::from_bits_u8` +- module `fast_rs_vec` to `rs` +- module `elias_fano` to `ef` +- module `fast_rmq` to `small` +- module `binary_rmq` to `sparse` + +## Changed Index Type +All vector types that operate on bits or sub-byte words are now indexed by `u64` instead of `usize`, +allowing full utilization of the available memory on 32-bit architectures. +This affects `BitVec`, `RsVec`, `EliasFano`, `SparseRsVec`, `BpTree`, and `WaveletMatrix`. +This changes the parameter and return types of various functions on the affected types from `usize` to `u64`. +The only adverse effect is that `len()` and `count()` of iterators over these data structures may panic if the +iterator has more than `usize::MAX` elements. + +## Changed Backing Structures +`RsVec`, `SparseRmq`, and `SmallRmq` now use `Box<[_]>` instead of `Vec<_>` as their backing structures, which reduces the stack +footprint. +This breaks the serde-compatibility with already serialized data. +It also changes the `Deref` implementation of the RMQ structs, which previously dereferenced to `Vec<_>`. \ No newline at end of file diff --git a/readme.md b/readme.md index ea64ed1..d21edac 100644 --- a/readme.md +++ b/readme.md @@ -31,8 +31,6 @@ since the intrinsics speed up both `rank` and `select` operations by a factor of - `simd`: Enables the use of SIMD instructions for rank and select operations. This feature requires AVX-512 support and uses unsafe code.
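To make the renames and the `usize` → `u64` index change from the migration guide above concrete, here is a minimal before/after sketch of affected downstream code; the call sites are illustrative and grounded only in the signatures visible in this diff:

```rust
use vers_vecs::BitVec;

fn main() {
    // 1.x: `BitVec::from_bits(&[u8])`; 2.0: the constructor is `from_bits_u8`
    let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1]);

    // len() now returns u64, so the loop counter is u64 without casts
    for i in 0..bv.len() {
        let _bit = bv.get(i);
    }

    // positions passed to mutating methods are u64 as well
    bv.flip_bit(2);
}
```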
It also enables a special iterator for the rank/select bit vector that uses vectorized operations. -The feature only works on nightly Rust. -Enabling it on stable Rust is a no-op, because the required CPU features are not available there. - `serde`: Enables serialization and deserialization of the data structures using the `serde` crate. - `u16_lookup` Enables a larger lookup table for BP tree queries. The larger table requires 128 KiB instead of 4 KiB. diff --git a/src/bit_vec/mask.rs b/src/bit_vec/mask.rs index a146b24..a8be369 100644 --- a/src/bit_vec/mask.rs +++ b/src/bit_vec/mask.rs @@ -48,7 +48,7 @@ where /// If the position is larger than the length of the vector, None is returned. #[inline] #[must_use] - pub fn get(&self, pos: usize) -> Option<u64> { + pub fn get(&self, pos: u64) -> Option<u64> { if pos >= self.vec.len { None } else { @@ -67,10 +67,10 @@ where /// [`get`]: MaskedBitVec::get #[inline] #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { + pub fn get_unchecked(&self, pos: u64) -> u64 { ((self.bin_op)( - self.vec.data[pos / WORD_SIZE], - self.mask.data[pos / WORD_SIZE], + self.vec.data[(pos / WORD_SIZE) as usize], + self.mask.data[(pos / WORD_SIZE) as usize], ) >> (pos % WORD_SIZE)) & 1 } @@ -79,7 +79,7 @@ where /// If the position is larger than the length of the vector, None is returned. #[inline] #[must_use] - pub fn is_bit_set(&self, pos: usize) -> Option<bool> { + pub fn is_bit_set(&self, pos: u64) -> Option<bool> { if pos >= self.vec.len { None } else { @@ -97,7 +97,7 @@ where /// [`is_bit_set`]: MaskedBitVec::is_bit_set #[inline] #[must_use] - pub fn is_bit_set_unchecked(&self, pos: usize) -> bool { + pub fn is_bit_set_unchecked(&self, pos: u64) -> bool { self.get_unchecked(pos) != 0 } @@ -108,7 +108,7 @@ where /// If the length of the query is larger than 64, None is returned. #[inline] #[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> { + pub fn get_bits(&self, pos: u64, len: u64) -> Option<u64> { if len > WORD_SIZE || len == 0 { return None; } @@ -138,12 +138,13 @@ where #[must_use] #[allow(clippy::inline_always)] #[allow(clippy::comparison_chain)] // rust-clippy #5354 + #[allow(clippy::cast_possible_truncation)] // safe due to the division #[inline] - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); let partial_word = (self.bin_op)( - self.vec.data[pos / WORD_SIZE], - self.mask.data[pos / WORD_SIZE], + self.vec.data[(pos / WORD_SIZE) as usize], + self.mask.data[(pos / WORD_SIZE) as usize], ) >> (pos % WORD_SIZE); if pos % WORD_SIZE + len == WORD_SIZE { @@ -152,8 +153,8 @@ where partial_word & ((1 << (len % WORD_SIZE)) - 1) } else { let next_half = (self.bin_op)( - self.vec.data[pos / WORD_SIZE + 1], - self.mask.data[pos / WORD_SIZE + 1], + self.vec.data[(pos / WORD_SIZE + 1) as usize], + self.mask.data[(pos / WORD_SIZE + 1) as usize], ) << (WORD_SIZE - pos % WORD_SIZE); (partial_word | next_half) & ((1 << (len % WORD_SIZE)) - 1) @@ -167,7 +168,7 @@ where #[inline] #[must_use] pub fn count_zeros(&self) -> u64 { - self.vec.len as u64 - self.count_ones() + self.vec.len - self.count_ones() } /// Return the number of ones in the masked bit vector.
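The `MaskedBitVec` accessors reworked in this hunk evaluate the binary operation lazily, limb by limb, instead of materializing a third bit vector. A small usage sketch; the constructor itself is outside this hunk, so the `mask_or` name and its `Result` return type are assumptions:

```rust
use vers_vecs::BitVec;

fn main() {
    let a = BitVec::from_bits_u8(&[1, 0, 0, 1]);
    let b = BitVec::from_bits_u8(&[0, 1, 0, 1]);

    // assumed entry point for the lazy bitwise-or mask mentioned later in this diff
    let masked = a.mask_or(&b).unwrap();

    // each query applies the operation to the two backing limbs on the fly,
    // so no third vector is ever allocated
    assert_eq!(masked.get(0), Some(1)); // 1 | 0
    assert_eq!(masked.count_ones(), 3);
}
```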
@@ -177,10 +178,10 @@ where pub fn count_ones(&self) -> u64 { let mut ones = self .iter_limbs() - .take(self.vec.len / WORD_SIZE) + .take((self.vec.len / WORD_SIZE) as usize) .map(|limb| u64::from(limb.count_ones())) .sum(); - if self.vec.len % WORD_SIZE > 0 { + if !self.vec.len.is_multiple_of(WORD_SIZE) { ones += u64::from( ((self.bin_op)( *self.vec.data.last().unwrap(), diff --git a/src/bit_vec/mod.rs b/src/bit_vec/mod.rs index 056091e..9c7fa73 100644 --- a/src/bit_vec/mod.rs +++ b/src/bit_vec/mod.rs @@ -7,14 +7,14 @@ use std::cmp::min; use std::hash::{Hash, Hasher}; use std::mem::size_of; -pub mod fast_rs_vec; +pub mod rs; pub mod sparse; pub mod mask; /// Size of a word in bitvectors. All vectors operate on 64-bit words. -const WORD_SIZE: usize = 64; +const WORD_SIZE: u64 = 64; /// Type alias for masked bitvectors that implement a simple bitwise binary operation. /// The first lifetime is for the bit vector that is being masked, the second lifetime is for the @@ -29,7 +29,7 @@ pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>; /// The bit vector has a wide range of constructors that allow for easy creation from various /// sources. /// Among them are constructors for creating an empty vector ([`BitVec::new`]), -/// creating one from single bits of various integer types ([`BitVec::from_bits`] and variations), +/// creating one from single bits of various integer types ([`BitVec::from_bits_u8`] and variations), /// creating limbs from u64 values directly ([`BitVec::from_limbs`] and variations), /// or packing a sequence of numerical values into a dense bit sequence /// ([`BitVec::pack_sequence_u64`] and variations). @@ -60,7 +60,7 @@ pub type BitMask<'s, 'b> = MaskedBitVec<'s, 'b, fn(u64, u64) -> u64>; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BitVec { data: Vec<u64>, - len: usize, + len: u64, } impl BitVec { @@ -75,9 +75,10 @@ impl BitVec { /// The bit vector will be able to hold at least `capacity` bits without reallocating. /// More memory may be allocated according to the underlying allocation strategy. #[must_use] - pub fn with_capacity(capacity: usize) -> Self { + pub fn with_capacity(capacity: u64) -> Self { Self { - data: Vec::with_capacity(capacity / WORD_SIZE + 1), + #[allow(clippy::cast_possible_truncation)] // safe due to the division + data: Vec::with_capacity((capacity / WORD_SIZE + 1) as usize), len: 0, } } @@ -85,22 +86,19 @@ impl BitVec { /// Create a new bit vector with all zeros and the given length. /// The length is measured in bits. #[must_use] - pub fn from_zeros(len: usize) -> Self { - let mut data = vec![0; len / WORD_SIZE]; - if len % WORD_SIZE != 0 { - data.push(0); - } + pub fn from_zeros(len: u64) -> Self { + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let data = vec![0; len.div_ceil(WORD_SIZE) as usize]; Self { data, len } } /// Create a new bit vector with all ones and the given length. /// The length is measured in bits.
#[must_use] - pub fn from_ones(len: usize) -> Self { - let mut data = vec![u64::MAX; len / WORD_SIZE]; - if len % WORD_SIZE != 0 { - data.push((1 << (len % WORD_SIZE)) - 1); - } + pub fn from_ones(len: u64) -> Self { + // the unused bits in the last limb are allowed to hold junk data + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let data = vec![u64::MAX; len.div_ceil(WORD_SIZE) as usize]; Self { data, len } } @@ -116,7 +114,7 @@ impl BitVec { /// use vers_vecs::BitVec; /// /// let bits: &[u8] = &[1, 0, 1, 1, 1, 1]; - /// let bv = BitVec::from_bits(&bits); + /// let bv = BitVec::from_bits_u8(&bits); /// /// assert_eq!(bv.len(), 6); /// assert_eq!(bv.get_bits(0, 6), Some(0b111101u64)); /// ``` /// /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] - pub fn from_bits(bits: &[u8]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + pub fn from_bits_u8(bits: &[u8]) -> Self { + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit(b.into())); bv } @@ -138,15 +136,15 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u32`], [`from_bits_u64`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u16(bits: &[u16]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit_u16(b)); bv } @@ -156,15 +154,15 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u64`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u64`]: BitVec::from_bits_u64 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u32(bits: &[u32]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit_u32(b)); bv } @@ -174,15 +172,15 @@ impl BitVec { /// bit vector. /// All other bits are ignored. /// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_iter`] /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_iter`]: BitVec::from_bits_iter #[must_use] pub fn from_bits_u64(bits: &[u64]) -> Self { - let mut bv = Self::with_capacity(bits.len()); + let mut bv = Self::with_capacity(bits.len() as u64); bits.iter().for_each(|&b| bv.append_bit(b)); bv } @@ -193,7 +191,7 @@ impl BitVec { /// All other bits are ignored. /// The iterator must yield values that can be converted into u64 values.
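As a quick illustration of the renamed constructor family: only the least significant bit of each element is used, so the following constructions are equivalent (a sketch based solely on the signatures shown in this diff):

```rust
use vers_vecs::BitVec;

fn main() {
    // only the lowest bit of each element is taken over
    let from_bytes = BitVec::from_bits_u8(&[1, 0, 1, 1]);
    let from_words = BitVec::from_bits_u64(&[1, 0, 1, 1]);
    assert_eq!(from_bytes, from_words);

    // the iterator variant accepts anything convertible into u64
    let from_iter = BitVec::from_bits_iter([1u8, 0, 1, 1]);
    assert_eq!(from_bytes, from_iter);
}
```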
/// - /// See also: [`from_bits`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`] + /// See also: [`from_bits_u8`], [`from_bits_u16`], [`from_bits_u32`], [`from_bits_u64`] /// /// # Example /// ```rust @@ -210,7 +208,7 @@ impl BitVec { /// assert_eq!(bv, bv2); /// ``` /// - /// [`from_bits`]: BitVec::from_bits + /// [`from_bits_u8`]: BitVec::from_bits_u8 /// [`from_bits_u16`]: BitVec::from_bits_u16 /// [`from_bits_u32`]: BitVec::from_bits_u32 /// [`from_bits_u64`]: BitVec::from_bits_u64 @@ -221,7 +219,7 @@ impl BitVec { I: IntoIterator<Item = T>, { let iter = iter.into_iter(); - let mut bv = Self::with_capacity(iter.size_hint().0); + let mut bv = Self::with_capacity(iter.size_hint().0 as u64); for bit in iter { bv.append_bit(bit.into()); } @@ -253,7 +251,7 @@ impl BitVec { /// [`from_limbs_iter`]: BitVec::from_limbs_iter #[must_use] pub fn from_limbs(words: &[u64]) -> Self { - let len = words.len() * WORD_SIZE; + let len = words.len() as u64 * WORD_SIZE; Self { data: words.to_vec(), len, } @@ -318,15 +316,15 @@ impl BitVec { /// [`from_limbs_iter`]: BitVec::from_limbs_iter #[must_use] pub fn from_vec(data: Vec<u64>) -> Self { - let len = data.len() * WORD_SIZE; + let len = data.len() as u64 * WORD_SIZE; Self { data, len } } - fn pack_bits<T, const MAX_BITS: usize>(sequence: &[T], bits_per_element: usize) -> Self + fn pack_bits<T, const MAX_BITS: u64>(sequence: &[T], bits_per_element: u64) -> Self where T: Into<u64> + Copy, { - let mut bv = Self::with_capacity(sequence.len() * bits_per_element); + let mut bv = Self::with_capacity(sequence.len() as u64 * bits_per_element); for &word in sequence { if bits_per_element <= MAX_BITS { bv.append_bits(word.into(), bits_per_element); } @@ -372,7 +370,7 @@ impl BitVec { /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: usize) -> Self { + pub fn pack_sequence_u64(sequence: &[u64], bits_per_element: u64) -> Self { Self::pack_bits::<_, 64>(sequence, bits_per_element) } @@ -406,7 +404,7 @@ impl BitVec { /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: usize) -> Self { + pub fn pack_sequence_u32(sequence: &[u32], bits_per_element: u64) -> Self { Self::pack_bits::<_, 32>(sequence, bits_per_element) } @@ -440,7 +438,7 @@ impl BitVec { /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32 /// [`pack_sequence_u8`]: BitVec::pack_sequence_u8 #[must_use] - pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: usize) -> Self { + pub fn pack_sequence_u16(sequence: &[u16], bits_per_element: u64) -> Self { Self::pack_bits::<_, 16>(sequence, bits_per_element) } @@ -474,7 +472,7 @@ impl BitVec { /// [`pack_sequence_u32`]: BitVec::pack_sequence_u32 /// [`pack_sequence_u16`]: BitVec::pack_sequence_u16 #[must_use] - pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: usize) -> Self { + pub fn pack_sequence_u8(sequence: &[u8], bits_per_element: u64) -> Self { Self::pack_bits::<_, 8>(sequence, bits_per_element) } @@ -500,13 +498,13 @@ impl BitVec { /// [`append_bit_u8`]: BitVec::append_bit_u8 /// [`append_word`]: BitVec::append_word pub fn append(&mut self, bit: bool) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(0); } if bit { - self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= 1 << (self.len % WORD_SIZE); } else { - self.data[self.len / WORD_SIZE] &= !(1 <<
(self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] &= !(1 << (self.len % WORD_SIZE)); } self.len += 1; } @@ -519,7 +517,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.drop_last(3); /// /// assert_eq!(bv.len(), 3); @@ -529,14 +527,15 @@ impl BitVec { /// /// assert!(bv.is_empty()); /// ``` - pub fn drop_last(&mut self, n: usize) { + pub fn drop_last(&mut self, n: u64) { if n > self.len { self.data.clear(); self.len = 0; return; } - let new_limb_count = (self.len - n).div_ceil(WORD_SIZE); + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let new_limb_count = (self.len - n).div_ceil(WORD_SIZE) as usize; // cut off limbs that we no longer need if new_limb_count < self.data.len() { @@ -574,13 +573,13 @@ impl BitVec { /// [`append_bit_u8`]: BitVec::append_bit_u8 /// [`append_word`]: BitVec::append_word pub fn append_bit(&mut self, bit: u64) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(0); } if bit % 2 == 1 { - self.data[self.len / WORD_SIZE] |= 1 << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= 1 << (self.len % WORD_SIZE); } else { - self.data[self.len / WORD_SIZE] &= !(1 << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] &= !(1 << (self.len % WORD_SIZE)); } self.len += 1; @@ -653,12 +652,12 @@ impl BitVec { /// [`append_bit_u16`]: BitVec::append_bit_u16 /// [`append_bit_u8`]: BitVec::append_bit_u8 pub fn append_word(&mut self, word: u64) { - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(word); } else { // zero out the unused bits before or-ing the new one, to ensure no garbage data remains - self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE)); - self.data[self.len / WORD_SIZE] |= word << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] &= !(u64::MAX << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] |= word << (self.len % WORD_SIZE); self.data.push(word >> (WORD_SIZE - self.len % WORD_SIZE)); } @@ -685,15 +684,15 @@ impl BitVec { /// /// # Panics /// Panics if `len` is larger than 64. 
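A short sketch of the append/truncate API with the new `u64` lengths, using only methods whose signatures appear in this diff:

```rust
use vers_vecs::BitVec;

fn main() {
    let mut bv = BitVec::new();

    // append_bits stores the `len` low-order bits of the word, LSB first
    bv.append_bits(0b1011, 4);
    bv.append_bit(1);
    assert_eq!(bv.len(), 5);

    // drop_last removes bits from the end of the vector
    bv.drop_last(2);
    assert_eq!(bv.len(), 3);
    assert_eq!(bv.get_bits(0, 3), Some(0b011));
}
```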
- pub fn append_bits(&mut self, bits: u64, len: usize) { + pub fn append_bits(&mut self, bits: u64, len: u64) { assert!(len <= 64, "Cannot append more than 64 bits"); - if self.len % WORD_SIZE == 0 { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(bits); } else { // zero out the unused bits before or-ing the new one, to ensure no garbage data remains - self.data[self.len / WORD_SIZE] &= !(u64::MAX << (self.len % WORD_SIZE)); - self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] &= !(u64::MAX << (self.len % WORD_SIZE)); + self.data[(self.len / WORD_SIZE) as usize] |= bits << (self.len % WORD_SIZE); if self.len % WORD_SIZE + len > WORD_SIZE { self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE)); @@ -724,11 +723,11 @@ impl BitVec { /// /// [`append_bits`]: BitVec::append_bits /// [`drop_last`]: BitVec::drop_last - pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) { - if self.len % WORD_SIZE == 0 { + pub fn append_bits_unchecked(&mut self, bits: u64, len: u64) { + if self.len.is_multiple_of(WORD_SIZE) { self.data.push(bits); } else { - self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE); + self.data[(self.len / WORD_SIZE) as usize] |= bits << (self.len % WORD_SIZE); if self.len % WORD_SIZE + len > WORD_SIZE { self.data.push(bits >> (WORD_SIZE - self.len % WORD_SIZE)); @@ -743,10 +742,11 @@ impl BitVec { /// This function is guaranteed to reallocate the underlying vector at most once. pub fn extend_bitvec(&mut self, other: &Self) { // reserve space for the new bits, ensuring at most one re-allocation + #[allow(clippy::cast_possible_truncation)] // safe due to the division self.data - .reserve((self.len + other.len).div_ceil(WORD_SIZE) - self.data.len()); + .reserve((self.len + other.len).div_ceil(WORD_SIZE) as usize - self.data.len()); - let full_limbs = other.len() / WORD_SIZE; + let full_limbs = (other.len() / WORD_SIZE) as usize; for i in 0..full_limbs { self.append_bits(other.data[i], WORD_SIZE); } @@ -759,7 +759,7 @@ impl BitVec { /// Return the length of the bit vector. The length is measured in bits. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -776,7 +776,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.flip_bit(1); /// /// assert_eq!(bv.len(), 6); @@ -785,7 +785,7 @@ impl BitVec { /// /// # Panics /// If the position is larger than the length of the vector, the function panics. - pub fn flip_bit(&mut self, pos: usize) { + pub fn flip_bit(&mut self, pos: u64) { assert!(pos < self.len, "Index out of bounds"); self.flip_bit_unchecked(pos); } @@ -800,8 +800,8 @@ impl BitVec { /// This will not corrupt memory. /// /// [`flip_bit`]: BitVec::flip_bit - pub fn flip_bit_unchecked(&mut self, pos: usize) { - self.data[pos / WORD_SIZE] ^= 1 << (pos % WORD_SIZE); + pub fn flip_bit_unchecked(&mut self, pos: u64) { + self.data[(pos / WORD_SIZE) as usize] ^= 1 << (pos % WORD_SIZE); } /// Return the bit at the given position. 
@@ -815,13 +815,15 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// /// assert_eq!(bv.get(1), Some(0)); /// assert_eq!(bv.get(2), Some(1)); /// ``` + /// + /// [`get_unchecked`]: Self::get_unchecked #[must_use] - pub fn get(&self, pos: usize) -> Option<u64> { + pub fn get(&self, pos: u64) -> Option<u64> { if pos >= self.len { None } else { @@ -839,8 +841,8 @@ impl BitVec { /// /// [`get`]: BitVec::get #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { - (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1 + pub fn get_unchecked(&self, pos: u64) -> u64 { + (self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE)) & 1 } /// Set the bit at the given position. @@ -853,7 +855,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let mut bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let mut bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// bv.set(1, 1).unwrap(); /// /// assert_eq!(bv.len(), 6); @@ -865,7 +867,7 @@ impl BitVec { /// otherwise it will return an empty `Ok`. /// /// [`set_unchecked`]: BitVec::set_unchecked - pub fn set(&mut self, pos: usize, value: u64) -> Result<(), &str> { + pub fn set(&mut self, pos: u64, value: u64) -> Result<(), &str> { if pos >= self.len { Err("out of range") } else { @@ -883,8 +885,9 @@ impl BitVec { /// Use [`set`] to properly handle this case with a `Result`. /// /// [`set`]: BitVec::set - pub fn set_unchecked(&mut self, pos: usize, value: u64) { - self.data[pos / WORD_SIZE] = (self.data[pos / WORD_SIZE] & !(0x1 << (pos % WORD_SIZE))) + pub fn set_unchecked(&mut self, pos: u64, value: u64) { + self.data[(pos / WORD_SIZE) as usize] = (self.data[(pos / WORD_SIZE) as usize] + & !(0x1 << (pos % WORD_SIZE))) | ((value & 0x1) << (pos % WORD_SIZE)); } @@ -898,7 +901,7 @@ impl BitVec { /// ```rust /// use vers_vecs::BitVec; /// - /// let bv = BitVec::from_bits(&[1, 0, 1, 1, 1, 1]); + /// let bv = BitVec::from_bits_u8(&[1, 0, 1, 1, 1, 1]); /// /// assert!(!bv.is_bit_set(1).unwrap()); /// assert!(bv.is_bit_set(2).unwrap()); @@ -906,7 +909,7 @@ impl BitVec { /// /// [`is_bit_set_unchecked`]: BitVec::is_bit_set_unchecked #[must_use] - pub fn is_bit_set(&self, pos: usize) -> Option<bool> { + pub fn is_bit_set(&self, pos: u64) -> Option<bool> { if pos >= self.len { None } else { @@ -923,7 +926,7 @@ impl BitVec { /// /// [`is_bit_set`]: BitVec::is_bit_set #[must_use] - pub fn is_bit_set_unchecked(&self, pos: usize) -> bool { + pub fn is_bit_set_unchecked(&self, pos: u64) -> bool { self.get_unchecked(pos) != 0 } @@ -937,7 +940,7 @@ impl BitVec { /// The first bit at `pos` is the most significant bit of the return value /// limited to `len` bits.
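The checked accessors above pair with their `_unchecked` variants; a sketch of the checked side as shown in this hunk:

```rust
use vers_vecs::BitVec;

fn main() {
    let mut bv = BitVec::from_zeros(10);

    // set() reports out-of-range positions instead of panicking
    assert!(bv.set(3, 1).is_ok());
    assert!(bv.set(10, 1).is_err());

    // get() and is_bit_set() return None past the end
    assert_eq!(bv.get(3), Some(1));
    assert_eq!(bv.get(10), None);
    assert_eq!(bv.is_bit_set(3), Some(true));
}
```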
#[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option<u64> { + pub fn get_bits(&self, pos: u64, len: u64) -> Option<u64> { if len > WORD_SIZE || len == 0 { return None; } @@ -969,13 +972,14 @@ impl BitVec { #[allow(clippy::comparison_chain)] // readability #[inline(always)] // inline to gain loop optimization and pipeline advantages for elias fano #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); - let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE); + let partial_word = self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE); if pos % WORD_SIZE + len <= WORD_SIZE { partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } else { - (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE))) + (partial_word + | (self.data[(pos / WORD_SIZE + 1) as usize] << (WORD_SIZE - pos % WORD_SIZE))) & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } } @@ -1006,7 +1010,7 @@ impl BitVec { #[must_use] #[allow(clippy::inline_always)] #[inline(always)] // to gain optimization if n is constant - pub fn unpack_element(&self, index: usize, n: usize) -> Option<u64> { + pub fn unpack_element(&self, index: u64, n: u64) -> Option<u64> { self.get_bits(index * n, n) } @@ -1028,7 +1032,7 @@ impl BitVec { #[must_use] #[allow(clippy::inline_always)] #[inline(always)] // to gain optimization if n is constant - pub fn unpack_element_unchecked(&self, index: usize, n: usize) -> u64 { + pub fn unpack_element_unchecked(&self, index: u64, n: u64) -> u64 { self.get_bits_unchecked(index * n, n) } @@ -1039,11 +1043,11 @@ impl BitVec { #[must_use] #[allow(clippy::missing_panics_doc)] // can't panic because of manual bounds check pub fn count_ones(&self) -> u64 { - let mut ones: u64 = self.data[0..self.len / WORD_SIZE] + let mut ones: u64 = self.data[0..(self.len / WORD_SIZE) as usize] .iter() .map(|limb| u64::from(limb.count_ones())) .sum(); - if self.len % WORD_SIZE > 0 { + if !self.len.is_multiple_of(WORD_SIZE) { ones += u64::from( (self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(), ); @@ -1059,7 +1063,7 @@ impl BitVec { /// [`count_ones`]: BitVec::count_ones #[must_use] pub fn count_zeros(&self) -> u64 { - self.len as u64 - self.count_ones() + self.len - self.count_ones() } /// Mask this bit vector with another bitvector using bitwise or. The mask is applied lazily @@ -1226,7 +1230,9 @@ impl BitVec { /// containing the original vector. /// /// See also: [`split_at_unchecked`] - pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> { + /// + /// [`split_at_unchecked`]: Self::split_at_unchecked + pub fn split_at(self, at: u64) -> Result<(Self, Self), Self> { if at > self.len { Err(self) } else { @@ -1241,8 +1247,8 @@ impl BitVec { /// If the index is larger than the length of the vector the function will panic or run /// out of memory. /// Use [`split_at`] to properly handle this case.
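Since `unpack_element(index, n)` shown above is defined as `get_bits(index * n, n)`, it inverts the `pack_sequence_*` constructors; a quick round-trip sketch:

```rust
use vers_vecs::BitVec;

fn main() {
    // pack four 5-bit values into a 20-bit vector
    let values = [3u64, 17, 9, 30];
    let bv = BitVec::pack_sequence_u64(&values, 5);

    // unpack_element(i, n) reads back get_bits(i * n, n)
    for (i, &v) in values.iter().enumerate() {
        assert_eq!(bv.unpack_element(i as u64, 5), Some(v));
    }
}
```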
+ /// + /// [`split_at`]: Self::split_at #[must_use] - pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) { + pub fn split_at_unchecked(mut self, at: u64) -> (Self, Self) { let other_len = self.len - at; let mut other = Self::with_capacity(other_len); @@ -1250,8 +1258,8 @@ impl BitVec { return (self, other); } - let first_limb = at / WORD_SIZE; - let last_limb = self.len / WORD_SIZE; + let first_limb = (at / WORD_SIZE) as usize; + let last_limb = (self.len / WORD_SIZE) as usize; // First, we figure out the number of bits from the first limb to retain in this vector: let leading_partial = at % WORD_SIZE; @@ -1322,7 +1330,7 @@ impl From<Vec<u64>> for BitVec { impl Extend<BitVec> for BitVec { fn extend<T: IntoIterator<Item = BitVec>>(&mut self, iter: T) { for v in iter { - self.extend_bitvec(&v) + self.extend_bitvec(&v); } } } @@ -1330,7 +1338,7 @@ impl Extend<BitVec> for BitVec { impl<'t> Extend<&'t BitVec> for BitVec { fn extend<T: IntoIterator<Item = &'t BitVec>>(&mut self, iter: T) { for v in iter { - self.extend_bitvec(v) + self.extend_bitvec(v); } } } @@ -1377,7 +1385,7 @@ impl Eq for BitVec {} impl Hash for BitVec { fn hash<H: Hasher>(&self, state: &mut H) { - state.write_usize(self.len); + state.write_u64(self.len); if self.len > 0 { self.data[0..self.data.len() - 1] .iter() diff --git a/src/bit_vec/fast_rs_vec/bitset.rs b/src/bit_vec/rs/bitset.rs similarity index 92% rename from src/bit_vec/fast_rs_vec/bitset.rs rename to src/bit_vec/rs/bitset.rs index 00cb5e0..2f98d11 100644 --- a/src/bit_vec/fast_rs_vec/bitset.rs +++ b/src/bit_vec/rs/bitset.rs @@ -7,7 +7,7 @@ use crate::RsVec; use std::mem::size_of; /// The number of bits in a RsVec that can be processed by AVX instructions at once. -const VECTOR_SIZE: usize = 16; +const VECTOR_SIZE: u64 = 16; // add iterator functions to RsVec impl RsVec { @@ -73,20 +73,22 @@ impl RsVec { /// [`bit_set_iter0`]: RsVec::bit_set_iter0 /// [`bit_set_iter1`]: RsVec::bit_set_iter1 /// [`SelectIter`]: super::SelectIter +#[allow(clippy::cast_possible_truncation)] pub struct BitSetIter<'a, const ZERO: bool> { vec: &'a RsVec, - base: usize, - offsets: [u32; VECTOR_SIZE], + base: u64, + offsets: [u32; VECTOR_SIZE as usize], content_len: u8, cursor: u8, } impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { pub(super) fn new(vec: &'a RsVec) -> Self { + #[allow(clippy::cast_possible_truncation)] let mut iter = Self { vec, base: 0, - offsets: [0; VECTOR_SIZE], + offsets: [0; VECTOR_SIZE as usize], content_len: 0, cursor: 0, }; @@ -103,7 +105,10 @@ impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { unsafe { let offsets = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - assert!(VECTOR_SIZE <= size_of::<__mmask16>() * 8, "change data types"); + assert!( + VECTOR_SIZE <= size_of::<__mmask16>() as u64 * 8, + "change data types" + ); let mut mask = __mmask16::from(data); if ZERO { mask = !mask; } @@ -129,7 +134,7 @@ impl<'a, const ZERO: bool> BitSetIter<'a, ZERO> { } impl<const ZERO: bool> Iterator for BitSetIter<'_, ZERO> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option<Self::Item> { if self.base >= self.vec.len() { @@ -159,6 +164,6 @@ impl<const ZERO: bool> Iterator for BitSetIter<'_, ZERO> { let offset = self.offsets[self.cursor as usize]; self.cursor += 1; - Some(self.base + offset as usize) + Some(self.base + offset as u64) } } diff --git a/src/bit_vec/fast_rs_vec/iter.rs b/src/bit_vec/rs/iter.rs similarity index 80% rename from src/bit_vec/fast_rs_vec/iter.rs rename to src/bit_vec/rs/iter.rs index e0d4fcf..5a43a9d 100644 --- a/src/bit_vec/fast_rs_vec/iter.rs +++ b/src/bit_vec/rs/iter.rs @@ -1,4 +1,4 @@ -use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE,
SUPER_BLOCK_SIZE}; +use crate::bit_vec::rs::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; use crate::RsVec; use std::iter::FusedIterator; use std::num::NonZeroUsize; @@ -13,6 +13,10 @@ impl RsVec { /// the linear access pattern. /// /// This method has convenience methods `iter0` and `iter1`. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn select_iter<const ZERO: bool>(&self) -> SelectIter<'_, ZERO> { SelectIter::new(self) } @@ -26,6 +30,10 @@ impl RsVec { /// the linear access pattern. /// /// This method has convenience methods `into_iter0` and `into_iter1`. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_select_iter<const ZERO: bool>(self) -> SelectIntoIter<ZERO> { SelectIntoIter::new(self) } @@ -36,6 +44,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn iter0(&self) -> SelectIter<'_, true> { self.select_iter() } @@ -46,6 +58,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn iter1(&self) -> SelectIter<'_, false> { self.select_iter() } @@ -56,6 +72,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIntoIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_iter0(self) -> SelectIntoIter<true> { self.into_select_iter() } @@ -66,6 +86,10 @@ impl RsVec { /// exploits the linear access pattern. /// /// See [`SelectIntoIter`] for more information. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `len()` on the iterator will + /// cause it to panic. pub fn into_iter1(self) -> SelectIntoIter<false> { self.into_select_iter() } @@ -106,18 +130,18 @@ macro_rules! gen_iter_impl { } /// Same implementation like select0, but uses cached indices of last query to speed up search - fn select_next_0(&mut self) -> Option<usize> { + fn select_next_0(&mut self) -> Option<u64> { let mut rank = self.next_rank; if rank >= self.vec.rank0 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; let mut block_index = 0; if self.vec.super_blocks.len() > (self.last_super_block + 1) - && self.vec.super_blocks[self.last_super_block + 1].zeros > rank + && self.vec.super_blocks[self.last_super_block + 1].zeros as u64 > rank { // instantly jump to the last searched position super_block = self.last_super_block; @@ -127,13 +151,13 @@ macro_rules!
gen_iter_impl { // check if current block contains the one and if yes, we don't need to search // this is true IF the last_block is either the last block in a super block, // in which case it must be this block, because we know the rank is within the super block, // OR if the next block has a rank higher than the current rank - if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 + if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize == 15 || self.vec.blocks.len() > self.last_block + 1 - && self.vec.blocks[self.last_block + 1].zeros as usize > rank + && self.vec.blocks[self.last_block + 1].zeros as u64 > rank { // instantly jump to the last searched position block_index = self.last_block; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block0(super_block, rank); @@ -143,11 +167,11 @@ gen_iter_impl { // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { - block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.vec.search_block0(rank, &mut block_index); self.last_block = block_index; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } self.next_rank += 1; @@ -155,17 +179,17 @@ gen_iter_impl { } /// Same implementation like ``select_next_0``, but backwards - fn select_next_0_back(&mut self) -> Option<usize> { + fn select_next_0_back(&mut self) -> Option<u64> { let mut rank = self.next_rank_back?; if self.next_rank_back.is_none() || rank < self.next_rank { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; let mut block_index = 0; - if self.vec.super_blocks[self.last_super_block_back].zeros < rank + if (self.vec.super_blocks[self.last_super_block_back].zeros as u64) < rank { // instantly jump to the last searched position super_block = self.last_super_block_back; @@ -174,11 +198,11 @@ gen_iter_impl { // check if current block contains the one and if yes, we don't need to search // this is true IF the zeros before the last block are less than the rank, // since the block before then can't contain it - if self.vec.blocks[self.last_block_back].zeros as usize <= rank + if self.vec.blocks[self.last_block_back].zeros as u64 <= rank { // instantly jump to the last searched position block_index = self.last_block_back; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block0(super_block, rank); @@ -188,11 +212,11 @@ gen_iter_impl { // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { - block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.vec.search_block0(rank, &mut block_index); self.last_block_back = block_index; - rank -= self.vec.blocks[block_index].zeros as usize; + rank -= self.vec.blocks[block_index].zeros as u64; } self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None }); @@ -201,62 +225,62 @@
gen_iter_impl { #[must_use] #[allow(clippy::assertions_on_constants)] - fn select_next_1(&mut self) -> Option<usize> { + fn select_next_1(&mut self) -> Option<u64> { let mut rank = self.next_rank; if rank >= self.vec.rank1 || self.next_rank_back.is_none() || rank > self.next_rank_back.unwrap() { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; let mut block_index = 0; // check if the last super block still contains the rank, and if yes, we don't need to search if self.vec.super_blocks.len() > (self.last_super_block + 1) - && (self.last_super_block + 1) * SUPER_BLOCK_SIZE - - self.vec.super_blocks[self.last_super_block + 1].zeros + && (self.last_super_block + 1) as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[self.last_super_block + 1].zeros as u64 > rank { // instantly jump to the last searched position super_block = self.last_super_block; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; // check if current block contains the one and if yes, we don't need to search // this is true IF the last_block is either the last block in a super block, // in which case it must be this block, because we know the rank is within the super block, // OR if the next block has a rank higher than the current rank - if self.last_block % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 + if self.last_block as u64 % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 15 || self.vec.blocks.len() > self.last_block + 1 - && (self.last_block + 1 - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[self.last_block + 1].zeros as usize + && (self.last_block + 1 - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[self.last_block + 1].zeros as u64 > rank { // instantly jump to the last searched position block_index = self.last_block; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block1(super_block, rank); self.last_super_block = super_block; - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; } // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; block_index = block_at_super_block; self.vec .search_block1(rank, block_at_super_block, &mut block_index); self.last_block = block_index; - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 *
BLOCK_SIZE + - self.vec.blocks[block_index].zeros as u64; } self.next_rank += 1; @@ -265,101 +289,109 @@ gen_iter_impl { #[must_use] #[allow(clippy::assertions_on_constants)] - fn select_next_1_back(&mut self) -> Option<usize> { + fn select_next_1_back(&mut self) -> Option<u64> { let mut rank = self.next_rank_back?; if self.next_rank_back.is_none() || rank < self.next_rank { return None; } - let mut super_block = self.vec.select_blocks[rank / SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.vec.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; let mut block_index = 0; // check if the last super block still contains the rank, and if yes, we don't need to search - if (self.last_super_block_back) * SUPER_BLOCK_SIZE - - self.vec.super_blocks[self.last_super_block_back].zeros + if self.last_super_block_back as u64 * SUPER_BLOCK_SIZE - (self.vec.super_blocks[self.last_super_block_back].zeros as u64) < rank { // instantly jump to the last searched position super_block = self.last_super_block_back; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; // check if current block contains the one and if yes, we don't need to search // this is true IF the ones before the last block are less than the rank, // since the block before then can't contain it - if (self.last_block_back - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[self.last_block_back].zeros as usize + if (self.last_block_back - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[self.last_block_back].zeros as u64 <= rank { // instantly jump to the last searched position block_index = self.last_block_back; - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[block_index].zeros as u64; } } else { super_block = self.vec.search_super_block1(super_block, rank); self.last_super_block_back = super_block; - rank -= super_block * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.vec.super_blocks[super_block].zeros; } // if the block index is not zero, we already found the block, and need only update the word if block_index == 0 { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; block_index = block_at_super_block; self.vec .search_block1(rank, block_at_super_block, &mut block_index); self.last_block_back = block_index; - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.vec.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE - self.vec.blocks[block_index].zeros as u64; } self.next_rank_back = self.next_rank_back.and_then(|x| if x > 0 { Some(x - 1) } else { None }); Some(self.vec.search_word_in_block1(rank, block_index)) } - ///
Advances the iterator by `n` elements. Returns an error if the iterator does not have - /// enough elements left. Does not call `next` internally. + /// Advances the iterator by `n` elements. + /// Does not call `next` internally. /// This method is currently being added to the iterator trait, see /// [this issue](https://github.com/rust-lang/rust/issues/77404). /// As soon as it is stabilized, this method will be removed and replaced with a custom /// implementation in the iterator impl. - pub(super) fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + /// + /// # Errors + /// If the iterator holds fewer than `n` elements, + /// all remaining elements are skipped, and an error reporting the number of missing elements is returned. + pub fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { if self.len() >= n { - self.next_rank += n; + self.next_rank += n as u64; Ok(()) } else { let len = self.len(); - self.next_rank += len; + self.next_rank += len as u64; Err(NonZeroUsize::new(n - len).unwrap()) } } - /// Advances the iterator back by `n` elements. Returns an error if the iterator does not have - /// enough elements left. Does not call `next_back` internally. + /// Advances the iterator back by `n` elements. + /// Does not call `next_back` internally. /// This method is currently being added to the iterator trait, see /// [this issue](https://github.com/rust-lang/rust/issues/77404). /// As soon as it is stabilized, this method will be removed and replaced with a custom /// implementation in the double ended iterator impl. - pub(super) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { + /// + /// # Errors + /// If the iterator holds fewer than `n` elements, + /// all remaining elements are skipped, and an error reporting the number of missing elements is returned. + pub fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> { if self.len() >= n { - self.next_rank_back = self.next_rank_back.map(|x| x - n); + self.next_rank_back = self.next_rank_back.map(|x| x - n as u64); Ok(()) } else { let len = self.len(); - self.next_rank_back = self.next_rank_back.map(|x| x - len); + self.next_rank_back = self.next_rank_back.map(|x| x - len as u64); Err(NonZeroUsize::new(n - len).unwrap()) } } } impl<$($life,)? const ZERO: bool> Iterator for $name<$($life,)? ZERO> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option<Self::Item> { if ZERO { @@ -373,6 +405,12 @@ macro_rules! gen_iter_impl { (self.len(), Some(self.len())) } + /// Returns the exact number of elements that this iterator would iterate over. Does not + /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic. fn count(self) -> usize where Self: Sized, @@ -423,8 +461,16 @@ macro_rules! gen_iter_impl { impl<$($life,)? const ZERO: bool> FusedIterator for $name<$($life,)? ZERO> {} impl<$($life,)? const ZERO: bool> ExactSizeIterator for $name<$($life,)?
ZERO> { + // the explicit check below guarantees a panic instead of a silent truncation + #[allow(clippy::cast_possible_truncation)] fn len(&self) -> usize { - self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) + // this check is hopefully eliminated on 64-bit architectures + if self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) > usize::MAX as u64 { + panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden"); + } + + self.next_rank_back.map(|x| x + 1).unwrap_or_default().saturating_sub(self.next_rank) as usize } } } @@ -461,11 +507,11 @@ macro_rules! gen_iter_impl { #[must_use] pub struct SelectIter<'a, const ZERO: bool> { pub(crate) vec: &'a RsVec, - next_rank: usize, + next_rank: u64, // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - next_rank_back: Option<usize>, + next_rank_back: Option<u64>, /// the last index in the super block structure where we found a bit last_super_block: usize, @@ -514,11 +560,11 @@ gen_iter_impl!('a, SelectIter); // this owning iterator became necessary pub struct SelectIntoIter<const ZERO: bool> { pub(crate) vec: RsVec, - next_rank: usize, + next_rank: u64, // rank back is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - next_rank_back: Option<usize>, + next_rank_back: Option<u64>, /// the last index in the super block structure where we found a bit last_super_block: usize, diff --git a/src/bit_vec/fast_rs_vec/mod.rs b/src/bit_vec/rs/mod.rs similarity index 82% rename from src/bit_vec/fast_rs_vec/mod.rs rename to src/bit_vec/rs/mod.rs index 2c35643..840d4d3 100644 --- a/src/bit_vec/fast_rs_vec/mod.rs +++ b/src/bit_vec/rs/mod.rs @@ -20,7 +20,7 @@ use crate::BitVec; use super::WORD_SIZE; /// Size of a block in the bitvector. -const BLOCK_SIZE: usize = 512; +const BLOCK_SIZE: u64 = 512; /// Size of a super block in the bitvector. Super-blocks exist to decrease the memory overhead /// of block descriptors. @@ -30,12 +30,12 @@ const BLOCK_SIZE: usize = 512; /// impact on the performance of select queries. The larger the super block size, the deeper will /// a binary search be. We found 2^13 to be a good compromise between memory overhead and /// performance. -const SUPER_BLOCK_SIZE: usize = 1 << 13; +const SUPER_BLOCK_SIZE: u64 = 1 << 13; /// Size of a select block. The select block is used to speed up select queries. The select block /// contains the indices of every `SELECT_BLOCK_SIZE`'th 1-bit and 0-bit in the bitvector. /// The smaller this block-size, the faster are select queries, but the more memory is used. -const SELECT_BLOCK_SIZE: usize = 1 << 13; +const SELECT_BLOCK_SIZE: u64 = 1 << 13; /// Meta-data for a block. The `zeros` field stores the number of zeros up to the block, /// beginning from the last super-block boundary. This means the first block in a super-block @@ -53,7 +53,7 @@ struct BlockDescriptor { #[derive(Clone, Copy, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] struct SuperBlockDescriptor { - zeros: usize, + zeros: u64, } /// Meta-data for the select query.
Each entry i in the select vector contains the indices to find @@ -85,13 +85,13 @@ struct SelectSuperBlockDescriptor { #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct RsVec { - data: Vec<u64>, - len: usize, - blocks: Vec<BlockDescriptor>, - super_blocks: Vec<SuperBlockDescriptor>, - select_blocks: Vec<SelectSuperBlockDescriptor>, - pub(crate) rank0: usize, - pub(crate) rank1: usize, + data: Box<[u64]>, + len: u64, + blocks: Box<[BlockDescriptor]>, + super_blocks: Box<[SuperBlockDescriptor]>, + select_blocks: Box<[SelectSuperBlockDescriptor]>, + pub(crate) rank0: u64, + pub(crate) rank1: u64, } impl RsVec { @@ -106,8 +106,8 @@ impl RsVec { pub fn from_bit_vec(vec: BitVec) -> RsVec { // Construct the block descriptor meta data. Each block descriptor contains the number of // zeros in the super-block, up to but excluding the block. - let mut blocks = Vec::with_capacity(vec.len() / BLOCK_SIZE + 1); - let mut super_blocks = Vec::with_capacity(vec.len() / SUPER_BLOCK_SIZE + 1); + let mut blocks = Vec::with_capacity((vec.len() / BLOCK_SIZE) as usize + 1); + let mut super_blocks = Vec::with_capacity((vec.len() / SUPER_BLOCK_SIZE) as usize + 1); let mut select_blocks = Vec::new(); // sentinel value select_blocks.push(SelectSuperBlockDescriptor { index_0: 0, index_1: 0, }); - let mut total_zeros: usize = 0; - let mut current_zeros: usize = 0; + let mut total_zeros: u64 = 0; + let mut current_zeros: u64 = 0; let mut last_zero_select_block: usize = 0; let mut last_one_select_block: usize = 0; - for (idx, &word) in vec.data.iter().enumerate() { + for (word_idx, &word) in vec.data.iter().enumerate() { // if we moved past a block boundary, append the block information for the previous // block and reset the counter if we moved past a super-block boundary. - if idx % (BLOCK_SIZE / WORD_SIZE) == 0 { - if idx % (SUPER_BLOCK_SIZE / WORD_SIZE) == 0 { + if (word_idx as u64).is_multiple_of(BLOCK_SIZE / WORD_SIZE) { + if (word_idx as u64).is_multiple_of(SUPER_BLOCK_SIZE / WORD_SIZE) { total_zeros += current_zeros; current_zeros = 0; super_blocks.push(SuperBlockDescriptor { zeros: total_zeros }); @@ -141,40 +141,42 @@ impl RsVec { // count the zeros in the current word and add them to the counter // the last word may contain padding zeros, which should not be counted, // but since we do not append the last block descriptor, this is not a problem - let mut new_zeros = word.count_zeros() as usize; + let mut new_zeros = word.count_zeros() as u64; // in the last block, remove remaining zeros of limb that aren't part of the vector - if idx == vec.data.len() - 1 && vec.len % WORD_SIZE > 0 { + if word_idx == vec.data.len() - 1 && !vec.len.is_multiple_of(WORD_SIZE) { let mask = (1 << (vec.len % WORD_SIZE)) - 1; - new_zeros -= (word | mask).count_zeros() as usize; + new_zeros -= (word | mask).count_zeros() as u64; } let all_zeros = total_zeros + current_zeros + new_zeros; if all_zeros / SELECT_BLOCK_SIZE > (total_zeros + current_zeros) / SELECT_BLOCK_SIZE { - if all_zeros / SELECT_BLOCK_SIZE == select_blocks.len() { + if (all_zeros / SELECT_BLOCK_SIZE) as usize == select_blocks.len() { select_blocks.push(SelectSuperBlockDescriptor { index_0: super_blocks.len() - 1, index_1: 0, }); } else { - select_blocks[all_zeros / SELECT_BLOCK_SIZE].index_0 = super_blocks.len() - 1; + select_blocks[(all_zeros / SELECT_BLOCK_SIZE) as usize].index_0 = super_blocks.len() - 1; } last_zero_select_block += 1; } - let total_bits = (idx + 1) * WORD_SIZE; + let total_bits = (word_idx as u64 + 1) * WORD_SIZE; let all_ones = total_bits - all_zeros; if all_ones /
SELECT_BLOCK_SIZE - > (idx * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE + > (word_idx as u64 * WORD_SIZE - total_zeros - current_zeros) / SELECT_BLOCK_SIZE { - if all_ones / SELECT_BLOCK_SIZE == select_blocks.len() { + if (all_ones / SELECT_BLOCK_SIZE) as usize == select_blocks.len() { select_blocks.push(SelectSuperBlockDescriptor { index_0: 0, index_1: super_blocks.len() - 1, }); } else { - select_blocks[all_ones / SELECT_BLOCK_SIZE].index_1 = super_blocks.len() - 1; + select_blocks[(all_ones / SELECT_BLOCK_SIZE) as usize].index_1 = + super_blocks.len() - 1; } last_one_select_block += 1; @@ -212,11 +214,11 @@ impl RsVec { total_zeros += current_zeros; RsVec { - data: vec.data, + data: vec.data.into_boxed_slice(), len: vec.len, - blocks, - super_blocks, - select_blocks, + blocks: blocks.into_boxed_slice(), + super_blocks: super_blocks.into_boxed_slice(), + select_blocks: select_blocks.into_boxed_slice(), rank0: total_zeros, rank1: vec.len - total_zeros, } @@ -230,7 +232,7 @@ impl RsVec { /// # Parameters /// - `pos`: The position of the bit to return the rank of. #[must_use] - pub fn rank0(&self, pos: usize) -> usize { + pub fn rank0(&self, pos: u64) -> u64 { self.rank(true, pos) } @@ -242,7 +244,7 @@ impl RsVec { /// # Parameters /// - `pos`: The position of the bit to return the rank of. #[must_use] - pub fn rank1(&self, pos: usize) -> usize { + pub fn rank1(&self, pos: u64) -> u64 { self.rank(false, pos) } @@ -250,7 +252,7 @@ impl RsVec { // branch elimination profits alone should make it worth it. #[allow(clippy::inline_always)] #[inline(always)] - fn rank(&self, zero: bool, pos: usize) -> usize { + fn rank(&self, zero: bool, pos: u64) -> u64 { #[allow(clippy::collapsible_else_if)] // readability and more obvious where dead branch elimination happens if zero { @@ -263,39 +265,40 @@ impl RsVec { } } - let index = pos / WORD_SIZE; - let block_index = pos / BLOCK_SIZE; - let super_block_index = pos / SUPER_BLOCK_SIZE; + let index = (pos / WORD_SIZE) as usize; + let block_index = (pos / BLOCK_SIZE) as usize; + let super_block_index = (pos / SUPER_BLOCK_SIZE) as usize; let mut rank = 0; // at first add the number of zeros/ones before the current super block rank += if zero { self.super_blocks[super_block_index].zeros } else { - (super_block_index * SUPER_BLOCK_SIZE) - self.super_blocks[super_block_index].zeros + (super_block_index as u64 * SUPER_BLOCK_SIZE) + - self.super_blocks[super_block_index].zeros }; // then add the number of zeros/ones before the current block rank += if zero { - self.blocks[block_index].zeros as usize + self.blocks[block_index].zeros as u64 } else { - ((block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE) - - self.blocks[block_index].zeros as usize + ((block_index as u64 % (SUPER_BLOCK_SIZE / BLOCK_SIZE)) * BLOCK_SIZE) + - self.blocks[block_index].zeros as u64 }; // naive popcount of blocks - for &i in &self.data[(block_index * BLOCK_SIZE) / WORD_SIZE..index] { + for &i in &self.data[((block_index as u64 * BLOCK_SIZE) / WORD_SIZE) as usize..index] { rank += if zero { - i.count_zeros() as usize + i.count_zeros() as u64 } else { - i.count_ones() as usize + i.count_ones() as u64 }; } rank += if zero { - (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize + (!self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as u64 } else { - (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as usize + (self.data[index] & ((1 << (pos % WORD_SIZE)) - 1)).count_ones() as u64 }; rank @@ -303,7 +306,7 @@ impl 
RsVec { /// Return the length of the vector, i.e. the number of bits it contains. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -317,7 +320,7 @@ impl RsVec { /// bit of the returned u64 word. /// If the position is larger than the length of the vector, `None` is returned. #[must_use] - pub fn get(&self, pos: usize) -> Option { + pub fn get(&self, pos: u64) -> Option { if pos >= self.len() { None } else { @@ -331,8 +334,8 @@ impl RsVec { /// # Panics /// This function may panic if `pos >= self.len()` (alternatively, it may return garbage). #[must_use] - pub fn get_unchecked(&self, pos: usize) -> u64 { - (self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE)) & 1 + pub fn get_unchecked(&self, pos: u64) -> u64 { + (self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE)) & 1 } /// Return multiple bits at the given position. The number of bits to return is given by `len`. @@ -341,7 +344,7 @@ impl RsVec { /// None is returned (even if the query partially overlaps with the vector). /// If the length of the query is larger than 64, None is returned. #[must_use] - pub fn get_bits(&self, pos: usize, len: usize) -> Option { + pub fn get_bits(&self, pos: u64, len: u64) -> Option { if len > WORD_SIZE { return None; } @@ -370,13 +373,14 @@ impl RsVec { #[must_use] #[allow(clippy::comparison_chain)] // readability #[allow(clippy::cast_possible_truncation)] // parameter must be out of scope for this to happen - pub fn get_bits_unchecked(&self, pos: usize, len: usize) -> u64 { + pub fn get_bits_unchecked(&self, pos: u64, len: u64) -> u64 { debug_assert!(len <= WORD_SIZE); - let partial_word = self.data[pos / WORD_SIZE] >> (pos % WORD_SIZE); + let partial_word = self.data[(pos / WORD_SIZE) as usize] >> (pos % WORD_SIZE); if pos % WORD_SIZE + len <= WORD_SIZE { partial_word & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } else { - (partial_word | (self.data[pos / WORD_SIZE + 1] << (WORD_SIZE - pos % WORD_SIZE))) + (partial_word + | (self.data[(pos / WORD_SIZE + 1) as usize] << (WORD_SIZE - pos % WORD_SIZE))) & 1u64.checked_shl(len as u32).unwrap_or(0).wrapping_sub(1) } } @@ -405,7 +409,7 @@ impl RsVec { #[must_use] pub fn into_bit_vec(self) -> BitVec { BitVec { - data: self.data, + data: self.data.into_vec(), len: self.len, } } @@ -437,7 +441,11 @@ impl RsVec { let iter: SelectIter = self.select_iter(); - for (rank, bit_index) in iter.enumerate() { + let len = if ZERO { self.rank0 } else { self.rank1 }; + + // we need to manually enumerate() the iter, because the number of set bits could exceed + // the size of usize. 
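+        // (illustrative note: Iterator::enumerate counts with usize, so on a
+        // 32-bit target a vector with more than usize::MAX set bits would
+        // overflow the counter, while (0..len).zip(iter) counts in u64)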
+ for (rank, bit_index) in (0..len).zip(iter) { // since rank is inlined, we get dead code elimination depending on ZERO if (other.get_unchecked(bit_index) == 0) != ZERO || other.rank(ZERO, bit_index) != rank { @@ -468,18 +476,19 @@ impl RsVec { return false; } - if self.data[..self.len / 64] + if self.data[..(self.len / WORD_SIZE) as usize] .iter() - .zip(other.data[..other.len / 64].iter()) + .zip(other.data[..(other.len / 64) as usize].iter()) .any(|(a, b)| a != b) { return false; } // if last incomplete block exists, test it without junk data - if self.len % 64 > 0 - && self.data[self.len / 64] & ((1 << (self.len % 64)) - 1) - != other.data[self.len / 64] & ((1 << (other.len % 64)) - 1) + if !self.len.is_multiple_of(WORD_SIZE) + && self.data[(self.len / WORD_SIZE) as usize] & ((1 << (self.len % WORD_SIZE)) - 1) + != other.data[(self.len / WORD_SIZE) as usize] + & ((1 << (other.len % WORD_SIZE)) - 1) { return false; } diff --git a/src/bit_vec/fast_rs_vec/select.rs b/src/bit_vec/rs/select.rs similarity index 80% rename from src/bit_vec/fast_rs_vec/select.rs rename to src/bit_vec/rs/select.rs index b8721d7..9e7ae85 100644 --- a/src/bit_vec/fast_rs_vec/select.rs +++ b/src/bit_vec/rs/select.rs @@ -1,13 +1,13 @@ // Select code is in here to keep it more organized. -use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; +use crate::bit_vec::rs::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZE}; use crate::bit_vec::WORD_SIZE; use crate::util::pdep::Pdep; use crate::util::unroll; /// A safety constant for assertions to make sure that the block size doesn't change without /// adjusting the code. -const BLOCKS_PER_SUPERBLOCK: usize = 16; +const BLOCKS_PER_SUPERBLOCK: u64 = 16; impl super::RsVec { /// Return the position of the 0-bit with the given rank. See `rank0`. @@ -17,12 +17,12 @@ impl super::RsVec { /// If the rank is larger than the number of 0-bits in the vector, the vector length is returned. 
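    ///
+    /// A minimal usage sketch (illustrative values, using the `u64` API from this change):
+    /// ```
+    /// use vers_vecs::{BitVec, RsVec};
+    ///
+    /// let rs = RsVec::from_bit_vec(BitVec::from_bits_u8(&[1, 0, 1, 0]));
+    /// assert_eq!(rs.select0(0), 1); // the first 0-bit is at position 1
+    /// assert_eq!(rs.select0(1), 3);
+    /// assert_eq!(rs.select0(2), 4); // rank out of range: vector length
+    /// ```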
#[must_use] #[allow(clippy::assertions_on_constants)] - pub fn select0(&self, mut rank: usize) -> usize { + pub fn select0(&self, mut rank: u64) -> u64 { if rank >= self.rank0 { return self.len; } - let mut super_block = self.select_blocks[rank / SELECT_BLOCK_SIZE].index_0; + let mut super_block = self.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_0; if self.super_blocks.len() > (super_block + 1) && self.super_blocks[super_block + 1].zeros <= rank @@ -32,10 +32,10 @@ impl super::RsVec { rank -= self.super_blocks[super_block].zeros; - let mut block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let mut block_index = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; self.search_block0(rank, &mut block_index); - rank -= self.blocks[block_index].zeros as usize; + rank -= self.blocks[block_index].zeros as u64; self.search_word_in_block0(rank, block_index) } @@ -56,10 +56,10 @@ impl super::RsVec { target_feature = "avx512bw", ))] #[inline(always)] - pub(super) fn search_block0(&self, rank: usize, block_index: &mut usize) { + pub(super) fn search_block0(&self, rank: u64, block_index: &mut usize) { use std::arch::x86_64::{_mm256_cmpgt_epu16_mask, _mm256_loadu_epi16, _mm256_set1_epi16}; - if self.blocks.len() > *block_index + (SUPER_BLOCK_SIZE / BLOCK_SIZE) { + if self.blocks.len() > *block_index + (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize { debug_assert!( SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", @@ -93,25 +93,25 @@ impl super::RsVec { target_feature = "avx512bw", )))] #[inline(always)] - pub(super) fn search_block0(&self, rank: usize, block_index: &mut usize) { + pub(super) fn search_block0(&self, rank: u64, block_index: &mut usize) { self.search_block0_naive(rank, block_index); } #[inline(always)] - fn search_block0_naive(&self, rank: usize, block_index: &mut usize) { + fn search_block0_naive(&self, rank: u64, block_index: &mut usize) { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance // this code relies on the fact that BLOCKS_PER_SUPERBLOCK blocks are in one superblock debug_assert!( - SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, + (SUPER_BLOCK_SIZE / BLOCK_SIZE) == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", 64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1 ); unroll!(4, - |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) / 2}| + |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize / 2}| // do not use select_unpredictable here, it degrades performance - if self.blocks.len() > *block_index + boundary && rank >= self.blocks[*block_index + boundary].zeros as usize { + if self.blocks.len() > *block_index + boundary && rank >= self.blocks[*block_index + boundary].zeros as u64 { *block_index += boundary; }, boundary /= 2); @@ -126,7 +126,7 @@ impl super::RsVec { /// * `block_index` - the index of the block to search in, this is the block in the blocks /// vector that contains the rank #[inline(always)] - pub(super) fn search_word_in_block0(&self, mut rank: usize, block_index: usize) -> usize { + pub(super) fn search_word_in_block0(&self, mut rank: u64, block_index: usize) -> u64 { // linear search for word that contains the rank. Binary search is not possible here, // because we don't have accumulated popcounts for the words. 
We use pdep to find the // position of the rank-th zero bit in the word, if the word contains enough zeros, otherwise @@ -134,24 +134,24 @@ impl super::RsVec { let mut index_counter = 0; debug_assert!(BLOCK_SIZE / WORD_SIZE == 8, "change unroll constant"); unroll!(7, |n = {0}| { - let word = self.data[block_index * BLOCK_SIZE / WORD_SIZE + n]; - if (word.count_zeros() as usize) <= rank { - rank -= word.count_zeros() as usize; + let word = self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + n]; + if (word.count_zeros() as u64) <= rank { + rank -= word.count_zeros() as u64; index_counter += WORD_SIZE; } else { - return block_index * BLOCK_SIZE + return block_index as u64 * BLOCK_SIZE + index_counter - + (1 << rank).pdep(!word).trailing_zeros() as usize; + + (1 << rank).pdep(!word).trailing_zeros() as u64; } }, n += 1); // the last word must contain the rank-th zero bit, otherwise the rank is outside the // block, and thus outside the bitvector - block_index * BLOCK_SIZE + block_index as u64 * BLOCK_SIZE + index_counter + (1 << rank) - .pdep(!self.data[block_index * BLOCK_SIZE / WORD_SIZE + 7]) - .trailing_zeros() as usize + .pdep(!self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + 7]) + .trailing_zeros() as u64 } /// Search for the superblock that contains the rank. @@ -162,8 +162,9 @@ impl super::RsVec { /// superblock in the ``select_blocks`` vector that contains the rank /// * `rank` - the rank to search for #[inline(always)] - pub(super) fn search_super_block0(&self, mut super_block: usize, rank: usize) -> usize { - let mut upper_bound = self.select_blocks[rank / SELECT_BLOCK_SIZE + 1].index_0; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + pub(super) fn search_super_block0(&self, mut super_block: usize, rank: u64) -> usize { + let mut upper_bound = self.select_blocks[(rank / SELECT_BLOCK_SIZE + 1) as usize].index_0; while upper_bound - super_block > 8 { let middle = super_block + ((upper_bound - super_block) >> 1); @@ -192,31 +193,31 @@ impl super::RsVec { /// If the rank is larger than the number of 1-bits in the bit-vector, the vector length is returned. 
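    ///
+    /// A matching sketch for the 1-bit case (illustrative values):
+    /// ```
+    /// use vers_vecs::{BitVec, RsVec};
+    ///
+    /// let rs = RsVec::from_bit_vec(BitVec::from_bits_u8(&[1, 0, 1, 0]));
+    /// assert_eq!(rs.select1(0), 0);
+    /// assert_eq!(rs.select1(1), 2);
+    /// assert_eq!(rs.select1(2), 4); // rank out of range: vector length
+    /// ```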
#[must_use] #[allow(clippy::assertions_on_constants)] - pub fn select1(&self, mut rank: usize) -> usize { + pub fn select1(&self, mut rank: u64) -> u64 { if rank >= self.rank1 { return self.len; } - let mut super_block = - self.select_blocks[rank / crate::bit_vec::fast_rs_vec::SELECT_BLOCK_SIZE].index_1; + let mut super_block = self.select_blocks[(rank / SELECT_BLOCK_SIZE) as usize].index_1; if self.super_blocks.len() > (super_block + 1) - && ((super_block + 1) * SUPER_BLOCK_SIZE - self.super_blocks[super_block + 1].zeros) + && ((super_block + 1) as u64 * SUPER_BLOCK_SIZE + - self.super_blocks[super_block + 1].zeros) <= rank { super_block = self.search_super_block1(super_block, rank); } - rank -= (super_block) * SUPER_BLOCK_SIZE - self.super_blocks[super_block].zeros; + rank -= super_block as u64 * SUPER_BLOCK_SIZE - self.super_blocks[super_block].zeros; // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance - let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE); + let block_at_super_block = super_block * (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize; let mut block_index = block_at_super_block; self.search_block1(rank, block_at_super_block, &mut block_index); - rank -= (block_index - block_at_super_block) * BLOCK_SIZE - - self.blocks[block_index].zeros as usize; + rank -= (block_index - block_at_super_block) as u64 * BLOCK_SIZE + - self.blocks[block_index].zeros as u64; self.search_word_in_block1(rank, block_index) } @@ -240,7 +241,7 @@ impl super::RsVec { #[inline(always)] pub(super) fn search_block1( &self, - rank: usize, + rank: u64, block_at_super_block: usize, block_index: &mut usize, ) { @@ -249,7 +250,7 @@ impl super::RsVec { _mm256_sub_epi16, }; - if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK { + if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK as usize { debug_assert!( SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", @@ -257,6 +258,7 @@ impl super::RsVec { ); unsafe { + #[allow(clippy::cast_possible_truncation)] // false positive because constants let bit_nums = _mm256_set_epi16( (15 * BLOCK_SIZE) as i16, (14 * BLOCK_SIZE) as i16, @@ -273,7 +275,7 @@ impl super::RsVec { (3 * BLOCK_SIZE) as i16, (2 * BLOCK_SIZE) as i16, (1 * BLOCK_SIZE) as i16, - (0 * BLOCK_SIZE) as i16, + 0i16, ); let blocks = _mm256_loadu_epi16(self.blocks[*block_index..].as_ptr() as *const i16); @@ -307,7 +309,7 @@ impl super::RsVec { #[inline(always)] pub(super) fn search_block1( &self, - rank: usize, + rank: u64, block_at_super_block: usize, block_index: &mut usize, ) { @@ -315,25 +317,20 @@ impl super::RsVec { } #[inline(always)] - fn search_block1_naive( - &self, - rank: usize, - block_at_super_block: usize, - block_index: &mut usize, - ) { + fn search_block1_naive(&self, rank: u64, block_at_super_block: usize, block_index: &mut usize) { // full binary search for block that contains the rank, manually loop-unrolled, because // LLVM doesn't do it for us, but it gains just under 20% performance // this code relies on the fact that BLOCKS_PER_SUPERBLOCK blocks are in one superblock debug_assert!( - SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK, + (SUPER_BLOCK_SIZE / BLOCK_SIZE) == BLOCKS_PER_SUPERBLOCK, "change unroll constant to {}", 64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1 ); unroll!(4, - |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) / 2}| + |boundary = { (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize / 2}| // 
do not use select_unpredictable here, it degrades performance
-            if self.blocks.len() > *block_index + boundary && rank >= (*block_index + boundary - block_at_super_block) * BLOCK_SIZE - self.blocks[*block_index + boundary].zeros as usize {
+            if self.blocks.len() > *block_index + boundary && rank >= (*block_index + boundary - block_at_super_block) as u64 * BLOCK_SIZE - self.blocks[*block_index + boundary].zeros as u64 {
                *block_index += boundary;
            },
            boundary /= 2);
@@ -348,7 +345,7 @@ impl super::RsVec {
     /// * `block_index` - the index of the block to search in, this is the block in the blocks
     /// vector that contains the rank
     #[inline(always)]
-    pub(super) fn search_word_in_block1(&self, mut rank: usize, block_index: usize) -> usize {
+    pub(super) fn search_word_in_block1(&self, mut rank: u64, block_index: usize) -> u64 {
         // linear search for word that contains the rank. Binary search is not possible here,
         // because we don't have accumulated popcounts for the words. We use pdep to find the
         // position of the rank-th one bit in the word, if the word contains enough ones, otherwise
@@ -356,24 +353,24 @@ impl super::RsVec {
         let mut index_counter = 0;
         debug_assert!(BLOCK_SIZE / WORD_SIZE == 8, "change unroll constant");
         unroll!(7, |n = {0}| {
-            let word = self.data[block_index * BLOCK_SIZE / WORD_SIZE + n];
-            if (word.count_ones() as usize) <= rank {
-                rank -= word.count_ones() as usize;
+            let word = self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + n];
+            if (word.count_ones() as u64) <= rank {
+                rank -= word.count_ones() as u64;
                 index_counter += WORD_SIZE;
             } else {
-                return block_index * BLOCK_SIZE
+                return block_index as u64 * BLOCK_SIZE
                     + index_counter
-                    + (1 << rank).pdep(word).trailing_zeros() as usize;
+                    + (1 << rank).pdep(word).trailing_zeros() as u64;
            }
        }, n += 1);

         // the last word must contain the rank-th one bit, otherwise the rank is outside of the
         // block, and thus outside of the bitvector
-        block_index * BLOCK_SIZE
+        block_index as u64 * BLOCK_SIZE
             + index_counter
             + (1 << rank)
-                .pdep(self.data[block_index * BLOCK_SIZE / WORD_SIZE + 7])
-                .trailing_zeros() as usize
+                .pdep(self.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize + 7])
+                .trailing_zeros() as u64
     }

     /// Search for the superblock that contains the rank.
@@ -384,14 +381,15 @@ impl super::RsVec { /// superblock in the ``select_blocks`` vector that contains the rank /// * `rank` - the rank to search for #[inline(always)] - pub(super) fn search_super_block1(&self, mut super_block: usize, rank: usize) -> usize { - let mut upper_bound = self.select_blocks[rank / SELECT_BLOCK_SIZE + 1].index_1; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + pub(super) fn search_super_block1(&self, mut super_block: usize, rank: u64) -> usize { + let mut upper_bound = self.select_blocks[(rank / SELECT_BLOCK_SIZE + 1) as usize].index_1; // binary search for superblock that contains the rank while upper_bound - super_block > 8 { let middle = super_block + ((upper_bound - super_block) >> 1); // using select_unpredictable does nothing here, likely because the search isn't hot - if ((middle + 1) * SUPER_BLOCK_SIZE - self.super_blocks[middle].zeros) <= rank { + if ((middle + 1) as u64 * SUPER_BLOCK_SIZE - self.super_blocks[middle].zeros) <= rank { super_block = middle; } else { upper_bound = middle; @@ -399,7 +397,8 @@ impl super::RsVec { } // linear search for superblock that contains the rank while self.super_blocks.len() > (super_block + 1) - && ((super_block + 1) * SUPER_BLOCK_SIZE - self.super_blocks[super_block + 1].zeros) + && ((super_block + 1) as u64 * SUPER_BLOCK_SIZE + - self.super_blocks[super_block + 1].zeros) <= rank { super_block += 1; diff --git a/src/bit_vec/fast_rs_vec/tests.rs b/src/bit_vec/rs/tests.rs similarity index 97% rename from src/bit_vec/fast_rs_vec/tests.rs rename to src/bit_vec/rs/tests.rs index 1858248..c8537b2 100644 --- a/src/bit_vec/fast_rs_vec/tests.rs +++ b/src/bit_vec/rs/tests.rs @@ -23,7 +23,7 @@ fn test_random_data_rank() { 6, 7, ]); let sample = Uniform::new(0, 2); - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; for _ in 0..LENGTH { bv.append_bit(sample.sample(&mut rng)); @@ -42,17 +42,17 @@ fn test_random_data_rank() { let mut expected_rank1 = 0; let mut expected_rank0 = 0; - let data_index = rnd_index / WORD_SIZE; + let data_index = (rnd_index / WORD_SIZE) as usize; let bit_index = rnd_index % WORD_SIZE; for v in data.iter().take(data_index) { - expected_rank1 += v.count_ones() as usize; - expected_rank0 += v.count_zeros() as usize; + expected_rank1 += v.count_ones() as u64; + expected_rank0 += v.count_zeros() as u64; } if bit_index > 0 { - expected_rank1 += (data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize; - expected_rank0 += (!data[data_index] & ((1 << bit_index) - 1)).count_ones() as usize; + expected_rank1 += (data[data_index] & ((1 << bit_index) - 1)).count_ones() as u64; + expected_rank0 += (!data[data_index] & ((1 << bit_index) - 1)).count_ones() as u64; } assert_eq!(actual_rank1, expected_rank1); @@ -205,13 +205,13 @@ fn test_only_ones_select() { #[test] fn random_data_select0() { + static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE; let mut bv = BitVec::with_capacity(LENGTH); let mut rng = StdRng::from_seed([ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, ]); let sample = Uniform::new(0, 2); - static LENGTH: usize = 4 * SUPER_BLOCK_SIZE; for _ in 0..LENGTH { bv.append_bit_u8(sample.sample(&mut rng) as u8); @@ -231,7 +231,7 @@ fn random_data_select0() { let mut index = 0; loop { - let zeros = data[index].count_zeros() as usize; + let zeros = data[index].count_zeros() as u64; if rank_counter + zeros > rnd_rank0 { break; } else { @@ -260,13 +260,13 @@ fn random_data_select0() { #[test] fn 
random_data_select1() {
+    static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE;
     let mut bv = BitVec::with_capacity(LENGTH);
     let mut rng = StdRng::from_seed([
         0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
         6, 7,
     ]);
     let sample = Uniform::new(0, 2);
-    static LENGTH: usize = 4 * SUPER_BLOCK_SIZE;

     for _ in 0..LENGTH {
         bv.append_bit_u8(sample.sample(&mut rng) as u8);
@@ -286,7 +286,7 @@ fn random_data_select1() {
         let mut index = 0;
         loop {
-            let ones = data[index].count_ones() as usize;
+            let ones = data[index].count_ones() as u64;
             if rank_counter + ones > rnd_rank1 {
                 break;
             } else {
@@ -1211,8 +1211,8 @@ fn test_random_data_iter_both_ends() {
     }
     let bv = RsVec::from_bit_vec(bv);

-    let mut zeros = Vec::with_capacity(bv.rank0);
-    let mut ones = Vec::with_capacity(bv.rank1);
+    let mut zeros = Vec::with_capacity(bv.rank0 as usize);
+    let mut ones = Vec::with_capacity(bv.rank1 as usize);
     let mut iter0 = bv.iter0();
     let mut iter1 = bv.iter1();
@@ -1226,7 +1226,7 @@ fn test_random_data_iter_both_ends() {
     }
     zeros.sort();
     zeros.dedup();
-    assert_eq!(zeros.len(), bv.rank0);
+    assert_eq!(zeros.len() as u64, bv.rank0);

     for _ in 0..bv.rank1 {
         ones.push(if sample.sample(&mut rng) < 50 {
@@ -1237,7 +1237,7 @@ fn test_random_data_iter_both_ends() {
     }
     ones.sort();
     ones.dedup();
-    assert_eq!(ones.len(), bv.rank1);
+    assert_eq!(ones.len() as u64, bv.rank1);

     for idx in ones {
         assert_eq!(bv.get(idx), Some(1), "bit {} is not 1", idx);
@@ -1254,7 +1254,7 @@ fn test_random_data_iter_both_ends() {
 // test a randomly generated bit vector for correct values in blocks
 #[test]
 fn test_block_layout() {
-    static LENGTH: usize = 4 * SUPER_BLOCK_SIZE;
+    static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE;
     let mut bv = BitVec::with_capacity(LENGTH);
     let mut rng = StdRng::from_seed([
         0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
@@ -1271,7 +1271,7 @@ fn test_block_layout() {
     let mut zero_counter = 0u32;
     for (block_index, block) in bv.blocks.iter().enumerate() {
-        if block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE) == 0 {
+        if block_index % (SUPER_BLOCK_SIZE / BLOCK_SIZE) as usize == 0 {
             zero_counter = 0;
         }
         assert_eq!(
@@ -1281,9 +1281,9 @@ fn test_block_layout() {
             block_index,
             bv.blocks.len()
         );
-        for word in bv.data[block_index * BLOCK_SIZE / WORD_SIZE..]
+        for word in bv.data[block_index * (BLOCK_SIZE / WORD_SIZE) as usize..]
             .iter()
-            .take(BLOCK_SIZE / WORD_SIZE)
+            .take((BLOCK_SIZE / WORD_SIZE) as usize)
         {
             zero_counter += word.count_zeros();
         }
@@ -1293,7 +1293,7 @@
 // Github issue https://github.com/Cydhra/vers/issues/6 regression test
 #[test]
 fn test_iter1_regression_i6() {
-    static LENGTH: usize = 4 * SUPER_BLOCK_SIZE;
+    static LENGTH: u64 = 4 * SUPER_BLOCK_SIZE;
     let mut bv = BitVec::with_capacity(LENGTH);
     let mut rng = StdRng::from_seed([
         0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5,
@@ -1319,7 +1319,7 @@ fn test_iter1_regression_i6() {
     let mut all_bits: Vec<_> = bv.iter0().chain(bv.iter1()).collect();
     all_bits.sort();

-    assert_eq!(all_bits.len(), LENGTH);
+    assert_eq!(all_bits.len() as u64, LENGTH);
 }

 // Github issue https://github.com/Cydhra/vers/issues/8 regression test
diff --git a/src/bit_vec/sparse.rs b/src/bit_vec/sparse.rs
index bc0dbe5..b422489 100644
--- a/src/bit_vec/sparse.rs
+++ b/src/bit_vec/sparse.rs
@@ -2,7 +2,7 @@
//! The vector requires `O(n log u/n) + 2n + o(n)` bits of space, where `n` is the number of 1-bits in the vector
//! and `u` is the length of the vector.
//!
The vector is constructed from a sorted list of indices of 1-bits, or from an existing
-//! [`BitVec`](crate::BitVec).
+//! [`BitVec`].

 use crate::{BitVec, EliasFanoVec};

@@ -15,9 +15,9 @@ use crate::{BitVec, EliasFanoVec};
 ///
 /// # Examples
 /// ```
-/// use vers_vecs::SparseRSVec;
+/// use vers_vecs::SparseRsVec;
 ///
-/// let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12);
+/// let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12);
 /// assert_eq!(sparse.get(5), Some(1));
 /// assert_eq!(sparse.get(11), Some(0));
 /// assert_eq!(sparse.get(12), None);
@@ -28,14 +28,14 @@ use crate::{BitVec, EliasFanoVec};
 ///
 /// It can also be constructed from a `BitVec` directly:
 /// ```
-/// use vers_vecs::SparseRSVec;
+/// use vers_vecs::SparseRsVec;
 /// use vers_vecs::BitVec;
 ///
 /// let mut bv = BitVec::from_zeros(12);
 /// bv.flip_bit(6);
 /// bv.flip_bit(7);
 ///
-/// let sparse = SparseRSVec::from_bitvec(&bv);
+/// let sparse = SparseRsVec::from_bitvec(&bv);
 /// assert_eq!(sparse.rank1(5), 0);
 /// assert_eq!(sparse.select1(0), 6);
 /// ```
@@ -44,12 +44,12 @@ use crate::{BitVec, EliasFanoVec};
 /// [`from_bitvec_inverted`]: #method.from_bitvec_inverted
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct SparseRSVec {
+pub struct SparseRsVec {
     vec: EliasFanoVec,
     len: u64,
 }

-impl SparseRSVec {
+impl SparseRsVec {
     /// Creates a new `SparseRSVec` from a sequence of set bits represented as indices.
     /// The input must be sorted in ascending order and free of duplicates.
     ///
@@ -80,7 +80,7 @@ impl SparseRSVec {
     /// - `input`: The input `BitVec` to compress.
     #[must_use]
     pub fn from_bitvec(input: &BitVec) -> Self {
-        let len = input.len() as u64;
+        let len = input.len();
         Self::new(
             input
                 .iter()
@@ -109,7 +109,7 @@ impl SparseRSVec {
     ///
     /// # Example
     /// ```
-    /// use vers_vecs::SparseRSVec;
+    /// use vers_vecs::SparseRsVec;
     /// use vers_vecs::BitVec;
     ///
     /// let mut bv = BitVec::from_ones(12);
     /// bv.flip_bit(6);
     /// bv.flip_bit(7);
    ///
-    /// let sparse = SparseRSVec::from_bitvec_inverted(&bv);
+    /// let sparse = SparseRsVec::from_bitvec_inverted(&bv);
     /// // now select1 gives the position of 0-bits
     /// assert_eq!(sparse.select1(1), 7);
     /// ```
@@ -127,7 +127,7 @@ impl SparseRSVec {
     /// [`get`]: #method.get
     #[must_use]
     pub fn from_bitvec_inverted(input: &BitVec) -> Self {
-        let len = input.len() as u64;
+        let len = input.len();
         Self::new(
             input
                 .iter()
@@ -170,6 +170,8 @@ impl SparseRSVec {
     /// # Panics
     /// If `i` is out of bounds the function might panic or produce incorrect results.
     /// Use [`get`] for a checked version.
+    ///
+    /// [`get`]: Self::get
     #[must_use]
     pub fn get_unchecked(&self, i: u64) -> u64 {
         self.is_set_unchecked(i).into()
     }
@@ -188,7 +190,7 @@ impl SparseRSVec {
     ///
     /// If the rank is larger than the number of sparse bits in the vector, the vector length is returned.
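    ///
+    /// A small sketch (same positions as the constructor example above):
+    /// ```
+    /// use vers_vecs::SparseRsVec;
+    ///
+    /// let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12);
+    /// assert_eq!(sparse.select1(0), 1);
+    /// assert_eq!(sparse.select1(5), 12); // rank out of range: vector length
+    /// ```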
#[must_use] - pub fn select1(&self, i: usize) -> u64 { + pub fn select1(&self, i: u64) -> u64 { self.vec.get(i).unwrap_or(self.len) } @@ -238,13 +240,13 @@ impl SparseRSVec { } } -impl From for SparseRSVec { +impl From for SparseRsVec { fn from(input: BitVec) -> Self { Self::from_bitvec_inverted(&input) } } -impl<'a> From<&'a BitVec> for SparseRSVec { +impl<'a> From<&'a BitVec> for SparseRsVec { fn from(input: &'a BitVec) -> Self { Self::from_bitvec_inverted(input) } @@ -252,14 +254,14 @@ impl<'a> From<&'a BitVec> for SparseRSVec { #[cfg(test)] mod tests { - use super::SparseRSVec; + use super::SparseRsVec; use crate::BitVec; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; #[test] fn test_sparse_rank() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(2), 1); @@ -278,7 +280,7 @@ mod tests { #[test] fn test_sparse_select() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.select1(0), 1); assert_eq!(sparse.select1(1), 3); assert_eq!(sparse.select1(2), 5); @@ -290,7 +292,7 @@ mod tests { #[test] fn test_sparse_rank0() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.rank0(0), 0); assert_eq!(sparse.rank0(1), 1); assert_eq!(sparse.rank0(2), 1); @@ -309,7 +311,7 @@ mod tests { #[test] fn test_empty_sparse() { - let sparse = SparseRSVec::new(&[], 0); + let sparse = SparseRsVec::new(&[], 0); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(999), 0); @@ -325,7 +327,7 @@ mod tests { #[test] fn test_sparse_get() { - let sparse = SparseRSVec::new(&[1, 3, 5, 7, 9], 12); + let sparse = SparseRsVec::new(&[1, 3, 5, 7, 9], 12); assert_eq!(sparse.get(0), Some(0)); assert_eq!(sparse.get(1), Some(1)); assert_eq!(sparse.get(2), Some(0)); @@ -348,7 +350,7 @@ mod tests { bv.flip_bit(6); bv.flip_bit(7); - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 1); assert_eq!(sparse.rank1(2), 2); @@ -357,7 +359,7 @@ mod tests { assert_eq!(sparse.rank1(9), 7); assert_eq!(sparse.rank1(12), 10); - let sparse = SparseRSVec::from_bitvec_inverted(&bv); + let sparse = SparseRsVec::from_bitvec_inverted(&bv); assert_eq!(sparse.rank1(0), 0); assert_eq!(sparse.rank1(1), 0); assert_eq!(sparse.rank1(2), 0); @@ -370,7 +372,7 @@ mod tests { #[test] fn test_large_block() { // test that the implementation works correctly if the search triggers a binary search - let sparse = SparseRSVec::new( + let sparse = SparseRsVec::new( &[ 1, 100_000, 100_001, 100_002, 100_003, 100_004, 100_005, 100_006, 100_007, 100_008, 100_009, 100_010, 1_000_000, @@ -383,7 +385,7 @@ mod tests { #[test] fn test_fuzzy() { - const L: usize = 100_000; + const L: u64 = 100_000; let mut bv = BitVec::from_zeros(L); let mut rng = StdRng::from_seed([0; 32]); @@ -391,15 +393,15 @@ mod tests { bv.flip_bit(rng.gen_range(0..L)); } - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); let mut ones = 0; for i in 0..L { - assert_eq!(bv.get(i), sparse.get(i as u64)); - assert_eq!(ones, sparse.rank1(i as u64)); - assert_eq!(i as u64 - ones, sparse.rank0(i as u64)); + assert_eq!(bv.get(i), sparse.get(i)); + assert_eq!(ones, sparse.rank1(i)); + assert_eq!(i - ones, sparse.rank0(i)); if 
bv.get(i) == Some(1) { - assert_eq!(i, sparse.select1(ones as usize).try_into().unwrap()); + assert_eq!(i, sparse.select1(ones).try_into().unwrap()); ones += 1; } } @@ -416,7 +418,7 @@ mod tests { bv.append_bit(0); bv.drop_last(1); - let sparse = SparseRSVec::from_bitvec(&bv); + let sparse = SparseRsVec::from_bitvec(&bv); assert_eq!(sparse.len(), 2); assert_eq!(sparse.get(0), Some(1)); assert_eq!(sparse.get(1), Some(0)); diff --git a/src/bit_vec/tests.rs b/src/bit_vec/tests.rs index 0fa3f6d..345906c 100644 --- a/src/bit_vec/tests.rs +++ b/src/bit_vec/tests.rs @@ -475,7 +475,7 @@ fn test_apply_masks() { #[test] fn test_from_bits() { - let bv = BitVec::from_bits(&[1, 0, 1]); + let bv = BitVec::from_bits_u8(&[1, 0, 1]); assert_eq!(bv.len, 3); assert_eq!(bv.get_bits(0, 3), Some(0b101)); @@ -619,8 +619,8 @@ fn test_unpack() { let bv = BitVec::pack_sequence_u64(&sequence, 10); for (i, &val) in sequence.iter().enumerate() { - assert_eq!(bv.unpack_element(i, 10), Some(val)); - assert_eq!(bv.unpack_element_unchecked(i, 10), val); + assert_eq!(bv.unpack_element(i as u64, 10), Some(val)); + assert_eq!(bv.unpack_element_unchecked(i as u64, 10), val); } assert_eq!(bv.unpack_element(8, 10), None); diff --git a/src/elias_fano/mod.rs b/src/ef/mod.rs similarity index 88% rename from src/elias_fano/mod.rs rename to src/ef/mod.rs index 75b009c..28aa4c6 100644 --- a/src/elias_fano/mod.rs +++ b/src/ef/mod.rs @@ -17,7 +17,7 @@ use std::cmp::max; /// friendly. But for large clusters this takes too long, so we switch to binary search. /// We use 4 because benchmarks suggested that this was the best trade-off between speed for average /// case and for worst case. -const BIN_SEARCH_THRESHOLD: usize = 4; +const BIN_SEARCH_THRESHOLD: u64 = 4; /// An Elias-Fano encoded vector of u64 values. The vector is immutable, which is exploited by /// limiting the word length of elements to the minimum required to represent all elements. 
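+///
+/// A quick orientation sketch (values illustrative; `from_slice` and the `u64`-based
+/// `get` and `len` appear later in this diff):
+/// ```
+/// use vers_vecs::EliasFanoVec;
+///
+/// let ef = EliasFanoVec::from_slice(&[0, 3, 7, 10]);
+/// assert_eq!(ef.len(), 4);
+/// assert_eq!(ef.get(2), Some(7));
+/// ```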
@@ -61,8 +61,8 @@ pub struct EliasFanoVec { lower_vec: BitVec, universe_zero: u64, universe_max: u64, - lower_len: usize, - len: usize, + lower_len: u64, + len: u64, } impl EliasFanoVec { @@ -102,23 +102,23 @@ impl EliasFanoVec { let universe_zero = data[0]; let universe_bound = data[data.len() - 1] - universe_zero; - let log_n = ((data.len() + 2) as f64).log2().ceil() as usize; - let bits_per_number = (max(universe_bound, 2) as f64).log2().ceil() as usize; - let bits_for_upper_values = (max(data.len(), 2) as f64).log2().ceil() as usize; + let log_n = ((data.len() + 2) as f64).log2().ceil() as u64; + let bits_per_number = (max(universe_bound, 2) as f64).log2().ceil() as u64; + let bits_for_upper_values = (max(data.len(), 2) as f64).log2().ceil() as u64; let lower_width = max(bits_per_number, log_n) - bits_for_upper_values; assert!(lower_width < 64); let mut upper_vec = - BitVec::from_zeros(2 + data.len() + (universe_bound >> lower_width) as usize); - let mut lower_vec = BitVec::with_capacity(data.len() * lower_width); + BitVec::from_zeros(2 + data.len() as u64 + (universe_bound >> lower_width)); + let mut lower_vec = BitVec::with_capacity(data.len() as u64 * lower_width); for (i, &word) in data.iter().enumerate() { let word = word - universe_zero; - let upper = (word >> lower_width) as usize; + let upper = word >> lower_width; let lower = word & ((1 << lower_width) - 1); - upper_vec.flip_bit_unchecked(upper + i + 1); + upper_vec.flip_bit_unchecked(upper + i as u64 + 1); lower_vec.append_bits_unchecked(lower, lower_width); } @@ -128,13 +128,13 @@ impl EliasFanoVec { universe_zero, universe_max: data[data.len() - 1], lower_len: lower_width, - len: data.len(), + len: data.len() as u64, } } /// Returns the number of elements in the vector. #[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { self.len } @@ -147,7 +147,7 @@ impl EliasFanoVec { /// Returns the element at the given index, or `None` if the index exceeds the length of the /// vector. #[must_use] - pub fn get(&self, index: usize) -> Option { + pub fn get(&self, index: u64) -> Option { if index >= self.len() { return None; } @@ -164,8 +164,10 @@ impl EliasFanoVec { /// /// Note, that select in bit-vectors returns an index, while select in Elias-Fano returns the /// element at the given rank. + /// + /// [`get`]: Self::get #[must_use] - pub fn select(&self, rank: usize) -> Option { + pub fn select(&self, rank: u64) -> Option { self.get(rank) } @@ -178,12 +180,12 @@ impl EliasFanoVec { /// [`get`]: EliasFanoVec::get #[must_use] #[allow(clippy::cast_possible_truncation)] - pub fn get_unchecked(&self, index: usize) -> u64 { + pub fn get_unchecked(&self, index: u64) -> u64 { let upper = self.upper_vec.select1(index) - index - 1; let lower = self .lower_vec .get_bits_unchecked(index * self.lower_len, self.lower_len); - ((upper << self.lower_len) as u64 | lower) + self.universe_zero + ((upper << self.lower_len) | lower) + self.universe_zero } /// Returns the largest element that is smaller than or equal to the query. 
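    ///
+    /// Sketch of the semantics (illustrative values; `predecessor_unchecked` is the
+    /// variant exercised by the tests further down in this diff):
+    /// ```
+    /// use vers_vecs::EliasFanoVec;
+    ///
+    /// let ef = EliasFanoVec::from_slice(&[0, 3, 7, 10]);
+    /// assert_eq!(ef.predecessor_unchecked(8), 7); // largest element <= 8
+    /// ```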
@@ -214,15 +216,15 @@ impl EliasFanoVec { #[allow(clippy::cast_possible_truncation)] // we will fix this in a breaking update fn search_element_in_block( &self, - start_index_upper: usize, - start_index_lower: usize, + start_index_upper: u64, + start_index_lower: u64, query: u64, query_upper: u64, query_lower: u64, query_masked_upper: u64, ) -> u64 { // the direction in which we search for the element, dependent on the UPWARD flag - let direction: isize = if UPWARD { 1 } else { -1 }; + let direction: i64 = if UPWARD { 1 } else { -1 }; // the function to check if the current candidate no longer fulfills the query // criterion @@ -246,12 +248,12 @@ impl EliasFanoVec { // last element. if self .upper_vec - .get_unchecked((start_index_upper as isize + direction) as usize) + .get_unchecked((start_index_upper as i64 + direction) as u64) > 0 { // get the first value from the lower vector that corresponds to the query prefix let mut lower_candidate = self.lower_vec.get_bits_unchecked( - (start_index_lower as isize) as usize * self.lower_len, + (start_index_lower as i64) as u64 * self.lower_len, self.lower_len, ); @@ -263,11 +265,11 @@ impl EliasFanoVec { let mut cursor = direction; while self .upper_vec - .get_unchecked((start_index_upper as isize + cursor + direction) as usize) + .get_unchecked((start_index_upper as i64 + cursor + direction) as u64) > 0 { let next_candidate = self.lower_vec.get_bits_unchecked( - (start_index_lower as isize + cursor) as usize * self.lower_len, + (start_index_lower as i64 + cursor) as u64 * self.lower_len, self.lower_len, ); @@ -277,13 +279,13 @@ impl EliasFanoVec { || (!UPWARD && next_candidate < query_lower) { return if INDEX { - start_index_lower as u64 + cursor as u64 + start_index_lower + cursor as u64 } else { (query_masked_upper | lower_candidate) + self.universe_zero }; } else if next_candidate == query_lower { return if INDEX { - start_index_lower as u64 + cursor as u64 + start_index_lower + cursor as u64 } else { (query_masked_upper | next_candidate) + self.universe_zero }; @@ -297,23 +299,20 @@ impl EliasFanoVec { #[allow(clippy::comparison_chain)] // readability if cursor.unsigned_abs() == BIN_SEARCH_THRESHOLD { let block_end = if UPWARD { - self.upper_vec.select0((query_upper as isize + 1) as usize) - - query_upper as usize + self.upper_vec.select0((query_upper as i64 + 1) as u64) + - query_upper - 2 } else { - self.upper_vec.select0((query_upper as isize) as usize) - - query_upper as usize + self.upper_vec.select0((query_upper as i64) as u64) - query_upper }; let mut upper_bound; let mut lower_bound; if UPWARD { upper_bound = block_end; - lower_bound = - (start_index_lower as isize + cursor - direction) as usize; + lower_bound = (start_index_lower as i64 + cursor - direction) as u64; } else { - upper_bound = - (start_index_lower as isize + cursor - direction) as usize; + upper_bound = (start_index_lower as i64 + cursor - direction) as u64; lower_bound = block_end; } @@ -332,10 +331,10 @@ impl EliasFanoVec { upper_bound = middle; } else if middle_candidate == query_lower { return if INDEX { - cursor = middle as isize; + cursor = middle as i64; // while the element at cursor - 1 is equal, reduce cursor while self.lower_vec.get_bits_unchecked( - (cursor - direction) as usize * self.lower_len, + (cursor - direction) as u64 * self.lower_len, self.lower_len, ) == query_lower { @@ -362,7 +361,7 @@ impl EliasFanoVec { || (!UPWARD && final_bound > block_end) { let check_candidate = self.lower_vec.get_bits_unchecked( - (final_bound as isize + direction) as 
usize * self.lower_len,
+                            (final_bound as i64 + direction) as u64 * self.lower_len,
                             self.lower_len,
                         );
@@ -371,7 +370,7 @@ impl EliasFanoVec {
                         // if the element at lower_bound + 1 is smaller than the query, we include it
                         // in the count, so we return lower_bound + 1 + 1, as all elements in the
                         // 1-block are smaller than the query
-                        (final_bound as isize + direction + 1) as u64
+                        (final_bound as i64 + direction + 1) as u64
                     } else {
                         (query_masked_upper | check_candidate) + self.universe_zero
                     };
@@ -380,7 +379,7 @@ impl EliasFanoVec {
                     // update the cursor because we use it for the final index calculation
                     if INDEX {
-                        cursor = final_bound as isize + direction;
+                        cursor = final_bound as i64 + direction;
                     }
                     break;
                 }
@@ -390,7 +389,7 @@ impl EliasFanoVec {
                 // the loop ended because the element at cursor has a larger upper index,
                 // so we return the previous element count
                 // (element at cursor - 1, +1 because count is not 0 based)
-                start_index_lower as u64 + cursor as u64
+                start_index_lower + cursor as u64
             } else {
                 (query_masked_upper | lower_candidate) + self.universe_zero
             };
@@ -401,9 +400,9 @@ impl EliasFanoVec {
                 // all elements in the 1-block are larger than the query,
                 // so we return the last element count
                 // (start_index_lower - 1, +1 because count is not 0 based)
-                start_index_lower as u64
+                start_index_lower
            } else {
-                self.get_unchecked((start_index_lower as isize - direction) as usize)
+                self.get_unchecked((start_index_lower as i64 - direction) as u64)
            }
     }
@@ -427,7 +426,7 @@ impl EliasFanoVec {
         let n = n - self.universe_zero;

         // split the query into the upper and lower part
-        let upper_query = (n >> self.lower_len) as usize;
+        let upper_query = n >> self.lower_len;
         let lower_query = n & ((1 << self.lower_len) - 1);

         // calculate the lower bound within the lower vector where our predecessor can be found. Since
@@ -439,13 +438,13 @@ impl EliasFanoVec {
         // calculate the upper part of the result. This only works if the next value in the upper
         // vector is set, otherwise there is no value in the entire vector with this bit-prefix,
         // and we need to search the largest prefix smaller than the query.
-        let result_upper = (upper_query << self.lower_len) as u64;
+        let result_upper = upper_query << self.lower_len;

         self.search_element_in_block::(
            lower_bound_upper_index,
            lower_bound_lower_index,
            n,
-            upper_query as u64,
+            upper_query,
            lower_query,
            result_upper,
        )
@@ -488,7 +487,7 @@ impl EliasFanoVec {
         let n = n - self.universe_zero;

         // split the query into the upper and lower part
-        let upper_query = (n >> self.lower_len) as usize;
+        let upper_query = n >> self.lower_len;
         let lower_query = n & ((1 << self.lower_len) - 1);

         // calculate the upper bound within the lower vector where our successor can be found. Since
@@ -500,13 +499,13 @@ impl EliasFanoVec {
         // calculate the upper part of the result. This only works if the next value in the upper
         // vector is set, otherwise there is no value in the entire vector with this bit-prefix,
         // and we need to search the largest prefix smaller than the query.
- let result_upper = (upper_query << self.lower_len) as u64; + let result_upper = upper_query << self.lower_len; self.search_element_in_block::( upper_bound_upper_index, upper_bound_lower_index, n, - upper_query as u64, + upper_query, lower_query, result_upper, ) @@ -531,7 +530,7 @@ impl EliasFanoVec { /// assert_eq!(elias_fano_vec.delta(3), Some(80)); /// ``` #[must_use] - pub fn delta(&self, index: usize) -> Option { + pub fn delta(&self, index: u64) -> Option { if index >= self.len() { return None; } @@ -549,7 +548,7 @@ impl EliasFanoVec { ) } else { let query_upper_part = (upper_index - index - 1) << self.lower_len; - let query_number = query_upper_part as u64 + let query_number = query_upper_part | self .lower_vec .get_bits_unchecked(index * self.lower_len, self.lower_len); @@ -561,7 +560,7 @@ impl EliasFanoVec { let lower_element_upper_index = self.upper_vec.select1(index - 1); let lower_element_upper = lower_element_upper_index - (index - 1) - 1; - let lower_elem = ((lower_element_upper as u64) << self.lower_len as u64) + let lower_elem = (lower_element_upper << self.lower_len) | self .lower_vec .get_bits_unchecked((index - 1) * self.lower_len, self.lower_len); @@ -572,10 +571,9 @@ impl EliasFanoVec { /// Return how many elements strictly smaller than the query element are present in the vector. #[must_use] - #[allow(clippy::cast_possible_truncation)] // we will fix this in a breaking update pub fn rank(&self, value: u64) -> u64 { if value > self.universe_max || self.is_empty() { - return self.len() as u64; + return self.len(); } if value < self.universe_zero { @@ -585,12 +583,12 @@ impl EliasFanoVec { let value = value - self.universe_zero; let upper = value >> self.lower_len; let lower = value & ((1 << self.lower_len) - 1); - let query_begin = self.upper_vec.select0(upper as usize); - let lower_index = query_begin as u64 - upper; + let query_begin = self.upper_vec.select0(upper); + let lower_index = query_begin - upper; self.search_element_in_block::( query_begin, - lower_index as usize, + lower_index, value, upper, lower, diff --git a/src/elias_fano/tests.rs b/src/ef/tests.rs similarity index 98% rename from src/elias_fano/tests.rs rename to src/ef/tests.rs index b7b0d42..6457006 100644 --- a/src/elias_fano/tests.rs +++ b/src/ef/tests.rs @@ -62,10 +62,10 @@ fn test_randomized_elias_fano() { let ef = EliasFanoVec::from_slice(&seq); - assert_eq!(ef.len(), seq.len()); + assert_eq!(ef.len(), seq.len() as u64); for (i, &v) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), v); + assert_eq!(ef.get_unchecked(i as u64), v); } for _ in 0..1000 { @@ -110,7 +110,7 @@ fn test_clustered_ef() { let ef = EliasFanoVec::from_slice(&seq); for (i, &x) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), x, "expected {:b}", x); + assert_eq!(ef.get_unchecked(i as u64), x, "expected {:b}", x); assert_eq!(ef.predecessor_unchecked(x), x); assert_eq!(ef.successor_unchecked(x), x); } @@ -398,10 +398,10 @@ fn test_randomized_elias_fano_successor() { let ef = EliasFanoVec::from_slice(&seq); - assert_eq!(ef.len(), seq.len()); + assert_eq!(ef.len(), seq.len() as u64); for (i, &v) in seq.iter().enumerate() { - assert_eq!(ef.get_unchecked(i), v); + assert_eq!(ef.get_unchecked(i as u64), v); } for _ in 0..1000 { diff --git a/src/lib.rs b/src/lib.rs index 2e1c297..29e9152 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,19 +1,9 @@ -#![cfg_attr( - all( - feature = "simd", - target_arch = "x86_64", - target_feature = "avx", - target_feature = "avx2", - target_feature = "avx512f", - target_feature 
= "avx512bw", - ), - feature(stdarch_x86_avx512) -)] #![warn(missing_docs)] -#![allow(clippy::module_name_repetitions)] #![allow(clippy::assertions_on_constants)] // for asserts warning about incompatible constant values #![allow(clippy::inline_always)] // we actually measure performance increases with most of these -#![cfg_attr(docsrs, feature(doc_cfg), feature(doc_auto_cfg))] // for conditional compilation in docs +#![allow(clippy::cast_lossless)] // it is often more readable to use `as u64` instead of `u64::from(..)` +#![allow(clippy::needless_for_each)] // readability of one-liners +#![cfg_attr(docsrs, feature(doc_cfg))] // for conditional compilation in docs //! This crate provides a collection of data structures supported by fast implementations of //! rank and select queries. The data structures are static, meaning that they cannot be modified @@ -64,12 +54,12 @@ //! - `bp_u16_lookup` (disabled by default): Uses a 16-bit lookup table for the balanced parenthesis //! tree data structure. This is faster, but requires 128 KiB instead of 4 KiB. -pub use bit_vec::fast_rs_vec::RsVec; -pub use bit_vec::sparse::SparseRSVec; +pub use bit_vec::rs::RsVec; +pub use bit_vec::sparse::SparseRsVec; pub use bit_vec::BitVec; -pub use elias_fano::EliasFanoVec; -pub use rmq::binary_rmq::BinaryRmq; -pub use rmq::fast_rmq::FastRmq; +pub use ef::EliasFanoVec; +pub use rmq::small::SmallRmq; +pub use rmq::sparse::SparseRmq; pub use trees::bp::{BpBuilder, BpTree}; pub use trees::{IsAncestor, LevelTree, SubtreeSize, Tree, TreeBuilder}; pub use wavelet::WaveletMatrix; @@ -77,7 +67,7 @@ pub use wavelet::WaveletMatrix; pub mod bit_vec; #[forbid(unsafe_code)] -pub mod elias_fano; +pub mod ef; #[forbid(unsafe_code)] pub mod rmq; diff --git a/src/rmq/mod.rs b/src/rmq/mod.rs index e7064ce..196c901 100644 --- a/src/rmq/mod.rs +++ b/src/rmq/mod.rs @@ -1,7 +1,7 @@ //! Range minimum query data structures. These data structures allow for the calculation of the index of the //! minimum element in a range of a static array in constant-time. The implementations are located -//! in the [`binary_rmq`] and [`fast_rmq`] modules. +//! in the [`sparse`] and [`small`] modules. -pub mod fast_rmq; +pub mod small; -pub mod binary_rmq; +pub mod sparse; diff --git a/src/rmq/fast_rmq/mod.rs b/src/rmq/small/mod.rs similarity index 92% rename from src/rmq/fast_rmq/mod.rs rename to src/rmq/small/mod.rs index c451ce3..246c9b4 100644 --- a/src/rmq/fast_rmq/mod.rs +++ b/src/rmq/small/mod.rs @@ -7,7 +7,7 @@ use std::cmp::min_by; use std::mem::size_of; use std::ops::{Bound, Deref, RangeBounds}; -use crate::rmq::binary_rmq::BinaryRmq; +use crate::rmq::sparse::SparseRmq; use crate::util::pdep::Pdep; /// Size of the blocks the data is split into. One block is indexable with a u8, hence its size. 
@@ -66,10 +66,10 @@ struct Block {
 ///
 /// # Example
 /// ```rust
-/// use vers_vecs::FastRmq;
+/// use vers_vecs::SmallRmq;
 ///
 /// let data = vec![4, 10, 3, 11, 2, 12];
-/// let rmq = FastRmq::from_vec(data);
+/// let rmq = SmallRmq::from_vec(data);
 ///
 /// assert_eq!(rmq.range_min(0, 1), 0);
 /// assert_eq!(rmq.range_min(0, 2), 2);
 /// ```
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
-pub struct FastRmq {
-    data: Vec<u64>,
-    block_minima: BinaryRmq,
-    block_min_indices: Vec<u8>,
-    blocks: Vec<Block>,
+pub struct SmallRmq {
+    data: Box<[u64]>,
+    block_minima: SparseRmq,
+    block_min_indices: Box<[u8]>,
+    blocks: Box<[Block]>,
 }

-impl FastRmq {
+impl SmallRmq {
     /// Creates a new range minimum query data structure from the given data. Creation time is
     /// O(n log n) and space overhead is O(n log n) with a fractional constant factor
-    /// (see [`FastRmq`])
+    /// (see [`SmallRmq`])
     ///
     /// # Panics
     /// This function will panic if the input is larger than 2^40 elements.
@@ -141,21 +141,21 @@ impl FastRmq {
         });

         Self {
-            data,
-            block_minima: BinaryRmq::from_vec(block_minima),
-            block_min_indices,
-            blocks,
+            data: data.into_boxed_slice(),
+            block_minima: SparseRmq::from_vec(block_minima),
+            block_min_indices: block_min_indices.into_boxed_slice(),
+            blocks: blocks.into_boxed_slice(),
         }
     }

-    /// Convenience function for [`FastRmq::range_min`] for using range operators.
+    /// Convenience function for [`SmallRmq::range_min`] for using range operators.
     /// The range is clamped to the length of the data structure, so this function will not panic,
     /// unless called on an empty data structure, because that does not have a valid index.
     ///
     /// # Example
     /// ```rust
-    /// use vers_vecs::FastRmq;
-    /// let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]);
+    /// use vers_vecs::SmallRmq;
+    /// let rmq = SmallRmq::from_vec(vec![5, 4, 3, 2, 1]);
     /// assert_eq!(rmq.range_min_with_range(0..3), 2);
     /// assert_eq!(rmq.range_min_with_range(0..=3), 3);
     /// ```
@@ -290,15 +290,15 @@ impl FastRmq {
 /// Implements Deref to delegate to the underlying data structure. This allows the user to use
 /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators,
 /// etc.
-impl Deref for FastRmq {
-    type Target = Vec<u64>;
+impl Deref for SmallRmq {
+    type Target = Box<[u64]>;

     fn deref(&self) -> &Self::Target {
         &self.data
     }
 }

-impl From<Vec<u64>> for FastRmq {
+impl From<Vec<u64>> for SmallRmq {
     fn from(data: Vec<u64>) -> Self {
         Self::from_vec(data)
     }
 }
@@ -309,8 +309,8 @@ impl From<Vec<u64>> for FastRmq {
 ///
 /// See [`FastRmq::from_vec`] for more information.
/// -/// [`FastRmq::from_vec`]: FastRmq::from_vec -impl FromIterator for FastRmq { +/// [`FastRmq::from_vec`]: SmallRmq::from_vec +impl FromIterator for SmallRmq { fn from_iter>(iter: T) -> Self { Self::from_vec(iter.into_iter().collect()) } diff --git a/src/rmq/fast_rmq/tests.rs b/src/rmq/small/tests.rs similarity index 90% rename from src/rmq/fast_rmq/tests.rs rename to src/rmq/small/tests.rs index f6e1bc9..aac8bdf 100644 --- a/src/rmq/fast_rmq/tests.rs +++ b/src/rmq/small/tests.rs @@ -45,7 +45,7 @@ fn test_fast_rmq() { numbers_vec.push(i as u64); } - let rmq = FastRmq::from_vec(numbers_vec.clone()); + let rmq = SmallRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -70,7 +70,7 @@ fn test_fast_rmq_unsorted() { numbers_vec.push(rng.next_u64()); } - let rmq = FastRmq::from_vec(numbers_vec.clone()); + let rmq = SmallRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -88,7 +88,7 @@ fn test_fast_rmq_unsorted() { #[test] fn test_iter() { - let rmq = FastRmq::from_vec(vec![1, 2, 3, 4, 5]); + let rmq = SmallRmq::from_vec(vec![1, 2, 3, 4, 5]); let mut iter = rmq.iter(); assert_eq!(iter.next(), Some(&1)); assert_eq!(iter.next(), Some(&2)); @@ -100,7 +100,7 @@ fn test_iter() { #[test] fn test_range_operators() { - let rmq = FastRmq::from_vec(vec![5, 4, 3, 2, 1]); + let rmq = SmallRmq::from_vec(vec![5, 4, 3, 2, 1]); assert_eq!(rmq.range_min(0, 3), 3); assert_eq!(rmq.range_min_with_range(0..3), 2); assert_eq!(rmq.range_min_with_range(0..=3), 3); @@ -108,7 +108,7 @@ fn test_range_operators() { #[test] fn test_empty_rmq() { - let _rmq = FastRmq::from_vec(Vec::::new()); + let _rmq = SmallRmq::from_vec(Vec::::new()); // calling functions on an empty rmq will panic because the upper bound is inclusive, but there // is no valid index in an empty array, so we can't test anything else } diff --git a/src/rmq/binary_rmq/mod.rs b/src/rmq/sparse/mod.rs similarity index 92% rename from src/rmq/binary_rmq/mod.rs rename to src/rmq/sparse/mod.rs index 84962ca..3bb1743 100644 --- a/src/rmq/binary_rmq/mod.rs +++ b/src/rmq/sparse/mod.rs @@ -17,10 +17,10 @@ use std::ops::{Deref, RangeBounds}; /// /// # Example /// ```rust -/// use vers_vecs::BinaryRmq; +/// use vers_vecs::SparseRmq; /// /// let data = vec![4, 10, 3, 11, 2, 12]; -/// let rmq = BinaryRmq::from_vec(data); +/// let rmq = SparseRmq::from_vec(data); /// /// assert_eq!(rmq.range_min(0, 1), 0); /// assert_eq!(rmq.range_min(0, 2), 2); @@ -28,16 +28,16 @@ use std::ops::{Deref, RangeBounds}; /// ``` #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct BinaryRmq { - data: Vec, +pub struct SparseRmq { + data: Box<[u64]>, // store indices relative to start of range. There is no way to have ranges exceeding 2^32 bits // but since we have fast_rmq for larger inputs, which does not have any downsides at that point, // we can just use u32 here (which gains cache efficiency for both implementations). - results: Vec, + results: Box<[u32]>, } -impl BinaryRmq { +impl SparseRmq { /// Create a new RMQ data structure for the given data. This uses O(n log n) space and /// precalculates the minimum element in intervals 2^k for all k for all elements. /// @@ -102,17 +102,20 @@ impl BinaryRmq { } } - Self { data, results } + Self { + data: data.into_boxed_slice(), + results: results.into_boxed_slice(), + } } - /// Convenience function for [`BinaryRmq::range_min`] for using range operators. + /// Convenience function for [`SparseRmq::range_min`] for using range operators. 
/// The range is clamped to the length of the data structure, so this function will not panic, /// unless called on an empty data structure, because that does not have a valid index. /// /// # Example /// ```rust - /// use vers_vecs::BinaryRmq; - /// let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]); + /// use vers_vecs::SparseRmq; + /// let rmq = SparseRmq::from_vec(vec![5, 4, 3, 2, 1]); /// assert_eq!(rmq.range_min_with_range(0..3), 2); /// assert_eq!(rmq.range_min_with_range(0..=3), 3); /// ``` @@ -169,15 +172,15 @@ impl BinaryRmq { /// Implements Deref to delegate to the underlying data structure. This allows the user to use /// indexing syntax on the RMQ data structure to access the underlying data, as well as iterators, /// etc. -impl Deref for BinaryRmq { - type Target = Vec; +impl Deref for SparseRmq { + type Target = Box<[u64]>; fn deref(&self) -> &Self::Target { &self.data } } -impl From> for BinaryRmq { +impl From> for SparseRmq { fn from(data: Vec) -> Self { Self::from_vec(data) } @@ -188,8 +191,8 @@ impl From> for BinaryRmq { /// /// See [`BinaryRmq::from_vec`] for more information. /// -/// [`BinaryRmq::from_vec`]: BinaryRmq::from_vec -impl FromIterator for BinaryRmq { +/// [`BinaryRmq::from_vec`]: SparseRmq::from_vec +impl FromIterator for SparseRmq { fn from_iter>(iter: T) -> Self { Self::from_vec(iter.into_iter().collect()) } diff --git a/src/rmq/binary_rmq/tests.rs b/src/rmq/sparse/tests.rs similarity index 83% rename from src/rmq/binary_rmq/tests.rs rename to src/rmq/sparse/tests.rs index 9e5fa63..05c8cd0 100644 --- a/src/rmq/binary_rmq/tests.rs +++ b/src/rmq/sparse/tests.rs @@ -1,9 +1,9 @@ -use crate::rmq::binary_rmq::BinaryRmq; +use crate::rmq::sparse::SparseRmq; use rand::RngCore; #[test] fn small_test() { - let rmq = BinaryRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]); + let rmq = SparseRmq::from_vec(vec![9, 6, 10, 4, 0, 8, 3, 7, 1, 2, 5]); assert_eq!(rmq.range_min(0, 0), 0); assert_eq!(rmq.range_min(0, 1), 1); @@ -25,7 +25,7 @@ fn randomized_test() { numbers_vec.push(rng.next_u64()); } - let rmq = BinaryRmq::from_vec(numbers_vec.clone()); + let rmq = SparseRmq::from_vec(numbers_vec.clone()); for i in 0..L { for j in i..L { @@ -43,7 +43,7 @@ fn randomized_test() { #[test] fn test_iter() { - let rmq = BinaryRmq::from_vec(vec![1, 2, 3, 4, 5]); + let rmq = SparseRmq::from_vec(vec![1, 2, 3, 4, 5]); let mut iter = rmq.iter(); assert_eq!(iter.next(), Some(&1)); assert_eq!(iter.next(), Some(&2)); @@ -55,7 +55,7 @@ fn test_iter() { #[test] fn test_range_operators() { - let rmq = BinaryRmq::from_vec(vec![5, 4, 3, 2, 1]); + let rmq = SparseRmq::from_vec(vec![5, 4, 3, 2, 1]); assert_eq!(rmq.range_min(0, 3), 3); assert_eq!(rmq.range_min_with_range(0..3), 2); assert_eq!(rmq.range_min_with_range(0..=3), 3); @@ -63,7 +63,7 @@ fn test_range_operators() { #[test] fn test_empty_rmq() { - let rmq = BinaryRmq::from_vec(Vec::::new()); + let rmq = SparseRmq::from_vec(Vec::::new()); assert!(rmq.is_empty()); // calling functions on an empty rmq will panic because the upper bound is inclusive, but there // is no valid index in an empty array, so we can't test anything else diff --git a/src/trees/bp/builder.rs b/src/trees/bp/builder.rs index 753600c..7553925 100644 --- a/src/trees/bp/builder.rs +++ b/src/trees/bp/builder.rs @@ -5,13 +5,14 @@ use crate::BitVec; /// A builder for [`BpTrees`] using depth-first traversal of the tree. See the documentation of /// [`TreeBuilder`]. 
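The `From` and `FromIterator` impls carried over here mean that `collect()` and `into()` keep working across the rename; a short sketch (again assuming the crate-root re-export of `SparseRmq`):

```rust
use vers_vecs::SparseRmq; // formerly vers_vecs::BinaryRmq

// FromIterator<u64>: collect directly into the RMQ structure
let rmq: SparseRmq = (0..64u64).rev().collect();
// the data is descending, so the minimum of any range is at its right end
assert_eq!(rmq.range_min(0, 10), 10);

// From<Vec<u64>> delegates to from_vec
let rmq = SparseRmq::from(vec![3u64, 1, 2]);
assert_eq!(rmq.range_min(0, 2), 1);
```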
/// -/// [`BpTree`]: BpTree -pub struct BpBuilder { +/// [`BpTrees`]: BpTree +/// [`TreeBuilder`]: TreeBuilder +pub struct BpBuilder { excess: i64, bit_vec: BitVec, } -impl BpBuilder { +impl BpBuilder { /// Create new empty `DfsTreeBuilder` #[must_use] pub fn new() -> Self { @@ -26,18 +27,18 @@ impl BpBuilder { pub fn with_capacity(capacity: u64) -> Self { Self { excess: 0, - bit_vec: BitVec::with_capacity((capacity * 2) as usize), + bit_vec: BitVec::with_capacity(capacity * 2), } } } -impl Default for BpBuilder { +impl Default for BpBuilder { fn default() -> Self { Self::new() } } -impl TreeBuilder for BpBuilder { +impl TreeBuilder for BpBuilder { type Tree = BpTree; fn enter_node(&mut self) { @@ -54,7 +55,7 @@ impl TreeBuilder for BpBuilder { if self.excess != 0 { Err(self.excess) } else { - Ok(BpTree::from_bit_vector(self.bit_vec)) + Ok(BpTree::from_bit_vec(self.bit_vec)) } } } diff --git a/src/trees/bp/lookup.rs b/src/trees/bp/lookup.rs index 2c8fc8b..0a44c3d 100644 --- a/src/trees/bp/lookup.rs +++ b/src/trees/bp/lookup.rs @@ -1,3 +1,6 @@ +#![allow(clippy::cast_sign_loss)] // sign loss cannot happen on correctly formed BP trees +#![allow(clippy::cast_possible_wrap)] // ditto + //! This module provides the lookup table and lookup functionality to answer excess queries //! for 8-bit and 16-bit blocks in the tree vector. //! Note that the 8-bit version is unused, since this whole module gets replaced with @@ -52,6 +55,7 @@ const PAREN_BLOCK_LOOKUP: [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] = calculate /// Offset to add to encoded excess values, so negative numbers are stored as positive integers, reducing /// encoding complexity +#[allow(clippy::cast_possible_truncation)] // false positive const ENCODING_OFFSET: i32 = LOOKUP_BLOCK_SIZE as i32; /// Bitmask for one of the lookup values. @@ -66,6 +70,7 @@ const MINIMUM_EXCESS_POSITION: usize = 6; #[cfg(not(feature = "bp_u16_lookup"))] const MINIMUM_EXCESS_POSITION: usize = 5; +#[allow(clippy::cast_possible_truncation)] // all values are in range const fn calculate_lookup_table() -> [EncodedTableType; 1 << LOOKUP_BLOCK_SIZE] { // initial sentinel values during excess computation const MORE_THAN_MAX: SignedLookupBlockType = (LOOKUP_BLOCK_SIZE + 1) as SignedLookupBlockType; @@ -114,12 +119,14 @@ const fn get_maximum_excess(value: EncodedTableType) -> i64 { } /// Branchless const minimum computation for values that cannot overflow +#[allow(clippy::cast_possible_truncation)] // all values are in range const fn min(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { b + ((a - b) & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) } /// Branchless const maximum computation for values that cannot overflow +#[allow(clippy::cast_possible_truncation)] // all values are in range const fn max(a: SignedLookupBlockType, b: SignedLookupBlockType) -> SignedLookupBlockType { a - ((a - b) & -(((a - b) as LookupBlockType >> (LOOKUP_BLOCK_SIZE - 1)) as SignedLookupBlockType)) diff --git a/src/trees/bp/mod.rs b/src/trees/bp/mod.rs index 6b9e89c..321e1e0 100644 --- a/src/trees/bp/mod.rs +++ b/src/trees/bp/mod.rs @@ -3,7 +3,7 @@ //! time, as well as subtree size, level-order, and ancestor queries in `O(log n)` time. //! The tree is succinct (ideally sublinear space overhead) and pointer-less. 
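To make the builder change above concrete, here is a depth-first construction through the `TreeBuilder` interface; `build()` hands back the leftover excess as the error on unbalanced input. This is a sketch: the import path of `BpBuilder` is an assumption, and the block size is spelled out explicitly rather than relying on a default:

```rust
use vers_vecs::trees::{Tree, TreeBuilder};
use vers_vecs::trees::bp::BpBuilder; // import path assumed

// build the three-node tree (()()) in depth-first order
let mut builder = BpBuilder::<512>::with_capacity(3);
builder.enter_node(); // root: appends an opening parenthesis
builder.enter_node(); // first leaf
builder.leave_node();
builder.enter_node(); // second leaf
builder.leave_node();
builder.leave_node(); // closing the root balances the expression

// an unbalanced traversal would yield Err(excess) instead
let tree = builder.build().expect("traversal is balanced");
assert_eq!(tree.size(), 3);
```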
-use crate::bit_vec::fast_rs_vec::SelectIntoIter; +use crate::bit_vec::rs::SelectIntoIter; use crate::trees::mmt::MinMaxTree; use crate::trees::{IsAncestor, LevelTree, SubtreeSize, Tree}; use crate::{BitVec, RsVec}; @@ -11,7 +11,7 @@ use std::cmp::{max, min}; use std::iter::FusedIterator; /// The default block size for the tree, used in several const generics -const DEFAULT_BLOCK_SIZE: usize = 512; +const DEFAULT_BLOCK_SIZE: u64 = 512; const OPEN_PAREN: u64 = 1; const CLOSE_PAREN: u64 = 0; @@ -122,7 +122,7 @@ use lookup_query::{process_block_bwd, process_block_fwd, LOOKUP_BLOCK_SIZE}; /// # #![allow(long_running_const_eval)] /// use vers_vecs::{BitVec, BpTree, Tree}; /// let bv = BitVec::pack_sequence_u8(&[0b1101_0111, 0b0010_0100], 8); -/// let tree = BpTree::<4>::from_bit_vector(bv); +/// let tree = BpTree::<4>::from_bit_vec(bv); /// /// let nodes = tree.dfs_iter().collect::>(); /// assert_eq!(nodes, vec![0, 1, 2, 4, 6, 7, 10, 13]); @@ -139,15 +139,15 @@ use lookup_query::{process_block_bwd, process_block_fwd, LOOKUP_BLOCK_SIZE}; /// [`BitVec`]: BitVec #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct BpTree { +pub struct BpTree { vec: RsVec, min_max_tree: MinMaxTree, } -impl BpTree { +impl BpTree { /// Construct a new `BpTree` from a given bit vector. #[must_use] - pub fn from_bit_vector(bv: BitVec) -> Self { + pub fn from_bit_vec(bv: BitVec) -> Self { let min_max_tree = MinMaxTree::excess_tree(&bv, BLOCK_SIZE); let vec = bv.into(); Self { vec, min_max_tree } @@ -161,14 +161,15 @@ impl BpTree { /// # Arguments /// - `index`: The starting index. /// - `relative_excess`: The desired relative excess value. - pub fn fwd_search(&self, index: usize, mut relative_excess: i64) -> Option { + pub fn fwd_search(&self, index: u64, mut relative_excess: i64) -> Option { // check for greater than or equal length minus one, because the last element // won't ever have a result from fwd_search if index >= (self.vec.len() - 1) { return None; } - let block_index = (index + 1) / BLOCK_SIZE; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let block_index = ((index + 1) / BLOCK_SIZE) as usize; self.fwd_search_block(index, block_index, &mut relative_excess) .map_or_else( |()| { @@ -177,8 +178,12 @@ impl BpTree { // check the result block for the exact position block.and_then(|(block, mut relative_excess)| { - self.fwd_search_block(block * BLOCK_SIZE - 1, block, &mut relative_excess) - .ok() + self.fwd_search_block( + block as u64 * BLOCK_SIZE - 1, + block, + &mut relative_excess, + ) + .ok() }) }, Some, @@ -194,15 +199,15 @@ impl BpTree { #[inline(always)] fn fwd_search_block( &self, - start_index: usize, + start_index: u64, block_index: usize, relative_excess: &mut i64, - ) -> Result { - let block_boundary = min((block_index + 1) * BLOCK_SIZE, self.vec.len()); + ) -> Result { + let block_boundary = min((block_index as u64 + 1) * BLOCK_SIZE, self.vec.len()); // the boundary at which we can start with table lookups let lookup_boundary = min( - (start_index + 1).div_ceil(LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + (start_index + 1).div_ceil(LOOKUP_BLOCK_SIZE) * LOOKUP_BLOCK_SIZE, block_boundary, ); for i in start_index + 1..lookup_boundary { @@ -217,18 +222,20 @@ impl BpTree { // the boundary up to which we can use table lookups let upper_lookup_boundary = max( lookup_boundary, - (block_boundary / LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + (block_boundary / LOOKUP_BLOCK_SIZE) * 
LOOKUP_BLOCK_SIZE, ); + // LOOKUP_BLOCK_SIZE as usize is a false positive for the lint: https://github.com/rust-lang/rust-clippy/issues/9613 + #[allow(clippy::cast_possible_truncation)] for i in (lookup_boundary..upper_lookup_boundary).step_by(LOOKUP_BLOCK_SIZE as usize) { if let Ok(idx) = process_block_fwd( self.vec - .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE as usize) + .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE) .try_into() .unwrap(), relative_excess, ) { - return Ok(i + idx as usize); + return Ok(i + idx); } } @@ -254,7 +261,7 @@ impl BpTree { /// # Arguments /// - `index`: The starting index. /// - `relative_excess`: The desired relative excess value. - pub fn bwd_search(&self, index: usize, mut relative_excess: i64) -> Option { + pub fn bwd_search(&self, index: u64, mut relative_excess: i64) -> Option { if index >= self.vec.len() { return None; } @@ -267,7 +274,8 @@ impl BpTree { // calculate the block we start searching in. It starts at index - 1, so we don't accidentally // search the mM tree and immediately find `index` as the position - let block_index = (index - 1) / BLOCK_SIZE; + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let block_index = ((index - 1) / BLOCK_SIZE) as usize; // check the current block self.bwd_search_block(index, block_index, &mut relative_excess) @@ -278,8 +286,12 @@ impl BpTree { // check the result block for the exact position block.and_then(|(block, mut relative_excess)| { - self.bwd_search_block((block + 1) * BLOCK_SIZE, block, &mut relative_excess) - .ok() + self.bwd_search_block( + (block as u64 + 1) * BLOCK_SIZE, + block, + &mut relative_excess, + ) + .ok() }) }, Some, @@ -295,15 +307,15 @@ impl BpTree { #[inline(always)] fn bwd_search_block( &self, - start_index: usize, + start_index: u64, block_index: usize, relative_excess: &mut i64, - ) -> Result { - let block_boundary = min(block_index * BLOCK_SIZE, self.vec.len()); + ) -> Result { + let block_boundary = min(block_index as u64 * BLOCK_SIZE, self.vec.len()); // the boundary at which we can start with table lookups let lookup_boundary = max( - ((start_index - 1) / LOOKUP_BLOCK_SIZE as usize) * LOOKUP_BLOCK_SIZE as usize, + ((start_index - 1) / LOOKUP_BLOCK_SIZE) * LOOKUP_BLOCK_SIZE, block_boundary, ); for i in (lookup_boundary..start_index).rev() { @@ -315,18 +327,22 @@ impl BpTree { } } - for i in (block_boundary..lookup_boundary) + // lookup_boundary - block_boundary is smaller than a block, so casting to usize cannot + // truncate + // and LOOKUP_BLOCK_SIZE as usize is a false positive for the lint: https://github.com/rust-lang/rust-clippy/issues/9613 + #[allow(clippy::cast_possible_truncation)] + for i in (0..(lookup_boundary - block_boundary) as usize) .step_by(LOOKUP_BLOCK_SIZE as usize) .rev() { if let Ok(idx) = process_block_bwd( self.vec - .get_bits_unchecked(i, LOOKUP_BLOCK_SIZE as usize) + .get_bits_unchecked(block_boundary + i as u64, LOOKUP_BLOCK_SIZE) .try_into() .unwrap(), relative_excess, ) { - return Ok(i + idx as usize); + return Ok(block_boundary + i as u64 + idx); } } @@ -337,7 +353,7 @@ impl BpTree { /// If the bit at `index` is not an opening parenthesis, the result is meaningless. /// If there is no matching closing parenthesis, `None` is returned. #[must_use] - pub fn close(&self, index: usize) -> Option { + pub fn close(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -349,7 +365,7 @@ impl BpTree { /// If the bit at `index` is not a closing parenthesis, the result is meaningless. 
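As a concrete reading of these two searches: finding a matching closing parenthesis is a forward search for relative excess -1, which is how `close` behaves on the public API. A small illustration (the internal delegation is an assumption; the asserted results follow directly from the excess definition):

```rust
use vers_vecs::{BitVec, BpTree};

// the parenthesis expression ( ( ) ( ) )
let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]);
let tree = BpTree::<4>::from_bit_vec(bv);

// the match of the parenthesis at 0 is the first position where the
// excess drops one below its value at position 0
assert_eq!(tree.fwd_search(0, -1), Some(5));
assert_eq!(tree.close(0), Some(5));
assert_eq!(tree.close(1), Some(2));
```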
/// If there is no matching opening parenthesis, `None` is returned. #[must_use] - pub fn open(&self, index: usize) -> Option { + pub fn open(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -361,7 +377,7 @@ impl BpTree { /// This works regardless of whether the bit at `index` is an opening or closing parenthesis. /// If there is no enclosing parenthesis, `None` is returned. #[must_use] - pub fn enclose(&self, index: usize) -> Option { + pub fn enclose(&self, index: u64) -> Option { if index >= self.vec.len() { return None; } @@ -380,7 +396,8 @@ impl BpTree { /// The excess is the number of open parentheses minus the number of closing parentheses. /// If `index` is out of bounds, the total excess of the parentheses expression is returned. #[must_use] - pub fn excess(&self, index: usize) -> i64 { + #[allow(clippy::cast_possible_wrap)] // only happens if the tree is unbalanced and has more than 2^62 nodes + pub fn excess(&self, index: u64) -> i64 { debug_assert!(index < self.vec.len(), "Index out of bounds"); self.vec.rank1(index + 1) as i64 - self.vec.rank0(index + 1) as i64 } @@ -426,8 +443,14 @@ impl BpTree { /// Iterate over a subtree rooted at `node` in depth-first (pre-)order. /// The iteration starts with the node itself. /// + /// # Limitations + /// When called on an architecture where `usize` is smaller than 64 bits, on a tree with more + /// than 2^31 nodes, this method may produce an iterator over an unspecified subset of nodes. + /// + /// # Panics /// Calling this method on an invalid node handle, or an unbalanced parenthesis expression, - /// will produce an iterator over an unspecified subset of nodes. + /// will produce an iterator over an unspecified subset of nodes, or panic either during + /// construction or iteration. pub fn subtree_iter( &self, node: as Tree>::NodeHandle, @@ -437,17 +460,44 @@ impl BpTree { "Node handle is invalid" ); - let index = self.vec.rank1(node); + let mut index = self.vec.rank1(node); let close = self.close(node).unwrap_or(node); let subtree_size = self.vec.rank1(close) - index; - self.vec.iter1().skip(index).take(subtree_size) + let mut iterator = self.vec.iter1(); + + // since index and subtree_size can exceed usize::MAX, we need some special casing. + // This should be optimized away on 64-bit architectures + + // skip `index` elements + while index > usize::MAX as u64 { + index -= usize::MAX as u64; + iterator.advance_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_by(index as usize).unwrap(); + + // limit to `subtree_size` elements by consuming the back of the iterator + let mut remaining_bits = self.vec.rank1 - index - subtree_size; + while remaining_bits > usize::MAX as u64 { + remaining_bits -= usize::MAX as u64; + iterator.advance_back_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_back_by(remaining_bits as usize).unwrap(); + + iterator } /// Iterate over a subtree rooted at `node` in depth-first (post-)order. /// This is slower than the pre-order iteration. /// The iteration ends with the node itself. /// + /// # Limitations + /// When called on an architecture where `usize` is smaller than 64 bits, on a tree with more + /// than 2^31 nodes, the iterator may return an unspecified number of nodes starting at an + /// unspecified node.
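The skip loops above exist because `advance_by` takes a `usize` count; the same chunking pattern on a plain iterator, written with stable `nth` (which consumes `k + 1` elements), looks like this sketch:

```rust
/// Advance `iter` by `n` elements even if `n` exceeds `usize::MAX`,
/// mirroring the chunked skip in `subtree_iter`.
fn skip_u64<I: Iterator>(iter: &mut I, mut n: u64) {
    while n > usize::MAX as u64 {
        iter.nth(usize::MAX - 1); // nth(k) consumes exactly k + 1 elements
        n -= usize::MAX as u64;
    }
    if n > 0 {
        // the loop above guarantees that n now fits into usize
        iter.nth(n as usize - 1);
    }
}

let mut it = 0..100u64;
skip_u64(&mut it, 10);
assert_eq!(it.next(), Some(10));
```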
+ /// /// # Panics /// Calling this method on an invalid node handle, or an unbalanced parenthesis expression, /// will produce an iterator over an unspecified subset of nodes, or panic either during /// construction or iteration. @@ -461,15 +511,33 @@ impl BpTree { "Node handle is invalid" ); - let index = self.vec.rank0(node); + let mut index = self.vec.rank0(node); let close = self.close(node).unwrap_or(node); let subtree_size = self.vec.rank0(close) + 1 - index; - self.vec - .iter0() - .skip(index) - .take(subtree_size) - .map(|n| self.open(n).unwrap()) + let mut iterator = self.vec.iter0(); + + // since index and subtree_size can exceed usize::MAX, we need some special casing. + // This should be optimized away on 64-bit architectures + + // skip `index` elements + while index > usize::MAX as u64 { + index -= usize::MAX as u64; + iterator.advance_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_by(index as usize).unwrap(); + + // limit to `subtree_size` elements by consuming the back of the iterator + let mut remaining_bits = self.vec.rank0 - index - subtree_size; + while remaining_bits > usize::MAX as u64 { + remaining_bits -= usize::MAX as u64; + iterator.advance_back_by(usize::MAX).unwrap(); + } + #[allow(clippy::cast_possible_truncation)] // the loop guarantees no truncation + iterator.advance_back_by(remaining_bits as usize).unwrap(); + + iterator.map(|n| self.open(n).unwrap()) } /// Iterate over the children of a node in the tree. @@ -520,7 +588,7 @@ impl BpTree { /// use vers_vecs::{BitVec, RsVec, BpTree, Tree}; /// /// let bv = BitVec::pack_sequence_u8(&[0b1101_0111, 0b0010_0100], 8); - /// let tree = BpTree::<4>::from_bit_vector(bv); + /// let tree = BpTree::<4>::from_bit_vec(bv); /// assert_eq!(tree.size(), 8); /// /// let rs_vec = tree.into_parentheses_vec(); /// /// bv.flip_bit(15); /// bv.append_bits(0, 2); - /// let tree = BpTree::<4>::from_bit_vector(bv); + /// let tree = BpTree::<4>::from_bit_vec(bv); /// assert_eq!(tree.size(), 9); /// ``` #[must_use] @@ -544,8 +612,8 @@ } } -impl Tree for BpTree { - type NodeHandle = usize; +impl Tree for BpTree { + type NodeHandle = u64; fn root(&self) -> Option { if self.vec.is_empty() { @@ -627,7 +695,7 @@ impl Tree for BpTree { }) } - fn node_index(&self, node: Self::NodeHandle) -> usize { + fn node_index(&self, node: Self::NodeHandle) -> u64 { debug_assert!( self.vec.get(node) == Some(OPEN_PAREN),
"Node handle is invalid" @@ -734,7 +802,7 @@ impl SubtreeSize for BpTree { } } -impl IntoIterator for BpTree { +impl IntoIterator for BpTree { type Item = as Tree>::NodeHandle; type IntoIter = SelectIntoIter; @@ -743,19 +811,19 @@ impl IntoIterator for BpTree { } } -impl From for BpTree { +impl From for BpTree { fn from(bv: BitVec) -> Self { - Self::from_bit_vector(bv) + Self::from_bit_vec(bv) } } -impl From> for BitVec { +impl From> for BitVec { fn from(value: BpTree) -> Self { value.into_parentheses_vec().into_bit_vec() } } -impl From> for RsVec { +impl From> for RsVec { fn from(value: BpTree) -> Self { value.into_parentheses_vec() } @@ -764,13 +832,13 @@ impl From> for RsVec { /// An iterator over the children of a node. /// Calls to `next` return the next child node handle in the order they appear in the parenthesis /// expression. -struct ChildrenIter<'a, const BLOCK_SIZE: usize, const FORWARD: bool> { +struct ChildrenIter<'a, const BLOCK_SIZE: u64, const FORWARD: bool> { tree: &'a BpTree, - current_sibling: Option, + current_sibling: Option, } -impl<'a, const BLOCK_SIZE: usize, const FORWARD: bool> ChildrenIter<'a, BLOCK_SIZE, FORWARD> { - fn new(tree: &'a BpTree, node: usize) -> Self { +impl<'a, const BLOCK_SIZE: u64, const FORWARD: bool> ChildrenIter<'a, BLOCK_SIZE, FORWARD> { + fn new(tree: &'a BpTree, node: u64) -> Self { Self { tree, current_sibling: if FORWARD { @@ -782,10 +850,10 @@ impl<'a, const BLOCK_SIZE: usize, const FORWARD: bool> ChildrenIter<'a, BLOCK_SI } } -impl Iterator +impl Iterator for ChildrenIter<'_, BLOCK_SIZE, FORWARD> { - type Item = usize; + type Item = u64; fn next(&mut self) -> Option { let current = self.current_sibling?; @@ -799,7 +867,7 @@ impl Iterator } } -impl FusedIterator +impl FusedIterator for ChildrenIter<'_, BLOCK_SIZE, FORWARD> { } diff --git a/src/trees/bp/tests.rs b/src/trees/bp/tests.rs index a7da6b2..c6b389d 100644 --- a/src/trees/bp/tests.rs +++ b/src/trees/bp/tests.rs @@ -6,13 +6,13 @@ use rand::{RngCore, SeedableRng}; #[test] fn test_fwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<8>::from_bit_vector(bv); + let bp_tree = BpTree::<8>::from_bit_vec(bv); // search within block assert_eq!(bp_tree.fwd_search(3, -1), Some(4)); @@ -33,13 +33,13 @@ fn test_fwd_search() { #[test] fn test_fwd_single_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<512>::from_bit_vector(bv); + let bp_tree = BpTree::<512>::from_bit_vec(bv); assert_eq!(bp_tree.fwd_search(3, -1), Some(4)); assert_eq!(bp_tree.fwd_search(2, -1), Some(5)); @@ -55,13 +55,13 @@ fn test_fwd_single_block() { #[test] fn test_fwd_illegal_queries() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.fwd_search(24, 0), None); assert_eq!(tree.fwd_search(25, 0), None); @@ -69,7 +69,7 @@ fn test_fwd_illegal_queries() { assert_eq!(tree.fwd_search(0, -2), None); assert_eq!(tree.fwd_search(22, 1), None); - let tree = BpTree::<64>::from_bit_vector(bv); + let tree = BpTree::<64>::from_bit_vec(bv); assert_eq!(tree.fwd_search(24, 0), None); assert_eq!(tree.fwd_search(25, 0), None); 
@@ -82,13 +82,13 @@ fn test_fwd_illegal_queries() { fn test_fwd_unbalanced_expression() { // test whether forward search works with unbalanced parenthesis expressions #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.fwd_search(0, -1), Some(13)); assert_eq!(tree.fwd_search(1, -1), Some(12)); @@ -99,8 +99,8 @@ fn test_fwd_unbalanced_expression() { #[test] fn test_fwd_block_boundary() { - let bv = BitVec::from_bits(&[1, 1, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); // test if a query returns the correct result if the result is the first bit in a block // and not in the initial block @@ -113,8 +113,8 @@ fn test_fwd_block_boundary() { #[test] fn test_fwd_negative_block() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<2>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<2>::from_bit_vec(bv); // regression: test if a query correctly returns none (instead of crashing) if the following // block has a negative maximum excess (as a previous bug clamped it to 0). @@ -127,39 +127,39 @@ fn test_fwd_last_element() { // the binary mM tree right of it may be uninitialized, and so not ending the query early // may yield invalid results or break assertions #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, ]); - let tree = BpTree::<4>::from_bit_vector(bv); + let tree = BpTree::<4>::from_bit_vec(bv); assert!(tree.fwd_search(23, 0).is_none()); } #[test] fn test_lookup_extreme_pop() { // test whether a table lookup works if the bit pattern is only ones or only zeros - let bv = BitVec::from_bits(&[1; 64]); - let tree = BpTree::<512>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1; 64]); + let tree = BpTree::<512>::from_bit_vec(bv); for excess in 1..64 { - assert_eq!(tree.fwd_search(0, excess), Some(excess as usize)); + assert_eq!(tree.fwd_search(0, excess), Some(excess as u64)); } - let bv = BitVec::from_bits(&[0; 64]); - let tree = BpTree::<512>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0; 64]); + let tree = BpTree::<512>::from_bit_vec(bv); for excess in 1..64 { - assert_eq!(tree.fwd_search(0, -excess), Some(excess as usize)); + assert_eq!(tree.fwd_search(0, -excess), Some(excess as u64)); } } #[test] fn test_fwd_fuzzy() { // we're fuzzing forward search a bit - const L: usize = 1000; - const L_BITS: usize = L * size_of::() * 8; + const L: u64 = 1000; + const L_BITS: u64 = L * size_of::() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -170,7 +170,7 @@ fn test_fwd_fuzzy() { } // pre-calculate all absolute excess values - let mut excess_values = vec![0i16; L_BITS]; + let mut excess_values = vec![0i16; L_BITS as usize]; let mut excess = 0; for (idx, bit) in bit_vec.iter().enumerate() { if bit == 1 { @@ -182,16 +182,16 @@ fn test_fwd_fuzzy() { } } - let bp = BpTree::<128>::from_bit_vector(bit_vec); + let bp = BpTree::<128>::from_bit_vec(bit_vec); // test any query from valid nodes with the given relative excess values for relative_excess in [-3, -2, -1, 0, 1, 2, 3] 
{ for node_handle in bp.vec.iter1() { let absolute_excess = bp.excess(node_handle) + relative_excess; - let expected = excess_values[node_handle + 1..] + let expected = excess_values[(node_handle + 1) as usize..] .iter() .position(|&excess| excess as i64 == absolute_excess) - .map(|i| i + node_handle + 1); + .map(|i| i as u64 + node_handle + 1); let actual = bp.fwd_search(node_handle, relative_excess); assert_eq!( expected, @@ -209,13 +209,13 @@ fn test_fwd_fuzzy() { #[test] fn test_bwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<8>::from_bit_vector(bv); + let bp_tree = BpTree::<8>::from_bit_vec(bv); // search within block assert_eq!(bp_tree.bwd_search(4, -1), Some(3)); @@ -236,13 +236,13 @@ fn test_bwd_search() { #[test] fn test_bwd_single_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let bp_tree = BpTree::<512>::from_bit_vector(bv); + let bp_tree = BpTree::<512>::from_bit_vec(bv); assert_eq!(bp_tree.bwd_search(4, -1), Some(3)); assert_eq!(bp_tree.bwd_search(5, -1), Some(2)); @@ -258,13 +258,13 @@ fn test_bwd_single_block() { #[test] fn test_bwd_illegal_queries() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.bwd_search(0, 0), None); assert_eq!(tree.bwd_search(1, 0), None); @@ -272,7 +272,7 @@ fn test_bwd_illegal_queries() { assert_eq!(tree.bwd_search(23, -2), None); assert_eq!(tree.bwd_search(22, -3), None); - let tree = BpTree::<64>::from_bit_vector(bv); + let tree = BpTree::<64>::from_bit_vec(bv); assert_eq!(tree.bwd_search(0, 0), None); assert_eq!(tree.bwd_search(1, 0), None); @@ -285,8 +285,8 @@ fn test_bwd_illegal_queries() { fn test_bwd_left_block_boundary() { // test if a query returns the correct result if the result is the first bit after // a block boundary (the left-most one even for backward search) - let bv = BitVec::from_bits(&[1, 1, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.bwd_search(5, 0), Some(3)); } @@ -294,12 +294,12 @@ fn test_bwd_left_block_boundary() { #[test] fn test_bwd_right_block_boundary() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, ]); - let bp_tree = BpTree::<4>::from_bit_vector(bv); + let bp_tree = BpTree::<4>::from_bit_vec(bv); // test the correct result is returned if result is exactly at a right block boundary assert_eq!(bp_tree.bwd_search(11, -1), Some(4)); @@ -307,8 +307,8 @@ fn test_bwd_right_block_boundary() { #[test] fn test_bwd_block_traversal() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); // if we request excess 0 backwards at a block boundary // we test if that actually traverses the vector instead of reporting @@ -320,8 +320,8 @@ fn test_bwd_block_traversal() { #[test] fn test_bwd_fuzzy() { // we're fuzzing forward search a bit - const L: usize = 1000; - const L_BITS: usize = L * 
size_of::() * 8; + const L: u64 = 1000; + const L_BITS: u64 = L * size_of::() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -332,7 +332,7 @@ fn test_bwd_fuzzy() { } // pre-calculate all absolute excess values - let mut excess_values = vec![0i16; L_BITS + 1]; + let mut excess_values = vec![0i16; (L_BITS + 1) as usize]; let mut excess = 0; for (idx, bit) in bit_vec.iter().enumerate() { if bit == 1 { @@ -344,7 +344,7 @@ fn test_bwd_fuzzy() { } } - let bp = BpTree::<128>::from_bit_vector(bit_vec); + let bp = BpTree::<128>::from_bit_vec(bit_vec); // test any query from valid nodes with the given relative excess values for relative_excess in [-3, -2, -1, 0, 1, 2, 3] { @@ -354,9 +354,10 @@ fn test_bwd_fuzzy() { } else { bp.excess(node_handle - 1) + relative_excess }; - let expected = excess_values[..node_handle] + let expected = excess_values[..node_handle as usize] .iter() - .rposition(|&excess| excess as i64 == absolute_excess); + .rposition(|&excess| excess as i64 == absolute_excess) + .map(|idx| idx as u64); let actual = bp.bwd_search(node_handle, relative_excess); assert_eq!( @@ -374,12 +375,12 @@ fn test_bwd_fuzzy() { #[test] fn test_close() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 0..24 { assert_eq!(tree.close(i), Some(47 - i)); @@ -390,12 +391,12 @@ fn test_close() { #[test] fn test_open() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 24..48 { assert_eq!(tree.open(i), Some(47 - i)); @@ -406,12 +407,12 @@ fn test_open() { #[test] fn test_enclose() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); for i in 1..24 { assert_eq!(tree.enclose(i), Some(i - 1)); @@ -430,11 +431,11 @@ fn test_enclose() { #[test] fn test_parent() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); assert_eq!(tree.excess(27), 0, "tree is not balanced"); @@ -443,13 +444,13 @@ fn test_parent() { for (idx, bit) in bv.iter().enumerate() { if bit == 1 { assert_eq!( - tree.parent(idx), + tree.parent(idx as u64), head, "parent of node {} is incorrect", idx ); stack.push(head); - head = Some(idx); + head = Some(idx as u64); } else { head = stack.pop().expect("stack underflow despite balanced tree"); } @@ -458,9 +459,9 @@ fn test_parent() { #[test] fn test_children() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = 
BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.excess(17), 0, "tree is not balanced"); assert_eq!(tree.first_child(0), Some(1)); @@ -491,25 +492,25 @@ fn test_children() { fn test_contiguous_index() { // test whether `node_index` and `node_handle` return correct indices / node handles. - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv.clone()); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv.clone()); let rs: RsVec = bv.into(); for (rank, index_in_bv) in rs.iter1().enumerate() { - assert_eq!(tree.node_index(index_in_bv), rank); - assert_eq!(tree.node_handle(rank), index_in_bv); + assert_eq!(tree.node_index(index_in_bv), rank as u64); + assert_eq!(tree.node_handle(rank as u64), index_in_bv); } } #[test] fn test_depth() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); let mut depth = 0; - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); for i in 0..24 { if bv.get(i) == Some(1) { assert_eq!(tree.depth(i), depth); @@ -525,17 +526,17 @@ fn test_is_leaf() { let bits = vec![ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, ]; - let bv = BitVec::from_bits(&bits); + let bv = BitVec::from_bits_u8(&bits); let leaves = bits[..] .windows(2) .map(|window| window[0] == 1 && window[1] == 0) .collect::>(); - let tree = BpTree::<8>::from_bit_vector(bv.clone()); + let tree = BpTree::<8>::from_bit_vec(bv.clone()); for (idx, is_leaf) in leaves.iter().enumerate() { // if the bit is 1, check if that node is a leaf. If it's 0, it's not a valid node handle. 
if bits[idx] == 1 { - assert_eq!(tree.is_leaf(idx), *is_leaf); + assert_eq!(tree.is_leaf(idx as u64), *is_leaf); } } } @@ -545,8 +546,8 @@ fn test_is_ancestor() { // (()((())())) // ab cde f let bits = vec![1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0]; - let bv = BitVec::from_bits(&bits); - let tree = BpTree::<8>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&bits); + let tree = BpTree::<8>::from_bit_vec(bv); let a = tree.root().unwrap(); let b = tree.first_child(a).unwrap(); let c = tree.next_sibling(b).unwrap(); @@ -574,22 +575,22 @@ fn test_is_ancestor() { #[test] fn test_root() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]); - let tree = BpTree::<8>::from_bit_vector(bv); + let tree = BpTree::<8>::from_bit_vec(bv); assert_eq!(tree.root(), Some(0)); assert_eq!(tree.previous_sibling(0), None); assert_eq!(tree.next_sibling(0), None); - let tree = BpTree::<16>::from_bit_vector(BitVec::new()); + let tree = BpTree::<16>::from_bit_vec(BitVec::new()); assert_eq!(tree.root(), None); } #[test] fn test_level_ancestor() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_ancestor(2, 0), Some(2)); assert_eq!(tree.level_ancestor(2, 1), Some(1)); @@ -603,10 +604,10 @@ fn test_level_ancestor() { #[test] fn test_level_next() { - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, // intentionally unbalanced ]); - let tree = BpTree::<4>::from_bit_vector(bv); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_next(0), None); // unbalanced query assert_eq!(tree.level_next(1), Some(5)); @@ -618,8 +619,8 @@ fn test_level_next() { #[test] fn test_level_prev() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_prev(0), None); assert_eq!(tree.level_prev(1), None); @@ -634,8 +635,8 @@ fn test_level_prev() { #[test] fn test_level_leftmost() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_leftmost(0), Some(0)); assert_eq!(tree.level_leftmost(1), Some(1)); @@ -647,8 +648,8 @@ fn test_level_leftmost() { #[test] fn test_level_rightmost() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.level_rightmost(0), Some(0)); assert_eq!(tree.level_rightmost(1), Some(11)); @@ -660,8 +661,8 @@ fn test_level_rightmost() { #[test] fn test_subtree_size() { - let bv = BitVec::from_bits(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.subtree_size(0), Some(9)); assert_eq!(tree.subtree_size(1), 
Some(2)); @@ -681,8 +682,8 @@ fn test_malformed_tree_positive() { // for further queries in a consistent state. // the tree has not enough closing brackets - let bv = BitVec::from_bits(&[1, 1, 1, 0, 1, 1, 0, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 0, 1, 1, 0, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); test_all_functions(&tree); } @@ -694,8 +695,8 @@ fn test_malformed_tree_negative() { // for further queries in a consistent state. // the tree has too many closing brackets - let bv = BitVec::from_bits(&[0, 0, 1, 1, 1, 0, 0, 0, 0, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0, 0, 1, 1, 1, 0, 0, 0, 0, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); test_all_functions(&tree); } @@ -706,8 +707,8 @@ fn test_negative_depth() { // most results are meaningless, but we don't want to panic and leave the data structure // for further queries in a consistent state. - let bv = BitVec::from_bits(&[0, 0, 0, 0, 1, 1, 0]); - let tree = BpTree::<4>::from_bit_vector(bv); + let bv = BitVec::from_bits_u8(&[0, 0, 0, 0, 1, 1, 0]); + let tree = BpTree::<4>::from_bit_vec(bv); assert_eq!(tree.depth(4), 0); } @@ -756,8 +757,8 @@ fn fuzz_tree_navigation() { // fuzzing the tree navigation operations on an unbalanced tree // because those are easier to generate uniformly. - const L: usize = 1 << 14; - const L_BITS: usize = L * size_of::() * 8; + const L: u64 = 1 << 14; + const L_BITS: u64 = L * size_of::() as u64 * 8; // we generate a vector using a seeded random generator and check that every query works as expected let mut rng = StdRng::from_seed([0; 32]); @@ -767,7 +768,7 @@ fn fuzz_tree_navigation() { bit_vec.append_word(rng.next_u64()); } - let tree = BpTree::<32>::from_bit_vector(bit_vec.clone()); + let tree = BpTree::<32>::from_bit_vec(bit_vec.clone()); let mut parent_stack = Vec::new(); // keep track of last sibling for each node @@ -778,6 +779,7 @@ fn fuzz_tree_navigation() { let mut sibling_count_stack = Vec::new(); tree.vec.iter().enumerate().for_each(|(idx, bit)| { + let idx = idx as u64; if bit == OPEN_PAREN { assert_eq!(tree.parent(idx), parent_stack.last().copied()); assert_eq!( @@ -831,7 +833,7 @@ fn fuzz_tree_navigation() { #[test] fn test_dfs_iterators() { - let tree = BpTree::<32>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<32>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -844,7 +846,7 @@ fn test_dfs_iterators() { #[test] fn test_subtree_iterators() { - let tree = BpTree::<4>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<4>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -875,7 +877,7 @@ fn test_subtree_iterators() { #[test] fn test_children_iterator() { - let tree = BpTree::<4>::from_bit_vector(BitVec::from_bits(&[ + let tree = BpTree::<4>::from_bit_vec(BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, ])); @@ -918,7 +920,7 @@ fn test_from_padded_bitvec() { bv.append_bit(0); bv.drop_last(1); - let tree = BpTree::<64>::from_bit_vector(bv.clone()); + let tree = BpTree::<64>::from_bit_vec(bv.clone()); assert_eq!(tree.root(), Some(0)); assert_eq!(tree.size(), 1); assert_eq!(tree.fwd_search(0, 2), None); diff --git a/src/trees/mmt.rs b/src/trees/mmt.rs index e66aa09..289383d 100644 --- a/src/trees/mmt.rs +++ b/src/trees/mmt.rs @@ -41,12 +41,16 @@ pub(crate) struct MinMaxTree { } impl MinMaxTree { - pub(crate) fn 
excess_tree(bit_vec: &BitVec, block_size: usize) -> Self { + pub(crate) fn excess_tree(bit_vec: &BitVec, block_size: u64) -> Self { if bit_vec.is_empty() { return Self::default(); } - let num_leaves = bit_vec.len().div_ceil(block_size); + #[allow(clippy::cast_possible_truncation)] // safe due to the division + let num_leaves = bit_vec.len().div_ceil(block_size) as usize; + #[allow(clippy::cast_possible_truncation)] // only happens if available memory already exceeded + #[allow(clippy::cast_sign_loss)] + #[allow(clippy::cast_precision_loss)] let num_internal_nodes = max(1, (1 << (num_leaves as f64).log2().ceil() as usize) - 1); let mut nodes = vec![ExcessNode::default(); num_leaves + num_internal_nodes]; @@ -56,8 +60,9 @@ impl MinMaxTree { // bottom up construction for i in 0..bit_vec.len() { + #[allow(clippy::cast_possible_truncation)] // safe due to the division if i > 0 && i % block_size == 0 { - nodes[num_internal_nodes + i / block_size - 1] = ExcessNode { + nodes[num_internal_nodes + (i / block_size) as usize - 1] = ExcessNode { total: total_excess, min: min_excess, max: max_excess, @@ -170,7 +175,7 @@ impl MinMaxTree { /// Get the index of the left sibling of the node at `index` if it exists #[allow(clippy::unused_self)] // self is used for consistency with other methods pub(crate) fn left_sibling(&self, index: NonZeroUsize) -> Option { - if index.get() % 2 == 0 { + if index.get().is_multiple_of(2) { // index is at least 2 NonZeroUsize::new(index.get() - 1) } else { @@ -447,7 +452,7 @@ mod tests { #[test] fn test_simple_excess_tree() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, @@ -505,7 +510,7 @@ mod tests { // 3 4 5 6 // /\/\/\/\ // 7 8 9 10 11 12 - - - let bv = BitVec::from_bits(&[0; 48]); + let bv = BitVec::from_bits_u8(&[0; 48]); let tree = MinMaxTree::excess_tree(&bv, 8); assert_eq!(tree.nodes.len(), 13); // 6 leaves + 7 internal nodes @@ -586,7 +591,7 @@ mod tests { #[test] fn test_simple_fwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -625,7 +630,7 @@ mod tests { #[test] fn test_fwd_search_with_multiple_blocks() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, @@ -651,7 +656,7 @@ mod tests { #[test] fn test_fwd_search_relative_offsets() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 1, 1, // excess 2 1, 0, 1, 0, // min excess 0, max excess 1 @@ -670,7 +675,7 @@ mod tests { #[test] fn test_simple_bwd_search() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -709,7 +714,7 @@ mod tests { #[test] fn test_bwd_search_with_multiple_blocks() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, @@ -735,7 +740,7 @@ mod tests { #[test] fn test_bwd_search_relative_offsets() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 0, 1, 0, 1, 1, // excess 2 1, 0, 1, 0, // min excess 0, max excess 1 @@ -752,7 +757,7 @@ mod tests { #[test] fn test_incomplete_block() { #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = 
BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 ]); @@ -772,7 +777,7 @@ mod tests { #[test] fn test_single_block() { - let bv = BitVec::from_bits(&[1, 1, 1, 1, 0, 0, 0, 0]); + let bv = BitVec::from_bits_u8(&[1, 1, 1, 1, 0, 0, 0, 0]); let tree = MinMaxTree::excess_tree(&bv, 8); @@ -782,12 +787,12 @@ mod tests { #[test] fn test_leaf_calculation() { // test small tree - let bv = BitVec::from_bits(&vec![0; 1000]); + let bv = BitVec::from_bits_u8(&vec![0; 1000]); let tree = MinMaxTree::excess_tree(&bv, 1200); assert_eq!(tree.first_leaf(), 1); // test very large tree - let bv = BitVec::from_bits(&vec![0; 1000]); + let bv = BitVec::from_bits_u8(&vec![0; 1000]); let tree = MinMaxTree::excess_tree(&bv, 4); assert_eq!(tree.first_leaf(), 255) @@ -797,7 +802,7 @@ mod tests { fn test_relative_excess() { // test a tree with 3 layers and different downwards traversals #[rustfmt::skip] - let bv = BitVec::from_bits(&[ + let bv = BitVec::from_bits_u8(&[ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/src/trees/mod.rs b/src/trees/mod.rs index 3e2f0eb..4faec67 100644 --- a/src/trees/mod.rs +++ b/src/trees/mod.rs @@ -41,14 +41,14 @@ pub trait Tree { /// Convert a node handle into a contiguous index, allowing associated data to be stored in a vector. /// If `node` is not a valid node handle, the result is meaningless. - fn node_index(&self, node: Self::NodeHandle) -> usize; + fn node_index(&self, node: Self::NodeHandle) -> u64; /// Convert a contiguous index that enumerates all nodes into a node handle. /// This operation is the inverse of `node_index`. /// The index must be in the range `0..self.size()`. /// /// If the index is out of bounds, the behavior is unspecified. - fn node_handle(&self, index: usize) -> Self::NodeHandle; + fn node_handle(&self, index: u64) -> Self::NodeHandle; /// Returns true if the node is a leaf. /// If `node` is not a valid node handle, the result is meaningless. @@ -63,7 +63,7 @@ pub trait Tree { fn depth(&self, node: Self::NodeHandle) -> u64; /// Returns the number of nodes in the tree. - fn size(&self) -> usize; + fn size(&self) -> u64; /// Returns true, if the tree has no nodes. fn is_empty(&self) -> bool { @@ -81,7 +81,7 @@ pub trait SubtreeSize: Tree { /// /// Returns `None` if the `node` has no closing parenthesis (in an unbalanced parenthesis /// expression). - fn subtree_size(&self, node: Self::NodeHandle) -> Option; + fn subtree_size(&self, node: Self::NodeHandle) -> Option; } /// A trait for succinct tree data structures that support [`is_ancestor`] queries. @@ -122,6 +122,10 @@ pub trait LevelTree: Tree { /// /// Once the full tree has been visited, the caller must call [`build`] to create an instance of the /// implementing tree type. +/// +/// [`enter_node`]: TreeBuilder::enter_node +/// [`leave_node`]: TreeBuilder::leave_node +/// [`build`]: TreeBuilder::build pub trait TreeBuilder { /// The tree type constructed with this interface type Tree; @@ -139,5 +143,8 @@ pub trait TreeBuilder { /// (i.e. there are nodes for which [`leave_node`] has not been called, /// or there are more calls to `leave_node` than to [`enter_node`]; /// the number of extraneous calls to `enter_node` is returned in the error). + /// + /// [`leave_node`]: Self::leave_node + /// [`enter_node`]: Self::enter_node fn build(self) -> Result; } diff --git a/src/util/elias_fano_iter.rs b/src/util/elias_fano_iter.rs index 646d3f4..cd651e4 100644 --- a/src/util/elias_fano_iter.rs +++ b/src/util/elias_fano_iter.rs @@ -12,14 +12,17 @@ macro_rules! 
gen_ef_iter_impl { return Ok(()); } - if Some(self.index + n - 1) > self.back_index { + if Some(self.index + n as u64 - 1) > self.back_index { if Some(self.index) > self.back_index { Err(std::num::NonZeroUsize::new(n).unwrap()) } else { - Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) + // the following is limited in size by n, and `back_index` is `None` only if the vector is + // empty, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&u64::MAX).wrapping_sub(self.index).wrapping_add(1)) as usize).unwrap()) } } else { - self.index += n; + self.index += n as u64; if n > 0 { // since advance_by is not stable yet, we need to call nth - 1. self.upper_iter.nth(n - 1).expect("upper iterator should not be exhausted"); @@ -46,10 +49,12 @@ macro_rules! gen_ef_iter_impl { // since the cursors point to unconsumed items, we need to add 1 let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; - if remaining < n { - return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); + if remaining < n as u64 { + // the following is limited in size by n, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + return Err(std::num::NonZeroUsize::new(n - remaining as usize).unwrap()); } - self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; + self.back_index = if self.back_index >= Some(n as u64) { self.back_index.map(|b| b - n as u64) } else { None }; if n > 0 { // since advance_by is not stable yet, we need to call nth - 1. self.upper_iter.nth_back(n - 1).expect("upper iterator should not be exhausted"); @@ -87,6 +92,10 @@ macro_rules! gen_ef_iter_impl { /// Returns the exact number of elements that this iterator would iterate over. Does not /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic. fn count(self) -> usize where Self: Sized, @@ -139,9 +148,17 @@ macro_rules! gen_ef_iter_impl { } impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? { + // the check and panic guarantees panic on truncation + #[allow(clippy::cast_possible_truncation)] fn len(&self) -> usize { + // this check is hopefully eliminated on 64-bit architectures + if (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) + > usize::MAX as u64 { + panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden"); + } + // intentionally overflowing calculations to avoid branches on empty iterator - (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1) + (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) as usize } } @@ -180,19 +197,19 @@ macro_rules! impl_ef_iterator { #[doc = concat!("This struct is created by the `into_iter` trait implementation of `", stringify!($type), "`.")] #[derive(Clone, Debug)] pub struct $own { - upper_iter: crate::bit_vec::fast_rs_vec::SelectIntoIter, + upper_iter: crate::bit_vec::rs::SelectIntoIter, vec: crate::bit_vec::BitVec, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) 
even if the iterator is empty - back_index: Option, - lower_len: usize, + back_index: Option, + lower_len: u64, universe_zero: u64, } impl $own { #[must_use] - fn new(vec: crate::elias_fano::EliasFanoVec) -> Self { + fn new(vec: crate::ef::EliasFanoVec) -> Self { if vec.is_empty() { return Self { upper_iter: vec.upper_vec.into_iter1(), @@ -218,6 +235,7 @@ macro_rules! impl_ef_iterator { impl EliasFanoVec { #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")] + #[doc = "Note: if the iterator length exceeds `usize::MAX`, calling `len()` on it will panic."] #[must_use] pub fn iter(&self) -> $bor<'_> { $bor::new(self) @@ -228,19 +246,19 @@ macro_rules! impl_ef_iterator { #[doc = concat!("This struct is created by the `iter` method of `", stringify!($type), "`.")] #[derive(Clone, Debug)] pub struct $bor<'a> { - upper_iter: crate::bit_vec::fast_rs_vec::SelectIter<'a, false>, + upper_iter: crate::bit_vec::rs::SelectIter<'a, false>, vec: &'a crate::bit_vec::BitVec, - index: usize, + index: u64, // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by // a call to next_back()). It can be Some(..) even if the iterator is empty - back_index: Option, - lower_len: usize, + back_index: Option, + lower_len: u64, universe_zero: u64, } impl<'a> $bor<'a> { #[must_use] - fn new(vec: &'a crate::elias_fano::EliasFanoVec) -> Self { + fn new(vec: &'a crate::ef::EliasFanoVec) -> Self { if vec.is_empty() { return Self { upper_iter: vec.upper_vec.iter1(), diff --git a/src/util/general_iter.rs b/src/util/general_iter.rs index afc4c73..37dca0c 100644 --- a/src/util/general_iter.rs +++ b/src/util/general_iter.rs @@ -32,14 +32,17 @@ macro_rules! gen_vector_iter_impl { return Ok(()); } - if Some(self.index + n - 1) > self.back_index { + if Some(self.index + n as u64 - 1) > self.back_index { if Some(self.index) > self.back_index { Err(std::num::NonZeroUsize::new(n).unwrap()) } else { - Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&usize::MAX).wrapping_sub(self.index).wrapping_add(1))).unwrap()) + // the following is limited in size by n, and `back_index` is `None` only if the vector is + // empty, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + Err(std::num::NonZeroUsize::new(n - (self.back_index.as_ref().unwrap_or(&u64::MAX).wrapping_sub(self.index).wrapping_add(1)) as usize).unwrap()) } } else { - self.index += n; + self.index += n as u64; Ok(()) } } @@ -62,10 +65,12 @@ macro_rules! gen_vector_iter_impl { // since the cursors point to unconsumed items, we need to add 1 let remaining = *self.back_index.as_ref().unwrap() - self.index + 1; - if remaining < n { - return Err(std::num::NonZeroUsize::new(n - remaining).unwrap()); + if remaining < n as u64 { + // the following is limited in size by n, so a truncation is impossible + #[allow(clippy::cast_possible_truncation)] + return Err(std::num::NonZeroUsize::new(n - remaining as usize).unwrap()); } - self.back_index = if self.back_index >= Some(n) { self.back_index.map(|b| b - n) } else { None }; + self.back_index = if self.back_index >= Some(n as u64) { self.back_index.map(|b| b - n as u64) } else { None }; Ok(()) } @@ -96,6 +101,10 @@ macro_rules! gen_vector_iter_impl { /// Returns the exact number of elements that this iterator would iterate over. Does not /// call `next` internally. + /// + /// # Panics + /// If the vector contains more than `usize::MAX` elements, calling `count()` on the iterator will + /// cause it to panic.
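The new panic path in `len()` and `count()` only triggers when the remaining element count exceeds `usize::MAX`, which cannot happen on 64-bit targets, so ordinary usage is unaffected. A small illustration (using `BitVec::from_zeros`, which appears elsewhere in this diff):

```rust
use vers_vecs::BitVec;

let bv = BitVec::from_zeros(100);
let iter = bv.iter();
// well within usize::MAX remaining elements, so len() is safe here;
// only an iterator with more elements left than usize::MAX would panic
assert_eq!(iter.len(), 100);
assert_eq!(bv.iter().count(), 100);
```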
            fn count(self) -> usize
            where
                Self: Sized,
@@ -124,9 +133,17 @@ macro_rules! gen_vector_iter_impl {
         }
 
         impl $(<$life>)? std::iter::ExactSizeIterator for $name $(<$life>)? {
+            // the length check below guarantees a panic instead of a silent truncation
+            #[allow(clippy::cast_possible_truncation)]
             fn len(&self) -> usize {
+                // on 64-bit architectures this check can never be true and should be eliminated by the compiler
+                if (self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1)
+                    > usize::MAX as u64 {
+                    panic!("calling len() on an iterator containing more than usize::MAX elements is forbidden");
+                }
+
                 // intentionally overflowing calculations to avoid branches on empty iterator
-                (*self.back_index.as_ref().unwrap_or(&usize::MAX)).wrapping_sub(self.index).wrapping_add(1)
+                (*self.back_index.as_ref().unwrap_or(&u64::MAX)).wrapping_sub(self.index).wrapping_add(1) as usize
             }
         }
@@ -236,20 +253,20 @@ macro_rules! impl_vector_iterator {
         #[derive(Clone, Debug)]
         pub struct $own {
             vec: $type,
-            index: usize,
+            index: u64,
             // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by
             // a call to next_back()). It can be Some(..) even if the iterator is empty
-            back_index: Option<usize>,
+            back_index: Option<u64>,
         }
 
         #[doc = concat!("A borrowing iterator for `", stringify!($type), "`.")]
         #[derive(Clone, Debug)]
         pub struct $bor<'a> {
             vec: &'a $type,
-            index: usize,
+            index: u64,
             // back index is none, iff it points to element -1 (i.e. element 0 has been consumed by
             // a call to next_back()). It can be Some(..) even if the iterator is empty
-            back_index: Option<usize>,
+            back_index: Option<u64>,
         }
 
         crate::util::gen_vector_iter_impl!($own, $type, $return_type, $get_unchecked, $get);
@@ -262,6 +279,8 @@ macro_rules! impl_vector_iterator {
         impl $type {
             #[doc = concat!("Returns an iterator over the elements of `", stringify!($type), "`.")]
             #[doc = concat!("The iterator returns `", stringify!($return_type), "` elements.")]
+            #[doc = "Note: if the iterator's index type is wider than `usize`, calling `len()` on the \
+                iterator will panic if the iterator length exceeds `usize::MAX`."]
             #[must_use]
             pub fn iter(&self) -> $bor<'_> {
                 $bor::new(self)
diff --git a/src/wavelet/mod.rs b/src/wavelet/mod.rs
index 3d08602..16e13c2 100644
--- a/src/wavelet/mod.rs
+++ b/src/wavelet/mod.rs
@@ -62,6 +62,10 @@ use std::ops::Range;
 /// ```
 ///
 /// [`RsVec`]: RsVec
+/// [`from_bit_vec`]: WaveletMatrix::from_bit_vec
+/// [`from_slice`]: WaveletMatrix::from_slice
+/// [`from_bit_vec_pc`]: WaveletMatrix::from_bit_vec_pc
+/// [`from_slice_pc`]: WaveletMatrix::from_slice_pc
 #[derive(Clone, Debug)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct WaveletMatrix {
@@ -77,28 +81,31 @@ impl WaveletMatrix {
    /// - `num_elements`: The number of elements in the sequence.
    /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word.
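    ///
    /// For instance, [`from_slice`] forwards to this function with a bit-extracting
    /// closure over the slice (shape taken from the implementation further below):
    /// ```ignore
    /// Self::permutation_sorting(bits_per_element, sequence.len() as u64, |element, bit| {
    ///     (sequence[element as usize] >> bit) & 1
    /// })
    /// ```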
#[inline(always)] // should get rid of closures in favor of static calls - fn permutation_sorting u64>( + fn permutation_sorting u64>( bits_per_element: u16, - num_elements: usize, + num_elements: u64, bit_lookup: LOOKUP, ) -> Self { - let element_len = bits_per_element as usize; + let element_len = bits_per_element as u64; - let mut data = vec![BitVec::from_zeros(num_elements); element_len]; + #[allow(clippy::cast_possible_truncation)] + let mut data = vec![BitVec::from_zeros(num_elements); element_len as usize]; // insert the first bit of each word into the first bit vector // for each following level, insert the next bit of each word into the next bit vector // sorted stably by the previous bit vector let mut permutation = (0..num_elements).collect::>(); - let mut next_permutation = vec![0; num_elements]; + #[allow(clippy::cast_possible_truncation)] + let mut next_permutation = vec![0; num_elements as usize]; for (level, data) in data.iter_mut().enumerate() { + let level = level as u64; let mut total_zeros = 0; for (i, p) in permutation.iter().enumerate() { if bit_lookup(*p, element_len - level - 1) == 0 { total_zeros += 1; } else { - data.set(i, 1).unwrap(); + data.set(i as u64, 1).unwrap(); } } @@ -108,7 +115,7 @@ impl WaveletMatrix { let mut zero_boundary = 0; let mut one_boundary = total_zeros; for (i, p) in permutation.iter().enumerate() { - if data.get_unchecked(i) == 0 { + if data.get_unchecked(i as u64) == 0 { next_permutation[zero_boundary] = *p; zero_boundary += 1; } else { @@ -139,10 +146,10 @@ impl WaveletMatrix { /// Panics if the number of bits in the bit vector is not a multiple of the number of bits per element. #[must_use] pub fn from_bit_vec(bit_vec: &BitVec, bits_per_element: u16) -> Self { - assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); - let num_elements = bit_vec.len() / bits_per_element as usize; + assert_eq!(bit_vec.len() % bits_per_element as u64, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); + let num_elements = bit_vec.len() / bits_per_element as u64; Self::permutation_sorting(bits_per_element, num_elements, |element, bit| { - bit_vec.get_unchecked(element * bits_per_element as usize + bit) + bit_vec.get_unchecked(element * bits_per_element as u64 + bit) }) } @@ -161,8 +168,10 @@ impl WaveletMatrix { bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); - Self::permutation_sorting(bits_per_element, sequence.len(), |element, bit| { - (sequence[element] >> bit) & 1 + #[allow(clippy::cast_possible_truncation)] + // safe because the closure is only called with indices of `sequence` + Self::permutation_sorting(bits_per_element, sequence.len() as u64, |element, bit| { + (sequence[element as usize] >> bit) & 1 }) } @@ -176,17 +185,19 @@ impl WaveletMatrix { /// - `bit_lookup`: A closure that returns the `bit`-th bit of the `element`-th word. /// - `element_lookup`: A closure that returns the `element`-th word. 
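    ///
    /// For instance, [`from_slice_pc`] supplies both closures over the slice
    /// (shape taken from the implementation further below):
    /// ```ignore
    /// Self::prefix_counting(
    ///     bits_per_element,
    ///     sequence.len() as u64,
    ///     |element, bit| (sequence[element as usize] >> bit) & 1,
    ///     |element| sequence[element as usize],
    /// )
    /// ```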
#[inline(always)] // should get rid of closures in favor of static calls - fn prefix_counting u64, ELEMENT: Fn(usize) -> u64>( + fn prefix_counting u64, ELEMENT: Fn(u64) -> u64>( bits_per_element: u16, - num_elements: usize, + num_elements: u64, bit_lookup: LOOKUP, element_lookup: ELEMENT, ) -> Self { - let element_len = bits_per_element as usize; - let mut histogram = vec![0usize; 1 << bits_per_element]; - let mut borders = vec![0usize; 1 << bits_per_element]; - let mut data = vec![BitVec::from_zeros(num_elements); element_len]; + let element_len = bits_per_element as u64; + let mut histogram = vec![0u64; 1 << bits_per_element]; + let mut borders = vec![0u64; 1 << bits_per_element]; + #[allow(clippy::cast_possible_truncation)] + let mut data = vec![BitVec::from_zeros(num_elements); element_len as usize]; + #[allow(clippy::cast_possible_truncation)] // element_lookup only returns small values for i in 0..num_elements { histogram[element_lookup(i) as usize] += 1; data[0].set_unchecked(i, bit_lookup(i, element_len - 1)); @@ -207,9 +218,10 @@ impl WaveletMatrix { borders[h_minus_1] + histogram[h_minus_1]; } + #[allow(clippy::cast_possible_truncation)] // element_lookup only returns small values for i in 0..num_elements { let bit = bit_lookup(i, element_len - level - 1); - data[level].set_unchecked( + data[level as usize].set_unchecked( borders[element_lookup(i) as usize >> (element_len - level)], bit, ); @@ -242,21 +254,19 @@ impl WaveletMatrix { /// [`from_slice`]: WaveletMatrix::from_slice #[must_use] pub fn from_bit_vec_pc(bit_vec: &BitVec, bits_per_element: u16) -> Self { - assert_eq!(bit_vec.len() % bits_per_element as usize, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); + assert_eq!(bit_vec.len() % bits_per_element as u64, 0, "The number of bits in the bit vector must be a multiple of the number of bits per element."); assert!( bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); - let num_elements = bit_vec.len() / bits_per_element as usize; + let num_elements = bit_vec.len() / bits_per_element as u64; Self::prefix_counting( bits_per_element, num_elements, - |element, bit| bit_vec.get_unchecked(element * bits_per_element as usize + bit), + |element, bit| bit_vec.get_unchecked(element * bits_per_element as u64 + bit), |element| { - bit_vec.get_bits_unchecked( - element * bits_per_element as usize, - bits_per_element as usize, - ) + bit_vec + .get_bits_unchecked(element * bits_per_element as u64, bits_per_element as u64) }, ) } @@ -282,18 +292,20 @@ impl WaveletMatrix { bits_per_element <= 64, "The number of bits per element cannot exceed 64." ); + #[allow(clippy::cast_possible_truncation)] + // safe because the closures are called only with indices of `sequence` Self::prefix_counting( bits_per_element, - sequence.len(), - |element, bit| (sequence[element] >> bit) & 1, - |element| sequence[element], + sequence.len() as u64, + |element, bit| (sequence[element as usize] >> bit) & 1, + |element| sequence[element as usize], ) } /// Generic function to read a value from the wavelet matrix and consume it with a closure. /// The function is used by the `get_value` and `get_u64` functions, deduplicating code. 
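    ///
    /// Sketch of the closure protocol: bits arrive from the most significant level
    /// downwards, so `get_u64_unchecked` folds them into an integer roughly like this
    /// (the `|=` line reconstructs a part of the body elided in this diff):
    /// ```ignore
    /// let mut value = 0;
    /// self.reconstruct_value_unchecked(i, |bit| {
    ///     value <<= 1;
    ///     value |= bit;
    /// });
    /// ```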
#[inline(always)] - fn reconstruct_value_unchecked(&self, mut i: usize, mut target_func: F) { + fn reconstruct_value_unchecked(&self, mut i: u64, mut target_func: F) { for level in 0..self.bits_per_element() { let bit = self.data[level].get_unchecked(i); target_func(bit); @@ -323,7 +335,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_value(100), None); /// ``` #[must_use] - pub fn get_value(&self, i: usize) -> Option { + pub fn get_value(&self, i: u64) -> Option { if self.data.is_empty() || i >= self.data[0].len() { None } else { @@ -342,11 +354,11 @@ impl WaveletMatrix { /// /// [`get_value`]: WaveletMatrix::get_value #[must_use] - pub fn get_value_unchecked(&self, i: usize) -> BitVec { - let mut value = BitVec::from_zeros(self.bits_per_element()); + pub fn get_value_unchecked(&self, i: u64) -> BitVec { + let mut value = BitVec::from_zeros(self.bits_per_element() as u64); let mut level = self.bits_per_element() - 1; self.reconstruct_value_unchecked(i, |bit| { - value.set_unchecked(level, bit); + value.set_unchecked(level as u64, bit); level = level.saturating_sub(1); }); value @@ -369,7 +381,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_u64(100), None); /// ``` #[must_use] - pub fn get_u64(&self, i: usize) -> Option { + pub fn get_u64(&self, i: u64) -> Option { if self.bits_per_element() > 64 || self.data.is_empty() || i >= self.data[0].len() { None } else { @@ -388,7 +400,7 @@ impl WaveletMatrix { /// /// [`get_u64`]: WaveletMatrix::get_u64 #[must_use] - pub fn get_u64_unchecked(&self, i: usize) -> u64 { + pub fn get_u64_unchecked(&self, i: u64) -> u64 { let mut value = 0; self.reconstruct_value_unchecked(i, |bit| { value <<= 1; @@ -414,9 +426,9 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank_range`]: WaveletMatrix::rank_range #[must_use] - pub fn rank_range_unchecked(&self, mut range: Range, symbol: &BitVec) -> usize { + pub fn rank_range_unchecked(&self, mut range: Range, symbol: &BitVec) -> u64 { for (level, data) in self.data.iter().enumerate() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range.start = data.rank0(range.start); range.end = data.rank0(range.end); } else { @@ -450,10 +462,10 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank_range(&self, range: Range, symbol: &BitVec) -> Option { + pub fn rank_range(&self, range: Range, symbol: &BitVec) -> Option { if range.start >= self.len() || range.end > self.len() - || symbol.len() != self.bits_per_element() + || symbol.len() != self.bits_per_element() as u64 { None } else { @@ -478,7 +490,7 @@ impl WaveletMatrix { /// /// [`rank_range_u64`]: WaveletMatrix::rank_range_u64 #[must_use] - pub fn rank_range_u64_unchecked(&self, mut range: Range, symbol: u64) -> usize { + pub fn rank_range_u64_unchecked(&self, mut range: Range, symbol: u64) -> u64 { for (level, data) in self.data.iter().enumerate() { if (symbol >> ((self.bits_per_element() - 1) - level)) & 1 == 0 { range.start = data.rank0(range.start); @@ -512,7 +524,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_range_u64(2..4, 4), Some(1)); /// ``` #[must_use] - pub fn rank_range_u64(&self, range: Range, symbol: u64) -> Option { + pub fn rank_range_u64(&self, range: Range, symbol: u64) -> Option { if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 { None } else { @@ -543,7 +555,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank_offset`]: 
WaveletMatrix::rank_offset #[must_use] - pub fn rank_offset_unchecked(&self, offset: usize, i: usize, symbol: &BitVec) -> usize { + pub fn rank_offset_unchecked(&self, offset: u64, i: u64, symbol: &BitVec) -> u64 { self.rank_range_unchecked(offset..i, symbol) } @@ -577,11 +589,11 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank_offset(&self, offset: usize, i: usize, symbol: &BitVec) -> Option { + pub fn rank_offset(&self, offset: u64, i: u64, symbol: &BitVec) -> Option { if offset > i || offset >= self.len() || i > self.len() - || symbol.len() != self.bits_per_element() + || symbol.len() != self.bits_per_element() as u64 { None } else { @@ -610,7 +622,7 @@ impl WaveletMatrix { /// /// [`rank_offset_u64`]: WaveletMatrix::rank_offset_u64 #[must_use] - pub fn rank_offset_u64_unchecked(&self, offset: usize, i: usize, symbol: u64) -> usize { + pub fn rank_offset_u64_unchecked(&self, offset: u64, i: u64, symbol: u64) -> u64 { self.rank_range_u64_unchecked(offset..i, symbol) } @@ -640,7 +652,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_offset_u64(2, 4, 4), Some(1)); /// ``` #[must_use] - pub fn rank_offset_u64(&self, offset: usize, i: usize, symbol: u64) -> Option { + pub fn rank_offset_u64(&self, offset: u64, i: u64, symbol: u64) -> Option { if offset > i || offset >= self.len() || i > self.len() || self.bits_per_element() > 64 { None } else { @@ -666,7 +678,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`rank`]: WaveletMatrix::rank #[must_use] - pub fn rank_unchecked(&self, i: usize, symbol: &BitVec) -> usize { + pub fn rank_unchecked(&self, i: u64, symbol: &BitVec) -> u64 { self.rank_range_unchecked(0..i, symbol) } @@ -693,8 +705,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn rank(&self, i: usize, symbol: &BitVec) -> Option { - if i > self.len() || symbol.len() != self.bits_per_element() { + pub fn rank(&self, i: u64, symbol: &BitVec) -> Option { + if i > self.len() || symbol.len() != self.bits_per_element() as u64 { None } else { Some(self.rank_range_unchecked(0..i, symbol)) @@ -717,7 +729,7 @@ impl WaveletMatrix { /// /// [`rank_u64`]: WaveletMatrix::rank_u64 #[must_use] - pub fn rank_u64_unchecked(&self, i: usize, symbol: u64) -> usize { + pub fn rank_u64_unchecked(&self, i: u64, symbol: u64) -> u64 { self.rank_range_u64_unchecked(0..i, symbol) } @@ -741,7 +753,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.rank_u64(3, 1), Some(1)); /// ``` #[must_use] - pub fn rank_u64(&self, i: usize, symbol: u64) -> Option { + pub fn rank_u64(&self, i: u64, symbol: u64) -> Option { if i > self.len() || self.bits_per_element() > 64 { None } else { @@ -770,11 +782,11 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`select_offset`]: WaveletMatrix::select_offset #[must_use] - pub fn select_offset_unchecked(&self, offset: usize, rank: usize, symbol: &BitVec) -> usize { + pub fn select_offset_unchecked(&self, offset: u64, rank: u64, symbol: &BitVec) -> u64 { let mut range_start = offset; for (level, data) in self.data.iter().enumerate() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range_start = data.rank0(range_start); } else { range_start = data.rank0 + data.rank1(range_start); @@ -784,7 +796,7 @@ impl WaveletMatrix { let mut range_end = range_start + rank; for (level, data) in self.data.iter().enumerate().rev() { - if symbol.get_unchecked((self.bits_per_element() - 1) - level) == 0 { + if 
symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64) == 0 { range_end = data.select0(range_end); } else { range_end = data.select1(range_end - data.rank0); @@ -818,8 +830,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn select_offset(&self, offset: usize, rank: usize, symbol: &BitVec) -> Option { - if offset >= self.len() || symbol.len() != self.bits_per_element() { + pub fn select_offset(&self, offset: u64, rank: u64, symbol: &BitVec) -> Option { + if offset >= self.len() || symbol.len() != self.bits_per_element() as u64 { None } else { let idx = self.select_offset_unchecked(offset, rank, symbol); @@ -850,7 +862,7 @@ impl WaveletMatrix { /// /// [`select_offset_u64`]: WaveletMatrix::select_offset_u64 #[must_use] - pub fn select_offset_u64_unchecked(&self, offset: usize, rank: usize, symbol: u64) -> usize { + pub fn select_offset_u64_unchecked(&self, offset: u64, rank: u64, symbol: u64) -> u64 { let mut range_start = offset; for (level, data) in self.data.iter().enumerate() { @@ -895,7 +907,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.select_offset_u64(2, 1, 4), None); /// ``` #[must_use] - pub fn select_offset_u64(&self, offset: usize, rank: usize, symbol: u64) -> Option { + pub fn select_offset_u64(&self, offset: u64, rank: u64, symbol: u64) -> Option { if offset >= self.len() || self.bits_per_element() > 64 { None } else { @@ -927,7 +939,7 @@ impl WaveletMatrix { /// [`BitVec`]: BitVec /// [`select`]: WaveletMatrix::select #[must_use] - pub fn select_unchecked(&self, rank: usize, symbol: &BitVec) -> usize { + pub fn select_unchecked(&self, rank: u64, symbol: &BitVec) -> u64 { self.select_offset_unchecked(0, rank, symbol) } @@ -952,8 +964,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn select(&self, rank: usize, symbol: &BitVec) -> Option { - if symbol.len() == self.bits_per_element() { + pub fn select(&self, rank: u64, symbol: &BitVec) -> Option { + if symbol.len() == self.bits_per_element() as u64 { let idx = self.select_unchecked(rank, symbol); if idx < self.len() { Some(idx) @@ -982,7 +994,7 @@ impl WaveletMatrix { /// /// [`select_u64`]: WaveletMatrix::select_u64 #[must_use] - pub fn select_u64_unchecked(&self, rank: usize, symbol: u64) -> usize { + pub fn select_u64_unchecked(&self, rank: u64, symbol: u64) -> u64 { self.select_offset_u64_unchecked(0, rank, symbol) } @@ -1004,7 +1016,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.select_u64(1, 4), Some(2)); /// ``` #[must_use] - pub fn select_u64(&self, rank: usize, symbol: u64) -> Option { + pub fn select_u64(&self, rank: u64, symbol: u64) -> Option { if self.bits_per_element() > 64 { None } else { @@ -1032,8 +1044,8 @@ impl WaveletMatrix { /// /// [`quantile`]: WaveletMatrix::quantile #[must_use] - pub fn quantile_unchecked(&self, range: Range, k: usize) -> BitVec { - let result = BitVec::from_zeros(self.bits_per_element()); + pub fn quantile_unchecked(&self, range: Range, k: u64) -> BitVec { + let result = BitVec::from_zeros(self.bits_per_element() as u64); self.partial_quantile_search_unchecked(range, k, 0, result) } @@ -1046,12 +1058,12 @@ impl WaveletMatrix { #[inline(always)] fn partial_quantile_search_unchecked( &self, - mut range: Range, - mut k: usize, + mut range: Range, + mut k: u64, start_level: usize, mut prefix: BitVec, ) -> BitVec { - debug_assert!(prefix.len() == self.bits_per_element()); + debug_assert!(prefix.len() == self.bits_per_element() as u64); debug_assert!(!range.is_empty()); debug_assert!(range.end <= self.len()); @@ 
-1067,7 +1079,7 @@ impl WaveletMatrix { } else { // the element is among the ones, so we set the bit to 1, and move the range // into the 1-partition of the next level - prefix.set_unchecked((self.bits_per_element() - 1) - level, 1); + prefix.set_unchecked(((self.bits_per_element() - 1) - level) as u64, 1); k -= zeros; range.start = data.rank0 + (range.start - zeros_start); // range.start - zeros_start is the rank1 of range.start range.end = data.rank0 + (range.end - zeros_end); // same here @@ -1080,7 +1092,7 @@ impl WaveletMatrix { /// Get the `k`-th smallest element in the encoded sequence in the specified `range`, /// where `k = 0` returns the smallest element. /// The `range` is a half-open interval, meaning that the `end` index is exclusive. - /// The `k`-th smallest element is returned as a `BitVec`, + /// The `k`-th smallest element is returned as a [`BitVec`], /// where the least significant bit is the first element. /// /// Returns `None` if the `range` is out of bounds, or if `k` is greater than the size of the range. @@ -1097,7 +1109,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.quantile(1..4, 0), Some(BitVec::pack_sequence_u8(&[1], 3))); /// ``` #[must_use] - pub fn quantile(&self, range: Range, k: usize) -> Option { + pub fn quantile(&self, range: Range, k: u64) -> Option { if range.start >= self.len() || range.end > self.len() || k >= range.end - range.start { None } else { @@ -1114,8 +1126,10 @@ impl WaveletMatrix { /// /// # Panics /// May panic if the `i` is out of bounds, or returns an empty bit vector. + /// + /// [`get_sorted`]: Self::get_sorted #[must_use] - pub fn get_sorted_unchecked(&self, i: usize) -> BitVec { + pub fn get_sorted_unchecked(&self, i: u64) -> BitVec { self.quantile_unchecked(0..self.len(), i) } @@ -1138,7 +1152,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_sorted(2), Some(BitVec::pack_sequence_u8(&[2], 3))); /// ``` #[must_use] - pub fn get_sorted(&self, i: usize) -> Option { + pub fn get_sorted(&self, i: u64) -> Option { if i >= self.len() { None } else { @@ -1162,7 +1176,7 @@ impl WaveletMatrix { /// /// [`quantile_u64`]: WaveletMatrix::quantile_u64 #[must_use] - pub fn quantile_u64_unchecked(&self, range: Range, k: usize) -> u64 { + pub fn quantile_u64_unchecked(&self, range: Range, k: u64) -> u64 { self.partial_quantile_search_u64_unchecked(range, k, 0, 0) } @@ -1175,8 +1189,8 @@ impl WaveletMatrix { #[inline(always)] fn partial_quantile_search_u64_unchecked( &self, - mut range: Range, - mut k: usize, + mut range: Range, + mut k: u64, start_level: usize, mut prefix: u64, ) -> u64 { @@ -1224,7 +1238,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.quantile_u64(1..4, 0), Some(1)); /// ``` #[must_use] - pub fn quantile_u64(&self, range: Range, k: usize) -> Option { + pub fn quantile_u64(&self, range: Range, k: u64) -> Option { if range.start >= self.len() || range.end > self.len() || self.bits_per_element() > 64 @@ -1249,7 +1263,7 @@ impl WaveletMatrix { /// /// [`get_sorted_u64`]: WaveletMatrix::get_sorted_u64 #[must_use] - pub fn get_sorted_u64_unchecked(&self, i: usize) -> u64 { + pub fn get_sorted_u64_unchecked(&self, i: u64) -> u64 { self.quantile_u64_unchecked(0..self.len(), i) } @@ -1270,7 +1284,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.get_sorted_u64(2), Some(2)); /// ``` #[must_use] - pub fn get_sorted_u64(&self, i: usize) -> Option { + pub fn get_sorted_u64(&self, i: u64) -> Option { if i >= self.len() || self.bits_per_element() > 64 { None } else { @@ -1291,7 +1305,7 @@ impl WaveletMatrix { 
/// /// [`range_min`]: WaveletMatrix::range_min #[must_use] - pub fn range_min_unchecked(&self, range: Range) -> BitVec { + pub fn range_min_unchecked(&self, range: Range) -> BitVec { self.quantile_unchecked(range, 0) } @@ -1313,7 +1327,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_min(1..3), Some(BitVec::pack_sequence_u8(&[4], 3))); /// ``` #[must_use] - pub fn range_min(&self, range: Range) -> Option { + pub fn range_min(&self, range: Range) -> Option { self.quantile(range, 0) } @@ -1331,7 +1345,7 @@ impl WaveletMatrix { /// /// [`range_min_u64`]: WaveletMatrix::range_min_u64 #[must_use] - pub fn range_min_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_min_u64_unchecked(&self, range: Range) -> u64 { self.quantile_u64_unchecked(range, 0) } @@ -1354,7 +1368,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_min_u64(1..3), Some(4)); /// ``` #[must_use] - pub fn range_min_u64(&self, range: Range) -> Option { + pub fn range_min_u64(&self, range: Range) -> Option { self.quantile_u64(range, 0) } @@ -1372,7 +1386,7 @@ impl WaveletMatrix { /// /// [`range_max`]: WaveletMatrix::range_max #[must_use] - pub fn range_max_unchecked(&self, range: Range) -> BitVec { + pub fn range_max_unchecked(&self, range: Range) -> BitVec { let k = range.end - range.start - 1; self.quantile_unchecked(range, k) } @@ -1395,7 +1409,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_max(3..6), Some(BitVec::pack_sequence_u8(&[7], 3))); /// ``` #[must_use] - pub fn range_max(&self, range: Range) -> Option { + pub fn range_max(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1418,7 +1432,7 @@ impl WaveletMatrix { /// /// [`range_max_u64`]: WaveletMatrix::range_max_u64 #[must_use] - pub fn range_max_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_max_u64_unchecked(&self, range: Range) -> u64 { let k = range.end - range.start - 1; self.quantile_u64_unchecked(range, k) } @@ -1441,7 +1455,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_max_u64(3..6), Some(7)); /// ``` #[must_use] - pub fn range_max_u64(&self, range: Range) -> Option { + pub fn range_max_u64(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1466,7 +1480,7 @@ impl WaveletMatrix { /// /// [`range_median`]: WaveletMatrix::range_median #[must_use] - pub fn range_median_unchecked(&self, range: Range) -> BitVec { + pub fn range_median_unchecked(&self, range: Range) -> BitVec { let k = (range.end - 1 - range.start) / 2; self.quantile_unchecked(range, k) } @@ -1492,7 +1506,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_median(0..6), Some(BitVec::pack_sequence_u8(&[2], 3))); /// ``` #[must_use] - pub fn range_median(&self, range: Range) -> Option { + pub fn range_median(&self, range: Range) -> Option { if range.is_empty() { None } else { @@ -1517,7 +1531,7 @@ impl WaveletMatrix { /// /// [`range_median_u64`]: WaveletMatrix::range_median_u64 #[must_use] - pub fn range_median_u64_unchecked(&self, range: Range) -> u64 { + pub fn range_median_u64_unchecked(&self, range: Range) -> u64 { let k = (range.end - 1 - range.start) / 2; self.quantile_u64_unchecked(range, k) } @@ -1543,7 +1557,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.range_median_u64(0..6), Some(2)); /// ``` #[must_use] - pub fn range_median_u64(&self, range: Range) -> Option { + pub fn range_median_u64(&self, range: Range) -> Option { if range.is_empty() || self.bits_per_element() > 64 || range.end > self.len() { None } else { @@ -1561,10 +1575,10 @@ impl 
WaveletMatrix { T: Clone, Reader: Fn(usize, &T) -> u64, Writer: Fn(u64, usize, &mut T), - Quantile: Fn(&Self, Range, usize, usize, T) -> T, + Quantile: Fn(&Self, Range, u64, usize, T) -> T, >( &self, - mut range: Range, + mut range: Range, symbol: &T, mut result_value: T, bit_reader: Reader, @@ -1577,7 +1591,7 @@ impl WaveletMatrix { // the level of the last node where we could go to an interval with smaller elements let mut last_one_level: Option = None; // the range of the last node where we could go to an interval with smaller elements - let mut next_smaller_range: Option> = None; + let mut next_smaller_range: Option> = None; for (level, data) in self.data.iter().enumerate() { let query_bit = bit_reader(level, symbol); @@ -1672,8 +1686,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn predecessor(&self, range: Range, symbol: &BitVec) -> Option { - if symbol.len() != self.bits_per_element() + pub fn predecessor(&self, range: Range, symbol: &BitVec) -> Option { + if symbol.len() != self.bits_per_element() as u64 || range.is_empty() || self.is_empty() || range.end > self.len() @@ -1684,10 +1698,10 @@ impl WaveletMatrix { self.predecessor_generic_unchecked( range, symbol, - BitVec::from_zeros(self.bits_per_element()), - |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level), + BitVec::from_zeros(self.bits_per_element() as u64), + |level, symbol| symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64), |bit, level, result| { - result.set_unchecked((self.bits_per_element() - 1) - level, bit); + result.set_unchecked(((self.bits_per_element() - 1) - level) as u64, bit); }, Self::partial_quantile_search_unchecked, ) @@ -1716,7 +1730,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.predecessor_u64(0..6, 7), Some(7)); /// ``` #[must_use] - pub fn predecessor_u64(&self, range: Range, symbol: u64) -> Option { + pub fn predecessor_u64(&self, range: Range, symbol: u64) -> Option { if self.bits_per_element() > 64 || range.is_empty() || self.is_empty() @@ -1745,10 +1759,10 @@ impl WaveletMatrix { T: Clone, Reader: Fn(usize, &T) -> u64, Writer: Fn(u64, usize, &mut T), - Quantile: Fn(&Self, Range, usize, usize, T) -> T, + Quantile: Fn(&Self, Range, u64, usize, T) -> T, >( &self, - mut range: Range, + mut range: Range, symbol: &T, mut result_value: T, bit_reader: Reader, @@ -1761,7 +1775,7 @@ impl WaveletMatrix { // the level of the last node where we could go to an interval with larger elements let mut last_zero_level: Option = None; // the range of the last node where we could go to an interval with larger elements - let mut next_larger_range: Option> = None; + let mut next_larger_range: Option> = None; for (level, data) in self.data.iter().enumerate() { let query_bit = bit_reader(level, symbol); @@ -1859,8 +1873,8 @@ impl WaveletMatrix { /// /// [`BitVec`]: BitVec #[must_use] - pub fn successor(&self, range: Range, symbol: &BitVec) -> Option { - if symbol.len() != self.bits_per_element() + pub fn successor(&self, range: Range, symbol: &BitVec) -> Option { + if symbol.len() != self.bits_per_element() as u64 || range.is_empty() || self.is_empty() || range.end > self.len() @@ -1871,10 +1885,10 @@ impl WaveletMatrix { self.successor_generic_unchecked( range, symbol, - BitVec::from_zeros(self.bits_per_element()), - |level, symbol| symbol.get_unchecked((self.bits_per_element() - 1) - level), + BitVec::from_zeros(self.bits_per_element() as u64), + |level, symbol| symbol.get_unchecked(((self.bits_per_element() - 1) - level) as u64), |bit, level, 
result| { - result.set_unchecked((self.bits_per_element() - 1) - level, bit); + result.set_unchecked(((self.bits_per_element() - 1) - level) as u64, bit); }, Self::partial_quantile_search_unchecked, ) @@ -1903,7 +1917,7 @@ impl WaveletMatrix { /// assert_eq!(wavelet_matrix.successor_u64(0..6, 2), Some(2)); /// ``` #[must_use] - pub fn successor_u64(&self, range: Range, symbol: u64) -> Option { + pub fn successor_u64(&self, range: Range, symbol: u64) -> Option { if self.bits_per_element() > 64 || range.is_empty() || self.is_empty() @@ -1942,7 +1956,7 @@ impl WaveletMatrix { /// assert_eq!(iter.collect::>(), vec![1, 4, 4, 1, 2, 7]); /// ``` #[must_use] - pub fn iter_u64(&self) -> Option { + pub fn iter_u64(&self) -> Option> { if self.bits_per_element() > 64 { None } else { @@ -1966,8 +1980,10 @@ impl WaveletMatrix { /// The iterator yields `BitVec` elements. /// /// See also [`iter_sorted_u64`] for an iterator that yields `u64` elements. + /// + /// [`iter_sorted_u64`]: Self::iter_sorted_u64 #[must_use] - pub fn iter_sorted(&self) -> WaveletSortedRefIter { + pub fn iter_sorted(&self) -> WaveletSortedRefIter<'_> { WaveletSortedRefIter::new(self) } @@ -1993,7 +2009,7 @@ impl WaveletMatrix { /// assert_eq!(iter.collect::>(), vec![1, 1, 2, 4, 4, 7]); /// ``` #[must_use] - pub fn iter_sorted_u64(&self) -> Option { + pub fn iter_sorted_u64(&self) -> Option> { if self.bits_per_element() > 64 { None } else { @@ -2020,17 +2036,9 @@ impl WaveletMatrix { self.data.len() } - /// Get the number of bits per element in the alphabet of the encoded sequence. - #[must_use] - #[deprecated(since = "1.5.1", note = "please use `bits_per_element` instead")] - #[allow(clippy::cast_possible_truncation)] - pub fn bit_len(&self) -> u16 { - self.bits_per_element() as u16 - } - /// Get the number of elements stored in the encoded sequence. 
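    ///
    /// A sketch using the sequence from the examples above (assumes the crate-root
    /// re-exports used elsewhere in these docs):
    /// ```
    /// use vers_vecs::{BitVec, WaveletMatrix};
    ///
    /// let wavelet_matrix = WaveletMatrix::from_bit_vec(&BitVec::pack_sequence_u8(&[1, 4, 4, 1, 2, 7], 3), 3);
    /// let n: u64 = wavelet_matrix.len(); // the length is now `u64` instead of `usize`
    /// assert_eq!(n, 6);
    /// ```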
#[must_use] - pub fn len(&self) -> usize { + pub fn len(&self) -> u64 { if self.data.is_empty() { 0 } else { diff --git a/src/wavelet/tests.rs b/src/wavelet/tests.rs index c4cf4e7..0d2d231 100644 --- a/src/wavelet/tests.rs +++ b/src/wavelet/tests.rs @@ -37,9 +37,10 @@ fn test_wavelet_encoding_randomized() { let wavelet_prefix_counting = WaveletMatrix::from_bit_vec_pc(&BitVec::pack_sequence_u8(&data, 8), 8); - assert_eq!(wavelet.len(), data.len()); + assert_eq!(wavelet.len(), data.len() as u64); for (i, v) in data.iter().enumerate() { + let i = i as u64; assert_eq!(wavelet.get_u64_unchecked(i), *v as u64); assert_eq!(wavelet_from_slice.get_u64_unchecked(i), *v as u64); assert_eq!(wavelet_prefix_counting.get_u64_unchecked(i), *v as u64); @@ -138,7 +139,7 @@ fn test_rank_randomized() { let symbol_bit_vec = BitVec::pack_sequence_u8(&[symbol], 8); let mut rank = 0; for (i, v) in data.iter().enumerate() { - assert_eq!(wavelet.rank_unchecked(i, &symbol_bit_vec), rank); + assert_eq!(wavelet.rank_unchecked(i as u64, &symbol_bit_vec), rank); if *v == symbol { rank += 1; } @@ -230,10 +231,10 @@ fn test_quantile() { for (i, v) in sequence.iter().enumerate() { assert_eq!( - wavelet.quantile(0..10, i), + wavelet.quantile(0..10, i as u64), Some(BitVec::pack_sequence_u8(&[*v as u8], 4)) ); - assert_eq!(wavelet.quantile_u64(0..10, i), Some(*v)); + assert_eq!(wavelet.quantile_u64(0..10, i as u64), Some(*v)); } assert_eq!(wavelet.quantile(0..10, 10), None); @@ -269,8 +270,8 @@ fn test_quantile_randomized() { let wavelet = WaveletMatrix::from_bit_vec(&BitVec::pack_sequence_u8(&data, 8), 8); for _ in 0..1000 { - let range_i = rng.gen_range(0..data.len()); - let range_j = rng.gen_range(0..data.len()); + let range_i = rng.gen_range(0..data.len() as u64); + let range_j = rng.gen_range(0..data.len() as u64); let range = min(range_i, range_j)..max(range_i, range_j); let k = if range.is_empty() { @@ -279,7 +280,7 @@ fn test_quantile_randomized() { rng.gen_range(range.clone()) - range.start }; - let mut range_data = data[range.clone()].to_vec(); + let mut range_data = data[range.start as usize..range.end as usize].to_vec(); range_data.sort_unstable(); assert_eq!( @@ -287,7 +288,7 @@ fn test_quantile_randomized() { if range.is_empty() { None } else { - Some(range_data[k] as u64) + Some(range_data[k as usize] as u64) } ); assert_eq!(