12 changes: 11 additions & 1 deletion .github/workflows/rust.yml
@@ -31,4 +31,14 @@ jobs:
- name: Build
run: cargo build --verbose --features serde
- name: Run tests
run: cargo test --verbose --features serde

docs:
runs-on: ubuntu-latest
env:
RUSTFLAGS: -C target-cpu=x86-64
RUSTDOCFLAGS: -C target-cpu=x86-64
steps:
- uses: actions/checkout@v4
- name: Docs
run: cargo doc --verbose --all-features
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "vers-vecs"
version = "1.7.0"
version = "1.8.1"
edition = "2021"
authors = ["Johannes \"Cydhra\" Hengstler"]
description = "A collection of succinct data structures supported by fast implementations of rank and select queries."
2 changes: 0 additions & 2 deletions benches/bp.rs
@@ -1,5 +1,3 @@
#![allow(long_running_const_eval)]

use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
2 changes: 0 additions & 2 deletions readme.md
@@ -31,8 +31,6 @@ since the intrinsics speed up both `rank` and `select` operations by a factor of
- `simd`: Enables the use of SIMD instructions for rank and select operations.
This feature requires AVX-512 support and uses unsafe code.
It also enables a special iterator for the rank/select bit vector that uses vectorized operations.
The feature only works on nightly Rust.
Enabling it on stable Rust is a no-op, because the required CPU features are not available there.
- `serde`: Enables serialization and deserialization of the data structures using the `serde` crate.
- `u16_lookup`: Enables a larger lookup table for BP tree queries. The larger table requires 128 KiB instead of 4 KiB.

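For context, here is a minimal usage sketch of the rank/select vector this crate provides. `BitVec::new`, `RsVec::from_bit_vec`, `rank1`, and `select1` are assumed names based on the crate description and the signatures visible in this diff, not verified against this exact release:

```rust
// Minimal sketch, assuming the vers-vecs API named above.
use vers_vecs::{BitVec, RsVec};

fn main() {
    let mut bits = BitVec::new(); // assumed constructor
    for i in 0..128u64 {
        bits.append_bit(i); // per this diff, uses the parity of the argument
    }
    let rs = RsVec::from_bit_vec(bits); // assumed constructor name
    let ones = rs.rank1(64); // assumed: number of ones strictly before position 64
    let pos = rs.select1(10); // assumed: position of the 10th one (zero-indexed)
    println!("rank1(64) = {ones}, select1(10) = {pos}");
}
```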
9 changes: 5 additions & 4 deletions src/bit_vec/fast_rs_vec/mod.rs
@@ -42,6 +42,7 @@ const SELECT_BLOCK_SIZE: usize = 1 << 13;
/// always stores the number zero, which serves as a sentinel value to avoid special-casing the
/// first block in a super-block (which would be a performance hit due to branch prediction failures).
#[derive(Clone, Copy, Debug)]
#[repr(C)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct BlockDescriptor {
zeros: u16,
@@ -144,7 +145,7 @@ impl RsVec {
let mut new_zeros = word.count_zeros() as usize;

// in the last block, remove remaining zeros of limb that aren't part of the vector
if idx == vec.data.len() - 1 && vec.len % WORD_SIZE > 0 {
if idx == vec.data.len() - 1 && !vec.len.is_multiple_of(WORD_SIZE) {
let mask = (1 << (vec.len % WORD_SIZE)) - 1;
new_zeros -= (word | mask).count_zeros() as usize;
}
@@ -477,9 +478,9 @@ impl RsVec {
}

// if last incomplete block exists, test it without junk data
if self.len % 64 > 0
&& self.data[self.len / 64] & ((1 << (self.len % 64)) - 1)
!= other.data[self.len / 64] & ((1 << (other.len % 64)) - 1)
if !self.len.is_multiple_of(WORD_SIZE)
&& self.data[self.len / WORD_SIZE] & ((1 << (self.len % WORD_SIZE)) - 1)
!= other.data[self.len / WORD_SIZE] & ((1 << (other.len % WORD_SIZE)) - 1)
{
return false;
}
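The pattern changed in both hunks above (and again in `mask.rs` and `bit_vec/mod.rs` below) is the same: `len % WORD_SIZE > 0` becomes `!len.is_multiple_of(WORD_SIZE)` (stable since Rust 1.87), and junk bits in a trailing partial word are masked off before counting. A self-contained sketch of the technique, assuming `WORD_SIZE` is 64 as in this crate:

```rust
const WORD_SIZE: usize = 64;

/// Count zeros in `data`, treating only the first `len` bits as live and
/// neutralizing the junk bits of a trailing partial word.
fn count_zeros(data: &[u64], len: usize) -> usize {
    let mut zeros: usize = data
        .iter()
        .take(len / WORD_SIZE)
        .map(|w| w.count_zeros() as usize)
        .sum();
    if !len.is_multiple_of(WORD_SIZE) {
        // keep the low `len % WORD_SIZE` bits, force the junk bits to 1
        let mask = (1u64 << (len % WORD_SIZE)) - 1;
        zeros += (data[len / WORD_SIZE] | !mask).count_zeros() as usize;
    }
    zeros
}

fn main() {
    let data = [0u64, 0b1010]; // 68 live bits: 64 zeros, then bits 0,1,0,1
    assert_eq!(count_zeros(&data, 68), 66);
}
```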
185 changes: 123 additions & 62 deletions src/bit_vec/fast_rs_vec/select.rs
@@ -4,6 +4,7 @@ use crate::bit_vec::fast_rs_vec::{BLOCK_SIZE, SELECT_BLOCK_SIZE, SUPER_BLOCK_SIZ
use crate::bit_vec::WORD_SIZE;
use crate::util::pdep::Pdep;
use crate::util::unroll;
use std::slice::from_raw_parts;

/// A safety constant for assertions to make sure that the block size doesn't change without
/// adjusting the code.
@@ -229,89 +230,149 @@ impl super::RsVec {
/// It loads the entire block into a SIMD register and compares the rank to the number of ones
/// in the block. The resulting mask is popcounted to find how many blocks from the block boundary
/// the rank is.
#[cfg(all(
feature = "simd",
target_arch = "x86_64",
target_feature = "avx",
target_feature = "avx2",
target_feature = "avx512vl",
target_feature = "avx512bw",
))]
#[inline(always)]
pub(super) fn search_block1(
// #[cfg(all(
// feature = "simd",
// target_arch = "x86_64",
// target_feature = "avx",
// target_feature = "avx2",
// target_feature = "avx512vl",
// target_feature = "avx512bw",
// ))]
// #[inline(always)]
// pub(super) fn search_block1(
// &self,
// rank: usize,
// block_at_super_block: usize,
// block_index: &mut usize,
// ) {
// use std::arch::x86_64::{
// _mm256_cmpgt_epu16_mask, _mm256_loadu_epi16, _mm256_set1_epi16, _mm256_set_epi16,
// _mm256_sub_epi16,
// };
//
// if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK {
// debug_assert!(
// SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK,
// "change unroll constant to {}",
// 64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1
// );
//
// unsafe {
// let bit_nums = _mm256_set_epi16(
// (15 * BLOCK_SIZE) as i16,
// (14 * BLOCK_SIZE) as i16,
// (13 * BLOCK_SIZE) as i16,
// (12 * BLOCK_SIZE) as i16,
// (11 * BLOCK_SIZE) as i16,
// (10 * BLOCK_SIZE) as i16,
// (9 * BLOCK_SIZE) as i16,
// (8 * BLOCK_SIZE) as i16,
// (7 * BLOCK_SIZE) as i16,
// (6 * BLOCK_SIZE) as i16,
// (5 * BLOCK_SIZE) as i16,
// (4 * BLOCK_SIZE) as i16,
// (3 * BLOCK_SIZE) as i16,
// (2 * BLOCK_SIZE) as i16,
// (1 * BLOCK_SIZE) as i16,
// (0 * BLOCK_SIZE) as i16,
// );
//
// let blocks = _mm256_loadu_epi16(self.blocks[*block_index..].as_ptr() as *const i16);
// let ones = _mm256_sub_epi16(bit_nums, blocks);
//
// let ranks = _mm256_set1_epi16(rank as i16);
// let mask = _mm256_cmpgt_epu16_mask(ones, ranks);
//
// debug_assert!(
// mask.count_zeros() > 0,
// "first block should always be zero, but still claims to be greater than rank"
// );
// *block_index += mask.count_zeros() as usize - 1;
// }
// } else {
// self.search_block1_naive(rank, block_at_super_block, block_index)
// }
// }

#[cfg(all(feature = "simd",))]
pub(super) fn search_block1_portable(
&self,
rank: usize,
block_at_super_block: usize,
_block_at_super_block: usize,
block_index: &mut usize,
) {
use std::arch::x86_64::{
_mm256_cmpgt_epu16_mask, _mm256_loadu_epi16, _mm256_set1_epi16, _mm256_set_epi16,
_mm256_sub_epi16,
};
use std::simd::cmp::SimdPartialOrd;
use std::simd::u16x16;

if self.blocks.len() > *block_index + BLOCKS_PER_SUPERBLOCK {
debug_assert!(
SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK,
"change unroll constant to {}",
64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1
);
debug_assert!(
SUPER_BLOCK_SIZE / BLOCK_SIZE == BLOCKS_PER_SUPERBLOCK,
"change unroll constant to {}",
64 - (SUPER_BLOCK_SIZE / BLOCK_SIZE).leading_zeros() - 1
);

unsafe {
let bit_nums = _mm256_set_epi16(
(15 * BLOCK_SIZE) as i16,
(14 * BLOCK_SIZE) as i16,
(13 * BLOCK_SIZE) as i16,
(12 * BLOCK_SIZE) as i16,
(11 * BLOCK_SIZE) as i16,
(10 * BLOCK_SIZE) as i16,
(9 * BLOCK_SIZE) as i16,
(8 * BLOCK_SIZE) as i16,
(7 * BLOCK_SIZE) as i16,
(6 * BLOCK_SIZE) as i16,
(5 * BLOCK_SIZE) as i16,
(4 * BLOCK_SIZE) as i16,
(3 * BLOCK_SIZE) as i16,
(2 * BLOCK_SIZE) as i16,
(1 * BLOCK_SIZE) as i16,
(0 * BLOCK_SIZE) as i16,
);
let bit_nums = u16x16::from([
(0 * BLOCK_SIZE) as u16,
(1 * BLOCK_SIZE) as u16,
(2 * BLOCK_SIZE) as u16,
(3 * BLOCK_SIZE) as u16,
(4 * BLOCK_SIZE) as u16,
(5 * BLOCK_SIZE) as u16,
(6 * BLOCK_SIZE) as u16,
(7 * BLOCK_SIZE) as u16,
(8 * BLOCK_SIZE) as u16,
(9 * BLOCK_SIZE) as u16,
(10 * BLOCK_SIZE) as u16,
(11 * BLOCK_SIZE) as u16,
(12 * BLOCK_SIZE) as u16,
(13 * BLOCK_SIZE) as u16,
(14 * BLOCK_SIZE) as u16,
(15 * BLOCK_SIZE) as u16,
]);

let sentinel = u16x16::default();

let slice: &[u16] = unsafe {
from_raw_parts(
self.blocks[*block_index..].as_ptr() as *const u16,
self.blocks[*block_index..].len(),
)
};
let blocks = u16x16::load_or(slice, sentinel);
let ones = bit_nums - blocks;

let blocks = _mm256_loadu_epi16(self.blocks[*block_index..].as_ptr() as *const i16);
let ones = _mm256_sub_epi16(bit_nums, blocks);
let ranks = u16x16::splat(rank as u16);
let mask = ones.simd_gt(ranks);

let ranks = _mm256_set1_epi16(rank as i16);
let mask = _mm256_cmpgt_epu16_mask(ones, ranks);
// calculate the number of blocks where the number of ones does not exceed rank
// (subtract 48 unused bits because the bitmask has 64 bits)
let num = mask.to_bitmask().count_zeros() - 48;

debug_assert!(
mask.count_zeros() > 0,
"first block should always be zero, but still claims to be greater than rank"
);
*block_index += mask.count_zeros() as usize - 1;
}
} else {
self.search_block1_naive(rank, block_at_super_block, block_index)
}
debug_assert!(
num > 0,
"first block should always be zero, but still claims to be greater than rank"
);
*block_index += num as usize - 1;
}

/// Search for the block in a superblock that contains the rank. This function is only used
/// internally and is not part of the public API.
/// It compares blocks in a loop-unrolled binary search to find the block that contains the rank.
#[cfg(not(all(
feature = "simd",
target_arch = "x86_64",
target_feature = "avx",
target_feature = "avx2",
target_feature = "avx512vl",
target_feature = "avx512bw",
)))]
// #[cfg(not(all(
// feature = "simd",
// target_arch = "x86_64",
// target_feature = "avx",
// target_feature = "avx2",
// target_feature = "avx512vl",
// target_feature = "avx512bw",
// )))]
#[inline(always)]
pub(super) fn search_block1(
&self,
rank: usize,
block_at_super_block: usize,
block_index: &mut usize,
) {
self.search_block1_naive(rank, block_at_super_block, block_index);
self.search_block1_portable(rank, block_at_super_block, block_index);
}

#[inline(always)]
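The replacement `search_block1_portable` swaps the AVX-512 intrinsics for `std::simd`, which is still nightly-only. The core trick is unchanged: reconstruct the ones-count at each block boundary from the stored zeros, compare all 16 lanes against the target rank at once, and popcount the resulting mask. A reduced sketch of just that step, with the superblock bookkeeping omitted:

```rust
// Nightly-only sketch of the lane-compare-and-popcount step.
#![feature(portable_simd)]
use std::simd::cmp::SimdPartialOrd;
use std::simd::u16x16;

const BLOCK_SIZE: u16 = 512; // matches this crate's block size

/// `zeros[i]` = zeros stored before block i within a superblock.
/// Returns how many blocks have at most `rank` ones before them.
fn blocks_not_exceeding(zeros: [u16; 16], rank: u16) -> u32 {
    // ones before block i = i * BLOCK_SIZE - zeros[i]
    let bit_nums = u16x16::from(std::array::from_fn(|i| i as u16 * BLOCK_SIZE));
    let ones = bit_nums - u16x16::from(zeros);
    let mask = ones.simd_gt(u16x16::splat(rank));
    // the bitmask is 64 bits wide; 48 bits are always zero for 16 lanes
    mask.to_bitmask().count_zeros() - 48
}

fn main() {
    let zeros = std::array::from_fn(|i| i as u16 * 100); // 412 ones per block
    assert_eq!(blocks_not_exceeding(zeros, 1000), 3); // blocks 0, 1, 2 qualify
}
```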
2 changes: 1 addition & 1 deletion src/bit_vec/mask.rs
@@ -180,7 +180,7 @@ where
.take(self.vec.len / WORD_SIZE)
.map(|limb| u64::from(limb.count_ones()))
.sum();
if self.vec.len % WORD_SIZE > 0 {
if !self.vec.len.is_multiple_of(WORD_SIZE) {
ones += u64::from(
((self.bin_op)(
*self.vec.data.last().unwrap(),
22 changes: 14 additions & 8 deletions src/bit_vec/mod.rs
@@ -87,7 +87,7 @@ impl BitVec {
#[must_use]
pub fn from_zeros(len: usize) -> Self {
let mut data = vec![0; len / WORD_SIZE];
if len % WORD_SIZE != 0 {
if !len.is_multiple_of(WORD_SIZE) {
data.push(0);
}
Self { data, len }
@@ -98,7 +98,7 @@ impl BitVec {
#[must_use]
pub fn from_ones(len: usize) -> Self {
let mut data = vec![u64::MAX; len / WORD_SIZE];
if len % WORD_SIZE != 0 {
if !len.is_multiple_of(WORD_SIZE) {
data.push((1 << (len % WORD_SIZE)) - 1);
}
Self { data, len }
@@ -500,7 +500,7 @@ impl BitVec {
/// [`append_bit_u8`]: BitVec::append_bit_u8
/// [`append_word`]: BitVec::append_word
pub fn append(&mut self, bit: bool) {
if self.len % WORD_SIZE == 0 {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(0);
}
if bit {
@@ -574,7 +574,7 @@ impl BitVec {
/// [`append_bit_u8`]: BitVec::append_bit_u8
/// [`append_word`]: BitVec::append_word
pub fn append_bit(&mut self, bit: u64) {
if self.len % WORD_SIZE == 0 {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(0);
}
if bit % 2 == 1 {
@@ -653,7 +653,7 @@ impl BitVec {
/// [`append_bit_u16`]: BitVec::append_bit_u16
/// [`append_bit_u8`]: BitVec::append_bit_u8
pub fn append_word(&mut self, word: u64) {
if self.len % WORD_SIZE == 0 {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(word);
} else {
// zero out the unused bits before or-ing the new one, to ensure no garbage data remains
@@ -688,7 +688,7 @@ impl BitVec {
pub fn append_bits(&mut self, bits: u64, len: usize) {
assert!(len <= 64, "Cannot append more than 64 bits");

if self.len % WORD_SIZE == 0 {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(bits);
} else {
// zero out the unused bits before or-ing the new one, to ensure no garbage data remains
@@ -725,7 +725,7 @@ impl BitVec {
/// [`append_bits`]: BitVec::append_bits
/// [`drop_last`]: BitVec::drop_last
pub fn append_bits_unchecked(&mut self, bits: u64, len: usize) {
if self.len % WORD_SIZE == 0 {
if self.len.is_multiple_of(WORD_SIZE) {
self.data.push(bits);
} else {
self.data[self.len / WORD_SIZE] |= bits << (self.len % WORD_SIZE);
@@ -820,6 +820,8 @@ impl BitVec {
/// assert_eq!(bv.get(1), Some(0));
/// assert_eq!(bv.get(2), Some(1));
/// ```
///
/// [`get_unchecked`]: Self::get_unchecked
#[must_use]
pub fn get(&self, pos: usize) -> Option<u64> {
if pos >= self.len {
@@ -1043,7 +1045,7 @@ impl BitVec {
.iter()
.map(|limb| u64::from(limb.count_ones()))
.sum();
if self.len % WORD_SIZE > 0 {
if !self.len.is_multiple_of(WORD_SIZE) {
ones += u64::from(
(self.data.last().unwrap() & ((1 << (self.len % WORD_SIZE)) - 1)).count_ones(),
);
@@ -1226,6 +1228,8 @@ impl BitVec {
/// containing the original vector.
///
/// See also: [`split_at_unchecked`]
///
/// [`split_at_unchecked`]: Self::split_at_unchecked
pub fn split_at(self, at: usize) -> Result<(Self, Self), Self> {
if at > self.len {
Err(self)
@@ -1241,6 +1245,8 @@ impl BitVec {
/// If the index is larger than the length of the vector the function will panic or run
/// out of memory.
/// Use [`split_at`] to properly handle this case.
///
/// [`split_at`]: Self::split_at
#[must_use]
pub fn split_at_unchecked(mut self, at: usize) -> (Self, Self) {
let other_len = self.len - at;
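Finally, a short usage sketch for the append API touched in this file. `BitVec::new()` is assumed; `append_bits`, `append_word`, and `get` match the signatures visible in the hunks above:

```rust
use vers_vecs::BitVec;

fn main() {
    let mut bv = BitVec::new(); // assumed constructor
    bv.append_bits(0b1011, 4); // append the low 4 bits, LSB first
    bv.append_word(u64::MAX); // append a full 64-bit word
    assert_eq!(bv.get(0), Some(1));
    assert_eq!(bv.get(2), Some(0));
    assert_eq!(bv.get(4), Some(1)); // first bit of the appended word
}
```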