Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ cpp_portable = ["cpp"]
# Optimize FastPFOR for the current CPU.
cpp_native = ["cpp"]
cpp = ["dep:cmake", "dep:cxx", "dep:cxx-build"]
rust = ["dep:thiserror", "dep:bytes", "dep:bytemuck"]
rust = ["dep:bytes"]

[dependencies]
bytemuck = { version = "1.25.0", optional = true }
bytemuck = { version = "1.25.0", features = ["min_const_generics"] }
bytes = { version = "1.11", optional = true }
cxx = { version = "1.0.194", optional = true }
thiserror = { version = "2.0.18", optional = true }
thiserror = "2.0.18"

[build-dependencies]
cmake = { version = "0.1.57", optional = true }
Expand Down
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,21 +90,21 @@ Feature selection can be overridden with the `FASTPFOR_SIMD_MODE` environment va
### Using C++ Wrapper

```rust
use fastpfor::cpp::{Codec32 as _, SimdFastPFor128Codec};
use fastpfor::{AnyLenCodec as _, cpp};

fn main() {
let mut codec = SimdFastPFor128Codec::new();
let mut codec = cpp::SimdFastPFor128Codec::new();

// Encode
let mut input = vec![1, 2, 3, 4, 5];
let mut output = vec![0; 10]; // must be large enough
let enc_slice = codec.encode32(&input, &mut output).unwrap();
let input = vec![1u32, 2, 3, 4, 5];
let mut compressed = Vec::new();
codec.encode(&input, &mut compressed).unwrap();

// Decode
let mut decoded = vec![0; 10]; // must be large enough
let dec_slice = codec.decode32(&enc_slice, &mut decoded).unwrap();
let mut decoded = Vec::new();
codec
.decode(&compressed, &mut decoded, None)
.unwrap();

assert_eq!(input, dec_slice);
assert_eq!(input, decoded);
}
```

Expand Down
27 changes: 16 additions & 11 deletions benches/bench_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ use core::ops::Range;
pub use std::io::Cursor;
use std::num::NonZeroU32;

#[cfg(feature = "cpp")]
use fastpfor::AnyLenCodec as _;
#[cfg(feature = "cpp")]
use fastpfor::cpp;
pub use fastpfor::rust::{BLOCK_SIZE_128, BLOCK_SIZE_256, DEFAULT_PAGE_SIZE, FastPFOR, Integer};
use rand::rngs::StdRng;
use rand::{RngExt as _, SeedableRng};
Expand Down Expand Up @@ -167,22 +171,24 @@ fn prepare_compressed_data(data: &[u32], block_size: NonZeroU32) -> Vec<u32> {
// ---------------------------------------------------------------------------

#[cfg(feature = "cpp")]
pub fn cpp_encode(codec: &fastpfor::cpp::FastPFor128Codec, data: &[u32]) -> Vec<u32> {
use fastpfor::cpp::Codec32 as _;
let mut out = vec![0u32; data.len() * 2 + 1024];
let new_len = codec.encode32(data, &mut out).unwrap().len();
out.truncate(new_len);
pub fn cpp_encode(codec: &mut cpp::FastPFor128Codec, data: &[u32]) -> Vec<u32> {
let mut out = Vec::new();
codec.encode(data, &mut out).unwrap();
out
}

#[cfg(feature = "cpp")]
pub fn cpp_decode(
codec: &fastpfor::cpp::FastPFor128Codec,
codec: &mut cpp::FastPFor128Codec,
compressed: &[u32],
decompressed: &mut [u32],
) -> usize {
use fastpfor::cpp::Codec32 as _;
codec.decode32(compressed, decompressed).unwrap().len()
let mut out = Vec::new();
codec
.decode(compressed, &mut out, Some(decompressed.len() as u32))
.unwrap();
decompressed.copy_from_slice(&out);
out.len()
}

// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -268,10 +274,9 @@ pub struct CppDecodeFixture {
#[cfg(feature = "cpp")]
impl CppDecodeFixture {
fn new(name: &'static str, generator: DataGeneratorFn, size: usize) -> Self {
use fastpfor::cpp::FastPFor128Codec;
let data = generator(size);
let codec = FastPFor128Codec::new();
let cpp_compressed = cpp_encode(&codec, &data);
let mut codec = cpp::FastPFor128Codec::new();
let cpp_compressed = cpp_encode(&mut codec, &data);
let rust_compressed = prepare_compressed_data(&data, BLOCK_SIZE_128);
Self {
name,
Expand Down
12 changes: 6 additions & 6 deletions benches/fastpfor_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ use bench_utils::{
};
#[cfg(feature = "cpp")]
use bench_utils::{cpp_decode, cpp_decode_fixtures, cpp_encode};
#[cfg(feature = "cpp")]
use fastpfor::cpp;

const SIZES: &[usize] = &[1024, 4096];

Expand Down Expand Up @@ -144,17 +146,15 @@ fn benchmark_compression_ratio(c: &mut Criterion) {
/// the pure-Rust `FastPFOR` codec with `BLOCK_SIZE_128`.
#[cfg(feature = "cpp")]
fn benchmark_cpp_vs_rust(c: &mut Criterion) {
use fastpfor::cpp::FastPFor128Codec;

let mut group = c.benchmark_group("cpp_vs_rust/encode");
for (size, fix) in compress_fixtures(SIZES) {
group.throughput(Throughput::Elements(size as u64));
group.bench_with_input(
BenchmarkId::new(format!("cpp/{}", fix.name), size),
&fix.data,
|b, data| {
let codec = FastPFor128Codec::new();
b.iter(|| black_box(cpp_encode(&codec, black_box(data))));
let mut codec = cpp::FastPFor128Codec::new();
b.iter(|| black_box(cpp_encode(&mut codec, black_box(data))));
},
);
group.bench_with_input(
Expand All @@ -175,9 +175,9 @@ fn benchmark_cpp_vs_rust(c: &mut Criterion) {
BenchmarkId::new(format!("cpp/{}", fix.name), size),
&fix.cpp_compressed,
|b, compressed| {
let codec = FastPFor128Codec::new();
let mut codec = cpp::FastPFor128Codec::new();
let mut out = vec![0u32; fix.original_len];
b.iter(|| black_box(cpp_decode(&codec, black_box(compressed), &mut out)));
b.iter(|| black_box(cpp_decode(&mut codec, black_box(compressed), &mut out)));
},
);
group.bench_with_input(
Expand Down
75 changes: 38 additions & 37 deletions fuzz/fuzz_targets/common.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use fastpfor::{cpp, rust};
use fastpfor::{AnyLenCodec, cpp, rust};

pub type BoxedCppCodec = Box<dyn cpp::Codec32>;
pub type BoxedCppCodec = Box<dyn AnyLenCodec>;

#[derive(arbitrary::Arbitrary)]
pub struct FuzzInput<C> {
Expand Down Expand Up @@ -77,42 +77,43 @@ pub enum CppCodec {

impl From<CppCodec> for BoxedCppCodec {
fn from(codec: CppCodec) -> Self {
use cpp::*;
match codec {
CppCodec::BP32 => Box::new(BP32Codec::default()),
CppCodec::Copy => Box::new(CopyCodec::default()),
CppCodec::FastBinaryPacking8 => Box::new(FastBinaryPacking8Codec::default()),
CppCodec::FastPFor128 => Box::new(FastPFor128Codec::default()),
CppCodec::FastPFor256 => Box::new(FastPFor256Codec::default()),
CppCodec::FastBinaryPacking16 => Box::new(FastBinaryPacking16Codec::default()),
CppCodec::FastBinaryPacking32 => Box::new(FastBinaryPacking32Codec::default()),
CppCodec::MaskedVByte => Box::new(MaskedVByteCodec::default()),
CppCodec::NewPFor => Box::new(NewPForCodec::default()),
CppCodec::OptPFor => Box::new(OptPForCodec::default()),
CppCodec::PFor2008 => Box::new(PFor2008Codec::default()),
CppCodec::PFor => Box::new(PForCodec::default()),
CppCodec::SimdBinaryPacking => Box::new(SimdBinaryPackingCodec::default()),
CppCodec::SimdFastPFor128 => Box::new(SimdFastPFor128Codec::default()),
CppCodec::SimdFastPFor256 => Box::new(SimdFastPFor256Codec::default()),
CppCodec::SimdGroupSimple => Box::new(SimdGroupSimpleCodec::default()),
CppCodec::SimdGroupSimpleRingBuf => Box::new(SimdGroupSimpleRingBufCodec::default()),
CppCodec::SimdNewPFor => Box::new(SimdNewPForCodec::default()),
CppCodec::SimdOptPFor => Box::new(SimdOptPForCodec::default()),
CppCodec::SimdPFor => Box::new(SimdPForCodec::default()),
CppCodec::SimdSimplePFor => Box::new(SimdSimplePForCodec::default()),
// CppCodec::Simple16 => Box::new(Simple16Codec::default()),
// CppCodec::Simple8b => Box::new(Simple8bCodec::default()),
// CppCodec::Simple8bRle => Box::new(Simple8bRleCodec::default()),
// CppCodec::Simple9 => Box::new(Simple9Codec::default()),
// CppCodec::Simple9Rle => Box::new(Simple9RleCodec::default()),
// CppCodec::SimplePFor => Box::new(SimplePForCodec::default()),
// CppCodec::Snappy => Box::new(SnappyCodec::default()),
CppCodec::StreamVByte => Box::new(StreamVByteCodec::default()),
CppCodec::VByte => Box::new(VByteCodec::default()),
CppCodec::VarInt => Box::new(VarIntCodec::default()),
// CppCodec::VarIntG8iu => Box::new(VarIntG8iuCodec::default()),
CppCodec::VarIntGb => Box::new(VarIntGbCodec::default()),
// CppCodec::VsEncoding => Box::new(VsEncodingCodec::default()),
CppCodec::BP32 => Box::new(cpp::BP32Codec::default()),
CppCodec::Copy => Box::new(cpp::CopyCodec::default()),
CppCodec::FastBinaryPacking8 => Box::new(cpp::FastBinaryPacking8Codec::default()),
CppCodec::FastPFor128 => Box::new(cpp::FastPFor128Codec::default()),
CppCodec::FastPFor256 => Box::new(cpp::FastPFor256Codec::default()),
CppCodec::FastBinaryPacking16 => Box::new(cpp::FastBinaryPacking16Codec::default()),
CppCodec::FastBinaryPacking32 => Box::new(cpp::FastBinaryPacking32Codec::default()),
CppCodec::MaskedVByte => Box::new(cpp::MaskedVByteCodec::default()),
CppCodec::NewPFor => Box::new(cpp::NewPForCodec::default()),
CppCodec::OptPFor => Box::new(cpp::OptPForCodec::default()),
CppCodec::PFor2008 => Box::new(cpp::PFor2008Codec::default()),
CppCodec::PFor => Box::new(cpp::PForCodec::default()),
CppCodec::SimdBinaryPacking => Box::new(cpp::SimdBinaryPackingCodec::default()),
CppCodec::SimdFastPFor128 => Box::new(cpp::SimdFastPFor128Codec::default()),
CppCodec::SimdFastPFor256 => Box::new(cpp::SimdFastPFor256Codec::default()),
CppCodec::SimdGroupSimple => Box::new(cpp::SimdGroupSimpleCodec::default()),
CppCodec::SimdGroupSimpleRingBuf => {
Box::new(cpp::SimdGroupSimpleRingBufCodec::default())
}
CppCodec::SimdNewPFor => Box::new(cpp::SimdNewPForCodec::default()),
CppCodec::SimdOptPFor => Box::new(cpp::SimdOptPForCodec::default()),
CppCodec::SimdPFor => Box::new(cpp::SimdPForCodec::default()),
CppCodec::SimdSimplePFor => Box::new(cpp::SimdSimplePForCodec::default()),
// CppCodec::Simple16 => Box::new(cpp::Simple16Codec::default()),
// CppCodec::Simple8b => Box::new(cpp::Simple8bCodec::default()),
// CppCodec::Simple8bRle => Box::new(cpp::Simple8bRleCodec::default()),
// CppCodec::Simple9 => Box::new(cpp::Simple9Codec::default()),
// CppCodec::Simple9Rle => Box::new(cpp::Simple9RleCodec::default()),
// CppCodec::SimplePFor => Box::new(cpp::SimplePForCodec::default()),
// CppCodec::Snappy => Box::new(cpp::SnappyCodec::default()),
CppCodec::StreamVByte => Box::new(cpp::StreamVByteCodec::default()),
CppCodec::VByte => Box::new(cpp::VByteCodec::default()),
CppCodec::VarInt => Box::new(cpp::VarIntCodec::default()),
// CppCodec::VarIntG8iu => Box::new(cpp::VarIntG8iuCodec::default()),
CppCodec::VarIntGb => Box::new(cpp::VarIntGbCodec::default()),
// CppCodec::VsEncoding => Box::new(cpp::VsEncodingCodec::default()),
}
}
}
Expand Down
28 changes: 13 additions & 15 deletions fuzz/fuzz_targets/cpp_roundtrip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,26 @@ mod common;
use common::*;

fuzz_target!(|data: FuzzInput<CppCodec>| {
let codec = BoxedCppCodec::from(data.codec);
let mut codec = BoxedCppCodec::from(data.codec);
let input = data.data;

// Allocate output buffer with generous size
let mut output = vec![0u32; input.len() * 2 + 1024];
let mut compressed = Vec::new();
codec.encode(&input, &mut compressed).unwrap();

// Compress the data
let enc_slice = codec.encode32(&input, &mut output).unwrap();

// Now decompress
let mut decoded = vec![0u32; input.len() * 2 + 1024];
let dec_slice = codec.decode32(enc_slice, &mut decoded).unwrap();
let mut decoded = Vec::new();
codec
.decode(&compressed, &mut decoded, None)
.expect("decode");

// Verify roundtrip
if dec_slice.len() + input.len() < 200 {
assert_eq!(input, dec_slice, "Decompressed output mismatches");
if decoded.len() + input.len() < 200 {
assert_eq!(input, decoded.as_slice(), "Decompressed output mismatches");
} else {
assert_eq!(dec_slice.len(), input.len(), "Decompressed length mismatch");
for (i, (&original, &decoded)) in input.iter().zip(dec_slice.iter()).enumerate() {
assert_eq!(decoded.len(), input.len(), "Decompressed length mismatch");
for (i, (&original, &out)) in input.iter().zip(decoded.iter()).enumerate() {
assert_eq!(
original, decoded,
"Mismatch at position {i}: expected {original}, got {decoded}"
original, out,
"Mismatch at position {i}: expected {original}, got {out}"
);
}
}
Expand Down
49 changes: 19 additions & 30 deletions fuzz/fuzz_targets/rust_compress_oracle.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![no_main]

use fastpfor::{CodecToSlice, cpp, rust};
use fastpfor::{AnyLenCodec, CodecToSlice, cpp, rust};
use libfuzzer_sys::fuzz_target;
mod common;
use common::*;
Expand Down Expand Up @@ -28,43 +28,32 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
let last_block_size_multiple = input.len() / block_size * block_size;
let input = &input[..last_block_size_multiple];

// Allocate output buffers with generous size
// Allocate output buffer for Rust (slice API)
let mut rust_compressed = vec![0u32; input.len() * 2 + 1024];
let mut cpp_compressed = vec![0u32; input.len() * 2 + 1024];

// Compress with Rust implementation using Codec wrapper
let mut rust_codec = rust::Codec::from(data.codec);
let rust_result = rust_codec
.compress_to_slice(input, &mut rust_compressed)
.expect("Rust compression failed");

// Compress with C++ implementation
let compressed_oracle_from_cpp = match data.codec {
RustCodec::FastPFOR256 => {
let mut cpp_codec = cpp::FastPFor256Codec::new();
cpp_codec
.compress_to_slice(input, &mut cpp_compressed)
.expect("C++ compression failed")
}
RustCodec::FastPFOR128 => {
let mut cpp_codec = cpp::FastPFor128Codec::new();
cpp_codec
.compress_to_slice(input, &mut cpp_compressed)
.expect("C++ compression failed")
}
RustCodec::VariableByte => {
let mut cpp_codec = cpp::MaskedVByteCodec::new();
cpp_codec
.compress_to_slice(input, &mut cpp_compressed)
.expect("C++ compression failed")
}
RustCodec::JustCopy => {
let mut cpp_codec = cpp::CopyCodec::new();
cpp_codec
.compress_to_slice(input, &mut cpp_compressed)
.expect("C++ compression failed")
}
};
// Compress with C++ implementation (`AnyLenCodec` / Vec API)
let mut cpp_compressed = Vec::new();
match data.codec {
RustCodec::FastPFOR256 => cpp::FastPFor256Codec::new()
.encode(input, &mut cpp_compressed)
.expect("C++ compression failed"),
RustCodec::FastPFOR128 => cpp::FastPFor128Codec::new()
.encode(input, &mut cpp_compressed)
.expect("C++ compression failed"),
RustCodec::VariableByte => cpp::MaskedVByteCodec::new()
.encode(input, &mut cpp_compressed)
.expect("C++ compression failed"),
RustCodec::JustCopy => cpp::CopyCodec::new()
.encode(input, &mut cpp_compressed)
.expect("C++ compression failed"),
}
let compressed_oracle_from_cpp = cpp_compressed.as_slice();

// Compare compressed outputs
assert_eq!(
Expand Down
Loading
Loading