diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ccf812649..04108b22f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -204,28 +204,30 @@ jobs: # env: # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - # miri: - # needs: basics - # name: miri-test - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v4 - # with: - # lfs: true + miri: + needs: basics + name: miri-test + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + with: + lfs: true - # - name: Install Rust nightly with miri - # uses: dtolnay/rust-toolchain@stable - # with: - # toolchain: nightly - # components: miri + - name: Install Rust nightly with miri + uses: dtolnay/rust-toolchain@stable + with: + toolchain: nightly + components: miri - # - name: Install cargo-nextest - # uses: taiki-e/install-action@v2 - # with: - # tool: cargo-nextest + - name: Install cargo-nextest + uses: taiki-e/install-action@v2 + with: + tool: cargo-nextest - # - uses: Swatinem/rust-cache@v2 - # - name: miri - # run: cargo +nightly miri nextest run --package diskann-quantization - # env: - # MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance + - uses: Swatinem/rust-cache@v2 + + - name: miri + run: cargo +nightly miri nextest run --package diskann-quantization + env: + MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance diff --git a/diskann-quantization/src/algorithms/heap.rs b/diskann-quantization/src/algorithms/heap.rs index b3d1093fe..3943cdd30 100644 --- a/diskann-quantization/src/algorithms/heap.rs +++ b/diskann-quantization/src/algorithms/heap.rs @@ -390,11 +390,15 @@ mod tests { // Heap of size 2. fuzz_test_impl(2, 101, &mut rng); - // Heap size not power of two. - fuzz_test_impl(1000, 1000, &mut rng); - - // Heap size power of two. - fuzz_test_impl(128, 1000, &mut rng); + // Miri is extremely slow, so we skip the larger tests there. + #[cfg(not(miri))] + { + // Heap size not power of two. + fuzz_test_impl(1000, 1000, &mut rng); + + // Heap size power of two. + fuzz_test_impl(128, 1000, &mut rng); + } } #[test] diff --git a/diskann-quantization/src/algorithms/kmeans/common.rs b/diskann-quantization/src/algorithms/kmeans/common.rs index 3c095de0d..4808b6fa5 100644 --- a/diskann-quantization/src/algorithms/kmeans/common.rs +++ b/diskann-quantization/src/algorithms/kmeans/common.rs @@ -565,19 +565,35 @@ mod tests { #[test] fn test_block_transpose_16() { + #[cfg(not(miri))] for nrows in 0..128 { for ncols in 0..5 { test_block_transpose::<16>(nrows, ncols); } } + + #[cfg(miri)] + for nrows in 127..128 { + for ncols in 4..5 { + test_block_transpose::<16>(nrows, ncols); + } + } } #[test] fn test_block_transpose_8() { + #[cfg(not(miri))] for nrows in 0..128 { for ncols in 0..5 { test_block_transpose::<8>(nrows, ncols); } } + + #[cfg(miri)] + for nrows in 127..128 { + for ncols in 4..5 { + test_block_transpose::<8>(nrows, ncols); + } + } } } diff --git a/diskann-quantization/src/algorithms/kmeans/lloyds.rs b/diskann-quantization/src/algorithms/kmeans/lloyds.rs index 5c99d3d4e..56c641b0e 100644 --- a/diskann-quantization/src/algorithms/kmeans/lloyds.rs +++ b/diskann-quantization/src/algorithms/kmeans/lloyds.rs @@ -567,6 +567,7 @@ mod tests { #[test] fn test_distances_in_place() { let mut rng = StdRng::seed_from_u64(0xece88a9c6cd86a8a); + #[cfg(not(miri))] for ndata in 1..=31 { for ncenters in 1..=5 { for dim in 1..=4 { @@ -574,6 +575,15 @@ mod tests { } } } + + #[cfg(miri)] + for ndata in 31..=31 { + for ncenters in 5..=5 { + for dim in 4..=4 { + test_distances_in_place_impl(ndata, ncenters, dim, TRIALS, &mut rng); + } + } + } } // We do not perform any value-dependent control-flow for memory accesses. @@ -605,6 +615,7 @@ mod tests { // // Similarly, we need to ensure we have both an even and odd number of centers, // so bound this up to 5. + #[cfg(not(miri))] for ndata in 1..=35 { for ncenters in 1..=5 { for dim in 1..=4 { @@ -612,6 +623,15 @@ mod tests { } } } + + #[cfg(miri)] + for ndata in 34..=35 { + for ncenters in 4..=5 { + for dim in 3..=4 { + test_miri_distances_in_place_impl(ndata, ncenters, dim); + } + } + } } // End-to-end test. @@ -719,6 +739,7 @@ mod tests { #[test] fn end_to_end_test() { let mut rng = StdRng::seed_from_u64(0xff22c38d0f0531bf); + #[cfg(not(miri))] let setup = EndToEndSetup { ncenters: 11, ndim: 4, @@ -726,6 +747,15 @@ mod tests { step_between_clusters: 20, ntrials: 10, }; + + #[cfg(miri)] + let setup = EndToEndSetup { + ncenters: 3, + ndim: 4, + data_per_center: 2, + step_between_clusters: 20, + ntrials: 2, + }; end_to_end_test_impl(&setup, &mut rng); } diff --git a/diskann-quantization/src/algorithms/kmeans/plusplus.rs b/diskann-quantization/src/algorithms/kmeans/plusplus.rs index e34945ad4..efc2ab544 100644 --- a/diskann-quantization/src/algorithms/kmeans/plusplus.rs +++ b/diskann-quantization/src/algorithms/kmeans/plusplus.rs @@ -683,6 +683,11 @@ mod tests { fn test_update_distances() { let mut rng = StdRng::seed_from_u64(0x56c94b53c73e4fd9); for num_points in 0..48 { + #[cfg(miri)] + if num_points % 7 != 0 { + continue; + } + for dim in 1..4 { test_update_distances_impl(num_points, dim, &mut rng); } @@ -695,6 +700,7 @@ mod tests { // Kmeans++ sanity checks - if there are only `N` distinct and we want `N` centers, // then all `N` should be selected without repeats. + #[cfg(not(miri))] fn sanity_check_impl(ncenters: usize, dim: usize, rng: &mut R) { let repeats_per_center = 3; let context = lazy_format!( @@ -756,6 +762,7 @@ mod tests { // This test is like the sanity check - but instead of exact repeats, we use slightly // perturbed values to test that the proportionality is of distances is respected. + #[cfg(not(miri))] fn fuzzy_sanity_check_impl(ncenters: usize, dim: usize, rng: &mut R) { let repeats_per_center = 3; diff --git a/diskann-quantization/src/algorithms/transforms/double_hadamard.rs b/diskann-quantization/src/algorithms/transforms/double_hadamard.rs index 33ea3fbb8..20fdf42db 100644 --- a/diskann-quantization/src/algorithms/transforms/double_hadamard.rs +++ b/diskann-quantization/src/algorithms/transforms/double_hadamard.rs @@ -417,6 +417,7 @@ mod tests { // // Subsampling results in poor preservation of inner products, so we skip it // altogether. + #[cfg(not(miri))] let subsampled_errors = test_utils::ErrorSetup { norm: test_utils::Check::absrel(0.0, 2e-2), l2: test_utils::Check::absrel(0.0, 2e-2), @@ -424,6 +425,9 @@ mod tests { }; let target_dim = |v| TargetDim::Override(NonZeroUsize::new(v).unwrap()); + + // Miri is extremely slow, so we skip the larger tests there. + #[cfg(not(miri))] let dim_combos = [ // Natural (15, 15, true, TargetDim::Same, &natural_errors), @@ -441,9 +445,18 @@ mod tests { (1024, 1023, false, target_dim(1023), &subsampled_errors), (1000, 999, false, target_dim(999), &subsampled_errors), ]; + #[cfg(miri)] + let dim_combos = [(15, 15, true, target_dim(15), &natural_errors)]; - let trials_per_combo = 20; - let trials_per_dim = 100; + cfg_if::cfg_if! { + if #[cfg(miri)] { + let trials_per_combo = 1; + let trials_per_dim = 1; + } else { + let trials_per_combo = 20; + let trials_per_dim = 100; + } + } let mut rng = StdRng::seed_from_u64(0x6d1699abe066147); for (input, output, preserves_norms, target, errors) in dim_combos { diff --git a/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs b/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs index ed6d3b27f..656a1b582 100644 --- a/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs +++ b/diskann-quantization/src/algorithms/transforms/padding_hadamard.rs @@ -463,6 +463,7 @@ mod tests { // // Subsampling results in poor preservation of inner products, so we skip it // altogether. + #[cfg(not(miri))] let subsampled_errors = test_utils::ErrorSetup { norm: test_utils::Check::absrel(0.0, 1e-1), l2: test_utils::Check::absrel(0.0, 1e-1), @@ -471,6 +472,8 @@ mod tests { let target_dim = |v| TargetDim::Override(NonZeroUsize::new(v).unwrap()); + // Miri is extremely slow, so we skip the larger tests there. + #[cfg(not(miri))] let dim_combos = [ // Natural (15, 16, true, target_dim(16), &natural_errors), @@ -486,9 +489,18 @@ mod tests { (1000, 1000, false, TargetDim::Same, &subsampled_errors), (500, 1000, false, target_dim(1000), &subsampled_errors), ]; + #[cfg(miri)] + let dim_combos = [(15, 16, true, target_dim(16), &natural_errors)]; - let trials_per_combo = 20; - let trials_per_dim = 100; + cfg_if::cfg_if! { + if #[cfg(miri)] { + let trials_per_combo = 1; + let trials_per_dim = 1; + } else { + let trials_per_combo = 20; + let trials_per_dim = 100; + } + } let mut rng = StdRng::seed_from_u64(0x6d1699abe0626147); for (input, output, preserves_norms, target, errors) in dim_combos { diff --git a/diskann-quantization/src/algorithms/transforms/test_utils.rs b/diskann-quantization/src/algorithms/transforms/test_utils.rs index c853cc904..c38ba5ea0 100644 --- a/diskann-quantization/src/algorithms/transforms/test_utils.rs +++ b/diskann-quantization/src/algorithms/transforms/test_utils.rs @@ -172,7 +172,11 @@ fn within_ulp(mut got: f32, expected: f32, ulp: usize) -> bool { #[derive(Debug, Clone, Copy)] pub(super) enum Check { Ulp(usize), - AbsRel { abs: f32, rel: f32 }, + AbsRel { + abs: f32, + rel: f32, + }, + #[cfg(not(miri))] Skip, } @@ -185,6 +189,7 @@ impl Check { Self::AbsRel { abs, rel } } + #[cfg(not(miri))] pub(super) fn skip() -> Self { Self::Skip } @@ -219,6 +224,7 @@ impl Check { }) } } + #[cfg(not(miri))] Self::Skip => Ok(()), } } diff --git a/diskann-quantization/src/bits/distances.rs b/diskann-quantization/src/bits/distances.rs index 00ece2883..e888dccf0 100644 --- a/diskann-quantization/src/bits/distances.rs +++ b/diskann-quantization/src/bits/distances.rs @@ -2003,6 +2003,12 @@ mod tests { let dist = Uniform::new_inclusive(min, max).unwrap(); for dim in 0..dim_max { + // Only run the maximum dimension when running under miri. + #[cfg(miri)] + if dim != dim_max - 1 { + continue; + } + let mut x_reference: Vec = vec![0; dim]; let mut y_reference: Vec = vec![0; dim]; @@ -2092,7 +2098,7 @@ mod tests { cfg_if::cfg_if! { if #[cfg(miri)] { - const MAX_DIM: usize = 128; + const MAX_DIM: usize = 8; const TRIALS_PER_DIM: usize = 1; } else { const MAX_DIM: usize = 256; diff --git a/diskann-quantization/src/bits/slice.rs b/diskann-quantization/src/bits/slice.rs index fb77cefff..a08708f4c 100644 --- a/diskann-quantization/src/bits/slice.rs +++ b/diskann-quantization/src/bits/slice.rs @@ -1512,6 +1512,11 @@ mod tests { fn test_binary_dense() { let mut rng = StdRng::seed_from_u64(0xb3c95e8e19d3842e); for len in 0..MAX_DIM { + #[cfg(miri)] + if len != MAX_DIM - 1 { + continue; + } + test_send_and_sync::<1, Binary, Dense>(); test_empty::<1, Binary, Dense>(); test_construction_errors::<1, Binary, Dense>(); @@ -1558,6 +1563,11 @@ mod tests { fn test_4bit_bit_transpose() { let mut rng = StdRng::seed_from_u64(0xb3c95e8e19d3842e); for len in 0..MAX_DIM { + #[cfg(miri)] + if len != MAX_DIM - 1 { + continue; + } + test_send_and_sync::<4, Unsigned, BitTranspose>(); test_empty::<4, Unsigned, BitTranspose>(); test_construction_errors::<4, Unsigned, BitTranspose>(); diff --git a/diskann-quantization/src/minmax/quantizer.rs b/diskann-quantization/src/minmax/quantizer.rs index 13d624b0a..5e085d74e 100644 --- a/diskann-quantization/src/minmax/quantizer.rs +++ b/diskann-quantization/src/minmax/quantizer.rs @@ -475,6 +475,11 @@ mod minmax_quantizer_tests { let scales = [1.0, 1.1, 0.9]; for (s, e) in scales.iter().zip($err) { for d in 10..$dim { + #[cfg(miri)] + if d != $dim - 1 { + continue; + } + for _ in 0..TRIALS { test_quantizer_encoding_random::<$nbits>(d, &mut rng, e, *s); } diff --git a/diskann-quantization/src/minmax/vectors.rs b/diskann-quantization/src/minmax/vectors.rs index b8aac8a1c..45948e708 100644 --- a/diskann-quantization/src/minmax/vectors.rs +++ b/diskann-quantization/src/minmax/vectors.rs @@ -752,7 +752,13 @@ mod minmax_vector_tests { #[test] fn $name() { let mut rng = StdRng::seed_from_u64($seed); - for dim in 1..(bit_scale::<$nbits>() as usize) { + const MAX_DIM: usize = (bit_scale::<$nbits>() as usize); + for dim in 1..=MAX_DIM { + #[cfg(miri)] + if dim != MAX_DIM { + continue; + } + for _ in 0..TRIALS { test_minmax_compensated_vectors::<$nbits, _>(dim, &mut rng); } @@ -760,7 +766,7 @@ mod minmax_vector_tests { } }; } - test_minmax_compensated!(unsigned_minmax_compensated_test_u1, 1, 0xa32d5658097a1c35); + test_minmax_compensated!(unsigned_minmax_compensated_test_u1, 1, 0xa33d5658097a1c35); test_minmax_compensated!(unsigned_minmax_compensated_test_u2, 2, 0xaedf3d2a223b7b77); test_minmax_compensated!(unsigned_minmax_compensated_test_u4, 4, 0xf60c0c8d1aadc126); test_minmax_compensated!(unsigned_minmax_compensated_test_u8, 8, 0x09fa14c42a9d7d98); diff --git a/diskann-quantization/src/product/tables/test.rs b/diskann-quantization/src/product/tables/test.rs index 2c823c1eb..70732f65d 100644 --- a/diskann-quantization/src/product/tables/test.rs +++ b/diskann-quantization/src/product/tables/test.rs @@ -4,10 +4,13 @@ */ // A collection of test helpers to ensure uniformity across tables. -use diskann_utils::views::{Matrix, MatrixView, MutMatrixView}; +use diskann_utils::views::Matrix; +#[cfg(not(miri))] +use diskann_utils::views::{MatrixView, MutMatrixView}; +#[cfg(not(miri))] +use rand::seq::IndexedRandom; use rand::{ distr::{Distribution, Uniform}, - seq::IndexedRandom, Rng, SeedableRng, }; @@ -290,6 +293,7 @@ pub(super) fn check_pqtable_single_compression_errors( //////////////////////////////////////////////////////////////////// // A cantralized test for error handling in `CompressInto<[f32], [u8]>` +#[cfg(not(miri))] pub(super) fn check_pqtable_batch_compression_errors( build: &dyn Fn(Matrix, ChunkOffsets) -> T, context: &dyn std::fmt::Display, diff --git a/diskann-quantization/src/product/tables/transposed/pivots.rs b/diskann-quantization/src/product/tables/transposed/pivots.rs index a6d96c256..f4dcd9f36 100644 --- a/diskann-quantization/src/product/tables/transposed/pivots.rs +++ b/diskann-quantization/src/product/tables/transposed/pivots.rs @@ -1418,7 +1418,7 @@ mod tests { fn run_test_happy_path() { // Step dimensions by 1 to test all possible residual combinations. let dims: Vec = if cfg!(miri) { - (1..=8).collect() + (7..=8).collect() } else { (1..=16).collect() }; @@ -1583,12 +1583,22 @@ mod tests { #[test] fn test_process_into() { let mut rng = StdRng::seed_from_u64(0x21dfb5f35dfe5639); + + #[cfg(not(miri))] for total in 1..64 { for dim in 1..5 { println!("on ({}, {})", total, dim); test_process_into_impl(dim, total, &mut rng); } } + + #[cfg(miri)] + for total in 63..64 { + for dim in 4..5 { + println!("on ({}, {})", total, dim); + test_process_into_impl(dim, total, &mut rng); + } + } } #[test] diff --git a/diskann-quantization/src/product/tables/transposed/table.rs b/diskann-quantization/src/product/tables/transposed/table.rs index 2db1ac8a7..d20105e11 100644 --- a/diskann-quantization/src/product/tables/transposed/table.rs +++ b/diskann-quantization/src/product/tables/transposed/table.rs @@ -511,13 +511,14 @@ mod test_compression { }; use super::*; + #[cfg(not(miri))] + use crate::product::tables::test::{ + check_pqtable_batch_compression_errors, check_pqtable_single_compression_errors, + }; use crate::{ distances::{InnerProduct, SquaredL2}, error::format, - product::tables::test::{ - check_pqtable_batch_compression_errors, check_pqtable_single_compression_errors, - create_dataset, create_pivot_tables, - }, + product::tables::test::{create_dataset, create_pivot_tables}, }; use diskann_utils::lazy_format; @@ -618,7 +619,7 @@ mod test_compression { let mut rng = StdRng::seed_from_u64(0x88e3d3366501ad6c); let num_data = if cfg!(miri) { - vec![0, 8, 9, 10, 11] + vec![7, 8] } else { vec![0, 1, 2, 3, 4, 16, 17, 18, 19] }; @@ -915,11 +916,20 @@ mod test_compression { #[test] fn test_process_into() { let mut rng = StdRng::seed_from_u64(0x0e3cf3ba4b27e7f8); + + #[cfg(not(miri))] for num_chunks in 1..5 { for num_centers in 1..48 { test_process_into_impl(num_chunks, num_centers, 2, &mut rng); } } + + #[cfg(miri)] + for num_chunks in 4..5 { + for num_centers in 47..48 { + test_process_into_impl(num_chunks, num_centers, 2, &mut rng); + } + } } #[test] diff --git a/diskann-quantization/src/test_util.rs b/diskann-quantization/src/test_util.rs index a258dd12a..780471776 100644 --- a/diskann-quantization/src/test_util.rs +++ b/diskann-quantization/src/test_util.rs @@ -3,6 +3,7 @@ * Licensed under the MIT license. */ +#[cfg(not(miri))] use std::sync::{ atomic::{AtomicUsize, Ordering}, Arc, @@ -15,7 +16,9 @@ use rand::{ seq::SliceRandom, }; -use crate::alloc::{AllocatorCore, AllocatorError, GlobalAllocator}; +#[cfg(not(miri))] +use crate::alloc::GlobalAllocator; +use crate::alloc::{AllocatorCore, AllocatorError}; /// An allocator that always fails. #[derive(Debug, Clone, Copy)] @@ -36,11 +39,13 @@ unsafe impl AllocatorCore for AlwaysFails { /// An allocator that can only perform a limited number of allocations. /// /// Used to test interfaces for allocation reliability. +#[cfg(not(miri))] #[derive(Debug, Clone)] pub(crate) struct LimitedAllocator { remaining: Arc, } +#[cfg(not(miri))] impl LimitedAllocator { pub(crate) fn new(allocations: usize) -> Self { Self { @@ -49,6 +54,7 @@ impl LimitedAllocator { } } +#[cfg(not(miri))] /// SAFETY: This either forwards to the global allocator, or failed. unsafe impl AllocatorCore for LimitedAllocator { fn allocate(