Skip to content
46 changes: 24 additions & 22 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,28 +204,30 @@ jobs:
# env:
# CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

# miri:
# needs: basics
# name: miri-test
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# with:
# lfs: true
miri:
needs: basics
name: miri-test
runs-on: ubuntu-latest
continue-on-error: true
steps:
- uses: actions/checkout@v4
with:
lfs: true

# - name: Install Rust nightly with miri
# uses: dtolnay/rust-toolchain@stable
# with:
# toolchain: nightly
# components: miri
- name: Install Rust nightly with miri
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly
components: miri

# - name: Install cargo-nextest
# uses: taiki-e/install-action@v2
# with:
# tool: cargo-nextest
- name: Install cargo-nextest
uses: taiki-e/install-action@v2
with:
tool: cargo-nextest

# - uses: Swatinem/rust-cache@v2
# - name: miri
# run: cargo +nightly miri nextest run --package diskann-quantization
# env:
# MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance
- uses: Swatinem/rust-cache@v2

- name: miri
run: cargo +nightly miri nextest run --package diskann-quantization
env:
MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance
14 changes: 9 additions & 5 deletions diskann-quantization/src/algorithms/heap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,11 +390,15 @@ mod tests {
// Heap of size 2.
fuzz_test_impl(2, 101, &mut rng);

// Heap size not power of two.
fuzz_test_impl(1000, 1000, &mut rng);

// Heap size power of two.
fuzz_test_impl(128, 1000, &mut rng);
// Miri is extremely slow, so we skip the larger tests there.
#[cfg(not(miri))]
{
// Heap size not power of two.
fuzz_test_impl(1000, 1000, &mut rng);

// Heap size power of two.
fuzz_test_impl(128, 1000, &mut rng);
}
}

#[test]
Expand Down
16 changes: 16 additions & 0 deletions diskann-quantization/src/algorithms/kmeans/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -565,19 +565,35 @@ mod tests {

#[test]
fn test_block_transpose_16() {
#[cfg(not(miri))]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these bounds be set up with something like

let row_range = if cfg!(miri) {
    127..128
} else {
    0..128
};

and such to avoid the repitition?

for nrows in 0..128 {
for ncols in 0..5 {
test_block_transpose::<16>(nrows, ncols);
}
}

#[cfg(miri)]
for nrows in 127..128 {
for ncols in 4..5 {
test_block_transpose::<16>(nrows, ncols);
}
}
}

#[test]
fn test_block_transpose_8() {
#[cfg(not(miri))]
for nrows in 0..128 {
for ncols in 0..5 {
test_block_transpose::<8>(nrows, ncols);
}
}

#[cfg(miri)]
for nrows in 127..128 {
for ncols in 4..5 {
test_block_transpose::<8>(nrows, ncols);
}
}
}
}
30 changes: 30 additions & 0 deletions diskann-quantization/src/algorithms/kmeans/lloyds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,13 +567,23 @@ mod tests {
#[test]
fn test_distances_in_place() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can probably skip this test entirely when running under miri. There is a dedicated Miri test just after this that validates the indexing.

let mut rng = StdRng::seed_from_u64(0xece88a9c6cd86a8a);
#[cfg(not(miri))]
for ndata in 1..=31 {
for ncenters in 1..=5 {
for dim in 1..=4 {
test_distances_in_place_impl(ndata, ncenters, dim, TRIALS, &mut rng);
}
}
}

#[cfg(miri)]
for ndata in 31..=31 {
for ncenters in 5..=5 {
for dim in 4..=4 {
test_distances_in_place_impl(ndata, ncenters, dim, TRIALS, &mut rng);
}
}
}
}

// We do not perform any value-dependent control-flow for memory accesses.
Expand Down Expand Up @@ -605,13 +615,23 @@ mod tests {
//
// Similarly, we need to ensure we have both an even and odd number of centers,
// so bound this up to 5.
#[cfg(not(miri))]
for ndata in 1..=35 {
for ncenters in 1..=5 {
for dim in 1..=4 {
test_miri_distances_in_place_impl(ndata, ncenters, dim);
}
}
}

#[cfg(miri)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one we cannot short circuit with Miri. The distances_in_place algorithm needs to be tested for a range of sizes to validate memory accesses.

for ndata in 34..=35 {
for ncenters in 4..=5 {
for dim in 3..=4 {
test_miri_distances_in_place_impl(ndata, ncenters, dim);
}
}
}
}

// End-to-end test.
Expand Down Expand Up @@ -719,13 +739,23 @@ mod tests {
#[test]
fn end_to_end_test() {
let mut rng = StdRng::seed_from_u64(0xff22c38d0f0531bf);
#[cfg(not(miri))]
let setup = EndToEndSetup {
ncenters: 11,
ndim: 4,
data_per_center: 8,
step_between_clusters: 20,
ntrials: 10,
};

#[cfg(miri)]
let setup = EndToEndSetup {
ncenters: 3,
ndim: 4,
data_per_center: 2,
step_between_clusters: 20,
ntrials: 2,
};
end_to_end_test_impl(&setup, &mut rng);
}

Expand Down
7 changes: 7 additions & 0 deletions diskann-quantization/src/algorithms/kmeans/plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,11 @@ mod tests {
fn test_update_distances() {
let mut rng = StdRng::seed_from_u64(0x56c94b53c73e4fd9);
for num_points in 0..48 {
#[cfg(miri)]
if num_points % 7 != 0 {
continue;
}

for dim in 1..4 {
test_update_distances_impl(num_points, dim, &mut rng);
}
Expand All @@ -695,6 +700,7 @@ mod tests {

// Kmeans++ sanity checks - if there are only `N` distinct and we want `N` centers,
// then all `N` should be selected without repeats.
#[cfg(not(miri))]
fn sanity_check_impl<R: Rng>(ncenters: usize, dim: usize, rng: &mut R) {
let repeats_per_center = 3;
let context = lazy_format!(
Expand Down Expand Up @@ -756,6 +762,7 @@ mod tests {

// This test is like the sanity check - but instead of exact repeats, we use slightly
// perturbed values to test that the proportionality is of distances is respected.
#[cfg(not(miri))]
fn fuzzy_sanity_check_impl<R: Rng>(ncenters: usize, dim: usize, rng: &mut R) {
let repeats_per_center = 3;

Expand Down
17 changes: 15 additions & 2 deletions diskann-quantization/src/algorithms/transforms/double_hadamard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,13 +417,17 @@ mod tests {
//
// Subsampling results in poor preservation of inner products, so we skip it
// altogether.
#[cfg(not(miri))]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be tempted to say that we can skip all of test_double_hadamard when running under Miri. It doesn't execute any unsafe code on its own.

let subsampled_errors = test_utils::ErrorSetup {
norm: test_utils::Check::absrel(0.0, 2e-2),
l2: test_utils::Check::absrel(0.0, 2e-2),
ip: test_utils::Check::skip(),
};

let target_dim = |v| TargetDim::Override(NonZeroUsize::new(v).unwrap());

// Miri is extremely slow, so we skip the larger tests there.
#[cfg(not(miri))]
let dim_combos = [
// Natural
(15, 15, true, TargetDim::Same, &natural_errors),
Expand All @@ -441,9 +445,18 @@ mod tests {
(1024, 1023, false, target_dim(1023), &subsampled_errors),
(1000, 999, false, target_dim(999), &subsampled_errors),
];
#[cfg(miri)]
let dim_combos = [(15, 15, true, target_dim(15), &natural_errors)];

let trials_per_combo = 20;
let trials_per_dim = 100;
cfg_if::cfg_if! {
if #[cfg(miri)] {
let trials_per_combo = 1;
let trials_per_dim = 1;
} else {
let trials_per_combo = 20;
let trials_per_dim = 100;
}
}

let mut rng = StdRng::seed_from_u64(0x6d1699abe066147);
for (input, output, preserves_norms, target, errors) in dim_combos {
Expand Down
16 changes: 14 additions & 2 deletions diskann-quantization/src/algorithms/transforms/padding_hadamard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ mod tests {
//
// Subsampling results in poor preservation of inner products, so we skip it
// altogether.
#[cfg(not(miri))]
let subsampled_errors = test_utils::ErrorSetup {
norm: test_utils::Check::absrel(0.0, 1e-1),
l2: test_utils::Check::absrel(0.0, 1e-1),
Expand All @@ -471,6 +472,8 @@ mod tests {

let target_dim = |v| TargetDim::Override(NonZeroUsize::new(v).unwrap());

// Miri is extremely slow, so we skip the larger tests there.
#[cfg(not(miri))]
let dim_combos = [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As with test_double_hadamard, I'm tempted to say we can skip test_padding_hadamard altogether under Miri.

// Natural
(15, 16, true, target_dim(16), &natural_errors),
Expand All @@ -486,9 +489,18 @@ mod tests {
(1000, 1000, false, TargetDim::Same, &subsampled_errors),
(500, 1000, false, target_dim(1000), &subsampled_errors),
];
#[cfg(miri)]
let dim_combos = [(15, 16, true, target_dim(16), &natural_errors)];

let trials_per_combo = 20;
let trials_per_dim = 100;
cfg_if::cfg_if! {
if #[cfg(miri)] {
let trials_per_combo = 1;
let trials_per_dim = 1;
} else {
let trials_per_combo = 20;
let trials_per_dim = 100;
}
}

let mut rng = StdRng::seed_from_u64(0x6d1699abe0626147);
for (input, output, preserves_norms, target, errors) in dim_combos {
Expand Down
8 changes: 7 additions & 1 deletion diskann-quantization/src/algorithms/transforms/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,11 @@ fn within_ulp(mut got: f32, expected: f32, ulp: usize) -> bool {
#[derive(Debug, Clone, Copy)]
pub(super) enum Check {
Ulp(usize),
AbsRel { abs: f32, rel: f32 },
AbsRel {
abs: f32,
rel: f32,
},
#[cfg(not(miri))]
Skip,
}

Expand All @@ -185,6 +189,7 @@ impl Check {
Self::AbsRel { abs, rel }
}

#[cfg(not(miri))]
pub(super) fn skip() -> Self {
Self::Skip
}
Expand Down Expand Up @@ -219,6 +224,7 @@ impl Check {
})
}
}
#[cfg(not(miri))]
Self::Skip => Ok(()),
}
}
Expand Down
8 changes: 7 additions & 1 deletion diskann-quantization/src/bits/distances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,12 @@ mod tests {
let dist = Uniform::new_inclusive(min, max).unwrap();

for dim in 0..dim_max {
// Only run the maximum dimension when running under miri.
#[cfg(miri)]
if dim != dim_max - 1 {
continue;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to be more careful about skipping values here. Unrolling in the distance kernel implementations means that there are a lot of corner cases that need checking which are not necessarily covered by a single test.

An alternative is to have an audit and supply a list of specific dimensions to test.


let mut x_reference: Vec<u8> = vec![0; dim];
let mut y_reference: Vec<u8> = vec![0; dim];

Expand Down Expand Up @@ -2092,7 +2098,7 @@ mod tests {

cfg_if::cfg_if! {
if #[cfg(miri)] {
const MAX_DIM: usize = 128;
const MAX_DIM: usize = 8;
const TRIALS_PER_DIM: usize = 1;
} else {
const MAX_DIM: usize = 256;
Expand Down
10 changes: 10 additions & 0 deletions diskann-quantization/src/bits/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,11 @@ mod tests {
fn test_binary_dense() {
let mut rng = StdRng::seed_from_u64(0xb3c95e8e19d3842e);
for len in 0..MAX_DIM {
#[cfg(miri)]
if len != MAX_DIM - 1 {
continue;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests in slice all need to run under Miri unfortunately.


test_send_and_sync::<1, Binary, Dense>();
test_empty::<1, Binary, Dense>();
test_construction_errors::<1, Binary, Dense>();
Expand Down Expand Up @@ -1558,6 +1563,11 @@ mod tests {
fn test_4bit_bit_transpose() {
let mut rng = StdRng::seed_from_u64(0xb3c95e8e19d3842e);
for len in 0..MAX_DIM {
#[cfg(miri)]
if len != MAX_DIM - 1 {
continue;
}

test_send_and_sync::<4, Unsigned, BitTranspose>();
test_empty::<4, Unsigned, BitTranspose>();
test_construction_errors::<4, Unsigned, BitTranspose>();
Expand Down
5 changes: 5 additions & 0 deletions diskann-quantization/src/minmax/quantizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,11 @@ mod minmax_quantizer_tests {
let scales = [1.0, 1.1, 0.9];
for (s, e) in scales.iter().zip($err) {
for d in 10..$dim {
#[cfg(miri)]
if d != $dim - 1 {
continue;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can probably skip the min-max quantizer tests under Miri.


for _ in 0..TRIALS {
test_quantizer_encoding_random::<$nbits>(d, &mut rng, e, *s);
}
Expand Down
10 changes: 8 additions & 2 deletions diskann-quantization/src/minmax/vectors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -752,15 +752,21 @@ mod minmax_vector_tests {
#[test]
fn $name() {
let mut rng = StdRng::seed_from_u64($seed);
for dim in 1..(bit_scale::<$nbits>() as usize) {
const MAX_DIM: usize = (bit_scale::<$nbits>() as usize);
for dim in 1..=MAX_DIM {
#[cfg(miri)]
if dim != MAX_DIM {
continue;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can almost skip the MinMax entirely under Miri because it does not run unsafe outside of decompression. Since the indexing used in the decompression is checked in debug builds - it might be fine to just skip.


for _ in 0..TRIALS {
test_minmax_compensated_vectors::<$nbits, _>(dim, &mut rng);
}
}
}
};
}
test_minmax_compensated!(unsigned_minmax_compensated_test_u1, 1, 0xa32d5658097a1c35);
test_minmax_compensated!(unsigned_minmax_compensated_test_u1, 1, 0xa33d5658097a1c35);
test_minmax_compensated!(unsigned_minmax_compensated_test_u2, 2, 0xaedf3d2a223b7b77);
test_minmax_compensated!(unsigned_minmax_compensated_test_u4, 4, 0xf60c0c8d1aadc126);
test_minmax_compensated!(unsigned_minmax_compensated_test_u8, 8, 0x09fa14c42a9d7d98);
Expand Down
Loading