diff --git a/clippy.toml b/clippy.toml index bb74a2cd9..2fbcb11b9 100644 --- a/clippy.toml +++ b/clippy.toml @@ -2,6 +2,7 @@ disallowed-methods = [ { path = "rand::thread_rng", reason = "Do not use rand::* directly. Instead, use functions from random.rs" }, # We still have `std::fs` references. # { path = "std::fs", reason = "Do not use std::fs directly. Instead, use the FileStorageProvider." }, + { path = "vfs::PhysicalFS::new", reason = "Do not use vfs::PhysicalFS in tests. Instead, use the VirtualStorageProvider::new_overlay()." }, # Disallowed methods for the rayon crate to enforce execution within a specified thread pool instead of the global thread pool. { path = "rayon::iter::ParallelIterator::for_each", reason = "Use `for_each_in_pool` from rayon_utils.rs instead to enforce execution within a specified thread pool."}, { path = "rayon::iter::ParallelIterator::for_each_with", reason = "Use `for_each_with_in_pool` instead to enforce execution within a specified thread pool."}, diff --git a/diskann-disk/src/storage/quant/pq/pq_generation.rs b/diskann-disk/src/storage/quant/pq/pq_generation.rs index 63a8ce77f..1ea3a30af 100644 --- a/diskann-disk/src/storage/quant/pq/pq_generation.rs +++ b/diskann-disk/src/storage/quant/pq/pq_generation.rs @@ -194,7 +194,7 @@ mod pq_generation_tests { use diskann_utils::views::{MatrixView, MutMatrixView}; use diskann_vector::distance::Metric; use rstest::rstest; - use vfs::{FileSystem, MemoryFS, OverlayFS, PhysicalFS}; + use vfs::{FileSystem, MemoryFS, OverlayFS}; use super::{CompressionStage, PQGeneration, PQGenerationContext}; use crate::storage::quant::compressor::QuantCompressor; @@ -361,10 +361,7 @@ mod pq_generation_tests { #[rstest] fn test_pq_end_to_end_with_codebook() { - let storage_provider = VirtualStorageProvider::new(OverlayFS::new(&[ - MemoryFS::default().into(), - PhysicalFS::new(test_data_root()).into(), - ])); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let pool = create_thread_pool_for_test(); let dim = 128; @@ -415,10 +412,7 @@ mod pq_generation_tests { #[case] centers: usize, ) { //test the error cases for parameters: num_chunks > dim, num_chunks == 0, num_centers == 0 - let storage_provider = VirtualStorageProvider::new(OverlayFS::new(&[ - MemoryFS::default().into(), - PhysicalFS::new("tests/data/").into(), - ])); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let pool = create_thread_pool_for_test(); let max_k_means_reps = 10; let compressor = create_new_compressor( diff --git a/diskann-disk/src/utils/aligned_file_reader/storage_provider_aligned_file_reader.rs b/diskann-disk/src/utils/aligned_file_reader/storage_provider_aligned_file_reader.rs index 1b9a33ff6..2e0b88253 100644 --- a/diskann-disk/src/utils/aligned_file_reader/storage_provider_aligned_file_reader.rs +++ b/diskann-disk/src/utils/aligned_file_reader/storage_provider_aligned_file_reader.rs @@ -48,27 +48,21 @@ impl AlignedFileReader for StorageProviderAlignedFileReader { #[cfg(test)] mod tests { - use std::{ - fs::File, - io::{Seek, SeekFrom}, - }; + use std::io::{Seek, SeekFrom}; use diskann_providers::storage::VirtualStorageProvider; use diskann_utils::test_data_root; - use vfs::PhysicalFS; use super::*; use diskann_providers::common::AlignedBoxWithSlice; fn test_index_path() -> String { - test_data_root() - .join("disk_index_misc/disk_index_siftsmall_learn_256pts_R4_L50_A1.2_aligned_reader_test.index") - .to_string_lossy() + "/disk_index_misc/disk_index_siftsmall_learn_256pts_R4_L50_A1.2_aligned_reader_test.index" .to_string() } fn setup_reader() -> StorageProviderAlignedFileReader { - let storage_provider = VirtualStorageProvider::new(PhysicalFS::new("/")); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); StorageProviderAlignedFileReader::new(&storage_provider, &test_index_path()).unwrap() } @@ -104,7 +98,8 @@ mod tests { assert!(result.is_ok()); // Assert that the actual data is correct. - let mut file = File::open(test_index_path()).unwrap(); + let file_system = VirtualStorageProvider::new_overlay(test_data_root()); + let mut file = file_system.open_reader(&test_index_path()).unwrap(); for current_read in aligned_reads { let offset = current_read.offset(); let mut expected = vec![0; current_read.aligned_buf().len()]; diff --git a/diskann-disk/src/utils/partition.rs b/diskann-disk/src/utils/partition.rs index 04bc8e8ae..67a88ef54 100644 --- a/diskann-disk/src/utils/partition.rs +++ b/diskann-disk/src/utils/partition.rs @@ -428,7 +428,7 @@ mod partition_test { use diskann_providers::storage::VirtualStorageProvider; use diskann_providers::utils::create_thread_pool_for_test; use diskann_utils::test_data_root; - use vfs::{MemoryFS, OverlayFS, PhysicalFS}; + use vfs::{MemoryFS, OverlayFS}; use super::*; @@ -468,10 +468,7 @@ mod partition_test { let num_points: u32 = 100; let dim: usize = 10; - let base_filesystem = PhysicalFS::new(test_data_root()); - let memory_filesystem = MemoryFS::new(); - let vfs = OverlayFS::new(&[memory_filesystem.into(), base_filesystem.into()]); - let storage_provider = VirtualStorageProvider::new(vfs); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); { let writer = storage_provider.create_for_write(dataset_path).unwrap(); let mut dataset_writer = CachedWriter::>::new( @@ -556,10 +553,7 @@ mod partition_test { #[test] fn test_partition_with_ram_budget() -> ANNResult<()> { - let base_filesystem = PhysicalFS::new(test_data_root()); - let memory_filesystem = MemoryFS::new(); - let vfs = OverlayFS::new(&[memory_filesystem.into(), base_filesystem.into()]); - let storage_provider = VirtualStorageProvider::new(vfs); + let storage_provider = VirtualStorageProvider::new_overlay(test_data_root()); let dataset_file = "/sift/siftsmall_learn.bin"; let mut file = storage_provider.open_reader(dataset_file).unwrap(); let mut data = vec![]; diff --git a/diskann-providers/benches/benchmarks/copy_aligned_data_bench.rs b/diskann-providers/benches/benchmarks/copy_aligned_data_bench.rs index 82eba85a8..893f7cd2d 100644 --- a/diskann-providers/benches/benchmarks/copy_aligned_data_bench.rs +++ b/diskann-providers/benches/benchmarks/copy_aligned_data_bench.rs @@ -22,6 +22,7 @@ pub const BENCHMARK_ID: &str = "copy_aligned_data"; pub fn benchmark_copy_aligned_data(c: &mut Criterion) { let tmp_dir = TempDir::with_prefix(BENCHMARK_ID).expect("Failed to create temporary directory"); // Use physical file system rather than memory for testing the actual disk read/write + #[allow(clippy::disallowed_methods)] let storage_provider = VirtualStorageProvider::new(PhysicalFS::new(tmp_dir.path())); let num_points = 1_000_000; diff --git a/diskann-providers/benches/benchmarks_iai/copy_aligned_data_bench_iai.rs b/diskann-providers/benches/benchmarks_iai/copy_aligned_data_bench_iai.rs index 93df93f07..31406e379 100644 --- a/diskann-providers/benches/benchmarks_iai/copy_aligned_data_bench_iai.rs +++ b/diskann-providers/benches/benchmarks_iai/copy_aligned_data_bench_iai.rs @@ -28,6 +28,7 @@ iai_callgrind::library_benchmark_group!( pub fn benchmark_copy_aligned_data_iai() { let tmp_dir = TempDir::with_prefix(BENCHMARK_ID).expect("Failed to create temporary directory"); // Use physical file system rather than memory for testing the actual disk read/write + #[allow(clippy::disallowed_methods)] let storage_provider = VirtualStorageProvider::new(PhysicalFS::new(tmp_dir.path())); let num_points = 1_000_000; diff --git a/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs b/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs index cf5521470..7939bb93f 100644 --- a/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs +++ b/diskann-providers/src/model/pq/fixed_chunk_pq_table.rs @@ -768,7 +768,6 @@ mod fixed_chunk_pq_table_test { distance::{InnerProduct, SquaredL2}, }; use itertools::iproduct; - use vfs::PhysicalFS; use super::*; use crate::{ @@ -945,9 +944,8 @@ mod fixed_chunk_pq_table_test { .parent() .unwrap() .to_path_buf(); - let vfs = PhysicalFS::new(workspace_root); - let storage_provider = VirtualStorageProvider::new(vfs); - let pq_pivots_path: &str = "test_data/sift/siftsmall_learn_pq_pivots.bin"; + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); + let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; let (dim, pq_table, centroids, chunk_offsets) = load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap(); let fixed_chunk_pq_table = FixedChunkPQTable::new( @@ -1021,10 +1019,9 @@ mod fixed_chunk_pq_table_test { .parent() .unwrap() .to_path_buf(); - let vfs = PhysicalFS::new(workspace_root); - let storage_provider = VirtualStorageProvider::new(vfs); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); - let pq_pivots_path: &str = "test_data/sift/siftsmall_learn_pq_pivots.bin"; + let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; let (dim, pq_table, centroids, chunk_offsets) = load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap(); let fixed_chunk_pq_table = FixedChunkPQTable::new( @@ -1121,10 +1118,9 @@ mod fixed_chunk_pq_table_test { .parent() .unwrap() .to_path_buf(); - let vfs = PhysicalFS::new(workspace_root); - let storage_provider = VirtualStorageProvider::new(vfs); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); - let pq_pivots_path: &str = "test_data/sift/siftsmall_learn_pq_pivots.bin"; + let pq_pivots_path: &str = "/test_data/sift/siftsmall_learn_pq_pivots.bin"; let (dim, pq_table, centroids, chunk_offsets) = load_pq_pivots_bin(pq_pivots_path, &1, &storage_provider).unwrap(); diff --git a/diskann-providers/src/model/pq/pq_construction.rs b/diskann-providers/src/model/pq/pq_construction.rs index 609670fc8..cbaf478e6 100644 --- a/diskann-providers/src/model/pq/pq_construction.rs +++ b/diskann-providers/src/model/pq/pq_construction.rs @@ -1009,7 +1009,7 @@ mod pq_test { use diskann::utils::IntoUsize; use rand_distr::{Distribution, Uniform}; use rstest::rstest; - use vfs::{MemoryFS, OverlayFS, PhysicalFS}; + use vfs::{MemoryFS, OverlayFS}; use super::*; use crate::{ @@ -1248,10 +1248,7 @@ mod pq_test { .parent() .unwrap() .to_path_buf(); - let storage_provider = VirtualStorageProvider::new(OverlayFS::new(&[ - MemoryFS::default().into(), - PhysicalFS::new(workspace_root).into(), - ])); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let pq_storage = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); let pool = create_thread_pool_for_test(); let result = generate_pq_pivots( @@ -1435,10 +1432,7 @@ mod pq_test { .parent() .unwrap() .to_path_buf(); - let storage_provider = VirtualStorageProvider::new(OverlayFS::new(&[ - MemoryFS::default().into(), - PhysicalFS::new(workspace_root).into(), - ])); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let data_file = "/test_data/sift/siftsmall_learn.bin"; let pq_pivots_path = "/pq_pivots_validation.bin"; @@ -1689,10 +1683,7 @@ mod pq_test { .parent() .unwrap() .to_path_buf(); - let storage_provider = VirtualStorageProvider::new(OverlayFS::new(&[ - MemoryFS::default().into(), - PhysicalFS::new(workspace_root).into(), - ])); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); type ReaderType = as StorageReadProvider>::Reader; let data_file = "/test_data/sift/siftsmall_learn.bin"; @@ -1798,10 +1789,7 @@ mod pq_test { .parent() .unwrap() .to_path_buf(); - let storage_provider = VirtualStorageProvider::new(OverlayFS::new(&[ - MemoryFS::default().into(), - PhysicalFS::new(workspace_root).into(), - ])); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let data_file = "/test_data/sift/siftsmall_learn.bin"; let pq_pivots_path = "/pq_pivots_validation.bin"; diff --git a/diskann-providers/src/storage/pq_storage.rs b/diskann-providers/src/storage/pq_storage.rs index 3f7b2e327..ddf12593b 100644 --- a/diskann-providers/src/storage/pq_storage.rs +++ b/diskann-providers/src/storage/pq_storage.rs @@ -436,7 +436,7 @@ impl PQStorage { mod pq_storage_tests { use crate::storage::VirtualStorageProvider; - use vfs::{MemoryFS, OverlayFS, PhysicalFS}; + use vfs::MemoryFS; use super::*; use crate::utils::{gen_random_slice, read_metadata}; @@ -485,8 +485,7 @@ mod pq_storage_tests { .parent() .unwrap() .to_path_buf(); - let filesystem = PhysicalFS::new(workspace_root); - let storage_provider = VirtualStorageProvider::new(filesystem); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); assert!(result.pivot_data_exist(&storage_provider)); @@ -501,8 +500,7 @@ mod pq_storage_tests { .parent() .unwrap() .to_path_buf(); - let filesystem = PhysicalFS::new(workspace_root); - let storage_provider = VirtualStorageProvider::new(filesystem); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); let (npt, dim) = result .read_existing_pivot_metadata(&storage_provider) @@ -518,8 +516,7 @@ mod pq_storage_tests { .parent() .unwrap() .to_path_buf(); - let filesystem = PhysicalFS::new(workspace_root); - let storage_provider = VirtualStorageProvider::new(filesystem); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let result = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); let (pq_pivot_data, centroids, chunk_offsets, _) = result .load_existing_pivot_data(&1, &256, &128, &storage_provider, false) @@ -542,10 +539,7 @@ mod pq_storage_tests { .parent() .unwrap() .to_path_buf(); - let base_filesystem = PhysicalFS::new(workspace_root); - let memory_filesystem = MemoryFS::new(); - let vfs = OverlayFS::new(&[memory_filesystem.into(), base_filesystem.into()]); - let storage_provider = VirtualStorageProvider::new(vfs); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let pq_storage = PQStorage::new(PQ_PIVOT_PATH, PQ_COMPRESSED_PATH, Some(DATA_FILE)); // Write OPQ test data diff --git a/diskann-providers/src/storage/virtual_storage_provider.rs b/diskann-providers/src/storage/virtual_storage_provider.rs index bf02fb05e..e591c0c15 100644 --- a/diskann-providers/src/storage/virtual_storage_provider.rs +++ b/diskann-providers/src/storage/virtual_storage_provider.rs @@ -167,6 +167,7 @@ impl VirtualStorageProvider { /// Create a two-layer overlay filesystem with an in-memory filesystem for writes /// on top of the physical filesystem for reads. pub fn new_overlay>(path: P) -> Self { + #[allow(clippy::disallowed_methods)] let base_filesystem = PhysicalFS::new(path); let memory_filesystem = MemoryFS::new(); let overlay_filesystem = diff --git a/diskann-providers/src/utils/kmeans.rs b/diskann-providers/src/utils/kmeans.rs index 50cbe0f11..034210f0b 100644 --- a/diskann-providers/src/utils/kmeans.rs +++ b/diskann-providers/src/utils/kmeans.rs @@ -723,10 +723,7 @@ mod kmeans_test { .parent() .unwrap() .to_path_buf(); - let base_filesystem = PhysicalFS::new(workspace_root); - let memory_filesystem = MemoryFS::new(); - let vfs = OverlayFS::new(&[memory_filesystem.into(), base_filesystem.into()]); - let storage_provider = VirtualStorageProvider::new(vfs); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let mut reader = std::io::BufReader::new(storage_provider.open_reader(test_data_path).unwrap()); for _ in 0..256 { @@ -949,7 +946,6 @@ mod kmeans_test { } use proptest::{prelude::*, test_runner::Config}; - use vfs::{MemoryFS, OverlayFS, PhysicalFS}; proptest! { #![proptest_config(Config { diff --git a/diskann-providers/src/utils/normalizing_util.rs b/diskann-providers/src/utils/normalizing_util.rs index f7c1e4340..517b18afc 100644 --- a/diskann-providers/src/utils/normalizing_util.rs +++ b/diskann-providers/src/utils/normalizing_util.rs @@ -139,7 +139,6 @@ pub fn normalize_data_internal( #[cfg(test)] mod normalizing_utils_test { use crate::storage::{StorageReadProvider, VirtualStorageProvider}; - use vfs::{MemoryFS, OverlayFS, PhysicalFS}; use super::*; use crate::utils::{create_thread_pool_for_test, storage_utils::*}; @@ -154,10 +153,7 @@ mod normalizing_utils_test { .parent() .unwrap() .to_path_buf(); - let base_filesystem = PhysicalFS::new(workspace_root); - let memory_filesystem = MemoryFS::new(); - let vfs = OverlayFS::new(&[memory_filesystem.into(), base_filesystem.into()]); - let storage_provider = VirtualStorageProvider::new(vfs); + let storage_provider = VirtualStorageProvider::new_overlay(workspace_root); let pool = create_thread_pool_for_test(); normalize_data_file(in_file_name, out_file_name, &storage_provider, &pool).unwrap(); diff --git a/diskann-tools/src/utils/relative_contrast.rs b/diskann-tools/src/utils/relative_contrast.rs index 65a30fac7..41681cd17 100644 --- a/diskann-tools/src/utils/relative_contrast.rs +++ b/diskann-tools/src/utils/relative_contrast.rs @@ -109,7 +109,7 @@ mod relative_contrast_tests { use diskann_vector::distance::Metric; use half::f16; use rand::Rng; - use vfs::{MemoryFS, PhysicalFS}; + use vfs::MemoryFS; use super::*; use crate::utils::{ground_truth::compute_ground_truth_from_datafiles, GraphDataHalfVector}; @@ -201,10 +201,9 @@ mod relative_contrast_tests { /// Expectation: relative contrast > 1.5 #[test] fn test_compute_relative_contrast_with_sift_files() { - let filesystem = PhysicalFS::new(diskann_utils::test_data_root().join("sift")); - - let storage_provider = VirtualStorageProvider::new(filesystem); - let base_file_path = "siftsmall_learn_256pts.fbin"; + let storage_provider = + VirtualStorageProvider::new_overlay(diskann_utils::test_data_root().join("sift")); + let base_file_path = "/siftsmall_learn_256pts.fbin"; assert!( storage_provider.exists(base_file_path), @@ -218,7 +217,7 @@ mod relative_contrast_tests { .map(|_| f16::from_f32(rng.random_range(0.0..1.0))) .collect(); - let query_file_path = "query.bin"; + let query_file_path = "/query.bin"; { let mut query_writer = storage_provider @@ -233,7 +232,7 @@ mod relative_contrast_tests { } // Generate ground truth file using compute_ground_truth_from_datafiles - let gt_file_path = "ground_truth.bin"; + let gt_file_path = "/ground_truth.bin"; let recall_at = 3; compute_ground_truth_from_datafiles::( &storage_provider,