diff --git a/.gitignore b/.gitignore index 8487ac3..07ce38b 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,7 @@ *.out *.app +# VS Code +.vscode + /build \ No newline at end of file diff --git a/results/diskann_results.txt b/results/diskann_results.txt new file mode 100644 index 0000000..fefcc70 --- /dev/null +++ b/results/diskann_results.txt @@ -0,0 +1,78 @@ +Parameters set: C=750, L=100, R=64, alpha=1.2, pruning_rule=0 +Reading bin file /nvmessd1/fbv4/prec1M.fbin ...Metadata: #pts = 1010812, #dims = 384, aligned_dim = 384... done. +Initialized Vamana Object with 1010812 points, dim=384. +Starting vamana build with listSize L=100, degree bound R=64, and alpha=1.2 +Medoid identified as 121334 + +0% of build completed... +10% of build completed... +20% of build completed... +30% of build completed... +40% of build completed... +50% of build completed... +60% of build completed... +70% of build completed... +80% of build completed... +90% of build completed... +100% of build completed... +Starting final cleanup..done. + Percentile Out Degree In Degree +======================================================= + 0 1 0 + 10 5 4 + 20 14 13 + 30 33 25 + 40 57 34 + 50 64 41 + 60 64 48 + 70 64 57 + 80 64 69 + 90 64 91 + 100 64 1853 +0.159% points are unreachable and 121334 is the most popular in-degree node. +Average degree 46.1 +Total build time: 176s +Writing bin: /nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2_data.bin +bin: #pts = 1010812, #dims = 384, size = 1552607240B +Finished writing bin. +Writing bin: /nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2_tags.bin +bin: #pts = 1010812, #dims = 1, size = 4043256B +Finished writing bin. +Reading bin file /nvmessd1/fbv4/queries.fbin ...Metadata: #pts = 13265, #dims = 384, aligned_dim = 384... done. + Stat(/nvmessd1/fbv4/prec1M_gt100.bin) returned: 0 +Opened: /nvmessd1/fbv4/prec1M_gt100.bin, size: 10612008, cache_size: 10612008 +Reading truthset file /nvmessd1/fbv4/prec1M_gt100.bin ... +Metadata: #pts = 13265, #dims = 100... +Initialized Empty Vamana Object. +Reading bin file /nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2_data.bin ...Metadata: #pts = 1010812, #dims = 384, aligned_dim = 384... done. +Reading bin file /nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2_tags.bin ... +Metadata: #pts = 1010812, #dims = 1. + Stat(/nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2_labels.txt) returned: -1 + Stat(/nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2_universal_label.txt) returned: -1 + Stat(/nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2_labels_to_medoids.txt) returned: -1 +Loading vamana index /nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2.....done. Vamana has 1010812 nodes and 46618450 out-edges +Average out degree: 46.12 +Vamana loaded + Ls QPS Mean Latency (mus) 99.9% Latency Recall@50 Mean Cmps. Mean Hops 99.9% Cmps. 99.9% Hops +========================================================================================================================= + 50 541.45 1846.24 9149.36 63.40 3206.93 61.64 5830.00 109.00 + 75 436.28 2292.00 3844.28 70.03 4332.75 85.89 7265.00 139.00 + 100 346.48 2886.06 4673.74 74.36 5427.74 110.27 8551.00 165.00 + 150 248.03 4031.63 6059.32 79.82 7554.02 159.32 11098.00 213.00 + 200 192.08 5206.02 7170.83 83.23 9619.01 208.68 13168.00 255.00 +Done searching. Now saving results +Writing bin: /diskann_results_50_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /diskann_results_75_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /diskann_results_100_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /diskann_results_150_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /diskann_results_200_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. diff --git a/results/hnsw_results.txt b/results/hnsw_results.txt new file mode 100644 index 0000000..cd9ecd7 --- /dev/null +++ b/results/hnsw_results.txt @@ -0,0 +1,94 @@ +Reading bin file /nvmessd1/fbv4/prec1M.fbin ...Metadata: #pts = 1010812, #dims = 384, aligned_dim = 384... done. +Initialized HNSW Object with 1010812 points, dim=384. +Writing bin: /tmp/temp_data +bin: #pts = 101209, #dims = 384, size = 155457032B +Finished writing bin. +Reading bin file /tmp/temp_data ...Metadata: #pts = 101209, #dims = 384, aligned_dim = 384... done. +Initialized HNSW Object with 101209 points, dim=384. +Starting hnsw build with listSize L=100, degree bound R=64, and alpha=1 on HNSW level 0 +Medoid identified as 66687 +10% of build completed +20% of build completed +30% of build completed +40% of build completed +50% of build completed +60% of build completed +70% of build completed +80% of build completed +90% of build completed +100% of build completed +110% of build completed +Starting final cleanup..done. +Total build time: 7.71948s +Average out degree: 25.5526 +Starting hnsw build with listSize L=100, degree bound R=64, and alpha=1 on HNSW level 1 +Medoid identified as 121334 +10% of build completed +20% of build completed +30% of build completed +40% of build completed +50% of build completed +60% of build completed +70% of build completed +80% of build completed +90% of build completed +100% of build completed +110% of build completed +Starting final cleanup..done. +Total build time: 216.582s +Average out degree: 30.0825 +Writing bin: /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_0_data.bin +bin: #pts = 101209, #dims = 384, size = 155457032B +Finished writing bin. +Writing bin: /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_0_tags.bin +bin: #pts = 101209, #dims = 1, size = 404844B +Finished writing bin. +Writing bin: /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_1_data.bin +bin: #pts = 1010812, #dims = 384, size = 1552607240B +Finished writing bin. +Writing bin: /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_1_tags.bin +bin: #pts = 1010812, #dims = 1, size = 4043256B +Finished writing bin. +Reading bin file /nvmessd1/fbv4/queries.fbin ...Metadata: #pts = 13265, #dims = 384, aligned_dim = 384... done. + Stat(/nvmessd1/fbv4/prec1M_gt100.bin) returned: 0 +Opened: /nvmessd1/fbv4/prec1M_gt100.bin, size: 10612008, cache_size: 10612008 +Reading truthset file /nvmessd1/fbv4/prec1M_gt100.bin ... +Metadata: #pts = 13265, #dims = 100... +Initialized Empty HNSW Object at level 0 +Initialized Empty HNSW Object at level 1 +Reading bin file /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_0_data.bin ...Metadata: #pts = 101209, #dims = 384, aligned_dim = 384... done. +Reading bin file /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_0_tags.bin ... +Metadata: #pts = 101209, #dims = 1. + Stat(/nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_0_labels.txt) returned: -1 + Stat(/nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_0_universal_label.txt) returned: -1 +Loading hnsw index /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2.....done. HNSW has 101209 nodes and 2586154 out-edgesAverage out degree: 25.55 +Reading bin file /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_1_data.bin ...Metadata: #pts = 1010812, #dims = 384, aligned_dim = 384... done. +Reading bin file /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_1_tags.bin ... +Metadata: #pts = 1010812, #dims = 1. + Stat(/nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_1_labels.txt) returned: -1 + Stat(/nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2_1_universal_label.txt) returned: -1 +Loading hnsw index /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2.....done. HNSW has 1010812 nodes and 30407802 out-edgesAverage out degree: 30.08 +HNSW loaded + Ls QPS Mean Latency (mus) 99.9% Latency Recall@50 Mean Cmps. Mean Hops 99.9% Cmps. 99.9% Hops +========================================================================================================================= + 50 414.68 2410.85 10848.68 60.12 4371.73 107.99 5964.00 137.00 + 75 319.63 3128.54 4045.87 67.64 6164.12 156.81 7805.00 182.00 + 100 244.31 4093.08 5202.52 72.54 7913.30 206.06 9744.00 228.00 + 150 170.24 5874.00 7300.73 78.75 11289.65 305.20 13669.00 323.00 + 200 130.21 7679.69 9555.61 82.60 14530.51 404.75 17512.00 422.00 +Done searching. Now saving results +Writing bin: /hnsw_results_50_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /hnsw_results_75_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /hnsw_results_100_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /hnsw_results_150_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. +Writing bin: /hnsw_results_200_idx_uint32.bin +bin: #pts = 13265, #dims = 50, size = 2653008B +Finished writing bin. diff --git a/run-script.sh b/run-script.sh new file mode 100644 index 0000000..4da83b2 --- /dev/null +++ b/run-script.sh @@ -0,0 +1,9 @@ +# HNSW +cd build/tests +./build_hnsw float l2 /nvmessd1/fbv4/prec1M.fbin /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2 64 100 0 0.1 2 48 >> ../../results/hnsw_results.txt +./search_hnsw float l2 /nvmessd1/fbv4/avarhade/grann/hnsw_prec1M_L100_R64_a1.2 2 1 /nvmessd1/fbv4/queries.fbin /nvmessd1/fbv4/prec1M_gt100.bin 50 /hnsw_results 50 75 100 150 200 >> ../../results/hnsw_results.txt + +# DiskANN +# cd build/tests +# ./build_vamana float l2 /nvmessd1/fbv4/prec1M.fbin 0 /nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2 64 100 1.2 48 >> ../../results/diskann_results.txt +# ./search_vamana float l2 /nvmessd1/fbv4/avarhade/grann/diskann_prec1M_L100_R64_a1.2 1 /nvmessd1/fbv4/queries.fbin /nvmessd1/fbv4/prec1M_gt100.bin 50 /diskann_results 0 50 75 100 150 200 >> ../../results/diskann_results.txt \ No newline at end of file diff --git a/src/aux_compute_groundtruth.cpp b/src/aux_compute_groundtruth.cpp index 2608229..27caf1d 100644 --- a/src/aux_compute_groundtruth.cpp +++ b/src/aux_compute_groundtruth.cpp @@ -274,7 +274,7 @@ inline void save_groundtruth_as_one_file(const std::string filename, } template -int aux_main(int argv, char **argc) { +int aux_main([[maybe_unused]] int argv, char **argc) { size_t npoints, nqueries, dim; std::string base_file(argc[2]); std::string query_file(argc[3]); diff --git a/src/hnsw.cpp b/src/hnsw.cpp index 9c365c9..47dcb77 100644 --- a/src/hnsw.cpp +++ b/src/hnsw.cpp @@ -104,9 +104,14 @@ namespace grann { << this->_num_points << " points. " << std::endl; return; } + _u64 total_out_degree = 0; + for (const auto& neighbors : this->_out_nbrs) { + total_out_degree += neighbors.size(); + } + double avg_out_degree = static_cast(total_out_degree) / this->_num_points; std::cout << "..done. HNSW has " << nodes << " nodes and " << cc - << " out-edges" << std::endl; + << " out-edges" << "Average out degree: " << avg_out_degree << std::endl; } /************************************************************** @@ -254,6 +259,11 @@ namespace grann { this->_out_nbrs[node].emplace_back(id); } } + _u64 total_out_degree = 0; + for (const auto& neighbors : this->_out_nbrs) { + total_out_degree += neighbors.size(); + } + double avg_out_degree = static_cast(total_out_degree) / this->_num_points; std::cout << "done." << std::endl; this->_has_built = true; @@ -262,13 +272,14 @@ namespace grann { std::cout << "Total build time: " << ((double) build_timer.elapsed() / (double) 1000000) << "s" << std::endl; + std::cout << "Average out degree: " << avg_out_degree << std::endl; } template _u32 HNSW::search(const T *query, _u32 res_count, const Parameters &search_params, _u32 *indices, float *distances, QueryStats *stats, - std::vector