Patch summary: adds a composite "check-files" action and a Linux/macOS/Windows
CI workflow, ignores build/, rewords the README introduction, and adds MSVC
branches (aligned allocation, bit scan, prefetch, noinline) to the C++ sources.

diff --git a/.github/actions/check-files/action.yml b/.github/actions/check-files/action.yml
new file mode 100644
index 0000000..a4f1e61
--- /dev/null
+++ b/.github/actions/check-files/action.yml
@@ -0,0 +1,35 @@
+name: 'NSparse Check Files Changed'
+description: 'Check if specific files have changed for nsparse repo'
+inputs:
+  files:
+    description: 'Additional files to check for changes'
+    default: ''
+outputs:
+  files_changed:
+    description: 'Whether any files changed'
+    value: ${{ steps.changed-files.outputs.any_changed }}
+runs:
+  using: 'composite'
+  steps:
+    - name: Combine files
+      id: combine-files
+      shell: bash
+      # Pass the caller-supplied input through the environment instead of
+      # expanding it into the script text, so its contents are never parsed
+      # as shell code.
+      env:
+        INPUT_FILES: ${{ inputs.files }}
+      run: |
+        DEFAULT_FILES="CMakeLists.txt,nsparse/**,tests/**,cmake/**,benchmarks/**"
+        if [ -n "$INPUT_FILES" ]; then
+          COMBINED_FILES="$DEFAULT_FILES,$INPUT_FILES"
+        else
+          COMBINED_FILES="$DEFAULT_FILES"
+        fi
+        echo "combined_files=$COMBINED_FILES" >> "$GITHUB_OUTPUT"
+    - name: Get changed files
+      id: changed-files
+      uses: tj-actions/changed-files@v47.0.0
+      with:
+        files: ${{ steps.combine-files.outputs.combined_files }}
+        files_separator: ","
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 0000000..9035be2
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,204 @@
+name: Build and Test nsparse
+on:
+  push:
+    branches:
+      - "*"
+      - "feature/**"
+  pull_request:
+    branches:
+      - "*"
+      - "feature/**"
+
+jobs:
+  check-files:
+    name: Check files for Build and Test
+    runs-on: ubuntu-latest
+    outputs:
+      RUN_BUILD_AND_TEST: ${{ steps.check.outputs.files_changed }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Check files
+        id: check
+        uses: ./.github/actions/check-files
+        with:
+          files: .github/workflows/CI.yml
+
+  Get-CI-Image-Tag:
+    needs: check-files
+    if: needs.check-files.outputs.RUN_BUILD_AND_TEST == 'true'
+    uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main
+    with:
+      product: opensearch
+
+  Build-nsparse-Linux:
+    name: Build and Test nsparse on Linux
+    runs-on: ubuntu-latest
+    needs: Get-CI-Image-Tag
+    container:
+      image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
+      options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }}
+    steps:
+      - name: Run start commands
+        run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }}
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # Generic build
+      - name: Configure (generic)
+        run: |
+          cmake -B build \
+            -DNSPARSE_ENABLE_TESTS=ON \
+            -DNSPARSE_OPT_LEVEL=generic \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_EXE_LINKER_FLAGS="-static-libstdc++ -static-libgcc" \
+            -DCMAKE_SHARED_LINKER_FLAGS="-static-libstdc++ -static-libgcc"
+
+      - name: Build (generic)
+        run: cmake --build build -j$(nproc)
+
+      - name: Test (generic)
+        run: ctest --test-dir build --output-on-failure
+
+      # Detect SIMD and build+test the best available level
+      - name: Detect SIMD capability
+        id: detect-simd
+        run: |
+          ARCH=$(uname -m)
+          if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then
+            if cat /proc/cpuinfo 2>/dev/null | grep -qi sve; then
+              echo "opt_level=sve" >> $GITHUB_OUTPUT
+            else
+              echo "opt_level=neon" >> $GITHUB_OUTPUT
+            fi
+          elif [ "$ARCH" = "x86_64" ]; then
+            if lscpu | grep -qi avx512f && lscpu | grep -qi avx512cd && lscpu | grep -qi avx512vl && lscpu | grep -qi avx512dq && lscpu | grep -qi avx512bw; then
+              echo "opt_level=avx512" >> $GITHUB_OUTPUT
+            elif lscpu | grep -qi avx2; then
+              echo "opt_level=avx2" >> $GITHUB_OUTPUT
+            else
+              echo "opt_level=" >> $GITHUB_OUTPUT
+            fi
+          else
+            echo "opt_level=" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Configure (SIMD)
+        if: steps.detect-simd.outputs.opt_level != ''
+        run: |
+          rm -rf build
+          cmake -B build \
+            -DNSPARSE_ENABLE_TESTS=ON \
+            -DNSPARSE_OPT_LEVEL=${{ steps.detect-simd.outputs.opt_level }} \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_EXE_LINKER_FLAGS="-static-libstdc++ -static-libgcc" \
+            -DCMAKE_SHARED_LINKER_FLAGS="-static-libstdc++ -static-libgcc"
+
+      - name: Build (SIMD)
+        if: steps.detect-simd.outputs.opt_level != ''
+        run: cmake --build build -j$(nproc)
+
+      - name: Test (SIMD)
+        if: steps.detect-simd.outputs.opt_level != ''
+        run: ctest --test-dir build --output-on-failure
+
+  Build-nsparse-MacOS:
+    name: Build and Test nsparse on MacOS
+    needs: check-files
+    if: needs.check-files.outputs.RUN_BUILD_AND_TEST == 'true'
+    runs-on: macos-15
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: brew install libomp
+
+      - name: Detect SIMD capability
+        id: detect-simd
+        run: |
+          if sysctl -n machdep.cpu.features machdep.cpu.leaf7_features 2>/dev/null | grep -qi AVX2; then
+            echo "opt_level=avx2" >> $GITHUB_OUTPUT
+          elif uname -m | grep -q arm64; then
+            echo "opt_level=neon" >> $GITHUB_OUTPUT
+          else
+            echo "opt_level=generic" >> $GITHUB_OUTPUT
+          fi
+
+      # Generic build
+      - name: Configure (generic)
+        run: |
+          cmake -B build \
+            -DNSPARSE_ENABLE_TESTS=ON \
+            -DNSPARSE_OPT_LEVEL=generic \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_C_COMPILER=/usr/bin/clang \
+            -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \
+            -DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include" \
+            -DOpenMP_CXX_LIB_NAMES="omp" \
+            -DOpenMP_omp_LIBRARY=$(brew --prefix libomp)/lib/libomp.dylib
+
+      - name: Build (generic)
+        run: cmake --build build -j$(sysctl -n hw.ncpu)
+
+      - name: Test (generic)
+        run: ctest --test-dir build --output-on-failure
+
+      # SIMD build if different from generic
+      - name: Configure (SIMD)
+        if: steps.detect-simd.outputs.opt_level != 'generic'
+        run: |
+          rm -rf build
+          cmake -B build \
+            -DNSPARSE_ENABLE_TESTS=ON \
+            -DNSPARSE_OPT_LEVEL=${{ steps.detect-simd.outputs.opt_level }} \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_C_COMPILER=/usr/bin/clang \
+            -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \
+            -DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp -I$(brew --prefix libomp)/include" \
+            -DOpenMP_CXX_LIB_NAMES="omp" \
+            -DOpenMP_omp_LIBRARY=$(brew --prefix libomp)/lib/libomp.dylib
+
+      - name: Build (SIMD)
+        if: steps.detect-simd.outputs.opt_level != 'generic'
+        run: cmake --build build -j$(sysctl -n hw.ncpu)
+
+      - name: Test (SIMD)
+        if: steps.detect-simd.outputs.opt_level != 'generic'
+        run: ctest --test-dir build --output-on-failure
+
+  Build-nsparse-Windows:
+    name: Build and Test nsparse on Windows
+    needs: check-files
+    if: needs.check-files.outputs.RUN_BUILD_AND_TEST == 'true'
+    runs-on: windows-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Configure (generic)
+        run: |
+          cmake -B build `
+            -DNSPARSE_ENABLE_TESTS=ON `
+            -DNSPARSE_OPT_LEVEL=generic `
+            -DCMAKE_BUILD_TYPE=Release
+
+      - name: Build (generic)
+        run: cmake --build build --config Release -j $env:NUMBER_OF_PROCESSORS
+
+      - name: Test (generic)
+        run: ctest --test-dir build --build-config Release --output-on-failure
+
+  check-results:
+    # Get-CI-Image-Tag is listed so that a failure there (which leaves the
+    # Linux job skipped rather than failed) still fails this gate.
+    needs: [check-files, Get-CI-Image-Tag, Build-nsparse-Linux, Build-nsparse-MacOS, Build-nsparse-Windows]
+    if: always()
+    name: Check results
+    runs-on: ubuntu-latest
+    steps:
+      - name: Fail if build or test failed
+        # Jobs skipped because no relevant files changed report 'skipped',
+        # which is neither 'failure' nor 'cancelled', so the gate passes.
+        if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')
+        run: exit 1
diff --git a/.gitignore b/.gitignore
index e30345c..650a18f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,3 +46,6 @@ venv/
 
 # third_party
 third_party/
+
+# build
+build/
diff --git a/README.md b/README.md
index 5c7d974..2743004 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,19 @@
-![OpenSearch logo](OpenSearch.svg)
+
 
-- [Introduction](#introduction)
+- [Introduction](#neural-sparse-cpp)
 - [Project Resources](#project-resources)
 - [Project Style Guidelines](#project-style-guidelines)
 - [Code of Conduct](#code-of-conduct)
 - [License](#license)
 - [Copyright](#copyright)
 
-## Introduction
+## neural-sparse-cpp
 
-**neural-sparse-cpp** is a C++ library for high-performance sparse vector similarity search, developed as part of the [OpenSearch Project](https://opensearch.org/). It implements the SEISMIC (Sparse Embeddings In Search via Inverted Multi-Index Clustering) algorithm for approximate nearest neighbor search over sparse vectors.
+**neural-sparse-cpp** is a C++ library for high-performance sparse vector similarity search, developed as part of the [OpenSearch Project](https://opensearch.org/). It provides multiple index types for nearest neighbor search over sparse vectors.
 
 Key features include:
 
-- SEISMIC-based inverted index with clustering for fast approximate search
+- Multiple index types: inverted index, SEISMIC, and SEISMIC with scalar quantization
 - Scalar quantization support for reduced memory usage
 - SIMD-optimized distance computations (AVX2, AVX512, NEON, SVE)
 - ID mapping and ID selector filtering
diff --git a/nsparse/cluster/random_kmeans.cpp b/nsparse/cluster/random_kmeans.cpp
index 2c60874..ebd9ed7 100644
--- a/nsparse/cluster/random_kmeans.cpp
+++ b/nsparse/cluster/random_kmeans.cpp
@@ -59,7 +59,7 @@ inline static size_t boundary_check_n_clusters(size_t n_docs,
 
     // Ensure at least one cluster
     n_clusters = n_clusters > n_docs ? n_docs : n_clusters;
-    n_clusters = std::max(1UL, n_clusters);
+    n_clusters = std::max(static_cast(1), n_clusters);
     return n_clusters;
 }
 
diff --git a/nsparse/cluster/random_kmeans.h b/nsparse/cluster/random_kmeans.h
index b56314a..2987cb0 100644
--- a/nsparse/cluster/random_kmeans.h
+++ b/nsparse/cluster/random_kmeans.h
@@ -11,6 +11,9 @@
 #define RANDOM_KMEANS_H
 
 #include
+#ifdef _MSC_VER
+#include
+#endif
 
 #include "nsparse/sparse_vectors.h"
 
@@ -22,12 +25,22 @@ class ClusterRepresentatives {
                            size_t alignmnt)
         : num_clusters_(num_clusters), sketch_size_(sketch_size) {
         // Align to 64-byte boundary for AVX-512
-
+#ifdef _MSC_VER
+        data = static_cast(_aligned_malloc(
+            num_clusters * sketch_size * sizeof(float), alignmnt));
+#else
         data = static_cast(std::aligned_alloc(
             alignmnt, num_clusters * sketch_size * sizeof(float)));
+#endif
     }
 
-    ~ClusterRepresentatives() { std::free(data); }
+    ~ClusterRepresentatives() {
+#ifdef _MSC_VER
+        _aligned_free(data);
+#else
+        std::free(data);
+#endif
+    }
 
     // Access element (i,j) where i is cluster index and j is dimension
     float& operator()(size_t i, size_t j) { return data[i * sketch_size_ + j]; }
diff --git a/nsparse/inverted_index.cpp b/nsparse/inverted_index.cpp
index 98f27eb..7813bca 100644
--- a/nsparse/inverted_index.cpp
+++ b/nsparse/inverted_index.cpp
@@ -9,6 +9,11 @@
 
 #include "nsparse/inverted_index.h"
 
+#ifdef _MSC_VER
+#include
+#pragma intrinsic(_BitScanForward64)
+#endif
+
 #include
 #include
 #include
@@ -146,7 +151,7 @@ void evaluate_window_candidates(std::vector& scorers,
                                 const uint64_t* bitmap,
                                 detail::TopKHolder& heap) {
     // Iterate only set bits in the bitmap.
-    // Each word covers 64 slots; __builtin_ctzll finds the next set bit.
+    // Each word covers 64 slots; ctzll finds the next set bit.
     static constexpr int kBitmapWords = kScoreWindowSize / 64;
     float threshold = heap.full() ? heap.peek_score() : 0.0F;
     float non_essential_sum = max_score_prefix[first_essential];
@@ -154,7 +159,13 @@ void evaluate_window_candidates(std::vector& scorers,
     for (int word_idx = 0; word_idx < kBitmapWords; ++word_idx) {
         uint64_t word = bitmap[word_idx];
         while (word != 0) {
+#ifdef _MSC_VER
+            unsigned long bit_pos;
+            _BitScanForward64(&bit_pos, word);
+            int bit = static_cast(bit_pos);
+#else
             int bit = __builtin_ctzll(word);
+#endif
             word &= word - 1;  // clear lowest set bit
             int slot = (word_idx << 6) | bit;
 
diff --git a/nsparse/invlists/inverted_lists.cpp b/nsparse/invlists/inverted_lists.cpp
index 5e5d63b..4aa3562 100644
--- a/nsparse/invlists/inverted_lists.cpp
+++ b/nsparse/invlists/inverted_lists.cpp
@@ -11,6 +11,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/nsparse/seismic_common.h b/nsparse/seismic_common.h
index d3f165b..b73e09b 100644
--- a/nsparse/seismic_common.h
+++ b/nsparse/seismic_common.h
@@ -140,7 +140,8 @@ inline std::vector build_inverted_lists_clusters(
     std::vector clustered_inverted_lists(
         inverted_lists_size);
 #pragma omp parallel for schedule(dynamic, 64)
-    for (size_t idx = 0; idx < inverted_lists_size; ++idx) {
+    for (int64_t idx = 0; idx < static_cast(inverted_lists_size);
+         ++idx) {
         auto& invlist = (*inverted_lists)[idx];
         const auto& doc_ids = invlist.prune_and_keep_doc_ids(lambda);
         InvertedListClusters inverted_list_clusters(
diff --git a/nsparse/utils/dense_vector_matrix.h b/nsparse/utils/dense_vector_matrix.h
index 42e5f80..cbe44ff 100644
--- a/nsparse/utils/dense_vector_matrix.h
+++ b/nsparse/utils/dense_vector_matrix.h
@@ -12,6 +12,9 @@
 
 #include
 #include
+#ifdef _MSC_VER
+#include
+#endif
 
 namespace nsparse::detail {
 
@@ -30,11 +33,22 @@ class DenseVectorMatrixT {
 
     DenseVectorMatrixT(size_t row, size_t dimension)
         : rows_(row), dimension_(dimension) {
+#ifdef _MSC_VER
+        data_ = static_cast(
+            _aligned_malloc(row * dimension * sizeof(T), MATRIX_ALIGNMENT));
+#else
         data_ = static_cast(
             std::aligned_alloc(MATRIX_ALIGNMENT, row * dimension * sizeof(T)));
+#endif
     }
 
-    ~DenseVectorMatrixT() { std::free(data_); }
+    ~DenseVectorMatrixT() {
+#ifdef _MSC_VER
+        _aligned_free(data_);
+#else
+        std::free(data_);
+#endif
+    }
 
     T get(size_t row, size_t col) const {
         return data_[row * dimension_ + col];
diff --git a/nsparse/utils/prefetch.h b/nsparse/utils/prefetch.h
index 9fd4ada..72b8784 100644
--- a/nsparse/utils/prefetch.h
+++ b/nsparse/utils/prefetch.h
@@ -14,9 +14,15 @@
 #include "nsparse/types.h"
 
 #ifndef NSPARSE_PREFETCH
+#ifdef _MSC_VER
+#include
+#define NSPARSE_PREFETCH(addr, rw, locality) \
+    _mm_prefetch(reinterpret_cast(addr), _MM_HINT_T0)
+#else
 #define NSPARSE_PREFETCH(addr, rw, locality) \
     __builtin_prefetch(addr, rw, locality)
 #endif
+#endif
 
 namespace nsparse::detail {
 
diff --git a/tests/kmeans_utils_test.cpp b/tests/kmeans_utils_test.cpp
index b7db726..0b60fcc 100644
--- a/tests/kmeans_utils_test.cpp
+++ b/tests/kmeans_utils_test.cpp
@@ -11,8 +11,8 @@
 
 #include
 
+#include
 #include
-
 #include "nsparse/sparse_vectors.h"
 #include "nsparse/types.h"
 
diff --git a/tests/prefetch_test.cpp b/tests/prefetch_test.cpp
index 2971662..d9c3b1c 100644
--- a/tests/prefetch_test.cpp
+++ b/tests/prefetch_test.cpp
@@ -47,7 +47,12 @@ namespace {
 using nsparse::term_t;
 
 template
-__attribute__((noinline)) void test_prefetch_vector(const term_t* indices,
+#ifdef _MSC_VER
+__declspec(noinline)
+#else
+__attribute__((noinline))
+#endif
+void test_prefetch_vector(const term_t* indices,
                                                     const T* values,
                                                     size_t len) {
     static constexpr size_t kCacheLineSize = 64;