diff --git a/src/parallel_stable_sort_oneapi.cpp b/src/parallel_stable_sort_oneapi.cpp index 7736310..cf6244f 100644 --- a/src/parallel_stable_sort_oneapi.cpp +++ b/src/parallel_stable_sort_oneapi.cpp @@ -4,16 +4,19 @@ // SPDX-License-Identifier: MIT // ============================================================= +#include +#include +#include + +#define TBB_PREVIEW_GLOBAL_CONTROL 1 +#include "tbb/global_control.h" + #include #if FPGA || FEMU -#include +#include #endif -#include -#include -#include - #include #include #include @@ -179,13 +182,13 @@ int main() std::cout << logo << "\n\n"; #if FEMU - sycl::intel::fpga_emulator_selector device_selector{}; + INTEL::fpga_emulator_selector device_selector{}; #elif CPU cpu_selector device_selector{}; #elif GPU gpu_selector device_selector{}; #elif FPGA - sycl::intel::fpga_selector device_selector{}; + INTEL::fpga_selector device_selector{}; #else default_selector device_selector{}; #endif @@ -200,6 +203,8 @@ int main() device_queue.get_device().get_info() << " cores)" << std::endl; #endif + tbb::global_control c(tbb::global_control::max_allowed_parallelism, misc::n_tbb_workers); + char option = '\0'; std::cout << "\nDo you want to run stress test first [Y/N]?"; std::cin >> option; std::cout << "\n"; diff --git a/src/parallel_stable_sort_oneapi.hpp b/src/parallel_stable_sort_oneapi.hpp index d93249e..13519c7 100644 --- a/src/parallel_stable_sort_oneapi.hpp +++ b/src/parallel_stable_sort_oneapi.hpp @@ -1,24 +1,22 @@ +#include +#include +#include + #include #include #include #if FPGA || FEMU -#include +#include #endif -#include -#include -#include - -#include - #include "misc/utility.hpp" #ifndef PARALLEL_SORT_STL_H #define PARALLEL_SORT_STL_H using namespace cl::sycl; -using namespace dpstd::execution; +using namespace oneapi::dpl::execution; namespace internal { @@ -128,13 +126,8 @@ namespace internal pv_dd.reserve(array_dd.size() + 1); pv_dd.resize(array_dd.size() + 1); #elif FPGA | GPU - cl::sycl::usm_allocator q_array_alloc{ device_queue }; - cl::sycl::usm_allocator q_pv_alloc{ device_queue }; - - std::vector> pv_dd(q_pv_alloc); - std::vector> array_dd(q_array_alloc); - - array_dd.reserve(array.size()); pv_dd.reserve(array.size() + 1); + gen::ITEM* array_dd = (gen::ITEM*)sycl::malloc_device( array.size()*sizeof(gen::ITEM), device_queue ); + std::size_t* pv_dd = (std::size_t*)sycl::malloc_device( (array.size() + 1)*sizeof(std::size_t), device_queue ); device_queue.memset(&pv_dd[0], 0x00, sizeof(std::size_t) * (array.size() + 1)); device_queue.wait_and_throw(); @@ -182,7 +175,7 @@ namespace internal #if FPGA | CPU | GPU auto policy = make_device_policy(device_queue); #elif FEMU - auto policy = dpstd::execution::par_unseq; + auto policy = oneapi::dpl::execution::par_unseq; #endif #if FEMU | CPU @@ -233,6 +226,9 @@ namespace internal array.resize(array_dd.size()); array.assign(array_dd.begin(), array_dd.end()); } +#elif FPGA | GPU + free(array_dd, device_queue); + free(pv_dd, device_queue); #endif }