Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions src/parallel_stable_sort_oneapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
// SPDX-License-Identifier: MIT
// =============================================================

#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#include <dpct/dpl_extras/iterators.h>

#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include "tbb/global_control.h"

#include <CL/sycl.hpp>

#if FPGA || FEMU
#include <CL/sycl/intel/fpga_extensions.hpp>
#include <CL/sycl/INTEL/fpga_extensions.hpp>
#endif

#include <dpstd/execution>
#include <dpstd/algorithm>
#include <dpstd/iterators.h>

#include <ctime>
#include <deque>
#include <chrono>
Expand Down Expand Up @@ -179,13 +182,13 @@ int main()
std::cout << logo << "\n\n";

#if FEMU
sycl::intel::fpga_emulator_selector device_selector{};
INTEL::fpga_emulator_selector device_selector{};
#elif CPU
cpu_selector device_selector{};
#elif GPU
gpu_selector device_selector{};
#elif FPGA
sycl::intel::fpga_selector device_selector{};
INTEL::fpga_selector device_selector{};
#else
default_selector device_selector{};
#endif
Expand All @@ -200,6 +203,8 @@ int main()
device_queue.get_device().get_info<info::device::max_compute_units>() << " cores)" << std::endl;
#endif

tbb::global_control c(tbb::global_control::max_allowed_parallelism, misc::n_tbb_workers);

char option = '\0';
std::cout << "\nDo you want to run stress test first [Y/N]?"; std::cin >> option;
std::cout << "\n";
Expand Down
28 changes: 12 additions & 16 deletions src/parallel_stable_sort_oneapi.hpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#include <dpct/dpl_extras/iterators.h>

#include <tbb/tbb.h>
#include <tbb/task.h>

#include <CL/sycl.hpp>
#if FPGA || FEMU
#include <CL/sycl/intel/fpga_extensions.hpp>
#include <CL/sycl/INTEL/fpga_extensions.hpp>
#endif

#include <dpstd/execution>
#include <dpstd/algorithm>
#include <dpstd/iterators.h>

#include <iostream>

#include "misc/utility.hpp"

#ifndef PARALLEL_SORT_STL_H
#define PARALLEL_SORT_STL_H

using namespace cl::sycl;
using namespace dpstd::execution;
using namespace oneapi::dpl::execution;

namespace internal
{
Expand Down Expand Up @@ -128,13 +126,8 @@ namespace internal

pv_dd.reserve(array_dd.size() + 1); pv_dd.resize(array_dd.size() + 1);
#elif FPGA | GPU
cl::sycl::usm_allocator<gen::ITEM, usm::alloc::device> q_array_alloc{ device_queue };
cl::sycl::usm_allocator<std::size_t, usm::alloc::device> q_pv_alloc{ device_queue };

std::vector<std::size_t, usm_allocator<std::size_t, usm::alloc::device>> pv_dd(q_pv_alloc);
std::vector<gen::ITEM, usm_allocator<gen::ITEM, usm::alloc::device>> array_dd(q_array_alloc);

array_dd.reserve(array.size()); pv_dd.reserve(array.size() + 1);
gen::ITEM* array_dd = (gen::ITEM*)sycl::malloc_device( array.size()*sizeof(gen::ITEM), device_queue );
std::size_t* pv_dd = (std::size_t*)sycl::malloc_device( (array.size() + 1)*sizeof(std::size_t), device_queue );

device_queue.memset(&pv_dd[0], 0x00, sizeof(std::size_t) * (array.size() + 1));
device_queue.wait_and_throw();
Expand Down Expand Up @@ -182,7 +175,7 @@ namespace internal
#if FPGA | CPU | GPU
auto policy = make_device_policy(device_queue);
#elif FEMU
auto policy = dpstd::execution::par_unseq;
auto policy = oneapi::dpl::execution::par_unseq;
#endif

#if FEMU | CPU
Expand Down Expand Up @@ -233,6 +226,9 @@ namespace internal
array.resize(array_dd.size());
array.assign(array_dd.begin(), array_dd.end());
}
#elif FPGA | GPU
free(array_dd, device_queue);
free(pv_dd, device_queue);
#endif
}

Expand Down