diff --git a/c/parallel/src/radix_sort.cu b/c/parallel/src/radix_sort.cu index 84db1db7d93..4f4ca7275b2 100644 --- a/c/parallel/src/radix_sort.cu +++ b/c/parallel/src/radix_sort.cu @@ -255,6 +255,7 @@ struct __align__({3}) values_storage_t {{ {4} using device_radix_sort_policy = {5}; using namespace cub; +using namespace cub::detail; using namespace cub::detail::radix_sort; static_assert(device_radix_sort_policy()(::cuda::arch_id{{CUB_PTX_ARCH / 10}}) == {6}, "Host generated and JIT compiled policy mismatch"); )XXX", diff --git a/cub/cub/agent/single_pass_scan_operators.cuh b/cub/cub/agent/single_pass_scan_operators.cuh index bc80b67abc6..1c471e5e4ad 100644 --- a/cub/cub/agent/single_pass_scan_operators.cuh +++ b/cub/cub/agent/single_pass_scan_operators.cuh @@ -33,6 +33,10 @@ #include +#if !_CCCL_COMPILER(NVRTC) +# include +#endif + CUB_NAMESPACE_BEGIN /****************************************************************************** @@ -539,6 +543,133 @@ struct exponential_backon_constructor_t #endif // CUB_ENABLE_POLICY_PTX_JSON }; +enum class delay_constructor_kind +{ + no_delay, + fixed_delay, + exponential_backoff, + exponential_backoff_jitter, + exponential_backoff_jitter_window, + exponential_backon_jitter_window, + exponential_backon_jitter, + exponential_backon +}; + +#if !_CCCL_COMPILER(NVRTC) +inline ::std::ostream& operator<<(::std::ostream& os, delay_constructor_kind kind) +{ + switch (kind) + { + case delay_constructor_kind::no_delay: + return os << "delay_constructor_kind::no_delay"; + case delay_constructor_kind::fixed_delay: + return os << "delay_constructor_kind::fixed_delay"; + case delay_constructor_kind::exponential_backoff: + return os << "delay_constructor_kind::exponential_backoff"; + case delay_constructor_kind::exponential_backoff_jitter: + return os << "delay_constructor_kind::exponential_backoff_jitter"; + case delay_constructor_kind::exponential_backoff_jitter_window: + return os << "delay_constructor_kind::exponential_backoff_jitter_window"; + case delay_constructor_kind::exponential_backon_jitter_window: + return os << "delay_constructor_kind::exponential_backon_jitter_window"; + case delay_constructor_kind::exponential_backon_jitter: + return os << "delay_constructor_kind::exponential_backon_jitter"; + case delay_constructor_kind::exponential_backon: + return os << "delay_constructor_kind::exponential_backon"; + default: + return os << "(kind) << ">"; + } +} +#endif // !_CCCL_COMPILER(NVRTC) + +// Describes the parameters of a delay constructor, used for tuning policies +struct delay_constructor_policy +{ + delay_constructor_kind kind; + unsigned int delay; + unsigned int l2_write_latency; + + _CCCL_API constexpr friend bool operator==(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs) + { + return lhs.kind == rhs.kind && lhs.delay == rhs.delay && lhs.l2_write_latency == rhs.l2_write_latency; + } + + _CCCL_API constexpr friend bool operator!=(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs) + { + return !(lhs == rhs); + } + +#if !_CCCL_COMPILER(NVRTC) + friend ::std::ostream& operator<<(::std::ostream& os, const delay_constructor_policy& p) + { + return os << "delay_constructor_policy { .kind = " << p.kind << ", .delay = " << p.delay + << ", .l2_write_latency = " << p.l2_write_latency << " }"; + } +#endif // !_CCCL_COMPILER(NVRTC) +}; + +// Converts a delay constructor type to its corresponding policy struct +template +inline constexpr auto delay_constructor_policy_from_type = 0; + +template +inline constexpr auto delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::no_delay, 0, L2WriteLatency}; + +template +inline constexpr auto delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::fixed_delay, Delay, L2WriteLatency}; + +template +inline constexpr auto delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::exponential_backoff, Delay, L2WriteLatency}; + +template +inline constexpr auto + delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter, Delay, L2WriteLatency}; + +template +inline constexpr auto + delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter_window, Delay, L2WriteLatency}; + +template +inline constexpr auto + delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter_window, Delay, L2WriteLatency}; + +template +inline constexpr auto delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter, Delay, L2WriteLatency}; + +template +inline constexpr auto delay_constructor_policy_from_type> = + delay_constructor_policy{delay_constructor_kind::exponential_backon, Delay, L2WriteLatency}; + +// TODO(bgruber): this is modeled after , unify this +template +struct __delay_constructor_t_helper +{ +private: + using delay_constructors = ::cuda::std::__type_list< + no_delay_constructor_t, + fixed_delay_constructor_t, + exponential_backoff_constructor_t, + exponential_backoff_jitter_constructor_t, + exponential_backoff_jitter_window_constructor_t, + exponential_backon_jitter_window_constructor_t, + exponential_backon_jitter_constructor_t, + exponential_backon_constructor_t>; + +public: + using type = ::cuda::std::__type_at_c(Kind), delay_constructors>; +}; + +// Converts delay constructor policy values to the corresponding delay constructor type +template +using delay_constructor_t = typename __delay_constructor_t_helper::type; + using default_no_delay_constructor_t = no_delay_constructor_t<450>; using default_no_delay_t = default_no_delay_constructor_t::delay_t; diff --git a/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh b/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh index 7ae1e0bde7c..12c1594976d 100644 --- a/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh @@ -18,11 +18,11 @@ #include #include #include +#include #include #include #include -#include #if !_CCCL_COMPILER(NVRTC) # include @@ -31,130 +31,6 @@ CUB_NAMESPACE_BEGIN namespace detail::radix_sort { -enum class delay_constructor_kind -{ - no_delay, - fixed_delay, - exponential_backoff, - exponential_backoff_jitter, - exponential_backoff_jitter_window, - exponential_backon_jitter_window, - exponential_backon_jitter, - exponential_backon -}; - -#if !_CCCL_COMPILER(NVRTC) -inline ::std::ostream& operator<<(::std::ostream& os, delay_constructor_kind kind) -{ - switch (kind) - { - case delay_constructor_kind::no_delay: - return os << "delay_constructor_kind::no_delay"; - case delay_constructor_kind::fixed_delay: - return os << "delay_constructor_kind::fixed_delay"; - case delay_constructor_kind::exponential_backoff: - return os << "delay_constructor_kind::exponential_backoff"; - case delay_constructor_kind::exponential_backoff_jitter: - return os << "delay_constructor_kind::exponential_backoff_jitter"; - case delay_constructor_kind::exponential_backoff_jitter_window: - return os << "delay_constructor_kind::exponential_backoff_jitter_window"; - case delay_constructor_kind::exponential_backon_jitter_window: - return os << "delay_constructor_kind::exponential_backon_jitter_window"; - case delay_constructor_kind::exponential_backon_jitter: - return os << "delay_constructor_kind::exponential_backon_jitter"; - case delay_constructor_kind::exponential_backon: - return os << "delay_constructor_kind::exponential_backon"; - default: - return os << "(kind) << ">"; - } -} -#endif // !_CCCL_COMPILER(NVRTC) - -struct delay_constructor_policy -{ - delay_constructor_kind kind; - unsigned int delay; - unsigned int l2_write_latency; - - _CCCL_API constexpr friend bool operator==(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs) - { - return lhs.kind == rhs.kind && lhs.delay == rhs.delay && lhs.l2_write_latency == rhs.l2_write_latency; - } - - _CCCL_API constexpr friend bool operator!=(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs) - { - return !(lhs == rhs); - } - -#if !_CCCL_COMPILER(NVRTC) - friend ::std::ostream& operator<<(::std::ostream& os, const delay_constructor_policy& p) - { - return os << "delay_constructor_policy { .kind = " << p.kind << ", .delay = " << p.delay - << ", .l2_write_latency = " << p.l2_write_latency << " }"; - } -#endif // !_CCCL_COMPILER(NVRTC) -}; - -template -inline constexpr auto delay_constructor_policy_from_type = 0; - -template -inline constexpr auto delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::no_delay, 0, L2WriteLatency}; - -template -inline constexpr auto delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::fixed_delay, Delay, L2WriteLatency}; - -template -inline constexpr auto delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::exponential_backoff, Delay, L2WriteLatency}; - -template -inline constexpr auto - delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter, Delay, L2WriteLatency}; - -template -inline constexpr auto - delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter_window, Delay, L2WriteLatency}; - -template -inline constexpr auto - delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter_window, Delay, L2WriteLatency}; - -template -inline constexpr auto delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter, Delay, L2WriteLatency}; - -template -inline constexpr auto delay_constructor_policy_from_type> = - delay_constructor_policy{delay_constructor_kind::exponential_backon, Delay, L2WriteLatency}; - -// TODO(bgruber): this is modeled after , unify this -template -struct __delay_constructor_t_helper -{ -private: - using delay_constructors = ::cuda::std::__type_list< - detail::no_delay_constructor_t, - detail::fixed_delay_constructor_t, - detail::exponential_backoff_constructor_t, - detail::exponential_backoff_jitter_constructor_t, - detail::exponential_backoff_jitter_window_constructor_t, - detail::exponential_backon_jitter_window_constructor_t, - detail::exponential_backon_jitter_constructor_t, - detail::exponential_backon_constructor_t>; - -public: - using type = ::cuda::std::__type_at_c(Kind), delay_constructors>; -}; - -template -using delay_constructor_t = typename __delay_constructor_t_helper::type; - struct radix_sort_histogram_policy { int block_threads; diff --git a/nvbench_helper/nvbench_helper/look_back_helper.cuh b/nvbench_helper/nvbench_helper/look_back_helper.cuh index bee6708a781..c7e98f82cd8 100644 --- a/nvbench_helper/nvbench_helper/look_back_helper.cuh +++ b/nvbench_helper/nvbench_helper/look_back_helper.cuh @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved. +// SPDX-FileCopyrightText: Copyright (c) 2011-2026, NVIDIA CORPORATION. All rights reserved. // SPDX-License-Identifier: BSD-3 #pragma once @@ -6,21 +6,10 @@ #if !TUNE_BASE # include -# include - # if !defined(TUNE_MAGIC_NS) || !defined(TUNE_L2_WRITE_LATENCY_NS) || !defined(TUNE_DELAY_CONSTRUCTOR_ID) # error "TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS, and TUNE_DELAY_CONSTRUCTOR_ID must be defined" # endif -using delay_constructors = nvbench::type_list< - cub::detail::no_delay_constructor_t, - cub::detail::fixed_delay_constructor_t, - cub::detail::exponential_backoff_constructor_t, - cub::detail::exponential_backoff_jitter_constructor_t, - cub::detail::exponential_backoff_jitter_window_constructor_t, - cub::detail::exponential_backon_jitter_window_constructor_t, - cub::detail::exponential_backon_jitter_constructor_t, - cub::detail::exponential_backon_constructor_t>; - -using delay_constructor_t = nvbench::tl::get; +using delay_constructor_t = + cub::detail::delay_constructor_t; #endif // !TUNE_BASE