Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions c/parallel/src/radix_sort.cu
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ struct __align__({3}) values_storage_t {{
{4}
using device_radix_sort_policy = {5};
using namespace cub;
using namespace cub::detail;
using namespace cub::detail::radix_sort;
static_assert(device_radix_sort_policy()(::cuda::arch_id{{CUB_PTX_ARCH / 10}}) == {6}, "Host generated and JIT compiled policy mismatch");
)XXX",
Expand Down
131 changes: 131 additions & 0 deletions cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@

#include <nv/target>

#if !_CCCL_COMPILER(NVRTC)
# include <ostream>
#endif

CUB_NAMESPACE_BEGIN

/******************************************************************************
Expand Down Expand Up @@ -539,6 +543,133 @@ struct exponential_backon_constructor_t
#endif // CUB_ENABLE_POLICY_PTX_JSON
};

enum class delay_constructor_kind
{
no_delay,
fixed_delay,
exponential_backoff,
exponential_backoff_jitter,
exponential_backoff_jitter_window,
exponential_backon_jitter_window,
exponential_backon_jitter,
exponential_backon
};

#if !_CCCL_COMPILER(NVRTC)
inline ::std::ostream& operator<<(::std::ostream& os, delay_constructor_kind kind)
{
switch (kind)
{
case delay_constructor_kind::no_delay:
return os << "delay_constructor_kind::no_delay";
case delay_constructor_kind::fixed_delay:
return os << "delay_constructor_kind::fixed_delay";
case delay_constructor_kind::exponential_backoff:
return os << "delay_constructor_kind::exponential_backoff";
case delay_constructor_kind::exponential_backoff_jitter:
return os << "delay_constructor_kind::exponential_backoff_jitter";
case delay_constructor_kind::exponential_backoff_jitter_window:
return os << "delay_constructor_kind::exponential_backoff_jitter_window";
case delay_constructor_kind::exponential_backon_jitter_window:
return os << "delay_constructor_kind::exponential_backon_jitter_window";
case delay_constructor_kind::exponential_backon_jitter:
return os << "delay_constructor_kind::exponential_backon_jitter";
case delay_constructor_kind::exponential_backon:
return os << "delay_constructor_kind::exponential_backon";
default:
return os << "<unknown delay_constructor_kind: " << static_cast<int>(kind) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)

// Describes the parameters of a delay constructor, used for tuning policies
struct delay_constructor_policy
{
delay_constructor_kind kind;
unsigned int delay;
unsigned int l2_write_latency;

_CCCL_API constexpr friend bool operator==(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs)
{
return lhs.kind == rhs.kind && lhs.delay == rhs.delay && lhs.l2_write_latency == rhs.l2_write_latency;
}

_CCCL_API constexpr friend bool operator!=(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs)
{
return !(lhs == rhs);
}

#if !_CCCL_COMPILER(NVRTC)
friend ::std::ostream& operator<<(::std::ostream& os, const delay_constructor_policy& p)
{
return os << "delay_constructor_policy { .kind = " << p.kind << ", .delay = " << p.delay
<< ", .l2_write_latency = " << p.l2_write_latency << " }";
}
#endif // !_CCCL_COMPILER(NVRTC)
};

// Converts a delay constructor type to its corresponding policy struct
template <typename DelayConstructor>
inline constexpr auto delay_constructor_policy_from_type = 0;

template <unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<no_delay_constructor_t<L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::no_delay, 0, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<fixed_delay_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::fixed_delay, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<exponential_backoff_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backoff, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto
delay_constructor_policy_from_type<exponential_backoff_jitter_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto
delay_constructor_policy_from_type<exponential_backoff_jitter_window_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter_window, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto
delay_constructor_policy_from_type<exponential_backon_jitter_window_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter_window, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<exponential_backon_jitter_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<exponential_backon_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backon, Delay, L2WriteLatency};

// TODO(bgruber): this is modeled after <look_back_helper.cuh>, unify this
template <delay_constructor_kind Kind, unsigned int Delay, unsigned int L2WriteLatency>
struct __delay_constructor_t_helper
{
private:
using delay_constructors = ::cuda::std::__type_list<
no_delay_constructor_t<L2WriteLatency>,
fixed_delay_constructor_t<Delay, L2WriteLatency>,
exponential_backoff_constructor_t<Delay, L2WriteLatency>,
exponential_backoff_jitter_constructor_t<Delay, L2WriteLatency>,
exponential_backoff_jitter_window_constructor_t<Delay, L2WriteLatency>,
exponential_backon_jitter_window_constructor_t<Delay, L2WriteLatency>,
exponential_backon_jitter_constructor_t<Delay, L2WriteLatency>,
exponential_backon_constructor_t<Delay, L2WriteLatency>>;

public:
using type = ::cuda::std::__type_at_c<static_cast<int>(Kind), delay_constructors>;
};

// Converts delay constructor policy values to the corresponding delay constructor type
template <delay_constructor_kind Kind, unsigned int Delay, unsigned int L2WriteLatency>
using delay_constructor_t = typename __delay_constructor_t_helper<Kind, Delay, L2WriteLatency>::type;

using default_no_delay_constructor_t = no_delay_constructor_t<450>;
using default_no_delay_t = default_no_delay_constructor_t::delay_t;

Expand Down
126 changes: 1 addition & 125 deletions cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
#include <cub/agent/agent_radix_sort_onesweep.cuh>
#include <cub/agent/agent_radix_sort_upsweep.cuh>
#include <cub/agent/agent_scan.cuh>
#include <cub/agent/single_pass_scan_operators.cuh>
#include <cub/device/dispatch/tuning/common.cuh>
#include <cub/util_device.cuh>

#include <cuda/__device/arch_id.h>
#include <cuda/std/optional>

#if !_CCCL_COMPILER(NVRTC)
# include <ostream>
Expand All @@ -31,130 +31,6 @@
CUB_NAMESPACE_BEGIN
namespace detail::radix_sort
{
enum class delay_constructor_kind
{
no_delay,
fixed_delay,
exponential_backoff,
exponential_backoff_jitter,
exponential_backoff_jitter_window,
exponential_backon_jitter_window,
exponential_backon_jitter,
exponential_backon
};

#if !_CCCL_COMPILER(NVRTC)
inline ::std::ostream& operator<<(::std::ostream& os, delay_constructor_kind kind)
{
switch (kind)
{
case delay_constructor_kind::no_delay:
return os << "delay_constructor_kind::no_delay";
case delay_constructor_kind::fixed_delay:
return os << "delay_constructor_kind::fixed_delay";
case delay_constructor_kind::exponential_backoff:
return os << "delay_constructor_kind::exponential_backoff";
case delay_constructor_kind::exponential_backoff_jitter:
return os << "delay_constructor_kind::exponential_backoff_jitter";
case delay_constructor_kind::exponential_backoff_jitter_window:
return os << "delay_constructor_kind::exponential_backoff_jitter_window";
case delay_constructor_kind::exponential_backon_jitter_window:
return os << "delay_constructor_kind::exponential_backon_jitter_window";
case delay_constructor_kind::exponential_backon_jitter:
return os << "delay_constructor_kind::exponential_backon_jitter";
case delay_constructor_kind::exponential_backon:
return os << "delay_constructor_kind::exponential_backon";
default:
return os << "<unknown delay_constructor_kind: " << static_cast<int>(kind) << ">";
}
}
#endif // !_CCCL_COMPILER(NVRTC)

struct delay_constructor_policy
{
delay_constructor_kind kind;
unsigned int delay;
unsigned int l2_write_latency;

_CCCL_API constexpr friend bool operator==(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs)
{
return lhs.kind == rhs.kind && lhs.delay == rhs.delay && lhs.l2_write_latency == rhs.l2_write_latency;
}

_CCCL_API constexpr friend bool operator!=(const delay_constructor_policy& lhs, const delay_constructor_policy& rhs)
{
return !(lhs == rhs);
}

#if !_CCCL_COMPILER(NVRTC)
friend ::std::ostream& operator<<(::std::ostream& os, const delay_constructor_policy& p)
{
return os << "delay_constructor_policy { .kind = " << p.kind << ", .delay = " << p.delay
<< ", .l2_write_latency = " << p.l2_write_latency << " }";
}
#endif // !_CCCL_COMPILER(NVRTC)
};

template <typename DelayConstructor>
inline constexpr auto delay_constructor_policy_from_type = 0;

template <unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<no_delay_constructor_t<L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::no_delay, 0, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<fixed_delay_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::fixed_delay, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<exponential_backoff_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backoff, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto
delay_constructor_policy_from_type<exponential_backoff_jitter_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto
delay_constructor_policy_from_type<exponential_backoff_jitter_window_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backoff_jitter_window, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto
delay_constructor_policy_from_type<exponential_backon_jitter_window_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter_window, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<exponential_backon_jitter_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backon_jitter, Delay, L2WriteLatency};

template <unsigned int Delay, unsigned int L2WriteLatency>
inline constexpr auto delay_constructor_policy_from_type<exponential_backon_constructor_t<Delay, L2WriteLatency>> =
delay_constructor_policy{delay_constructor_kind::exponential_backon, Delay, L2WriteLatency};

// TODO(bgruber): this is modeled after <look_back_helper.cuh>, unify this
template <delay_constructor_kind Kind, unsigned int Delay, unsigned int L2WriteLatency>
struct __delay_constructor_t_helper
{
private:
using delay_constructors = ::cuda::std::__type_list<
detail::no_delay_constructor_t<L2WriteLatency>,
detail::fixed_delay_constructor_t<Delay, L2WriteLatency>,
detail::exponential_backoff_constructor_t<Delay, L2WriteLatency>,
detail::exponential_backoff_jitter_constructor_t<Delay, L2WriteLatency>,
detail::exponential_backoff_jitter_window_constructor_t<Delay, L2WriteLatency>,
detail::exponential_backon_jitter_window_constructor_t<Delay, L2WriteLatency>,
detail::exponential_backon_jitter_constructor_t<Delay, L2WriteLatency>,
detail::exponential_backon_constructor_t<Delay, L2WriteLatency>>;

public:
using type = ::cuda::std::__type_at_c<static_cast<int>(Kind), delay_constructors>;
};

template <delay_constructor_kind Kind, unsigned int Delay, unsigned int L2WriteLatency>
using delay_constructor_t = typename __delay_constructor_t_helper<Kind, Delay, L2WriteLatency>::type;

struct radix_sort_histogram_policy
{
int block_threads;
Expand Down
17 changes: 3 additions & 14 deletions nvbench_helper/nvbench_helper/look_back_helper.cuh
Original file line number Diff line number Diff line change
@@ -1,26 +1,15 @@
// SPDX-FileCopyrightText: Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2011-2026, NVIDIA CORPORATION. All rights reserved.
// SPDX-License-Identifier: BSD-3

#pragma once

#if !TUNE_BASE
# include <cub/agent/single_pass_scan_operators.cuh>

# include <nvbench_helper.cuh>

# if !defined(TUNE_MAGIC_NS) || !defined(TUNE_L2_WRITE_LATENCY_NS) || !defined(TUNE_DELAY_CONSTRUCTOR_ID)
# error "TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS, and TUNE_DELAY_CONSTRUCTOR_ID must be defined"
# endif

using delay_constructors = nvbench::type_list<
cub::detail::no_delay_constructor_t<TUNE_L2_WRITE_LATENCY_NS>,
cub::detail::fixed_delay_constructor_t<TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>,
cub::detail::exponential_backoff_constructor_t<TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>,
cub::detail::exponential_backoff_jitter_constructor_t<TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>,
cub::detail::exponential_backoff_jitter_window_constructor_t<TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>,
cub::detail::exponential_backon_jitter_window_constructor_t<TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>,
cub::detail::exponential_backon_jitter_constructor_t<TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>,
cub::detail::exponential_backon_constructor_t<TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>>;

using delay_constructor_t = nvbench::tl::get<TUNE_DELAY_CONSTRUCTOR_ID, delay_constructors>;
using delay_constructor_t =
cub::detail::delay_constructor_t<TUNE_DELAY_CONSTRUCTOR_ID, TUNE_MAGIC_NS, TUNE_L2_WRITE_LATENCY_NS>;
#endif // !TUNE_BASE
Loading