Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cmake/rapids_config.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# =============================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
# =============================================================================
Expand Down Expand Up @@ -34,6 +34,9 @@ endif()
if(NOT rapids-cmake-branch)
set(rapids-cmake-branch "${_rapids_branch}")
endif()

set(rapids-cmake-repo kingcrimsontianyu/rapids-cmake)
set(rapids-cmake-branch "bump-nvtx-to-3.4.0")
include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake")

# Don't use sccache-dist for CMake's compiler tests
Expand Down
5 changes: 5 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ target_link_options(kvikio PRIVATE "LINKER:--exclude-libs,ALL")

add_library(kvikio::kvikio ALIAS kvikio)

target_include_directories(
kvikio
PUBLIC "$<BUILD_INTERFACE:$<TARGET_PROPERTY:nvtx3::nvtx3-cpp,INTERFACE_INCLUDE_DIRECTORIES>>"
)

target_include_directories(
kvikio
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
Expand Down
166 changes: 101 additions & 65 deletions cpp/include/kvikio/detail/nvtx.hpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include <cstddef>
#include <cstdint>

#include <nvtx3/nvtx3.hpp>

#include <kvikio/shim/cuda.hpp>
#include <kvikio/utils.hpp>

namespace kvikio {
namespace kvikio::detail {

/**
* @brief Tag type for libkvikio's NVTX domain.
Expand All @@ -20,19 +21,99 @@ struct libkvikio_domain {
static constexpr char const* name{"libkvikio"};
};

using nvtx_scoped_range_type = nvtx3::scoped_range_in<libkvikio_domain>;
using nvtx_registered_string_type = nvtx3::registered_string_in<libkvikio_domain>;
using NvtxScopedRange = nvtx3::scoped_range_in<libkvikio_domain>;
using NvtxRegisteredString = nvtx3::registered_string_in<libkvikio_domain>;
using NvtxColor = nvtx3::color;

/**
* @brief Identifies a group of related NVTX ranges originating from a single pread/pwrite call.
*
* All tasks spawned by the same I/O call share one NvtxCallTag, giving them the same color and call
* index in the profiler timeline. This enables visual correlation of parallel tasks across worker
* threads.
*/
struct NvtxCallTag {
std::uint64_t call_idx{};
NvtxColor color;
};

/**
* @brief Structured NVTX payload for I/O operations.
*
* Attached to NVTX ranges via `nvtx3::payload_data` to provide labeled fields in profiling tools
* such as Nsight Systems (requires NVTX 3.3+ and Nsight Systems 2024.6+).
*/
struct NvtxIoPayload {
NvtxRegisteredString file_path;
std::size_t file_offset;
std::size_t size;
std::uint64_t call_idx;
};

namespace nvtx {

/**
* @brief Return the default color.
*
* @return Default color.
*/
const NvtxColor& default_color() noexcept;

/**
* @brief Return the color at the given index from the internal color palette whose size n is a
* power of 2. The index may exceed the size of the color palette, in which case it wraps around,
* i.e. (idx mod n).
*
* @param idx The index value.
* @return The color picked from the internal color palette.
*/
NvtxColor const& get_color_by_index(std::uint64_t idx) noexcept;

/**
* @brief Create a new call tag for correlating NVTX ranges from a single pread/pwrite call.
*
* Each invocation atomically increments a global counter and derives a color from the counter
* value. The counter wraps around at the maximum value of `std::uint64_t` (well-defined in C++).
*
* @return A call tag with a unique call index and its associated color.
*/
NvtxCallTag next_call_tag();

/**
* @brief Return a registered string with empty content.
*
* Useful as a placeholder for structured payload fields (e.g., file path) that are not yet
* populated.
*
* @return A reference to a statically allocated empty registered string.
*/
NvtxRegisteredString const& get_empty_registered_string();

} // namespace nvtx

} // namespace kvikio::detail

NVTX3_DEFINE_SCHEMA_GET(kvikio::detail::libkvikio_domain,
kvikio::detail::NvtxIoPayload,
"KvikIONvtxIOPayload",
NVTX_PAYLOAD_ENTRIES((file_path,
TYPE_NVTX_REGISTERED_STRING_HANDLE,
"file_path",
"Path to the file"),
(file_offset, TYPE_SIZE, "file_offset", "File offset"),
(size, TYPE_SIZE, "size", "Transferred bytes"),
(call_idx, TYPE_UINT64, "call_idx", "Call index")))

// Macro to concatenate two tokens x and y.
#define KVIKIO_CONCAT_HELPER(x, y) x##y
#define KVIKIO_CONCAT(x, y) KVIKIO_CONCAT_HELPER(x, y)

// Macro to create a static, registered string that will not have a name conflict with any
// registered string defined in the same scope.
#define KVIKIO_REGISTER_STRING(message) \
[](const char* a_message) -> auto& { \
static kvikio::nvtx_registered_string_type a_reg_str{a_message}; \
return a_reg_str; \
#define KVIKIO_REGISTER_STRING(message) \
[](const char* a_message) -> auto& { \
static kvikio::detail::NvtxRegisteredString a_reg_str{a_message}; \
return a_reg_str; \
}(message)

// Implementation of KVIKIO_NVTX_FUNC_RANGE()
Expand All @@ -55,24 +136,24 @@ using nvtx_registered_string_type = nvtx3::registered_string_in<libkvikio_domain
(__VA_ARGS__)

// Implementation of KVIKIO_NVTX_SCOPED_RANGE(...)
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(message) \
kvikio::nvtx_scoped_range_type KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
{ \
nvtx3::event_attributes \
{ \
KVIKIO_REGISTER_STRING(message), kvikio::NvtxManager::default_color() \
} \
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(message) \
kvikio::detail::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
{ \
nvtx3::event_attributes \
{ \
KVIKIO_REGISTER_STRING(message), kvikio::detail::nvtx::default_color() \
} \
}
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload_v, color) \
kvikio::nvtx_scoped_range_type KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
kvikio::detail::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
{ \
nvtx3::event_attributes \
{ \
KVIKIO_REGISTER_STRING(message), nvtx3::payload{kvikio::convert_to_64bit(payload_v)}, color \
} \
}
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_2(message, payload) \
KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload, kvikio::NvtxManager::default_color())
KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload, kvikio::detail::nvtx::default_color())
#define KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(_1, _2, _3, NAME, ...) NAME
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL(...) \
KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(__VA_ARGS__, \
Expand All @@ -82,53 +163,10 @@ using nvtx_registered_string_type = nvtx3::registered_string_in<libkvikio_domain
(__VA_ARGS__)

// Implementation of KVIKIO_NVTX_MARKER(message, payload)
#define KVIKIO_NVTX_MARKER_IMPL(message, payload_v) \
nvtx3::mark_in<kvikio::libkvikio_domain>(nvtx3::event_attributes{ \
#define KVIKIO_NVTX_MARKER_IMPL(message, payload_v) \
nvtx3::mark_in<kvikio::detail::libkvikio_domain>(nvtx3::event_attributes{ \
KVIKIO_REGISTER_STRING(message), nvtx3::payload{kvikio::convert_to_64bit(payload_v)}})

using nvtx_color_type = nvtx3::color;

/**
* @brief Utility singleton class for NVTX annotation.
*/
class NvtxManager {
public:
static NvtxManager& instance() noexcept;

/**
* @brief Return the default color.
*
* @return Default color.
*/
static const nvtx_color_type& default_color() noexcept;

/**
* @brief Return the color at the given index from the internal color palette whose size n is a
* power of 2. The index may exceed the size of the color palette, in which case it wraps around,
* i.e. (idx mod n).
*
* @param idx The index value.
* @return The color picked from the internal color palette.
*/
static const nvtx_color_type& get_color_by_index(std::uint64_t idx) noexcept;

/**
* @brief Rename the current thread under the KvikIO NVTX domain.
*
* @note This NVTX feature is currently not supported by the Nsight System profiler. As a result,
* the OS thread will not be renamed in the nsys-ui.
*/
static void rename_current_thread(std::string_view new_name) noexcept;

NvtxManager(NvtxManager const&) = delete;
NvtxManager& operator=(NvtxManager const&) = delete;
NvtxManager(NvtxManager&&) = delete;
NvtxManager& operator=(NvtxManager&&) = delete;

private:
NvtxManager() = default;
};

/**
* @brief Convenience macro for generating an NVTX range in the `libkvikio` domain from the lifetime
* of a function. Can be used inside a regular function or a lambda expression.
Expand All @@ -155,7 +193,7 @@ class NvtxManager {
*
* void some_function(){
* // Specify payload and color
* auto const nvtx3::rgb color{0, 255, 0};
* nvtx3::rgb const color{0, 255, 0};
* KVIKIO_NVTX_FUNC_RANGE(4096, color);
* ...
* }
Expand Down Expand Up @@ -201,5 +239,3 @@ class NvtxManager {
* ```
*/
#define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload)

} // namespace kvikio
Loading