diff --git a/cmake/rapids_config.cmake b/cmake/rapids_config.cmake index 9cda1f26e4..3d0f3400b9 100644 --- a/cmake/rapids_config.cmake +++ b/cmake/rapids_config.cmake @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -34,6 +34,9 @@ endif() if(NOT rapids-cmake-branch) set(rapids-cmake-branch "${_rapids_branch}") endif() + +set(rapids-cmake-repo kingcrimsontianyu/rapids-cmake) +set(rapids-cmake-branch "bump-nvtx-to-3.4.0") include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake") # Don't use sccache-dist for CMake's compiler tests diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9dac81c12d..0c94c79c27 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -180,6 +180,11 @@ target_link_options(kvikio PRIVATE "LINKER:--exclude-libs,ALL") add_library(kvikio::kvikio ALIAS kvikio) +target_include_directories( + kvikio + PUBLIC "$>" +) + target_include_directories( kvikio PUBLIC "$" diff --git a/cpp/include/kvikio/detail/nvtx.hpp b/cpp/include/kvikio/detail/nvtx.hpp index ef41681c55..777bf1f694 100644 --- a/cpp/include/kvikio/detail/nvtx.hpp +++ b/cpp/include/kvikio/detail/nvtx.hpp @@ -1,9 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once +#include #include #include @@ -11,7 +12,7 @@ #include #include -namespace kvikio { +namespace kvikio::detail { /** * @brief Tag type for libkvikio's NVTX domain. @@ -20,8 +21,88 @@ struct libkvikio_domain { static constexpr char const* name{"libkvikio"}; }; -using nvtx_scoped_range_type = nvtx3::scoped_range_in; -using nvtx_registered_string_type = nvtx3::registered_string_in; +using NvtxScopedRange = nvtx3::scoped_range_in; +using NvtxRegisteredString = nvtx3::registered_string_in; +using NvtxColor = nvtx3::color; + +/** + * @brief Identifies a group of related NVTX ranges originating from a single pread/pwrite call. + * + * All tasks spawned by the same I/O call share one NvtxCallTag, giving them the same color and call + * index in the profiler timeline. This enables visual correlation of parallel tasks across worker + * threads. + */ +struct NvtxCallTag { + std::uint64_t call_idx{}; + NvtxColor color; +}; + +/** + * @brief Structured NVTX payload for I/O operations. + * + * Attached to NVTX ranges via `nvtx3::payload_data` to provide labeled fields in profiling tools + * such as Nsight Systems (requires NVTX 3.3+ and Nsight Systems 2024.6+). + */ +struct NvtxIoPayload { + NvtxRegisteredString file_path; + std::size_t file_offset; + std::size_t size; + std::uint64_t call_idx; +}; + +namespace nvtx { + +/** + * @brief Return the default color. + * + * @return Default color. + */ +const NvtxColor& default_color() noexcept; + +/** + * @brief Return the color at the given index from the internal color palette whose size n is a + * power of 2. The index may exceed the size of the color palette, in which case it wraps around, + * i.e. (idx mod n). + * + * @param idx The index value. + * @return The color picked from the internal color palette. + */ +NvtxColor const& get_color_by_index(std::uint64_t idx) noexcept; + +/** + * @brief Create a new call tag for correlating NVTX ranges from a single pread/pwrite call. + * + * Each invocation atomically increments a global counter and derives a color from the counter + * value. The counter wraps around at the maximum value of `std::uint64_t` (well-defined in C++). + * + * @return A call tag with a unique call index and its associated color. + */ +NvtxCallTag next_call_tag(); + +/** + * @brief Return a registered string with empty content. + * + * Useful as a placeholder for structured payload fields (e.g., file path) that are not yet + * populated. + * + * @return A reference to a statically allocated empty registered string. + */ +NvtxRegisteredString const& get_empty_registered_string(); + +} // namespace nvtx + +} // namespace kvikio::detail + +NVTX3_DEFINE_SCHEMA_GET(kvikio::detail::libkvikio_domain, + kvikio::detail::NvtxIoPayload, + "KvikIONvtxIOPayload", + NVTX_PAYLOAD_ENTRIES((file_path, + TYPE_NVTX_REGISTERED_STRING_HANDLE, + "file_path", + "Path to the file"), + (file_offset, TYPE_SIZE, "file_offset", "File offset"), + (size, TYPE_SIZE, "size", "Transferred bytes"), + (call_idx, TYPE_UINT64, "call_idx", "Call index"))) // Macro to concatenate two tokens x and y. #define KVIKIO_CONCAT_HELPER(x, y) x##y @@ -29,10 +110,10 @@ using nvtx_registered_string_type = nvtx3::registered_string_in auto& { \ - static kvikio::nvtx_registered_string_type a_reg_str{a_message}; \ - return a_reg_str; \ +#define KVIKIO_REGISTER_STRING(message) \ + [](const char* a_message) -> auto& { \ + static kvikio::detail::NvtxRegisteredString a_reg_str{a_message}; \ + return a_reg_str; \ }(message) // Implementation of KVIKIO_NVTX_FUNC_RANGE() @@ -55,16 +136,16 @@ using nvtx_registered_string_type = nvtx3::registered_string_in(nvtx3::event_attributes{ \ +#define KVIKIO_NVTX_MARKER_IMPL(message, payload_v) \ + nvtx3::mark_in(nvtx3::event_attributes{ \ KVIKIO_REGISTER_STRING(message), nvtx3::payload{kvikio::convert_to_64bit(payload_v)}}) -using nvtx_color_type = nvtx3::color; - -/** - * @brief Utility singleton class for NVTX annotation. - */ -class NvtxManager { - public: - static NvtxManager& instance() noexcept; - - /** - * @brief Return the default color. - * - * @return Default color. - */ - static const nvtx_color_type& default_color() noexcept; - - /** - * @brief Return the color at the given index from the internal color palette whose size n is a - * power of 2. The index may exceed the size of the color palette, in which case it wraps around, - * i.e. (idx mod n). - * - * @param idx The index value. - * @return The color picked from the internal color palette. - */ - static const nvtx_color_type& get_color_by_index(std::uint64_t idx) noexcept; - - /** - * @brief Rename the current thread under the KvikIO NVTX domain. - * - * @note This NVTX feature is currently not supported by the Nsight System profiler. As a result, - * the OS thread will not be renamed in the nsys-ui. - */ - static void rename_current_thread(std::string_view new_name) noexcept; - - NvtxManager(NvtxManager const&) = delete; - NvtxManager& operator=(NvtxManager const&) = delete; - NvtxManager(NvtxManager&&) = delete; - NvtxManager& operator=(NvtxManager&&) = delete; - - private: - NvtxManager() = default; -}; - /** * @brief Convenience macro for generating an NVTX range in the `libkvikio` domain from the lifetime * of a function. Can be used inside a regular function or a lambda expression. @@ -155,7 +193,7 @@ class NvtxManager { * * void some_function(){ * // Specify payload and color - * auto const nvtx3::rgb color{0, 255, 0}; + * nvtx3::rgb const color{0, 255, 0}; * KVIKIO_NVTX_FUNC_RANGE(4096, color); * ... * } @@ -201,5 +239,3 @@ class NvtxManager { * ``` */ #define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload) - -} // namespace kvikio diff --git a/cpp/include/kvikio/detail/parallel_operation.hpp b/cpp/include/kvikio/detail/parallel_operation.hpp index 1d3c43d287..187dfbf624 100644 --- a/cpp/include/kvikio/detail/parallel_operation.hpp +++ b/cpp/include/kvikio/detail/parallel_operation.hpp @@ -1,15 +1,12 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once -#include #include #include #include -#include -#include #include #include #include @@ -47,24 +44,6 @@ auto make_copyable_lambda(F op) [sp](auto&&... args) -> decltype(auto) { return (*sp)(std::forward(args)...); }; } -/** - * @brief Determine the NVTX color and call index. They are used to identify tasks from different - * pread/pwrite calls. Tasks from the same pread/pwrite call are given the same color and call - * index. The call index is atomically incremented on each pread/pwrite call, and will wrap around - * once it reaches the maximum value the integer type `std::uint64_t` can hold (this overflow - * behavior is well-defined in C++). The color is picked from an internal color palette according to - * the call index value. - * - * @return A pair of NVTX color and call index. - */ -inline const std::pair get_next_color_and_call_idx() noexcept -{ - static std::atomic_uint64_t call_counter{1ull}; - auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); - auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); - return {nvtx_color, call_idx}; -} - /** * @brief Submit the task callable to the underlying thread pool. * @@ -76,9 +55,8 @@ std::future submit_task(F op, std::size_t size, std::size_t file_offset, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - std::uint64_t nvtx_payload = 0ull, - nvtx_color_type nvtx_color = NvtxManager::default_color()) + ThreadPool* thread_pool = &defaults::thread_pool(), + detail::NvtxCallTag nvtx_call_tag = {}) { static_assert(std::is_invocable_r_v submit_task(F op, decltype(devPtr_offset)>); return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_payload, nvtx_color); + detail::NvtxIoPayload info{ + detail::nvtx::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; + nvtx3::payload_data payload_data(info); + static detail::NvtxRegisteredString message{"Task"}; + detail::NvtxScopedRange range( + nvtx3::event_attributes(message, payload_data, nvtx_call_tag.color)); return op(buf, size, file_offset, devPtr_offset); }); } @@ -101,18 +84,12 @@ std::future submit_task(F op, * @return A future to be used later to check if the operation has finished its execution. */ template -std::future submit_move_only_task( - F op_move_only, - ThreadPool* thread_pool = &defaults::thread_pool(), - std::uint64_t nvtx_payload = 0ull, - nvtx_color_type nvtx_color = NvtxManager::default_color()) +std::future submit_move_only_task(F op_move_only, + ThreadPool* thread_pool = &defaults::thread_pool()) { static_assert(std::is_invocable_r_v); auto op_copyable = make_copyable_lambda(std::move(op_move_only)); - return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_payload, nvtx_color); - return op_copyable(); - }); + return thread_pool->submit_task([=] { return op_copyable(); }); } } // namespace detail @@ -140,9 +117,8 @@ std::future parallel_io(F op, std::size_t file_offset, std::size_t task_size, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - std::uint64_t call_idx = 0, - nvtx_color_type nvtx_color = NvtxManager::default_color()) + ThreadPool* thread_pool = &defaults::thread_pool(), + detail::NvtxCallTag nvtx_call_tag = {}) { KVIKIO_EXPECT(task_size > 0, "`task_size` must be positive", std::invalid_argument); KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); @@ -156,7 +132,7 @@ std::future parallel_io(F op, // Single-task guard if (task_size >= size || get_page_size() >= size) { return detail::submit_task( - op, buf, size, file_offset, devPtr_offset, thread_pool, call_idx, nvtx_color); + op, buf, size, file_offset, devPtr_offset, thread_pool, nvtx_call_tag); } std::vector> tasks; @@ -165,7 +141,7 @@ std::future parallel_io(F op, // 1) Submit all tasks but the last one. These are all `task_size` sized tasks. while (size > task_size) { tasks.push_back(detail::submit_task( - op, buf, task_size, file_offset, devPtr_offset, thread_pool, call_idx, nvtx_color)); + op, buf, task_size, file_offset, devPtr_offset, thread_pool, nvtx_call_tag)); file_offset += task_size; devPtr_offset += task_size; size -= task_size; @@ -174,13 +150,19 @@ std::future parallel_io(F op, // 2) Submit the last task, which consists of performing the last I/O and waiting the previous // tasks. auto last_task = [=, tasks = std::move(tasks)]() mutable -> std::size_t { + detail::NvtxIoPayload info{ + detail::nvtx::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; + nvtx3::payload_data payload_data(info); + static detail::NvtxRegisteredString message{"Last task"}; + detail::NvtxScopedRange range( + nvtx3::event_attributes(message, payload_data, nvtx_call_tag.color)); auto ret = op(buf, size, file_offset, devPtr_offset); for (auto& task : tasks) { ret += task.get(); } return ret; }; - return detail::submit_move_only_task(std::move(last_task), thread_pool, call_idx, nvtx_color); + return detail::submit_move_only_task(std::move(last_task), thread_pool); } } // namespace kvikio diff --git a/cpp/include/kvikio/detail/posix_io.hpp b/cpp/include/kvikio/detail/posix_io.hpp index 89aba27819..6600b3b545 100644 --- a/cpp/include/kvikio/detail/posix_io.hpp +++ b/cpp/include/kvikio/detail/posix_io.hpp @@ -82,12 +82,15 @@ ssize_t posix_host_io( size_t bytes_remaining = count; char* buffer = const_cast(static_cast(buf)); auto const page_size = get_page_size(); + nvtx3::rgb const color_bio{255, 128, 128}; + nvtx3::rgb const color_dio{128, 255, 128}; // Process all bytes in a loop (unless PartialIO::YES returns early) while (bytes_remaining > 0) { ssize_t nbytes_processed{}; if (fd_direct_on == -1) { + KVIKIO_NVTX_SCOPED_RANGE("Buffered I/O", bytes_remaining, color_bio); // Direct I/O disabled: use buffered I/O for entire transfer nbytes_processed = pread_or_write(fd_direct_off, buffer, bytes_remaining, cur_offset); } else { @@ -99,9 +102,11 @@ ssize_t posix_host_io( // This ensures subsequent iterations will have page-aligned offsets auto const aligned_cur_offset = detail::align_up(cur_offset, page_size); auto const bytes_requested = std::min(aligned_cur_offset - cur_offset, bytes_remaining); + KVIKIO_NVTX_SCOPED_RANGE("Buffered I/O", bytes_requested, color_bio); nbytes_processed = pread_or_write(fd_direct_off, buffer, bytes_requested, cur_offset); } else { if (bytes_remaining < page_size) { + KVIKIO_NVTX_SCOPED_RANGE("Buffered I/O", bytes_remaining, color_bio); // Handle unaligned suffix: remaining bytes are less than a page, use buffered I/O nbytes_processed = pread_or_write(fd_direct_off, buffer, bytes_remaining, cur_offset); } else { @@ -122,15 +127,19 @@ ssize_t posix_host_io( std::memcpy(aligned_buf, buffer, bytes_requested); } - // Perform Direct I/O using the bounce buffer - nbytes_processed = - pread_or_write(fd_direct_on, aligned_buf, bytes_requested, cur_offset); + { + KVIKIO_NVTX_SCOPED_RANGE("Direct I/O", bytes_requested, color_dio); + // Perform Direct I/O using the bounce buffer + nbytes_processed = + pread_or_write(fd_direct_on, aligned_buf, bytes_requested, cur_offset); + } if constexpr (Operation == IOOperationType::READ) { // Copy data from bounce buffer to user buffer after Direct I/O read std::memcpy(buffer, aligned_buf, nbytes_processed); } } else { + KVIKIO_NVTX_SCOPED_RANGE("Direct I/O", bytes_requested, color_dio); // Buffer is page-aligned: perform Direct I/O directly with user buffer nbytes_processed = pread_or_write(fd_direct_on, buffer, bytes_requested, cur_offset); } diff --git a/cpp/src/detail/nvtx.cpp b/cpp/src/detail/nvtx.cpp index 45809ebab8..05cd0720f4 100644 --- a/cpp/src/detail/nvtx.cpp +++ b/cpp/src/detail/nvtx.cpp @@ -1,70 +1,61 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include #include -#include #include #include -namespace kvikio { +namespace kvikio::detail { -NvtxManager& NvtxManager::instance() noexcept +namespace nvtx { +const NvtxColor& default_color() noexcept { - static NvtxManager _instance; - return _instance; -} - -const nvtx_color_type& NvtxManager::default_color() noexcept -{ - static nvtx_color_type default_color{nvtx3::argb{0, 255, 255, 255}}; + static NvtxColor default_color{nvtx3::argb{0, 255, 255, 255}}; return default_color; } -const nvtx_color_type& NvtxManager::get_color_by_index(std::uint64_t idx) noexcept +const NvtxColor& get_color_by_index(std::uint64_t idx) noexcept { constexpr std::size_t num_color{16}; static_assert((num_color & (num_color - 1)) == 0); // Is power of 2 - static std::array color_palette = {nvtx3::rgb{106, 192, 67}, - nvtx3::rgb{191, 73, 203}, - nvtx3::rgb{93, 151, 76}, - nvtx3::rgb{96, 72, 194}, - nvtx3::rgb{179, 170, 71}, - nvtx3::rgb{92, 58, 113}, - nvtx3::rgb{212, 136, 57}, - nvtx3::rgb{96, 144, 194}, - nvtx3::rgb{211, 69, 56}, - nvtx3::rgb{97, 179, 155}, - nvtx3::rgb{203, 69, 131}, - nvtx3::rgb{57, 89, 48}, - nvtx3::rgb{184, 133, 199}, - nvtx3::rgb{128, 102, 51}, - nvtx3::rgb{211, 138, 130}, - nvtx3::rgb{122, 50, 49}}; - auto safe_idx = idx & (num_color - 1); // idx % num_color + static std::array color_palette = {nvtx3::rgb{106, 192, 67}, + nvtx3::rgb{191, 73, 203}, + nvtx3::rgb{93, 151, 76}, + nvtx3::rgb{96, 72, 194}, + nvtx3::rgb{179, 170, 71}, + nvtx3::rgb{92, 58, 113}, + nvtx3::rgb{212, 136, 57}, + nvtx3::rgb{96, 144, 194}, + nvtx3::rgb{211, 69, 56}, + nvtx3::rgb{97, 179, 155}, + nvtx3::rgb{203, 69, 131}, + nvtx3::rgb{57, 89, 48}, + nvtx3::rgb{184, 133, 199}, + nvtx3::rgb{128, 102, 51}, + nvtx3::rgb{211, 138, 130}, + nvtx3::rgb{122, 50, 49}}; + auto safe_idx = idx & (num_color - 1); // idx % num_color return color_palette[safe_idx]; } -void NvtxManager::rename_current_thread(std::string_view new_name) noexcept +NvtxCallTag next_call_tag() { - auto tid = syscall(SYS_gettid); - std::stringstream ss; - ss << new_name << " (" << tid << ")"; + static std::atomic_uint64_t call_counter{1ull}; + auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); + auto& nvtx_color = get_color_by_index(call_idx); + return {call_idx, nvtx_color}; +} - nvtxResourceAttributes_t attribs = {0}; - attribs.version = NVTX_VERSION; - attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE; - attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE; - attribs.identifier.ullValue = tid; - attribs.messageType = NVTX_MESSAGE_TYPE_ASCII; - auto st = ss.str(); - attribs.message.ascii = st.c_str(); - nvtxResourceHandle_t handle = - nvtxDomainResourceCreate(nvtx3::domain::get(), &attribs); +NvtxRegisteredString const& get_empty_registered_string() +{ + static NvtxRegisteredString s(""); + return s; } +} // namespace nvtx -} // namespace kvikio +} // namespace kvikio::detail diff --git a/cpp/src/file_handle.cpp b/cpp/src/file_handle.cpp index b978b49dc3..6357b7d133 100644 --- a/cpp/src/file_handle.cpp +++ b/cpp/src/file_handle.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -230,8 +230,14 @@ std::future FileHandle::pread(void* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); - KVIKIO_NVTX_FUNC_RANGE(size, nvtx_color); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); + detail::NvtxIoPayload info{ + detail::nvtx::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; + nvtx3::payload_data payload_data(info); + static detail::NvtxRegisteredString message{__PRETTY_FUNCTION__}; + detail::NvtxScopedRange range( + nvtx3::event_attributes(message, payload_data, nvtx_call_tag.color)); + if (is_host_memory(buf)) { auto op = [this](void* hostPtr_base, std::size_t size, @@ -242,8 +248,7 @@ std::future FileHandle::pread(void* buf, _file_direct_off.fd(), buf, size, file_offset, _file_direct_on.fd()); }; - return parallel_io( - op, buf, size, file_offset, task_size, 0, actual_thread_pool, call_idx, nvtx_color); + return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_call_tag); } CUcontext ctx = get_context_from_pointer(buf); @@ -280,8 +285,7 @@ std::future FileHandle::pread(void* buf, task_size, devPtr_offset, actual_thread_pool, - call_idx, - nvtx_color); + nvtx_call_tag); } std::future FileHandle::pwrite(void const* buf, @@ -300,8 +304,8 @@ std::future FileHandle::pwrite(void const* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); - KVIKIO_NVTX_FUNC_RANGE(size, nvtx_color); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); + KVIKIO_NVTX_FUNC_RANGE(size, nvtx_call_tag.color); if (is_host_memory(buf)) { auto op = [this](void const* hostPtr_base, std::size_t size, @@ -312,8 +316,7 @@ std::future FileHandle::pwrite(void const* buf, _file_direct_off.fd(), buf, size, file_offset, _file_direct_on.fd()); }; - return parallel_io( - op, buf, size, file_offset, task_size, 0, actual_thread_pool, call_idx, nvtx_color); + return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_call_tag); } CUcontext ctx = get_context_from_pointer(buf); @@ -350,8 +353,7 @@ std::future FileHandle::pwrite(void const* buf, task_size, devPtr_offset, actual_thread_pool, - call_idx, - nvtx_color); + nvtx_call_tag); } void FileHandle::read_async(void* devPtr_base, diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index 9689f08aa8..b538e2c542 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -424,8 +424,8 @@ std::future MmapHandle::pread(void* buf, auto actual_size = validate_and_adjust_read_args(size, offset); if (actual_size == 0) { return make_ready_future(actual_size); } - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); - KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_color); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); + KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_call_tag.color); auto const is_dst_buf_host_mem = is_host_memory(buf); CUcontext ctx{}; @@ -451,8 +451,7 @@ std::future MmapHandle::pread(void* buf, task_size, 0, // dst buffer offset initial value thread_pool, - call_idx, - nvtx_color); + nvtx_call_tag); } std::size_t MmapHandle::validate_and_adjust_read_args(std::optional const& size, diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp index 210ee9a31c..b19a846b02 100644 --- a/cpp/src/remote_handle.cpp +++ b/cpp/src/remote_handle.cpp @@ -813,7 +813,7 @@ std::future RemoteHandle::pread(void* buf, ThreadPool* thread_pool) { KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(size); auto task = [this](void* devPtr_base, std::size_t size, @@ -821,7 +821,7 @@ std::future RemoteHandle::pread(void* buf, std::size_t devPtr_offset) -> std::size_t { return read(static_cast(devPtr_base) + devPtr_offset, size, file_offset); }; - return parallel_io(task, buf, size, file_offset, task_size, 0, thread_pool, call_idx, nvtx_color); + return parallel_io(task, buf, size, file_offset, task_size, 0, thread_pool, nvtx_call_tag); } } // namespace kvikio