From 6ff94fd6c679784aa1693e9a8f337e7614f8e93f Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Mon, 23 Feb 2026 17:07:40 -0500 Subject: [PATCH 1/7] Improve NVTX --- cpp/include/kvikio/detail/posix_io.hpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cpp/include/kvikio/detail/posix_io.hpp b/cpp/include/kvikio/detail/posix_io.hpp index 89aba27819..6600b3b545 100644 --- a/cpp/include/kvikio/detail/posix_io.hpp +++ b/cpp/include/kvikio/detail/posix_io.hpp @@ -82,12 +82,15 @@ ssize_t posix_host_io( size_t bytes_remaining = count; char* buffer = const_cast(static_cast(buf)); auto const page_size = get_page_size(); + nvtx3::rgb const color_bio{255, 128, 128}; + nvtx3::rgb const color_dio{128, 255, 128}; // Process all bytes in a loop (unless PartialIO::YES returns early) while (bytes_remaining > 0) { ssize_t nbytes_processed{}; if (fd_direct_on == -1) { + KVIKIO_NVTX_SCOPED_RANGE("Buffered I/O", bytes_remaining, color_bio); // Direct I/O disabled: use buffered I/O for entire transfer nbytes_processed = pread_or_write(fd_direct_off, buffer, bytes_remaining, cur_offset); } else { @@ -99,9 +102,11 @@ ssize_t posix_host_io( // This ensures subsequent iterations will have page-aligned offsets auto const aligned_cur_offset = detail::align_up(cur_offset, page_size); auto const bytes_requested = std::min(aligned_cur_offset - cur_offset, bytes_remaining); + KVIKIO_NVTX_SCOPED_RANGE("Buffered I/O", bytes_requested, color_bio); nbytes_processed = pread_or_write(fd_direct_off, buffer, bytes_requested, cur_offset); } else { if (bytes_remaining < page_size) { + KVIKIO_NVTX_SCOPED_RANGE("Buffered I/O", bytes_remaining, color_bio); // Handle unaligned suffix: remaining bytes are less than a page, use buffered I/O nbytes_processed = pread_or_write(fd_direct_off, buffer, bytes_remaining, cur_offset); } else { @@ -122,15 +127,19 @@ ssize_t posix_host_io( std::memcpy(aligned_buf, buffer, bytes_requested); } - // Perform Direct I/O using the bounce buffer - nbytes_processed = - pread_or_write(fd_direct_on, aligned_buf, bytes_requested, cur_offset); + { + KVIKIO_NVTX_SCOPED_RANGE("Direct I/O", bytes_requested, color_dio); + // Perform Direct I/O using the bounce buffer + nbytes_processed = + pread_or_write(fd_direct_on, aligned_buf, bytes_requested, cur_offset); + } if constexpr (Operation == IOOperationType::READ) { // Copy data from bounce buffer to user buffer after Direct I/O read std::memcpy(buffer, aligned_buf, nbytes_processed); } } else { + KVIKIO_NVTX_SCOPED_RANGE("Direct I/O", bytes_requested, color_dio); // Buffer is page-aligned: perform Direct I/O directly with user buffer nbytes_processed = pread_or_write(fd_direct_on, buffer, bytes_requested, cur_offset); } From 8a125a157ad67d21b89bd6345f2330fe287d274a Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Mon, 23 Feb 2026 19:33:00 -0500 Subject: [PATCH 2/7] Update --- cpp/include/kvikio/detail/nvtx.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/kvikio/detail/nvtx.hpp b/cpp/include/kvikio/detail/nvtx.hpp index ef41681c55..a92b5879da 100644 --- a/cpp/include/kvikio/detail/nvtx.hpp +++ b/cpp/include/kvikio/detail/nvtx.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -155,7 +155,7 @@ class NvtxManager { * * void some_function(){ * // Specify payload and color - * auto const nvtx3::rgb color{0, 255, 0}; + * nvtx3::rgb const color{0, 255, 0}; * KVIKIO_NVTX_FUNC_RANGE(4096, color); * ... * } From 22420f2e7bfd928c0e2847ae11db49116a66a59c Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Tue, 24 Feb 2026 17:49:30 -0500 Subject: [PATCH 3/7] Update --- cmake/rapids_config.cmake | 5 +- cpp/include/kvikio/detail/nvtx.hpp | 59 ++++++++------ .../kvikio/detail/parallel_operation.hpp | 44 +++++----- cpp/include/kvikio/file_handle.hpp | 3 +- cpp/src/detail/nvtx.cpp | 80 ++++++++++--------- cpp/src/file_handle.cpp | 47 +++++------ cpp/src/mmap.cpp | 7 +- cpp/src/remote_handle.cpp | 4 +- 8 files changed, 127 insertions(+), 122 deletions(-) diff --git a/cmake/rapids_config.cmake b/cmake/rapids_config.cmake index 9cda1f26e4..3d0f3400b9 100644 --- a/cmake/rapids_config.cmake +++ b/cmake/rapids_config.cmake @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2018-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -34,6 +34,9 @@ endif() if(NOT rapids-cmake-branch) set(rapids-cmake-branch "${_rapids_branch}") endif() + +set(rapids-cmake-repo kingcrimsontianyu/rapids-cmake) +set(rapids-cmake-branch "bump-nvtx-to-3.4.0") include("${CMAKE_CURRENT_LIST_DIR}/RAPIDS.cmake") # Don't use sccache-dist for CMake's compiler tests diff --git a/cpp/include/kvikio/detail/nvtx.hpp b/cpp/include/kvikio/detail/nvtx.hpp index a92b5879da..f231244589 100644 --- a/cpp/include/kvikio/detail/nvtx.hpp +++ b/cpp/include/kvikio/detail/nvtx.hpp @@ -4,6 +4,7 @@ */ #pragma once +#include #include #include @@ -20,8 +21,9 @@ struct libkvikio_domain { static constexpr char const* name{"libkvikio"}; }; -using nvtx_scoped_range_type = nvtx3::scoped_range_in; -using nvtx_registered_string_type = nvtx3::registered_string_in; +using NvtxScopedRange = nvtx3::scoped_range_in; +using NvtxRegisteredString = nvtx3::registered_string_in; +using NvtxColor = nvtx3::color; // Macro to concatenate two tokens x and y. #define KVIKIO_CONCAT_HELPER(x, y) x##y @@ -29,10 +31,10 @@ using nvtx_registered_string_type = nvtx3::registered_string_in auto& { \ - static kvikio::nvtx_registered_string_type a_reg_str{a_message}; \ - return a_reg_str; \ +#define KVIKIO_REGISTER_STRING(message) \ + [](const char* a_message) -> auto& { \ + static kvikio::NvtxRegisteredString a_reg_str{a_message}; \ + return a_reg_str; \ }(message) // Implementation of KVIKIO_NVTX_FUNC_RANGE() @@ -55,16 +57,16 @@ using nvtx_registered_string_type = nvtx3::registered_string_in(nvtx3::event_attributes{ \ KVIKIO_REGISTER_STRING(message), nvtx3::payload{kvikio::convert_to_64bit(payload_v)}}) -using nvtx_color_type = nvtx3::color; +struct NvtxContext { + NvtxContext(); + NvtxContext(char const* file_name, + std::size_t file_offse, + std::size_t size, + std::uint64_t call_idx, + NvtxColor color); + + char const* file_name{}; + std::size_t file_offset{}; + std::size_t size{}; + std::uint64_t call_idx{}; + NvtxColor color; +}; /** * @brief Utility singleton class for NVTX annotation. @@ -100,7 +115,7 @@ class NvtxManager { * * @return Default color. */ - static const nvtx_color_type& default_color() noexcept; + static const NvtxColor& default_color() noexcept; /** * @brief Return the color at the given index from the internal color palette whose size n is a @@ -110,15 +125,11 @@ class NvtxManager { * @param idx The index value. * @return The color picked from the internal color palette. */ - static const nvtx_color_type& get_color_by_index(std::uint64_t idx) noexcept; + static const NvtxColor& get_color_by_index(std::uint64_t idx) noexcept; - /** - * @brief Rename the current thread under the KvikIO NVTX domain. - * - * @note This NVTX feature is currently not supported by the Nsight System profiler. As a result, - * the OS thread will not be renamed in the nsys-ui. - */ - static void rename_current_thread(std::string_view new_name) noexcept; + static NvtxContext get_next_call_context(char const* file_name, + std::size_t file_offset, + std::size_t size); NvtxManager(NvtxManager const&) = delete; NvtxManager& operator=(NvtxManager const&) = delete; diff --git a/cpp/include/kvikio/detail/parallel_operation.hpp b/cpp/include/kvikio/detail/parallel_operation.hpp index 1d3c43d287..f43620754a 100644 --- a/cpp/include/kvikio/detail/parallel_operation.hpp +++ b/cpp/include/kvikio/detail/parallel_operation.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -57,13 +57,13 @@ auto make_copyable_lambda(F op) * * @return A pair of NVTX color and call index. */ -inline const std::pair get_next_color_and_call_idx() noexcept -{ - static std::atomic_uint64_t call_counter{1ull}; - auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); - auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); - return {nvtx_color, call_idx}; -} +// inline const std::pair get_next_color_and_call_idx() noexcept +// { +// static std::atomic_uint64_t call_counter{1ull}; +// auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); +// auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); +// return {nvtx_color, call_idx}; +// } /** * @brief Submit the task callable to the underlying thread pool. @@ -76,9 +76,8 @@ std::future submit_task(F op, std::size_t size, std::size_t file_offset, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - std::uint64_t nvtx_payload = 0ull, - nvtx_color_type nvtx_color = NvtxManager::default_color()) + ThreadPool* thread_pool = &defaults::thread_pool(), + NvtxContext nvtx_context = {}) { static_assert(std::is_invocable_r_v submit_task(F op, decltype(devPtr_offset)>); return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_payload, nvtx_color); + KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_context.size, nvtx_context.color); return op(buf, size, file_offset, devPtr_offset); }); } @@ -101,16 +100,14 @@ std::future submit_task(F op, * @return A future to be used later to check if the operation has finished its execution. */ template -std::future submit_move_only_task( - F op_move_only, - ThreadPool* thread_pool = &defaults::thread_pool(), - std::uint64_t nvtx_payload = 0ull, - nvtx_color_type nvtx_color = NvtxManager::default_color()) +std::future submit_move_only_task(F op_move_only, + ThreadPool* thread_pool = &defaults::thread_pool(), + NvtxContext nvtx_context = {}) { static_assert(std::is_invocable_r_v); auto op_copyable = make_copyable_lambda(std::move(op_move_only)); return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_payload, nvtx_color); + KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_context.size, nvtx_context.color); return op_copyable(); }); } @@ -140,9 +137,8 @@ std::future parallel_io(F op, std::size_t file_offset, std::size_t task_size, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - std::uint64_t call_idx = 0, - nvtx_color_type nvtx_color = NvtxManager::default_color()) + ThreadPool* thread_pool = &defaults::thread_pool(), + NvtxContext nvtx_context = {}) { KVIKIO_EXPECT(task_size > 0, "`task_size` must be positive", std::invalid_argument); KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); @@ -156,7 +152,7 @@ std::future parallel_io(F op, // Single-task guard if (task_size >= size || get_page_size() >= size) { return detail::submit_task( - op, buf, size, file_offset, devPtr_offset, thread_pool, call_idx, nvtx_color); + op, buf, size, file_offset, devPtr_offset, thread_pool, nvtx_context); } std::vector> tasks; @@ -165,7 +161,7 @@ std::future parallel_io(F op, // 1) Submit all tasks but the last one. These are all `task_size` sized tasks. while (size > task_size) { tasks.push_back(detail::submit_task( - op, buf, task_size, file_offset, devPtr_offset, thread_pool, call_idx, nvtx_color)); + op, buf, task_size, file_offset, devPtr_offset, thread_pool, nvtx_context)); file_offset += task_size; devPtr_offset += task_size; size -= task_size; @@ -180,7 +176,7 @@ std::future parallel_io(F op, } return ret; }; - return detail::submit_move_only_task(std::move(last_task), thread_pool, call_idx, nvtx_color); + return detail::submit_move_only_task(std::move(last_task), thread_pool, nvtx_context); } } // namespace kvikio diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 0bf4328b9f..da39df6438 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -33,6 +33,7 @@ namespace kvikio { class FileHandle { private: // We use two file descriptors, one opened with the O_DIRECT flag and one without. + std::string _file_path; FileWrapper _file_direct_on{}; FileWrapper _file_direct_off{}; bool _initialized{false}; diff --git a/cpp/src/detail/nvtx.cpp b/cpp/src/detail/nvtx.cpp index 45809ebab8..b3e8057231 100644 --- a/cpp/src/detail/nvtx.cpp +++ b/cpp/src/detail/nvtx.cpp @@ -1,11 +1,10 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include #include -#include #include @@ -13,58 +12,65 @@ namespace kvikio { +NvtxContext::NvtxContext() : color(NvtxManager::default_color()) {} + +NvtxContext::NvtxContext(char const* a_file_name, + std::size_t a_file_offset, + std::size_t a_size, + std::uint64_t a_call_idx, + NvtxColor a_color) + : file_name(a_file_name), + file_offset(a_file_offset), + size(a_size), + call_idx(a_call_idx), + color(a_color) +{ +} + NvtxManager& NvtxManager::instance() noexcept { static NvtxManager _instance; return _instance; } -const nvtx_color_type& NvtxManager::default_color() noexcept +const NvtxColor& NvtxManager::default_color() noexcept { - static nvtx_color_type default_color{nvtx3::argb{0, 255, 255, 255}}; + static NvtxColor default_color{nvtx3::argb{0, 255, 255, 255}}; return default_color; } -const nvtx_color_type& NvtxManager::get_color_by_index(std::uint64_t idx) noexcept +const NvtxColor& NvtxManager::get_color_by_index(std::uint64_t idx) noexcept { constexpr std::size_t num_color{16}; static_assert((num_color & (num_color - 1)) == 0); // Is power of 2 - static std::array color_palette = {nvtx3::rgb{106, 192, 67}, - nvtx3::rgb{191, 73, 203}, - nvtx3::rgb{93, 151, 76}, - nvtx3::rgb{96, 72, 194}, - nvtx3::rgb{179, 170, 71}, - nvtx3::rgb{92, 58, 113}, - nvtx3::rgb{212, 136, 57}, - nvtx3::rgb{96, 144, 194}, - nvtx3::rgb{211, 69, 56}, - nvtx3::rgb{97, 179, 155}, - nvtx3::rgb{203, 69, 131}, - nvtx3::rgb{57, 89, 48}, - nvtx3::rgb{184, 133, 199}, - nvtx3::rgb{128, 102, 51}, - nvtx3::rgb{211, 138, 130}, - nvtx3::rgb{122, 50, 49}}; - auto safe_idx = idx & (num_color - 1); // idx % num_color + static std::array color_palette = {nvtx3::rgb{106, 192, 67}, + nvtx3::rgb{191, 73, 203}, + nvtx3::rgb{93, 151, 76}, + nvtx3::rgb{96, 72, 194}, + nvtx3::rgb{179, 170, 71}, + nvtx3::rgb{92, 58, 113}, + nvtx3::rgb{212, 136, 57}, + nvtx3::rgb{96, 144, 194}, + nvtx3::rgb{211, 69, 56}, + nvtx3::rgb{97, 179, 155}, + nvtx3::rgb{203, 69, 131}, + nvtx3::rgb{57, 89, 48}, + nvtx3::rgb{184, 133, 199}, + nvtx3::rgb{128, 102, 51}, + nvtx3::rgb{211, 138, 130}, + nvtx3::rgb{122, 50, 49}}; + auto safe_idx = idx & (num_color - 1); // idx % num_color return color_palette[safe_idx]; } -void NvtxManager::rename_current_thread(std::string_view new_name) noexcept +NvtxContext NvtxManager::get_next_call_context(char const* file_name, + std::size_t file_offset, + std::size_t size) { - auto tid = syscall(SYS_gettid); - std::stringstream ss; - ss << new_name << " (" << tid << ")"; - - nvtxResourceAttributes_t attribs = {0}; - attribs.version = NVTX_VERSION; - attribs.size = NVTX_RESOURCE_ATTRIB_STRUCT_SIZE; - attribs.identifierType = NVTX_RESOURCE_TYPE_GENERIC_THREAD_NATIVE; - attribs.identifier.ullValue = tid; - attribs.messageType = NVTX_MESSAGE_TYPE_ASCII; - auto st = ss.str(); - attribs.message.ascii = st.c_str(); - nvtxResourceHandle_t handle = - nvtxDomainResourceCreate(nvtx3::domain::get(), &attribs); + static std::atomic_uint64_t call_counter{1ull}; + auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); + auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); + return {file_name, file_offset, size, call_idx, nvtx_color}; } } // namespace kvikio diff --git a/cpp/src/file_handle.cpp b/cpp/src/file_handle.cpp index b978b49dc3..c763da6c03 100644 --- a/cpp/src/file_handle.cpp +++ b/cpp/src/file_handle.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -93,14 +93,17 @@ FileHandle::FileHandle(std::string const& file_path, std::string const& flags, mode_t mode, CompatMode compat_mode) - : _initialized{true}, _compat_mode_manager{file_path, flags, mode, compat_mode, this} + : _file_path{file_path}, + _initialized{true}, + _compat_mode_manager{file_path, flags, mode, compat_mode, this} { KVIKIO_NVTX_FUNC_RANGE(); _thread_pool = get_thread_pool_per_block_device(file_path); } FileHandle::FileHandle(FileHandle&& o) noexcept - : _file_direct_on{std::exchange(o._file_direct_on, {})}, + : _file_path{std::exchange(o._file_path, {})}, + _file_direct_on{std::exchange(o._file_direct_on, {})}, _file_direct_off{std::exchange(o._file_direct_off, {})}, _initialized{std::exchange(o._initialized, false)}, _nbytes{std::exchange(o._nbytes, 0)}, @@ -112,6 +115,7 @@ FileHandle::FileHandle(FileHandle&& o) noexcept FileHandle& FileHandle::operator=(FileHandle&& o) noexcept { + _file_path = std::exchange(o._file_path, {}); _file_direct_on = std::exchange(o._file_direct_on, {}); _file_direct_off = std::exchange(o._file_direct_off, {}); _initialized = std::exchange(o._initialized, false); @@ -135,6 +139,7 @@ void FileHandle::close() noexcept KVIKIO_NVTX_FUNC_RANGE(); try { if (closed()) { return; } + _file_path.clear(); _cufile_handle.unregister_handle(); _file_direct_off.close(); _file_direct_on.close(); @@ -230,8 +235,8 @@ std::future FileHandle::pread(void* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); - KVIKIO_NVTX_FUNC_RANGE(size, nvtx_color); + auto nvtx_ctx = NvtxManager::get_next_call_context(_file_path.c_str(), file_offset, size); + KVIKIO_NVTX_FUNC_RANGE(nvtx_ctx.size, nvtx_ctx.color); if (is_host_memory(buf)) { auto op = [this](void* hostPtr_base, std::size_t size, @@ -242,8 +247,7 @@ std::future FileHandle::pread(void* buf, _file_direct_off.fd(), buf, size, file_offset, _file_direct_on.fd()); }; - return parallel_io( - op, buf, size, file_offset, task_size, 0, actual_thread_pool, call_idx, nvtx_color); + return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_ctx); } CUcontext ctx = get_context_from_pointer(buf); @@ -273,15 +277,8 @@ std::future FileHandle::pread(void* buf, return read(devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false); }; auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info(buf, &ctx); - return parallel_io(task, - devPtr_base, - size, - file_offset, - task_size, - devPtr_offset, - actual_thread_pool, - call_idx, - nvtx_color); + return parallel_io( + task, devPtr_base, size, file_offset, task_size, devPtr_offset, actual_thread_pool, nvtx_ctx); } std::future FileHandle::pwrite(void const* buf, @@ -300,8 +297,8 @@ std::future FileHandle::pwrite(void const* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); - KVIKIO_NVTX_FUNC_RANGE(size, nvtx_color); + auto nvtx_ctx = NvtxManager::get_next_call_context(_file_path.c_str(), file_offset, size); + KVIKIO_NVTX_FUNC_RANGE(nvtx_ctx.size, nvtx_ctx.color); if (is_host_memory(buf)) { auto op = [this](void const* hostPtr_base, std::size_t size, @@ -312,8 +309,7 @@ std::future FileHandle::pwrite(void const* buf, _file_direct_off.fd(), buf, size, file_offset, _file_direct_on.fd()); }; - return parallel_io( - op, buf, size, file_offset, task_size, 0, actual_thread_pool, call_idx, nvtx_color); + return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_ctx); } CUcontext ctx = get_context_from_pointer(buf); @@ -343,15 +339,8 @@ std::future FileHandle::pwrite(void const* buf, return write(devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false); }; auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info(buf, &ctx); - return parallel_io(op, - devPtr_base, - size, - file_offset, - task_size, - devPtr_offset, - actual_thread_pool, - call_idx, - nvtx_color); + return parallel_io( + op, devPtr_base, size, file_offset, task_size, devPtr_offset, actual_thread_pool, nvtx_ctx); } void FileHandle::read_async(void* devPtr_base, diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index 9689f08aa8..aaf2db6971 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -424,8 +424,8 @@ std::future MmapHandle::pread(void* buf, auto actual_size = validate_and_adjust_read_args(size, offset); if (actual_size == 0) { return make_ready_future(actual_size); } - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); - KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_color); + auto nvtx_ctx = NvtxManager::get_next_call_context(nullptr, offset, actual_size); + KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_ctx.color); auto const is_dst_buf_host_mem = is_host_memory(buf); CUcontext ctx{}; @@ -451,8 +451,7 @@ std::future MmapHandle::pread(void* buf, task_size, 0, // dst buffer offset initial value thread_pool, - call_idx, - nvtx_color); + nvtx_ctx); } std::size_t MmapHandle::validate_and_adjust_read_args(std::optional const& size, diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp index 210ee9a31c..36fd97130d 100644 --- a/cpp/src/remote_handle.cpp +++ b/cpp/src/remote_handle.cpp @@ -813,7 +813,7 @@ std::future RemoteHandle::pread(void* buf, ThreadPool* thread_pool) { KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); - auto& [nvtx_color, call_idx] = detail::get_next_color_and_call_idx(); + auto nvtx_ctx = NvtxManager::get_next_call_context(nullptr, file_offset, size); KVIKIO_NVTX_FUNC_RANGE(size); auto task = [this](void* devPtr_base, std::size_t size, @@ -821,7 +821,7 @@ std::future RemoteHandle::pread(void* buf, std::size_t devPtr_offset) -> std::size_t { return read(static_cast(devPtr_base) + devPtr_offset, size, file_offset); }; - return parallel_io(task, buf, size, file_offset, task_size, 0, thread_pool, call_idx, nvtx_color); + return parallel_io(task, buf, size, file_offset, task_size, 0, thread_pool, nvtx_ctx); } } // namespace kvikio From a4de60adf871d05546477836fe1fea56e2d970cc Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Tue, 24 Feb 2026 23:10:38 -0500 Subject: [PATCH 4/7] Update --- cpp/include/kvikio/detail/nvtx.hpp | 19 +++------ .../kvikio/detail/parallel_operation.hpp | 25 +++++------ cpp/include/kvikio/file_handle.hpp | 3 +- cpp/src/detail/nvtx.cpp | 20 +++------ cpp/src/file_handle.cpp | 41 +++++++++++-------- cpp/src/mmap.cpp | 6 +-- cpp/src/remote_handle.cpp | 4 +- 7 files changed, 51 insertions(+), 67 deletions(-) diff --git a/cpp/include/kvikio/detail/nvtx.hpp b/cpp/include/kvikio/detail/nvtx.hpp index f231244589..9db50bfe3b 100644 --- a/cpp/include/kvikio/detail/nvtx.hpp +++ b/cpp/include/kvikio/detail/nvtx.hpp @@ -88,17 +88,10 @@ using NvtxColor = nvtx3::color; nvtx3::mark_in(nvtx3::event_attributes{ \ KVIKIO_REGISTER_STRING(message), nvtx3::payload{kvikio::convert_to_64bit(payload_v)}}) -struct NvtxContext { - NvtxContext(); - NvtxContext(char const* file_name, - std::size_t file_offse, - std::size_t size, - std::uint64_t call_idx, - NvtxColor color); - - char const* file_name{}; - std::size_t file_offset{}; - std::size_t size{}; +struct NvtxCallTag { + NvtxCallTag(); + NvtxCallTag(std::uint64_t call_idx, NvtxColor color); + std::uint64_t call_idx{}; NvtxColor color; }; @@ -127,9 +120,7 @@ class NvtxManager { */ static const NvtxColor& get_color_by_index(std::uint64_t idx) noexcept; - static NvtxContext get_next_call_context(char const* file_name, - std::size_t file_offset, - std::size_t size); + static NvtxCallTag next_call_tag(); NvtxManager(NvtxManager const&) = delete; NvtxManager& operator=(NvtxManager const&) = delete; diff --git a/cpp/include/kvikio/detail/parallel_operation.hpp b/cpp/include/kvikio/detail/parallel_operation.hpp index f43620754a..54f65914bc 100644 --- a/cpp/include/kvikio/detail/parallel_operation.hpp +++ b/cpp/include/kvikio/detail/parallel_operation.hpp @@ -4,12 +4,9 @@ */ #pragma once -#include #include #include #include -#include -#include #include #include #include @@ -76,8 +73,8 @@ std::future submit_task(F op, std::size_t size, std::size_t file_offset, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - NvtxContext nvtx_context = {}) + ThreadPool* thread_pool = &defaults::thread_pool(), + NvtxCallTag nvtx_call_tag = {}) { static_assert(std::is_invocable_r_v submit_task(F op, decltype(devPtr_offset)>); return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_context.size, nvtx_context.color); + KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_call_tag.call_idx, nvtx_call_tag.color); return op(buf, size, file_offset, devPtr_offset); }); } @@ -101,13 +98,13 @@ std::future submit_task(F op, */ template std::future submit_move_only_task(F op_move_only, - ThreadPool* thread_pool = &defaults::thread_pool(), - NvtxContext nvtx_context = {}) + ThreadPool* thread_pool = &defaults::thread_pool(), + NvtxCallTag nvtx_call_tag = {}) { static_assert(std::is_invocable_r_v); auto op_copyable = make_copyable_lambda(std::move(op_move_only)); return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_context.size, nvtx_context.color); + KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_call_tag.call_idx, nvtx_call_tag.color); return op_copyable(); }); } @@ -137,8 +134,8 @@ std::future parallel_io(F op, std::size_t file_offset, std::size_t task_size, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - NvtxContext nvtx_context = {}) + ThreadPool* thread_pool = &defaults::thread_pool(), + NvtxCallTag nvtx_call_tag = {}) { KVIKIO_EXPECT(task_size > 0, "`task_size` must be positive", std::invalid_argument); KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); @@ -152,7 +149,7 @@ std::future parallel_io(F op, // Single-task guard if (task_size >= size || get_page_size() >= size) { return detail::submit_task( - op, buf, size, file_offset, devPtr_offset, thread_pool, nvtx_context); + op, buf, size, file_offset, devPtr_offset, thread_pool, nvtx_call_tag); } std::vector> tasks; @@ -161,7 +158,7 @@ std::future parallel_io(F op, // 1) Submit all tasks but the last one. These are all `task_size` sized tasks. while (size > task_size) { tasks.push_back(detail::submit_task( - op, buf, task_size, file_offset, devPtr_offset, thread_pool, nvtx_context)); + op, buf, task_size, file_offset, devPtr_offset, thread_pool, nvtx_call_tag)); file_offset += task_size; devPtr_offset += task_size; size -= task_size; @@ -176,7 +173,7 @@ std::future parallel_io(F op, } return ret; }; - return detail::submit_move_only_task(std::move(last_task), thread_pool, nvtx_context); + return detail::submit_move_only_task(std::move(last_task), thread_pool, nvtx_call_tag); } } // namespace kvikio diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index da39df6438..0bf4328b9f 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -33,7 +33,6 @@ namespace kvikio { class FileHandle { private: // We use two file descriptors, one opened with the O_DIRECT flag and one without. - std::string _file_path; FileWrapper _file_direct_on{}; FileWrapper _file_direct_off{}; bool _initialized{false}; diff --git a/cpp/src/detail/nvtx.cpp b/cpp/src/detail/nvtx.cpp index b3e8057231..343fa451b8 100644 --- a/cpp/src/detail/nvtx.cpp +++ b/cpp/src/detail/nvtx.cpp @@ -12,18 +12,10 @@ namespace kvikio { -NvtxContext::NvtxContext() : color(NvtxManager::default_color()) {} +NvtxCallTag::NvtxCallTag() : color(NvtxManager::default_color()) {} -NvtxContext::NvtxContext(char const* a_file_name, - std::size_t a_file_offset, - std::size_t a_size, - std::uint64_t a_call_idx, - NvtxColor a_color) - : file_name(a_file_name), - file_offset(a_file_offset), - size(a_size), - call_idx(a_call_idx), - color(a_color) +NvtxCallTag::NvtxCallTag(std::uint64_t a_call_idx, NvtxColor a_color) + : call_idx(a_call_idx), color(a_color) { } @@ -63,14 +55,12 @@ const NvtxColor& NvtxManager::get_color_by_index(std::uint64_t idx) noexcept return color_palette[safe_idx]; } -NvtxContext NvtxManager::get_next_call_context(char const* file_name, - std::size_t file_offset, - std::size_t size) +NvtxCallTag NvtxManager::next_call_tag() { static std::atomic_uint64_t call_counter{1ull}; auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); - return {file_name, file_offset, size, call_idx, nvtx_color}; + return {call_idx, nvtx_color}; } } // namespace kvikio diff --git a/cpp/src/file_handle.cpp b/cpp/src/file_handle.cpp index c763da6c03..47e147c0a2 100644 --- a/cpp/src/file_handle.cpp +++ b/cpp/src/file_handle.cpp @@ -93,17 +93,14 @@ FileHandle::FileHandle(std::string const& file_path, std::string const& flags, mode_t mode, CompatMode compat_mode) - : _file_path{file_path}, - _initialized{true}, - _compat_mode_manager{file_path, flags, mode, compat_mode, this} + : _initialized{true}, _compat_mode_manager{file_path, flags, mode, compat_mode, this} { KVIKIO_NVTX_FUNC_RANGE(); _thread_pool = get_thread_pool_per_block_device(file_path); } FileHandle::FileHandle(FileHandle&& o) noexcept - : _file_path{std::exchange(o._file_path, {})}, - _file_direct_on{std::exchange(o._file_direct_on, {})}, + : _file_direct_on{std::exchange(o._file_direct_on, {})}, _file_direct_off{std::exchange(o._file_direct_off, {})}, _initialized{std::exchange(o._initialized, false)}, _nbytes{std::exchange(o._nbytes, 0)}, @@ -115,7 +112,6 @@ FileHandle::FileHandle(FileHandle&& o) noexcept FileHandle& FileHandle::operator=(FileHandle&& o) noexcept { - _file_path = std::exchange(o._file_path, {}); _file_direct_on = std::exchange(o._file_direct_on, {}); _file_direct_off = std::exchange(o._file_direct_off, {}); _initialized = std::exchange(o._initialized, false); @@ -139,7 +135,6 @@ void FileHandle::close() noexcept KVIKIO_NVTX_FUNC_RANGE(); try { if (closed()) { return; } - _file_path.clear(); _cufile_handle.unregister_handle(); _file_direct_off.close(); _file_direct_on.close(); @@ -235,8 +230,8 @@ std::future FileHandle::pread(void* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto nvtx_ctx = NvtxManager::get_next_call_context(_file_path.c_str(), file_offset, size); - KVIKIO_NVTX_FUNC_RANGE(nvtx_ctx.size, nvtx_ctx.color); + auto nvtx_call_tag = NvtxManager::next_call_tag(); + KVIKIO_NVTX_FUNC_RANGE(size, nvtx_call_tag.color); if (is_host_memory(buf)) { auto op = [this](void* hostPtr_base, std::size_t size, @@ -247,7 +242,7 @@ std::future FileHandle::pread(void* buf, _file_direct_off.fd(), buf, size, file_offset, _file_direct_on.fd()); }; - return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_ctx); + return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_call_tag); } CUcontext ctx = get_context_from_pointer(buf); @@ -277,8 +272,14 @@ std::future FileHandle::pread(void* buf, return read(devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false); }; auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info(buf, &ctx); - return parallel_io( - task, devPtr_base, size, file_offset, task_size, devPtr_offset, actual_thread_pool, nvtx_ctx); + return parallel_io(task, + devPtr_base, + size, + file_offset, + task_size, + devPtr_offset, + actual_thread_pool, + nvtx_call_tag); } std::future FileHandle::pwrite(void const* buf, @@ -297,8 +298,8 @@ std::future FileHandle::pwrite(void const* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto nvtx_ctx = NvtxManager::get_next_call_context(_file_path.c_str(), file_offset, size); - KVIKIO_NVTX_FUNC_RANGE(nvtx_ctx.size, nvtx_ctx.color); + auto nvtx_call_tag = NvtxManager::next_call_tag(); + KVIKIO_NVTX_FUNC_RANGE(size, nvtx_call_tag.color); if (is_host_memory(buf)) { auto op = [this](void const* hostPtr_base, std::size_t size, @@ -309,7 +310,7 @@ std::future FileHandle::pwrite(void const* buf, _file_direct_off.fd(), buf, size, file_offset, _file_direct_on.fd()); }; - return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_ctx); + return parallel_io(op, buf, size, file_offset, task_size, 0, actual_thread_pool, nvtx_call_tag); } CUcontext ctx = get_context_from_pointer(buf); @@ -339,8 +340,14 @@ std::future FileHandle::pwrite(void const* buf, return write(devPtr_base, size, file_offset, devPtr_offset, /* sync_default_stream = */ false); }; auto [devPtr_base, base_size, devPtr_offset] = get_alloc_info(buf, &ctx); - return parallel_io( - op, devPtr_base, size, file_offset, task_size, devPtr_offset, actual_thread_pool, nvtx_ctx); + return parallel_io(op, + devPtr_base, + size, + file_offset, + task_size, + devPtr_offset, + actual_thread_pool, + nvtx_call_tag); } void FileHandle::read_async(void* devPtr_base, diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index aaf2db6971..b17e696c66 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -424,8 +424,8 @@ std::future MmapHandle::pread(void* buf, auto actual_size = validate_and_adjust_read_args(size, offset); if (actual_size == 0) { return make_ready_future(actual_size); } - auto nvtx_ctx = NvtxManager::get_next_call_context(nullptr, offset, actual_size); - KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_ctx.color); + auto nvtx_call_tag = NvtxManager::next_call_tag(); + KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_call_tag.color); auto const is_dst_buf_host_mem = is_host_memory(buf); CUcontext ctx{}; @@ -451,7 +451,7 @@ std::future MmapHandle::pread(void* buf, task_size, 0, // dst buffer offset initial value thread_pool, - nvtx_ctx); + nvtx_call_tag); } std::size_t MmapHandle::validate_and_adjust_read_args(std::optional const& size, diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp index 36fd97130d..661b8ddf59 100644 --- a/cpp/src/remote_handle.cpp +++ b/cpp/src/remote_handle.cpp @@ -813,7 +813,7 @@ std::future RemoteHandle::pread(void* buf, ThreadPool* thread_pool) { KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); - auto nvtx_ctx = NvtxManager::get_next_call_context(nullptr, file_offset, size); + auto nvtx_call_tag = NvtxManager::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(size); auto task = [this](void* devPtr_base, std::size_t size, @@ -821,7 +821,7 @@ std::future RemoteHandle::pread(void* buf, std::size_t devPtr_offset) -> std::size_t { return read(static_cast(devPtr_base) + devPtr_offset, size, file_offset); }; - return parallel_io(task, buf, size, file_offset, task_size, 0, thread_pool, nvtx_ctx); + return parallel_io(task, buf, size, file_offset, task_size, 0, thread_pool, nvtx_call_tag); } } // namespace kvikio From 0b0b96551fca595b7520a0ffadb3e071bddd660c Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Wed, 25 Feb 2026 11:11:31 -0500 Subject: [PATCH 5/7] Fix NVTX include issue --- cpp/CMakeLists.txt | 5 +++++ cpp/include/kvikio/detail/nvtx.hpp | 26 ++++++++++++++++++++++---- cpp/src/detail/nvtx.cpp | 6 ++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9dac81c12d..8f3696661a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -187,6 +187,11 @@ target_include_directories( INTERFACE "$" ) +target_include_directories( + kvikio BEFORE + PUBLIC "$>" +) + # Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`. target_link_libraries( kvikio diff --git a/cpp/include/kvikio/detail/nvtx.hpp b/cpp/include/kvikio/detail/nvtx.hpp index 9db50bfe3b..9a3da9c098 100644 --- a/cpp/include/kvikio/detail/nvtx.hpp +++ b/cpp/include/kvikio/detail/nvtx.hpp @@ -103,6 +103,11 @@ class NvtxManager { public: static NvtxManager& instance() noexcept; + NvtxManager(NvtxManager const&) = delete; + NvtxManager& operator=(NvtxManager const&) = delete; + NvtxManager(NvtxManager&&) = delete; + NvtxManager& operator=(NvtxManager&&) = delete; + /** * @brief Return the default color. * @@ -122,10 +127,7 @@ class NvtxManager { static NvtxCallTag next_call_tag(); - NvtxManager(NvtxManager const&) = delete; - NvtxManager& operator=(NvtxManager const&) = delete; - NvtxManager(NvtxManager&&) = delete; - NvtxManager& operator=(NvtxManager&&) = delete; + static NvtxRegisteredString const& get_empty_registered_string(); private: NvtxManager() = default; @@ -204,4 +206,20 @@ class NvtxManager { */ #define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload) +struct NvtxIoPayload { + NvtxRegisteredString file_path; + std::size_t file_offset; + std::size_t size; +}; + } // namespace kvikio + +NVTX3_DEFINE_SCHEMA_GET(kvikio::libkvikio_domain, + kvikio::NvtxIoPayload, + "KvikIONvtxIOPayload", + NVTX_PAYLOAD_ENTRIES((file_path, + TYPE_NVTX_REGISTERED_STRING_HANDLE, + "file_path", + "Path to the file"), + (file_offset, TYPE_SIZE, "file_offset", "File offset"), + (size, TYPE_SIZE, "size", "Transferred bytes"))) diff --git a/cpp/src/detail/nvtx.cpp b/cpp/src/detail/nvtx.cpp index 343fa451b8..031ef09f0e 100644 --- a/cpp/src/detail/nvtx.cpp +++ b/cpp/src/detail/nvtx.cpp @@ -63,4 +63,10 @@ NvtxCallTag NvtxManager::next_call_tag() return {call_idx, nvtx_color}; } +NvtxRegisteredString const& NvtxManager::get_empty_registered_string() +{ + static NvtxRegisteredString s(""); + return s; +} + } // namespace kvikio From c5e60a292d7259d7ae41cd2b8ca731000759da01 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Wed, 25 Feb 2026 14:18:54 -0500 Subject: [PATCH 6/7] Update --- cpp/CMakeLists.txt | 10 ++--- cpp/include/kvikio/detail/nvtx.hpp | 42 ++++++++++--------- .../kvikio/detail/parallel_operation.hpp | 35 ++++++++++------ cpp/src/detail/nvtx.cpp | 4 +- cpp/src/file_handle.cpp | 12 ++++-- cpp/src/mmap.cpp | 2 +- cpp/src/remote_handle.cpp | 2 +- 7 files changed, 63 insertions(+), 44 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8f3696661a..0c94c79c27 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -182,14 +182,14 @@ add_library(kvikio::kvikio ALIAS kvikio) target_include_directories( kvikio - PUBLIC "$" - "$" - INTERFACE "$" + PUBLIC "$>" ) target_include_directories( - kvikio BEFORE - PUBLIC "$>" + kvikio + PUBLIC "$" + "$" + INTERFACE "$" ) # Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`. diff --git a/cpp/include/kvikio/detail/nvtx.hpp b/cpp/include/kvikio/detail/nvtx.hpp index 9a3da9c098..3a7437db07 100644 --- a/cpp/include/kvikio/detail/nvtx.hpp +++ b/cpp/include/kvikio/detail/nvtx.hpp @@ -12,7 +12,7 @@ #include #include -namespace kvikio { +namespace kvikio::detail { /** * @brief Tag type for libkvikio's NVTX domain. @@ -31,10 +31,10 @@ using NvtxColor = nvtx3::color; // Macro to create a static, registered string that will not have a name conflict with any // registered string defined in the same scope. -#define KVIKIO_REGISTER_STRING(message) \ - [](const char* a_message) -> auto& { \ - static kvikio::NvtxRegisteredString a_reg_str{a_message}; \ - return a_reg_str; \ +#define KVIKIO_REGISTER_STRING(message) \ + [](const char* a_message) -> auto& { \ + static kvikio::detail::NvtxRegisteredString a_reg_str{a_message}; \ + return a_reg_str; \ }(message) // Implementation of KVIKIO_NVTX_FUNC_RANGE() @@ -57,16 +57,16 @@ using NvtxColor = nvtx3::color; (__VA_ARGS__) // Implementation of KVIKIO_NVTX_SCOPED_RANGE(...) -#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(message) \ - kvikio::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ - { \ - nvtx3::event_attributes \ - { \ - KVIKIO_REGISTER_STRING(message), kvikio::NvtxManager::default_color() \ - } \ +#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(message) \ + kvikio::detail::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ + { \ + nvtx3::event_attributes \ + { \ + KVIKIO_REGISTER_STRING(message), kvikio::detail::NvtxManager::default_color() \ + } \ } #define KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload_v, color) \ - kvikio::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ + kvikio::detail::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ { \ nvtx3::event_attributes \ { \ @@ -74,7 +74,7 @@ using NvtxColor = nvtx3::color; } \ } #define KVIKIO_NVTX_SCOPED_RANGE_IMPL_2(message, payload) \ - KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload, kvikio::NvtxManager::default_color()) + KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload, kvikio::detail::NvtxManager::default_color()) #define KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(_1, _2, _3, NAME, ...) NAME #define KVIKIO_NVTX_SCOPED_RANGE_IMPL(...) \ KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(__VA_ARGS__, \ @@ -84,8 +84,8 @@ using NvtxColor = nvtx3::color; (__VA_ARGS__) // Implementation of KVIKIO_NVTX_MARKER(message, payload) -#define KVIKIO_NVTX_MARKER_IMPL(message, payload_v) \ - nvtx3::mark_in(nvtx3::event_attributes{ \ +#define KVIKIO_NVTX_MARKER_IMPL(message, payload_v) \ + nvtx3::mark_in(nvtx3::event_attributes{ \ KVIKIO_REGISTER_STRING(message), nvtx3::payload{kvikio::convert_to_64bit(payload_v)}}) struct NvtxCallTag { @@ -210,16 +210,18 @@ struct NvtxIoPayload { NvtxRegisteredString file_path; std::size_t file_offset; std::size_t size; + std::size_t call_idx; }; -} // namespace kvikio +} // namespace kvikio::detail -NVTX3_DEFINE_SCHEMA_GET(kvikio::libkvikio_domain, - kvikio::NvtxIoPayload, +NVTX3_DEFINE_SCHEMA_GET(kvikio::detail::libkvikio_domain, + kvikio::detail::NvtxIoPayload, "KvikIONvtxIOPayload", NVTX_PAYLOAD_ENTRIES((file_path, TYPE_NVTX_REGISTERED_STRING_HANDLE, "file_path", "Path to the file"), (file_offset, TYPE_SIZE, "file_offset", "File offset"), - (size, TYPE_SIZE, "size", "Transferred bytes"))) + (size, TYPE_SIZE, "size", "Transferred bytes"), + (call_idx, TYPE_SIZE, "call_idx", "Call index"))) diff --git a/cpp/include/kvikio/detail/parallel_operation.hpp b/cpp/include/kvikio/detail/parallel_operation.hpp index 54f65914bc..6327176238 100644 --- a/cpp/include/kvikio/detail/parallel_operation.hpp +++ b/cpp/include/kvikio/detail/parallel_operation.hpp @@ -73,8 +73,8 @@ std::future submit_task(F op, std::size_t size, std::size_t file_offset, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - NvtxCallTag nvtx_call_tag = {}) + ThreadPool* thread_pool = &defaults::thread_pool(), + detail::NvtxCallTag nvtx_call_tag = {}) { static_assert(std::is_invocable_r_v submit_task(F op, decltype(devPtr_offset)>); return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_call_tag.call_idx, nvtx_call_tag.color); + detail::NvtxIoPayload info{detail::NvtxManager::get_empty_registered_string(), + file_offset, + size, + nvtx_call_tag.call_idx}; + nvtx3::payload_data payload_data(info); + static detail::NvtxRegisteredString message{"Task"}; + detail::NvtxScopedRange range( + nvtx3::event_attributes(message, payload_data, nvtx_call_tag.color)); return op(buf, size, file_offset, devPtr_offset); }); } @@ -98,15 +105,11 @@ std::future submit_task(F op, */ template std::future submit_move_only_task(F op_move_only, - ThreadPool* thread_pool = &defaults::thread_pool(), - NvtxCallTag nvtx_call_tag = {}) + ThreadPool* thread_pool = &defaults::thread_pool()) { static_assert(std::is_invocable_r_v); auto op_copyable = make_copyable_lambda(std::move(op_move_only)); - return thread_pool->submit_task([=] { - KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_call_tag.call_idx, nvtx_call_tag.color); - return op_copyable(); - }); + return thread_pool->submit_task([=] { return op_copyable(); }); } } // namespace detail @@ -134,8 +137,8 @@ std::future parallel_io(F op, std::size_t file_offset, std::size_t task_size, std::size_t devPtr_offset, - ThreadPool* thread_pool = &defaults::thread_pool(), - NvtxCallTag nvtx_call_tag = {}) + ThreadPool* thread_pool = &defaults::thread_pool(), + detail::NvtxCallTag nvtx_call_tag = {}) { KVIKIO_EXPECT(task_size > 0, "`task_size` must be positive", std::invalid_argument); KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); @@ -167,13 +170,21 @@ std::future parallel_io(F op, // 2) Submit the last task, which consists of performing the last I/O and waiting the previous // tasks. auto last_task = [=, tasks = std::move(tasks)]() mutable -> std::size_t { + detail::NvtxIoPayload info{detail::NvtxManager::get_empty_registered_string(), + file_offset, + size, + nvtx_call_tag.call_idx}; + nvtx3::payload_data payload_data(info); + static detail::NvtxRegisteredString message{"Last task"}; + detail::NvtxScopedRange range( + nvtx3::event_attributes(message, payload_data, nvtx_call_tag.color)); auto ret = op(buf, size, file_offset, devPtr_offset); for (auto& task : tasks) { ret += task.get(); } return ret; }; - return detail::submit_move_only_task(std::move(last_task), thread_pool, nvtx_call_tag); + return detail::submit_move_only_task(std::move(last_task), thread_pool); } } // namespace kvikio diff --git a/cpp/src/detail/nvtx.cpp b/cpp/src/detail/nvtx.cpp index 031ef09f0e..7de05e31f4 100644 --- a/cpp/src/detail/nvtx.cpp +++ b/cpp/src/detail/nvtx.cpp @@ -10,7 +10,7 @@ #include -namespace kvikio { +namespace kvikio::detail { NvtxCallTag::NvtxCallTag() : color(NvtxManager::default_color()) {} @@ -69,4 +69,4 @@ NvtxRegisteredString const& NvtxManager::get_empty_registered_string() return s; } -} // namespace kvikio +} // namespace kvikio::detail diff --git a/cpp/src/file_handle.cpp b/cpp/src/file_handle.cpp index 47e147c0a2..59daf1ce6d 100644 --- a/cpp/src/file_handle.cpp +++ b/cpp/src/file_handle.cpp @@ -230,8 +230,14 @@ std::future FileHandle::pread(void* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto nvtx_call_tag = NvtxManager::next_call_tag(); - KVIKIO_NVTX_FUNC_RANGE(size, nvtx_call_tag.color); + auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); + detail::NvtxIoPayload info{ + detail::NvtxManager::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; + nvtx3::payload_data payload_data(info); + static detail::NvtxRegisteredString message{__PRETTY_FUNCTION__}; + detail::NvtxScopedRange range( + nvtx3::event_attributes(message, payload_data, nvtx_call_tag.color)); + if (is_host_memory(buf)) { auto op = [this](void* hostPtr_base, std::size_t size, @@ -298,7 +304,7 @@ std::future FileHandle::pwrite(void const* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto nvtx_call_tag = NvtxManager::next_call_tag(); + auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(size, nvtx_call_tag.color); if (is_host_memory(buf)) { auto op = [this](void const* hostPtr_base, diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index b17e696c66..4b1c6517e1 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -424,7 +424,7 @@ std::future MmapHandle::pread(void* buf, auto actual_size = validate_and_adjust_read_args(size, offset); if (actual_size == 0) { return make_ready_future(actual_size); } - auto nvtx_call_tag = NvtxManager::next_call_tag(); + auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_call_tag.color); auto const is_dst_buf_host_mem = is_host_memory(buf); diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp index 661b8ddf59..0b339209b6 100644 --- a/cpp/src/remote_handle.cpp +++ b/cpp/src/remote_handle.cpp @@ -813,7 +813,7 @@ std::future RemoteHandle::pread(void* buf, ThreadPool* thread_pool) { KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); - auto nvtx_call_tag = NvtxManager::next_call_tag(); + auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(size); auto task = [this](void* devPtr_base, std::size_t size, From 8183ece320c6de25c248a7dff9178ea263397dc8 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Wed, 25 Feb 2026 15:34:00 -0500 Subject: [PATCH 7/7] Update --- cpp/include/kvikio/detail/nvtx.hpp | 160 ++++++++++-------- .../kvikio/detail/parallel_operation.hpp | 30 +--- cpp/src/detail/nvtx.cpp | 25 +-- cpp/src/file_handle.cpp | 6 +- cpp/src/mmap.cpp | 2 +- cpp/src/remote_handle.cpp | 2 +- 6 files changed, 103 insertions(+), 122 deletions(-) diff --git a/cpp/include/kvikio/detail/nvtx.hpp b/cpp/include/kvikio/detail/nvtx.hpp index 3a7437db07..777bf1f694 100644 --- a/cpp/include/kvikio/detail/nvtx.hpp +++ b/cpp/include/kvikio/detail/nvtx.hpp @@ -25,6 +25,85 @@ using NvtxScopedRange = nvtx3::scoped_range_in; using NvtxRegisteredString = nvtx3::registered_string_in; using NvtxColor = nvtx3::color; +/** + * @brief Identifies a group of related NVTX ranges originating from a single pread/pwrite call. + * + * All tasks spawned by the same I/O call share one NvtxCallTag, giving them the same color and call + * index in the profiler timeline. This enables visual correlation of parallel tasks across worker + * threads. + */ +struct NvtxCallTag { + std::uint64_t call_idx{}; + NvtxColor color; +}; + +/** + * @brief Structured NVTX payload for I/O operations. + * + * Attached to NVTX ranges via `nvtx3::payload_data` to provide labeled fields in profiling tools + * such as Nsight Systems (requires NVTX 3.3+ and Nsight Systems 2024.6+). + */ +struct NvtxIoPayload { + NvtxRegisteredString file_path; + std::size_t file_offset; + std::size_t size; + std::uint64_t call_idx; +}; + +namespace nvtx { + +/** + * @brief Return the default color. + * + * @return Default color. + */ +const NvtxColor& default_color() noexcept; + +/** + * @brief Return the color at the given index from the internal color palette whose size n is a + * power of 2. The index may exceed the size of the color palette, in which case it wraps around, + * i.e. (idx mod n). + * + * @param idx The index value. + * @return The color picked from the internal color palette. + */ +NvtxColor const& get_color_by_index(std::uint64_t idx) noexcept; + +/** + * @brief Create a new call tag for correlating NVTX ranges from a single pread/pwrite call. + * + * Each invocation atomically increments a global counter and derives a color from the counter + * value. The counter wraps around at the maximum value of `std::uint64_t` (well-defined in C++). + * + * @return A call tag with a unique call index and its associated color. + */ +NvtxCallTag next_call_tag(); + +/** + * @brief Return a registered string with empty content. + * + * Useful as a placeholder for structured payload fields (e.g., file path) that are not yet + * populated. + * + * @return A reference to a statically allocated empty registered string. + */ +NvtxRegisteredString const& get_empty_registered_string(); + +} // namespace nvtx + +} // namespace kvikio::detail + +NVTX3_DEFINE_SCHEMA_GET(kvikio::detail::libkvikio_domain, + kvikio::detail::NvtxIoPayload, + "KvikIONvtxIOPayload", + NVTX_PAYLOAD_ENTRIES((file_path, + TYPE_NVTX_REGISTERED_STRING_HANDLE, + "file_path", + "Path to the file"), + (file_offset, TYPE_SIZE, "file_offset", "File offset"), + (size, TYPE_SIZE, "size", "Transferred bytes"), + (call_idx, TYPE_UINT64, "call_idx", "Call index"))) + // Macro to concatenate two tokens x and y. #define KVIKIO_CONCAT_HELPER(x, y) x##y #define KVIKIO_CONCAT(x, y) KVIKIO_CONCAT_HELPER(x, y) @@ -57,13 +136,13 @@ using NvtxColor = nvtx3::color; (__VA_ARGS__) // Implementation of KVIKIO_NVTX_SCOPED_RANGE(...) -#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(message) \ - kvikio::detail::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ - { \ - nvtx3::event_attributes \ - { \ - KVIKIO_REGISTER_STRING(message), kvikio::detail::NvtxManager::default_color() \ - } \ +#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(message) \ + kvikio::detail::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ + { \ + nvtx3::event_attributes \ + { \ + KVIKIO_REGISTER_STRING(message), kvikio::detail::nvtx::default_color() \ + } \ } #define KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload_v, color) \ kvikio::detail::NvtxScopedRange KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \ @@ -74,7 +153,7 @@ using NvtxColor = nvtx3::color; } \ } #define KVIKIO_NVTX_SCOPED_RANGE_IMPL_2(message, payload) \ - KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload, kvikio::detail::NvtxManager::default_color()) + KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload, kvikio::detail::nvtx::default_color()) #define KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(_1, _2, _3, NAME, ...) NAME #define KVIKIO_NVTX_SCOPED_RANGE_IMPL(...) \ KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(__VA_ARGS__, \ @@ -88,51 +167,6 @@ using NvtxColor = nvtx3::color; nvtx3::mark_in(nvtx3::event_attributes{ \ KVIKIO_REGISTER_STRING(message), nvtx3::payload{kvikio::convert_to_64bit(payload_v)}}) -struct NvtxCallTag { - NvtxCallTag(); - NvtxCallTag(std::uint64_t call_idx, NvtxColor color); - - std::uint64_t call_idx{}; - NvtxColor color; -}; - -/** - * @brief Utility singleton class for NVTX annotation. - */ -class NvtxManager { - public: - static NvtxManager& instance() noexcept; - - NvtxManager(NvtxManager const&) = delete; - NvtxManager& operator=(NvtxManager const&) = delete; - NvtxManager(NvtxManager&&) = delete; - NvtxManager& operator=(NvtxManager&&) = delete; - - /** - * @brief Return the default color. - * - * @return Default color. - */ - static const NvtxColor& default_color() noexcept; - - /** - * @brief Return the color at the given index from the internal color palette whose size n is a - * power of 2. The index may exceed the size of the color palette, in which case it wraps around, - * i.e. (idx mod n). - * - * @param idx The index value. - * @return The color picked from the internal color palette. - */ - static const NvtxColor& get_color_by_index(std::uint64_t idx) noexcept; - - static NvtxCallTag next_call_tag(); - - static NvtxRegisteredString const& get_empty_registered_string(); - - private: - NvtxManager() = default; -}; - /** * @brief Convenience macro for generating an NVTX range in the `libkvikio` domain from the lifetime * of a function. Can be used inside a regular function or a lambda expression. @@ -205,23 +239,3 @@ class NvtxManager { * ``` */ #define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload) - -struct NvtxIoPayload { - NvtxRegisteredString file_path; - std::size_t file_offset; - std::size_t size; - std::size_t call_idx; -}; - -} // namespace kvikio::detail - -NVTX3_DEFINE_SCHEMA_GET(kvikio::detail::libkvikio_domain, - kvikio::detail::NvtxIoPayload, - "KvikIONvtxIOPayload", - NVTX_PAYLOAD_ENTRIES((file_path, - TYPE_NVTX_REGISTERED_STRING_HANDLE, - "file_path", - "Path to the file"), - (file_offset, TYPE_SIZE, "file_offset", "File offset"), - (size, TYPE_SIZE, "size", "Transferred bytes"), - (call_idx, TYPE_SIZE, "call_idx", "Call index"))) diff --git a/cpp/include/kvikio/detail/parallel_operation.hpp b/cpp/include/kvikio/detail/parallel_operation.hpp index 6327176238..187dfbf624 100644 --- a/cpp/include/kvikio/detail/parallel_operation.hpp +++ b/cpp/include/kvikio/detail/parallel_operation.hpp @@ -44,24 +44,6 @@ auto make_copyable_lambda(F op) [sp](auto&&... args) -> decltype(auto) { return (*sp)(std::forward(args)...); }; } -/** - * @brief Determine the NVTX color and call index. They are used to identify tasks from different - * pread/pwrite calls. Tasks from the same pread/pwrite call are given the same color and call - * index. The call index is atomically incremented on each pread/pwrite call, and will wrap around - * once it reaches the maximum value the integer type `std::uint64_t` can hold (this overflow - * behavior is well-defined in C++). The color is picked from an internal color palette according to - * the call index value. - * - * @return A pair of NVTX color and call index. - */ -// inline const std::pair get_next_color_and_call_idx() noexcept -// { -// static std::atomic_uint64_t call_counter{1ull}; -// auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); -// auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); -// return {nvtx_color, call_idx}; -// } - /** * @brief Submit the task callable to the underlying thread pool. * @@ -84,10 +66,8 @@ std::future submit_task(F op, decltype(devPtr_offset)>); return thread_pool->submit_task([=] { - detail::NvtxIoPayload info{detail::NvtxManager::get_empty_registered_string(), - file_offset, - size, - nvtx_call_tag.call_idx}; + detail::NvtxIoPayload info{ + detail::nvtx::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; nvtx3::payload_data payload_data(info); static detail::NvtxRegisteredString message{"Task"}; detail::NvtxScopedRange range( @@ -170,10 +150,8 @@ std::future parallel_io(F op, // 2) Submit the last task, which consists of performing the last I/O and waiting the previous // tasks. auto last_task = [=, tasks = std::move(tasks)]() mutable -> std::size_t { - detail::NvtxIoPayload info{detail::NvtxManager::get_empty_registered_string(), - file_offset, - size, - nvtx_call_tag.call_idx}; + detail::NvtxIoPayload info{ + detail::nvtx::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; nvtx3::payload_data payload_data(info); static detail::NvtxRegisteredString message{"Last task"}; detail::NvtxScopedRange range( diff --git a/cpp/src/detail/nvtx.cpp b/cpp/src/detail/nvtx.cpp index 7de05e31f4..05cd0720f4 100644 --- a/cpp/src/detail/nvtx.cpp +++ b/cpp/src/detail/nvtx.cpp @@ -12,26 +12,14 @@ namespace kvikio::detail { -NvtxCallTag::NvtxCallTag() : color(NvtxManager::default_color()) {} - -NvtxCallTag::NvtxCallTag(std::uint64_t a_call_idx, NvtxColor a_color) - : call_idx(a_call_idx), color(a_color) -{ -} - -NvtxManager& NvtxManager::instance() noexcept -{ - static NvtxManager _instance; - return _instance; -} - -const NvtxColor& NvtxManager::default_color() noexcept +namespace nvtx { +const NvtxColor& default_color() noexcept { static NvtxColor default_color{nvtx3::argb{0, 255, 255, 255}}; return default_color; } -const NvtxColor& NvtxManager::get_color_by_index(std::uint64_t idx) noexcept +const NvtxColor& get_color_by_index(std::uint64_t idx) noexcept { constexpr std::size_t num_color{16}; static_assert((num_color & (num_color - 1)) == 0); // Is power of 2 @@ -55,18 +43,19 @@ const NvtxColor& NvtxManager::get_color_by_index(std::uint64_t idx) noexcept return color_palette[safe_idx]; } -NvtxCallTag NvtxManager::next_call_tag() +NvtxCallTag next_call_tag() { static std::atomic_uint64_t call_counter{1ull}; auto call_idx = call_counter.fetch_add(1ull, std::memory_order_relaxed); - auto& nvtx_color = NvtxManager::get_color_by_index(call_idx); + auto& nvtx_color = get_color_by_index(call_idx); return {call_idx, nvtx_color}; } -NvtxRegisteredString const& NvtxManager::get_empty_registered_string() +NvtxRegisteredString const& get_empty_registered_string() { static NvtxRegisteredString s(""); return s; } +} // namespace nvtx } // namespace kvikio::detail diff --git a/cpp/src/file_handle.cpp b/cpp/src/file_handle.cpp index 59daf1ce6d..6357b7d133 100644 --- a/cpp/src/file_handle.cpp +++ b/cpp/src/file_handle.cpp @@ -230,9 +230,9 @@ std::future FileHandle::pread(void* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); detail::NvtxIoPayload info{ - detail::NvtxManager::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; + detail::nvtx::get_empty_registered_string(), file_offset, size, nvtx_call_tag.call_idx}; nvtx3::payload_data payload_data(info); static detail::NvtxRegisteredString message{__PRETTY_FUNCTION__}; detail::NvtxScopedRange range( @@ -304,7 +304,7 @@ std::future FileHandle::pwrite(void const* buf, (_thread_pool != nullptr && thread_pool == &defaults::thread_pool()) ? _thread_pool : thread_pool; - auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(size, nvtx_call_tag.color); if (is_host_memory(buf)) { auto op = [this](void const* hostPtr_base, diff --git a/cpp/src/mmap.cpp b/cpp/src/mmap.cpp index 4b1c6517e1..b538e2c542 100644 --- a/cpp/src/mmap.cpp +++ b/cpp/src/mmap.cpp @@ -424,7 +424,7 @@ std::future MmapHandle::pread(void* buf, auto actual_size = validate_and_adjust_read_args(size, offset); if (actual_size == 0) { return make_ready_future(actual_size); } - auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(actual_size, nvtx_call_tag.color); auto const is_dst_buf_host_mem = is_host_memory(buf); diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp index 0b339209b6..b19a846b02 100644 --- a/cpp/src/remote_handle.cpp +++ b/cpp/src/remote_handle.cpp @@ -813,7 +813,7 @@ std::future RemoteHandle::pread(void* buf, ThreadPool* thread_pool) { KVIKIO_EXPECT(thread_pool != nullptr, "The thread pool must not be nullptr"); - auto nvtx_call_tag = detail::NvtxManager::next_call_tag(); + auto nvtx_call_tag = detail::nvtx::next_call_tag(); KVIKIO_NVTX_FUNC_RANGE(size); auto task = [this](void* devPtr_base, std::size_t size,