Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
04aff83
Add a new remote I/O client based on libcurl poll-based multi API
kingcrimsontianyu Jan 12, 2026
8d976b2
Update
kingcrimsontianyu Jan 12, 2026
1715f03
Update
kingcrimsontianyu Jan 13, 2026
1863d63
Make the bounce buffer move constructible and move assignable
kingcrimsontianyu Jan 13, 2026
65299f6
Update
kingcrimsontianyu Jan 13, 2026
67d28a9
Add detailed unit test
kingcrimsontianyu Jan 13, 2026
9e0fee5
Merge branch 'relax-bounce-buffer' into remote-io-poll-based
kingcrimsontianyu Jan 13, 2026
3d47c76
Update
kingcrimsontianyu Jan 14, 2026
345821f
Update
kingcrimsontianyu Jan 14, 2026
8305ee4
Update
kingcrimsontianyu Jan 14, 2026
cf1341d
Update
kingcrimsontianyu Jan 14, 2026
68d8284
Update
kingcrimsontianyu Jan 14, 2026
ca8bbc8
Update
kingcrimsontianyu Jan 14, 2026
87c0b9f
Update
kingcrimsontianyu Jan 14, 2026
7e7f6c8
Update
kingcrimsontianyu Jan 14, 2026
d3ba6f4
Update
kingcrimsontianyu Jan 14, 2026
2e65baf
Update
kingcrimsontianyu Jan 14, 2026
019c472
Update
kingcrimsontianyu Jan 14, 2026
fd49875
Merge branch 'main' into relax-bounce-buffer
kingcrimsontianyu Jan 14, 2026
4f43e5f
Merge branch 'relax-bounce-buffer' into remote-io-poll-based
kingcrimsontianyu Jan 14, 2026
cf1dc29
Impl batch copy
kingcrimsontianyu Jan 15, 2026
4e7eed3
Update
kingcrimsontianyu Jan 15, 2026
9b860fe
Make num_bounce_buffer configurable
kingcrimsontianyu Jan 16, 2026
db8f49b
Remove temporary C++ unit test
kingcrimsontianyu Jan 16, 2026
edb7e3b
Add Python API
kingcrimsontianyu Jan 16, 2026
f93c833
Fix a couple of minor issues
kingcrimsontianyu Jan 16, 2026
c024738
Update
kingcrimsontianyu Jan 16, 2026
a1f1f9b
Add more Doxygen C++ comments
kingcrimsontianyu Jan 16, 2026
e799a24
Update Python doc
kingcrimsontianyu Jan 16, 2026
d2a376e
Update the existing Python S3 unit test to include the new backend
kingcrimsontianyu Jan 16, 2026
1fd38e3
Fix header inclusion trivial issues
kingcrimsontianyu Jan 16, 2026
fde0601
Update
kingcrimsontianyu Jan 16, 2026
9d75bf8
Use addressoff to handle special cases
kingcrimsontianyu Jan 16, 2026
0671038
Merge branch 'relax-bounce-buffer' into remote-io-poll-based
kingcrimsontianyu Jan 16, 2026
267ee0c
Enable endpoint polymorphism to support more endpoint types
kingcrimsontianyu Jan 16, 2026
5a5abab
Bug fixes
kingcrimsontianyu Jan 16, 2026
499cd63
Merge branch 'main' into relax-bounce-buffer
kingcrimsontianyu Jan 21, 2026
6234e7c
Merge branch 'relax-bounce-buffer' into remote-io-poll-based
kingcrimsontianyu Jan 21, 2026
b6652a0
Move utility functions from hpp to cpp
kingcrimsontianyu Jan 21, 2026
c5e0a39
Merge branch 'main' into remote-io-poll-based
kingcrimsontianyu Jan 28, 2026
9937063
Make dtor noexcept
kingcrimsontianyu Jan 28, 2026
0aedfe5
Merge branch 'main' into remote-io-poll-based
kingcrimsontianyu Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# =============================================================================
# cmake-format: off
# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
# cmake-format: on
# =============================================================================
Expand Down Expand Up @@ -163,8 +163,16 @@ set(SOURCES
)

if(KvikIO_REMOTE_SUPPORT)
list(APPEND SOURCES "src/hdfs.cpp" "src/remote_handle.cpp" "src/detail/remote_handle.cpp"
"src/detail/tls.cpp" "src/detail/url.cpp" "src/shim/libcurl.cpp"
list(
APPEND
SOURCES
"src/hdfs.cpp"
"src/remote_handle.cpp"
"src/detail/remote_handle.cpp"
"src/detail/remote_handle_poll_based.cpp"
"src/detail/tls.cpp"
"src/detail/url.cpp"
"src/shim/libcurl.cpp"
)
endif()

Expand Down
61 changes: 60 additions & 1 deletion cpp/include/kvikio/defaults.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -16,6 +16,7 @@
#include <kvikio/compat_mode.hpp>
#include <kvikio/error.hpp>
#include <kvikio/http_status_codes.hpp>
#include <kvikio/remote_backend_type.hpp>
#include <kvikio/shim/cufile.hpp>
#include <kvikio/threadpool_wrapper.hpp>

Expand Down Expand Up @@ -49,6 +50,9 @@ bool getenv_or(std::string_view env_var_name, bool default_val);
template <>
CompatMode getenv_or(std::string_view env_var_name, CompatMode default_val);

template <>
RemoteBackendType getenv_or(std::string_view env_var_name, RemoteBackendType default_val);

template <>
std::vector<int> getenv_or(std::string_view env_var_name, std::vector<int> default_val);

Expand Down Expand Up @@ -122,6 +126,9 @@ class defaults {
bool _auto_direct_io_read;
bool _auto_direct_io_write;
bool _thread_pool_per_block_device;
RemoteBackendType _remote_backend;
std::size_t _remote_max_connections;
std::size_t _num_bounce_buffers;

static unsigned int get_num_threads_from_env();

Expand Down Expand Up @@ -417,6 +424,58 @@ class defaults {
* thread pool for all I/O operations.
*/
static void set_thread_pool_per_block_device(bool flag);

/**
* @brief Get the current remote I/O backend type.
*
* @return The currently configured RemoteBackendType.
*/
[[nodiscard]] static RemoteBackendType remote_backend();

/**
* @brief Set the remote I/O backend type.
*
* Note: Changing this after creating a RemoteHandle has no effect on existing handles. The
* backend is determined at RemoteHandle construction time.
*
* @param remote_backend The backend type to use for new RemoteHandle instances.
*/
static void set_remote_backend(RemoteBackendType remote_backend);

/**
* @brief Get the maximum number of concurrent connections for poll-based remote I/O.
*
* Only applies when using RemoteBackendType::LIBCURL_MULTI_POLL.
*
* @return Maximum number of concurrent connections.
*/
[[nodiscard]] static std::size_t remote_max_connections();

/**
* @brief Set the maximum number of concurrent connections for poll-based remote I/O.
*
* Only applies when using RemoteBackendType::LIBCURL_MULTI_POLL.
*
* @param remote_max_connections Maximum concurrent connections (must be positive).
*/
static void set_remote_max_connections(std::size_t remote_max_connections);

/**
* @brief Get the number of bounce buffers used per connection for poll-based remote I/O.
*
* Controls k-way buffering: higher values allow more overlap between network I/O and H2D
* transfers but consume more pinned memory.
*
* @return Number of bounce buffers per connection.
*/
[[nodiscard]] static std::size_t num_bounce_buffers();

/**
* @brief Set the number of bounce buffers used per connection for poll-based remote I/O.
*
* @param num_bounce_buffers Number of bounce buffers per connection (must be positive).
*/
static void set_num_bounce_buffers(std::size_t num_bounce_buffers);
};

} // namespace kvikio
51 changes: 50 additions & 1 deletion cpp/include/kvikio/detail/remote_handle.hpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,52 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include <cstring>

#include <kvikio/shim/libcurl.hpp>

namespace kvikio::detail {
/**
* @brief Check a libcurl easy interface return code and throw on error.
*
* Expands to a call to kvikio::detail::check_curl_easy(), passing `__FILE__` and `__LINE__` of the
* place where the macro is used so that they can be used for error reporting.
*
* @param err_code The CURLcode to check.
* @exception std::runtime_error if err_code is not CURLE_OK.
*/
#define KVIKIO_CHECK_CURL_EASY(err_code) \
kvikio::detail::check_curl_easy(err_code, __FILE__, __LINE__)

/**
* @brief Check a libcurl multi interface return code and throw on error.
*
* Expands to a call to kvikio::detail::check_curl_multi(), passing `__FILE__` and `__LINE__` of the
* place where the macro is used so that they can be used for error reporting.
*
* @param err_code The CURLMcode to check.
* @exception std::runtime_error if err_code is not CURLM_OK.
*/
#define KVIKIO_CHECK_CURL_MULTI(err_code) \
kvikio::detail::check_curl_multi(err_code, __FILE__, __LINE__)

/**
* @brief Check a libcurl easy interface return code and throw on error.
*
* @param err_code The CURLcode to check.
* @param filename Source filename for error reporting.
* @param line_number Source line number for error reporting.
* @exception std::runtime_error if err_code is not CURLE_OK.
*/
void check_curl_easy(CURLcode err_code, char const* filename, int line_number);

/**
* @brief Check a libcurl multi interface return code and throw on error.
*
* @param err_code The CURLMcode to check.
* @param filename Source filename for error reporting.
* @param line_number Source line number for error reporting.
* @exception std::runtime_error if err_code is not CURLM_OK.
*/
void check_curl_multi(CURLMcode err_code, char const* filename, int line_number);

/**
* @brief Callback for `CURLOPT_WRITEFUNCTION` that copies received data into a `std::string`.
*
Expand All @@ -20,4 +60,13 @@ std::size_t callback_get_string_response(char* data,
std::size_t size,
std::size_t num_bytes,
void* userdata);

/**
* @brief Set up the range request for libcurl. Use this method when HTTP range request is
* supported.
*
* @param curl A curl handle.
* @param file_offset File offset.
* @param size Read size.
*/
void setup_range_request_impl(CurlHandle& curl, std::size_t file_offset, std::size_t size);
} // namespace kvikio::detail
133 changes: 133 additions & 0 deletions cpp/include/kvikio/detail/remote_handle_poll_based.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include <cstddef>
#include <memory>
#include <mutex>
#include <optional>
#include <vector>

#include <kvikio/bounce_buffer.hpp>
#include <kvikio/remote_handle.hpp>
#include <kvikio/shim/libcurl.hpp>

namespace kvikio::detail {

/**
* @brief Manages a rotating set of bounce buffers for overlapping network I/O with H2D transfers.
*
* This class implements k-way buffering, rotating through buffers circularly: while one buffer
* receives data from the network, previously filled buffers can be asynchronously copied to device
* memory. When all buffers have been used, the class synchronizes the CUDA stream before reusing
* buffers.
*/
class BounceBufferManager {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to use this everywhere we need a bounce buffer? If so, I think it would be good to move BounceBufferManager out of this PR and submit a separate PR that introduces BounceBufferManager and uses it consistently across KvikIO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I agree. This will generalize #520 (double buffering), and benefit existing local and remote I/O handles based on pread/io_uring/easy handle.

public:
/**
* @brief Construct a BounceBufferManager with the specified number of bounce buffers.
*
* NOTE(review): this single-argument constructor is not marked `explicit`, so a `std::size_t`
* converts implicitly to a BounceBufferManager — confirm that this is intended.
*
* @param num_bounce_buffers Number of bounce buffers to allocate from the pool.
*/
BounceBufferManager(std::size_t num_bounce_buffers);

/**
* @brief Get a pointer to the current bounce buffer's data.
*
* @return Pointer to the current buffer's memory.
*/
void* data() const noexcept;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it doesn't require too many casts, it would be good to use std::byte* instead of void* throughout.

Suggested change
void* data() const noexcept;
std::byte* data() const noexcept;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.
Currently we have quite a number of places where void* has been used for buffers in function parameters and class data members. There are other places where char* is used to facilitate interaction with the libcurl API. Perhaps for the public interface we still use void*, but internally, wherever applicable, we stick to std::byte*. Let's defer this to a future modernization PR.


/**
* @brief Copy data from the current bounce buffer to device memory and rotate to the next buffer.
*
* Issues an asynchronous H2D copy and advances to the next buffer. When wrapping around to buffer
* 0, synchronizes the stream to ensure all previous copies have completed before reuse.
*
* @param dst Device memory destination pointer.
* @param size Number of bytes to copy.
* @param stream CUDA stream for the asynchronous copy.
* @exception kvikio::CUfileException if size exceeds bounce buffer capacity.
*/
void copy(void* dst, std::size_t size, CUstream stream);

private:
// Index of the bounce buffer currently in use (presumably the one data() refers to — confirm
// against the implementation).
std::size_t _bounce_buffer_idx{};
// Number of bounce buffers being rotated through (presumably equal to _bounce_buffers.size() —
// confirm against the implementation).
std::size_t _num_bounce_buffers{};
// The bounce buffers themselves, of type CudaPinnedBounceBufferPool::Buffer.
std::vector<CudaPinnedBounceBufferPool::Buffer> _bounce_buffers;
};

/**
 * @brief Per-connection bookkeeping for a single chunked transfer.
 *
 * Every concurrent connection is paired with one TransferContext. It records the destination
 * buffer and the progress of the transfer, and it optionally holds a bounce buffer manager for
 * GPU destinations.
 */
struct TransferContext {
  // NOTE(review): presumably set when more data arrives than the chunk can hold — confirm against
  // the write callback.
  bool overflow_error = false;
  // NOTE(review): presumably true when `buf` points to host memory — confirm against pread().
  bool is_host_mem = false;
  // Destination buffer of this transfer.
  char* buf = nullptr;
  // Non-owning pointer to a curl easy handle (presumably the one driving this transfer — confirm).
  CurlHandle* curl_easy_handle = nullptr;
  // NOTE(review): presumably the number of bytes expected for the current chunk — confirm.
  std::size_t chunk_size = 0;
  // Transfer progress, in bytes.
  std::size_t bytes_transferred = 0;
  // Optional bounce buffers for GPU destinations; empty by default.
  std::optional<BounceBufferManager> _bounce_buffer_manager;
};

/**
 * @brief Poll-based remote file handle using libcurl's multi interface.
 *
 * This class provides an alternative to the thread-pool-based remote I/O by using libcurl's multi
 * interface with curl_multi_poll() for managing concurrent connections. It implements chunked
 * parallel downloads with k-way buffering to overlap network transfers with host-to-device memory
 * copies.
 *
 * The class is neither copyable nor movable: it holds a libcurl multi handle that is cleaned up by
 * the destructor, as well as a mutex.
 *
 * @note Thread safety: The pread() method is protected by a mutex, making it safe to call from
 * multiple threads, though calls will be serialized.
 */
class RemoteHandlePollBased {
 private:
  // Value-initialized so that the members never hold indeterminate values, even if a constructor
  // exits early.
  CURLM* _multi{};
  std::size_t _max_connections{};
  std::vector<std::unique_ptr<CurlHandle>> _curl_easy_handles;
  std::vector<TransferContext> _transfer_ctxs;
  RemoteEndpoint* _endpoint{};
  mutable std::mutex _mutex;

 public:
  /**
   * @brief Construct a poll-based remote handle.
   *
   * Initializes the libcurl multi handle and creates the specified number of easy handles for
   * concurrent transfers.
   *
   * @param endpoint Non-owning pointer to the remote endpoint. Must outlive this object.
   * @param max_connections Maximum number of concurrent connections to use.
   * @exception kvikio::CUfileException if task_size exceeds bounce_buffer_size.
   * @exception kvikio::CUfileException if libcurl multi initialization fails.
   */
  RemoteHandlePollBased(RemoteEndpoint* endpoint, std::size_t max_connections);

  /**
   * @brief Destructor that cleans up libcurl multi resources.
   *
   * Removes all easy handles from the multi handle and performs cleanup. Errors during cleanup are
   * logged but do not throw.
   */
  ~RemoteHandlePollBased() noexcept;

  // Copying and moving were already unavailable (std::mutex member, user-declared destructor).
  // Spell that out so the ownership policy is visible and diagnostics are clear.
  RemoteHandlePollBased(RemoteHandlePollBased const&)            = delete;
  RemoteHandlePollBased& operator=(RemoteHandlePollBased const&) = delete;
  RemoteHandlePollBased(RemoteHandlePollBased&&)                 = delete;
  RemoteHandlePollBased& operator=(RemoteHandlePollBased&&)      = delete;

  /**
   * @brief Read data from the remote file into a buffer.
   *
   * Performs a parallel chunked read using multiple concurrent HTTP range requests. For device
   * memory destinations, uses bounce buffers with k-way buffering to overlap network I/O with H2D
   * transfers.
   *
   * @param buf Destination buffer (host or device memory).
   * @param size Number of bytes to read.
   * @param file_offset Offset in the remote file to start reading from.
   * @return Number of bytes actually read.
   * @exception std::overflow_error if the server returns more data than expected (may indicate the
   * server doesn't support range requests).
   * @exception std::runtime_error on libcurl errors.
   */
  std::size_t pread(void* buf, std::size_t size, std::size_t file_offset = 0);
};
} // namespace kvikio::detail
27 changes: 27 additions & 0 deletions cpp/include/kvikio/remote_backend_type.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include <cstdint>

namespace kvikio {

/**
 * @brief Enum representing the backend implementation for remote file I/O operations.
 *
 * KvikIO supports multiple libcurl-based backends for fetching data from remote endpoints (S3,
 * HTTP, etc.). Each backend has different performance characteristics.
 *
 * The underlying type is spelled `std::uint8_t` because `<cstdint>` only guarantees the
 * `std::`-qualified name; the unqualified global `uint8_t` is not required to be declared.
 */
enum class RemoteBackendType : std::uint8_t {
  LIBCURL_EASY,  ///< Use libcurl's easy interface with a thread pool for parallelism. Each chunk is
                 ///< fetched by a separate thread using blocking curl_easy_perform() calls. This is
                 ///< the default backend.
  LIBCURL_MULTI_POLL,  ///< Use libcurl's multi interface with poll-based concurrent transfers. A
                       ///< single call manages multiple concurrent connections using
                       ///< curl_multi_poll(), with k-way buffering to overlap network I/O with
                       ///< host-to-device transfers.
};

} // namespace kvikio
15 changes: 12 additions & 3 deletions cpp/include/kvikio/remote_handle.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
Expand All @@ -13,6 +13,7 @@

#include <kvikio/defaults.hpp>
#include <kvikio/error.hpp>
#include <kvikio/remote_backend_type.hpp>
#include <kvikio/threadpool_wrapper.hpp>
#include <kvikio/utils.hpp>

Expand Down Expand Up @@ -291,13 +292,20 @@ class S3EndpointWithPresignedUrl : public RemoteEndpoint {
static bool is_url_valid(std::string const& url) noexcept;
};

// Forward declaration
namespace detail {
class RemoteHandlePollBased;
}

/**
* @brief Handle of remote file.
*/
class RemoteHandle {
private:
std::unique_ptr<RemoteEndpoint> _endpoint;
std::size_t _nbytes;
std::unique_ptr<detail::RemoteHandlePollBased> _poll_handle;
RemoteBackendType _remote_backend_type;

public:
/**
Expand Down Expand Up @@ -400,8 +408,9 @@ class RemoteHandle {
RemoteHandle(std::unique_ptr<RemoteEndpoint> endpoint);

// A remote handle is moveable but not copyable.
RemoteHandle(RemoteHandle&& o) = default;
RemoteHandle& operator=(RemoteHandle&& o) = default;
~RemoteHandle();
RemoteHandle(RemoteHandle&& o);
RemoteHandle& operator=(RemoteHandle&& o);
RemoteHandle(RemoteHandle const&) = delete;
RemoteHandle& operator=(RemoteHandle const&) = delete;

Expand Down
Loading