diff --git a/libibverbs/examples/devinfo.c b/libibverbs/examples/devinfo.c index c245b1f28..f44dd18b8 100644 --- a/libibverbs/examples/devinfo.c +++ b/libibverbs/examples/devinfo.c @@ -585,6 +585,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) printf("\tmax_srq_sge:\t\t\t%d\n", device_attr.orig_attr.max_srq_sge); } printf("\tmax_pkeys:\t\t\t%d\n", device_attr.orig_attr.max_pkeys); + printf("\tmax_comp_cntr:\t\t\t%u\n", device_attr.max_comp_cntr); /* uint32_t; 0 = unsupported */ printf("\tlocal_ca_ack_delay:\t\t%d\n", device_attr.orig_attr.local_ca_ack_delay); print_odp_caps(&device_attr); diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt index f498c1532..158a5127f 100644 --- a/libibverbs/man/CMakeLists.txt +++ b/libibverbs/man/CMakeLists.txt @@ -14,7 +14,9 @@ rdma_man_pages( ibv_create_ah.3 ibv_create_ah_from_wc.3 ibv_create_comp_channel.3 + ibv_create_comp_cntr.3.md ibv_create_counters.3.md + ibv_qp_attach_comp_cntr.3.md ibv_create_cq.3 ibv_create_cq_ex.3 ibv_modify_cq.3 @@ -98,6 +100,11 @@ rdma_alias_man_pages( ibv_create_ah.3 ibv_destroy_ah.3 ibv_create_ah_from_wc.3 ibv_init_ah_from_wc.3 ibv_create_comp_channel.3 ibv_destroy_comp_channel.3 + ibv_create_comp_cntr.3 ibv_destroy_comp_cntr.3 + ibv_create_comp_cntr.3 ibv_set_comp_cntr.3 + ibv_create_comp_cntr.3 ibv_set_err_comp_cntr.3 + ibv_create_comp_cntr.3 ibv_inc_comp_cntr.3 + ibv_create_comp_cntr.3 ibv_inc_err_comp_cntr.3 ibv_create_counters.3 ibv_destroy_counters.3 ibv_create_cq.3 ibv_destroy_cq.3 ibv_create_flow.3 ibv_destroy_flow.3 diff --git a/libibverbs/man/ibv_create_comp_cntr.3.md b/libibverbs/man/ibv_create_comp_cntr.3.md new file mode 100644 index 000000000..80fca5315 --- /dev/null +++ b/libibverbs/man/ibv_create_comp_cntr.3.md @@ -0,0 +1,245 @@ +--- +date: 2026-02-09 +footer: libibverbs +header: "Libibverbs Programmer's Manual" +layout: page +license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md' +section: 3 +title: ibv_create_comp_cntr +tagline: 
Verbs +--- + +# NAME + +**ibv_create_comp_cntr**, **ibv_destroy_comp_cntr** - Create or destroy a +completion counter + +**ibv_set_comp_cntr**, **ibv_set_err_comp_cntr** - Set the value of a +completion or error counter + +**ibv_inc_comp_cntr**, **ibv_inc_err_comp_cntr** - Increment a completion or +error counter + +# SYNOPSIS + +```c +#include <infiniband/verbs.h> + +struct ibv_comp_cntr *ibv_create_comp_cntr(struct ibv_context *context, + struct ibv_comp_cntr_init_attr *cc_attr); + +int ibv_destroy_comp_cntr(struct ibv_comp_cntr *comp_cntr); + +int ibv_set_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t value); +int ibv_set_err_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t value); +int ibv_inc_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t amount); +int ibv_inc_err_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t amount); +``` + +# DESCRIPTION + +Completion counters provide a lightweight completion mechanism as an +alternative or extension to completion queues (CQs). Rather than generating +individual completion queue entries, a completion counter tracks the aggregate +number of completed operations. This makes them well suited for applications +that need to know how many requests have completed without requiring +per-request details, such as credit based flow control or tracking responses +from remote peers. + +Each completion counter maintains two distinct 64-bit values: a completion +count that is incremented on successful completions, and an error count that +is incremented when operations complete in error. + +**ibv_create_comp_cntr**() allocates a new completion counter for the RDMA +device context *context*. The properties of the counter are defined by +*cc_attr*. On success, the returned **ibv_comp_cntr** structure contains +pointers to the completion and error count values. The maximum number of +completion counters a device supports is reported by the *max_comp_cntr* +field of **ibv_device_attr_ex**. 
+ +**ibv_destroy_comp_cntr**() releases all resources associated with the +completion counter *comp_cntr*. The counter must not be attached to any QP +when destroyed. + +**ibv_set_comp_cntr**() sets the completion count of *comp_cntr* to *value*. + +**ibv_set_err_comp_cntr**() sets the error count of *comp_cntr* to *value*. + +**ibv_inc_comp_cntr**() increments the completion count of *comp_cntr* by +*amount*. + +**ibv_inc_err_comp_cntr**() increments the error count of *comp_cntr* by +*amount*. + +## External memory + +By default, the memory backing the counter values is allocated internally. +When the **IBV_COMP_CNTR_INIT_WITH_EXTERNAL_MEM** flag is set in +*ibv_comp_cntr_init_attr.flags*, the application provides its own memory for +the completion and error counts via the *comp_cntr_ext_mem* and +*err_cntr_ext_mem* fields. The external memory is described by an +**ibv_memory_location** structure which supports two modes: a virtual address +(**IBV_MEMORY_LOCATION_VA**), where the application supplies a direct pointer, or +a DMA-BUF reference (**IBV_MEMORY_LOCATION_DMABUF**), where the application +supplies a file descriptor and offset into an exported DMA-BUF. When using +DMA-BUF, the *ptr* field may also be set to provide a process-accessible +mapping of the memory; if provided, the *comp_count* and *err_count* pointers +in the returned **ibv_comp_cntr** will point to it. Using external memory +allows the counter values to reside in application-managed buffers or in +memory exported through DMA-BUF, enabling zero-copy observation of completion +progress by co-located processes or devices. + +# ARGUMENTS + +## ibv_comp_cntr + +```c +struct ibv_comp_cntr { + struct ibv_context *context; + uint32_t handle; + uint64_t *comp_count; + uint64_t *err_count; + uint64_t comp_count_max_value; + uint64_t err_count_max_value; +}; +``` + +*context* +: Device context associated with the completion counter. + +*handle* +: Kernel object handle for the completion counter. 
+ +*comp_count* +: Pointer to the current successful completion count. When the counter + is backed by CPU-accessible memory, this pointer may be read directly + by the application. + +*err_count* +: Pointer to the current error completion count. When the counter is + backed by CPU-accessible memory, this pointer may be read directly + by the application. + +*comp_count_max_value* +: The maximum value the completion count can hold. A subsequent + increment that would exceed this value wraps the counter to zero. + +*err_count_max_value* +: The maximum value the error count can hold. A subsequent increment + that would exceed this value wraps the counter to zero. + +## ibv_comp_cntr_init_attr + +```c +struct ibv_comp_cntr_init_attr { + uint32_t comp_mask; + uint32_t flags; + struct ibv_memory_location comp_cntr_ext_mem; + struct ibv_memory_location err_cntr_ext_mem; +}; +``` + +*comp_mask* +: Bitmask specifying what fields in the structure are valid. + +*flags* +: Creation flags. The following flags are supported: + + **IBV_COMP_CNTR_INIT_WITH_EXTERNAL_MEM** - Use application-provided + memory for the counter values, as specified by *comp_cntr_ext_mem* + and *err_cntr_ext_mem*. + +*comp_cntr_ext_mem* +: Memory location for the completion count when using external memory. + +*err_cntr_ext_mem* +: Memory location for the error count when using external memory. + +## ibv_memory_location + +```c +enum ibv_memory_location_type { + IBV_MEMORY_LOCATION_VA, + IBV_MEMORY_LOCATION_DMABUF, +}; + +struct ibv_memory_location { + uint8_t *ptr; + struct { + uint64_t offset; + int32_t fd; + uint32_t reserved; + } dmabuf; + uint8_t type; + uint8_t reserved[7]; +}; +``` + +*type* +: The type of memory location. **IBV_MEMORY_LOCATION_VA** for a virtual + address, or **IBV_MEMORY_LOCATION_DMABUF** for a DMA-BUF reference. + +*ptr* +: Virtual address pointer. Required when type is + **IBV_MEMORY_LOCATION_VA**. 
When type is + **IBV_MEMORY_LOCATION_DMABUF**, may optionally be set to provide a + process-accessible mapping of the DMA-BUF memory. + +*dmabuf.fd* +: DMA-BUF file descriptor (used when type is + **IBV_MEMORY_LOCATION_DMABUF**). + +*dmabuf.offset* +: Offset within the DMA-BUF. + +# RETURN VALUE + +**ibv_create_comp_cntr**() returns a pointer to the allocated ibv_comp_cntr +object, or NULL if the request fails (and sets errno to indicate the failure +reason). + +**ibv_destroy_comp_cntr**(), **ibv_set_comp_cntr**(), +**ibv_set_err_comp_cntr**(), **ibv_inc_comp_cntr**(), and +**ibv_inc_err_comp_cntr**() return 0 on success, or the value of errno on +failure (which indicates the failure reason). + +# ERRORS + +ENOTSUP +: Completion counters are not supported on this device. + +ENOMEM +: Not enough resources to create the completion counter. + +EINVAL +: Invalid argument(s) passed. + +EBUSY +: The completion counter is still attached to a QP + (**ibv_destroy_comp_cntr**() only). + +# NOTES + +Counter values should not be modified directly by writing to the memory +pointed to by *comp_count* or *err_count*. Applications must use the provided +API functions (**ibv_set_comp_cntr**(), **ibv_set_err_comp_cntr**(), +**ibv_inc_comp_cntr**(), **ibv_inc_err_comp_cntr**()) to update counter +values. + +Updates made to counter values (e.g. via **ibv_set_comp_cntr**() or +**ibv_inc_comp_cntr**()) may not be immediately visible when reading the +counter. A small delay may occur between the update and the observed value. +However, the final updated value will eventually be reflected. + +Applications should ensure that the counter value is stable before calling +**ibv_set_comp_cntr**() or **ibv_set_err_comp_cntr**(). Otherwise, concurrent +updates may be lost. 
+ +# SEE ALSO + +**ibv_qp_attach_comp_cntr**(3), **ibv_create_cq**(3), +**ibv_create_cq_ex**(3), **ibv_create_qp**(3) + +# AUTHORS + +Michael Margolin diff --git a/libibverbs/man/ibv_qp_attach_comp_cntr.3.md b/libibverbs/man/ibv_qp_attach_comp_cntr.3.md new file mode 100644 index 000000000..3f82ffa51 --- /dev/null +++ b/libibverbs/man/ibv_qp_attach_comp_cntr.3.md @@ -0,0 +1,118 @@ +--- +date: 2026-02-09 +footer: libibverbs +header: "Libibverbs Programmer's Manual" +layout: page +license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md' +section: 3 +title: ibv_qp_attach_comp_cntr +tagline: Verbs +--- + +# NAME + +**ibv_qp_attach_comp_cntr** - Attach a completion counter to a QP + +# SYNOPSIS + +```c +#include <infiniband/verbs.h> + +int ibv_qp_attach_comp_cntr(struct ibv_qp *qp, + struct ibv_comp_cntr *comp_cntr, + struct ibv_comp_cntr_attach_attr *attr); +``` + +# DESCRIPTION + +**ibv_qp_attach_comp_cntr**() attaches the completion counter *comp_cntr* to +the queue pair *qp*. The *attr* argument specifies which operation types +should update the counter. + +The QP must be in **IBV_QPS_RESET** or **IBV_QPS_INIT** state when attaching +a completion counter. Attempting to attach a counter to a QP in any other +state will fail with EINVAL. + +The completion counter starts collecting values for the specified QP once +attached. Attaching the same completion counter to multiple QPs will +accumulate values from all attached QPs into the same counter. + +Multiple completion counters can be attached to the same QP, provided their +*op_mask* values do not overlap. Attempting to attach a counter with an +*op_mask* that conflicts with an already attached counter will fail. + +The *op_mask* field controls which operation completions are counted. Local +operations (**IBV_COMP_CNTR_ATTACH_OP_SEND**, **IBV_COMP_CNTR_ATTACH_OP_RECV**, +**IBV_COMP_CNTR_ATTACH_OP_RDMA_READ**, **IBV_COMP_CNTR_ATTACH_OP_RDMA_WRITE**) +count completions initiated by the local QP. 
Remote operations +(**IBV_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_READ**, +**IBV_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_WRITE**) count completions of incoming +RDMA operations initiated by the remote side. Supported *op_mask* values may +vary by device; unsupported values will result in an ENOTSUP error. + +There is no explicit detach operation. A completion counter is implicitly +detached when the QP it is attached to is destroyed. A completion counter +cannot be destroyed while it is still attached to any QP; the QP must be +destroyed first. + +# ARGUMENTS + +*qp* +: The queue pair to attach the completion counter to. + +*comp_cntr* +: The completion counter to attach, previously created with + **ibv_create_comp_cntr**(). + +*attr* +: Attach attributes specifying which operation types update the counter. + +## ibv_comp_cntr_attach_attr + +```c +enum ibv_comp_cntr_attach_op { + IBV_COMP_CNTR_ATTACH_OP_SEND = 1 << 0, + IBV_COMP_CNTR_ATTACH_OP_RECV = 1 << 1, + IBV_COMP_CNTR_ATTACH_OP_RDMA_READ = 1 << 2, + IBV_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_READ = 1 << 3, + IBV_COMP_CNTR_ATTACH_OP_RDMA_WRITE = 1 << 4, + IBV_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_WRITE = 1 << 5, +}; + +struct ibv_comp_cntr_attach_attr { + uint32_t comp_mask; + uint32_t op_mask; +}; +``` + +*comp_mask* +: Bitmask specifying what fields in the structure are valid. + +*op_mask* +: Bitmask of **ibv_comp_cntr_attach_op** values specifying which + operation types should update the counter. + +# RETURN VALUE + +**ibv_qp_attach_comp_cntr**() returns 0 on success, or the value of errno on +failure (which indicates the failure reason). + +# ERRORS + +EINVAL +: Invalid argument(s) passed. + +ENOTSUP +: Requested operation is not supported on this device. + +EBUSY +: The *op_mask* overlaps with a completion counter already attached + to this QP. 
+ +# SEE ALSO + +**ibv_create_comp_cntr**(3), **ibv_create_qp**(3) + +# AUTHORS + +Michael Margolin diff --git a/libibverbs/man/ibv_query_device_ex.3 b/libibverbs/man/ibv_query_device_ex.3 index c77e8b4f8..2d502e6ac 100644 --- a/libibverbs/man/ibv_query_device_ex.3 +++ b/libibverbs/man/ibv_query_device_ex.3 @@ -44,6 +44,7 @@ uint64_t max_dm_size; /* Max Device Memory size (in bytes) avail struct ibv_pci_atomic_caps atomic_caps; /* PCI atomic operations capabilities, use enum ibv_pci_atomic_op_size */ uint32_t xrc_odp_caps; /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. */ uint32_t phys_port_cnt_ex /* Extended number of physical port count, allows exposing more than 255 ports device */ +uint32_t max_comp_cntr; /* Maximum number of completion counters supported (0 = unsupported) */ .in -8 }; diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 36d120eec..f9ebc49a5 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -361,6 +361,7 @@ struct ibv_device_attr_ex { struct ibv_pci_atomic_caps pci_atomic_caps; uint32_t xrc_odp_caps; uint32_t phys_port_cnt_ex; + uint32_t max_comp_cntr; }; enum ibv_mtu { @@ -485,6 +486,22 @@ struct ibv_async_event { enum ibv_event_type event_type; }; +enum ibv_memory_location_type { + IBV_MEMORY_LOCATION_VA, + IBV_MEMORY_LOCATION_DMABUF, +}; + +struct ibv_memory_location { + uint8_t *ptr; /* Virtual address; optional CPU mapping for DMA-BUF */ + struct { + uint64_t offset; /* Offset within the DMA-BUF */ + int32_t fd; /* DMA-BUF file descriptor */ + uint32_t reserved; + } dmabuf; + uint8_t type; /* Use ibv_memory_location_type */ + uint8_t reserved[7]; +}; + enum ibv_wc_status { IBV_WC_SUCCESS, IBV_WC_LOC_LEN_ERR, @@ -3018,6 +3035,69 @@ static inline int ibv_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *at return vctx->modify_cq(cq, attr); } + +struct ibv_comp_cntr { + struct ibv_context *context; + uint32_t handle; /* Kernel object handle */ + uint64_t *comp_count; /* Successful completion count */ + uint64_t *err_count; /* Error completion count */ + uint64_t comp_count_max_value; /* Completion count wraps to zero past this */ + uint64_t err_count_max_value; /* Error count wraps to zero past this */ +}; + +enum { + IBV_COMP_CNTR_INIT_WITH_EXTERNAL_MEM = 1 << 0, /* Bit flag: use application-provided counter memory */ +}; + +struct 
ibv_comp_cntr_init_attr { + uint32_t comp_mask; /* Compatibility mask */ + uint32_t flags; /* Use IBV_COMP_CNTR_INIT_* flags */ + struct ibv_memory_location comp_cntr_ext_mem; /* Completion count memory, external memory only */ + struct ibv_memory_location err_cntr_ext_mem; /* Error count memory, external memory only */ +}; + +/** + * ibv_create_comp_cntr - Create a completion counter (NULL and errno on failure) + * @context: Device context to create the counter on. + * @cc_attr: Attributes for the completion counter. + */ +struct ibv_comp_cntr *ibv_create_comp_cntr(struct ibv_context *context, + struct ibv_comp_cntr_init_attr *cc_attr); + +/** + * ibv_destroy_comp_cntr - Destroy a completion counter (returns 0 or an errno value) + * @comp_cntr: The completion counter to destroy; must not be attached to any QP. + */ +int ibv_destroy_comp_cntr(struct ibv_comp_cntr *comp_cntr); + +/** + * ibv_set_comp_cntr - Set the completion count value (returns 0 or an errno value) + * @comp_cntr: The completion counter to update. + * @value: The value to set. + */ +int ibv_set_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t value); + +/** + * ibv_set_err_comp_cntr - Set the error count value (returns 0 or an errno value) + * @comp_cntr: The completion counter to update. + * @value: The value to set. + */ +int ibv_set_err_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t value); + +/** + * ibv_inc_comp_cntr - Increment the completion count (returns 0 or an errno value) + * @comp_cntr: The completion counter to increment. + * @amount: The amount to increment by. + */ +int ibv_inc_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t amount); + +/** + * ibv_inc_err_comp_cntr - Increment the error count (returns 0 or an errno value) + * @comp_cntr: The completion counter to increment. + * @amount: The amount to increment by. + */ +int ibv_inc_err_comp_cntr(struct ibv_comp_cntr *comp_cntr, uint64_t amount); + /** * ibv_create_srq - Creates a SRQ associated with the specified protection * domain. 
@@ -3293,6 +3373,29 @@ ibv_modify_qp_rate_limit(struct ibv_qp *qp, return vctx->modify_qp_rate_limit(qp, attr); } +enum ibv_comp_cntr_attach_op { + IBV_COMP_CNTR_ATTACH_OP_SEND = 1 << 0, /* Local: initiated by this QP */ + IBV_COMP_CNTR_ATTACH_OP_RECV = 1 << 1, /* Local: initiated by this QP */ + IBV_COMP_CNTR_ATTACH_OP_RDMA_READ = 1 << 2, /* Local: initiated by this QP */ + IBV_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_READ = 1 << 3, /* Incoming, initiated by the remote side */ + IBV_COMP_CNTR_ATTACH_OP_RDMA_WRITE = 1 << 4, /* Local: initiated by this QP */ + IBV_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_WRITE = 1 << 5, /* Incoming, initiated by the remote side */ +}; + +struct ibv_comp_cntr_attach_attr { + uint32_t comp_mask; /* Compatibility mask */ + uint32_t op_mask; /* Use ibv_comp_cntr_attach_op */ +}; + +/** + * ibv_qp_attach_comp_cntr - Attach a completion counter to a QP (returns 0 or an errno value) + * @qp: The queue pair to attach the counter to; must be in RESET or INIT state. + * @comp_cntr: The completion counter to attach. + * @attr: Attach attributes; op_mask selects which operation types update the counter. + */ +int ibv_qp_attach_comp_cntr(struct ibv_qp *qp, struct ibv_comp_cntr *comp_cntr, + struct ibv_comp_cntr_attach_attr *attr); + /** * ibv_query_qp_data_in_order - Checks whether the data is guaranteed to be * written in-order.