diff --git a/debian/ibverbs-providers.symbols b/debian/ibverbs-providers.symbols index e280809bd..7dfe785f7 100644 --- a/debian/ibverbs-providers.symbols +++ b/debian/ibverbs-providers.symbols @@ -173,6 +173,7 @@ libefa.so.1 ibverbs-providers #MINVER# EFA_1.2@EFA_1.2 43 EFA_1.3@EFA_1.3 50 EFA_1.4@EFA_1.4 59 + EFA_1.5@EFA_1.5 63 efadv_create_driver_qp@EFA_1.0 24 efadv_create_qp_ex@EFA_1.1 26 efadv_query_device@EFA_1.1 26 @@ -182,6 +183,8 @@ libefa.so.1 ibverbs-providers #MINVER# efadv_query_mr@EFA_1.3 50 efadv_query_qp_wqs@EFA_1.4 59 efadv_query_cq@EFA_1.4 59 + efadv_get_max_sq_depth@EFA_1.5 63 + efadv_get_max_rq_depth@EFA_1.5 63 libhns.so.1 ibverbs-providers #MINVER# * Build-Depends-Package: libibverbs-dev HNS_1.0@HNS_1.0 51 diff --git a/kernel-headers/rdma/efa-abi.h b/kernel-headers/rdma/efa-abi.h index 98b71b997..13225b038 100644 --- a/kernel-headers/rdma/efa-abi.h +++ b/kernel-headers/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef EFA_ABI_USER_H @@ -44,7 +44,8 @@ struct efa_ibv_alloc_ucontext_resp { __u32 max_llq_size; /* bytes */ __u16 max_tx_batch; /* units of 64 bytes */ __u16 min_sq_wr; - __u8 reserved_a0[4]; + __u16 inline_buf_size_ex; + __u8 reserved_b0[2]; }; struct efa_ibv_alloc_pd_resp { diff --git a/providers/efa/CMakeLists.txt b/providers/efa/CMakeLists.txt index c4ce3c0fe..ea082f0cf 100644 --- a/providers/efa/CMakeLists.txt +++ b/providers/efa/CMakeLists.txt @@ -3,7 +3,7 @@ if (ENABLE_LTTNG AND LTTNGUST_FOUND) endif() rdma_shared_provider(efa libefa.map - 1 1.4.${PACKAGE_VERSION} + 1 1.5.${PACKAGE_VERSION} ${TRACE_FILE} efa.c verbs.c diff --git a/providers/efa/efa.c b/providers/efa/efa.c index a0a95beb8..94a4126ba 100644 --- a/providers/efa/efa.c +++ b/providers/efa/efa.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2019-2025 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019-2026 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include @@ -83,6 +83,10 @@ static struct verbs_context *efa_alloc_context(struct ibv_device *vdev, ctx->cqe_size = sizeof(struct efa_io_rx_cdesc); ctx->ex_cqe_size = sizeof(struct efa_io_rx_cdesc_ex); ctx->inline_buf_size = resp.inline_buf_size; + ctx->inline_buf_size_ex = resp.inline_buf_size_ex; + if (ctx->inline_buf_size_ex == 0) + ctx->inline_buf_size_ex = ctx->inline_buf_size; + ctx->max_llq_size = resp.max_llq_size; ctx->max_tx_batch = resp.max_tx_batch; ctx->min_sq_wr = resp.min_sq_wr; diff --git a/providers/efa/efa.h b/providers/efa/efa.h index 25b5e8f99..4abdbd6f8 100644 --- a/providers/efa/efa.h +++ b/providers/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2019-2025 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019-2026 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef __EFA_H__ @@ -31,6 +31,7 @@ struct efa_context { uint32_t cmds_supp_udata_mask; uint16_t sub_cqs_per_cq; uint16_t inline_buf_size; + uint16_t inline_buf_size_ex; uint32_t max_llq_size; uint32_t device_caps; uint32_t max_sq_wr; @@ -133,6 +134,21 @@ struct efa_rq { size_t buf_size; }; +struct efa_tx_wqe_ctx { + /* wqe buffer */ + void *buff; + /* wqe meta descriptor */ + struct efa_io_tx_meta_desc *md; + /* wqe local memory / SGL */ + struct efa_io_tx_buf_desc *local_mem; + /* wqe remote memory - RDMA only */ + struct efa_io_remote_mem_addr *remote_mem; + /* wqe inline data buffer */ + uint8_t *inline_data; + /* max sge allowed for this wqe */ + uint8_t max_sge; +}; + struct efa_sq { struct efa_wq wq; uint8_t *desc; @@ -141,6 +157,8 @@ struct efa_sq { size_t max_inline_data; size_t max_wr_rdma_sge; uint16_t max_batch_wr; + uint16_t wqe_size; + bool inline_write_enabled; /* Buffer for pending WR entries in the current session */ uint8_t *local_queue; @@ -149,7 +167,7 @@ struct efa_sq { /* Phase before current session */ int phase_rb; /* Current wqe being built */ - struct efa_io_tx_wqe *curr_tx_wqe; + struct efa_tx_wqe_ctx curr_tx_wqe; }; struct efa_qp { diff --git a/providers/efa/efa_io_defs.h b/providers/efa/efa_io_defs.h index e4f6f78ac..fccb217b7 100644 --- a/providers/efa/efa_io_defs.h +++ b/providers/efa/efa_io_defs.h @@ -9,6 +9,7 @@ #define EFA_IO_TX_DESC_NUM_BUFS 2 #define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1 #define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32 +#define EFA_IO_TX_DESC_INLINE_MAX_SIZE_128 80 #define EFA_IO_TX_DESC_IMM_DATA_SIZE 4 enum efa_io_queue_type { @@ -164,9 +165,22 @@ struct efa_io_rdma_req { struct efa_io_tx_buf_desc local_mem[1]; }; +struct efa_io_rdma_req_128 { + /* Remote memory address */ + struct efa_io_remote_mem_addr remote_mem; + + union { + /* Local memory address */ + struct efa_io_tx_buf_desc local_mem[1]; + + /* inline data for RDMA */ + uint8_t inline_data[80]; + }; +}; + /* - * Tx WQE, composed of tx meta descriptors followed by 
either tx buffer - * descriptors or inline data + * 64-byte Tx WQE, composed of tx meta descriptors followed by either tx + * buffer descriptors or inline data */ struct efa_io_tx_wqe { /* TX meta */ @@ -183,6 +197,25 @@ struct efa_io_tx_wqe { } data; }; +/* + * 128-byte Tx WQE, composed of tx meta descriptors followed by either tx + * buffer descriptors or inline data + */ +struct efa_io_tx_wqe_128 { + /* TX meta */ + struct efa_io_tx_meta_desc meta; + + union { + /* Send buffer descriptors */ + struct efa_io_tx_buf_desc sgl[2]; + + uint8_t inline_data[80]; + + /* RDMA local and remote memory addresses */ + struct efa_io_rdma_req_128 rdma_req; + } data; +}; + /* * Rx buffer descriptor; RX WQE is composed of one or more RX buffer * descriptors. diff --git a/providers/efa/efadv.h b/providers/efa/efadv.h index 7c034f881..bb2f9282b 100644 --- a/providers/efa/efadv.h +++ b/providers/efa/efadv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2019-2025 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2019-2026 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef __EFADV_H__ @@ -32,7 +32,7 @@ struct efadv_device_attr { uint16_t max_sq_sge; uint16_t max_rq_sge; uint16_t inline_buf_size; - uint8_t reserved[2]; + uint16_t inline_buf_size_ex; uint32_t device_caps; uint32_t max_rdma_size; }; @@ -47,6 +47,29 @@ struct efadv_ah_attr { uint8_t reserved[6]; }; +enum { + EFADV_SQ_DEPTH_ATTR_INLINE_WRITE = 1 << 0, +}; + +struct efadv_sq_depth_attr { + uint64_t comp_mask; + uint32_t flags; + uint32_t max_send_sge; + uint32_t max_rdma_sge; + uint32_t max_inline_data; +}; + +int efadv_get_max_sq_depth(struct ibv_context *ibvctx, struct efadv_sq_depth_attr *attr, + uint32_t inlen); + +struct efadv_rq_depth_attr { + uint64_t comp_mask; + uint32_t max_recv_sge; +}; + +int efadv_get_max_rq_depth(struct ibv_context *ibvctx, struct efadv_rq_depth_attr *attr, + uint32_t inlen); + int efadv_query_ah(struct ibv_ah *ibvah, struct efadv_ah_attr *attr, uint32_t inlen); @@ -61,6 +84,7 @@ struct ibv_qp *efadv_create_driver_qp(struct ibv_pd *ibvpd, enum { EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV = 1 << 0, + EFADV_QP_FLAGS_INLINE_WRITE = 1 << 1, }; struct efadv_qp_init_attr { diff --git a/providers/efa/libefa.map b/providers/efa/libefa.map index 13fac76a3..03a6d8e23 100644 --- a/providers/efa/libefa.map +++ b/providers/efa/libefa.map @@ -29,3 +29,9 @@ EFA_1.4 { efadv_query_qp_wqs; efadv_query_cq; } EFA_1.3; + +EFA_1.5 { + global: + efadv_get_max_sq_depth; + efadv_get_max_rq_depth; +} EFA_1.4; diff --git a/providers/efa/man/efadv_create_qp_ex.3.md b/providers/efa/man/efadv_create_qp_ex.3.md index aaeedfdee..8617fe363 100644 --- a/providers/efa/man/efadv_create_qp_ex.3.md +++ b/providers/efa/man/efadv_create_qp_ex.3.md @@ -68,6 +68,9 @@ struct efadv_qp_init_attr { EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV: Receive WRs will not be consumed for RDMA write with imm. + EFADV_QP_FLAGS_INLINE_WRITE: + QP supports RDMA write with inline operations. + *sl* : Service Level - 0 value implies default level. 
diff --git a/providers/efa/man/efadv_get_rq_max_depth.3.md b/providers/efa/man/efadv_get_rq_max_depth.3.md
new file mode 100644
index 000000000..ffaaf2c0a
--- /dev/null
+++ b/providers/efa/man/efadv_get_rq_max_depth.3.md
@@ -0,0 +1,57 @@
+---
+layout: page
+title: EFADV_GET_MAX_RQ_DEPTH
+section: 3
+tagline: Verbs
+date: 2026-02-17
+header: "EFA Direct Verbs Manual"
+footer: efa
+---
+
+# NAME
+
+efadv_get_max_rq_depth - Get EFA receive queue max depth based on receive queue attributes
+
+# SYNOPSIS
+
+```c
+#include <infiniband/efadv.h>
+
+int efadv_get_max_rq_depth(struct ibv_context *ibvctx, struct efadv_rq_depth_attr *attr,
+			   uint32_t inlen);
+```
+
+# DESCRIPTION
+
+**efadv_get_max_rq_depth()** gets device-specific receive queue max depth based on RQ attributes.
+
+Compatibility is handled using the comp_mask and inlen fields.
+
+```c
+struct efadv_rq_depth_attr {
+	uint64_t comp_mask;
+	uint32_t max_recv_sge;
+};
+```
+
+*inlen*
+: In: Size of struct efadv_rq_depth_attr.
+
+*comp_mask*
+: Compatibility mask.
+
+*max_recv_sge*
+: Requested max number of scatter/gather (s/g) elements in a WR in the receive queue.
+
+# RETURN VALUE
+
+**efadv_get_max_rq_depth()** returns max receive queue depth on success, or the negative value of errno on failure
+(which indicates the failure reason).
+
+# SEE ALSO
+
+**efadv**(7)
+
+# AUTHORS
+
+Yonatan Nachum
diff --git a/providers/efa/man/efadv_get_sq_max_depth.3.md b/providers/efa/man/efadv_get_sq_max_depth.3.md
new file mode 100644
index 000000000..fd5c2d07d
--- /dev/null
+++ b/providers/efa/man/efadv_get_sq_max_depth.3.md
@@ -0,0 +1,72 @@
+---
+layout: page
+title: EFADV_GET_MAX_SQ_DEPTH
+section: 3
+tagline: Verbs
+date: 2026-02-17
+header: "EFA Direct Verbs Manual"
+footer: efa
+---
+
+# NAME
+
+efadv_get_max_sq_depth - Get EFA send queue max depth based on send queue attributes
+
+# SYNOPSIS
+
+```c
+#include <infiniband/efadv.h>
+
+int efadv_get_max_sq_depth(struct ibv_context *ibvctx, struct efadv_sq_depth_attr *attr,
+			   uint32_t inlen);
+```
+
+# DESCRIPTION
+
+**efadv_get_max_sq_depth()** gets device-specific send queue max depth based on SQ attributes.
+
+Compatibility is handled using the comp_mask and inlen fields.
+
+```c
+struct efadv_sq_depth_attr {
+	uint64_t comp_mask;
+	uint32_t flags;
+	uint32_t max_send_sge;
+	uint32_t max_rdma_sge;
+	uint32_t max_inline_data;
+};
+```
+
+*inlen*
+: In: Size of struct efadv_sq_depth_attr.
+
+*comp_mask*
+: Compatibility mask.
+
+*flags*
+: A bitwise OR of the values described below.
+
+	EFADV_SQ_DEPTH_ATTR_INLINE_WRITE:
+	Inline RDMA write operation support is required.
+
+*max_send_sge*
+: Requested max number of scatter/gather (s/g) elements in a send WR in the send queue.
+
+*max_rdma_sge*
+: Requested max number of scatter/gather (s/g) elements in a RDMA WR in the send queue.
+
+*max_inline_data*
+: Requested max number of data (bytes) that can be posted inline to the send queue.
+
+# RETURN VALUE
+
+**efadv_get_max_sq_depth()** returns max send queue depth on success, or the negative value of errno on failure
+(which indicates the failure reason).
+ +# SEE ALSO + +**efadv**(7) + +# AUTHORS + +Yonatan Nachum diff --git a/providers/efa/man/efadv_query_device.3.md b/providers/efa/man/efadv_query_device.3.md index c41bc3d9d..f46d362fc 100644 --- a/providers/efa/man/efadv_query_device.3.md +++ b/providers/efa/man/efadv_query_device.3.md @@ -36,7 +36,7 @@ struct efadv_device_attr { uint16_t max_sq_sge; uint16_t max_rq_sge; uint16_t inline_buf_size; - uint8_t reserved[2]; + uint16_t inline_buf_size_ex; uint32_t device_caps; uint32_t max_rdma_size; }; @@ -61,6 +61,9 @@ struct efadv_device_attr { : Maximum Receive Queue (RQ) Scatter Gather Elements (SGEs). *inline_buf_size* +: Maximum inline buffer size (deprecated by inline_buf_size_ex). + +*inline_buf_size_ex* : Maximum inline buffer size. *device_caps* diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c index c2cc31fdd..b83b9d2a8 100644 --- a/providers/efa/verbs.c +++ b/providers/efa/verbs.c @@ -27,6 +27,9 @@ #define EFA_DEV_CAP(ctx, cap) \ ((ctx)->device_caps & EFA_QUERY_DEVICE_CAPS_##cap) +#define EFA_IO_TX_DESC_SIZE_64 (sizeof(struct efa_io_tx_wqe)) +#define EFA_IO_TX_DESC_SIZE_128 (sizeof(struct efa_io_tx_wqe_128)) + static bool is_buf_cleared(void *buf, size_t len) { int i; @@ -90,7 +93,7 @@ int efa_query_device_ex(struct ibv_context *context, } a->max_qp_wr = min_t(int, a->max_qp_wr, - ctx->max_llq_size / sizeof(struct efa_io_tx_wqe)); + ctx->max_llq_size / EFA_IO_TX_DESC_SIZE_64); memcpy(fw_ver, &resp.ibv_resp.base.fw_ver, sizeof(resp.ibv_resp.base.fw_ver)); snprintf(a->fw_ver, sizeof(a->fw_ver), "%u.%u.%u.%u", @@ -155,7 +158,7 @@ int efadv_query_device(struct ibv_context *ibvctx, return EOPNOTSUPP; } - if (!vext_field_avail(typeof(*attr), inline_buf_size, inlen)) { + if (!vext_field_avail(typeof(*attr), inline_buf_size_ex, inlen)) { verbs_err(verbs_get_ctx(ibvctx), "Compatibility issues\n"); return EINVAL; } @@ -166,6 +169,7 @@ int efadv_query_device(struct ibv_context *ibvctx, attr->max_sq_sge = ctx->max_sq_sge; attr->max_rq_sge = ctx->max_rq_sge; 
attr->inline_buf_size = ctx->inline_buf_size; + attr->inline_buf_size_ex = ctx->inline_buf_size_ex; if (vext_field_avail(typeof(*attr), device_caps, inlen)) { if (EFA_DEV_CAP(ctx, RNR_RETRY)) @@ -1488,7 +1492,7 @@ static int efa_sq_initialize(struct efa_qp *qp, } sq->desc_offset = resp->llq_desc_offset; - desc_ring_size = sq->wq.wqe_cnt * sizeof(struct efa_io_tx_wqe); + desc_ring_size = sq->wq.wqe_cnt * sq->wqe_size; sq->desc_ring_mmap_size = align(desc_ring_size + sq->desc_offset, qp->page_size); sq->max_inline_data = attr->cap.max_inline_data; @@ -1512,7 +1516,7 @@ static int efa_sq_initialize(struct efa_qp *qp, sq->max_wr_rdma_sge = min_t(uint16_t, ctx->max_wr_rdma_sge, EFA_IO_TX_DESC_NUM_RDMA_BUFS); sq->max_batch_wr = ctx->max_tx_batch ? - (ctx->max_tx_batch * 64) / sizeof(struct efa_io_tx_wqe) : + (ctx->max_tx_batch * 64) / sq->wqe_size : UINT16_MAX; if (ctx->min_sq_wr) { /* The device can't accept a doorbell for the whole SQ at once, @@ -1603,19 +1607,106 @@ static void efa_qp_init_indices(struct efa_qp *qp) qp->rq.wq.wrid_idx_pool_next = 0; } +static int efa_calc_sq_wqe_size(uint32_t max_inline_data, bool inline_write_enabled) +{ + if (max_inline_data > EFA_IO_TX_DESC_INLINE_MAX_SIZE || inline_write_enabled) + return EFA_IO_TX_DESC_SIZE_128; + + return EFA_IO_TX_DESC_SIZE_64; +} + +static int efa_calc_sq_max_depth(struct efa_context *ctx, uint32_t max_inline_data, + bool write_with_inline) +{ + int sq_wqe_size = efa_calc_sq_wqe_size(max_inline_data, write_with_inline); + + return rounddown_pow_of_two(ctx->max_llq_size / sq_wqe_size); +} + +int efadv_get_max_sq_depth(struct ibv_context *ibvctx, struct efadv_sq_depth_attr *attr, + uint32_t inlen) +{ + bool write_with_inline = !!(attr->flags & EFADV_SQ_DEPTH_ATTR_INLINE_WRITE); + struct efa_context *ctx = to_efa_context(ibvctx); + + if (!is_efa_dev(ibvctx->device)) { + verbs_err(verbs_get_ctx(ibvctx), "Not an EFA device\n"); + return -EOPNOTSUPP; + } + + if (!vext_field_avail(typeof(*attr), max_inline_data, 
inlen) || attr->comp_mask) { + verbs_err(verbs_get_ctx(ibvctx), "Compatibility issues\n"); + return -EINVAL; + } + + if (attr->max_send_sge > ctx->max_sq_sge) { + verbs_err(verbs_get_ctx(ibvctx), "Max send SGE %u > %u\n", attr->max_send_sge, + ctx->max_sq_sge); + return -EINVAL; + } + + if (attr->max_rdma_sge > ctx->max_wr_rdma_sge) { + verbs_err(verbs_get_ctx(ibvctx), "Max RDMA SGE %u > %u\n", attr->max_rdma_sge, + ctx->max_wr_rdma_sge); + return -EINVAL; + } + + if (attr->max_inline_data > ctx->inline_buf_size_ex) { + verbs_err(verbs_get_ctx(ibvctx), "Max inline data %u > %u\n", attr->max_inline_data, + ctx->inline_buf_size_ex); + return -EINVAL; + } + + return efa_calc_sq_max_depth(ctx, attr->max_inline_data, write_with_inline); +} + +static int efa_calc_rq_max_depth(struct efa_context *ctx, uint32_t max_recv_sge) +{ + return rounddown_pow_of_two(ctx->max_rq_wr / max_recv_sge); +} + +int efadv_get_max_rq_depth(struct ibv_context *ibvctx, struct efadv_rq_depth_attr *attr, + uint32_t inlen) +{ + struct efa_context *ctx = to_efa_context(ibvctx); + + if (!is_efa_dev(ibvctx->device)) { + verbs_err(verbs_get_ctx(ibvctx), "Not an EFA device\n"); + return -EOPNOTSUPP; + } + + if (!vext_field_avail(typeof(*attr), max_recv_sge, inlen) || attr->comp_mask) { + verbs_err(verbs_get_ctx(ibvctx), "Compatibility issues\n"); + return -EINVAL; + } + + if (attr->max_recv_sge > ctx->max_rq_sge) { + verbs_err(verbs_get_ctx(ibvctx), "Max receive SGE %u > %u\n", attr->max_recv_sge, + ctx->max_rq_sge); + return -EINVAL; + } + + return efa_calc_rq_max_depth(ctx, attr->max_recv_sge); +} + static void efa_setup_qp(struct efa_context *ctx, struct efa_qp *qp, - struct ibv_qp_cap *cap, + struct ibv_qp_init_attr_ex *attr, + struct efadv_qp_init_attr *efa_attr, size_t page_size) { + bool inline_write_enabled = !!(efa_attr->flags & EFADV_QP_FLAGS_INLINE_WRITE); + struct ibv_qp_cap *cap = &attr->cap; uint16_t rq_desc_cnt; efa_qp_init_indices(qp); + qp->sq.wqe_size = 
efa_calc_sq_wqe_size(cap->max_inline_data, inline_write_enabled); qp->sq.wq.wqe_cnt = roundup_pow_of_two(max_t(uint32_t, cap->max_send_wr, ctx->min_sq_wr)); qp->sq.wq.max_sge = cap->max_send_sge; qp->sq.wq.desc_mask = qp->sq.wq.wqe_cnt - 1; + qp->sq.inline_write_enabled = inline_write_enabled; qp->rq.wq.max_sge = cap->max_recv_sge; rq_desc_cnt = roundup_pow_of_two(cap->max_recv_sge * cap->max_recv_wr); @@ -1652,7 +1743,8 @@ static void efa_unlock_cqs(struct ibv_qp *ibvqp) } static void efa_qp_fill_wr_pfns(struct ibv_qp_ex *ibvqpx, - struct ibv_qp_init_attr_ex *attr_ex); + struct ibv_qp_init_attr_ex *attr_ex, + uint16_t wqe_size); static int efa_check_qp_attr(struct efa_context *ctx, struct ibv_qp_init_attr_ex *attr, @@ -1667,9 +1759,11 @@ static int efa_check_qp_attr(struct efa_context *ctx, if (EFA_DEV_CAP(ctx, RDMA_READ)) supp_srd_send_ops_mask |= IBV_QP_EX_WITH_RDMA_READ; - if (EFA_DEV_CAP(ctx, RDMA_WRITE)) + if (EFA_DEV_CAP(ctx, RDMA_WRITE)) { + supp_efa_flags |= EFADV_QP_FLAGS_INLINE_WRITE; supp_srd_send_ops_mask |= IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM; + } if (EFA_DEV_CAP(ctx, UNSOLICITED_WRITE_RECV)) supp_efa_flags |= EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV; @@ -1738,8 +1832,12 @@ static int efa_check_qp_attr(struct efa_context *ctx, } static int efa_check_qp_limits(struct efa_context *ctx, - struct ibv_qp_init_attr_ex *attr) + struct ibv_qp_init_attr_ex *attr, + struct efadv_qp_init_attr *efa_attr) { + bool inline_write_enabled = !!(efa_attr->flags & EFADV_QP_FLAGS_INLINE_WRITE); + int sq_max_depth, rq_max_depth; + if (attr->cap.max_send_sge > ctx->max_sq_sge) { verbs_err(&ctx->ibvctx, "Max send SGE %u > %u\n", attr->cap.max_send_sge, @@ -1754,17 +1852,25 @@ static int efa_check_qp_limits(struct efa_context *ctx, return EINVAL; } - if (attr->cap.max_send_wr > ctx->max_sq_wr) { + sq_max_depth = efa_calc_sq_max_depth(ctx, attr->cap.max_inline_data, inline_write_enabled); + if (attr->cap.max_send_wr > sq_max_depth) { 
verbs_err(&ctx->ibvctx, - "Max send WR %u > %u\n", attr->cap.max_send_wr, - ctx->max_sq_wr); + "Max Send WR %u > %u\n", attr->cap.max_send_wr, sq_max_depth); return EINVAL; } - if (attr->cap.max_recv_wr > ctx->max_rq_wr) { + rq_max_depth = efa_calc_rq_max_depth(ctx, attr->cap.max_recv_sge); + if (attr->cap.max_recv_wr > rq_max_depth) { verbs_err(&ctx->ibvctx, - "Max receive WR %u > %u\n", attr->cap.max_recv_wr, - ctx->max_rq_wr); + "Requested max SGE %u, max receive WR %u > %u\n", attr->cap.max_recv_sge, + attr->cap.max_recv_wr, rq_max_depth); + return EINVAL; + } + + if (attr->cap.max_inline_data > ctx->inline_buf_size_ex) { + verbs_err(&ctx->ibvctx, + "Max inline data %u > %u\n", attr->cap.max_inline_data, + ctx->inline_buf_size_ex); return EINVAL; } @@ -1789,7 +1895,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx, if (err) goto err_out; - err = efa_check_qp_limits(ctx, attr); + err = efa_check_qp_limits(ctx, attr, efa_attr); if (err) goto err_out; @@ -1799,15 +1905,14 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx, goto err_out; } - efa_setup_qp(ctx, qp, &attr->cap, dev->pg_sz); + efa_setup_qp(ctx, qp, attr, efa_attr, dev->pg_sz); attr->cap.max_send_wr = qp->sq.wq.wqe_cnt; attr->cap.max_recv_wr = qp->rq.wq.wqe_cnt; req.rq_ring_size = (qp->rq.wq.desc_mask + 1) * sizeof(struct efa_io_rx_desc); - req.sq_ring_size = (attr->cap.max_send_wr) * - sizeof(struct efa_io_tx_wqe); + req.sq_ring_size = attr->cap.max_send_wr * qp->sq.wqe_size; if (attr->qp_type == IBV_QPT_DRIVER) req.driver_qp_type = efa_attr->driver_qp_type; if (efa_attr->flags & EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV) @@ -1839,7 +1944,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx, pthread_spin_unlock(&ctx->qp_table_lock); if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS) { - efa_qp_fill_wr_pfns(&qp->verbs_qp.qp_ex, attr); + efa_qp_fill_wr_pfns(&qp->verbs_qp.qp_ex, attr, qp->sq.wqe_size); qp->verbs_qp.comp_mask |= VERBS_QP_EX; } @@ -2014,7 +2119,7 @@ int 
efadv_query_qp_wqs(struct ibv_qp *ibvqp, struct efadv_wq_attr *sq_attr, sq_attr->comp_mask = 0; sq_attr->buffer = qp->sq.desc; - sq_attr->entry_size = sizeof(struct efa_io_tx_wqe); + sq_attr->entry_size = qp->sq.wqe_size; sq_attr->num_entries = qp->sq.wq.wqe_cnt; sq_attr->doorbell = qp->sq.wq.db; sq_attr->max_batch = qp->sq.max_batch_wr; @@ -2083,20 +2188,23 @@ static void efa_set_tx_buf(struct efa_io_tx_buf_desc *tx_buf, } static void efa_post_send_sgl(struct efa_io_tx_buf_desc *tx_bufs, + struct efa_io_tx_meta_desc *md, const struct ibv_sge *sg_list, int num_sge) { const struct ibv_sge *sge; size_t i; + md->length = num_sge; + for (i = 0; i < num_sge; i++) { sge = &sg_list[i]; efa_set_tx_buf(&tx_bufs[i], sge->addr, sge->lkey, sge->length); } } -static void efa_post_send_inline_data(const struct ibv_send_wr *wr, - struct efa_io_tx_wqe *tx_wqe) +static void efa_post_send_inline_data(const struct ibv_send_wr *wr, struct efa_io_tx_meta_desc *md, + uint8_t *inline_data) { const struct ibv_sge *sgl = wr->sg_list; uint32_t total_length = 0; @@ -2106,13 +2214,13 @@ static void efa_post_send_inline_data(const struct ibv_send_wr *wr, for (i = 0; i < wr->num_sge; i++) { length = sgl[i].length; - memcpy(tx_wqe->data.inline_data + total_length, + memcpy(inline_data + total_length, (void *)(uintptr_t)sgl[i].addr, length); total_length += length; } - EFA_SET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG, 1); - tx_wqe->meta.length = total_length; + EFA_SET(&md->ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG, 1); + md->length = total_length; } static size_t efa_sge_total_bytes(const struct ibv_sge *sg_list, int num_sge) @@ -2173,25 +2281,26 @@ static void efa_set_common_ctrl_flags(struct efa_io_tx_meta_desc *desc, } #if defined(LTTNG_ENABLED) || defined(USDT_ENABLED) -static uint32_t efa_get_wqe_length(struct efa_io_tx_wqe *tx_wqe) +static uint32_t efa_wqe_get_data_length(struct efa_sq *sq) { + struct efa_io_tx_meta_desc *md = sq->curr_tx_wqe.md; enum efa_io_send_op_type op_type; 
uint32_t length = 0; size_t i; - op_type = EFA_GET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); + op_type = EFA_GET(&md->ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); switch (op_type) { case EFA_IO_SEND: - if (EFA_GET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG)) - return tx_wqe->meta.length; + if (EFA_GET(&md->ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG)) + return md->length; - for (i = 0; i < tx_wqe->meta.length; i++) - length += tx_wqe->data.sgl[i].length; + for (i = 0; i < md->length; i++) + length += sq->curr_tx_wqe.local_mem[i].length; return length; case EFA_IO_RDMA_READ: case EFA_IO_RDMA_WRITE: - return tx_wqe->data.rdma_req.remote_mem.length; + return sq->curr_tx_wqe.remote_mem->length; } return 0; @@ -2280,16 +2389,38 @@ static int efa_post_send_validate_wr(struct efa_qp *qp, int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, struct ibv_send_wr **bad) { - struct efa_io_tx_meta_desc *meta_desc; + uint8_t wqe_buf[EFA_IO_TX_DESC_SIZE_128]; struct efa_qp *qp = to_efa_qp(ibvqp); - struct efa_io_tx_wqe tx_wqe; + struct efa_io_tx_wqe_128 *tx_wqe_128; + struct efa_io_tx_meta_desc *md; + struct efa_io_tx_buf_desc *sgl; + struct efa_io_tx_wqe *tx_wqe; struct efa_sq *sq = &qp->sq; struct efa_wq *wq = &sq->wq; uint32_t sq_desc_offset; uint32_t curbatch = 0; + uint8_t *inline_data; struct efa_ah *ah; int err = 0; + switch (sq->wqe_size) { + case EFA_IO_TX_DESC_SIZE_64: + tx_wqe = (struct efa_io_tx_wqe *)wqe_buf; + md = &tx_wqe->meta; + sgl = tx_wqe->data.sgl; + inline_data = tx_wqe->data.inline_data; + break; + case EFA_IO_TX_DESC_SIZE_128: + tx_wqe_128 = (struct efa_io_tx_wqe_128 *)wqe_buf; + md = &tx_wqe_128->meta; + sgl = tx_wqe_128->data.sgl; + inline_data = tx_wqe_128->data.inline_data; + break; + + default: + return EINVAL; + } + if (wq->need_lock) mmio_wc_spinlock(&wq->wqlock); else @@ -2302,37 +2433,30 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, goto ring_db; } - memset(&tx_wqe, 0, sizeof(tx_wqe)); - meta_desc = &tx_wqe.meta; 
+ memset(wqe_buf, 0, sq->wqe_size); ah = to_efa_ah(wr->wr.ud.ah); if (wr->send_flags & IBV_SEND_INLINE) { - efa_post_send_inline_data(wr, &tx_wqe); + efa_post_send_inline_data(wr, md, inline_data); } else { - meta_desc->length = wr->num_sge; - efa_post_send_sgl(tx_wqe.data.sgl, wr->sg_list, - wr->num_sge); + efa_post_send_sgl(sgl, md, wr->sg_list, wr->num_sge); } if (wr->opcode == IBV_WR_SEND_WITH_IMM) { - meta_desc->immediate_data = be32toh(wr->imm_data); - EFA_SET(&meta_desc->ctrl1, EFA_IO_TX_META_DESC_HAS_IMM, - 1); + md->immediate_data = be32toh(wr->imm_data); + EFA_SET(&md->ctrl1, EFA_IO_TX_META_DESC_HAS_IMM, 1); } /* Set rest of the descriptor fields */ - efa_set_common_ctrl_flags(meta_desc, sq, EFA_IO_SEND); - meta_desc->req_id = efa_wq_get_next_wrid_idx_locked(wq, - wr->wr_id); - meta_desc->dest_qp_num = wr->wr.ud.remote_qpn; - meta_desc->ah = ah->efa_ah; - meta_desc->qkey = wr->wr.ud.remote_qkey; + efa_set_common_ctrl_flags(md, sq, EFA_IO_SEND); + md->req_id = efa_wq_get_next_wrid_idx_locked(wq, wr->wr_id); + md->dest_qp_num = wr->wr.ud.remote_qpn; + md->ah = ah->efa_ah; + md->qkey = wr->wr.ud.remote_qkey; /* Copy descriptor */ - sq_desc_offset = (wq->pc & wq->desc_mask) * - sizeof(tx_wqe); - mmio_memcpy_x64(sq->desc + sq_desc_offset, &tx_wqe, - sizeof(tx_wqe)); + sq_desc_offset = (wq->pc & wq->desc_mask) * sq->wqe_size; + mmio_memcpy_x64(sq->desc + sq_desc_offset, wqe_buf, sq->wqe_size); /* advance index and change phase */ efa_sq_advance_post_idx(sq); @@ -2345,8 +2469,8 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, mmio_wc_start(); } rdma_tracepoint(rdma_core_efa, post_send, qp->dev->name, wr->wr_id, - EFA_IO_SEND, ibvqp->qp_num, meta_desc->dest_qp_num, - ah->efa_ah, efa_get_wqe_length(&tx_wqe)); + EFA_IO_SEND, ibvqp->qp_num, md->dest_qp_num, + ah->efa_ah, efa_wqe_get_data_length(sq)); wr = wr->next; } @@ -2366,12 +2490,9 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, return err; } -static struct efa_io_tx_wqe 
*efa_send_wr_common(struct ibv_qp_ex *ibvqpx, - enum efa_io_send_op_type op_type) +static void *efa_send_wr_alloc(struct efa_qp *qp, struct ibv_qp_ex *ibvqpx) { - struct efa_qp *qp = to_efa_qp_ex(ibvqpx); struct efa_sq *sq = &qp->sq; - struct efa_io_tx_meta_desc *meta_desc; int err; if (unlikely(qp->wr_session_err)) @@ -2383,179 +2504,275 @@ static struct efa_io_tx_wqe *efa_send_wr_common(struct ibv_qp_ex *ibvqpx, return NULL; } - sq->curr_tx_wqe = (struct efa_io_tx_wqe *)sq->local_queue + - sq->num_wqe_pending; - memset(sq->curr_tx_wqe, 0, sizeof(*sq->curr_tx_wqe)); + sq->curr_tx_wqe.buff = sq->local_queue + sq->num_wqe_pending * sq->wqe_size; + memset(sq->curr_tx_wqe.buff, 0, sq->wqe_size); + + return sq->curr_tx_wqe.buff; +} + +static void efa_send_wr_init(struct efa_qp *qp, struct ibv_qp_ex *ibvqpx, + enum efa_io_send_op_type op_type, uint8_t max_sge, + struct efa_io_tx_meta_desc *md, + struct efa_io_tx_buf_desc *local_mem, + struct efa_io_remote_mem_addr *remote_mem, + uint8_t *inline_data) +{ + struct efa_sq *sq = &qp->sq; - meta_desc = &sq->curr_tx_wqe->meta; - efa_set_common_ctrl_flags(meta_desc, sq, op_type); - meta_desc->req_id = efa_wq_get_next_wrid_idx_locked(&sq->wq, - ibvqpx->wr_id); + sq->curr_tx_wqe.md = md; + efa_set_common_ctrl_flags(sq->curr_tx_wqe.md, sq, op_type); + sq->curr_tx_wqe.md->req_id = efa_wq_get_next_wrid_idx_locked(&sq->wq, ibvqpx->wr_id); /* advance index and change phase */ efa_sq_advance_post_idx(sq); sq->num_wqe_pending++; - return sq->curr_tx_wqe; + sq->curr_tx_wqe.local_mem = local_mem; + sq->curr_tx_wqe.remote_mem = remote_mem; + sq->curr_tx_wqe.inline_data = inline_data; + sq->curr_tx_wqe.max_sge = max_sge; } -static void efa_send_wr_set_imm_data(struct efa_io_tx_wqe *tx_wqe, __be32 imm_data) +static void efa_send_wr_set_imm_data(struct efa_io_tx_meta_desc *meta_desc, __be32 imm_data) { - struct efa_io_tx_meta_desc *meta_desc; - - meta_desc = &tx_wqe->meta; meta_desc->immediate_data = be32toh(imm_data); 
EFA_SET(&meta_desc->ctrl1, EFA_IO_TX_META_DESC_HAS_IMM, 1); } -static void efa_send_wr_set_rdma_addr(struct efa_io_tx_wqe *tx_wqe, uint32_t rkey, +static void efa_send_wr_set_rdma_addr(struct efa_io_remote_mem_addr *remote_mem, uint32_t rkey, uint64_t remote_addr) { - struct efa_io_remote_mem_addr *remote_mem; - - remote_mem = &tx_wqe->data.rdma_req.remote_mem; remote_mem->rkey = rkey; remote_mem->buf_addr_lo = remote_addr & 0xFFFFFFFF; remote_mem->buf_addr_hi = remote_addr >> 32; } -static void efa_send_wr_send(struct ibv_qp_ex *ibvqpx) +static void efa_send_wr_send_64(struct ibv_qp_ex *ibvqpx) { - efa_send_wr_common(ibvqpx, EFA_IO_SEND); + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); + struct efa_io_tx_wqe *tx_wqe; + + tx_wqe = (struct efa_io_tx_wqe *)efa_send_wr_alloc(qp, ibvqpx); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_init(qp, ibvqpx, EFA_IO_SEND, qp->sq.wq.max_sge, &tx_wqe->meta, + tx_wqe->data.sgl, NULL, tx_wqe->data.inline_data); +} + +static void efa_send_wr_send_128(struct ibv_qp_ex *ibvqpx) +{ + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); + struct efa_io_tx_wqe_128 *tx_wqe; + + tx_wqe = (struct efa_io_tx_wqe_128 *)efa_send_wr_alloc(qp, ibvqpx); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_init(qp, ibvqpx, EFA_IO_SEND, qp->sq.wq.max_sge, &tx_wqe->meta, + tx_wqe->data.sgl, NULL, tx_wqe->data.inline_data); } -static void efa_send_wr_send_imm(struct ibv_qp_ex *ibvqpx, __be32 imm_data) +static void efa_send_wr_send_imm_64(struct ibv_qp_ex *ibvqpx, __be32 imm_data) { + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); struct efa_io_tx_wqe *tx_wqe; - tx_wqe = efa_send_wr_common(ibvqpx, EFA_IO_SEND); + tx_wqe = (struct efa_io_tx_wqe *)efa_send_wr_alloc(qp, ibvqpx); if (unlikely(!tx_wqe)) return; - efa_send_wr_set_imm_data(tx_wqe, imm_data); + efa_send_wr_init(qp, ibvqpx, EFA_IO_SEND, qp->sq.wq.max_sge, &tx_wqe->meta, + tx_wqe->data.sgl, NULL, tx_wqe->data.inline_data); + efa_send_wr_set_imm_data(qp->sq.curr_tx_wqe.md, imm_data); } -static void 
efa_send_wr_rdma_read(struct ibv_qp_ex *ibvqpx, uint32_t rkey, - uint64_t remote_addr) +static void efa_send_wr_send_imm_128(struct ibv_qp_ex *ibvqpx, __be32 imm_data) { + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); + struct efa_io_tx_wqe_128 *tx_wqe; + + tx_wqe = (struct efa_io_tx_wqe_128 *)efa_send_wr_alloc(qp, ibvqpx); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_init(qp, ibvqpx, EFA_IO_SEND, qp->sq.wq.max_sge, &tx_wqe->meta, + tx_wqe->data.sgl, NULL, tx_wqe->data.inline_data); + efa_send_wr_set_imm_data(qp->sq.curr_tx_wqe.md, imm_data); +} + +static inline void efa_send_wr_rdma_common(struct efa_qp *qp, struct ibv_qp_ex *ibvqpx, + uint32_t rkey, uint64_t remote_addr, + enum efa_io_send_op_type op_type, + struct efa_io_tx_meta_desc *md, + struct efa_io_tx_buf_desc *local_mem, + struct efa_io_remote_mem_addr *remote_mem, + uint8_t *inline_data) ALWAYS_INLINE; +static inline void efa_send_wr_rdma_common(struct efa_qp *qp, struct ibv_qp_ex *ibvqpx, + uint32_t rkey, uint64_t remote_addr, + enum efa_io_send_op_type op_type, + struct efa_io_tx_meta_desc *md, + struct efa_io_tx_buf_desc *local_mem, + struct efa_io_remote_mem_addr *remote_mem, + uint8_t *inline_data) +{ + efa_send_wr_init(qp, ibvqpx, op_type, qp->sq.max_wr_rdma_sge, md, + local_mem, remote_mem, inline_data); + + efa_send_wr_set_rdma_addr(remote_mem, rkey, remote_addr); +} + +static void efa_send_wr_rdma_read_64(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr) +{ + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); struct efa_io_tx_wqe *tx_wqe; - tx_wqe = efa_send_wr_common(ibvqpx, EFA_IO_RDMA_READ); + tx_wqe = (struct efa_io_tx_wqe *)efa_send_wr_alloc(qp, ibvqpx); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_rdma_common(qp, ibvqpx, rkey, remote_addr, EFA_IO_RDMA_READ, + &tx_wqe->meta, tx_wqe->data.rdma_req.local_mem, + &tx_wqe->data.rdma_req.remote_mem, NULL); +} + +static void efa_send_wr_rdma_read_128(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr) +{ + struct efa_qp 
*qp = to_efa_qp_ex(ibvqpx); + struct efa_io_tx_wqe_128 *tx_wqe; + + tx_wqe = (struct efa_io_tx_wqe_128 *)efa_send_wr_alloc(qp, ibvqpx); if (unlikely(!tx_wqe)) return; - efa_send_wr_set_rdma_addr(tx_wqe, rkey, remote_addr); + efa_send_wr_rdma_common(qp, ibvqpx, rkey, remote_addr, EFA_IO_RDMA_READ, + &tx_wqe->meta, tx_wqe->data.rdma_req.local_mem, + &tx_wqe->data.rdma_req.remote_mem, NULL); } -static void efa_send_wr_rdma_write(struct ibv_qp_ex *ibvqpx, uint32_t rkey, - uint64_t remote_addr) +static void efa_send_wr_rdma_write_64(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr) { + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); struct efa_io_tx_wqe *tx_wqe; - tx_wqe = efa_send_wr_common(ibvqpx, EFA_IO_RDMA_WRITE); + tx_wqe = (struct efa_io_tx_wqe *)efa_send_wr_alloc(qp, ibvqpx); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_rdma_common(qp, ibvqpx, rkey, remote_addr, EFA_IO_RDMA_WRITE, + &tx_wqe->meta, tx_wqe->data.rdma_req.local_mem, + &tx_wqe->data.rdma_req.remote_mem, NULL); +} + +static void efa_send_wr_rdma_write_128(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr) +{ + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); + struct efa_io_tx_wqe_128 *tx_wqe; + + tx_wqe = (struct efa_io_tx_wqe_128 *)efa_send_wr_alloc(qp, ibvqpx); if (unlikely(!tx_wqe)) return; - efa_send_wr_set_rdma_addr(tx_wqe, rkey, remote_addr); + efa_send_wr_rdma_common(qp, ibvqpx, rkey, remote_addr, EFA_IO_RDMA_WRITE, + &tx_wqe->meta, tx_wqe->data.rdma_req.local_mem, + &tx_wqe->data.rdma_req.remote_mem, + qp->sq.inline_write_enabled ? 
tx_wqe->data.rdma_req.inline_data : + NULL); } -static void efa_send_wr_rdma_write_imm(struct ibv_qp_ex *ibvqpx, uint32_t rkey, - uint64_t remote_addr, __be32 imm_data) +static void efa_send_wr_rdma_write_imm_64(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr, __be32 imm_data) { + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); struct efa_io_tx_wqe *tx_wqe; - tx_wqe = efa_send_wr_common(ibvqpx, EFA_IO_RDMA_WRITE); + tx_wqe = (struct efa_io_tx_wqe *)efa_send_wr_alloc(qp, ibvqpx); + if (unlikely(!tx_wqe)) + return; + + efa_send_wr_rdma_common(qp, ibvqpx, rkey, remote_addr, EFA_IO_RDMA_WRITE, + &tx_wqe->meta, tx_wqe->data.rdma_req.local_mem, + &tx_wqe->data.rdma_req.remote_mem, NULL); + efa_send_wr_set_imm_data(qp->sq.curr_tx_wqe.md, imm_data); +} + +static void efa_send_wr_rdma_write_imm_128(struct ibv_qp_ex *ibvqpx, uint32_t rkey, + uint64_t remote_addr, __be32 imm_data) +{ + struct efa_qp *qp = to_efa_qp_ex(ibvqpx); + struct efa_io_tx_wqe_128 *tx_wqe; + + tx_wqe = (struct efa_io_tx_wqe_128 *)efa_send_wr_alloc(qp, ibvqpx); if (unlikely(!tx_wqe)) return; - efa_send_wr_set_rdma_addr(tx_wqe, rkey, remote_addr); - efa_send_wr_set_imm_data(tx_wqe, imm_data); + efa_send_wr_rdma_common(qp, ibvqpx, rkey, remote_addr, EFA_IO_RDMA_WRITE, + &tx_wqe->meta, tx_wqe->data.rdma_req.local_mem, + &tx_wqe->data.rdma_req.remote_mem, + qp->sq.inline_write_enabled ? 
tx_wqe->data.rdma_req.inline_data : + NULL); + efa_send_wr_set_imm_data(qp->sq.curr_tx_wqe.md, imm_data); } static void efa_send_wr_set_sge(struct ibv_qp_ex *ibvqpx, uint32_t lkey, uint64_t addr, uint32_t length) { struct efa_qp *qp = to_efa_qp_ex(ibvqpx); - struct efa_io_tx_buf_desc *buf; - struct efa_io_tx_wqe *tx_wqe; + struct efa_io_tx_meta_desc *md; uint8_t op_type; if (unlikely(qp->wr_session_err)) return; - tx_wqe = qp->sq.curr_tx_wqe; - tx_wqe->meta.length = 1; + md = qp->sq.curr_tx_wqe.md; + md->length = 1; - op_type = EFA_GET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); - switch (op_type) { - case EFA_IO_SEND: - buf = &tx_wqe->data.sgl[0]; - break; - case EFA_IO_RDMA_READ: - case EFA_IO_RDMA_WRITE: - tx_wqe->data.rdma_req.remote_mem.length = length; - buf = &tx_wqe->data.rdma_req.local_mem[0]; - break; - default: - return; - } + op_type = EFA_GET(&md->ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); + if (op_type == EFA_IO_RDMA_READ || op_type == EFA_IO_RDMA_WRITE) + qp->sq.curr_tx_wqe.remote_mem->length = length; - efa_set_tx_buf(buf, addr, lkey, length); + efa_set_tx_buf(qp->sq.curr_tx_wqe.local_mem, addr, lkey, length); } static void efa_send_wr_set_sge_list(struct ibv_qp_ex *ibvqpx, size_t num_sge, const struct ibv_sge *sg_list) { struct efa_qp *qp = to_efa_qp_ex(ibvqpx); - struct efa_io_rdma_req *rdma_req; - struct efa_io_tx_wqe *tx_wqe; + struct efa_io_tx_meta_desc *md; struct efa_sq *sq = &qp->sq; uint8_t op_type; if (unlikely(qp->wr_session_err)) return; - tx_wqe = sq->curr_tx_wqe; - op_type = EFA_GET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); - switch (op_type) { - case EFA_IO_SEND: - if (unlikely(num_sge > sq->wq.max_sge)) { - verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), - "SQ[%u] num_sge[%zu] > max_sge[%u]\n", - ibvqpx->qp_base.qp_num, num_sge, - sq->wq.max_sge); - qp->wr_session_err = EINVAL; - return; - } - efa_post_send_sgl(tx_wqe->data.sgl, sg_list, num_sge); - break; - case EFA_IO_RDMA_READ: - case EFA_IO_RDMA_WRITE: - if 
(unlikely(num_sge > sq->max_wr_rdma_sge)) { - verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), - "SQ[%u] num_sge[%zu] > max_rdma_sge[%zu]\n", - ibvqpx->qp_base.qp_num, num_sge, - sq->max_wr_rdma_sge); - qp->wr_session_err = EINVAL; - return; - } - rdma_req = &tx_wqe->data.rdma_req; - rdma_req->remote_mem.length = efa_sge_total_bytes(sg_list, - num_sge); - efa_post_send_sgl(rdma_req->local_mem, sg_list, num_sge); - break; - default: + md = sq->curr_tx_wqe.md; + + op_type = EFA_GET(&md->ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); + if (unlikely(num_sge > sq->curr_tx_wqe.max_sge)) { + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "SQ[%u] op_type[%u] num_sge[%zu] > max_sge[%u]\n", + ibvqpx->qp_base.qp_num, op_type, num_sge, + sq->curr_tx_wqe.max_sge); + qp->wr_session_err = EINVAL; return; } - tx_wqe->meta.length = num_sge; + if (op_type == EFA_IO_RDMA_READ || op_type == EFA_IO_RDMA_WRITE) + sq->curr_tx_wqe.remote_mem->length = efa_sge_total_bytes(sg_list, num_sge); + + efa_post_send_sgl(sq->curr_tx_wqe.local_mem, md, sg_list, num_sge); } static void efa_send_wr_set_inline_data(struct ibv_qp_ex *ibvqpx, void *addr, size_t length) { struct efa_qp *qp = to_efa_qp_ex(ibvqpx); - struct efa_io_tx_wqe *tx_wqe = qp->sq.curr_tx_wqe; + struct efa_io_tx_meta_desc *md = qp->sq.curr_tx_wqe.md; + uint8_t op_type; if (unlikely(qp->wr_session_err)) return; @@ -2569,9 +2786,20 @@ static void efa_send_wr_set_inline_data(struct ibv_qp_ex *ibvqpx, void *addr, return; } - EFA_SET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG, 1); - memcpy(tx_wqe->data.inline_data, addr, length); - tx_wqe->meta.length = length; + if (unlikely(!qp->sq.curr_tx_wqe.inline_data)) { + op_type = EFA_GET(&md->ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "SQ[%u] inline op_type[%u] isn't supported\n", + ibvqpx->qp_base.qp_num, op_type); + qp->wr_session_err = EINVAL; + return; + } + + EFA_SET(&md->ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG, 1); + 
memcpy(qp->sq.curr_tx_wqe.inline_data, addr, length); + md->length = length; + if (qp->sq.curr_tx_wqe.remote_mem) + qp->sq.curr_tx_wqe.remote_mem->length = length; } static void @@ -2580,9 +2808,9 @@ efa_send_wr_set_inline_data_list(struct ibv_qp_ex *ibvqpx, const struct ibv_data_buf *buf_list) { struct efa_qp *qp = to_efa_qp_ex(ibvqpx); - struct efa_io_tx_wqe *tx_wqe = qp->sq.curr_tx_wqe; - uint32_t total_length = 0; - uint32_t length; + struct efa_io_tx_meta_desc *md = qp->sq.curr_tx_wqe.md; + uint32_t length, total_length = 0; + uint8_t op_type; size_t i; if (unlikely(qp->wr_session_err)) @@ -2599,16 +2827,27 @@ efa_send_wr_set_inline_data_list(struct ibv_qp_ex *ibvqpx, return; } + if (unlikely(!qp->sq.curr_tx_wqe.inline_data)) { + op_type = EFA_GET(&md->ctrl1, EFA_IO_TX_META_DESC_OP_TYPE); + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "SQ[%u] inline op_type[%u] isn't supported\n", + ibvqpx->qp_base.qp_num, op_type); + qp->wr_session_err = EINVAL; + return; + } + for (i = 0; i < num_buf; i++) { length = buf_list[i].length; - memcpy(tx_wqe->data.inline_data + total_length, + memcpy(qp->sq.curr_tx_wqe.inline_data + total_length, buf_list[i].addr, length); total_length += length; } - EFA_SET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG, 1); - tx_wqe->meta.length = total_length; + EFA_SET(&md->ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG, 1); + md->length = total_length; + if (qp->sq.curr_tx_wqe.remote_mem) + qp->sq.curr_tx_wqe.remote_mem->length = total_length; } static void efa_send_wr_set_addr(struct ibv_qp_ex *ibvqpx, @@ -2617,20 +2856,21 @@ static void efa_send_wr_set_addr(struct ibv_qp_ex *ibvqpx, { struct efa_qp *qp = to_efa_qp_ex(ibvqpx); struct efa_ah *ah = to_efa_ah(ibvah); - struct efa_io_tx_wqe *tx_wqe; + struct efa_io_tx_meta_desc *md; if (unlikely(qp->wr_session_err)) return; - tx_wqe = qp->sq.curr_tx_wqe; + md = qp->sq.curr_tx_wqe.md; - tx_wqe->meta.dest_qp_num = remote_qpn; - tx_wqe->meta.ah = ah->efa_ah; - tx_wqe->meta.qkey = remote_qkey; + 
md->dest_qp_num = remote_qpn; + md->ah = ah->efa_ah; + md->qkey = remote_qkey; rdma_tracepoint(rdma_core_efa, post_send, qp->dev->name, ibvqpx->wr_id, - EFA_GET(&tx_wqe->meta.ctrl1, EFA_IO_TX_META_DESC_OP_TYPE), - ibvqpx->qp_base.qp_num, remote_qpn, ah->efa_ah, efa_get_wqe_length(tx_wqe)); + EFA_GET(&md->ctrl1, EFA_IO_TX_META_DESC_OP_TYPE), + ibvqpx->qp_base.qp_num, remote_qpn, ah->efa_ah, + efa_wqe_get_data_length(qp->sq)); } static void efa_send_wr_start(struct ibv_qp_ex *ibvqpx) @@ -2690,11 +2930,9 @@ static int efa_send_wr_complete(struct ibv_qp_ex *ibvqpx) num_wqe_to_copy = min3(sq->num_wqe_pending, sq->wq.wqe_cnt - sq_desc_idx, max_txbatch - curbatch); - mmio_memcpy_x64((struct efa_io_tx_wqe *)sq->desc + - sq_desc_idx, - (struct efa_io_tx_wqe *)sq->local_queue + - local_idx, - num_wqe_to_copy * sizeof(struct efa_io_tx_wqe)); + mmio_memcpy_x64(sq->desc + sq_desc_idx * sq->wqe_size, + sq->local_queue + local_idx * sq->wqe_size, + num_wqe_to_copy * sq->wqe_size); sq->num_wqe_pending -= num_wqe_to_copy; local_idx += num_wqe_to_copy; @@ -2736,26 +2974,32 @@ static void efa_send_wr_abort(struct ibv_qp_ex *ibvqpx) } static void efa_qp_fill_wr_pfns(struct ibv_qp_ex *ibvqpx, - struct ibv_qp_init_attr_ex *attr_ex) + struct ibv_qp_init_attr_ex *attr_ex, + uint16_t wqe_size) { + bool use_64 = wqe_size == EFA_IO_TX_DESC_SIZE_64; + ibvqpx->wr_start = efa_send_wr_start; ibvqpx->wr_complete = efa_send_wr_complete; ibvqpx->wr_abort = efa_send_wr_abort; if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_SEND) - ibvqpx->wr_send = efa_send_wr_send; + ibvqpx->wr_send = use_64 ? efa_send_wr_send_64 : efa_send_wr_send_128; if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_SEND_WITH_IMM) - ibvqpx->wr_send_imm = efa_send_wr_send_imm; + ibvqpx->wr_send_imm = use_64 ? efa_send_wr_send_imm_64 : efa_send_wr_send_imm_128; if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_READ) - ibvqpx->wr_rdma_read = efa_send_wr_rdma_read; + ibvqpx->wr_rdma_read = use_64 ? 
efa_send_wr_rdma_read_64 : + efa_send_wr_rdma_read_128; if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_WRITE) - ibvqpx->wr_rdma_write = efa_send_wr_rdma_write; + ibvqpx->wr_rdma_write = use_64 ? efa_send_wr_rdma_write_64 : + efa_send_wr_rdma_write_128; if (attr_ex->send_ops_flags & IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM) - ibvqpx->wr_rdma_write_imm = efa_send_wr_rdma_write_imm; + ibvqpx->wr_rdma_write_imm = use_64 ? efa_send_wr_rdma_write_imm_64 : + efa_send_wr_rdma_write_imm_128; ibvqpx->wr_set_inline_data = efa_send_wr_set_inline_data; ibvqpx->wr_set_inline_data_list = efa_send_wr_set_inline_data_list; diff --git a/pyverbs/providers/efa/efa_enums.pxd b/pyverbs/providers/efa/efa_enums.pxd index 11c85d62f..258b97866 100644 --- a/pyverbs/providers/efa/efa_enums.pxd +++ b/pyverbs/providers/efa/efa_enums.pxd @@ -17,6 +17,7 @@ cdef extern from 'infiniband/efadv.h': cpdef enum: EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV + EFADV_QP_FLAGS_INLINE_WRITE cpdef enum: EFADV_WC_EX_WITH_SGID @@ -26,3 +27,6 @@ cdef extern from 'infiniband/efadv.h': EFADV_MR_ATTR_VALIDITY_RECV_IC_ID EFADV_MR_ATTR_VALIDITY_RDMA_READ_IC_ID EFADV_MR_ATTR_VALIDITY_RDMA_RECV_IC_ID + + cpdef enum: + EFADV_SQ_DEPTH_ATTR_INLINE_WRITE diff --git a/pyverbs/providers/efa/efadv.pxd b/pyverbs/providers/efa/efadv.pxd index 12e11f8ce..249ce7570 100644 --- a/pyverbs/providers/efa/efadv.pxd +++ b/pyverbs/providers/efa/efadv.pxd @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) -# Copyright 2020-2024 Amazon.com, Inc. or its affiliates. All rights reserved. +# Copyright 2020-2026 Amazon.com, Inc. or its affiliates. All rights reserved. 
#cython: language_level=3 @@ -50,3 +50,11 @@ cdef class EfaDVCQInitAttr(PyverbsObject): cdef class EfaDVMRAttr(PyverbsObject): cdef dv.efadv_mr_attr mr_attr + + +cdef class EfaDVSQDepthAttr(PyverbsObject): + cdef dv.efadv_sq_depth_attr sq_depth_attr + + +cdef class EfaDVRQDepthAttr(PyverbsObject): + cdef dv.efadv_rq_depth_attr rq_depth_attr diff --git a/pyverbs/providers/efa/efadv.pyx b/pyverbs/providers/efa/efadv.pyx index ec21225aa..2f3e04cba 100644 --- a/pyverbs/providers/efa/efadv.pyx +++ b/pyverbs/providers/efa/efadv.pyx @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) -# Copyright 2020-2024 Amazon.com, Inc. or its affiliates. All rights reserved. +# Copyright 2020-2026 Amazon.com, Inc. or its affiliates. All rights reserved. cimport pyverbs.providers.efa.efa_enums as dve cimport pyverbs.providers.efa.libefa as dv @@ -92,6 +92,10 @@ cdef class EfaDVDeviceAttr(PyverbsObject): def inline_buf_size(self): return self.device_attr.inline_buf_size + @property + def inline_buf_size_ex(self): + return self.device_attr.inline_buf_size_ex + @property def device_caps(self): return self.device_attr.device_caps @@ -108,6 +112,7 @@ cdef class EfaDVDeviceAttr(PyverbsObject): print_format.format('Max SQ SQE', self.device_attr.max_sq_sge) + \ print_format.format('Max RQ SQE', self.device_attr.max_rq_sge) + \ print_format.format('Inline buffer size', self.device_attr.inline_buf_size) + \ + print_format.format('Inline buffer size ex', self.device_attr.inline_buf_size_ex) + \ print_format.format('Device Capabilities', dev_cap_to_str(self.device_attr.device_caps)) + \ print_format.format('Max RDMA Size', self.device_attr.max_rdma_size) @@ -325,3 +330,69 @@ cdef class EfaMR(MR): raise PyverbsRDMAError(f'Failed to query EFA MR', rc) return mr_attr + + +cdef class EfaDVSQDepthAttr(PyverbsObject): + """ + Represents efadv_sq_depth_attr struct + """ + @property + def comp_mask(self): + return self.sq_depth_attr.comp_mask + + @comp_mask.setter + def comp_mask(self, 
val): + self.sq_depth_attr.comp_mask = val + + @property + def flags(self): + return self.sq_depth_attr.flags + + @flags.setter + def flags(self, val): + self.sq_depth_attr.flags = val + + @property + def max_send_sge(self): + return self.sq_depth_attr.max_send_sge + + @max_send_sge.setter + def max_send_sge(self, val): + self.sq_depth_attr.max_send_sge = val + + @property + def max_rdma_sge(self): + return self.sq_depth_attr.max_rdma_sge + + @max_rdma_sge.setter + def max_rdma_sge(self, val): + self.sq_depth_attr.max_rdma_sge = val + + @property + def max_inline_data(self): + return self.sq_depth_attr.max_inline_data + + @max_inline_data.setter + def max_inline_data(self, val): + self.sq_depth_attr.max_inline_data = val + + +cdef class EfaDVRQDepthAttr(PyverbsObject): + """ + Represents efadv_rq_depth_attr struct + """ + @property + def comp_mask(self): + return self.rq_depth_attr.comp_mask + + @comp_mask.setter + def comp_mask(self, val): + self.rq_depth_attr.comp_mask = val + + @property + def max_recv_sge(self): + return self.rq_depth_attr.max_recv_sge + + @max_recv_sge.setter + def max_recv_sge(self, val): + self.rq_depth_attr.max_recv_sge = val diff --git a/pyverbs/providers/efa/libefa.pxd b/pyverbs/providers/efa/libefa.pxd index 265868ac0..bf043434f 100644 --- a/pyverbs/providers/efa/libefa.pxd +++ b/pyverbs/providers/efa/libefa.pxd @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) -# Copyright 2020-2024 Amazon.com, Inc. or its affiliates. All rights reserved. +# Copyright 2020-2026 Amazon.com, Inc. or its affiliates. All rights reserved. 
#cython: language_level=3 @@ -17,7 +17,7 @@ cdef extern from 'infiniband/efadv.h': uint16_t max_sq_sge; uint16_t max_rq_sge; uint16_t inline_buf_size; - uint8_t reserved[2]; + uint16_t inline_buf_size_ex; uint32_t device_caps; uint32_t max_rdma_size; @@ -47,6 +47,17 @@ cdef extern from 'infiniband/efadv.h': uint16_t rdma_read_ic_id; uint16_t rdma_recv_ic_id; + cdef struct efadv_sq_depth_attr: + uint64_t comp_mask; + uint32_t flags; + uint32_t max_send_sge; + uint32_t max_rdma_sge; + uint32_t max_inline_data; + + cdef struct efadv_rq_depth_attr: + uint64_t comp_mask; + uint32_t max_recv_sge; + int efadv_query_device(v.ibv_context *ibvctx, efadv_device_attr *attrs, uint32_t inlen) int efadv_query_ah(v.ibv_ah *ibvah, efadv_ah_attr *attr, @@ -65,3 +76,7 @@ cdef extern from 'infiniband/efadv.h': int efadv_wc_read_sgid(efadv_cq *efadv_cq, v.ibv_gid *sgid) bool efadv_wc_is_unsolicited(efadv_cq *efadv_cq) int efadv_query_mr(v.ibv_mr *ibvmr, efadv_mr_attr *attr, uint32_t inlen) + int efadv_get_max_sq_depth(v.ibv_context *ibvctx, efadv_sq_depth_attr *attr, + uint32_t inlen) + int efadv_get_max_rq_depth(v.ibv_context *ibvctx, efadv_rq_depth_attr *attr, + uint32_t inlen); diff --git a/util/util.h b/util/util.h index 92b674067..ffccd1d92 100644 --- a/util/util.h +++ b/util/util.h @@ -81,6 +81,11 @@ static inline uint64_t roundup_pow_of_two(uint64_t n) return n == 1 ? 1 : 1ULL << ilog64(n - 1); } +static inline uint64_t rounddown_pow_of_two(uint64_t n) +{ + return n == 0 ? 0 : 1ULL << (ilog64(n) - 1); +} + static inline unsigned long DIV_ROUND_UP(unsigned long n, unsigned long d) { return (n + d - 1) / d;