Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/ucp/core/ucp_mm.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,7 @@ static inline ucs_status_t ucp_mem_map_check_and_adjust_params(ucp_mem_map_param
}

if ((params->flags & UCP_MEM_MAP_FIXED) &&
(!params->address ||
((uintptr_t)params->address % ucs_get_page_size()))) {
((uintptr_t)params->address % ucs_get_page_size())) {
ucs_error("UCP_MEM_MAP_FIXED flag requires page aligned address");
return UCS_ERR_INVALID_PARAM;
}
Expand Down
33 changes: 17 additions & 16 deletions src/uct/api/uct.h
Original file line number Diff line number Diff line change
Expand Up @@ -509,22 +509,23 @@ typedef enum {
* @brief Memory domain capability flags.
*/
enum {
UCT_MD_FLAG_ALLOC = UCS_BIT(0), /**< MD supports memory allocation */
UCT_MD_FLAG_REG = UCS_BIT(1), /**< MD supports memory registration */
UCT_MD_FLAG_NEED_MEMH = UCS_BIT(2), /**< The transport needs a valid local
memory handle for zero-copy operations */
UCT_MD_FLAG_NEED_RKEY = UCS_BIT(3), /**< The transport needs a valid
remote memory key for remote memory
operations */
UCT_MD_FLAG_ADVISE = UCS_BIT(4), /**< MD supports memory advice */
UCT_MD_FLAG_FIXED = UCS_BIT(5), /**< MD supports memory allocation with
fixed address */
UCT_MD_FLAG_RKEY_PTR = UCS_BIT(6), /**< MD supports direct access to
remote memory via a pointer that
is returned by @ref uct_rkey_ptr */
UCT_MD_FLAG_SOCKADDR = UCS_BIT(7) /**< MD support for client-server
connection establishment via
sockaddr */
UCT_MD_FLAG_ALLOC = UCS_BIT(0), /**< MD supports memory allocation */
UCT_MD_FLAG_REG = UCS_BIT(1), /**< MD supports memory registration */
UCT_MD_FLAG_NEED_MEMH = UCS_BIT(2), /**< The transport needs a valid local
memory handle for zero-copy operations */
UCT_MD_FLAG_NEED_RKEY = UCS_BIT(3), /**< The transport needs a valid
remote memory key for remote memory
operations */
UCT_MD_FLAG_ADVISE = UCS_BIT(4), /**< MD supports memory advice */
UCT_MD_FLAG_FIXED = UCS_BIT(5), /**< MD supports memory allocation with
fixed address */
UCT_MD_FLAG_RKEY_PTR = UCS_BIT(6), /**< MD supports direct access to
remote memory via a pointer that
is returned by @ref uct_rkey_ptr */
UCT_MD_FLAG_SOCKADDR = UCS_BIT(7), /**< MD support for client-server
connection establishment via
sockaddr */
UCT_MD_FLAG_DEVICE_ALLOC = UCS_BIT(8) /**< MD supports Device Memory allocation */
};

/*
Expand Down
85 changes: 74 additions & 11 deletions src/uct/base/uct_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,37 @@ static inline int uct_mem_get_mmap_flags(unsigned uct_mmap_flags)
return mm_flags;
}

/*
 * Count the memory domains in 'mds' for which
 * ucs_test_all_flags(cap.flags, flags) holds.
 *
 * mds     - array of memory domains to inspect
 * num_mds - number of entries in 'mds'
 * flags   - capability flag mask passed to ucs_test_all_flags()
 *
 * A memory domain whose uct_md_query() call does not return UCS_OK is
 * reported with a warning and is not counted.
 */
static int uct_mem_dm_flag_count(uct_md_h *mds, unsigned num_mds, int flags)
{
    uct_md_attr_t attr;
    unsigned i;
    int matched = 0;

    for (i = 0; i < num_mds; ++i) {
        if (uct_md_query(mds[i], &attr) != UCS_OK) {
            ucs_warn("Failed to query MD");
            continue;
        }

        matched += ucs_test_all_flags(attr.cap.flags, flags) ? 1 : 0;
    }

    return matched;
}

ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
uct_alloc_method_t *methods, unsigned num_methods,
uct_md_h *mds, unsigned num_mds,
const char *alloc_name, uct_allocated_memory_t *mem)
{
int on_dm = !addr && (flags & UCT_MD_MEM_FLAG_FIXED);
uct_alloc_method_t *method;
uct_md_attr_t md_attr;
ucs_status_t status;
Expand All @@ -80,16 +106,30 @@ ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
}

if ((flags & UCT_MD_MEM_FLAG_FIXED) &&
(!addr || ((uintptr_t)addr % ucs_get_page_size()))) {
((uintptr_t)addr % ucs_get_page_size())) {
ucs_debug("UCT_MD_MEM_FLAG_FIXED requires valid page size aligned address");
return UCS_ERR_INVALID_PARAM;
}

if ((flags & UCT_MD_MEM_FLAG_NONBLOCK) && on_dm) {
ucs_debug("UCT_MD_MEM_FLAG_NONBLOCK can't be used with "
"UCT_MD_MEM_FLAG_FIXED and NULL address");
return UCS_ERR_INVALID_PARAM;
}

for (method = methods; method < methods + num_methods; ++method) {
ucs_trace("trying allocation method %s", uct_alloc_method_names[*method]);

switch (*method) {
case UCT_ALLOC_METHOD_MD:
if (on_dm &&
((uct_mem_dm_flag_count(mds, num_mds, UCT_MD_FLAG_DEVICE_ALLOC) != 1) ||
(uct_mem_dm_flag_count(mds, num_mds, UCT_MD_FLAG_REG) != 1))) {
/* DM allocation is allowed on a single device only, and no other
* MD can register this memory */
return UCS_ERR_NO_RESOURCE;
}

/* Allocate with one of the specified memory domains */
for (md_index = 0; md_index < num_mds; ++md_index) {
md = mds[md_index];
Expand All @@ -99,15 +139,19 @@ ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
return status;
}

/* Check if MD supports allocation */
if (!(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) {
if (on_dm && !(md_attr.cap.flags & UCT_MD_FLAG_DEVICE_ALLOC)) {
/* DM requested, but not supported by MD */
continue;
}

/* Check if MD supports allocation with fixed address
* if it's requested */
if ((flags & UCT_MD_MEM_FLAG_FIXED) &&
if (!on_dm && !(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) {
/* DM is not requested, so the ALLOC capability is required to allocate */
continue;
}

if (!on_dm && (flags & UCT_MD_MEM_FLAG_FIXED) &&
!(md_attr.cap.flags & UCT_MD_FLAG_FIXED)) {
/* FIXED requested, but not supported by MD */
continue;
}

Expand All @@ -121,19 +165,22 @@ ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
status = uct_md_mem_alloc(md, &alloc_length, &address, flags,
alloc_name, &memh);
if (status != UCS_OK) {
ucs_error("failed to allocate %zu bytes using md %s for %s: %s",
alloc_length, md->component->name,
alloc_name, ucs_status_string(status));
if (status != UCS_ERR_NO_RESOURCE) {
ucs_error("failed to allocate %zu bytes using md %s for %s: %s",
alloc_length, md->component->name,
alloc_name, ucs_status_string(status));
}
return status;
}

ucs_assert(memh != UCT_MEM_HANDLE_NULL);
mem->md = md;
mem->md = md;
mem->mem_type = md_attr.cap.mem_type;
mem->memh = memh;
mem->memh = memh;
goto allocated;

}

break;

case UCT_ALLOC_METHOD_THP:
Expand All @@ -143,6 +190,10 @@ ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
break;
}

if (on_dm) {
break;
}

if (!ucs_is_thp_enabled()) {
break;
}
Expand Down Expand Up @@ -181,6 +232,10 @@ ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
break;
}

if (on_dm) {
break;
}

alloc_length = min_length;
address = ucs_memalign(UCS_SYS_CACHE_LINE_SIZE, alloc_length
UCS_MEMTRACK_VAL);
Expand All @@ -192,6 +247,10 @@ ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
break;

case UCT_ALLOC_METHOD_MMAP:
if (on_dm) {
break;
}

/* Request memory from operating system using mmap() */
alloc_length = min_length;
address = addr;
Expand All @@ -208,6 +267,10 @@ ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
break;

case UCT_ALLOC_METHOD_HUGE:
if (on_dm) {
break;
}

/* Allocate huge pages */
alloc_length = min_length;
address = (flags & UCT_MD_MEM_FLAG_FIXED) ? addr : NULL;
Expand Down
92 changes: 83 additions & 9 deletions src/uct/ib/base/ib_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ static ucs_status_t uct_ib_md_query(uct_md_h uct_md, uct_md_attr_t *md_attr)
UCT_MD_FLAG_NEED_MEMH |
UCT_MD_FLAG_NEED_RKEY |
UCT_MD_FLAG_ADVISE;
#if HAVE_IBV_EXP_DM
md_attr->cap.flags |= UCT_MD_FLAG_DEVICE_ALLOC;
#endif
md_attr->cap.reg_mem_types = UCS_BIT(UCT_MD_MEM_TYPE_HOST);

if (md->config.enable_gpudirect_rdma != UCS_NO) {
Expand Down Expand Up @@ -484,7 +487,9 @@ ucs_status_t uct_ib_verbs_reg_atomic_key(uct_ib_md_t *md,
{
#if HAVE_EXP_UMR
struct ibv_exp_mem_region *mem_reg = NULL;
struct ibv_mr *mr = memh->mr;
struct ibv_mr *mr = memh->mr;
int on_dm = memh->flags & UCT_IB_MEM_FLAG_DM;
uintptr_t base_addr = on_dm ? 0 : (uintptr_t)mr->addr;
struct ibv_exp_send_wr wr, *bad_wr;
struct ibv_exp_create_mr_in mrin;
ucs_status_t status;
Expand Down Expand Up @@ -557,15 +562,15 @@ ucs_status_t uct_ib_verbs_reg_atomic_key(uct_ib_md_t *md,
}

for (i = 0; i < list_size; i++) {
mem_reg[i].base_addr = (uintptr_t) mr->addr + i * reg_length;
mem_reg[i].base_addr = base_addr + i * reg_length;
mem_reg[i].length = reg_length;
mem_reg[i].mr = mr;
}

ucs_assert(list_size >= 1);
mem_reg[list_size - 1].length = mr->length % reg_length;
wr.ext_op.umr.mem_list.mem_reg_list = mem_reg;
wr.ext_op.umr.base_addr = (uint64_t) (uintptr_t) mr->addr + offset;
wr.ext_op.umr.base_addr = (uint64_t)base_addr + offset;
wr.ext_op.umr.num_mrs = list_size;
wr.ext_op.umr.modified_mr = umr;

Expand Down Expand Up @@ -835,18 +840,62 @@ static void uct_ib_mem_init(uct_ib_mem_t *memh, unsigned uct_flags,
}
}

#if HAVE_IBV_EXP_DM
/*
 * Allocate 'length' bytes of device memory through the MD's ibv context and
 * register it as a memory region on the MD's protection domain. The results
 * are stored in memh->dm and memh->mr.
 *
 * memh       - memory handle that receives the dm and mr pointers
 * md         - IB memory domain providing the ibv context and pd
 * length     - number of bytes to allocate and register
 * exp_access - extra access flags OR-ed with UCT_IB_MEM_ACCESS_FLAGS
 *
 * Returns UCS_OK on success, UCS_ERR_NO_RESOURCE when ibv_exp_alloc_dm
 * returns NULL, and UCS_ERR_IO_ERROR when ibv_exp_reg_mr returns NULL (the
 * device memory is freed before returning in that case).
 *
 * TODO: this duplicates uct_rc_mlx5_iface_common_dm_tl_init in order to keep
 * the backport to the v1.6 branch simple; deduplicate once that backport
 * is complete.
 */
static ucs_status_t uct_ib_mem_alloc_dm(uct_ib_mem_t *memh, uct_ib_md_t *md,
                                        size_t length, uint64_t exp_access)
{
    struct ibv_exp_alloc_dm_attr alloc_attr = {.length = length};
    struct ibv_exp_reg_mr_in reg_in         = {0};

    memh->dm = UCS_PROFILE_CALL(ibv_exp_alloc_dm, md->dev.ibv_context, &alloc_attr);
    if (memh->dm == NULL) {
        ucs_debug("failed to allocate memory on device: %s", uct_ib_device_name(&md->dev));
        return UCS_ERR_NO_RESOURCE;
    }

    /* describe the registration of the freshly allocated device memory */
    reg_in.dm         = memh->dm;
    reg_in.comp_mask  = IBV_EXP_REG_MR_DM;
    reg_in.pd         = md->pd;
    reg_in.length     = length;
    reg_in.exp_access = UCT_IB_MEM_ACCESS_FLAGS | exp_access;

    memh->mr = UCS_PROFILE_CALL(ibv_exp_reg_mr, &reg_in);
    if (memh->mr != NULL) {
        return UCS_OK;
    }

    /* registration returned NULL: log it and release the device memory */
    uct_ib_md_print_mem_reg_err_msg(UCS_LOG_LEVEL_DEBUG, reg_in.addr, reg_in.length,
                                    reg_in.exp_access, "exp_");
    UCS_PROFILE_CALL(ibv_exp_free_dm, memh->dm);
    return UCS_ERR_IO_ERROR;
}
#endif

static ucs_status_t uct_ib_mem_alloc(uct_md_h uct_md, size_t *length_p,
void **address_p, unsigned flags,
const char *alloc_name, uct_mem_h *memh_p)
{
#if HAVE_DECL_IBV_EXP_ACCESS_ALLOCATE_MR
uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t);
int on_dm = !(*address_p) && (flags & UCT_MD_MEM_FLAG_FIXED);
ucs_status_t status;
uint64_t exp_access;
uct_ib_mem_t *memh;
size_t length;

if (!md->config.enable_contig_pages) {
#if !HAVE_IBV_EXP_DM
if (on_dm) {
return UCS_ERR_UNSUPPORTED;
}
#endif

if (!on_dm && !md->config.enable_contig_pages) {
/* if on-device allocation is not requested, the memory
* is allocated using contig pages, so contig pages
* must be enabled */
return UCS_ERR_UNSUPPORTED;
}

Expand All @@ -858,17 +907,35 @@ static ucs_status_t uct_ib_mem_alloc(uct_md_h uct_md, size_t *length_p,

length = *length_p;
exp_access = uct_ib_md_access_flags(md, flags, length) |
IBV_EXP_ACCESS_ALLOCATE_MR;
status = uct_ib_md_reg_mr(md, NULL, length, exp_access, 0, &memh->mr);
if (status != UCS_OK) {
goto err_free_memh;
(!on_dm ? IBV_EXP_ACCESS_ALLOCATE_MR : 0);

#if HAVE_IBV_EXP_DM
if (on_dm) {
status = uct_ib_mem_alloc_dm(memh, md, length, exp_access);
if (status != UCS_OK) {
goto err_free_memh;
}
*address_p = ((uct_mlx5_dm_va_t*)memh->dm)->start_va;
} else
#endif
{
status = uct_ib_md_reg_mr(md, NULL, length, exp_access, 0, &memh->mr);
if (status != UCS_OK) {
goto err_free_memh;
}
*address_p = memh->mr->addr;
}

ucs_trace("allocated memory %p..%p on %s lkey 0x%x rkey 0x%x",
memh->mr->addr, memh->mr->addr + memh->mr->length, uct_ib_device_name(&md->dev),
memh->mr->lkey, memh->mr->rkey);

uct_ib_mem_init(memh, flags, exp_access);

if (on_dm) {
memh->flags |= UCT_IB_MEM_FLAG_DM;
}

uct_ib_mem_set_numa_policy(md, memh);

if (md->config.odp.prefetch) {
Expand All @@ -878,7 +945,6 @@ static ucs_status_t uct_ib_mem_alloc(uct_md_h uct_md, size_t *length_p,
UCS_STATS_UPDATE_COUNTER(md->stats, UCT_IB_MD_STAT_MEM_ALLOC, +1);
ucs_memtrack_allocated(memh->mr->addr, memh->mr->length UCS_MEMTRACK_VAL);

*address_p = memh->mr->addr;
*length_p = memh->mr->length;
*memh_p = memh;
return UCS_OK;
Expand Down Expand Up @@ -915,6 +981,14 @@ static ucs_status_t uct_ib_mem_free(uct_md_h uct_md, uct_mem_h memh)
return status;
}

if (ib_memh->flags & UCT_IB_MEM_FLAG_DM) {
#if HAVE_IBV_EXP_DM
UCS_PROFILE_CALL(ibv_exp_free_dm, ib_memh->dm);
#else
ucs_assert_always(0);
#endif
}

uct_ib_memh_free(ib_memh);
return UCS_OK;
}
Expand Down
Loading