diff --git a/mpp/shmemx_c_func.h4 b/mpp/shmemx_c_func.h4 index 948146156..fb3e3f720 100644 --- a/mpp/shmemx_c_func.h4 +++ b/mpp/shmemx_c_func.h4 @@ -105,3 +105,7 @@ SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_heap_create(void *base, size_t size SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_heap_preinit(void); SHMEM_FUNCTION_ATTRIBUTES int SHPRE()shmemx_heap_preinit_thread(int requested, int *provided); SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_heap_postinit(void); + +/* Memory Ordering Routines */ +SHMEM_FUNCTION_ATTRIBUTES void shmem_pe_quiet(const int *target_pes, int npes); +SHMEM_FUNCTION_ATTRIBUTES void shmem_ctx_pe_quiet(shmem_ctx_t ctx, const int *target_pes, int npes); diff --git a/src/shmem_synchronization.h b/src/shmem_synchronization.h index 0270d6d7b..e93a6226f 100644 --- a/src/shmem_synchronization.h +++ b/src/shmem_synchronization.h @@ -40,6 +40,25 @@ shmem_internal_quiet(shmem_ctx_t ctx) shmem_transport_syncmem(); } +static inline void +shmem_internal_pe_quiet(shmem_ctx_t ctx, const int *target_pes, int npes) +{ + int ret; + + if (ctx == SHMEM_CTX_INVALID) + return; + + ret = shmem_transport_pe_quiet((shmem_transport_ctx_t *)ctx, target_pes, npes); + if (0 != ret) { RAISE_ERROR(ret); } + + shmem_internal_membar(); + + /* Transport level memory flush is required to make memory + * changes (i.e. subsequent coherent load operations + * performed via the shmem_ptr API, the result of atomics + * that targeted the local process) visible */ + shmem_transport_syncmem(); +} static inline void shmem_internal_fence(shmem_ctx_t ctx) diff --git a/src/synchronization_f.c b/src/synchronization_f.c index 74e9c5c4d..30f48ec6b 100644 --- a/src/synchronization_f.c +++ b/src/synchronization_f.c @@ -37,6 +37,28 @@ FC_SHMEM_QUIET(void) shmem_internal_quiet(SHMEM_CTX_DEFAULT); } +#define FC_SHMEM_PE_QUIET FC_FUNC_(shmem_pe_quiet, SHMEM_PE_QUIET) +void SHMEM_FUNCTION_ATTRIBUTES +FC_SHMEM_PE_QUIET(const int *target_pes, int npes); +void +FC_SHMEM_PE_QUIET(const int *target_pes, int npes) +{ + SHMEM_ERR_CHECK_INITIALIZED(); + + shmem_internal_pe_quiet(SHMEM_CTX_DEFAULT, target_pes, npes); +} + +#define FC_SHMEM_CTX_QUIET FC_FUNC_(shmem_ctx_pe_quiet, SHMEM_CTX_PE_QUIET) +void SHMEM_FUNCTION_ATTRIBUTES +FC_SHMEM_CTX_PE_QUIET(shmem_ctx_t ctx, const int *target_pes, int npes); +void +FC_SHMEM_CTX_PE_QUIET(shmem_ctx_t ctx, const int *target_pes, int npes) +{ + SHMEM_ERR_CHECK_INITIALIZED(); + + shmem_internal_pe_quiet(ctx, target_pes, npes); +} + #define FC_SHMEM_FENCE FC_FUNC_(shmem_fence, SHMEM_FENCE) void SHMEM_FUNCTION_ATTRIBUTES diff --git a/src/transport_none.h b/src/transport_none.h index 9a10ea52c..8da3c6d98 100644 --- a/src/transport_none.h +++ b/src/transport_none.h @@ -103,6 +103,13 @@ shmem_transport_quiet(shmem_transport_ctx_t* ctx) return 0; } +static inline +int +shmem_transport_pe_quiet(shmem_transport_ctx_t* ctx, const int *target_pes, int npes) +{ + return 0; +} + static inline int shmem_transport_fence(shmem_transport_ctx_t* ctx) diff --git a/src/transport_ofi.c b/src/transport_ofi.c index f8f04d65e..46fed08e2 100644 --- a/src/transport_ofi.c +++ b/src/transport_ofi.c @@ -2050,6 +2050,7 @@ int shmem_transport_ctx_create(struct shmem_internal_team_t *team, long options, #ifndef USE_CTX_LOCK shmem_internal_cntr_write(&ctxp->pending_put_cntr, 0); shmem_internal_cntr_write(&ctxp->pending_get_cntr, 0); + #endif ctxp->stx_idx = -1; diff --git a/src/transport_ofi.h b/src/transport_ofi.h index f7e2ebf32..b4909697d 100644 --- a/src/transport_ofi.h +++ b/src/transport_ofi.h @@ -548,6 +548,15 @@ int shmem_transport_quiet(shmem_transport_ctx_t* ctx) return 0; } +static inline +int shmem_transport_pe_quiet(shmem_transport_ctx_t* ctx, const int *target_pes, int npes) +{ + + shmem_transport_quiet(ctx); + + return 0; +} + static inline int shmem_transport_fence(shmem_transport_ctx_t* ctx) @@ -627,7 +636,7 @@ void shmem_transport_put_scalar(shmem_transport_ctx_t* ctx, void *target, const SHMEM_TRANSPORT_OFI_CTX_LOCK(ctx); SHMEM_TRANSPORT_OFI_CNTR_INC(&ctx->pending_put_cntr); - + do { ret = fi_inject_write(ctx->ep, @@ -1002,7 +1011,6 @@ void shmem_transport_get_wait(shmem_transport_ctx_t* ctx) SHMEM_TRANSPORT_OFI_CTX_UNLOCK(ctx); } - static inline void shmem_transport_cswap_nbi(shmem_transport_ctx_t* ctx, void *target, const void *source, void *dest, const void *operand, diff --git a/src/transport_portals4.h b/src/transport_portals4.h index ab99becaa..bd3119d5d 100644 --- a/src/transport_portals4.h +++ b/src/transport_portals4.h @@ -292,6 +292,13 @@ shmem_transport_quiet(shmem_transport_ctx_t* ctx) return 0; } +static inline +int +shmem_transport_pe_quiet(shmem_transport_ctx_t* ctx, const int *target_pes, int npes) +{ + shmem_transport_quiet(ctx); +} + static inline int diff --git a/src/transport_ucx.h b/src/transport_ucx.h index 2e12d22b2..cf081d7c0 100644 --- a/src/transport_ucx.h +++ b/src/transport_ucx.h @@ -210,6 +210,22 @@ shmem_transport_quiet(shmem_transport_ctx_t* ctx) return 0; } +static inline +int +shmem_transport_pe_quiet(shmem_transport_ctx_t* ctx, const int *target_pes, int npes) +{ + ucs_status_t status; + + for (int i = 0; i < npes; i++) { + int pe = target_pes[i]; + + status = ucp_ep_flush(shmem_transport_peers[pe].ep); + UCX_CHECK_STATUS(status); + } + + return 0; +} + static inline int shmem_transport_fence(shmem_transport_ctx_t* ctx)