From 79365c070c91a83a5564507a236e3b76b721d766 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Tue, 27 Jan 2026 20:08:00 -0800 Subject: [PATCH 1/9] issue #87: Ensure optional stat gets zeroed in PRIF calls that cannot generate non-fatal errors --- src/caffeine/allocation_s.F90 | 1 + src/caffeine/coarray_access_s.F90 | 2 ++ src/caffeine/coarray_queries_s.F90 | 8 +++++++- src/caffeine/critical_s.F90 | 2 ++ src/caffeine/locks_s.F90 | 8 ++++++++ src/caffeine/sync_stmt_s.F90 | 1 + src/caffeine/teams_s.F90 | 6 ++++++ 7 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/caffeine/allocation_s.F90 b/src/caffeine/allocation_s.F90 index 3def11a38..151b9772b 100644 --- a/src/caffeine/allocation_s.F90 +++ b/src/caffeine/allocation_s.F90 @@ -164,6 +164,7 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C) module procedure prif_deallocate call caf_deallocate(non_symmetric_heap_mspace, mem) + if (present(stat)) stat = 0 end procedure subroutine add_to_team_list(coarray_handle) diff --git a/src/caffeine/coarray_access_s.F90 b/src/caffeine/coarray_access_s.F90 index a9631bec2..368d3ffac 100644 --- a/src/caffeine/coarray_access_s.F90 +++ b/src/caffeine/coarray_access_s.F90 @@ -125,6 +125,8 @@ dest = current_image_buffer, & src = remote_ptr, & size = size_in_bytes) + + if (present(stat)) stat = 0 end procedure ! _______________________ Strided Get RMA ____________________________ diff --git a/src/caffeine/coarray_queries_s.F90 b/src/caffeine/coarray_queries_s.F90 index 73b4071e0..09db1ccb3 100644 --- a/src/caffeine/coarray_queries_s.F90 +++ b/src/caffeine/coarray_queries_s.F90 @@ -139,10 +139,14 @@ subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index) module procedure prif_initial_team_index call initial_index_helper(coarray_handle, sub, current_team, initial_team_index) + + if (present(stat)) stat = 0 end procedure module procedure prif_initial_team_index_with_team call initial_index_helper(coarray_handle, sub, team, initial_team_index) + + if (present(stat)) stat = 0 end procedure module procedure prif_initial_team_index_with_team_number @@ -152,7 +156,9 @@ subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index) call initial_index_helper(coarray_handle, sub, current_team, initial_team_index) else call unimplemented("prif_initial_team_index_with_team_number: no support for sibling teams") - end if + end if + + if (present(stat)) stat = 0 end procedure !--------------------------------------------------------------------- diff --git a/src/caffeine/critical_s.F90 b/src/caffeine/critical_s.F90 index 547216ffb..d1c9d48b2 100644 --- a/src/caffeine/critical_s.F90 +++ b/src/caffeine/critical_s.F90 @@ -9,6 +9,8 @@ module procedure prif_critical call unimplemented("prif_critical") + + if (present(stat)) stat = 0 end procedure module procedure prif_end_critical diff --git a/src/caffeine/locks_s.F90 b/src/caffeine/locks_s.F90 index 0bb78ec4e..f4ef51239 100644 --- a/src/caffeine/locks_s.F90 +++ b/src/caffeine/locks_s.F90 @@ -9,18 +9,26 @@ module procedure prif_lock call unimplemented("prif_lock") + + if (present(stat)) stat = 0 end procedure module procedure prif_lock_indirect call unimplemented("prif_lock_indirect") + + if (present(stat)) stat = 0 end procedure module procedure prif_unlock call unimplemented("prif_unlock") + + if (present(stat)) stat = 0 end procedure module procedure prif_unlock_indirect call unimplemented("prif_unlock_indirect") + + if (present(stat)) stat = 0 end procedure end submodule locks_s diff --git a/src/caffeine/sync_stmt_s.F90 b/src/caffeine/sync_stmt_s.F90 index b8d7218c3..3186f4a40 100644 --- a/src/caffeine/sync_stmt_s.F90 +++ b/src/caffeine/sync_stmt_s.F90 @@ -113,6 +113,7 @@ acquire_fence=merge(1,0,i==u)) end do + if (present(stat)) stat = 0 end procedure end submodule diff --git a/src/caffeine/teams_s.F90 b/src/caffeine/teams_s.F90 index 2fbafd274..bbd91fa10 100644 --- a/src/caffeine/teams_s.F90 +++ b/src/caffeine/teams_s.F90 @@ -23,6 +23,8 @@ call caf_establish_child_heap end if call prif_sync_all ! child team sync required by F23 11.1.5.2 + + if (present(stat)) stat = 0 end procedure module procedure prif_end_team @@ -61,6 +63,8 @@ ! set the current team back to the parent team current_team%info => current_team%info%parent_team + + if (present(stat)) stat = 0 end procedure module procedure prif_form_team @@ -92,6 +96,8 @@ team%info%this_image = caf_this_image(team%info%gex_team) team%info%num_images = caf_num_images(team%info%gex_team) end block + + if (present(stat)) stat = 0 end procedure module procedure prif_get_team From 3d0f61f6e860ae1ef6c47fd205192330911c2afc Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Tue, 27 Jan 2026 21:15:23 -0800 Subject: [PATCH 2/9] prif_deallocate_coarrays: Factor stat/errmsg reporting --- src/caffeine/allocation_s.F90 | 29 ++++++----------------------- src/caffeine/prif_private_s.F90 | 21 +++++++++++++++++++++ src/prif.F90 | 3 +++ 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/caffeine/allocation_s.F90 b/src/caffeine/allocation_s.F90 index 151b9772b..3c21c2e0c 100644 --- a/src/caffeine/allocation_s.F90 +++ b/src/caffeine/allocation_s.F90 @@ -82,7 +82,6 @@ module procedure prif_deallocate_coarrays #endif integer :: i, num_handles - character(len=*), parameter :: unallocated_message = "Attempted to deallocate unallocated coarray" type(prif_coarray_handle), target :: coarray_handle # if HAVE_FINAL_FUNC_SUPPORT abstract interface @@ -102,17 +101,9 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C) call prif_sync_all ! Need to ensure we don't deallocate anything till everyone gets here num_handles = size(coarray_handles) if (.not. all([(associated(coarray_handles(i)%info), i = 1, num_handles)])) then - if (present(stat)) then - stat = 1 ! TODO: decide what our stat codes should be - if (present(errmsg)) then - errmsg = unallocated_message - else if (present(errmsg_alloc)) then - errmsg_alloc = unallocated_message - end if - return - else - call prif_error_stop(.false._c_bool, stop_code_char=unallocated_message) - end if + call report_error(CAF_STAT_INVALID_ARGUMENT, "Attempted to deallocate unallocated coarray", & + stat, errmsg, errmsg_alloc) + return end if call_assert(all(coarray_handle_check(coarray_handles))) @@ -129,17 +120,9 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C) if (.not. allocated(local_errmsg)) then ! provide a default errmsg local_errmsg = "coarray_cleanup finalization callback failed" end if - if (present(stat)) then - stat = local_stat - if (present(errmsg)) then - errmsg = local_errmsg - else if (present(errmsg_alloc)) then - call move_alloc(local_errmsg, errmsg_alloc) - end if - return ! NOTE: We no longer have guarantees that coarrays are in consistent state - else - call prif_error_stop(.false._c_bool, stop_code_char=local_errmsg) - end if + call report_error(local_stat, local_errmsg, & + stat, errmsg, errmsg_alloc) + return ! NOTE: We no longer have guarantees that coarrays are in consistent state end if # else ! TODO: issue a warning that we are ignoring the final_func? diff --git a/src/caffeine/prif_private_s.F90 b/src/caffeine/prif_private_s.F90 index 77c275ea7..5a4836189 100644 --- a/src/caffeine/prif_private_s.F90 +++ b/src/caffeine/prif_private_s.F90 @@ -381,6 +381,27 @@ pure function optional_value(var) result(c_val) end if end function + ! Report the provided error stat/msg using the provided optional stat/errmsg args + subroutine report_error(report_stat, report_msg, stat, errmsg, errmsg_alloc) + integer(c_int), intent(in) :: report_stat + character(len=*), intent(in) :: report_msg + integer(c_int), intent(out), optional :: stat + character(len=*), intent(inout), optional :: errmsg + character(len=:), intent(inout), allocatable , optional :: errmsg_alloc + + call_assert(report_stat /= 0) + if (.not. present(stat)) then + call prif_error_stop(.false._c_bool, stop_code_char=report_msg) + else + stat = report_stat + if (present(errmsg)) then + errmsg = report_msg + else if (present(errmsg_alloc)) then + errmsg_alloc = report_msg + end if + end if + end subroutine + ! verify state invariants for a coarray_handle ! Note this function validates invariants with deliberately UNconditional assertions ! Suggested caller usage for conditional validation is: diff --git a/src/prif.F90 b/src/prif.F90 index 72d9f9447..dfc063a46 100644 --- a/src/prif.F90 +++ b/src/prif.F90 @@ -119,6 +119,9 @@ module prif PRIF_STAT_OUT_OF_MEMORY = 301, & PRIF_STAT_ALREADY_INIT = 302 + integer(c_int), parameter, private :: & + CAF_STAT_INVALID_ARGUMENT = 404 + type, public :: prif_event_type private integer(PRIF_ATOMIC_INT_KIND) :: counter = 0 From 549a3195b87721bc44b8bec07bcbda59bb6a36d9 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 28 Jan 2026 09:55:10 -0800 Subject: [PATCH 3/9] caf_caffeinate: Improvements to segment size handling * Enforce segment size invariants The symmetric and non-symmetric heaps must each be sized on page boundaries. Previously some tiny or misaligned values of CAF_HEAP_SIZE would cause startup time crashes inside caf_caffeinate or dlmalloc. * Expose more shared heap information up into Fortran * Convert heap size manipulation from size_t to (u)intptr_t --- src/caffeine/caffeine.c | 29 ++++++++++++++++++++++------- src/caffeine/prif_private_s.F90 | 7 ++++--- src/caffeine/program_startup_s.F90 | 2 ++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/caffeine/caffeine.c b/src/caffeine/caffeine.c index 43ea6b7fb..4a56acf87 100644 --- a/src/caffeine/caffeine.c +++ b/src/caffeine/caffeine.c @@ -79,6 +79,7 @@ int caf_image_from_initial(gex_TM_t tm, int image_num) { // --------------------------------------------------- // NOTE: gex_TM_T is a typedef to a C pointer, so the `gex_TM_t* initial_team` arg in the C signature matches the BIND(C) interface of an `intent(out)` arg of type `c_ptr` for the same argument void caf_caffeinate( + intptr_t* total_heap_size, mspace* symmetric_heap, intptr_t* symmetric_heap_start, intptr_t* symmetric_heap_size, @@ -90,41 +91,55 @@ void caf_caffeinate( numprocs = gex_TM_QuerySize(myworldteam); *initial_team = myworldteam; + #define PAGE_ALIGNUP(sz) ((sz + GASNET_PAGESIZE - 1) & ~(GASNET_PAGESIZE-1)) + // query largest possible segment GASNet can give us of the same size across all processes: - size_t max_seg = gasnet_getMaxGlobalSegmentSize(); + uintptr_t max_seg = gasnet_getMaxGlobalSegmentSize(); // impose a reasonable default size #ifndef CAF_DEFAULT_HEAP_SIZE #define CAF_DEFAULT_HEAP_SIZE (128*1024*1024) // 128 MiB #endif - size_t default_seg = MIN(max_seg, CAF_DEFAULT_HEAP_SIZE); + uintptr_t default_seg = MIN(max_seg, CAF_DEFAULT_HEAP_SIZE); // retrieve user preference, defaulting to the above and units of MiB - size_t segsz = gasnett_getenv_int_withdefault("CAF_HEAP_SIZE", + uintptr_t segsz = gasnett_getenv_int_withdefault("CAF_HEAP_SIZE", default_seg, 1024*1024); + // ensure at least two full pages + segsz = MAX(segsz,2*GASNET_PAGESIZE); + // round-up to closest page size + segsz = PAGE_ALIGNUP(segsz); // cap user request to the largest available: // TODO: issue a console warning here instead of silently capping segsz = MIN(segsz,max_seg); + assert(segsz % GASNET_PAGESIZE == 0); GASNET_SAFE(gex_Segment_Attach(&mysegment, myworldteam, segsz)); *symmetric_heap_start = (intptr_t)gex_Segment_QueryAddr(mysegment); - size_t total_heap_size = gex_Segment_QuerySize(mysegment); + *total_heap_size = gex_Segment_QuerySize(mysegment); + assert(*total_heap_size >= 2*GASNET_PAGESIZE); #ifndef CAF_DEFAULT_COMP_FRAC #define CAF_DEFAULT_COMP_FRAC 0.1f // 10% #endif float default_comp_frac = MAX(MIN(0.99f, CAF_DEFAULT_COMP_FRAC), 0.01f); float non_symmetric_fraction = gasnett_getenv_dbl_withdefault("CAF_COMP_FRAC", default_comp_frac); - assert(non_symmetric_fraction > 0 && non_symmetric_fraction < 1); // TODO: real error reporting + if (non_symmetric_fraction <= 0 || non_symmetric_fraction >= 1) { + gasnett_fatalerror_nopos("If used, environment variable 'CAF_COMP_FRAC' must be a valid floating point value or fraction between 0 and 1."); + } - size_t non_symmetric_heap_size = total_heap_size * non_symmetric_fraction; - *symmetric_heap_size = total_heap_size - non_symmetric_heap_size; + uintptr_t non_symmetric_heap_size = *total_heap_size * non_symmetric_fraction; + non_symmetric_heap_size = PAGE_ALIGNUP(non_symmetric_heap_size); + *symmetric_heap_size = *total_heap_size - non_symmetric_heap_size; + assert(*symmetric_heap_size % GASNET_PAGESIZE == 0); intptr_t non_symmetric_heap_start = *symmetric_heap_start + *symmetric_heap_size; if (myproc == 0) { *symmetric_heap = create_mspace_with_base((void*)*symmetric_heap_start, *symmetric_heap_size, 0); + assert(*symmetric_heap); mspace_set_footprint_limit(*symmetric_heap, *symmetric_heap_size); } *non_symmetric_heap = create_mspace_with_base((void*)non_symmetric_heap_start, non_symmetric_heap_size, 0); + assert(*non_symmetric_heap); mspace_set_footprint_limit(*non_symmetric_heap, non_symmetric_heap_size); // init various subsystems: diff --git a/src/caffeine/prif_private_s.F90 b/src/caffeine/prif_private_s.F90 index 5a4836189..c12813426 100644 --- a/src/caffeine/prif_private_s.F90 +++ b/src/caffeine/prif_private_s.F90 @@ -26,12 +26,14 @@ type(prif_team_descriptor), target :: initial_team type(prif_team_type) :: current_team type(c_ptr) :: non_symmetric_heap_mspace + integer(c_intptr_t) :: total_heap_size, non_symmetric_heap_size interface ! ________ Program initiation and finalization ___________ subroutine caf_caffeinate( & + total_heap_size, & symmetric_heap, & symmetric_heap_start, & symmetric_heap_size, & @@ -40,9 +42,8 @@ subroutine caf_caffeinate( & bind(C) import c_ptr, c_intptr_t implicit none - type(c_ptr), intent(out) :: symmetric_heap - integer(c_intptr_t), intent(out) :: symmetric_heap_start, symmetric_heap_size - type(c_ptr), intent(out) :: non_symmetric_heap + integer(c_intptr_t), intent(out) :: total_heap_size, symmetric_heap_start, symmetric_heap_size + type(c_ptr), intent(out) :: symmetric_heap, non_symmetric_heap type(c_ptr), intent(out) :: initial_team end subroutine diff --git a/src/caffeine/program_startup_s.F90 b/src/caffeine/program_startup_s.F90 index 5b85e4ac7..08786bc08 100644 --- a/src/caffeine/program_startup_s.F90 +++ b/src/caffeine/program_startup_s.F90 @@ -14,6 +14,7 @@ stat = PRIF_STAT_ALREADY_INIT else call caf_caffeinate( & + total_heap_size, & initial_team%heap_mspace, & initial_team%heap_start, & initial_team%heap_size, & @@ -25,6 +26,7 @@ initial_team%team_number = -1 initial_team%this_image = caf_this_image(initial_team%gex_team) initial_team%num_images = caf_num_images(initial_team%gex_team) + non_symmetric_heap_size = total_heap_size - initial_team%heap_size call sync_init() From d142ea6b7981a3d32421f7968ca800cd74f8df27 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Thu, 29 Jan 2026 14:00:57 -0800 Subject: [PATCH 4/9] program_termination_s: Excise all list-directed output The use of list-directed output in `prif_(error_)stop(stop_code_char)` was leading to processor-defined line break behavior, which is quite poor in practice with flang. --- src/caffeine/program_termination_s.F90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/caffeine/program_termination_s.F90 b/src/caffeine/program_termination_s.F90 index 860ad9394..628aee31d 100644 --- a/src/caffeine/program_termination_s.F90 +++ b/src/caffeine/program_termination_s.F90 @@ -52,12 +52,12 @@ subroutine prif_stop_integer(quiet, stop_code) if (present(stop_code)) then if (.not. quiet) then - write(output_unit, *) "STOP ", stop_code + write(output_unit, '(A, I0)') "STOP ", stop_code end if exit_code = stop_code else if (.not. quiet) then - write(output_unit, *) "STOP" + write(output_unit, '(A)') "STOP" end if exit_code = 0_c_int end if @@ -74,7 +74,7 @@ subroutine prif_stop_character(quiet, stop_code) character(len=*), intent(in) :: stop_code if (.not. quiet) then - write(output_unit, *) "STOP '" // stop_code // "'" + write(output_unit, '(A)') "STOP '" // stop_code // "'" end if call flush_all() @@ -103,7 +103,7 @@ subroutine prif_error_stop_character(quiet, stop_code) character(len=*), intent(in) :: stop_code if (.not. quiet) then - write(error_unit, *) "ERROR STOP '" // stop_code // "'" + write(error_unit, '(A)') "ERROR STOP '" // stop_code // "'" end if call flush_all() @@ -124,7 +124,7 @@ subroutine prif_error_stop_integer(quiet, stop_code) exit_code = stop_code else if (.not.quiet) then - write(error_unit,'(a)') "ERROR STOP" + write(error_unit,'(A)') "ERROR STOP" end if exit_code = 1_c_int end if From 8f4a6f254833f5cee1c559cb6c756f83800bd580 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 28 Jan 2026 13:46:27 -0800 Subject: [PATCH 5/9] issue #128: prif_allocate(_coarray): Add error reporting for shared heap exhaustion --- src/caffeine/allocation_s.F90 | 58 +++++++++++++++++++++++++++++++-- src/caffeine/caffeine.c | 3 -- src/caffeine/prif_private_s.F90 | 49 ++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/src/caffeine/allocation_s.F90 b/src/caffeine/allocation_s.F90 index 3c21c2e0c..18038bb11 100644 --- a/src/caffeine/allocation_s.F90 +++ b/src/caffeine/allocation_s.F90 @@ -36,7 +36,11 @@ descriptor_size = c_sizeof(unused) total_size = descriptor_size + size_in_bytes whole_block = caf_allocate(current_team%info%heap_mspace, total_size) - block_offset = as_int(whole_block) - current_team%info%heap_start + if (.not. c_associated(whole_block)) then + block_offset = -1 ! out of memory + else + block_offset = as_int(whole_block) - current_team%info%heap_start + end if else block_offset = 0 end if @@ -44,6 +48,14 @@ ! Use a co_sum to aggregate broadcasing the information from image 1 ! together with the team barrier spec-required by coarray allocation call prif_co_sum(block_offset) + if (block_offset == -1) then ! out of memory - abort allocation attempt + call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .true.), & + stat, errmsg, errmsg_alloc) + if (caf_have_child_teams()) then ! unroll state change above before return + call caf_establish_child_heap + end if + return + end if if (me /= 1) whole_block = as_c_ptr(current_team%info%heap_start + block_offset) call c_f_pointer(whole_block, coarray_handle%info) @@ -70,9 +82,51 @@ end procedure module procedure prif_allocate - allocated_memory = caf_allocate(non_symmetric_heap_mspace, size_in_bytes) + type(c_ptr) :: mem + + mem = caf_allocate(non_symmetric_heap_mspace, size_in_bytes) + if (.not. c_associated(mem)) then + call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .false.), & + stat, errmsg, errmsg_alloc) + else + allocated_memory = mem + end if end procedure + function out_of_memory_message(size_in_bytes, symmetric) result(message) + integer(c_size_t), intent(in) :: size_in_bytes + logical, intent(in) :: symmetric + character(len=:), allocatable :: mem_type + character(len=:), allocatable :: message + + message = "Fortran shared heap is out of memory" + if (symmetric) then + mem_type = "coarray" + else + message = message // " on image " // num_to_str(initial_team%this_image) + mem_type = "non-coarray" + end if + message = message // new_line('') & + // " while allocating " // num_to_str(size_in_bytes, .true.) // " of additional " & + // mem_type // " memory." // new_line('') & + // new_line('') & + // " Shared heap size information:" // new_line('') & + // " Total shared heap: " // pad(num_to_str(total_heap_size, .true.)) & + // " (CAF_HEAP_SIZE)" // new_line('') & + // " Total non-coarray heap: " // pad(num_to_str(non_symmetric_heap_size, .true.)) & + // " (CAF_COMP_FRAC * CAF_HEAP_SIZE)" // new_line('') & + // " Current team coarray heap: " // pad(num_to_str(current_team%info%heap_size, .true.)) // new_line('') & + // new_line('') & + // " Consider setting the CAF_HEAP_SIZE environment variable to request a larger heap." + contains + function pad(str) result(s) + character(len=*), intent(in) :: str + character(len=:), allocatable :: s + s = str + s = repeat(' ',max(0, 10 - len(str))) // s + end function + end function + #if CAF_PRIF_VERSION <= 6 module procedure prif_deallocate_coarray #else diff --git a/src/caffeine/caffeine.c b/src/caffeine/caffeine.c index 4a56acf87..a46936748 100644 --- a/src/caffeine/caffeine.c +++ b/src/caffeine/caffeine.c @@ -175,9 +175,6 @@ void caf_fatal_error( const CFI_cdesc_t* Fstr ) void* caf_allocate(mspace heap, size_t bytes) { void* allocated_space = mspace_memalign(heap, 8, bytes); - if (!allocated_space) // uh-oh, something went wrong.. - gasnett_fatalerror("caf_allocate failed to mspace_memalign(%"PRIuSZ")", - bytes); return allocated_space; } diff --git a/src/caffeine/prif_private_s.F90 b/src/caffeine/prif_private_s.F90 index c12813426..9deed1346 100644 --- a/src/caffeine/prif_private_s.F90 +++ b/src/caffeine/prif_private_s.F90 @@ -15,6 +15,7 @@ c_f_pointer, & c_f_procpointer, & c_funloc, & + c_int32_t, & c_loc, & c_null_funptr, & c_sizeof, & @@ -339,8 +340,56 @@ subroutine caf_form_team(current_team, new_team, team_number, new_index) bind(C) end interface + interface num_to_str + module procedure num_to_str32 + module procedure num_to_str64 + end interface + contains + pure function num_to_str32(num, is_mem_size) result(str) + integer(c_int32_t), value :: num + logical, intent(in), optional :: is_mem_size + character(len=:), allocatable :: str + + str = num_to_str64(int(num, c_int64_t), is_mem_size) + end function + + pure function num_to_str64(num, is_mem_size) result(str) + integer(c_int64_t), value :: num + logical, intent(in), optional :: is_mem_size + character(len=:), allocatable :: str, unit + character(len=40) num_str + integer(c_int64_t) :: divisor + + if (present(is_mem_size)) then + if (is_mem_size) then + divisor = 1 + ! Try to strike a compromise between digits and round off +# define CAF_USE_DIV(div, unit_str) \ + if ((num .ge. 10*div) .or. (num .ge. div .and. mod(num, div) == 0)) then ; \ + divisor = div; unit = unit_str; exit; \ + end if + do + CAF_USE_DIV(ishft(1_c_int64_t,40), " TiB") + CAF_USE_DIV(ishft(1_c_int64_t,30), " GiB") + CAF_USE_DIV(ishft(1_c_int64_t,20), " MiB") + CAF_USE_DIV(ishft(1_c_int64_t,10), " KiB") + CAF_USE_DIV(1_c_int64_t, " B"); + exit + end do + num = num / divisor +# undef CAF_USE_DIV + end if + end if + + write(num_str, '(i0)') num + str = trim(adjustl(num_str)) + if (allocated(unit)) then + str = str // unit + end if + end function + pure function as_int(ptr) type(c_ptr), intent(in) :: ptr integer(c_intptr_t) :: as_int From 870611d7a4077811f3b0b22d2d7062dddef57ea6 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 28 Jan 2026 15:13:43 -0800 Subject: [PATCH 6/9] test/prif_allocate_test: add coverage for out-of-memory errors --- test/prif_allocate_test.F90 | 49 +++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/test/prif_allocate_test.F90 b/test/prif_allocate_test.F90 index dcb34ae97..5e3efe723 100644 --- a/test/prif_allocate_test.F90 +++ b/test/prif_allocate_test.F90 @@ -6,10 +6,11 @@ module prif_allocate_test_m use prif, only : & prif_num_images, prif_size_bytes, & prif_set_context_data, prif_get_context_data, prif_local_data_pointer, & - prif_alias_create, prif_alias_destroy, prif_this_image_no_coarray + prif_alias_create, prif_alias_destroy, prif_this_image_no_coarray, & + PRIF_STAT_OUT_OF_MEMORY use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, string_t, usher & - ,operator(.all.), operator(.also.), operator(.equalsExpected.), operator(//) + ,operator(.all.), operator(.also.), operator(.equalsExpected.), operator(.isAtLeast.), operator(//) implicit none private @@ -53,6 +54,8 @@ function results() result(test_results) , usher(check_final_func) & # endif ) & + ,test_description_t("reporting out-of-memory errors", & + usher(check_allocation_oom)) & ])) end function @@ -379,4 +382,46 @@ function check_allocate_integer_array_coarray_with_corank2() result(diag) call prif_deallocate_coarray(coarray_handle) end function + + function check_allocation_oom() result(diag) + type(test_diagnosis_t) diag + + integer(c_size_t) :: size_in_bytes + type(c_ptr) :: allocated_memory + integer(c_int) :: stat + character(len=:), allocatable :: errmsg + integer(kind=c_int64_t), dimension(1) :: lcobounds, ucobounds + integer :: num_imgs + type(prif_coarray_handle) :: coarray_handle + + diag = .true. + + size_in_bytes = ishft(500_c_size_t, 40) ! 500TB + + call prif_allocate(size_in_bytes, allocated_memory, stat, errmsg_alloc=errmsg) + ALSO(stat .equalsExpected. PRIF_STAT_OUT_OF_MEMORY) + ALSO(allocated(errmsg)) + if (allocated(errmsg)) then + ALSO(len(errmsg) > 1) + ALSO(index(errmsg, 'out of memory') .isAtLeast. 1) + end if + deallocate(errmsg) + + call prif_num_images(num_images=num_imgs) + lcobounds(1) = 1 + ucobounds(1) = num_imgs + + call prif_allocate_coarray( & + lcobounds, ucobounds, size_in_bytes, c_null_funptr, & + coarray_handle, allocated_memory, stat, errmsg_alloc=errmsg) + ALSO(stat .equalsExpected. PRIF_STAT_OUT_OF_MEMORY) + ALSO(allocated(errmsg)) + if (allocated(errmsg)) then + ALSO(len(errmsg) > 1) + ALSO(index(errmsg, 'out of memory') .isAtLeast. 1) + end if + deallocate(errmsg) + + end function + end module prif_allocate_test_m From 097a42729f6a2bc560dcb08514690e18ffee7c16 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 28 Jan 2026 15:49:26 -0800 Subject: [PATCH 7/9] Update implementation-status --- docs/implementation-status.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/implementation-status.md b/docs/implementation-status.md index 9550d974e..95273ebb8 100644 --- a/docs/implementation-status.md +++ b/docs/implementation-status.md @@ -31,11 +31,6 @@ the labels in the Caffeine [issue tracker](https://github.com/BerkeleyLab/caffei Caffeine contains definitions for all of the PRIF-relevant constants from ISO_FORTRAN_ENV and for all of the PRIF-specific constants. -## `stat` and `errmsg` support - -Many PRIF procedures have optional arguments `stat`, `errmsg`, and `errmsg_alloc`. These arguments -are accepted, but in some cases, the associated runtime behavior is not fully implemented. - ## Program Startup and Shutdown | Procedure | Status | Notes | From 8352db195b7096c3a0c74e6c080b194ddf775f50 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 28 Jan 2026 19:55:34 -0800 Subject: [PATCH 8/9] Add support-test/out_of_memory --- .github/workflows/build.yml | 10 ++++- example/support-test/out_of_memory.F90 | 53 ++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 example/support-test/out_of_memory.F90 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d520270b9..0256b2ba4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -289,7 +289,7 @@ jobs: CAF_IMAGES=$(( CAF_IMAGES / 2 )) ; \ done - - name: Run exit tests + - name: Run exit/failure tests run: | echo CAF_IMAGES=${CAF_IMAGES} set -x @@ -301,6 +301,14 @@ jobs: ./run-fpm.sh run --verbose --example fail_image 2>&1 | tee output ; \ test ${PIPESTATUS[0]} > 0 && grep -q "FAIL IMAGE" output \ ) + ( set +e ; \ + ./run-fpm.sh run --verbose --example out_of_memory 2>&1 | tee output ; \ + test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \ + ) + ( set +e ; \ + ./run-fpm.sh run --verbose --example out_of_memory -- --coarray 2>&1 | tee output ; \ + test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \ + ) unset GASNET_SPAWN_VERBOSE for ((i=1; i<=4; i++)); do \ (set +e ; \ diff --git a/example/support-test/out_of_memory.F90 b/example/support-test/out_of_memory.F90 new file mode 100644 index 000000000..ef213bd9a --- /dev/null +++ b/example/support-test/out_of_memory.F90 @@ -0,0 +1,53 @@ +program out_of_memory + use iso_c_binding, only: c_bool, c_size_t, c_ptr, c_null_funptr, c_int64_t + use prif + implicit none + + integer :: init_exit_code, me, i + integer(c_size_t) :: size_in_bytes = ishft(500_c_size_t, 40) + type(c_ptr) :: allocated_memory + logical :: coarray = .false. + character(len=256) :: arg + + call prif_init(init_exit_code) + if (init_exit_code /= 0 .and. init_exit_code /= PRIF_STAT_ALREADY_INIT) then + call prif_error_stop(quiet=.false._c_bool, stop_code_char="program startup failed") + end if + call prif_this_image_no_coarray(this_image=me) + + do i = 1, command_argument_count() + call get_command_argument(i, arg) + + if (trim(arg) == "--coarray" .or. trim(arg) == "-c") then + coarray = .true. + else + read(arg, *) size_in_bytes + end if + end do + + if (coarray) then + if (me == 1) print *, "prif_allocate_coarray: ", size_in_bytes, " bytes" + block + integer(c_int64_t), dimension(1) :: lcobounds, ucobounds + integer :: num_imgs + type(prif_coarray_handle) :: coarray_handle + + call prif_num_images(num_images=num_imgs) + lcobounds(1) = 1 + ucobounds(1) = num_imgs + + call prif_allocate_coarray( & + lcobounds, ucobounds, size_in_bytes, c_null_funptr, & + coarray_handle, allocated_memory) + end block + else + if (me == 1) print *, "prif_allocate: ", size_in_bytes, " bytes" + call prif_sync_all() + call prif_allocate(size_in_bytes, allocated_memory) + end if + + + call prif_sync_all() + call prif_error_stop(quiet=.false._c_bool, stop_code_char="test failed") + +end program From a0798e363d71089e4c631b8922f078f3923846fa Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Thu, 5 Feb 2026 00:26:58 -0800 Subject: [PATCH 9/9] FIXUP: caf_caffeinate: Improvements to segment size handling Fix a corner-case to ensure a high CAF_COMP_FRAC cannot steal the last page away from the symmetric heap. --- src/caffeine/caffeine.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/caffeine/caffeine.c b/src/caffeine/caffeine.c index a46936748..b487a2adc 100644 --- a/src/caffeine/caffeine.c +++ b/src/caffeine/caffeine.c @@ -130,6 +130,14 @@ void caf_caffeinate( uintptr_t non_symmetric_heap_size = *total_heap_size * non_symmetric_fraction; non_symmetric_heap_size = PAGE_ALIGNUP(non_symmetric_heap_size); *symmetric_heap_size = *total_heap_size - non_symmetric_heap_size; + if (*symmetric_heap_size == 0) { + assert(non_symmetric_heap_size > GASNET_PAGESIZE); + non_symmetric_heap_size -= GASNET_PAGESIZE; + *symmetric_heap_size += GASNET_PAGESIZE; + } + assert(non_symmetric_heap_size > 0); + assert(non_symmetric_heap_size % GASNET_PAGESIZE == 0); + assert(*symmetric_heap_size > 0); assert(*symmetric_heap_size % GASNET_PAGESIZE == 0); intptr_t non_symmetric_heap_start = *symmetric_heap_start + *symmetric_heap_size;