From c68dd9ce5cc3a6135d8e5ccb4ead61a340c681b1 Mon Sep 17 00:00:00 2001 From: Jeff Inman Date: Tue, 14 Jun 2022 12:36:35 -0600 Subject: [PATCH 1/3] Distinguish per-block vs MD tasks, in PostInitializationCommunication(). The driver was apparently expecting that pmesh->mesh_data.GetOrAdd() would find a distinct stage for each block, but there is only one stage at this point, having the width of Mesh::DefaultPackSize, which matches the number of blocks. Therefore, we split the inits into three TaskRegions, first for the blocks to begin receiving, a second to do MD-wide boundary-buffer exchanges, and a third for the blocks to manage local boundaries. --- src/phoebus_driver.cpp | 55 ++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/src/phoebus_driver.cpp b/src/phoebus_driver.cpp index c26d2ad65..6af6e794a 100644 --- a/src/phoebus_driver.cpp +++ b/src/phoebus_driver.cpp @@ -108,36 +108,55 @@ void PhoebusDriver::PostInitializationCommunication() { rad_mocmc_active = (rad->Param("method") == "mocmc"); } - TaskRegion &async_region = tc.AddRegion(blocks.size()); + TaskRegion &async_region = tc.AddRegion(3); + + // leading per-block tasks + auto &tl0 = async_region[0]; for (int ib = 0; ib < blocks.size(); ib++) { auto pmb = blocks[ib].get(); - auto &tl = async_region[ib]; auto &sc = pmb->meshblock_data.Get(); - auto &md = - pmesh->mesh_data.GetOrAdd(stage_name[0], ib); // TODO(BRR) This gives an empty md - auto start_recv = tl.AddTask(none, &MeshBlockData::StartReceiving, sc.get(), + auto start_recv = tl0.AddTask(none, &MeshBlockData::StartReceiving, sc.get(), BoundaryCommSubset::all); - auto send = - tl.AddTask(start_recv, parthenon::cell_centered_bvars::SendBoundaryBuffers, md); - auto recv = - tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, md); - auto fill_from_bufs = - tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md); + } + + // tasks that span + auto &tl1 = async_region[1]; 
+ auto &md = pmesh->mesh_data.GetOrAdd(stage_name[0], 0); + + auto send = + tl1.AddTask(none, parthenon::cell_centered_bvars::SendBoundaryBuffers, md); + + auto recv = + tl1.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, md); + + auto fill_from_bufs = + tl1.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md); + + // tailing per-block-tasks + auto &tl2 = async_region[2]; + for (int ib = 0; ib < blocks.size(); ib++) { + auto pmb = blocks[ib].get(); + auto &sc = pmb->meshblock_data.Get(); + auto clear_comm_flags = - tl.AddTask(fill_from_bufs, &MeshBlockData::ClearBoundary, sc.get(), - BoundaryCommSubset::all); + tl2.AddTask(none, &MeshBlockData::ClearBoundary, + sc.get(), + BoundaryCommSubset::all); - auto prolongBound = tl.AddTask(clear_comm_flags, parthenon::ProlongateBoundaries, sc); + auto prolongBound = + tl2.AddTask(clear_comm_flags, parthenon::ProlongateBoundaries, sc); - auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, sc); + auto set_bc = + tl2.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, sc); - auto convert_bc = tl.AddTask(set_bc, Boundaries::ConvertBoundaryConditions, sc); + auto convert_bc = + tl2.AddTask(set_bc, Boundaries::ConvertBoundaryConditions, sc); // Radiation should actually be included in ConvertBoundaryConditions // using MDT = std::remove_pointer::type; - // auto momentp2c = tl.AddTask(convert_bc, radiation::MomentPrim2Con, sc.get(), - // IndexDomain::entire); + // auto momentp2c = tl2.AddTask(convert_bc, radiation::MomentPrim2Con, sc.get(), + // IndexDomain::entire); } tc.Execute(); From 096be76997a2ac9c089ab00505dd39b0e7929632 Mon Sep 17 00:00:00 2001 From: Jeff Inman Date: Tue, 14 Jun 2022 15:34:52 -0600 Subject: [PATCH 2/3] Formatted with clang 12.0.1 --- src/phoebus_driver.cpp | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/phoebus_driver.cpp b/src/phoebus_driver.cpp index 6af6e794a..df8a8a012 100644 --- 
a/src/phoebus_driver.cpp +++ b/src/phoebus_driver.cpp @@ -117,21 +117,20 @@ void PhoebusDriver::PostInitializationCommunication() { auto &sc = pmb->meshblock_data.Get(); auto start_recv = tl0.AddTask(none, &MeshBlockData::StartReceiving, sc.get(), - BoundaryCommSubset::all); + BoundaryCommSubset::all); } // tasks that span auto &tl1 = async_region[1]; auto &md = pmesh->mesh_data.GetOrAdd(stage_name[0], 0); - auto send = - tl1.AddTask(none, parthenon::cell_centered_bvars::SendBoundaryBuffers, md); + auto send = tl1.AddTask(none, parthenon::cell_centered_bvars::SendBoundaryBuffers, md); auto recv = - tl1.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, md); + tl1.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, md); auto fill_from_bufs = - tl1.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md); + tl1.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md); // tailing per-block-tasks auto &tl2 = async_region[2]; @@ -139,19 +138,15 @@ void PhoebusDriver::PostInitializationCommunication() { auto pmb = blocks[ib].get(); auto &sc = pmb->meshblock_data.Get(); - auto clear_comm_flags = - tl2.AddTask(none, &MeshBlockData::ClearBoundary, - sc.get(), - BoundaryCommSubset::all); + auto clear_comm_flags = tl2.AddTask(none, &MeshBlockData::ClearBoundary, + sc.get(), BoundaryCommSubset::all); auto prolongBound = - tl2.AddTask(clear_comm_flags, parthenon::ProlongateBoundaries, sc); + tl2.AddTask(clear_comm_flags, parthenon::ProlongateBoundaries, sc); - auto set_bc = - tl2.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, sc); + auto set_bc = tl2.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, sc); - auto convert_bc = - tl2.AddTask(set_bc, Boundaries::ConvertBoundaryConditions, sc); + auto convert_bc = tl2.AddTask(set_bc, Boundaries::ConvertBoundaryConditions, sc); // Radiation should actually be included in ConvertBoundaryConditions // using MDT = std::remove_pointer::type; From 
d2bd33b347f45473c060778e8211c7e6d048688e Mon Sep 17 00:00:00 2001 From: Jeff Inman Date: Wed, 15 Jun 2022 13:30:20 -0600 Subject: [PATCH 3/3] Separated the 3 init phases into 3 Regions, in PhoebusDriver::PostInitializationCommunication() TBD: Why is there a deadlock for the 3D blast-wave input? (Not in the patched method, and only on certain hardware.) --- src/phoebus_driver.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/phoebus_driver.cpp b/src/phoebus_driver.cpp index df8a8a012..105afbfb4 100644 --- a/src/phoebus_driver.cpp +++ b/src/phoebus_driver.cpp @@ -108,10 +108,9 @@ void PhoebusDriver::PostInitializationCommunication() { rad_mocmc_active = (rad->Param("method") == "mocmc"); } - TaskRegion &async_region = tc.AddRegion(3); - // leading per-block tasks - auto &tl0 = async_region[0]; + TaskRegion &async_region0 = tc.AddRegion(1); + auto &tl0 = async_region0[0]; for (int ib = 0; ib < blocks.size(); ib++) { auto pmb = blocks[ib].get(); auto &sc = pmb->meshblock_data.Get(); @@ -121,7 +120,8 @@ void PhoebusDriver::PostInitializationCommunication() { } // tasks that span - auto &tl1 = async_region[1]; + TaskRegion &async_region1 = tc.AddRegion(1); + auto &tl1 = async_region1[0]; auto &md = pmesh->mesh_data.GetOrAdd(stage_name[0], 0); auto send = tl1.AddTask(none, parthenon::cell_centered_bvars::SendBoundaryBuffers, md); @@ -133,7 +133,8 @@ void PhoebusDriver::PostInitializationCommunication() { tl1.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md); // tailing per-block-tasks - auto &tl2 = async_region[2]; + TaskRegion &async_region2 = tc.AddRegion(1); + auto &tl2 = async_region2[0]; for (int ib = 0; ib < blocks.size(); ib++) { auto pmb = blocks[ib].get(); auto &sc = pmb->meshblock_data.Get();