Click here to expand python script
-
+
```python
import netCDF4
import sys
@@ -793,7 +793,7 @@ for v in nc1.variables.keys() :
* Directives-Based Approaches
* https://github.com/mrnorman/miniWeather/wiki/A-Practical-Introduction-to-GPU-Refactoring-in-Fortran-with-Directives-for-Climate
- * https://www.openacc.org
+ * https://www.openacc.org
* https://www.openacc.org/sites/default/files/inline-files/OpenACC%20API%202.6%20Reference%20Guide.pdf
* https://www.openmp.org
* https://www.openmp.org/wp-content/uploads/OpenMP-4.5-1115-CPP-web.pdf
diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt
deleted file mode 100644
index 3d469143..00000000
--- a/c/CMakeLists.txt
+++ /dev/null
@@ -1,159 +0,0 @@
-cmake_minimum_required(VERSION 3.0)
-project(miniWeather CXX)
-
-enable_testing()
-
-include(utils.cmake)
-
-
-############################################################
-## Set Parameters
-############################################################
-if ("${NX}" STREQUAL "")
- SET(NX 100)
-endif()
-if ("${NZ}" STREQUAL "")
- SET(NZ 50)
-endif()
-if ("${SIM_TIME}" STREQUAL "")
- SET(SIM_TIME 1000)
-endif()
-if ("${OUT_FREQ}" STREQUAL "")
- SET(OUT_FREQ 10)
-endif()
-if ("${DATA_SPEC}" STREQUAL "")
- SET(DATA_SPEC DATA_SPEC_THERMAL)
-endif()
-SET(EXE_DEFS "-D_NX=${NX} -D_NZ=${NZ} -D_SIM_TIME=${SIM_TIME} -D_OUT_FREQ=${OUT_FREQ} -D_DATA_SPEC=${DATA_SPEC}")
-SET(TEST_DEFS "-D_NX=100 -D_NZ=50 -D_SIM_TIME=400 -D_OUT_FREQ=400 -D_DATA_SPEC=DATA_SPEC_THERMAL")
-
-
-############################################################
-## Append CXXFLAGS
-############################################################
-SET(CMAKE_CXX_FLAGS "${CXXFLAGS}")
-
-
-############################################################
-## Compile the serial version
-############################################################
-add_executable(serial miniWeather_serial.cpp)
-set_target_properties(serial PROPERTIES COMPILE_FLAGS "${EXE_DEFS}")
-
-add_executable(serial_test miniWeather_serial.cpp)
-set_target_properties(serial_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS}")
-
-if (NOT ("${LDFLAGS}" STREQUAL "") )
- target_link_libraries(serial "${LDFLAGS}")
- target_link_libraries(serial_test "${LDFLAGS}")
-endif()
-if (NOT ("${SERIAL_LINK_FLAGS}" STREQUAL "") )
- target_link_libraries(serial "${SERIAL_LINK_FLAGS}")
- target_link_libraries(serial_test "${SERIAL_LINK_FLAGS}")
-endif()
-
-add_test(NAME SERIAL_TEST COMMAND ./check_output.sh ./serial_test 1e-13 4.5e-5 )
-
-
-############################################################
-## Compile the MPI version
-############################################################
-add_executable(mpi miniWeather_mpi.cpp)
-set_target_properties(mpi PROPERTIES COMPILE_FLAGS "${EXE_DEFS}")
-
-add_executable(mpi_test miniWeather_mpi.cpp)
-set_target_properties(mpi_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS}")
-
-if (NOT ("${LDFLAGS}" STREQUAL "") )
- target_link_libraries(mpi "${LDFLAGS}")
- target_link_libraries(mpi_test "${LDFLAGS}")
-endif()
-if (NOT ("${MPI_LINK_FLAGS}" STREQUAL "") )
- target_link_libraries(mpi "${MPI_LINK_FLAGS}")
- target_link_libraries(mpi_test "${MPI_LINK_FLAGS}")
-endif()
-
-add_test(NAME MPI_TEST COMMAND ./check_output.sh ./mpi_test 1e-13 4.5e-5 )
-
-
-############################################################
-## Compile the MPI + OpenMP version
-############################################################
-if (NOT ("${OPENMP_FLAGS}" STREQUAL "") )
- add_executable(openmp miniWeather_mpi_openmp.cpp)
- set_target_properties(openmp PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${OPENMP_FLAGS}")
-
- add_executable(openmp_test miniWeather_mpi_openmp.cpp)
- set_target_properties(openmp_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${OPENMP_FLAGS}")
-
- if (NOT ("${LDFLAGS}" STREQUAL "") )
- target_link_libraries(openmp "${LDFLAGS}")
- target_link_libraries(openmp_test "${LDFLAGS}")
- endif()
- if ("${OPENMP_LINK_FLAGS}" STREQUAL "")
- SET(OPENMP_LINK_FLAGS ${OPENMP_FLAGS})
- endif()
- target_link_libraries(openmp "${OPENMP_LINK_FLAGS}")
- target_link_libraries(openmp_test "${OPENMP_LINK_FLAGS}")
-
- add_test(NAME OPENMP_TEST COMMAND ./check_output.sh ./openmp_test 1e-13 4.5e-5 )
-endif()
-
-
-
-############################################################
-## Compile the MPI + OpenACC version
-############################################################
-if (NOT ("${OPENACC_FLAGS}" STREQUAL "") )
- add_executable(openacc miniWeather_mpi_openacc.cpp)
- set_target_properties(openacc PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${OPENACC_FLAGS}")
-
- add_executable(openacc_test miniWeather_mpi_openacc.cpp)
- set_target_properties(openacc_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${OPENACC_FLAGS}")
-
- if (NOT ("${LDFLAGS}" STREQUAL "") )
- target_link_libraries(openacc "${LDFLAGS}")
- target_link_libraries(openacc_test "${LDFLAGS}")
- endif()
- if ("${OPENACC_LINK_FLAGS}" STREQUAL "")
- SET(OPENACC_LINK_FLAGS ${OPENACC_FLAGS})
- endif()
- target_link_libraries(openacc "${OPENACC_LINK_FLAGS}")
- target_link_libraries(openacc_test "${OPENACC_LINK_FLAGS}")
-
- add_test(NAME OPENACC_TEST COMMAND ./check_output.sh ./openacc_test 1e-13 4.5e-5 )
-endif()
-
-
-
-############################################################
-## Compile the MPI + OpenMP4.5 version
-############################################################
-if (NOT ("${OPENMP45_FLAGS}" STREQUAL "") )
- add_executable(openmp45 miniWeather_mpi_openmp45.cpp)
- set_target_properties(openmp45 PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${OPENMP45_FLAGS}")
-
- add_executable(openmp45_test miniWeather_mpi_openmp45.cpp)
- set_target_properties(openmp45_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${OPENMP45_FLAGS}")
-
- if (NOT ("${LDFLAGS}" STREQUAL "") )
- target_link_libraries(openmp45 "${LDFLAGS}")
- target_link_libraries(openmp45_test "${LDFLAGS}")
- endif()
- if ("${OPENMP45_LINK_FLAGS}" STREQUAL "")
- SET(OPENMP45_LINK_FLAGS ${OPENMP45_FLAGS})
- endif()
- target_link_libraries(openmp45 "${OPENMP45_LINK_FLAGS}")
- target_link_libraries(openmp45_test "${OPENMP45_LINK_FLAGS}")
-
- # The XL compiler dumps out non-unique filenames that screw up parallel compilation
- # So it must compile the test at a different time than the original executable
- if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "XL")
- add_dependencies(openmp45_test openmp45)
- endif()
-
- add_test(NAME OPENMP45_TEST COMMAND ./check_output.sh ./openmp45_test 1e-13 4.5e-5 )
-endif()
-
-
-
diff --git a/c/miniWeather_mpi.cpp b/c/miniWeather_mpi.cpp
deleted file mode 100644
index 26cf803d..00000000
--- a/c/miniWeather_mpi.cpp
+++ /dev/null
@@ -1,907 +0,0 @@
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// miniWeather
-// Author: Matt Norman , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include "pnetcdf.h"
-#include
-
-constexpr double pi = 3.14159265358979323846264338327; //Pi
-constexpr double grav = 9.8; //Gravitational acceleration (m / s^2)
-constexpr double cp = 1004.; //Specific heat of dry air at constant pressure
-constexpr double cv = 717.; //Specific heat of dry air at constant volume
-constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T)
-constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals
-constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
-constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
-//Define domain and stability-related constants
-constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters)
-constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters)
-constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1]
-constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
-constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
-constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction
-constexpr int sten_size = 4; //Size of the stencil used for interpolation
-
-//Parameters for indexing and flags
-constexpr int NUM_VARS = 4; //Number of fluid state variables
-constexpr int ID_DENS = 0; //index for density ("rho")
-constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u")
-constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w")
-constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta")
-constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction
-constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction
-constexpr int DATA_SPEC_COLLISION = 1;
-constexpr int DATA_SPEC_THERMAL = 2;
-constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
-constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
-constexpr int DATA_SPEC_INJECTION = 6;
-
-constexpr int nqpoints = 3;
-constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 };
-constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 };
-
-///////////////////////////////////////////////////////////////////////////////////////
-// BEGIN USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-//The x-direction length is twice as long as the z-direction length
-//So, you'll want to have nx_glob be twice as large as nz_glob
-int constexpr nx_glob = _NX; //Number of total cells in the x-direction
-int constexpr nz_glob = _NZ; //Number of total cells in the z-direction
-double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation
-double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds)
-int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data
-double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
-double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction
-///////////////////////////////////////////////////////////////////////////////////////
-// END USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double dt; //Model time step (seconds)
-int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
-int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
-int nranks, myrank; //Number of MPI ranks and my rank id
-int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
-int mainproc; //Am I the main process (rank == 0)?
-double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
-double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
-double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are dynamics over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double etime; //Elapsed model time
-double output_counter; //Helps determine when it's time to do output
-//Runtime variable arrays
-double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
-double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
-double *sendbuf_l; //Buffer to send data to the left MPI rank
-double *sendbuf_r; //Buffer to send data to the right MPI rank
-double *recvbuf_l; //Buffer to receive data from the left MPI rank
-double *recvbuf_r; //Buffer to receive data from the right MPI rank
-int num_out = 0; //The number of outputs performed so far
-int direction_switch = 1;
-double mass0, te0; //Initial domain totals for mass and total energy
-double mass , te ; //Domain totals for mass and total energy
-
-//How is this not in the standard?!
-double dmin( double a , double b ) { if (a= 0) output(state,etime);
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,state_tmp,flux,tend,dt);
- //Inform the user
-#ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
-#endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime);
- }
- }
- auto t2 = std::chrono::steady_clock::now();
- if (mainproc) {
-    std::cout << "CPU Time: " << std::chrono::duration<double>(t2-t1).count() << " sec\n";
- }
-
- //Final reductions for mass, kinetic energy, and total energy
- reductions(mass,te);
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
- }
-
- finalize();
-}
-
-
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q[n] + dt/3 * rhs(q[n])
-// q** = q[n] + dt/2 * rhs(q* )
-// q[n+1] = q[n] + dt/1 * rhs(q** )
-void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) {
- if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- } else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- }
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
-}
-
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) {
- int i, k, ll, inds, indt, indw;
- double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- set_halo_values_x(state_forcing);
- //Compute the time tendencies for the fluid state in the x-direction
- compute_tendencies_x(state_forcing,flux,tend,dt);
- } else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- set_halo_values_z(state_forcing);
- //Compute the time tendencies for the fluid state in the z-direction
- compute_tendencies_z(state_forcing,flux,tend,dt);
- }
-
- /////////////////////////////////////////////////
- // TODO: THREAD ME
- /////////////////////////////////////////////////
- //Apply the tendencies to the fluid state
- for (ll=0; ll , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include "pnetcdf.h"
-#include
-
-constexpr double pi = 3.14159265358979323846264338327; //Pi
-constexpr double grav = 9.8; //Gravitational acceleration (m / s^2)
-constexpr double cp = 1004.; //Specific heat of dry air at constant pressure
-constexpr double cv = 717.; //Specific heat of dry air at constant volume
-constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T)
-constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals
-constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
-constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
-//Define domain and stability-related constants
-constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters)
-constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters)
-constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1]
-constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
-constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
-constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction
-constexpr int sten_size = 4; //Size of the stencil used for interpolation
-
-//Parameters for indexing and flags
-constexpr int NUM_VARS = 4; //Number of fluid state variables
-constexpr int ID_DENS = 0; //index for density ("rho")
-constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u")
-constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w")
-constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta")
-constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction
-constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction
-constexpr int DATA_SPEC_COLLISION = 1;
-constexpr int DATA_SPEC_THERMAL = 2;
-constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
-constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
-constexpr int DATA_SPEC_INJECTION = 6;
-
-constexpr int nqpoints = 3;
-constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 };
-constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 };
-
-///////////////////////////////////////////////////////////////////////////////////////
-// BEGIN USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-//The x-direction length is twice as long as the z-direction length
-//So, you'll want to have nx_glob be twice as large as nz_glob
-int constexpr nx_glob = _NX; //Number of total cells in the x-direction
-int constexpr nz_glob = _NZ; //Number of total cells in the z-direction
-double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation
-double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds)
-int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data
-double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
-double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction
-///////////////////////////////////////////////////////////////////////////////////////
-// END USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double dt; //Model time step (seconds)
-int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
-int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
-int nranks, myrank; //Number of MPI ranks and my rank id
-int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
-int mainproc; //Am I the main process (rank == 0)?
-double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
-double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
-double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are dynamics over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double etime; //Elapsed model time
-double output_counter; //Helps determine when it's time to do output
-//Runtime variable arrays
-double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
-double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
-double *sendbuf_l; //Buffer to send data to the left MPI rank
-double *sendbuf_r; //Buffer to send data to the right MPI rank
-double *recvbuf_l; //Buffer to receive data from the left MPI rank
-double *recvbuf_r; //Buffer to receive data from the right MPI rank
-int num_out = 0; //The number of outputs performed so far
-int direction_switch = 1;
-double mass0, te0; //Initial domain totals for mass and total energy
-double mass , te ; //Domain totals for mass and total energy
-
-//How is this not in the standard?!
-double dmin( double a , double b ) { if (a= 0) output(state,etime);
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
-#pragma acc wait
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,state_tmp,flux,tend,dt);
- //Inform the user
-#ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
-#endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime);
- }
- }
-#pragma acc wait
- auto t2 = std::chrono::steady_clock::now();
- if (mainproc) {
-    std::cout << "CPU Time: " << std::chrono::duration<double>(t2-t1).count() << " sec\n";
- }
-
- //Final reductions for mass, kinetic energy, and total energy
- reductions(mass,te);
-}
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
- }
-
- finalize();
-}
-
-
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q[n] + dt/3 * rhs(q[n])
-// q** = q[n] + dt/2 * rhs(q* )
-// q[n+1] = q[n] + dt/1 * rhs(q** )
-void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) {
- if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- } else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- }
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
-}
-
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) {
- int i, k, ll, inds, indt, indw;
- double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- set_halo_values_x(state_forcing);
- //Compute the time tendencies for the fluid state in the x-direction
- compute_tendencies_x(state_forcing,flux,tend,dt);
- } else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- set_halo_values_z(state_forcing);
- //Compute the time tendencies for the fluid state in the z-direction
- compute_tendencies_z(state_forcing,flux,tend,dt);
- }
-
- //Apply the tendencies to the fluid state
-#pragma acc parallel loop collapse(3) default(present) async
- for (ll=0; ll , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include "pnetcdf.h"
-#include
-
-constexpr double pi = 3.14159265358979323846264338327; //Pi
-constexpr double grav = 9.8; //Gravitational acceleration (m / s^2)
-constexpr double cp = 1004.; //Specific heat of dry air at constant pressure
-constexpr double cv = 717.; //Specific heat of dry air at constant volume
-constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T)
-constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals
-constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
-constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
-//Define domain and stability-related constants
-constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters)
-constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters)
-constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1]
-constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
-constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
-constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction
-constexpr int sten_size = 4; //Size of the stencil used for interpolation
-
-//Parameters for indexing and flags
-constexpr int NUM_VARS = 4; //Number of fluid state variables
-constexpr int ID_DENS = 0; //index for density ("rho")
-constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u")
-constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w")
-constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta")
-constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction
-constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction
-constexpr int DATA_SPEC_COLLISION = 1;
-constexpr int DATA_SPEC_THERMAL = 2;
-constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
-constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
-constexpr int DATA_SPEC_INJECTION = 6;
-
-constexpr int nqpoints = 3;
-constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 };
-constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 };
-
-///////////////////////////////////////////////////////////////////////////////////////
-// BEGIN USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-//The x-direction length is twice as long as the z-direction length
-//So, you'll want to have nx_glob be twice as large as nz_glob
-int constexpr nx_glob = _NX; //Number of total cells in the x-direction
-int constexpr nz_glob = _NZ; //Number of total cells in the z-direction
-double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation
-double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds)
-int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data
-double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
-double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction
-///////////////////////////////////////////////////////////////////////////////////////
-// END USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double dt; //Model time step (seconds)
-int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
-int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
-int nranks, myrank; //Number of MPI ranks and my rank id
-int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
-int mainproc; //Am I the main process (rank == 0)?
-double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
-double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
-double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are dynamics over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double etime; //Elapsed model time
-double output_counter; //Helps determine when it's time to do output
-//Runtime variable arrays
-double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
-double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
-double *sendbuf_l; //Buffer to send data to the left MPI rank
-double *sendbuf_r; //Buffer to send data to the right MPI rank
-double *recvbuf_l; //Buffer to receive data from the left MPI rank
-double *recvbuf_r; //Buffer to receive data from the right MPI rank
-int num_out = 0; //The number of outputs performed so far
-int direction_switch = 1;
-double mass0, te0; //Initial domain totals for mass and total energy
-double mass , te ; //Domain totals for mass and total energy
-
-//How is this not in the standard?!
-double dmin( double a , double b ) { if (a= 0) output(state,etime);
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,state_tmp,flux,tend,dt);
- //Inform the user
-#ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
-#endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime);
- }
- }
- auto t2 = std::chrono::steady_clock::now();
- if (mainproc) {
- std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n";
- }
-
- //Final reductions for mass, kinetic energy, and total energy
- reductions(mass,te);
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
- }
-
- finalize();
-}
-
-
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q[n] + dt/3 * rhs(q[n])
-// q** = q[n] + dt/2 * rhs(q* )
-// q[n+1] = q[n] + dt/1 * rhs(q** )
-void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) {
- if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- } else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- }
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
-}
-
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) {
- int i, k, ll, inds, indt, indw;
- double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- set_halo_values_x(state_forcing);
- //Compute the time tendencies for the fluid state in the x-direction
- compute_tendencies_x(state_forcing,flux,tend,dt);
- } else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- set_halo_values_z(state_forcing);
- //Compute the time tendencies for the fluid state in the z-direction
- compute_tendencies_z(state_forcing,flux,tend,dt);
- }
-
- //Apply the tendencies to the fluid state
-#pragma omp parallel for private(inds,indt,x,z,x0,z0,xrad,zrad,amp,dist,wpert,indw) collapse(3)
- for (ll=0; ll , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include "pnetcdf.h"
-#include
-
-constexpr double pi = 3.14159265358979323846264338327; //Pi
-constexpr double grav = 9.8; //Gravitational acceleration (m / s^2)
-constexpr double cp = 1004.; //Specific heat of dry air at constant pressure
-constexpr double cv = 717.; //Specific heat of dry air at constant volume
-constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T)
-constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals
-constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
-constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
-//Define domain and stability-related constants
-constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters)
-constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters)
-constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1]
-constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
-constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
-constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction
-constexpr int sten_size = 4; //Size of the stencil used for interpolation
-
-//Parameters for indexing and flags
-constexpr int NUM_VARS = 4; //Number of fluid state variables
-constexpr int ID_DENS = 0; //index for density ("rho")
-constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u")
-constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w")
-constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta")
-constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction
-constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction
-constexpr int DATA_SPEC_COLLISION = 1;
-constexpr int DATA_SPEC_THERMAL = 2;
-constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
-constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
-constexpr int DATA_SPEC_INJECTION = 6;
-
-constexpr int nqpoints = 3;
-constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 };
-constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 };
-
-int asyncid = 1;
-
-///////////////////////////////////////////////////////////////////////////////////////
-// BEGIN USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-//The x-direction length is twice as long as the z-direction length
-//So, you'll want to have nx_glob be twice as large as nz_glob
-int constexpr nx_glob = _NX; //Number of total cells in the x-direction
-int constexpr nz_glob = _NZ; //Number of total cells in the z-direction
-double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation
-double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds)
-int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data
-double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
-double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction
-///////////////////////////////////////////////////////////////////////////////////////
-// END USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double dt; //Model time step (seconds)
-int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
-int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
-int nranks, myrank; //Number of MPI ranks and my rank id
-int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
-int mainproc; //Am I the main process (rank == 0)?
-double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
-double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
-double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are dynamics over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double etime; //Elapsed model time
-double output_counter; //Helps determine when it's time to do output
-//Runtime variable arrays
-double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
-double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
-double *sendbuf_l; //Buffer to send data to the left MPI rank
-double *sendbuf_r; //Buffer to send data to the right MPI rank
-double *recvbuf_l; //Buffer to receive data from the left MPI rank
-double *recvbuf_r; //Buffer to receive data from the right MPI rank
-int num_out = 0; //The number of outputs performed so far
-int direction_switch = 1;
-double mass0, te0; //Initial domain totals for mass and total energy
-double mass , te ; //Domain totals for mass and total energy
-
-//How is this not in the standard?!
-double dmin( double a , double b ) { if (a= 0) output(state,etime);
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
-#pragma omp taskwait
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,state_tmp,flux,tend,dt);
- //Inform the user
-#ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
-#endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime);
- }
- }
-#pragma omp taskwait
- auto t2 = std::chrono::steady_clock::now();
- if (mainproc) {
- std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n";
- }
-
- //Final reductions for mass, kinetic energy, and total energy
- reductions(mass,te);
-}
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
- }
-
- finalize();
-}
-
-
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q[n] + dt/3 * rhs(q[n])
-// q** = q[n] + dt/2 * rhs(q* )
-// q[n+1] = q[n] + dt/1 * rhs(q** )
-void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) {
- if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- } else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- }
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
-}
-
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) {
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- set_halo_values_x(state_forcing);
- //Compute the time tendencies for the fluid state in the x-direction
- compute_tendencies_x(state_forcing,flux,tend,dt);
- } else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- set_halo_values_z(state_forcing);
- //Compute the time tendencies for the fluid state in the z-direction
- compute_tendencies_z(state_forcing,flux,tend,dt);
- }
-
- //Apply the tendencies to the fluid state
-#pragma omp target teams distribute parallel for simd collapse(3) depend(inout:asyncid) nowait
- for (int ll=0; ll , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include "pnetcdf.h"
-#include
-
-constexpr double pi = 3.14159265358979323846264338327; //Pi
-constexpr double grav = 9.8; //Gravitational acceleration (m / s^2)
-constexpr double cp = 1004.; //Specific heat of dry air at constant pressure
-constexpr double cv = 717.; //Specific heat of dry air at constant volume
-constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T)
-constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals
-constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
-constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
-//Define domain and stability-related constants
-constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters)
-constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters)
-constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1]
-constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
-constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
-constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction
-constexpr int sten_size = 4; //Size of the stencil used for interpolation
-
-//Parameters for indexing and flags
-constexpr int NUM_VARS = 4; //Number of fluid state variables
-constexpr int ID_DENS = 0; //index for density ("rho")
-constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u")
-constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w")
-constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta")
-constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction
-constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction
-constexpr int DATA_SPEC_COLLISION = 1;
-constexpr int DATA_SPEC_THERMAL = 2;
-constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
-constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
-constexpr int DATA_SPEC_INJECTION = 6;
-
-constexpr int nqpoints = 3;
-constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 };
-constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 };
-
-///////////////////////////////////////////////////////////////////////////////////////
-// BEGIN USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-//The x-direction length is twice as long as the z-direction length
-//So, you'll want to have nx_glob be twice as large as nz_glob
-int constexpr nx_glob = _NX; //Number of total cells in the x-direction
-int constexpr nz_glob = _NZ; //Number of total cells in the z-direction
-double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation
-double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds)
-int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data
-double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
-double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction
-///////////////////////////////////////////////////////////////////////////////////////
-// END USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double dt; //Model time step (seconds)
-int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
-int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
-int nranks, myrank; //Number of MPI ranks and my rank id
-int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
-int mainproc; //Am I the main process (rank == 0)?
-double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
-double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
-double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
-double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are dynamics over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-double etime; //Elapsed model time
-double output_counter; //Helps determine when it's time to do output
-//Runtime variable arrays
-double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
-double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
-int num_out = 0; //The number of outputs performed so far
-int direction_switch = 1;
-double mass0, te0; //Initial domain totals for mass and total energy
-double mass , te ; //Domain totals for mass and total energy
-
-//How is this not in the standard?!
-double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,state_tmp,flux,tend,dt);
- //Inform the user
-#ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
-#endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime);
- }
- }
- auto t2 = std::chrono::steady_clock::now();
- if (mainproc) {
- std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n";
- }
-
- //Final reductions for mass, kinetic energy, and total energy
- reductions(mass,te);
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
- }
-
- finalize();
-}
-
-
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q[n] + dt/3 * rhs(q[n])
-// q** = q[n] + dt/2 * rhs(q* )
-// q[n+1] = q[n] + dt/1 * rhs(q** )
-void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) {
- if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- } else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend );
- }
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
-}
-
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) {
- int i, k, ll, inds, indt, indw;
- double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- set_halo_values_x(state_forcing);
- //Compute the time tendencies for the fluid state in the x-direction
- compute_tendencies_x(state_forcing,flux,tend,dt);
- } else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- set_halo_values_z(state_forcing);
- //Compute the time tendencies for the fluid state in the z-direction
- compute_tendencies_z(state_forcing,flux,tend,dt);
- }
-
- /////////////////////////////////////////////////
- // TODO: THREAD ME
- /////////////////////////////////////////////////
- //Apply the tendencies to the fluid state
- for (ll=0; ll
-
-using yakl::SArray;
-using yakl::c::SimpleBounds;
-
-#ifdef SINGLE_PREC
- typedef float real;
- auto mpi_type = MPI_FLOAT;
-#else
- typedef double real;
- auto mpi_type = MPI_DOUBLE;
-#endif
-
-constexpr real pi = 3.14159265358979323846264338327; //Pi
-constexpr real grav = 9.8; //Gravitational acceleration (m / s^2)
-constexpr real cp = 1004.; //Specific heat of dry air at constant pressure
-constexpr real cv = 717.; //Specific heat of dry air at constant volume
-constexpr real rd = 287.; //Dry air constant for equation of state (P=rho*rd*T)
-constexpr real p0 = 1.e5; //Standard pressure at the surface in Pascals
-constexpr real C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
-constexpr real gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
-//Define domain and stability-related constants
-constexpr real xlen = 2.e4; //Length of the domain in the x-direction (meters)
-constexpr real zlen = 1.e4; //Length of the domain in the z-direction (meters)
-constexpr real hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1]
-constexpr real cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
-constexpr real max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
-constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction
-constexpr int sten_size = 4; //Size of the stencil used for interpolation
-
-//Parameters for indexing and flags
-constexpr int NUM_VARS = 4; //Number of fluid state variables
-constexpr int ID_DENS = 0; //index for density ("rho")
-constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u")
-constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w")
-constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta")
-constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction
-constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction
-constexpr int DATA_SPEC_COLLISION = 1;
-constexpr int DATA_SPEC_THERMAL = 2;
-constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
-constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
-constexpr int DATA_SPEC_INJECTION = 6;
-
-///////////////////////////////////////////////////////////////////////////////////////
-// BEGIN USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-//The x-direction length is twice as long as the z-direction length
-//So, you'll want to have nx_glob be twice as large as nz_glob
-int constexpr nx_glob = _NX; // Number of total cells in the x-direction
-int constexpr nz_glob = _NZ; // Number of total cells in the z-direction
-real constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation
-real constexpr output_freq = _OUT_FREQ; // How frequently to output data to file (in seconds)
-int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data
-///////////////////////////////////////////////////////////////////////////////////////
-// END USER-CONFIGURABLE PARAMETERS
-///////////////////////////////////////////////////////////////////////////////////////
-real constexpr dx = xlen / nx_glob;
-real constexpr dz = zlen / nz_glob;
-
-using yakl::c::Bounds;
-using yakl::c::parallel_for;
-using yakl::SArray;
-
-template inline T min( T val1 , T val2 ) {
- return val1 < val2 ? val1 : val2 ;
-}
-
-template inline T abs( T val ) {
- return val > 0 ? val : -val;
-}
-
-#ifdef SIMD_LEN
- unsigned int constexpr simd_len = SIMD_LEN;
-#else
- unsigned int constexpr simd_len = 4;
-#endif
-
-using yakl::simd::Pack;
-using yakl::simd::PackIterConfig;
-using yakl::simd::iterate_over_pack;
-
diff --git a/cpp/experimental/miniWeather_mpi_parallelfor_simd_x.cpp b/cpp/experimental/miniWeather_mpi_parallelfor_simd_x.cpp
deleted file mode 100644
index 7efca9d2..00000000
--- a/cpp/experimental/miniWeather_mpi_parallelfor_simd_x.cpp
+++ /dev/null
@@ -1,911 +0,0 @@
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// miniWeather
-// Author: Matt Norman , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include
-#include
-#include
-#include "../const.h"
-#include "pnetcdf.h"
-#include
-#include
-
-// We're going to define all arrays on the host because this doesn't use parallel_for
-typedef yakl::Array real1d;
-typedef yakl::Array real2d;
-typedef yakl::Array real3d;
-typedef yakl::Array doub1d;
-typedef yakl::Array doub2d;
-typedef yakl::Array doub3d;
-
-typedef yakl::Array realConst1d;
-typedef yakl::Array realConst2d;
-typedef yakl::Array realConst3d;
-typedef yakl::Array doubConst1d;
-typedef yakl::Array doubConst2d;
-typedef yakl::Array doubConst3d;
-
-// Some arrays still need to be on the host, so we will explicitly create Host Array typedefs
-typedef yakl::Array real1dHost;
-typedef yakl::Array real2dHost;
-typedef yakl::Array real3dHost;
-typedef yakl::Array doub1dHost;
-typedef yakl::Array doub2dHost;
-typedef yakl::Array doub3dHost;
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-struct Fixed_data {
- int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
- int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
- int nranks, myrank; //Number of MPI ranks and my rank id
- int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
- int mainproc; //Am I the main process (rank == 0)?
- realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-};
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are dynamics over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-
-//Declaring the functions defined after "main"
-void init ( real3d &state , real &dt , Fixed_data &fixed_data );
-void finalize ( );
-YAKL_INLINE void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void hydro_const_theta ( real z , real &r , real &t );
-YAKL_INLINE void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t );
-YAKL_INLINE real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad );
-void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data );
-void ncwrap ( int ierr , int line );
-void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data );
-void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt ,
- int dir , Fixed_data const &fixed_data );
-void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data );
-void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data );
-void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data );
-
-
-///////////////////////////////////////////////////////////////////////////////////////
-// THE MAIN PROGRAM STARTS HERE
-///////////////////////////////////////////////////////////////////////////////////////
-int main(int argc, char **argv) {
- MPI_Init(&argc,&argv);
- yakl::init();
- {
- Fixed_data fixed_data;
- real3d state;
- real dt; //Model time step (seconds)
-
- // Init allocates the state and hydrostatic arrays hy_*
- init( state , dt , fixed_data );
-
- auto &mainproc = fixed_data.mainproc;
-
- //Initial reductions for mass, kinetic energy, and total energy
- double mass0, te0;
- reductions(state,mass0,te0,fixed_data);
-
- int num_out = 0; //The number of outputs performed so far
- real output_counter = 0; //Helps determine when it's time to do output
- real etime = 0;
-
- //Output the initial state
- if (output_freq >= 0) {
- output(state,etime,num_out,fixed_data);
- }
-
- int direction_switch = 1; // Tells dimensionally split which order to take x,z solves
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
- yakl::fence();
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,dt,direction_switch,fixed_data);
- //Inform the user
- #ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
- #endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime,num_out,fixed_data);
- }
- }
- yakl::fence();
- auto t2 = std::chrono::steady_clock::now();
- if (mainproc) {
- std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n";
- }
-
- //Final reductions for mass, kinetic energy, and total energy
- double mass, te;
- reductions(state,mass,te,fixed_data);
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
- }
-
- finalize();
- }
- yakl::finalize();
- MPI_Finalize();
-}
-
-
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q_n + dt/3 * rhs(q_n)
-// q** = q_n + dt/2 * rhs(q* )
-// q_n+1 = q_n + dt/1 * rhs(q**)
-void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
-
- real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs);
-
- if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
- } else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
- }
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
-}
-
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &i_beg = fixed_data.i_beg ;
- auto &k_beg = fixed_data.k_beg ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
-
- real3d tend("tend",NUM_VARS,nz,nx);
-
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- yakl::timer_start("halo x");
- set_halo_values_x(state_forcing,fixed_data);
- yakl::timer_stop("halo x");
- //Compute the time tendencies for the fluid state in the x-direction
- yakl::timer_start("tendencies x");
- compute_tendencies_x(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies x");
- } else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- yakl::timer_start("halo z");
- set_halo_values_z(state_forcing,fixed_data);
- yakl::timer_stop("halo z");
- //Compute the time tendencies for the fluid state in the z-direction
- yakl::timer_start("tendencies z");
- compute_tendencies_z(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies z");
- }
-
- //Apply the tendencies to the fluid state
- // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) {
- if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
- real x = (i_beg + i+0.5)*dx;
- real z = (k_beg + k+0.5)*dz;
- real wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. );
- tend(ID_WMOM,k,i) += wpert*hy_dens_cell(hs+k);
- }
- state_out(ll,hs+k,hs+i) = state_init(ll,hs+k,hs+i) + dt * tend(ll,k,i);
- });
- yakl::timer_stop("apply tendencies");
-}
-
-
-//Compute the time tendencies of the fluid state using forcing in the x-direction
-//Since the halos are set in a separate routine, this will not require MPI
-//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity)
-//Then, compute the tendencies using those fluxes
-void compute_tendencies_x( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
- auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell;
-
- real3d flux("flux",NUM_VARS,nz,nx+1);
-
- //Compute fluxes in the x-direction for each cell
- // for (k=0; k(nz,xblocks) , YAKL_LAMBDA (int k, int iblk) {
- SArray,1,4> stencil;
- SArray,1,NUM_VARS> d3_vals;
- SArray,1,NUM_VARS> vals;
- //Compute the hyperviscosity coefficient
- real hv_coef = -hv_beta * dx / (16*dt);
-
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll() );
- }
- //Fourth-order-accurate interpolation of the state
- vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12;
- //First-order-accurate interpolation of the third spatial derivative of the state (for artificial viscosity)
- d3_vals(ll) = -stencil(0) + 3*stencil(1) - 3*stencil(2) + stencil(3);
- }
-
- //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively)
- auto r = vals(ID_DENS) + hy_dens_cell(hs+k);
- auto u = vals(ID_UMOM) / r;
- auto w = vals(ID_WMOM) / r;
- auto t = ( vals(ID_RHOT) + hy_dens_theta_cell(hs+k) ) / r;
- auto p = C0*pow((r*t),gamm);
-
- auto f1 = r*u - hv_coef*d3_vals(ID_DENS);
- auto f2 = r*u*u+p - hv_coef*d3_vals(ID_UMOM);
- auto f3 = r*u*w - hv_coef*d3_vals(ID_WMOM);
- auto f4 = r*u*t - hv_coef*d3_vals(ID_RHOT);
-
- //Compute the flux vector
- iterate_over_pack( [&] (unsigned int ilane) {
- int i = std::min(xdim-1 , iblk*simd_len + ilane);
- flux(ID_DENS,k,i) = f1(ilane);
- flux(ID_UMOM,k,i) = f2(ilane);
- flux(ID_WMOM,k,i) = f3(ilane);
- flux(ID_RHOT,k,i) = f4(ilane);
- } , PackIterConfig() );
- });
-
- //Use the fluxes to compute tendencies for each cell
- // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) {
- tend(ll,k,i) = -( flux(ll,k,i+1) - flux(ll,k,i) ) / dx;
- });
-}
-
-
-//Compute the time tendencies of the fluid state using forcing in the z-direction
-//Since the halos are set in a separate routine, this will not require MPI
-//First, compute the flux vector at each cell interface in the z-direction (including hyperviscosity)
-//Then, compute the tendencies using those fluxes
-void compute_tendencies_z( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_int = fixed_data.hy_dens_int ;
- auto &hy_dens_theta_int = fixed_data.hy_dens_theta_int ;
- auto &hy_pressure_int = fixed_data.hy_pressure_int ;
-
- real3d flux("flux",NUM_VARS,nz+1,nx);
-
- //Compute fluxes in the x-direction for each cell
- // for (k=0; k(nz+1,xblocks) , YAKL_LAMBDA (int k, int iblk) {
- SArray,1,4> stencil;
- SArray,1,NUM_VARS> d3_vals;
- SArray,1,NUM_VARS> vals;
- //Compute the hyperviscosity coefficient
- real hv_coef = -hv_beta * dz / (16*dt);
-
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll() );
- }
- //Fourth-order-accurate interpolation of the state
- vals(ll) = -stencil(0)/12 + 7*stencil(1)/12 + 7*stencil(2)/12 - stencil(3)/12;
- //First-order-accurate interpolation of the third spatial derivative of the state
- d3_vals(ll) = -stencil(0) + 3*stencil(1) - 3*stencil(2) + stencil(3);
- }
-
- //Compute density, u-wind, w-wind, potential temperature, and pressure (r,u,w,t,p respectively)
- auto r = vals(ID_DENS) + hy_dens_int(k);
- auto u = vals(ID_UMOM) / r;
- auto w = vals(ID_WMOM) / r;
- auto t = ( vals(ID_RHOT) + hy_dens_theta_int(k) ) / r;
- auto p = C0*pow((r*t),gamm) - hy_pressure_int(k);
- if (k == 0 || k == nz) {
- w = 0;
- d3_vals(ID_DENS) = 0;
- }
-
- auto f1 = r*w - hv_coef*d3_vals(ID_DENS);
- auto f2 = r*w*u - hv_coef*d3_vals(ID_UMOM);
- auto f3 = r*w*w+p - hv_coef*d3_vals(ID_WMOM);
- auto f4 = r*w*t - hv_coef*d3_vals(ID_RHOT);
-
- //Compute the flux vector with hyperviscosity
- iterate_over_pack( [&] (unsigned int ilane) {
- int i = min( xdim-1 , iblk*simd_len + ilane );
- flux(ID_DENS,k,i) = f1(ilane);
- flux(ID_UMOM,k,i) = f2(ilane);
- flux(ID_WMOM,k,i) = f3(ilane);
- flux(ID_RHOT,k,i) = f4(ilane);
- } , PackIterConfig() );
- });
-
- //Use the fluxes to compute tendencies for each cell
- // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) {
- tend(ll,k,i) = -( flux(ll,k+1,i) - flux(ll,k,i) ) / dz;
- if (ll == ID_WMOM) {
- tend(ll,k,i) -= state(ID_DENS,hs+k,hs+i)*grav;
- }
- });
-}
-
-
-
-//Set this MPI task's halo values in the x-direction. This routine will require MPI
-void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &k_beg = fixed_data.k_beg ;
- auto &left_rank = fixed_data.left_rank ;
- auto &right_rank = fixed_data.right_rank ;
- auto &myrank = fixed_data.myrank ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
- auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell;
-
- int ierr;
- MPI_Request req_r[2], req_s[2];
-
- if (fixed_data.nranks == 1) {
-
- parallel_for( SimpleBounds<2>(NUM_VARS,nz) , YAKL_LAMBDA (int ll, int k) {
- state(ll,hs+k,0 ) = state(ll,hs+k,nx+hs-2);
- state(ll,hs+k,1 ) = state(ll,hs+k,nx+hs-1);
- state(ll,hs+k,nx+hs ) = state(ll,hs+k,hs );
- state(ll,hs+k,nx+hs+1) = state(ll,hs+k,hs+1 );
- });
-
- } else {
-
- real3d sendbuf_l ( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank
- real3d sendbuf_r ( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank
- real3d recvbuf_l ( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank
- real3d recvbuf_r ( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank
- real3dHost sendbuf_l_cpu( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank (CPU copy)
- real3dHost sendbuf_r_cpu( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank (CPU copy)
- real3dHost recvbuf_l_cpu( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank (CPU copy)
- real3dHost recvbuf_r_cpu( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank (CPU copy)
-
- //Prepost receives
- ierr = MPI_Irecv(recvbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,0,MPI_COMM_WORLD,&req_r[0]);
- ierr = MPI_Irecv(recvbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,1,MPI_COMM_WORLD,&req_r[1]);
-
- //Pack the send buffers
- // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) {
- sendbuf_l(ll,k,s) = state(ll,k+hs,hs+s);
- sendbuf_r(ll,k,s) = state(ll,k+hs,nx+s);
- });
- yakl::fence();
-
- // This will copy from GPU to host
- sendbuf_l.deep_copy_to(sendbuf_l_cpu);
- sendbuf_r.deep_copy_to(sendbuf_r_cpu);
- yakl::fence();
-
- //Fire off the sends
- ierr = MPI_Isend(sendbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,1,MPI_COMM_WORLD,&req_s[0]);
- ierr = MPI_Isend(sendbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,0,MPI_COMM_WORLD,&req_s[1]);
-
- //Wait for receives to finish
- ierr = MPI_Waitall(2,req_r,MPI_STATUSES_IGNORE);
-
- // This will copy from host to GPU
- recvbuf_l_cpu.deep_copy_to(recvbuf_l);
- recvbuf_r_cpu.deep_copy_to(recvbuf_r);
- yakl::fence();
-
- //Unpack the receive buffers
- // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) {
- state(ll,k+hs,s ) = recvbuf_l(ll,k,s);
- state(ll,k+hs,nx+hs+s) = recvbuf_r(ll,k,s);
- });
- yakl::fence();
-
- //Wait for sends to finish
- ierr = MPI_Waitall(2,req_s,MPI_STATUSES_IGNORE);
-
- }
-
- if (data_spec_int == DATA_SPEC_INJECTION) {
- if (myrank == 0) {
- // for (k=0; k(nz,hs) , YAKL_LAMBDA (int k, int i) {
- double z = (k_beg + k+0.5)*dz;
- if (abs(z-3*zlen/4) <= zlen/16) {
- state(ID_UMOM,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 50.;
- state(ID_RHOT,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 298. - hy_dens_theta_cell(hs+k);
- }
- });
- }
- }
-}
-
-
-//Set this MPI task's halo values in the z-direction. This does not require MPI because there is no MPI
-//decomposition in the vertical direction
-void set_halo_values_z( real3d const &state , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
-
- // for (ll=0; ll(NUM_VARS,nx+2*hs) , YAKL_LAMBDA (int ll, int i) {
- if (ll == ID_WMOM) {
- state(ll,0 ,i) = 0.;
- state(ll,1 ,i) = 0.;
- state(ll,nz+hs ,i) = 0.;
- state(ll,nz+hs+1,i) = 0.;
- } else if (ll == ID_UMOM) {
- state(ll,0 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(0 );
- state(ll,1 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(1 );
- state(ll,nz+hs ,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs );
- state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs+1);
- } else {
- state(ll,0 ,i) = state(ll,hs ,i);
- state(ll,1 ,i) = state(ll,hs ,i);
- state(ll,nz+hs ,i) = state(ll,nz+hs-1,i);
- state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i);
- }
- });
-}
-
-
-void init( real3d &state , real &dt , Fixed_data &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &i_beg = fixed_data.i_beg ;
- auto &k_beg = fixed_data.k_beg ;
- auto &left_rank = fixed_data.left_rank ;
- auto &right_rank = fixed_data.right_rank ;
- auto &nranks = fixed_data.nranks ;
- auto &myrank = fixed_data.myrank ;
- auto &mainproc = fixed_data.mainproc ;
- int ierr;
-
- ierr = MPI_Comm_size(MPI_COMM_WORLD,&nranks);
- ierr = MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
- real nper = ( (double) nx_glob ) / nranks;
- i_beg = round( nper* (myrank) );
- int i_end = round( nper*((myrank)+1) )-1;
- nx = i_end - i_beg + 1;
- left_rank = myrank - 1;
- if (left_rank == -1) left_rank = nranks-1;
- right_rank = myrank + 1;
- if (right_rank == nranks) right_rank = 0;
-
- //Vertical direction isn't MPI-ized, so the rank's local values = the global values
- k_beg = 0;
- nz = nz_glob;
- mainproc = (myrank == 0);
-
- //Allocate the model data
- state = real3d( "state" , NUM_VARS,nz+2*hs,nx+2*hs);
-
- //Define the maximum stable time step based on an assumed maximum wind speed
- dt = min(dx,dz) / max_speed * cfl;
-
- //If I'm the main process in MPI, display some grid information
- if (mainproc) {
- printf( "nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob);
- printf( "dx,dz: %lf %lf\n",dx,dz);
- printf( "dt: %lf\n",dt);
- }
- //Want to make sure this info is displayed before further output
- ierr = MPI_Barrier(MPI_COMM_WORLD);
-
- // Define quadrature weights and points
- const int nqpoints = 3;
- SArray qpoints;
- SArray qweights;
-
- qpoints(0) = 0.112701665379258311482073460022;
- qpoints(1) = 0.500000000000000000000000000000;
- qpoints(2) = 0.887298334620741688517926539980;
-
- qweights(0) = 0.277777777777777777777777777779;
- qweights(1) = 0.444444444444444444444444444444;
- qweights(2) = 0.277777777777777777777777777779;
-
- //////////////////////////////////////////////////////////////////////////
- // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
- //////////////////////////////////////////////////////////////////////////
- // for (k=0; k(nz+2*hs,nx+2*hs) , YAKL_LAMBDA (int k, int i) {
- //Initialize the state to zero
- for (int ll=0; ll(nz,nx) , YAKL_LAMBDA (int k, int i) {
- dens (k,i) = state(ID_DENS,hs+k,hs+i);
- uwnd (k,i) = state(ID_UMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) );
- wwnd (k,i) = state(ID_WMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) );
- theta(k,i) = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ) - hy_dens_theta_cell(hs+k) / hy_dens_cell(hs+k);
- });
- yakl::fence();
-
- //Write the grid data to file with all the processes writing collectively
- st3[0] = num_out; st3[1] = k_beg; st3[2] = i_beg;
- ct3[0] = 1 ; ct3[1] = nz ; ct3[2] = nx ;
- ncwrap( ncmpi_put_vara_double_all( ncid , dens_varid , st3 , ct3 , dens .createHostCopy().data() ) , __LINE__ );
- ncwrap( ncmpi_put_vara_double_all( ncid , uwnd_varid , st3 , ct3 , uwnd .createHostCopy().data() ) , __LINE__ );
- ncwrap( ncmpi_put_vara_double_all( ncid , wwnd_varid , st3 , ct3 , wwnd .createHostCopy().data() ) , __LINE__ );
- ncwrap( ncmpi_put_vara_double_all( ncid , theta_varid , st3 , ct3 , theta.createHostCopy().data() ) , __LINE__ );
-
- //Only the main process needs to write the elapsed time
- //Begin "independent" write mode
- ncwrap( ncmpi_begin_indep_data(ncid) , __LINE__ );
- //write elapsed time to file
- if (mainproc) {
- st1[0] = num_out;
- ct1[0] = 1;
- double etimearr[1];
- etimearr[0] = etime; ncwrap( ncmpi_put_vara_double( ncid , t_varid , st1 , ct1 , etimearr ) , __LINE__ );
- }
- //End "independent" write mode
- ncwrap( ncmpi_end_indep_data(ncid) , __LINE__ );
-
- //Close the file
- ncwrap( ncmpi_close(ncid) , __LINE__ );
-
- //Increment the number of outputs
- num_out = num_out + 1;
-}
-
-
-//Error reporting routine for the PNetCDF I/O
-void ncwrap( int ierr , int line ) {
- if (ierr != NC_NOERR) {
- printf("NetCDF Error at line: %d\n", line);
- printf("%s\n",ncmpi_strerror(ierr));
- exit(-1);
- }
-}
-
-
-void finalize() {
-}
-
-
-//Compute reduced quantities for error checking without resorting to the "ncdiff" tool
-void reductions( realConst3d state, double &mass , double &te , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
- auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell;
-
- doub2d mass2d("mass2d",nz,nx);
- doub2d te2d ("te2d ",nz,nx);
-
- // for (k=0; k(nz,nx) , YAKL_LAMBDA (int k, int i) {
- double r = state(ID_DENS,hs+k,hs+i) + hy_dens_cell(hs+k); // Density
- double u = state(ID_UMOM,hs+k,hs+i) / r; // U-wind
- double w = state(ID_WMOM,hs+k,hs+i) / r; // W-wind
- double th = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / r; // Potential Temperature (theta)
- double p = C0*pow(r*th,gamm); // Pressure
- double t = th / pow(p0/p,rd/cp); // Temperature
- double ke = r*(u*u+w*w); // Kinetic Energy
- double ie = r*cv*t; // Internal Energy
- mass2d(k,i) = r *dx*dz; // Accumulate domain mass
- te2d (k,i) = (ke + ie)*dx*dz; // Accumulate domain total energy
- });
- mass = yakl::intrinsics::sum( mass2d );
- te = yakl::intrinsics::sum( te2d );
-
- double glob[2], loc[2];
- loc[0] = mass;
- loc[1] = te;
- int ierr = MPI_Allreduce(loc,glob,2,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
- mass = glob[0];
- te = glob[1];
-}
-
-
diff --git a/cpp/miniWeather_mpi.cpp b/cpp/miniWeather_mpi.cpp
index 4d455f5c..d7566e12 100644
--- a/cpp/miniWeather_mpi.cpp
+++ b/cpp/miniWeather_mpi.cpp
@@ -2,473 +2,578 @@
//////////////////////////////////////////////////////////////////////////////////////////
// miniWeather
// Author: Matt Norman , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
+// This code simulates dry, stratified, compressible, non-hydrostatic fluid
+// flows For documentation, please see the attached documentation in the
+// "documentation" folder
//
//////////////////////////////////////////////////////////////////////////////////////////
-#include
-#include
-#include
-#include
-#include "const.h"
#include "pnetcdf.h"
#include
+#include
+#include
+#include
+#include
+#include
+#include
-// We're going to define all arrays on the host because this doesn't use parallel_for
-typedef yakl::Array real1d;
-typedef yakl::Array real2d;
-typedef yakl::Array real3d;
-typedef yakl::Array doub1d;
-typedef yakl::Array doub2d;
-typedef yakl::Array doub3d;
-
-typedef yakl::Array realConst1d;
-typedef yakl::Array realConst2d;
-typedef yakl::Array realConst3d;
-typedef yakl::Array doubConst1d;
-typedef yakl::Array doubConst2d;
-typedef yakl::Array doubConst3d;
+constexpr double pi = 3.14159265358979323846264338327; // Pi
+constexpr double grav = 9.8; // Gravitational acceleration (m / s^2)
+constexpr double cp = 1004.; // Specific heat of dry air at constant pressure
+constexpr double cv = 717.; // Specific heat of dry air at constant volume
+constexpr double rd =
+ 287.; // Dry air constant for equation of state (P=rho*rd*T)
+constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals
+constexpr double C0 =
+ 27.5629410929725921310572974482; // Constant to translate potential
+ // temperature into pressure
+ // (P=C0*(rho*theta)**gamma)
+constexpr double gamm =
+ 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm
+ // because "gamma" is taken (I hate C so
+ // much)
+
+// Define domain and stability-related constants
+constexpr double xlen = 2.e4; // Length of the domain in the x-direction
+ // (meters)
+constexpr double zlen = 1.e4; // Length of the domain in the z-direction
+ // (meters)
+constexpr double hv_beta =
+ 0.05; // How strong to diffuse the solution: hv_beta \in [0:1]
+constexpr double cfl =
+ 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
+constexpr double max_speed =
+ 450; // Assumed maximum wave speed during the simulation (speed of sound +
+ // speed of wind) (meter / sec)
+constexpr int hs =
+ 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a
+ // full "stencil" of information for reconstruction
+constexpr int sten_size = 4; // Size of the stencil used for interpolation
+
+// Parameters for indexing and flags
+constexpr int NUM_VARS = 4; // Number of fluid state variables
+constexpr int ID_DENS = 0; // index for density ("rho")
+constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u")
+constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w")
+constexpr int ID_RHOT =
+ 3; // index for density * potential temperature ("rho * theta")
+constexpr int DIR_X =
+ 1; // Integer constant to express that this operation is in the x-direction
+constexpr int DIR_Z =
+ 2; // Integer constant to express that this operation is in the z-direction
+constexpr int DATA_SPEC_COLLISION = 1;
+constexpr int DATA_SPEC_THERMAL = 2;
+constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
+constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
+constexpr int DATA_SPEC_INJECTION = 6;
+
+constexpr int nqpoints = 3;
+constexpr double qpoints[] = {0.112701665379258311482073460022E0,
+ 0.500000000000000000000000000000E0,
+ 0.887298334620741688517926539980E0};
+constexpr double qweights[] = {0.277777777777777777777777777779E0,
+ 0.444444444444444444444444444444E0,
+ 0.277777777777777777777777777779E0};
///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
+// BEGIN USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+// The x-direction length is twice as long as the z-direction length
+// So, you'll want to have nx_glob be twice as large as nz_glob
+int constexpr nx_glob = _NX; // Number of total cells in the x-direction
+int constexpr nz_glob = _NZ; // Number of total cells in the z-direction
+double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation
+double constexpr output_freq =
+ _OUT_FREQ; // How frequently to output data to file (in seconds)
+int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data
+double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
+double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction
+///////////////////////////////////////////////////////////////////////////////////////
+// END USER-CONFIGURABLE PARAMETERS
///////////////////////////////////////////////////////////////////////////////////////
-struct Fixed_data {
- int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
- int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
- int nranks, myrank; //Number of MPI ranks and my rank id
- int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
- int mainproc; //Am I the main process (rank == 0)?
- realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-};
-//Declaring the functions defined after "main"
-void init ( real3d &state , real &dt , Fixed_data &fixed_data );
-void finalize ( );
-void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void hydro_const_theta ( real z , real &r , real &t );
-void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t );
-real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad );
-void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data );
-void ncwrap ( int ierr , int line );
-void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data );
-void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data );
-void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data );
-void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data );
-void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data );
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are initialized but remain static over the course of the
+// simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double dt; // Model time step (seconds)
+int nx, nz; // Number of local grid cells in the x- and z- dimensions for this
+ // MPI task
+int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI
+ // task
+int nranks, myrank; // Number of MPI ranks and my rank id
+int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in
+ // the global domain
+int mainproc; // Am I the main process (rank == 0)?
+double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *
+ hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
+double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions:
+ // (1:nz+1)
+double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions:
+ // (1:nz+1)
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are dynamics over the course of the simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double etime; // Elapsed model time
+double output_counter; // Helps determine when it's time to do output
+// Runtime variable arrays
+double *state; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *state_tmp; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *flux; // Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
+double *tend; // Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
+double *sendbuf_l; // Buffer to send data to the left MPI rank
+double *sendbuf_r; // Buffer to send data to the right MPI rank
+double *recvbuf_l; // Buffer to receive data from the left MPI rank
+double *recvbuf_r; // Buffer to receive data from the right MPI rank
+int num_out = 0; // The number of outputs performed so far
+int direction_switch = 1;
+double mass0, te0; // Initial domain totals for mass and total energy
+double mass, te; // Domain totals for mass and total energy
+
+// How is this not in the standard?!
+double dmin(double a, double b) {
+ if (a < b) {
+ return a;
+ } else {
+ return b;
+ }
+};
+
+// Declaring the functions defined after "main"
+void init(int *argc, char ***argv);
+void finalize();
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void density_current(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void hydro_const_theta(double z, double &r, double &t);
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t);
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+ double z0, double xrad, double zrad);
+void output(double *state, double etime);
+void ncwrap(int ierr, int line);
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt);
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend);
+void compute_tendencies_x(double *state, double *flux, double *tend, double dt);
+void compute_tendencies_z(double *state, double *flux, double *tend, double dt);
+void set_halo_values_x(double *state);
+void set_halo_values_z(double *state);
+void reductions(double &mass, double &te);
///////////////////////////////////////////////////////////////////////////////////////
// THE MAIN PROGRAM STARTS HERE
///////////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
- MPI_Init(&argc,&argv);
- yakl::init();
- {
- Fixed_data fixed_data;
- real3d state;
- real dt; //Model time step (seconds)
-
- // init allocates state
- init( state , dt , fixed_data );
- auto &mainproc = fixed_data.mainproc;
+ init(&argc, &argv);
- //Initial reductions for mass, kinetic energy, and total energy
- double mass0, te0;
- reductions(state,mass0,te0,fixed_data);
+ // Initial reductions for mass, kinetic energy, and total energy
+ reductions(mass0, te0);
- int num_out = 0; //The number of outputs performed so far
- real output_counter = 0; //Helps determine when it's time to do output
- real etime = 0;
+ // Output the initial state
+ if (output_freq >= 0)
+ output(state, etime);
- //Output the initial state
- if (output_freq >= 0) {
- output(state,etime,num_out,fixed_data);
+ ////////////////////////////////////////////////////
+ // MAIN TIME STEP LOOP
+ ////////////////////////////////////////////////////
+ auto t1 = std::chrono::steady_clock::now();
+ while (etime < sim_time) {
+ // If the time step leads to exceeding the simulation time, shorten it for
+ // the last step
+ if (etime + dt > sim_time) {
+ dt = sim_time - etime;
}
-
- int direction_switch = 1; // Tells dimensionally split which order to take x,z solves
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,dt,direction_switch,fixed_data);
- //Inform the user
- #ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
- #endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime,num_out,fixed_data);
- }
- }
- auto t2 = std::chrono::steady_clock::now();
+ // Perform a single time step
+ perform_timestep(state, state_tmp, flux, tend, dt);
+ // Inform the user
+#ifndef NO_INFORM
if (mainproc) {
- std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n";
+ printf("Elapsed Time: %lf / %lf\n", etime, sim_time);
}
-
- //Final reductions for mass, kinetic energy, and total energy
- double mass, te;
- reductions(state,mass,te,fixed_data);
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
+#endif
+ // Update the elapsed time and output counter
+ etime = etime + dt;
+ output_counter = output_counter + dt;
+ // If it's time for output, reset the counter, and do output
+ if (output_freq >= 0 && output_counter >= output_freq) {
+ output_counter = output_counter - output_freq;
+ output(state, etime);
}
-
- finalize();
}
- yakl::finalize();
- MPI_Finalize();
-}
+ auto t2 = std::chrono::steady_clock::now();
+ if (mainproc) {
+ std::cout << "CPU Time: " << std::chrono::duration(t2 - t1).count()
+ << " sec\n";
+ }
+ // Final reductions for mass, kinetic energy, and total energy
+ reductions(mass, te);
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q_n + dt/3 * rhs(q_n)
-// q** = q_n + dt/2 * rhs(q* )
-// q_n+1 = q_n + dt/1 * rhs(q**)
-void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
+ if (mainproc) {
+ printf("d_mass: %le\n", (mass - mass0) / mass0);
+ printf("d_te: %le\n", (te - te0) / te0);
+ }
- real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs);
+ finalize();
+}
+// Performs a single dimensionally split time step using a simple low-storage
+// three-stage Runge-Kutta time integrator The dimensional splitting is a
+// second-order-accurate alternating Strang splitting in which the order of
+// directions is alternated each time step. The Runge-Kutta method used here is
+// defined as follows:
+// q* = q[n] + dt/3 * rhs(q[n])
+// q** = q[n] + dt/2 * rhs(q* )
+// q[n+1] = q[n] + dt/1 * rhs(q** )
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt) {
+ if (direction_switch) {
+ // x-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ // z-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+ } else {
+ // z-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+ // x-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ }
if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
+ direction_switch = 0;
} else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
+ direction_switch = 1;
}
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
}
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &i_beg = fixed_data.i_beg ;
- auto &k_beg = fixed_data.k_beg ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
-
- real3d tend("tend",NUM_VARS,nz,nx);
-
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- yakl::timer_start("halo x");
- set_halo_values_x(state_forcing,fixed_data);
- yakl::timer_stop("halo x");
- //Compute the time tendencies for the fluid state in the x-direction
- yakl::timer_start("tendencies x");
- compute_tendencies_x(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies x");
+// Perform a single semi-discretized step in time with the form:
+// state_out = state_init + dt * rhs(state_forcing)
+// Meaning the step starts from state_init, computes the rhs using
+// state_forcing, and stores the result in state_out
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend) {
+ int i, k, ll, inds, indt, indw;
+ double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
+ if (dir == DIR_X) {
+ // Set the halo values for this MPI task's fluid state in the x-direction
+ set_halo_values_x(state_forcing);
+ // Compute the time tendencies for the fluid state in the x-direction
+ compute_tendencies_x(state_forcing, flux, tend, dt);
} else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- yakl::timer_start("halo z");
- set_halo_values_z(state_forcing,fixed_data);
- yakl::timer_stop("halo z");
- //Compute the time tendencies for the fluid state in the z-direction
- yakl::timer_start("tendencies z");
- compute_tendencies_z(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies z");
+ // Set the halo values for this MPI task's fluid state in the z-direction
+ set_halo_values_z(state_forcing);
+ // Compute the time tendencies for the fluid state in the z-direction
+ compute_tendencies_z(state_forcing, flux, tend, dt);
}
/////////////////////////////////////////////////
- // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR
+ // TODO: THREAD ME
/////////////////////////////////////////////////
- //Apply the tendencies to the fluid state
- yakl::timer_start("apply tendencies");
- for (int ll=0; ll stencil;
- SArray d3_vals;
- SArray vals;
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll stencil;
- SArray d3_vals;
- SArray vals;
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll qpoints;
- SArray qweights;
-
- qpoints(0) = 0.112701665379258311482073460022;
- qpoints(1) = 0.500000000000000000000000000000;
- qpoints(2) = 0.887298334620741688517926539980;
-
- qweights(0) = 0.277777777777777777777777777779;
- qweights(1) = 0.444444444444444444444444444444;
- qweights(2) = 0.277777777777777777777777777779;
-
//////////////////////////////////////////////////////////////////////////
// Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
//////////////////////////////////////////////////////////////////////////
- /////////////////////////////////////////////////
- // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR
- /////////////////////////////////////////////////
- for (int k=0; k , Oak Ridge National Laboratory
+// This code simulates dry, stratified, compressible, non-hydrostatic fluid
+// flows For documentation, please see the attached documentation in the
+// "documentation" folder
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pnetcdf.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+constexpr double pi = 3.14159265358979323846264338327; // Pi
+constexpr double grav = 9.8; // Gravitational acceleration (m / s^2)
+constexpr double cp = 1004.; // Specific heat of dry air at constant pressure
+constexpr double cv = 717.; // Specific heat of dry air at constant volume
+constexpr double rd =
+ 287.; // Dry air constant for equation of state (P=rho*rd*T)
+constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals
+constexpr double C0 =
+ 27.5629410929725921310572974482; // Constant to translate potential
+ // temperature into pressure
+ // (P=C0*(rho*theta)**gamma)
+constexpr double gamm =
+ 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm
+ // because "gamma" is taken (I hate C so
+ // much)
+
+// Define domain and stability-related constants
+constexpr double xlen = 2.e4; // Length of the domain in the x-direction
+ // (meters)
+constexpr double zlen = 1.e4; // Length of the domain in the z-direction
+ // (meters)
+constexpr double hv_beta =
+ 0.05; // How strong to diffuse the solution: hv_beta \in [0:1]
+constexpr double cfl =
+ 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
+constexpr double max_speed =
+ 450; // Assumed maximum wave speed during the simulation (speed of sound +
+ // speed of wind) (meter / sec)
+constexpr int hs =
+ 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a
+ // full "stencil" of information for reconstruction
+constexpr int sten_size = 4; // Size of the stencil used for interpolation
+
+// Parameters for indexing and flags
+constexpr int NUM_VARS = 4; // Number of fluid state variables
+constexpr int ID_DENS = 0; // index for density ("rho")
+constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u")
+constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w")
+constexpr int ID_RHOT =
+ 3; // index for density * potential temperature ("rho * theta")
+constexpr int DIR_X =
+ 1; // Integer constant to express that this operation is in the x-direction
+constexpr int DIR_Z =
+ 2; // Integer constant to express that this operation is in the z-direction
+constexpr int DATA_SPEC_COLLISION = 1;
+constexpr int DATA_SPEC_THERMAL = 2;
+constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
+constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
+constexpr int DATA_SPEC_INJECTION = 6;
+
+constexpr int nqpoints = 3;
+constexpr double qpoints[] = {0.112701665379258311482073460022E0,
+ 0.500000000000000000000000000000E0,
+ 0.887298334620741688517926539980E0};
+constexpr double qweights[] = {0.277777777777777777777777777779E0,
+ 0.444444444444444444444444444444E0,
+ 0.277777777777777777777777777779E0};
+
+///////////////////////////////////////////////////////////////////////////////////////
+// BEGIN USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+// The x-direction length is twice as long as the z-direction length
+// So, you'll want to have nx_glob be twice as large as nz_glob
+int constexpr nx_glob = _NX; // Number of total cells in the x-direction
+int constexpr nz_glob = _NZ; // Number of total cells in the z-direction
+double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation
+double constexpr output_freq =
+ _OUT_FREQ; // How frequently to output data to file (in seconds)
+int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data
+double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
+double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction
+///////////////////////////////////////////////////////////////////////////////////////
+// END USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are initialized but remain static over the course of the
+// simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double dt; // Model time step (seconds)
+int nx, nz; // Number of local grid cells in the x- and z- dimensions for this
+ // MPI task
+int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI
+ // task
+int nranks, myrank; // Number of MPI ranks and my rank id
+int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in
+ // the global domain
+int mainproc; // Am I the main process (rank == 0)?
+double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *
+ hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
+double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions:
+ // (1:nz+1)
+double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions:
+ // (1:nz+1)
+
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are dynamics over the course of the simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double etime; // Elapsed model time
+double output_counter; // Helps determine when it's time to do output
+// Runtime variable arrays
+double *state; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *state_tmp; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *flux; // Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
+double *tend; // Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
+double *sendbuf_l; // Buffer to send data to the left MPI rank
+double *sendbuf_r; // Buffer to send data to the right MPI rank
+double *recvbuf_l; // Buffer to receive data from the left MPI rank
+double *recvbuf_r; // Buffer to receive data from the right MPI rank
+int num_out = 0; // The number of outputs performed so far
+int direction_switch = 1;
+double mass0, te0; // Initial domain totals for mass and total energy
+double mass, te; // Domain totals for mass and total energy
+
+// How is this not in the standard?!
+double dmin(double a, double b) {
+ if (a < b) {
+ return a;
+ } else {
+ return b;
+ }
+};
+
+// Declaring the functions defined after "main"
+void init(int *argc, char ***argv);
+void finalize();
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void density_current(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void hydro_const_theta(double z, double &r, double &t);
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t);
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+ double z0, double xrad, double zrad);
+void output(double *state, double etime);
+void ncwrap(int ierr, int line);
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt);
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend);
+void compute_tendencies_x(double *state, double *flux, double *tend, double dt);
+void compute_tendencies_z(double *state, double *flux, double *tend, double dt);
+void set_halo_values_x(double *state);
+void set_halo_values_z(double *state);
+void reductions(double &mass, double &te);
+
+///////////////////////////////////////////////////////////////////////////////////////
+// THE MAIN PROGRAM STARTS HERE
+///////////////////////////////////////////////////////////////////////////////////////
+int main(int argc, char **argv) {
+
+ init(&argc, &argv);
+
+#pragma acc data copyin( \
+ state_tmp[0 : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS], \
+ hy_dens_cell[0 : nz + 2 * hs], hy_dens_theta_cell[0 : nz + 2 * hs], \
+ hy_dens_int[0 : nz + 1], hy_dens_theta_int[0 : nz + 1], \
+ hy_pressure_int[0 : nz + 1]) \
+ create(flux[0 : (nz + 1) * (nx + 1) * NUM_VARS], \
+ tend[0 : nz * nx * NUM_VARS], sendbuf_l[0 : hs * nz * NUM_VARS], \
+ sendbuf_r[0 : hs * nz * NUM_VARS], \
+ recvbuf_l[0 : hs * nz * NUM_VARS], \
+ recvbuf_r[0 : hs * nz * NUM_VARS]) \
+ copy(state[0 : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
+ {
+
+ // Initial reductions for mass, kinetic energy, and total energy
+ reductions(mass0, te0);
+
+ // Output the initial state
+ if (output_freq >= 0)
+ output(state, etime);
+
+ ////////////////////////////////////////////////////
+ // MAIN TIME STEP LOOP
+ ////////////////////////////////////////////////////
+#pragma acc wait
+ auto t1 = std::chrono::steady_clock::now();
+ while (etime < sim_time) {
+ // If the time step leads to exceeding the simulation time, shorten it for
+ // the last step
+ if (etime + dt > sim_time) {
+ dt = sim_time - etime;
+ }
+ // Perform a single time step
+ perform_timestep(state, state_tmp, flux, tend, dt);
+ // Inform the user
+#ifndef NO_INFORM
+ if (mainproc) {
+ printf("Elapsed Time: %lf / %lf\n", etime, sim_time);
+ }
+#endif
+ // Update the elapsed time and output counter
+ etime = etime + dt;
+ output_counter = output_counter + dt;
+ // If it's time for output, reset the counter, and do output
+ if (output_freq >= 0 && output_counter >= output_freq) {
+ output_counter = output_counter - output_freq;
+ output(state, etime);
+ }
+ }
+#pragma acc wait
+ auto t2 = std::chrono::steady_clock::now();
+ if (mainproc) {
+ std::cout << "CPU Time: "
+ << std::chrono::duration(t2 - t1).count() << " sec\n";
+ }
+
+ // Final reductions for mass, kinetic energy, and total energy
+ reductions(mass, te);
+ }
+
+ if (mainproc) {
+ printf("d_mass: %le\n", (mass - mass0) / mass0);
+ printf("d_te: %le\n", (te - te0) / te0);
+ }
+
+ finalize();
+}
+
+// Performs a single dimensionally split time step using a simple low-storage
+// three-stage Runge-Kutta time integrator The dimensional splitting is a
+// second-order-accurate alternating Strang splitting in which the order of
+// directions is alternated each time step. The Runge-Kutta method used here is
+// defined as follows:
+// q* = q[n] + dt/3 * rhs(q[n])
+// q** = q[n] + dt/2 * rhs(q* )
+// q[n+1] = q[n] + dt/1 * rhs(q** )
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt) {
+ if (direction_switch) {
+ // x-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ // z-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+ } else {
+ // z-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+ // x-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ }
+ if (direction_switch) {
+ direction_switch = 0;
+ } else {
+ direction_switch = 1;
+ }
+}
+
+// Perform a single semi-discretized step in time with the form:
+// state_out = state_init + dt * rhs(state_forcing)
+// Meaning the step starts from state_init, computes the rhs using
+// state_forcing, and stores the result in state_out
+// dir selects the sweep (DIR_X or DIR_Z); dt is the step size for this stage.
+// flux and tend are caller-provided scratch arrays reused between calls.
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend) {
+ int i, k, ll, inds, indt, indw;
+ double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
+ if (dir == DIR_X) {
+ // Set the halo values for this MPI task's fluid state in the x-direction
+ set_halo_values_x(state_forcing);
+ // Compute the time tendencies for the fluid state in the x-direction
+ compute_tendencies_x(state_forcing, flux, tend, dt);
+ } else if (dir == DIR_Z) {
+ // Set the halo values for this MPI task's fluid state in the z-direction
+ set_halo_values_z(state_forcing);
+ // Compute the time tendencies for the fluid state in the z-direction
+ compute_tendencies_z(state_forcing, flux, tend, dt);
+ }
+
+ // Apply the tendencies to the fluid state
+#pragma acc parallel loop collapse(3) default(present) async
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx; i++) {
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ x = (i_beg + i + 0.5) * dx;
+ z = (k_beg + k + 0.5) * dz;
+ // Using sample_ellipse_cosine requires "acc routine" in OpenACC and
+ // "declare target" in OpenMP offload Neither of these are
+ // particularly well supported. So I'm manually inlining here wpert =
+ // sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. );
+ {
+ x0 = xlen / 8;
+ z0 = 1000;
+ xrad = 500;
+ zrad = 500;
+ amp = 0.01;
+ // Compute distance from bubble center
+ dist = sqrt(((x - x0) / xrad) * ((x - x0) / xrad) +
+ ((z - z0) / zrad) * ((z - z0) / zrad)) *
+ pi / 2.;
+ // If the distance from bubble center is less than the radius,
+ // create a cos**2 profile
+ if (dist <= pi / 2.) {
+ wpert = amp * pow(cos(dist), 2.);
+ } else {
+ wpert = 0.;
+ }
+ }
+ // NOTE(review): this branch runs once per ll iteration, so tend[indw]
+ // is incremented NUM_VARS times per (k,i), and under collapse(3)
+ // several ll iterations write the same indw — confirm this is the
+ // intended forcing magnitude and not a data race.
+ indw = ID_WMOM * nz * nx + k * nx + i;
+ tend[indw] += wpert * hy_dens_cell[hs + k];
+ }
+ // Forward-Euler update of this stage: out = init + dt * tendency
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ i + hs;
+ indt = ll * nz * nx + k * nx + i;
+ state_out[inds] = state_init[inds] + dt * tend[indt];
+ }
+ }
+ }
+}
+
+// Compute the time tendencies of the fluid state using forcing in the
+// x-direction Since the halos are set in a separate routine, this will not
+// require MPI First, compute the flux vector at each cell interface in the
+// x-direction (including hyperviscosity) Then, compute the tendencies using
+// those fluxes
+// state: input state with x-halos already filled; flux/tend: outputs;
+// dt: time step, used only to scale the hyperviscosity coefficient.
+void compute_tendencies_x(double *state, double *flux, double *tend,
+ double dt) {
+ int i, k, ll, s, inds, indf1, indf2, indt;
+ double r, u, w, t, p, stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS], hv_coef;
+ // Compute the hyperviscosity coefficient
+ hv_coef = -hv_beta * dx / (16 * dt);
+ // Compute fluxes in the x-direction for each cell
+#pragma acc parallel loop collapse(2) private(stencil, vals, \
+ d3_vals) default(present) async
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx + 1; i++) {
+ // Use fourth-order interpolation from four cell averages to compute the
+ // value at the interface in question
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (s = 0; s < sten_size; s++) {
+ // Gather the 4-cell stencil straddling interface i (halo offset hs)
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ i + s;
+ stencil[s] = state[inds];
+ }
+ // Fourth-order-accurate interpolation of the state
+ vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 +
+ 7 * stencil[2] / 12 - stencil[3] / 12;
+ // First-order-accurate interpolation of the third spatial derivative of
+ // the state (for artificial viscosity)
+ d3_vals[ll] =
+ -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
+ }
+
+ // Compute density, u-wind, w-wind, potential temperature, and pressure
+ // (r,u,w,t,p respectively)
+ r = vals[ID_DENS] + hy_dens_cell[k + hs];
+ u = vals[ID_UMOM] / r;
+ w = vals[ID_WMOM] / r;
+ t = (vals[ID_RHOT] + hy_dens_theta_cell[k + hs]) / r;
+ p = C0 * pow((r * t), gamm);
+
+ // Compute the flux vector
+ flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u - hv_coef * d3_vals[ID_DENS];
+ flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u * u + p - hv_coef * d3_vals[ID_UMOM];
+ flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u * w - hv_coef * d3_vals[ID_WMOM];
+ flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u * t - hv_coef * d3_vals[ID_RHOT];
+ }
+ }
+
+ // Use the fluxes to compute tendencies for each cell
+ // (finite-volume flux divergence: tend = -(F[i+1] - F[i]) / dx)
+#pragma acc parallel loop collapse(3) default(present) async
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx; i++) {
+ indt = ll * nz * nx + k * nx + i;
+ indf1 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i;
+ indf2 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i + 1;
+ tend[indt] = -(flux[indf2] - flux[indf1]) / dx;
+ }
+ }
+ }
+}
+
+// Compute the time tendencies of the fluid state using forcing in the
+// z-direction Since the halos are set in a separate routine, this will not
+// require MPI First, compute the flux vector at each cell interface in the
+// z-direction (including hyperviscosity) Then, compute the tendencies using
+// those fluxes
+// state: input state with z-halos already filled; flux/tend: outputs;
+// dt: time step, used only to scale the hyperviscosity coefficient.
+void compute_tendencies_z(double *state, double *flux, double *tend,
+ double dt) {
+ int i, k, ll, s, inds, indf1, indf2, indt;
+ double r, u, w, t, p, stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS], hv_coef;
+ // Compute the hyperviscosity coefficient
+ hv_coef = -hv_beta * dz / (16 * dt);
+ // Compute fluxes in the z-direction for each cell
+#pragma acc parallel loop collapse(2) private(stencil, vals, \
+ d3_vals) default(present) async
+ for (k = 0; k < nz + 1; k++) {
+ for (i = 0; i < nx; i++) {
+ // Use fourth-order interpolation from four cell averages to compute the
+ // value at the interface in question
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (s = 0; s < sten_size; s++) {
+ // Gather the 4-cell vertical stencil straddling interface k
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + s) * (nx + 2 * hs) +
+ i + hs;
+ stencil[s] = state[inds];
+ }
+ // Fourth-order-accurate interpolation of the state
+ vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 +
+ 7 * stencil[2] / 12 - stencil[3] / 12;
+ // First-order-accurate interpolation of the third spatial derivative of
+ // the state
+ d3_vals[ll] =
+ -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
+ }
+
+ // Compute density, u-wind, w-wind, potential temperature, and pressure
+ // (r,u,w,t,p respectively); vertical interfaces use the interface-valued
+ // hydrostatic background, and pressure is perturbation pressure
+ r = vals[ID_DENS] + hy_dens_int[k];
+ u = vals[ID_UMOM] / r;
+ w = vals[ID_WMOM] / r;
+ t = (vals[ID_RHOT] + hy_dens_theta_int[k]) / r;
+ p = C0 * pow((r * t), gamm) - hy_pressure_int[k];
+ // Enforce vertical boundary condition and exact mass conservation
+ // (no flow and no hyperviscous density flux through top/bottom)
+ if (k == 0 || k == nz) {
+ w = 0;
+ d3_vals[ID_DENS] = 0;
+ }
+
+ // Compute the flux vector with hyperviscosity
+ flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w - hv_coef * d3_vals[ID_DENS];
+ flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w * u - hv_coef * d3_vals[ID_UMOM];
+ flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w * w + p - hv_coef * d3_vals[ID_WMOM];
+ flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w * t - hv_coef * d3_vals[ID_RHOT];
+ }
+ }
+
+ // Use the fluxes to compute tendencies for each cell
+ // (flux divergence in z, plus the gravity source term on W-momentum)
+#pragma acc parallel loop collapse(3) default(present) async
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx; i++) {
+ indt = ll * nz * nx + k * nx + i;
+ indf1 = ll * (nz + 1) * (nx + 1) + (k) * (nx + 1) + i;
+ indf2 = ll * (nz + 1) * (nx + 1) + (k + 1) * (nx + 1) + i;
+ tend[indt] = -(flux[indf2] - flux[indf1]) / dz;
+ if (ll == ID_WMOM) {
+ // Buoyancy: -g * (density perturbation)
+ inds = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ tend[indt] = tend[indt] - state[inds] * grav;
+ }
+ }
+ }
+ }
+}
+
+// Set this MPI task's halo values in the x-direction. This routine will require
+// MPI
+// Single-rank runs copy halos periodically on the device; multi-rank runs
+// exchange hs-wide strips with the left/right neighbors via nonblocking MPI,
+// staging through host buffers (acc update host/device around the transfers).
+void set_halo_values_x(double *state) {
+ int k, ll, ind_r, ind_u, ind_t, i, s, ierr;
+ double z;
+
+ if (nranks == 1) {
+
+ // Periodic wrap within this rank: left halo <- rightmost interior cells,
+ // right halo <- leftmost interior cells (hs == 2 columns each side)
+#pragma acc parallel loop collapse(2) default(present) async
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ 0] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + hs - 2];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + hs - 1];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs + 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs + 1];
+ }
+ }
+
+ } else {
+
+ MPI_Request req_r[2], req_s[2];
+
+ // Prepost receives
+ // Tag pairing: tag 0 = data moving right-to-left, tag 1 = left-to-right,
+ // matching the Isend tags below
+ ierr = MPI_Irecv(recvbuf_l, hs * nz * NUM_VARS, MPI_DOUBLE, left_rank, 0,
+ MPI_COMM_WORLD, &req_r[0]);
+ ierr = MPI_Irecv(recvbuf_r, hs * nz * NUM_VARS, MPI_DOUBLE, right_rank, 1,
+ MPI_COMM_WORLD, &req_r[1]);
+
+ // Pack the send buffers
+#pragma acc parallel loop collapse(3) default(present) async
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (s = 0; s < hs; s++) {
+ sendbuf_l[ll * nz * hs + k * hs + s] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs + s];
+ sendbuf_r[ll * nz * hs + k * hs + s] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + s];
+ }
+ }
+ }
+
+ // Copy the packed buffers to the host and wait so MPI sees valid data
+#pragma acc update host(sendbuf_l[0 : nz * hs * NUM_VARS], \
+ sendbuf_r[0 : nz * hs * NUM_VARS]) async
+#pragma acc wait
+
+ // Fire off the sends
+ ierr = MPI_Isend(sendbuf_l, hs * nz * NUM_VARS, MPI_DOUBLE, left_rank, 1,
+ MPI_COMM_WORLD, &req_s[0]);
+ ierr = MPI_Isend(sendbuf_r, hs * nz * NUM_VARS, MPI_DOUBLE, right_rank, 0,
+ MPI_COMM_WORLD, &req_s[1]);
+
+ // Wait for receives to finish
+ ierr = MPI_Waitall(2, req_r, MPI_STATUSES_IGNORE);
+
+ // Push the received halo strips back to the device
+#pragma acc update device(recvbuf_l[0 : nz * hs * NUM_VARS], \
+ recvbuf_r[0 : nz * hs * NUM_VARS]) async
+
+ // Unpack the receive buffers
+#pragma acc parallel loop collapse(3) default(present) async
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (s = 0; s < hs; s++) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ s] = recvbuf_l[ll * nz * hs + k * hs + s];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs + s] = recvbuf_r[ll * nz * hs + k * hs + s];
+ }
+ }
+ }
+
+ // Wait for sends to finish
+ ierr = MPI_Waitall(2, req_s, MPI_STATUSES_IGNORE);
+ }
+
+ // Injection test case: rank 0 overrides its left halo with a fast, cold
+ // inflow in a band around z = 3/4 * zlen
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ if (myrank == 0) {
+#pragma acc parallel loop collapse(2) default(present) async
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < hs; i++) {
+ z = (k_beg + k + 0.5) * dz;
+ // NOTE(review): abs() on a double relies on the C++ <cmath>
+ // overload; fabs() would be unambiguous — confirm intent.
+ if (abs(z - 3 * zlen / 4) <= zlen / 16) {
+ ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ // 50 m/s inflow at 298 K potential temperature
+ state[ind_u] = (state[ind_r] + hy_dens_cell[k + hs]) * 50.;
+ state[ind_t] = (state[ind_r] + hy_dens_cell[k + hs]) * 298. -
+ hy_dens_theta_cell[k + hs];
+ }
+ }
+ }
+ }
+ }
+}
+
+// Set this MPI task's halo values in the z-direction. This does not require MPI
+// because there is no MPI decomposition in the vertical direction
+// Boundary policy per variable: W-momentum is zeroed (impermeable lid/floor),
+// U-momentum is extrapolated scaled by the hydrostatic density ratio, and all
+// other variables are copied from the nearest interior row (zero gradient).
+void set_halo_values_z(double *state) {
+ int i, ll;
+#pragma acc parallel loop collapse(2) default(present) async
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (i = 0; i < nx + 2 * hs; i++) {
+ if (ll == ID_WMOM) {
+ // No vertical flow through the top and bottom boundaries
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] = 0.;
+ } else if (ll == ID_UMOM) {
+ // Extrapolate horizontal momentum, rescaled so the implied wind speed
+ // is continuous across the boundary (momentum ~ density)
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i] /
+ hy_dens_cell[hs] * hy_dens_cell[0];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i] /
+ hy_dens_cell[hs] * hy_dens_cell[1];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i] /
+ hy_dens_cell[nz + hs - 1] * hy_dens_cell[nz + hs];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i] /
+ hy_dens_cell[nz + hs - 1] * hy_dens_cell[nz + hs + 1];
+ } else {
+ // Zero-gradient copy of the first/last interior row into both halos
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i];
+ }
+ }
+ }
+}
+
+// Initialize MPI, decompose the domain in x across ranks, allocate all model
+// arrays, and build the initial fluid state plus the hydrostatic background
+// (cell averages and cell-interface values) via Gauss-Legendre quadrature.
+void init(int *argc, char ***argv) {
+ int i, k, ii, kk, ll, ierr, inds, i_end;
+ double x, z, r, u, w, t, hr, ht, nper;
+
+ ierr = MPI_Init(argc, argv);
+
+ // 1-D domain decomposition in x: rounding nper*rank gives each rank a
+ // contiguous strip whose sizes differ by at most one cell
+ ierr = MPI_Comm_size(MPI_COMM_WORLD, &nranks);
+ ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+ nper = ((double)nx_glob) / nranks;
+ i_beg = round(nper * (myrank));
+ i_end = round(nper * ((myrank) + 1)) - 1;
+ nx = i_end - i_beg + 1;
+ // Periodic neighbor ranks in x
+ left_rank = myrank - 1;
+ if (left_rank == -1)
+ left_rank = nranks - 1;
+ right_rank = myrank + 1;
+ if (right_rank == nranks)
+ right_rank = 0;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+ // YOU DON'T NEED TO ALTER ANYTHING BELOW THIS POINT IN THE CODE
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Vertical direction isn't MPI-ized, so the rank's local values = the global
+ // values
+ k_beg = 0;
+ nz = nz_glob;
+ mainproc = (myrank == 0);
+
+ // Allocate the model data
+ state = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS *
+ sizeof(double));
+ state_tmp = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS *
+ sizeof(double));
+ flux = (double *)malloc((nx + 1) * (nz + 1) * NUM_VARS * sizeof(double));
+ tend = (double *)malloc(nx * nz * NUM_VARS * sizeof(double));
+ hy_dens_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
+ hy_dens_theta_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
+ hy_dens_int = (double *)malloc((nz + 1) * sizeof(double));
+ hy_dens_theta_int = (double *)malloc((nz + 1) * sizeof(double));
+ hy_pressure_int = (double *)malloc((nz + 1) * sizeof(double));
+ sendbuf_l = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ sendbuf_r = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ recvbuf_l = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ recvbuf_r = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+
+ // Define the maximum stable time step based on an assumed maximum wind speed
+ dt = dmin(dx, dz) / max_speed * cfl;
+ // Set initial elapsed model time and output_counter to zero
+ etime = 0.;
+ output_counter = 0.;
+
+ // If I'm the main process in MPI, display some grid information
+ if (mainproc) {
+ printf("nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob);
+ printf("dx,dz: %lf %lf\n", dx, dz);
+ printf("dt: %lf\n", dt);
+ }
+ // Want to make sure this info is displayed before further output
+ ierr = MPI_Barrier(MPI_COMM_WORLD);
+
+ //////////////////////////////////////////////////////////////////////////
+ // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
+ //////////////////////////////////////////////////////////////////////////
+ for (k = 0; k < nz + 2 * hs; k++) {
+ for (i = 0; i < nx + 2 * hs; i++) {
+ // Initialize the state to zero
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = 0.;
+ }
+ // Use Gauss-Legendre quadrature to initialize a hydrostatic balance +
+ // temperature perturbation
+ for (kk = 0; kk < nqpoints; kk++) {
+ for (ii = 0; ii < nqpoints; ii++) {
+ // Compute the x,z location within the global domain based on cell and
+ // quadrature index
+ x = (i_beg + i - hs + 0.5) * dx + (qpoints[ii] - 0.5) * dx;
+ z = (k_beg + k - hs + 0.5) * dz + (qpoints[kk] - 0.5) * dz;
+
+ // Set the fluid state based on the user's specification
+ // (each routine returns perturbations r,u,w,t and background hr,ht)
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(x, z, r, u, w, t, hr, ht);
+ }
+
+ // Store into the fluid state array
+ // (quadrature-weighted accumulation of the perturbation variables)
+ inds =
+ ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = state[inds] + r * qweights[ii] * qweights[kk];
+ inds =
+ ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] =
+ state[inds] + (r + hr) * u * qweights[ii] * qweights[kk];
+ inds =
+ ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] =
+ state[inds] + (r + hr) * w * qweights[ii] * qweights[kk];
+ inds =
+ ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = state[inds] + ((r + hr) * (t + ht) - hr * ht) *
+ qweights[ii] * qweights[kk];
+ }
+ }
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state_tmp[inds] = state[inds];
+ }
+ }
+ }
+ // Compute the hydrostatic background state over vertical cell averages
+ for (k = 0; k < nz + 2 * hs; k++) {
+ hy_dens_cell[k] = 0.;
+ hy_dens_theta_cell[k] = 0.;
+ for (kk = 0; kk < nqpoints; kk++) {
+ z = (k_beg + k - hs + 0.5) * dz;
+ // Set the fluid state based on the user's specification
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(0., z, r, u, w, t, hr, ht);
+ }
+ hy_dens_cell[k] = hy_dens_cell[k] + hr * qweights[kk];
+ hy_dens_theta_cell[k] = hy_dens_theta_cell[k] + hr * ht * qweights[kk];
+ }
+ }
+ // Compute the hydrostatic background state at vertical cell interfaces
+ for (k = 0; k < nz + 1; k++) {
+ z = (k_beg + k) * dz;
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(0., z, r, u, w, t, hr, ht);
+ }
+ hy_dens_int[k] = hr;
+ hy_dens_theta_int[k] = hr * ht;
+ hy_pressure_int[k] = C0 * pow((hr * ht), gamm);
+ }
+}
+
+// Injection test case: the domain starts in exact hydrostatic balance with
+// zero perturbation everywhere; the fast, cold inflow near the model top is
+// applied later at the left boundary (see set_halo_values_x).
+// x,z: sample location; r,u,w,t: output perturbations; hr,ht: output
+// hydrostatic background density and potential temperature at z.
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht) {
+ // Constant-potential-temperature hydrostatic background
+ hydro_const_theta(z, hr, ht);
+ // No initial perturbation
+ r = u = w = t = 0.;
+}
+
+// Density-current test case: a cold thermal that falls and then propagates
+// along the model bottom.
+// x,z: sample location; r,u,w,t: output perturbations; hr,ht: output
+// hydrostatic background density and potential temperature at z.
+void density_current(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht) {
+ // Constant-potential-temperature hydrostatic background
+ hydro_const_theta(z, hr, ht);
+ r = u = w = 0.;
+ // Cold (-20 K) elliptical bubble centered at mid-domain, z = 5 km
+ t = sample_ellipse_cosine(x, z, -20., xlen / 2, 5000., 4000., 2000.);
+}
+
+// Gravity-wave test case: stably stratified background (constant
+// Brunt-Vaisala frequency) with a uniform horizontal wind.
+// x,z: sample location; r,u,w,t: output perturbations; hr,ht: output
+// hydrostatic background density and potential temperature at z.
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht) {
+ // Background with Brunt-Vaisala frequency N = 0.02 s^-1
+ hydro_const_bvfreq(z, 0.02, hr, ht);
+ r = w = t = 0.;
+ u = 15.; // uniform 15 m/s horizontal wind
+}
+
+// Rising-thermal test case: a single warm bubble in a neutrally stratified
+// atmosphere.
+// x,z: sample location; r,u,w,t: output perturbations; hr,ht: output
+// hydrostatic background density and potential temperature at z.
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht) {
+ // Constant-potential-temperature hydrostatic background
+ hydro_const_theta(z, hr, ht);
+ r = u = w = 0.;
+ // Warm (+3 K) elliptical bubble centered at mid-domain, z = 2 km
+ t = sample_ellipse_cosine(x, z, 3., xlen / 2, 2000., 2000., 2000.);
+}
+
+// Colliding-thermals test case: a warm bubble below a cold bubble, which fall
+// and rise into each other.
+// x,z: sample location; r,u,w,t: output perturbations; hr,ht: output
+// hydrostatic background density and potential temperature at z.
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht) {
+ // Constant-potential-temperature hydrostatic background
+ hydro_const_theta(z, hr, ht);
+ r = u = w = 0.;
+ // Warm (+20 K) bubble at z = 2 km plus cold (-20 K) bubble at z = 8 km
+ t = sample_ellipse_cosine(x, z, 20., xlen / 2, 2000., 2000., 2000.) +
+ sample_ellipse_cosine(x, z, -20., xlen / 2, 8000., 2000., 2000.);
+}
+
+// Establish hydrostatic balance for a thermally neutral atmosphere (constant
+// potential temperature).
+// z: input height; r,t: output background hydrostatic density and potential
+// temperature at z.
+void hydro_const_theta(double z, double &r, double &t) {
+ const double theta0 = 300.; // Background potential temperature
+ const double exner0 = 1.; // Surface-level Exner pressure
+ // Potential temperature is uniform in this profile
+ t = theta0;
+ // Exner pressure decreases linearly with height for constant theta
+ const double exner = exner0 - grav * z / (cp * theta0);
+ const double p = p0 * pow(exner, cp / rd); // Pressure at z
+ const double rt = pow(p / C0, 1. / gamm); // rho*theta at z
+ r = rt / t; // Density at z
+}
+
+// Establish hydrostatic balance for a stably stratified atmosphere with a
+// constant Brunt-Vaisala frequency.
+// z: input height; bv_freq0: the constant Brunt-Vaisala frequency; r,t:
+// output background hydrostatic density and potential temperature at z.
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t) {
+ const double theta0 = 300.; // Background potential temperature
+ const double exner0 = 1.; // Surface-level Exner pressure
+ // Potential temperature grows exponentially with height at rate N^2/g
+ t = theta0 * exp(bv_freq0 * bv_freq0 / grav * z);
+ // Exner pressure consistent with that theta profile
+ const double exner = exner0 - grav * grav / (cp * bv_freq0 * bv_freq0) *
+ (t - theta0) / (t * theta0);
+ const double p = p0 * pow(exner, cp / rd); // Pressure at z
+ const double rt = pow(p / C0, 1. / gamm); // rho*theta at z
+ r = rt / t; // Density at z
+}
+
+// Sample a cos^2-shaped bump over an ellipse.
+// x,z: sample location; amp: peak amplitude; x0,z0: ellipse center;
+// xrad,zrad: ellipse radii. Returns amp*cos^2 inside the ellipse, 0 outside.
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+ double z0, double xrad, double zrad) {
+ // Normalized distance from the ellipse center, scaled so the ellipse
+ // boundary maps to pi/2
+ const double xn = (x - x0) / xrad;
+ const double zn = (z - z0) / zrad;
+ const double dist = sqrt(xn * xn + zn * zn) * pi / 2.;
+ // cos**2 profile inside the ellipse, zero outside
+ return (dist <= pi / 2.) ? amp * pow(cos(dist), 2.) : 0.;
+}
+
+// Output the fluid state (state) to a NetCDF file at a given elapsed model time
+// (etime) The file I/O uses parallel-netcdf, the only external library required
+// for this mini-app. If it's too cumbersome, you can comment the I/O out, but
+// you'll miss out on some potentially cool graphics
+// The file "output.nc" is created on the first call (etime == 0) and appended
+// to thereafter; grid fields are written collectively, the time value
+// independently by the main rank only.
+void output(double *state, double etime) {
+ int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid,
+ theta_varid, t_varid, dimids[3];
+ int i, k, ind_r, ind_u, ind_w, ind_t;
+ MPI_Offset st1[1], ct1[1], st3[3], ct3[3];
+ // Temporary arrays to hold density, u-wind, w-wind, and potential temperature
+ // (theta)
+ double *dens, *uwnd, *wwnd, *theta;
+ double *etimearr;
+ // Bring the device copy of the state back to the host before writing
+#pragma acc update host(state[0 : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS]) \
+ async
+#pragma acc wait
+ // Inform the user
+ if (mainproc) {
+ printf("*** OUTPUT ***\n");
+ }
+ // Allocate some (big) temp arrays
+ dens = (double *)malloc(nx * nz * sizeof(double));
+ uwnd = (double *)malloc(nx * nz * sizeof(double));
+ wwnd = (double *)malloc(nx * nz * sizeof(double));
+ theta = (double *)malloc(nx * nz * sizeof(double));
+ etimearr = (double *)malloc(1 * sizeof(double));
+
+ // If the elapsed time is zero, create the file. Otherwise, open the file
+ if (etime == 0) {
+ // Create the file
+ ncwrap(ncmpi_create(MPI_COMM_WORLD, "output.nc", NC_CLOBBER, MPI_INFO_NULL,
+ &ncid),
+ __LINE__);
+ // Create the dimensions
+ ncwrap(ncmpi_def_dim(ncid, "t", (MPI_Offset)NC_UNLIMITED, &t_dimid),
+ __LINE__);
+ ncwrap(ncmpi_def_dim(ncid, "x", (MPI_Offset)nx_glob, &x_dimid), __LINE__);
+ ncwrap(ncmpi_def_dim(ncid, "z", (MPI_Offset)nz_glob, &z_dimid), __LINE__);
+ // Create the variables
+ dimids[0] = t_dimid;
+ ncwrap(ncmpi_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
+ dimids[0] = t_dimid;
+ dimids[1] = z_dimid;
+ dimids[2] = x_dimid;
+ ncwrap(ncmpi_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid),
+ __LINE__);
+ ncwrap(ncmpi_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid),
+ __LINE__);
+ ncwrap(ncmpi_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid),
+ __LINE__);
+ ncwrap(ncmpi_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid),
+ __LINE__);
+ // End "define" mode
+ ncwrap(ncmpi_enddef(ncid), __LINE__);
+ } else {
+ // Open the file
+ ncwrap(
+ ncmpi_open(MPI_COMM_WORLD, "output.nc", NC_WRITE, MPI_INFO_NULL, &ncid),
+ __LINE__);
+ // Get the variable IDs
+ ncwrap(ncmpi_inq_varid(ncid, "dens", &dens_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "theta", &theta_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "t", &t_varid), __LINE__);
+ }
+
+ // Store perturbed values in the temp arrays for output
+ // (convert momenta to winds; theta is output as a perturbation from the
+ // hydrostatic background)
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx; i++) {
+ ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ dens[k * nx + i] = state[ind_r];
+ uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
+ wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
+ theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) /
+ (hy_dens_cell[k + hs] + state[ind_r]) -
+ hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
+ }
+ }
+
+ // Write the grid data to file with all the processes writing collectively
+ // (each rank writes its own [1, nz, nx] hyperslab at offset i_beg)
+ st3[0] = num_out;
+ st3[1] = k_beg;
+ st3[2] = i_beg;
+ ct3[0] = 1;
+ ct3[1] = nz;
+ ct3[2] = nx;
+ ncwrap(ncmpi_put_vara_double_all(ncid, dens_varid, st3, ct3, dens), __LINE__);
+ ncwrap(ncmpi_put_vara_double_all(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
+ ncwrap(ncmpi_put_vara_double_all(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
+ ncwrap(ncmpi_put_vara_double_all(ncid, theta_varid, st3, ct3, theta),
+ __LINE__);
+
+ // Only the main process needs to write the elapsed time
+ // Begin "independent" write mode
+ ncwrap(ncmpi_begin_indep_data(ncid), __LINE__);
+ // write elapsed time to file
+ if (mainproc) {
+ st1[0] = num_out;
+ ct1[0] = 1;
+ etimearr[0] = etime;
+ ncwrap(ncmpi_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
+ }
+ // End "independent" write mode
+ ncwrap(ncmpi_end_indep_data(ncid), __LINE__);
+
+ // Close the file
+ ncwrap(ncmpi_close(ncid), __LINE__);
+
+ // Increment the number of outputs
+ num_out = num_out + 1;
+
+ // Deallocate the temp arrays
+ free(dens);
+ free(uwnd);
+ free(wwnd);
+ free(theta);
+ free(etimearr);
+}
+
+// Error reporting routine for the PNetCDF I/O
+void ncwrap(int ierr, int line) {
+ if (ierr != NC_NOERR) {
+ printf("NetCDF Error at line: %d\n", line);
+ printf("%s\n", ncmpi_strerror(ierr));
+ exit(-1);
+ }
+}
+
+// Release all heap-allocated model arrays and shut down MPI.
+void finalize() {
+ // Every buffer allocated in init(), freed in one pass
+ double *buffers[] = {state, state_tmp, flux,
+ tend, hy_dens_cell, hy_dens_theta_cell,
+ hy_dens_int, hy_dens_theta_int, hy_pressure_int,
+ sendbuf_l, sendbuf_r, recvbuf_l,
+ recvbuf_r};
+ for (double *buf : buffers) {
+ free(buf);
+ }
+ MPI_Finalize();
+}
+
+// Compute reduced quantities for error checking without resorting to the
+// "ncdiff" tool
+// mass: output global domain mass; te: output global domain total energy
+// (kinetic + internal). Local sums are reduced across ranks via
+// MPI_Allreduce, so every rank returns the global values.
+void reductions(double &mass, double &te) {
+ double mass_loc = 0;
+ double te_loc = 0;
+ // Note: no "async" here — this loop completes before the MPI reduction
+#pragma acc parallel loop collapse(2) \
+ reduction(+ : mass_loc, te_loc) default(present)
+ for (int k = 0; k < nz; k++) {
+ for (int i = 0; i < nx; i++) {
+ int ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ int ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ int ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ int ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ double r = state[ind_r] + hy_dens_cell[hs + k]; // Density
+ double u = state[ind_u] / r; // U-wind
+ double w = state[ind_w] / r; // W-wind
+ double th = (state[ind_t] + hy_dens_theta_cell[hs + k]) /
+ r; // Potential Temperature (theta)
+ double p = C0 * pow(r * th, gamm); // Pressure
+ double t = th / pow(p0 / p, rd / cp); // Temperature
+ double ke = r * (u * u + w * w); // Kinetic Energy
+ double ie = r * cv * t; // Internal Energy
+ mass_loc += r * dx * dz; // Accumulate domain mass
+ te_loc += (ke + ie) * dx * dz; // Accumulate domain total energy
+ }
+ }
+ // Sum the per-rank partial sums so every rank holds the global totals
+ double glob[2], loc[2];
+ loc[0] = mass_loc;
+ loc[1] = te_loc;
+ int ierr = MPI_Allreduce(loc, glob, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+ mass = glob[0];
+ te = glob[1];
+}
diff --git a/cpp/miniWeather_mpi_openmp.cpp b/cpp/miniWeather_mpi_openmp.cpp
new file mode 100644
index 00000000..2e8ff059
--- /dev/null
+++ b/cpp/miniWeather_mpi_openmp.cpp
@@ -0,0 +1,1122 @@
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// miniWeather
+// Author: Matt Norman, Oak Ridge National Laboratory
+// This code simulates dry, stratified, compressible, non-hydrostatic fluid
+// flows. For documentation, please see the attached documentation in the
+// "documentation" folder
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pnetcdf.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <mpi.h>
+#include <omp.h>
+#include <chrono>
+#include <iostream>
+
+constexpr double pi = 3.14159265358979323846264338327; // Pi
+constexpr double grav = 9.8; // Gravitational acceleration (m / s^2)
+constexpr double cp = 1004.; // Specific heat of dry air at constant pressure
+constexpr double cv = 717.; // Specific heat of dry air at constant volume
+constexpr double rd =
+ 287.; // Dry air constant for equation of state (P=rho*rd*T)
+constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals
+constexpr double C0 =
+ 27.5629410929725921310572974482; // Constant to translate potential
+ // temperature into pressure
+ // (P=C0*(rho*theta)**gamma)
+constexpr double gamm =
+ 1.40027894002789400278940027894; // gamma=cp/cv , have to call this gamm
+ // because "gamma" is taken (I hate C so
+ // much)
+
+// Define domain and stability-related constants
+constexpr double xlen = 2.e4; // Length of the domain in the x-direction
+ // (meters)
+constexpr double zlen = 1.e4; // Length of the domain in the z-direction
+ // (meters)
+constexpr double hv_beta =
+ 0.05; // How strong to diffuse the solution: hv_beta \in [0:1]
+constexpr double cfl =
+ 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
+constexpr double max_speed =
+ 450; // Assumed maximum wave speed during the simulation (speed of sound +
+ // speed of wind) (meter / sec)
+constexpr int hs =
+ 2; //"Halo" size: number of cells beyond the MPI task's domain needed for a
+ // full "stencil" of information for reconstruction
+constexpr int sten_size = 4; // Size of the stencil used for interpolation
+
+// Parameters for indexing and flags
+constexpr int NUM_VARS = 4; // Number of fluid state variables
+constexpr int ID_DENS = 0; // index for density ("rho")
+constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u")
+constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w")
+constexpr int ID_RHOT =
+ 3; // index for density * potential temperature ("rho * theta")
+constexpr int DIR_X =
+ 1; // Integer constant to express that this operation is in the x-direction
+constexpr int DIR_Z =
+ 2; // Integer constant to express that this operation is in the z-direction
+constexpr int DATA_SPEC_COLLISION = 1;
+constexpr int DATA_SPEC_THERMAL = 2;
+constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
+constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
+constexpr int DATA_SPEC_INJECTION = 6;
+
+constexpr int nqpoints = 3;
+constexpr double qpoints[] = {0.112701665379258311482073460022E0,
+ 0.500000000000000000000000000000E0,
+ 0.887298334620741688517926539980E0};
+constexpr double qweights[] = {0.277777777777777777777777777779E0,
+ 0.444444444444444444444444444444E0,
+ 0.277777777777777777777777777779E0};
+
+///////////////////////////////////////////////////////////////////////////////////////
+// BEGIN USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+// The x-direction length is twice as long as the z-direction length
+// So, you'll want to have nx_glob be twice as large as nz_glob
+int constexpr nx_glob = _NX; // Number of total cells in the x-direction
+int constexpr nz_glob = _NZ; // Number of total cells in the z-direction
+double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation
+double constexpr output_freq =
+ _OUT_FREQ; // How frequently to output data to file (in seconds)
+int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data
+double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
+double constexpr dz = zlen / nz_glob; // grid spacing in the z-direction
+///////////////////////////////////////////////////////////////////////////////////////
+// END USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are initialized but remain static over the course of the
+// simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double dt; // Model time step (seconds)
+int nx, nz; // Number of local grid cells in the x- and z- dimensions for this
+ // MPI task
+int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI
+ // task
+int nranks, myrank; // Number of MPI ranks and my rank id
+int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in
+ // the global domain
+int mainproc; // Am I the main process (rank == 0)?
+double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *
+ hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
+double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions:
+ // (1:nz+1)
+double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions:
+ // (1:nz+1)
+
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are dynamic over the course of the simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double etime; // Elapsed model time
+double output_counter; // Helps determine when it's time to do output
+// Runtime variable arrays
+double *state; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *state_tmp; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *flux; // Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
+double *tend; // Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
+double *sendbuf_l; // Buffer to send data to the left MPI rank
+double *sendbuf_r; // Buffer to send data to the right MPI rank
+double *recvbuf_l; // Buffer to receive data from the left MPI rank
+double *recvbuf_r; // Buffer to receive data from the right MPI rank
+int num_out = 0; // The number of outputs performed so far
+int direction_switch = 1;
+double mass0, te0; // Initial domain totals for mass and total energy
+double mass, te; // Domain totals for mass and total energy
+
+// How is this not in the standard?!
+double dmin(double a, double b) {
+ if (a < b) {
+ return a;
+ } else {
+ return b;
+ }
+};
+
+// Declaring the functions defined after "main"
+void init(int *argc, char ***argv);
+void finalize();
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void density_current(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void hydro_const_theta(double z, double &r, double &t);
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t);
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+ double z0, double xrad, double zrad);
+void output(double *state, double etime);
+void ncwrap(int ierr, int line);
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt);
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend);
+void compute_tendencies_x(double *state, double *flux, double *tend, double dt);
+void compute_tendencies_z(double *state, double *flux, double *tend, double dt);
+void set_halo_values_x(double *state);
+void set_halo_values_z(double *state);
+void reductions(double &mass, double &te);
+
+///////////////////////////////////////////////////////////////////////////////////////
+// THE MAIN PROGRAM STARTS HERE
+///////////////////////////////////////////////////////////////////////////////////////
+int main(int argc, char **argv) {
+
+ init(&argc, &argv);
+
+ // Initial reductions for mass, kinetic energy, and total energy
+ reductions(mass0, te0);
+
+ // Output the initial state
+ if (output_freq >= 0)
+ output(state, etime);
+
+ ////////////////////////////////////////////////////
+ // MAIN TIME STEP LOOP
+ ////////////////////////////////////////////////////
+ auto t1 = std::chrono::steady_clock::now();
+ while (etime < sim_time) {
+ // If the time step leads to exceeding the simulation time, shorten it for
+ // the last step
+ if (etime + dt > sim_time) {
+ dt = sim_time - etime;
+ }
+ // Perform a single time step
+ perform_timestep(state, state_tmp, flux, tend, dt);
+ // Inform the user
+#ifndef NO_INFORM
+ if (mainproc) {
+ printf("Elapsed Time: %lf / %lf\n", etime, sim_time);
+ }
+#endif
+ // Update the elapsed time and output counter
+ etime = etime + dt;
+ output_counter = output_counter + dt;
+ // If it's time for output, reset the counter, and do output
+ if (output_freq >= 0 && output_counter >= output_freq) {
+ output_counter = output_counter - output_freq;
+ output(state, etime);
+ }
+ }
+ auto t2 = std::chrono::steady_clock::now();
+ if (mainproc) {
+ std::cout << "CPU Time: " << std::chrono::duration(t2 - t1).count()
+ << " sec\n";
+ }
+
+ // Final reductions for mass, kinetic energy, and total energy
+ reductions(mass, te);
+
+ if (mainproc) {
+ printf("d_mass: %le\n", (mass - mass0) / mass0);
+ printf("d_te: %le\n", (te - te0) / te0);
+ }
+
+ finalize();
+}
+
+// Performs a single dimensionally split time step using a simple low-storage
+// three-stage Runge-Kutta time integrator The dimensional splitting is a
+// second-order-accurate alternating Strang splitting in which the order of
+// directions is alternated each time step. The Runge-Kutta method used here is
+// defined as follows:
+// q* = q[n] + dt/3 * rhs(q[n])
+// q** = q[n] + dt/2 * rhs(q* )
+// q[n+1] = q[n] + dt/1 * rhs(q** )
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt) {
+ if (direction_switch) {
+ // x-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ // z-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+ } else {
+ // z-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+ // x-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ }
+ if (direction_switch) {
+ direction_switch = 0;
+ } else {
+ direction_switch = 1;
+ }
+}
+
+// Perform a single semi-discretized step in time with the form:
+// state_out = state_init + dt * rhs(state_forcing)
+// Meaning the step starts from state_init, computes the rhs using
+// state_forcing, and stores the result in state_out
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend) {
+ int i, k, ll, inds, indt, indw;
+ double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
+ if (dir == DIR_X) {
+ // Set the halo values for this MPI task's fluid state in the x-direction
+ set_halo_values_x(state_forcing);
+ // Compute the time tendencies for the fluid state in the x-direction
+ compute_tendencies_x(state_forcing, flux, tend, dt);
+ } else if (dir == DIR_Z) {
+ // Set the halo values for this MPI task's fluid state in the z-direction
+ set_halo_values_z(state_forcing);
+ // Compute the time tendencies for the fluid state in the z-direction
+ compute_tendencies_z(state_forcing, flux, tend, dt);
+ }
+
+ // Apply the tendencies to the fluid state
+#pragma omp parallel for private(inds, indt, x, z, x0, z0, xrad, zrad, amp, \
+ dist, wpert, indw) collapse(3)
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx; i++) {
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ x = (i_beg + i + 0.5) * dx;
+ z = (k_beg + k + 0.5) * dz;
+ // Using sample_ellipse_cosine requires "acc routine" in OpenACC and
+ // "declare target" in OpenMP offload Neither of these are
+ // particularly well supported. So I'm manually inlining here wpert =
+ // sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. );
+ {
+ x0 = xlen / 8;
+ z0 = 1000;
+ xrad = 500;
+ zrad = 500;
+ amp = 0.01;
+ // Compute distance from bubble center
+ dist = sqrt(((x - x0) / xrad) * ((x - x0) / xrad) +
+ ((z - z0) / zrad) * ((z - z0) / zrad)) *
+ pi / 2.;
+ // If the distance from bubble center is less than the radius,
+ // create a cos**2 profile
+ if (dist <= pi / 2.) {
+ wpert = amp * pow(cos(dist), 2.);
+ } else {
+ wpert = 0.;
+ }
+ }
+ indw = ID_WMOM * nz * nx + k * nx + i;
+ tend[indw] += wpert * hy_dens_cell[hs + k];
+ }
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ i + hs;
+ indt = ll * nz * nx + k * nx + i;
+ state_out[inds] = state_init[inds] + dt * tend[indt];
+ }
+ }
+ }
+}
+
+// Compute the time tendencies of the fluid state using forcing in the
+// x-direction Since the halos are set in a separate routine, this will not
+// require MPI First, compute the flux vector at each cell interface in the
+// x-direction (including hyperviscosity) Then, compute the tendencies using
+// those fluxes
+void compute_tendencies_x(double *state, double *flux, double *tend,
+ double dt) {
+ int i, k, ll, s, inds, indf1, indf2, indt;
+ double r, u, w, t, p, stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS], hv_coef;
+ // Compute the hyperviscosity coefficient
+ hv_coef = -hv_beta * dx / (16 * dt);
+ // Compute fluxes in the x-direction for each cell
+#pragma omp parallel for private(inds, stencil, vals, d3_vals, r, u, w, t, p, \
+ ll, s) collapse(2)
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx + 1; i++) {
+ // Use fourth-order interpolation from four cell averages to compute the
+ // value at the interface in question
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (s = 0; s < sten_size; s++) {
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ i + s;
+ stencil[s] = state[inds];
+ }
+ // Fourth-order-accurate interpolation of the state
+ vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 +
+ 7 * stencil[2] / 12 - stencil[3] / 12;
+ // First-order-accurate interpolation of the third spatial derivative of
+ // the state (for artificial viscosity)
+ d3_vals[ll] =
+ -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
+ }
+
+ // Compute density, u-wind, w-wind, potential temperature, and pressure
+ // (r,u,w,t,p respectively)
+ r = vals[ID_DENS] + hy_dens_cell[k + hs];
+ u = vals[ID_UMOM] / r;
+ w = vals[ID_WMOM] / r;
+ t = (vals[ID_RHOT] + hy_dens_theta_cell[k + hs]) / r;
+ p = C0 * pow((r * t), gamm);
+
+ // Compute the flux vector
+ flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u - hv_coef * d3_vals[ID_DENS];
+ flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u * u + p - hv_coef * d3_vals[ID_UMOM];
+ flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u * w - hv_coef * d3_vals[ID_WMOM];
+ flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * u * t - hv_coef * d3_vals[ID_RHOT];
+ }
+ }
+
+ // Use the fluxes to compute tendencies for each cell
+#pragma omp parallel for private(indt, indf1, indf2) collapse(3)
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx; i++) {
+ indt = ll * nz * nx + k * nx + i;
+ indf1 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i;
+ indf2 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i + 1;
+ tend[indt] = -(flux[indf2] - flux[indf1]) / dx;
+ }
+ }
+ }
+}
+
+// Compute the time tendencies of the fluid state using forcing in the
+// z-direction Since the halos are set in a separate routine, this will not
+// require MPI First, compute the flux vector at each cell interface in the
+// z-direction (including hyperviscosity) Then, compute the tendencies using
+// those fluxes
+void compute_tendencies_z(double *state, double *flux, double *tend,
+ double dt) {
+ int i, k, ll, s, inds, indf1, indf2, indt;
+ double r, u, w, t, p, stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS], hv_coef;
+ // Compute the hyperviscosity coefficient
+ hv_coef = -hv_beta * dz / (16 * dt);
+ // Compute fluxes in the z-direction for each cell
+#pragma omp parallel for private(inds, stencil, vals, d3_vals, r, u, w, t, p, \
+ ll, s) collapse(2)
+ for (k = 0; k < nz + 1; k++) {
+ for (i = 0; i < nx; i++) {
+ // Use fourth-order interpolation from four cell averages to compute the
+ // value at the interface in question
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (s = 0; s < sten_size; s++) {
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + s) * (nx + 2 * hs) +
+ i + hs;
+ stencil[s] = state[inds];
+ }
+ // Fourth-order-accurate interpolation of the state
+ vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 +
+ 7 * stencil[2] / 12 - stencil[3] / 12;
+ // First-order-accurate interpolation of the third spatial derivative of
+ // the state
+ d3_vals[ll] =
+ -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
+ }
+
+ // Compute density, u-wind, w-wind, potential temperature, and pressure
+ // (r,u,w,t,p respectively)
+ r = vals[ID_DENS] + hy_dens_int[k];
+ u = vals[ID_UMOM] / r;
+ w = vals[ID_WMOM] / r;
+ t = (vals[ID_RHOT] + hy_dens_theta_int[k]) / r;
+ p = C0 * pow((r * t), gamm) - hy_pressure_int[k];
+ // Enforce vertical boundary condition and exact mass conservation
+ if (k == 0 || k == nz) {
+ w = 0;
+ d3_vals[ID_DENS] = 0;
+ }
+
+ // Compute the flux vector with hyperviscosity
+ flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w - hv_coef * d3_vals[ID_DENS];
+ flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w * u - hv_coef * d3_vals[ID_UMOM];
+ flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w * w + p - hv_coef * d3_vals[ID_WMOM];
+ flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+ r * w * t - hv_coef * d3_vals[ID_RHOT];
+ }
+ }
+
+ // Use the fluxes to compute tendencies for each cell
+#pragma omp parallel for private(indt, indf1, indf2, inds) collapse(3)
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < nx; i++) {
+ indt = ll * nz * nx + k * nx + i;
+ indf1 = ll * (nz + 1) * (nx + 1) + (k) * (nx + 1) + i;
+ indf2 = ll * (nz + 1) * (nx + 1) + (k + 1) * (nx + 1) + i;
+ tend[indt] = -(flux[indf2] - flux[indf1]) / dz;
+ if (ll == ID_WMOM) {
+ inds = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ tend[indt] = tend[indt] - state[inds] * grav;
+ }
+ }
+ }
+ }
+}
+
+// Set this MPI task's halo values in the x-direction. This routine will require
+// MPI
+void set_halo_values_x(double *state) {
+ int k, ll, ind_r, ind_u, ind_t, i, s, ierr;
+ double z;
+
+ if (nranks == 1) {
+
+#pragma omp parallel for collapse(2)
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ 0] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + hs - 2];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + hs - 1];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs + 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs + 1];
+ }
+ }
+
+ } else {
+
+ MPI_Request req_r[2], req_s[2];
+
+ // Prepost receives
+ ierr = MPI_Irecv(recvbuf_l, hs * nz * NUM_VARS, MPI_DOUBLE, left_rank, 0,
+ MPI_COMM_WORLD, &req_r[0]);
+ ierr = MPI_Irecv(recvbuf_r, hs * nz * NUM_VARS, MPI_DOUBLE, right_rank, 1,
+ MPI_COMM_WORLD, &req_r[1]);
+
+ // Pack the send buffers
+#pragma omp parallel for collapse(3)
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (s = 0; s < hs; s++) {
+ sendbuf_l[ll * nz * hs + k * hs + s] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs + s];
+ sendbuf_r[ll * nz * hs + k * hs + s] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + s];
+ }
+ }
+ }
+
+ // Fire off the sends
+ ierr = MPI_Isend(sendbuf_l, hs * nz * NUM_VARS, MPI_DOUBLE, left_rank, 1,
+ MPI_COMM_WORLD, &req_s[0]);
+ ierr = MPI_Isend(sendbuf_r, hs * nz * NUM_VARS, MPI_DOUBLE, right_rank, 0,
+ MPI_COMM_WORLD, &req_s[1]);
+
+ // Wait for receives to finish
+ ierr = MPI_Waitall(2, req_r, MPI_STATUSES_IGNORE);
+
+ // Unpack the receive buffers
+#pragma omp parallel for collapse(3)
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (k = 0; k < nz; k++) {
+ for (s = 0; s < hs; s++) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ s] = recvbuf_l[ll * nz * hs + k * hs + s];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs + s] = recvbuf_r[ll * nz * hs + k * hs + s];
+ }
+ }
+ }
+
+ // Wait for sends to finish
+ ierr = MPI_Waitall(2, req_s, MPI_STATUSES_IGNORE);
+ }
+
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ if (myrank == 0) {
+#pragma omp parallel for private(z, ind_r, ind_u, ind_t) collapse(2)
+ for (k = 0; k < nz; k++) {
+ for (i = 0; i < hs; i++) {
+ z = (k_beg + k + 0.5) * dz;
+ if (fabs(z - 3 * zlen / 4) <= zlen / 16) {
+ ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ state[ind_u] = (state[ind_r] + hy_dens_cell[k + hs]) * 50.;
+ state[ind_t] = (state[ind_r] + hy_dens_cell[k + hs]) * 298. -
+ hy_dens_theta_cell[k + hs];
+ }
+ }
+ }
+ }
+ }
+}
+
+// Set this MPI task's halo values in the z-direction. This does not require MPI
+// because there is no MPI decomposition in the vertical direction
+void set_halo_values_z(double *state) {
+ int i, ll;
+#pragma omp parallel for collapse(2)
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ for (i = 0; i < nx + 2 * hs; i++) {
+ if (ll == ID_WMOM) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] = 0.;
+ } else if (ll == ID_UMOM) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i] /
+ hy_dens_cell[hs] * hy_dens_cell[0];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i] /
+ hy_dens_cell[hs] * hy_dens_cell[1];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i] /
+ hy_dens_cell[nz + hs - 1] * hy_dens_cell[nz + hs];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i] /
+ hy_dens_cell[nz + hs - 1] * hy_dens_cell[nz + hs + 1];
+ } else {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i];
+ }
+ }
+ }
+}
+
+void init(int *argc, char ***argv) {
+ int i, k, ii, kk, ll, ierr, inds, i_end;
+ double x, z, r, u, w, t, hr, ht, nper;
+
+ ierr = MPI_Init(argc, argv);
+
+ ierr = MPI_Comm_size(MPI_COMM_WORLD, &nranks);
+ ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+ nper = ((double)nx_glob) / nranks;
+ i_beg = round(nper * (myrank));
+ i_end = round(nper * ((myrank) + 1)) - 1;
+ nx = i_end - i_beg + 1;
+ left_rank = myrank - 1;
+ if (left_rank == -1)
+ left_rank = nranks - 1;
+ right_rank = myrank + 1;
+ if (right_rank == nranks)
+ right_rank = 0;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+ // YOU DON'T NEED TO ALTER ANYTHING BELOW THIS POINT IN THE CODE
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Vertical direction isn't MPI-ized, so the rank's local values = the global
+ // values
+ k_beg = 0;
+ nz = nz_glob;
+ mainproc = (myrank == 0);
+
+ // Allocate the model data
+ state = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS *
+ sizeof(double));
+ state_tmp = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS *
+ sizeof(double));
+ flux = (double *)malloc((nx + 1) * (nz + 1) * NUM_VARS * sizeof(double));
+ tend = (double *)malloc(nx * nz * NUM_VARS * sizeof(double));
+ hy_dens_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
+ hy_dens_theta_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
+ hy_dens_int = (double *)malloc((nz + 1) * sizeof(double));
+ hy_dens_theta_int = (double *)malloc((nz + 1) * sizeof(double));
+ hy_pressure_int = (double *)malloc((nz + 1) * sizeof(double));
+ sendbuf_l = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ sendbuf_r = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ recvbuf_l = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ recvbuf_r = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+
+ // Define the maximum stable time step based on an assumed maximum wind speed
+ dt = dmin(dx, dz) / max_speed * cfl;
+ // Set initial elapsed model time and output_counter to zero
+ etime = 0.;
+ output_counter = 0.;
+
+ // If I'm the main process in MPI, display some grid information
+ if (mainproc) {
+ printf("nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob);
+ printf("dx,dz: %lf %lf\n", dx, dz);
+ printf("dt: %lf\n", dt);
+ }
+ // Want to make sure this info is displayed before further output
+ ierr = MPI_Barrier(MPI_COMM_WORLD);
+
+ //////////////////////////////////////////////////////////////////////////
+ // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
+ //////////////////////////////////////////////////////////////////////////
+#pragma omp parallel for private(ll, kk, ii, inds, x, z, r, u, w, t, hr, ht) \
+ collapse(2)
+ for (k = 0; k < nz + 2 * hs; k++) {
+ for (i = 0; i < nx + 2 * hs; i++) {
+ // Initialize the state to zero
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = 0.;
+ }
+ // Use Gauss-Legendre quadrature to initialize a hydrostatic balance +
+ // temperature perturbation
+ for (kk = 0; kk < nqpoints; kk++) {
+ for (ii = 0; ii < nqpoints; ii++) {
+ // Compute the x,z location within the global domain based on cell and
+ // quadrature index
+ x = (i_beg + i - hs + 0.5) * dx + (qpoints[ii] - 0.5) * dx;
+ z = (k_beg + k - hs + 0.5) * dz + (qpoints[kk] - 0.5) * dz;
+
+ // Set the fluid state based on the user's specification
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(x, z, r, u, w, t, hr, ht);
+ }
+
+ // Store into the fluid state array
+ inds =
+ ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = state[inds] + r * qweights[ii] * qweights[kk];
+ inds =
+ ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] =
+ state[inds] + (r + hr) * u * qweights[ii] * qweights[kk];
+ inds =
+ ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] =
+ state[inds] + (r + hr) * w * qweights[ii] * qweights[kk];
+ inds =
+ ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = state[inds] + ((r + hr) * (t + ht) - hr * ht) *
+ qweights[ii] * qweights[kk];
+ }
+ }
+ for (ll = 0; ll < NUM_VARS; ll++) {
+ inds = ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state_tmp[inds] = state[inds];
+ }
+ }
+ }
+ // Compute the hydrostatic background state over vertical cell averages
+#pragma omp parallel for private(kk, z, r, u, w, t, hr, ht)
+ for (k = 0; k < nz + 2 * hs; k++) {
+ hy_dens_cell[k] = 0.;
+ hy_dens_theta_cell[k] = 0.;
+ for (kk = 0; kk < nqpoints; kk++) {
+ z = (k_beg + k - hs + 0.5) * dz;
+ // Set the fluid state based on the user's specification
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(0., z, r, u, w, t, hr, ht);
+ }
+ hy_dens_cell[k] = hy_dens_cell[k] + hr * qweights[kk];
+ hy_dens_theta_cell[k] = hy_dens_theta_cell[k] + hr * ht * qweights[kk];
+ }
+ }
+ // Compute the hydrostatic background state at vertical cell interfaces
+#pragma omp parallel for private(z, r, u, w, t, hr, ht)
+ for (k = 0; k < nz + 1; k++) {
+ z = (k_beg + k) * dz;
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(0., z, r, u, w, t, hr, ht);
+ }
+ hy_dens_int[k] = hr;
+ hy_dens_theta_int[k] = hr * ht;
+ hy_pressure_int[k] = C0 * pow((hr * ht), gamm);
+ }
+}
+
+// Injection test case: the interior starts from the hydrostatic background
+// with zero perturbation; the fast, cold inflow itself is imposed at the left
+// boundary elsewhere (in the halo routines).
+// Inputs x,z are the sample coordinates; outputs r,u,w,t are the perturbation
+// density, winds, and potential temperature; hr,ht are the hydrostatic
+// background density and potential temperature at that location.
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+               double &hr, double &ht) {
+  hydro_const_theta(z, hr, ht);
+  // No initial perturbation anywhere in the domain
+  r = u = w = t = 0.;
+}
+
+// Density-current test case: a falling cold thermal that propagates along the
+// model bottom.
+// Inputs x,z are the sample coordinates; outputs r,u,w,t are the perturbation
+// density, winds, and potential temperature; hr,ht are the hydrostatic
+// background density and potential temperature at that location.
+void density_current(double x, double z, double &r, double &u, double &w,
+                     double &t, double &hr, double &ht) {
+  hydro_const_theta(z, hr, ht);
+  r = 0.;
+  u = 0.;
+  w = 0.;
+  // Cold (-20 K) bubble centered at mid-domain, z = 5 km
+  t = sample_ellipse_cosine(x, z, -20., xlen / 2, 5000., 4000., 2000.);
+}
+
+// Gravity-waves test case: stably stratified background with a uniform wind.
+// Inputs x,z are the sample coordinates; outputs r,u,w,t are the perturbation
+// density, winds, and potential temperature; hr,ht are the hydrostatic
+// background density and potential temperature at that location.
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+                   double &t, double &hr, double &ht) {
+  // Background with constant Brunt-Vaisala frequency of 0.02 s^-1
+  hydro_const_bvfreq(z, 0.02, hr, ht);
+  r = 0.;
+  t = 0.;
+  w = 0.;
+  u = 15.; // uniform 15 m/s background wind
+}
+
+// Rising-thermal test case.
+// Inputs x,z are the sample coordinates; outputs r,u,w,t are the perturbation
+// density, winds, and potential temperature; hr,ht are the hydrostatic
+// background density and potential temperature at that location.
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+             double &hr, double &ht) {
+  hydro_const_theta(z, hr, ht);
+  r = 0.;
+  u = 0.;
+  w = 0.;
+  // Warm (+3 K) bubble centered at mid-domain, z = 2 km
+  t = sample_ellipse_cosine(x, z, 3., xlen / 2, 2000., 2000., 2000.);
+}
+
+// Colliding-thermals test case: a warm bubble below and a cold bubble above.
+// Inputs x,z are the sample coordinates; outputs r,u,w,t are the perturbation
+// density, winds, and potential temperature; hr,ht are the hydrostatic
+// background density and potential temperature at that location.
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+               double &hr, double &ht) {
+  hydro_const_theta(z, hr, ht);
+  r = 0.;
+  u = 0.;
+  w = 0.;
+  // Warm (+20 K) bubble at z = 2 km plus cold (-20 K) bubble at z = 8 km
+  t = sample_ellipse_cosine(x, z, 20., xlen / 2, 2000., 2000., 2000.) +
+      sample_ellipse_cosine(x, z, -20., xlen / 2, 8000., 2000., 2000.);
+}
+
+// Establish hydrostatic balance for a constant-potential-temperature
+// (thermally neutral) atmosphere.
+// z is the input height; r and t are the output background hydrostatic
+// density and potential temperature.
+void hydro_const_theta(double z, double &r, double &t) {
+  const double theta0 = 300.; // Background potential temperature
+  const double exner0 = 1.;   // Surface-level Exner pressure
+  t = theta0; // theta is constant with height by construction
+  // For constant theta, Exner pressure decreases linearly with height
+  const double exner = exner0 - grav * z / (cp * theta0);
+  const double p = p0 * pow(exner, cp / rd); // Pressure at z
+  const double rt = pow(p / C0, 1. / gamm);  // rho*theta at z
+  r = rt / t;                                // Density at z
+}
+
+// Establish hydrostatic balance using constant Brunt-Vaisala frequency
+// z is the input coordinate
+// bv_freq0 is the constant Brunt-Vaisala frequency
+// r and t are the output background hydrostatic density and potential
+// temperature
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t) {
+  const double theta0 = 300.; // Background potential temperature
+  const double exner0 = 1.;   // Surface-level Exner pressure
+  double p, exner, rt;
+  // Constant N^2 implies theta grows exponentially with height
+  t = theta0 * exp(bv_freq0 * bv_freq0 / grav * z); // Pot temp at z
+  exner = exner0 - grav * grav / (cp * bv_freq0 * bv_freq0) * (t - theta0) /
+                       (t * theta0); // Exner pressure at z
+  p = p0 * pow(exner, (cp / rd));    // Pressure at z
+  rt = pow((p / C0), (1. / gamm));   // rho*theta at z
+  r = rt / t;                        // Density at z
+}
+
+// Sample a cos**2 bump of amplitude `amp`, centered at (x0,z0) with radii
+// (xrad,zrad), at location (x,z). Returns 0 outside the ellipse.
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+                             double z0, double xrad, double zrad) {
+  const double xn = (x - x0) / xrad;
+  const double zn = (z - z0) / zrad;
+  // Normalized distance from the bubble center, scaled so the ellipse edge
+  // maps to pi/2
+  const double dist = sqrt(xn * xn + zn * zn) * pi / 2.;
+  // Inside the ellipse: smooth cos**2 profile; outside: zero
+  return (dist <= pi / 2.) ? amp * pow(cos(dist), 2.) : 0.;
+}
+
+// Output the fluid state (state) to a NetCDF file at a given elapsed model time
+// (etime) The file I/O uses parallel-netcdf, the only external library required
+// for this mini-app. If it's too cumbersome, you can comment the I/O out, but
+// you'll miss out on some potentially cool graphics
+// NOTE(review): this routine reads `state` on the host; assumes the host copy
+// is current at the time of the call — confirm a device-to-host update
+// precedes output when offloading is active.
+void output(double *state, double etime) {
+  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid,
+      theta_varid, t_varid, dimids[3];
+  int i, k, ind_r, ind_u, ind_w, ind_t;
+  MPI_Offset st1[1], ct1[1], st3[3], ct3[3];
+  // Temporary arrays to hold density, u-wind, w-wind, and potential temperature
+  // (theta)
+  double *dens, *uwnd, *wwnd, *theta;
+  double *etimearr;
+  // Inform the user
+  if (mainproc) {
+    printf("*** OUTPUT ***\n");
+  }
+  // Allocate some (big) temp arrays
+  dens = (double *)malloc(nx * nz * sizeof(double));
+  uwnd = (double *)malloc(nx * nz * sizeof(double));
+  wwnd = (double *)malloc(nx * nz * sizeof(double));
+  theta = (double *)malloc(nx * nz * sizeof(double));
+  etimearr = (double *)malloc(1 * sizeof(double));
+
+  // If the elapsed time is zero, create the file. Otherwise, open the file
+  if (etime == 0) {
+    // Create the file
+    ncwrap(ncmpi_create(MPI_COMM_WORLD, "output.nc", NC_CLOBBER, MPI_INFO_NULL,
+                        &ncid),
+           __LINE__);
+    // Create the dimensions (time is the unlimited/record dimension)
+    ncwrap(ncmpi_def_dim(ncid, "t", (MPI_Offset)NC_UNLIMITED, &t_dimid),
+           __LINE__);
+    ncwrap(ncmpi_def_dim(ncid, "x", (MPI_Offset)nx_glob, &x_dimid), __LINE__);
+    ncwrap(ncmpi_def_dim(ncid, "z", (MPI_Offset)nz_glob, &z_dimid), __LINE__);
+    // Create the variables
+    dimids[0] = t_dimid;
+    ncwrap(ncmpi_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
+    dimids[0] = t_dimid;
+    dimids[1] = z_dimid;
+    dimids[2] = x_dimid;
+    ncwrap(ncmpi_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid),
+           __LINE__);
+    ncwrap(ncmpi_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid),
+           __LINE__);
+    ncwrap(ncmpi_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid),
+           __LINE__);
+    ncwrap(ncmpi_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid),
+           __LINE__);
+    // End "define" mode
+    ncwrap(ncmpi_enddef(ncid), __LINE__);
+  } else {
+    // Open the file
+    ncwrap(
+        ncmpi_open(MPI_COMM_WORLD, "output.nc", NC_WRITE, MPI_INFO_NULL, &ncid),
+        __LINE__);
+    // Get the variable IDs
+    ncwrap(ncmpi_inq_varid(ncid, "dens", &dens_varid), __LINE__);
+    ncwrap(ncmpi_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
+    ncwrap(ncmpi_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
+    ncwrap(ncmpi_inq_varid(ncid, "theta", &theta_varid), __LINE__);
+    ncwrap(ncmpi_inq_varid(ncid, "t", &t_varid), __LINE__);
+  }
+
+  // Store perturbed values in the temp arrays for output
+  // (winds are converted from momenta; theta is the perturbation from the
+  // hydrostatic background)
+#pragma omp parallel for private(ind_r, ind_u, ind_w, ind_t) collapse(2)
+  for (k = 0; k < nz; k++) {
+    for (i = 0; i < nx; i++) {
+      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+              (k + hs) * (nx + 2 * hs) + i + hs;
+      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+              (k + hs) * (nx + 2 * hs) + i + hs;
+      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+              (k + hs) * (nx + 2 * hs) + i + hs;
+      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+              (k + hs) * (nx + 2 * hs) + i + hs;
+      dens[k * nx + i] = state[ind_r];
+      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
+      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
+      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) /
+                              (hy_dens_cell[k + hs] + state[ind_r]) -
+                          hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
+    }
+  }
+
+  // Write the grid data to file with all the processes writing collectively
+  st3[0] = num_out;
+  st3[1] = k_beg;
+  st3[2] = i_beg;
+  ct3[0] = 1;
+  ct3[1] = nz;
+  ct3[2] = nx;
+  ncwrap(ncmpi_put_vara_double_all(ncid, dens_varid, st3, ct3, dens), __LINE__);
+  ncwrap(ncmpi_put_vara_double_all(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
+  ncwrap(ncmpi_put_vara_double_all(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
+  ncwrap(ncmpi_put_vara_double_all(ncid, theta_varid, st3, ct3, theta),
+         __LINE__);
+
+  // Only the main process needs to write the elapsed time
+  // Begin "independent" write mode
+  ncwrap(ncmpi_begin_indep_data(ncid), __LINE__);
+  // write elapsed time to file
+  if (mainproc) {
+    st1[0] = num_out;
+    ct1[0] = 1;
+    etimearr[0] = etime;
+    ncwrap(ncmpi_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
+  }
+  // End "independent" write mode
+  ncwrap(ncmpi_end_indep_data(ncid), __LINE__);
+
+  // Close the file
+  ncwrap(ncmpi_close(ncid), __LINE__);
+
+  // Increment the number of outputs
+  num_out = num_out + 1;
+
+  // Deallocate the temp arrays
+  free(dens);
+  free(uwnd);
+  free(wwnd);
+  free(theta);
+  free(etimearr);
+}
+
+// Error reporting routine for the PNetCDF I/O
+void ncwrap(int ierr, int line) {
+ if (ierr != NC_NOERR) {
+ printf("NetCDF Error at line: %d\n", line);
+ printf("%s\n", ncmpi_strerror(ierr));
+ exit(-1);
+ }
+}
+
+// Release all heap arrays allocated during initialization and shut down MPI.
+void finalize() {
+  free(state);
+  free(state_tmp);
+  free(flux);
+  free(tend);
+  free(hy_dens_cell);
+  free(hy_dens_theta_cell);
+  free(hy_dens_int);
+  free(hy_dens_theta_int);
+  free(hy_pressure_int);
+  free(sendbuf_l);
+  free(sendbuf_r);
+  free(recvbuf_l);
+  free(recvbuf_r);
+  // Drop the previously unused `ierr` local; the return code was never checked
+  MPI_Finalize();
+}
+
+// Compute reduced quantities for error checking without resorting to the
+// "ncdiff" tool
+// Outputs: mass = global domain mass, te = global domain total energy
+// (kinetic + internal), summed across all MPI ranks via MPI_Allreduce.
+void reductions(double &mass, double &te) {
+  double mass_loc = 0;
+  double te_loc = 0;
+  // Host-side OpenMP reduction over this rank's interior cells
+#pragma omp parallel for reduction(+ : mass_loc, te_loc)
+  for (int k = 0; k < nz; k++) {
+    for (int i = 0; i < nx; i++) {
+      int ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+                  (k + hs) * (nx + 2 * hs) + i + hs;
+      int ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+                  (k + hs) * (nx + 2 * hs) + i + hs;
+      int ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+                  (k + hs) * (nx + 2 * hs) + i + hs;
+      int ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+                  (k + hs) * (nx + 2 * hs) + i + hs;
+      double r = state[ind_r] + hy_dens_cell[hs + k]; // Density
+      double u = state[ind_u] / r;                    // U-wind
+      double w = state[ind_w] / r;                    // W-wind
+      double th = (state[ind_t] + hy_dens_theta_cell[hs + k]) /
+                  r;                                  // Potential Temperature (theta)
+      double p = C0 * pow(r * th, gamm);              // Pressure
+      double t = th / pow(p0 / p, rd / cp);           // Temperature
+      double ke = r * (u * u + w * w);                // Kinetic Energy
+      double ie = r * cv * t;                         // Internal Energy
+      mass_loc += r * dx * dz;          // Accumulate domain mass
+      te_loc += (ke + ie) * dx * dz;    // Accumulate domain total energy
+    }
+  }
+  // Sum the per-rank partial totals across the whole domain
+  double glob[2], loc[2];
+  loc[0] = mass_loc;
+  loc[1] = te_loc;
+  int ierr = MPI_Allreduce(loc, glob, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  mass = glob[0];
+  te = glob[1];
+}
diff --git a/cpp/miniWeather_mpi_openmp45.cpp b/cpp/miniWeather_mpi_openmp45.cpp
new file mode 100644
index 00000000..20b219a3
--- /dev/null
+++ b/cpp/miniWeather_mpi_openmp45.cpp
@@ -0,0 +1,1153 @@
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// miniWeather
+// Author: Matt Norman, Oak Ridge National Laboratory
+// This code simulates dry, stratified, compressible, non-hydrostatic fluid
+// flows For documentation, please see the attached documentation in the
+// "documentation" folder
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pnetcdf.h"
+// NOTE(review): angle-bracket header names were lost in extraction;
+// reconstructed from usage (malloc/printf, math, MPI, chrono, iostream,
+// OpenMP) — verify against upstream miniWeather.
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <chrono>
+#include <iostream>
+#include <omp.h>
+
+constexpr double pi = 3.14159265358979323846264338327; // Pi
+constexpr double grav = 9.8; // Gravitational acceleration (m / s^2)
+constexpr double cp = 1004.; // Specific heat of dry air at constant pressure
+constexpr double cv = 717.; // Specific heat of dry air at constant volume
+constexpr double rd =
+ 287.; // Dry air constant for equation of state (P=rho*rd*T)
+constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals
+constexpr double C0 =
+ 27.5629410929725921310572974482; // Constant to translate potential
+ // temperature into pressure
+ // (P=C0*(rho*theta)**gamma)
+constexpr double gamm =
+ 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm
+ // because "gamma" is taken (I hate C so
+ // much)
+
+// Define domain and stability-related constants
+constexpr double xlen = 2.e4; // Length of the domain in the x-direction
+ // (meters)
+constexpr double zlen = 1.e4; // Length of the domain in the z-direction
+ // (meters)
+constexpr double hv_beta =
+ 0.05; // How strong to diffuse the solution: hv_beta \in [0:1]
+constexpr double cfl =
+ 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
+constexpr double max_speed =
+ 450; // Assumed maximum wave speed during the simulation (speed of sound +
+ // speed of wind) (meter / sec)
+constexpr int hs =
+ 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a
+ // full "stencil" of information for reconstruction
+constexpr int sten_size = 4; // Size of the stencil used for interpolation
+
+// Parameters for indexing and flags
+constexpr int NUM_VARS = 4; // Number of fluid state variables
+constexpr int ID_DENS = 0; // index for density ("rho")
+constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u")
+constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w")
+constexpr int ID_RHOT =
+ 3; // index for density * potential temperature ("rho * theta")
+constexpr int DIR_X =
+ 1; // Integer constant to express that this operation is in the x-direction
+constexpr int DIR_Z =
+ 2; // Integer constant to express that this operation is in the z-direction
+constexpr int DATA_SPEC_COLLISION = 1;
+constexpr int DATA_SPEC_THERMAL = 2;
+constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
+constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
+constexpr int DATA_SPEC_INJECTION = 6;
+
+constexpr int nqpoints = 3;
+constexpr double qpoints[] = {0.112701665379258311482073460022E0,
+ 0.500000000000000000000000000000E0,
+ 0.887298334620741688517926539980E0};
+constexpr double qweights[] = {0.277777777777777777777777777779E0,
+ 0.444444444444444444444444444444E0,
+ 0.277777777777777777777777777779E0};
+
+int asyncid = 1;
+
+///////////////////////////////////////////////////////////////////////////////////////
+// BEGIN USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+// The x-direction length is twice as long as the z-direction length
+// So, you'll want to have nx_glob be twice as large as nz_glob
+int constexpr nx_glob = _NX; // Number of total cells in the x-direction
+int constexpr nz_glob = _NZ; // Number of total cells in the z-direction
+double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation
+double constexpr output_freq =
+ _OUT_FREQ; // How frequently to output data to file (in seconds)
+int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data
+double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
+double constexpr dz = zlen / nz_glob; // grid spacing in the z-direction
+///////////////////////////////////////////////////////////////////////////////////////
+// END USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are initialized but remain static over the course of the
+// simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double dt; // Model time step (seconds)
+int nx, nz; // Number of local grid cells in the x- and z- dimensions for this
+ // MPI task
+int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI
+ // task
+int nranks, myrank; // Number of MPI ranks and my rank id
+int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in
+ // the global domain
+int mainproc; // Am I the main process (rank == 0)?
+double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *
+ hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
+double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions:
+ // (1:nz+1)
+double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions:
+ // (1:nz+1)
+
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that change dynamically over the course of the simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double etime; // Elapsed model time
+double output_counter; // Helps determine when it's time to do output
+// Runtime variable arrays
+double *state; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *state_tmp; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *flux; // Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
+double *tend; // Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
+double *sendbuf_l; // Buffer to send data to the left MPI rank
+double *sendbuf_r; // Buffer to send data to the right MPI rank
+double *recvbuf_l; // Buffer to receive data from the left MPI rank
+double *recvbuf_r; // Buffer to receive data from the right MPI rank
+int num_out = 0; // The number of outputs performed so far
+int direction_switch = 1;
+double mass0, te0; // Initial domain totals for mass and total energy
+double mass, te; // Domain totals for mass and total energy
+
+// Return the smaller of two doubles (a local stand-in for std::min).
+double dmin(double a, double b) { return (a < b) ? a : b; }
+
+// Declaring the functions defined after "main"
+void init(int *argc, char ***argv);
+void finalize();
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void density_current(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void hydro_const_theta(double z, double &r, double &t);
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t);
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+ double z0, double xrad, double zrad);
+void output(double *state, double etime);
+void ncwrap(int ierr, int line);
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt);
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend);
+void compute_tendencies_x(double *state, double *flux, double *tend, double dt);
+void compute_tendencies_z(double *state, double *flux, double *tend, double dt);
+void set_halo_values_x(double *state);
+void set_halo_values_z(double *state);
+void reductions(double &mass, double &te);
+
+///////////////////////////////////////////////////////////////////////////////////////
+// THE MAIN PROGRAM STARTS HERE
+///////////////////////////////////////////////////////////////////////////////////////
+int main(int argc, char **argv) {
+
+  init(&argc, &argv);
+
+  // Keep all persistent arrays resident on the device for the whole run;
+  // `state` is copied back at region exit, scratch buffers are alloc-only.
+#pragma omp target data map( \
+    to : state_tmp[ : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS], \
+    hy_dens_cell[ : nz + 2 * hs], hy_dens_theta_cell[ : nz + 2 * hs], \
+    hy_dens_int[ : nz + 1], hy_dens_theta_int[ : nz + 1], \
+    hy_pressure_int[ : nz + 1]) \
+    map(alloc : flux[ : (nz + 1) * (nx + 1) * NUM_VARS], \
+        tend[ : nz * nx * NUM_VARS], sendbuf_l[ : hs * nz * NUM_VARS], \
+        sendbuf_r[ : hs * nz * NUM_VARS], \
+        recvbuf_l[ : hs * nz * NUM_VARS], \
+        recvbuf_r[ : hs * nz * NUM_VARS]) \
+    map(tofrom : state[ : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
+  {
+
+    // Initial reductions for mass, kinetic energy, and total energy
+    reductions(mass0, te0);
+
+    // Output the initial state
+    if (output_freq >= 0)
+      output(state, etime);
+
+    ////////////////////////////////////////////////////
+    // MAIN TIME STEP LOOP
+    ////////////////////////////////////////////////////
+    // NOTE(review): taskwait appears to drain the deferred (nowait) target
+    // tasks so the timer brackets only the time-step loop — confirm.
+#pragma omp taskwait
+    auto t1 = std::chrono::steady_clock::now();
+    while (etime < sim_time) {
+      // If the time step leads to exceeding the simulation time, shorten it for
+      // the last step
+      if (etime + dt > sim_time) {
+        dt = sim_time - etime;
+      }
+      // Perform a single time step
+      perform_timestep(state, state_tmp, flux, tend, dt);
+      // Inform the user
+#ifndef NO_INFORM
+      if (mainproc) {
+        printf("Elapsed Time: %lf / %lf\n", etime, sim_time);
+      }
+#endif
+      // Update the elapsed time and output counter
+      etime = etime + dt;
+      output_counter = output_counter + dt;
+      // If it's time for output, reset the counter, and do output
+      if (output_freq >= 0 && output_counter >= output_freq) {
+        output_counter = output_counter - output_freq;
+        output(state, etime);
+      }
+    }
+#pragma omp taskwait
+    auto t2 = std::chrono::steady_clock::now();
+    if (mainproc) {
+      std::cout << "CPU Time: "
+                << std::chrono::duration(t2 - t1).count() << " sec\n";
+    }
+
+    // Final reductions for mass, kinetic energy, and total energy
+    reductions(mass, te);
+  }
+
+  // Report relative drift in mass and total energy since initialization
+  if (mainproc) {
+    printf("d_mass: %le\n", (mass - mass0) / mass0);
+    printf("d_te: %le\n", (te - te0) / te0);
+  }
+
+  finalize();
+}
+
+// Performs a single dimensionally split time step using a simple low-storage
+// three-stage Runge-Kutta time integrator The dimensional splitting is a
+// second-order-accurate alternating Strang splitting in which the order of
+// directions is alternated each time step. The Runge-Kutta method used here is
+// defined as follows:
+// q* = q[n] + dt/3 * rhs(q[n])
+// q** = q[n] + dt/2 * rhs(q* )
+// q[n+1] = q[n] + dt/1 * rhs(q** )
+void perform_timestep(double *state, double *state_tmp, double *flux,
+                      double *tend, double dt) {
+  if (direction_switch) {
+    // x-direction first
+    semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+    semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+    semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+    // z-direction second
+    semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+    semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+    semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+  } else {
+    // z-direction first
+    semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+    semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+    semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+    // x-direction second
+    semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+    semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+    semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+  }
+  // Alternate the direction ordering for the next step (Strang splitting)
+  if (direction_switch) {
+    direction_switch = 0;
+  } else {
+    direction_switch = 1;
+  }
+}
+
+// Perform a single semi-discretized step in time with the form:
+// state_out = state_init + dt * rhs(state_forcing)
+// Meaning the step starts from state_init, computes the rhs using
+// state_forcing, and stores the result in state_out
+void semi_discrete_step(double *state_init, double *state_forcing,
+                        double *state_out, double dt, int dir, double *flux,
+                        double *tend) {
+  if (dir == DIR_X) {
+    // Set the halo values for this MPI task's fluid state in the x-direction
+    set_halo_values_x(state_forcing);
+    // Compute the time tendencies for the fluid state in the x-direction
+    compute_tendencies_x(state_forcing, flux, tend, dt);
+  } else if (dir == DIR_Z) {
+    // Set the halo values for this MPI task's fluid state in the z-direction
+    set_halo_values_z(state_forcing);
+    // Compute the time tendencies for the fluid state in the z-direction
+    compute_tendencies_z(state_forcing, flux, tend, dt);
+  }
+
+  // Apply the tendencies to the fluid state
+  // NOTE(review): depend(inout : asyncid) nowait chains this offloaded loop
+  // after the tendency kernels without a host sync — confirm ordering.
+#pragma omp target teams distribute parallel for simd collapse(3) \
+    depend(inout : asyncid) nowait
+  for (int ll = 0; ll < NUM_VARS; ll++) {
+    for (int k = 0; k < nz; k++) {
+      for (int i = 0; i < nx; i++) {
+        if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+          const double x = (i_beg + i + 0.5) * dx;
+          const double z = (k_beg + k + 0.5) * dz;
+          double wpert;
+          // Using sample_ellipse_cosine requires "acc routine" in OpenACC and
+          // "declare target" in OpenMP offload Neither of these are
+          // particularly well supported. So I'm manually inlining here wpert =
+          // sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. );
+          {
+            const double x0 = xlen / 8;
+            const double z0 = 1000;
+            const double xrad = 500;
+            const double zrad = 500;
+            const double amp = 0.01;
+            // Compute distance from bubble center
+            const double dist = sqrt(((x - x0) / xrad) * ((x - x0) / xrad) +
+                                     ((z - z0) / zrad) * ((z - z0) / zrad)) *
+                                pi / 2.;
+            // If the distance from bubble center is less than the radius,
+            // create a cos**2 profile
+            if (dist <= pi / 2.) {
+              wpert = amp * pow(cos(dist), 2.);
+            } else {
+              wpert = 0.;
+            }
+          }
+          // Add the w-momentum forcing for the gravity-waves test case
+          const int indw = ID_WMOM * nz * nx + k * nx + i;
+          tend[indw] += wpert * hy_dens_cell[hs + k];
+        }
+        // Forward-Euler update of this cell from state_init using the tendency
+        const int inds = ll * (nz + 2 * hs) * (nx + 2 * hs) +
+                         (k + hs) * (nx + 2 * hs) + i + hs;
+        const int indt = ll * nz * nx + k * nx + i;
+        state_out[inds] = state_init[inds] + dt * tend[indt];
+      }
+    }
+  }
+}
+
+// Compute the time tendencies of the fluid state using forcing in the
+// x-direction Since the halos are set in a separate routine, this will not
+// require MPI First, compute the flux vector at each cell interface in the
+// x-direction (including hyperviscosity) Then, compute the tendencies using
+// those fluxes
+void compute_tendencies_x(double *state, double *flux, double *tend,
+                          double dt) {
+  // Per-thread scratch: 4-point stencil, interpolated values, 3rd derivatives
+  double stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS];
+  // Compute the hyperviscosity coefficient
+  const double hv_coef = -hv_beta * dx / (16 * dt);
+  // Compute fluxes in the x-direction for each cell
+#pragma omp target teams distribute parallel for simd collapse(2) private( \
+    stencil, vals, d3_vals) depend(inout : asyncid) nowait
+  for (int k = 0; k < nz; k++) {
+    for (int i = 0; i < nx + 1; i++) {
+      // Use fourth-order interpolation from four cell averages to compute the
+      // value at the interface in question
+      for (int ll = 0; ll < NUM_VARS; ll++) {
+        for (int s = 0; s < sten_size; s++) {
+          const int inds = ll * (nz + 2 * hs) * (nx + 2 * hs) +
+                           (k + hs) * (nx + 2 * hs) + i + s;
+          stencil[s] = state[inds];
+        }
+        // Fourth-order-accurate interpolation of the state
+        vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 +
+                   7 * stencil[2] / 12 - stencil[3] / 12;
+        // First-order-accurate interpolation of the third spatial derivative of
+        // the state (for artificial viscosity)
+        d3_vals[ll] =
+            -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
+      }
+
+      // Compute density, u-wind, w-wind, potential temperature, and pressure
+      // (r,u,w,t,p respectively)
+      const double r = vals[ID_DENS] + hy_dens_cell[k + hs];
+      const double u = vals[ID_UMOM] / r;
+      const double w = vals[ID_WMOM] / r;
+      const double t = (vals[ID_RHOT] + hy_dens_theta_cell[k + hs]) / r;
+      const double p = C0 * pow((r * t), gamm);
+
+      // Compute the flux vector (with hyperviscosity via the d3 terms)
+      flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * u - hv_coef * d3_vals[ID_DENS];
+      flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * u * u + p - hv_coef * d3_vals[ID_UMOM];
+      flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * u * w - hv_coef * d3_vals[ID_WMOM];
+      flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * u * t - hv_coef * d3_vals[ID_RHOT];
+    }
+  }
+
+  // Use the fluxes to compute tendencies for each cell (flux divergence)
+#pragma omp target teams distribute parallel for simd collapse(3) \
+    depend(inout : asyncid) nowait
+  for (int ll = 0; ll < NUM_VARS; ll++) {
+    for (int k = 0; k < nz; k++) {
+      for (int i = 0; i < nx; i++) {
+        const int indt = ll * nz * nx + k * nx + i;
+        const int indf1 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i;
+        const int indf2 = ll * (nz + 1) * (nx + 1) + k * (nx + 1) + i + 1;
+        tend[indt] = -(flux[indf2] - flux[indf1]) / dx;
+      }
+    }
+  }
+}
+
+// Compute the time tendencies of the fluid state using forcing in the
+// z-direction Since the halos are set in a separate routine, this will not
+// require MPI First, compute the flux vector at each cell interface in the
+// z-direction (including hyperviscosity) Then, compute the tendencies using
+// those fluxes
+void compute_tendencies_z(double *state, double *flux, double *tend,
+                          double dt) {
+  // Per-thread scratch: 4-point stencil, interpolated values, 3rd derivatives
+  double stencil[4], d3_vals[NUM_VARS], vals[NUM_VARS];
+  // Compute the hyperviscosity coefficient
+  const double hv_coef = -hv_beta * dz / (16 * dt);
+  // Compute fluxes in the z-direction for each cell
+#pragma omp target teams distribute parallel for simd collapse(2) private( \
+    stencil, vals, d3_vals) depend(inout : asyncid) nowait
+  for (int k = 0; k < nz + 1; k++) {
+    for (int i = 0; i < nx; i++) {
+      // Use fourth-order interpolation from four cell averages to compute the
+      // value at the interface in question
+      for (int ll = 0; ll < NUM_VARS; ll++) {
+        for (int s = 0; s < sten_size; s++) {
+          const int inds = ll * (nz + 2 * hs) * (nx + 2 * hs) +
+                           (k + s) * (nx + 2 * hs) + i + hs;
+          stencil[s] = state[inds];
+        }
+        // Fourth-order-accurate interpolation of the state
+        vals[ll] = -stencil[0] / 12 + 7 * stencil[1] / 12 +
+                   7 * stencil[2] / 12 - stencil[3] / 12;
+        // First-order-accurate interpolation of the third spatial derivative of
+        // the state
+        d3_vals[ll] =
+            -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3];
+      }
+
+      // Compute density, u-wind, w-wind, potential temperature, and pressure
+      // (r,u,w,t,p respectively)
+      const double r = vals[ID_DENS] + hy_dens_int[k];
+      const double u = vals[ID_UMOM] / r;
+      double w = vals[ID_WMOM] / r;
+      const double t = (vals[ID_RHOT] + hy_dens_theta_int[k]) / r;
+      const double p = C0 * pow((r * t), gamm) - hy_pressure_int[k];
+      // Enforce vertical boundary condition and exact mass conservation
+      // (no flow through the bottom/top interfaces)
+      if (k == 0 || k == nz) {
+        w = 0;
+        d3_vals[ID_DENS] = 0;
+      }
+
+      // Compute the flux vector with hyperviscosity
+      flux[ID_DENS * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * w - hv_coef * d3_vals[ID_DENS];
+      flux[ID_UMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * w * u - hv_coef * d3_vals[ID_UMOM];
+      flux[ID_WMOM * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * w * w + p - hv_coef * d3_vals[ID_WMOM];
+      flux[ID_RHOT * (nz + 1) * (nx + 1) + k * (nx + 1) + i] =
+          r * w * t - hv_coef * d3_vals[ID_RHOT];
+    }
+  }
+
+  // Use the fluxes to compute tendencies for each cell (flux divergence plus
+  // the gravity source term on w-momentum)
+#pragma omp target teams distribute parallel for simd collapse(3) \
+    depend(inout : asyncid) nowait
+  for (int ll = 0; ll < NUM_VARS; ll++) {
+    for (int k = 0; k < nz; k++) {
+      for (int i = 0; i < nx; i++) {
+        const int indt = ll * nz * nx + k * nx + i;
+        const int indf1 = ll * (nz + 1) * (nx + 1) + (k) * (nx + 1) + i;
+        const int indf2 = ll * (nz + 1) * (nx + 1) + (k + 1) * (nx + 1) + i;
+        tend[indt] = -(flux[indf2] - flux[indf1]) / dz;
+        if (ll == ID_WMOM) {
+          const int inds = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+                           (k + hs) * (nx + 2 * hs) + i + hs;
+          tend[indt] = tend[indt] - state[inds] * grav;
+        }
+      }
+    }
+  }
+}
+
+// Set this MPI task's halo values in the x-direction. This routine will require
+// MPI
+// state: NUM_VARS x (nz+2*hs) x (nx+2*hs) array of conserved variables; the
+// device copy is kept current via OpenMP "target update" directives below.
+void set_halo_values_x(double *state) {
+ int ierr; // MPI return codes (captured but not checked)
+
+ if (nranks == 1) {
+
+ // Single rank: the periodic x-boundary wrap is applied locally on the
+ // device with no MPI traffic.
+ // NOTE(review): the four explicit halo offsets (0, 1, nx+hs, nx+hs+1)
+ // hard-code hs == 2 -- confirm hs is fixed at 2 in this build.
+#pragma omp target teams distribute parallel for simd collapse(2) \
+ depend(inout : asyncid) nowait
+ for (int ll = 0; ll < NUM_VARS; ll++) {
+ for (int k = 0; k < nz; k++) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ 0] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + hs - 2];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + hs - 1];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs + 1] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs + 1];
+ }
+ }
+
+ } else {
+
+ MPI_Request req_r[2], req_s[2];
+
+ // Prepost receives
+ ierr = MPI_Irecv(recvbuf_l, hs * nz * NUM_VARS, MPI_DOUBLE, left_rank, 0,
+ MPI_COMM_WORLD, &req_r[0]);
+ ierr = MPI_Irecv(recvbuf_r, hs * nz * NUM_VARS, MPI_DOUBLE, right_rank, 1,
+ MPI_COMM_WORLD, &req_r[1]);
+
+ // Pack the send buffers
+#pragma omp target teams distribute parallel for simd collapse(3) \
+ depend(inout : asyncid) nowait
+ for (int ll = 0; ll < NUM_VARS; ll++) {
+ for (int k = 0; k < nz; k++) {
+ for (int s = 0; s < hs; s++) {
+ sendbuf_l[ll * nz * hs + k * hs + s] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + hs + s];
+ sendbuf_r[ll * nz * hs + k * hs + s] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + nx + s];
+ }
+ }
+ }
+
+ // Copy the packed buffers device -> host; the taskwait guarantees the
+ // transfer (and the packing kernel it depends on) has completed before
+ // the buffers are handed to MPI below.
+#pragma omp target update from(sendbuf_l[ : nz * hs * NUM_VARS], \
+ sendbuf_r[ : nz * hs * NUM_VARS]) \
+ depend(inout : asyncid) nowait
+#pragma omp taskwait
+
+ // Fire off the sends
+ ierr = MPI_Isend(sendbuf_l, hs * nz * NUM_VARS, MPI_DOUBLE, left_rank, 1,
+ MPI_COMM_WORLD, &req_s[0]);
+ ierr = MPI_Isend(sendbuf_r, hs * nz * NUM_VARS, MPI_DOUBLE, right_rank, 0,
+ MPI_COMM_WORLD, &req_s[1]);
+
+ // Wait for receives to finish
+ ierr = MPI_Waitall(2, req_r, MPI_STATUSES_IGNORE);
+
+ // Push the received halos host -> device before the unpack kernel runs.
+#pragma omp target update to(recvbuf_l[ : nz * hs * NUM_VARS], \
+ recvbuf_r[ : nz * hs * NUM_VARS]) \
+ depend(inout : asyncid) nowait
+
+ // Unpack the receive buffers
+#pragma omp target teams distribute parallel for simd collapse(3) \
+ depend(inout : asyncid) nowait
+ for (int ll = 0; ll < NUM_VARS; ll++) {
+ for (int k = 0; k < nz; k++) {
+ for (int s = 0; s < hs; s++) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ s] = recvbuf_l[ll * nz * hs + k * hs + s];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) +
+ nx + hs + s] = recvbuf_r[ll * nz * hs + k * hs + s];
+ }
+ }
+ }
+
+ // Wait for sends to finish
+ ierr = MPI_Waitall(2, req_s, MPI_STATUSES_IGNORE);
+ }
+
+ // For the injection test case, rank 0 overwrites its left halo each step
+ // with a fast (u = 50), cooler (theta = 298) inflow confined to a band
+ // around 3/4 of the domain height.
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ if (myrank == 0) {
+#pragma omp target teams distribute parallel for simd collapse(2) \
+ depend(inout : asyncid) nowait
+ for (int k = 0; k < nz; k++) {
+ for (int i = 0; i < hs; i++) {
+ const double z = (k_beg + k + 0.5) * dz;
+ if (fabs(z - 3 * zlen / 4) <= zlen / 16) {
+ const int ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ const int ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ const int ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i;
+ state[ind_u] = (state[ind_r] + hy_dens_cell[k + hs]) * 50.;
+ state[ind_t] = (state[ind_r] + hy_dens_cell[k + hs]) * 298. -
+ hy_dens_theta_cell[k + hs];
+ }
+ }
+ }
+ }
+ }
+}
+
+// Set this MPI task's halo values in the z-direction. This does not require MPI
+// because there is no MPI decomposition in the vertical direction
+// state: NUM_VARS x (nz+2*hs) x (nx+2*hs) array of conserved variables.
+// Per variable:
+//   ID_WMOM: w-momentum is zeroed in all halo rows (no flow through the
+//            top/bottom boundaries).
+//   ID_UMOM: the nearest interior row is copied out, rescaled by the ratio of
+//            hydrostatic densities so the wind speed itself is preserved.
+//   others:  zero-gradient (nearest interior row copied unchanged).
+// NOTE(review): the explicit halo rows (0, 1, nz+hs, nz+hs+1) hard-code
+// hs == 2 -- confirm hs is fixed at 2 in this build.
+void set_halo_values_z(double *state) {
+#pragma omp target teams distribute parallel for simd collapse(2) \
+ depend(inout : asyncid) nowait
+ for (int ll = 0; ll < NUM_VARS; ll++) {
+ for (int i = 0; i < nx + 2 * hs; i++) {
+ if (ll == ID_WMOM) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = 0.;
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] = 0.;
+ } else if (ll == ID_UMOM) {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i] /
+ hy_dens_cell[hs] * hy_dens_cell[0];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i] /
+ hy_dens_cell[hs] * hy_dens_cell[1];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i] /
+ hy_dens_cell[nz + hs - 1] * hy_dens_cell[nz + hs];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i] /
+ hy_dens_cell[nz + hs - 1] * hy_dens_cell[nz + hs + 1];
+ } else {
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (0) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (hs) * (nx + 2 * hs) +
+ i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) + (nz + hs) * (nx + 2 * hs) +
+ i] = state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i];
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs + 1) * (nx + 2 * hs) + i] =
+ state[ll * (nz + 2 * hs) * (nx + 2 * hs) +
+ (nz + hs - 1) * (nx + 2 * hs) + i];
+ }
+ }
+ }
+}
+
+// Initialize MPI, decompose the domain, allocate model arrays, and set the
+// initial fluid and hydrostatic-background state.
+// argc/argv are forwarded to MPI_Init.
+void init(int *argc, char ***argv) {
+ int ierr = MPI_Init(argc, argv);
+
+ ierr = MPI_Comm_size(MPI_COMM_WORLD, &nranks);
+ ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+ // Partition the global x-direction nearly evenly among ranks; round()
+ // keeps per-rank cell counts within one of each other.
+ const double nper = ((double)nx_glob) / nranks;
+ // BUGFIX(review): assign the file-scope i_beg (later used by output() as
+ // the NetCDF hyperslab start) rather than declaring a shadowing local
+ // 'const int i_beg', which left the global unset for nranks > 1.
+ i_beg = round(nper * (myrank));
+ const int i_end = round(nper * ((myrank) + 1)) - 1;
+ nx = i_end - i_beg + 1;
+ // Periodic neighbors in the x-direction.
+ left_rank = myrank - 1;
+ if (left_rank == -1)
+ left_rank = nranks - 1;
+ right_rank = myrank + 1;
+ if (right_rank == nranks)
+ right_rank = 0;
+
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+ // YOU DON'T NEED TO ALTER ANYTHING BELOW THIS POINT IN THE CODE
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // Vertical direction isn't MPI-ized, so the rank's local values = the global
+ // values
+ k_beg = 0;
+ nz = nz_glob;
+ mainproc = (myrank == 0);
+
+ // Allocate the model data
+ state = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS *
+ sizeof(double));
+ state_tmp = (double *)malloc((nx + 2 * hs) * (nz + 2 * hs) * NUM_VARS *
+ sizeof(double));
+ flux = (double *)malloc((nx + 1) * (nz + 1) * NUM_VARS * sizeof(double));
+ tend = (double *)malloc(nx * nz * NUM_VARS * sizeof(double));
+ hy_dens_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
+ hy_dens_theta_cell = (double *)malloc((nz + 2 * hs) * sizeof(double));
+ hy_dens_int = (double *)malloc((nz + 1) * sizeof(double));
+ hy_dens_theta_int = (double *)malloc((nz + 1) * sizeof(double));
+ hy_pressure_int = (double *)malloc((nz + 1) * sizeof(double));
+ sendbuf_l = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ sendbuf_r = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ recvbuf_l = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+ recvbuf_r = (double *)malloc(hs * nz * NUM_VARS * sizeof(double));
+
+ // Define the maximum stable time step based on an assumed maximum wind speed
+ dt = dmin(dx, dz) / max_speed * cfl;
+ // Set initial elapsed model time and output_counter to zero
+ etime = 0.;
+ output_counter = 0.;
+
+ // If I'm the main process in MPI, display some grid information
+ if (mainproc) {
+ printf("nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob);
+ printf("dx,dz: %lf %lf\n", dx, dz);
+ printf("dt: %lf\n", dt);
+ }
+ // Want to make sure this info is displayed before further output
+ ierr = MPI_Barrier(MPI_COMM_WORLD);
+
+ // Scratch outputs filled by the data_spec sampling routines below.
+ double r, u, w, t, hr, ht;
+
+ //////////////////////////////////////////////////////////////////////////
+ // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
+ //////////////////////////////////////////////////////////////////////////
+ for (int k = 0; k < nz + 2 * hs; k++) {
+ for (int i = 0; i < nx + 2 * hs; i++) {
+ // Initialize the state to zero
+ for (int ll = 0; ll < NUM_VARS; ll++) {
+ const int inds =
+ ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = 0.;
+ }
+ // Use Gauss-Legendre quadrature to initialize a hydrostatic balance +
+ // temperature perturbation
+ for (int kk = 0; kk < nqpoints; kk++) {
+ for (int ii = 0; ii < nqpoints; ii++) {
+ // Compute the x,z location within the global domain based on cell and
+ // quadrature index
+ const double x =
+ (i_beg + i - hs + 0.5) * dx + (qpoints[ii] - 0.5) * dx;
+ const double z =
+ (k_beg + k - hs + 0.5) * dz + (qpoints[kk] - 0.5) * dz;
+
+ // Set the fluid state based on the user's specification
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(x, z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(x, z, r, u, w, t, hr, ht);
+ }
+
+ // Store into the fluid state array
+ int inds =
+ ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = state[inds] + r * qweights[ii] * qweights[kk];
+ inds =
+ ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] =
+ state[inds] + (r + hr) * u * qweights[ii] * qweights[kk];
+ inds =
+ ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] =
+ state[inds] + (r + hr) * w * qweights[ii] * qweights[kk];
+ inds =
+ ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state[inds] = state[inds] + ((r + hr) * (t + ht) - hr * ht) *
+ qweights[ii] * qweights[kk];
+ }
+ }
+ for (int ll = 0; ll < NUM_VARS; ll++) {
+ const int inds =
+ ll * (nz + 2 * hs) * (nx + 2 * hs) + k * (nx + 2 * hs) + i;
+ state_tmp[inds] = state[inds];
+ }
+ }
+ }
+ // Compute the hydrostatic background state over vertical cell averages
+ // NOTE(review): z below does not use qpoints[kk], so this quadrature loop
+ // accumulates the cell-center value only -- confirm this matches the
+ // reference implementation.
+ for (int k = 0; k < nz + 2 * hs; k++) {
+ hy_dens_cell[k] = 0.;
+ hy_dens_theta_cell[k] = 0.;
+ for (int kk = 0; kk < nqpoints; kk++) {
+ const double z = (k_beg + k - hs + 0.5) * dz;
+ // Set the fluid state based on the user's specification
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(0., z, r, u, w, t, hr, ht);
+ }
+ hy_dens_cell[k] = hy_dens_cell[k] + hr * qweights[kk];
+ hy_dens_theta_cell[k] = hy_dens_theta_cell[k] + hr * ht * qweights[kk];
+ }
+ }
+ // Compute the hydrostatic background state at vertical cell interfaces
+ for (int k = 0; k < nz + 1; k++) {
+ const double z = (k_beg + k) * dz;
+ if (data_spec_int == DATA_SPEC_COLLISION) {
+ collision(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_THERMAL) {
+ thermal(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
+ gravity_waves(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_DENSITY_CURRENT) {
+ density_current(0., z, r, u, w, t, hr, ht);
+ }
+ if (data_spec_int == DATA_SPEC_INJECTION) {
+ injection(0., z, r, u, w, t, hr, ht);
+ }
+ hy_dens_int[k] = hr;
+ hy_dens_theta_int[k] = hr * ht;
+ hy_pressure_int[k] = C0 * pow((hr * ht), gamm);
+ }
+}
+
+// This test case is initially balanced but injects fast, cold air from the left
+// boundary near the model top x and z are input coordinates at which to sample
+// r,u,w,t are output density, u-wind, w-wind, and potential temperature at that
+// location hr and ht are output background hydrostatic density and potential
+// temperature at that location
+// All perturbations are zero here; the actual injection is applied to rank
+// 0's left halo each step in set_halo_values_x().
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht) {
+ hydro_const_theta(z, hr, ht);
+ // Perturbations relative to the hydrostatic background start at zero.
+ r = 0.;
+ t = 0.;
+ u = 0.;
+ w = 0.;
+}
+
+// Initialize a density current (falling cold thermal that propagates along the
+// model bottom) x and z are input coordinates at which to sample r,u,w,t are
+// output density, u-wind, w-wind, and potential temperature at that location hr
+// and ht are output background hydrostatic density and potential temperature at
+// that location
+void density_current(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht) {
+ hydro_const_theta(z, hr, ht);
+ r = 0.;
+ t = 0.;
+ u = 0.;
+ w = 0.;
+ // Cold bubble: amplitude -20 cosine ellipse centered at (xlen/2, 5000)
+ // with radii 4000 x 2000.
+ t = t + sample_ellipse_cosine(x, z, -20., xlen / 2, 5000., 4000., 2000.);
+}
+
+// Gravity-wave test case: a uniform u-wind (15) flowing through a background
+// with constant Brunt-Vaisala frequency (0.02).
+// x and z are input coordinates at which to sample
+// r,u,w,t are output density, u-wind, w-wind, and potential temperature at that
+// location hr and ht are output background hydrostatic density and potential
+// temperature at that location
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht) {
+ hydro_const_bvfreq(z, 0.02, hr, ht);
+ r = 0.;
+ t = 0.;
+ u = 15.;
+ w = 0.;
+}
+
+// Rising thermal
+// x and z are input coordinates at which to sample
+// r,u,w,t are output density, u-wind, w-wind, and potential temperature at that
+// location hr and ht are output background hydrostatic density and potential
+// temperature at that location
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht) {
+ hydro_const_theta(z, hr, ht);
+ r = 0.;
+ t = 0.;
+ u = 0.;
+ w = 0.;
+ // Warm bubble: amplitude +3 cosine ellipse centered at (xlen/2, 2000)
+ // with radii 2000 x 2000.
+ t = t + sample_ellipse_cosine(x, z, 3., xlen / 2, 2000., 2000., 2000.);
+}
+
+// Colliding thermals
+// x and z are input coordinates at which to sample
+// r,u,w,t are output density, u-wind, w-wind, and potential temperature at that
+// location hr and ht are output background hydrostatic density and potential
+// temperature at that location
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht) {
+ hydro_const_theta(z, hr, ht);
+ r = 0.;
+ t = 0.;
+ u = 0.;
+ w = 0.;
+ // A warm (+20) bubble at height 2000 below a cold (-20) bubble at height
+ // 8000; both centered at xlen/2 with radii 2000 x 2000.
+ t = t + sample_ellipse_cosine(x, z, 20., xlen / 2, 2000., 2000., 2000.);
+ t = t + sample_ellipse_cosine(x, z, -20., xlen / 2, 8000., 2000., 2000.);
+}
+
+// Establish hydrostatic balance using constant potential temperature (thermally
+// neutral atmosphere) z is the input coordinate r and t are the output
+// background hydrostatic density and potential temperature
+// Physical constants (grav, cp, rd, p0, C0, gamm) are declared elsewhere in
+// this file.
+void hydro_const_theta(double z, double &r, double &t) {
+ const double theta0 = 300.; // Background potential temperature
+ const double exner0 = 1.; // Surface-level Exner pressure
+ // Establish hydrostatic balance first using Exner pressure
+ t = theta0; // Potential Temperature at z
+ const double exner = exner0 - grav * z / (cp * theta0); // Exner pressure at z
+ const double p = p0 * pow(exner, (cp / rd)); // Pressure at z
+ const double rt = pow((p / C0), (1. / gamm)); // rho*theta at z
+ r = rt / t; // Density at z
+}
+
+// Establish hydrostatic balance using constant Brunt-Vaisala frequency
+// z is the input coordinate
+// bv_freq0 is the constant Brunt-Vaisala frequency
+// r and t are the output background hydrostatic density and potential
+// temperature
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t) {
+ const double theta0 = 300.; // Background potential temperature
+ const double exner0 = 1.; // Surface-level Exner pressure
+ // Potential temperature grows exponentially with height for constant N.
+ t = theta0 * exp(bv_freq0 * bv_freq0 / grav * z); // Pot temp at z
+ const double exner = exner0 - grav * grav / (cp * bv_freq0 * bv_freq0) *
+ (t - theta0) /
+ (t * theta0); // Exner pressure at z
+ const double p = p0 * pow(exner, (cp / rd)); // Pressure at z
+ const double rt = pow((p / C0), (1. / gamm)); // rho*theta at z
+ r = rt / t; // Density at z
+}
+
+// Sample from an ellipse of a specified center, radius, and amplitude at a
+// specified location x and z are input coordinates amp,x0,z0,xrad,zrad are
+// input amplitude, center, and radius of the ellipse
+// Returns amp * cos^2 of the scaled elliptical distance inside the ellipse,
+// and exactly 0 outside, so the perturbation goes smoothly to zero at the
+// edge.
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+ double z0, double xrad, double zrad) {
+ double dist;
+ // Compute distance from bubble center
+ // (normalized so that dist == pi/2 on the ellipse boundary)
+ dist = sqrt(((x - x0) / xrad) * ((x - x0) / xrad) +
+ ((z - z0) / zrad) * ((z - z0) / zrad)) *
+ pi / 2.;
+ // If the distance from bubble center is less than the radius, create a cos**2
+ // profile
+ if (dist <= pi / 2.) {
+ return amp * pow(cos(dist), 2.);
+ } else {
+ return 0.;
+ }
+}
+
+// Output the fluid state (state) to a NetCDF file at a given elapsed model time
+// (etime) The file I/O uses parallel-netcdf, the only external library required
+// for this mini-app. If it's too cumbersome, you can comment the I/O out, but
+// you'll miss out on some potentially cool graphics
+// The file "output.nc" is created when etime == 0 and appended to afterwards;
+// the file-scope counter num_out is the record index along the unlimited time
+// dimension and is incremented on every call.
+void output(double *state, double etime) {
+ int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid,
+ theta_varid, t_varid, dimids[3];
+ MPI_Offset st1[1], ct1[1], st3[3], ct3[3];
+ // Temporary arrays to hold density, u-wind, w-wind, and potential temperature
+ // (theta)
+ double *dens, *uwnd, *wwnd, *theta;
+ double *etimearr;
+
+ // Bring the device copy of state back to the host; the taskwait guarantees
+ // the transfer (and any kernels it depends on) has finished before the
+ // host-side packing loop below reads state.
+#pragma omp target update from( \
+ state[ : (nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS]) \
+ depend(inout : asyncid) nowait
+#pragma omp taskwait
+
+ // Inform the user
+ if (mainproc) {
+ printf("*** OUTPUT ***\n");
+ }
+ // Allocate some (big) temp arrays
+ dens = (double *)malloc(nx * nz * sizeof(double));
+ uwnd = (double *)malloc(nx * nz * sizeof(double));
+ wwnd = (double *)malloc(nx * nz * sizeof(double));
+ theta = (double *)malloc(nx * nz * sizeof(double));
+ etimearr = (double *)malloc(1 * sizeof(double));
+
+ // If the elapsed time is zero, create the file. Otherwise, open the file
+ if (etime == 0) {
+ // Create the file
+ ncwrap(ncmpi_create(MPI_COMM_WORLD, "output.nc", NC_CLOBBER, MPI_INFO_NULL,
+ &ncid),
+ __LINE__);
+ // Create the dimensions
+ ncwrap(ncmpi_def_dim(ncid, "t", (MPI_Offset)NC_UNLIMITED, &t_dimid),
+ __LINE__);
+ ncwrap(ncmpi_def_dim(ncid, "x", (MPI_Offset)nx_glob, &x_dimid), __LINE__);
+ ncwrap(ncmpi_def_dim(ncid, "z", (MPI_Offset)nz_glob, &z_dimid), __LINE__);
+ // Create the variables
+ dimids[0] = t_dimid;
+ ncwrap(ncmpi_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
+ dimids[0] = t_dimid;
+ dimids[1] = z_dimid;
+ dimids[2] = x_dimid;
+ ncwrap(ncmpi_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid),
+ __LINE__);
+ ncwrap(ncmpi_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid),
+ __LINE__);
+ ncwrap(ncmpi_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid),
+ __LINE__);
+ ncwrap(ncmpi_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid),
+ __LINE__);
+ // End "define" mode
+ ncwrap(ncmpi_enddef(ncid), __LINE__);
+ } else {
+ // Open the file
+ ncwrap(
+ ncmpi_open(MPI_COMM_WORLD, "output.nc", NC_WRITE, MPI_INFO_NULL, &ncid),
+ __LINE__);
+ // Get the variable IDs
+ ncwrap(ncmpi_inq_varid(ncid, "dens", &dens_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "theta", &theta_varid), __LINE__);
+ ncwrap(ncmpi_inq_varid(ncid, "t", &t_varid), __LINE__);
+ }
+
+ // Store perturbed values in the temp arrays for output
+ // (winds and theta are converted from conserved momenta / rho*theta).
+ for (int k = 0; k < nz; k++) {
+ for (int i = 0; i < nx; i++) {
+ const int ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ const int ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ const int ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ const int ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ dens[k * nx + i] = state[ind_r];
+ uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
+ wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
+ theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) /
+ (hy_dens_cell[k + hs] + state[ind_r]) -
+ hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
+ }
+ }
+
+ // Write the grid data to file with all the processes writing collectively
+ st3[0] = num_out;
+ st3[1] = k_beg;
+ st3[2] = i_beg;
+ ct3[0] = 1;
+ ct3[1] = nz;
+ ct3[2] = nx;
+ ncwrap(ncmpi_put_vara_double_all(ncid, dens_varid, st3, ct3, dens), __LINE__);
+ ncwrap(ncmpi_put_vara_double_all(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
+ ncwrap(ncmpi_put_vara_double_all(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
+ ncwrap(ncmpi_put_vara_double_all(ncid, theta_varid, st3, ct3, theta),
+ __LINE__);
+
+ // Only the main process needs to write the elapsed time
+ // Begin "independent" write mode
+ ncwrap(ncmpi_begin_indep_data(ncid), __LINE__);
+ // write elapsed time to file
+ if (mainproc) {
+ st1[0] = num_out;
+ ct1[0] = 1;
+ etimearr[0] = etime;
+ ncwrap(ncmpi_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
+ }
+ // End "independent" write mode
+ ncwrap(ncmpi_end_indep_data(ncid), __LINE__);
+
+ // Close the file
+ ncwrap(ncmpi_close(ncid), __LINE__);
+
+ // Increment the number of outputs
+ num_out = num_out + 1;
+
+ // Deallocate the temp arrays
+ free(dens);
+ free(uwnd);
+ free(wwnd);
+ free(theta);
+ free(etimearr);
+}
+
+// Error reporting routine for the PNetCDF I/O
+// ierr: return code from a PnetCDF call; line: caller's __LINE__ for the
+// diagnostic. Any error is fatal: the message is printed and the program
+// exits.
+void ncwrap(int ierr, int line) {
+ if (ierr != NC_NOERR) {
+ printf("NetCDF Error at line: %d\n", line);
+ printf("%s\n", ncmpi_strerror(ierr));
+ exit(-1);
+ }
+}
+
+// Release all model arrays allocated in init() and shut down MPI.
+void finalize() {
+ free(state);
+ free(state_tmp);
+ free(flux);
+ free(tend);
+ free(hy_dens_cell);
+ free(hy_dens_theta_cell);
+ free(hy_dens_int);
+ free(hy_dens_theta_int);
+ free(hy_pressure_int);
+ free(sendbuf_l);
+ free(sendbuf_r);
+ free(recvbuf_l);
+ free(recvbuf_r);
+ // MPI_Finalize return code is captured but intentionally unused.
+ const int ierr = MPI_Finalize();
+}
+
+// Compute reduced quantities for error checking without resorting to the
+// "ncdiff" tool
+// mass: output global domain mass; te: output global domain total (kinetic +
+// internal) energy. Both are summed locally on the device, then combined
+// across ranks with MPI_Allreduce.
+// NOTE(review): unlike the other kernels in this file, this target region has
+// no depend(inout: asyncid)/nowait -- confirm all preceding asynchronous
+// kernels have completed before it reads state.
+void reductions(double &mass, double &te) {
+ mass = 0;
+ te = 0;
+#pragma omp target teams distribute parallel for simd collapse(2) \
+ reduction(+ : mass, te)
+ for (int k = 0; k < nz; k++) {
+ for (int i = 0; i < nx; i++) {
+ const int ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ const int ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ const int ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ const int ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) +
+ (k + hs) * (nx + 2 * hs) + i + hs;
+ const double r = state[ind_r] + hy_dens_cell[hs + k]; // Density
+ const double u = state[ind_u] / r; // U-wind
+ const double w = state[ind_w] / r; // W-wind
+ const double th = (state[ind_t] + hy_dens_theta_cell[hs + k]) /
+ r; // Potential Temperature (theta)
+ const double p = C0 * pow(r * th, gamm); // Pressure
+ const double t = th / pow(p0 / p, rd / cp); // Temperature
+ const double ke = r * (u * u + w * w); // Kinetic Energy
+ const double ie = r * cv * t; // Internal Energy
+ mass += r * dx * dz; // Accumulate domain mass
+ te += (ke + ie) * dx * dz; // Accumulate domain total energy
+ }
+ }
+ // Combine the per-rank partial sums into global totals.
+ double glob[2], loc[2];
+ loc[0] = mass;
+ loc[1] = te;
+ int ierr = MPI_Allreduce(loc, glob, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+ mass = glob[0];
+ te = glob[1];
+}
diff --git a/cpp/miniWeather_mpi_parallelfor.cpp b/cpp/miniWeather_mpi_parallelfor.cpp
deleted file mode 100644
index 0782a025..00000000
--- a/cpp/miniWeather_mpi_parallelfor.cpp
+++ /dev/null
@@ -1,901 +0,0 @@
-
-//////////////////////////////////////////////////////////////////////////////////////////
-// miniWeather
-// Author: Matt Norman , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-
-#include <stdlib.h>
-#include <math.h>
-#include <stdio.h>
-#include "const.h"
-#include "pnetcdf.h"
-#include <chrono>
-#include <iostream>
-
-// We're going to define all arrays on the host because this doesn't use parallel_for
-typedef yakl::Array real1d;
-typedef yakl::Array real2d;
-typedef yakl::Array real3d;
-typedef yakl::Array doub1d;
-typedef yakl::Array doub2d;
-typedef yakl::Array doub3d;
-
-typedef yakl::Array realConst1d;
-typedef yakl::Array realConst2d;
-typedef yakl::Array realConst3d;
-typedef yakl::Array doubConst1d;
-typedef yakl::Array doubConst2d;
-typedef yakl::Array doubConst3d;
-
-// Some arrays still need to be on the host, so we will explicitly create Host Array typedefs
-typedef yakl::Array real1dHost;
-typedef yakl::Array real2dHost;
-typedef yakl::Array real3dHost;
-typedef yakl::Array doub1dHost;
-typedef yakl::Array doub2dHost;
-typedef yakl::Array doub3dHost;
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-struct Fixed_data {
- int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
- int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
- int nranks, myrank; //Number of MPI ranks and my rank id
- int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
- int mainproc; //Am I the main process (rank == 0)?
- realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-};
-
-///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are dynamics over the course of the simulation
-///////////////////////////////////////////////////////////////////////////////////////
-
-//Declaring the functions defined after "main"
-void init ( real3d &state , real &dt , Fixed_data &fixed_data );
-void finalize ( );
-YAKL_INLINE void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-YAKL_INLINE void hydro_const_theta ( real z , real &r , real &t );
-YAKL_INLINE void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t );
-YAKL_INLINE real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad );
-void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data );
-void ncwrap ( int ierr , int line );
-void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data );
-void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data );
-void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data );
-void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data );
-void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data );
-
-
-///////////////////////////////////////////////////////////////////////////////////////
-// THE MAIN PROGRAM STARTS HERE
-///////////////////////////////////////////////////////////////////////////////////////
-int main(int argc, char **argv) {
- MPI_Init(&argc,&argv);
- yakl::init();
- {
- Fixed_data fixed_data;
- real3d state;
- real dt; //Model time step (seconds)
-
- // Init allocates the state and hydrostatic arrays hy_*
- init( state , dt , fixed_data );
-
- auto &mainproc = fixed_data.mainproc;
-
- //Initial reductions for mass, kinetic energy, and total energy
- double mass0, te0;
- reductions(state,mass0,te0,fixed_data);
-
- int num_out = 0; //The number of outputs performed so far
- real output_counter = 0; //Helps determine when it's time to do output
- real etime = 0;
-
- //Output the initial state
- if (output_freq >= 0) {
- output(state,etime,num_out,fixed_data);
- }
-
- int direction_switch = 1; // Tells dimensionally split which order to take x,z solves
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
- yakl::fence();
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,dt,direction_switch,fixed_data);
- //Inform the user
- #ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
- #endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime,num_out,fixed_data);
- }
- }
- yakl::fence();
- auto t2 = std::chrono::steady_clock::now();
- if (mainproc) {
- std::cout << "CPU Time: " << std::chrono::duration<double>(t2-t1).count() << " sec\n";
- }
-
- //Final reductions for mass, kinetic energy, and total energy
- double mass, te;
- reductions(state,mass,te,fixed_data);
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
- }
-
- finalize();
- }
- yakl::finalize();
- MPI_Finalize();
-}
-
-
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q_n + dt/3 * rhs(q_n)
-// q** = q_n + dt/2 * rhs(q* )
-// q_n+1 = q_n + dt/1 * rhs(q**)
-void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
-
- real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs);
-
- if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
- } else {
- //z-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
- //x-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
- }
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
-}
-
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &i_beg = fixed_data.i_beg ;
- auto &k_beg = fixed_data.k_beg ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
-
- real3d tend("tend",NUM_VARS,nz,nx);
-
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- yakl::timer_start("halo x");
- set_halo_values_x(state_forcing,fixed_data);
- yakl::timer_stop("halo x");
- //Compute the time tendencies for the fluid state in the x-direction
- yakl::timer_start("tendencies x");
- compute_tendencies_x(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies x");
- } else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- yakl::timer_start("halo z");
- set_halo_values_z(state_forcing,fixed_data);
- yakl::timer_stop("halo z");
- //Compute the time tendencies for the fluid state in the z-direction
- yakl::timer_start("tendencies z");
- compute_tendencies_z(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies z");
- }
-
- //Apply the tendencies to the fluid state
- // for (ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) {
- if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) {
- real x = (i_beg + i+0.5)*dx;
- real z = (k_beg + k+0.5)*dz;
- real wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8. ,1000. , 500. ,500. );
- tend(ID_WMOM,k,i) += wpert*hy_dens_cell(hs+k);
- }
- state_out(ll,hs+k,hs+i) = state_init(ll,hs+k,hs+i) + dt * tend(ll,k,i);
- });
- yakl::timer_stop("apply tendencies");
-}
-
-
-//Compute the time tendencies of the fluid state using forcing in the x-direction
-//Since the halos are set in a separate routine, this will not require MPI
-//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity)
-//Then, compute the tendencies using those fluxes
-void compute_tendencies_x( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
- auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell;
-
- real3d flux("flux",NUM_VARS,nz,nx+1);
-
- //Compute the hyperviscosity coefficient
- real hv_coef = -hv_beta * dx / (16*dt);
- //Compute fluxes in the x-direction for each cell
- // for (k=0; k(nz,nx+1) , YAKL_LAMBDA (int k, int i ) {
- SArray<real,1,4> stencil;
- SArray<real,1,NUM_VARS> d3_vals;
- SArray<real,1,NUM_VARS> vals;
-
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) {
- tend(ll,k,i) = -( flux(ll,k,i+1) - flux(ll,k,i) ) / dx;
- });
-}
-
-
-//Compute the time tendencies of the fluid state using forcing in the z-direction
-//Since the halos are set in a separate routine, this will not require MPI
-//First, compute the flux vector at each cell interface in the z-direction (including hyperviscosity)
-//Then, compute the tendencies using those fluxes
-void compute_tendencies_z( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_int = fixed_data.hy_dens_int ;
- auto &hy_dens_theta_int = fixed_data.hy_dens_theta_int ;
- auto &hy_pressure_int = fixed_data.hy_pressure_int ;
-
- real3d flux("flux",NUM_VARS,nz+1,nx);
-
- //Compute the hyperviscosity coefficient
- real hv_coef = -hv_beta * dz / (16*dt);
- //Compute fluxes in the z-direction for each cell
- // for (k=0; k(nz+1,nx) , YAKL_LAMBDA (int k, int i) {
- SArray<real,1,4> stencil;
- SArray<real,1,NUM_VARS> d3_vals;
- SArray<real,1,NUM_VARS> vals;
-
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll(NUM_VARS,nz,nx) , YAKL_LAMBDA ( int ll, int k, int i ) {
- tend(ll,k,i) = -( flux(ll,k+1,i) - flux(ll,k,i) ) / dz;
- if (ll == ID_WMOM) {
- tend(ll,k,i) -= state(ID_DENS,hs+k,hs+i)*grav;
- }
- });
-}
-
-
-
-//Set this MPI task's halo values in the x-direction. This routine will require MPI
-void set_halo_values_x( real3d const &state , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &k_beg = fixed_data.k_beg ;
- auto &left_rank = fixed_data.left_rank ;
- auto &right_rank = fixed_data.right_rank ;
- auto &myrank = fixed_data.myrank ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
- auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell;
-
- int ierr;
- MPI_Request req_r[2], req_s[2];
-
- if (fixed_data.nranks == 1) {
-
- parallel_for( SimpleBounds<2>(NUM_VARS,nz) , YAKL_LAMBDA (int ll, int k) {
- state(ll,hs+k,0 ) = state(ll,hs+k,nx+hs-2);
- state(ll,hs+k,1 ) = state(ll,hs+k,nx+hs-1);
- state(ll,hs+k,nx+hs ) = state(ll,hs+k,hs );
- state(ll,hs+k,nx+hs+1) = state(ll,hs+k,hs+1 );
- });
-
- } else {
-
- real3d sendbuf_l ( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank
- real3d sendbuf_r ( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank
- real3d recvbuf_l ( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank
- real3d recvbuf_r ( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank
- #ifndef GPU_AWARE_MPI
- real3dHost sendbuf_l_cpu( "sendbuf_l" , NUM_VARS,nz,hs ); //Buffer to send data to the left MPI rank (CPU copy)
- real3dHost sendbuf_r_cpu( "sendbuf_r" , NUM_VARS,nz,hs ); //Buffer to send data to the right MPI rank (CPU copy)
- real3dHost recvbuf_l_cpu( "recvbuf_l" , NUM_VARS,nz,hs ); //Buffer to receive data from the left MPI rank (CPU copy)
- real3dHost recvbuf_r_cpu( "recvbuf_r" , NUM_VARS,nz,hs ); //Buffer to receive data from the right MPI rank (CPU copy)
- #endif
-
- //Prepost receives
- #ifdef GPU_AWARE_MPI
- yakl::fence();
- ierr = MPI_Irecv(recvbuf_l.data(),hs*nz*NUM_VARS,mpi_type, left_rank,0,MPI_COMM_WORLD,&req_r[0]);
- ierr = MPI_Irecv(recvbuf_r.data(),hs*nz*NUM_VARS,mpi_type,right_rank,1,MPI_COMM_WORLD,&req_r[1]);
- #else
- ierr = MPI_Irecv(recvbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,0,MPI_COMM_WORLD,&req_r[0]);
- ierr = MPI_Irecv(recvbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,1,MPI_COMM_WORLD,&req_r[1]);
- #endif
-
- //Pack the send buffers
- // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) {
- sendbuf_l(ll,k,s) = state(ll,k+hs,hs+s);
- sendbuf_r(ll,k,s) = state(ll,k+hs,nx+s);
- });
- yakl::fence();
-
- #ifndef GPU_AWARE_MPI
- // This will copy from GPU to host
- sendbuf_l.deep_copy_to(sendbuf_l_cpu);
- sendbuf_r.deep_copy_to(sendbuf_r_cpu);
- yakl::fence();
- #endif
-
- //Fire off the sends
- #ifdef GPU_AWARE_MPI
- ierr = MPI_Isend(sendbuf_l.data(),hs*nz*NUM_VARS,mpi_type, left_rank,1,MPI_COMM_WORLD,&req_s[0]);
- ierr = MPI_Isend(sendbuf_r.data(),hs*nz*NUM_VARS,mpi_type,right_rank,0,MPI_COMM_WORLD,&req_s[1]);
- #else
- ierr = MPI_Isend(sendbuf_l_cpu.data(),hs*nz*NUM_VARS,mpi_type, left_rank,1,MPI_COMM_WORLD,&req_s[0]);
- ierr = MPI_Isend(sendbuf_r_cpu.data(),hs*nz*NUM_VARS,mpi_type,right_rank,0,MPI_COMM_WORLD,&req_s[1]);
- #endif
-
- //Wait for receives to finish
- ierr = MPI_Waitall(2,req_r,MPI_STATUSES_IGNORE);
-
- #ifndef GPU_AWARE_MPI
- // This will copy from host to GPU
- recvbuf_l_cpu.deep_copy_to(recvbuf_l);
- recvbuf_r_cpu.deep_copy_to(recvbuf_r);
- yakl::fence();
- #endif
-
- //Unpack the receive buffers
- // for (ll=0; ll(NUM_VARS,nz,hs) , YAKL_LAMBDA (int ll, int k, int s) {
- state(ll,k+hs,s ) = recvbuf_l(ll,k,s);
- state(ll,k+hs,nx+hs+s) = recvbuf_r(ll,k,s);
- });
- yakl::fence();
-
- //Wait for sends to finish
- ierr = MPI_Waitall(2,req_s,MPI_STATUSES_IGNORE);
-
- }
-
- if (data_spec_int == DATA_SPEC_INJECTION) {
- if (myrank == 0) {
- // for (k=0; k(nz,hs) , YAKL_LAMBDA (int k, int i) {
- double z = (k_beg + k+0.5)*dz;
- if (abs(z-3*zlen/4) <= zlen/16) {
- state(ID_UMOM,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 50;
- state(ID_RHOT,hs+k,i) = (state(ID_DENS,hs+k,i)+hy_dens_cell(hs+k)) * 298 - hy_dens_theta_cell(hs+k);
- }
- });
- }
- }
-}
-
-
-//Set this MPI task's halo values in the z-direction. This does not require MPI because there is no MPI
-//decomposition in the vertical direction
-void set_halo_values_z( real3d const &state , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
-
- // for (ll=0; ll(NUM_VARS,nx+2*hs) , YAKL_LAMBDA (int ll, int i) {
- if (ll == ID_WMOM) {
- state(ll,0 ,i) = 0.;
- state(ll,1 ,i) = 0.;
- state(ll,nz+hs ,i) = 0.;
- state(ll,nz+hs+1,i) = 0.;
- } else if (ll == ID_UMOM) {
- state(ll,0 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(0 );
- state(ll,1 ,i) = state(ll,hs ,i) / hy_dens_cell(hs ) * hy_dens_cell(1 );
- state(ll,nz+hs ,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs );
- state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i) / hy_dens_cell(nz+hs-1) * hy_dens_cell(nz+hs+1);
- } else {
- state(ll,0 ,i) = state(ll,hs ,i);
- state(ll,1 ,i) = state(ll,hs ,i);
- state(ll,nz+hs ,i) = state(ll,nz+hs-1,i);
- state(ll,nz+hs+1,i) = state(ll,nz+hs-1,i);
- }
- });
-}
-
-
-void init( real3d &state , real &dt , Fixed_data &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &i_beg = fixed_data.i_beg ;
- auto &k_beg = fixed_data.k_beg ;
- auto &left_rank = fixed_data.left_rank ;
- auto &right_rank = fixed_data.right_rank ;
- auto &nranks = fixed_data.nranks ;
- auto &myrank = fixed_data.myrank ;
- auto &mainproc = fixed_data.mainproc ;
- int ierr;
-
- ierr = MPI_Comm_size(MPI_COMM_WORLD,&nranks);
- ierr = MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
- real nper = ( (double) nx_glob ) / nranks;
- i_beg = round( nper* (myrank) );
- int i_end = round( nper*((myrank)+1) )-1;
- nx = i_end - i_beg + 1;
- left_rank = myrank - 1;
- if (left_rank == -1) left_rank = nranks-1;
- right_rank = myrank + 1;
- if (right_rank == nranks) right_rank = 0;
-
- //Vertical direction isn't MPI-ized, so the rank's local values = the global values
- k_beg = 0;
- nz = nz_glob;
- mainproc = (myrank == 0);
-
- //Allocate the model data
- state = real3d( "state" , NUM_VARS,nz+2*hs,nx+2*hs);
-
- //Define the maximum stable time step based on an assumed maximum wind speed
- dt = min(dx,dz) / max_speed * cfl;
-
- //If I'm the main process in MPI, display some grid information
- if (mainproc) {
- printf( "nx_glob, nz_glob: %d %d\n", nx_glob, nz_glob);
- printf( "dx,dz: %lf %lf\n",dx,dz);
- printf( "dt: %lf\n",dt);
- }
- //Want to make sure this info is displayed before further output
- ierr = MPI_Barrier(MPI_COMM_WORLD);
-
- // Define quadrature weights and points
- const int nqpoints = 3;
- SArray<real,1,nqpoints> qpoints;
- SArray<real,1,nqpoints> qweights;
-
- qpoints(0) = 0.112701665379258311482073460022;
- qpoints(1) = 0.500000000000000000000000000000;
- qpoints(2) = 0.887298334620741688517926539980;
-
- qweights(0) = 0.277777777777777777777777777779;
- qweights(1) = 0.444444444444444444444444444444;
- qweights(2) = 0.277777777777777777777777777779;
-
- //////////////////////////////////////////////////////////////////////////
- // Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
- //////////////////////////////////////////////////////////////////////////
- // for (k=0; k(nz+2*hs,nx+2*hs) , YAKL_LAMBDA (int k, int i) {
- //Initialize the state to zero
- for (int ll=0; ll(nz,nx) , YAKL_LAMBDA (int k, int i) {
- dens (k,i) = state(ID_DENS,hs+k,hs+i);
- uwnd (k,i) = state(ID_UMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) );
- wwnd (k,i) = state(ID_WMOM,hs+k,hs+i) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) );
- theta(k,i) = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / ( hy_dens_cell(hs+k) + state(ID_DENS,hs+k,hs+i) ) - hy_dens_theta_cell(hs+k) / hy_dens_cell(hs+k);
- });
- yakl::fence();
-
- //Write the grid data to file with all the processes writing collectively
- st3[0] = num_out; st3[1] = k_beg; st3[2] = i_beg;
- ct3[0] = 1 ; ct3[1] = nz ; ct3[2] = nx ;
- ncwrap( ncmpi_put_vara_double_all( ncid , dens_varid , st3 , ct3 , dens .createHostCopy().data() ) , __LINE__ );
- ncwrap( ncmpi_put_vara_double_all( ncid , uwnd_varid , st3 , ct3 , uwnd .createHostCopy().data() ) , __LINE__ );
- ncwrap( ncmpi_put_vara_double_all( ncid , wwnd_varid , st3 , ct3 , wwnd .createHostCopy().data() ) , __LINE__ );
- ncwrap( ncmpi_put_vara_double_all( ncid , theta_varid , st3 , ct3 , theta.createHostCopy().data() ) , __LINE__ );
-
- //Only the main process needs to write the elapsed time
- //Begin "independent" write mode
- ncwrap( ncmpi_begin_indep_data(ncid) , __LINE__ );
- //write elapsed time to file
- if (mainproc) {
- st1[0] = num_out;
- ct1[0] = 1;
- double etimearr[1];
- etimearr[0] = etime; ncwrap( ncmpi_put_vara_double( ncid , t_varid , st1 , ct1 , etimearr ) , __LINE__ );
- }
- //End "independent" write mode
- ncwrap( ncmpi_end_indep_data(ncid) , __LINE__ );
-
- //Close the file
- ncwrap( ncmpi_close(ncid) , __LINE__ );
-
- //Increment the number of outputs
- num_out = num_out + 1;
-}
-
-
-//Error reporting routine for the PNetCDF I/O
-void ncwrap( int ierr , int line ) {
- if (ierr != NC_NOERR) {
- printf("NetCDF Error at line: %d\n", line);
- printf("%s\n",ncmpi_strerror(ierr));
- exit(-1);
- }
-}
-
-
-void finalize() {
-}
-
-
-//Compute reduced quantities for error checking without resorting to the "ncdiff" tool
-void reductions( realConst3d state, double &mass , double &te , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
- auto &hy_dens_theta_cell = fixed_data.hy_dens_theta_cell;
-
- doub2d mass2d("mass2d",nz,nx);
- doub2d te2d ("te2d ",nz,nx);
-
- // for (k=0; k(nz,nx) , YAKL_LAMBDA (int k, int i) {
- double r = state(ID_DENS,hs+k,hs+i) + hy_dens_cell(hs+k); // Density
- double u = state(ID_UMOM,hs+k,hs+i) / r; // U-wind
- double w = state(ID_WMOM,hs+k,hs+i) / r; // W-wind
- double th = ( state(ID_RHOT,hs+k,hs+i) + hy_dens_theta_cell(hs+k) ) / r; // Potential Temperature (theta)
- double p = C0*pow(r*th,gamm); // Pressure
- double t = th / pow(p0/p,rd/cp); // Temperature
- double ke = r*(u*u+w*w); // Kinetic Energy
- double ie = r*cv*t; // Internal Energy
- mass2d(k,i) = r *dx*dz; // Accumulate domain mass
- te2d (k,i) = (ke + ie)*dx*dz; // Accumulate domain total energy
- });
- mass = yakl::intrinsics::sum( mass2d );
- te = yakl::intrinsics::sum( te2d );
-
- double glob[2], loc[2];
- loc[0] = mass;
- loc[1] = te;
- int ierr = MPI_Allreduce(loc,glob,2,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
- mass = glob[0];
- te = glob[1];
-}
-
-
diff --git a/cpp/miniWeather_serial.cpp b/cpp/miniWeather_serial.cpp
index abd1de96..4a501068 100644
--- a/cpp/miniWeather_serial.cpp
+++ b/cpp/miniWeather_serial.cpp
@@ -2,382 +2,488 @@
//////////////////////////////////////////////////////////////////////////////////////////
// miniWeather
// Author: Matt Norman , Oak Ridge National Laboratory
-// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows
-// For documentation, please see the attached documentation in the "documentation" folder
+// This code simulates dry, stratified, compressible, non-hydrostatic fluid
+// flows For documentation, please see the attached documentation in the
+// "documentation" folder
//
//////////////////////////////////////////////////////////////////////////////////////////
-#include
-#include
-#include
-#include
-#include "const.h"
#include "pnetcdf.h"
#include
+#include
+#include
+#include
+#include
+#include
+#include
-// We're going to define all arrays on the host because this doesn't use parallel_for
-typedef yakl::Array real1d;
-typedef yakl::Array real2d;
-typedef yakl::Array real3d;
-typedef yakl::Array doub1d;
-typedef yakl::Array doub2d;
-typedef yakl::Array doub3d;
-
-typedef yakl::Array realConst1d;
-typedef yakl::Array realConst2d;
-typedef yakl::Array realConst3d;
-typedef yakl::Array doubConst1d;
-typedef yakl::Array doubConst2d;
-typedef yakl::Array doubConst3d;
+constexpr double pi = 3.14159265358979323846264338327; // Pi
+constexpr double grav = 9.8; // Gravitational acceleration (m / s^2)
+constexpr double cp = 1004.; // Specific heat of dry air at constant pressure
+constexpr double cv = 717.; // Specific heat of dry air at constant volume
+constexpr double rd =
+ 287.; // Dry air constant for equation of state (P=rho*rd*T)
+constexpr double p0 = 1.e5; // Standard pressure at the surface in Pascals
+constexpr double C0 =
+ 27.5629410929725921310572974482; // Constant to translate potential
+ // temperature into pressure
+ // (P=C0*(rho*theta)**gamma)
+constexpr double gamm =
+ 1.40027894002789400278940027894; // gamma=cp/Rd , have to call this gamm
+ // because "gamma" is taken (I hate C so
+ // much)
+// Define domain and stability-related constants
+constexpr double xlen = 2.e4; // Length of the domain in the x-direction
+ // (meters)
+constexpr double zlen = 1.e4; // Length of the domain in the z-direction
+ // (meters)
+constexpr double hv_beta =
+ 0.05; // How strong to diffuse the solution: hv_beta \in [0:1]
+constexpr double cfl =
+ 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
+constexpr double max_speed =
+ 450; // Assumed maximum wave speed during the simulation (speed of sound +
+ // speed of wind) (meter / sec)
+constexpr int hs =
+ 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a
+ // full "stencil" of information for reconstruction
+constexpr int sten_size = 4; // Size of the stencil used for interpolation
+
+// Parameters for indexing and flags
+constexpr int NUM_VARS = 4; // Number of fluid state variables
+constexpr int ID_DENS = 0; // index for density ("rho")
+constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u")
+constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w")
+constexpr int ID_RHOT =
+ 3; // index for density * potential temperature ("rho * theta")
+constexpr int DIR_X =
+ 1; // Integer constant to express that this operation is in the x-direction
+constexpr int DIR_Z =
+ 2; // Integer constant to express that this operation is in the z-direction
+constexpr int DATA_SPEC_COLLISION = 1;
+constexpr int DATA_SPEC_THERMAL = 2;
+constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
+constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
+constexpr int DATA_SPEC_INJECTION = 6;
+
+constexpr int nqpoints = 3;
+constexpr double qpoints[] = {0.112701665379258311482073460022E0,
+ 0.500000000000000000000000000000E0,
+ 0.887298334620741688517926539980E0};
+constexpr double qweights[] = {0.277777777777777777777777777779E0,
+ 0.444444444444444444444444444444E0,
+ 0.277777777777777777777777777779E0};
///////////////////////////////////////////////////////////////////////////////////////
-// Variables that are initialized but remain static over the course of the simulation
+// BEGIN USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+// The x-direction length is twice as long as the z-direction length
+// So, you'll want to have nx_glob be twice as large as nz_glob
+int constexpr nx_glob = _NX; // Number of total cells in the x-direction
+int constexpr nz_glob = _NZ; // Number of total cells in the z-direction
+double constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation
+double constexpr output_freq =
+ _OUT_FREQ; // How frequently to output data to file (in seconds)
+int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data
+double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction
+double constexpr dz = zlen / nz_glob; // grid spacing in the z-direction
+///////////////////////////////////////////////////////////////////////////////////////
+// END USER-CONFIGURABLE PARAMETERS
///////////////////////////////////////////////////////////////////////////////////////
-struct Fixed_data {
- int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task
- int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task
- int nranks, myrank; //Number of MPI ranks and my rank id
- int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain
- int mainproc; //Am I the main process (rank == 0)?
- realConst1d hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs)
- realConst1d hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1)
- realConst1d hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1)
-};
-//Declaring the functions defined after "main"
-void init ( real3d &state , real &dt , Fixed_data &fixed_data );
-void finalize ( );
-void injection ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void density_current ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void gravity_waves ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void thermal ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void collision ( real x , real z , real &r , real &u , real &w , real &t , real &hr , real &ht );
-void hydro_const_theta ( real z , real &r , real &t );
-void hydro_const_bvfreq ( real z , real bv_freq0 , real &r , real &t );
-real sample_ellipse_cosine( real x , real z , real amp , real x0 , real z0 , real xrad , real zrad );
-void output ( realConst3d state , real etime , int &num_out , Fixed_data const &fixed_data );
-void ncwrap ( int ierr , int line );
-void perform_timestep ( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data );
-void semi_discrete_step ( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data );
-void compute_tendencies_x ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void compute_tendencies_z ( realConst3d state , real3d const &tend , real dt , Fixed_data const &fixed_data );
-void set_halo_values_x ( real3d const &state , Fixed_data const &fixed_data );
-void set_halo_values_z ( real3d const &state , Fixed_data const &fixed_data );
-void reductions ( realConst3d state , double &mass , double &te , Fixed_data const &fixed_data );
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are initialized but remain static over the course of the
+// simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double dt; // Model time step (seconds)
+int nx, nz; // Number of local grid cells in the x- and z- dimensions for this
+ // MPI task
+int i_beg, k_beg; // beginning index in the x- and z-directions for this MPI
+ // task
+int nranks, myrank; // Number of MPI ranks and my rank id
+int left_rank, right_rank; // MPI Rank IDs that exist to my left and right in
+ // the global domain
+int mainproc; // Am I the main process (rank == 0)?
+double *hy_dens_cell; // hydrostatic density (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *hy_dens_theta_cell; // hydrostatic rho*t (vert cell avgs). Dimensions:
+ // (1-hs:nz+hs)
+double *
+ hy_dens_int; // hydrostatic density (vert cell interf). Dimensions: (1:nz+1)
+double *hy_dens_theta_int; // hydrostatic rho*t (vert cell interf). Dimensions:
+ // (1:nz+1)
+double *hy_pressure_int; // hydrostatic press (vert cell interf). Dimensions:
+ // (1:nz+1)
+///////////////////////////////////////////////////////////////////////////////////////
+// Variables that are dynamic over the course of the simulation
+///////////////////////////////////////////////////////////////////////////////////////
+double etime; // Elapsed model time
+double output_counter; // Helps determine when it's time to do output
+// Runtime variable arrays
+double *state; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *state_tmp; // Fluid state. Dimensions:
+ // (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
+double *flux; // Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS)
+double *tend; // Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS)
+int num_out = 0; // The number of outputs performed so far
+int direction_switch = 1;
+double mass0, te0; // Initial domain totals for mass and total energy
+double mass, te; // Domain totals for mass and total energy
+
+// How is this not in the standard?!
+double dmin(double a, double b) {
+ if (a < b) {
+ return a;
+ } else {
+ return b;
+ }
+};
+
+// Declaring the functions defined after "main"
+void init(int *argc, char ***argv);
+void finalize();
+void injection(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void density_current(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void gravity_waves(double x, double z, double &r, double &u, double &w,
+ double &t, double &hr, double &ht);
+void thermal(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void collision(double x, double z, double &r, double &u, double &w, double &t,
+ double &hr, double &ht);
+void hydro_const_theta(double z, double &r, double &t);
+void hydro_const_bvfreq(double z, double bv_freq0, double &r, double &t);
+double sample_ellipse_cosine(double x, double z, double amp, double x0,
+ double z0, double xrad, double zrad);
+void output(double *state, double etime);
+void ncwrap(int ierr, int line);
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt);
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend);
+void compute_tendencies_x(double *state, double *flux, double *tend, double dt);
+void compute_tendencies_z(double *state, double *flux, double *tend, double dt);
+void set_halo_values_x(double *state);
+void set_halo_values_z(double *state);
+void reductions(double &mass, double &te);
///////////////////////////////////////////////////////////////////////////////////////
// THE MAIN PROGRAM STARTS HERE
///////////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
- MPI_Init(&argc,&argv);
- yakl::init();
- {
- Fixed_data fixed_data;
- real3d state;
- real dt; //Model time step (seconds)
-
- // init allocates state
- init( state , dt , fixed_data );
- auto &mainproc = fixed_data.mainproc;
+ init(&argc, &argv);
- //Initial reductions for mass, kinetic energy, and total energy
- double mass0, te0;
- reductions(state,mass0,te0,fixed_data);
+ // Initial reductions for mass, kinetic energy, and total energy
+ reductions(mass0, te0);
- int num_out = 0; //The number of outputs performed so far
- real output_counter = 0; //Helps determine when it's time to do output
- real etime = 0;
+ // Output the initial state
+ output(state, etime);
- //Output the initial state
- if (output_freq >= 0) {
- output(state,etime,num_out,fixed_data);
- }
-
- int direction_switch = 1; // Tells dimensionally split which order to take x,z solves
-
- ////////////////////////////////////////////////////
- // MAIN TIME STEP LOOP
- ////////////////////////////////////////////////////
- auto t1 = std::chrono::steady_clock::now();
- while (etime < sim_time) {
- //If the time step leads to exceeding the simulation time, shorten it for the last step
- if (etime + dt > sim_time) { dt = sim_time - etime; }
- //Perform a single time step
- perform_timestep(state,dt,direction_switch,fixed_data);
- //Inform the user
- #ifndef NO_INFORM
- if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); }
- #endif
- //Update the elapsed time and output counter
- etime = etime + dt;
- output_counter = output_counter + dt;
- //If it's time for output, reset the counter, and do output
- if (output_freq >= 0 && output_counter >= output_freq) {
- output_counter = output_counter - output_freq;
- output(state,etime,num_out,fixed_data);
- }
+ ////////////////////////////////////////////////////
+ // MAIN TIME STEP LOOP
+ ////////////////////////////////////////////////////
+ auto t1 = std::chrono::steady_clock::now();
+ while (etime < sim_time) {
+ // If the time step leads to exceeding the simulation time, shorten it for
+ // the last step
+ if (etime + dt > sim_time) {
+ dt = sim_time - etime;
}
- auto t2 = std::chrono::steady_clock::now();
+ // Perform a single time step
+ perform_timestep(state, state_tmp, flux, tend, dt);
+ // Inform the user
+#ifndef NO_INFORM
if (mainproc) {
- std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n";
+ printf("Elapsed Time: %lf / %lf\n", etime, sim_time);
}
-
- //Final reductions for mass, kinetic energy, and total energy
- double mass, te;
- reductions(state,mass,te,fixed_data);
-
- if (mainproc) {
- printf( "d_mass: %le\n" , (mass - mass0)/mass0 );
- printf( "d_te: %le\n" , (te - te0 )/te0 );
+#endif
+ // Update the elapsed time and output counter
+ etime = etime + dt;
+ output_counter = output_counter + dt;
+ // If it's time for output, reset the counter, and do output
+ if (output_counter >= output_freq) {
+ output_counter = output_counter - output_freq;
+ output(state, etime);
}
-
- finalize();
}
- yakl::finalize();
- MPI_Finalize();
-}
+ auto t2 = std::chrono::steady_clock::now();
+ if (mainproc) {
+    std::cout << "CPU Time: " << std::chrono::duration<double>(t2 - t1).count()
+ << " sec\n";
+ }
+ // Final reductions for mass, kinetic energy, and total energy
+ reductions(mass, te);
-//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator
-//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the
-//order of directions is alternated each time step.
-//The Runge-Kutta method used here is defined as follows:
-// q* = q_n + dt/3 * rhs(q_n)
-// q** = q_n + dt/2 * rhs(q* )
-// q_n+1 = q_n + dt/1 * rhs(q**)
-void perform_timestep( real3d const &state , real dt , int &direction_switch , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
+ if (mainproc) {
+ printf("d_mass: %le\n", (mass - mass0) / mass0);
+ printf("d_te: %le\n", (te - te0) / te0);
+ }
- real3d state_tmp("state_tmp",NUM_VARS,nz+2*hs,nx+2*hs);
+ finalize();
+}
+// Performs a single dimensionally split time step using a simple low-storage
+// three-stage Runge-Kutta time integrator. The dimensional splitting is a
+// second-order-accurate alternating Strang splitting in which the order of
+// directions is alternated each time step. The Runge-Kutta method used here is
+// defined as follows:
+// q* = q[n] + dt/3 * rhs(q[n])
+// q** = q[n] + dt/2 * rhs(q* )
+// q[n+1] = q[n] + dt/1 * rhs(q** )
+void perform_timestep(double *state, double *state_tmp, double *flux,
+ double *tend, double dt) {
+ if (direction_switch) {
+ // x-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ // z-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+ } else {
+    // z-direction first
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_Z, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_Z, flux, tend);
+    // x-direction second
+ semi_discrete_step(state, state, state_tmp, dt / 3, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state_tmp, dt / 2, DIR_X, flux, tend);
+ semi_discrete_step(state, state_tmp, state, dt / 1, DIR_X, flux, tend);
+ }
if (direction_switch) {
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
+ direction_switch = 0;
} else {
- //z-direction second
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , fixed_data );
- //x-direction first
- semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , fixed_data );
- semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , fixed_data );
+ direction_switch = 1;
}
- if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; }
}
-
-//Perform a single semi-discretized step in time with the form:
-//state_out = state_init + dt * rhs(state_forcing)
-//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out
-void semi_discrete_step( realConst3d state_init , real3d const &state_forcing , real3d const &state_out , real dt , int dir , Fixed_data const &fixed_data ) {
- auto &nx = fixed_data.nx ;
- auto &nz = fixed_data.nz ;
- auto &i_beg = fixed_data.i_beg ;
- auto &k_beg = fixed_data.k_beg ;
- auto &hy_dens_cell = fixed_data.hy_dens_cell ;
-
- real3d tend("tend",NUM_VARS,nz,nx);
-
- if (dir == DIR_X) {
- //Set the halo values for this MPI task's fluid state in the x-direction
- yakl::timer_start("halo x");
- set_halo_values_x(state_forcing,fixed_data);
- yakl::timer_stop("halo x");
- //Compute the time tendencies for the fluid state in the x-direction
- yakl::timer_start("tendencies x");
- compute_tendencies_x(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies x");
+// Perform a single semi-discretized step in time with the form:
+// state_out = state_init + dt * rhs(state_forcing)
+// Meaning the step starts from state_init, computes the rhs using
+// state_forcing, and stores the result in state_out
+void semi_discrete_step(double *state_init, double *state_forcing,
+ double *state_out, double dt, int dir, double *flux,
+ double *tend) {
+ int i, k, ll, inds, indt, indw;
+ double x, z, wpert, dist, x0, z0, xrad, zrad, amp;
+ if (dir == DIR_X) {
+ // Set the halo values for this MPI task's fluid state in the x-direction
+ set_halo_values_x(state_forcing);
+ // Compute the time tendencies for the fluid state in the x-direction
+ compute_tendencies_x(state_forcing, flux, tend, dt);
} else if (dir == DIR_Z) {
- //Set the halo values for this MPI task's fluid state in the z-direction
- yakl::timer_start("halo z");
- set_halo_values_z(state_forcing,fixed_data);
- yakl::timer_stop("halo z");
- //Compute the time tendencies for the fluid state in the z-direction
- yakl::timer_start("tendencies z");
- compute_tendencies_z(state_forcing,tend,dt,fixed_data);
- yakl::timer_stop("tendencies z");
+ // Set the halo values for this MPI task's fluid state in the z-direction
+ set_halo_values_z(state_forcing);
+ // Compute the time tendencies for the fluid state in the z-direction
+ compute_tendencies_z(state_forcing, flux, tend, dt);
}
/////////////////////////////////////////////////
- // TODO: MAKE THESE 3 LOOPS A PARALLEL_FOR
+ // TODO: THREAD ME
/////////////////////////////////////////////////
- //Apply the tendencies to the fluid state
- yakl::timer_start("apply tendencies");
- for (int ll=0; ll stencil;
- SArray d3_vals;
- SArray vals;
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll stencil;
- SArray d3_vals;
- SArray vals;
- //Use fourth-order interpolation from four cell averages to compute the value at the interface in question
- for (int ll=0; ll qpoints;
- SArray qweights;
-
- qpoints(0) = 0.112701665379258311482073460022;
- qpoints(1) = 0.500000000000000000000000000000;
- qpoints(2) = 0.887298334620741688517926539980;
-
- qweights(0) = 0.277777777777777777777777777779;
- qweights(1) = 0.444444444444444444444444444444;
- qweights(2) = 0.277777777777777777777777777779;
-
//////////////////////////////////////////////////////////////////////////
// Initialize the cell-averaged fluid state via Gauss-Legendre quadrature
//////////////////////////////////////////////////////////////////////////
- /////////////////////////////////////////////////
- // TODO: MAKE THESE 2 LOOPS A PARALLEL_FOR
- /////////////////////////////////////////////////
- for (int k=0; k
+
+using yakl::SArray;
+using yakl::c::SimpleBounds;
+
+#ifdef SINGLE_PREC
+typedef float real;
+auto mpi_type = MPI_FLOAT;
+#else
+typedef double real;
+auto mpi_type = MPI_DOUBLE;
+#endif
+
+constexpr real pi = 3.14159265358979323846264338327; // Pi
+constexpr real grav = 9.8; // Gravitational acceleration (m / s^2)
+constexpr real cp = 1004.; // Specific heat of dry air at constant pressure
+constexpr real cv = 717.; // Specific heat of dry air at constant volume
+constexpr real rd = 287.; // Dry air constant for equation of state (P=rho*rd*T)
+constexpr real p0 = 1.e5; // Standard pressure at the surface in Pascals
+constexpr real C0 =
+ 27.5629410929725921310572974482; // Constant to translate potential
+ // temperature into pressure
+ // (P=C0*(rho*theta)**gamma)
+constexpr real gamm =
+    1.40027894002789400278940027894; // gamma=cp/cv , have to call this gamm
+ // because "gamma" is taken (I hate C so
+ // much)
+// Define domain and stability-related constants
+constexpr real xlen = 2.e4; // Length of the domain in the x-direction (meters)
+constexpr real zlen = 1.e4; // Length of the domain in the z-direction (meters)
+constexpr real hv_beta =
+ 0.05; // How strong to diffuse the solution: hv_beta \in [0:1]
+constexpr real cfl =
+ 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability)
+constexpr real max_speed =
+ 450; // Assumed maximum wave speed during the simulation (speed of sound +
+ // speed of wind) (meter / sec)
+constexpr int hs =
+ 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a
+ // full "stencil" of information for reconstruction
+constexpr int sten_size = 4; // Size of the stencil used for interpolation
+
+// Parameters for indexing and flags
+constexpr int NUM_VARS = 4; // Number of fluid state variables
+constexpr int ID_DENS = 0; // index for density ("rho")
+constexpr int ID_UMOM = 1; // index for momentum in the x-direction ("rho * u")
+constexpr int ID_WMOM = 2; // index for momentum in the z-direction ("rho * w")
+constexpr int ID_RHOT =
+ 3; // index for density * potential temperature ("rho * theta")
+constexpr int DIR_X =
+ 1; // Integer constant to express that this operation is in the x-direction
+constexpr int DIR_Z =
+ 2; // Integer constant to express that this operation is in the z-direction
+constexpr int DATA_SPEC_COLLISION = 1;
+constexpr int DATA_SPEC_THERMAL = 2;
+constexpr int DATA_SPEC_GRAVITY_WAVES = 3;
+constexpr int DATA_SPEC_DENSITY_CURRENT = 5;
+constexpr int DATA_SPEC_INJECTION = 6;
+
+///////////////////////////////////////////////////////////////////////////////////////
+// BEGIN USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+// The x-direction length is twice as long as the z-direction length
+// So, you'll want to have nx_glob be twice as large as nz_glob
+int constexpr nx_glob = _NX; // Number of total cells in the x-direction
+int constexpr nz_glob = _NZ; // Number of total cells in the z-direction
+real constexpr sim_time = _SIM_TIME; // How many seconds to run the simulation
+real constexpr output_freq =
+ _OUT_FREQ; // How frequently to output data to file (in seconds)
+int constexpr data_spec_int = _DATA_SPEC; // How to initialize the data
+///////////////////////////////////////////////////////////////////////////////////////
+// END USER-CONFIGURABLE PARAMETERS
+///////////////////////////////////////////////////////////////////////////////////////
+real constexpr dx = xlen / nx_glob;
+real constexpr dz = zlen / nz_glob;
+
+using yakl::SArray;
+using yakl::c::Bounds;
+using yakl::c::parallel_for;
+
+template <class T> inline T min(T val1, T val2) {
+ return val1 < val2 ? val1 : val2;
+}
+
+template <class T> inline T abs(T val) { return val > 0 ? val : -val; }
+
+#ifdef SIMD_LEN
+unsigned int constexpr simd_len = SIMD_LEN;
+#else
+unsigned int constexpr simd_len = 4;
+#endif
+
+using yakl::simd::iterate_over_pack;
+using yakl::simd::Pack;
+using yakl::simd::PackIterConfig;
diff --git a/cpp_yakl/experimental/miniWeather_mpi_parallelfor_simd_x.cpp b/cpp_yakl/experimental/miniWeather_mpi_parallelfor_simd_x.cpp
new file mode 100644
index 00000000..2f43f287
--- /dev/null
+++ b/cpp_yakl/experimental/miniWeather_mpi_parallelfor_simd_x.cpp
@@ -0,0 +1,1083 @@
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// miniWeather
+// Author: Matt Norman , Oak Ridge National Laboratory
+// This code simulates dry, stratified, compressible, non-hydrostatic fluid
+// flows For documentation, please see the attached documentation in the
+// "documentation" folder
+//
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#include "../const.h"
+#include "pnetcdf.h"
+#include
+#include
+#include
+#include
+#include
+
+// We're going to define all arrays on the host because this doesn't use
+// parallel_for
+typedef yakl::Array<real, 1, yakl::memHost, yakl::styleC> real1d;
+typedef yakl::Array