diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index 3d46914..ddcb6f5 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -54,6 +54,50 @@ endif() add_test(NAME SERIAL_TEST COMMAND ./check_output.sh ./serial_test 1e-13 4.5e-5 ) +############################################################ +## Compile the serial for_each version +############################################################ +if (NOT ("${STDPAR_FLAGS}" STREQUAL "") ) + add_executable(serial_for_each miniWeather_serial_for_each.cpp) + set_target_properties(serial_for_each PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${STDPAR_FLAGS}") + + add_executable(serial_for_each_test miniWeather_serial_for_each.cpp) + set_target_properties(serial_for_each_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${STDPAR_FLAGS}") + + if (NOT ("${LDFLAGS}" STREQUAL "") ) + target_link_libraries(serial_for_each "${LDFLAGS} ${STDPAR_FLAGS}") + target_link_libraries(serial_for_each_test "${LDFLAGS} ${STDPAR_FLAGS}") + endif() + if (NOT ("${SERIAL_LINK_FLAGS}" STREQUAL "") ) + target_link_libraries(serial_for_each "${SERIAL_LINK_FLAGS} ${STDPAR_FLAGS}") + target_link_libraries(serial_for_each_test "${SERIAL_LINK_FLAGS} ${STDPAR_FLAGS}") + endif() + + add_test(NAME SERIAL_FOR_EACH_TEST COMMAND ./check_output.sh ./serial_for_each_test 1e-13 4.5e-5 ) +endif() + + +############################################################ +## Compile the serial for_each_n version +############################################################ +if (NOT ("${STDPAR_FLAGS}" STREQUAL "") ) + add_executable(serial_for_each_n miniWeather_serial_for_each_n.cpp) + set_target_properties(serial_for_each_n PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${STDPAR_FLAGS}") + + add_executable(serial_for_each_n_test miniWeather_serial_for_each_n.cpp) + set_target_properties(serial_for_each_n_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${STDPAR_FLAGS}") + + if (NOT ("${LDFLAGS}" STREQUAL "") ) + target_link_libraries(serial_for_each_n "${LDFLAGS} ${STDPAR_FLAGS}") + target_link_libraries(serial_for_each_n_test "${LDFLAGS} ${STDPAR_FLAGS}") + endif() + if (NOT ("${SERIAL_LINK_FLAGS}" STREQUAL "") ) + target_link_libraries(serial_for_each_n "${SERIAL_LINK_FLAGS} ${STDPAR_FLAGS}") + target_link_libraries(serial_for_each_n_test "${SERIAL_LINK_FLAGS} ${STDPAR_FLAGS}") + endif() + + add_test(NAME SERIAL_FOR_EACH_N_TEST COMMAND ./check_output.sh ./serial_for_each_n_test 1e-13 4.5e-5 ) +endif() ############################################################ ## Compile the MPI version @@ -75,6 +119,50 @@ endif() add_test(NAME MPI_TEST COMMAND ./check_output.sh ./mpi_test 1e-13 4.5e-5 ) +############################################################ +## Compile the mpi for_each version +############################################################ +if (NOT ("${STDPAR_FLAGS}" STREQUAL "") ) + add_executable(mpi_for_each miniWeather_mpi_for_each.cpp) + set_target_properties(mpi_for_each PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${STDPAR_FLAGS}") + + add_executable(mpi_for_each_test miniWeather_mpi_for_each.cpp) + set_target_properties(mpi_for_each_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${STDPAR_FLAGS}") + + if (NOT ("${LDFLAGS}" STREQUAL "") ) + target_link_libraries(mpi_for_each "${LDFLAGS} ${STDPAR_FLAGS}") + target_link_libraries(mpi_for_each_test "${LDFLAGS} ${STDPAR_FLAGS}") + endif() + if (NOT ("${SERIAL_LINK_FLAGS}" STREQUAL "") ) + target_link_libraries(mpi_for_each "${MPI_LINK_FLAGS} ${STDPAR_FLAGS}") + target_link_libraries(mpi_for_each_test "${MPI_LINK_FLAGS} ${STDPAR_FLAGS}") + endif() + + add_test(NAME MPI_FOR_EACH_TEST COMMAND ./check_output.sh ./mpi_for_each_test 1e-13 4.5e-5 ) +endif() + + +############################################################ +## Compile the serial for_each_n version +############################################################ +if (NOT ("${STDPAR_FLAGS}" STREQUAL "") ) + add_executable(mpi_for_each_n miniWeather_mpi_for_each_n.cpp) + set_target_properties(mpi_for_each_n PROPERTIES COMPILE_FLAGS "${EXE_DEFS} ${STDPAR_FLAGS}") + + add_executable(mpi_for_each_n_test miniWeather_mpi_for_each_n.cpp) + set_target_properties(mpi_for_each_n_test PROPERTIES COMPILE_FLAGS "${TEST_DEFS} ${STDPAR_FLAGS}") + + if (NOT ("${LDFLAGS}" STREQUAL "") ) + target_link_libraries(mpi_for_each_n "${LDFLAGS} ${STDPAR_FLAGS}") + target_link_libraries(mpi_for_each_n_test "${LDFLAGS} ${STDPAR_FLAGS}") + endif() + if (NOT ("${SERIAL_LINK_FLAGS}" STREQUAL "") ) + target_link_libraries(mpi_for_each_n "${MPI_LINK_FLAGS} ${STDPAR_FLAGS}") + target_link_libraries(mpi_for_each_n_test "${MPI_LINK_FLAGS} ${STDPAR_FLAGS}") + endif() + + add_test(NAME MPI_FOR_EACH_N_TEST COMMAND ./check_output.sh ./mpi_for_each_n_test 1e-13 4.5e-5 ) +endif() ############################################################ ## Compile the MPI + OpenMP version diff --git a/c/miniWeather_mpi_for_each.cpp b/c/miniWeather_mpi_for_each.cpp new file mode 100644 index 0000000..39d3e90 --- /dev/null +++ b/c/miniWeather_mpi_for_each.cpp @@ -0,0 +1,906 @@ + +////////////////////////////////////////////////////////////////////////////////////////// +// miniWeather +// Author: Matt Norman , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +double *sendbuf_l; //Buffer to send data to the left MPI rank +double *sendbuf_r; //Buffer to send data to the right MPI rank +double *recvbuf_l; //Buffer to receive data from the left MPI rank +double *recvbuf_r; //Buffer to receive data from the right MPI rank +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a= 0) output(state,etime); + + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + //////////////////////////////////////////////////// + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + //If the time step leads to exceeding the simulation time, shorten it for the last step + if (etime + dt > sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,state_tmp,flux,tend,dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_freq >= 0 && output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + auto bounds = std::views::iota(0,(int)(NUM_VARS*nz*nx)); + std::for_each(std::execution::par_unseq,bounds.begin(),bounds.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + int indw = ID_WMOM*nz*nx + k*nx + i; + tend[indw] += wpert*hy_dens_cell[hs+k]; + } + int inds = ll*(nz+2*hs)*(nx+2*hs) + (k+hs)*(nx+2*hs) + i+hs; + int indt = ll*nz*nx + k*nx + i; + state_out[inds] = state_init[inds] + dt * tend[indt]; + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( double *state , double *flux , double *tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + auto bounds1 = std::views::iota(0,(int)(nz*(nx+1))); + std::for_each(std::execution::par_unseq,bounds1.begin(),bounds1.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +double *sendbuf_l; //Buffer to send data to the left MPI rank +double *sendbuf_r; //Buffer to send data to the right MPI rank +double *recvbuf_l; //Buffer to receive data from the left MPI rank +double *recvbuf_r; //Buffer to receive data from the right MPI rank +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a= 0) output(state,etime); + + //////////////////////////////////////////////////// + // MAIN TIME STEP LOOP + //////////////////////////////////////////////////// + auto t1 = std::chrono::steady_clock::now(); + while (etime < sim_time) { + //If the time step leads to exceeding the simulation time, shorten it for the last step + if (etime + dt > sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,state_tmp,flux,tend,dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_freq >= 0 && output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),(NUM_VARS*nz*nx),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + int indw = ID_WMOM*nz*nx + k*nx + i; + tend[indw] += wpert*hy_dens_cell[hs+k]; + } + int inds = ll*(nz+2*hs)*(nx+2*hs) + (k+hs)*(nx+2*hs) + i+hs; + int indt = ll*nz*nx + k*nx + i; + state_out[inds] = state_init[inds] + dt * tend[indt]; + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( double *state , double *flux , double *tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),(nz*(nx+1)),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,state_tmp,flux,tend,dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + auto bounds = std::views::iota(0,(int)(NUM_VARS*nz*nx)); + std::for_each(std::execution::par_unseq,bounds.begin(),bounds.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + int indw = ID_WMOM*nz*nx + k*nx + i; + tend[indw] += wpert*hy_dens_cell[hs+k]; + } + int inds = ll*(nz+2*hs)*(nx+2*hs) + (k+hs)*(nx+2*hs) + i+hs; + int indt = ll*nz*nx + k*nx + i; + state_out[inds] = state_init[inds] + dt * tend[indt]; + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( double *state , double *flux , double *tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + auto bounds1 = std::views::iota(0,(int)(nz*(nx+1))); + std::for_each(std::execution::par_unseq,bounds1.begin(),bounds1.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(state,state_tmp,flux,tend,dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( double *state , double *state_tmp , double *flux , double *tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state , state , state_tmp , dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state_tmp , dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state , state_tmp , state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( double *state_init , double *state_forcing , double *state_out , double dt , int dir , double *flux , double *tend ) { + int i, k, ll, inds, indt, indw; + double x, z, wpert, dist, x0, z0, xrad, zrad, amp; + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),(NUM_VARS*nz*nx),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + int indw = ID_WMOM*nz*nx + k*nx + i; + tend[indw] += wpert*hy_dens_cell[hs+k]; + } + int inds = ll*(nz+2*hs)*(nx+2*hs) + (k+hs)*(nx+2*hs) + i+hs; + int indt = ll*nz*nx + k*nx + i; + state_out[inds] = state_init[inds] + dt * tend[indt]; + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( double *state , double *flux , double *tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),(nz*(nx+1)),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +#include +namespace stdex = std::experimental; + +template +using ndim_span = stdex::mdspan>; +using real3d_span = ndim_span; + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +double *sendbuf_l; //Buffer to send data to the left MPI rank +double *sendbuf_r; //Buffer to send data to the right MPI rank +double *recvbuf_l; //Buffer to receive data from the left MPI rank +double *recvbuf_r; //Buffer to receive data from the right MPI rank +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(real3d_span{state,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{state_tmp,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{flux,NUM_VARS, (nz+1) ,(nx+1)},real3d_span{tend,NUM_VARS,nz,nx},dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( real3d_span state , real3d_span state_tmp , real3d_span flux , real3d_span tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( real3d_span state_init , real3d_span state_forcing , real3d_span state_out , double dt , int dir , real3d_span flux , real3d_span tend ) { + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + auto bounds = std::views::iota(0,(int)(NUM_VARS*nz*nx)); + std::for_each(std::execution::par_unseq,bounds.begin(),bounds.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + tend(ID_WMOM,k,i) += wpert*hy_dens_cell[hs+k]; + } + state_out(ll, (k+hs), (i+hs)) = state_init(ll, (k+hs), (i+hs)) + dt * tend(ll,k,i); + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( real3d_span state , real3d_span flux , real3d_span tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + auto bounds1 = std::views::iota(0,(int)(nz*(nx+1))); + std::for_each(std::execution::par_unseq,bounds1.begin(),bounds1.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +#include +namespace stdex = std::experimental; + +template +using ndim_span = stdex::mdspan>; +using real3d_span = ndim_span; + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +double *sendbuf_l; //Buffer to send data to the left MPI rank +double *sendbuf_r; //Buffer to send data to the right MPI rank +double *recvbuf_l; //Buffer to receive data from the left MPI rank +double *recvbuf_r; //Buffer to receive data from the right MPI rank +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(real3d_span{state,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{state_tmp,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{flux,NUM_VARS, (nz+1) ,(nx+1)},real3d_span{tend,NUM_VARS,nz,nx},dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( real3d_span state , real3d_span state_tmp , real3d_span flux , real3d_span tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( real3d_span state_init , real3d_span state_forcing , real3d_span state_out , double dt , int dir , real3d_span flux , real3d_span tend ) { + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),(NUM_VARS*nz*nx),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + tend(ID_WMOM,k,i) += wpert*hy_dens_cell[hs+k]; + } + state_out(ll, (k+hs), (i+hs)) = state_init(ll, (k+hs), (i+hs)) + dt * tend(ll,k,i); + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( real3d_span state , real3d_span flux , real3d_span tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),(nz*(nx+1)),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +#include +namespace stdex = std::experimental; + +template +using ndim_span = stdex::mdspan>; +using real3d_span = ndim_span; + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(real3d_span{state,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{state_tmp,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{flux,NUM_VARS, (nz+1) ,(nx+1)},real3d_span{tend,NUM_VARS,nz,nx},dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( real3d_span state , real3d_span state_tmp , real3d_span flux , real3d_span tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( real3d_span state_init , real3d_span state_forcing , real3d_span state_out , double dt , int dir , real3d_span flux , real3d_span tend ) { + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + auto bounds = std::views::iota(0,(int)(NUM_VARS*nz*nx)); + std::for_each(std::execution::par_unseq,bounds.begin(),bounds.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + tend(ID_WMOM,k,i) += wpert*hy_dens_cell[hs+k]; + } + state_out(ll, (k+hs), (i+hs)) = state_init(ll, (k+hs), (i+hs)) + dt * tend(ll,k,i); + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( real3d_span state , real3d_span flux , real3d_span tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + auto bounds1 = std::views::iota(0,(int)(nz*(nx+1))); + std::for_each(std::execution::par_unseq,bounds1.begin(),bounds1.end(),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll , Oak Ridge National Laboratory +// Jeff Larkin , NVIDIA Corporation +// This code simulates dry, stratified, compressible, non-hydrostatic fluid flows +// For documentation, please see the attached documentation in the "documentation" folder +// +////////////////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include +#ifndef NO_PNETCDF +#include "pnetcdf.h" +#endif +#include +#include +#include +#include + +#include +namespace stdex = std::experimental; + +template +using ndim_span = stdex::mdspan>; +using real3d_span = ndim_span; + +// These should become unnecessary with the addition of cartesian_product +constexpr std::tuple idx2d(int idx, int nx) { return {idx%nx, idx/nx}; } +constexpr std::tuple idx3d(int idx, int nx, int nz) { return {idx%nx, (idx/nx)%nz, idx / (nx*nz)}; } + +constexpr double pi = 3.14159265358979323846264338327; //Pi +constexpr double grav = 9.8; //Gravitational acceleration (m / s^2) +constexpr double cp = 1004.; //Specific heat of dry air at constant pressure +constexpr double cv = 717.; //Specific heat of dry air at constant volume +constexpr double rd = 287.; //Dry air constant for equation of state (P=rho*rd*T) +constexpr double p0 = 1.e5; //Standard pressure at the surface in Pascals +constexpr double C0 = 27.5629410929725921310572974482; //Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +constexpr double gamm = 1.40027894002789400278940027894; //gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +//Define domain and stability-related constants +constexpr double xlen = 2.e4; //Length of the domain in the x-direction (meters) +constexpr double zlen = 1.e4; //Length of the domain in the z-direction (meters) +constexpr double hv_beta = 0.05; //How strong to diffuse the solution: hv_beta \in [0:1] +constexpr double cfl = 1.50; //"Courant, Friedrichs, Lewy" number (for numerical stability) +constexpr double max_speed = 450; //Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +constexpr int hs = 2; //"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +constexpr int sten_size = 4; //Size of the stencil used for interpolation + +//Parameters for indexing and flags +constexpr int NUM_VARS = 4; //Number of fluid state variables +constexpr int ID_DENS = 0; //index for density ("rho") +constexpr int ID_UMOM = 1; //index for momentum in the x-direction ("rho * u") +constexpr int ID_WMOM = 2; //index for momentum in the z-direction ("rho * w") +constexpr int ID_RHOT = 3; //index for density * potential temperature ("rho * theta") +constexpr int DIR_X = 1; //Integer constant to express that this operation is in the x-direction +constexpr int DIR_Z = 2; //Integer constant to express that this operation is in the z-direction +constexpr int DATA_SPEC_COLLISION = 1; +constexpr int DATA_SPEC_THERMAL = 2; +constexpr int DATA_SPEC_GRAVITY_WAVES = 3; +constexpr int DATA_SPEC_DENSITY_CURRENT = 5; +constexpr int DATA_SPEC_INJECTION = 6; + +constexpr int nqpoints = 3; +constexpr double qpoints [] = { 0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0 }; +constexpr double qweights[] = { 0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0 }; + +/////////////////////////////////////////////////////////////////////////////////////// +// BEGIN USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// +//The x-direction length is twice as long as the z-direction length +//So, you'll want to have nx_glob be twice as large as nz_glob +int constexpr nx_glob = _NX; //Number of total cells in the x-direction +int constexpr nz_glob = _NZ; //Number of total cells in the z-direction +double constexpr sim_time = _SIM_TIME; //How many seconds to run the simulation +double constexpr output_freq = _OUT_FREQ; //How frequently to output data to file (in seconds) +int constexpr data_spec_int = _DATA_SPEC; //How to initialize the data +double constexpr dx = xlen / nx_glob; // grid spacing in the x-direction +double constexpr dz = zlen / nz_glob; // grid spacing in the x-direction +/////////////////////////////////////////////////////////////////////////////////////// +// END USER-CONFIGURABLE PARAMETERS +/////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are initialized but remain static over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double dt; //Model time step (seconds) +int nx, nz; //Number of local grid cells in the x- and z- dimensions for this MPI task +int i_beg, k_beg; //beginning index in the x- and z-directions for this MPI task +int nranks, myrank; //Number of MPI ranks and my rank id +int left_rank, right_rank; //MPI Rank IDs that exist to my left and right in the global domain +int mainproc; //Am I the main process (rank == 0)? +double *hy_dens_cell; //hydrostatic density (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_theta_cell; //hydrostatic rho*t (vert cell avgs). Dimensions: (1-hs:nz+hs) +double *hy_dens_int; //hydrostatic density (vert cell interf). Dimensions: (1:nz+1) +double *hy_dens_theta_int; //hydrostatic rho*t (vert cell interf). Dimensions: (1:nz+1) +double *hy_pressure_int; //hydrostatic press (vert cell interf). Dimensions: (1:nz+1) + +/////////////////////////////////////////////////////////////////////////////////////// +// Variables that are dynamics over the course of the simulation +/////////////////////////////////////////////////////////////////////////////////////// +double etime; //Elapsed model time +double output_counter; //Helps determine when it's time to do output +//Runtime variable arrays +double *state; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *state_tmp; //Fluid state. Dimensions: (1-hs:nx+hs,1-hs:nz+hs,NUM_VARS) +double *flux; //Cell interface fluxes. Dimensions: (nx+1,nz+1,NUM_VARS) +double *tend; //Fluid state tendencies. Dimensions: (nx,nz,NUM_VARS) +int num_out = 0; //The number of outputs performed so far +int direction_switch = 1; +double mass0, te0; //Initial domain totals for mass and total energy +double mass , te ; //Domain totals for mass and total energy + +//How is this not in the standard?! +double dmin( double a , double b ) { if (a sim_time) { dt = sim_time - etime; } + //Perform a single time step + perform_timestep(real3d_span{state,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{state_tmp,NUM_VARS,nz+2*hs,nx+2*hs},real3d_span{flux,NUM_VARS, (nz+1) ,(nx+1)},real3d_span{tend,NUM_VARS,nz,nx},dt); + //Inform the user +#ifndef NO_INFORM + if (mainproc) { printf( "Elapsed Time: %lf / %lf\n", etime , sim_time ); } +#endif + //Update the elapsed time and output counter + etime = etime + dt; + output_counter = output_counter + dt; + //If it's time for output, reset the counter, and do output + if (output_counter >= output_freq) { + output_counter = output_counter - output_freq; + output(state,etime); + } + } + auto t2 = std::chrono::steady_clock::now(); + if (mainproc) { + std::cout << "CPU Time: " << std::chrono::duration(t2-t1).count() << " sec\n"; + } + + //Final reductions for mass, kinetic energy, and total energy + reductions(mass,te); + + if (mainproc) { + printf( "d_mass: %le\n" , (mass - mass0)/mass0 ); + printf( "d_te: %le\n" , (te - te0 )/te0 ); + } + + finalize(); +} + + +//Performs a single dimensionally split time step using a simple low-storage three-stage Runge-Kutta time integrator +//The dimensional splitting is a second-order-accurate alternating Strang splitting in which the +//order of directions is alternated each time step. +//The Runge-Kutta method used here is defined as follows: +// q* = q[n] + dt/3 * rhs(q[n]) +// q** = q[n] + dt/2 * rhs(q* ) +// q[n+1] = q[n] + dt/1 * rhs(q** ) +void perform_timestep( real3d_span state , real3d_span state_tmp , real3d_span flux , real3d_span tend , double dt ) { + if (direction_switch) { + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + } else { + //z-direction second + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_Z , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_Z , flux , tend ); + //x-direction first + semi_discrete_step( state, state , state_tmp, dt / 3 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state_tmp, dt / 2 , DIR_X , flux , tend ); + semi_discrete_step( state, state_tmp, state , dt / 1 , DIR_X , flux , tend ); + } + if (direction_switch) { direction_switch = 0; } else { direction_switch = 1; } +} + + +//Perform a single semi-discretized step in time with the form: +//state_out = state_init + dt * rhs(state_forcing) +//Meaning the step starts from state_init, computes the rhs using state_forcing, and stores the result in state_out +void semi_discrete_step( real3d_span state_init , real3d_span state_forcing , real3d_span state_out , double dt , int dir , real3d_span flux , real3d_span tend ) { + if (dir == DIR_X) { + //Set the halo values for this MPI task's fluid state in the x-direction + set_halo_values_x(state_forcing); + //Compute the time tendencies for the fluid state in the x-direction + compute_tendencies_x(state_forcing,flux,tend,dt); + } else if (dir == DIR_Z) { + //Set the halo values for this MPI task's fluid state in the z-direction + set_halo_values_z(state_forcing); + //Compute the time tendencies for the fluid state in the z-direction + compute_tendencies_z(state_forcing,flux,tend,dt); + } + + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Apply the tendencies to the fluid state + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),NUM_VARS*nz*nx,[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,i_beg=i_beg,k_beg=k_beg](int idx){ + auto [i,k,ll] = idx3d(idx,nx,nz); + if (data_spec_int == DATA_SPEC_GRAVITY_WAVES) { + double x = (i_beg + i+0.5)*dx; + double z = (k_beg + k+0.5)*dz; + double wpert; + // Using sample_ellipse_cosine requires "acc routine" in OpenACC and "declare target" in OpenMP offload + // Neither of these are particularly well supported. So I'm manually inlining here + // wpert = sample_ellipse_cosine( x,z , 0.01 , xlen/8,1000., 500.,500. ); + { + double x0 = xlen/8; + double z0 = 1000; + double xrad = 500; + double zrad = 500; + double amp = 0.01; + //Compute distance from bubble center + double dist = sqrt( ((x-x0)/xrad)*((x-x0)/xrad) + ((z-z0)/zrad)*((z-z0)/zrad) ) * pi / 2.; + //If the distance from bubble center is less than the radius, create a cos**2 profile + if (dist <= pi / 2.) { + wpert = amp * pow(cos(dist),2.); + } else { + wpert = 0.; + } + } + tend(ID_WMOM,k,i) += wpert*hy_dens_cell[hs+k]; + } + state_out(ll, (k+hs), (i+hs)) = state_init(ll, (k+hs), (i+hs)) + dt * tend(ll,k,i); + }); +} + + +//Compute the time tendencies of the fluid state using forcing in the x-direction +//Since the halos are set in a separate routine, this will not require MPI +//First, compute the flux vector at each cell interface in the x-direction (including hyperviscosity) +//Then, compute the tendencies using those fluxes +void compute_tendencies_x( real3d_span state , real3d_span flux , real3d_span tend , double dt) { + double hv_coef; + //Compute the hyperviscosity coefficient + hv_coef = -hv_beta * dx / (16*dt); + ///////////////////////////////////////////////// + // TODO: THREAD ME + ///////////////////////////////////////////////// + //Compute fluxes in the x-direction for each cell + std::for_each_n(std::execution::par_unseq,std::views::iota(0).begin(),(nz*(nx+1)),[=,nx=nx,nz=nz,hy_dens_cell=hy_dens_cell,hy_dens_theta_cell=hy_dens_theta_cell](int idx){ + auto [i,k] = idx2d(idx,nx+1); + double stencil[4], d3_vals[NUM_VARS],vals[NUM_VARS]; + //Use fourth-order interpolation from four cell averages to compute the value at the interface in question + for (int ll=0; ll