From a1de7d6021b9d92d3456cbd8ea1d55ecc4f43d57 Mon Sep 17 00:00:00 2001 From: cguzman Date: Tue, 22 Oct 2019 13:23:20 +0200 Subject: [PATCH 1/4] Added compilation of the tests on GPU: For the moment, only unit_tests_arrhenius added. The implementation it's a bit ugly (can't be executed in parallel, duplicate .sh test files with _gpu.sh) , but it works and only do a minor changes on CmakeLists.txt How it works: basically it creates a partmclib_gpu (apart from partmclib) with all files compiled with ENABLE_GPU=ON, and if cuda_found, a new test _gpu test is generated --- CMakeLists.txt | 135 +++++++++++++----- src/camp_solver.c | 12 +- src/cuda/camp_gpu_solver.cu | 30 ++-- src/cuda/camp_gpu_solver.h | 1 - src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu | 10 +- src/cuda/rxns_gpu/rxn_arrhenius.cu | 34 +++-- src/rxn_solver.c | 4 + .../input_files/run_rxn_arrhenius_gpu.sh | 35 +++++ 8 files changed, 183 insertions(+), 78 deletions(-) create mode 100755 test/unit_tests/input_files/run_rxn_arrhenius_gpu.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 97551690b..3ba098a90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,11 +170,21 @@ endif() ###################################################################### # GPU support -if (ENABLE_GPU) - add_definitions(-DPMC_USE_GPU) - enable_language(CUDA) + +#Compile GPU tests if CUDA detected +find_package(CUDA) +if (CUDA_FOUND) + enable_language(CUDA) + #If enable_gpu, all will be executed on GPU + if (ENABLE_GPU) + #Define global variable of use gpu no matter what test to include user main test + add_definitions(-DPMC_USE_GPU) + endif() endif() +if(NOT CUDA_FOUND AND ENABLE_GPU) + message( WARNING "Not CUDA module found, can't support GPU computation") +endif() ###################################################################### # debugging options @@ -236,6 +246,13 @@ else() set(MPI_TEST_FLAG serial) endif() +if(CUDA_FOUND) + add_test(test_rxn_arrhenius_mech_gpu ${CMAKE_BINARY_DIR}/test_run/unit_tests/input_files/run_rxn_arrhenius_gpu.sh ${MPI_TEST_FLAG}) +endif() + + +add_test(test_rxn_arrhenius_mech ${CMAKE_BINARY_DIR}/test_run/unit_tests/input_files/run_rxn_arrhenius.sh ${MPI_TEST_FLAG}) + if (ENABLE_TESTS) if (ENABLE_JSON AND ENABLE_SUNDIALS) # New unit tests (UNDER DEVELOPMENT) @@ -411,8 +428,12 @@ endif() # partmc library set(STD_C_FLAGS "-std=c99") + set(STD_CUDA_FLAGS "-dc -arch=compute_70 -code=sm_70") + + + if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") set(STD_F_FLAGS "-extend_source -warn truncated_source") elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU") @@ -498,7 +519,8 @@ set_source_files_properties(${CAMP_C_SRC} PROPERTIES COMPILE_FLAGS set(CAMP_CXX_SRC "") -if(ENABLE_GPU) +if (CUDA_FOUND) +#if(ENABLE_GPU) set(CAMP_CUDA_SRC src/cuda/camp_gpu_solver.cu src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu @@ -521,33 +543,38 @@ if(ENABLE_GPU) set_source_files_properties(${CAMP_CUDA_SRC} PROPERTIES COMPILE_FLAGS ${STD_CUDA_FLAGS}) + set_source_files_properties(${CAMP_CUDA_SRC} PROPERTIES LANGUAGE CUDA) else() - set(CAMP_CUDA_SRC "") +# set(CAMP_CUDA_SRC "") endif() -add_library(partmclib src/aero_state.F90 src/integer_varray.F90 - src/integer_rmap.F90 src/integer_rmap2.F90 src/aero_sorted.F90 - src/aero_binned.F90 src/bin_grid.F90 src/constants.F90 - src/scenario.F90 src/env_state.F90 src/aero_mode.F90 - src/aero_dist.F90 src/aero_weight.F90 src/aero_weight_array.F90 - src/coag_kernel_additive.F90 src/coag_kernel_sedi.F90 - src/coag_kernel_constant.F90 src/coag_kernel_brown.F90 - src/coag_kernel_zero.F90 src/coag_kernel_brown_free.F90 - src/coag_kernel_brown_cont.F90 src/aero_data.F90 src/run_exact.F90 - src/run_part.F90 src/util.F90 src/stats.F90 src/run_sect.F90 src/output.F90 - src/mosaic.F90 src/gas_data.F90 src/gas_state.F90 - src/coagulation.F90 src/exact_soln.F90 src/coagulation_dist.F90 - src/coag_kernel.F90 src/spec_line.F90 src/spec_file.F90 src/rand.F90 - src/aero_particle.F90 src/aero_particle_array.F90 src/mpi.F90 - src/netcdf.F90 src/aero_info.F90 src/aero_info_array.F90 - src/nucleate.F90 src/condense.F90 src/fractal.F90 src/chamber.F90 - src/property.F90 src/chem_spec_data.F90 - src/rxn_data.F90 src/camp_state.F90 src/mechanism_data.F90 - src/camp_core.F90 src/camp_solver_data.F90 src/aero_rep_data.F90 - src/aero_phase_data.F90 src/aero_rep_factory.F90 src/camp_interface.F90 - src/rxn_factory.F90 src/sub_model_data.F90 src/sub_model_factory.F90 - src/solver_stats.F90 src/camp_box_model_data.F90 +set(CAMP_F_SRC + src/aero_state.F90 src/integer_varray.F90 + src/integer_rmap.F90 src/integer_rmap2.F90 src/aero_sorted.F90 + src/aero_binned.F90 src/bin_grid.F90 src/constants.F90 + src/scenario.F90 src/env_state.F90 src/aero_mode.F90 + src/aero_dist.F90 src/aero_weight.F90 src/aero_weight_array.F90 + src/coag_kernel_additive.F90 src/coag_kernel_sedi.F90 + src/coag_kernel_constant.F90 src/coag_kernel_brown.F90 + src/coag_kernel_zero.F90 src/coag_kernel_brown_free.F90 + src/coag_kernel_brown_cont.F90 src/aero_data.F90 src/run_exact.F90 + src/run_part.F90 src/util.F90 src/stats.F90 src/run_sect.F90 src/output.F90 + src/mosaic.F90 src/gas_data.F90 src/gas_state.F90 + src/coagulation.F90 src/exact_soln.F90 src/coagulation_dist.F90 + src/coag_kernel.F90 src/spec_line.F90 src/spec_file.F90 src/rand.F90 + src/aero_particle.F90 src/aero_particle_array.F90 src/mpi.F90 + src/netcdf.F90 src/aero_info.F90 src/aero_info_array.F90 + src/nucleate.F90 src/condense.F90 src/fractal.F90 src/chamber.F90 + src/property.F90 src/chem_spec_data.F90 + src/rxn_data.F90 src/camp_state.F90 src/mechanism_data.F90 + src/camp_core.F90 src/camp_solver_data.F90 src/aero_rep_data.F90 + src/aero_phase_data.F90 src/aero_rep_factory.F90 src/camp_interface.F90 + src/rxn_factory.F90 src/sub_model_data.F90 src/sub_model_factory.F90 + src/solver_stats.F90 src/camp_box_model_data.F90 + ) + +add_library(partmclib ${CAMP_F_SRC} ${CAMP_C_SRC} ${AEROSOL_REPS_SRC} ${SUB_MODELS_SRC} ${REACTIONS_SRC} ${SUNDIALS_SRC} ${GSL_SRC} ${C_SORT_SRC} ${CAMP_CUDA_SRC} ${CAMP_CXX_SRC} ) @@ -556,6 +583,24 @@ target_link_libraries(partmclib ${NETCDF_LIBS} ${SUNDIALS_LIBS} set_target_properties(partmclib PROPERTIES OUTPUT_NAME partmc) +if (CUDA_FOUND) + add_library(partmclib_gpu ${CAMP_F_SRC} + ${CAMP_C_SRC} ${AEROSOL_REPS_SRC} ${SUB_MODELS_SRC} ${REACTIONS_SRC} + ${SUNDIALS_SRC} ${GSL_SRC} ${C_SORT_SRC} ${CAMP_CUDA_SRC} ${CAMP_CXX_SRC} ) + + target_compile_definitions(partmclib_gpu PRIVATE + "PMC_USE_GPU=ON") + + target_link_libraries(partmclib_gpu ${NETCDF_LIBS} ${SUNDIALS_LIBS} + ${MOSAIC_LIB} ${GSL_LIBS} ${JSON_LIB}) + + set_target_properties(partmclib_gpu PROPERTIES OUTPUT_NAME partmc_gpu) + + add_executable(partmc_gpu src/partmc.F90) + + target_link_libraries(partmc_gpu partmclib_gpu) +endif() + ###################################################################### # partmc executable @@ -630,6 +675,28 @@ add_executable(mock_monarch test/monarch/mock_monarch.F90 target_link_libraries(mock_monarch partmclib) + +set(UNIT_TEST_SRC + test/unit_tests/unit_test_data.F90 + test/unit_tests/unit_test_driver.F90) + +set(UNIT_TEST_RXN_ARRHENIUS_SRC + test/unit_tests/rxns/unit_test_rxn_arrhenius.F90 ${UNIT_TEST_SRC}) +set_source_files_properties(${UNIT_TEST_RXN_ARRHENIUS_SRC} PROPERTIES COMPILE_FLAGS ${STD_F_FLAGS}) +add_executable(unit_test_rxn_arrhenius ${UNIT_TEST_RXN_ARRHENIUS_SRC}) +target_compile_definitions(unit_test_rxn_arrhenius PRIVATE + "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ + ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") +target_link_libraries(unit_test_rxn_arrhenius partmclib) + +if (CUDA_FOUND) + add_executable(unit_test_rxn_arrhenius_gpu ${UNIT_TEST_RXN_ARRHENIUS_SRC}) + target_compile_definitions(unit_test_rxn_arrhenius_gpu PRIVATE + "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ + ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") + target_link_libraries(unit_test_rxn_arrhenius_gpu partmclib_gpu) +endif() + if (ENABLE_TESTS) ###################################################################### @@ -708,19 +775,9 @@ target_link_libraries(test_sub_model_ZSR_aerosol_water partmclib) ###################################################################### # New unit tests (UNDER DEVELOPMENT) -set(UNIT_TEST_SRC - test/unit_tests/unit_test_data.F90 - test/unit_tests/unit_test_driver.F90) -set(UNIT_TEST_RXN_ARRHENIUS_SRC - test/unit_tests/rxns/unit_test_rxn_arrhenius.F90 ${UNIT_TEST_SRC}) -set_source_files_properties(${UNIT_TEST_RXN_ARRHENIUS_SRC} PROPERTIES COMPILE_DEFINITIONS - "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ - ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") -set_source_files_properties(${UNIT_TEST_RXN_ARRHENIUS_SRC} PROPERTIES COMPILE_FLAGS - ${STD_F_FLAGS}) -add_executable(unit_test_rxn_arrhenius ${UNIT_TEST_RXN_ARRHENIUS_SRC}) -target_link_libraries(unit_test_rxn_arrhenius partmclib) + + ###################################################################### # test_camp_core diff --git a/src/camp_solver.c b/src/camp_solver.c index 97bb01fa7..342205475 100644 --- a/src/camp_solver.c +++ b/src/camp_solver.c @@ -339,6 +339,7 @@ void *solver_new(int n_state_var, int n_cells, int *var_type, int n_rxn, #ifdef PMC_USE_GPU solver_new_gpu_cu(n_dep_var, n_state_var, n_rxn, n_rxn_int_param, n_rxn_float_param, n_rxn_env_param, n_cells); + printf("gpu acting \n"); #endif #ifdef PMC_DEBUG @@ -881,8 +882,8 @@ int f(realtype t, N_Vector y, N_Vector deriv, void *solver_data) { // Calculate the time derivative f(t,y) rxn_calc_deriv(md, deriv_data, (double)time_step); #else - // Add contributions from reactions not implemented on GPU - rxn_calc_deriv_specific_types(md, deriv_data, (double)time_step); + // Add contributions from reactions not implemented on GPU + rxn_calc_deriv_specific_types(md, deriv_data, (double)time_step); #endif #ifdef PMC_DEBUG @@ -1016,8 +1017,9 @@ int Jac(realtype t, N_Vector y, N_Vector deriv, SUNMatrix J, void *solver_data, PMC_DEBUG_JAC(md->J_rxn, "reaction Jacobian"); #else - // Add contributions from reactions not implemented on GPU - rxn_calc_jac_specific_types(md, J_rxn_data, time_step); + // Add contributions from reactions not implemented on GPU + rxn_calc_jac_specific_types(md, J_rxn_data, time_step); + PMC_DEBUG_JAC(md->J_rxn, "reaction Jacobian"); #endif // rxn_calc_jac_specific_types(md, J_rxn_data, time_step); @@ -1868,7 +1870,7 @@ void error_handler(int error_code, const char *module, const char *function, */ void model_free(ModelData model_data) { #ifdef PMC_USE_GPU - // free_gpu_cu(); + free_gpu_cu(); #endif #ifdef PMC_USE_SUNDIALS diff --git a/src/cuda/camp_gpu_solver.cu b/src/cuda/camp_gpu_solver.cu index d4d021709..a0a0350a0 100644 --- a/src/cuda/camp_gpu_solver.cu +++ b/src/cuda/camp_gpu_solver.cu @@ -97,6 +97,7 @@ void solver_new_gpu_cu(int n_dep_var, if (n_dep_var*n_cells < DATA_SIZE_LIMIT_OPT){ few_data = 1; } + //few_data = 1; //Set working GPU: we have 4 gpu available on power9. as default, it should be assign to gpu 0 int device=0; @@ -425,6 +426,12 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste double *state = model_data->total_state; double *rate_constants = model_data->rxn_env_data; +//TODO: not working with new arrhenius test FOR SOME REASON SEG FAULT +//Pa ARREGLARLO poner deriv size y todo eso en un struct gpu y asi el multi cell y one cell core tienen su struct y sus cosas +//(parece que al ser global variable pues no lo pilla bien y no sobreescribe x algun motivo) + +/* + //Faster, use for few values if (few_data){ //This method of passing them as a function parameter has a theoric maximum of 4kb of data @@ -438,18 +445,13 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste HANDLE_ERROR(cudaMemcpy(rate_constants_gpu, rate_constants, rate_constants_size, cudaMemcpyHostToDevice)); } - HANDLE_ERROR(cudaMemset(deriv_gpu_data, 0, deriv_size)); + HANDLE_ERROR(cudaMemset(deriv_gpu_data, 0.0, deriv_size)); - solveDerivative << < (n_blocks), max_n_gpu_thread >> > - (state_gpu, deriv_gpu_data, time_step, model_data->n_per_cell_dep_var, - model_data->n_per_cell_state_var, model_data->n_rxn_env_data, - n_rxn, n_cells, - int_pointer_gpu, double_pointer_gpu, rate_constants_gpu, rate_constants_idx_gpu); cudaDeviceSynchronize();// Secure cuda synchronization //Use pinned memory for few values - if (few_data){ + if (!few_data){ HANDLE_ERROR(cudaMemcpy(deriv_cpu, deriv_gpu_data, deriv_size, cudaMemcpyDeviceToHost)); memcpy(deriv_data, deriv_cpu, deriv_size); } @@ -457,9 +459,11 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste HANDLE_ERROR(cudaMemcpy(deriv_data, deriv_gpu_data, deriv_size, cudaMemcpyDeviceToHost)); } + printf("holaa %d, %d\n", deriv_size/sizeof(double), n_cells); //TODO Calculate on CPU non gpu-translated type reactions (HL & SIMPOL phase transfer) (or wait till v2.0 with C++) + */ } @@ -514,8 +518,8 @@ __global__ void solveJacobian(double *state_init, double *jac_init, // state, jac_data, (void *) rxn_data, float_data, time_step, n_rxn); break; case RXN_ARRHENIUS : - rxn_gpu_arrhenius_calc_jac_contrib(rate_constants, - state, jac_data, (void *) rxn_data, float_data, time_step,n_rxn); + //rxn_gpu_arrhenius_calc_jac_contrib(rate_constants, + // state, jac_data, (void *) rxn_data, float_data, time_step,n_rxn); break; case RXN_CMAQ_H2O2 : rxn_gpu_CMAQ_H2O2_calc_jac_contrib(rate_constants, @@ -580,6 +584,8 @@ void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step) double *state = model_data->total_state; double *rate_constants = model_data->rxn_env_data; + /* + //Faster, use for few values if (few_data){ //This method of passing them as a function parameter has a theoric maximum of 4kb of data @@ -610,6 +616,8 @@ void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step) HANDLE_ERROR(cudaMemcpy(jac_data, jac_gpu_data, jac_size, cudaMemcpyDeviceToHost)); } + */ + } @@ -617,11 +625,12 @@ void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step) */ void free_gpu_cu() { +printf("free"); HANDLE_ERROR(cudaFree(int_pointer_gpu)); HANDLE_ERROR(cudaFree(double_pointer_gpu)); HANDLE_ERROR(cudaFree(deriv_gpu_data)); - HANDLE_ERROR(cudaFree(jac_gpu_data)); + //HANDLE_ERROR(cudaFree(jac_gpu_data)); if(few_data){ } @@ -630,7 +639,6 @@ void free_gpu_cu() { HANDLE_ERROR(cudaFree(rate_constants_gpu)); HANDLE_ERROR(cudaFree(rate_constants_idx_gpu)); } - } /* Auxiliar functions */ diff --git a/src/cuda/camp_gpu_solver.h b/src/cuda/camp_gpu_solver.h index d6be403d7..08a84d2fe 100644 --- a/src/cuda/camp_gpu_solver.h +++ b/src/cuda/camp_gpu_solver.h @@ -13,7 +13,6 @@ #include #include "../camp_common.h" - //Value to consider data size too big -> Memory optimization will change below and under the limit #define DATA_SIZE_LIMIT_OPT 2000 diff --git a/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu b/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu index 98fd0da15..e6dd78f45 100644 --- a/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu +++ b/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu @@ -143,10 +143,12 @@ __device__ double rxn_gpu_aqueous_equilibrium_calc_overall_rate(int *rxn_data, */ #ifdef PMC_USE_SUNDIALS -#ifdef PMC_USE_GPU -__device__ -#endif -void rxn_gpu_aqueous_equilibrium_calc_deriv_contrib(double *rate_constants, double *state, +//TODO: seems this implementation ifdef raise a warning of host__ device__ function with new tests gpu +//TODO: O a lo mejor es xk en el .h no esta asi (diria que si) +//#ifdef PMC_USE_GPU +//__device__ +//#endif +__device__ void rxn_gpu_aqueous_equilibrium_calc_deriv_contrib(double *rate_constants, double *state, double *deriv, void *rxn_data, double * double_pointer_gpu, double time_step, int n_rxn2) { int n_rxn=n_rxn2; diff --git a/src/cuda/rxns_gpu/rxn_arrhenius.cu b/src/cuda/rxns_gpu/rxn_arrhenius.cu index 73b177825..fe478c2c4 100644 --- a/src/cuda/rxns_gpu/rxn_arrhenius.cu +++ b/src/cuda/rxns_gpu/rxn_arrhenius.cu @@ -244,31 +244,29 @@ __device__ void rxn_gpu_arrhenius_calc_jac_contrib(double *rate_constants, doubl int n_rxn=n_rxn2; int *int_data = (int*) rxn_data; double *float_data = double_pointer_gpu; + double rate; // Calculate the reaction rate //double rate = RATE_CONSTANT_; - double rate = rate_constants[0]; - for (int i_spec=0; i_spec Date: Tue, 22 Oct 2019 15:42:01 +0200 Subject: [PATCH 2/4] Fixed new unit_test_arrhenius on gpu by moving the global gpu variables to camp_common.h --- CMakeLists.txt | 43 ++-- src/camp_common.h | 30 +++ src/camp_solver.c | 8 +- src/cuda/camp_gpu_solver.cu | 224 +++++++++--------- src/cuda/camp_gpu_solver.h | 3 +- test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 | 2 +- .../test_chemistry_cb05cl_ae5_gpu.sh | 30 +++ test/monarch/pmc_monarch_interface.F90 | 4 - 8 files changed, 200 insertions(+), 144 deletions(-) create mode 100755 test/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5_gpu.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ba098a90..0bfacc425 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,11 +248,15 @@ endif() if(CUDA_FOUND) add_test(test_rxn_arrhenius_mech_gpu ${CMAKE_BINARY_DIR}/test_run/unit_tests/input_files/run_rxn_arrhenius_gpu.sh ${MPI_TEST_FLAG}) -endif() + add_test(test_chemistry_cb05cl_ae5_gpu ${CMAKE_BINARY_DIR}/test_run/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5_gpu.sh ${MPI_TEST_FLAG}) + +endif() add_test(test_rxn_arrhenius_mech ${CMAKE_BINARY_DIR}/test_run/unit_tests/input_files/run_rxn_arrhenius.sh ${MPI_TEST_FLAG}) +add_test(test_chemistry_cb05cl_ae5 ${CMAKE_BINARY_DIR}/test_run/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5.sh ${MPI_TEST_FLAG}) + if (ENABLE_TESTS) if (ENABLE_JSON AND ENABLE_SUNDIALS) # New unit tests (UNDER DEVELOPMENT) @@ -659,13 +663,21 @@ set_source_files_properties(${CB5_EBI_SOLVER} PROPERTIES COMPILE_FLAGS ${CB5_EBI_FLAGS}) ###################################################################### -# test_chemistry_cb05cl_ae5_big +# test_chemistry_cb05cl_ae5 -add_executable(test_chemistry_cb05cl_ae5_big - test/chemistry/cb05cl_ae5/test_cb05cl_ae5_big.F90 +add_executable(test_chemistry_cb05cl_ae5 + test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 ${CB5_EBI_SOLVER} ${CB5_KPP_SOLVER} test/chemistry/cb05cl_ae5/module_BSC_CHEM_DATA.F90) -target_link_libraries(test_chemistry_cb05cl_ae5_big partmclib) +target_link_libraries(test_chemistry_cb05cl_ae5 partmclib) + +if (CUDA_FOUND) + add_executable(test_chemistry_cb05cl_ae5_gpu + test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 + ${CB5_EBI_SOLVER} ${CB5_KPP_SOLVER} + test/chemistry/cb05cl_ae5/module_BSC_CHEM_DATA.F90) + target_link_libraries(test_chemistry_cb05cl_ae5_gpu partmclib_gpu) +endif() ###################################################################### # MONARCH interface @@ -685,14 +697,14 @@ set(UNIT_TEST_RXN_ARRHENIUS_SRC set_source_files_properties(${UNIT_TEST_RXN_ARRHENIUS_SRC} PROPERTIES COMPILE_FLAGS ${STD_F_FLAGS}) add_executable(unit_test_rxn_arrhenius ${UNIT_TEST_RXN_ARRHENIUS_SRC}) target_compile_definitions(unit_test_rxn_arrhenius PRIVATE - "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ - ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") + "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ + ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") target_link_libraries(unit_test_rxn_arrhenius partmclib) if (CUDA_FOUND) add_executable(unit_test_rxn_arrhenius_gpu ${UNIT_TEST_RXN_ARRHENIUS_SRC}) target_compile_definitions(unit_test_rxn_arrhenius_gpu PRIVATE - "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ + "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") target_link_libraries(unit_test_rxn_arrhenius_gpu partmclib_gpu) endif() @@ -700,20 +712,19 @@ endif() if (ENABLE_TESTS) ###################################################################### -# test_chemistry_cb05cl_ae5 +# test_chemistry_cb05cl_ae5_big -add_executable(test_chemistry_cb05cl_ae5 - test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 - ${CB5_EBI_SOLVER} ${CB5_KPP_SOLVER} - test/chemistry/cb05cl_ae5/module_BSC_CHEM_DATA.F90) -target_link_libraries(test_chemistry_cb05cl_ae5 partmclib) +add_executable(test_chemistry_cb05cl_ae5_big + test/chemistry/cb05cl_ae5/test_cb05cl_ae5_big.F90 + ${CB5_EBI_SOLVER} ${CB5_KPP_SOLVER} + test/chemistry/cb05cl_ae5/module_BSC_CHEM_DATA.F90) +target_link_libraries(test_chemistry_cb05cl_ae5_big partmclib) ###################################################################### # test_chemistry_eqsam_v03d set(EQSAM_V03D - test/chemistry/eqsam_v03d/eqsam_v03d.F90 - ) + test/chemistry/eqsam_v03d/eqsam_v03d.F90) add_executable(test_chemistry_eqsam_v03d test/chemistry/eqsam_v03d/test_eqsam_v03d.F90 ${EQSAM_V03D}) diff --git a/src/camp_common.h b/src/camp_common.h index 8f6fd5049..d8e63724d 100644 --- a/src/camp_common.h +++ b/src/camp_common.h @@ -59,6 +59,12 @@ typedef struct { int param_id; // sub model Jacobian id } JacMap; +/* GPU data structure */ +typedef struct { + size_t deriv_size; + +} DataGPU; + /* Model data structure */ typedef struct { int n_per_cell_state_var; // number of state variables per grid cell @@ -167,6 +173,30 @@ typedef struct { // for the current grid cell int n_sub_model_env_data; // Number of sub model environmental parameters // from all sub models +//#ifdef CUDA_FOUND +// GPU data + double *deriv_gpu_data; + double *deriv_cpu; + double *jac_gpu_data; + double *jac_cpu; + size_t deriv_size; + size_t jac_size; + size_t state_size; + size_t env_size; + size_t rate_constants_size; + size_t rate_constants_idx_size; + bool few_data; + bool implemented_all; + int *int_pointer; + int *int_pointer_gpu; + double *double_pointer; + double *double_pointer_gpu; + double *state_gpu; + double *env_gpu; + double *rate_constants_gpu; + int *rate_constants_idx_gpu; + +//#endif } ModelData; /* Solver data structure */ diff --git a/src/camp_solver.c b/src/camp_solver.c index 342205475..a90fe29a1 100644 --- a/src/camp_solver.c +++ b/src/camp_solver.c @@ -337,9 +337,8 @@ void *solver_new(int n_state_var, int n_cells, int *var_type, int n_rxn, sd->model_data.sub_model_env_idx[0] = 0; #ifdef PMC_USE_GPU - solver_new_gpu_cu(n_dep_var, n_state_var, n_rxn, n_rxn_int_param, + solver_new_gpu_cu(&(sd->model_data), n_dep_var, n_state_var, n_rxn, n_rxn_int_param, n_rxn_float_param, n_rxn_env_param, n_cells); - printf("gpu acting \n"); #endif #ifdef PMC_DEBUG @@ -453,11 +452,6 @@ void solver_initialize(void *solver_data, double *abs_tol, double rel_tol, flag = CVodeSetDlsGuessHelper(sd->cvode_mem, guess_helper); check_flag_fail(&flag, "CVodeSetDlsGuessHelper", 1); -// Allocate Jacobian on GPU -#ifdef PMC_USE_GPU - allocate_jac_gpu(sd->model_data.n_per_cell_solver_jac_elem, n_cells); -#endif - // Set gpu rxn values #ifdef PMC_USE_GPU solver_set_rxn_data_gpu(&(sd->model_data)); diff --git a/src/cuda/camp_gpu_solver.cu b/src/cuda/camp_gpu_solver.cu index a0a0350a0..a05cbe017 100644 --- a/src/cuda/camp_gpu_solver.cu +++ b/src/cuda/camp_gpu_solver.cu @@ -28,29 +28,29 @@ int max_n_gpu_thread; int max_n_gpu_blocks; //General data -double *deriv_gpu_data; -double *deriv_cpu; -double *jac_gpu_data; -double *jac_cpu; -size_t deriv_size; -size_t jac_size; -size_t state_size; -size_t env_size; -size_t rate_constants_size; -size_t rate_constants_idx_size; - -bool few_data = 0; -bool implemented_all = true; -int *int_pointer; -int *int_pointer_gpu; -double *double_pointer; -double *double_pointer_gpu; -double *state_gpu; -double *state_cpu; -double *env_gpu; -double *rate_constants_gpu; -double *rate_constants_cpu; -int *rate_constants_idx_gpu; +/*double *model_data->deriv_gpu_data; +double *model_data->deriv_cpu; +double *model_data->jac_gpu_data; +double *model_data->jac_cpu; +//size_t model_data->deriv_size; +size_t model_data->jac_size; +size_t model_data->state_size; +size_t model_data->env_size; +size_t model_data->rate_constants_size; +size_t model_data->rate_constants_idx_size; + + +bool model_data->few_data; +bool model_data->implemented_all; +int *model_data->int_pointer; +int *model_data->int_pointer_gpu; +double *model_data->double_pointer; +double *model_data->double_pointer_gpu; +double *model_data->state_gpu; +double *model_data->env_gpu; +double *model_data->rate_constants_gpu; +int *model_data->rate_constants_idx_gpu; +*/ static void HandleError(cudaError_t err, const char *file, @@ -81,23 +81,29 @@ static void HandleError2(const char *file, * \param n_rxn_float_param Total number of floating-point reaction parameters * \param n_cells Number of grid cells to solve simultaneously */ -void solver_new_gpu_cu(int n_dep_var, +void solver_new_gpu_cu(ModelData *model_data, int n_dep_var, int n_state_var, int n_rxn, int n_rxn_int_param, int n_rxn_float_param, int n_rxn_env_param, int n_cells) { +//Get Global gpu data + //DataGPU *dgpu = (DataGPU *)malloc(sizeof(DataGPU)); //Lengths - state_size = n_state_var*n_cells * sizeof(double); - deriv_size = n_dep_var*n_cells * sizeof(double); - env_size = PMC_NUM_ENV_PARAM_*n_cells * sizeof(double); //Temp and pressure - rate_constants_size = n_rxn_env_param * n_cells * sizeof(double); - rate_constants_idx_size = (n_rxn+1) * sizeof(int); + model_data->state_size = n_state_var * n_cells * sizeof(double); + model_data->deriv_size = n_dep_var * n_cells * sizeof(double); + model_data->env_size = PMC_NUM_ENV_PARAM_ * n_cells * sizeof(double); //Temp and pressure + model_data->rate_constants_size = n_rxn_env_param * n_cells * sizeof(double); + model_data->rate_constants_idx_size = (n_rxn+1) * sizeof(int); + model_data->few_data = 0; + model_data->implemented_all = true; + + printf("new deriv_lenght %d, cells %d\n", model_data->deriv_size/sizeof(double), n_cells); //Detect if we are working with few data values if (n_dep_var*n_cells < DATA_SIZE_LIMIT_OPT){ - few_data = 1; + model_data->few_data = 1; } - //few_data = 1; + //model_data->few_data = 1; //Set working GPU: we have 4 gpu available on power9. as default, it should be assign to gpu 0 int device=0; @@ -113,18 +119,17 @@ void solver_new_gpu_cu(int n_dep_var, int n_blocks = (n_rxn + max_n_gpu_thread - 1) / max_n_gpu_thread; //GPU allocation - cudaMalloc((void **) &deriv_gpu_data, deriv_size); - cudaMalloc((void **) &state_gpu, state_size); - cudaMalloc((void **) &env_gpu, env_size); - cudaMalloc((void **) &rate_constants_gpu, rate_constants_size); - cudaMalloc((void **) &rate_constants_idx_gpu, rate_constants_idx_size); + cudaMalloc((void **) &model_data->deriv_gpu_data, model_data->deriv_size); + cudaMalloc((void **) &model_data->state_gpu, model_data->state_size); + cudaMalloc((void **) &model_data->env_gpu, model_data->env_size); + cudaMalloc((void **) &model_data->rate_constants_gpu, model_data->rate_constants_size); + cudaMalloc((void **) &model_data->rate_constants_idx_gpu, model_data->rate_constants_idx_size); //GPU allocation few data on pinned memory - if(few_data){ + if(model_data->few_data){ //Notice auxiliar variables are created because we // can't pin directly variables initialized before - cudaMallocHost((void**)&rate_constants_cpu, rate_constants_size); - cudaMallocHost((void**)&deriv_cpu, deriv_size); + cudaMallocHost((void**)&model_data->deriv_cpu, model_data->deriv_size); } // Warning if exceeding GPU limits @@ -139,22 +144,6 @@ void solver_new_gpu_cu(int n_dep_var, } -/** \brief Allocate Jacobian on GPU -* -* \param n_jac_elem Number of data elements on the jacobian -* \param n_cells Number of cells to compute -*/ -void allocate_jac_gpu(int n_jac_elem, int n_cells){ - - jac_size = n_jac_elem * n_cells * sizeof(double); - cudaMalloc((void **) &jac_gpu_data, jac_size); - - if(few_data){ - cudaMallocHost((void**)&jac_cpu, jac_size); - } - -} - /** \brief Set reaction data on GPU prepared structure. RXN data is divided * into two different matrix, per double and int data respectively. Matrix are * reversed to improve memory access on GPU. @@ -165,6 +154,7 @@ void allocate_jac_gpu(int n_jac_elem, int n_cells){ void solver_set_rxn_data_gpu(ModelData *model_data) { int n_rxn = model_data->n_rxn; + int n_cells = model_data->n_cells; unsigned int int_max_length = 0; unsigned int double_max_length = 0; @@ -227,7 +217,7 @@ void solver_set_rxn_data_gpu(ModelData *model_data) { #ifdef FAILURE_DETAIL printf("WARNING: Reaction type %d is not fully implemented on GPU. Computing on CPU...\n", rxn_type); #endif - implemented_all=false; + model_data->implemented_all=false; } //Get RXN lengths @@ -253,15 +243,15 @@ void solver_set_rxn_data_gpu(ModelData *model_data) { //Allocate int and double rxn data separately //Add -1 to avoid access and have a square matrix - int_pointer = (int *) malloc(rxn_int_length * sizeof(int)); - memset(int_pointer, -1, rxn_int_length * sizeof(int)); + model_data->int_pointer = (int *) malloc(rxn_int_length * sizeof(int)); + memset(model_data->int_pointer, -1, rxn_int_length * sizeof(int)); //Add 0 to avoid access and have a square matrix - double_pointer = (double*)calloc(rxn_double_length, sizeof(double)); + model_data->double_pointer = (double*)calloc(rxn_double_length, sizeof(double)); //GPU allocation - cudaMalloc((void **) &int_pointer_gpu, rxn_int_length * sizeof(int)); - cudaMalloc((void **) &double_pointer_gpu, rxn_double_length * sizeof(double)); + cudaMalloc((void **) &model_data->int_pointer_gpu, rxn_int_length * sizeof(int)); + cudaMalloc((void **) &model_data->double_pointer_gpu, rxn_double_length * sizeof(double)); //Update number of zeros added on each reaction for (int i_rxn = 0; i_rxn < n_rxn; i_rxn++) @@ -285,11 +275,11 @@ void solver_set_rxn_data_gpu(ModelData *model_data) { int i_pos=rxn_position[i_rxn];//i_rxn;//rxn_position[i_rxn];//for bubblesort for (int j = 0; j < int_lengths[i_pos]; j++){ int *rxn_int_data = &(model_data->rxn_int_data[model_data->rxn_int_indices[i_pos]]); - int_pointer[n_rxn*j + i_rxn] = rxn_int_data[j]; + model_data->int_pointer[n_rxn*j + i_rxn] = rxn_int_data[j]; } for (int j = 0; j < double_lengths[i_pos]; j++) { double *rxn_float_data = &(model_data->rxn_float_data[model_data->rxn_float_indices[i_pos]]); - double_pointer[n_rxn*j + i_rxn] = rxn_float_data[j]; + model_data->double_pointer[n_rxn*j + i_rxn] = rxn_float_data[j]; } //Reorder the rate indices //Todo update on main code the rate_constants to read consecutively in cpu @@ -297,11 +287,19 @@ void solver_set_rxn_data_gpu(ModelData *model_data) { } //Save data to GPU - HANDLE_ERROR(cudaMemcpy(int_pointer_gpu, int_pointer, rxn_int_length*sizeof(int), cudaMemcpyHostToDevice)); - HANDLE_ERROR(cudaMemcpy(double_pointer_gpu, double_pointer, rxn_double_length*sizeof(double), cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(model_data->int_pointer_gpu, model_data->int_pointer, rxn_int_length*sizeof(int), cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(model_data->double_pointer_gpu, model_data->double_pointer, rxn_double_length*sizeof(double), cudaMemcpyHostToDevice)); //Set rate_constants-idx - HANDLE_ERROR(cudaMemcpy(rate_constants_idx_gpu, rate_constants_idx_aux, rate_constants_idx_size, cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(model_data->rate_constants_idx_gpu, rate_constants_idx_aux, model_data->rate_constants_idx_size, cudaMemcpyHostToDevice)); + + //Allocate jacobian + model_data->jac_size = model_data->n_per_cell_solver_jac_elem * n_cells * sizeof(double); + cudaMalloc((void **) &model_data->jac_gpu_data, model_data->jac_size); + + if(model_data->few_data){ + cudaMallocHost((void**)&model_data->jac_cpu, model_data->jac_size); + } } @@ -311,11 +309,11 @@ void solver_set_rxn_data_gpu(ModelData *model_data) { * \param deriv_init Pointer to first value of derivative array * \param time_step Current time step being computed (s) * \param deriv_length_cell Derivative length for one cell - * \param state_size_cell Derivative length for one cell + * \param model_data->state_size_cell Derivative length for one cell * \param n_rxn Number of reactions to include * \param n_cells_gpu Number of cells to compute - * \param int_pointer Pointer to integer reaction data - * \param double_pointer Pointer to double reaction data + * \param model_data->int_pointer Pointer to integer reaction data + * \param model_data->double_pointer Pointer to double reaction data * \param rate_constants_init Pointer to first value of reaction rates */ __global__ void solveDerivative(double *state_init, double *deriv_init, @@ -430,40 +428,39 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste //Pa ARREGLARLO poner deriv size y todo eso en un struct gpu y asi el multi cell y one cell core tienen su struct y sus cosas //(parece que al ser global variable pues no lo pilla bien y no sobreescribe x algun motivo) -/* - //Faster, use for few values - if (few_data){ + if (model_data->few_data){ //This method of passing them as a function parameter has a theoric maximum of 4kb of data - state_gpu= state; - rate_constants_gpu= rate_constants; + model_data->state_gpu= state; + model_data->rate_constants_gpu= rate_constants; } //Slower, use for large values else{ - HANDLE_ERROR(cudaMemcpy(state_gpu, state, state_size, cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(model_data->state_gpu, state, model_data->state_size, cudaMemcpyHostToDevice)); //todo rate_constants only change each time_step, not each deriv iteration - HANDLE_ERROR(cudaMemcpy(rate_constants_gpu, rate_constants, rate_constants_size, cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(model_data->rate_constants_gpu, rate_constants, model_data->rate_constants_size, cudaMemcpyHostToDevice)); } - HANDLE_ERROR(cudaMemset(deriv_gpu_data, 0.0, deriv_size)); + HANDLE_ERROR(cudaMemset(model_data->deriv_gpu_data, 0.0, model_data->deriv_size)); + solveDerivative << < (n_blocks), max_n_gpu_thread >> > + (model_data->state_gpu, model_data->deriv_gpu_data, time_step, model_data->n_per_cell_dep_var, + model_data->n_per_cell_state_var, model_data->n_rxn_env_data, + n_rxn, n_cells, + model_data->int_pointer_gpu, model_data->double_pointer_gpu, model_data->rate_constants_gpu, model_data->rate_constants_idx_gpu); cudaDeviceSynchronize();// Secure cuda synchronization //Use pinned memory for few values - if (!few_data){ - HANDLE_ERROR(cudaMemcpy(deriv_cpu, deriv_gpu_data, deriv_size, cudaMemcpyDeviceToHost)); - memcpy(deriv_data, deriv_cpu, deriv_size); + if (model_data->few_data){ + HANDLE_ERROR(cudaMemcpy(model_data->deriv_cpu, model_data->deriv_gpu_data, model_data->deriv_size, cudaMemcpyDeviceToHost)); + memcpy(deriv_data, model_data->deriv_cpu, model_data->deriv_size); } else { - HANDLE_ERROR(cudaMemcpy(deriv_data, deriv_gpu_data, deriv_size, cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(deriv_data, model_data->deriv_gpu_data, model_data->deriv_size, cudaMemcpyDeviceToHost)); } - printf("holaa %d, %d\n", deriv_size/sizeof(double), n_cells); - - //TODO Calculate on CPU non gpu-translated type reactions (HL & SIMPOL phase transfer) (or wait till v2.0 with C++) - - */ + //printf("deriv deriv_lenght %d, cells %d\n", model_data->deriv_size/sizeof(double), n_cells); } @@ -473,11 +470,11 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste * \param jac_init Pointer to first value of jacobian array * \param time_step Current time step being computed (s) * \param jac_length_cell jacobian length for one cell - * \param state_size_cell jacobian length for one cell + * \param model_data->state_size_cell jacobian length for one cell * \param n_rxn Number of reactions to include * \param n_cells_gpu Number of cells to compute - * \param int_pointer Pointer to integer reaction data - * \param double_pointer Pointer to double reaction data + * \param model_data->int_pointer Pointer to integer reaction data + * \param model_data->double_pointer Pointer to double reaction data * \param rate_constants_init Pointer to first value of reaction rates */ //TODO: fix jacGPU once matt let me modify rxn_solver and reduce jacobians (in v2.0 or before if needed) @@ -584,39 +581,37 @@ void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step) double *state = model_data->total_state; double *rate_constants = model_data->rxn_env_data; - /* - //Faster, use for few values - if (few_data){ + if (model_data->few_data){ //This method of passing them as a function parameter has a theoric maximum of 4kb of data - state_gpu= state; - rate_constants_gpu= rate_constants; + model_data->state_gpu= state; + model_data->rate_constants_gpu= rate_constants; } //Slower, use for large values else{ - HANDLE_ERROR(cudaMemcpy(state_gpu, state, state_size, cudaMemcpyHostToDevice)); - HANDLE_ERROR(cudaMemcpy(rate_constants_gpu, rate_constants, rate_constants_size, cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(model_data->state_gpu, state, model_data->state_size, cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(model_data->rate_constants_gpu, rate_constants, model_data->rate_constants_size, cudaMemcpyHostToDevice)); } - HANDLE_ERROR(cudaMemset(jac_gpu_data, 0, jac_size)); + HANDLE_ERROR(cudaMemset(model_data->jac_gpu_data, 0, model_data->jac_size)); solveJacobian << < (n_blocks), max_n_gpu_thread >> > - (state_gpu, jac_gpu_data, time_step, model_data->n_per_cell_rxn_jac_elem, + (model_data->state_gpu, model_data->jac_gpu_data, time_step, model_data->n_per_cell_rxn_jac_elem, model_data->n_per_cell_state_var, model_data->n_rxn_env_data, - n_rxn, n_cells, int_pointer_gpu, double_pointer_gpu, rate_constants_gpu, rate_constants_idx_gpu); + n_rxn, n_cells, model_data->int_pointer_gpu, model_data->double_pointer_gpu, model_data->rate_constants_gpu, model_data->rate_constants_idx_gpu); cudaDeviceSynchronize();// Secure cuda synchronization //Use pinned memory for few values - if (few_data){ - HANDLE_ERROR(cudaMemcpy(jac_cpu, jac_gpu_data, jac_size, cudaMemcpyDeviceToHost)); - memcpy(jac_data, jac_cpu, jac_size); + if (model_data->few_data){ + HANDLE_ERROR(cudaMemcpy(model_data->jac_cpu, model_data->jac_gpu_data, model_data->jac_size, cudaMemcpyDeviceToHost)); + memcpy(jac_data, model_data->jac_cpu, model_data->jac_size); } else { - HANDLE_ERROR(cudaMemcpy(jac_data, jac_gpu_data, jac_size, cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(jac_data, model_data->jac_gpu_data, model_data->jac_size, cudaMemcpyDeviceToHost)); } - */ + } @@ -624,21 +619,22 @@ void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step) /** \brief Free GPU data structures */ void free_gpu_cu() { +/* +printf("free gpu\n"); -printf("free"); - - HANDLE_ERROR(cudaFree(int_pointer_gpu)); - HANDLE_ERROR(cudaFree(double_pointer_gpu)); - HANDLE_ERROR(cudaFree(deriv_gpu_data)); - //HANDLE_ERROR(cudaFree(jac_gpu_data)); + HANDLE_ERROR(cudaFree(model_data->int_pointer_gpu)); + HANDLE_ERROR(cudaFree(model_data->double_pointer_gpu)); + HANDLE_ERROR(cudaFree(model_data->deriv_gpu_data)); + //HANDLE_ERROR(cudaFree(model_data->jac_gpu_data)); - if(few_data){ + if(model_data->few_data){ } else{ - HANDLE_ERROR(cudaFree(state_gpu)); - HANDLE_ERROR(cudaFree(rate_constants_gpu)); - HANDLE_ERROR(cudaFree(rate_constants_idx_gpu)); + HANDLE_ERROR(cudaFree(model_data->state_gpu)); + HANDLE_ERROR(cudaFree(model_data->rate_constants_gpu)); + HANDLE_ERROR(cudaFree(model_data->rate_constants_idx_gpu)); } + */ } /* Auxiliar functions */ diff --git a/src/cuda/camp_gpu_solver.h b/src/cuda/camp_gpu_solver.h index 08a84d2fe..bf51da0cc 100644 --- a/src/cuda/camp_gpu_solver.h +++ b/src/cuda/camp_gpu_solver.h @@ -23,9 +23,8 @@ //Force solving on CPU: Test option #define FORCE_CPU 0 -void solver_new_gpu_cu(int n_dep_var, int n_state_var, int n_rxn, +void solver_new_gpu_cu(ModelData *model_data, int n_dep_var, int n_state_var, int n_rxn, int n_rxn_int_param, int n_rxn_float_param, int n_rxn_env_param, int n_cells); -void allocate_jac_gpu(int n_jac_elem, int n_cells); void rxn_update_env_state_gpu(ModelData *model_data, double *env); void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_step); void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step); diff --git a/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 b/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 index c46101cc3..a80c9d9b4 100644 --- a/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 +++ b/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 @@ -42,7 +42,7 @@ program pmc_test_cb05cl_ae5 ! CAMP-chem output file unit integer(kind=i_kind), parameter :: CAMP_FILE_UNIT = 12 ! Number of timesteps to integrate over - integer(kind=i_kind), parameter :: NUM_TIME_STEPS = 100 + integer(kind=i_kind), parameter :: NUM_TIME_STEPS = 2!100 ! Number of EBI-solver species integer(kind=i_kind), parameter :: NUM_EBI_SPEC = 72 ! Number of EBI-solever photolysis reactions diff --git a/test/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5_gpu.sh b/test/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5_gpu.sh new file mode 100755 index 000000000..b6e591b83 --- /dev/null +++ b/test/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5_gpu.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# exit on error +set -e +# turn on command echoing +set -v +# make sure that the current directory is the one where this script is +cd ${0%/*} +# make the output directory if it doesn't exist +mkdir -p out + +((counter = 1)) +while [ true ] +do + echo Attempt $counter + +if ! ../../../test_chemistry_cb05cl_ae5_gpu; then + echo Failure "$counter" + if [ "$counter" -gt 1 ] + then + echo FAIL + exit 1 + fi + echo retrying... + else + echo PASS + exit 0 + fi + ((counter++)) +done diff --git a/test/monarch/pmc_monarch_interface.F90 b/test/monarch/pmc_monarch_interface.F90 index a61559d39..211258da8 100644 --- a/test/monarch/pmc_monarch_interface.F90 +++ b/test/monarch/pmc_monarch_interface.F90 @@ -390,7 +390,6 @@ subroutine integrate(this, start_time, time_step, i_start, i_end, j_start, & state_size_per_cell = this%camp_core%state_size_per_cell() end if - #ifdef PMC_DEBUG ! Evaluate the Jacobian during solving solver_stats%eval_Jac = .true. @@ -435,7 +434,6 @@ subroutine integrate(this, start_time, time_step, i_start, i_end, j_start, & call this%camp_core%solve(this%camp_state, & real(time_step, kind=dp), solver_stats = solver_stats) - #ifdef PMC_DEBUG ! Check the Jacobian evaluations call warn_assert_msg(611569150, solver_stats%Jac_eval_fails.eq.0,& @@ -444,8 +442,6 @@ subroutine integrate(this, start_time, time_step, i_start, i_end, j_start, & trim( to_string( start_time ) ) ) #endif - - ! Update the MONARCH tracer array with new species concentrations MONARCH_conc(i,j,k_flip,this%map_monarch_id(:)) = & this%camp_state%state_var(this%map_camp_id(:)) From c0b2f63ab5eec44c40aa90ec7b25c066fb49568b Mon Sep 17 00:00:00 2001 From: cguzman Date: Sat, 26 Oct 2019 21:17:09 +0200 Subject: [PATCH 3/4] Cleaning code. Prepared a folder debug for other issue --- CMakeLists.txt | 71 ++++++++--------- src/camp_debug.h | 4 + src/camp_solver.c | 11 +-- src/camp_solver_data.F90 | 3 +- src/cuda/camp_gpu_solver.cu | 79 ++++++++++--------- src/cuda/camp_gpu_solver.h | 1 + .../rxns_gpu/rxn_SIMPOL_phase_transfer.cu | 1 - src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu | 3 +- src/cuda/rxns_gpu/rxn_arrhenius.cu | 5 +- src/debug_and_stats/camp_debug_2.c | 24 ++++++ src/debug_and_stats/camp_debug_2.h | 12 +++ test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 | 2 +- 12 files changed, 126 insertions(+), 90 deletions(-) create mode 100644 src/debug_and_stats/camp_debug_2.c create mode 100644 src/debug_and_stats/camp_debug_2.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bfacc425..ae7942af0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -246,17 +246,6 @@ else() set(MPI_TEST_FLAG serial) endif() -if(CUDA_FOUND) - add_test(test_rxn_arrhenius_mech_gpu ${CMAKE_BINARY_DIR}/test_run/unit_tests/input_files/run_rxn_arrhenius_gpu.sh ${MPI_TEST_FLAG}) - - add_test(test_chemistry_cb05cl_ae5_gpu ${CMAKE_BINARY_DIR}/test_run/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5_gpu.sh ${MPI_TEST_FLAG}) - -endif() - -add_test(test_rxn_arrhenius_mech ${CMAKE_BINARY_DIR}/test_run/unit_tests/input_files/run_rxn_arrhenius.sh ${MPI_TEST_FLAG}) - -add_test(test_chemistry_cb05cl_ae5 ${CMAKE_BINARY_DIR}/test_run/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5.sh ${MPI_TEST_FLAG}) - if (ENABLE_TESTS) if (ENABLE_JSON AND ENABLE_SUNDIALS) # New unit tests (UNDER DEVELOPMENT) @@ -283,6 +272,15 @@ if (ENABLE_TESTS) add_test(test_MONARCH_1 ${CMAKE_BINARY_DIR}/test_run/monarch/test_monarch_1.sh ${MPI_TEST_FLAG}) add_test(test_MONARCH_2 ${CMAKE_BINARY_DIR}/test_run/monarch/test_monarch_2.sh ${MPI_TEST_FLAG}) + if (CUDA_FOUND AND NOT ENABLE_GPU) + #todo: new unit_tests with gpu + add_test(test_rxn_arrhenius_mech_gpu ${CMAKE_BINARY_DIR}/test_run/unit_tests/input_files/run_rxn_arrhenius_gpu.sh ${MPI_TEST_FLAG}) + + #Relevant mechanisms tests + add_test(test_chemistry_cb05cl_ae5_gpu ${CMAKE_BINARY_DIR}/test_run/chemistry/cb05cl_ae5/test_chemistry_cb05cl_ae5_gpu.sh ${MPI_TEST_FLAG}) + + endif() + endif() add_test(test_additive_1 ${CMAKE_BINARY_DIR}/test_run/additive/test_additive_1.sh) @@ -435,9 +433,6 @@ set(STD_C_FLAGS "-std=c99") set(STD_CUDA_FLAGS "-dc -arch=compute_70 -code=sm_70") - - - if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") set(STD_F_FLAGS "-extend_source -warn truncated_source") elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU") @@ -516,7 +511,8 @@ set(SUB_MODELS_SRC ${SUB_MODELS_F_SRC} ${SUB_MODELS_C_SRC}) set(CAMP_C_SRC src/camp_solver.c src/rxn_solver.c src/aero_phase_solver.c - src/aero_rep_solver.c src/sub_model_solver.c) + src/aero_rep_solver.c src/sub_model_solver.c + src/debug_and_stats/camp_debug_2.c) set_source_files_properties(${CAMP_C_SRC} PROPERTIES COMPILE_FLAGS ${STD_C_FLAGS}) @@ -587,7 +583,7 @@ target_link_libraries(partmclib ${NETCDF_LIBS} ${SUNDIALS_LIBS} set_target_properties(partmclib PROPERTIES OUTPUT_NAME partmc) -if (CUDA_FOUND) +if (CUDA_FOUND AND NOT ENABLE_GPU) add_library(partmclib_gpu ${CAMP_F_SRC} ${CAMP_C_SRC} ${AEROSOL_REPS_SRC} ${SUB_MODELS_SRC} ${REACTIONS_SRC} ${SUNDIALS_SRC} ${GSL_SRC} ${C_SORT_SRC} ${CAMP_CUDA_SRC} ${CAMP_CXX_SRC} ) @@ -671,7 +667,7 @@ add_executable(test_chemistry_cb05cl_ae5 test/chemistry/cb05cl_ae5/module_BSC_CHEM_DATA.F90) target_link_libraries(test_chemistry_cb05cl_ae5 partmclib) -if (CUDA_FOUND) +if (CUDA_FOUND AND NOT ENABLE_GPU) add_executable(test_chemistry_cb05cl_ae5_gpu test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 ${CB5_EBI_SOLVER} ${CB5_KPP_SOLVER} @@ -687,28 +683,6 @@ add_executable(mock_monarch test/monarch/mock_monarch.F90 target_link_libraries(mock_monarch partmclib) - -set(UNIT_TEST_SRC - test/unit_tests/unit_test_data.F90 - test/unit_tests/unit_test_driver.F90) - -set(UNIT_TEST_RXN_ARRHENIUS_SRC - test/unit_tests/rxns/unit_test_rxn_arrhenius.F90 ${UNIT_TEST_SRC}) -set_source_files_properties(${UNIT_TEST_RXN_ARRHENIUS_SRC} PROPERTIES COMPILE_FLAGS ${STD_F_FLAGS}) -add_executable(unit_test_rxn_arrhenius ${UNIT_TEST_RXN_ARRHENIUS_SRC}) -target_compile_definitions(unit_test_rxn_arrhenius PRIVATE - "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ - ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") -target_link_libraries(unit_test_rxn_arrhenius partmclib) - -if (CUDA_FOUND) - add_executable(unit_test_rxn_arrhenius_gpu ${UNIT_TEST_RXN_ARRHENIUS_SRC}) - target_compile_definitions(unit_test_rxn_arrhenius_gpu PRIVATE - "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ - ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") - target_link_libraries(unit_test_rxn_arrhenius_gpu partmclib_gpu) -endif() - if (ENABLE_TESTS) ###################################################################### @@ -786,9 +760,26 @@ target_link_libraries(test_sub_model_ZSR_aerosol_water partmclib) ###################################################################### # New unit tests (UNDER DEVELOPMENT) +set(UNIT_TEST_SRC + test/unit_tests/unit_test_data.F90 + test/unit_tests/unit_test_driver.F90) +set(UNIT_TEST_RXN_ARRHENIUS_SRC + test/unit_tests/rxns/unit_test_rxn_arrhenius.F90 ${UNIT_TEST_SRC}) +set_source_files_properties(${UNIT_TEST_RXN_ARRHENIUS_SRC} PROPERTIES COMPILE_FLAGS ${STD_F_FLAGS}) +add_executable(unit_test_rxn_arrhenius ${UNIT_TEST_RXN_ARRHENIUS_SRC}) +target_compile_definitions(unit_test_rxn_arrhenius PRIVATE + "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ + ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") +target_link_libraries(unit_test_rxn_arrhenius partmclib) - +if (CUDA_FOUND AND NOT ENABLE_GPU) + add_executable(unit_test_rxn_arrhenius_gpu ${UNIT_TEST_RXN_ARRHENIUS_SRC}) + target_compile_definitions(unit_test_rxn_arrhenius_gpu PRIVATE + "UNIT_TEST_MODULE_=pmc_unit_test_rxn_arrhenius \ + ;UNIT_TEST_TYPE_=unit_test_rxn_arrhenius_t()") + target_link_libraries(unit_test_rxn_arrhenius_gpu partmclib_gpu) +endif() ###################################################################### # test_camp_core diff --git a/src/camp_debug.h b/src/camp_debug.h index 58bac669d..5f86fe1e6 100644 --- a/src/camp_debug.h +++ b/src/camp_debug.h @@ -5,6 +5,8 @@ * Header file with some debugging functions for use with camp_solver.c * */ +#ifndef CAMP_DEBUG_H +#define CAMP_DEBUG_H #ifdef PMC_DEBUG #define PMC_DEBUG_SPEC_ 0 #define PMC_DEBUG_PRINT(x) \ @@ -146,3 +148,5 @@ static void print_derivative(N_Vector deriv) { printf(" index: %d \n", i); } } + +#endif \ No newline at end of file diff --git a/src/camp_solver.c b/src/camp_solver.c index a90fe29a1..08e11f641 100644 --- a/src/camp_solver.c +++ b/src/camp_solver.c @@ -964,7 +964,8 @@ int Jac(realtype t, N_Vector y, N_Vector deriv, SUNMatrix J, void *solver_data, #ifdef PMC_USE_GPU // Calculate the Jacobian - rxn_calc_jac_gpu(md, J, time_step); + //TODO: Fix jacobian mapping with jac_map[i_map].rxn_id + //rxn_calc_jac_gpu(md, J, time_step); #endif #ifdef PMC_DEBUG @@ -973,7 +974,6 @@ int Jac(realtype t, N_Vector y, N_Vector deriv, SUNMatrix J, void *solver_data, #endif // Solving on CPU only - // Loop over the grid cells to calculate sub-model and rxn Jacobians for (int i_cell = 0; i_cell < n_cells; ++i_cell) { // Set the grid cell state pointers @@ -1004,19 +1004,20 @@ int Jac(realtype t, N_Vector y, N_Vector deriv, SUNMatrix J, void *solver_data, clock_t start = clock(); #endif -#ifndef PMC_USE_GPU +//#ifdef PMC_USE_GPU // Calculate the reaction Jacobian rxn_calc_jac(md, J_rxn_data, time_step); PMC_DEBUG_JAC(md->J_rxn, "reaction Jacobian"); -#else +/*#else // Add contributions from reactions not implemented on GPU rxn_calc_jac_specific_types(md, J_rxn_data, time_step); PMC_DEBUG_JAC(md->J_rxn, "reaction Jacobian"); #endif -// rxn_calc_jac_specific_types(md, J_rxn_data, time_step); +rxn_calc_jac_specific_types(md, J_rxn_data, time_step); +*/ #ifdef PMC_DEBUG clock_t end = clock(); sd->timeJac += (end - start); diff --git a/src/camp_solver_data.F90 b/src/camp_solver_data.F90 index cb694c908..5bfb4c50a 100644 --- a/src/camp_solver_data.F90 +++ b/src/camp_solver_data.F90 @@ -888,7 +888,8 @@ subroutine solve(this, camp_state, t_initial, t_final, solver_stats) end if ! Reset the solver function timers - call this%reset_timers( ) + !Move this to intialization: I want to print only 1 time the deriv, not all times we call solve... + !call this%reset_timers( ) #endif ! Run the solver diff --git a/src/cuda/camp_gpu_solver.cu b/src/cuda/camp_gpu_solver.cu index a05cbe017..990bbe7c7 100644 --- a/src/cuda/camp_gpu_solver.cu +++ b/src/cuda/camp_gpu_solver.cu @@ -27,30 +27,12 @@ extern "C" { int max_n_gpu_thread; int max_n_gpu_blocks; -//General data -/*double *model_data->deriv_gpu_data; -double *model_data->deriv_cpu; -double *model_data->jac_gpu_data; -double *model_data->jac_cpu; -//size_t model_data->deriv_size; -size_t model_data->jac_size; -size_t model_data->state_size; -size_t model_data->env_size; -size_t model_data->rate_constants_size; -size_t model_data->rate_constants_idx_size; - - -bool model_data->few_data; -bool model_data->implemented_all; -int *model_data->int_pointer; -int *model_data->int_pointer_gpu; -double *model_data->double_pointer; -double *model_data->double_pointer_gpu; -double *model_data->state_gpu; -double *model_data->env_gpu; -double *model_data->rate_constants_gpu; -int *model_data->rate_constants_idx_gpu; -*/ +//Debug info +int counterDeriv; // Total calls to f() +int counterJac; // Total calls to Jac() +clock_t timeDeriv; // Compute time for calls to f() +clock_t timeJac; // Compute time for calls to Jac() + static void HandleError(cudaError_t err, const char *file, @@ -85,9 +67,6 @@ void solver_new_gpu_cu(ModelData *model_data, int n_dep_var, int n_state_var, int n_rxn, int n_rxn_int_param, int n_rxn_float_param, int n_rxn_env_param, int n_cells) { -//Get Global gpu data - //DataGPU *dgpu = (DataGPU *)malloc(sizeof(DataGPU)); - //Lengths model_data->state_size = n_state_var * n_cells * sizeof(double); model_data->deriv_size = n_dep_var * n_cells * sizeof(double); @@ -96,14 +75,13 @@ void solver_new_gpu_cu(ModelData *model_data, int n_dep_var, model_data->rate_constants_idx_size = (n_rxn+1) * sizeof(int); model_data->few_data = 0; model_data->implemented_all = true; - - printf("new deriv_lenght %d, cells %d\n", model_data->deriv_size/sizeof(double), n_cells); + counterDeriv=0; + counterJac=0; //Detect if we are working with few data values if (n_dep_var*n_cells < DATA_SIZE_LIMIT_OPT){ model_data->few_data = 1; } - //model_data->few_data = 1; //Set working GPU: we have 4 gpu available on power9. as default, it should be assign to gpu 0 int device=0; @@ -233,8 +211,6 @@ void solver_set_rxn_data_gpu(ModelData *model_data) { } - //todo checkout chem_mod after 99 merge, save the changed files of this branch (like gpu) copy into checkout chem_mod - //Add a for to search the biggest distance int_max_length (ptrs[i] - ptrs[i-1] //Total lengths of rxn structure @@ -407,6 +383,14 @@ __global__ void solveDerivative(double *state_init, double *deriv_init, } +static void print_derivative_3(N_Vector deriv) { + // printf(" deriv length: %d\n", NV_LENGTH_S(deriv)); + for (int i = 0; i < NV_LENGTH_S(deriv); i++) { // NV_LENGTH_S(deriv) + printf(" deriv: % -le", NV_DATA_S(deriv)[i]); + printf(" index: %d \n", i); + } +} + /** \brief Calculate the time derivative \f$f(t,y)\f$ on GPU * * \param model_data Pointer to the model data @@ -443,6 +427,7 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste HANDLE_ERROR(cudaMemset(model_data->deriv_gpu_data, 0.0, model_data->deriv_size)); + //TODO: execute this asyncrhonous and let CPU thinks be computed in the meanwhile solveDerivative << < (n_blocks), max_n_gpu_thread >> > (model_data->state_gpu, model_data->deriv_gpu_data, time_step, model_data->n_per_cell_dep_var, model_data->n_per_cell_state_var, model_data->n_rxn_env_data, @@ -460,7 +445,12 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste HANDLE_ERROR(cudaMemcpy(deriv_data, model_data->deriv_gpu_data, model_data->deriv_size, cudaMemcpyDeviceToHost)); } - //printf("deriv deriv_lenght %d, cells %d\n", model_data->deriv_size/sizeof(double), n_cells); + //if(counterDeriv==0) print_derivative_3(deriv); + + //counterDeriv++; + +//todo: add mock_monarch as test gpu? +//todo: fix debug things... this camp_debug.h is horrible, and probably it will be a better option to print timederiv or counter... } @@ -477,7 +467,6 @@ void rxn_calc_deriv_gpu(ModelData *model_data, N_Vector deriv, realtype time_ste * \param model_data->double_pointer Pointer to double reaction data * \param rate_constants_init Pointer to first value of reaction rates */ -//TODO: fix jacGPU once matt let me modify rxn_solver and reduce jacobians (in v2.0 or before if needed) __global__ void solveJacobian(double *state_init, double *jac_init, double time_step, int jac_length_cell, int state_size_cell, int rate_constants_size_cell, int n_rxn, @@ -515,8 +504,8 @@ __global__ void solveJacobian(double *state_init, double *jac_init, // state, jac_data, (void *) rxn_data, float_data, time_step, n_rxn); break; case RXN_ARRHENIUS : - //rxn_gpu_arrhenius_calc_jac_contrib(rate_constants, - // state, jac_data, (void *) rxn_data, float_data, time_step,n_rxn); + rxn_gpu_arrhenius_calc_jac_contrib(rate_constants, + state, jac_data, (void *) rxn_data, float_data, time_step,n_rxn); break; case RXN_CMAQ_H2O2 : rxn_gpu_CMAQ_H2O2_calc_jac_contrib(rate_constants, @@ -563,15 +552,29 @@ __global__ void solveJacobian(double *state_init, double *jac_init, } } +static void print_jacobian_3(SUNMatrix M) { + printf("\n NNZ JAC: %lld \n", SM_NNZ_S(M)); + printf("DATA | INDEXVALS:\n"); + for (int i = 0; i < SM_NNZ_S(M); i++) { + printf("% -le \n", (SM_DATA_S(M))[i]); + // printf("%lld \n", (SM_INDEXVALS_S(M))[i]); + } + printf("PTRS:\n"); + for (int i = 0; i <= SM_NP_S(M); i++) { + //printf("%lld \n", (SM_INDEXPTRS_S(M))[i]); + } +} + /** \brief Calculate the Jacobian on GPU * * \param model_data Pointer to the model data * \param J Jacobian to be calculated * \param time_step Current model time step (s) */ - void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step) { + //TODO: Fix jacobian with jac_ids... + // Get a pointer to the jacobian data double *jac_data = SM_DATA_S(jac); int n_cells = model_data->n_cells; @@ -611,7 +614,9 @@ void rxn_calc_jac_gpu(ModelData *model_data, SUNMatrix jac, realtype time_step) HANDLE_ERROR(cudaMemcpy(jac_data, model_data->jac_gpu_data, model_data->jac_size, cudaMemcpyDeviceToHost)); } + if(counterJac==0)print_jacobian_3(jac); + counterJac++; } diff --git a/src/cuda/camp_gpu_solver.h b/src/cuda/camp_gpu_solver.h index bf51da0cc..e531fab94 100644 --- a/src/cuda/camp_gpu_solver.h +++ b/src/cuda/camp_gpu_solver.h @@ -12,6 +12,7 @@ #include #include #include "../camp_common.h" +#include "../debug_and_stats/camp_debug_2.h" //Value to consider data size too big -> Memory optimization will change below and under the limit #define DATA_SIZE_LIMIT_OPT 2000 diff --git a/src/cuda/rxns_gpu/rxn_SIMPOL_phase_transfer.cu b/src/cuda/rxns_gpu/rxn_SIMPOL_phase_transfer.cu index a944e0f6d..85220dcec 100644 --- a/src/cuda/rxns_gpu/rxn_SIMPOL_phase_transfer.cu +++ b/src/cuda/rxns_gpu/rxn_SIMPOL_phase_transfer.cu @@ -17,7 +17,6 @@ extern "C"{ #define TEMPERATURE_K_ env_data[0] #define PRESSURE_PA_ env_data[1] - //todo fix // Universal gas constant (J/mol/K) #define UNIV_GAS_CONST_ 8.314472 // Small number for ignoring low concentrations diff --git a/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu b/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu index e6dd78f45..4746ba708 100644 --- a/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu +++ b/src/cuda/rxns_gpu/rxn_aqueous_equilibrium.cu @@ -143,8 +143,7 @@ __device__ double rxn_gpu_aqueous_equilibrium_calc_overall_rate(int *rxn_data, */ #ifdef PMC_USE_SUNDIALS -//TODO: seems this implementation ifdef raise a warning of host__ device__ function with new tests gpu -//TODO: O a lo mejor es xk en el .h no esta asi (diria que si) +//TODO: Apply this to cpu reactions, merging cpu and gpu code //#ifdef PMC_USE_GPU //__device__ //#endif diff --git a/src/cuda/rxns_gpu/rxn_arrhenius.cu b/src/cuda/rxns_gpu/rxn_arrhenius.cu index fe478c2c4..22504e4b5 100644 --- a/src/cuda/rxns_gpu/rxn_arrhenius.cu +++ b/src/cuda/rxns_gpu/rxn_arrhenius.cu @@ -244,15 +244,14 @@ __device__ void rxn_gpu_arrhenius_calc_jac_contrib(double *rate_constants, doubl int n_rxn=n_rxn2; int *int_data = (int*) rxn_data; double *float_data = double_pointer_gpu; - double rate; // Calculate the reaction rate //double rate = RATE_CONSTANT_; -//TODO FIX THIS, ITS NOT WORKING WITH NEW UNIT TEST IDK WHYYYYYYY +//TODO FIX THIS, ITS NOT WORKING NOW WHYYYY (i guess in branch 114 it was working ) // Add contributions to the Jacobian int i_elem = 0; for (int i_ind=0; i_ind +#include +#include "camp_debug_2.h" + +/** \brief Print derivative array + * + * \param deriv Derivative array + */ +static void print_derivative_2(N_Vector deriv) { + // printf(" deriv length: %d\n", NV_LENGTH_S(deriv)); + for (int i = 0; i < NV_LENGTH_S(deriv); i++) { // NV_LENGTH_S(deriv) + printf(" deriv: % -le", NV_DATA_S(deriv)[i]); + printf(" index: %d \n", i); + } +} \ No newline at end of file diff --git a/src/debug_and_stats/camp_debug_2.h b/src/debug_and_stats/camp_debug_2.h new file mode 100644 index 000000000..2744e341c --- /dev/null +++ b/src/debug_and_stats/camp_debug_2.h @@ -0,0 +1,12 @@ +// +// Created by cguzman on 10/24/19. +// + +#ifndef CAMP_DEBUG_2_H +#define CAMP_DEBUG_2_H + +#include "../camp_common.h" + +static void print_derivative_2(N_Vector deriv); + +#endif //CAMP_DEBUG_2_H diff --git a/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 b/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 index a80c9d9b4..c46101cc3 100644 --- a/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 +++ b/test/chemistry/cb05cl_ae5/test_cb05cl_ae5.F90 @@ -42,7 +42,7 @@ program pmc_test_cb05cl_ae5 ! CAMP-chem output file unit integer(kind=i_kind), parameter :: CAMP_FILE_UNIT = 12 ! Number of timesteps to integrate over - integer(kind=i_kind), parameter :: NUM_TIME_STEPS = 2!100 + integer(kind=i_kind), parameter :: NUM_TIME_STEPS = 100 ! Number of EBI-solver species integer(kind=i_kind), parameter :: NUM_EBI_SPEC = 72 ! Number of EBI-solever photolysis reactions From ea78eecd9e1c8bed5c1aa3755d2e738e77de534e Mon Sep 17 00:00:00 2001 From: cguzman Date: Sat, 26 Oct 2019 21:38:28 +0200 Subject: [PATCH 4/4] Fixing format --- src/camp_common.h | 8 ++++---- src/camp_solver.c | 17 +++++++++-------- src/debug_and_stats/camp_debug_2.c | 6 +++--- src/debug_and_stats/camp_debug_2.h | 2 +- src/rxn_solver.c | 2 +- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/camp_common.h b/src/camp_common.h index d8e63724d..68ecee258 100644 --- a/src/camp_common.h +++ b/src/camp_common.h @@ -61,7 +61,7 @@ typedef struct { /* GPU data structure */ typedef struct { - size_t deriv_size; + size_t deriv_size; } DataGPU; @@ -173,8 +173,8 @@ typedef struct { // for the current grid cell int n_sub_model_env_data; // Number of sub model environmental parameters // from all sub models -//#ifdef CUDA_FOUND -// GPU data + //#ifdef CUDA_FOUND + // GPU data double *deriv_gpu_data; double *deriv_cpu; double *jac_gpu_data; @@ -196,7 +196,7 @@ typedef struct { double *rate_constants_gpu; int *rate_constants_idx_gpu; -//#endif + //#endif } ModelData; /* Solver data structure */ diff --git a/src/camp_solver.c b/src/camp_solver.c index 08e11f641..bdae8594b 100644 --- a/src/camp_solver.c +++ b/src/camp_solver.c @@ -337,8 +337,9 @@ void *solver_new(int n_state_var, int n_cells, int *var_type, int n_rxn, sd->model_data.sub_model_env_idx[0] = 0; #ifdef PMC_USE_GPU - solver_new_gpu_cu(&(sd->model_data), n_dep_var, n_state_var, n_rxn, n_rxn_int_param, - n_rxn_float_param, n_rxn_env_param, n_cells); + solver_new_gpu_cu(&(sd->model_data), n_dep_var, n_state_var, n_rxn, + n_rxn_int_param, n_rxn_float_param, n_rxn_env_param, + n_cells); #endif #ifdef PMC_DEBUG @@ -876,8 +877,8 @@ int f(realtype t, N_Vector y, N_Vector deriv, void *solver_data) { // Calculate the time derivative f(t,y) rxn_calc_deriv(md, deriv_data, (double)time_step); #else - // Add contributions from reactions not implemented on GPU - rxn_calc_deriv_specific_types(md, deriv_data, (double)time_step); + // Add contributions from reactions not implemented on GPU + rxn_calc_deriv_specific_types(md, deriv_data, (double)time_step); #endif #ifdef PMC_DEBUG @@ -964,8 +965,8 @@ int Jac(realtype t, N_Vector y, N_Vector deriv, SUNMatrix J, void *solver_data, #ifdef PMC_USE_GPU // Calculate the Jacobian - //TODO: Fix jacobian mapping with jac_map[i_map].rxn_id - //rxn_calc_jac_gpu(md, J, time_step); + // TODO: Fix jacobian mapping with jac_map[i_map].rxn_id + // rxn_calc_jac_gpu(md, J, time_step); #endif #ifdef PMC_DEBUG @@ -1004,7 +1005,7 @@ int Jac(realtype t, N_Vector y, N_Vector deriv, SUNMatrix J, void *solver_data, clock_t start = clock(); #endif -//#ifdef PMC_USE_GPU + //#ifdef PMC_USE_GPU // Calculate the reaction Jacobian rxn_calc_jac(md, J_rxn_data, time_step); @@ -1865,7 +1866,7 @@ void error_handler(int error_code, const char *module, const char *function, */ void model_free(ModelData model_data) { #ifdef PMC_USE_GPU - free_gpu_cu(); + free_gpu_cu(); #endif #ifdef PMC_USE_SUNDIALS diff --git a/src/debug_and_stats/camp_debug_2.c b/src/debug_and_stats/camp_debug_2.c index 0fd548d1e..6fdcabc0a 100644 --- a/src/debug_and_stats/camp_debug_2.c +++ b/src/debug_and_stats/camp_debug_2.c @@ -5,11 +5,11 @@ * Debug and stats functions * */ -//todo fix this...move all to this folder and compile correctly... -//todo: i think is not necessary include all time stdio.h +// todo fix this...move all to this folder and compile correctly... +// todo: i think is not necessary include all time stdio.h +#include "camp_debug_2.h" #include #include -#include "camp_debug_2.h" /** \brief Print derivative array * diff --git a/src/debug_and_stats/camp_debug_2.h b/src/debug_and_stats/camp_debug_2.h index 2744e341c..71bf7a09e 100644 --- a/src/debug_and_stats/camp_debug_2.h +++ b/src/debug_and_stats/camp_debug_2.h @@ -9,4 +9,4 @@ static void print_derivative_2(N_Vector deriv); -#endif //CAMP_DEBUG_2_H +#endif // CAMP_DEBUG_2_H diff --git a/src/rxn_solver.c b/src/rxn_solver.c index 6809e970a..6889ea301 100644 --- a/src/rxn_solver.c +++ b/src/rxn_solver.c @@ -510,7 +510,7 @@ void rxn_calc_jac_specific_types(ModelData *model_data, double *J_data, rxn_int_data, rxn_float_data, rxn_env_data, time_step); break; - case RXN_ARRHENIUS: //TODO delete arrhenius when it works + case RXN_ARRHENIUS: // TODO delete arrhenius when it works rxn_arrhenius_calc_jac_contrib(model_data, J_data, rxn_int_data, rxn_float_data, rxn_env_data, time_step); break;