From f39c65f043375e7e88c826dafdea57ca45f478d0 Mon Sep 17 00:00:00 2001
From: Yu Feng
Date: Fri, 11 Sep 2015 14:26:09 -0700
Subject: [PATCH] Add a buggy r2c benchmarker.

The FFTW runs are giving wrong results. Also PADDED is added manually.
---
 tests/Makefile.am |   1 +
 tests/bench_r2c.c | 506 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 507 insertions(+)
 create mode 100644 tests/bench_r2c.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9cdef9f..b624b1b 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -122,6 +122,7 @@ check_PROGRAMS += \
 check_PROGRAMS += \
   bench_c2c \
+  bench_r2c \
   time_c2c \
   time_c2c_transposed
 endif
diff --git a/tests/bench_r2c.c b/tests/bench_r2c.c
new file mode 100644
index 0000000..b3c1fc3
--- /dev/null
+++ b/tests/bench_r2c.c
@@ -0,0 +1,506 @@
+#include
+#include
+
+static void measure_pfft(
+    const ptrdiff_t *n, MPI_Comm comm_cart_3d, int loops,
+    unsigned pfft_opt_flags, int transposed, int inplace, int verbose,
+    int print_timer);
+static void measure_fftw(
+    const ptrdiff_t *n, int parallel, int loops,
+    unsigned fftw_opt_flags, int transposed, int inplace, int verbose);
+static void loop_pfft_tests(
+    ptrdiff_t *n, MPI_Comm comm, int loops,
+    unsigned pfft_flags, int transposed, int inplace, int verbose, int cmp_flags,
+    int print_timer);
+static void loop_fftw_tests(
+    ptrdiff_t *n, int parallel, int loops, int transposed, int inplace, int verbose);
+static void init_parameters(
+    int argc, char **argv,
+    ptrdiff_t *n, int *np,
+    unsigned *pfft_flags,
+    int *loops, int *transposed, int *verbose, int *inplace,
+    int *cmp_fftw, int *cmp_decomp, int *cmp_flags,
+    int *print_timer);
+
+/* Benchmark driver.
+ *
+ * Initializes MPI and PFFT, reads the run parameters through init_parameters(),
+ * and builds an np[0] x np[1] x np[2] process mesh on MPI_COMM_WORLD. Depending
+ * on how many entries of np are 1 (num_serial_dims) and on cmp_decomp, it then
+ * runs loop_pfft_tests() on a 3d, 2d and/or 1d process mesh. With cmp_fftw set
+ * it additionally runs loop_fftw_tests() for the 1d and the single-process case.
+ *
+ * Returns 0 on success and 1 if any process mesh could not be created.
+ */
+int main(int argc, char **argv)
+{
+  MPI_Comm comm_cart_1d, comm_cart_2d, comm_cart_3d;
+
+  /* Set size of FFT and process mesh */
+  ptrdiff_t n[3] = {32,32,32};
+  int np[3] = {1,1,1};
+  int loops = 1;
+  int verbose = 0;
+  int inplace = 0;
+  int cmp_fftw = 0;
+  int cmp_decomp = 0;
+  int cmp_flags = 0;
+  int transposed = 0;
+  int print_timer = 0;
+  unsigned pfft_flags = 0;
+
+  /* Initialize MPI and PFFT */
+  MPI_Init(&argc, &argv);
+  pfft_init();
+
+  /* set parameters by command line */
+  init_parameters(argc, argv, n, np, &pfft_flags, &loops, &transposed, &verbose, &inplace, &cmp_fftw, &cmp_decomp, &cmp_flags, &print_timer);
+
+  /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */
+  if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){
+    pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]);
+    pfft_fprintf(MPI_COMM_WORLD, stderr, " Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]);
+    MPI_Finalize();
+    return 1;
+  }
+
+  /* number of mesh directions that hold a single process */
+  int num_serial_dims = (np[0]==1) + (np[1]==1) + (np[2]==1);
+
+  if( cmp_decomp || num_serial_dims==0){
+    pfft_printf(MPI_COMM_WORLD, "* PFFT runtimes (3d data decomposition):\n");
+    loop_pfft_tests(n, comm_cart_3d, loops, pfft_flags, transposed, inplace, verbose, cmp_flags, print_timer);
+    pfft_printf(MPI_COMM_WORLD, "\n");
+  }
+  /* The 3d mesh is created unconditionally above, so release it here even
+   * when the 3d run is skipped. */
+  MPI_Comm_free(&comm_cart_3d);
+
+  /* run 2d-data decomposition if possible */
+  if( num_serial_dims >= 1 ){
+    if( cmp_decomp || num_serial_dims==1){
+      /* move serial dims to the end */
+      if(np[1]==1){ np[1] = np[2]; np[2] = 1; }
+      if(np[0]==1){ np[0] = np[1]; np[1] = 1; }
+      if(np[1]==1){ np[1] = np[2]; np[2] = 1; }
+
+      if( pfft_create_procmesh(2, MPI_COMM_WORLD, np, &comm_cart_2d) ){
+        /* comm_cart_2d may still be unset here; do not benchmark with or free it */
+        pfft_printf(MPI_COMM_WORLD, "Error in creation of 2d procmesh of size %d x %d\n", np[0], np[1]);
+        MPI_Finalize();
+        return 1;
+      }
+      pfft_printf(MPI_COMM_WORLD, "* PFFT runtimes (2d data decomposition):\n");
+      loop_pfft_tests(n, comm_cart_2d, loops, pfft_flags, transposed, inplace, verbose, cmp_flags, print_timer);
+      pfft_printf(MPI_COMM_WORLD, "\n");
+      MPI_Comm_free(&comm_cart_2d);
+    }
+  }
+
+  /* run 1d-data decomposition if possible */
+  if( num_serial_dims >= 2 ){
+    /* move serial dims to the end */
+    if(np[1]==1){ np[1] = np[2]; np[2] = 1; }
+    if(np[0]==1){ np[0] = np[1]; np[1] = 1; }
+
+    if( pfft_create_procmesh(1, MPI_COMM_WORLD, np, &comm_cart_1d) ){
+      /* comm_cart_1d may still be unset here; do not benchmark with or free it */
+      pfft_printf(MPI_COMM_WORLD, "Error in creation of 1d procmesh of size %d\n", np[0]);
+      MPI_Finalize();
+      return 1;
+    }
+    pfft_printf(MPI_COMM_WORLD, "* PFFT runtimes (1d data decomposition):\n");
+    loop_pfft_tests(n, comm_cart_1d, loops, pfft_flags, transposed, inplace, verbose, cmp_flags, print_timer);
+    pfft_printf(MPI_COMM_WORLD, "\n");
+    MPI_Comm_free(&comm_cart_1d);
+
+    if(cmp_fftw){
+      pfft_printf(MPI_COMM_WORLD, "* FFTW_MPI runtimes (1d data decomposition):\n");
+      loop_fftw_tests(n, /* parallel = */ 1, loops, transposed, inplace, verbose);
+    }
+  }
+
+  /* run serial if possible */
+  if( np[0]*np[1]*np[2] == 1 ){
+    if(cmp_fftw){
+      pfft_printf(MPI_COMM_WORLD, "* serial FFTW runtimes (no data decomposition at all):\n");
+      loop_fftw_tests(n, /* parallel = */ 0, loops, transposed, inplace, verbose);
+      pfft_printf(MPI_COMM_WORLD, "\n");
+    }
+  }
+
+  /* free mem and finalize */
+  MPI_Finalize();
+  return 0;
+}
+
+/* Run measure_pfft() on communicator comm for one or several planner flag sets.
+ *
+ * With cmp_flags == 0 a single measurement is made using pfft_flags.
+ * Otherwise pfft_flags is ignored and every combination of
+ *   {PFFT_NO_TUNE, PFFT_TUNE} x {PFFT_ESTIMATE, PFFT_MEASURE}
+ * is measured, first without and then with PFFT_DESTROY_INPUT. The
+ * PFFT_DESTROY_INPUT pass is skipped when inplace is set.
+ * All remaining arguments are forwarded unchanged to measure_pfft().
+ */
+static void loop_pfft_tests(
+    ptrdiff_t *n, MPI_Comm comm, int loops,
+    unsigned pfft_flags, int transposed, int inplace, int verbose, int cmp_flags,
+    int print_timer
+    )
+{
+
+  unsigned tune, measure, destroy;
+
+  if(!cmp_flags){
+    measure_pfft(n, comm, loops, pfft_flags, transposed, inplace, verbose, print_timer);
+    return;
+  }
+
+  destroy = 0;
+  for(int k=0; k<2; k++){
+    measure = PFFT_ESTIMATE;
+    for(int l=0; l<2; l++){
+      tune = PFFT_NO_TUNE;
+      for(int m=0; m<2; m++){
+        measure_pfft(n, comm, loops, tune | measure | destroy, transposed, inplace, verbose, print_timer);
+        tune = PFFT_TUNE;
+      }
+      measure = PFFT_MEASURE;
+    }
+    if(inplace) break;
+    destroy = PFFT_DESTROY_INPUT;
+  }
+}
+
+/* Run measure_fftw() for each planner flag set.
+ *
+ * Measures FFTW_ESTIMATE and FFTW_MEASURE, first without and then with
+ * FFTW_DESTROY_INPUT. The FFTW_DESTROY_INPUT pass is skipped when inplace
+ * is set. parallel and the remaining arguments are forwarded unchanged to
+ * measure_fftw().
+ */
+static void loop_fftw_tests(
+    ptrdiff_t *n, int parallel, int loops, int transposed, int inplace, int verbose
+    )
+{
+  unsigned measure, destroy;
+
+  destroy = 0;
+  for(int k=0; k<2; k++){
+    measure = FFTW_ESTIMATE;
+    for(int l=0; l<2; l++){
+      measure_fftw(n, parallel, loops, measure | destroy, transposed, inplace, verbose);
+      measure = FFTW_MEASURE;
+    }
+    if(inplace) break;
+    destroy = FFTW_DESTROY_INPUT;
+  }
+}
+
+static void measure_pfft(
+    const ptrdiff_t *n, MPI_Comm comm_cart, int loops,
+    unsigned pfft_opt_flags, int transposed, int inplace, int verbose,
+    int print_timer
+    )
+{
+  ptrdiff_t alloc_local;
+  ptrdiff_t local_ni[3], local_i_start[3];
+  ptrdiff_t local_no[3], local_o_start[3];
+  double err, timer[4];
+  double *in;
+  pfft_complex *out;
+  pfft_plan plan_forw=NULL, plan_back=NULL;
+  unsigned tr_in = (transposed) ? PFFT_TRANSPOSED_IN : PFFT_TRANSPOSED_NONE | PFFT_PADDED_C2R;
+  unsigned tr_out = (transposed) ? PFFT_TRANSPOSED_OUT : PFFT_TRANSPOSED_NONE | PFFT_PADDED_R2C;
+
+  /* Get parameters of data distribution */
+  alloc_local = pfft_local_size_dft_r2c_3d(n, comm_cart, tr_out,
+      local_ni, local_i_start, local_no, local_o_start);
+
+  /* Allocate memory */
+  in = pfft_alloc_complex(alloc_local);
+  if(inplace) out = in;
+  else out = pfft_alloc_complex(alloc_local);
+
+  /* Plan parallel forward FFT */
+  timer[0] = -MPI_Wtime();
+  plan_forw = pfft_plan_dft_r2c_3d(
+      n, in, out, comm_cart, PFFT_FORWARD, tr_out | pfft_opt_flags);
+  timer[0] += MPI_Wtime();
+
+  /* Plan parallel backward FFT */
+  timer[1] = -MPI_Wtime();
+  plan_back = pfft_plan_dft_c2r_3d(
+      n, out, in, comm_cart, PFFT_BACKWARD, tr_in | pfft_opt_flags);
+  timer[1] += MPI_Wtime();
+
+  /* Initialize input with random numbers */
+  pfft_init_input_real_3d(n, local_ni, local_i_start,
+      in);
+
+  if(verbose)
+    pfft_apr_real_3d(in, local_ni, local_i_start, "PFFT Input", comm_cart);
+
+  /* execute parallel forward FFT */
+  timer[2] = -MPI_Wtime();
+  for(int t=0; t