From 6b555754421bc1db64562f065d39b308b142967d Mon Sep 17 00:00:00 2001 From: Marcus Petschlies Date: Tue, 6 Dec 2016 17:17:03 +0100 Subject: [PATCH 1/8] my changes --- wrapper/lib_wrapper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/wrapper/lib_wrapper.c b/wrapper/lib_wrapper.c index 5353ebda4..eb6847261 100644 --- a/wrapper/lib_wrapper.c +++ b/wrapper/lib_wrapper.c @@ -22,6 +22,7 @@ * Author: Carsten Urbach * curbach@gmx.de * + * *******************************************************************************/ #include From 68a08eecb08bc78095ba27753da6beb5fb9b516c Mon Sep 17 00:00:00 2001 From: Marcus Petschlies Date: Tue, 20 Dec 2016 13:31:07 +0100 Subject: [PATCH 2/8] added communicator to tmLQCD_mpi_params struct; added tmLQCD_deflator_params struct and function prototypes for exact deflation --- include/tmLQCD.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/tmLQCD.h b/include/tmLQCD.h index b0c7ea58e..7c4dcd464 100755 --- a/include/tmLQCD.h +++ b/include/tmLQCD.h @@ -41,6 +41,9 @@ extern "C" typedef struct { unsigned int nproc, nproc_t, nproc_x, nproc_y, nproc_z, cart_id, proc_id, time_rank, omp_num_threads; unsigned int proc_coords[4]; +#ifdef TM_USE_MPI + MPI_Comm cart_grid; +#endif } tmLQCD_mpi_params; int tmLQCD_invert_init(int argc, char *argv[], const int verbose, const int external_id); @@ -58,6 +61,12 @@ extern "C" const int op_id, const int gauge_persist); #endif + int tmLQCD_invert_eo(double * const propagator, double * const source, const int op_id); + + int tmLQCD_get_deflator_params(tmLQCD_deflator_params*params, const int op_id); + int tmLQCD_init_deflator(const int op_id); + int tmLQCD_set_deflator_fields(const int op_id1, const int op_id2); + #ifdef __cplusplus } #endif From 88e9d8defa75db1ff2b7c9403c98455823623656 Mon Sep 17 00:00:00 2001 From: Marcus Petschlies Date: Thu, 22 Dec 2016 09:25:52 +0100 Subject: [PATCH 3/8] added interface functions for exact deflation in wrapper/lib_wrapper.c --- wrapper/lib_wrapper.c | 172 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/wrapper/lib_wrapper.c b/wrapper/lib_wrapper.c index eb6847261..73dd86138 100644 --- a/wrapper/lib_wrapper.c +++ b/wrapper/lib_wrapper.c @@ -369,3 +369,175 @@ int tmLQCD_get_gauge_field_pointer(double ** gf) { return(0); } + + +/*************************************************************************************** + * invert even-odd preconditioned operator directly + * propagator and source must be in even-odd ordering already + ***************************************************************************************/ +int tmLQCD_invert_eo(double * const propagator, double * const source, const int op_id) { + int iter; + spinor * const p_ptr = (spinor * const)propagator; + spinor * const s_ptr = (spinor * const)source; + + + if(!tmLQCD_invert_initialised) { + fprintf(stderr, "[tmLQCD_invert_eo] tmLQCD_inver_init must be called first. Aborting...\n"); + fflush(stderr); + return(-1); + } + + if(op_id < 0 || op_id >= no_operators) { + fprintf(stderr, "[tmLQCD_invert_eo] op_id=%d not in valid range. Aborting...\n", op_id); + fflush(stderr); + return(-1); + } + + if (operator_list[op_id].solver != EXACTDEFLATEDCG ) { + fprintf(stderr, "[tmLQCD_invert_eo] Error, wrong solver type in operator\n"); + fflush(stderr); + return(-2); + } + + if(g_proc_id == 0) { + printf("# [tmLQCD_invert_eo] Using eo-prec., symmetrized, deflated cg!\n"); + fflush(stdout); + } + + /**************************************** + * - initialize parameters; this is otherwise + * done in operator.c or invert.c + * - + ****************************************/ + boundary(operator_list[op_id].kappa); + g_mu = operator_list[op_id].mu; + g_c_sw = operator_list[op_id].c_sw; + g_kappa = operator_list[op_id].kappa; + + if(operator_list[op_id].type == CLOVER) { + if (g_cart_id == 0 && g_debug_level > 1) { + printf("#\n# csw = %e, computing clover leafs\n", g_c_sw); + } + init_sw_fields(VOLUME); + + sw_term( (const su3**) g_gauge_field, operator_list[op_id].kappa, operator_list[op_id].c_sw); + /* this must be EE here! */ + /* to match clover_inv in Qsw_psi */ + sw_invert(EE, operator_list[op_id].mu); + /* now copy double sw and sw_inv fields to 32bit versions */ + copy_32_sw_fields(); + } + + /* call invert */ + iter = exactdeflated_cg( operator_list[op_id].solver_params, operator_list[op_id].deflator_params, p_ptr, s_ptr); + + if(iter<0) { + fprintf(stderr, "[tmLQCD_invert_eo] Error from deflated_cg, status was %d\n", iter); + fflush(stderr); + return(1); + } + + return(0); +} /* end of tmLQCD_invert_eo */ + +/*************************************************************************************** + * copy several deflator parameters + ***************************************************************************************/ +int tmLQCD_get_deflator_params(tmLQCD_deflator_params*params, const int op_id) { + if(!tmLQCD_invert_initialised) { + fprintf(stderr, "[tmLQCD_get_deflator_params] tmLQCD_invert_init must be called first. Aborting...\n"); + return(-1); + } + + if(operator_list[op_id].deflator_params.nconv == -1) { + fprintf(stderr, "[tmLQCD_get_deflator_params] deflator no initialized. Aborting...\n"); + return(-2); + } + + strcpy(params->type_name, operator_list[op_id].deflator_params.type_name); + params->eoprec = operator_list[op_id].deflator_params.eoprec; + params->evecs = operator_list[op_id].deflator_params.evecs; + params->evals = operator_list[op_id].deflator_params.evals; + params->prec = operator_list[op_id].deflator_params.prec; + params->nev = operator_list[op_id].deflator_params.nconv; + + return(0); +} /* end of tmLQCD_get_deflator_params */ + +/*************************************************************************************** + * call the deflator function + ***************************************************************************************/ +int tmLQCD_init_deflator(const int op_id) { + int status; + + if(op_id < 0 || op_id >= no_operators) { + fprintf(stderr, "[tmLQCD_init_deflator] op_id=%d not in valid range. Aborting...\n", op_id); + return(1); + } + + + if(operator_list[op_id].deflator_params.nconv > -1) { + fprintf(stderr, "[tmLQCD_init_deflator] deflator already initialized. Aborting...\n"); + return(2); + } + + /**************************************** + * initialize parameters; this is otherwise + * done in operator.c or invert.c + ****************************************/ + boundary(operator_list[op_id].kappa); + g_mu = operator_list[op_id].mu; + g_c_sw = operator_list[op_id].c_sw; + g_kappa = operator_list[op_id].kappa; + + if (g_cart_id == 0) { + fprintf(stdout, "# [tmLQCD_init_deflator] 2 kappa mu = %e, kappa = %e, c_sw = %e\n", g_mu, g_kappa, g_c_sw); + } + + if(operator_list[op_id].type == CLOVER) { + if (g_cart_id == 0) { + printf("# [tmLQCD_init_deflator]\n# [tmLQCD_init_deflator] csw = %e, computing clover leafs\n", g_c_sw); + } + init_sw_fields(VOLUME); + + sw_term( (const su3**) g_gauge_field, operator_list[op_id].kappa, operator_list[op_id].c_sw); + /* this must be EE here! */ + /* to match clover_inv in Qsw_psi */ + sw_invert(EE, operator_list[op_id].mu); + /* now copy double sw and sw_inv fields to 32bit versions */ + copy_32_sw_fields(); + } + + + status = operator_list[op_id].deflator_params.init( &(operator_list[op_id].deflator_params) ); + if(status != 0) { + fprintf(stderr, "[tmLQCD_init_deflator] Error from deflator_init, status was %d\n", status); + return(3); + } + + return(0); +} /* end of tmLQCD_init_deflator */ + +int tmLQCD_set_deflator_fields(const int op_id1, const int op_id2) { + int status; + + if( op_id1 < 0 || op_id1 >= no_operators || op_id2 < 0 || op_id2 >= no_operators) { + fprintf(stderr, "[tmLQCD_init_deflator] op_id out of range; aborting\n"); + return(1); + } + + if( operator_list[op_id2].deflator_params.nconv <= 0 || operator_list[op_id2].deflator_params.evecs == NULL || operator_list[op_id2].deflator_params.evals == NULL ) { + fprintf(stderr, "[tmLQCD_copy_deflator_fields] source deflator not initialized; aborting\n"); + return(2); + } + + /**************************************** + * set deflator fields + ****************************************/ + operator_list[op_id1].deflator_params.nconv = operator_list[op_id2].deflator_params.nconv; + operator_list[op_id1].deflator_params.evecs = operator_list[op_id2].deflator_params.evecs; + operator_list[op_id1].deflator_params.evals = operator_list[op_id2].deflator_params.evals; + + return(0); +} /* end of tmLQCD_set_deflator_fields */ + From ff9c75aca12223bb5400e1d04443f78f98a96893 Mon Sep 17 00:00:00 2001 From: Marcus Petschlies Date: Thu, 22 Dec 2016 09:28:36 +0100 Subject: [PATCH 4/8] solver-functions for exact deflation --- solver/deflated_cg.c | 436 ++++++++++++++++++++++++++++++++ solver/deflator.c | 582 +++++++++++++++++++++++++++++++++++++++++++ solver/deflator.h | 103 ++++++++ 3 files changed, 1121 insertions(+) create mode 100644 solver/deflated_cg.c create mode 100644 solver/deflator.c create mode 100644 solver/deflator.h diff --git a/solver/deflated_cg.c b/solver/deflated_cg.c new file mode 100644 index 000000000..6c1e1b5cc --- /dev/null +++ b/solver/deflated_cg.c @@ -0,0 +1,436 @@ +/***************************************************************************** + * Copyright (C) 2014 Abdou M. Abdel-Rehim + * + * Deflating CG using eigenvectors computed using ARPACK + * eigenvectors used correspond to those with smallest magnitude + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + ****************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include + +#ifdef TM_USE_MPI +# include +#endif + +#include "global.h" +#include "gettime.h" +#include "linalg_eo.h" +#include "start.h" +#include "linalg/blas.h" +#include "linalg/lapack.h" +#include +#include +#include +#include +#include "solver_field.h" +#include "solver/deflated_cg.h" + +int exactdeflated_cg( + solver_params_t solver_params, /* (IN) parameters for solver */ + deflator_params_t deflator_params, /* (IN) parameters for deflator */ + spinor * const x, /* (IN/OUT) initial guess on input, solution on output for this RHS*/ + spinor * const b /* (IN) right-hand side*/ +) { + + /* Static variables and arrays. */ + static int ncurRHS=0; /* current number of the system being solved */ + static void *_ax, *_r, *_tmps1, *_tmps2; + static spinor *ax, *r, *tmps1, *tmps2; + static _Complex double *H,*Hinv,*initwork,*tmpv1; + static double *hevals; + static int *IPIV; + int i,j,tmpsize; + int ONE=1; + _Complex double c1, c2, c3; + double d1,d2,d3; + double et1,et2; /* timing variables */ + + int parallel; /* for parallel processing of the scalar products */ +#ifdef TM_USE_MPI + parallel=1; +#else + parallel=0; +#endif + + const int N = deflator_params.eoprec == 0 ? VOLUME : VOLUME/2; /* (IN) Number of lattice sites for this process*/ + + /* leading dimension for spinor vectors */ + int LDN; + if(N==VOLUME) + LDN = VOLUMEPLUSRAND; + else + LDN = VOLUMEPLUSRAND/2; + + /* (IN) Number of right-hand sides to be solved*/ + const int nrhs = solver_params.nrhs; + + /* (IN) First number of right-hand sides to be solved using tolerance eps_sq1*/ + const int nrhs1 = solver_params.nrhs1; + + /* (IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/ + const double eps_sq1 = solver_params.eps_sq1; + + /* (IN) suqared tolerance for restarting cg */ + const double res_eps_sq = solver_params.res_eps_sq; + + /* (IN) how to project with approximate eigenvectors */ + int projection_type = deflator_params.projection_type; + + /*(IN) field of eigenvectors */ + _Complex double *evecs = (_Complex double *) deflator_params.evecs; + + /*(IN) list of eigenvalues */ + _Complex double *evals = (_Complex double *) deflator_params.evals; + + /* (IN) operator in double precision */ + matrix_mult f = deflator_params.f; + + /* (IN) operator in single precision */ + matrix_mult32 f32 = deflator_params.f32; + + /* (IN) final operator application during projection of type 1 */ + matrix_mult f_final = deflator_params.f_final; + + /* (IN) initial operator application during projection of type 1 */ + matrix_mult f_initial = deflator_params.f_initial; + + /* (IN) number of converged eigenvectors as returned by deflator */ + int nconv = deflator_params.nconv; + + const double eps_sq = solver_params.eps_sq; /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/ + const int rel_prec = solver_params.rel_prec; /* (IN) 0 for using absoute error for convergence + 1 for using relative error for convergence*/ + const int maxit = solver_params.maxiter; /* (IN) Maximum allowed number of iterations to solution for the linear system*/ + + /************************************************************** + * if this is the first right hand side, allocate memory + **************************************************************/ + if(ncurRHS==0){ +#if (defined SSE || defined SSE2 || defined SSE3) + _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_ax==NULL) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[exactdefled_cg] insufficient memory for _ax inside deflated_cg.\n"); + exit(1); + } + else + {ax = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);} + + _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_r==NULL) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[exactdefled_cg] insufficient memory for _r inside deflated_cg.\n"); + + exit(1); + } + else + {r = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);} + + _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_tmps1==NULL) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[exactdefled_cg] insufficient memory for _tmps1 inside deflated_cg.\n"); + exit(1); + } + else + {tmps1 = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);} + + _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_tmps2==NULL) { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[exactdefled_cg] insufficient memory for _tmps2 inside deflated_cg.\n"); + exit(1); + } else { + tmps2 = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE); + } + +#else + ax = (spinor *) malloc(LDN*sizeof(spinor)); + r = (spinor *) malloc(LDN*sizeof(spinor)); + tmps1 = (spinor *) malloc(LDN*sizeof(spinor)); + tmps2 = (spinor *) malloc(LDN*sizeof(spinor)); + + if( (ax == NULL) || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) ) { + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[exactdefled_cg] insufficient memory for ax,r,tmps1,tmps2 inside exactdeflated_cg.\n"); + exit(1); + } +#endif + + tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double)); + + if((evecs == NULL) || (evals==NULL) || (tmpv1==NULL)) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[exactdefled_cg] insufficient memory for evecs and evals inside exactdeflated_cg.\n"); + exit(1); + } + + H = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); + Hinv = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); + initwork = (_Complex double *) malloc(nconv*sizeof(_Complex double)); + IPIV = (int *) malloc(nconv*sizeof(int)); + hevals = (double *) malloc(nconv*sizeof(double)); + + if((H==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (hevals==NULL)) { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[exactdefled_cg] insufficient memory for H, Hinv, initwork, IPIV, hevals inside exactdeflated_cg.\n"); + exit(1); + } + + et1=gettime(); + /* compute the elements of the hermitian matrix H + leading dimension is nconv and active dimension is nconv + H = V^+ Op V; V = (v_1 ... v_nconv) + */ + + if( projection_type == 0) { + + for(i=0; i nrhs1) { + eps_sq_used = eps_sq; + } else { + eps_sq_used = eps_sq1; + } + + if(g_proc_id == g_stdio_proc && g_debug_level > 0) { + fprintf(stdout, "# [exactdefled_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type); + fflush(stdout); + } + + /*---------------------------------------------------------------*/ + /* Call init-CG until this right-hand side converges */ + /*---------------------------------------------------------------*/ + double wt1,wt2,wE,wI; + double normsq,tol_sq; + int flag,maxit_remain,numIts,its; + int info_lapack; + + wE = 0.0; wI = 0.0; /* Start accumulator timers */ + flag = -1; /* System has not converged yet */ + maxit_remain = maxit; /* Initialize Max and current # of iters */ + numIts = 0; + restart_eps_sq_used=res_eps_sq; + + while( flag == -1 ) { + + if(nconv > 0) { + + /* --------------------------------------------------------- + * Perform init-CG with eigenvectors + * + * xinit = xinit + evecs*Hinv*evec'*(b-Ax0) + * --------------------------------------------------------- */ + wt1 = gettime(); + + /*r0=b-Ax0*/ + f(ax,x); /*ax = A*x */ + diff(r,b,ax,N); /* r=b-A*x */ + + if( projection_type == 0) { + + /* x = x + evecs*inv(H)*evecs'*r */ + for(int i=0; i < nconv; i++) { + assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N); + initwork[i]= scalar_prod(tmps1,r,N,parallel); + } + + /* solve the linear system H y = c */ + tmpsize=nconv*nconv; + _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */ + /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack); + + if(info_lapack != 0) { + if(g_proc_id == g_stdio_proc) { + fprintf(stderr, "[exactdefled_cg] Error in ZGESV:, info = %d\n",info_lapack); + fflush(stderr); + } + exit(1); + } + + /* x = x + evecs*inv(H)*evecs'*r */ + for(i=0; i 0 */ + + + /* which tolerance to use */ + if(eps_sq_used > restart_eps_sq_used) { + tol_sq = eps_sq_used; + flag = 1; /* shouldn't restart again */ + } else { + tol_sq = restart_eps_sq_used; + } + + wt1 = gettime(); + its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f); + + wt2 = gettime(); + + wE = wE + wt2-wt1; + + /* check convergence */ + if(its == -1) + { + /* cg didn't converge */ + if(g_proc_id == g_stdio_proc) { + fprintf(stderr, "[exactdefled_cg] CG didn't converge within the maximum number of iterations in deflated_cg. Exiting...\n"); + fflush(stderr); + exit(1); + + } + } + else + { + numIts += its; + maxit_remain = maxit - numIts; /* remaining number of iterations */ + restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */ + } + + } + /* end while (flag ==-1) */ + + /* ---------- */ + /* Reporting */ + /* ---------- */ + /* compute the exact residual */ + f(ax,x); /* ax= A*x */ + diff(r,b,ax,N); /* r=b-A*x */ + normsq=square_norm(r,N,parallel); + if(g_debug_level > 0 && g_proc_id == g_stdio_proc) + { + fprintf(stdout, "# [exactdefled_cg] For this rhs:\n"); + fprintf(stdout, "# [exactdefled_cg] Total initCG Wallclock : %+e\n", wI); + fprintf(stdout, "# [exactdefled_cg] Total cg Wallclock : %+e\n", wE); + fprintf(stdout, "# [exactdefled_cg] Iterations: %-d\n", numIts); + fprintf(stdout, "# [exactdefled_cg] Actual Resid of LinSys : %+e\n",normsq); + } + + /* apply the adjoint operator again */ + f_initial(ax,x); + /* copy back to x */ + memcpy(x,ax,N*sizeof(spinor)); + + /* free memory if this was your last system to solve */ + if(ncurRHS == nrhs) { +#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) + free(_ax); free(_r); free(_tmps1); free(_tmps2); +#else + free(ax); free(r); free(tmps1); free(tmps2); +#endif + free(H); free(Hinv); + free(initwork); free(tmpv1); + free(hevals); free(IPIV); + } /* end of if ncurRHS == nrhs */ + + return(numIts); +} /* end of exactdeflated_cg */ diff --git a/solver/deflator.c b/solver/deflator.c new file mode 100644 index 000000000..47b9a2c49 --- /dev/null +++ b/solver/deflator.c @@ -0,0 +1,582 @@ +/***************************************************************************** + * Copyright (C) 2014 Abdou M. Abdel-Rehim + * + * Deflating CG using eigenvectors computed using ARPACK + * eigenvectors used correspond to those with smallest magnitude + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + ****************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include + +#ifdef TM_USE_MPI +# include +#endif + +#include "global.h" +#include "gettime.h" +#include "linalg_eo.h" +#include "start.h" +#include "linalg/blas.h" +#include "linalg/lapack.h" +#include +#include +#include +#include +#include "solver_field.h" +#include "solver/deflator.h" +#include "operator/tm_operators_32.h" +#include "operator/tm_operators.h" + + + +int make_exactdeflator( deflator_params_t *deflator_params ) { + + /* Static variables and arrays. */ + void *_ax,*_r,*_tmps1,*_tmps2; + spinor *ax,*r,*tmps1,*tmps2; + _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1; + _Complex double *zheev_work; + double *hevals,*zheev_rwork; + int *IPIV; + int info_arpack=0; + int nconv=0; /* number of converged eigenvectors as returned by arpack */ + int exit_status = 0; + int i,j,tmpsize; + char cV='V',cN='N', cU='U'; + int ONE=1; + int zheev_lwork,zheev_info; + _Complex double c1, tpone=1.0,tzero=0.0; + double d1,d2,d3; + double et1,et2; /* timing variables */ + char evecs_filename[500]; + FILE *evecs_fs=NULL; + size_t evecs_count; + WRITER *evecs_writer=NULL; + spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL; + paramsPropagatorFormat *evecs_propagatorFormat = NULL; + void *evecs_io_buffer = NULL; + + const int N = deflator_params->eoprec==0 ? VOLUME : VOLUME/2; + + int parallel; /* for parallel processing of the scalar products */ +#ifdef TM_USE_MPI + parallel=1; +#else + parallel=0; +#endif + + /* leading dimension for spinor vectors */ + int LDN; + if(N==VOLUME) + LDN = VOLUMEPLUSRAND; + else + LDN = VOLUMEPLUSRAND/2; + + /* (IN) number of eigenvectors to be computed by arpack*/ + const int nev = deflator_params->nev; + + /* (IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/ + const int ncv = deflator_params->ncv; + + /* (IN) tolerance for computing eigenvalues with arpack */ + double deflator_eig_tol = deflator_params->eig_tol; + + /* (IN) maximum number of iterations to be used by arpack*/ + int deflator_eig_maxiter = deflator_params->eig_maxiter; + + /* (IN) 0 for eigenvalues with smallest real part "SR" + 1 for eigenvalues with largest real part "LR" + 2 for eigenvalues with smallest absolute value "SM" + 3 for eigenvalues with largest absolute value "LM" + 4 for eigenvalues with smallest imaginary part "SI" + 5 for eigenvalues with largest imaginary part "LI"*/ + int kind = deflator_params->evals_kind; + + /* (IN) 0 don't compute the eiegnvalues and their residuals of the original system + 1 compute the eigenvalues and the residuals for the original system (the orthonormal basis + still be used in deflation and they are not overwritten).*/ + int comp_evecs = deflator_params->comp_evecs; + + /* (IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/ + int acc = deflator_params->use_acc; + + /* (IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/ + int cheb_k = deflator_params->cheb_k; + + /* (IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/ + double emin = deflator_params->op_evmin; + + /* (IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/ + double emax = deflator_params->op_evmax; + + /* (IN) file name to be used for printing out debugging information from arpack*/ + char *deflator_logfile = deflator_params->logfile; + + /* (IN) read eigenvectors in Schur basis from file */ + int deflator_read_ev = deflator_params->read_ev; + + /* (IN) write eigenvectors in Schur basis to file */ + int deflator_write_ev = deflator_params->write_ev; + + /* (IN) file name to be used for reading and writing evecs from and to disc */ + char *deflator_evecs_filename = deflator_params->evecs_filename; + + /* (IN) precision used for writing eigenvectors */ + int deflator_evecs_writeprec = deflator_params->evecs_writeprec; + + /* (IN) file format for evecs used by arpack */ + char *deflator_evecs_fileformat = deflator_params->evecs_fileformat; + + /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */ + matrix_mult f = deflator_params->f; + + /* (IN) f32(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */ + matrix_mult32 f32 = deflator_params->f32; + + /* set nconv to 0 to signify, that init has been called */ + deflator_params->nconv = 0; + + if(g_cart_id == 1) { + fprintf(stdout, "# [make_exactdeflator] eo prec = %d\n", deflator_params->eoprec); + fprintf(stdout, "# [make_exactdeflator] N = %d\n", N); + fprintf(stdout, "# [make_exactdeflator] nev = %d\n", nev); + fprintf(stdout, "# [make_exactdeflator] ncv = %d\n", ncv); + fprintf(stdout, "# [make_exactdeflator] evals kind = %d\n", kind); + fprintf(stdout, "# [make_exactdeflator] use acc = %d\n", acc); + fprintf(stdout, "# [make_exactdeflator] Chebyshev k = %d\n", cheb_k); + fprintf(stdout, "# [make_exactdeflator] emin / emax = %e / %e\n", emin, emax); + fprintf(stdout, "# [make_exactdeflator] comp evecs = %d\n", comp_evecs); + fprintf(stdout, "# [make_exactdeflator] deflator_eig_tol = %e\n", deflator_eig_tol); + fprintf(stdout, "# [make_exactdeflator] deflator_eig_maxiter = %d\n", deflator_eig_maxiter); + fprintf(stdout, "# [make_exactdeflator] deflator_write_ev = %d\n", deflator_write_ev); + fprintf(stdout, "# [make_exactdeflator] deflator_read_ev = %d\n", deflator_read_ev); + fprintf(stdout, "# [make_exactdeflator] f == Qtm_pm_psi = %d\n", (f==&Qtm_pm_psi)); + fprintf(stdout, "# [make_exactdeflator] f32 == Qtm_pm_psi = %d\n", (f32==&Qtm_pm_psi_32)); + } + + /*------------------------------------------------------------- + if this is the first right hand side, allocate memory, + call arpack, and compute resiudals of eigenvectors if needed + -------------------------------------------------------------*/ +#if (defined SSE || defined SSE2 || defined SSE3) + _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_ax==NULL) { + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[make_exactdeflator] insufficient memory for _ax inside deflator.\n"); + exit(1); + } else { + ax = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE); + } + + _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_r==NULL) { + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[make_exactdeflator] insufficient memory for _r inside deflator.\n"); + exit(1); + } else { + r = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE); + } + + _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_tmps1==NULL) { + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[make_exactdeflator] insufficient memory for _tmps1 inside deflator.\n"); + exit(1); + } else { + tmps1 = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE); + } + + _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_tmps2==NULL) { + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[make_exactdeflator] insufficient memory for _tmps2 inside deflator.\n"); + exit(1); + } else { + tmps2 = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE); + } + +#else + ax = (spinor *) malloc(LDN*sizeof(spinor)); + r = (spinor *) malloc(LDN*sizeof(spinor)); + tmps1 = (spinor *) malloc(LDN*sizeof(spinor)); + tmps2 = (spinor *) malloc(LDN*sizeof(spinor)); + + if( (ax == NULL) || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) ) { + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[make_exactdeflator] insufficient memory for ax,r,tmps1,tmps2 inside deflator_cg.\n"); + exit(1); + } +#endif + + deflator_params->evecs = malloc(ncv*12*N*sizeof(_Complex double)); /* note: allocation without RAND */ + deflator_params->evals = malloc(ncv*sizeof(_Complex double)); + deflator_params->prec = 64; + tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double)); + + evecs = (_Complex double *)deflator_params->evecs; + evals = (_Complex double *)deflator_params->evals; + + if((evecs == NULL) || (evals==NULL) || (tmpv1==NULL)) { + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[make_exactdeflator] insufficient memory for evecs and evals inside deflator_cg.\n"); + exit(1); + } + + if ( deflator_read_ev == 1) { + + if (strcmp(deflator_evecs_fileformat, "partfile") == 0) { + /* set evec filenmae */ + sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", deflator_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]); + evecs_fs = fopen(evecs_filename, "r"); + if (evecs_fs == NULL) { + fprintf(stderr, "[make_exactdeflator] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename); + return(-2); + } + if(g_proc_id == g_stdio_proc) fprintf(stdout, "# [make_exactdeflator] reading eigenvectors from file %s\n", evecs_filename); + + if(deflator_evecs_writeprec == 64) { + + evecs_io_buffer = (void*)evecs; + + et1=gettime(); + evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs); + et2=gettime(); + + } else { + evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N ); + if( evecs_io_buffer == NULL) { + fprintf(stderr, "[make_exactdeflator] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id); + return(-42); + } + + et1=gettime(); + evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs); + et2=gettime(); + + single2double(evecs, evecs_io_buffer, nev*24*N); + + free( evecs_io_buffer ); + evecs_io_buffer = NULL; + } + + if( evecs_count != ((size_t)nev*12*N) ) { + fprintf(stderr, "[make_exactdeflator] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename); + return(-3); + } + fclose(evecs_fs); + evecs_fs = NULL; + if(g_proc_id == g_stdio_proc) { + fprintf(stdout,"# [make_exactdeflator] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1); + } + } else if(strcmp(deflator_evecs_fileformat, "single") == 0) { + + if(N==VOLUME) { + for(i=0; inconv = nev; + info_arpack = 0; + } else { + et1=gettime(); + evals_arpack(N,nev,ncv,kind,acc,cheb_k,emin,emax,evals,evecs,deflator_eig_tol,deflator_eig_maxiter,f,&info_arpack,&nconv,deflator_logfile); + et2=gettime(); + + if(info_arpack != 0) { /* arpack didn't converge */ + if(g_proc_id == g_stdio_proc) fprintf(stderr,"[make_exactdeflator] Error: ARPACK didn't converge; exiting\n"); + return(-1); + } + + if(g_proc_id == g_stdio_proc) { + fprintf(stdout,"# [make_exactdeflator] ARPACK has computed %d eigenvectors\n",nconv); + fprintf(stdout,"# [make_exactdeflator] ARPACK time: %+e\n",et2-et1); + } + + if ( deflator_write_ev == 1) { + + if(strcmp(deflator_evecs_fileformat, "partfile") == 0 ) { + + /* set evec filenmae */ + sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", deflator_evecs_filename, nconv, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]); + + evecs_fs = fopen(evecs_filename, "w"); + if (evecs_fs == NULL) { + fprintf(stderr, "[make_exactdeflator] (%.4d) Error, could not open file %s for writing\n", g_cart_id, evecs_filename); + return(-4); + } + + if(deflator_evecs_writeprec == 64) { + + evecs_io_buffer = (void*)evecs; + + et1=gettime(); + evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double), (size_t)nconv*12*N, evecs_fs); + et2=gettime(); + + } else { + evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nconv*12*N ); + if( evecs_io_buffer == NULL) { + fprintf(stderr, "[make_exactdeflator] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id); + return(-41); + } + double2single(evecs_io_buffer, evecs, nconv*24*N); + + et1=gettime(); + evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nconv*12*N, evecs_fs); + et2=gettime(); + free(evecs_io_buffer); + evecs_io_buffer = NULL; + } + + if( evecs_count != ((size_t)nconv*12*N) ) { + fprintf(stderr, "[make_exactdeflator] (%.4d) Error, could not write proper amount of data to file %s\n", g_cart_id, evecs_filename); + return(-5); + } + fclose(evecs_fs); + evecs_fs = NULL; + + if(g_proc_id == g_stdio_proc) { + fprintf(stdout,"[make_exactdeflator] (%.4d) ARPACK time for writing %d eigenvectors: %+e seconds\n", g_cart_id, nconv, et2-et1); + } + + } else if (strcmp(deflator_evecs_fileformat, "single") == 0) { + + if(N==VOLUME) { + for(i=0; inconv = nconv; + + /* compute Ritz values and Ritz vectors if needed */ + if( (nconv>0) && (comp_evecs !=0)) { + + H = (_Complex double *)malloc(nconv*nconv*sizeof(_Complex double)); + HU = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); + Hinv = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); + initwork = (_Complex double *) malloc(nconv*sizeof(_Complex double)); + IPIV = (int *) malloc(nconv*sizeof(int)); + zheev_lwork = 3*nconv; + zheev_work = (_Complex double *) malloc(zheev_lwork*sizeof(_Complex double)); + zheev_rwork = (double *) malloc(3*nconv*sizeof(double)); + hevals = (double *) malloc(nconv*sizeof(double)); + + if((H==NULL) || (HU==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (zheev_lwork==NULL) || (zheev_rwork==NULL) || (hevals==NULL)) { + if(g_proc_id == g_stdio_proc) { + fprintf(stderr,"[make_exactdeflator] insufficient memory for H, HU, Hinv, initwork, IPIV, zheev_lwork, zheev_rwork, hevals inside deflator_cg.\n"); + } + exit(1); + } + et1=gettime(); + /* compute the elements of the hermitian matrix H + leading dimension is nconv and active dimension is nconv */ + + for(i=0; i0) && (comp_evecs !=0)) */ + + et2=gettime(); + if(g_proc_id == g_stdio_proc) { + fprintf(stdout,"# [make_exactdeflator] time to compute eigenvectors: %+e\n",et2-et1); + } + +#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) + free(_ax); + free(_r); + free(_tmps1); + free(_tmps2); +#else + free(ax); + free(r); + free(tmps1); + free(tmps2); +#endif + free(tmpv1); + + return(0); +} /* end of make_exactdeflator */ + +int fini_exactdeflator( deflator_params_t *deflator_params ) { + + strcpy(deflator_params->type_name, "NA"); + deflator_params->type = 0; + + deflator_params->eoprec = -1; + + deflator_params->f = (matrix_mult)NULL; + deflator_params->f32 = (matrix_mult32)NULL; + + deflator_params->f_final = (matrix_mult)NULL; + deflator_params->f_initial = (matrix_mult)NULL; + + deflator_params->projection_type = -1; + + if( deflator_params->evecs != NULL ) free( deflator_params->evecs); + deflator_params->evecs = NULL; + + if( deflator_params->evals != NULL ) free( deflator_params->evals ); + deflator_params->evals = NULL; + deflator_params->prec = 0; + + deflator_params->nconv = -1; + deflator_params->nev = 0; + deflator_params->ncv = 0; + deflator_params->evals_kind = 0; + deflator_params->comp_evecs = 0; + deflator_params->eig_tol = 0.; + deflator_params->eig_maxiter = 0; + strcpy(deflator_params->logfile, "NA"); + + deflator_params->use_acc = 0; + deflator_params->cheb_k = 0; + deflator_params->op_evmin = 0.; + deflator_params->op_evmax = 0.; + + deflator_params->write_ev = 0; + deflator_params->read_ev = 0; + strcpy(deflator_params->evecs_filename, "NA"); + strcpy(deflator_params->evecs_fileformat,"NA"); + + deflator_params->evecs_writeprec = 0; + + deflator_params->init = NULL; + deflator_params->fini = NULL; + return(0); +} /* end of fini_exactdeflator */ diff --git a/solver/deflator.h b/solver/deflator.h new file mode 100644 index 000000000..52c07cf6d --- /dev/null +++ b/solver/deflator.h @@ -0,0 +1,103 @@ +/***************************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * Deflating CG using eigenvectors computed using ARPACK + * + * Author: A.M. Abdel-Rehim (amabdelrehim@gmail.com) + * November 2014 + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + ****************************************************************************/ +/* A sample input is given in the sample-input folder */ + +#ifndef _DEFLATOR_H +#define _DEFLATOR_H + +#include "su3.h" +#include "solver/matrix_mult_typedef.h" +#include "solver/eigenvalues_arpack.h" + + +typedef struct deflator_params_t { + + char type_name[100]; + int type; + + int eoprec; + + matrix_mult f; /* f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */ + matrix_mult32 f32; /* f32(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */ + + matrix_mult f_final; /* (IN) final operator application during projection of type 1 */ + matrix_mult f_initial; /* (IN) initial operator application during projection of type 1 */ + + int projection_type; + + void *evecs; + void *evals; + int prec; + + /********************************** + * arpack parameters + **********************************/ + + int nconv; /* number of converged eigenvectors */ + int nev; /* number of eigenvectors to be computed by arpack*/ + int ncv; /* Size of the subspace used by arpack with the condition (nev+1) =< ncv */ + int evals_kind; /* type of eigenvalues to be computed + 0 eigenvalues of smallest real part + 1 eigenvalues of largest real part + 2 eigenvalues of smallest absolute value + 3 eigenvalues of largest absolute value*/ + int comp_evecs; /* 0 don't compute the resiudals of the eigenvalues + 1 compute the residulas of the eigenvalues*/ + double eig_tol; /* tolerance for computing eigenvalues with arpack */ + int eig_maxiter; /* maximum number of iterations to be used by arpack*/ + char logfile[500]; /* file name for the logfile used by arpack*/ + + /************************************* + * Chebychev polynomial acceleraton + * paprameters + *************************************/ + int use_acc; /* type of acceleration to be used: */ + /* 0 no acceleration, 1 compute eiegnvectors of the acceleration polynomial, */ + int cheb_k; /* order of the polynomial used is k+1 and the lowest value is k=-1 which correspond to T_0 */ + double op_evmin; /* lowest boundary of the interval for the polynomial acceleration */ + double op_evmax; /* highest boundary for the interval for the polynomial acceleration */ + + /************************************* + * parameters to control of I/O for + * eigenvectors + *************************************/ + int write_ev; + int read_ev; + char evecs_filename[500]; /* file name for the evecs used by arpack*/ + char evecs_fileformat[200]; /* file format for evecs used by arpack */ + + int evecs_writeprec; + + /************************************* + * init function + *************************************/ + int (*init)(struct deflator_params_t*); + int (*fini)(struct deflator_params_t*); + +} deflator_params_t; + +int make_exactdeflator( deflator_params_t *deflator_params); + +#endif From c6c0ea96ba5d4fa8556cbcccd7bc19f599132d3e Mon Sep 17 00:00:00 2001 From: Marcus Petschlies Date: Mon, 26 Dec 2016 11:04:32 +0100 Subject: [PATCH 5/8] added deflator_params to operator type; added parsing of exdefl parameters to read_input.l; additional solver type EXACTDEFLATION in solver/solver_types.h --- operator.h | 3 ++ read_input.l | 118 +++++++++++++++++++++++++++++++++++++++++- solver/solver_types.h | 3 +- 3 files changed, 122 insertions(+), 2 deletions(-) diff --git a/operator.h b/operator.h index 411476528..f5a3c3eb3 100644 --- a/operator.h +++ b/operator.h @@ -89,6 +89,9 @@ typedef struct { /*solver parameters struct*/ solver_params_t solver_params; + /* deflator parameters struct */ + deflator_params_t deflator_params; + /* multiple masses for CGMMS */ double extra_masses[MAX_EXTRA_MASSES]; int no_extra_masses; diff --git a/read_input.l b/read_input.l index e312c6e68..febe30445 100644 --- a/read_input.l +++ b/read_input.l @@ -285,6 +285,9 @@ static inline void rmQuotes(char *str){ %x MULTIGRID %x INITMULTIGRID +%x EXACTDEFLATION +%x INITEXACTDEFLATION + %x INITOPERATOR %x TMOP %x DBTMOP @@ -630,6 +633,104 @@ static inline void rmQuotes(char *str){ } } + +{ + {SPC}*Nev{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).nev = a; + if(myverbose) printf(" nev set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*Ncv{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).ncv = a; + if(myverbose) printf(" ncv set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EvalsKind{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).evals_kind = a; + if(myverbose) printf(" evals_kind set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EvecsMaxiter{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).eig_maxiter = a; + if(myverbose) printf(" eig_maxiter set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EvecsTolerance{EQL}{FLT} { + sscanf(yytext, " %[a-zA-Z1] = %lf", name, &c); + (optr->deflator_params).eig_tol = c; + if(myverbose) printf(" eig_tol set to %lf line %d operator %d\n", c, line_of_file, current_operator); + } + {SPC}*ChebPolyDegree{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).cheb_k = a; + if(myverbose) printf(" cheb_k set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*AccMode{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).use_acc = a; + if(myverbose) printf(" polynomial acceleration mode set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*CompEvecs{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).comp_evecs = a; + if(myverbose) printf(" compute evecs option set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*LogFile{EQL}{FILENAME}+ { + sscanf(yytext, " %[a-zA-Z] = %s", name, fname); + strncpy( (optr->deflator_params).logfile, fname, 200); + if(myverbose) printf(" logfile name is set to %s line %d operator %d\n", fname, line_of_file, current_operator); + } + {SPC}*EVminHPD{EQL}{FLT} { + sscanf(yytext, " %[a-zA-Z1] = %lf", name, &c); + (optr->deflator_params).op_evmin = c; + if(myverbose) printf(" op_evmin set to %lf line %d operator %d\n", c, line_of_file, current_operator); + } + {SPC}*EVmaxHPD{EQL}{FLT} { + sscanf(yytext, " %[a-zA-Z1] = %lf", name, &c); + (optr->deflator_params).op_evmax = c; + if(myverbose) printf(" op_evmax set to %lf line %d operator %d\n", c, line_of_file, current_operator); + } + {SPC}*EvecsRead{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).read_ev = a; + if(myverbose) printf(" read_ev set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EvecsWrite{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).write_ev = a; + if(myverbose) printf(" write_ev set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*ProjectionType{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).projection_type = a; + if(myverbose) printf(" projection_type set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EvecsFile{EQL}{FILENAME}+ { + sscanf(yytext, " %[a-zA-Z] = %s", name, fname); + strncpy( (optr->deflator_params).evecs_filename, fname, 200); + if(myverbose) printf(" evecs file name is set to %s line %d operator %d\n", fname, line_of_file, current_operator); + } + {SPC}*EvecsWriteprec{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).evecs_writeprec = a; + if(myverbose) printf(" evecs precision for writing set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EvecsFileformat{EQL}{FILENAME}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, fname); + strncpy( (optr->deflator_params).evecs_fileformat, fname, 100); + if(myverbose) printf(" evecs precision for writing set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EvecsPrecision{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + (optr->deflator_params).prec = 64; + if(myverbose) printf(" precision for eigenvectors set to %d line %d operator %d\n", a, line_of_file, current_operator); + } + {SPC}*EndExactDeflation{SPC}* { + if(myverbose) printf("EXACTDEFLATION parsed in line %d\n\n", line_of_file); + BEGIN(name_caller); + } +} + AMG{SPC}* { if(myverbose) printf("Initialising DDalphaAMG line %d\n", line_of_file); BEGIN(MULTIGRID); @@ -805,6 +906,10 @@ static inline void rmQuotes(char *str){ optr->rel_prec = 0; if(myverbose) printf(" SolverRelativePrecision set to NO line %d operator %d\n", line_of_file, current_operator); } + {SPC}*BeginExactDeflation { + printf("[read_input] starting exact deflator init\n"); + BEGIN(EXACTDEFLATION); + } {SPC}*mcgdelta{EQL}{FLT} { sscanf(yytext, " %[a-zA-Z1] = %lf", name, &c); (optr->solver_params).mcg_delta = c; @@ -1175,6 +1280,12 @@ static inline void rmQuotes(char *str){ if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); BEGIN(name_caller); } + exactdeflatedcg { + optr->solver = EXACTDEFLATEDCG; + if(myverbose) printf(" Solver set to EXACTDEFLATEDCG line %d operator %d\n", line_of_file, current_operator); + BEGIN(name_caller); + } + } @@ -1206,6 +1317,11 @@ static inline void rmQuotes(char *str){ if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); BEGIN(name_caller); } + exactdeflatedcg { + optr->solver = EXACTDEFLATEDCG; + if(myverbose) printf(" Solver set to EXACTDEFLATEDCG line %d operator %d\n", line_of_file, current_operator); + BEGIN(name_caller); + } } { @@ -2474,7 +2590,7 @@ static inline void rmQuotes(char *str){ BEGIN(comment_caller); } -{SPC}*\n { +{SPC}*\n { line_of_file++; } <*>{SPC}*\n { diff --git a/solver/solver_types.h b/solver/solver_types.h index d16491580..2205d71d5 100644 --- a/solver/solver_types.h +++ b/solver/solver_types.h @@ -24,7 +24,8 @@ typedef enum SOLVER_TYPE { MCR, CR, BICG, - MG + MG, + EXACTDEFLATEDCG } SOLVER_TYPE; #endif From b50813c8859e56b35dd3fae9b94169743e38b254 Mon Sep 17 00:00:00 2001 From: Marcus Petschlies Date: Mon, 26 Dec 2016 11:40:14 +0100 Subject: [PATCH 6/8] added deflator_params initialization in operator.c; TODO: add deflator_params and solver_params to argument list of invert_eo and invert_clover_eo --- operator.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/operator.c b/operator.c index d9a4e14fe..d3f7683c6 100644 --- a/operator.c +++ b/operator.c @@ -130,6 +130,37 @@ int add_operator(const int type) { optr->inverter = &op_invert; optr->write_prop = &op_write_prop; + /* init deflator parameters */ + (optr->deflator_params).type = -1; + (optr->deflator_params).eoprec = _default_even_odd_flag; + (optr->deflator_params).f = NULL; + (optr->deflator_params).f32 = NULL; + (optr->deflator_params).f_final = NULL; + (optr->deflator_params).f_initial = NULL; + (optr->deflator_params).evecs = NULL; + (optr->deflator_params).evals = NULL; + (optr->deflator_params).prec = 64; + (optr->deflator_params).nconv = -1; + (optr->deflator_params).nev = 0; + (optr->deflator_params).ncv = 0; + (optr->deflator_params).evals_kind = 0; + (optr->deflator_params).comp_evecs = 0; + (optr->deflator_params).eig_tol = 0.; + (optr->deflator_params).eig_maxiter = 0; + (optr->deflator_params).use_acc = 0; + (optr->deflator_params).cheb_k = 0; + (optr->deflator_params).op_evmin = 0.0; + (optr->deflator_params).op_evmax = 0.0; + (optr->deflator_params).write_ev = 0; + (optr->deflator_params).read_ev = 0; + (optr->deflator_params).evecs_writeprec = 64; + (optr->deflator_params).init = NULL; + strcpy( (optr->deflator_params).type_name, "NA"); + strcpy((optr->deflator_params).logfile, "arpack_log" ); + strcpy((optr->deflator_params).evecs_filename, "eigenvectors"); + strcpy((optr->deflator_params).evecs_fileformat, "single"); + + /* Overlap needs special treatment */ if(optr->type == OVERLAP) { optr->even_odd_flag = 0; @@ -165,12 +196,28 @@ int init_operators() { optr = operator_list + i; /* This is a hack, it should be set on an operator basis. */ optr->rel_prec = g_relative_precision_flag; + + if(optr->solver == EXACTDEFLATEDCG) { + (optr->deflator_params).eoprec = optr->even_odd_flag; + (optr->deflator_params).type = optr->type; + (optr->deflator_params).init = make_exactdeflator; + } + if(optr->type == TMWILSON || optr->type == WILSON) { if(optr->c_sw > 0) { init_sw_fields(); } optr->applyM = &M_full; optr->applyQ = &Q_full; + + if(optr->solver == EXACTDEFLATEDCG) { + if(optr->type == TMWILSON ) { + strcpy( (optr->deflator_params).type_name, "TMWILSON" ); + } else if( optr->type == WILSON) { + strcpy( (optr->deflator_params).type_name, "WILSON" ); + } + } + if(optr->even_odd_flag) { optr->applyMee = &Mee_psi; optr->applyMeeInv = &Mee_inv_psi; @@ -179,6 +226,15 @@ int init_operators() { optr->applyQsq = &Qtm_pm_psi; optr->applyMp = &Mtm_plus_psi; optr->applyMm = &Mtm_minus_psi; + + if(optr->solver == EXACTDEFLATEDCG) { + /* the the operators in the deflator */ + (optr->deflator_params).f = &Qtm_pm_psi; + (optr->deflator_params).f32 = &Qtm_pm_psi_32; + (optr->deflator_params).f_final = &Qtm_plus_psi; + (optr->deflator_params).f_initial = &Qtm_minus_psi; + } + } else { optr->applyQp = &Q_plus_psi; @@ -186,6 +242,14 @@ int init_operators() { optr->applyQsq = &Q_pm_psi; optr->applyMp = &D_psi; optr->applyMm = &M_minus_psi; + + if(optr->solver == EXACTDEFLATEDCG) { + /* the the operators in the deflator */ + (optr->deflator_params).f = &Q_pm_psi; + (optr->deflator_params).f32 = &Q_pm_psi_32; + (optr->deflator_params).f_final = &Q_plus_psi; + (optr->deflator_params).f_initial = &Q_minus_psi; + } } if(optr->solver == CGMMS) { if (g_cart_id == 0 && optr->even_odd_flag == 1) @@ -211,6 +275,11 @@ int init_operators() { } optr->applyM = &Msw_full; optr->applyQ = &Qsw_full; + + if(optr->solver == EXACTDEFLATEDCG) { + strcpy( (optr->deflator_params).type_name, "CLOVER" ); + } + if(optr->even_odd_flag) { optr->applyMee = &Mee_sw_psi; optr->applyMeeInv = &Mee_sw_inv_psi; @@ -219,6 +288,14 @@ int init_operators() { optr->applyQsq = &Qsw_pm_psi; optr->applyMp = &Msw_plus_psi; optr->applyMm = &Msw_minus_psi; + + if(optr->solver == EXACTDEFLATEDCG) { + /* the the operators in the deflator */ + (optr->deflator_params).f = &Qsw_pm_psi; + (optr->deflator_params).f32 = &Qsw_pm_psi_32; + (optr->deflator_params).f_final = &Qsw_plus_psi; + (optr->deflator_params).f_initial = &Qsw_minus_psi; + } } else { optr->applyQp = &Qsw_full_plus_psi; @@ -226,6 +303,14 @@ int init_operators() { optr->applyQsq = &Qsw_full_pm_psi; optr->applyMp = &D_psi; optr->applyMm = &Msw_full_minus_psi; + + if(optr->solver == EXACTDEFLATEDCG) { + /* the the operators in the deflator */ + (optr->deflator_params).f = Qsw_full_pm_psi; + (optr->deflator_params).f32 = NULL; + (optr->deflator_params).f_final = &Qsw_full_plus_psi; + (optr->deflator_params).f_initial = &Qsw_full_minus_psi; + } } if(optr->solver == CGMMS) { if (g_cart_id == 0 && optr->even_odd_flag == 1) From 443ee35c72aa306ff745244168530bd04c984e48 Mon Sep 17 00:00:00 2001 From: hch02q Date: Thu, 29 Dec 2016 18:17:29 +0100 Subject: [PATCH 7/8] added support for arpackcg, ARPACK-based exact deflation; calls to ARPACKCG and EXACTDEFLATEDCG must be added to invert_eo and invert_clover_eo --- include/tmLQCD.h | 9 + invert_eo.h | 1 + linalg/arpack.h | 74 ++++ linalg/assign.c | 162 ++++++++ linalg/assign.h | 6 + linalg/fortran.h | 6 + operator.c | 1 + operator.h | 1 + read_input.l | 2 +- solver/Makefile.in | 4 +- solver/arpack_cg.c | 733 ++++++++++++++++++++++++++++++++++++ solver/arpack_cg.h | 52 +++ solver/deflated_cg.h | 43 +++ solver/eigenvalues_arpack.c | 515 +++++++++++++++++++++++++ solver/eigenvalues_arpack.h | 74 ++++ solver/precon.c | 125 ++++++ solver/precon.h | 39 ++ solver/solver_params.h | 51 +++ wrapper/lib_wrapper.c | 5 + 19 files changed, 1901 insertions(+), 2 deletions(-) create mode 100644 linalg/arpack.h create mode 100644 solver/arpack_cg.c create mode 100644 solver/arpack_cg.h create mode 100644 solver/deflated_cg.h create mode 100644 solver/eigenvalues_arpack.c create mode 100644 solver/eigenvalues_arpack.h create mode 100644 solver/precon.c create mode 100644 solver/precon.h diff --git a/include/tmLQCD.h b/include/tmLQCD.h index 7c4dcd464..038fd9299 100755 --- a/include/tmLQCD.h +++ b/include/tmLQCD.h @@ -46,6 +46,15 @@ extern "C" #endif } tmLQCD_mpi_params; + typedef struct { + char type_name[100]; + int eoprec; + void *evecs; + void *evals; + int prec; + int nev; + } tmLQCD_deflator_params; + int tmLQCD_invert_init(int argc, char *argv[], const int verbose, const int external_id); int tmLQCD_read_gauge(const int nconfig); int tmLQCD_invert(double * const propagator, double * const source, diff --git a/invert_eo.h b/invert_eo.h index d64bb3c0e..bb3b87c34 100644 --- a/invert_eo.h +++ b/invert_eo.h @@ -28,6 +28,7 @@ #define _INVERT_EO_H #include "global.h" #include "solver/solver_params.h" +#include "solver/deflator.h" int invert_eo(spinor * const Even_new, spinor * const Odd_new, spinor * const Even, spinor * const Odd, diff --git a/linalg/arpack.h b/linalg/arpack.h new file mode 100644 index 000000000..a4d5c7197 --- /dev/null +++ b/linalg/arpack.h @@ -0,0 +1,74 @@ +/************************************************************************* + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + * + * Header file to interface with ARPACK package for solving large + * eigenvalue problems + * + * Author: Abdou M. Abdel-Rehim (amabdelrehim@gmail.com) 2014 + *************************************************************************/ + + +#ifndef _ARPACK_H +#define _ARPACK_H + +#include +#include "linalg/fortran.h" +#ifdef TM_USE_MPI +# include +#endif + + +//ARPACK initlog and finilog routines for printing the ARPACK log (same for serial and parallel version) +extern int _AFT(initlog) (int*, char*, int); +extern int _AFT(finilog) (int*); + + +//ARPACK driver routines for computing eigenvectors (serial version) +extern int _AFT(znaupd) (int *ido, char *bmat, int *n, char *which, int *nev, double *tol, + _Complex double *resid, int *ncv, _Complex double *v, int *ldv, + int *iparam, int *ipntr, _Complex double *workd, _Complex double *workl, + int *lworkl, double *rwork, int *info, int bmat_size, int which_size ); + +extern int _AFT(zneupd) (int *comp_evecs, char *howmany, int *select, _Complex double *evals, + _Complex double *v, int *ldv, _Complex double *sigma, _Complex double *workev, + char *bmat, int *n, char *which, int *nev, double *tol, _Complex double *resid, + int *ncv, _Complex double *v1, int *ldv1, int *iparam, int *ipntr, + _Complex double *workd, _Complex double *workl, int *lworkl, double *rwork, int *info, + int howmany_size, int bmat_size, int which_size); + +extern int _AFT(mcinitdebug)(int*,int*,int*,int*,int*,int*,int*,int*); + +//PARPACK routines (parallel version) +#ifdef TM_USE_MPI +extern int _AFT(pznaupd) (int *comm, int *ido, char *bmat, int *n, char *which, int *nev, double *tol, + _Complex double *resid, int *ncv, _Complex double *v, int *ldv, + int *iparam, int *ipntr, _Complex double *workd, _Complex double *workl, + int *lworkl, double *rwork, int *info, int bmat_size, int which_size ); + +extern int _AFT(pzneupd) (int *comm, int *comp_evecs, char *howmany, int *select, _Complex double *evals, + _Complex double *v, int *ldv, _Complex double *sigma, _Complex double *workev, + char *bmat, int *n, char *which, int *nev, double *tol, _Complex double *resid, + int *ncv, _Complex double *v1, int *ldv1, int *iparam, int *ipntr, + _Complex double *workd, _Complex double *workl, int *lworkl, double *rwork, int *info, + int howmany_size, int bmat_size, int which_size); + +extern int _AFT(pmcinitdebug)(int*,int*,int*,int*,int*,int*,int*,int*); +#endif + +#endif diff --git a/linalg/assign.c b/linalg/assign.c index 7f5a4cdef..01a799238 100644 --- a/linalg/assign.c +++ b/linalg/assign.c @@ -34,6 +34,7 @@ #include #include #include "su3.h" +#include "global.h" #include "assign.h" @@ -67,3 +68,164 @@ void assign_su3vect(su3_vector * const R, su3_vector * const S, const int N) } } #endif + + +/* copy a complex double S of size N into a spinor R of size N/12 */ +/* S input, R output */ +/* S and R must not overlap */ +void assign_complex_to_spinor(spinor * const R, _Complex double * const S, const int N) +{ + //check the dimension + if( (N%12) != 0){ + if(g_proc_id==g_stdio_proc) + fprintf(stderr,"Error in assign_double_to_spinor. N is not a multiple of 12"); + } + + int k; //spinor index + spinor *r; + _Complex double *s; + + k=0; + for(int ix=0; ixs0).c0 = *s; + (r->s0).c1 = *(s+1); + (r->s0).c2 = *(s+2); + + (r->s1).c0 = *(s+3); + (r->s1).c1 = *(s+4); + (r->s1).c2 = *(s+5); + + (r->s2).c0 = *(s+6); + (r->s2).c1 = *(s+7); + (r->s2).c2 = *(s+8); + + + (r->s3).c0 = *(s+9); + (r->s3).c1 = *(s+10); + (r->s3).c2 = *(s+11); + + k++; + } + + return; +} + + +void assign_complex_to_spinor_32(spinor32 * const R, _Complex float* const S, const int N) +{ + //check the dimension + if( (N%12) != 0){ + if(g_proc_id==g_stdio_proc) + fprintf(stderr,"Error in assign_double_to_spinor. N is not a multiple of 12"); + } + + int k; //spinor index + spinor32 *r; + _Complex float *s; + + k=0; + for(int ix=0; ixs0).c0 = *s; + (r->s0).c1 = *(s+1); + (r->s0).c2 = *(s+2); + + (r->s1).c0 = *(s+3); + (r->s1).c1 = *(s+4); + (r->s1).c2 = *(s+5); + + (r->s2).c0 = *(s+6); + (r->s2).c1 = *(s+7); + (r->s2).c2 = *(s+8); + + + (r->s3).c0 = *(s+9); + (r->s3).c1 = *(s+10); + (r->s3).c2 = *(s+11); + + k++; + } + + return; +} + +/* copy a spinor S of size N into a complex double R of size 12*N */ +/* S input, R output */ +/* S and R must not overlap */ +void assign_spinor_to_complex(_Complex double * const R, spinor * const S, const int N) +{ + + int k; //complex double index + _Complex double *r; + spinor *s; + + k=0; + for(int ix=0; ixs0).c0; + *(r+1) = (s->s0).c1; + *(r+2) = (s->s0).c2; + + *(r+3) = (s->s1).c0; + *(r+4) = (s->s1).c1; + *(r+5) = (s->s1).c2; + + *(r+6) = (s->s2).c0; + *(r+7) = (s->s2).c1; + *(r+8) = (s->s2).c2; + + *(r+9) = (s->s3).c0; + *(r+10) = (s->s3).c1; + *(r+11) = (s->s3).c2; + + k +=12; + } + + return; +} + + +void assign_spinor_to_complex_32(_Complex float * const R, spinor32 * const S, const int N) +{ + + int k; //complex double index + _Complex float *r; + spinor32 *s; + + k=0; + for(int ix=0; ixs0).c0; + *(r+1) = (s->s0).c1; + *(r+2) = (s->s0).c2; + + *(r+3) = (s->s1).c0; + *(r+4) = (s->s1).c1; + *(r+5) = (s->s1).c2; + + *(r+6) = (s->s2).c0; + *(r+7) = (s->s2).c1; + *(r+8) = (s->s2).c2; + + *(r+9) = (s->s3).c0; + *(r+10) = (s->s3).c1; + *(r+11) = (s->s3).c2; + + k +=12; + } + + return; +} diff --git a/linalg/assign.h b/linalg/assign.h index 302829aa6..00a3fceaf 100644 --- a/linalg/assign.h +++ b/linalg/assign.h @@ -27,4 +27,10 @@ void assign(spinor * const R, spinor * const S, const int N); void assign_32(spinor32 * const R, spinor32 * const S, const int N); void assign_su3vect(su3_vector * const R, su3_vector * const S, const int N); +void assign_complex_to_spinor(spinor * const R, _Complex double * const S, const int N); /* N is the size of S */ +void assign_spinor_to_complex(_Complex double * const R, spinor * const S, const int N); /* N is the size of S */ + +void assign_complex_to_spinor_32(spinor32 * const R, _Complex float* const S, const int N); +void assign_spinor_to_complex_32(_Complex float* const R, spinor32 * const S, const int N); /* N is the size of S */ + #endif diff --git a/linalg/fortran.h b/linalg/fortran.h index 95b8ccaf8..1ed1379cd 100644 --- a/linalg/fortran.h +++ b/linalg/fortran.h @@ -25,4 +25,10 @@ #define _FT(s) s ## _ #endif +#if (defined NOARPACKUNDERSCORE) +#define _AFT(s) s +#else +#define _AFT(s) s ## _ +#endif + #endif diff --git a/operator.c b/operator.c index d3f7683c6..f21d8fff3 100644 --- a/operator.c +++ b/operator.c @@ -48,6 +48,7 @@ #include "start.h" #include "solver/eigenvalues.h" #include "solver/solver.h" +#include "solver/deflator.h" #include #include #include diff --git a/operator.h b/operator.h index f5a3c3eb3..0074e95f6 100644 --- a/operator.h +++ b/operator.h @@ -25,6 +25,7 @@ #include "solver/dirac_operator_eigenvectors.h" #include "su3.h" #include "solver/solver_params.h" +#include "solver/deflator.h" #define TMWILSON 0 diff --git a/read_input.l b/read_input.l index febe30445..1f9d402a2 100644 --- a/read_input.l +++ b/read_input.l @@ -102,7 +102,7 @@ static inline void rmQuotes(char *str){ double c; float cs; int reread = 0; - char name[100]; + char name[100], fname[200]; char * type; int verbose = 0; diff --git a/solver/Makefile.in b/solver/Makefile.in index a2fd11132..debe8b4b5 100644 --- a/solver/Makefile.in +++ b/solver/Makefile.in @@ -36,8 +36,10 @@ libsolver_TARGETS = bicgstab_complex gmres incr_eigcg eigcg restart_X ortho \ cgne4complex mr4complex fgmres4complex \ quicksort gmres_dr lu_solve jdher Msap \ jdher_bi gram-schmidt eigenvalues_bi \ + eigenvalues_arpack arpack_cg \ + deflator deflated_cg \ bicgstab_complex_bi cg_her_bi pcg_her \ - sub_low_ev cg_her_nd poly_precon \ + sub_low_ev cg_her_nd poly_precon precon \ generate_dfl_subspace dfl_projector \ cg_mms_tm cg_mms_tm_nd mixed_cg_mms_tm_nd \ solver_field sumr mixed_cg_her index_jd \ diff --git a/solver/arpack_cg.c b/solver/arpack_cg.c new file mode 100644 index 000000000..10cca6e13 --- /dev/null +++ b/solver/arpack_cg.c @@ -0,0 +1,733 @@ +/***************************************************************************** + * Copyright (C) 2014 Abdou M. Abdel-Rehim + * + * Deflating CG using eigenvectors computed using ARPACK + * eigenvectors used correspond to those with smallest magnitude + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + ****************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include + +#ifdef TM_USE_MPI +# include +#endif + +#include "global.h" +#include "gettime.h" +#include "linalg_eo.h" +#include "start.h" +#include "linalg/blas.h" +#include "linalg/lapack.h" +#include +#include +#include +#include +#include "solver_field.h" +#include "solver/arpack_cg.h" + +int arpack_cg( + /* solver params */ + const int N, /* (IN) Number of lattice sites for this process*/ + solver_params_t solver_params, /* (IN) parameters for solver */ + spinor * const x, /* (IN/OUT) initial guess on input, solution on output for this RHS*/ + spinor * const b, /* (IN) right-hand side*/ + matrix_mult f, /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */ + matrix_mult f32, /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */ + const double eps_sq, /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/ + const int rel_prec, /* (IN) 0 for using absoute error for convergence + 1 for using relative error for convergence*/ + const int maxit, /* (IN) Maximum allowed number of iterations to solution for the linear system*/ + matrix_mult f_final, /* (IN) final operator application during projection of type 1 */ + matrix_mult f_initial /* (IN) initial operator application during projection of type 1 */ +) { + + /* Static variables and arrays. */ + static int ncurRHS=0; /* current number of the system being solved */ + static void *_ax,*_r,*_tmps1,*_tmps2; + static spinor *ax,*r,*tmps1,*tmps2; + static _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1; + static _Complex double *zheev_work; + static double *hevals,*zheev_rwork; + static int *IPIV; + static int info_arpack=0; + static int nconv=0; /* number of converged eigenvectors as returned by arpack */ + int i,j,tmpsize; + char cV='V',cN='N', cU='U'; + int ONE=1; + int zheev_lwork,zheev_info; + _Complex double c1, c2, c3, tpone=1.0,tzero=0.0; + double d1,d2,d3; + double et1,et2; /* timing variables */ + char evecs_filename[500]; + FILE *evecs_fs=NULL; + size_t evecs_count; + WRITER *evecs_writer=NULL; + spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL; + paramsPropagatorFormat *evecs_propagatorFormat = NULL; + void *evecs_io_buffer = NULL; + + int parallel; /* for parallel processing of the scalar products */ +#ifdef TM_USE_MPI + parallel=1; +#else + parallel=0; +#endif + + /* leading dimension for spinor vectors */ + int LDN; + if(N==VOLUME) + LDN = VOLUMEPLUSRAND; + else + LDN = VOLUMEPLUSRAND/2; + + /*(IN) Number of right-hand sides to be solved*/ + const int nrhs = solver_params.arpackcg_nrhs; + /*(IN) First number of right-hand sides to be solved using tolerance eps_sq1*/ + const int nrhs1 = solver_params.arpackcg_nrhs1; + /*(IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/ + const double eps_sq1 = solver_params.arpackcg_eps_sq1; + /*(IN) suqared tolerance for restarting cg */ + const double res_eps_sq = solver_params.arpackcg_res_eps_sq; + + /* parameters for arpack */ + + /*(IN) number of eigenvectors to be computed by arpack*/ + const int nev = solver_params.arpackcg_nev; + /*(IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/ + const int ncv = solver_params.arpackcg_ncv; + /*(IN) tolerance for computing eigenvalues with arpack */ + double arpack_eig_tol = solver_params.arpackcg_eig_tol; + /*(IN) maximum number of iterations to be used by arpack*/ + int arpack_eig_maxiter = solver_params.arpackcg_eig_maxiter; + /*(IN) 0 for eigenvalues with smallest real part "SR" + 1 for eigenvalues with largest real part "LR" + 2 for eigenvalues with smallest absolute value "SM" + 3 for eigenvalues with largest absolute value "LM" + 4 for eigenvalues with smallest imaginary part "SI" + 5 for eigenvalues with largest imaginary part "LI"*/ + int kind = solver_params.arpackcg_evals_kind; + /*(IN) 0 don't compute the eiegnvalues and their residuals of the original system + 1 compute the eigenvalues and the residuals for the original system (the orthonormal basis + still be used in deflation and they are not overwritten).*/ + int comp_evecs = solver_params.arpackcg_comp_evecs; + /*(IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/ + int acc = solver_params.use_acc; + /*(IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/ + int cheb_k = solver_params.cheb_k; + /*(IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/ + double emin = solver_params.op_evmin; + /*(IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/ + double emax = solver_params.op_evmax; + /*(IN) file name to be used for printing out debugging information from arpack*/ + char *arpack_logfile = solver_params.arpack_logfile; + /*(IN) read eigenvectors in Schur basis from file */ + int arpack_read_ev = solver_params.arpackcg_read_ev; + /*(IN) write eigenvectors in Schur basis to file */ + int arpack_write_ev = solver_params.arpackcg_write_ev; + /*(IN) file name to be used for reading and writing evecs from and to disc */ + char *arpack_evecs_filename = solver_params.arpack_evecs_filename; + /*(IN) precision used for writing eigenvectors */ + int arpack_evecs_writeprec = solver_params.arpack_evecs_writeprec; + /* how to project with approximate eigenvectors */ + int projection_type = solver_params.projection_type; + /* file format for evecs used by arpack */ + char *arpack_evecs_fileformat = solver_params.arpack_evecs_fileformat; + + /*------------------------------------------------------------- + if this is the first right hand side, allocate memory, + call arpack, and compute resiudals of eigenvectors if needed + -------------------------------------------------------------*/ + if(ncurRHS==0){ +#if (defined SSE || defined SSE2 || defined SSE3) + _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_ax==NULL) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[arpack_cg] insufficient memory for _ax inside arpack_cg.\n"); + exit(1); + } + else + {ax = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);} + + _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_r==NULL) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[arpack_cg] insufficient memory for _r inside arpack_cg.\n"); + exit(1); + } + else + {r = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);} + + _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_tmps1==NULL) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[arpack_cg] insufficient memory for _tmps1 inside arpack_cg.\n"); + exit(1); + } + else + {tmps1 = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);} + + _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor)); + if(_tmps2==NULL) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[arpack_cg] insufficient memory for _tmps2 inside arpack_cg.\n"); + exit(1); + } + else + {tmps2 = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE);} + +#else + ax = (spinor *) malloc(LDN*sizeof(spinor)); + r = (spinor *) malloc(LDN*sizeof(spinor)); + tmps1 = (spinor *) malloc(LDN*sizeof(spinor)); + tmps2 = (spinor *) malloc(LDN*sizeof(spinor)); + + if( (ax == NULL) || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) ) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[arpack_cg] insufficient memory for ax,r,tmps1,tmps2 inside arpack_cg.\n"); + exit(1); + } +#endif + + + evecs = (_Complex double *) malloc(ncv*12*N*sizeof(_Complex double)); /* note: no extra buffer */ + evals = (_Complex double *) malloc(ncv*sizeof(_Complex double)); + tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double)); + + if((evecs == NULL) || (evals==NULL) || (tmpv1==NULL)) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[arpack_cg] insufficient memory for evecs and evals inside arpack_cg.\n"); + exit(1); + } + + if ( arpack_read_ev == 1) { + + if (strcmp(arpack_evecs_fileformat, "partfile") == 0) { + /* set evec filenmae */ + sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]); + evecs_fs = fopen(evecs_filename, "r"); + if (evecs_fs == NULL) { + fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename); + return(-2); + } + fprintf(stdout, "# [arpack_cg] reading eigenvectors from file %s\n", evecs_filename); + + if(arpack_evecs_writeprec == 64) { + + evecs_io_buffer = (void*)evecs; + + et1=gettime(); + evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs); + et2=gettime(); + + } else { + evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N ); + if( evecs_io_buffer == NULL) { + fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id); + return(-42); + } + + et1=gettime(); + evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs); + et2=gettime(); + + single2double(evecs, evecs_io_buffer, nev*24*N); + + free( evecs_io_buffer ); + evecs_io_buffer = NULL; + } + + if( evecs_count != ((size_t)nev*12*N) ) { + fprintf(stderr, "[arpack_cg] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename); + return(-3); + } + fclose(evecs_fs); + evecs_fs = NULL; + if(g_proc_id == g_stdio_proc) { + fprintf(stdout,"# [arpack_cg] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1); + } + } else if(strcmp(arpack_evecs_fileformat, "single") == 0) { + + if(N==VOLUME) { + for(i=0; i0) && (comp_evecs !=0)) + { + /* copy H into HU */ + tmpsize=nconv*nconv; + _FT(zcopy)(&tmpsize,H,&ONE,HU,&ONE); + + /* compute eigenvalues and eigenvectors of HU*/ + /* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,INFO ) */ + _FT(zheev)(&cV,&cU,&nconv,HU,&nconv,hevals,zheev_work,&zheev_lwork,zheev_rwork,&zheev_info,1,1); + + if(zheev_info != 0) + { + if(g_proc_id == g_stdio_proc) + { + fprintf(stderr,"[arpack_cg] Error in ZHEEV:, info = %d\n",zheev_info); + fflush(stderr); + } + exit(1); + } + + /* If you want to replace the schur (orthonormal) basis by eigen basis + use something like this. It is better to use the schur basis because + they are better conditioned. Use this part only to get the eigenvalues + and their resduals for the operator (D^\daggerD) + esize=(ncv-nconv)*12*N; + Zrestart_X(evecs,12*N,HU,12*N,nconv,nconv,&evecs[nconv*N],esize); */ + + /* compute residuals and print out results */ + + if(g_proc_id == g_stdio_proc) + {fprintf(stdout,"# [arpack_cg] Ritz values of A and their residulas (||A*x-lambda*x||/||x||\n"); + fprintf(stdout,"# [arpack_cg] =============================================================\n"); + fflush(stdout);} + + for(i=0; i0) && (comp_evecs !=0)) */ + et2=gettime(); + if(g_proc_id == g_stdio_proc) { + fprintf(stdout,"[arpack_cg] time to compute eigenvectors: %+e\n",et2-et1); + } + + } /* if(ncurRHS==0) */ + + double eps_sq_used,restart_eps_sq_used; /* tolerance squared for the linear system */ + + double cur_res; /* current residual squared */ + + /*increment the RHS counter*/ + ncurRHS = ncurRHS +1; + + /* set the tolerance to be used for this right-hand side */ + if(ncurRHS > nrhs1){ + eps_sq_used = eps_sq; + } + else{ + eps_sq_used = eps_sq1; + } + + if(g_proc_id == g_stdio_proc && g_debug_level > 0) { + fprintf(stdout, "# [arpack_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type); + fflush(stdout); + } + + /*---------------------------------------------------------------*/ + /* Call init-CG until this right-hand side converges */ + /*---------------------------------------------------------------*/ + double wt1,wt2,wE,wI; + double normsq,tol_sq; + int flag,maxit_remain,numIts,its; + int info_lapack; + + wE = 0.0; wI = 0.0; /* Start accumulator timers */ + flag = -1; /* System has not converged yet */ + maxit_remain = maxit; /* Initialize Max and current # of iters */ + numIts = 0; + restart_eps_sq_used=res_eps_sq; + + while( flag == -1 ) + { + + if(nconv > 0) + { + + + /* --------------------------------------------------------- */ + /* Perform init-CG with evecs vectors */ + /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) */ + /* --------------------------------------------------------- */ + wt1 = gettime(); + + /*r0=b-Ax0*/ + f(ax,x); /*ax = A*x */ + diff(r,b,ax,N); /* r=b-A*x */ + + if( projection_type == 0) { + + /* x = x + evecs*inv(H)*evecs'*r */ + for(int i=0; i < nconv; i++) + { + assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N); + initwork[i]= scalar_prod(tmps1,r,N,parallel); + } + + /* solve the linear system H y = c */ + tmpsize=nconv*nconv; + _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */ + /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack); + + if(info_lapack != 0) + { + if(g_proc_id == g_stdio_proc) { + fprintf(stderr, "[arpack_cg] Error in ZGESV:, info = %d\n",info_lapack); + fflush(stderr); + } + exit(1); + } + + /* x = x + evecs*inv(H)*evecs'*r */ + for(i=0; i 0) */ + + + /* which tolerance to use */ + if(eps_sq_used > restart_eps_sq_used) + { + tol_sq = eps_sq_used; + flag = 1; /* shouldn't restart again */ + } + else + { + tol_sq = restart_eps_sq_used; + } + + wt1 = gettime(); + its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f); + + wt2 = gettime(); + + wE = wE + wt2-wt1; + + /* check convergence */ + if(its == -1) + { + /* cg didn't converge */ + if(g_proc_id == g_stdio_proc) { + fprintf(stderr, "[arpack_cg] CG didn't converge within the maximum number of iterations in arpack_cg. Exiting...\n"); + fflush(stderr); + exit(1); + + } + } + else + { + numIts += its; + maxit_remain = maxit - numIts; /* remaining number of iterations */ + restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */ + } + + } + /* end while (flag ==-1) */ + + /* ---------- */ + /* Reporting */ + /* ---------- */ + /* compute the exact residual */ + f(ax,x); /* ax= A*x */ + diff(r,b,ax,N); /* r=b-A*x */ + normsq=square_norm(r,N,parallel); + if(g_debug_level > 0 && g_proc_id == g_stdio_proc) + { + fprintf(stdout, "# [arpack_cg] For this rhs:\n"); + fprintf(stdout, "# [arpack_cg] Total initCG Wallclock : %+e\n", wI); + fprintf(stdout, "# [arpack_cg] Total cg Wallclock : %+e\n", wE); + fprintf(stdout, "# [arpack_cg] Iterations: %-d\n", numIts); + fprintf(stdout, "# [arpack_cg] Actual Resid of LinSys : %+e\n",normsq); + } + + + /* free memory if this was your last system to solve */ + if(ncurRHS == nrhs){ +#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) + free(_ax); free(_r); free(_tmps1); free(_tmps2); +#else + free(ax); free(r); free(tmps1); free(tmps2); +#endif + free(evecs); free(evals); free(H); free(HU); free(Hinv); + free(initwork); free(tmpv1); free(zheev_work); + free(hevals); free(zheev_rwork); free(IPIV); + } + + + return numIts; +} + + + + diff --git a/solver/arpack_cg.h b/solver/arpack_cg.h new file mode 100644 index 000000000..b683b268a --- /dev/null +++ b/solver/arpack_cg.h @@ -0,0 +1,52 @@ +/***************************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * Deflating CG using eigenvectors computed using ARPACK + * + * Author: A.M. Abdel-Rehim (amabdelrehim@gmail.com) + * Novemebr 2014 + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + ****************************************************************************/ +/* A sample input is given in the sample-input folder */ + +#ifndef _ARPACK_CG_H +#define _ARPACK_CG_H + +#include "su3.h" +#include "solver/matrix_mult_typedef.h" +#include "solver/solver_params.h" +#include "solver/eigenvalues_arpack.h" + + +int arpack_cg( + /* solver params */ + const int N, /* (IN) Number of lattice sites for this process*/ + solver_params_t solver_params, /* (IN) parameters for solver */ + spinor * const x, /* (IN/OUT) initial guess on input, solution on output for this RHS*/ + spinor * const b, /* (IN) right-hand side*/ + matrix_mult f, /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */ + matrix_mult f32, /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */ + const double eps_sq, /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/ + const int rel_prec, /* (IN) 0 for using absoute error for convergence + 1 for using relative error for convergence*/ + const int maxit, /* (IN) Maximum allowed number of iterations to solution for the linear system*/ + matrix_mult f_final, /* (IN) final operator application during projection of type 1 */ + matrix_mult f_initial /* (IN) initial operator application during projection of type 1 */ +); + +#endif diff --git a/solver/deflated_cg.h b/solver/deflated_cg.h new file mode 100644 index 000000000..41d240137 --- /dev/null +++ b/solver/deflated_cg.h @@ -0,0 +1,43 @@ +/***************************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * Deflating CG using eigenvectors computed using ARPACK + * + * Author: A.M. Abdel-Rehim (amabdelrehim@gmail.com) + * Novemebr 2014 + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + ****************************************************************************/ +/* A sample input is given in the sample-input folder */ + +#ifndef _DEFLATED_CG_H +#define _DEFLATED_CG_H + +#include "su3.h" +/* #include "solver/matrix_mult_typedef.h" */ +#include "solver/solver_params.h" +#include "solver/deflator.h" + + +int exactdeflated_cg( + solver_params_t solver_params, /* (IN) parameters for solver */ + deflator_params_t deflator_params, /* (IN) parameters for deflator */ + spinor * const x, /* (IN/OUT) initial guess on input, solution on output for this RHS*/ + spinor * const b /* (IN) right-hand side*/ +); + +#endif diff --git a/solver/eigenvalues_arpack.c b/solver/eigenvalues_arpack.c new file mode 100644 index 000000000..0ff771573 --- /dev/null +++ b/solver/eigenvalues_arpack.c @@ -0,0 +1,515 @@ +/*********************************************************************** + * Interface for solving the eigenvalue problem A*x=lambda*x + * for a complex matrix A using ARPACK and PARPACK. The matrix + * is accessed through a matrix-vector multiplication function. + * + * Author: A.M. Abdel-Rehim, 2014 + * + * For reference see the driver programs zndrv1 and in the EXAMPLES + * subdriectories of ARPACK and PARPACK. + * + * This file is part of tmLQCD software suite + ***********************************************************************/ +#ifdef HAVE_CONFIG_H +# include +#endif +#include +#include +#include +#include +#include +#ifdef TM_USE_MPI +# include +#endif +#include "global.h" +#include "linalg_eo.h" +#include "start.h" +#include "quicksort.h" +#include "linalg/blas.h" +#include "linalg/lapack.h" +#include "solver/eigenvalues_arpack.h" +void evals_arpack( + int n, + int nev, + int ncv, + int which, + int use_acc, + int cheb_k, + double amin, + double amax, + _Complex double *evals, + _Complex double *v, + double tol, + int maxiter, + matrix_mult av, + int *info, + int *nconv, + char *arpack_logfile) +/* + compute nev eigenvectors using ARPACK and PARPACK + n : (IN) size of the local lattice + nev : (IN) number of eigenvectors requested. + ncv : (IN) size of the subspace used to compute eigenvectors (nev+1) =< ncv < 12*n + where 12n is the size of the matrix under consideration + which : (IN) which eigenvectors to compute. Choices are: + 0: smallest real part "SR" + 1: largest real part "LR" + 2: smallest absolute value "SM" + 3: largest absolute value "LM" + 4: smallest imaginary part "SI" + 5: largest imaginary part "LI" + use_acc: (IN) specify the polynomial acceleration mode + 0 no acceleration + 1 use acceleration by computing the eigenvectors of a shifted-normalized chebyshev polynomial + cheb_k : (IN) degree of the chebyshev polynomial to be used for acceleration (irrelevant when use_acc=0 and init_resid_arpack=0) + amin,amax: (IN) bounds of the interval [amin,amax] for the acceleration polynomial (irrelevant when use_acc=0 and init_resid_arpack=0) + evals : (OUT) array of size nev+1 which has the computed nev Ritz values + v : computed eigenvectors. Size is n*ncv spinors. + tol : Requested tolerance for the accuracy of the computed eigenvectors. + A value of 0 means machine precision. + maxiter: maximum number of restarts (iterations) allowed to be used by ARPACK + av : operator for computing the action of the matrix on the vector + av(vout,vin) where vout is output spinor and vin is input spinors. + info : output from arpack. 0 means that it converged to the desired tolerance. + otherwise, an error message is printed to stderr + nconv : actual number of converged eigenvectors. + arpack_logfile: name of the logfile to be used by arpack +*/ +{ + + //print the input: + //================ + if(g_proc_id == g_stdio_proc) + { + fprintf(stdout,"# [eigenvalues_arpack] Input to eigenvalues_arpack\n"); + fprintf(stdout,"# [eigenvalues_arpack] ===========================\n"); + fprintf(stdout,"# [eigenvalues_arpack] n= %d\n", n); + fprintf(stdout,"# [eigenvalues_arpack] The number of Ritz values requested is %d\n", nev); + fprintf(stdout,"# [eigenvalues_arpack] The number of Arnoldi vectors generated is %d\n", ncv); + fprintf(stdout,"# [eigenvalues_arpack] What portion of the spectrum which: %d\n", which); + fprintf(stdout,"# [eigenvalues_arpack] polynomial acceleartion option %d\n", use_acc ); + fprintf(stdout,"# [eigenvalues_arpack] chebyshev polynomial paramaters: degree %d amin %+e amx %+e\n",cheb_k,amin,amax); + fprintf(stdout,"# [eigenvalues_arpack] The convergence criterion is %+e\n", tol); + fprintf(stdout,"# [eigenvalues_arpack] maximum number of iterations for arpack %d\n",maxiter); + } + + //create the MPI communicator +#ifdef TM_USE_MPI + MPI_Fint mpi_comm_f = MPI_Comm_c2f(g_cart_grid); + //MPI_Comm comm; //communicator used when we call PARPACK + //int comm_err = MPI_Comm_dup(MPI_COMM_WORLD,&comm); //duplicate the MPI_COMM_WORLD to create a communicator to be used with arpack + //if(comm_err != MPI_SUCCESS) { //error when trying to duplicate the communicator + // if(g_proc_id == g_stdio_proc){ + // fprintf(stderr,"[eigenvalues_arpack] MPI_Comm_dup return with an error. Exciting...\n"); + // exit(-1); + // } + //} +#endif + + int parallel; +#ifdef TM_USE_MPI + parallel=1; +#else + parallel=0; +#endif + + int ido=0; //control of the action taken by reverse communications + //set initially to zero + + char *bmat=strdup("I"); /* Specifies that the right hand side matrix + should be the identity matrix; this makes + the problem a standard eigenvalue problem. + */ + + //matrix dimensions + int ldv,N,LDV; + + if(n==VOLUME) //full + ldv= VOLUMEPLUSRAND; + else //even-odd + ldv= VOLUMEPLUSRAND/2; + + //dimesnions as complex variables + N =12*n; //dimension + LDV=12*ldv; //leading dimension (including communication buffers) + + + char *which_evals; + + if(which==0) + which_evals=strdup("SR"); + if(which==1) + which_evals=strdup("LR"); + if(which==2) + which_evals=strdup("SM"); + if(which==3) + which_evals=strdup("LM"); + if(which==4) + which_evals=strdup("SI"); + if(which==5) + which_evals=strdup("LI"); + + + //check + if (which_evals == NULL || + ( strcmp("SR", which_evals) + && strcmp("LR", which_evals) + && strcmp("SI", which_evals) + && strcmp("LI", which_evals) + && strcmp("SM", which_evals) + && strcmp("LM", which_evals))) + { + if(g_proc_id == g_stdio_proc) + {fprintf(stderr,"[eigenvalues_arpack] Error: invalid value for which_evals\n"); fflush(stderr); exit(1);} + } + + //check input + if(nev>=N){ + if(g_proc_id == g_stdio_proc) + {fprintf(stderr,"[eigenvalues_arpack] number of eigenvalues requested should be less than the size of the matrix.\n"); fflush(stderr); exit(1);} + } + + if(ncv < (nev+1)){ + if(g_proc_id == g_stdio_proc) + {fprintf(stderr,"[eigenvalues_arpack] search subspace must be larger than the number of requested eiegnvalues.\n"); fflush(stderr); exit(1);} + } + + _Complex double *resid = (_Complex double *) malloc(N*sizeof(_Complex double)); + if(resid == NULL){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory for resid in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + + + int *iparam = (int *) malloc(11*sizeof(int)); + if(iparam == NULL){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory for iparam in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + + + iparam[0]=1; //use exact shifts + + iparam[2]=maxiter; + + iparam[3]=1; + + iparam[6]=1; + + + int *ipntr = (int *) malloc(14*sizeof(int)); + + _Complex double *workd = (_Complex double *) malloc(3*N*sizeof(_Complex double)); + + int lworkl=(3*ncv*ncv+5*ncv)*2; //just allocate more space + + _Complex double *workl=(_Complex double *) malloc(lworkl*sizeof(_Complex double)); + + double *rwork = (double *) malloc(ncv*sizeof(double)); + + int rvec=1; //always call the subroutine that computes orthonormal bais for the eigenvectors + + char *howmany=strdup("P"); //always compute orthonormal basis + + int *select = (int *) malloc(ncv*sizeof(int)); //since all Ritz vectors or Schur vectors are computed no need to initialize this array + + _Complex double sigma; + + _Complex double *workev = (_Complex double *) malloc(2*ncv*sizeof(_Complex double)); + + double *sorted_evals = (double *) malloc(ncv*sizeof(double)); //will be used to sort the eigenvalues + int *sorted_evals_index = (int *) malloc(ncv*sizeof(int)); + + if((ipntr == NULL) || (workd==NULL) || (workl==NULL) || (rwork==NULL) || (select==NULL) || (workev==NULL) || (sorted_evals==NULL) || (sorted_evals_index==NULL)){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory for ipntr,workd,workl,rwork,select,workev,sorted_evals,sorted_evals_index in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + + double d1,d2,d3; + + + (*info) = 0; //means use a random starting vector with Arnoldi + + void *_x,*_ax,*_r,*_tmps1,*_tmps2; //spinors that might be needed + spinor *x,*ax,*r,*tmps1,*tmps2; //spinors that might be needed + + #if (defined SSE || defined SSE2 || defined SSE3) + _x = malloc((ldv+ALIGN_BASE)*sizeof(spinor)); + if(_x==NULL){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory for _x in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + else + x = (spinor *) ( ((unsigned long int)(_x)+ALIGN_BASE)&~ALIGN_BASE); + + + _ax = malloc((ldv+ALIGN_BASE)*sizeof(spinor)); + if(_ax==NULL){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory for _ax in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + else + ax = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE); + + + _tmps1 = malloc((ldv+ALIGN_BASE)*sizeof(spinor)); + if(_tmps1==NULL){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory for _tmps1 in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + else + tmps1 = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE); + + + _tmps2 = malloc((ldv+ALIGN_BASE)*sizeof(spinor)); + if(_tmps2==NULL){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory for _tmps2 in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + else + tmps2 = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE); + + + #else + + x = (spinor *) calloc(ldv,sizeof(spinor)); + ax = (spinor *) calloc(ldv,sizeof(spinor)); + tmps1 = (spinor *) calloc(ldv,sizeof(spinor)); + tmps2 = (spinor *) calloc(ldv,sizeof(spinor)); + if((x==NULL) || (ax==NULL) || (tmps1==NULL) || (tmps2==NULL)){ + if(g_proc_id == g_stdio_proc) + { fprintf(stderr,"[eigenvalues_arpack] Error: not enough memory in x, ax, tmps1, tmps2 in eigenvalues_arpack.\n"); fflush(stderr); exit(1);} + } + + #endif + + int i,j; + + /* Code added to print the log of ARPACK */ + //const char *arpack_logfile; + int arpack_log_u = 9999; + +#ifndef TM_USE_MPI + //sprintf(arpack_logfile,"ARPACK_output.log"); + if ( NULL != arpack_logfile ) { + /* correctness of this code depends on alignment in Fortran and C + being the same ; if you observe crashes, disable this part */ + _AFT(initlog)(&arpack_log_u, arpack_logfile, strlen(arpack_logfile)); + int msglvl0 = 0, + msglvl1 = 1, + msglvl2 = 2, + msglvl3 = 3; + _AFT(mcinitdebug)( + &arpack_log_u, /*logfil*/ + &msglvl3, /*mcaupd*/ + &msglvl3, /*mcaup2*/ + &msglvl0, /*mcaitr*/ + &msglvl3, /*mceigh*/ + &msglvl0, /*mcapps*/ + &msglvl0, /*mcgets*/ + &msglvl3 /*mceupd*/); + + fprintf(stdout,"# [arpack] *** ARPACK verbosity set to mcaup2=3 mcaupd=3 mceupd=3; \n" + "# [arpack] *** output is directed to '%s';\n" + "# [arpack] *** if you don't see output, your memory may be corrupted\n", + arpack_logfile); + } +#else + //if( g_proc_id == g_stdio_proc ){ + // sprintf(arpack_logfile,"ARPACK_output.log"); + //} + if ( NULL != arpack_logfile + && (g_proc_id == g_stdio_proc) ) { + /* correctness of this code depends on alignment in Fortran and C + being the same ; if you observe crashes, disable this part */ + _AFT(initlog)(&arpack_log_u, arpack_logfile, strlen(arpack_logfile)); + int msglvl0 = 0, + msglvl1 = 1, + msglvl2 = 2, + msglvl3 = 3; + _AFT(pmcinitdebug)( + &arpack_log_u, /*logfil*/ + &msglvl3, /*mcaupd*/ + &msglvl3, /*mcaup2*/ + &msglvl0, /*mcaitr*/ + &msglvl3, /*mceigh*/ + &msglvl0, /*mcapps*/ + &msglvl0, /*mcgets*/ + &msglvl3 /*mceupd*/); + + fprintf(stdout,"# [arpack] *** ARPACK verbosity set to mcaup2=3 mcaupd=3 mceupd=3; \n" + "# [arpack] *** output is directed to '%s';\n" + "# [arpack] *** if you don't see output, your memory may be corrupted\n", + arpack_logfile); + } +#endif + + + + + /* + M A I N L O O P (Reverse communication) + */ + + do + { + +#ifndef TM_USE_MPI + _AFT(znaupd)(&ido,"I", &N, which_evals, &nev, &tol,resid, &ncv, + v, &N, iparam, ipntr, workd, + workl, &lworkl,rwork,info,1,2); +#else + _AFT(pznaupd)(&mpi_comm_f, &ido,"I", &N, which_evals, &nev, &tol, resid, &ncv, + v, &N, iparam, ipntr, workd, + workl, &lworkl,rwork,info,1,2); +#endif + + if (ido == 99 || (*info) == 1) + break; + + if ((ido==-1)||(ido==1)){ + + assign_complex_to_spinor(x,workd+ipntr[0]-1,N); + if((use_acc==0)) + av(ax,x); + else + cheb_poly_op(ax,x,av,n,amin,amax,cheb_k,tmps1,tmps2); + + assign_spinor_to_complex(workd+ipntr[1]-1, ax, n); + } + } while (ido != 99); + +/* + Check for convergence +*/ + if ( (*info) < 0 ) + { + if(g_proc_id == g_stdio_proc){ + fprintf(stderr,"[eigenvalues_arpack] Error with _naupd, info = %d\n", *info); + fprintf(stderr,"[eigenvalues_arpack] Check the documentation of _naupd\n");} + } + else + { + (*nconv) = iparam[4]; + if(g_proc_id == g_stdio_proc){ + fprintf(stderr,"[eigenvalues_arpack] number of converged eigenvectors = %d\n", *nconv);} + + //compute eigenvectors +#ifndef TM_USE_MPI + _AFT(zneupd) (&rvec,"P", select,evals,v,&N,&sigma, + workev,"I",&N,which_evals,&nev,&tol,resid,&ncv, + v,&N,iparam,ipntr,workd,workl,&lworkl, + rwork,info,1,1,2); +#else + _AFT(pzneupd) (&mpi_comm_f,&rvec,"P", select,evals, v,&N,&sigma, + workev,"I",&N,which_evals,&nev,&tol, resid,&ncv, + v,&N,iparam,ipntr,workd,workl,&lworkl, + rwork,info,1,1,2); +#endif + + + if( (*info)!=0) + { + if(g_proc_id == g_stdio_proc){ + fprintf(stderr,"[eigenvalues_arpack] Error with _neupd, info = %d \n",(*info)); + fprintf(stderr,"[eigenvalues_arpack] Check the documentation of _neupd. \n");} + } + else //report eiegnvalues and their residuals + { + if(g_proc_id == g_stdio_proc){ + fprintf(stdout,"[eigenvalues_arpack] Ritz Values and their errors\n"); + fprintf(stdout,"[eigenvalues_arpack] ============================\n"); + } + + (*nconv) = iparam[4]; + for(j=0; j< (*nconv); j++) + { + /* print out the computed ritz values and their error estimates */ + if(g_proc_id == g_stdio_proc) + fprintf(stdout,"# [eigenvalues_arpack] RitzValue[%06d] %+e %+e error= %+e \n",j,creal(evals[j]),cimag(evals[j]),cabs(*(workl+ipntr[10]-1+j))); + sorted_evals_index[j] = j; + sorted_evals[j] = cabs(evals[j]); + } + + //SORT THE EIGENVALUES in ascending order based on their absolute value + quicksort((*nconv),sorted_evals,sorted_evals_index); + //Print sorted evals + if(g_proc_id == g_stdio_proc) + fprintf(stdout,"# [eigenvalues_arpack] Sorted eigenvalues based on their absolute values\n"); + + for(j=0; j< (*nconv); j++) + { + /* print out the computed ritz values and their error estimates */ + if(g_proc_id == g_stdio_proc) + fprintf(stdout,"# [eigenvalues_arpack] RitzValue[%06d] %+e %+e error= %+e \n",j,creal(evals[sorted_evals_index[j]]),cimag(evals[sorted_evals_index[j]]),cabs(*(workl+ipntr[10]-1+sorted_evals_index[j]))); + + } + + } + + /*Print additional convergence information.*/ + if( (*info)==1) + { + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"[eigenvalues_arpack] Maximum number of iterations reached.\n"); + } + else + { + + if(g_proc_id == g_stdio_proc) + { + if((*info)==3) + { + fprintf(stderr,"[eigenvalues_arpack] No shifts could be applied during implicit\n"); + fprintf(stderr,"[eigenvalues_arpack] Arnoldi update, try increasing NCV.\n"); + } + + fprintf(stdout,"# [eigenvalues_arpack] _NDRV1\n"); + fprintf(stdout,"# [eigenvalues_arpack] =======\n"); + fprintf(stdout,"# [eigenvalues_arpack] Size of the matrix is %d\n", N); + fprintf(stdout,"# [eigenvalues_arpack] The number of Ritz values requested is %d\n", nev); + fprintf(stdout,"# [eigenvalues_arpack] The number of Arnoldi vectors generated is %d\n", ncv); + fprintf(stdout,"# [eigenvalues_arpack] What portion of the spectrum: %s\n", which_evals); + fprintf(stdout,"# [eigenvalues_arpack] The number of converged Ritz values is %d\n", (*nconv) ); + fprintf(stdout,"# [eigenvalues_arpack] The number of Implicit Arnoldi update iterations taken is %d\n", iparam[2]); + fprintf(stdout,"# [eigenvalues_arpack] The number of OP*x is %d\n", iparam[8]); + fprintf(stdout,"# [eigenvalues_arpack] The convergence criterion is %+e\n", tol); + } + + } + } //if(info < 0) else part + + +#ifndef TM_USE_MPI + if (NULL != arpack_logfile) + _AFT(finilog)(&arpack_log_u); +#else + if(g_proc_id == g_stdio_proc){ + if (NULL != arpack_logfile){ + _AFT(finilog)(&arpack_log_u); + } + } +#endif + + + //free memory + free(resid); + free(iparam); + free(ipntr); + free(workd); + //free(zv); + free(select); + free(workev); + #if ( (defined SSE) || (defined SSE2) || (defined SSE3) ) + free(_x); free(_ax); free(_tmps1); free(_tmps2); + #else + free(x); free(ax); free(tmps1); free(tmps2); + #endif + + return; +} + + + + + diff --git a/solver/eigenvalues_arpack.h b/solver/eigenvalues_arpack.h new file mode 100644 index 000000000..10c82b9dc --- /dev/null +++ b/solver/eigenvalues_arpack.h @@ -0,0 +1,74 @@ +/************************************************************************* + * Interface for solving the eigenvalue problem A*x=lambda*x for a complex + * matrix A using ARPACK. The matrix is accessed through a matrix-vector + * multiplication function. + * + * Author: A.M. Abdel-Rehim, 2014 + * + * For reference see the driver programs zndrv1 in the EXAMPLES + * subdriectories of ARPACK and PARPACK and the documentation. + * + * This file is part of tmLQCD software suite + ***********************************************************************/ + +#ifndef _EIGENVALUES_ARPACK +#define _EIGENVALUES_ARPACK + +#include "su3.h" +#include "solver/matrix_mult_typedef.h" +#include "linalg/arpack.h" +/* #include "memalloc.h" */ +#include "solver/precon.h" + +//evals_arpack(N,nev,ncv,kind,acc,cheb_k,emin,emax,evals,evecs,arpack_eig_tol,arpack_eig_maxiter,f,&info_arpack,&nconv,arpack_logfile); + +/*compute nev eigenvectors using ARPACK and PARPACK*/ +void evals_arpack( + int n, + int nev, + int ncv, + int which, + int use_acc, + int cheb_k, + double amin, + double amax, + _Complex double *evals, + _Complex double *v, + double tol, + int maxiter, + matrix_mult av, + int *info, + int *nconv, + char *arpack_logfile); +/* + compute nev eigenvectors using ARPACK and PARPACK + n : (IN) size of the local lattice + nev : (IN) number of eigenvectors requested. + ncv : (IN) size of the subspace used to compute eigenvectors (nev+1) =< ncv < 12*n + where 12n is the size of the matrix under consideration + which : (IN) which eigenvectors to compute. Choices are: + 0: smallest real part "SR" + 1: largest real part "LR" + 2: smallest absolute value "SM" + 3: largest absolute value "LM" + 4: smallest imaginary part "SI" + 5: largest imaginary part "LI" + use_acc: (IN) specify the polynomial acceleration mode + 0 no acceleration + 1 use acceleration by computing the eigenvectors of a shifted-normalized chebyshev polynomial + cheb_k : (IN) degree of the chebyshev polynomial to be used for acceleration (irrelevant when use_acc=0 and init_resid_arpack=0) + amin,amax: (IN) bounds of the interval [amin,amax] for the acceleration polynomial (irrelevant when use_acc=0 and init_resid_arpack=0) + evals : (OUT) array of size nev+1 which has the computed nev Ritz values + v : computed eigenvectors. Size is n*ncv spinors. + tol : Requested tolerance for the accuracy of the computed eigenvectors. + A value of 0 means machine precision. + maxiter: maximum number of restarts (iterations) allowed to be used by ARPACK + av : operator for computing the action of the matrix on the vector + av(vout,vin) where vout is output spinor and vin is input spinors. + info : output from arpack. 0 means that it converged to the desired tolerance. + otherwise, an error message is printed to stderr + nconv : actual number of converged eigenvectors. + arpack_logfile: name for the logfile to be used by arpack +*/ + +#endif diff --git a/solver/precon.c b/solver/precon.c new file mode 100644 index 000000000..2acc2e996 --- /dev/null +++ b/solver/precon.c @@ -0,0 +1,125 @@ +/*********************************************************************** + * Chebyshev polynomial preconditioning related functions. + * Note: there also other related functions in poly_precon.h files + * that was developed earlier by Carsten + * + * Author: A.M. Abdel-Rehim, 2014 + * + * This file is part of tmLQCD software suite + ***********************************************************************/ +#include +#include "global.h" +#include "solver/precon.h" + + +void cheb_poly_op(spinor * const R, spinor * const S, matrix_mult f, const int N, const double a, const double b, const int k, spinor *v1, spinor *v2) +{ + + double delta,theta; + double sigma,sigma1,sigma_old; + double d1,d2,d3; + + + if(k < 0){ //check the order of the requested polynomial + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"Error: lowest allowed order of the Chebyshev polynomial is 0.\n"); + exit(1); + } + + delta = (b-a)/2.0; + theta = (b+a)/2.0; + + sigma1 = -delta/theta; + + //T_0(Q)=1 + assign(R,S,N); + if(k== 0){ + return; + } + + + //T_1(Q) = [2/(b-a)*Q - (b+a)/(b-a)]*sigma_1 + d1 = sigma1/delta; + d2 = 1.0; + f(R,S); //R=Q(S) + assign_mul_add_mul_r(R,S,d1,d2,N); + if(k==1){ + return; + } + + //degree >=2 + //========== + + //T_0 = S + //T_1 = R + + + int LDN; + if(N==VOLUME) + LDN = VOLUMEPLUSRAND; + else + LDN = VOLUMEPLUSRAND/2; + + assign(v1,S,N); + assign(v2,R,N); + + sigma_old = sigma1; + + for(int i=2; i <= k; i++) + { + sigma = 1.0/(2.0/sigma1-sigma_old); + + d1 = 2.0*sigma/delta; + d2 = -d1*theta; + d3 = -sigma*sigma_old; + + f(R,v2); + assign_mul_add_mul_add_mul_r(R,v2,v1,d1,d2,d3,N); + + assign(v1,v2,N); + assign(v2,R,N); + + sigma_old = sigma; + } + + return; + +} + +void cheb_poly_roots(_Complex double *roots, const int k, const double a, const double b) +/* +roots of the shifted Chebyshev polynomial in the interval [a,b] +The roots of C_k(d(x)) and are given by +x_l = (b-a)/2*[cos(pi/2 (2*l-1)/k)+(b+a)/(b-a)] +*/ +{ + double PI=3.141592653589793; + + double d1,d2,d3; + + d1=0.5*(b+a); + d2=0.5*(b-a); + d3=PI/(double)k; + + if(k < 1){ //check the order of the requested polynomial + if(g_proc_id == g_stdio_proc) + fprintf(stderr,"Error: lowest allowed order for roots of Chebyshev polynomial is 1.\n"); + exit(1); + } + + + + + int i; + + for(i=1; i<=k; i++) + roots[i-1] = d1+d2*cos(d3*(i-0.5)); + + + return ; +} + + + + + diff --git a/solver/precon.h b/solver/precon.h new file mode 100644 index 000000000..5167d23a6 --- /dev/null +++ b/solver/precon.h @@ -0,0 +1,39 @@ +/*********************************************************************** + * preconditioning related functions. + * Note: there exist already polynomial preconditioning operators + * related to the chebychev polynomials in poly_precon.h files + * that was developed earlier by Carsten + * + * Author: A.M. Abdel-Rehim, 2014 + * + * This file is part of tmLQCD software suite + ***********************************************************************/ + +#ifndef _PRECON_H +#define _PRECON_H + +#include "su3.h" +#include "solver/matrix_mult_typedef.h" +#include "linalg_eo.h" +/* #include "memalloc.h" */ +#include "start.h" + +void cheb_poly_op(spinor * const R, spinor * const S, matrix_mult f, const int N, const double a, const double b, const int k, spinor *v1, spinor *v2); +/* + Chebyshev polynomial of the oprator f in the interval [a,b] normalized to 1 at 0. + R: output spinor + S: input spinor + f: matrix-vector multiplication for the operator + a,b: limits of the interval with b>a + k: degree of the polynomial + v1 and v2 are spinors needed to be used in the recurrence relation +*/ + + +void cheb_poly_roots(_Complex double *roots, const int k, const double a, const double b); +/* +roots of the shifted Chebyshev polynomial of degree k in the interval [a,b] +*/ + + +#endif diff --git a/solver/solver_params.h b/solver/solver_params.h index c51207445..e94a7bac9 100644 --- a/solver/solver_params.h +++ b/solver/solver_params.h @@ -32,6 +32,14 @@ typedef struct { + int maxiter; + int rel_prec; + int nrhs; + int nrhs1; + double eps_sq; + double eps_sq1; + double res_eps_sq; + /******************************** * Incremental EigCG parameters ********************************/ @@ -56,6 +64,49 @@ typedef struct { where the maximum is over the iterated residuals since the last update */ float mcg_delta; + /********************************** + * arpack_cg parameters + **********************************/ + + int arpackcg_nrhs; /*Number of right-hand sides to be solved*/ + int arpackcg_nrhs1; /*First number of right-hand sides to be solved using tolerance eps_sq1*/ + double arpackcg_eps_sq1; /*Squared tolerance of convergence of the linear system for systems 1 till nrhs1*/ + double arpackcg_eps_sq; /*Squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/ + double arpackcg_res_eps_sq; /*Suqared tolerance for restarting cg */ + int arpackcg_nev; /*Number of eigenvectors to be computed by arpack*/ + int arpackcg_ncv; /*Size of the subspace used by arpack with the condition (nev+1) =< ncv */ + int arpackcg_evals_kind; /* type of eigenvalues to be computed + 0 eigenvalues of smallest real part + 1 eigenvalues of largest real part + 2 eigenvalues of smallest absolute value + 3 eigenvalues of largest absolute value*/ + int arpackcg_comp_evecs; /* 0 don't compute the resiudals of the eigenvalues + 1 compute the residulas of the eigenvalues*/ + double arpackcg_eig_tol; /*Tolerance for computing eigenvalues with arpack */ + int arpackcg_eig_maxiter; /*Maximum number of iterations to be used by arpack*/ + char arpack_logfile[500]; /*file name for the logfile used by arpack*/ + + /************************************* + *chebychev polynomila preconditioner + *for CG related paprameters + *************************************/ + int use_acc; /* type of acceleration to be used: */ + /* 0 no acceleration, 1 compute eiegnvectors of the acceleration polynomial, */ + int cheb_k; /* order of the polynomial used is k+1 and the lowest value is k=-1 which correspond to T_0 */ + double op_evmin; /* lowest boundary of the interval for the polynomial acceleration */ + double op_evmax; /* highest boundary for the interval for the polynomial acceleration */ + + /************************************* + * parameters to control of I/O for + * eigenvectors + *************************************/ + int arpackcg_write_ev; + int arpackcg_read_ev; + char arpack_evecs_filename[500]; /* file name for the evecs used by arpack*/ + char arpack_evecs_fileformat[200]; /* file format for evecs used by arpack */ + int projection_type; + + int arpack_evecs_writeprec; } solver_params_t; diff --git a/wrapper/lib_wrapper.c b/wrapper/lib_wrapper.c index 73dd86138..caa6713ee 100644 --- a/wrapper/lib_wrapper.c +++ b/wrapper/lib_wrapper.c @@ -55,8 +55,13 @@ #include "invert_eo.h" #include "start.h" #include "operator.h" +#include "operator/clovertm_operators.h" +#include "operator/clovertm_operators_32.h" +#include "operator/clover_leaf.h" +#include "solver/solver_types.h" #include "measure_gauge_action.h" #include "linalg/convert_eo_to_lexic.h" +#include "operator/Hopping_Matrix.h" #include "include/tmLQCD.h" #include "fatal_error.h" From 6886593688d3907337d15a9037800b3b13192da8 Mon Sep 17 00:00:00 2001 From: Marcus Petschlies Date: Thu, 12 Jan 2017 14:15:49 +0100 Subject: [PATCH 8/8] added duplication of g_cart_grid to tmLQCD_mpi_params.cart_grid --- wrapper/lib_wrapper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wrapper/lib_wrapper.c b/wrapper/lib_wrapper.c index caa6713ee..28caaba75 100644 --- a/wrapper/lib_wrapper.c +++ b/wrapper/lib_wrapper.c @@ -354,6 +354,9 @@ int tmLQCD_get_mpi_params(tmLQCD_mpi_params * params) { params->proc_coords[1] = g_proc_coords[1]; params->proc_coords[2] = g_proc_coords[2]; params->proc_coords[3] = g_proc_coords[3]; +#ifdef TM_USE_MPI + MPI_Comm_dup(g_cart_grid, &(params->cart_grid) ); +#endif return(0); }