From 98b85d85729425b40e43fe6544898529ccdbe367 Mon Sep 17 00:00:00 2001 From: Vaibhav-Chemistry Date: Mon, 13 Oct 2025 15:41:41 -0400 Subject: [PATCH 1/4] CPU version to link to CPU ZEST --- CMakeLists.txt | 2 +- examples/CMakeLists.txt | 43 ++++++++++++++++++++++++++--------- include/cpu_util.h | 2 +- include/cuda_util.h | 6 ++++- include/cusp.h | 4 ++++ include/gauss.h | 2 ++ include/jellium_ints.h | 2 ++ include/scf_util.h | 2 ++ recipe.yaml | 2 +- src/integrals/CMakeLists.txt | 22 ++++++++++++++---- src/integrals/cpu_util.cpp | 20 ++++++++++++++++ src/integrals/gauss.cpp | 2 ++ src/integrals/integrals_d.cpp | 6 ++++- src/integrals/prosph.cpp | 17 ++++++++++---- src/integrals/reduce.cpp | 4 ++-- src/integrals/scf_util.cpp | 4 ++-- 16 files changed, 111 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29ec023..a9c6a04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ project(SlaterGPU VERSION 0.1 DESCRIPTION "Integrals over Slater type orbitals with GPU Acceleration" LANGUAGES C CXX) -set(USE_ACC True) +set(USE_ACC False) set(USE_MPI True) set(USE_OMP True) set(RED_DOUBLE True) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 14be0eb..82b0f85 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -7,32 +7,54 @@ target_include_directories(sgpu.exe PRIVATE "${CMAKE_SOURCE_DIR}/src/libcintw" "${CMAKE_SOURCE_DIR}/include" ) + set(TEST_COMP_FLAGS ${CMAKE_CXX_FLAGS}) + +# CUDA/GPU dependencies - only when USE_ACC is enabled if(USE_ACC) + enable_language(CUDA) + find_package(CUDA REQUIRED) + find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + target_link_options(sgpu.exe PRIVATE -acc=gpu ) set(TEST_COMP_FLAGS ${TEST_COMP_FLAGS} -DUSE_ACC=1 -acc=gpu ) + + # GPU-specific link libraries + set(GPU_LIBRARIES + "${CUDART_LIBRARY}" + "${CUDA_LIBRARIES}" + "${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcublas.so" + "${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcusolver.so" + ) +else() + # CPU-only mode + set(GPU_LIBRARIES "") + message(STATUS "Building CPU-only version (USE_ACC=OFF)") endif() #LAPACK cmake not right. #need to use this as a variable below find_package(LAPACK) - find_package(BLAS) -enable_language(CUDA) -find_package(CUDA REQUIRED) -find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) - -#enable_language(FORTRAN) -#find_library(FORTRANLIBS fortranlibs ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) set_property(TARGET sgpu.exe PROPERTY CXX_STANDARD 14) target_compile_options(sgpu.exe PRIVATE ${TEST_COMP_FLAGS} -O2) -#target_link_libraries(sgpu.exe PRIVATE SlaterGPU io "${CUDART_LIBRARY}" "${CUDA_LIBRARIES}" ) -target_link_libraries(sgpu.exe PRIVATE SlaterGPU io cintw "${FORTRANLIBS}" "${CUDART_LIBRARY}" "${CUDA_LIBRARIES}" "${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcublas.so" "${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcusolver.so" "-L/home/paulzim/integrate/gpu/lib/lapack-3.9.0/lib/ -llapack -lblas" "-L/export/apps/RockyOS8/nvidia_hpc/2024_249/Linux_x86_64/24.9/ -fortranlibs" ) + +# Link libraries - GPU_LIBRARIES will be empty for CPU-only builds +target_link_libraries(sgpu.exe PRIVATE + SlaterGPU + io + cintw + "${FORTRANLIBS}" + ${GPU_LIBRARIES} + "-L/home/paulzim/integrate/gpu/lib/lapack-3.9.0/lib/ -llapack -lblas" + "-L/export/apps/RockyOS8/nvidia_hpc/2024_249/Linux_x86_64/24.9/ -fortranlibs" + ) + file(COPY lih_VK1 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY lih_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY h2_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) @@ -42,6 +64,5 @@ file(COPY noplus_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY ch4_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY run.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) -# Add installation for the executable install(TARGETS sgpu.exe - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) \ No newline at end of file diff --git a/include/cpu_util.h b/include/cpu_util.h index 9a259d7..0fe6f21 100644 --- a/include/cpu_util.h +++ b/include/cpu_util.h @@ -52,6 +52,6 @@ void mat_times_mat_bt_cpu(double* C, double* A, double* B, int M, int N, int K, void mat_times_mat_bt_cpu(float* C, float* A, float* B, int N); void mat_times_mat_bt_cpu(double* C, double* A, double* B, int M, int N, int K); void mat_times_mat_bt_cpu(double* C, double* A, double* B, int N); - +void mat_times_mat_at_cpu(double* C, double* A, double* B, int M, int N, int K); #endif diff --git a/include/cuda_util.h b/include/cuda_util.h index 7e35284..1954698 100644 --- a/include/cuda_util.h +++ b/include/cuda_util.h @@ -1,6 +1,8 @@ #ifndef CUDA_UTILH #define CUDA_UTILH +#if USE_ACC + //#include #include #include @@ -41,4 +43,6 @@ void copy_to_all_gpu(int ngpu, int s1, double* A, int include_first); void copy_to_all_gpu(int ngpu, int s1, float* A, int include_first); void copy_to_all_gpu(int ngpu, int s1, int s2, double** A, int include_first); -#endif +#endif // USE_ACC + +#endif // CUDA_UTILH \ No newline at end of file diff --git a/include/cusp.h b/include/cusp.h index 2b070b7..621e338 100644 --- a/include/cusp.h +++ b/include/cusp.h @@ -4,7 +4,9 @@ //#include "hartree.h" #include "read.h" #include "write.h" +#if USE_ACC #include "cuda_util.h" +#endif #include "cpu_util.h" #include "becke.h" #include "integrals.h" @@ -24,6 +26,8 @@ using namespace std; +typedef void* cusolverDnHandle_t; // Dummy type for CPU-only builds + void compute_diatomic_symm(int natoms, int* atno, vector > basis, vector& pB_all, int prl); void compute_cusp(int natoms, int* atno, double* coords, vector > &basis, double* pB1, double* pB2, int prl); diff --git a/include/gauss.h b/include/gauss.h index e5f83ee..d0931af 100644 --- a/include/gauss.h +++ b/include/gauss.h @@ -6,7 +6,9 @@ #include "read.h" #include "gto_grid.h" #include "print.h" +#if USE_ACC #include "cuda_util.h" +#endif void eval_gh(int gs, float* grid, float* val1, int l1, int m1, const float norm1, const float zeta1); void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double norm1, const double zeta1); diff --git a/include/jellium_ints.h b/include/jellium_ints.h index 373fd6a..4acb49d 100644 --- a/include/jellium_ints.h +++ b/include/jellium_ints.h @@ -4,7 +4,9 @@ //#include "hartree.h" #include "read.h" #include "write.h" +#if USE_ACC #include "cuda_util.h" +#endif #include "cpu_util.h" #include "integrals.h" #include "becke.h" diff --git a/include/scf_util.h b/include/scf_util.h index 8a8bc58..922f8e6 100644 --- a/include/scf_util.h +++ b/include/scf_util.h @@ -9,7 +9,9 @@ #include #include "cpu_util.h" +#if USE_ACC #include "cuda_util.h" +#endif int count_zero_mo(int N, double* jCA); int core_count(int natoms, int* atno); diff --git a/recipe.yaml b/recipe.yaml index 4596a38..dfdc52b 100644 --- a/recipe.yaml +++ b/recipe.yaml @@ -1,5 +1,5 @@ context: - version: "0.1.14" + version: "0.1.16" package: name: slatergpu diff --git a/src/integrals/CMakeLists.txt b/src/integrals/CMakeLists.txt index c11e217..ac20f1a 100644 --- a/src/integrals/CMakeLists.txt +++ b/src/integrals/CMakeLists.txt @@ -1,4 +1,5 @@ -set(INTEGRALS_SOURCES +# Common sources for both CPU and GPU builds +set(INTEGRALS_COMMON_SOURCES integrals.cpp integrals_d.cpp integrals_aux.cpp @@ -19,9 +20,7 @@ set(INTEGRALS_SOURCES ps_first_second_order.cpp cusp.cpp cpu_util.cpp - gpu_util.cpp grid_util.cpp - cuda_util.cpp scf_util.cpp symm.cpp gauss.cpp @@ -30,6 +29,21 @@ set(INTEGRALS_SOURCES jellium_ints.cpp ) +# GPU-specific sources (only compiled when USE_ACC is ON) +set(INTEGRALS_GPU_SOURCES + gpu_util.cpp + cuda_util.cpp + ) + +# Combine sources based on USE_ACC flag +if(USE_ACC) + set(INTEGRALS_SOURCES ${INTEGRALS_COMMON_SOURCES} ${INTEGRALS_GPU_SOURCES}) + message(STATUS "Building SlaterGPU library with GPU support") +else() + set(INTEGRALS_SOURCES ${INTEGRALS_COMMON_SOURCES}) + message(STATUS "Building SlaterGPU library for CPU-only") +endif() + add_library(SlaterGPU STATIC "${INTEGRALS_SOURCES}") target_include_directories(SlaterGPU PUBLIC $ @@ -52,4 +66,4 @@ else() target_compile_options(SlaterGPU PRIVATE ${SLATER_COMP_FLAGS} -DUSE_ACC=0 ) -endif() +endif() \ No newline at end of file diff --git a/src/integrals/cpu_util.cpp b/src/integrals/cpu_util.cpp index 44b39dc..7afd923 100644 --- a/src/integrals/cpu_util.cpp +++ b/src/integrals/cpu_util.cpp @@ -1120,3 +1120,23 @@ void cross(double* m, double* r1, double* r2) return; } + +// Extended version of mat_times_mat_at_cpu with M, N, K parameters +void mat_times_mat_at_cpu(double* C, double* A, double* B, int M, int N, int K) +{ + // C = A^T * B + // A is K x M, B is K x N, C is M x N + + int LDA = M; + int LDB = N; + int LDC = N; + + double ALPHA = 1.0; + double BETA = 0.0; + + char TA = 'T'; + char TB = 'N'; + dgemm_(&TB,&TA,&N,&M,&K,&ALPHA,B,&LDB,A,&LDA,&BETA,C,&LDC); + + return; +} \ No newline at end of file diff --git a/src/integrals/gauss.cpp b/src/integrals/gauss.cpp index 7be6b93..7bbdc27 100644 --- a/src/integrals/gauss.cpp +++ b/src/integrals/gauss.cpp @@ -240,11 +240,13 @@ void integrate_hole_para_gh(double* rdm, bool full_rdm, bool hfx_on, int Nc, int val2b[i1][j] = v2; } + #if USE_ACC copy_to_all_gpu(ngpu,gsa6,grid,0); for (int i1=0;i1 void auto_crash(); void print_duration(chrono::high_resolution_clock::time_point t1, chrono::high_resolution_clock::time_point t2, string name); -void copy_to_all_gpu(int ngpu, int s1, double* A, int include_first); +#if USE_ACC + void copy_to_all_gpu(int ngpu, int s1, double* A, int include_first); +#endif void gather_12_d_En_0(int s1, int s2, int gs, int iN, float** valtx1, float** valtx2, float** valS1x, float** valS2x, float* wtt1, float* wtt2) { @@ -292,7 +294,9 @@ void compute_d_3c_para(int npgu, int natoms, int* atno, float* coords, vector Date: Tue, 14 Oct 2025 22:37:37 -0400 Subject: [PATCH 2/4] More edits for cpu version --- include/cuda_util.h | 12 ++++--- include/cusp.h | 3 -- include/gauss.h | 2 -- include/jellium_ints.h | 2 -- include/scf_util.h | 2 -- recipe.yaml | 2 +- src/integrals/CMakeLists.txt | 7 ++--- src/integrals/cuda_util.cpp | 61 +++++++++++++++++++++++++++++++++--- src/integrals/gpu_util.cpp | 15 +++++++++ 9 files changed, 82 insertions(+), 24 deletions(-) diff --git a/include/cuda_util.h b/include/cuda_util.h index 1954698..3f58030 100644 --- a/include/cuda_util.h +++ b/include/cuda_util.h @@ -1,11 +1,16 @@ #ifndef CUDA_UTILH #define CUDA_UTILH -#if USE_ACC - -//#include +#if !USE_ACC +#include +typedef std::complex cuDoubleComplex; +typedef std::complex cuFloatComplex; +typedef void* cusolverDnHandle_t; +typedef void* cublasHandle_t; +#else #include #include +#endif int invert_eigen_cusolver(int size, double* A, double eig_max, cusolverDnHandle_t& cu_hdl); int invert_stable_cusolver(int size, double* A, double delta, cusolverDnHandle_t& cu_hdl); @@ -43,6 +48,5 @@ void copy_to_all_gpu(int ngpu, int s1, double* A, int include_first); void copy_to_all_gpu(int ngpu, int s1, float* A, int include_first); void copy_to_all_gpu(int ngpu, int s1, int s2, double** A, int include_first); -#endif // USE_ACC #endif // CUDA_UTILH \ No newline at end of file diff --git a/include/cusp.h b/include/cusp.h index 621e338..fcefc37 100644 --- a/include/cusp.h +++ b/include/cusp.h @@ -4,9 +4,7 @@ //#include "hartree.h" #include "read.h" #include "write.h" -#if USE_ACC #include "cuda_util.h" -#endif #include "cpu_util.h" #include "becke.h" #include "integrals.h" @@ -26,7 +24,6 @@ using namespace std; -typedef void* cusolverDnHandle_t; // Dummy type for CPU-only builds void compute_diatomic_symm(int natoms, int* atno, vector > basis, vector& pB_all, int prl); diff --git a/include/gauss.h b/include/gauss.h index d0931af..e5f83ee 100644 --- a/include/gauss.h +++ b/include/gauss.h @@ -6,9 +6,7 @@ #include "read.h" #include "gto_grid.h" #include "print.h" -#if USE_ACC #include "cuda_util.h" -#endif void eval_gh(int gs, float* grid, float* val1, int l1, int m1, const float norm1, const float zeta1); void eval_ghd(int gs, double* grid, double* val1, int l1, int m1, const double norm1, const double zeta1); diff --git a/include/jellium_ints.h b/include/jellium_ints.h index 4acb49d..373fd6a 100644 --- a/include/jellium_ints.h +++ b/include/jellium_ints.h @@ -4,9 +4,7 @@ //#include "hartree.h" #include "read.h" #include "write.h" -#if USE_ACC #include "cuda_util.h" -#endif #include "cpu_util.h" #include "integrals.h" #include "becke.h" diff --git a/include/scf_util.h b/include/scf_util.h index 922f8e6..8a8bc58 100644 --- a/include/scf_util.h +++ b/include/scf_util.h @@ -9,9 +9,7 @@ #include #include "cpu_util.h" -#if USE_ACC #include "cuda_util.h" -#endif int count_zero_mo(int N, double* jCA); int core_count(int natoms, int* atno); diff --git a/recipe.yaml b/recipe.yaml index dfdc52b..4a744e3 100644 --- a/recipe.yaml +++ b/recipe.yaml @@ -1,5 +1,5 @@ context: - version: "0.1.16" + version: "0.1.17" package: name: slatergpu diff --git a/src/integrals/CMakeLists.txt b/src/integrals/CMakeLists.txt index ac20f1a..0c0fcc3 100644 --- a/src/integrals/CMakeLists.txt +++ b/src/integrals/CMakeLists.txt @@ -27,17 +27,14 @@ set(INTEGRALS_COMMON_SOURCES hess.cpp opt.cpp jellium_ints.cpp - ) - -# GPU-specific sources (only compiled when USE_ACC is ON) -set(INTEGRALS_GPU_SOURCES gpu_util.cpp cuda_util.cpp ) + # Combine sources based on USE_ACC flag if(USE_ACC) - set(INTEGRALS_SOURCES ${INTEGRALS_COMMON_SOURCES} ${INTEGRALS_GPU_SOURCES}) + set(INTEGRALS_SOURCES ${INTEGRALS_COMMON_SOURCES}) message(STATUS "Building SlaterGPU library with GPU support") else() set(INTEGRALS_SOURCES ${INTEGRALS_COMMON_SOURCES}) diff --git a/src/integrals/cuda_util.cpp b/src/integrals/cuda_util.cpp index 98327e0..d3c6763 100644 --- a/src/integrals/cuda_util.cpp +++ b/src/integrals/cuda_util.cpp @@ -1,3 +1,5 @@ +//Vaibhav needs to fix this + #include #include #include @@ -10,7 +12,10 @@ #include "cuda_util.h" #include "cpu_util.h" +#if USE_ACC #include "cuda_runtime.h" +#include +#endif //these functions assume the matrices are already on the device, via ACC //hoping to reduce overhead by getting cusolver handle just once @@ -23,21 +28,32 @@ void print_square_sm(int N, float* A); void print_square_sm(int N, double* A); double read_float(string filename); -#include + void print_square_complex(int N, cuDoubleComplex* A) { + #if !USE_ACC + printf("\n ERROR: there is no CPU implementation for print_square_complex \n"); exit(-1); + #else for (int i=0;i Date: Thu, 8 Jan 2026 15:57:00 -0500 Subject: [PATCH 3/4] Enable GPU acceleration in CMake configuration --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a9c6a04..29ec023 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ project(SlaterGPU VERSION 0.1 DESCRIPTION "Integrals over Slater type orbitals with GPU Acceleration" LANGUAGES C CXX) -set(USE_ACC False) +set(USE_ACC True) set(USE_MPI True) set(USE_OMP True) set(RED_DOUBLE True) From c42ccc5ba0c76e1968838ee2570f42370eae1ae6 Mon Sep 17 00:00:00 2001 From: Vaibhav-Chemistry Date: Fri, 9 Jan 2026 14:50:09 -0500 Subject: [PATCH 4/4] Making pVp generation based on X2C file --- examples/main.cpp | 16 +++-- include/cintwrapper.h | 4 ++ include/integrals.h | 1 + src/integrals/integrals_aux.cpp | 54 +++++++++++++- src/libcintw/cintwrapper.cpp | 121 ++++++++++++++++++++++++++++++++ 5 files changed, 191 insertions(+), 5 deletions(-) diff --git a/examples/main.cpp b/examples/main.cpp index 26baeab..d48726d 100644 --- a/examples/main.cpp +++ b/examples/main.cpp @@ -68,14 +68,20 @@ void compute_ps_integrals_to_disk(int natoms, int* atno, double* coords, vector< for (int j=0;j 0) printf("Printing PS Integral Files:\n"); write_S_En_T(N,Ssp,Ensp,Tsp); - //write_square(N,pVpsp,"pVp",2); + if (x2c) + write_square(N,pVpsp,"pVp",2); write_square(Naux,Asp,"A",2); write_C(Naux,N2,Csp); @@ -297,7 +303,9 @@ int main(int argc, char* argv[]) { if (prl > 0) printf("Printing Standard Integral Files:\n"); write_S_En_T(N,S,En,T); write_square(Naux,A,"A",2); - write_square(N,pVp,"pVp",2); + bool x2c = read_int("X2C"); + if (x2c) + write_square(N,pVp,"pVp",2); write_C(Naux, N2, C); if(c4 > 0) diff --git a/include/cintwrapper.h b/include/cintwrapper.h index 8e6d642..041f835 100644 --- a/include/cintwrapper.h +++ b/include/cintwrapper.h @@ -99,6 +99,10 @@ void get_overlap_ri(double * overlap, int Naux, int natm, int nbas, int nbas_ri, int nenv, int *atm, int* bas, double *env); +void get_hcore(double *hcore, int N, + int natm, int nbas, int nenv, + int *atm, int *bas, double *env); + void get_hcore(double *hcore, double *En, double *T, int N, int natm, int nbas, int nenv, int *atm, int *bas, double *env); diff --git a/include/integrals.h b/include/integrals.h index b475b07..b6a0a57 100644 --- a/include/integrals.h +++ b/include/integrals.h @@ -228,6 +228,7 @@ void copy_symm_4c_ps_cpu(int natoms, int* n2i, int N, double* olp); int get_natoms_with_basis(int natoms, int* atno, vector >& basis); vector > setup_integrals_gsgpu(vector >& basis_aux, int natoms, int* atno, double* coords, int& nbas, int& nenv, int& N, int& Naux, int& nbas_ri, int* &atm, int* &bas, double* &env, int prl); +void compute_integrals_g(int natm, int nbas, int nenv, int N, int Naux, int nbas_ri, int* atm, int* bas, double* env, double* S, double* T, double* jH1, double* A, double* C, int prl); void compute_integrals_g(int natm, int nbas, int nenv, int N, int Naux, int nbas_ri, int* atm, int* bas, double* env, double* S, double* En, double* T, double* jH1, double* A, double* C, double* pvp, int prl); void compute_gaussian_integrals_to_disk(int N, int Naux, int natoms, int nbas, int nenv, int nbas_ri, int* atm, int* bas, double* env); diff --git a/src/integrals/integrals_aux.cpp b/src/integrals/integrals_aux.cpp index caf9cea..4a52e24 100644 --- a/src/integrals/integrals_aux.cpp +++ b/src/integrals/integrals_aux.cpp @@ -5,6 +5,7 @@ #include "../../include/cintprep.h" #include "write.h" +#include "read.h" void auto_crash() { @@ -1617,13 +1618,64 @@ vector > setup_integrals_gsgpu(vector >& basis_aux return basis; } +void compute_integrals_g(int natm, int nbas, int nenv, int N, int Naux, int nbas_ri, int* atm, int* bas, double* env, double* S, double* T, double* jH1, double* A, double* C, int prl) +{ + get_overlap(S, N, natm, nbas, nenv, atm, bas, env); + get_hcore(jH1, N, natm, nbas, nenv, atm, bas, env); + if (T!=NULL) + get_tcore(T, N, natm, nbas, nenv, atm, bas, env); + + if (prl>1) + { + printf("\n S: \n"); + print_square(N,S); + printf("\n jH1: \n"); + print_square(N,jH1); + } + + if (Naux>0) + { + gen_eri_2c(A, Naux, natm, nbas, nenv, nbas_ri, atm, bas, env); + gen_eri_3c(C, N, Naux, natm, nbas, nenv, nbas_ri, atm, bas, env); + } + else + { + printf(" no auxiliary basis \n"); + } + + if (prl>1) + { + printf("\n A: \n"); + for (int m=0;m1) { diff --git a/src/libcintw/cintwrapper.cpp b/src/libcintw/cintwrapper.cpp index 444fd9e..d9a82ea 100644 --- a/src/libcintw/cintwrapper.cpp +++ b/src/libcintw/cintwrapper.cpp @@ -157,6 +157,127 @@ void get_overlap_ri(double * overlap, int Naux, } // else } +void get_hcore(double *hcore, int N, + int natm, int nbas, int nenv, + int *atm, int *bas, double *env) { + int idx_i = 0; + int di, dj; + int shls[2]; + + double *tmp = new double[N * N]; + + if (BT::DO_CART) { + for (int i = 0; i < nbas; i++) { + int idx_j = 0; + shls[0] = i; + di = CINTcgto_cart(i, bas); + for (int j = 0; j <= i; j++) { + dj = CINTcgto_cart(j, bas); + shls[1] = j; + double *buf = new double[di*dj]; + cint1e_nuc_cart(buf,shls,atm,natm,bas,nbas,env); + + for (int j1 = 0; j1 < dj; j1++) { + for (int i1 = 0; i1 < di; i1++) { + int oi = i1 + idx_i; + int oj = j1 + idx_j; + tmp[oi * N + oj] = tmp[oj * N + oi] = buf[j1*di + i1]; + } // for i1 + } // for j1 + delete [] buf; + idx_j += dj; + } // for j + idx_i += di; + }// for i + + for (int i = 0; i < N * N; i++) { + hcore[i] = tmp[i]; + } + + idx_i = 0; + for (int i = 0; i < nbas; i++) { + int idx_j = 0; + shls[0] = i; + di = CINTcgto_cart(i, bas); + for (int j = 0; j <= i; j++) { + dj = CINTcgto_cart(j, bas); + shls[1] = j; + double *buf = new double[di*dj]; + cint1e_kin_cart(buf,shls,atm,natm,bas,nbas,env); + + for (int j1 = 0; j1 < dj; j1++) { + for (int i1 = 0; i1 < di; i1++) { + int oi = i1 + idx_i; + int oj = j1 + idx_j; + tmp[oi * N + oj] = tmp[oj * N + oi] = buf[j1*di + i1]; + } // for i1 + } // for j1 + delete [] buf; + idx_j += dj; + } // for j + idx_i += di; + }// for i + } // if BT::DO_CART + else { + for (int i = 0; i < nbas; i++) { + int idx_j = 0; + shls[0] = i; + di = CINTcgto_spheric(i, bas); + for (int j = 0; j <= i; j++) { + dj = CINTcgto_spheric(j, bas); + shls[1] = j; + double *buf = new double[di*dj]; + cint1e_nuc_sph(buf,shls,atm,natm,bas,nbas,env); + + for (int j1 = 0; j1 < dj; j1++) { + for (int i1 = 0; i1 < di; i1++) { + int oi = i1 + idx_i; + int oj = j1 + idx_j; + tmp[oi * N + oj] = tmp[oj * N + oi] = buf[j1*di + i1]; + } // for i1 + } // for j1 + delete [] buf; + idx_j += dj; + } // for j + idx_i += di; + }// for i + + for (int i = 0; i < N * N; i++) { + hcore[i] = tmp[i]; + } + + idx_i = 0; + for (int i = 0; i < nbas; i++) { + int idx_j = 0; + shls[0] = i; + di = CINTcgto_spheric(i, bas); + for (int j = 0; j <= i; j++) { + dj = CINTcgto_spheric(j, bas); + shls[1] = j; + double *buf = new double[di*dj]; + cint1e_kin_sph(buf,shls,atm,natm,bas,nbas,env); + + for (int j1 = 0; j1 < dj; j1++) { + for (int i1 = 0; i1 < di; i1++) { + int oi = i1 + idx_i; + int oj = j1 + idx_j; + tmp[oi * N + oj] = tmp[oj * N + oi] = buf[j1*di + i1]; + } // for i1 + } // for j1 + delete [] buf; + idx_j += dj; + } // for j + idx_i += di; + }// for i + } + + for (int i = 0; i < N * N; i++) { + hcore[i] += tmp[i]; + } + + delete [] tmp; +} + void get_hcore(double *hcore, double *En, double *T, int N, int natm, int nbas, int nenv, int *atm, int *bas, double *env) {