Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,54 @@ target_include_directories(sgpu.exe PRIVATE
"${CMAKE_SOURCE_DIR}/src/libcintw"
"${CMAKE_SOURCE_DIR}/include"
)

set(TEST_COMP_FLAGS ${CMAKE_CXX_FLAGS})

# CUDA/GPU dependencies - only when USE_ACC is enabled
if(USE_ACC)
enable_language(CUDA)
find_package(CUDA REQUIRED)
find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})

target_link_options(sgpu.exe PRIVATE
-acc=gpu
)
set(TEST_COMP_FLAGS ${TEST_COMP_FLAGS}
-DUSE_ACC=1 -acc=gpu
)

# GPU-specific link libraries
set(GPU_LIBRARIES
"${CUDART_LIBRARY}"
"${CUDA_LIBRARIES}"
"${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcublas.so"
"${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcusolver.so"
)
else()
# CPU-only mode
set(GPU_LIBRARIES "")
message(STATUS "Building CPU-only version (USE_ACC=OFF)")
endif()

#LAPACK cmake not right.
#need to use this as a variable below
find_package(LAPACK)

find_package(BLAS)
enable_language(CUDA)
find_package(CUDA REQUIRED)
find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})

#enable_language(FORTRAN)
#find_library(FORTRANLIBS fortranlibs ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})

set_property(TARGET sgpu.exe PROPERTY CXX_STANDARD 14)
target_compile_options(sgpu.exe PRIVATE ${TEST_COMP_FLAGS} -O2)
#target_link_libraries(sgpu.exe PRIVATE SlaterGPU io "${CUDART_LIBRARY}" "${CUDA_LIBRARIES}" )
target_link_libraries(sgpu.exe PRIVATE SlaterGPU io cintw "${FORTRANLIBS}" "${CUDART_LIBRARY}" "${CUDA_LIBRARIES}" "${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcublas.so" "${CUDA_TOOLKIT_ROOT_DIR}/../../math_libs/lib64/libcusolver.so" "-L/home/paulzim/integrate/gpu/lib/lapack-3.9.0/lib/ -llapack -lblas" "-L/export/apps/RockyOS8/nvidia_hpc/2024_249/Linux_x86_64/24.9/ -fortranlibs" )

# Link libraries - GPU_LIBRARIES will be empty for CPU-only builds
target_link_libraries(sgpu.exe PRIVATE
SlaterGPU
io
cintw
"${FORTRANLIBS}"
${GPU_LIBRARIES}
"-L/home/paulzim/integrate/gpu/lib/lapack-3.9.0/lib/ -llapack -lblas"
"-L/export/apps/RockyOS8/nvidia_hpc/2024_249/Linux_x86_64/24.9/ -fortranlibs"
)

file(COPY lih_VK1 DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY lih_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY h2_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
Expand All @@ -42,6 +64,5 @@ file(COPY noplus_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY ch4_ri5 DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY run.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

# Add installation for the executable
install(TARGETS sgpu.exe
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
16 changes: 12 additions & 4 deletions examples/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,20 @@ void compute_ps_integrals_to_disk(int natoms, int* atno, double* coords, vector<
for (int j=0;j<N2;j++) Ssp[j] = Tsp[j] = Ensp[j] = pVpsp[j] = 0.;

compute_STEn_ps(natoms,atno,coords,basis,nquad,nmu,nnu,nphi,Ssp,Tsp,Ensp,prl);
//compute_pVp_ps(natoms,atno,coords,basis,nquad,nmu,nnu,nphi,pVpsp,prl);
//compute_pVp_3c_ps(natoms,atno,coords,basis,nquad,nquad2,nsplit,nmu,nnu,nphi,pVpsp,prl);

bool x2c = read_int("X2C");
if (x2c)
{
compute_pVp_ps(natoms,atno,coords,basis,nquad,nmu,nnu,nphi,pVpsp,prl);
compute_pVp_3c_ps(natoms,atno,coords,basis,nquad,nquad2,nsplit,nmu,nnu,nphi,pVpsp,prl);
}
compute_2c_ps(0,0,gamma,nbatch,natoms,atno,coords,basis_aux,nquad,nmu,nnu,nphi,Asp,prl);
compute_3c_ps(0,0,gamma,nbatch,natoms,atno,coords,basis,basis_aux,nquad,nquad2,nsplit,nmu,nnu,nphi,Ensp,Csp,prl);

if (prl > 0) printf("Printing PS Integral Files:\n");
write_S_En_T(N,Ssp,Ensp,Tsp);
//write_square(N,pVpsp,"pVp",2);
if (x2c)
write_square(N,pVpsp,"pVp",2);
write_square(Naux,Asp,"A",2);
write_C(Naux,N2,Csp);

Expand Down Expand Up @@ -297,7 +303,9 @@ int main(int argc, char* argv[]) {
if (prl > 0) printf("Printing Standard Integral Files:\n");
write_S_En_T(N,S,En,T);
write_square(Naux,A,"A",2);
write_square(N,pVp,"pVp",2);
bool x2c = read_int("X2C");
if (x2c)
write_square(N,pVp,"pVp",2);
write_C(Naux, N2, C);

if(c4 > 0)
Expand Down
4 changes: 4 additions & 0 deletions include/cintwrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ void get_overlap_ri(double * overlap, int Naux,
int natm, int nbas, int nbas_ri, int nenv,
int *atm, int* bas, double *env);

void get_hcore(double *hcore, int N,
int natm, int nbas, int nenv,
int *atm, int *bas, double *env);

void get_hcore(double *hcore, double *En, double *T, int N,
int natm, int nbas, int nenv,
int *atm, int *bas, double *env);
Expand Down
2 changes: 1 addition & 1 deletion include/cpu_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ void mat_times_mat_bt_cpu(double* C, double* A, double* B, int M, int N, int K,
void mat_times_mat_bt_cpu(float* C, float* A, float* B, int N);
void mat_times_mat_bt_cpu(double* C, double* A, double* B, int M, int N, int K);
void mat_times_mat_bt_cpu(double* C, double* A, double* B, int N);

void mat_times_mat_at_cpu(double* C, double* A, double* B, int M, int N, int K);

#endif
12 changes: 10 additions & 2 deletions include/cuda_util.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
#ifndef CUDA_UTILH
#define CUDA_UTILH

//#include <cublas.h>
#if !USE_ACC
#include <complex>
typedef std::complex<double> cuDoubleComplex;
typedef std::complex<float> cuFloatComplex;
typedef void* cusolverDnHandle_t;
typedef void* cublasHandle_t;
#else
#include <cublas_v2.h>
#include <cusolverDn.h>
#endif

int invert_eigen_cusolver(int size, double* A, double eig_max, cusolverDnHandle_t& cu_hdl);
int invert_stable_cusolver(int size, double* A, double delta, cusolverDnHandle_t& cu_hdl);
Expand Down Expand Up @@ -41,4 +48,5 @@ void copy_to_all_gpu(int ngpu, int s1, double* A, int include_first);
void copy_to_all_gpu(int ngpu, int s1, float* A, int include_first);
void copy_to_all_gpu(int ngpu, int s1, int s2, double** A, int include_first);

#endif

#endif // CUDA_UTILH
1 change: 1 addition & 0 deletions include/cusp.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

using namespace std;


void compute_diatomic_symm(int natoms, int* atno, vector<vector<double> > basis, vector<double*>& pB_all, int prl);

void compute_cusp(int natoms, int* atno, double* coords, vector<vector<double> > &basis, double* pB1, double* pB2, int prl);
Expand Down
1 change: 1 addition & 0 deletions include/integrals.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ void copy_symm_4c_ps_cpu(int natoms, int* n2i, int N, double* olp);
int get_natoms_with_basis(int natoms, int* atno, vector<vector<double> >& basis);

vector<vector<double> > setup_integrals_gsgpu(vector<vector<double> >& basis_aux, int natoms, int* atno, double* coords, int& nbas, int& nenv, int& N, int& Naux, int& nbas_ri, int* &atm, int* &bas, double* &env, int prl);
void compute_integrals_g(int natm, int nbas, int nenv, int N, int Naux, int nbas_ri, int* atm, int* bas, double* env, double* S, double* T, double* jH1, double* A, double* C, int prl);
void compute_integrals_g(int natm, int nbas, int nenv, int N, int Naux, int nbas_ri, int* atm, int* bas, double* env, double* S, double* En, double* T, double* jH1, double* A, double* C, double* pvp, int prl);
void compute_gaussian_integrals_to_disk(int N, int Naux, int natoms, int nbas, int nenv, int nbas_ri, int* atm, int* bas, double* env);

Expand Down
19 changes: 15 additions & 4 deletions src/integrals/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
set(INTEGRALS_SOURCES
# Common sources for both CPU and GPU builds
set(INTEGRALS_COMMON_SOURCES
integrals.cpp
integrals_d.cpp
integrals_aux.cpp
Expand All @@ -19,17 +20,27 @@ set(INTEGRALS_SOURCES
ps_first_second_order.cpp
cusp.cpp
cpu_util.cpp
gpu_util.cpp
grid_util.cpp
cuda_util.cpp
scf_util.cpp
symm.cpp
gauss.cpp
hess.cpp
opt.cpp
jellium_ints.cpp
gpu_util.cpp
cuda_util.cpp
)


# Combine sources based on USE_ACC flag
if(USE_ACC)
set(INTEGRALS_SOURCES ${INTEGRALS_COMMON_SOURCES})
message(STATUS "Building SlaterGPU library with GPU support")
else()
set(INTEGRALS_SOURCES ${INTEGRALS_COMMON_SOURCES})
message(STATUS "Building SlaterGPU library for CPU-only")
endif()

add_library(SlaterGPU STATIC "${INTEGRALS_SOURCES}")
target_include_directories(SlaterGPU PUBLIC
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include>
Expand All @@ -52,4 +63,4 @@ else()
target_compile_options(SlaterGPU PRIVATE
${SLATER_COMP_FLAGS} -DUSE_ACC=0
)
endif()
endif()
20 changes: 20 additions & 0 deletions src/integrals/cpu_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1120,3 +1120,23 @@ void cross(double* m, double* r1, double* r2)

return;
}

// Extended version of mat_times_mat_at_cpu with M, N, K parameters
void mat_times_mat_at_cpu(double* C, double* A, double* B, int M, int N, int K)
{
// C = A^T * B
// A is K x M, B is K x N, C is M x N

int LDA = M;
int LDB = N;
int LDC = N;

double ALPHA = 1.0;
double BETA = 0.0;

char TA = 'T';
char TB = 'N';
dgemm_(&TB,&TA,&N,&M,&K,&ALPHA,B,&LDB,A,&LDA,&BETA,C,&LDC);

return;
}
Loading