nvda build: numeric GPU_ARCH, MPI_ROOT, and -fpic/-shared flag cleanup

Makefile (src/sedacs/gpu/nvda/Makefile)
  * GPU_ARCH is supplied as a bare compute-capability number (89, 80, ...);
    the Makefile turns it into the "ccNN" form used by the -gpu option.
    - An empty/unset GPU_ARCH falls back to 89, the same default the build
      script uses, instead of expanding to a bare "cc".
    - A value that already carries the "cc" prefix (the format the previous
      build script exported) is accepted and not prefixed twice.
    - The assignment uses "override" so that "make GPU_ARCH=90" on the
      command line is normalised as well.
  * CXX_FLAGS always gets "-fpic -gpu=$(GPU_ARCH)" appended; CXX_SO_FLAGS now
    holds the link-only flags "-shared -fpic" and is no longer passed to the
    per-object compile rules.
  * The "so" link rule uses -gpu=$(GPU_ARCH) rather than a fixed
    architecture, so the shared object is linked for the same target the
    objects were compiled for.
  * EXTRA_CXX_FLAGS is taken from the environment instead of being fixed in
    the Makefile.
  * MPI paths are read from MPI_ROOT (was OMPI_ROOT).

Build script (src/sedacs/gpu/nvda/von-neumann_build.sh)
  * Points NVHPC_ROOT / MPI_ROOT at the nvhpc 25.5 and openmpi 5.0.7 installs.
  * Exports GPU_ARCH as a number, CXX_FLAGS defaulting to -O3, and the
    diagnostic-suppression flags via EXTRA_CXX_FLAGS.

Note: the "index" lines below carry the blob ids of the original patch and
were not recomputed.

diff --git a/src/sedacs/gpu/nvda/Makefile b/src/sedacs/gpu/nvda/Makefile
index 767dfec..95cb91d 100644
--- a/src/sedacs/gpu/nvda/Makefile
+++ b/src/sedacs/gpu/nvda/Makefile
@@ -5,14 +5,15 @@
 #
 # ##########################################################
 #
-# ## CC COMPILER OPTIONS ##
+# ## CXX COMPILER OPTIONS ##
 #
-# ## CC compiler options:
-CXX:=${CXX}
-CXX_FLAGS:=${CXX_FLAGS}
-CXX_SO_FLAGS= -fpic
-EXTRA_CXX_FLAGS= --diag_suppress=bad_macro_redef
+# ## CXX compiler options:
 $(info $$CXX is [${CXX}])
+override GPU_ARCH:=cc$(patsubst cc%,%,$(or $(strip ${GPU_ARCH}),89))
+CXX:=${CXX}
+CXX_FLAGS:=${CXX_FLAGS} -fpic -gpu=$(GPU_ARCH)
+EXTRA_CXX_FLAGS:=${EXTRA_CXX_FLAGS}
+CXX_SO_FLAGS=-shared -fpic
 #
 # #
 # # CUDA math library directory:
@@ -42,9 +43,9 @@ CUDA_LINK_LIB= -lcudart -lnvToolsExt
 #
 # MPI library
 #
-MPI_LIB_DIR= -L${OMPI_ROOT}/lib
+MPI_LIB_DIR= -L${MPI_ROOT}/lib
 # # MPI include directory:
-MPI_INC_DIR= -I${OMPI_ROOT}/include
+MPI_INC_DIR= -I${MPI_ROOT}/include
 # # MPI linking libraries:
 MPI_LINK_LIB= -lmpi
 # #
@@ -122,36 +123,36 @@ OBJS = $(OBJSP2) $(OBJDIAG) $(OBJPRT) $(OBJCHEBY) \
 
 # Link compiled object files to target so:
 so: $(OBJS)
-	$(CXX) -shared $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) $(OBJS) -o $(SO) $(MPI_LIB_DIR) $(MPI_LINK_LIB) $(CUDA_LIB_DIR) $(CUDA_LINK_LIB) $(CUDAMATH_LIB_DIR) $(CUDAMATH_LINK_LIB) $(NVCOMP_LIB_DIR) $(NVCOMP_LINK_LIB) -lnvToolsExt
+	$(CXX) $(CXX_SO_FLAGS) -gpu=$(GPU_ARCH) -cuda $(EXTRA_CXX_FLAGS) $(OBJS) -o $(SO) $(MPI_LIB_DIR) $(MPI_LINK_LIB) $(CUDA_LIB_DIR) $(CUDA_LINK_LIB) $(CUDAMATH_LIB_DIR) $(CUDAMATH_LINK_LIB) $(NVCOMP_LIB_DIR) $(NVCOMP_LINK_LIB)
 
 $(OBJ_DIR)/diag/%.o: $(SRC_DIR)/diag/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 $(OBJ_DIR)/dnnsp2/%.o: $(SRC_DIR)/dnnsp2/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 $(OBJ_DIR)/dnnprt/%.o: $(SRC_DIR)/dnnprt/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 $(OBJ_DIR)/goldensp2/%.o: $(SRC_DIR)/goldensp2/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 $(OBJ_DIR)/movingmusp2/%.o: $(SRC_DIR)/movingmusp2/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 $(OBJ_DIR)/fastcheby/%.o: $(SRC_DIR)/fastcheby/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) $(LAPACK_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB) $(LAPACK_LIB_DIR) $(LAPACK_LINK_LIB)
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) $(LAPACK_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB) $(LAPACK_LIB_DIR) $(LAPACK_LINK_LIB)
 
 #$(OBJ_DIR)/invovlp/%.o: $(SRC_DIR)/invovlp/%.cu
 #	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR)\
 #	$(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 $(OBJ_DIR)/mlsp2/%.o: $(SRC_DIR)/mlsp2/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR)\
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR)\
 	$(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 $(OBJ_DIR)/lib/%.o: $(SRC_DIR)/lib/%.cu
-	$(CXX) $(CXX_SO_FLAGS) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
+	$(CXX) $(CXX_FLAGS) $(EXTRA_CXX_FLAGS) -c $< $(INCLUDE_DIR) $(MPI_INC_DIR) $(CUDA_INC_DIR) $(CUDAMATH_INC_DIR) -o $@ $(CUDA_LIB_DIR) $(CUDA_LINK_LIB)
 
 
 #Clean objects in object directory.
diff --git a/src/sedacs/gpu/nvda/von-neumann_build.sh b/src/sedacs/gpu/nvda/von-neumann_build.sh
index a1d2de9..a5f8e2a 100755
--- a/src/sedacs/gpu/nvda/von-neumann_build.sh
+++ b/src/sedacs/gpu/nvda/von-neumann_build.sh
@@ -1,10 +1,9 @@
 #!/bin/bash
 
 # set env vars
-export NVHPC_ROOT=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3
+export NVHPC_ROOT=/projects/shared/spack/opt/spack/linux-zen4/nvhpc-25.5-2z35ztggfsmvkpb74a2rouofaymnkyj6/Linux_x86_64/25.5/
 export LD_LIBRARY_PATH=$NVHPC_ROOT/math_libs/lib64:$NVHPC_ROOT/cuda/lib64:$LD_LIBRARY_PATH
-
-export OMPI_ROOT=/projects/shared/spack/opt/spack/linux-ubuntu22.04-zen4/gcc-12.3.0/openmpi-4.1.6-iqs52vvok2k7s6nb7vh3rxqedsfinihz #/projects/shared/spack/opt/spack/linux-ubuntu22.04-zen4/nvhpc-24.3/openmpi-5.0.3-jw7heth5kztakv3khq3nqoyiecskyatk
+export MPI_ROOT=/projects/shared/spack/opt/spack/linux-zen4/openmpi-5.0.7-6dehamwfocnigimmbozkxhujq4b3epqf #/projects/shared/spack/opt/spack/linux-ubuntu22.04-zen4/gcc-12.3.0/openmpi-4.1.6-iqs52vvok2k7s6nb7vh3rxqedsfinihz
 
 # Make sure all the paths are correct
 rm -r build
@@ -14,9 +13,11 @@ make clean
 
 MY_PATH=$(pwd)
 
-export CXX=nvc++ #${NVHPC_ROOT}/compilers/bin/nvc++
-export GPU_ARCH=${GPU_ARCH:=cc89} # use cc70 for V100, cc80 for A100 and cc90 for H100
-export CXX_FLAGS=${CXX_FLAGS:=" -O3 -cuda -gpu=${GPU_ARCH} -acc=gpu -Minfo=accel"}
+export CXX=nvc++ # c++ compiler, needs to be able to compile with fpic
+export GPU_ARCH=${GPU_ARCH:="89"} # use 70 for V100, 80 for A100 and 90 for H100
+export CXX_FLAGS=${CXX_FLAGS:="-O3"} # add compilation flags
+export EXTRA_CXX_FLAGS=${EXTRA_CXX_FLAGS:="--diag_suppress=declared_but_not_referenced --diag_suppress=cuda_compile --diag_suppress=bad_macro_redef"}
+
 echo "Your CXX compiler is: " ${CXX}
 make
 