From 1f0c578abc3855c559918474ae1ece9f88540d00 Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 12:35:43 +0000 Subject: [PATCH 01/34] feat: add CMake options for RISC-V and RVV1.0 --- GraphBLAS/CMakeLists.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index c3ee54779b..c1a49cc785 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -122,6 +122,32 @@ if ( DEFINED GBAVX512F ) endif ( ) endif ( ) +#------------------------------------------------------------------------------- +# RISC-V +#------------------------------------------------------------------------------- + +if ( DEFINED GBRISCV64 ) + if ( GBRISCV64 ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) + endif ( ) +endif ( ) + +if ( DEFINED GBRVV ) + if ( GBRVV ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) + endif ( ) +endif ( ) + +#------------------------------------------------------------------------------- +# check compiler features +#------------------------------------------------------------------------------- + +include ( GraphBLAS_complex ) + #------------------------------------------------------------------------------- # determine build type #------------------------------------------------------------------------------- From de24ca6eec0a9b8f9241a4c1ada9c4268b1ee67d Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 12:42:53 +0000 Subject: [PATCH 02/34] feat: add riscv vectorization support in global values --- Source/global/GB_Global.c | 32 +++++++++++++++++++++++++++++++- Source/global/GB_Global.h | 1 + 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/Source/global/GB_Global.c b/Source/global/GB_Global.c index 27db605d04..72a65691aa 100644 --- a/Source/global/GB_Global.c +++ b/Source/global/GB_Global.c @@ -133,6 +133,7 @@ typedef struct bool cpu_features_avx2 ; // x86_64 with AVX2 bool cpu_features_avx512f ; // x86_64 with AVX512f + bool cpu_features_rvv ; // RISC-V with RVV1.0 //-------------------------------------------------------------------------- // integer control @@ -226,6 +227,7 @@ static GB_Global_struct GB_Global = // CPU features .cpu_features_avx2 = false, // x86_64 with AVX2 .cpu_features_avx512f = false, // x86_64 with AVX512f + .cpu_features_rvv = false, // RISC-V with RVV1.0 // integer control .p_control = (int8_t) 32, @@ -342,6 +344,7 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx2 = false ; } #endif + #if defined ( GBAVX512F ) { // the build system asserts whether or not AVX512F is available @@ -353,19 +356,41 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx512f = false ; } #endif + } #endif + } + #endif + #if GBRISCV64 + { + //---------------------------------------------------------------------- + // xRISC-V architecture: see if RVV1.0 is supported + //---------------------------------------------------------------------- + + #if defined ( GBRVV ) + { + // the build system asserts whether or not RVV1.0 is available + GB_Global.cpu_features_rvv = (bool) (GBRVV) ; + } + #else + { + // RVV1.0 not available + GB_Global.cpu_features_rvv = false ; + } + #endif + } #else { //---------------------------------------------------------------------- - // not on the x86_64 architecture, so no AVX2 or AVX512F acceleration + // not on the x86_64 or RISC-V architecture, so no AVX2, AVX512F or RVV1.0 acceleration //---------------------------------------------------------------------- GB_Global.cpu_features_avx2 = false ; GB_Global.cpu_features_avx512f = false ; + GB_Global.cpu_features_rvv = false ; } #endif @@ -381,6 +406,11 @@ bool GB_Global_cpu_features_avx512f (void) return (GB_Global.cpu_features_avx512f) ; } +bool GB_Global_cpu_features_rvv (void) +{ + return (GB_Global.cpu_features_rvv) ; +} + //------------------------------------------------------------------------------ // hyper_switch //------------------------------------------------------------------------------ diff --git a/Source/global/GB_Global.h b/Source/global/GB_Global.h index 47a77897e9..4a3ae12256 100644 --- a/Source/global/GB_Global.h +++ b/Source/global/GB_Global.h @@ -17,6 +17,7 @@ void GB_Global_cpu_features_query (void) ; bool GB_Global_cpu_features_avx2 (void) ; bool GB_Global_cpu_features_avx512f (void) ; +bool GB_Global_cpu_features_rvv (void) ; void GB_Global_mode_set (int mode) ; int GB_Global_mode_get (void) ; From 5b0a97b844303611eb36ad11d5a8e57724c90dfe Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 12:48:05 +0000 Subject: [PATCH 03/34] feat: add defines for RVV1.0 --- Source/include/GB_compiler.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Source/include/GB_compiler.h b/Source/include/GB_compiler.h index 85f3d1ff2f..6430ab9b44 100644 --- a/Source/include/GB_compiler.h +++ b/Source/include/GB_compiler.h @@ -245,6 +245,16 @@ #endif +#if !defined ( GBRISCV64 ) + + #if defined(__riscv) + #define GBRISCV64 1 + #else + #define GBRISCV64 0 + #endif + +#endif + //------------------------------------------------------------------------------ // AVX2 and AVX512F support for the x86_64 architecture //------------------------------------------------------------------------------ @@ -306,6 +316,31 @@ #define GB_TARGET_AVX2 #endif +//------------------------------------------------------------------------------ +// RVV1.0 support for the RISC-V architecture +//------------------------------------------------------------------------------ + +#if GBRISCV64 + #if GB_COMPILER_GCC + // TODO: add other compilers + #if __GNUC__ >= 13 + #define GB_COMPILER_SUPPORTS_RVV1 1 + #else + #define GB_COMPILER_SUPPORTS_RVV1 0 + #endif + #endif +#else + // non-RISC-V architecture + #define GB_COMPILER_SUPPORTS_RVV1 0 +#endif + +// prefix for function with target rvv1.0 +#if GB_COMPILER_SUPPORTS_RVV1 + #define GB_TARGET_RVV1 __attribute__ ((target ("arch=rv64gcv"))) +#else + #define GB_TARGET_RVV1 +#endif + //------------------------------------------------------------------------------ // disable Google's cpu_featgures on some compilers //------------------------------------------------------------------------------ From 2b9743e4797d7031ac8a017f74d8542f3fb1032e Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 13:02:46 +0000 Subject: [PATCH 04/34] feat: add rvv function templates --- FactoryKernels/GB_AxB__plus_times_fp32.c | 33 +++++++++++++++ FactoryKernels/GB_AxB__plus_times_fp64.c | 33 +++++++++++++++ GraphBLAS/rename/GB_rename.h | 1 + Source/codegen/Generator/GB_AxB.c | 34 +++++++++++++++ .../template/GB_jit_kernel_AxB_saxpy5.c | 42 +++++++++++++++++++ Source/mxm/factory/GB_AxB_saxpy5_meta.c | 11 +++++ 6 files changed, 154 insertions(+) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index 0acbd8ca9b..b0da66935d 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -287,6 +287,39 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) #endif + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + printf("riscvhype!\n"); + #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + } + + #endif + //---------------------------------------------------------------------- // saxpy5 method unrolled, with no vectors //---------------------------------------------------------------------- diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index 572ff1e8e4..eac3354798 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -287,6 +287,39 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) #endif + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + printf("riscvhype!\n"); + #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + } + + #endif + //---------------------------------------------------------------------- // saxpy5 method unrolled, with no vectors //---------------------------------------------------------------------- diff --git a/GraphBLAS/rename/GB_rename.h b/GraphBLAS/rename/GB_rename.h index cbfab16bf0..2380582ca8 100644 --- a/GraphBLAS/rename/GB_rename.h +++ b/GraphBLAS/rename/GB_rename.h @@ -390,6 +390,7 @@ #define GB_Global_calloc_function_set GM_Global_calloc_function_set #define GB_Global_cpu_features_avx2 GM_Global_cpu_features_avx2 #define GB_Global_cpu_features_avx512f GM_Global_cpu_features_avx512f +#define GB_Global_cpu_features_rvv GM_Global_cpu_features_rvv #define GB_Global_cpu_features_query GM_Global_cpu_features_query #define GB_Global_flush_get GM_Global_flush_get #define GB_Global_flush_set GM_Global_flush_set diff --git a/Source/codegen/Generator/GB_AxB.c b/Source/codegen/Generator/GB_AxB.c index 3466347c57..6f666862d8 100644 --- a/Source/codegen/Generator/GB_AxB.c +++ b/Source/codegen/Generator/GB_AxB.c @@ -310,6 +310,40 @@ m4_divert(if_semiring_has_avx) } #endif +m4_divert(if_semiring_has_rvv) + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + printf("riscvhype!\n"); + #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + } + + #endif + m4_divert(if_saxpy5_enabled) //---------------------------------------------------------------------- diff --git a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index 36838a8363..7289ab1282 100644 --- a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -82,6 +82,38 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; } #endif + + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + #include "template/GB_AxB_saxpy5_unrolled.c" + } + + #endif #endif @@ -169,6 +201,16 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) return (GrB_SUCCESS) ; } #endif + + #if GB_COMPILER_SUPPORTS_RVV1 + if (cpu_has_avx2) + { + // RISC-V64 with RVV1.0 + GB_AxB_saxpy5_unrolled_rvv (C, A, B, ntasks, nthreads, + B_slice) ; + return (GrB_SUCCESS) ; + } + #endif } #endif diff --git a/Source/mxm/factory/GB_AxB_saxpy5_meta.c b/Source/mxm/factory/GB_AxB_saxpy5_meta.c index cd4851405f..efe1d8cc1e 100644 --- a/Source/mxm/factory/GB_AxB_saxpy5_meta.c +++ b/Source/mxm/factory/GB_AxB_saxpy5_meta.c @@ -143,6 +143,17 @@ } #endif #endif + + #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION + #if GB_COMPILER_SUPPORTS_RVV1 + if (GB_Global_cpu_features_rvv ( )) + { + GB_AxB_saxpy5_unrolled_rvv (C, A, B, + ntasks, nthreads, B_slice) ; + return (GrB_SUCCESS) ; + } + #endif + #endif // any architecture and any built-in semiring GB_AxB_saxpy5_unrolled_vanilla (C, A, B, ntasks, nthreads, B_slice) ; From 3f51b1dd2e68647e895c2e10917e85c2828f6d0d Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Sat, 30 Nov 2024 09:57:11 +0000 Subject: [PATCH 05/34] test: add test --- test/test.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 test/test.c diff --git a/test/test.c b/test/test.c new file mode 100644 index 0000000000..b9fe12bc59 --- /dev/null +++ b/test/test.c @@ -0,0 +1,118 @@ +#include +#include +#include +#include +// #include "../GraphBLAS/Source/include/GB_compiler.h" + +int main() +{ + GrB_Info info; + GrB_Matrix A, B, C; + + GrB_Index nrows = 1000, ncols = 1000; + int test_count = 20; + + info = GrB_init(GrB_NONBLOCKING); + if (info != GrB_SUCCESS) + { + printf("Initialization failed!\n"); + return 1; + } + GrB_Matrix_new(&A, GrB_FP64, nrows, ncols); + GrB_Matrix_new(&B, GrB_FP64, nrows, ncols); + GrB_Matrix_new(&C, GrB_FP64, nrows, ncols); + + GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); + // INITIALIZE MATRICES + srand(52); + clock_t start = clock(); + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(A, value, i, j); + } + } + + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(B, value, i, j); + } + } + + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(C, 0, i, j); + } + } + // printf("!%d\n",GBX86); + // printf("!%d\n",GBRISCV64); + clock_t end = clock(); + float seconds = (float)(end - start) / CLOCKS_PER_SEC; + printf("==============MATRIX SIZE: %lux%lu==============\n", nrows, ncols); + printf("==============INITIALIZING TIME: %f==============\n\n", seconds); + + // Set Matrices type + + GrB_set(A, GxB_SPARSE, GxB_SPARSITY_CONTROL); + GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); + GrB_set(C, GxB_FULL, GxB_SPARSITY_CONTROL); + int32_t sparsityA; + GrB_get(A, &sparsityA, GxB_SPARSITY_STATUS); + //printf("A matrix type: %d\n", sparsityA); + int32_t sparsityB; + GrB_get(B, &sparsityB, GxB_SPARSITY_STATUS); + // printf("B matrix type: %d\n", sparsityB); + int32_t sparsityC; + GrB_get(C, &sparsityC, GxB_SPARSITY_STATUS); +// printf("C matrix type: %d\n", sparsityB); + + float average_time = 0.0; + printf("=================NUBMER OF TESTS: %d=================\n", test_count); + for (int i = 0; i < test_count; i++) + { + // double element; + // GrB_Matrix_extractElement_FP64(&element,C,0,0); + // printf("first C element %f\n",element); + clock_t start = clock(); + info = GrB_mxm(C, NULL, GrB_PLUS_FP64, GxB_PLUS_TIMES_FP64, A, B, NULL); + if (info != GrB_SUCCESS) + { + printf("Multiplication failed!\n"); + return 1; + } + clock_t end = clock(); + float seconds = (float)(end - start) / CLOCKS_PER_SEC; + // printf("test %d: time: %f\n", i + 1, seconds); + printf("%f;\n", seconds); + average_time += seconds; + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(C, 0, i, j); + } + } + if (i==0){ + average_time -= seconds; + } + } + printf("average time: %f\n", average_time / (test_count-1)); + // printf("%f\n", average_time / test_count); + + GrB_Matrix_free(&A); + GrB_Matrix_free(&B); + GrB_Matrix_free(&C); + + GrB_finalize(); + + return 0; +} From 487c91b840c8902406b47528db828aecbb5c6f91 Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Sat, 30 Nov 2024 11:39:42 +0000 Subject: [PATCH 06/34] fix: add defines for RVV implementation --- FactoryKernels/GB_AxB__plus_times_fp32.c | 1 + FactoryKernels/GB_AxB__plus_times_fp64.c | 1 + Source/GB_control.h | 3 +-- Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c | 6 +++++- Source/mxm/factory/GB_AxB_saxpy5_meta.c | 2 +- Source/mxm/include/GB_mxm_shared_definitions.h | 6 ++++++ 6 files changed, 15 insertions(+), 4 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index b0da66935d..2578f50860 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -49,6 +49,7 @@ // special case semirings: #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 1 +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 1 // monoid properties: #define GB_Z_TYPE float diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index eac3354798..13f53fb5b4 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -49,6 +49,7 @@ // special case semirings: #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 1 +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 1 // monoid properties: #define GB_Z_TYPE double diff --git a/Source/GB_control.h b/Source/GB_control.h index 33a69616a1..1ffd07b5f7 100644 --- a/Source/GB_control.h +++ b/Source/GB_control.h @@ -2239,5 +2239,4 @@ #define GxB_NO_TIMES_SECONDJ_INT32 1 #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 - #define GxB_NO_TIMES_SECONDJ1_INT64 1 - + #define GxB_NO_TIMES_SECONDJ1_INT64 1 \ No newline at end of file diff --git a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index 7289ab1282..b91c2ec2cd 100644 --- a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -201,8 +201,12 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) return (GrB_SUCCESS) ; } #endif + } + #endif - #if GB_COMPILER_SUPPORTS_RVV1 + #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION + { + #if GB_COMPILER_SUPPORTS_RVV1 if (cpu_has_avx2) { // RISC-V64 with RVV1.0 diff --git a/Source/mxm/factory/GB_AxB_saxpy5_meta.c b/Source/mxm/factory/GB_AxB_saxpy5_meta.c index efe1d8cc1e..b36d43c0e1 100644 --- a/Source/mxm/factory/GB_AxB_saxpy5_meta.c +++ b/Source/mxm/factory/GB_AxB_saxpy5_meta.c @@ -144,7 +144,7 @@ #endif #endif - #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION + #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION #if GB_COMPILER_SUPPORTS_RVV1 if (GB_Global_cpu_features_rvv ( )) { diff --git a/Source/mxm/include/GB_mxm_shared_definitions.h b/Source/mxm/include/GB_mxm_shared_definitions.h index 8ed47b3f83..85e1861ae1 100644 --- a/Source/mxm/include/GB_mxm_shared_definitions.h +++ b/Source/mxm/include/GB_mxm_shared_definitions.h @@ -71,6 +71,12 @@ #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 0 #endif +//1 if the semiring has a RVV1.0 implementation +#ifndef GB_SEMIRING_HAS_RVV_IMPLEMENTATION +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 0 +#endif + + //------------------------------------------------------------------------------ // special multiply operators //------------------------------------------------------------------------------ From 6069c832647ae972574f41178013ef6baac4edfa Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 14 Dec 2024 15:56:19 +0300 Subject: [PATCH 07/34] feat: implement vectorized with rvv saxpy function --- FactoryKernels/GB_AxB__plus_times_fp64.c | 19 ++------- Source/mxm/template/GB_AxB_saxpy5_lv.c | 54 ++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 15 deletions(-) create mode 100644 Source/mxm/template/GB_AxB_saxpy5_lv.c diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index 13f53fb5b4..cdb8e652e3 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -6,7 +6,8 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ - +#include +#include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP64) #define GB_TYPE_ENABLED 0 @@ -291,18 +292,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 - + #if GB_COMPILER_SUPPORTS_RVV1 GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv @@ -315,8 +305,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("riscvhype!\n"); - #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + #include "mxm/template/GB_AxB_saxpy5_lv.c" } #endif diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c new file mode 100644 index 0000000000..6d13b14f02 --- /dev/null +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -0,0 +1,54 @@ +{ + + //-------------------------------------------------------------------------- + // get C, A, and B + //-------------------------------------------------------------------------- + + const int64_t m = C->vlen; // # of rows of C and A + const int64_t *restrict Bp = B->p; + const int64_t *restrict Bh = B->h; + const int64_t *restrict Bi = B->i; +#ifdef GB_JIT_KERNEL +#define B_iso GB_B_ISO +#else + const bool B_iso = B->iso; +#endif + const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; +#if !GB_B_IS_PATTERN + const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; +#endif + GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; + int tid; +#pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) + for (tid = 0; tid < ntasks; tid++) + { + // get the task descriptor + const int64_t jB_start = B_slice[tid]; + const int64_t jB_end = B_slice[tid + 1]; + + // C(:,jB_start:jB_end-1) += A * B(:,jB_start:jB_end-1) + for (int64_t jB = jB_start; jB < jB_end; jB++) + { + // get B(:,j) and C(:,j) + const int64_t j = GBH_B(Bh, jB); + GB_C_TYPE *restrict Cxj = Cx + (j * m); + const int64_t pB_start = Bp[jB]; + const int64_t pB_end = Bp[jB + 1]; + + size_t vl = __riscv_vsetvl_e64m8(m); + + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj, vl); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + const int64_t k = Bi[pB]; + GB_DECLAREB(bkj); + GB_GETB(bkj, Bx, pB, B_iso); + // const GB_B_TYPE bkj = Bx[pB]; + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + k * m, vl); + vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + } + + __riscv_vse64_v_f64m8(Cxj, vc, vl); + } + } +} From c9824e208da2b16175b89b9028a635e908b79a9f Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 14 Dec 2024 17:52:02 +0300 Subject: [PATCH 08/34] feat: correct include --- FactoryKernels/GB_AxB__plus_times_fp64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index cdb8e652e3..ae516b25ea 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -6,7 +6,9 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ +#ifdef GBRISCV64 #include +#endif #include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP64) From 725303780ea4d80bb940c000dd3ea6ad4ea7e4da Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 18 Dec 2024 22:55:53 +0300 Subject: [PATCH 09/34] feat: correct support of saxpy5 lv in factory kernels. --- FactoryKernels/GB_AxB__plus_times_fp32.c | 21 +++++++-------------- FactoryKernels/GB_AxB__plus_times_fp64.c | 4 ++++ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index 2578f50860..58e6079d43 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -6,7 +6,11 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ - +#ifdef GBRISCV64 +#include +#endif +#include "stdio.h" +#include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP32) #define GB_TYPE_ENABLED 0 @@ -290,18 +294,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors - //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 + //--------------------------------------------------------------------- #if GB_COMPILER_SUPPORTS_RVV1 @@ -316,7 +309,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) ) { printf("riscvhype!\n"); - #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + #include "mxm/template/GB_AxB_saxpy5_lv.c" } #endif diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index ae516b25ea..1092152bd9 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -9,6 +9,7 @@ #ifdef GBRISCV64 #include #endif +#include "stdio.h" #include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP64) @@ -286,6 +287,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("avx2\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } @@ -307,6 +309,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("rvv\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } @@ -334,6 +337,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("vanilla\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } From 202f17f7bff27831905b08d6ae82ec1da8d4fd02 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 18 Dec 2024 22:57:52 +0300 Subject: [PATCH 10/34] fix: correct vector extension define in global structure --- Source/global/GB_Global.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/global/GB_Global.c b/Source/global/GB_Global.c index 72a65691aa..91230b3617 100644 --- a/Source/global/GB_Global.c +++ b/Source/global/GB_Global.c @@ -361,8 +361,7 @@ void GB_Global_cpu_features_query (void) #endif } - #endif - #if GBRISCV64 + #elif GBRISCV64 { //---------------------------------------------------------------------- // xRISC-V architecture: see if RVV1.0 is supported From 31cc089e53d9b64636ed799d430217b0fe6ced24 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 18 Dec 2024 22:58:53 +0300 Subject: [PATCH 11/34] fix: saxpy function with rvv --- Source/mxm/template/GB_AxB_saxpy5_lv.c | 57 +++++++++++++------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c index 6d13b14f02..6ab7de5124 100644 --- a/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -1,54 +1,55 @@ { - - //-------------------------------------------------------------------------- - // get C, A, and B - //-------------------------------------------------------------------------- - const int64_t m = C->vlen; // # of rows of C and A const int64_t *restrict Bp = B->p; const int64_t *restrict Bh = B->h; const int64_t *restrict Bi = B->i; -#ifdef GB_JIT_KERNEL -#define B_iso GB_B_ISO -#else - const bool B_iso = B->iso; -#endif const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; -#if !GB_B_IS_PATTERN const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; -#endif + size_t vl = __riscv_vsetvl_e64m8(m); GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; - int tid; + #pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) - for (tid = 0; tid < ntasks; tid++) + for (int tid = 0; tid < ntasks; tid++) { - // get the task descriptor const int64_t jB_start = B_slice[tid]; const int64_t jB_end = B_slice[tid + 1]; - // C(:,jB_start:jB_end-1) += A * B(:,jB_start:jB_end-1) for (int64_t jB = jB_start; jB < jB_end; jB++) { - // get B(:,j) and C(:,j) const int64_t j = GBH_B(Bh, jB); GB_C_TYPE *restrict Cxj = Cx + (j * m); const int64_t pB_start = Bp[jB]; const int64_t pB_end = Bp[jB + 1]; + for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) + { + vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), vl); - size_t vl = __riscv_vsetvl_e64m8(m); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + const int64_t k = Bi[pB]; + const GB_B_TYPE bkj = Bx[pB]; + vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), vl); + vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + } - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj, vl); - for (int64_t pB = pB_start; pB < pB_end; pB++) - { - const int64_t k = Bi[pB]; - GB_DECLAREB(bkj); - GB_GETB(bkj, Bx, pB, B_iso); - // const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + k * m, vl); - vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); } + int64_t remaining = m % vl; + if (remaining > 0) + { + int64_t i = m - remaining; + vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), remaining); - __riscv_vse64_v_f64m8(Cxj, vc, vl); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + const int64_t k = Bi[pB]; + const GB_B_TYPE bkj = Bx[pB]; + vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); + vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); + } + + __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); + } } } } From fb014b62d6fc86cac38d04e92b6fc80ac284eb2d Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Fri, 27 Dec 2024 12:32:26 +0300 Subject: [PATCH 12/34] refactor: remove straided functions --- FactoryKernels/GB_AxB__plus_times_fp64.c | 3 --- Source/mxm/template/GB_AxB_saxpy5_lv.c | 14 +++++++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index 1092152bd9..fa0bc658c7 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -287,7 +287,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("avx2\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } @@ -309,7 +308,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("rvv\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } @@ -337,7 +335,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("vanilla\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c index 6ab7de5124..5ef338c915 100644 --- a/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -1,5 +1,5 @@ { - const int64_t m = C->vlen; // # of rows of C and A + const int64_t m = C->vlen; const int64_t *restrict Bp = B->p; const int64_t *restrict Bh = B->h; const int64_t *restrict Bi = B->i; @@ -22,33 +22,33 @@ const int64_t pB_end = Bp[jB + 1]; for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { - vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), vl); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), vl); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), vl); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); } - __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); + __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); } int64_t remaining = m % vl; if (remaining > 0) { int64_t i = m - remaining; - vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), remaining); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), remaining); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); } - __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); + __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); } } } From a1237f199b53a44b3e53e5f84ee097c6ca6c10fb Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Mon, 30 Dec 2024 19:09:32 +0300 Subject: [PATCH 13/34] fix: redundant arguments --- Source/mxm/template/GB_AxB_saxpy5_lv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c index 5ef338c915..3ca0e2d717 100644 --- a/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -22,33 +22,33 @@ const int64_t pB_end = Bp[jB + 1]; for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), vl); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), vl); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, vl); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); } - __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); + __riscv_vse64_v_f64m8(Cxj + i, vc, vl); } int64_t remaining = m % vl; if (remaining > 0) { int64_t i = m - remaining; - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), remaining); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, remaining); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, remaining); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); } - __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); + __riscv_vse64_v_f64m8(Cxj + i, vc, remaining); } } } From b52cd65eb5e557585006f4be1b1e172595259378 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 14:57:33 +0300 Subject: [PATCH 14/34] refactor: remove test files --- test/test.c | 118 ---------------------------------------------------- 1 file changed, 118 deletions(-) delete mode 100644 test/test.c diff --git a/test/test.c b/test/test.c deleted file mode 100644 index b9fe12bc59..0000000000 --- a/test/test.c +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -// #include "../GraphBLAS/Source/include/GB_compiler.h" - -int main() -{ - GrB_Info info; - GrB_Matrix A, B, C; - - GrB_Index nrows = 1000, ncols = 1000; - int test_count = 20; - - info = GrB_init(GrB_NONBLOCKING); - if (info != GrB_SUCCESS) - { - printf("Initialization failed!\n"); - return 1; - } - GrB_Matrix_new(&A, GrB_FP64, nrows, ncols); - GrB_Matrix_new(&B, GrB_FP64, nrows, ncols); - GrB_Matrix_new(&C, GrB_FP64, nrows, ncols); - - GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); - // INITIALIZE MATRICES - srand(52); - clock_t start = clock(); - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(A, value, i, j); - } - } - - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(B, value, i, j); - } - } - - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(C, 0, i, j); - } - } - // printf("!%d\n",GBX86); - // printf("!%d\n",GBRISCV64); - clock_t end = clock(); - float seconds = (float)(end - start) / CLOCKS_PER_SEC; - printf("==============MATRIX SIZE: %lux%lu==============\n", nrows, ncols); - printf("==============INITIALIZING TIME: %f==============\n\n", seconds); - - // Set Matrices type - - GrB_set(A, GxB_SPARSE, GxB_SPARSITY_CONTROL); - GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); - GrB_set(C, GxB_FULL, GxB_SPARSITY_CONTROL); - int32_t sparsityA; - GrB_get(A, &sparsityA, GxB_SPARSITY_STATUS); - //printf("A matrix type: %d\n", sparsityA); - int32_t sparsityB; - GrB_get(B, &sparsityB, GxB_SPARSITY_STATUS); - // printf("B matrix type: %d\n", sparsityB); - int32_t sparsityC; - GrB_get(C, &sparsityC, GxB_SPARSITY_STATUS); -// printf("C matrix type: %d\n", sparsityB); - - float average_time = 0.0; - printf("=================NUBMER OF TESTS: %d=================\n", test_count); - for (int i = 0; i < test_count; i++) - { - // double element; - // GrB_Matrix_extractElement_FP64(&element,C,0,0); - // printf("first C element %f\n",element); - clock_t start = clock(); - info = GrB_mxm(C, NULL, GrB_PLUS_FP64, GxB_PLUS_TIMES_FP64, A, B, NULL); - if (info != GrB_SUCCESS) - { - printf("Multiplication failed!\n"); - return 1; - } - clock_t end = clock(); - float seconds = (float)(end - start) / CLOCKS_PER_SEC; - // printf("test %d: time: %f\n", i + 1, seconds); - printf("%f;\n", seconds); - average_time += seconds; - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(C, 0, i, j); - } - } - if (i==0){ - average_time -= seconds; - } - } - printf("average time: %f\n", average_time / (test_count-1)); - // printf("%f\n", average_time / test_count); - - GrB_Matrix_free(&A); - GrB_Matrix_free(&B); - GrB_Matrix_free(&C); - - GrB_finalize(); - - return 0; -} From 8834d86eed900c3373ae03e9c8772180203314f2 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 14:57:59 +0300 Subject: [PATCH 15/34] refactor: remove debug prints --- FactoryKernels/GB_AxB__plus_times_fp32.c | 2 -- FactoryKernels/GB_AxB__plus_times_fp64.c | 1 - 2 files changed, 3 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index 58e6079d43..8e36ef6d65 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -9,7 +9,6 @@ #ifdef GBRISCV64 #include #endif -#include "stdio.h" #include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP32) @@ -308,7 +307,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) const int64_t *B_slice ) { - printf("riscvhype!\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index fa0bc658c7..ae516b25ea 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -9,7 +9,6 @@ #ifdef GBRISCV64 #include #endif -#include "stdio.h" #include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP64) From 574a9403bbf67ffbbade1e09ea9eb45cfb83d382 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:15:56 +0300 Subject: [PATCH 16/34] fix: codegen rvv support --- Source/GB_control.h | 3 ++- Source/codegen/Generator/GB_AxB.c | 18 +++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/Source/GB_control.h b/Source/GB_control.h index 1ffd07b5f7..f8e34f3793 100644 --- a/Source/GB_control.h +++ b/Source/GB_control.h @@ -2239,4 +2239,5 @@ #define GxB_NO_TIMES_SECONDJ_INT32 1 #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 - #define GxB_NO_TIMES_SECONDJ1_INT64 1 \ No newline at end of file + #define GxB_NO_TIMES_SECONDJ1_INT64 1 + \ No newline at end of file diff --git a/Source/codegen/Generator/GB_AxB.c b/Source/codegen/Generator/GB_AxB.c index 6f666862d8..0156b97134 100644 --- a/Source/codegen/Generator/GB_AxB.c +++ b/Source/codegen/Generator/GB_AxB.c @@ -7,6 +7,10 @@ //------------------------------------------------------------------------------ +#ifdef GBRISCV64 +#include +#endif +#include "GB.h" #include "GB_control.h" GB_type_enabled #if GB_TYPE_ENABLED @@ -314,17 +318,6 @@ m4_divert(if_semiring_has_rvv) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 #if GB_COMPILER_SUPPORTS_RVV1 @@ -338,8 +331,7 @@ m4_divert(if_semiring_has_rvv) const int64_t *B_slice ) { - printf("riscvhype!\n"); - #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + #include "mxm/template/GB_AxB_saxpy5_lv.c" } #endif From bf4fb46e8e7d78c213a9d6df1bc8b161ff5017a6 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:21:10 +0300 Subject: [PATCH 17/34] small refactor --- FactoryKernels/GB_AxB__plus_times_fp32.c | 1 + FactoryKernels/GB_AxB__plus_times_fp64.c | 1 + 2 files changed, 2 insertions(+) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index 8e36ef6d65..b1e7aed96e 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -6,6 +6,7 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ + #ifdef GBRISCV64 #include #endif diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index ae516b25ea..8b536323ce 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -6,6 +6,7 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ + #ifdef GBRISCV64 #include #endif From 60586d4810a01ba57a25316e6f290ab37f185f27 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:24:46 +0300 Subject: [PATCH 18/34] refactor: jit kernel rvv support --- .../template/GB_jit_kernel_AxB_saxpy5.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index b91c2ec2cd..901d417d38 100644 --- a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -8,6 +8,9 @@ //------------------------------------------------------------------------------ #include "include/GB_AxB_saxpy3_template.h" +#ifdef GBRISCV64 +#include +#endif GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; @@ -86,17 +89,6 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 #if GB_COMPILER_SUPPORTS_RVV1 @@ -110,7 +102,7 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; const int64_t *B_slice ) { - #include "template/GB_AxB_saxpy5_unrolled.c" + #include "template/GB_AxB_saxpy5_lv.c" } #endif From be0bade5c4fa0feff66834c785f0c3a3e041d133 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:25:49 +0300 Subject: [PATCH 19/34] refactor: new line symbol --- Source/GB_control.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/GB_control.h b/Source/GB_control.h index f8e34f3793..3d13f09389 100644 --- a/Source/GB_control.h +++ b/Source/GB_control.h @@ -2240,4 +2240,3 @@ #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 #define GxB_NO_TIMES_SECONDJ1_INT64 1 - \ No newline at end of file From fa3ef86d91ff4cca573fbc0026583a2b4b8caeb9 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 4 Jan 2025 21:55:42 +0300 Subject: [PATCH 20/34] refactor: very small --- Source/global/GB_Global.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Source/global/GB_Global.c b/Source/global/GB_Global.c index 91230b3617..9a45c1fb7a 100644 --- a/Source/global/GB_Global.c +++ b/Source/global/GB_Global.c @@ -344,7 +344,6 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx2 = false ; } #endif - #if defined ( GBAVX512F ) { // the build system asserts whether or not AVX512F is available @@ -356,7 +355,6 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx512f = false ; } #endif - } #endif From 382c663537773eb9df03a39ae30230a229d3e281 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 5 Feb 2025 19:32:44 +0300 Subject: [PATCH 21/34] feat: implement float support --- FactoryKernels/GB_AxB__plus_times_fp32.c | 8 ++++++++ FactoryKernels/GB_AxB__plus_times_fp64.c | 9 ++++++++- Source/mxm/template/GB_AxB_saxpy5_lv.c | 20 +++++++++----------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index b1e7aed96e..564b07ce00 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -24,6 +24,14 @@ #include "assign/GB_bitmap_assign_methods.h" #include "FactoryKernels/GB_AxB__include2.h" +// riscv intrinsics + +#define VSETVL(x) __riscv_vsetvl_e32m8(x) +#define VLE(x,y) __riscv_vle32_v_f32m8(x, y) +#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f32m8(x, y, z, w) +#define VSE(x,y,z) __riscv_vse32_v_f32m8(x, y, z) +#define VECTORTYPE vfloat32m8_t + // semiring operators: #define GB_MULTADD(z,a,b,i,k,j) z += (a*b) #define GB_MULT(z,a,b,i,k,j) z = (a*b) diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index 8b536323ce..efcd944341 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -24,6 +24,14 @@ #include "assign/GB_bitmap_assign_methods.h" #include "FactoryKernels/GB_AxB__include2.h" +// riscv intrinsics + +#define VSETVL(x) __riscv_vsetvl_e64m8(x) +#define VLE(x,y) __riscv_vle64_v_f64m8(x, y) +#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f64m8(x, y, z, w) +#define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) +#define VECTORTYPE vfloat64m8_t + // semiring operators: #define GB_MULTADD(z,a,b,i,k,j) z += (a*b) #define GB_MULT(z,a,b,i,k,j) z = (a*b) @@ -295,7 +303,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #if GB_COMPILER_SUPPORTS_RVV1 GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c index 3ca0e2d717..dde2f6e836 100644 --- a/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -5,7 +5,7 @@ const int64_t *restrict Bi = B->i; const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; - size_t vl = __riscv_vsetvl_e64m8(m); + size_t vl = VSETVL(m); GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; #pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) @@ -22,33 +22,31 @@ const int64_t pB_end = Bp[jB + 1]; for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, vl); - + VECTORTYPE vc = VLE(Cxj + i, vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, vl); - vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + VECTORTYPE va = VLE(Ax + i + k * m, vl); + vc = VFMACC(vc, bkj, va, vl); } - __riscv_vse64_v_f64m8(Cxj + i, vc, vl); + VSE(Cxj + i, vc, vl); } int64_t remaining = m % vl; if (remaining > 0) { int64_t i = m - remaining; - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, remaining); - + VECTORTYPE vc = VLE(Cxj + i, remaining); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, remaining); - vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); + VECTORTYPE va = VLE(Ax + i + k * m, remaining); + vc = VFMACC(vc, bkj, va, remaining); } - __riscv_vse64_v_f64m8(Cxj + i, vc, remaining); + VSE(Cxj + i, vc, remaining); } } } From 4b83262aaf201814a3a6d7e957349f00e42d18c5 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 8 Feb 2025 14:52:03 +0300 Subject: [PATCH 22/34] feat: implement riscv support in cpufeatures --- Source/cpu/GB_cpu_features_impl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/cpu/GB_cpu_features_impl.c b/Source/cpu/GB_cpu_features_impl.c index a51a3f69d8..c4eb9bce6b 100644 --- a/Source/cpu/GB_cpu_features_impl.c +++ b/Source/cpu/GB_cpu_features_impl.c @@ -44,6 +44,7 @@ #include "src/impl_x86_freebsd.c" #include "src/impl_x86_linux_or_android.c" #include "src/impl_x86_windows.c" + #include "src/impl_riscv_linux.c" #if GBX86 #if (defined(__apple__) || defined(__APPLE__) || defined(__MACH__)) // needed for src/impl_x86_macos.c: From 3345b077f1c73bab9cb10c3851fcb1385ce9c665 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov <107667059+suvorovrain@users.noreply.github.com> Date: Sun, 9 Feb 2025 15:38:19 +0300 Subject: [PATCH 23/34] refactor: add new line symbol --- Source/GB_control.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/GB_control.h b/Source/GB_control.h index 3d13f09389..33a69616a1 100644 --- a/Source/GB_control.h +++ b/Source/GB_control.h @@ -2240,3 +2240,4 @@ #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 #define GxB_NO_TIMES_SECONDJ1_INT64 1 + From d97ebc05c8b54371215d4593c750212039f251cc Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Mon, 10 Feb 2025 20:17:19 +0300 Subject: [PATCH 24/34] fix: update target --- Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index 901d417d38..09cafa5589 100644 --- a/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -92,7 +92,7 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; #if GB_COMPILER_SUPPORTS_RVV1 - GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_rvv + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv ( GrB_Matrix C, const GrB_Matrix A, From 6d4dc1ada36f3d86e2ed3c9dad9ea0582a9346a1 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 11 Feb 2025 00:31:44 +0300 Subject: [PATCH 25/34] refactor: rename global rvv var --- GraphBLAS/rename/GB_rename.h | 2 +- Source/global/GB_Global.c | 14 +++++++------- Source/global/GB_Global.h | 2 +- Source/mxm/factory/GB_AxB_saxpy5_meta.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/GraphBLAS/rename/GB_rename.h b/GraphBLAS/rename/GB_rename.h index 2380582ca8..95e783b997 100644 --- a/GraphBLAS/rename/GB_rename.h +++ b/GraphBLAS/rename/GB_rename.h @@ -390,7 +390,7 @@ #define GB_Global_calloc_function_set GM_Global_calloc_function_set #define GB_Global_cpu_features_avx2 GM_Global_cpu_features_avx2 #define GB_Global_cpu_features_avx512f GM_Global_cpu_features_avx512f -#define GB_Global_cpu_features_rvv GM_Global_cpu_features_rvv +#define GB_Global_cpu_features_rvv_1_0 GM_Global_cpu_features_rvv_1_0 #define GB_Global_cpu_features_query GM_Global_cpu_features_query #define GB_Global_flush_get GM_Global_flush_get #define GB_Global_flush_set GM_Global_flush_set diff --git a/Source/global/GB_Global.c b/Source/global/GB_Global.c index 9a45c1fb7a..3894147e5d 100644 --- a/Source/global/GB_Global.c +++ b/Source/global/GB_Global.c @@ -133,7 +133,7 @@ typedef struct bool cpu_features_avx2 ; // x86_64 with AVX2 bool cpu_features_avx512f ; // x86_64 with AVX512f - bool cpu_features_rvv ; // RISC-V with RVV1.0 + bool cpu_features_rvv_1_0 ; // RISC-V with RVV1.0 //-------------------------------------------------------------------------- // integer control @@ -227,7 +227,7 @@ static GB_Global_struct GB_Global = // CPU features .cpu_features_avx2 = false, // x86_64 with AVX2 .cpu_features_avx512f = false, // x86_64 with AVX512f - .cpu_features_rvv = false, // RISC-V with RVV1.0 + .cpu_features_rvv_1_0 = false, // RISC-V with RVV1.0 // integer control .p_control = (int8_t) 32, @@ -368,12 +368,12 @@ void GB_Global_cpu_features_query (void) #if defined ( GBRVV ) { // the build system asserts whether or not RVV1.0 is available - GB_Global.cpu_features_rvv = (bool) (GBRVV) ; + GB_Global.cpu_features_rvv_1_0 = (bool) (GBRVV) ; } #else { // RVV1.0 not available - GB_Global.cpu_features_rvv = false ; + GB_Global.cpu_features_rvv_1_0 = false ; } #endif @@ -387,7 +387,7 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx2 = false ; GB_Global.cpu_features_avx512f = false ; - GB_Global.cpu_features_rvv = false ; + GB_Global.cpu_features_rvv_1_0 = false ; } #endif @@ -403,9 +403,9 @@ bool GB_Global_cpu_features_avx512f (void) return (GB_Global.cpu_features_avx512f) ; } -bool GB_Global_cpu_features_rvv (void) +bool GB_Global_cpu_features_rvv_1_0 (void) { - return (GB_Global.cpu_features_rvv) ; + return (GB_Global.cpu_features_rvv_1_0) ; } //------------------------------------------------------------------------------ diff --git a/Source/global/GB_Global.h b/Source/global/GB_Global.h index 4a3ae12256..e54687ffa5 100644 --- a/Source/global/GB_Global.h +++ b/Source/global/GB_Global.h @@ -17,7 +17,7 @@ void GB_Global_cpu_features_query (void) ; bool GB_Global_cpu_features_avx2 (void) ; bool GB_Global_cpu_features_avx512f (void) ; -bool GB_Global_cpu_features_rvv (void) ; +bool GB_Global_cpu_features_rvv_1_0 (void) ; void GB_Global_mode_set (int mode) ; int GB_Global_mode_get (void) ; diff --git a/Source/mxm/factory/GB_AxB_saxpy5_meta.c b/Source/mxm/factory/GB_AxB_saxpy5_meta.c index b36d43c0e1..4f26e0803b 100644 --- a/Source/mxm/factory/GB_AxB_saxpy5_meta.c +++ b/Source/mxm/factory/GB_AxB_saxpy5_meta.c @@ -146,7 +146,7 @@ #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION #if GB_COMPILER_SUPPORTS_RVV1 - if (GB_Global_cpu_features_rvv ( )) + if (GB_Global_cpu_features_rvv_1_0 ( )) { GB_AxB_saxpy5_unrolled_rvv (C, A, B, ntasks, nthreads, B_slice) ; From f1c4a25bd05d66421ef9a23a1933e13853774607 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Mon, 17 Feb 2025 23:11:46 +0300 Subject: [PATCH 26/34] refactor: correct CMake update --- CMakeLists.txt | 20 ++++++++++++++++++++ GraphBLAS/CMakeLists.txt | 27 --------------------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1efd8bdcc..0baa264911 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -162,6 +162,26 @@ if ( DEFINED GBAVX512F ) endif ( ) endif ( ) +#------------------------------------------------------------------------------- +# RISC-V +#------------------------------------------------------------------------------- + +if ( DEFINED GBRISCV64 ) + if ( GBRISCV64 ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) + endif ( ) +endif ( ) + +if ( DEFINED GBRVV ) + if ( GBRVV ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) + endif ( ) +endif ( ) + #------------------------------------------------------------------------------- # check compiler features #------------------------------------------------------------------------------- diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index c1a49cc785..14546666be 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -122,32 +122,6 @@ if ( DEFINED GBAVX512F ) endif ( ) endif ( ) -#------------------------------------------------------------------------------- -# RISC-V -#------------------------------------------------------------------------------- - -if ( DEFINED GBRISCV64 ) - if ( GBRISCV64 ) - set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) - else ( ) - set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) - endif ( ) -endif ( ) - -if ( DEFINED GBRVV ) - if ( GBRVV ) - set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) - else ( ) - set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) - endif ( ) -endif ( ) - -#------------------------------------------------------------------------------- -# check compiler features -#------------------------------------------------------------------------------- - -include ( GraphBLAS_complex ) - #------------------------------------------------------------------------------- # determine build type #------------------------------------------------------------------------------- @@ -356,4 +330,3 @@ include ( GraphBLAS_JIT_configure ) configure_file ( "../Config/GB_config.h.in" "${PROJECT_SOURCE_DIR}/Config/GB_config.h" NEWLINE_STYLE LF ) - From 2e40e4779be884a26ec31844a64ad7feea635321 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov <107667059+suvorovrain@users.noreply.github.com> Date: Mon, 17 Feb 2025 23:20:14 +0300 Subject: [PATCH 27/34] refactor: new line symbol --- GraphBLAS/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index 14546666be..c3ee54779b 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -330,3 +330,4 @@ include ( GraphBLAS_JIT_configure ) configure_file ( "../Config/GB_config.h.in" "${PROJECT_SOURCE_DIR}/Config/GB_config.h" NEWLINE_STYLE LF ) + From 04cdb9a5d4263d0ab0a42e1de26bd35953655608 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Mon, 17 Feb 2025 23:22:16 +0300 Subject: [PATCH 28/34] refactor: correct include --- FactoryKernels/GB_AxB__plus_times_fp32.c | 1 - FactoryKernels/GB_AxB__plus_times_fp64.c | 1 - Source/codegen/Generator/GB_AxB.c | 1 - 3 files changed, 3 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index 564b07ce00..4013dc18b4 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -10,7 +10,6 @@ #ifdef GBRISCV64 #include #endif -#include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP32) #define GB_TYPE_ENABLED 0 diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index efcd944341..de315175c9 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -10,7 +10,6 @@ #ifdef GBRISCV64 #include #endif -#include "GB.h" #include "GB_control.h" #if defined (GxB_NO_FP64) #define GB_TYPE_ENABLED 0 diff --git a/Source/codegen/Generator/GB_AxB.c b/Source/codegen/Generator/GB_AxB.c index 0156b97134..0f52d6f6d7 100644 --- a/Source/codegen/Generator/GB_AxB.c +++ b/Source/codegen/Generator/GB_AxB.c @@ -10,7 +10,6 @@ #ifdef GBRISCV64 #include #endif -#include "GB.h" #include "GB_control.h" GB_type_enabled #if GB_TYPE_ENABLED From 4b5bbefa0df66ffd74bcbc5f006254d6bca666dc Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 18 Feb 2025 15:44:59 +0300 Subject: [PATCH 29/34] fix: add missing riscv defines into cpu_features --- cpu_features/include/internal/hwcaps.h | 12 ++++++++++++ cpu_features/src/utils/list_cpu_features.c | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/cpu_features/include/internal/hwcaps.h b/cpu_features/include/internal/hwcaps.h index 59e16576b8..fc078d4acf 100644 --- a/cpu_features/include/internal/hwcaps.h +++ b/cpu_features/include/internal/hwcaps.h @@ -134,6 +134,18 @@ CPU_FEATURES_START_CPP_NAMESPACE #define ARM_HWCAP2_SHA2 (1UL << 3) #define ARM_HWCAP2_CRC32 (1UL << 4) +// https://elixir.bootlin.com/linux/latest/source/arch/riscv/include/uapi/asm/hwcap.h +#define RISCV_HWCAP_32 0x32 +#define RISCV_HWCAP_64 0x64 +#define RISCV_HWCAP_128 0x128 +#define RISCV_HWCAP_M (1UL << ('M' - 'A')) +#define RISCV_HWCAP_A (1UL << ('A' - 'A')) +#define RISCV_HWCAP_F (1UL << ('F' - 'A')) +#define RISCV_HWCAP_D (1UL << ('D' - 'A')) +#define RISCV_HWCAP_Q (1UL << ('Q' - 'A')) +#define RISCV_HWCAP_C (1UL << ('C' - 'A')) +#define RISCV_HWCAP_V (1UL << ('V' - 'A')) + // http://elixir.free-electrons.com/linux/latest/source/arch/mips/include/uapi/asm/hwcap.h #define MIPS_HWCAP_R6 (1UL << 0) #define MIPS_HWCAP_MSA (1UL << 1) diff --git a/cpu_features/src/utils/list_cpu_features.c b/cpu_features/src/utils/list_cpu_features.c index 83cd387f08..00267e0091 100644 --- a/cpu_features/src/utils/list_cpu_features.c +++ b/cpu_features/src/utils/list_cpu_features.c @@ -423,6 +423,7 @@ static Node* CreateTree(void) { AddMapEntry(root, "microarchitecture", CreateString(strings.type.base_platform)); AddFlags(root, &info.features); +<<<<<<< HEAD #elif defined(CPU_FEATURES_ARCH_S390X) const S390XInfo info = GetS390XInfo(); const S390XPlatformStrings strings = GetS390XPlatformStrings(); @@ -431,16 +432,22 @@ static Node* CreateTree(void) { AddMapEntry(root, "model", CreateString(strings.type.platform)); AddMapEntry(root, "# processors", CreateInt(strings.num_processors)); AddFlags(root, &info.features); +======= +>>>>>>> 2bc06c36d7 (fix: add missing riscv defines into cpu_features) #elif defined(CPU_FEATURES_ARCH_RISCV) const RiscvInfo info = GetRiscvInfo(); AddMapEntry(root, "arch", CreateString("risc-v")); AddMapEntry(root, "vendor", CreateString(info.vendor)); AddMapEntry(root, "microarchitecture", CreateString(info.uarch)); +<<<<<<< HEAD AddFlags(root, &info.features); #elif defined(CPU_FEATURES_ARCH_LOONGARCH) const LoongArchInfo info = GetLoongArchInfo(); AddMapEntry(root, "arch", CreateString("loongarch")); AddFlags(root, &info.features); +======= + AddFlags(root, &info.features); +>>>>>>> 2bc06c36d7 (fix: add missing riscv defines into cpu_features) #endif return root; } From 2bc74551e301c2f6e653cbc88467f73b2f01048e Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 18 Feb 2025 15:48:53 +0300 Subject: [PATCH 30/34] fix: apply new version of GB changes --- FactoryKernels/GB_AxB__plus_times_fp32.c | 13 +++++------- FactoryKernels/GB_AxB__plus_times_fp64.c | 13 +++++------- Source/mxm/template/GB_AxB_saxpy5_lv.c | 25 +++++++++++++----------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index 4013dc18b4..b274a35f98 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -9,6 +9,11 @@ #ifdef GBRISCV64 #include +#define VSETVL(x) __riscv_vsetvl_e32m8(x) +#define VLE(x,y) __riscv_vle32_v_f32m8(x, y) +#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f32m8(x, y, z, w) +#define VSE(x,y,z) __riscv_vse32_v_f32m8(x, y, z) +#define VECTORTYPE vfloat32m8_t #endif #include "GB_control.h" #if defined (GxB_NO_FP32) @@ -23,14 +28,6 @@ #include "assign/GB_bitmap_assign_methods.h" #include "FactoryKernels/GB_AxB__include2.h" -// riscv intrinsics - -#define VSETVL(x) __riscv_vsetvl_e32m8(x) -#define VLE(x,y) __riscv_vle32_v_f32m8(x, y) -#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f32m8(x, y, z, w) -#define VSE(x,y,z) __riscv_vse32_v_f32m8(x, y, z) -#define VECTORTYPE vfloat32m8_t - // semiring operators: #define GB_MULTADD(z,a,b,i,k,j) z += (a*b) #define GB_MULT(z,a,b,i,k,j) z = (a*b) diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index de315175c9..f4de12d2b6 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -9,6 +9,11 @@ #ifdef GBRISCV64 #include +#define VSETVL(x) __riscv_vsetvl_e64m8(x) +#define VLE(x,y) __riscv_vle64_v_f64m8(x, y) +#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f64m8(x, y, z, w) +#define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) +#define VECTORTYPE vfloat64m8_t #endif #include "GB_control.h" #if defined (GxB_NO_FP64) @@ -23,14 +28,6 @@ #include "assign/GB_bitmap_assign_methods.h" #include "FactoryKernels/GB_AxB__include2.h" -// riscv intrinsics - -#define VSETVL(x) __riscv_vsetvl_e64m8(x) -#define VLE(x,y) __riscv_vle64_v_f64m8(x, y) -#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f64m8(x, y, z, w) -#define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) -#define VECTORTYPE vfloat64m8_t - // semiring operators: #define GB_MULTADD(z,a,b,i,k,j) z += (a*b) #define GB_MULT(z,a,b,i,k,j) z = (a*b) diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c index dde2f6e836..4239dfa06a 100644 --- a/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -1,8 +1,9 @@ { const int64_t m = C->vlen; - const int64_t *restrict Bp = B->p; - const int64_t *restrict Bh = B->h; - const int64_t *restrict Bi = B->i; + GB_Bp_DECLARE (Bp, const) ; GB_Bp_PTR (Bp, B) ; + GB_Bh_DECLARE (Bh, const) ; GB_Bh_PTR (Bh, B) ; + GB_Bi_DECLARE (Bi, const) ; GB_Bi_PTR (Bi, B) ; + const bool B_iso = B->iso ; const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; size_t vl = VSETVL(m); @@ -16,17 +17,18 @@ for (int64_t jB = jB_start; jB < jB_end; jB++) { - const int64_t j = GBH_B(Bh, jB); - GB_C_TYPE *restrict Cxj = Cx + (j * m); - const int64_t pB_start = Bp[jB]; - const int64_t pB_end = Bp[jB + 1]; + const int64_t j = GBh_B (Bh, jB) ; + GB_C_TYPE *restrict Cxj = Cx + (j * m) ; + const int64_t pB_start = GB_IGET (Bp, jB) ; + const int64_t pB_end = GB_IGET (Bp, jB+1) ; for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { VECTORTYPE vc = VLE(Cxj + i, vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { - const int64_t k = Bi[pB]; - const GB_B_TYPE bkj = Bx[pB]; + const int64_t k = GB_IGET (Bi, pB) ; + GB_DECLAREB (bkj) ; + GB_GETB (bkj, Bx, pB, B_iso) ; VECTORTYPE va = VLE(Ax + i + k * m, vl); vc = VFMACC(vc, bkj, va, vl); } @@ -40,8 +42,9 @@ VECTORTYPE vc = VLE(Cxj + i, remaining); for (int64_t pB = pB_start; pB < pB_end; pB++) { - const int64_t k = Bi[pB]; - const GB_B_TYPE bkj = Bx[pB]; + const int64_t k = GB_IGET (Bi, pB) ; + GB_DECLAREB (bkj) ; + GB_GETB (bkj, Bx, pB, B_iso) ; VECTORTYPE va = VLE(Ax + i + k * m, remaining); vc = VFMACC(vc, bkj, va, remaining); } From ab758ff0c61cf0d281ee3bc31dc00bf048a858e4 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 18 Feb 2025 19:23:32 +0300 Subject: [PATCH 31/34] fix: correct include + debug --- FactoryKernels/GB_AxB__plus_times_fp32.c | 2 +- FactoryKernels/GB_AxB__plus_times_fp64.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index b274a35f98..b0b6a76cf7 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -7,7 +7,7 @@ //------------------------------------------------------------------------------ -#ifdef GBRISCV64 +#if __riscv #include #define VSETVL(x) __riscv_vsetvl_e32m8(x) #define VLE(x,y) __riscv_vle32_v_f32m8(x, y) diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index f4de12d2b6..218d74c367 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -7,7 +7,7 @@ //------------------------------------------------------------------------------ -#ifdef GBRISCV64 +#if __riscv #include #define VSETVL(x) __riscv_vsetvl_e64m8(x) #define VLE(x,y) __riscv_vle64_v_f64m8(x, y) @@ -15,6 +15,7 @@ #define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) #define VECTORTYPE vfloat64m8_t #endif +#include #include "GB_control.h" #if defined (GxB_NO_FP64) #define GB_TYPE_ENABLED 0 @@ -311,6 +312,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("rvv\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } @@ -338,6 +340,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("van\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } From 9b23496c9b77545cca879984a7cef5e62e2a399f Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 19 Feb 2025 13:25:02 +0300 Subject: [PATCH 32/34] fix: cpu_features correct usage --- CMakeLists.txt | 4 +++- FactoryKernels/GB_AxB__plus_times_fp32.c | 15 +++++++-------- FactoryKernels/GB_AxB__plus_times_fp64.c | 17 ++++++++--------- GraphBLAS/CMakeLists.txt | 22 ++++++++++++++++++++++ Source/cpu/GB_cpu_features.h | 6 +++++- Source/global/GB_Global.c | 18 +++++++++++++++--- 6 files changed, 60 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0baa264911..0209e87209 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -163,11 +163,12 @@ if ( DEFINED GBAVX512F ) endif ( ) #------------------------------------------------------------------------------- -# RISC-V +# RISC-V #------------------------------------------------------------------------------- if ( DEFINED GBRISCV64 ) if ( GBRISCV64 ) + # default: this is detected automatically, but can be set here also set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) else ( ) set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) @@ -176,6 +177,7 @@ endif ( ) if ( DEFINED GBRVV ) if ( GBRVV ) + # default: this is detected automatically, but can be set here also set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) else ( ) set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) diff --git a/FactoryKernels/GB_AxB__plus_times_fp32.c b/FactoryKernels/GB_AxB__plus_times_fp32.c index b0b6a76cf7..ff98472642 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -7,14 +7,6 @@ //------------------------------------------------------------------------------ -#if __riscv -#include -#define VSETVL(x) __riscv_vsetvl_e32m8(x) -#define VLE(x,y) __riscv_vle32_v_f32m8(x, y) -#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f32m8(x, y, z, w) -#define VSE(x,y,z) __riscv_vse32_v_f32m8(x, y, z) -#define VECTORTYPE vfloat32m8_t -#endif #include "GB_control.h" #if defined (GxB_NO_FP32) #define GB_TYPE_ENABLED 0 @@ -302,6 +294,13 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) #if GB_COMPILER_SUPPORTS_RVV1 + #include + #define VSETVL(x) __riscv_vsetvl_e32m8(x) + #define VLE(x,y) __riscv_vle32_v_f32m8(x, y) + #define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f32m8(x, y, z, w) + #define VSE(x,y,z) __riscv_vse32_v_f32m8(x, y, z) + #define VECTORTYPE vfloat32m8_t + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv ( GrB_Matrix C, diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index 218d74c367..3ab9ca4afa 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -7,15 +7,6 @@ //------------------------------------------------------------------------------ -#if __riscv -#include -#define VSETVL(x) __riscv_vsetvl_e64m8(x) -#define VLE(x,y) __riscv_vle64_v_f64m8(x, y) -#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f64m8(x, y, z, w) -#define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) -#define VECTORTYPE vfloat64m8_t -#endif -#include #include "GB_control.h" #if defined (GxB_NO_FP64) #define GB_TYPE_ENABLED 0 @@ -300,8 +291,16 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- + #if GB_COMPILER_SUPPORTS_RVV1 + #include + #define VSETVL(x) __riscv_vsetvl_e64m8(x) + #define VLE(x,y) __riscv_vle64_v_f64m8(x, y) + #define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f64m8(x, y, z, w) + #define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) + #define VECTORTYPE vfloat64m8_t + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv ( GrB_Matrix C, diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index c3ee54779b..c26b1053da 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -122,6 +122,28 @@ if ( DEFINED GBAVX512F ) endif ( ) endif ( ) +#------------------------------------------------------------------------------- +# RISC-V +#------------------------------------------------------------------------------- + +if ( DEFINED GBRISCV64 ) + if ( GBRISCV64 ) + # default: this is detected automatically, but can be set here also + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) + endif ( ) +endif ( ) + +if ( DEFINED GBRVV ) + if ( GBRVV ) + # default: this is detected automatically, but can be set here also + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) + endif ( ) +endif ( ) + #------------------------------------------------------------------------------- # determine build type #------------------------------------------------------------------------------- diff --git a/Source/cpu/GB_cpu_features.h b/Source/cpu/GB_cpu_features.h index bb9775b4fc..5ec7658d39 100644 --- a/Source/cpu/GB_cpu_features.h +++ b/Source/cpu/GB_cpu_features.h @@ -38,9 +38,13 @@ #include "cpu_features_macros.h" #define STACK_LINE_READER_BUFFER_SIZE 1024 #if GBX86 - // Intel x86 (also AMD): other architectures are not exploited + // Intel x86 (also AMD) #include "cpuinfo_x86.h" #endif + #if GBRISCV64 + // RISC-V + #include "cpuinfo_riscv.h" + #endif #endif #endif diff --git a/Source/global/GB_Global.c b/Source/global/GB_Global.c index 3894147e5d..477fa2696e 100644 --- a/Source/global/GB_Global.c +++ b/Source/global/GB_Global.c @@ -361,20 +361,32 @@ void GB_Global_cpu_features_query (void) } #elif GBRISCV64 { + //---------------------------------------------------------------------- - // xRISC-V architecture: see if RVV1.0 is supported + // RISC-V architecture: see if RVV1.0 is supported //---------------------------------------------------------------------- - #if defined ( GBRVV ) + #if !defined ( GBNCPUFEAT ) + { + // Google's cpu_features package is available: use run-time tests + RiscvFeatures features = GetRiscvInfo ().features ; + GB_Global.cpu_features_rvv_1_0 = (bool) (features.V) ; + + } + #else { + #if defined ( GBRVV ) + { // the build system asserts whether or not RVV1.0 is available GB_Global.cpu_features_rvv_1_0 = (bool) (GBRVV) ; - } + } #else { // RVV1.0 not available GB_Global.cpu_features_rvv_1_0 = false ; } + #endif + } #endif } From 62bf4b5ecca9d3d5ddeb457ccfdf030fb550b920 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 19 Feb 2025 13:29:39 +0300 Subject: [PATCH 33/34] refactor: remove debug prints --- FactoryKernels/GB_AxB__plus_times_fp64.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/FactoryKernels/GB_AxB__plus_times_fp64.c b/FactoryKernels/GB_AxB__plus_times_fp64.c index 3ab9ca4afa..e25ca0b158 100644 --- a/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -311,7 +311,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("rvv\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } @@ -339,7 +338,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("van\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } From fb9b7a7fc64c31ecc70a4b98314a781594ed5921 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 19 Feb 2025 13:49:48 +0300 Subject: [PATCH 34/34] refactor: comments for saxpy function --- Source/mxm/template/GB_AxB_saxpy5_lv.c | 38 +++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/Source/mxm/template/GB_AxB_saxpy5_lv.c b/Source/mxm/template/GB_AxB_saxpy5_lv.c index 4239dfa06a..97b53ace43 100644 --- a/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -1,40 +1,70 @@ +//------------------------------------------------------------------------------ +// GB_AxB_saxpy5_lv.c: C+=A*B when C is full +//------------------------------------------------------------------------------ + { - const int64_t m = C->vlen; + + //-------------------------------------------------------------------------- + // get C, A, and B + //-------------------------------------------------------------------------- + + const int64_t m = C->vlen; // # of rows of C and A GB_Bp_DECLARE (Bp, const) ; GB_Bp_PTR (Bp, B) ; GB_Bh_DECLARE (Bh, const) ; GB_Bh_PTR (Bh, B) ; GB_Bi_DECLARE (Bi, const) ; GB_Bi_PTR (Bi, B) ; const bool B_iso = B->iso ; const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; + // get the max number of elements that vector can store size_t vl = VSETVL(m); GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; -#pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) + //-------------------------------------------------------------------------- + // C += A*B where A is full (and not iso or pattern-only) + //-------------------------------------------------------------------------- + + #pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) for (int tid = 0; tid < ntasks; tid++) { + // get the task descriptor const int64_t jB_start = B_slice[tid]; const int64_t jB_end = B_slice[tid + 1]; - + // C(:,jB_start:jB_end-1) += A * B(:,jB_start:jB_end-1) for (int64_t jB = jB_start; jB < jB_end; jB++) { + // get B(:,j) and C(:,j) const int64_t j = GBh_B (Bh, jB) ; GB_C_TYPE *restrict Cxj = Cx + (j * m) ; const int64_t pB_start = GB_IGET (Bp, jB) ; const int64_t pB_end = GB_IGET (Bp, jB+1) ; + + //------------------------------------------------------------------ + // C(:,j) += A*B(:,j), on sets of vl rows of C and A at a time + //------------------------------------------------------------------ + for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { + // get C(i:i+vl,j) VECTORTYPE vc = VLE(Cxj + i, vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { + // bkj = B(k,j) const int64_t k = GB_IGET (Bi, pB) ; GB_DECLAREB (bkj) ; GB_GETB (bkj, Bx, pB, B_iso) ; + // get A(i,k) VECTORTYPE va = VLE(Ax + i + k * m, vl); + // C(i:i+15,j) += A(i:i+15,k)*B(k,j) vc = VFMACC(vc, bkj, va, vl); } - + // save C(i:i+15,j) VSE(Cxj + i, vc, vl); } + + //------------------------------------------------------------------ + // lines 179-1036 from GB_AxB_saxpy5_unrolled.c + //------------------------------------------------------------------ + int64_t remaining = m % vl; if (remaining > 0) {