diff --git a/.gitignore b/.gitignore index 135ff58..182f48c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ ParSpMatVec +*.log diff --git a/deps/src/A_mul_B.f90 b/deps/src/A_mul_B.f90 index 82a1c5c..fd965bb 100644 --- a/deps/src/A_mul_B.f90 +++ b/deps/src/A_mul_B.f90 @@ -1,92 +1,178 @@ - -subroutine a_mul_b_rr( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rr -!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rr_':: a_mul_b_rr - -! y = beta*y + alpha * A*x - -use omp_lib -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec + +subroutine a_mul_b_rr_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rr_32 +!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rr_32_':: a_mul_b_rr_32 + +! y = beta*y + alpha * A*x + +use omp_lib +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec integer(kind=8),intent(in):: n ! # of columns in A integer(kind=8),intent(in):: m ! # of rows in A - -real(kind=8),intent(in):: alpha, beta + +real(kind=8),intent(in):: alpha, beta +real(kind=8),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +real(kind=8),intent(in):: x(n,nvec) +real(kind=8),intent(inout):: y(m,nvec) + +integer ivec, i, j1,j2, j, jaj, mythread, mm, jm +real(kind=8) xi +real(kind=8),allocatable:: yt(:) + +include "A_mul_B.fi" + +return +end subroutine a_mul_b_rr_32 + +subroutine a_mul_b_rr_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rr_64 +!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rr_64_':: a_mul_b_rr_64 + +! y = beta*y + alpha * A*x + +use omp_lib +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! 
# of rows in A + +real(kind=8),intent(in):: alpha, beta real(kind=8),intent(in):: A(*) integer(kind=8),intent(in):: jA(*), iA(n+1) real(kind=8),intent(in):: x(n,nvec) real(kind=8),intent(inout):: y(m,nvec) - -integer ivec, i, j1,j2, j, jaj, mythread, mm, jm -real(kind=8) xi -real(kind=8),allocatable:: yt(:) - -include "A_mul_B.fi" - -return -end subroutine a_mul_b_rr - -!-------------------------------------------------------------------- - -subroutine a_mul_b_rc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rc -!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rc_':: a_mul_b_rc - -! y = beta*y + alpha * A*x - -use omp_lib -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec + +integer ivec, i, j1,j2, j, jaj, mythread, mm, jm +real(kind=8) xi +real(kind=8),allocatable:: yt(:) + +include "A_mul_B.fi" + +return +end subroutine a_mul_b_rr_64 + +!-------------------------------------------------------------------- + +subroutine a_mul_b_rc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rc_32 +!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rc_32_':: a_mul_b_rc_32 + +! y = beta*y + alpha * A*x + +use omp_lib +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec integer(kind=8),intent(in):: n ! # of columns in A integer(kind=8),intent(in):: m ! 
# of rows in A - -complex(kind=8),intent(in):: alpha, beta + +complex(kind=8),intent(in):: alpha, beta +real(kind=8),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +complex(kind=8),intent(in):: x(n,nvec) +complex(kind=8),intent(inout):: y(m,nvec) + +integer ivec, i, j1,j2, j, jaj, mythread, mm, jm +complex(kind=8) xi +complex(kind=8),allocatable:: yt(:) + +include "A_mul_B.fi" + +return +end subroutine a_mul_b_rc_32 + +subroutine a_mul_b_rc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rc_64 +!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rc_64_':: a_mul_b_rc_64 + +! y = beta*y + alpha * A*x + +use omp_lib +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +complex(kind=8),intent(in):: alpha, beta real(kind=8),intent(in):: A(*) integer(kind=8),intent(in):: jA(*), iA(n+1) complex(kind=8),intent(in):: x(n,nvec) complex(kind=8),intent(inout):: y(m,nvec) - -integer ivec, i, j1,j2, j, jaj, mythread, mm, jm -complex(kind=8) xi -complex(kind=8),allocatable:: yt(:) - -include "A_mul_B.fi" - -return -end subroutine a_mul_b_rc - -!-------------------------------------------------------------------- - -subroutine a_mul_b_cc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_cc -!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_cc_':: a_mul_b_cc - -! 
y = beta*y + alpha * A*x - -use omp_lib -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec + +integer ivec, i, j1,j2, j, jaj, mythread, mm, jm +complex(kind=8) xi +complex(kind=8),allocatable:: yt(:) + +include "A_mul_B.fi" + +return +end subroutine a_mul_b_rc_64 + +!-------------------------------------------------------------------- + +subroutine a_mul_b_cc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_cc_32 +!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_cc_32_':: a_mul_b_cc_32 + +! y = beta*y + alpha * A*x + +use omp_lib +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +complex(kind=8),intent(in):: alpha, beta +complex(kind=8),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +complex(kind=8),intent(in):: x(n,nvec) +complex(kind=8),intent(inout):: y(m,nvec) + +integer ivec, i, j1,j2, j, jaj, mythread, mm, jm +complex(kind=8) xi +complex(kind=8),allocatable:: yt(:) + +include "A_mul_B.fi" + +return +end subroutine a_mul_b_cc_32 + +subroutine a_mul_b_cc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_cc_64 +!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_cc_64_':: a_mul_b_cc_64 + +! y = beta*y + alpha * A*x + +use omp_lib +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec integer(kind=8),intent(in):: n ! # of columns in A integer(kind=8),intent(in):: m ! 
# of rows in A - -complex(kind=8),intent(in):: alpha, beta + +complex(kind=8),intent(in):: alpha, beta complex(kind=8),intent(in):: A(*) integer(kind=8),intent(in):: jA(*), iA(n+1) complex(kind=8),intent(in):: x(n,nvec) complex(kind=8),intent(inout):: y(m,nvec) - -integer ivec, i, j1,j2, j, jaj, mythread, mm, jm -complex(kind=8) xi -complex(kind=8),allocatable:: yt(:) - -include "A_mul_B.fi" - -return -end subroutine a_mul_b_cc - + +integer ivec, i, j1,j2, j, jaj, mythread, mm, jm +complex(kind=8) xi +complex(kind=8),allocatable:: yt(:) + +include "A_mul_B.fi" + +return +end subroutine a_mul_b_cc_64 diff --git a/deps/src/Ac_mul_B.f90 b/deps/src/Ac_mul_B.f90 index e35f45a..fe13024 100644 --- a/deps/src/Ac_mul_B.f90 +++ b/deps/src/Ac_mul_B.f90 @@ -1,190 +1,356 @@ - - - -subroutine ac_mul_b_rr( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rr -!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rr_':: ac_mul_b_rr - -! y = beta*y + alpha * A'*x - -#undef CMPLXA -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec ! # of vectors + + + +subroutine ac_mul_b_rr_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rr_32 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rr_32_':: ac_mul_b_rr_32 + +! y = beta*y + alpha * A'*x + +#undef CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors integer(kind=8),intent(in):: n ! # of columns in A integer(kind=8),intent(in):: m ! 
# of rows in A - -real(kind=8),intent(in):: alpha, beta + +real(kind=8),intent(in):: alpha, beta +real(kind=8),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +real(kind=8),intent(in):: x(m,nvec) +real(kind=8),intent(inout):: y(n,nvec) + +integer ivec, i, j1,j2, j +real(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_rr_32 + +subroutine ac_mul_b_rr_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rr_64 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rr_64_':: ac_mul_b_rr_64 + +! y = beta*y + alpha * A'*x + +#undef CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +real(kind=8),intent(in):: alpha, beta real(kind=8),intent(in):: A(*) integer(kind=8),intent(in):: jA(*), iA(n+1) real(kind=8),intent(in):: x(m,nvec) real(kind=8),intent(inout):: y(n,nvec) - -integer(kind=8) ivec, i, j1,j2, j -real(kind=8) t - -#include "Ac_mul_B.fi" - -return -end subroutine ac_mul_b_rr - -!------------------------------------------------------------------------ - -subroutine ac_mul_b_rc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc -!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_':: ac_mul_b_rc - -! y = beta*y + alpha * A'*x - -#undef CMPLXA -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec ! # of vectors + +integer(kind=8) ivec, i, j1,j2, j +real(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_rr_64 + +!------------------------------------------------------------------------ + +subroutine ac_mul_b_rc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_32 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_32_':: ac_mul_b_rc_32 + +! 
y = beta*y + alpha * A'*x + +#undef CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +complex(kind=8),intent(in):: alpha, beta +real(kind=8),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +complex(kind=8),intent(in):: x(m,nvec) +complex(kind=8),intent(inout):: y(n,nvec) + +integer ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_rc_32 + +subroutine ac_mul_b_rc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_64 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_64_':: ac_mul_b_rc_64 + +! y = beta*y + alpha * A'*x + +#undef CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors integer(kind=8),intent(in):: n ! # of columns in A integer(kind=8),intent(in):: m ! # of rows in A - -complex(kind=8),intent(in):: alpha, beta + +complex(kind=8),intent(in):: alpha, beta real(kind=8),intent(in):: A(*) integer(kind=8),intent(in):: jA(*), iA(n+1) complex(kind=8),intent(in):: x(m,nvec) complex(kind=8),intent(inout):: y(n,nvec) - -integer(kind=8) ivec, i, j1,j2, j -complex(kind=8) t - -#include "Ac_mul_B.fi" - -return -end subroutine ac_mul_b_rc - -!------------------------------------------------------------------------ - -subroutine ac_mul_b_cc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc -!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_':: ac_mul_b_cc - -! y = beta*y + alpha * A'*x - -#define CMPLXA -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec ! 
# of vectors + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_rc_64 + +!------------------------------------------------------------------------ + +subroutine ac_mul_b_cc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_32 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_32_':: ac_mul_b_cc_32 + +! y = beta*y + alpha * A'*x + +#define CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors integer(kind=8),intent(in):: n ! # of columns in A integer(kind=8),intent(in):: m ! # of rows in A - -complex(kind=8),intent(in):: alpha, beta + +complex(kind=8),intent(in):: alpha, beta complex(kind=8),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +complex(kind=8),intent(in):: x(m,nvec) +complex(kind=8),intent(inout):: y(n,nvec) + +integer ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_cc_32 + +subroutine ac_mul_b_cc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_64 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_64_':: ac_mul_b_cc_64 + +! y = beta*y + alpha * A'*x + +#define CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! 
# of rows in A + +complex(kind=8),intent(in):: alpha, beta +complex(kind=8),intent(in):: A(*) +integer(kind=8),intent(in):: jA(*), iA(n+1) +complex(kind=8),intent(in):: x(m,nvec) +complex(kind=8),intent(inout):: y(n,nvec) + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_cc_64 + +subroutine ac_mul_b_cc_short_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_short_64 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_short_64_':: ac_mul_b_cc_short_64 + +! y = beta*y + alpha * A'*x + +#define CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +complex(kind=4),intent(in):: alpha, beta +complex(kind=4),intent(in):: A(*) +integer(kind=8),intent(in):: jA(*), iA(n+1) +complex(kind=4),intent(in):: x(m,nvec) +complex(kind=4),intent(inout):: y(n,nvec) + + + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_cc_short_64 + +subroutine ac_mul_b_cc_short_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_short_32 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_short_32_':: ac_mul_b_cc_short_32 + +! y = beta*y + alpha * A'*x + +#define CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! 
# of rows in A + +complex(kind=4),intent(in):: alpha, beta +complex(kind=4),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +complex(kind=4),intent(in):: x(m,nvec) +complex(kind=4),intent(inout):: y(n,nvec) + + + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_cc_short_32 + +subroutine ac_mul_b_rc_short_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_short_64 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_short_64_':: ac_mul_b_rc_short_64 + +! y = beta*y + alpha * A'*x + +#undef CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +complex(kind=4),intent(in):: alpha, beta +real(kind=4),intent(in):: A(*) +integer(kind=8),intent(in):: jA(*), iA(n+1) +complex(kind=4),intent(in):: x(m,nvec) +complex(kind=4),intent(inout):: y(n,nvec) + + + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_rc_short_64 + +subroutine ac_mul_b_rc_short_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_short_32 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_short_32_':: ac_mul_b_rc_short_32 + +! y = beta*y + alpha * A'*x + +#undef CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! 
# of rows in A + +complex(kind=4),intent(in):: alpha, beta +real(kind=4),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +complex(kind=4),intent(in):: x(m,nvec) +complex(kind=4),intent(inout):: y(n,nvec) + + + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_rc_short_32 + +subroutine ac_mul_b_cc_mixed_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_mixed_64 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_mixed_64_':: ac_mul_b_cc_mixed_64 + +! y = beta*y + alpha * A'*x + +#define CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +complex(kind=8),intent(in):: alpha +complex(kind=8),intent(in):: beta +complex(kind=4),intent(in):: A(*) integer(kind=8),intent(in):: jA(*), iA(n+1) -complex(kind=8),intent(in):: x(m,nvec) -complex(kind=8),intent(inout):: y(n,nvec) - - - -integer(kind=8) ivec, i, j1,j2, j -complex(kind=8) t - -#include "Ac_mul_B.fi" - -return -end subroutine ac_mul_b_cc - - - - - - -subroutine ac_mul_b_cc_short( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_short -!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_short_':: ac_mul_b_cc_short - -! y = beta*y + alpha * A'*x - -#define CMPLXA -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec ! # of vectors -integer(kind=8),intent(in):: n ! # of columns in A -integer(kind=8),intent(in):: m ! 
# of rows in A - -complex(kind=4),intent(in):: alpha, beta -complex(kind=4),intent(in):: A(*) -integer(kind=8),intent(in):: jA(*), iA(n+1) -complex(kind=4),intent(in):: x(m,nvec) -complex(kind=4),intent(inout):: y(n,nvec) - - - -integer(kind=8) ivec, i, j1,j2, j -complex(kind=8) t - -#include "Ac_mul_B.fi" - -return -end subroutine ac_mul_b_cc_short - -subroutine ac_mul_b_rc_short( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_short -!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_short_':: ac_mul_b_rc_short - -! y = beta*y + alpha * A'*x - -#undef CMPLXA -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec ! # of vectors -integer(kind=8),intent(in):: n ! # of columns in A -integer(kind=8),intent(in):: m ! # of rows in A - -complex(kind=4),intent(in):: alpha, beta -real(kind=4),intent(in):: A(*) -integer(kind=8),intent(in):: jA(*), iA(n+1) -complex(kind=4),intent(in):: x(m,nvec) -complex(kind=4),intent(inout):: y(n,nvec) - - - -integer(kind=8) ivec, i, j1,j2, j -complex(kind=8) t - -#include "Ac_mul_B.fi" - -return -end subroutine ac_mul_b_rc_short - - - -subroutine ac_mul_b_cc_mixed( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) -!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_mixed -!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_mixed_':: ac_mul_b_cc_mixed - -! y = beta*y + alpha * A'*x - -#define CMPLXA -implicit none - -integer(kind=8),intent(in):: nthreads -integer(kind=8),intent(in):: nvec ! # of vectors -integer(kind=8),intent(in):: n ! # of columns in A -integer(kind=8),intent(in):: m ! 
# of rows in A - -complex(kind=8),intent(in):: alpha -complex(kind=8),intent(in):: beta -complex(kind=4),intent(in):: A(*) -integer(kind=8),intent(in):: jA(*), iA(n+1) -complex(kind=8),intent(in):: x(m,nvec) -complex(kind=8),intent(inout):: y(n,nvec) - - - -integer(kind=8) ivec, i, j1,j2, j -complex(kind=8) t - -#include "Ac_mul_B.fi" - -return -end subroutine ac_mul_b_cc_mixed +complex(kind=8),intent(in):: x(m,nvec) +complex(kind=8),intent(inout):: y(n,nvec) + + + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_cc_mixed_64 + +subroutine ac_mul_b_cc_mixed_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y ) +!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_mixed_32 +!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_mixed_32_':: ac_mul_b_cc_mixed_32 + +! y = beta*y + alpha * A'*x + +#define CMPLXA +implicit none + +integer(kind=8),intent(in):: nthreads +integer(kind=8),intent(in):: nvec ! # of vectors +integer(kind=8),intent(in):: n ! # of columns in A +integer(kind=8),intent(in):: m ! # of rows in A + +complex(kind=8),intent(in):: alpha +complex(kind=8),intent(in):: beta +complex(kind=4),intent(in):: A(*) +integer(kind=4),intent(in):: jA(*), iA(n+1) +complex(kind=8),intent(in):: x(m,nvec) +complex(kind=8),intent(inout):: y(n,nvec) + + + +integer(kind=8) ivec, i, j1,j2, j +complex(kind=8) t + +#include "Ac_mul_B.fi" + +return +end subroutine ac_mul_b_cc_mixed_32 \ No newline at end of file diff --git a/src/A_mul_B.jl b/src/A_mul_B.jl index d8e1f64..b14e9d0 100644 --- a/src/A_mul_B.jl +++ b/src/A_mul_B.jl @@ -2,11 +2,11 @@ export A_mul_B! 
function A_mul_B!( alpha::Float64, - A::SparseMatrixCSC{Float64,Int}, + A::SparseMatrixCSC{Float64,Ti}, x::Array{Float64}, beta::Float64, y::Array{Float64}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Real: y = beta*y + alpha * A*x if nthreads == 0 @@ -27,20 +27,28 @@ function A_mul_B!( alpha::Float64, end - p = ccall( (:a_mul_b_rr_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}), - Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), A.nzval, A.rowval, A.colptr, x, y); - + if Ti == Int32 + p = ccall( (:a_mul_b_rr_32_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Float64}, Ref{Float64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + elseif Ti == Int64 + p = ccall( (:a_mul_b_rr_64_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function A_mul_B! 
#------------------------------------------------------------------------------ function A_mul_B!( alpha::ComplexF64, - A::SparseMatrixCSC{Float64,Int}, + A::SparseMatrixCSC{Float64,Ti}, x::Array{ComplexF64}, beta::ComplexF64, y::Array{ComplexF64}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Real, Complex A: y = beta*y + alpha * A*x if nthreads == 0 @@ -59,20 +67,28 @@ function A_mul_B!( alpha::ComplexF64, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:a_mul_b_rc_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}), - Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), A.nzval, A.rowval, A.colptr, convert(Ptr{ComplexF64}, pointer(x)), convert(Ptr{ComplexF64}, pointer(y))); - + if Ti == Int32 + p = ccall( (:a_mul_b_rc_32_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + elseif Ti == Int64 + p = ccall( (:a_mul_b_rc_64_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function A_mul_B! 
#------------------------------------------------------------------------------ function A_mul_B!( alpha::ComplexF64, - A::SparseMatrixCSC{ComplexF64,Int}, + A::SparseMatrixCSC{ComplexF64,Ti}, x::Array{ComplexF64}, beta::ComplexF64, y::Array{ComplexF64}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Complex A: y = beta*y + alpha * A*x if nthreads == 0 @@ -91,8 +107,16 @@ function A_mul_B!( alpha::ComplexF64, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:a_mul_b_cc_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}), - Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), convert(Ptr{ComplexF64}, pointer(A.nzval)), A.rowval, A.colptr, convert(Ptr{ComplexF64}, pointer(x)), convert(Ptr{ComplexF64}, pointer(y))); - + if Ti == Int32 + p = ccall( (:a_mul_b_cc_32_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + elseif Ti == Int64 + p = ccall( (:a_mul_b_cc_64_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function A_mul_B! diff --git a/src/Ac_mul_B.jl b/src/Ac_mul_B.jl index 8728516..69a8e51 100644 --- a/src/Ac_mul_B.jl +++ b/src/Ac_mul_B.jl @@ -2,11 +2,11 @@ export Ac_mul_B! 
function Ac_mul_B!( alpha::Float64, - A::SparseMatrixCSC{Float64,Int}, + A::SparseMatrixCSC{Float64,Ti}, x::Array{Float64}, beta::Float64, y::Array{Float64}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Real: y = beta*y + alpha * A'*x if nthreads == 0 @@ -26,20 +26,28 @@ function Ac_mul_B!( alpha::Float64, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:ac_mul_b_rr_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}), - Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), A.nzval, A.rowval, A.colptr, x, y); - + if Ti == Int32 + p = ccall( (:ac_mul_b_rr_32_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Float64}, Ref{Float64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + elseif Ti == Int64 + p = ccall( (:ac_mul_b_rr_64_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function Ac_mul_B! 
#------------------------------------------------------------------------------ function Ac_mul_B!( alpha::ComplexF64, - A::SparseMatrixCSC{Float64,Int}, + A::SparseMatrixCSC{Float64,Ti}, x::Array{ComplexF64}, beta::ComplexF64, y::Array{ComplexF64}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Real, Complex A: y = beta*y + alpha * A'*x if nthreads == 0 @@ -58,20 +66,28 @@ function Ac_mul_B!( alpha::ComplexF64, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:ac_mul_b_rc_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}), - Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), A.nzval, A.rowval, A.colptr, convert(Ptr{ComplexF64}, pointer(x)), convert(Ptr{ComplexF64}, pointer(y))); - + if Ti == Int32 + p = ccall( (:ac_mul_b_rc_32_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + elseif Ti == Int64 + p = ccall( (:ac_mul_b_rc_64_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function Ac_mul_B! 
#------------------------------------------------------------------------------ function Ac_mul_B!( alpha::ComplexF64, - A::SparseMatrixCSC{ComplexF64,Int}, + A::SparseMatrixCSC{ComplexF64,Ti}, x::Array{ComplexF64}, beta::ComplexF64, y::Array{ComplexF64}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Complex: y = beta*y + alpha * A'*x if nthreads == 0 @@ -91,19 +107,27 @@ function Ac_mul_B!( alpha::ComplexF64, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:ac_mul_b_cc_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}), - Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), convert(Ptr{ComplexF64}, pointer(A.nzval)), A.rowval, A.colptr, convert(Ptr{ComplexF64}, pointer(x)), convert(Ptr{ComplexF64}, pointer(y))); - + if Ti == Int32 + p = ccall( (:ac_mul_b_cc_32_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + elseif Ti == Int64 + p = ccall( (:ac_mul_b_cc_64_, spmatveclib), + Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}), + nthreads, nvec, m, n, alpha, beta, A.nzval, A.rowval, A.colptr, x, y) + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function Ac_mul_B! 
function Ac_mul_B!( alpha::ComplexF32, - A::SparseMatrixCSC{ComplexF32,Int64}, + A::SparseMatrixCSC{ComplexF32,Ti}, x::Array{ComplexF32}, beta::ComplexF32, y::Array{ComplexF32}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Complex: y = beta*y + alpha * A'*x if nthreads == 0 @@ -122,18 +146,26 @@ function Ac_mul_B!( alpha::ComplexF32, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:ac_mul_b_cc_short_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}), + if Ti == Int32 + p = ccall( (:ac_mul_b_cc_short_32_, spmatveclib), + Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Int32}, Ptr{Int32}, Ptr{ComplexF32}, Ptr{ComplexF32}), Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), convert(Ptr{ComplexF32}, pointer(A.nzval)), A.rowval, A.colptr, convert(Ptr{ComplexF32}, pointer(x)), convert(Ptr{ComplexF32}, pointer(y))); - + elseif Ti == Int64 + p = ccall( (:ac_mul_b_cc_short_64_, spmatveclib), + Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}), + Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), convert(Ptr{ComplexF32}, pointer(A.nzval)), A.rowval, A.colptr, convert(Ptr{ComplexF32}, pointer(x)), convert(Ptr{ComplexF32}, pointer(y))); + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function Ac_mul_B! 
function Ac_mul_B!( alpha::ComplexF32, - A::SparseMatrixCSC{Float32,Int64}, + A::SparseMatrixCSC{Float32,Ti}, x::Array{ComplexF32}, beta::ComplexF32, y::Array{ComplexF32}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Complex: y = beta*y + alpha * A'*x if nthreads == 0 @@ -152,19 +184,27 @@ function Ac_mul_B!( alpha::ComplexF32, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:ac_mul_b_rc_short_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Float32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}), + if Ti == Int32 + p = ccall( (:ac_mul_b_rc_short_32_, spmatveclib), + Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Float32}, Ptr{Int32}, Ptr{Int32}, Ptr{ComplexF32}, Ptr{ComplexF32}), Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), A.nzval, A.rowval, A.colptr, convert(Ptr{ComplexF32}, pointer(x)), convert(Ptr{ComplexF32}, pointer(y))); - + elseif Ti == Int64 + p = ccall( (:ac_mul_b_rc_short_64_, spmatveclib), + Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Float32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}), + Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), A.nzval, A.rowval, A.colptr, convert(Ptr{ComplexF32}, pointer(x)), convert(Ptr{ComplexF32}, pointer(y))); + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function Ac_mul_B! 
function Ac_mul_B!( alpha::ComplexF64, - A::SparseMatrixCSC{ComplexF32,Int64}, + A::SparseMatrixCSC{ComplexF32,Ti}, x::Array{ComplexF64}, beta::ComplexF64, y::Array{ComplexF64}, - nthreads::Int64=0 ) + nthreads::Int64=0 ) where Ti # Complex: y = beta*y + alpha * A'*x if nthreads == 0 @@ -183,9 +223,17 @@ function Ac_mul_B!( alpha::ComplexF64, throw(DimensionMismatch("length(y,2) != nvec")) end - p = ccall( (:ac_mul_b_cc_mixed_, spmatveclib), - Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}), + if Ti == Int32 + p = ccall( (:ac_mul_b_cc_mixed_32_, spmatveclib), + Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF32}, Ptr{Int32}, Ptr{Int32}, Ptr{ComplexF64}, Ptr{ComplexF64}), Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), convert(Ptr{ComplexF32}, pointer(A.nzval)), A.rowval, A.colptr, convert(Ptr{ComplexF64}, pointer(x)), convert(Ptr{ComplexF64}, pointer(y))); - + elseif Ti == Int64 + p = ccall( (:ac_mul_b_cc_mixed_64_, spmatveclib), + Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}), + Ref(nthreads), Ref(nvec), Ref(m), Ref(n), Ref(alpha), Ref(beta), convert(Ptr{ComplexF32}, pointer(A.nzval)), A.rowval, A.colptr, convert(Ptr{ComplexF64}, pointer(x)), convert(Ptr{ComplexF64}, pointer(y))); + else + error("Unsupported sparse matrix indexing integer type $Ti") + end + return p end # function Ac_mul_B! 
diff --git a/test/runtests.jl b/test/runtests.jl index 11d604d..8d90f3a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,2 +1,3 @@ +using Test, Printf include("test_A_mul_B.jl") -include("test_Ac_mul_B.jl") \ No newline at end of file +include("test_Ac_mul_B.jl") diff --git a/test/test_A_mul_B.jl b/test/test_A_mul_B.jl index fbfe1c6..6b010a4 100644 --- a/test/test_A_mul_B.jl +++ b/test/test_A_mul_B.jl @@ -1,4 +1,5 @@ - +@testset "A_mul_B" begin +for Ti in [Int32, Int64] using ParSpMatVec using Test using SparseArrays @@ -7,6 +8,7 @@ using LinearAlgebra n = 50000 nvec = 20 A = sprand(n,n, 2.e-6); +A = convert(SparseMatrixCSC{eltype(A),Ti}, A) numProcs = 4; x = rand(n,nvec); x = x*10 .- 5; @@ -100,6 +102,7 @@ ii,jj,vv = findnz(A) ai = ones(length(ii)); vv = vv + im*ai A = sparse(ii,jj, vv, n,n) +A = convert(SparseMatrixCSC{eltype(A),Ti}, A) ii=0; jj=0; vv=0; ai=0; xi = rand(n,nvec); xi = xi*10 .- 5; @@ -148,3 +151,6 @@ catch E @test isa(E,DimensionMismatch) end println() + +end # Ti loop +end # testset \ No newline at end of file diff --git a/test/test_Ac_mul_B.jl b/test/test_Ac_mul_B.jl index 60b8ef9..afaa761 100644 --- a/test/test_Ac_mul_B.jl +++ b/test/test_Ac_mul_B.jl @@ -1,4 +1,5 @@ - +@testset "Ac_mul_B" begin +for Ti in [Int32, Int64] using ParSpMatVec using Test using SparseArrays @@ -9,6 +10,7 @@ n = 50000 numProcs =4; nvec = 5 A = sprand(n,n, 2.e-6); +A = convert(SparseMatrixCSC{eltype(A),Ti}, A) x = rand(n,nvec); x = x*10 .- 5; y = rand(n,nvec); y = y*10 .- 5; @@ -99,6 +101,7 @@ ii,jj,vv = findnz(A) ai = ones(length(ii)); vv = vv + im*ai A = sparse(ii,jj, vv, n,n) +A = convert(SparseMatrixCSC{eltype(A),Ti}, A) ii=0; jj=0; vv=0; ai=0; xi = rand(n,nvec); xi = xi*10 .- 5; @@ -150,7 +153,7 @@ println() println("Complex short") alpha = convert(ComplexF32, alpha) beta = convert(ComplexF32,beta); -A = convert(SparseMatrixCSC{ComplexF32,Int64},A); +A = convert(SparseMatrixCSC{ComplexF32,Ti},A); x = convert(Array{ComplexF32},x); y = 
convert(Array{ComplexF32},y); @@ -190,7 +193,7 @@ println() println("Complex short with a real matrix") alpha = convert(ComplexF32, alpha) beta = convert(ComplexF32,beta); -A = convert(SparseMatrixCSC{Float32,Int64},real(A)); +A = convert(SparseMatrixCSC{Float32,Ti},real(A)); x = convert(Array{ComplexF32},x); y = convert(Array{ComplexF32},y); @@ -231,7 +234,7 @@ println() println("Complex single with a complex matrix but double target and source") alpha = convert(ComplexF64, alpha) beta = convert(ComplexF64,beta); -A = convert(SparseMatrixCSC{ComplexF32,Int64},real(A) + 1im*A); +A = convert(SparseMatrixCSC{ComplexF32,Ti},real(A) + 1im*A); x = convert(Array{ComplexF64},x); y = convert(Array{ComplexF64},y); @@ -251,4 +254,5 @@ for k=0:numProcs end println() - +end # End Ti loop +end # End testset \ No newline at end of file