diff --git a/.gitignore b/.gitignore
index 135ff58..182f48c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 ParSpMatVec
+*.log
diff --git a/deps/src/A_mul_B.f90 b/deps/src/A_mul_B.f90
index 82a1c5c..fd965bb 100644
--- a/deps/src/A_mul_B.f90
+++ b/deps/src/A_mul_B.f90
@@ -1,92 +1,178 @@
-
-subroutine a_mul_b_rr( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rr
-!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rr_':: a_mul_b_rr
-
-! y = beta*y  +  alpha * A*x
-
-use omp_lib
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec
+
+subroutine a_mul_b_rr_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rr_32
+!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rr_32_':: a_mul_b_rr_32
+
+! y = beta*y  +  alpha * A*x
+
+use omp_lib
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec
 integer(kind=8),intent(in):: n  ! # of columns in A
 integer(kind=8),intent(in):: m  ! # of rows in A
-
-real(kind=8),intent(in):: alpha, beta
+
+real(kind=8),intent(in):: alpha, beta
+real(kind=8),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+real(kind=8),intent(in):: x(n,nvec)
+real(kind=8),intent(inout):: y(m,nvec)
+
+integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
+real(kind=8) xi
+real(kind=8),allocatable:: yt(:)
+
+include "A_mul_B.fi"
+
+return
+end subroutine a_mul_b_rr_32
+
+subroutine a_mul_b_rr_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rr_64
+!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rr_64_':: a_mul_b_rr_64
+
+! y = beta*y  +  alpha * A*x
+
+use omp_lib
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+real(kind=8),intent(in):: alpha, beta
 real(kind=8),intent(in):: A(*)
 integer(kind=8),intent(in):: jA(*), iA(n+1)
 real(kind=8),intent(in):: x(n,nvec)
 real(kind=8),intent(inout):: y(m,nvec)
-
-integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
-real(kind=8) xi
-real(kind=8),allocatable:: yt(:)
-
-include "A_mul_B.fi"
-
-return
-end subroutine a_mul_b_rr
-
-!--------------------------------------------------------------------
-
-subroutine a_mul_b_rc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rc
-!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rc_':: a_mul_b_rc
-
-! y = beta*y  +  alpha * A*x
-
-use omp_lib
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec
+
+integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
+real(kind=8) xi
+real(kind=8),allocatable:: yt(:)
+
+include "A_mul_B.fi"
+
+return
+end subroutine a_mul_b_rr_64
+
+!--------------------------------------------------------------------
+
+subroutine a_mul_b_rc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rc_32
+!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rc_32_':: a_mul_b_rc_32
+
+! y = beta*y  +  alpha * A*x
+
+use omp_lib
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec
 integer(kind=8),intent(in):: n  ! # of columns in A
 integer(kind=8),intent(in):: m  ! # of rows in A
-
-complex(kind=8),intent(in):: alpha, beta
+
+complex(kind=8),intent(in):: alpha, beta
+real(kind=8),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+complex(kind=8),intent(in):: x(n,nvec)
+complex(kind=8),intent(inout):: y(m,nvec)
+
+integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
+complex(kind=8) xi
+complex(kind=8),allocatable:: yt(:)
+
+include "A_mul_B.fi"
+
+return
+end subroutine a_mul_b_rc_32
+
+subroutine a_mul_b_rc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_rc_64
+!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_rc_64_':: a_mul_b_rc_64
+
+! y = beta*y  +  alpha * A*x
+
+use omp_lib
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=8),intent(in):: alpha, beta
 real(kind=8),intent(in):: A(*)
 integer(kind=8),intent(in):: jA(*), iA(n+1)
 complex(kind=8),intent(in):: x(n,nvec)
 complex(kind=8),intent(inout):: y(m,nvec)
-
-integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
-complex(kind=8) xi
-complex(kind=8),allocatable:: yt(:)
-
-include "A_mul_B.fi"
-
-return
-end subroutine a_mul_b_rc
-
-!--------------------------------------------------------------------
-
-subroutine a_mul_b_cc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_cc
-!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_cc_':: a_mul_b_cc
-
-! y = beta*y  +  alpha * A*x
-
-use omp_lib
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec
+
+integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
+complex(kind=8) xi
+complex(kind=8),allocatable:: yt(:)
+
+include "A_mul_B.fi"
+
+return
+end subroutine a_mul_b_rc_64
+
+!--------------------------------------------------------------------
+
+subroutine a_mul_b_cc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_cc_32
+!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_cc_32_':: a_mul_b_cc_32
+
+! y = beta*y  +  alpha * A*x
+
+use omp_lib
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=8),intent(in):: alpha, beta
+complex(kind=8),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+complex(kind=8),intent(in):: x(n,nvec)
+complex(kind=8),intent(inout):: y(m,nvec)
+
+integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
+complex(kind=8) xi
+complex(kind=8),allocatable:: yt(:)
+
+include "A_mul_B.fi"
+
+return
+end subroutine a_mul_b_cc_32
+
+subroutine a_mul_b_cc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: a_mul_b_cc_64
+!DIR$ ATTRIBUTES ALIAS: 'a_mul_b_cc_64_':: a_mul_b_cc_64
+
+! y = beta*y  +  alpha * A*x
+
+use omp_lib
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec
 integer(kind=8),intent(in):: n  ! # of columns in A
 integer(kind=8),intent(in):: m  ! # of rows in A
-
-complex(kind=8),intent(in):: alpha, beta
+
+complex(kind=8),intent(in):: alpha, beta
 complex(kind=8),intent(in):: A(*)
 integer(kind=8),intent(in):: jA(*), iA(n+1)
 complex(kind=8),intent(in):: x(n,nvec)
 complex(kind=8),intent(inout):: y(m,nvec)
-
-integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
-complex(kind=8) xi
-complex(kind=8),allocatable:: yt(:)
-
-include "A_mul_B.fi"
-
-return
-end subroutine a_mul_b_cc
-
+
+integer ivec, i, j1,j2, j, jaj, mythread, mm, jm
+complex(kind=8) xi
+complex(kind=8),allocatable:: yt(:)
+
+include "A_mul_B.fi"
+
+return
+end subroutine a_mul_b_cc_64
diff --git a/deps/src/Ac_mul_B.f90 b/deps/src/Ac_mul_B.f90
index e35f45a..fe13024 100644
--- a/deps/src/Ac_mul_B.f90
+++ b/deps/src/Ac_mul_B.f90
@@ -1,190 +1,356 @@
-
-   
-
-subroutine ac_mul_b_rr( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rr
-!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rr_':: ac_mul_b_rr
-
-! y = beta*y  + alpha * A'*x
-
-#undef CMPLXA
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec ! # of vectors
+
+   
+
+subroutine ac_mul_b_rr_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rr_32
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rr_32_':: ac_mul_b_rr_32
+
+! y = beta*y  + alpha * A'*x
+
+#undef CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
 integer(kind=8),intent(in):: n  ! # of columns in A
 integer(kind=8),intent(in):: m  ! # of rows in A
-
-real(kind=8),intent(in):: alpha, beta
+
+real(kind=8),intent(in):: alpha, beta
+real(kind=8),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+real(kind=8),intent(in):: x(m,nvec)
+real(kind=8),intent(inout):: y(n,nvec)
+
+integer ivec, i, j1,j2, j
+real(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_rr_32
+
+subroutine ac_mul_b_rr_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rr_64
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rr_64_':: ac_mul_b_rr_64
+! The alias above must equal the symbol the Julia wrapper looks up through ccall (ac_mul_b_rr_64_).
+! y = beta*y  + alpha * A'*x   (real A, x and y; jA and iA hold 64-bit integer indices)
+
+#undef CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+real(kind=8),intent(in):: alpha, beta
 real(kind=8),intent(in):: A(*)
 integer(kind=8),intent(in):: jA(*), iA(n+1)
 real(kind=8),intent(in):: x(m,nvec)
 real(kind=8),intent(inout):: y(n,nvec)
-
-integer(kind=8) ivec, i, j1,j2, j
-real(kind=8) t
-
-#include "Ac_mul_B.fi"
-
-return
-end subroutine ac_mul_b_rr
-
-!------------------------------------------------------------------------
-
-subroutine ac_mul_b_rc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc
-!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_':: ac_mul_b_rc
-
-! y = beta*y  + alpha * A'*x
-
-#undef CMPLXA
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec ! # of vectors
+
+integer(kind=8) ivec, i, j1,j2, j
+real(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_rr_64
+
+!------------------------------------------------------------------------
+
+subroutine ac_mul_b_rc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_32
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_32_':: ac_mul_b_rc_32
+
+! y = beta*y  + alpha * A'*x
+
+#undef CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=8),intent(in):: alpha, beta
+real(kind=8),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+complex(kind=8),intent(in):: x(m,nvec)
+complex(kind=8),intent(inout):: y(n,nvec)
+
+integer ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_rc_32
+
+subroutine ac_mul_b_rc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_64
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_64_':: ac_mul_b_rc_64
+
+! y = beta*y  + alpha * A'*x
+
+#undef CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
 integer(kind=8),intent(in):: n  ! # of columns in A
 integer(kind=8),intent(in):: m  ! # of rows in A
-
-complex(kind=8),intent(in):: alpha, beta
+
+complex(kind=8),intent(in):: alpha, beta
 real(kind=8),intent(in):: A(*)
 integer(kind=8),intent(in):: jA(*), iA(n+1)
 complex(kind=8),intent(in):: x(m,nvec)
 complex(kind=8),intent(inout):: y(n,nvec)
-
-integer(kind=8) ivec, i, j1,j2, j
-complex(kind=8) t
-
-#include "Ac_mul_B.fi"
-
-return
-end subroutine ac_mul_b_rc
-
-!------------------------------------------------------------------------
-
-subroutine ac_mul_b_cc( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc
-!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_':: ac_mul_b_cc
-
-! y = beta*y  + alpha * A'*x
-
-#define CMPLXA
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec ! # of vectors
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_rc_64
+
+!------------------------------------------------------------------------
+
+subroutine ac_mul_b_cc_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_32
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_32_':: ac_mul_b_cc_32
+
+! y = beta*y  + alpha * A'*x
+
+#define CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
 integer(kind=8),intent(in):: n  ! # of columns in A
 integer(kind=8),intent(in):: m  ! # of rows in A
-
-complex(kind=8),intent(in):: alpha, beta
+
+complex(kind=8),intent(in):: alpha, beta
 complex(kind=8),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+complex(kind=8),intent(in):: x(m,nvec)
+complex(kind=8),intent(inout):: y(n,nvec)
+
+integer ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_cc_32
+
+subroutine ac_mul_b_cc_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_64
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_64_':: ac_mul_b_cc_64
+
+! y = beta*y  + alpha * A'*x
+
+#define CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=8),intent(in):: alpha, beta
+complex(kind=8),intent(in):: A(*)
+integer(kind=8),intent(in):: jA(*), iA(n+1)
+complex(kind=8),intent(in):: x(m,nvec)
+complex(kind=8),intent(inout):: y(n,nvec)
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_cc_64
+
+subroutine ac_mul_b_cc_short_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_short_64
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_short_64_':: ac_mul_b_cc_short_64
+
+! y = beta*y  + alpha * A'*x
+
+#define CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=4),intent(in):: alpha, beta
+complex(kind=4),intent(in):: A(*)
+integer(kind=8),intent(in):: jA(*), iA(n+1)
+complex(kind=4),intent(in):: x(m,nvec)
+complex(kind=4),intent(inout):: y(n,nvec)
+
+
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_cc_short_64
+
+subroutine ac_mul_b_cc_short_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_short_32
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_short_32_':: ac_mul_b_cc_short_32
+
+! y = beta*y  + alpha * A'*x
+
+#define CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=4),intent(in):: alpha, beta
+complex(kind=4),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+complex(kind=4),intent(in):: x(m,nvec)
+complex(kind=4),intent(inout):: y(n,nvec)
+
+
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_cc_short_32
+
+subroutine ac_mul_b_rc_short_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_short_64
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_short_64_':: ac_mul_b_rc_short_64
+
+! y = beta*y  + alpha * A'*x
+
+#undef CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=4),intent(in):: alpha, beta
+real(kind=4),intent(in):: A(*)
+integer(kind=8),intent(in):: jA(*), iA(n+1)
+complex(kind=4),intent(in):: x(m,nvec)
+complex(kind=4),intent(inout):: y(n,nvec)
+
+
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_rc_short_64
+
+subroutine ac_mul_b_rc_short_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_short_32
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_short_32_':: ac_mul_b_rc_short_32
+
+! y = beta*y  + alpha * A'*x
+
+#undef CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=4),intent(in):: alpha, beta
+real(kind=4),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+complex(kind=4),intent(in):: x(m,nvec)
+complex(kind=4),intent(inout):: y(n,nvec)
+
+
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_rc_short_32
+
+subroutine ac_mul_b_cc_mixed_64( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_mixed_64
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_mixed_64_':: ac_mul_b_cc_mixed_64
+
+! y = beta*y  + alpha * A'*x
+
+#define CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=8),intent(in):: alpha
+complex(kind=8),intent(in):: beta
+complex(kind=4),intent(in):: A(*)
 integer(kind=8),intent(in):: jA(*), iA(n+1)
-complex(kind=8),intent(in):: x(m,nvec)
-complex(kind=8),intent(inout):: y(n,nvec)
-
-
-
-integer(kind=8) ivec, i, j1,j2, j
-complex(kind=8) t
-
-#include "Ac_mul_B.fi"
-
-return
-end subroutine ac_mul_b_cc
-
-
-
-
-
-
-subroutine ac_mul_b_cc_short( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_short
-!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_short_':: ac_mul_b_cc_short
-
-! y = beta*y  + alpha * A'*x
-
-#define CMPLXA
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec ! # of vectors
-integer(kind=8),intent(in):: n  ! # of columns in A
-integer(kind=8),intent(in):: m  ! # of rows in A
-
-complex(kind=4),intent(in):: alpha, beta
-complex(kind=4),intent(in):: A(*)
-integer(kind=8),intent(in):: jA(*), iA(n+1)
-complex(kind=4),intent(in):: x(m,nvec)
-complex(kind=4),intent(inout):: y(n,nvec)
-
-
-
-integer(kind=8) ivec, i, j1,j2, j
-complex(kind=8) t
-
-#include "Ac_mul_B.fi"
-
-return
-end subroutine ac_mul_b_cc_short
-
-subroutine ac_mul_b_rc_short( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_rc_short
-!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_rc_short_':: ac_mul_b_rc_short
-
-! y = beta*y  + alpha * A'*x
-
-#undef CMPLXA
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec ! # of vectors
-integer(kind=8),intent(in):: n  ! # of columns in A
-integer(kind=8),intent(in):: m  ! # of rows in A
-
-complex(kind=4),intent(in):: alpha, beta
-real(kind=4),intent(in):: A(*)
-integer(kind=8),intent(in):: jA(*), iA(n+1)
-complex(kind=4),intent(in):: x(m,nvec)
-complex(kind=4),intent(inout):: y(n,nvec)
-
-
-
-integer(kind=8) ivec, i, j1,j2, j
-complex(kind=8) t
-
-#include "Ac_mul_B.fi"
-
-return
-end subroutine ac_mul_b_rc_short
-
-
-
-subroutine ac_mul_b_cc_mixed( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
-!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_mixed
-!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_mixed_':: ac_mul_b_cc_mixed
-
-! y = beta*y  + alpha * A'*x
-
-#define CMPLXA
-implicit none
-
-integer(kind=8),intent(in):: nthreads
-integer(kind=8),intent(in):: nvec ! # of vectors
-integer(kind=8),intent(in):: n  ! # of columns in A
-integer(kind=8),intent(in):: m  ! # of rows in A
-
-complex(kind=8),intent(in):: alpha
-complex(kind=8),intent(in):: beta
-complex(kind=4),intent(in):: A(*)
-integer(kind=8),intent(in):: jA(*), iA(n+1)
-complex(kind=8),intent(in):: x(m,nvec)
-complex(kind=8),intent(inout):: y(n,nvec)
-
-
-
-integer(kind=8) ivec, i, j1,j2, j
-complex(kind=8) t
-
-#include "Ac_mul_B.fi"
-
-return
-end subroutine ac_mul_b_cc_mixed
+complex(kind=8),intent(in):: x(m,nvec)
+complex(kind=8),intent(inout):: y(n,nvec)
+
+
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_cc_mixed_64
+
+subroutine ac_mul_b_cc_mixed_32( nthreads, nvec, n, m, alpha, beta, A, jA, iA, x, y )
+!DIR$ ATTRIBUTES DLLEXPORT :: ac_mul_b_cc_mixed_32
+!DIR$ ATTRIBUTES ALIAS: 'ac_mul_b_cc_mixed_32_':: ac_mul_b_cc_mixed_32
+
+! y = beta*y  + alpha * A'*x
+
+#define CMPLXA
+implicit none
+
+integer(kind=8),intent(in):: nthreads
+integer(kind=8),intent(in):: nvec ! # of vectors
+integer(kind=8),intent(in):: n  ! # of columns in A
+integer(kind=8),intent(in):: m  ! # of rows in A
+
+complex(kind=8),intent(in):: alpha
+complex(kind=8),intent(in):: beta
+complex(kind=4),intent(in):: A(*)
+integer(kind=4),intent(in):: jA(*), iA(n+1)
+complex(kind=8),intent(in):: x(m,nvec)
+complex(kind=8),intent(inout):: y(n,nvec)
+
+
+
+integer(kind=8) ivec, i, j1,j2, j
+complex(kind=8) t
+
+#include "Ac_mul_B.fi"
+
+return
+end subroutine ac_mul_b_cc_mixed_32
\ No newline at end of file
diff --git a/src/A_mul_B.jl b/src/A_mul_B.jl
index d8e1f64..b14e9d0 100644
--- a/src/A_mul_B.jl
+++ b/src/A_mul_B.jl
@@ -2,11 +2,11 @@
 export A_mul_B!
 
 function A_mul_B!( alpha::Float64,
-                   A::SparseMatrixCSC{Float64,Int},
+                   A::SparseMatrixCSC{Float64,Ti},
                    x::Array{Float64},
                    beta::Float64,
                    y::Array{Float64},
-                   nthreads::Int64=0 )
+                   nthreads::Int64=0 ) where Ti
 # Real:  y = beta*y  +  alpha * A*x 
 
    if nthreads == 0
@@ -27,20 +27,28 @@ function A_mul_B!( alpha::Float64,
    end
    
    
-	p  = ccall( (:a_mul_b_rr_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}),
-                Ref(nthreads), Ref(nvec), Ref(m), Ref(n),    Ref(alpha),   Ref(beta),              A.nzval,      A.rowval,   A.colptr,   x,   y);
-   
+	if Ti == Int32
+      p = ccall( (:a_mul_b_rr_32_, spmatveclib),
+		       Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Float64}, Ref{Float64}),
+             nthreads, nvec, m, n,    alpha,   beta,              A.nzval,      A.rowval,   A.colptr,   x,   y)
+   elseif Ti == Int64
+      p = ccall( (:a_mul_b_rr_64_, spmatveclib),
+		       Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}),
+             nthreads, nvec, m, n,    alpha,   beta,              A.nzval,      A.rowval,   A.colptr,   x,   y) 
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function A_mul_B!
 
 #------------------------------------------------------------------------------
 
 function A_mul_B!( alpha::ComplexF64,
-                   A::SparseMatrixCSC{Float64,Int},
+                   A::SparseMatrixCSC{Float64,Ti},
                    x::Array{ComplexF64},
                    beta::ComplexF64,
                    y::Array{ComplexF64},
-                   nthreads::Int64=0 )
+                   nthreads::Int64=0 ) where Ti
 # Real, Complex A:  y = beta*y  +  alpha * A*x 
 
    if nthreads == 0
@@ -59,20 +67,28 @@ function A_mul_B!( alpha::ComplexF64,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:a_mul_b_rc_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}),
-                   Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              A.nzval,      A.rowval,   A.colptr,   convert(Ptr{ComplexF64}, pointer(x)),  convert(Ptr{ComplexF64}, pointer(y)));
-   
+	if Ti == Int32
+	   p = ccall( (:a_mul_b_rc_32_, spmatveclib),
+		 Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,              A.nzval,      A.rowval,   A.colptr,   x,  y)
+   elseif Ti == Int64
+      p = ccall( (:a_mul_b_rc_64_, spmatveclib),
+		 Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,              A.nzval,      A.rowval,   A.colptr,   x,  y)
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function A_mul_B!
 
 #------------------------------------------------------------------------------
 
 function A_mul_B!( alpha::ComplexF64,
-                   A::SparseMatrixCSC{ComplexF64,Int},
+                   A::SparseMatrixCSC{ComplexF64,Ti},
                    x::Array{ComplexF64},
                    beta::ComplexF64,
                    y::Array{ComplexF64},
-                   nthreads::Int64=0 )
+                   nthreads::Int64=0 ) where Ti
 # Complex A:  y = beta*y  +  alpha * A*x 
 
    if nthreads == 0
@@ -91,8 +107,16 @@ function A_mul_B!( alpha::ComplexF64,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:a_mul_b_cc_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}),
-                   Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              convert(Ptr{ComplexF64}, pointer(A.nzval)),      A.rowval,   A.colptr,   convert(Ptr{ComplexF64}, pointer(x)),  convert(Ptr{ComplexF64}, pointer(y)));
-   
+	if Ti == Int32
+	   p = ccall( (:a_mul_b_cc_32_, spmatveclib),
+		   Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,            A.nzval,      A.rowval,   A.colptr,  x,  y)
+   elseif Ti == Int64
+      p = ccall( (:a_mul_b_cc_64_, spmatveclib),
+		 Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,            A.nzval,      A.rowval,   A.colptr,  x,  y)
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function A_mul_B!
diff --git a/src/Ac_mul_B.jl b/src/Ac_mul_B.jl
index 8728516..69a8e51 100644
--- a/src/Ac_mul_B.jl
+++ b/src/Ac_mul_B.jl
@@ -2,11 +2,11 @@
 export Ac_mul_B!
 
 function Ac_mul_B!( alpha::Float64,
-                    A::SparseMatrixCSC{Float64,Int},
+                    A::SparseMatrixCSC{Float64,Ti},
                     x::Array{Float64},
                     beta::Float64,
                     y::Array{Float64},
-                    nthreads::Int64=0 )
+                    nthreads::Int64=0 ) where Ti
 # Real:  y = beta*y  +  alpha * A'*x 
 
    if nthreads == 0
@@ -26,20 +26,28 @@ function Ac_mul_B!( alpha::Float64,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:ac_mul_b_rr_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{Float64}, Ptr{Float64}),
-                Ref(nthreads), Ref(nvec), Ref(m), Ref(n),    Ref(alpha),   Ref(beta),              A.nzval,      A.rowval,   A.colptr,   x,   y);
-   
+	if Ti == Int32
+      p = ccall( (:ac_mul_b_rr_32_, spmatveclib),
+      Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{Float64}, Ref{Float64}),
+              nthreads, nvec, m, n,    alpha,   beta,              A.nzval,      A.rowval,   A.colptr,   x,   y)
+   elseif Ti == Int64
+      p = ccall( (:ac_mul_b_rr_64_, spmatveclib),
+      Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{Float64}, Ref{Float64}),
+              nthreads, nvec, m, n,    alpha,   beta,              A.nzval,      A.rowval,   A.colptr,   x,   y)
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function Ac_mul_B!
 
 #------------------------------------------------------------------------------
 
 function Ac_mul_B!( alpha::ComplexF64,
-                    A::SparseMatrixCSC{Float64,Int},
+                    A::SparseMatrixCSC{Float64,Ti},
                     x::Array{ComplexF64},
                     beta::ComplexF64,
                     y::Array{ComplexF64},
-                    nthreads::Int64=0 )
+                    nthreads::Int64=0 ) where Ti
 # Real, Complex A:  y = beta*y  +  alpha * A'*x 
 
    if nthreads == 0
@@ -58,20 +66,28 @@ function Ac_mul_B!( alpha::ComplexF64,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:ac_mul_b_rc_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Float64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}),
-                   Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              A.nzval,      A.rowval,   A.colptr,   convert(Ptr{ComplexF64}, pointer(x)),  convert(Ptr{ComplexF64}, pointer(y)));
-   
+	if Ti == Int32
+	   p = ccall( (:ac_mul_b_rc_32_, spmatveclib),
+		 Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,          A.nzval,      A.rowval,   A.colptr,   x,  y)
+   elseif Ti == Int64
+      p = ccall( (:ac_mul_b_rc_64_, spmatveclib),
+		 Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Float64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,          A.nzval,      A.rowval,   A.colptr,   x,  y)
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function Ac_mul_B!
 
 #------------------------------------------------------------------------------
 
 function Ac_mul_B!( alpha::ComplexF64,
-                    A::SparseMatrixCSC{ComplexF64,Int},
+                    A::SparseMatrixCSC{ComplexF64,Ti},
                     x::Array{ComplexF64},
                     beta::ComplexF64,
                     y::Array{ComplexF64},
-                    nthreads::Int64=0 )
+                    nthreads::Int64=0 ) where Ti
 # Complex:  y = beta*y  +  alpha * A'*x 
 
    if nthreads == 0
@@ -91,19 +107,27 @@ function Ac_mul_B!( alpha::ComplexF64,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:ac_mul_b_cc_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}),
-                   Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              convert(Ptr{ComplexF64}, pointer(A.nzval)),      A.rowval,   A.colptr,   convert(Ptr{ComplexF64}, pointer(x)),  convert(Ptr{ComplexF64}, pointer(y)));
-   
+	if Ti == Int32
+      p = ccall( (:ac_mul_b_cc_32_, spmatveclib),
+		 Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int32}, Ref{Int32}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,           A.nzval,      A.rowval,   A.colptr,   x,  y)
+   elseif Ti == Int64
+      p = ccall( (:ac_mul_b_cc_64_, spmatveclib),
+		 Nothing, ( Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{ComplexF64}, Ref{Int64}, Ref{Int64}, Ref{ComplexF64}, Ref{ComplexF64}),
+                   nthreads, nvec, m, n,     alpha,   beta,           A.nzval,      A.rowval,   A.colptr,   x,  y)
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function Ac_mul_B!
 
 
 function Ac_mul_B!( alpha::ComplexF32,
-                    A::SparseMatrixCSC{ComplexF32,Int64},
+                    A::SparseMatrixCSC{ComplexF32,Ti},
                     x::Array{ComplexF32},
                     beta::ComplexF32,
                     y::Array{ComplexF32},
-                    nthreads::Int64=0 )
+                    nthreads::Int64=0 ) where Ti
 # Complex:  y = beta*y  +  alpha * A'*x 
 
    if nthreads == 0
@@ -122,18 +146,26 @@ function Ac_mul_B!( alpha::ComplexF32,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:ac_mul_b_cc_short_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}),
+   if Ti == Int32
+	   p  = ccall( (:ac_mul_b_cc_short_32_, spmatveclib),
+		    Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Int32}, Ptr{Int32}, Ptr{ComplexF32}, Ptr{ComplexF32}),
                    Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              convert(Ptr{ComplexF32}, pointer(A.nzval)),      A.rowval,   A.colptr,   convert(Ptr{ComplexF32}, pointer(x)),  convert(Ptr{ComplexF32}, pointer(y)));
-   
+   elseif Ti == Int64
+      p  = ccall( (:ac_mul_b_cc_short_64_, spmatveclib),
+		    Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}),
+                   Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              convert(Ptr{ComplexF32}, pointer(A.nzval)),      A.rowval,   A.colptr,   convert(Ptr{ComplexF32}, pointer(x)),  convert(Ptr{ComplexF32}, pointer(y)));
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p 
 end  # function Ac_mul_B!
 
 function Ac_mul_B!( alpha::ComplexF32,
-                    A::SparseMatrixCSC{Float32,Int64},
+                    A::SparseMatrixCSC{Float32,Ti},
                     x::Array{ComplexF32},
                     beta::ComplexF32,
                     y::Array{ComplexF32},
-                    nthreads::Int64=0 )
+                    nthreads::Int64=0 ) where Ti
 # Complex:  y = beta*y  +  alpha * A'*x 
 
    if nthreads == 0
@@ -152,19 +184,27 @@ function Ac_mul_B!( alpha::ComplexF32,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:ac_mul_b_rc_short_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Float32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}),
+   if Ti == Int32
+	   p  = ccall( (:ac_mul_b_rc_short_32_, spmatveclib),
+		    Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Float32}, Ptr{Int32}, Ptr{Int32}, Ptr{ComplexF32}, Ptr{ComplexF32}),
                    Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),  A.nzval,      A.rowval,   A.colptr,   convert(Ptr{ComplexF32}, pointer(x)),  convert(Ptr{ComplexF32}, pointer(y)));
-   
+   elseif Ti == Int64
+      p  = ccall( (:ac_mul_b_rc_short_64_, spmatveclib),
+		    Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}, Ptr{Float32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF32}, Ptr{ComplexF32}),
+                   Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),  A.nzval,      A.rowval,   A.colptr,   convert(Ptr{ComplexF32}, pointer(x)),  convert(Ptr{ComplexF32}, pointer(y)));
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function Ac_mul_B!
 
 
 function Ac_mul_B!( alpha::ComplexF64,
-                    A::SparseMatrixCSC{ComplexF32,Int64},
+                    A::SparseMatrixCSC{ComplexF32,Ti},
                     x::Array{ComplexF64},
                     beta::ComplexF64,
                     y::Array{ComplexF64},
-                    nthreads::Int64=0 )
+                    nthreads::Int64=0 ) where Ti
 # Complex:  y = beta*y  +  alpha * A'*x 
 
    if nthreads == 0
@@ -183,9 +223,17 @@ function Ac_mul_B!( alpha::ComplexF64,
       throw(DimensionMismatch("length(y,2) != nvec"))
    end
    
-	p  = ccall( (:ac_mul_b_cc_mixed_, spmatveclib),
-		 Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}),
+   if Ti == Int32
+	   p  = ccall( (:ac_mul_b_cc_mixed_32_, spmatveclib),
+		    Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF32}, Ptr{Int32}, Ptr{Int32}, Ptr{ComplexF64}, Ptr{ComplexF64}),
                    Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              convert(Ptr{ComplexF32}, pointer(A.nzval)),      A.rowval,   A.colptr,   convert(Ptr{ComplexF64}, pointer(x)),  convert(Ptr{ComplexF64}, pointer(y)));
-   
+   elseif Ti == Int64
+      p  = ccall( (:ac_mul_b_cc_mixed_64_, spmatveclib),
+		    Int64, ( Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}, Ptr{ComplexF32}, Ptr{Int64}, Ptr{Int64}, Ptr{ComplexF64}, Ptr{ComplexF64}),
+                   Ref(nthreads), Ref(nvec), Ref(m), Ref(n),     Ref(alpha),   Ref(beta),              convert(Ptr{ComplexF32}, pointer(A.nzval)),      A.rowval,   A.colptr,   convert(Ptr{ComplexF64}, pointer(x)),  convert(Ptr{ComplexF64}, pointer(y)));
+   else
+      error("Unsupported sparse matrix indexing integer type $Ti")
+   end
+   return p
 end  # function Ac_mul_B!
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 11d604d..8d90f3a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,2 +1,3 @@
+using Test, Printf
 include("test_A_mul_B.jl")
-include("test_Ac_mul_B.jl")
\ No newline at end of file
+include("test_Ac_mul_B.jl")
diff --git a/test/test_A_mul_B.jl b/test/test_A_mul_B.jl
index fbfe1c6..6b010a4 100644
--- a/test/test_A_mul_B.jl
+++ b/test/test_A_mul_B.jl
@@ -1,4 +1,5 @@
-
+@testset "A_mul_B" begin
+for Ti in [Int32, Int64]
 using ParSpMatVec
 using Test
 using SparseArrays
@@ -7,6 +8,7 @@ using LinearAlgebra
 n = 50000
 nvec = 20
 A = sprand(n,n, 2.e-6);
+A = convert(SparseMatrixCSC{eltype(A),Ti}, A)
 numProcs = 4;
 
 x = rand(n,nvec);  x = x*10 .- 5;
@@ -100,6 +102,7 @@ ii,jj,vv = findnz(A)
 ai = ones(length(ii));
 vv = vv + im*ai
 A = sparse(ii,jj, vv, n,n)
+A = convert(SparseMatrixCSC{eltype(A),Ti}, A)
 ii=0; jj=0; vv=0; ai=0;
 
 xi = rand(n,nvec);  xi = xi*10 .- 5;
@@ -148,3 +151,6 @@ catch E
 	@test isa(E,DimensionMismatch)
 end
 println()
+
+end # Ti loop
+end # testset
\ No newline at end of file
diff --git a/test/test_Ac_mul_B.jl b/test/test_Ac_mul_B.jl
index 60b8ef9..afaa761 100644
--- a/test/test_Ac_mul_B.jl
+++ b/test/test_Ac_mul_B.jl
@@ -1,4 +1,5 @@
-
+@testset "Ac_mul_B" begin
+for Ti in [Int32, Int64]
 using ParSpMatVec
 using Test
 using SparseArrays
@@ -9,6 +10,7 @@ n = 50000
 numProcs =4;
 nvec = 5
 A = sprand(n,n, 2.e-6);
+A = convert(SparseMatrixCSC{eltype(A),Ti}, A)
 
 x = rand(n,nvec);  x = x*10 .- 5;
 y = rand(n,nvec);  y = y*10 .- 5;
@@ -99,6 +101,7 @@ ii,jj,vv = findnz(A)
 ai = ones(length(ii));
 vv = vv + im*ai
 A = sparse(ii,jj, vv, n,n)
+A = convert(SparseMatrixCSC{eltype(A),Ti}, A)
 ii=0; jj=0; vv=0; ai=0;
 
 xi = rand(n,nvec);  xi = xi*10 .- 5;
@@ -150,7 +153,7 @@ println()
 println("Complex short")
 alpha = convert(ComplexF32, alpha)
 beta  = convert(ComplexF32,beta);
-A = convert(SparseMatrixCSC{ComplexF32,Int64},A);
+A = convert(SparseMatrixCSC{ComplexF32,Ti},A);
 x = convert(Array{ComplexF32},x);
 y = convert(Array{ComplexF32},y);
 
@@ -190,7 +193,7 @@ println()
 println("Complex short with a real matrix")
 alpha = convert(ComplexF32, alpha)
 beta  = convert(ComplexF32,beta);
-A = convert(SparseMatrixCSC{Float32,Int64},real(A));
+A = convert(SparseMatrixCSC{Float32,Ti},real(A));
 x = convert(Array{ComplexF32},x);
 y = convert(Array{ComplexF32},y);
 
@@ -231,7 +234,7 @@ println()
 println("Complex single with a complex matrix but double target and source")
 alpha = convert(ComplexF64, alpha)
 beta  = convert(ComplexF64,beta);
-A = convert(SparseMatrixCSC{ComplexF32,Int64},real(A) + 1im*A);
+A = convert(SparseMatrixCSC{ComplexF32,Ti},real(A) + 1im*A);
 x = convert(Array{ComplexF64},x);
 y = convert(Array{ComplexF64},y);
 
@@ -251,4 +254,5 @@ for k=0:numProcs
 end
 println()
 
-
+end # End Ti loop
+end # End testset
\ No newline at end of file