diff --git a/.classpath b/.classpath new file mode 100755 index 00000000..1a79d80b --- /dev/null +++ b/.classpath @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/.project b/.project new file mode 100644 index 00000000..20732747 --- /dev/null +++ b/.project @@ -0,0 +1,18 @@ + + + BIDMat + + + + + + org.scala-ide.sdt.core.scalabuilder + + + + + + org.scala-ide.sdt.core.scalanature + org.eclipse.jdt.core.javanature + + diff --git a/BIDMat.jar b/BIDMat.jar new file mode 100644 index 00000000..84bac8ed Binary files /dev/null and b/BIDMat.jar differ diff --git a/Copyright.txt b/Copyright.txt new file mode 100755 index 00000000..21326596 --- /dev/null +++ b/Copyright.txt @@ -0,0 +1,25 @@ +Copyright (c) 2012, Regents of the University of California +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/INSTALLING.txt b/INSTALLING.txt new file mode 100755 index 00000000..8a01920d --- /dev/null +++ b/INSTALLING.txt @@ -0,0 +1,28 @@ +BIDMat is a set of Scala, Java and native code libraries. To run it, you need: + +* A Java runtime, version 1.5 or later. + +* An installation of the Scala language, which you can get from here: http://www.scala-lang.org/ + You need Scala 2.9.1 or later. We have tested extensively on 2.9.1 and 2.9.2. + +* A 64-bit Intel machine (Linux, Windows, and *soon* Mac), if you plan to use any native + code acceleration. We have tested on Windows 7 and RedHat Enterprise Linux 6. + You should set Mat.noMKL=true in Scala if you don't have one. A lot of + the code will still work, but that's really not the point of the library. + +* To use these libraries with a CUDA GPU, you need a CUDA-enabled GPU device. The code has + been developed for GTX 600-series, and this is the ideal platform. The code should work + with Tesla K10 devices, which share the same GPU chip as the 680/690 which is our + main platform. Most operations will work also with 500-series and we are working to add + the others. + +* You need the [http://developer.nvidia.com/cuda/cuda-downloads NVIDIA CUDA driver and the + CUDA Toolkit] from the NVIDIA website. This version is based on CUDA 4.2. CUDA 5.0 support + should appear soon. + +On Linux, you should just be able to run the "bidmat" script from the top-level +directory. 
If that doesn't work, it's probably because $JAVA_HOME is not set in your +environment. You can set it inside the script instead. + +On Windows you will need to set the system PATH variable to include the lib subdirectory, +which is at /BIDMat/lib/win64, and which contains the native code DLLs. diff --git a/README.md b/README.md index 84cebc04..d166638e 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ +NOTE: Documentation for BIDMat is available here: +http://bid.berkeley.edu/BIDMat/index.php/Main_Page + BIDMat is a matrix library intended to support large-scale exploratory data analysis and to accelerate production deployment on single machines or clusters. While there are many excellent tools exist to @@ -12,17 +15,19 @@ of a high-end programming language including good general-purpose data sructures. And also of Scala's compiler-based REPL (Read-Eval-Print Loop). 2. To leverage native machine performance through native libraries -(Intel MKL, HDF5 and string/XML processing). Java/Scala are excellent -high-level languages, but are one or two orders of magnitude away from -native performance in some key areas: especially matrix algebra and -string processing, and below the bar to a lesser degree in File-IO. +(Intel MKL, HDF5, CUDA and string/XML processing). Java/Scala are +excellent high-level languages, but are one or two orders of magnitude +away from native performance in some key areas: especially matrix +algebra and string processing, and below the bar to a lesser degree in +File-IO. 3. To leverage GPU hardware and GPU-based data as a first-class object. GPUs now offer large improvements (again one or more orders of magnitude) over CPU performance in many areas that are relevant to data mining: matrix algebra, transcendental functions, random number -generation, network and graph algorithms and even natural language -parsing. 
Our own work suggests that the list is going to continue to -grow, and that GPU acceleration will fairly soon be a requirement for -competitive performance in most algorithms. +generation. These advantages in low-level operations carry over to +network and graph algorithms and even natural language parsing. Our +own work suggests that the list is going to continue to grow, and that +GPU acceleration will fairly soon be a requirement for competitive +performance in most algorithms. diff --git a/bidmat b/bidmat new file mode 100755 index 00000000..f3f48319 --- /dev/null +++ b/bidmat @@ -0,0 +1,22 @@ +#!/bin/bash +export BIDMAT_ROOT="$( cd -P "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export BIDMAT_ROOT="$( echo ${BIDMAT_ROOT} | sed s+/cygdrive/c+c:+ )" +# This is only needed/works on Linux +export LD_LIBRARY_PATH="${BIDMAT_ROOT}/lib/linux64:${BIDMAT_ROOT}/lib/linux64/JCUDA5.0:/usr/local/cuda-5.0/lib64:${LD_LIBRARY_PATH}" +# export JAVA_HOME="" # Set here if not set in environment +export JAVA_OPTS="-Xmx12G -Xms128M" # Set as much memory as possible +# Fix these if needed +export JCUDA_VERSION="0.5.0RC" +export JCUDA_LIBDIR=${BIDMAT_ROOT}/lib +export LIBDIR=${BIDMAT_ROOT}/lib + +export BIDMAT_LIBS="${BIDMAT_ROOT}/BIDMat.jar;${LIBDIR}/ptplot.jar;${LIBDIR}/ptplotapplication.jar;${LIBDIR}/jhdf5.jar" +export JCUDA_LIBS="${JCUDA_LIBDIR}/jcuda-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcublas-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcufft-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcurand-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcusparse-${JCUDA_VERSION}.jar" + +export ALL_LIBS="${BIDMAT_LIBS};${JCUDA_LIBS};${JAVA_HOME}/lib/tools.jar" + +if [ ! 
"$OS" = "Windows_NT" ]; then + export ALL_LIBS=`echo "${ALL_LIBS}" | sed 's/;/:/g'` +fi + +scala -nobootcp -cp "${ALL_LIBS}" -Yrepl-sync -i ${LIBDIR}/bidmat_init.scala \ No newline at end of file diff --git a/bidmat.cmd b/bidmat.cmd new file mode 100755 index 00000000..8db514e2 --- /dev/null +++ b/bidmat.cmd @@ -0,0 +1,20 @@ +@ECHO OFF +:: Set JAVA_HOME here if not set in environment +:: SET JAVA_HOME= +:: Set as much memory as possible +(SET JAVA_OPTS=-Xmx12G -Xms128M) +:: Fix these if needed +SET JCUDA_VERSION=0.5.0RC +SET JCUDA_LIBDIR=%CD%\lib +SET LIBDIR=%CD%\lib +SET PATH=%LIBDIR%\win64;%LIBDIR%\win64\JCUDA5.0;%PATH% + + +SET BIDMAT_LIBS=%CD%\BIDMat.jar;%LIBDIR%\ptplot.jar;%LIBDIR%\ptplotapplication.jar;%LIBDIR%\jhdf5.jar + +SET JCUDA_LIBS=%JCUDA_LIBDIR%\jcuda-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcublas-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcufft-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcurand-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcusparse-%JCUDA_VERSION%.jar + +SET ALL_LIBS=%BIDMAT_LIBS%;%JCUDA_LIBS%;%JAVA_HOME%\lib\tools.jar +echo %ALL_LIBS% + +scala -nobootcp -cp "%ALL_LIBS%" -Yrepl-sync -i %LIBDIR%\bidmat_init.scala \ No newline at end of file diff --git a/bidmat4.2 b/bidmat4.2 new file mode 100755 index 00000000..4a417778 --- /dev/null +++ b/bidmat4.2 @@ -0,0 +1,21 @@ +#!/bin/bash +export BIDMAT_ROOT=`pwd` +# This is only needed/works on Linux +export LD_LIBRARY_PATH="${BIDMAT_ROOT}/lib/linux64:${BIDMAT_ROOT}/lib/linux64/JCUDA4.2:/usr/local/cuda-4.2/lib64:${LD_LIBRARY_PATH}" +# export JAVA_HOME="" # Set here if not set in environment +export JAVA_OPTS="-Xmx12G -Xms128M" # Set as much memory as possible +# Fix these if needed +export JCUDA_VERSION="0.4.2" +export JCUDA_LIBDIR=${BIDMAT_ROOT}/lib +export LIBDIR=${BIDMAT_ROOT}/lib + +export BIDMAT_LIBS="${BIDMAT_ROOT}/BIDMat.jar;${LIBDIR}/ptplot.jar;${LIBDIR}/ptplotapplication.jar;${LIBDIR}/jhdf5.jar" +export 
JCUDA_LIBS="${JCUDA_LIBDIR}/jcuda-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcublas-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcufft-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcurand-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcusparse-${JCUDA_VERSION}.jar" + +export ALL_LIBS="${BIDMAT_LIBS};${JCUDA_LIBS};${JAVA_HOME}/lib/tools.jar" + +if [ ! "$OS" = "Windows_NT" ]; then + export ALL_LIBS=`echo "${ALL_LIBS}" | sed 's/;/:/g'` +fi + +scala -nobootcp -cp "${ALL_LIBS}" -Yrepl-sync -i ${LIBDIR}/bidmat_init.scala \ No newline at end of file diff --git a/build.sbt b/build.sbt new file mode 100755 index 00000000..e88a5e45 --- /dev/null +++ b/build.sbt @@ -0,0 +1,48 @@ + +name := "BIDMat" + +version := "0.1.0" + +organization := "edu.berkeley.bid" + +scalaVersion := "2.9.1" + +resolvers ++= Seq( + "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/" +) + +libraryDependencies <<= (scalaVersion, libraryDependencies) { (sv, deps) => + deps :+ ("org.scala-lang" % "scala-compiler" % sv) +} + +libraryDependencies += "org.scala-lang" % "jline" % "2.9.1" + +libraryDependencies += "org.scalatest" %% "scalatest" % "1.8" % "test" + +libraryDependencies += "org.scala-tools.testing" %% "scalacheck" % "1.9" % "test" + +libraryDependencies += "junit" % "junit" % "4.5" % "test" + +credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") + +javacOptions ++= Seq("-source", "1.5", "-target", "1.5") + +scalacOptions ++= Seq("-deprecation","-target:jvm-1.5") + +initialCommands := scala.io.Source.fromFile("lib/bidmat_init.scala").getLines.mkString("\n") + +javaOptions += "-Xmx12g" + +seq(ProguardPlugin.proguardSettings :_*) + +proguardOptions ++= Seq ( + "-keep class scala.** { *; }", + "-keep class org.jfree.** { *; }", + keepMain("scala.tools.nsc.MainGenericRunner"), + keepLimitedSerializability, + keepAllScala, + "-keep class ch.epfl.** { *; }", + "-keep interface scala.ScalaObject" +) + + diff --git a/jni/src/BIDMat_CBLAS.c b/jni/src/BIDMat_CBLAS.c new file mode 100755 index 
00000000..29c5ce5e --- /dev/null +++ b/jni/src/BIDMat_CBLAS.c @@ -0,0 +1,358 @@ +#include +#include +#include + + +JNIEXPORT jdouble JNICALL Java_edu_berkeley_bid_CBLAS_ddot +(JNIEnv * env, jobject calling_obj, jint N, jdoubleArray jX, jint incX, jdoubleArray jY, jint incY){ + jdouble * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jdouble * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + jdouble returnValue; + + returnValue = cblas_ddot(N, X, incX, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); + return returnValue; +} + + +JNIEXPORT jdouble JNICALL Java_edu_berkeley_bid_CBLAS_ddotxx +(JNIEnv * env, jobject calling_obj, jint N, jdoubleArray jX, jint startX, jdoubleArray jY, jint startY){ + jdouble * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jdouble * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + jdouble returnValue; + + returnValue = cblas_ddot(N, X+startX, 1, Y+startY, 1); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); + return returnValue; +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_daxpy +(JNIEnv * env, jobject calling_obj, jint N, jdouble a, jdoubleArray jX, jint incX, jdoubleArray jY, jint incY){ + jdouble * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jdouble * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + + cblas_daxpy(N, a, X, incX, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_daxpyxx +(JNIEnv * env, jobject calling_obj, jint N, jdouble a, jdoubleArray jX, jint startX, jdoubleArray jY, jint startY){ + jdouble * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jdouble * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + + cblas_daxpy(N, a, X+startX, 1, 
Y+startY, 1); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_dgemv +(JNIEnv * env, jobject calling_obj, jint order, jint transA, jint M, jint N, jdouble alpha, + jdoubleArray jA, jint lda, jdoubleArray jX, jint incX, jdouble beta, jdoubleArray jY, jint incY){ + jdouble * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jdouble * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jdouble * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + + cblas_dgemv((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, M, N, alpha, A, lda, X, incX, beta, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_dgemm +(JNIEnv * env, jobject calling_obj, jint order, jint transA, jint transB, jint M, jint N, jint K, + jdouble alpha, jdoubleArray jA, jint lda, jdoubleArray jB, jint ldb, jdouble beta, jdoubleArray jC, jint ldc){ + jdouble * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jdouble * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + jdouble * C = (*env)->GetPrimitiveArrayCritical(env, jC, JNI_FALSE); + + cblas_dgemm((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + + (*env)->ReleasePrimitiveArrayCritical(env, jC, C, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_domatcopy +(JNIEnv * env, jobject calling_obj, jstring j_order, jstring j_transA, jint M, jint N, + jdouble alpha, jdoubleArray j_A, jint lda, jdoubleArray j_B, jint ldb) { + char * order = (char *)(*env)->GetStringUTFChars(env, j_order, 0); + char * transA = (char 
*)(*env)->GetStringUTFChars(env, j_transA, 0); + jdouble * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE); + jdouble * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE); + + mkl_domatcopy(order[0], transA[0], M, N, alpha, A, lda, B, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0); + (*env)->ReleaseStringUTFChars(env, j_transA, transA); + (*env)->ReleaseStringUTFChars(env, j_order, order); +} + + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_dmcscm +(JNIEnv * env, jobject calling_obj, jint M, jint N, jdoubleArray j_A, jint lda, + jdoubleArray j_B, jintArray j_ir, jintArray j_jc, jdoubleArray j_C, jint ldc){ + jdouble * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE); + jdouble * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, JNI_FALSE); + jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, JNI_FALSE); + jdouble * C = (*env)->GetPrimitiveArrayCritical(env, j_C, JNI_FALSE); + + int ioff = jc[0]; + int i, j, ir0; + for (i = 0; i < N; i++) { + for (j = jc[i]-ioff; j < jc[i+1]-ioff; j++) { + ir0 = ir[j]-ioff; + cblas_daxpy(M, B[j], A+(ir0*lda), 1, C+(i*ldc), 1); + } + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_C, C, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0); +} + + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_dmcsrm +(JNIEnv * env, jobject calling_obj, jint M, jint N, jdoubleArray j_A, jint lda, + jdoubleArray j_B, jintArray j_ir, jintArray j_jc, jdoubleArray j_C, jint ldc){ + jdouble * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE); + jdouble * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 
JNI_FALSE); + jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, JNI_FALSE); + jdouble * C = (*env)->GetPrimitiveArrayCritical(env, j_C, JNI_FALSE); + + int ioff = jc[0]; + int i, j, k; + for (i = 0; i < N; i++) { + for (j = jc[i]-ioff; j < jc[i+1]-ioff; j++) { + k = ir[j]-ioff; + cblas_daxpy(M, B[j], A+(i*lda), 1, C+(k*ldc), 1); + } + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_C, C, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0); +} + +JNIEXPORT jfloat JNICALL Java_edu_berkeley_bid_CBLAS_sdot +(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jX, jint incX, jfloatArray jY, jint incY){ + jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + jfloat returnValue; + + returnValue = cblas_sdot(N, X, incX, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); + return returnValue; +} + + +JNIEXPORT jfloat JNICALL Java_edu_berkeley_bid_CBLAS_sdotxx +(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jX, jint startX, jfloatArray jY, jint startY){ + jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + jfloat returnValue; + + returnValue = cblas_sdot(N, X+startX, 1, Y+startY, 1); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); + return returnValue; +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_sgemv +(JNIEnv * env, jobject calling_obj, jint order, jint transA, jint M, jint N, jfloat alpha, +jfloatArray jA, jint lda, jfloatArray jX, jint incX, jfloat beta, jfloatArray jY, jint incY){ + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); 
+ jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + + cblas_sgemv((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, M, N, alpha, A, lda, X, incX, beta, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_sgemm +(JNIEnv * env, jobject calling_obj, jint order, jint transA, jint transB, jint M, jint N, jint K, +jfloat alpha, jfloatArray jA, jint lda, jfloatArray jB, jint ldb, jfloat beta, jfloatArray jC, jint ldc){ + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jfloat * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + jfloat * C = (*env)->GetPrimitiveArrayCritical(env, jC, JNI_FALSE); + + cblas_sgemm((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + + (*env)->ReleasePrimitiveArrayCritical(env, jC, C, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_somatcopy +(JNIEnv * env, jobject calling_obj, jstring j_order, jstring j_transA, jint M, jint N, + jfloat alpha, jfloatArray j_A, jint lda, jfloatArray j_B, jint ldb) { + char * order = (char *)(*env)->GetStringUTFChars(env, j_order, 0); + char * transA = (char *)(*env)->GetStringUTFChars(env, j_transA, 0); + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE); + jfloat * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE); + + mkl_somatcopy(order[0], transA[0], M, N, alpha, A, lda, B, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0); + (*env)->ReleaseStringUTFChars(env, j_transA, transA); + (*env)->ReleaseStringUTFChars(env, j_order, 
order); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_saxpy +(JNIEnv * env, jobject calling_obj, jint N, jfloat a, jfloatArray jX, jint incX, jfloatArray jY, jint incY){ + jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + + cblas_saxpy(N, a, X, incX, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_saxpyxx +(JNIEnv * env, jobject calling_obj, jint N, jfloat a, jfloatArray jX, jint startX, jfloatArray jY, jint startY){ + jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + + cblas_saxpy(N, a, X+startX, 1, Y+startY, 1); + + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_smcscm +(JNIEnv * env, jobject calling_obj, jint M, jint N, jfloatArray j_A, jint lda, + jfloatArray j_B, jintArray j_ir, jintArray j_jc, jfloatArray j_C, jint ldc){ + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE); + jfloat * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, JNI_FALSE); + jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, JNI_FALSE); + jfloat * C = (*env)->GetPrimitiveArrayCritical(env, j_C, JNI_FALSE); + + int ioff = jc[0]; + int i, j, ir0; + for (i = 0; i < N; i++) { + for (j = jc[i]-ioff; j < jc[i+1]-ioff; j++) { + ir0 = ir[j]-ioff; + cblas_saxpy(M, B[j], A+(ir0*lda), 1, C+(i*ldc), 1); + } + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_C, C, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_smcsrm +(JNIEnv * env, jobject calling_obj, jint M, jint N, jfloatArray j_A, jint lda, + jfloatArray j_B, jintArray j_ir, jintArray j_jc, jfloatArray j_C, jint ldc){ + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE); + jfloat * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, JNI_FALSE); + jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, JNI_FALSE); + jfloat * C = (*env)->GetPrimitiveArrayCritical(env, j_C, JNI_FALSE); + + int ioff = jc[0]; + int i, j, k; + for (i = 0; i < N; i++) { + for (j = jc[i]-ioff; j < jc[i+1]-ioff; j++) { + k = ir[j]-ioff; + cblas_saxpy(M, B[j], A+(i*lda), 1, C+(k*ldc), 1); + } + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_C, C, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0); +} + + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_cgemv +(JNIEnv * env, jobject calling_obj, jint order, jint transA, jint M, jint N, jfloatArray jAlpha, +jfloatArray jA, jint lda, jfloatArray jX, jint incX, jfloatArray jBeta, jfloatArray jY, jint incY){ + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + jfloat * alpha = (*env)->GetPrimitiveArrayCritical(env, jAlpha, JNI_FALSE); + jfloat * beta = (*env)->GetPrimitiveArrayCritical(env, jBeta, JNI_FALSE); + + cblas_cgemv((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, M, N, alpha, A, lda, X, incX, beta, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jBeta, beta, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jAlpha, alpha, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); +} + +/* + * JNI wrapper around cblas_cgemm. + * jAlpha and jBeta are Java float arrays whose pinned storage is passed to + * CBLAS as the alpha/beta scalar pointers; jA, jB and jC are the matrix + * buffers. All five arrays are pinned with GetPrimitiveArrayCritical and + * every one of them must be released again before returning. + */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_cgemm +(JNIEnv * env, jobject calling_obj, jint order, jint transA, jint transB, jint M, jint N, jint K, +jfloatArray jAlpha, jfloatArray jA, jint lda, jfloatArray jB, jint ldb, jfloatArray jBeta, jfloatArray jC, jint ldc){ + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jfloat * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + jfloat * C = (*env)->GetPrimitiveArrayCritical(env, jC, JNI_FALSE); + jfloat * alpha = (*env)->GetPrimitiveArrayCritical(env, jAlpha, JNI_FALSE); + jfloat * beta = (*env)->GetPrimitiveArrayCritical(env, jBeta, JNI_FALSE); + + cblas_cgemm((CBLAS_ORDER)order, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, + alpha, A, lda, B, ldb, beta, C, ldc); + + /* Release in reverse order of acquisition, same as cgemv. alpha and beta were previously never released. */ + (*env)->ReleasePrimitiveArrayCritical(env, jBeta, beta, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jAlpha, alpha, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jC, C, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); +} + + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_caxpy +(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jA, jfloatArray jX, jint incX, + jfloatArray jY, jint incY){ + jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + + cblas_caxpy(N, a, X, incX, Y, incY); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_caxpyxx +(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jA, jfloatArray jX, jint startX, jfloatArray jY, jint startY){ + jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); + jfloat * Y = 
(*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + + cblas_caxpy(N, a, X+startX, 1, Y+startY, 1); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); +} diff --git a/jni/src/BIDMat_CUMAT.cpp b/jni/src/BIDMat_CUMAT.cpp new file mode 100755 index 00000000..03054a1d --- /dev/null +++ b/jni/src/BIDMat_CUMAT.cpp @@ -0,0 +1,138 @@ + +#include +#include +#include "Logger.hpp" +#include "JNIUtils.hpp" +#include "PointerUtils.hpp" +#include "MatKernel.hpp" + +extern "C" { + + JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) + { + JNIEnv *env = NULL; + if (jvm->GetEnv((void **)&env, JNI_VERSION_1_4)) + { + return JNI_ERR; + } + + Logger::log(LOG_TRACE, "Initializing JCublas\n"); + + jclass cls = NULL; + + // Initialize the JNIUtils and PointerUtils + if (initJNIUtils(env) == JNI_ERR) return JNI_ERR; + if (initPointerUtils(env) == JNI_ERR) return JNI_ERR; + + return JNI_VERSION_1_4; + + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_applyop + (JNIEnv *env, jobject obj, jobject jA, jint Anrows, jint Ancols, + jobject jB, jint Bnrows, jint Bncols, jobject jC, jint opn) + { + float *nativeA = (float*)getPointer(env, jA); + float *nativeB = (float*)getPointer(env, jB); + float *nativeC = (float*)getPointer(env, jC); + + return apply_binop(nativeA, Anrows, Ancols, nativeB, Bnrows, Bncols, nativeC, opn); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_applyiop + (JNIEnv *env, jobject obj, jobject jA, jint Anrows, jint Ancols, + jobject jB, jint Bnrows, jint Bncols, jobject jC, jint opn) + { + int *nativeA = (int*)getPointer(env, jA); + int *nativeB = (int*)getPointer(env, jB); + int *nativeC = (int*)getPointer(env, jC); + + return apply_biniop(nativeA, Anrows, Ancols, nativeB, Bnrows, Bncols, nativeC, opn); + } + + JNIEXPORT jint JNICALL 
Java_edu_berkeley_bid_CUMAT_applygfun + (JNIEnv *env, jobject obj, jobject jA, jobject jB, jint N, jint opn) + { + float *nativeA = (float*)getPointer(env, jA); + float *nativeB = (float*)getPointer(env, jB); + + return apply_gfun(nativeA, nativeB, N, opn); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_applygfun2 + (JNIEnv *env, jobject obj, jobject jA, jobject jB, jobject jC, jint N, jint opn) + { + float *nativeA = (float*)getPointer(env, jA); + float *nativeB = (float*)getPointer(env, jB); + float *nativeC = (float*)getPointer(env, jC); + + return apply_gfun2(nativeA, nativeB, nativeC, N, opn); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_dsmult + (JNIEnv *env, jobject obj, jint nrows, jint ncols, jint nnz, + jobject jA, jobject jBdata, jobject jBir, jobject jBic, jobject jC) + { + float *A = (float*)getPointer(env, jA); + float *Bdata = (float*)getPointer(env, jBdata); + float *C = (float*)getPointer(env, jC); + int *Bir = (int*)getPointer(env, jBir); + int *Bic = (int*)getPointer(env, jBic); + + return dsmult(nrows, ncols, nnz, A, Bdata, Bir, Bic, C); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_dsmultT + (JNIEnv *env, jobject obj, jint nrows, jint ncols, jint nnz, + jobject jA, jobject jBdata, jobject jBir, jobject jBic, jobject jC) + { + float *A = (float*)getPointer(env, jA); + float *Bdata = (float*)getPointer(env, jBdata); + float *C = (float*)getPointer(env, jC); + int *Bir = (int*)getPointer(env, jBir); + int *Bic = (int*)getPointer(env, jBic); + + return dsmultT(nrows, ncols, nnz, A, Bdata, Bir, Bic, C); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_dds + (JNIEnv *env, jobject obj, jint nrows, jint nnz, + jobject jA, jobject jB, jobject jCir, jobject jCic, jobject jP) + { + float *A = (float*)getPointer(env, jA); + float *B = (float*)getPointer(env, jB); + float *P = (float*)getPointer(env, jP); + int *Cir = (int*)getPointer(env, jCir); + int *Cic = (int*)getPointer(env, jCic); + + return dds(nrows, 
nnz, A, B, Cir, Cic, P); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_reduce1op + (JNIEnv *env, jobject obj, jint nrows, jint ncols, jobject jA, jobject jB, jint opn) + { + float *A = (float*)getPointer(env, jA); + float *B = (float*)getPointer(env, jB); + + return reduce1op(nrows, ncols, A, B, opn); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_reduce2op + (JNIEnv *env, jobject obj, jint nrows, jint ncols, jobject jA, jobject jB, jint opn) + { + float *A = (float*)getPointer(env, jA); + float *B = (float*)getPointer(env, jB); + + return reduce2op(nrows, ncols, A, B, opn); + } + + JNIEXPORT jint JNICALL Java_edu_berkeley_bid_CUMAT_transpose + (JNIEnv *env, jobject obj, jobject jA, jint instride, jobject jB, jint outstride, jint nrows, jint ncols) + { + float *A = (float*)getPointer(env, jA); + float *B = (float*)getPointer(env, jB); + + return transpose(A, instride, B, outstride, nrows, ncols); + } + +} diff --git a/jni/src/BIDMat_LAPACK.c b/jni/src/BIDMat_LAPACK.c new file mode 100755 index 00000000..54f753af --- /dev/null +++ b/jni/src/BIDMat_LAPACK.c @@ -0,0 +1,1074 @@ + +#include +#include +#include + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgetrf +(JNIEnv * env, jobject calling_obj, jint order, jint m, jint n, jdoubleArray ja, jint lda, jintArray jipiv){ + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dgetrf(order, m, n, a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgetrf +(JNIEnv * env, jobject calling_obj, jint order, jint m, jint n, jfloatArray ja, jint lda, jintArray jipiv){ + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, 
JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_sgetrf(order, m, n, a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cgetrf +(JNIEnv * env, jobject calling_obj, jint order, jint m, jint n, jfloatArray ja, jint lda, jintArray jipiv){ + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_cgetrf(order, m, n, (MKL_Complex8 *)a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zgetrf +(JNIEnv * env, jobject calling_obj, jint order, jint m, jint n, jdoubleArray ja, jint lda, jintArray jipiv){ + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_zgetrf(order, m, n, (MKL_Complex16 *)a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgetri +(JNIEnv * env, jobject calling_obj, jint order, jint n, jdoubleArray ja, jint lda, jintArray jipiv){ + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dgetri(order, n, a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgetri +(JNIEnv * env, jobject calling_obj, jint order, 
jint n, jfloatArray ja, jint lda, jintArray jipiv){ + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_sgetri(order, n, a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cgetri +(JNIEnv * env, jobject calling_obj, jint order, jint n, jfloatArray ja, jint lda, jintArray jipiv){ + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_cgetri(order, n, (MKL_Complex8 *)a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zgetri +(JNIEnv * env, jobject calling_obj, jint order, jint n, jdoubleArray ja, jint lda, jintArray jipiv){ + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, ja, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, jipiv, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_zgetri(order, n, (MKL_Complex16 *)a, lda, ipiv); + + (*env)->ReleasePrimitiveArrayCritical(env, jipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, ja, a, 0); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgetrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_transa, jint n, jint nrhs, jdoubleArray j_a, jint lda, + jintArray j_ipiv, jdoubleArray j_b, int ldb){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, j_ipiv, JNI_FALSE); + jdouble * b = 
(*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dgetrs(order, *transa, n, nrhs, a, lda, ipiv, b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +} + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgetrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_transa, jint n, jint nrhs, jfloatArray j_a, jint lda, + jintArray j_ipiv, jfloatArray j_b, int ldb){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, j_ipiv, JNI_FALSE); + jfloat * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_sgetrs(order, *transa, n, nrhs, a, lda, ipiv, b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cgetrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_transa, jint n, jint nrhs, jfloatArray j_a, jint lda, + jintArray j_ipiv, jfloatArray j_b, int ldb){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, j_ipiv, JNI_FALSE); + jfloat * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_cgetrs(order, *transa, n, nrhs, (MKL_Complex8 *)a, lda, ipiv, (MKL_Complex8 *)b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, 
j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zgetrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_transa, jint n, jint nrhs, jdoubleArray j_a, jint lda, + jintArray j_ipiv, jdoubleArray j_b, int ldb){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ipiv = (*env)->GetPrimitiveArrayCritical(env, j_ipiv, JNI_FALSE); + jdouble * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_zgetrs(order, *transa, n, nrhs, (MKL_Complex16 *)a, lda, ipiv, (MKL_Complex16 *)b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ipiv, ipiv, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dtrtrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_meta, jint n, jint nrhs, jdoubleArray j_a, jint lda, + jdoubleArray j_b, int ldb){ + char * meta = (char *)(*env)->GetStringUTFChars(env, j_meta, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dtrtrs(order, meta[0], meta[1], meta[2], n, nrhs, a, lda, b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_meta, meta); + return returnValue; +} + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_strtrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_meta, jint n, 
jint nrhs, jfloatArray j_a, jint lda, + jfloatArray j_b, int ldb){ + char * meta = (char *)(*env)->GetStringUTFChars(env, j_meta, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_strtrs(order, meta[0], meta[1], meta[2], n, nrhs, a, lda, b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_meta, meta); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_ctrtrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_meta, jint n, jint nrhs, jfloatArray j_a, jint lda, + jfloatArray j_b, int ldb){ + char * meta = (char *)(*env)->GetStringUTFChars(env, j_meta, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_ctrtrs(order, meta[0], meta[1], meta[2], n, nrhs, (MKL_Complex8 *)a, lda, (MKL_Complex8 *)b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_meta, meta); + return returnValue; +} + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_ztrtrs +(JNIEnv * env, jobject calling_obj, jint order, jstring j_meta, jint n, jint nrhs, jdoubleArray j_a, jint lda, + jdoubleArray j_b, int ldb){ + char * meta = (char *)(*env)->GetStringUTFChars(env, j_meta, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_ztrtrs(order, meta[0], meta[1], meta[2], n, nrhs, (MKL_Complex16 *)a, lda, (MKL_Complex16 *)b, ldb); + + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_meta, meta); + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dsytrd +(JNIEnv * env, jobject calling_obj, jint order, jstring j_uplo, jint n, jdoubleArray j_a, jint lda, + jdoubleArray j_d, jdoubleArray j_e, jdoubleArray j_tau) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * d = (*env)->GetPrimitiveArrayCritical(env, j_d, JNI_FALSE); + jdouble * e = (*env)->GetPrimitiveArrayCritical(env, j_e, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dsytrd(order, *uplo, n, a, lda, d, e, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_e, e, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_d, d, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_ssytrd +(JNIEnv * env, jobject calling_obj, jint order, jstring j_uplo, jint n, jfloatArray j_a, jint lda, + jfloatArray j_d, jfloatArray j_e, jfloatArray j_tau) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * d = (*env)->GetPrimitiveArrayCritical(env, j_d, JNI_FALSE); + jfloat * e = (*env)->GetPrimitiveArrayCritical(env, j_e, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_ssytrd(order, *uplo, n, a, lda, d, e, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_e, e, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_d, d, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dorgtr +(JNIEnv * env, jobject calling_obj, jint order, jstring j_uplo, jint n, + jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dorgtr(order, *uplo, n, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sorgtr +(JNIEnv * env, jobject calling_obj, jint order, jstring j_uplo, jint n, + jfloatArray j_a, jint lda, jfloatArray j_tau) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_sorgtr(order, *uplo, n, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dsteqr +(JNIEnv * env, jobject calling_obj, jint order, jstring j_compz, jint n, + jdoubleArray j_d, jdoubleArray j_e, jdoubleArray j_z, int ldz) { + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jdouble * d = (*env)->GetPrimitiveArrayCritical(env, j_d, JNI_FALSE); + jdouble * e = (*env)->GetPrimitiveArrayCritical(env, j_e, JNI_FALSE); + jdouble * z = (*env)->GetPrimitiveArrayCritical(env, j_z, 
JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dsteqr(order, *compz, n, d, e, z, ldz); + + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_e, e, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_d, d, 0); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_ssteqr +(JNIEnv * env, jobject calling_obj, jint order, jstring j_compz, jint n, +jfloatArray j_d, jfloatArray j_e, jfloatArray j_z, int ldz) { + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jfloat * d = (*env)->GetPrimitiveArrayCritical(env, j_d, JNI_FALSE); + jfloat * e = (*env)->GetPrimitiveArrayCritical(env, j_e, JNI_FALSE); + jfloat * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_ssteqr(order, *compz, n, d, e, z, ldz); + + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_e, e, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_d, d, 0); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_csteqr +(JNIEnv * env, jobject calling_obj, jint order, jstring j_compz, jint n, +jfloatArray j_d, jfloatArray j_e, jfloatArray j_z, int ldz) { + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jfloat * d = (*env)->GetPrimitiveArrayCritical(env, j_d, JNI_FALSE); + jfloat * e = (*env)->GetPrimitiveArrayCritical(env, j_e, JNI_FALSE); + jfloat * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_csteqr(order, *compz, n, d, e, (MKL_Complex8 *)z, ldz); + + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_e, e, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_d, d, 0); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); 
+ + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zsteqr +(JNIEnv * env, jobject calling_obj, jint order, jstring j_compz, jint n, +jdoubleArray j_d, jdoubleArray j_e, jdoubleArray j_z, int ldz) { + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jdouble * d = (*env)->GetPrimitiveArrayCritical(env, j_d, JNI_FALSE); + jdouble * e = (*env)->GetPrimitiveArrayCritical(env, j_e, JNI_FALSE); + jdouble * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_zsteqr(order, *compz, n, d, e, (MKL_Complex16 *)z, ldz); + + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_e, e, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_d, d, 0); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dstedc +(JNIEnv * env, jobject calling_obj, jint order, jstring j_compz, jint n, + jdoubleArray j_d, jdoubleArray j_e, jdoubleArray j_z, int ldz) { + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jdouble * d = (*env)->GetPrimitiveArrayCritical(env, j_d, JNI_FALSE); + jdouble * e = (*env)->GetPrimitiveArrayCritical(env, j_e, JNI_FALSE); + jdouble * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dstedc(order, *compz, n, d, e, z, ldz); + + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_e, e, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_d, d, 0); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dsyevd +(JNIEnv * env, jobject calling_obj, jint order, jstring j_compz, jstring j_uplo, jint n, + jdoubleArray j_a, int lda, jdoubleArray j_w) { + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + char 
* uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * w = (*env)->GetPrimitiveArrayCritical(env, j_w, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dsyevd(order, *compz, *uplo, n, a, lda, w); + + (*env)->ReleasePrimitiveArrayCritical(env, j_w, w, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_ssyevd +(JNIEnv * env, jobject calling_obj, jint order, jstring j_compz, jstring j_uplo, jint n, + jfloatArray j_a, int lda, jfloatArray j_w) { + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * w = (*env)->GetPrimitiveArrayCritical(env, j_w, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_ssyevd(order, *compz, *uplo, n, a, lda, w); + + (*env)->ReleasePrimitiveArrayCritical(env, j_w, w, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dpotrf +(JNIEnv * env, jobject calling_obj, jint order, jstring j_uplo, jint n, jdoubleArray j_a, jint lda) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_dpotrf(order, *uplo, n, a, lda); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_spotrf +(JNIEnv * env, jobject 
calling_obj, jint order, jstring j_uplo, jint n, jfloatArray j_a, jint lda) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_spotrf(order, *uplo, n, a, lda); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cpotrf +(JNIEnv * env, jobject calling_obj, jint order, jstring j_uplo, jint n, jfloatArray j_a, jint lda) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_cpotrf(order, *uplo, n, (MKL_Complex8 *)a, lda); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zpotrf +(JNIEnv * env, jobject calling_obj, jint order, jstring j_uplo, jint n, jdoubleArray j_a, jint lda) { + char * uplo = (char *)(*env)->GetStringUTFChars(env, j_uplo, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint returnValue; + + returnValue = LAPACKE_zpotrf(order, *uplo, n, (MKL_Complex16 *)a, lda); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleaseStringUTFChars(env, j_uplo, uplo); + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgebal +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jint n, jfloatArray j_a, jint lda, jintArray j_ilo, jintArray j_ihi, jfloatArray j_scale) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ilo = (*env)->GetPrimitiveArrayCritical(env, j_ilo, JNI_FALSE); + jint * ihi = 
(*env)->GetPrimitiveArrayCritical(env, j_ihi, JNI_FALSE); + jfloat * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + + jint retval = LAPACKE_sgebal(matrix_order, *job, n, a, lda, ilo, ihi, scale); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ilo, ilo, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ihi, ihi, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgebal +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jint n, jdoubleArray j_a, jint lda, jintArray j_ilo, jintArray j_ihi, jdoubleArray j_scale) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ilo = (*env)->GetPrimitiveArrayCritical(env, j_ilo, JNI_FALSE); + jint * ihi = (*env)->GetPrimitiveArrayCritical(env, j_ihi, JNI_FALSE); + jdouble * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + + jint retval = LAPACKE_dgebal(matrix_order, *job, n, a, lda, ilo, ihi, scale); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ilo, ilo, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ihi, ihi, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cgebal +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jint n, jfloatArray j_a, jint lda, jintArray j_ilo, jintArray j_ihi, jfloatArray j_scale) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ilo = (*env)->GetPrimitiveArrayCritical(env, j_ilo, JNI_FALSE); + jint * ihi = 
(*env)->GetPrimitiveArrayCritical(env, j_ihi, JNI_FALSE); + jfloat * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + + jint retval = LAPACKE_cgebal(matrix_order, *job, n, (lapack_complex_float *)a, lda, ilo, ihi, scale); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ilo, ilo, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ihi, ihi, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zgebal +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jint n, jdoubleArray j_a, jint lda, jintArray j_ilo, jintArray j_ihi, jdoubleArray j_scale) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * ilo = (*env)->GetPrimitiveArrayCritical(env, j_ilo, JNI_FALSE); + jint * ihi = (*env)->GetPrimitiveArrayCritical(env, j_ihi, JNI_FALSE); + jdouble * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + + jint retval = LAPACKE_zgebal(matrix_order, *job, n, (lapack_complex_double *)a, lda, ilo, ihi, scale); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ilo, ilo, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ihi, ihi, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cunghr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint n, jint ilo, jint ihi, jfloatArray j_a, jint lda, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_cunghr(matrix_order, n, ilo, ihi, 
(lapack_complex_float *)a, lda, (lapack_complex_float *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zunghr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint n, jint ilo, jint ihi, jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_zunghr(matrix_order, n, ilo, ihi, (lapack_complex_double *)a, lda, (lapack_complex_double *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_strevc +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_side, jstring j_howmny, jintArray j_select, jint n, jfloatArray j_t, jint ldt, jfloatArray j_vl, jint ldvl, jfloatArray j_vr, jint ldvr, jint mm, jintArray j_m) { + + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + char * howmny = (char *)(*env)->GetStringUTFChars(env, j_howmny, JNI_FALSE); + jint * select = (*env)->GetPrimitiveArrayCritical(env, j_select, JNI_FALSE); + jfloat * t = (*env)->GetPrimitiveArrayCritical(env, j_t, JNI_FALSE); + jfloat * vl = (*env)->GetPrimitiveArrayCritical(env, j_vl, JNI_FALSE); + jfloat * vr = (*env)->GetPrimitiveArrayCritical(env, j_vr, JNI_FALSE); + jint * m = (*env)->GetPrimitiveArrayCritical(env, j_m, JNI_FALSE); + + jint retval = LAPACKE_strevc(matrix_order, *side, *howmny, (lapack_logical *)select, n, t, ldt, vl, ldvl, vr, ldvr, mm, m); + + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleaseStringUTFChars(env, j_howmny, howmny); + (*env)->ReleasePrimitiveArrayCritical(env, j_select, select, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_t, t, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, j_vl, vl, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vr, vr, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_m, m, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dtrevc +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_side, jstring j_howmny, jintArray j_select, jint n, jdoubleArray j_t, jint ldt, jdoubleArray j_vl, jint ldvl, jdoubleArray j_vr, jint ldvr, jint mm, jintArray j_m) { + + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + char * howmny = (char *)(*env)->GetStringUTFChars(env, j_howmny, JNI_FALSE); + jint * select = (*env)->GetPrimitiveArrayCritical(env, j_select, JNI_FALSE); + jdouble * t = (*env)->GetPrimitiveArrayCritical(env, j_t, JNI_FALSE); + jdouble * vl = (*env)->GetPrimitiveArrayCritical(env, j_vl, JNI_FALSE); + jdouble * vr = (*env)->GetPrimitiveArrayCritical(env, j_vr, JNI_FALSE); + jint * m = (*env)->GetPrimitiveArrayCritical(env, j_m, JNI_FALSE); + + jint retval = LAPACKE_dtrevc(matrix_order, *side, *howmny, (lapack_logical *)select, n, t, ldt, vl, ldvl, vr, ldvr, mm, m); + + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleaseStringUTFChars(env, j_howmny, howmny); + (*env)->ReleasePrimitiveArrayCritical(env, j_select, select, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_t, t, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vl, vl, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vr, vr, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_m, m, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_ctrevc +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_side, jstring j_howmny, jintArray j_select, jint n, jfloatArray j_t, jint ldt, jfloatArray j_vl, jint ldvl, jfloatArray j_vr, jint ldvr, jint mm, jintArray j_m) { + + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + char * howmny = (char *)(*env)->GetStringUTFChars(env, 
j_howmny, JNI_FALSE); + jint * select = (*env)->GetPrimitiveArrayCritical(env, j_select, JNI_FALSE); + jfloat * t = (*env)->GetPrimitiveArrayCritical(env, j_t, JNI_FALSE); + jfloat * vl = (*env)->GetPrimitiveArrayCritical(env, j_vl, JNI_FALSE); + jfloat * vr = (*env)->GetPrimitiveArrayCritical(env, j_vr, JNI_FALSE); + jint * m = (*env)->GetPrimitiveArrayCritical(env, j_m, JNI_FALSE); + + jint retval = LAPACKE_ctrevc(matrix_order, *side, *howmny, (lapack_logical *)select, n, (lapack_complex_float *)t, ldt, (lapack_complex_float *)vl, ldvl, (lapack_complex_float *)vr, ldvr, mm, m); + + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleaseStringUTFChars(env, j_howmny, howmny); + (*env)->ReleasePrimitiveArrayCritical(env, j_select, select, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_t, t, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vl, vl, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vr, vr, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_m, m, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_ztrevc +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_side, jstring j_howmny, jintArray j_select, jint n, jdoubleArray j_t, jint ldt, jdoubleArray j_vl, jint ldvl, jdoubleArray j_vr, jint ldvr, jint mm, jintArray j_m) { + + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + char * howmny = (char *)(*env)->GetStringUTFChars(env, j_howmny, JNI_FALSE); + jint * select = (*env)->GetPrimitiveArrayCritical(env, j_select, JNI_FALSE); + jdouble * t = (*env)->GetPrimitiveArrayCritical(env, j_t, JNI_FALSE); + jdouble * vl = (*env)->GetPrimitiveArrayCritical(env, j_vl, JNI_FALSE); + jdouble * vr = (*env)->GetPrimitiveArrayCritical(env, j_vr, JNI_FALSE); + jint * m = (*env)->GetPrimitiveArrayCritical(env, j_m, JNI_FALSE); + + jint retval = LAPACKE_ztrevc(matrix_order, *side, *howmny, (lapack_logical *)select, n, (lapack_complex_double *)t, ldt, (lapack_complex_double *)vl, ldvl, 
(lapack_complex_double *)vr, ldvr, mm, m); + + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleaseStringUTFChars(env, j_howmny, howmny); + (*env)->ReleasePrimitiveArrayCritical(env, j_select, select, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_t, t, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vl, vl, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vr, vr, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_m, m, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgehrd +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint n, jint ilo, jint ihi, jfloatArray j_a, jint lda, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_sgehrd(matrix_order, n, ilo, ihi, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgehrd +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint n, jint ilo, jint ihi, jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_dgehrd(matrix_order, n, ilo, ihi, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cgehrd +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint n, jint ilo, jint ihi, jfloatArray j_a, jint lda, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_cgehrd(matrix_order, n, ilo, ihi, 
(lapack_complex_float *)a, lda, (lapack_complex_float *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zgehrd +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint n, jint ilo, jint ihi, jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_zgehrd(matrix_order, n, ilo, ihi, (lapack_complex_double *)a, lda, (lapack_complex_double *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_shseqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_compz, jint n, jint ilo, jint ihi, jfloatArray j_h, jint ldh, jfloatArray j_wr, jfloatArray j_wi, jfloatArray j_z, jint ldz) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jfloat * h = (*env)->GetPrimitiveArrayCritical(env, j_h, JNI_FALSE); + jfloat * wr = (*env)->GetPrimitiveArrayCritical(env, j_wr, JNI_FALSE); + jfloat * wi = (*env)->GetPrimitiveArrayCritical(env, j_wi, JNI_FALSE); + jfloat * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + + jint retval = LAPACKE_shseqr(matrix_order, *job, *compz, n, ilo, ihi, h, ldh, wr, wi, z, ldz); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + (*env)->ReleasePrimitiveArrayCritical(env, j_h, h, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_wr, wr, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_wi, wi, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + return retval; +} + +JNIEXPORT jint 
JNICALL Java_edu_berkeley_bid_LAPACK_dhseqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_compz, jint n, jint ilo, jint ihi, jdoubleArray j_h, jint ldh, jdoubleArray j_wr, jdoubleArray j_wi, jdoubleArray j_z, jint ldz) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jdouble * h = (*env)->GetPrimitiveArrayCritical(env, j_h, JNI_FALSE); + jdouble * wr = (*env)->GetPrimitiveArrayCritical(env, j_wr, JNI_FALSE); + jdouble * wi = (*env)->GetPrimitiveArrayCritical(env, j_wi, JNI_FALSE); + jdouble * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + + jint retval = LAPACKE_dhseqr(matrix_order, *job, *compz, n, ilo, ihi, h, ldh, wr, wi, z, ldz); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + (*env)->ReleasePrimitiveArrayCritical(env, j_h, h, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_wr, wr, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_wi, wi, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_chseqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_compz, jint n, jint ilo, jint ihi, jfloatArray j_h, jint ldh, jfloatArray j_w, jfloatArray j_z, jint ldz) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jfloat * h = (*env)->GetPrimitiveArrayCritical(env, j_h, JNI_FALSE); + jfloat * w = (*env)->GetPrimitiveArrayCritical(env, j_w, JNI_FALSE); + jfloat * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + + jint retval = LAPACKE_chseqr(matrix_order, *job, *compz, n, ilo, ihi, (lapack_complex_float *)h, ldh, (lapack_complex_float *)w, (lapack_complex_float *)z, ldz); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + 
(*env)->ReleaseStringUTFChars(env, j_compz, compz); + (*env)->ReleasePrimitiveArrayCritical(env, j_h, h, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_w, w, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zhseqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_compz, jint n, jint ilo, jint ihi, jdoubleArray j_h, jint ldh, jdoubleArray j_w, jdoubleArray j_z, jint ldz) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * compz = (char *)(*env)->GetStringUTFChars(env, j_compz, JNI_FALSE); + jdouble * h = (*env)->GetPrimitiveArrayCritical(env, j_h, JNI_FALSE); + jdouble * w = (*env)->GetPrimitiveArrayCritical(env, j_w, JNI_FALSE); + jdouble * z = (*env)->GetPrimitiveArrayCritical(env, j_z, JNI_FALSE); + + jint retval = LAPACKE_zhseqr(matrix_order, *job, *compz, n, ilo, ihi, (lapack_complex_double *)h, ldh, (lapack_complex_double *)w, (lapack_complex_double *)z, ldz); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleaseStringUTFChars(env, j_compz, compz); + (*env)->ReleasePrimitiveArrayCritical(env, j_h, h, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_w, w, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_z, z, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgebak +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_side, jint n, jint ilo, jint ihi, jfloatArray j_scale, jint m, jfloatArray j_v, jint ldv) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + jfloat * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + jfloat * v = (*env)->GetPrimitiveArrayCritical(env, j_v, JNI_FALSE); + + jint retval = LAPACKE_sgebak(matrix_order, *job, *side, n, ilo, ihi, scale, m, v, ldv); + + (*env)->ReleaseStringUTFChars(env, 
j_job, job); + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_v, v, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgebak +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_side, jint n, jint ilo, jint ihi, jdoubleArray j_scale, jint m, jdoubleArray j_v, jint ldv) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + jdouble * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + jdouble * v = (*env)->GetPrimitiveArrayCritical(env, j_v, JNI_FALSE); + + jint retval = LAPACKE_dgebak(matrix_order, *job, *side, n, ilo, ihi, scale, m, v, ldv); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_v, v, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cgebak +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_side, jint n, jint ilo, jint ihi, jfloatArray j_scale, jint m, jfloatArray j_v, jint ldv) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + jfloat * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + jfloat * v = (*env)->GetPrimitiveArrayCritical(env, j_v, JNI_FALSE); + + jint retval = LAPACKE_cgebak(matrix_order, *job, *side, n, ilo, ihi, scale, m, (lapack_complex_float *)v, ldv); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_v, v, 0); + return retval; +} + +JNIEXPORT jint 
JNICALL Java_edu_berkeley_bid_LAPACK_zgebak +(JNIEnv * env, jobject calling_obj, jint matrix_order, jstring j_job, jstring j_side, jint n, jint ilo, jint ihi, jdoubleArray j_scale, jint m, jdoubleArray j_v, jint ldv) { + + char * job = (char *)(*env)->GetStringUTFChars(env, j_job, JNI_FALSE); + char * side = (char *)(*env)->GetStringUTFChars(env, j_side, JNI_FALSE); + jdouble * scale = (*env)->GetPrimitiveArrayCritical(env, j_scale, JNI_FALSE); + jdouble * v = (*env)->GetPrimitiveArrayCritical(env, j_v, JNI_FALSE); + + jint retval = LAPACKE_zgebak(matrix_order, *job, *side, n, ilo, ihi, scale, m, (lapack_complex_double *)v, ldv); + + (*env)->ReleaseStringUTFChars(env, j_job, job); + (*env)->ReleaseStringUTFChars(env, j_side, side); + (*env)->ReleasePrimitiveArrayCritical(env, j_scale, scale, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_v, v, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgeqrf +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jfloatArray j_a, jint lda, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_sgeqrf(matrix_order, m, n, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgeqrf +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_dgeqrf(matrix_order, m, n, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL 
Java_edu_berkeley_bid_LAPACK_cgeqrf +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jfloatArray j_a, jint lda, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_cgeqrf(matrix_order, m, n, (lapack_complex_float *)a, lda, (lapack_complex_float *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zgeqrf +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_zgeqrf(matrix_order, m, n, (lapack_complex_double *)a, lda, (lapack_complex_double *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sgeqp3 +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jfloatArray j_a, jint lda, jintArray j_jpvt, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * jpvt = (*env)->GetPrimitiveArrayCritical(env, j_jpvt, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_sgeqp3(matrix_order, m, n, a, lda, jpvt, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jpvt, jpvt, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dgeqp3 +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jdoubleArray 
j_a, jint lda, jintArray j_jpvt, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * jpvt = (*env)->GetPrimitiveArrayCritical(env, j_jpvt, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_dgeqp3(matrix_order, m, n, a, lda, jpvt, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jpvt, jpvt, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cgeqp3 +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jfloatArray j_a, jint lda, jintArray j_jpvt, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * jpvt = (*env)->GetPrimitiveArrayCritical(env, j_jpvt, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_cgeqp3(matrix_order, m, n, (lapack_complex_float *)a, lda, jpvt, (lapack_complex_float *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jpvt, jpvt, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zgeqp3 +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jdoubleArray j_a, jint lda, jintArray j_jpvt, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jint * jpvt = (*env)->GetPrimitiveArrayCritical(env, j_jpvt, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_zgeqp3(matrix_order, m, n, (lapack_complex_double *)a, lda, jpvt, (lapack_complex_double *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jpvt, jpvt, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_sorgqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jint k, jfloatArray j_a, jint lda, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_sorgqr(matrix_order, m, n, k, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_dorgqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jint k, jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_dorgqr(matrix_order, m, n, k, a, lda, tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_cungqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jint k, jfloatArray j_a, jint lda, jfloatArray j_tau) { + + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_cungqr(matrix_order, m, n, k, (lapack_complex_float *)a, lda, (lapack_complex_float *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_LAPACK_zungqr +(JNIEnv * env, jobject calling_obj, jint matrix_order, jint m, jint n, jint k, jdoubleArray j_a, jint lda, jdoubleArray j_tau) { + + jdouble * a = 
(*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * tau = (*env)->GetPrimitiveArrayCritical(env, j_tau, JNI_FALSE); + + jint retval = LAPACKE_zungqr(matrix_order, m, n, k, (lapack_complex_double *)a, lda, (lapack_complex_double *)tau); + + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_tau, tau, 0); + return retval; +} diff --git a/jni/src/BIDMat_SPBLAS.c b/jni/src/BIDMat_SPBLAS.c new file mode 100755 index 00000000..597f6665 --- /dev/null +++ b/jni/src/BIDMat_SPBLAS.c @@ -0,0 +1,232 @@ + +#include +#include +#include + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_SPBLAS_scsrmm +(JNIEnv * env, jobject calling_obj, jstring j_transa, jint m, jint n, jint k, jfloat alpha, jstring j_matdescra, + jfloatArray j_vals, jintArray j_ir, jintArray j_jc, jfloatArray j_b, jint ldb, jfloat beta, jfloatArray j_c, jint ldc){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, 0); + char * matdescra = (char *)(*env)->GetStringUTFChars(env, j_matdescra, 0); + jfloat * vals = (*env)->GetPrimitiveArrayCritical(env, j_vals, 0); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 0); + jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, 0); + jfloat * b = (*env)->GetPrimitiveArrayCritical(env, j_b, 0); + jfloat * c = (*env)->GetPrimitiveArrayCritical(env, j_c, 0); + jint returnValue = 0; + + if (transa != NULL && matdescra != NULL && vals != NULL && ir != NULL && jc != NULL && b != NULL && c != NULL) { + mkl_scsrmm(transa, &m, &n, &k, &alpha, matdescra, vals, ir, jc, jc+1, b, &ldb, &beta, c, &ldc); + } else { + returnValue = 1; + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_c, c, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vals, vals, 0); + (*env)->ReleaseStringUTFChars(env, j_matdescra, matdescra); + 
(*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +}; +
+/*
+ * JNI entry point for edu.berkeley.bid.SPBLAS.scscmm; forwards to mkl_scscmm.
+ *
+ * Scalars (m, n, k, alpha, ldb, beta, ldc) are passed to MKL by address.
+ * jc and jc+1 are passed as two consecutive pointer arguments
+ * (presumably MKL's column-start / column-end arrays -- confirm against the MKL docs).
+ *
+ * Returns 0 when the MKL routine was invoked, 1 when any string or array
+ * could not be obtained (NULL), in which case MKL is not called.
+ */
+JNIEXPORT jint JNICALL Java_edu_berkeley_bid_SPBLAS_scscmm
+(JNIEnv * env, jobject calling_obj, jstring j_transa, jint m, jint n, jint k, jfloat alpha, jstring j_matdescra,
+ jfloatArray j_vals, jintArray j_ir, jintArray j_jc, jfloatArray j_b, jint ldb, jfloat beta, jfloatArray j_c, jint ldc){
+  char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, 0);
+  char * matdescra = (char *)(*env)->GetStringUTFChars(env, j_matdescra, 0);
+  jfloat * vals = (*env)->GetPrimitiveArrayCritical(env, j_vals, 0);
+  jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 0);
+  jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, 0);
+  jfloat * b = (*env)->GetPrimitiveArrayCritical(env, j_b, 0);
+  jfloat * c = (*env)->GetPrimitiveArrayCritical(env, j_c, 0);
+  jint returnValue = 0;
+
+  if (transa != NULL && matdescra != NULL && vals != NULL && ir != NULL && jc != NULL && b != NULL && c != NULL) {
+    mkl_scscmm(transa, &m, &n, &k, &alpha, matdescra, vals, ir, jc, jc+1, b, &ldb, &beta, c, &ldc);
+  } else {
+    returnValue = 1;
+  }
+
+  /* Released in reverse acquisition order: arrays first, strings last. */
+  (*env)->ReleasePrimitiveArrayCritical(env, j_c, c, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_vals, vals, 0);
+  (*env)->ReleaseStringUTFChars(env, j_matdescra, matdescra);
+  (*env)->ReleaseStringUTFChars(env, j_transa, transa);
+  return returnValue;
+};
+
+
+/*
+ * JNI entry point for edu.berkeley.bid.SPBLAS.scscmv; forwards to MKL_SCSCMV.
+ *
+ * Scalars (m, k, alpha, beta) are passed to MKL by address; jc and jc+1 are
+ * passed as two consecutive pointer arguments
+ * (presumably MKL's column-start / column-end arrays -- confirm against the MKL docs).
+ *
+ * Returns 0 when the MKL routine was invoked, 1 when any string or array
+ * could not be obtained (NULL), in which case MKL is not called.
+ */
+JNIEXPORT jint JNICALL Java_edu_berkeley_bid_SPBLAS_scscmv
+(JNIEnv * env, jobject calling_obj, jstring j_transa, jint m, jint k, jfloat alpha, jstring j_matdescra,
+ jfloatArray j_vals, jintArray j_ir, jintArray j_jc, jfloatArray j_x, jfloat beta, jfloatArray j_y){
+  char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, 0);
+  char * matdescra = (char *)(*env)->GetStringUTFChars(env, j_matdescra, 0);
+  jfloat * vals = (*env)->GetPrimitiveArrayCritical(env, j_vals, 0);
+  jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 0);
+  jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, 0);
+  jfloat * x = (*env)->GetPrimitiveArrayCritical(env, j_x, 0);
+  jfloat * y = (*env)->GetPrimitiveArrayCritical(env, j_y, 0);
+  /* Must start at 0: the success branch below never assigns it. */
+  jint returnValue = 0;
+
+  if (transa != NULL && matdescra != NULL && vals != NULL && ir != NULL && jc != NULL && x != NULL && y != NULL) {
+    MKL_SCSCMV(transa, &m, &k, &alpha, matdescra, vals, ir, jc, jc+1, x, &beta, y);
+  } else {
+    returnValue = 1;
+  }
+
+  /* Released in reverse acquisition order: arrays first, strings last. */
+  (*env)->ReleasePrimitiveArrayCritical(env, j_y, y, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_x, x, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_vals, vals, 0);
+  (*env)->ReleaseStringUTFChars(env, j_matdescra, matdescra);
+  (*env)->ReleaseStringUTFChars(env, j_transa, transa);
+  return returnValue;
+};
+
+/*
+ * JNI entry point for edu.berkeley.bid.SPBLAS.scsrmv; forwards to MKL_SCSRMV.
+ *
+ * Scalars (m, k, alpha, beta) are passed to MKL by address; jc and jc+1 are
+ * passed as two consecutive pointer arguments
+ * (presumably MKL's row-start / row-end arrays -- confirm against the MKL docs).
+ *
+ * Returns 0 when the MKL routine was invoked, 1 when any string or array
+ * could not be obtained (NULL), in which case MKL is not called.
+ */
+JNIEXPORT jint JNICALL Java_edu_berkeley_bid_SPBLAS_scsrmv
+(JNIEnv * env, jobject calling_obj, jstring j_transa, jint m, jint k, jfloat alpha, jstring j_matdescra,
+ jfloatArray j_vals, jintArray j_ir, jintArray j_jc, jfloatArray j_x, jfloat beta, jfloatArray j_y){
+  char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, 0);
+  char * matdescra = (char *)(*env)->GetStringUTFChars(env, j_matdescra, 0);
+  jfloat * vals = (*env)->GetPrimitiveArrayCritical(env, j_vals, 0);
+  jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 0);
+  jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, 0);
+  jfloat * x = (*env)->GetPrimitiveArrayCritical(env, j_x, 0);
+  jfloat * y = (*env)->GetPrimitiveArrayCritical(env, j_y, 0);
+  /* Must start at 0: the success branch below never assigns it. */
+  jint returnValue = 0;
+
+  if (transa != NULL && matdescra != NULL && vals != NULL && ir != NULL && jc != NULL && x != NULL && y != NULL) {
+    MKL_SCSRMV(transa, &m, &k, &alpha, matdescra, vals, ir, jc, jc+1, x, &beta, y);
+  } else {
+    returnValue = 1;
+  }
+
+  /* Released in reverse acquisition order: arrays first, strings last. */
+  (*env)->ReleasePrimitiveArrayCritical(env, j_y, y, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_x, x, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0);
+  (*env)->ReleasePrimitiveArrayCritical(env, j_vals, vals, 0);
+  (*env)->ReleaseStringUTFChars(env, j_matdescra, matdescra);
+  (*env)->ReleaseStringUTFChars(env, j_transa, transa);
+  return returnValue;
+};
+
+
+JNIEXPORT jint JNICALL Java_edu_berkeley_bid_SPBLAS_dcsrmm
+(JNIEnv * env, jobject calling_obj, jstring j_transa, jint m, jint n, jint k, jdouble alpha, jstring j_matdescra, + jdoubleArray j_vals, jintArray j_ir, jintArray j_jc, jdoubleArray j_b, jint ldb, jdouble beta, jdoubleArray j_c, jint ldc){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, 0); + char * matdescra = (char *)(*env)->GetStringUTFChars(env, j_matdescra, 0); + jdouble * vals = (*env)->GetPrimitiveArrayCritical(env, j_vals, 0); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 0); + jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, 0); + jdouble * b = (*env)->GetPrimitiveArrayCritical(env, j_b, 0); + jdouble * c = (*env)->GetPrimitiveArrayCritical(env, j_c, 0); + jint returnValue = 0; + + if (transa != NULL && matdescra != NULL && vals != NULL && ir != NULL && jc != NULL && b != NULL && c != NULL) { + mkl_dcscmm(transa, &m, &n, &k, &alpha, matdescra, vals, ir, jc, jc+1, b, &ldb, &beta, c, &ldc); + } else { + returnValue = 1; + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_c, c, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vals, vals, 0); + (*env)->ReleaseStringUTFChars(env, j_matdescra, matdescra); + (*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +}; + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_SPBLAS_dcscmv +(JNIEnv * env, jobject calling_obj, jstring j_transa, jint m, jint k, jdouble alpha, jstring j_matdescra, + jdoubleArray j_vals, jintArray j_ir, jintArray j_jc, jdoubleArray j_x, jdouble beta, jdoubleArray j_y){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, 0); + char * matdescra = (char *)(*env)->GetStringUTFChars(env, j_matdescra, 0); + jdouble * vals = (*env)->GetPrimitiveArrayCritical(env, j_vals, 0); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 0); + jint * jc = 
(*env)->GetPrimitiveArrayCritical(env, j_jc, 0); + jdouble * x = (*env)->GetPrimitiveArrayCritical(env, j_x, 0); + jdouble * y = (*env)->GetPrimitiveArrayCritical(env, j_y, 0); + jint returnValue = 0; + + if (transa != NULL && matdescra != NULL && vals != NULL && ir != NULL && jc != NULL && x != NULL && y != NULL) { + MKL_DCSCMV(transa, &m, &k, &alpha, matdescra, vals, ir, jc, jc+1, x, &beta, y); + } else { + returnValue = 1; + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_y, y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_x, x, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vals, vals, 0); + (*env)->ReleaseStringUTFChars(env, j_matdescra, matdescra); + (*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +}; + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_SPBLAS_dcsrmv +(JNIEnv * env, jobject calling_obj, jstring j_transa, jint m, jint k, jdouble alpha, jstring j_matdescra, + jdoubleArray j_vals, jintArray j_ir, jintArray j_jc, jdoubleArray j_x, jdouble beta, jdoubleArray j_y){ + char * transa = (char *)(*env)->GetStringUTFChars(env, j_transa, 0); + char * matdescra = (char *)(*env)->GetStringUTFChars(env, j_matdescra, 0); + jdouble * vals = (*env)->GetPrimitiveArrayCritical(env, j_vals, 0); + jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, 0); + jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, 0); + jdouble * x = (*env)->GetPrimitiveArrayCritical(env, j_x, 0); + jdouble * y = (*env)->GetPrimitiveArrayCritical(env, j_y, 0); + jint returnValue = 0; + + if (transa != NULL && matdescra != NULL && vals != NULL && ir != NULL && jc != NULL && x != NULL && y != NULL) { + MKL_DCSRMV(transa, &m, &k, &alpha, matdescra, vals, ir, jc, jc+1, x, &beta, y); + } else { + returnValue = 1; + } + + (*env)->ReleasePrimitiveArrayCritical(env, j_y, y, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_x, x, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_vals, vals, 0); + (*env)->ReleaseStringUTFChars(env, j_matdescra, matdescra); + (*env)->ReleaseStringUTFChars(env, j_transa, transa); + return returnValue; +} diff --git a/jni/src/BIDMat_UTILS.c b/jni/src/BIDMat_UTILS.c new file mode 100755 index 00000000..88ccfc73 --- /dev/null +++ b/jni/src/BIDMat_UTILS.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_UTILS_memcpybi +(JNIEnv * env, jobject calling_obj, jint N, jbyteArray jA, jint startA, jintArray jB, jint startB){ + jbyte * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jint * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + + memcpy(((char *)B)+startB, A+startA, N); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_UTILS_memcpybf +(JNIEnv * env, jobject calling_obj, jint N, jbyteArray jA, jint startA, jfloatArray jB, jint startB){ + jbyte * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jfloat * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + + memcpy(((char *)B)+startB, A+startA, N); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_UTILS_memcpybd +(JNIEnv * env, jobject calling_obj, jint N, jbyteArray jA, jint startA, jdoubleArray jB, jint startB){ + jbyte * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jdouble * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + + memcpy(((char *)B)+startB, A+startA, N); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_UTILS_memcpyib +(JNIEnv 
* env, jobject calling_obj, jint N, jintArray jA, jint startA, jbyteArray jB, jint startB){ + jint * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jbyte * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + + memcpy(B+startB, ((char *)A)+startA, N); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); +} + + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_UTILS_memcpyfb +(JNIEnv * env, jobject calling_obj, jint N, jfloatArray jA, jint startA, jbyteArray jB, jint startB){ + jfloat * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jbyte * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + + memcpy(B+startB, ((char *)A)+startA, N); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); +} + + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_UTILS_memcpydb +(JNIEnv * env, jobject calling_obj, jint N, jdoubleArray jA, jint startA, jbyteArray jB, jint startB){ + jdouble * A = (*env)->GetPrimitiveArrayCritical(env, jA, JNI_FALSE); + jbyte * B = (*env)->GetPrimitiveArrayCritical(env, jB, JNI_FALSE); + + memcpy(B+startB, ((char *)A)+startA, N); + + (*env)->ReleasePrimitiveArrayCritical(env, jA, A, 0); + (*env)->ReleasePrimitiveArrayCritical(env, jB, B, 0); +} + diff --git a/jni/src/BIDMat_VML.c b/jni/src/BIDMat_VML.c new file mode 100755 index 00000000..c4b2a614 --- /dev/null +++ b/jni/src/BIDMat_VML.c @@ -0,0 +1,2006 @@ + +#include +#include +#include + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsCdfNormInv (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsCdfNormInv(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdCdfNormInv (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdCdfNormInv(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsLinearFrac (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloat arg4, jfloat arg5, jfloat arg6, jfloat arg7, jfloatArray arg8, jlong arg9){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg8 = (*env)->GetPrimitiveArrayCritical(env, arg8, JNI_FALSE); + + vmsLinearFrac(n, jni_arg2, jni_arg3, arg4, arg5, arg6, arg7, jni_arg8, arg9); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg8, jni_arg8, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdLinearFrac (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdouble arg4, jdouble arg5, jdouble arg6, jdouble arg7, jdoubleArray arg8, jlong arg9){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg8 = (*env)->GetPrimitiveArrayCritical(env, arg8, JNI_FALSE); + + vmdLinearFrac(n, jni_arg2, jni_arg3, arg4, arg5, arg6, arg7, jni_arg8, arg9); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, 
jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg8, jni_arg8, 0); +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VML_vmlSetErrStatus (JNIEnv * env, jobject calling_obj, jint n){ + jint returnValue; + + returnValue = vmlSetErrStatus(n); + + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VML_vmlGetErrStatus (JNIEnv * env, jobject calling_obj){ + jint returnValue; + + returnValue = vmlGetErrStatus(); + + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VML_vmlClearErrStatus (JNIEnv * env, jobject calling_obj){ + jint returnValue; + + returnValue = vmlClearErrStatus(); + + + return returnValue; +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAbs (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsAbs(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAbs (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdAbs(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAdd (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + 
+ vsAdd(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAdd (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdAdd(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsSub (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsSub(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdSub (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdSub(n, jni_arg2, jni_arg3, jni_arg4); + + 
(*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsInv (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdInv (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsSqrt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsSqrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdSqrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdSqrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 
0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsSqrt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsSqrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdSqrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdSqrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsInvSqrt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsInvSqrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdInvSqrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdInvSqrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 
0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsInvSqrt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsInvSqrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdInvSqrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdInvSqrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsCbrt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsCbrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdCbrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdCbrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsCbrt 
(JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsCbrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdCbrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdCbrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsInvCbrt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsInvCbrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdInvCbrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdInvCbrt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsInvCbrt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, 
jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsInvCbrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdInvCbrt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdInvCbrt(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsSqr (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsSqr(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdSqr (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdSqr(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsExp (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = 
(*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsExp(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdExp (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdExp(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsExp (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsExp(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdExp (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdExp(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsExpm1 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = 
(*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsExpm1(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdExpm1 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdExpm1(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsExpm1 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsExpm1(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdExpm1 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdExpm1(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsLn (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsLn(n, 
jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdLn (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdLn(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsLn (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsLn(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdLn (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdLn(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsLog10 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsLog10(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdLog10 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdLog10(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsLog10 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsLog10(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdLog10 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdLog10(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsLog1p (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsLog1p(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + 
+JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdLog1p (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdLog1p(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsLog1p (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsLog1p(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdLog1p (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdLog1p(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsCos (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsCos(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdCos (JNIEnv * env, 
jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdCos(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsCos (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsCos(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdCos (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdCos(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsSin (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsSin(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdSin (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * 
jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdSin(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsSin (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsSin(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdSin (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdSin(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsTan (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsTan(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdTan (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * 
jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdTan(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsTan (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsTan(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdTan (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdTan(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsCosh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsCosh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdCosh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdCosh(n, 
jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsCosh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsCosh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdCosh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdCosh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsSinh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsSinh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdSinh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdSinh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, 
jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsSinh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsSinh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdSinh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdSinh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsTanh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsTanh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdTanh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdTanh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); 
+} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsTanh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsTanh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsTanh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdTanh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdTanh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdTanh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsAcos(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAcos (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsAcos(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdAcos(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAcos (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdAcos(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsAcos(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsAcos (JNIEnv * env,
jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsAcos(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdAcos(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdAcos (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdAcos(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsAsin(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAsin (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsAsin(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdAsin(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAsin (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdAsin(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsAsin(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsAsin (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ +
jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsAsin(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdAsin(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdAsin (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdAsin(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsAtan(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAtan (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsAtan(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdAtan(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAtan (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdAtan(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsAtan(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsAtan (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); +
jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsAtan(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdAtan(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdAtan (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdAtan(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsAcosh(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAcosh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsAcosh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdAcosh(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAcosh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdAcosh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsAcosh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsAcosh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3,
JNI_FALSE); + + vmsAcosh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdAcosh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdAcosh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdAcosh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsAsinh(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAsinh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsAsinh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdAsinh(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAsinh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdAsinh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsAsinh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsAsinh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsAsinh(n, jni_arg2, jni_arg3, arg4); + +
(*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdAsinh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdAsinh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdAsinh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsAtanh(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAtanh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsAtanh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdAtanh(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAtanh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdAtanh(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsAtanh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsAtanh (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsAtanh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); +
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdAtanh(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdAtanh (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdAtanh(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsErf(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsErf (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsErf(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdErf(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdErf (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdErf(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsErf(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsErf (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsErf(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdErf(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void
JNICALL Java_edu_berkeley_bid_VML_vmdErf (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdErf(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsErfInv(n, jni_arg2, jni_arg3), release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsErfInv (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsErfInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdErfInv(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdErfInv (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdErfInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsErfInv(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsErfInv (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsErfInv(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdErfInv(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdErfInv (JNIEnv * env, jobject
calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdErfInv(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vsHypot(n, jni_arg2, jni_arg3, jni_arg4), release all three with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsHypot (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsHypot(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vdHypot(n, jni_arg2, jni_arg3, jni_arg4), release all three with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdHypot (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdHypot(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vmsHypot(n, jni_arg2, jni_arg3, jni_arg4, arg5) with the jlong arg5 passed through, release all three with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsHypot (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4, jlong arg5){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 =
(*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmsHypot(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vmdHypot(n, jni_arg2, jni_arg3, jni_arg4, arg5) with the jlong arg5 passed through, release all three with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdHypot (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4, jlong arg5){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmdHypot(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsErfc(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsErfc (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsErfc(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdErfc(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdErfc (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdErfc(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +
+/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsErfc(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsErfc (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsErfc(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdErfc(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdErfc (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdErfc(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsErfcInv(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsErfcInv (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsErfcInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdErfcInv(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdErfcInv (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdErfcInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsErfcInv(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsErfcInv
(JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsErfcInv(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdErfcInv(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdErfcInv (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdErfcInv(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsCdfNorm(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsCdfNorm (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsCdfNorm(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdCdfNorm(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdCdfNorm (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdCdfNorm(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsCdfNorm(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsCdfNorm (JNIEnv * env, jobject calling_obj, jint n, jfloatArray
arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsCdfNorm(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdCdfNorm(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdCdfNorm (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdCdfNorm(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsCdfNormInv(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsCdfNormInv (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsCdfNormInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdCdfNormInv(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdCdfNormInv (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdCdfNormInv(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsLGamma(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsLGamma (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2
= (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsLGamma(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdLGamma(n, jni_arg2, jni_arg3), release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdLGamma (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdLGamma(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsLGamma(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsLGamma (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsLGamma(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdLGamma(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdLGamma (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdLGamma(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vsTGamma(n, jni_arg2, jni_arg3), release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsTGamma (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); +
jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsTGamma(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vdTGamma(n, jni_arg2, jni_arg3), release both with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdTGamma (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdTGamma(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmsTGamma(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsTGamma (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsTGamma(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3 via GetPrimitiveArrayCritical, call vmdTGamma(n, jni_arg2, jni_arg3, arg4) with the jlong arg4 passed through, release both with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdTGamma (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdTGamma(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vsAtan2(n, jni_arg2, jni_arg3, jni_arg4), release all three with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsAtan2 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 =
(*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsAtan2(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vdAtan2(n, jni_arg2, jni_arg3, jni_arg4), release all three with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdAtan2 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdAtan2(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vmsAtan2(n, jni_arg2, jni_arg3, jni_arg4, arg5) with the jlong arg5 passed through, release all three with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsAtan2 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4, jlong arg5){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmsAtan2(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vmdAtan2(n, jni_arg2, jni_arg3, jni_arg4, arg5) with the jlong arg5 passed through, release all three with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdAtan2 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4, jlong arg5){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 =
(*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmdAtan2(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vsMul(n, jni_arg2, jni_arg3, jni_arg4), release all three with mode 0. NOTE(review): the acquired pointers are used without a NULL check and JNI_FALSE is passed in the isCopy slot -- confirm both against the JNI spec. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsMul (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsMul(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vdMul(n, jni_arg2, jni_arg3, jni_arg4), release all three with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdMul (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdMul(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} +/* JNI wrapper: acquire arg2/arg3/arg4 via GetPrimitiveArrayCritical, call vsDiv(n, jni_arg2, jni_arg3, jni_arg4), release all three with mode 0. */ +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsDiv (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat *
jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsDiv(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdDiv (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdDiv(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsPow (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsPow(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdPow (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + 
vdPow(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsPow (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4, jlong arg5){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmsPow(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdPow (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4, jlong arg5){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmdPow(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsPow3o2 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsPow3o2(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdPow3o2 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdPow3o2(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsPow3o2 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsPow3o2(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdPow3o2 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdPow3o2(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsPow2o3 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsPow2o3(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); 
+} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdPow2o3 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdPow2o3(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsPow2o3 (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jlong arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmsPow2o3(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdPow2o3 (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jlong arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vmdPow2o3(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsPowx (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloat arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsPowx(n, jni_arg2, arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL 
Java_edu_berkeley_bid_VML_vdPowx (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdouble arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdPowx(n, jni_arg2, arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsPowx (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloat arg3, jfloatArray arg4, jlong arg5){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmsPowx(n, jni_arg2, arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdPowx (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdouble arg3, jdoubleArray arg4, jlong arg5){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmdPowx(n, jni_arg2, arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsSinCos (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsSinCos(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdSinCos (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdSinCos(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmsSinCos (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4, jlong arg5){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmsSinCos(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vmdSinCos (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4, jlong arg5){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vmdSinCos(n, jni_arg2, jni_arg3, jni_arg4, arg5); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsLinearFrac (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloat arg4, jfloat arg5, jfloat arg6, jfloat arg7, jfloatArray arg8){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg8 = (*env)->GetPrimitiveArrayCritical(env, arg8, JNI_FALSE); + + vsLinearFrac(n, jni_arg2, jni_arg3, arg4, arg5, arg6, arg7, jni_arg8); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg8, jni_arg8, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdLinearFrac (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdouble arg4, jdouble arg5, jdouble arg6, jdouble arg7, jdoubleArray arg8){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg8 = (*env)->GetPrimitiveArrayCritical(env, arg8, JNI_FALSE); + + vdLinearFrac(n, jni_arg2, jni_arg3, arg4, arg5, arg6, arg7, jni_arg8); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg8, jni_arg8, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsCeil (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsCeil(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdCeil (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdCeil(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsFloor (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsFloor(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdFloor (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdFloor(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsModf (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsModf(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdModf (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdModf(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsNearbyInt (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsNearbyInt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdNearbyInt (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdNearbyInt(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsRint (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = 
(*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsRint(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdRint (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdRint(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsRound (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsRound(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdRound (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdRound(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsTrunc (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsTrunc(n, jni_arg2, jni_arg3); + + 
(*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdTrunc (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdTrunc(n, jni_arg2, jni_arg3); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsPackI (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jint arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsPackI(n, jni_arg2, arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdPackI (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jint arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdPackI(n, jni_arg2, arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsPackV (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jintArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jint * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsPackV(n, jni_arg2, 
jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdPackV (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jintArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jint * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdPackV(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsPackM (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jintArray arg3, jfloatArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jint * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jfloat * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsPackM(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdPackM (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jintArray arg3, jdoubleArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jint * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jdouble * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdPackM(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, 
jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsUnpackI (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jint arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vsUnpackI(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdUnpackI (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jint arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + + vdUnpackI(n, jni_arg2, jni_arg3, arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsUnpackV (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jintArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jint * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsUnpackV(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdUnpackV (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jintArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, 
arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jint * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdUnpackV(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vsUnpackM (JNIEnv * env, jobject calling_obj, jint n, jfloatArray arg2, jfloatArray arg3, jintArray arg4){ + jfloat * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jfloat * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jint * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vsUnpackM(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT void JNICALL Java_edu_berkeley_bid_VML_vdUnpackM (JNIEnv * env, jobject calling_obj, jint n, jdoubleArray arg2, jdoubleArray arg3, jintArray arg4){ + jdouble * jni_arg2 = (*env)->GetPrimitiveArrayCritical(env, arg2, JNI_FALSE); + jdouble * jni_arg3 = (*env)->GetPrimitiveArrayCritical(env, arg3, JNI_FALSE); + jint * jni_arg4 = (*env)->GetPrimitiveArrayCritical(env, arg4, JNI_FALSE); + + vdUnpackM(n, jni_arg2, jni_arg3, jni_arg4); + + (*env)->ReleasePrimitiveArrayCritical(env, arg2, jni_arg2, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg3, jni_arg3, 0); + (*env)->ReleasePrimitiveArrayCritical(env, arg4, jni_arg4, 0); +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VML_vmlSetMode (JNIEnv * env, jobject calling_obj, jint n){ + jint returnValue; + + returnValue = vmlSetMode(n); + + + return returnValue; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VML_vmlGetMode (JNIEnv * env, jobject 
calling_obj){ + jint returnValue; + + returnValue = vmlGetMode(); + + + return returnValue; +} + diff --git a/jni/src/BIDMat_VSL.c b/jni/src/BIDMat_VSL.c new file mode 100755 index 00000000..ef1f022c --- /dev/null +++ b/jni/src/BIDMat_VSL.c @@ -0,0 +1,469 @@ + +#include "mkl_vsl.h" + +#include + +#include +#include + +union VoidLong { + jlong l; + void* p; +}; + +static jlong void2long(void* ptr) { + union VoidLong v; + v.l = (jlong) 0; + v.p = ptr; + return v.l; +} + +static void* long2void(jlong l) { + union VoidLong v; + v.l = l; + return v.p; +} + +static VSLStreamStatePtr getStream(JNIEnv *env, jclass clazz, jobject jstream) +{ + jfieldID handle_id = (*env)->GetFieldID(env, clazz, "handle", "J"); + jlong handle = (*env)->GetLongField(env, jstream, handle_id); + VSLStreamStatePtr streamp = long2void(handle); + return streamp; +} + +static void setStream(JNIEnv *env, jclass clazz, jobject jstream, VSLStreamStatePtr streamp) +{ + jfieldID handle_id = (*env)->GetFieldID(env, clazz, "handle", "J"); + jlong handle = void2long(streamp); + (*env)->SetLongField(env, jstream, handle_id, handle); +} + + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vslNewStream + (JNIEnv *env, jclass clazz, jobject jstream, jint brng, jint seed) +{ + VSLStreamStatePtr streamp; + int status = vslNewStream(&streamp, brng, seed); + setStream(env, clazz, jstream, streamp); + + return (jint)status; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vslDeleteStream + (JNIEnv *env, jclass clazz, jobject jstream) +{ + VSLStreamStatePtr streamp = getStream(env, clazz, jstream); + int status = vslDeleteStream(&streamp); + setStream(env, clazz, jstream, streamp); + + return (jint)status; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngCauchy +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = 
(*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngCauchy(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngUniform +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngUniform(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngGaussian +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngGaussian(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngGaussianMV +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jint d, jint m, jdoubleArray j_a, jdoubleArray j_b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jdouble * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + + jint retval = vdRngGaussianMV(method, stream, n, r, d, m, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngExponential +(JNIEnv * env, jobject calling_obj, jint 
method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngExponential(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngLaplace +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngLaplace(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngWeibull +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b, jdouble c) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngWeibull(method, stream, n, r, a, b, c); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngRayleigh +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngRayleigh(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngLognormal +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b, jdouble c, jdouble d) { + + VSLStreamStatePtr 
stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngLognormal(method, stream, n, r, a, b, c, d); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngGumbel +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngGumbel(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngGamma +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b, jdouble c) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngGamma(method, stream, n, r, a, b, c); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vdRngBeta +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jdoubleArray j_r, jdouble a, jdouble b, jdouble c, jdouble d) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jdouble * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vdRngBeta(method, stream, n, r, a, b, c, d); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngBernoulli +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdouble a) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); 
+ + jint retval = viRngBernoulli(method, stream, n, r, a); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngUniform +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = viRngUniform(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngUniformBits +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = viRngUniformBits(method, stream, n, (unsigned int *)r); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngGeometric +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdouble p) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = viRngGeometric(method, stream, n, r, p); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngBinomial +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdouble m, jdouble p) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = viRngBinomial(method, stream, n, r, m, p); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL 
Java_edu_berkeley_bid_VSL_viRngHypergeometric +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jint a, jint b, jint c) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = viRngHypergeometric(method, stream, n, r, a, b, c); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngNegbinomial +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdouble a, jdouble b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = viRngNegbinomial(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngPoisson +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdouble a) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = viRngPoisson(method, stream, n, r, a); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_viRngPoissonV +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jintArray j_r, jdoubleArray j_a) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jint * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + jdouble * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + + jint retval = viRngPoissonV(method, stream, n, r, a); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + return retval; +} + + +JNIEXPORT jint JNICALL 
Java_edu_berkeley_bid_VSL_vsRngCauchy +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngCauchy(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngUniform +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngUniform(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngGaussian +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngGaussian(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngGaussianMV +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jint d, jint m, jfloatArray j_a, jfloatArray j_b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + jfloat * a = (*env)->GetPrimitiveArrayCritical(env, j_a, JNI_FALSE); + jfloat * b = (*env)->GetPrimitiveArrayCritical(env, j_b, JNI_FALSE); + + jint retval = vsRngGaussianMV(method, stream, n, r, d, m, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + 
(*env)->ReleasePrimitiveArrayCritical(env, j_a, a, 0); + (*env)->ReleasePrimitiveArrayCritical(env, j_b, b, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngExponential +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngExponential(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngLaplace +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngLaplace(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngWeibull +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b, jfloat c) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngWeibull(method, stream, n, r, a, b, c); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngRayleigh +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngRayleigh(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + 
+JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngLognormal +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b, jfloat c, jfloat d) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngLognormal(method, stream, n, r, a, b, c, d); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngGumbel +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngGumbel(method, stream, n, r, a, b); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngGamma +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b, jfloat c) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngGamma(method, stream, n, r, a, b, c); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + +JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngBeta +(JNIEnv * env, jobject calling_obj, jint method, jobject j_stream, jint n, jfloatArray j_r, jfloat a, jfloat b, jfloat c, jfloat d) { + + VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream); + jfloat * r = (*env)->GetPrimitiveArrayCritical(env, j_r, JNI_FALSE); + + jint retval = vsRngBeta(method, stream, n, r, a, b, c, d); + + (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0); + return retval; +} + diff --git a/jni/src/Copyright.txt b/jni/src/Copyright.txt new file mode 100755 index 
00000000..21326596 --- /dev/null +++ b/jni/src/Copyright.txt @@ -0,0 +1,25 @@ +Copyright (c) 2012, Regents of the University of California +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/jni/src/Makefile b/jni/src/Makefile new file mode 100755 index 00000000..677710c1 --- /dev/null +++ b/jni/src/Makefile @@ -0,0 +1,36 @@ +include Makefile.incl + +MKL_OBJS=BIDMat_CBLAS.$(OBJ) BIDMat_UTILS.$(OBJ) BIDMat_SPBLAS.$(OBJ) BIDMat_LAPACK.$(OBJ) \ + BIDMat_VML.$(OBJ) BIDMat_VSL.$(OBJ) + +CUDA_OBJS=BIDMat_CUMAT.$(OBJ) MatKernel.$(OBJ) + +.SUFFIXES: .$(OBJ) .c .cpp .cu + +all: $(LIBPREPEND)bidmatmkl$(LIBAPPEND) $(LIBPREPEND)bidmatcuda$(LIBAPPEND) + +$(LIBPREPEND)bidmatmkl$(LIBAPPEND): $(MKL_OBJS) + $(LD) $(LDFLAGS) $(MKL_OBJS) $(MKL_LIBS) $(OUTFLG)$@ + +$(LIBPREPEND)bidmatcuda$(LIBAPPEND): $(CUDA_OBJS) + $(GLD) $(LDFLAGS) $(CUDA_OBJS) $(CUDA_LIBS) $(OUTFLG)$@ + +%.$(OBJ) : %.c + $(CC) $(CPPFLAGS) $(LAPACK_INCLUDES) $(CFLAGS) $*.c + +%.$(OBJ) : %.cpp + $(GCC) $(CPPFLAGS) $(LAPACK_INCLUDES) $(CFLAGS) $*.cpp + +%.$(OBJ) : %.cu + $(NVCC) $(NVCCFLAGS) $*.cu + +install: all + cp $(LIBPREPEND)bidmatmkl$(LIBAPPEND) ../../lib/$(SUBLIB) + cp $(LIBPREPEND)bidmatcuda$(LIBAPPEND) ../../lib/$(SUBLIB) + +clean: + rm -f *.$(OBJ) *$(LIBAPPEND) *.pdb *.exp *.lib + +distclean: clean + rm -f *$(LIBAPPEND) *.exp *.lib *.jnilib Makefile.incl + diff --git a/jni/src/MatKernel.cu b/jni/src/MatKernel.cu new file mode 100755 index 00000000..4d0f43d2 --- /dev/null +++ b/jni/src/MatKernel.cu @@ -0,0 +1,660 @@ +#include +#include + +__device__ float op_add(float a, float b) {return a+b;} +__device__ float op_sub(float a, float b) {return a-b;} +__device__ float op_mul(float a, float b) {return a*b;} +__device__ float op_div(float a, float b) {return a/b;} +__device__ float op_gt(float a, float b) {return (a > b) ? 1.0f : 0;} +__device__ float op_lt(float a, float b) {return (a < b) ? 1.0f : 0;} +__device__ float op_eq(float a, float b) {return (a == b) ? 1.0f : 0;} +__device__ float op_ge(float a, float b) {return (a >= b) ? 1.0f : 0;} +__device__ float op_le(float a, float b) {return (a <= b) ? 1.0f : 0;} +__device__ float op_ne(float a, float b) {return (a != b) ? 
1.0f : 0;} +__device__ float op_max(float a, float b) {return max(a,b);} +__device__ float op_min(float a, float b) {return min(a,b);} + +__device__ int iop_add(int a, int b) {return a+b;} +__device__ int iop_sub(int a, int b) {return a-b;} +__device__ int iop_mul(int a, int b) {return a*b;} +__device__ int iop_div(int a, int b) {return a/b;} +__device__ int iop_gt(int a, int b) {return (a > b) ? 1 : 0;} +__device__ int iop_lt(int a, int b) {return (a < b) ? 1 : 0;} +__device__ int iop_eq(int a, int b) {return (a == b) ? 1 : 0;} +__device__ int iop_ge(int a, int b) {return (a >= b) ? 1 : 0;} +__device__ int iop_le(int a, int b) {return (a <= b) ? 1 : 0;} +__device__ int iop_ne(int a, int b) {return (a != b) ? 1 : 0;} + +typedef float (*optype)(float,float); +typedef int (*ioptype)(int,int); + +__device__ const optype operators[] = { + op_add, + op_sub, + op_mul, + op_div, + op_gt, + op_lt, + op_eq, + op_ge, + op_le, + op_ne, + op_max, + op_min}; + +__device__ const ioptype ioperators[] = { + iop_add, + iop_sub, + iop_mul, + iop_div, + iop_gt, + iop_lt, + iop_eq, + iop_ge, + iop_le, + iop_ne}; + +__device__ float fn_abs(float a) {return abs(a);} +__device__ float fn_exp(float a) {return expf(a);} +__device__ float fn_log(float a) {return logf(a);} +__device__ float fn_expm1(float a) {return expm1f(a);} +__device__ float fn_sqrt(float a) {return sqrtf(a);} +__device__ float fn_ln(float a) {return logf(a);} +__device__ float fn_log10(float a) {return log10f(a);} +__device__ float fn_log1p(float a) {return log1pf(a);} +__device__ float fn_cos(float a) {return cosf(a);} +__device__ float fn_sin(float a) {return sinf(a);} +__device__ float fn_tan(float a) {return tanf(a);} +__device__ float fn_cosh(float a) {return coshf(a);} +__device__ float fn_sinh(float a) {return sinhf(a);} +__device__ float fn_tanh(float a) {return tanhf(a);} +__device__ float fn_acos(float a) {return acosf(a);} +__device__ float fn_asin(float a) {return asinf(a);} +__device__ float fn_atan(float 
a) {return atanf(a);} +__device__ float fn_acosh(float a) {return acoshf(a);} +__device__ float fn_asinh(float a) {return asinhf(a);} +__device__ float fn_atanh(float a) {return atanhf(a);} +__device__ float fn_erf(float a) {return erff(a);} +__device__ float fn_erfinv(float a) {return erfinvf(a);} +__device__ float fn_erfc(float a) {return erfcf(a);} +__device__ float fn_erfcinv(float a) {return erfcinvf(a);} +__device__ float fn_gammaln(float a) {return lgammaf(a);} +__device__ float fn_gamma(float a) {return tgammaf(a);} +__device__ float fn_ceil(float a) {return ceilf(a);} +__device__ float fn_floor(float a) {return floorf(a);} +__device__ float fn_round(float a) {return roundf(a);} +__device__ float fn_trunc(float a) {return truncf(a);} +__device__ float fn_sign(float a) {return (a>0) ? 1.0f : ((a<0) ? -1.0f : 0);} +__device__ float fn_j0(float a) {return j0f(a);} +__device__ float fn_j1(float a) {return j1f(a);} +//__device__ float fn_jn(float a) {return jnf(a);} +__device__ float fn_y0(float a) {return y0f(a);} +__device__ float fn_y1(float a) {return y1f(a);} +//__device__ float fn_yn(float a) {return ynf(a);} +__device__ float fn_exppsi(float a) {return (a<1.0f) ? 
0.5f*a*a : a-0.5f;} + +__device__ float fn_atan2(float a, float b) {return atan2f(a, b);} +__device__ float fn_pow(float a, float b) {return powf(a, b);} + +typedef float (*fntype)(float); + +__device__ const fntype fctns[35] = { + fn_abs, + fn_exp, + fn_expm1, + fn_sqrt, + fn_ln, + fn_log10, + fn_log1p, + fn_cos, + fn_sin, + fn_tan, + fn_cosh, + fn_sinh, + fn_tanh, + fn_acos, + fn_asin, + fn_atan, + fn_acosh, + fn_asinh, + fn_atanh, + fn_erf, + fn_erfinv, + fn_erfc, + fn_erfcinv, + fn_gammaln, + fn_gamma, + fn_ceil, + fn_floor, + fn_round, + fn_trunc, + fn_sign, + fn_j0, + fn_j1, + fn_y0, + fn_y1, + fn_exppsi}; + +__device__ const optype fctns2[2] = { + fn_atan2, + fn_pow}; + + +__global__ void __apply_gfun(float *A, float *B, int N, int opn) { + fntype fn = fctns[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < N; i += blockDim.x * gridDim.x) { + B[i] = fn(A[i]); + } +} + +int apply_gfun(float *A, float *B, int N, int opn) { + int nthreads = 32; + int nblocks = 1; + while (nblocks * nthreads < N) { + if (nblocks < 16) { + nblocks = 2*nblocks; + } else if (nthreads < 1024) { + nthreads = 2*nthreads; + } else { + nblocks = 2*nblocks; + } + } + __apply_gfun<<>>(A, B, N, opn); + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void __apply_gfun2(float *A, float *B, float *C, int N, int opn) { + optype fn = fctns2[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < N; i += blockDim.x * gridDim.x) { + C[i] = fn(A[i], B[i]); + } +} + +int apply_gfun2(float *A, float *B, float *C, int N, int opn) { + int nthreads = 32; + int nblocks = 1; + while (nblocks * nthreads < N) { + if (nblocks < 16) { + nblocks = 2*nblocks; + } else if (nthreads < 1024) { + nthreads = 2*nthreads; + } else { + nblocks = 2*nblocks; + } + } + __apply_gfun2<<>>(A, B, C, N, opn); + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void __apply_full(float *A, float *B, float *C, int N, int opn) { + optype 
op = operators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < N; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],B[i]); + } +} + +__global__ void __apply_right_col(float *A, float *B, float *C, int nrows, int ncols, int opn) { + optype op = operators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],B[i % nrows]); + } +} + +__global__ void __apply_right_row(float *A, float *B, float *C, int nrows, int ncols, int opn) { + optype op = operators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],B[i / nrows]); + } +} + +__global__ void __apply_left_col(float *A, float *B, float *C, int nrows, int ncols, int opn) { + optype op = operators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i % nrows],B[i]); + } +} + +__global__ void __apply_left_row(float *A, float *B, float *C, int nrows, int ncols, int opn) { + optype op = operators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i / nrows],B[i]); + } +} + +__global__ void __apply_right_val(float *A, float *B, float *C, int nrows, int ncols, int opn) { + optype op = operators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + float val = B[0]; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],val); + } +} + +__global__ void __apply_left_val(float *A, float *B, float *C, int nrows, int ncols, int opn) { + optype op = operators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + float val = A[0]; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(val,B[i]); + } +} + +int apply_binop(float *A, int Anrows, int Ancols, + float *B, int Bnrows, int Bncols, float *C, int 
opn) { + /* Host launcher: applies float operator number opn elementwise to A and B, writing C. + * The operand shapes select one of seven kernels: equal shapes, B or A a single column, + * B or A a single row, or B or A a 1x1 value. Returns the CUDA error code as an int. */ + int N = max(Anrows, Bnrows)*max(Ancols, Bncols); + /* Grow the launch from 1 block of 32 threads until it covers N elements: first up to 16 blocks, then up to 1024 threads per block, then more blocks. */ + int nthreads = 32; + int nblocks = 1; + while (nblocks * nthreads < N) { + if (nblocks < 16) { + nblocks = 2*nblocks; + } else if (nthreads < 1024) { + nthreads = 2*nthreads; + } else { + nblocks = 2*nblocks; + } + } + if (Anrows == Bnrows && Ancols == Bncols) { + __apply_full<<<nblocks,nthreads>>>(A, B, C, N, opn); + } else if (Anrows == Bnrows && Bncols == 1) { + __apply_right_col<<<nblocks,nthreads>>>(A, B, C, Anrows, Ancols, opn); + } else if (Ancols == Bncols && Bnrows == 1) { + __apply_right_row<<<nblocks,nthreads>>>(A, B, C, Anrows, Ancols, opn); + } else if (Anrows == Bnrows && Ancols == 1) { + __apply_left_col<<<nblocks,nthreads>>>(A, B, C, Bnrows, Bncols, opn); + } else if (Ancols == Bncols && Anrows == 1) { + __apply_left_row<<<nblocks,nthreads>>>(A, B, C, Bnrows, Bncols, opn); + } else if (Bnrows == 1 && Bncols == 1) { + __apply_right_val<<<nblocks,nthreads>>>(A, B, C, Anrows, Ancols, opn); + } else if (Anrows == 1 && Ancols == 1) { + __apply_left_val<<<nblocks,nthreads>>>(A, B, C, Bnrows, Bncols, opn); + } else { + /* No shape rule matched: no kernel was launched and C was not written, so report the mismatch instead of returning success. */ + return cudaErrorInvalidValue; + } + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void __apply_full_int(int *A, int *B, int *C, int N, int opn) { + ioptype op = ioperators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < N; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],B[i]); + } +} + +__global__ void __apply_right_col_int(int *A, int *B, int *C, int nrows, int ncols, int opn) { + ioptype op = ioperators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],B[i % nrows]); + } +} + +__global__ void __apply_right_row_int(int *A, int *B, int *C, int nrows, int ncols, int opn) { + ioptype op = ioperators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],B[i / nrows]); + } +} + +__global__ void __apply_left_col_int(int *A, int *B, int *C, int nrows, int ncols, int opn) { + ioptype op = ioperators[opn]; + int ip = threadIdx.x + 
blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i % nrows],B[i]); + } +} + +__global__ void __apply_left_row_int(int *A, int *B, int *C, int nrows, int ncols, int opn) { + ioptype op = ioperators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i / nrows],B[i]); + } +} + +__global__ void __apply_right_val_int(int *A, int *B, int *C, int nrows, int ncols, int opn) { + ioptype op = ioperators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + int val = B[0]; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(A[i],val); + } +} + +__global__ void __apply_left_val_int(int *A, int *B, int *C, int nrows, int ncols, int opn) { + ioptype op = ioperators[opn]; + int ip = threadIdx.x + blockDim.x * blockIdx.x; + int val = A[0]; + for (int i = ip; i < nrows*ncols; i += blockDim.x * gridDim.x) { + C[i] = op(val,B[i]); + } +} + +/* Host launcher: applies integer operator number opn elementwise to A and B, writing C. + * The operand shapes select one of seven kernels: equal shapes, B or A a single column, + * B or A a single row, or B or A a 1x1 value. Returns the CUDA error code as an int. */ +int apply_biniop(int *A, int Anrows, int Ancols, + int *B, int Bnrows, int Bncols, + int *C, int opn) { + int N = max(Anrows, Bnrows)*max(Ancols, Bncols); + /* Grow the launch from 1 block of 32 threads until it covers N elements: first up to 16 blocks, then up to 1024 threads per block, then more blocks. */ + int nthreads = 32; + int nblocks = 1; + while (nblocks * nthreads < N) { + if (nblocks < 16) { + nblocks = 2*nblocks; + } else if (nthreads < 1024) { + nthreads = 2*nthreads; + } else { + nblocks = 2*nblocks; + } + } + if (Anrows == Bnrows && Ancols == Bncols) { + __apply_full_int<<<nblocks,nthreads>>>(A, B, C, N, opn); + } else if (Anrows == Bnrows && Bncols == 1) { + __apply_right_col_int<<<nblocks,nthreads>>>(A, B, C, Anrows, Ancols, opn); + } else if (Ancols == Bncols && Bnrows == 1) { + __apply_right_row_int<<<nblocks,nthreads>>>(A, B, C, Anrows, Ancols, opn); + } else if (Anrows == Bnrows && Ancols == 1) { + __apply_left_col_int<<<nblocks,nthreads>>>(A, B, C, Bnrows, Bncols, opn); + } else if (Ancols == Bncols && Anrows == 1) { + __apply_left_row_int<<<nblocks,nthreads>>>(A, B, C, Bnrows, Bncols, opn); + } else if (Bnrows == 1 && Bncols == 1) { + __apply_right_val_int<<<nblocks,nthreads>>>(A, B, C, Anrows, Ancols, opn); + 
} else if (Anrows == 1 && Ancols == 1) { + __apply_left_val_int<<<nblocks,nthreads>>>(A, B, C, Bnrows, Bncols, opn); + } else { + /* No shape rule matched: no kernel was launched and C was not written, so report the mismatch instead of returning success. */ + return cudaErrorInvalidValue; + } + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void __dsmult(int nrows, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C) { + int jstart = ((long long)blockIdx.x) * nnz / gridDim.x; + int jend = ((long long)(blockIdx.x + 1)) * nnz / gridDim.x; + for (int i = threadIdx.x; i < nrows; i += blockDim.x) { + float sum = 0; + for (int j = jstart; j < jend ; j++) { + sum += A[i + nrows * Bir[j]] * Bdata[j]; + if (j == jend-1 || Bic[j] != Bic[j+1]) { + atomicAdd(&C[i + nrows * Bic[j]], sum); + sum = 0; + } + } + } +} + +int dsmult(int nrows, int ncols, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C) { + int nthreads = min(1024, nrows); + int nblocks = min(1024*1024, ncols); + __dsmult<<<nblocks,nthreads>>>(nrows, nnz, A, Bdata, Bir, Bic, C); + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void __dsmultT(int nrows, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C) { + int jstart = ((long long)blockIdx.x) * nnz / gridDim.x; + int jend = ((long long)(blockIdx.x + 1)) * nnz / gridDim.x; + for (int i = threadIdx.x; i < nrows; i += blockDim.x) { + float aval = 0; + for (int j = jstart; j < jend ; j++) { + if (j == jstart || Bic[j-1] != Bic[j]) { + aval = A[i + nrows * Bic[j]]; + } + atomicAdd(&C[i + nrows * Bir[j]], aval * Bdata[j]); + } + } +} + +int dsmultT(int nrows, int ncols, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C) { + int nthreads = min(1024, nrows); + int nblocks = min(1024*1024, ncols); + __dsmultT<<<nblocks,nthreads>>>(nrows, nnz, A, Bdata, Bir, Bic, C); + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void __dds(int nrows, int nnz, float *A, float *B, int *Cir, int *Cic, float *P); + +__global__ void __reduce1op(int nrows, int ncols, float *A, float *B, int opn); + +#ifdef __CUDA_ARCH__ +#if __CUDA_ARCH__ > 200 + +__global__ void __dds(int nrows, int nnz, float *A, float 
*B, int *Cir, int *Cic, float *P) { + int jstart = ((long long)blockIdx.x) * nnz / gridDim.x; + int jend = ((long long)(blockIdx.x + 1)) * nnz / gridDim.x; + for (int j = jstart; j < jend ; j++) { + float sum = 0; + int aoff = nrows * Cir[j]; + int boff = nrows * Cic[j]; + for (int i = threadIdx.x; i < nrows; i += blockDim.x) { + sum += A[i + aoff] * B[i + boff]; + } + /* Combine the per-thread partial sums with warp shuffles. Every thread executes the shuffle, but the value is only accumulated when its source lane is a thread of this block; a value read from a lane that does not take part in the shuffle is undefined. */ + for (int i = 1; i < blockDim.x; i *= 2) { + float other = __shfl_down(sum, i); + if (threadIdx.x + i < blockDim.x) { + sum = sum + other; + } + } + if (threadIdx.x == 0) { + P[j] = sum; + } + } +} + +/* Reduces each column of the nrows x ncols matrix A with operator number opn and stores the result in B[icol]. + * Columns are distributed over blockIdx.y/gridDim.y, the same indexing the shared-memory variant of this kernel uses, so each block works on its own columns. + * NOTE(review): the shuffle step only exchanges values between lanes of one warp, so it presumably relies on the blockDim.x threads of a column sitting inside a single warp (e.g. blockDim.x a power of two no larger than 32) - confirm against the host launch code. */ +__global__ void __reduce1op(int nrows, int ncols, float *A, float *B, int opn) { + optype op = operators[opn]; + int basecol = threadIdx.y + blockDim.y * blockIdx.y; + for (int icol = basecol; icol < ncols; icol += blockDim.y * gridDim.y) { + float v = A[threadIdx.x + icol * nrows]; + for (int i = threadIdx.x + blockDim.x; i < nrows; i += blockDim.x) { + v = op(v, A[i + icol * nrows]); + } + /* Only fold in values that come from a thread of the same column (threadIdx.x + i inside the row); lanes beyond it hold another column's data. */ + for (int i = 1; i < blockDim.x; i *= 2) { + float other = __shfl_down(v, i); + if (threadIdx.x + i < blockDim.x) { + v = op(v, other); + } + } + if (threadIdx.x == 0) { + B[icol] = v; + } + } +} +#else + +__global__ void __dds(int nrows, int nnz, float *A, float *B, int *Cir, int *Cic, float *P) { + __shared__ float parts[1][33]; + int jstart = ((long long)blockIdx.x) * nnz / gridDim.x; + int jend = ((long long)(blockIdx.x + 1)) * nnz / gridDim.x; + for (int j = jstart; j < jend ; j++) { + float sum = 0; + int aoff = nrows * Cir[j]; + int boff = nrows * Cic[j]; + for (int i = threadIdx.x; i < nrows; i += blockDim.x) { + sum += A[i + aoff] * B[i + boff]; + } + parts[0][threadIdx.x] = sum; + for (int i = 1; i < blockDim.x; i *= 2) { + if (i + threadIdx.x < blockDim.x) { + parts[0][threadIdx.x] = parts[0][threadIdx.x] + parts[0][i + threadIdx.x]; + } + } + if (threadIdx.x == 0) { + P[j] = parts[0][0]; + } + } +} + +__global__ void __reduce1op(int nrows, int ncols, float *A, float *B, int opn) { + __shared__ float parts[32][33]; + optype op = operators[opn]; + for (int icol = threadIdx.y + blockIdx.y * blockDim.y; icol < ncols; icol += blockDim.y * 
gridDim.y) { + float v = A[threadIdx.x + icol * nrows]; + for (int irow = threadIdx.x + blockDim.x; irow < nrows; irow += blockDim.x) { + v = op(v, A[irow + icol * nrows]); + } + parts[threadIdx.x][threadIdx.y] = v; + for (int i = 1; i < blockDim.x; i *= 2) { + if (i + threadIdx.x < blockDim.x) { + parts[threadIdx.x][threadIdx.y] = op(parts[threadIdx.x][threadIdx.y], parts[i + threadIdx.x][threadIdx.y]); + } + } + if (threadIdx.x == 0) { + B[icol] = parts[0][threadIdx.y]; + } + __syncthreads(); + } +} +#endif +#endif + +#define BLOCKDIM 32 + +__global__ void __transpose(float *in, int instride, float *out, int outstride, int nrows, int ncols) { + int nx = BLOCKDIM * gridDim.x; + int ny = BLOCKDIM * gridDim.y; + int ix = BLOCKDIM * blockIdx.x; + int iy = BLOCKDIM * blockIdx.y; + __shared__ float tile[BLOCKDIM][BLOCKDIM+1]; + + for (int yb = iy; yb < ncols; yb += ny) { + for (int xb = ix; xb < nrows; xb += nx) { + if (xb + threadIdx.x < nrows) { + int ylim = min(ncols, yb + BLOCKDIM); + for (int y = threadIdx.y + yb; y < ylim; y += blockDim.y) { + tile[threadIdx.x][y-yb] = in[threadIdx.x+xb + y*instride]; + } + } + __syncthreads(); + if (yb + threadIdx.x < ncols) { + int xlim = min(nrows, xb + BLOCKDIM); + for (int x = threadIdx.y + xb; x < xlim; x += blockDim.y) { + out[threadIdx.x + yb + x*outstride] = tile[x-xb][threadIdx.x]; + } + } + __syncthreads(); + } + } +} + +int transpose(float *in, int instride, float *out, int outstride, int nrows, int ncols) { + const dim3 griddims(32,32); + const dim3 blockdims(BLOCKDIM,16,1); + cudaError_t err; + __transpose<<>>(in, instride, out, outstride, nrows, ncols); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if (err != cudaSuccess) {fprintf(stderr, "cuda error in transpose"); return err;} + return 0; +} + + + + int dds(int nrows, int nnz, float *A, float *B, int *Cir, int *Cic, float *P) { + int nthreads = min(32, nrows); + int nblocks = min(32*1024*1024, max(1,nnz/8)); + __dds<<>>(nrows, nnz, A, B, Cir, Cic, P); 
+ cudaError_t err = cudaGetLastError(); + return err; +} + +int reduce1op(int nrows, int ncols, float *A, float *B, int opn) { + int blkx = min(32, nrows); + int blky = min(32, ncols); + int nblks = max(1, ((int)(((long long)nrows) * ncols / blkx / blky / 16))); + const dim3 blkdims(blkx,blky,1); + const dim3 griddims(1,nblks,1); + __reduce1op<<>>(nrows, ncols, A, B, opn); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + return err; +} + +__global__ void __reduce2op(int nrows, int ncols, float *A, float *B, int opn) { + __shared__ float parts[32][33]; + optype op = operators[opn]; + int baserow = threadIdx.x + blockDim.x * blockIdx.x; + for (int irow = baserow; irow < nrows; irow += blockDim.x * gridDim.x) { + float v = A[irow + threadIdx.y * nrows]; + for (int icol = threadIdx.y + blockDim.y; icol < ncols; icol += blockDim.y) { + v = op(v, A[irow + icol * nrows]); + } + parts[threadIdx.x][threadIdx.y] = v; + __syncthreads(); + float newv = 0; + for (int i = 1; i < blockDim.y; i *= 2) { + if (i + threadIdx.y < blockDim.y) newv = parts[threadIdx.x][i+threadIdx.y]; + __syncthreads(); + if (i + threadIdx.y < blockDim.y) parts[threadIdx.x][threadIdx.y] = op(parts[threadIdx.x][threadIdx.y], newv); + __syncthreads(); + } + if (threadIdx.y == 0) { + B[irow] = parts[threadIdx.x][0]; + } + __syncthreads(); + } +} + +int reduce2op(int nrows, int ncols, float *A, float *B, int opn) { + int blkx = min(32, nrows); + int blky = min(32, ncols); + int nblks = max(1, ((int)(((long long)nrows) * ncols / blkx / blky / 16))); + const dim3 blkdims(blkx,blky,1); + const dim3 griddims(nblks,1,1); + __reduce2op<<>>(nrows, ncols, A, B, opn); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + return err; +} + + +#ifdef TEST +int main(int argc, char **argv) { + int m=8, n=8, opn = 0; + float *dA, *dB, *dC, *A, *B, *C; + if (argc > 1) { + sscanf(argv[1], "%d", &opn); + if (argc > 2) { + sscanf(argv[2], "%d", &m); + if (argc > 3) { + sscanf(argv[3], "%d", 
&n); + } + } + } + A = (float *)malloc(m*n*sizeof(float)); + B = (float *)malloc(m*n*sizeof(float)); + C = (float *)malloc(m*n*sizeof(float)); + cudaMalloc((void**)&dA, m*n*sizeof(float)); + cudaMalloc((void**)&dB, m*n*sizeof(float)); + cudaMalloc((void**)&dC, m*n*sizeof(float)); + + for (int i = 0; i < m*n; i++) { + A[i] = 1.0f; + B[i] = 2.0f; + } + + cudaMemcpy(dA, A, m*n*sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(dB, B, m*n*sizeof(float), cudaMemcpyHostToDevice); + + printf("A %f %f %f %f\n", A[0], A[1], A[2], A[3]); + printf("B %f %f %f %f\n", B[0], B[1], B[2], B[3]); + + MatKernel(dA, m, n, dB, m, n, dC, opn); + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + fprintf(stderr, "CUDA error %d", err); + exit(1); + } + + cudaMemcpy(C, dC, m*n*sizeof(float), cudaMemcpyDeviceToHost); + + printf("C %f %f %f %f\n", C[0], C[1], C[2], C[3]); + printf("A %f %f %f %f\n", A[0], A[1], A[2], A[3]); + printf("B %f %f %f %f\n", B[0], B[1], B[2], B[3]); + + if (dA != NULL) cudaFree(dA); + if (dB != NULL) cudaFree(dB); + if (dC != NULL) cudaFree(dC); + if (C != NULL) free(C); +} +#endif diff --git a/jni/src/MatKernel.hpp b/jni/src/MatKernel.hpp new file mode 100755 index 00000000..ef1acdf6 --- /dev/null +++ b/jni/src/MatKernel.hpp @@ -0,0 +1,20 @@ + +int apply_binop(float *nativeA, int Anrows, int Ancols, float *nativeB, int Bnrows, int Bncols, float *nativeC, int opn); + +int apply_biniop(int *nativeA, int Anrows, int Ancols, int *nativeB, int Bnrows, int Bncols, int *nativeC, int opn); + +int apply_gfun(float *nativeA, float *nativeB, int N, int opn); + +int apply_gfun2(float *nativeA, float *nativeB, float *nativeC, int N, int opn); + +int dsmult(int nrows, int ncols, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C); + +int dsmultT(int nrows, int ncols, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C); + +int dds(int nrows, int nnz, float *A, float *B, int *Cir, int *Cic, float *P); + +int reduce1op(int nrows, int 
ncols, float *A, float *B, int opn);
+
+int reduce2op(int nrows, int ncols, float *A, float *B, int opn);
+
+int transpose(float *in, int instride, float *out, int outstride, int nrows, int ncols);
diff --git a/jni/src/configure b/jni/src/configure
new file mode 100755
index 00000000..586c4ce1
--- /dev/null
+++ b/jni/src/configure
@@ -0,0 +1,161 @@
+#!/bin/bash
+#
+# Writes Makefile.incl (compiler, linker and library settings) for the native
+# BIDMat libraries, based on the host OS.
+#
+# Usage: ./configure [threaded]
+#   "threaded" links the threaded MKL layer; anything else links the
+#   sequential one.
+#
+# On Linux the tool locations MKL_ROOT, JAVA_HOME, CUDA_HOME and JCUDA_HOME
+# are taken from the environment when set and fall back to the defaults below.
+
+OS=$(uname)
+PARLIB=$1
+# The architecture is fixed; with this value only the x86_64 branch of the
+# normalisation below is ever taken.
+ARCH="x86_64"
+
+VERSION="dev"
+
+# standardise the OS and ARCH names
+if [ "$OS" = "Darwin" ] ; then
+    OS="apple"
+elif [ "$OS" = "Linux" ] ; then
+    OS="linux"
+elif [ "$OS" = "SunOS" ] ; then
+    OS="sun"
+elif [[ "$OS" == CYGWIN* ]] ; then
+    OS="windows"
+else
+    echo "OS not supported $OS" >&2
+    exit 1
+fi
+
+if [ "$ARCH" = "x86" ] || [ "$ARCH" = "i686" ] || [ "$ARCH" = "i586" ] \
+    || [ "$ARCH" = "i486" ] || [ "$ARCH" = "i386" ] ; then
+    ARCH="x86"
+elif [ "$ARCH" = "Power Macintosh" ] ; then
+    ARCH="ppc"
+elif [ "$ARCH" = "amd64" ] || [ "$ARCH" = "x86_64" ] ; then
+    ARCH="x86_64"
+elif [ "$ARCH" = "sun4u" ] ; then
+    ARCH="sparc"
+else
+    echo "ARCH not supported" >&2
+    exit 1
+fi
+
+if [ "$OS" = "apple" ] ; then
+    CC="gcc -Wall"
+    OBJ="o"
+    OUTFLG="-o "
+    CPPFLAGS="$CPPFLAGS -I/System/Library/Frameworks/JavaVM.framework/Home/include"
+    CFLAGS="-fPIC -fno-common $CFLAGS"
+    LB="ar rc"
+    LD="gcc -dynamiclib"
+    LDFLAGS="$LDFLAGS -framework JavaVM"
+    LIBPREPEND="lib"
+    LIBAPPEND="-apple-${ARCH}.jnilib"
+    FC="g95"
+    FFLAGS="$CFLAGS"
+    LAPACK_INCLUDES="-I/System/Library/Frameworks/vecLib.framework/Headers"
+    FORTRAN_LIBS="-lg95 -Wl,-single_module"
+    MKL_LIBS="-framework veclib"
+elif [ "$OS" = "linux" ] ; then
+    # Defaults only: a value already present in the environment wins, so the
+    # script is usable on machines with a different layout.
+    MKL_ROOT="${MKL_ROOT:-/opt/intel/mkl}"
+    JAVA_HOME="${JAVA_HOME:-/usr/java/default}"
+    CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"
+    JCUDA_HOME="${JCUDA_HOME:-/home/jfc/code/JCUDA5}"
+    CC="icc"
+    GCC="gcc"
+    NVCC="nvcc"
+    NVCCFLAGS="-c -arch=compute_20 -code=sm_20,sm_30 --machine 64 -Xcompiler \"-fPIC -c -O2 -DNDEBUG\""
+    SUBLIB=linux64
+    OBJ="o"
+    OUTFLG="-o "
+    CPPFLAGS="$CPPFLAGS -I$JAVA_HOME/include -I$JAVA_HOME/include/linux -I$MKL_ROOT/include \
+    -I$MKL_ROOT/include/intel64/lp64 -I$JCUDA_HOME/CommonJNI/src -I$CUDA_HOME/include"
+    CFLAGS="-fPIC -c -O2 -DNDEBUG -std=c99 $CFLAGS"
+    LB="ar rc"
+    GLD="gcc -shared"
+    LD="icc -shared -static-intel"
+    LDFLAGS="$LDFLAGS"
+    LIBPREPEND="lib"
+    LIBAPPEND=".so"
+    FC="gfortran"
+    FFLAGS="$CFLAGS"
+    LAPACK_INCLUDES=""
+    FORTRAN_LIBS="-lgfortran"
+    # MKL is linked statically; the two variants differ only in the threading layer.
+    if [ "$PARLIB" = "threaded" ] ; then
+        MKL_LIBS="-L$JAVA_HOME/lib -L/opt/intel/composerxe/lib/intel64 $MKL_ROOT/lib/intel64/libmkl_intel_lp64.a -Wl,--start-group \
+        $MKL_ROOT/lib/intel64/libmkl_intel_thread.a $MKL_ROOT/lib/intel64/libmkl_core.a \
+        -Wl,--end-group -liomp5 -lpthread -lm"
+    else
+        MKL_LIBS="-L$JAVA_HOME/lib -L/opt/intel/composerxe/lib/intel64 $MKL_ROOT/lib/intel64/libmkl_intel_lp64.a -Wl,--start-group \
+        $MKL_ROOT/lib/intel64/libmkl_sequential.a $MKL_ROOT/lib/intel64/libmkl_core.a \
+        -Wl,--end-group -liomp5 -lpthread -lm"
+    fi
+    CUDA_LIBS="-L${CUDA_HOME}/lib64 -L${JCUDA_HOME}/lib -lcudart -lCommonJNI"
+elif [ "$OS" = "windows" ] ; then
+    # NOTE(review): the Windows paths stay hard-coded (8.3 short names, no
+    # spaces); values from a Cygwin environment may not be usable as-is.
+    MKL_ROOT="c:/Intel/MKL"
+#    JAVA_HOME=""
+#    CUDA_HOME="C:/Progra~1/NVIDIA~2/CUDA/v4.2"
+#    JCUDA_HOME="/code/JCUDA"
+    CUDA_HOME="C:/Progra~1/NVIDIA~2/CUDA/v5.0"
+    JCUDA_HOME="/code/JCUDA5"
+    JAVA_HOME="C:/Progra~1/Java/jdk1.6.0_29"
+    CC="icl"
+    GCC="icl"
+    NVCC="nvcc"
+    SUBLIB=win64
+    OBJ="obj"
+    OUTFLG="/OUT:"
+    CPPFLAGS=""
+    NVCCFLAGS="-c -arch=compute_20 -code=sm_20,sm_30 --machine 64 -Xcompiler \"/EHsc /W3 /nologo /O2 /Zi /MT\""
+#    NVCCFLAGS="-c -arch=compute_30 -code=sm_30 --machine 64 -Xcompiler \"/EHsc /W3 /nologo /O2 /Zi /MT\""
+    CFLAGS="/c /MT /DNDEBUG /O2 /Qstd=c99 $CFLAGS" # static linking
+#    CFLAGS="/c /MT /DMKL_ILP64 /DNDEBUG /O2 /Qstd=c99 $CFLAGS" # static link, 64bit ints
+    LB="lib"
+    LD="link"
+    GLD="link"
+    LDFLAGS="/DLL /MACHINE:AMD64 $LDFLAGS"
+    LIBPREPEND=""
+    LIBAPPEND=".dll"
+    FC="ifort"
+    FFLAGS="-c $FFLAGS"
+    LAPACK_INCLUDES=""
+    FORTRAN_LIBS=""
+    if [ "$PARLIB" = "threaded" ] ; then
+#        MKL_LIBS="mkl_intel_lp64_dll.lib mkl_intel_thread_dll.lib mkl_core_dll.lib" # threaded, 32bit ints, dll
+        MKL_LIBS="mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib" # threaded, 32bit integer, static link
+    else
+#        MKL_LIBS="mkl_intel_lp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib" # sequential, 32bit ints, dll
+        MKL_LIBS="mkl_intel_lp64.lib mkl_sequential.lib mkl_core.lib libiomp5md.lib" # sequential, 32bit int, static link
+    fi
+# MKL_LIBS="mkl_intel_ilp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib" # threaded, 64bit integer, static link
+# MKL_LIBS="mkl_intel_ilp64.lib mkl_sequential.lib mkl_core.lib libiomp5md.lib" # sequential, 64bit integer, static link
+    CUDA_LIBS="cudart.lib CommonJNI.lib"
+    LIB="$MKL_ROOT/mkl/lib/intel64;$MKL_ROOT/compiler/lib/intel64;$JAVA_HOME/lib;$CUDA_HOME/lib/x64;$JCUDA_HOME/lib;$LIB"
+    # The MKL include directory is derived from MKL_ROOT, like LIB above.
+    INCLUDE="$JAVA_HOME/include;$JAVA_HOME/include/win32;$MKL_ROOT/mkl/include;c:/codeh/BIDMat/jni/include;$JCUDA_HOME/CommonJNI/src;$CUDA_HOME/include;$INCLUDE"
+else
+    # Reached for an OS that is recognised above but has no settings (e.g. "sun").
+    echo "OS not supported" >&2
+    exit 1
+fi
+
+echo "Creating config for $OS $ARCH"
+
+# The first write truncates Makefile.incl; all later ones append.
+echo "CC=$CC" > Makefile.incl
+echo "GCC=$GCC" >> Makefile.incl
+echo "NVCC=$NVCC" >> Makefile.incl
+echo "NVCCFLAGS=$NVCCFLAGS" >> Makefile.incl
+echo "SUBLIB=$SUBLIB" >> Makefile.incl
+echo "OBJ=$OBJ" >> Makefile.incl
+echo "OUTFLG=$OUTFLG" >> Makefile.incl
+echo "CPPFLAGS=$CPPFLAGS" >> Makefile.incl
+echo "CFLAGS=$CFLAGS" >> Makefile.incl
+echo "LB=$LB" >> Makefile.incl
+echo "LD=$LD" >> Makefile.incl
+echo "GLD=$GLD" >> Makefile.incl
+echo "LDFLAGS=$LDFLAGS" >> Makefile.incl
+echo "LIBPREPEND=$LIBPREPEND" >> Makefile.incl
+echo "LIBAPPEND=$LIBAPPEND" >> Makefile.incl
+echo "LAPACK_INCLUDES=$LAPACK_INCLUDES" >> Makefile.incl
+echo "MKL_LIBS=$MKL_LIBS" >> Makefile.incl
+echo "CUDA_LIBS=$CUDA_LIBS" >> Makefile.incl
+echo "FORTRAN_LIBS=$FORTRAN_LIBS" >> Makefile.incl
+echo "FC=$FC" >> Makefile.incl
+echo "FFLAGS=$FFLAGS" >> Makefile.incl
+echo "LIB=$LIB" >> Makefile.incl
+echo "INCLUDE=$INCLUDE" >> Makefile.incl
+echo "JCUDA_COMMON=$JCUDA_HOME/CommonJNI/src" >> Makefile.incl
+
+
diff --git a/lib/HDF5_Copyright.html b/lib/HDF5_Copyright.html
new file mode 100644
index 00000000..07a71f45
--- /dev/null
+++ b/lib/HDF5_Copyright.html
@@ -0,0 +1,160 @@
+
+
+
+ HDF5 Copyright Notice and License Terms
+
+
+
+
+
+
+
+
+ +

Copyright Notice and License Terms for +
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities

+
+

+ + +HDF5 (Hierarchical Data Format 5) Software Library and Utilities +
+Copyright 2006-2012 by The HDF Group. +

+NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities +
+Copyright 1998-2006 by the Board of Trustees of the University of Illinois. +

+All rights reserved. +

+ +

+Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial purposes) +provided that the following conditions are met: + +

+

    +
  1. +Redistributions of source code must retain the above copyright notice, +this list of conditions, and the following disclaimer. + +
  2. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions, and the following disclaimer in the documentation +and/or materials provided with the distribution. + +
  3. +In addition, redistributions of modified forms of the source or binary code +must carry prominent notices stating that the original code was changed and +the date of the change. + +
  4. +All publications or advertising materials mentioning features or use of this +software are asked, but not required, to acknowledge that it was developed +by The HDF Group and by the National Center for Supercomputing Applications +at the University of Illinois at Urbana-Champaign and credit the contributors. + +
  5. +Neither the name of The HDF Group, the name of the University, nor the name +of any Contributor may be used to endorse or promote products derived from +this software without specific prior written permission from The HDF Group, +the University, or the Contributor, respectively. +
+ +

+DISCLAIMER: +THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS +"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. +In no event shall The HDF Group or the Contributors be liable for any damages +suffered by the users arising out of the use of this software, even if advised +of the possibility of such damage. + + +


+
+ +

+Contributors: National Center for Supercomputing Applications (NCSA) at +the University of Illinois, Fortner Software, Unidata Program Center (netCDF), +The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip), +and Digital Equipment Corporation (DEC). + +


+ +

+Portions of HDF5 were developed with support from the Lawrence Berkeley +National Laboratory (LBNL) and the United States Department of Energy +under Prime Contract No. DE-AC02-05CH11231. + +


+ +

+Portions of HDF5 were developed with support from the University of +California, Lawrence Livermore National Laboratory (UC LLNL). +The following statement applies to those portions of the product and must +be retained in any redistribution of source code, binaries, documentation, +and/or accompanying materials: +

+ This work was partially produced at the University of California, + Lawrence Livermore National Laboratory (UC LLNL) under contract + no. W-7405-ENG-48 (Contract 48) between the U.S. Department of + Energy (DOE) and The Regents of the University of California + (University) for the operation of UC LLNL. +

+ DISCLAIMER: + This work was prepared as an account of work sponsored by an agency + of the United States Government. Neither the United States Government + nor the University of California nor any of their employees, makes + any warranty, express or implied, or assumes any liability or + responsibility for the accuracy, completeness, or usefulness of any + information, apparatus, product, or process disclosed, or represents + that its use would not infringe privately- owned rights. Reference + herein to any specific commercial products, process, or service by + trade name, trademark, manufacturer, or otherwise, does not + necessarily constitute or imply its endorsement, recommendation, or + favoring by the United States Government or the University of + California. The views and opinions of authors expressed herein do not + necessarily state or reflect those of the United States Government or + the University of California, and shall not be used for advertising + or product endorsement purposes. +

+ +
+ + + + + +
+ + + + + +
+
+ The HDF Group Help Desk: +
+ Describes HDF5 Release 1.8.9, May 2012. +
+
  + Copyright by + The HDF Group +
+ and the Board of Trustees of the University of Illinois +
+
+Last modified: 5 March 2012 + + + + + + + diff --git a/lib/JCUDA_Copyright.txt b/lib/JCUDA_Copyright.txt new file mode 100644 index 00000000..a47ba681 --- /dev/null +++ b/lib/JCUDA_Copyright.txt @@ -0,0 +1,24 @@ +JCuda - Java bindings for NVIDIA CUDA + +Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. diff --git a/lib/PtPlot_Copyright.txt b/lib/PtPlot_Copyright.txt new file mode 100755 index 00000000..7da2f50e --- /dev/null +++ b/lib/PtPlot_Copyright.txt @@ -0,0 +1,27 @@ +Below is the copyright agreement for the Ptolemy II system. +Version: $Id: copyright.txt 57469 2010-03-10 22:04:46Z cxh $ + +Copyright (c) 1995-2010 The Regents of the University of California. +All rights reserved. 
+ +Permission is hereby granted, without written agreement and without +license or royalty fees, to use, copy, modify, and distribute this +software and its documentation for any purpose, provided that the above +copyright notice and the following two paragraphs appear in all copies +of this software. + +IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF +THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE +PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF +CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, +ENHANCEMENTS, OR MODIFICATIONS. + +Ptolemy II includes the work of others, to see those copyrights, follow +the copyright link on the splash page or see copyright.htm. 
diff --git a/lib/bidmat_init.scala b/lib/bidmat_init.scala new file mode 100755 index 00000000..94a40ca5 --- /dev/null +++ b/lib/bidmat_init.scala @@ -0,0 +1,7 @@ +import BIDMat.{Mat, FMat, DMat, IMat, CMat, BMat, CSMat, SMat, SDMat, GMat, GIMat, GSMat, HMat} +import BIDMat.MatFunctions._ +import BIDMat.SciFunctions._ +import BIDMat.Solvers._ +import BIDMat.Plotting._ + +Mat.checkCUDA diff --git a/lib/jcublas-0.4.2.jar b/lib/jcublas-0.4.2.jar new file mode 100644 index 00000000..8183047a Binary files /dev/null and b/lib/jcublas-0.4.2.jar differ diff --git a/lib/jcublas-0.5.0RC.jar b/lib/jcublas-0.5.0RC.jar new file mode 100755 index 00000000..139ed978 Binary files /dev/null and b/lib/jcublas-0.5.0RC.jar differ diff --git a/lib/jcuda-0.4.2.jar b/lib/jcuda-0.4.2.jar new file mode 100644 index 00000000..6b73cf5b Binary files /dev/null and b/lib/jcuda-0.4.2.jar differ diff --git a/lib/jcuda-0.5.0RC.jar b/lib/jcuda-0.5.0RC.jar new file mode 100755 index 00000000..b0e89f1a Binary files /dev/null and b/lib/jcuda-0.5.0RC.jar differ diff --git a/lib/jcudpp-0.4.2.jar b/lib/jcudpp-0.4.2.jar new file mode 100644 index 00000000..3c329bf0 Binary files /dev/null and b/lib/jcudpp-0.4.2.jar differ diff --git a/lib/jcufft-0.4.2.jar b/lib/jcufft-0.4.2.jar new file mode 100644 index 00000000..4f55ba01 Binary files /dev/null and b/lib/jcufft-0.4.2.jar differ diff --git a/lib/jcufft-0.5.0RC.jar b/lib/jcufft-0.5.0RC.jar new file mode 100755 index 00000000..5a26a2e4 Binary files /dev/null and b/lib/jcufft-0.5.0RC.jar differ diff --git a/lib/jcurand-0.4.2.jar b/lib/jcurand-0.4.2.jar new file mode 100644 index 00000000..69b674ca Binary files /dev/null and b/lib/jcurand-0.4.2.jar differ diff --git a/lib/jcurand-0.5.0RC.jar b/lib/jcurand-0.5.0RC.jar new file mode 100755 index 00000000..1399969d Binary files /dev/null and b/lib/jcurand-0.5.0RC.jar differ diff --git a/lib/jcusparse-0.4.2.jar b/lib/jcusparse-0.4.2.jar new file mode 100644 index 00000000..c58917db Binary files /dev/null and 
b/lib/jcusparse-0.4.2.jar differ diff --git a/lib/jcusparse-0.5.0RC.jar b/lib/jcusparse-0.5.0RC.jar new file mode 100755 index 00000000..80be5937 Binary files /dev/null and b/lib/jcusparse-0.5.0RC.jar differ diff --git a/lib/jhdf5.jar b/lib/jhdf5.jar new file mode 100644 index 00000000..9d15b7d1 Binary files /dev/null and b/lib/jhdf5.jar differ diff --git a/lib/linux64/HDF5_Copyright.html b/lib/linux64/HDF5_Copyright.html new file mode 100755 index 00000000..07a71f45 --- /dev/null +++ b/lib/linux64/HDF5_Copyright.html @@ -0,0 +1,160 @@ + + + + HDF5 Copyright Notice and License Terms + + + + + + + +
+ +

Copyright Notice and License Terms for +
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities

+
+

+ + +HDF5 (Hierarchical Data Format 5) Software Library and Utilities +
+Copyright 2006-2012 by The HDF Group. +

+NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities +
+Copyright 1998-2006 by the Board of Trustees of the University of Illinois. +

+All rights reserved. +

+ +

+Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial purposes) +provided that the following conditions are met: + +

+

    +
  1. +Redistributions of source code must retain the above copyright notice, +this list of conditions, and the following disclaimer. + +
  2. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions, and the following disclaimer in the documentation +and/or materials provided with the distribution. + +
  3. +In addition, redistributions of modified forms of the source or binary code +must carry prominent notices stating that the original code was changed and +the date of the change. + +
  4. +All publications or advertising materials mentioning features or use of this +software are asked, but not required, to acknowledge that it was developed +by The HDF Group and by the National Center for Supercomputing Applications +at the University of Illinois at Urbana-Champaign and credit the contributors. + +
  5. +Neither the name of The HDF Group, the name of the University, nor the name +of any Contributor may be used to endorse or promote products derived from +this software without specific prior written permission from The HDF Group, +the University, or the Contributor, respectively. +
+ +

+DISCLAIMER: +THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS +"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. +In no event shall The HDF Group or the Contributors be liable for any damages +suffered by the users arising out of the use of this software, even if advised +of the possibility of such damage. + + +


+
+ +

+Contributors: National Center for Supercomputing Applications (NCSA) at +the University of Illinois, Fortner Software, Unidata Program Center (netCDF), +The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip), +and Digital Equipment Corporation (DEC). + +


+ +

+Portions of HDF5 were developed with support from the Lawrence Berkeley +National Laboratory (LBNL) and the United States Department of Energy +under Prime Contract No. DE-AC02-05CH11231. + +


+ +

+Portions of HDF5 were developed with support from the University of +California, Lawrence Livermore National Laboratory (UC LLNL). +The following statement applies to those portions of the product and must +be retained in any redistribution of source code, binaries, documentation, +and/or accompanying materials: +

+ This work was partially produced at the University of California, + Lawrence Livermore National Laboratory (UC LLNL) under contract + no. W-7405-ENG-48 (Contract 48) between the U.S. Department of + Energy (DOE) and The Regents of the University of California + (University) for the operation of UC LLNL. +

+ DISCLAIMER: + This work was prepared as an account of work sponsored by an agency + of the United States Government. Neither the United States Government + nor the University of California nor any of their employees, makes + any warranty, express or implied, or assumes any liability or + responsibility for the accuracy, completeness, or usefulness of any + information, apparatus, product, or process disclosed, or represents + that its use would not infringe privately- owned rights. Reference + herein to any specific commercial products, process, or service by + trade name, trademark, manufacturer, or otherwise, does not + necessarily constitute or imply its endorsement, recommendation, or + favoring by the United States Government or the University of + California. The views and opinions of authors expressed herein do not + necessarily state or reflect those of the United States Government or + the University of California, and shall not be used for advertising + or product endorsement purposes. +

+ +
+ + + + + +
+ + + + + +
+
+ The HDF Group Help Desk: +
+ Describes HDF5 Release 1.8.9, May 2012. +
+
  + Copyright by + The HDF Group +
+ and the Board of Trustees of the University of Illinois +
+
+Last modified: 5 March 2012 + + + + + + + diff --git a/lib/linux64/JCUDA4.2/libJCublas-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCublas-linux-x86_64.so new file mode 100755 index 00000000..50637794 Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCublas-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libJCublas2-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCublas2-linux-x86_64.so new file mode 100755 index 00000000..ff0797ba Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCublas2-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libJCudaDriver-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCudaDriver-linux-x86_64.so new file mode 100755 index 00000000..d197de2d Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCudaDriver-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libJCudaRuntime-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCudaRuntime-linux-x86_64.so new file mode 100755 index 00000000..9a5aa6a0 Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCudaRuntime-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libJCufft-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCufft-linux-x86_64.so new file mode 100755 index 00000000..750b0f6b Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCufft-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libJCurand-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCurand-linux-x86_64.so new file mode 100755 index 00000000..5db8c4ed Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCurand-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libJCusparse-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCusparse-linux-x86_64.so new file mode 100755 index 00000000..215ebae6 Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCusparse-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libJCusparse2-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCusparse2-linux-x86_64.so new file mode 100755 index 00000000..b20485ce Binary files /dev/null and 
b/lib/linux64/JCUDA4.2/libJCusparse2-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA4.2/libbidmatcuda.so b/lib/linux64/JCUDA4.2/libbidmatcuda.so new file mode 100755 index 00000000..1b2c0e0b Binary files /dev/null and b/lib/linux64/JCUDA4.2/libbidmatcuda.so differ diff --git a/lib/linux64/JCUDA5.0/libJCublas-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCublas-linux-x86_64.so new file mode 100755 index 00000000..cf3aeb39 Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCublas-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA5.0/libJCublas2-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCublas2-linux-x86_64.so new file mode 100755 index 00000000..11d1ee13 Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCublas2-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA5.0/libJCudaDriver-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCudaDriver-linux-x86_64.so new file mode 100755 index 00000000..bd6dfa53 Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCudaDriver-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA5.0/libJCudaRuntime-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCudaRuntime-linux-x86_64.so new file mode 100755 index 00000000..6bfbdbcf Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCudaRuntime-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA5.0/libJCufft-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCufft-linux-x86_64.so new file mode 100755 index 00000000..90499083 Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCufft-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA5.0/libJCurand-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCurand-linux-x86_64.so new file mode 100755 index 00000000..396e9274 Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCurand-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA5.0/libJCusparse-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCusparse-linux-x86_64.so new file mode 100755 index 00000000..d6b8b827 Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCusparse-linux-x86_64.so differ diff 
--git a/lib/linux64/JCUDA5.0/libJCusparse2-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCusparse2-linux-x86_64.so new file mode 100755 index 00000000..917b3bba Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCusparse2-linux-x86_64.so differ diff --git a/lib/linux64/JCUDA5.0/libbidmatcuda.so b/lib/linux64/JCUDA5.0/libbidmatcuda.so new file mode 100755 index 00000000..cfc001f0 Binary files /dev/null and b/lib/linux64/JCUDA5.0/libbidmatcuda.so differ diff --git a/lib/linux64/JCUDA_Copyright.txt b/lib/linux64/JCUDA_Copyright.txt new file mode 100755 index 00000000..a47ba681 --- /dev/null +++ b/lib/linux64/JCUDA_Copyright.txt @@ -0,0 +1,24 @@ +JCuda - Java bindings for NVIDIA CUDA + +Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/lib/linux64/libbidmatmkl.so b/lib/linux64/libbidmatmkl.so new file mode 100755 index 00000000..c9c0e293 Binary files /dev/null and b/lib/linux64/libbidmatmkl.so differ diff --git a/lib/linux64/libhdf4.settings b/lib/linux64/libhdf4.settings new file mode 100644 index 00000000..6f6e7cb6 --- /dev/null +++ b/lib/linux64/libhdf4.settings @@ -0,0 +1,35 @@ + SUMMARY OF THE HDF4 CONFIGURATION + ================================= + +General Information: +------------------- + HDF4 Version: 4.2.6-post2 + Configured on: Tue Dec 13 17:07:13 CST 2011 + Configured by: hdftest@koala + Configure mode: production + Host system: x86_64-unknown-linux-gnu + Uname information: Linux koala 2.6.18-274.12.1.el5 #1 SMP Tue Nov 29 13:37:46 EST 2011 x86_64 x86_64 x86_64 GNU/Linux + Libraries: + Installation point: /mnt/scr1/pre-release/hdf4/vdev/koalajava + +Compiling Options: +------------------ + Compilation Mode: production + C compiler: /usr/bin/gcc ( gcc (GCC) 4.1.2 20080704 ) + CFLAGS: -fPIC -O3 -fomit-frame-pointer + CPPFLAGS: -I/usr/include/rpc -I/mnt/hdf/packages/jpeg-PIC/Linux2.6-x86_64-gcc/include -I/mnt/hdf/packages/szip/static/encoder/Linux2.6-x86_64-gcc/include -DBIG_LONGS -DSWAP + Shared Libraries: no + Static Libraries: yes + LDFLAGS: -L/mnt/hdf/packages/jpeg-PIC/Linux2.6-x86_64-gcc/lib -L/mnt/hdf/packages/szip/static/encoder/Linux2.6-x86_64-gcc/lib + Extra libraries: -lsz -ljpeg -lz -lm + Archiver: ar + Ranlib: ranlib + +Languages: +---------- + Fortran: no + +Features: +--------- + SZIP compression: enabled with encoder + Support for netCDF API 2.3.2: yes diff --git a/lib/linux64/libhdf5.settings b/lib/linux64/libhdf5.settings new file mode 100644 index 00000000..afaae7fa --- /dev/null +++ b/lib/linux64/libhdf5.settings @@ -0,0 +1,62 @@ + SUMMARY OF THE HDF5 CONFIGURATION + ================================= + +General Information: +------------------- + HDF5 Version: 1.8.8 + Configured on: Wed Nov 16 17:48:07 CST 2011 + Configured by: hdftest@koala + Configure 
mode: production + Host system: x86_64-unknown-linux-gnu + Uname information: Linux koala 2.6.18-274.7.1.el5 #1 SMP Thu Oct 20 16:21:01 EDT 2011 x86_64 x86_64 x86_64 GNU/Linux + Byte sex: little-endian + Libraries: + Installation point: /mnt/scr1/pre-release/hdf5/v188/koalajava + +Compiling Options: +------------------ + Compilation Mode: production + C Compiler: /usr/bin/ gcc -fPIC ( gcc (GCC) 4.1.2 20080704 ) + CFLAGS: + H5_CFLAGS: -std=c99 -pedantic -Wall -Wextra -Wundef -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wwrite-strings -Wconversion -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wfloat-equal -Wmissing-format-attribute -Wmissing-noreturn -Wpacked -Wdisabled-optimization -Wformat=2 -Wunreachable-code -Wendif-labels -Wdeclaration-after-statement -Wold-style-definition -Winvalid-pch -Wvariadic-macros -Wnonnull -Winit-self -Wmissing-include-dirs -Wswitch-default -Wswitch-enum -Wunused-macros -Wunsafe-loop-optimizations -Wc++-compat -Wvolatile-register-var -O3 -fomit-frame-pointer -finline-functions + AM_CFLAGS: + CPPFLAGS: + H5_CPPFLAGS: -D_POSIX_C_SOURCE=199506L -DNDEBUG -UH5_DEBUG_API + AM_CPPFLAGS: -I/mnt/hdf/packages/szip-PIC/static/encoder/Linux2.6-x86_64-gcc/include -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_BSD_SOURCE + Shared C Library: no + Static C Library: yes + Statically Linked Executables: no + LDFLAGS: + H5_LDFLAGS: + AM_LDFLAGS: -L/mnt/hdf/packages/szip-PIC/static/encoder/Linux2.6-x86_64-gcc/lib + Extra libraries: -lsz -lz -lrt -lm + Archiver: ar + Ranlib: ranlib + Debugged Packages: + API Tracing: no + +Languages: +---------- + Fortran: no + + C++: no + +Features: +--------- + Parallel HDF5: no + High Level library: yes + Threadsafety: no + Default API Mapping: v18 + With Deprecated Public Symbols: yes + I/O filters (external): deflate(zlib),szip(encoder) + I/O filters (internal): shuffle,fletcher32,nbit,scaleoffset + 
MPE: no + Direct VFD: no + dmalloc: no +Clear file buffers before write: yes + Using memory checker: no + Function Stack Tracing: no + GPFS: no + Strict File Format Checks: no + Optimization Instrumentation: no + Large File Support (LFS): yes diff --git a/lib/linux64/libiomp5.so b/lib/linux64/libiomp5.so new file mode 100755 index 00000000..3b9e7257 Binary files /dev/null and b/lib/linux64/libiomp5.so differ diff --git a/lib/linux64/libjhdf.so b/lib/linux64/libjhdf.so new file mode 100755 index 00000000..ff6304c3 Binary files /dev/null and b/lib/linux64/libjhdf.so differ diff --git a/lib/linux64/libjhdf5.so b/lib/linux64/libjhdf5.so new file mode 100755 index 00000000..c3dcb2d3 Binary files /dev/null and b/lib/linux64/libjhdf5.so differ diff --git a/lib/ptplot.jar b/lib/ptplot.jar new file mode 100644 index 00000000..9582f1cb Binary files /dev/null and b/lib/ptplot.jar differ diff --git a/lib/ptplotapplication.jar b/lib/ptplotapplication.jar new file mode 100755 index 00000000..cc32dd0c Binary files /dev/null and b/lib/ptplotapplication.jar differ diff --git a/lib/win64/HDF5_Copyright.html b/lib/win64/HDF5_Copyright.html new file mode 100755 index 00000000..07a71f45 --- /dev/null +++ b/lib/win64/HDF5_Copyright.html @@ -0,0 +1,160 @@ + + + + HDF5 Copyright Notice and License Terms + + + + + + + +
+ +

Copyright Notice and License Terms for +
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities

+
+

+ + +HDF5 (Hierarchical Data Format 5) Software Library and Utilities +
+Copyright 2006-2012 by The HDF Group. +

+NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities +
+Copyright 1998-2006 by the Board of Trustees of the University of Illinois. +

+All rights reserved. +

+ +

+Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial purposes) +provided that the following conditions are met: + +

+

    +
  1. +Redistributions of source code must retain the above copyright notice, +this list of conditions, and the following disclaimer. + +
  2. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions, and the following disclaimer in the documentation +and/or materials provided with the distribution. + +
  3. +In addition, redistributions of modified forms of the source or binary code +must carry prominent notices stating that the original code was changed and +the date of the change. + +
  4. +All publications or advertising materials mentioning features or use of this +software are asked, but not required, to acknowledge that it was developed +by The HDF Group and by the National Center for Supercomputing Applications +at the University of Illinois at Urbana-Champaign and credit the contributors. + +
  5. +Neither the name of The HDF Group, the name of the University, nor the name +of any Contributor may be used to endorse or promote products derived from +this software without specific prior written permission from The HDF Group, +the University, or the Contributor, respectively. +
+ +

+DISCLAIMER: +THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS +"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. +In no event shall The HDF Group or the Contributors be liable for any damages +suffered by the users arising out of the use of this software, even if advised +of the possibility of such damage. + + +


+
+ +

+Contributors: National Center for Supercomputing Applications (NCSA) at +the University of Illinois, Fortner Software, Unidata Program Center (netCDF), +The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip), +and Digital Equipment Corporation (DEC). + +


+ +

+Portions of HDF5 were developed with support from the Lawrence Berkeley +National Laboratory (LBNL) and the United States Department of Energy +under Prime Contract No. DE-AC02-05CH11231. + +


+ +

+Portions of HDF5 were developed with support from the University of +California, Lawrence Livermore National Laboratory (UC LLNL). +The following statement applies to those portions of the product and must +be retained in any redistribution of source code, binaries, documentation, +and/or accompanying materials: +

+ This work was partially produced at the University of California, + Lawrence Livermore National Laboratory (UC LLNL) under contract + no. W-7405-ENG-48 (Contract 48) between the U.S. Department of + Energy (DOE) and The Regents of the University of California + (University) for the operation of UC LLNL. +

+ DISCLAIMER: + This work was prepared as an account of work sponsored by an agency + of the United States Government. Neither the United States Government + nor the University of California nor any of their employees, makes + any warranty, express or implied, or assumes any liability or + responsibility for the accuracy, completeness, or usefulness of any + information, apparatus, product, or process disclosed, or represents + that its use would not infringe privately- owned rights. Reference + herein to any specific commercial products, process, or service by + trade name, trademark, manufacturer, or otherwise, does not + necessarily constitute or imply its endorsement, recommendation, or + favoring by the United States Government or the University of + California. The views and opinions of authors expressed herein do not + necessarily state or reflect those of the United States Government or + the University of California, and shall not be used for advertising + or product endorsement purposes. +

+ +
+ + + + + +
+ + + + + +
+
+ The HDF Group Help Desk: +
+ Describes HDF5 Release 1.8.9, May 2012. +
+
  + Copyright by + The HDF Group +
+ and the Board of Trustees of the University of Illinois +
+
+Last modified: 5 March 2012 + + + + + + + diff --git a/lib/win64/JCUDA4.2/JCublas-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCublas-windows-x86_64.dll new file mode 100755 index 00000000..812bf249 Binary files /dev/null and b/lib/win64/JCUDA4.2/JCublas-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA4.2/JCublas2-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCublas2-windows-x86_64.dll new file mode 100755 index 00000000..66d70142 Binary files /dev/null and b/lib/win64/JCUDA4.2/JCublas2-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA4.2/JCudaDriver-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCudaDriver-windows-x86_64.dll new file mode 100755 index 00000000..5993832c Binary files /dev/null and b/lib/win64/JCUDA4.2/JCudaDriver-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA4.2/JCudaRuntime-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCudaRuntime-windows-x86_64.dll new file mode 100755 index 00000000..3aba265f Binary files /dev/null and b/lib/win64/JCUDA4.2/JCudaRuntime-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA4.2/JCufft-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCufft-windows-x86_64.dll new file mode 100755 index 00000000..7fbad0dd Binary files /dev/null and b/lib/win64/JCUDA4.2/JCufft-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA4.2/JCurand-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCurand-windows-x86_64.dll new file mode 100755 index 00000000..f189d062 Binary files /dev/null and b/lib/win64/JCUDA4.2/JCurand-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA4.2/JCusparse-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCusparse-windows-x86_64.dll new file mode 100755 index 00000000..0f483793 Binary files /dev/null and b/lib/win64/JCUDA4.2/JCusparse-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA4.2/JCusparse2-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCusparse2-windows-x86_64.dll new file mode 100755 index 00000000..5b66a121 Binary files /dev/null and b/lib/win64/JCUDA4.2/JCusparse2-windows-x86_64.dll differ diff --git 
a/lib/win64/JCUDA4.2/bidmatcuda.dll b/lib/win64/JCUDA4.2/bidmatcuda.dll new file mode 100755 index 00000000..7a506749 Binary files /dev/null and b/lib/win64/JCUDA4.2/bidmatcuda.dll differ diff --git a/lib/win64/JCUDA5.0/JCublas-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCublas-windows-x86_64.dll new file mode 100755 index 00000000..e8f812aa Binary files /dev/null and b/lib/win64/JCUDA5.0/JCublas-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/JCublas2-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCublas2-windows-x86_64.dll new file mode 100755 index 00000000..bb0cff7b Binary files /dev/null and b/lib/win64/JCUDA5.0/JCublas2-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/JCudaDriver-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCudaDriver-windows-x86_64.dll new file mode 100755 index 00000000..cc72206e Binary files /dev/null and b/lib/win64/JCUDA5.0/JCudaDriver-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/JCudaRuntime-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCudaRuntime-windows-x86_64.dll new file mode 100755 index 00000000..35715c0d Binary files /dev/null and b/lib/win64/JCUDA5.0/JCudaRuntime-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/JCufft-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCufft-windows-x86_64.dll new file mode 100755 index 00000000..060f337a Binary files /dev/null and b/lib/win64/JCUDA5.0/JCufft-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/JCurand-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCurand-windows-x86_64.dll new file mode 100755 index 00000000..f248ec61 Binary files /dev/null and b/lib/win64/JCUDA5.0/JCurand-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/JCusparse-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCusparse-windows-x86_64.dll new file mode 100755 index 00000000..5ecef03b Binary files /dev/null and b/lib/win64/JCUDA5.0/JCusparse-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/JCusparse2-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCusparse2-windows-x86_64.dll new file mode 100755 
index 00000000..854747cf Binary files /dev/null and b/lib/win64/JCUDA5.0/JCusparse2-windows-x86_64.dll differ diff --git a/lib/win64/JCUDA5.0/bidmatcuda.dll b/lib/win64/JCUDA5.0/bidmatcuda.dll new file mode 100755 index 00000000..37ce271e Binary files /dev/null and b/lib/win64/JCUDA5.0/bidmatcuda.dll differ diff --git a/lib/win64/JCUDA_Copyright.txt b/lib/win64/JCUDA_Copyright.txt new file mode 100755 index 00000000..a47ba681 --- /dev/null +++ b/lib/win64/JCUDA_Copyright.txt @@ -0,0 +1,24 @@ +JCuda - Java bindings for NVIDIA CUDA + +Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/lib/win64/bidmatmkl.dll b/lib/win64/bidmatmkl.dll new file mode 100755 index 00000000..f616fe52 Binary files /dev/null and b/lib/win64/bidmatmkl.dll differ diff --git a/lib/win64/jhdf.dll b/lib/win64/jhdf.dll new file mode 100755 index 00000000..fd03758e Binary files /dev/null and b/lib/win64/jhdf.dll differ diff --git a/lib/win64/jhdf5.dll b/lib/win64/jhdf5.dll new file mode 100755 index 00000000..3d47abfb Binary files /dev/null and b/lib/win64/jhdf5.dll differ diff --git a/lib/win64/libiomp5md.dll b/lib/win64/libiomp5md.dll new file mode 100755 index 00000000..faf9ebcc Binary files /dev/null and b/lib/win64/libiomp5md.dll differ diff --git a/project/plugins.sbt b/project/plugins.sbt new file mode 100755 index 00000000..bf5cb709 --- /dev/null +++ b/project/plugins.sbt @@ -0,0 +1,7 @@ + +libraryDependencies <+= sbtVersion(v => "com.github.siasia" %% "xsbt-proguard-plugin" % (v+"-0.1.1")) + +resolvers += "Proguard plugin repo" at "http://siasia.github.com/maven2" + + + diff --git a/src/main/java/edu/berkeley/bid/CBLAS.java b/src/main/java/edu/berkeley/bid/CBLAS.java new file mode 100755 index 00000000..fba33ec2 --- /dev/null +++ b/src/main/java/edu/berkeley/bid/CBLAS.java @@ -0,0 +1,73 @@ +package edu.berkeley.bid; + +public final class CBLAS { + + private CBLAS() {} + + static { + System.loadLibrary("bidmatmkl"); + } + + public final static class ORDER { + private ORDER() {} + public final static int RowMajor=101; + public final static int ColMajor=102; + } + + public final static class TRANSPOSE { + private TRANSPOSE() {} + public final static int NoTrans =111; + public final static int Trans =112; + public final static int ConjTrans=113; + } + + public final static class UPLO { + private UPLO() {} + public final static int Upper=121; + public final static int Lower=122; + } + + public final static class DIAG { + private DIAG() {} + public final static int NonUnit=131; + public final static int Unit =132; + } + + public final static class SIDE { + 
private SIDE() {} + public final static int Left =141; + public final static int Right=142; + } + + public static native double ddot( int N, double [] X, int incX, double [] Y, int incY); + public static native double ddotxx( int N, double [] X, int startX, double [] Y, int startY); + public static native double daxpy( int N, double a, double [] X, int incX, double [] Y, int incY); + public static native double daxpyxx( int N, double a, double [] X, int startX, double [] Y, int startY); + public static native void dgemv( int order, int TransA, int M, int N, double alpha, double [] A, int lda, + double [] X, int incX, double beta, double [] Y, int incY); + public static native void dgemm( int Order, int TransA, int TransB, int M, int N, int K, double alpha, + double [] A, int lda, double [] B, int ldb, double beta, double [] C, int ldc); + public static native void domatcopy( String Order, String TransA, int M, int N, double alpha, double [] A, int lda, double [] B, int ldb); + public static native void dmcscm( int m, int n, double [] a, int lda, double [] b, int [] ir, int [] jc, double [] c, int ldc); + public static native void dmcsrm( int m, int n, double [] a, int lda, double [] b, int [] ir, int [] jc, double [] c, int ldc); + + public static native float sdot( int N, float [] X, int incX, float [] Y, int incY); + public static native float sdotxx( int N, float [] X, int startX, float [] Y, int startY); + public static native double saxpy( int N, float a, float [] X, int incX, float [] Y, int incY); + public static native double saxpyxx( int N, float a, float [] X, int startX, float [] Y, int startY); + public static native void sgemv( int order, int TransA, int M, int N, float alpha, float [] A, int lda, + float [] X, int incX, float beta, float [] Y, int incY); + public static native void sgemm( int Order, int TransA, int TransB, int M, int N, int K, float alpha, + float [] A, int lda, float [] B, int ldb, float beta, float [] C, int ldc); + public static 
native void somatcopy( String Order, String TransA, int M, int N, float alpha, float [] A, int lda, float [] B, int ldb); + + public static native double caxpy( int N, float [] a, float [] X, int incX, float [] Y, int incY); + public static native double caxpyxx( int N, float [] a, float [] X, int startX, float [] Y, int startY); + public static native void cgemv( int order, int TransA, int M, int N, float [] alpha, float [] A, int lda, + float [] X, int incX, float [] beta, float [] Y, int incY); + public static native void cgemm( int Order, int TransA, int TransB, int M, int N, int K, float [] alpha, + float [] A, int lda, float [] B, int ldb, float [] beta, float [] C, int ldc); + + public static native void smcscm( int m, int n, float [] a, int lda, float [] b, int [] ir, int [] jc, float [] c, int ldc); + public static native void smcsrm( int m, int n, float [] a, int lda, float [] b, int [] ir, int [] jc, float [] c, int ldc); +} \ No newline at end of file diff --git a/src/main/java/edu/berkeley/bid/CUMAT.java b/src/main/java/edu/berkeley/bid/CUMAT.java new file mode 100755 index 00000000..9c8fcdd9 --- /dev/null +++ b/src/main/java/edu/berkeley/bid/CUMAT.java @@ -0,0 +1,32 @@ +package edu.berkeley.bid; +import jcuda.*; +import jcuda.runtime.*; + +public final class CUMAT { + + private CUMAT() {} + + static { + System.loadLibrary("bidmatcuda"); + } + + public static native int applyop(Pointer A, int Anrows, int Ancols, Pointer B, int Bnrows, int Bncols, Pointer C, int opn); + + public static native int applyiop(Pointer A, int Anrows, int Ancols, Pointer B, int Bnrows, int Bncols, Pointer C, int opn); + + public static native int applygfun(Pointer A, Pointer B, int N, int opn); + + public static native int applygfun2(Pointer A, Pointer B, Pointer C, int N, int opn); + + public static native int reduce1op(int nr, int nc, Pointer A, Pointer B, int opn); + + public static native int reduce2op(int nr, int nc, Pointer A, Pointer B, int opn); + + public static 
native int dsmult(int nr, int nc, int nnz, Pointer A, Pointer Bdata, Pointer Bir, Pointer Bic, Pointer C); + + public static native int dsmultT(int nr, int nc, int nnz, Pointer A, Pointer Bdata, Pointer Bir, Pointer Bic, Pointer C); + + public static native int dds(int nr, int nnz, Pointer A, Pointer B, Pointer Cir, Pointer Cic, Pointer P); + + public static native int transpose(Pointer A, int lda, Pointer B, int ldb, int nr, int nc); +} diff --git a/src/main/java/edu/berkeley/bid/Copyright.txt b/src/main/java/edu/berkeley/bid/Copyright.txt new file mode 100755 index 00000000..21326596 --- /dev/null +++ b/src/main/java/edu/berkeley/bid/Copyright.txt @@ -0,0 +1,25 @@ +Copyright (c) 2012, Regents of the University of California +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/src/main/java/edu/berkeley/bid/LAPACK.java b/src/main/java/edu/berkeley/bid/LAPACK.java new file mode 100755 index 00000000..0cea7828 --- /dev/null +++ b/src/main/java/edu/berkeley/bid/LAPACK.java @@ -0,0 +1,102 @@ +package edu.berkeley.bid; + +public final class LAPACK { + + private LAPACK() {} + + static { + System.loadLibrary("bidmatmkl"); + } + +/* public final static class ORDER { + private ORDER() {} + public final static int RowMajor=101; + public final static int ColMajor=102; + } */ + + public static native int sgetrf( int order, int M, int N, float [] A, int lda, int [] ipiv); + public static native int dgetrf( int order, int M, int N, double [] A, int lda, int [] ipiv); + public static native int cgetrf( int order, int M, int N, float [] A, int lda, int [] ipiv); + public static native int zgetrf( int order, int M, int N, double [] A, int lda, int [] ipiv); + + public static native int sgetri( int order, int N, float [] A, int lda, int [] ipiv); + public static native int dgetri( int order, int N, double [] A, int lda, int [] ipiv); + public static native int cgetri( int order, int N, float [] A, int lda, int [] ipiv); + public static native int zgetri( int order, int N, double [] A, int lda, int [] ipiv); + + public static native int sgetrs( int order, String trans, int N, int nrhs, float [] A, int lda, int [] ipiv, float [] b, int ldb); + public static native int dgetrs( int order, String trans, int N, int nrhs, double [] A, int lda, int [] ipiv, double [] b, int 
ldb); + public static native int cgetrs( int order, String trans, int N, int nrhs, float [] A, int lda, int [] ipiv, float [] b, int ldb); + public static native int zgetrs( int order, String trans, int N, int nrhs, double [] A, int lda, int [] ipiv, double [] b, int ldb); + + public static native int strtrs( int order, String mdata, int n, int nrhs, float [] A, int lda, float [] b, int ldb); + public static native int dtrtrs( int order, String mdata, int n, int nrhs, double [] A, int lda, double [] b, int ldb); + public static native int ctrtrs( int order, String mdata, int n, int nrhs, float [] A, int lda, float [] b, int ldb); + public static native int ztrtrs( int order, String mdata, int n, int nrhs, double [] A, int lda, double [] b, int ldb); + + public static native int ssteqr( int order, String compz, int n, float [] d, float [] e, float [] z, int ldz ); + public static native int dsteqr( int order, String compz, int n, double [] d, double [] e, double [] z, int ldz ); + public static native int csteqr( int order, String compz, int n, float [] d, float [] e, float [] z, int ldz ); + public static native int zsteqr( int order, String compz, int n, double [] d, double [] e, double [] z, int ldz ); + + public static native int ssytrd( int order, String uplo, int n, float [] a, int lda, float [] d, float [] e, float [] tau ); + public static native int dsytrd( int order, String uplo, int n, double [] a, int lda, double [] d, double [] e, double [] tau ); + + public static native int sorgtr( int order, String uplo, int n, float [] a, int lda, float [] tau ); + public static native int dorgtr( int order, String uplo, int n, double [] a, int lda, double [] tau ); + + public static native int sstedc( int order, String compz, int n, float [] d, float [] e, float [] z, int ldz ); + public static native int dstedc( int order, String compz, int n, double [] d, double [] e, double [] z, int ldz ); + + public static native int ssyevd( int order, String jobz, String 
uplo, int n, float [] a, int lda, float [] w ); + public static native int dsyevd( int order, String jobz, String uplo, int n, double [] a, int lda, double [] w ); + + public static native int spotrf( int order, String uplo, int n, float [] a, int lda); + public static native int dpotrf( int order, String uplo, int n, double [] a, int lda); + public static native int cpotrf( int order, String uplo, int n, float [] a, int lda); + public static native int zpotrf( int order, String uplo, int n, double [] a, int lda); + + public static native int sgebal(int matrix_order, String job, int n, float [] a, int lda, int [] ilo, int [] ihi, float [] scale); + public static native int dgebal(int matrix_order, String job, int n, double [] a, int lda, int [] ilo, int [] ihi, double [] scale); + public static native int cgebal(int matrix_order, String job, int n, float [] a, int lda, int [] ilo, int [] ihi, float [] scale); + public static native int zgebal(int matrix_order, String job, int n, double [] a, int lda, int [] ilo, int [] ihi, double [] scale); + + public static native int cunghr(int matrix_order, int n, int ilo, int ihi, float [] a, int lda, float [] tau); + public static native int zunghr(int matrix_order, int n, int ilo, int ihi, double [] a, int lda, double [] tau); + + public static native int strevc(int matrix_order, String side, String howmny, int [] select, int n, float [] t, int ldt, float [] vl, int ldvl, float [] vr, int ldvr, int mm, int [] m); + public static native int dtrevc(int matrix_order, String side, String howmny, int [] select, int n, double [] t, int ldt, double [] vl, int ldvl, double [] vr, int ldvr, int mm, int [] m); + public static native int ctrevc(int matrix_order, String side, String howmny, int [] select, int n, float [] t, int ldt, float [] vl, int ldvl, float [] vr, int ldvr, int mm, int [] m); + public static native int ztrevc(int matrix_order, String side, String howmny, int [] select, int n, double [] t, int ldt, double [] vl, int 
ldvl, double [] vr, int ldvr, int mm, int [] m); + + public static native int sgehrd(int matrix_order, int n, int ilo, int ihi, float [] a, int lda, float [] tau); + public static native int dgehrd(int matrix_order, int n, int ilo, int ihi, double [] a, int lda, double [] tau); + public static native int cgehrd(int matrix_order, int n, int ilo, int ihi, float [] a, int lda, float [] tau); + public static native int zgehrd(int matrix_order, int n, int ilo, int ihi, double [] a, int lda, double [] tau); + + public static native int shseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, float [] h, int ldh, float [] wr, float [] wi, float [] z, int ldz); + public static native int dhseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, double [] h, int ldh, double [] wr, double [] wi, double [] z, int ldz); + public static native int chseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, float [] h, int ldh, float [] w, float [] z, int ldz); + public static native int zhseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, double [] h, int ldh, double [] w, double [] z, int ldz); + + public static native int sgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, float [] scale, int m, float [] v, int ldv); + public static native int dgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, double [] scale, int m, double [] v, int ldv); + public static native int cgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, float [] scale, int m, float [] v, int ldv); + public static native int zgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, double [] scale, int m, double [] v, int ldv); + + public static native int sgeqrf(int matrix_order, int m, int n, float [] a, int lda, float [] tau); + public static native int dgeqrf(int matrix_order, int m, int n, double [] a, int lda, double [] tau); + public static 
native int cgeqrf(int matrix_order, int m, int n, float [] a, int lda, float [] tau); + public static native int zgeqrf(int matrix_order, int m, int n, double [] a, int lda, double [] tau); + + public static native int sgeqp3(int matrix_order, int m, int n, float [] a, int lda, int [] jpvt, float [] tau); + public static native int dgeqp3(int matrix_order, int m, int n, double [] a, int lda, int [] jpvt, double [] tau); + public static native int cgeqp3(int matrix_order, int m, int n, float [] a, int lda, int [] jpvt, float [] tau); + public static native int zgeqp3(int matrix_order, int m, int n, double [] a, int lda, int [] jpvt, double [] tau); + + public static native int sorgqr(int matrix_order, int m, int n, int k, float [] a, int lda, float [] tau); + public static native int dorgqr(int matrix_order, int m, int n, int k, double [] a, int lda, double [] tau); + + public static native int cungqr(int matrix_order, int m, int n, int k, float [] a, int lda, float [] tau); + public static native int zungqr(int matrix_order, int m, int n, int k, double [] a, int lda, double [] tau); +} \ No newline at end of file diff --git a/src/main/java/edu/berkeley/bid/SPBLAS.java b/src/main/java/edu/berkeley/bid/SPBLAS.java new file mode 100755 index 00000000..6effca69 --- /dev/null +++ b/src/main/java/edu/berkeley/bid/SPBLAS.java @@ -0,0 +1,35 @@ +package edu.berkeley.bid; + +public final class SPBLAS { + + private SPBLAS() {} + + static { + System.loadLibrary("bidmatmkl"); + } + + public static native void scsrmm(String transa, int m, int n, int k, float alpha, String matdescra, + float [] val, int [] ir, int [] jc, float [] b, int ldb, float beta, float [] c, int ldc); + + public static native void scscmm(String transa, int m, int n, int k, float alpha, String matdescra, + float [] val, int [] ir, int [] jc, float [] b, int ldb, float beta, float [] c, int ldc); + + public static native void scsrmv (String transa, int m, int k, float alpha, String matdescra, + float [] val, 
int [] ir, int [] jc, float [] x, float beta, float [] y); + + public static native void scscmv (String transa, int m, int k, float alpha, String matdescra, + float [] val, int [] ir, int [] jc, float [] x, float beta, float [] y); + + public static native void dcsrmm(String transa, int m, int n, int k, double alpha, String matdescra, + double [] val, int [] ir, int [] jc, double [] b, int ldb, double beta, double [] c, int ldc); + + public static native void dcscmm(String transa, int m, int n, int k, double alpha, String matdescra, + double [] val, int [] ir, int [] jc, double [] b, int ldb, double beta, double [] c, int ldc); + + public static native void dcsrmv (String transa, int m, int k, double alpha, String matdescra, + double [] val, int [] ir, int [] jc, double [] x, double beta, double [] y); + + public static native void dcscmv (String transa, int m, int k, double alpha, String matdescra, + double [] val, int [] ir, int [] jc, double [] x, double beta, double [] y); + +} \ No newline at end of file diff --git a/src/main/java/edu/berkeley/bid/UTILS.java b/src/main/java/edu/berkeley/bid/UTILS.java new file mode 100755 index 00000000..5a20f2cd --- /dev/null +++ b/src/main/java/edu/berkeley/bid/UTILS.java @@ -0,0 +1,41 @@ +package edu.berkeley.bid; +import java.io.*; +import java.util.zip.*; + +public final class UTILS { + + private UTILS() {} + + static { + System.loadLibrary("bidmatmkl"); + } + + public static native void memcpybi( int n, byte [] a, int startA, int [] b, int startB ); + public static native void memcpybf( int n, byte [] a, int startA, float [] b, int startB ); + public static native void memcpybd( int n, byte [] a, int startA, double [] b, int startB ); + + public static native void memcpyib( int n, int [] a, int startA, byte [] b, int startB ); + public static native void memcpyfb( int n, float [] a, int startA, byte [] b, int startB ); + public static native void memcpydb( int n, double [] a, int startA, byte [] b, int startB ); + + 
public static OutputStream _getOutputStream(String fname, Boolean compressed, int compressionLevel) throws IOException { + FileOutputStream fout = new FileOutputStream(fname); + if (compressed) { + switch (compressionLevel) { + case 1: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(1);}}; + case 2: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(2);}}; + case 3: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(3);}}; + case 4: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(4);}}; + case 5: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(5);}}; + case 6: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(6);}}; + case 7: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(7);}}; + case 8: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(8);}}; + case 9: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(9);}}; + default: throw new RuntimeException("Unsupported compression level "+compressionLevel); + } + } else { + return new BufferedOutputStream(fout, 1024*1024); + } + } + +} \ No newline at end of file diff --git a/src/main/java/edu/berkeley/bid/VML.java b/src/main/java/edu/berkeley/bid/VML.java new file mode 100755 index 00000000..8bb0b482 --- /dev/null +++ b/src/main/java/edu/berkeley/bid/VML.java @@ -0,0 +1,144 @@ +package edu.berkeley.bid; + +public final class VML { + + private VML() {} + + static { + System.loadLibrary("bidmatmkl"); + } + + public final static class VMLMODE { + private VMLMODE() {} + public final static int VML_LA = 0x00000001; + public final static int VML_HA = 0x00000002; + public final static int VML_EP = 0x00000003; + + public final static int VML_ERRMODE_IGNORE = 0x00000100; + public final static int VML_ERRMODE_ERRNO = 0x00000200; + public final static int VML_ERRMODE_STDERR = 0x00000400; + public final static int VML_ERRMODE_EXCEPT = 0x00000800; + public final static int VML_ERRMODE_CALLBACK = 0x00001000; + public final static int 
VML_ERRMODE_DEFAULT = VML_ERRMODE_ERRNO | VML_ERRMODE_CALLBACK | VML_ERRMODE_EXCEPT; + + public final static int VML_FTZDAZ_ON = 0x00280000; + public final static int VML_FTZDAZ_OFF = 0x00140000; + } + + + public static native void vsAbs ( int n, float [] a, float [] r); + public static native void vdAbs ( int n, double [] a, double [] r); + public static native void vsAdd ( int n, float [] a, float [] b, float [] r); + public static native void vdAdd ( int n, double [] a, double [] b, double [] r); + public static native void vsSub ( int n, float [] a, float [] b, float [] r); + public static native void vdSub ( int n, double [] a, double [] b, double [] r); + public static native void vsInv ( int n, float [] a, float [] r); + public static native void vdInv ( int n, double [] a, double [] r); + public static native void vsSqrt ( int n, float [] a, float [] r); + public static native void vdSqrt ( int n, double [] a, double [] r); + public static native void vsExp ( int n, float [] a, float [] r); + public static native void vdExp ( int n, double [] a, double [] r); + public static native void vsExpm1 ( int n, float [] a, float [] r); + public static native void vdExpm1 ( int n, double [] a, double [] r); + public static native void vsLn ( int n, float [] a, float [] r); + public static native void vdLn ( int n, double [] a, double [] r); + public static native void vsLog10 ( int n, float [] a, float [] r); + public static native void vdLog10 ( int n, double [] a, double [] r); + public static native void vsLog1p ( int n, float [] a, float [] r); + public static native void vdLog1p ( int n, double [] a, double [] r); + public static native void vsCos ( int n, float [] a, float [] r); + public static native void vdCos ( int n, double [] a, double [] r); + public static native void vsSin ( int n, float [] a, float [] r); + public static native void vdSin ( int n, double [] a, double [] r); + public static native void vsTan ( int n, float [] a, float [] r); + public 
static native void vdTan ( int n, double [] a, double [] r); + public static native void vsCosh ( int n, float [] a, float [] r); + public static native void vdCosh ( int n, double [] a, double [] r); + public static native void vsSinh ( int n, float [] a, float [] r); + public static native void vdSinh ( int n, double [] a, double [] r); + public static native void vsTanh ( int n, float [] a, float [] r); + public static native void vdTanh ( int n, double [] a, double [] r); + public static native void vsAcos ( int n, float [] a, float [] r); + public static native void vdAcos ( int n, double [] a, double [] r); + public static native void vsAsin ( int n, float [] a, float [] r); + public static native void vdAsin ( int n, double [] a, double [] r); + public static native void vsAtan ( int n, float [] a, float [] r); + public static native void vdAtan ( int n, double [] a, double [] r); + public static native void vsAcosh ( int n, float [] a, float [] r); + public static native void vdAcosh ( int n, double [] a, double [] r); + public static native void vsAsinh ( int n, float [] a, float [] r); + public static native void vdAsinh ( int n, double [] a, double [] r); + public static native void vsAtanh ( int n, float [] a, float [] r); + public static native void vdAtanh ( int n, double [] a, double [] r); + public static native void vsErf ( int n, float [] a, float [] r); + public static native void vdErf ( int n, double [] a, double [] r); + public static native void vsErfInv ( int n, float [] a, float [] r); + public static native void vdErfInv ( int n, double [] a, double [] r); + public static native void vsHypot ( int n, float [] a, float [] b, float [] r); + public static native void vdHypot ( int n, double [] a, double [] b, double [] r); + public static native void vsErfc ( int n, float [] a, float [] r); + public static native void vdErfc ( int n, double [] a, double [] r); + public static native void vsErfcInv ( int n, float [] a, float [] r); + public 
static native void vdErfcInv ( int n, double [] a, double [] r); + public static native void vsCdfNorm ( int n, float [] a, float [] r); + public static native void vdCdfNorm ( int n, double [] a, double [] r); + public static native void vsCdfNormInv ( int n, float [] a, float [] r); + public static native void vdCdfNormInv ( int n, double [] a, double [] r); + public static native void vsLGamma ( int n, float [] a, float [] r); + public static native void vdLGamma ( int n, double [] a, double [] r); + public static native void vsTGamma ( int n, float [] a, float [] r); + public static native void vdTGamma ( int n, double [] a, double [] r); + public static native void vsAtan2 ( int n, float [] a, float [] b, float [] r); + public static native void vdAtan2 ( int n, double [] a, double [] b, double [] r); + public static native void vsMul ( int n, float [] a, float [] b, float [] r); + public static native void vdMul ( int n, double [] a, double [] b, double [] r); + public static native void vsDiv ( int n, float [] a, float [] b, float [] r); + public static native void vdDiv ( int n, double [] a, double [] b, double [] r); + public static native void vsPow ( int n, float [] a, float [] b, float [] r); + public static native void vdPow ( int n, double [] a, double [] b, double [] r); + public static native void vsPow3o2 ( int n, float [] a, float [] r); + public static native void vdPow3o2 ( int n, double [] a, double [] r); + public static native void vsPow2o3 ( int n, float [] a, float [] r); + public static native void vdPow2o3 ( int n, double [] a, double [] r); + public static native void vsPowx ( int n, float [] a, float b, float [] r); + public static native void vdPowx ( int n, double [] a, double b, double [] r); + public static native void vsSinCos ( int n, float [] a, float [] r1, float [] r2); + public static native void vdSinCos ( int n, double [] a, double [] r1, double [] r2); + public static native void vsLinearFrac ( int n, float [] a, float [] 
b, float scalea, float shifta, float scaleb, float shiftb, float [] r); + public static native void vdLinearFrac ( int n, double [] a, double [] b, double scalea, double shifta, double scaleb, double shiftb, double [] r); + public static native void vsCeil ( int n, float [] a, float [] r); + public static native void vdCeil ( int n, double [] a, double [] r);; + public static native void vsFloor ( int n, float [] a, float [] r); + public static native void vdFloor ( int n, double [] a, double [] r); + public static native void vsModf ( int n, float [] a, float [] r1, float [] r2); + public static native void vdModf ( int n, double [] a, double [] r1, double [] r2); + public static native void vmsModf ( int n, float [] a, float [] r1, float [] r2, long mode); + public static native void vmdModf ( int n, double [] a, double [] r1, double [] r2, long mode); + public static native void vsNearbyInt ( int n, float [] a, float [] r); + public static native void vdNearbyInt ( int n, double [] a, double [] r); + public static native void vsRint ( int n, float [] a, float [] r); + public static native void vdRint ( int n, double [] a, double [] r); + public static native void vsRound ( int n, float [] a, float [] r); + public static native void vdRound ( int n, double [] a, double [] r); + public static native void vsTrunc ( int n, float [] a, float [] r); + public static native void vdTrunc ( int n, double [] a, double [] r); + public static native void vsPackI ( int n, float [] a, int incra, float [] y); + public static native void vdPackI ( int n, double [] a, int incra, double [] y); + public static native void vsPackV ( int n, float [] a, int [] ia, float [] y); + public static native void vdPackV ( int n, double [] a, int [] ia, double [] y); + public static native void vsPackM ( int n, float [] a, int [] ma, float [] y); + public static native void vdPackM ( int n, double [] a, int [] ma, double [] y); + public static native void vsUnpackI ( int n, float [] a, float 
[] y, int incry ); + public static native void vdUnpackI ( int n, double [] a, double [] y, int incry ); + public static native void vsUnpackV ( int n, float [] a, float [] y, int [] iy ); + public static native void vdUnpackV ( int n, double [] a, double [] y, int [] iy ); + public static native void vsUnpackM ( int n, float [] a, float [] y, int [] my ); + public static native void vdUnpackM ( int n, double [] a, double [] y, int [] my ); + public static native int vmlSetErrStatus ( int status); + public static native int vmlGetErrStatus (); + public static native int vmlClearErrStatus (); + public static native int vmlSetMode ( int newmode); + public static native int vmlGetMode (); + public static native void MKLFreeTls ( int fdwReason); + +} diff --git a/src/main/java/edu/berkeley/bid/VSL.java b/src/main/java/edu/berkeley/bid/VSL.java new file mode 100755 index 00000000..e4994b8e --- /dev/null +++ b/src/main/java/edu/berkeley/bid/VSL.java @@ -0,0 +1,118 @@ +package edu.berkeley.bid; + +public final class VSL { + + static { System.loadLibrary( "bidmatmkl" ); } + + private long handle = 0; + + public VSL() {} + + protected void finalize() { + if (handle != 0) { + vslDeleteStream(this); + handle = 0; + } + } + + public static native int vslNewStream(VSL stream, int brng, int seed); + + public static native int vslDeleteStream(VSL stream); + + public static native int vdRngCauchy(int method, VSL stream, int n, double[] r, double a, double beta); + + public static native int vsRngCauchy(int method, VSL stream, int n, float[] r, float a, float beta); + + public static native int vdRngUniform(int method, VSL stream, int n, double[] r, double a, double b); + + public static native int vsRngUniform(int method, VSL stream, int n, float[] r, float a, float b); + + public static native int vdRngGaussian(int method, VSL stream, int n, double[] r, double a, double sigma); + + public static native int vsRngGaussian(int method, VSL stream, int n, float[] r, float a, float 
sigma); + + public static native int vdRngGaussianMV(int method, VSL stream, int n, double[] r, int dimen, int mstorage, double[] a, double[] t); + + public static native int vsRngGaussianMV(int method, VSL stream, int n, float[] r, int dimen, int mstorage, float[] a, float[] t); + + public static native int vdRngExponential(int method, VSL stream, int n, double[] r, double a, double beta); + + public static native int vsRngExponential(int method, VSL stream, int n, float[] r, float a, float beta); + + public static native int vdRngLaplace(int method, VSL stream, int n, double[] r, double a, double beta); + + public static native int vsRngLaplace(int method, VSL stream, int n, float[] r, float a, float beta); + + public static native int vdRngWeibull(int method, VSL stream, int n, double[] r, double alpha, double a, double beta); + + public static native int vsRngWeibull(int method, VSL stream, int n, float[] r, float alpha, float a, float beta); + + public static native int vdRngRayleigh(int method, VSL stream, int n, double[] r, double a, double beta); + + public static native int vsRngRayleigh(int method, VSL stream, int n, float[] r, float a, float beta); + + public static native int vdRngLognormal(int method, VSL stream, int n, double[] r, double a, double sigma, double b, double beta); + + public static native int vsRngLognormal(int method, VSL stream, int n, float[] r, float a, float sigma, float b, float beta); + + public static native int vdRngGumbel(int method, VSL stream, int n, double[] r, double a, double beta); + + public static native int vsRngGumbel(int method, VSL stream, int n, float[] r, float a, float beta); + + public static native int vdRngGamma(int method, VSL stream, int n, double[] r, double alpha, double a, double beta); + + public static native int vsRngGamma(int method, VSL stream, int n, float[] r, float alpha, float a, float beta); + + public static native int vdRngBeta(int method, VSL stream, int n, double[] r, double p, double q, 
double a, double beta); + + public static native int vsRngBeta(int method, VSL stream, int n, float[] r, float p, float q, float a, float beta); + + public static native int viRngBernoulli(int method, VSL stream, int n, int[] r, double p); + + public static native int viRngUniform(int method, VSL stream, int n, int[] r, int a, int b); + + public static native int viRngUniformBits(int method, VSL stream, int n, int[] r); + + public static native int viRngGeometric(int method, VSL stream, int n, int[] r, double p); + + public static native int viRngBinomial(int method, VSL stream, int n, int[] r, int ntrial, double p); + + public static native int viRngHypergeometric(int method, VSL stream, int n, int[] r, int l, int s, int m); + + public static native int viRngNegbinomial(int method, VSL stream, int n, int[] r, double a, double p); + + public static native int viRngPoisson(int method, VSL stream, int n, int[] r, double lambda); + + public static native int viRngPoissonV(int method, VSL stream, int n, int[] r, double[] lambda); + + public static native int vslSkipAheadStream(VSL stream, int nskip); + + public static native int vslGetStreamStateBrng(VSL stream); + + public static native int vslGetNumRegBrngs(); + + public final static int BRNG_MCG31 = 0x100000; + + public final static int BRNG_R250 = 0x200000; + + public final static int BRNG_MRG32K3A = 0x300000; + + public final static int BRNG_MCG59 = 0x400000; + + public final static int BRNG_WH = 0x500000; + + public final static int BRNG_SOBOL = 0x600000; + + public final static int BRNG_NIEDERR = 0x700000; + + public final static int BRNG_MT19937 = 0x800000; + + public final static int BRNG_MT2203 = 0x900000; + + public final static int BRNG_IABSTRACT = 0xa00000; + + public final static int BRNG_DABSTRACT = 0xb00000; + + public final static int BRNG_SABSTRACT = 0xc00000; + +} diff --git a/src/main/scala/BIDMat/BMat.scala b/src/main/scala/BIDMat/BMat.scala new file mode 100755 index 00000000..443a1e84 --- 
/dev/null +++ b/src/main/scala/BIDMat/BMat.scala @@ -0,0 +1,172 @@ +package BIDMat +import edu.berkeley.bid.CBLAS._ +import edu.berkeley.bid.LAPACK._ + +case class BMat(nr:Int, nc:Int, nnz1:Int, ir0:Array[Int], jc0:Array[Int], data0:Array[Byte]) extends SparseMat[Byte](nr, nc, nnz1, ir0, jc0, data0) { + + def size() = length; + + def tryForBMat(m:Mat, s:String):BMat = + m match { + case mm:BMat => mm + case _ => throw new RuntimeException("wrong type for operator "+s+" arg "+m) + } + + def tryForOutBMat(out:Mat):BMat = + if (out.asInstanceOf[AnyRef] == null) { + null + } else { + out match { + case outmat:BMat => outmat + case _ => throw new RuntimeException("wrong type for LHS matrix "+out) + } + } + + override def mytype = "BMat" + + override def t:BMat = BMat(gt) + + def horzcat(b: BMat) = BMat(super.horzcat(b)) + + def vertcat(b: BMat) = BMat(super.vertcat(b)) + + def find3:(IMat, IMat, IMat) = { + val (ii, jj, vv) = gfind3 + val vi = IMat(vv.length, 1) + Mat.copyToIntArray(vv.data, 0, vi.data, 0, vv.length) + (IMat(ii), IMat(jj), vi) + } + + override def apply(a:IMat, b:IMat):BMat = BMat(gapply(a, b)) + + override def apply(a:IMat, b:Int):BMat = BMat(gapply(a, IMat.ielem(b))) + + override def apply(a:Int, b:IMat):BMat = BMat(gapply(IMat.ielem(a), b)) + + def bbMatOp(b: BMat, f:(Byte, Byte) => Byte, out:Mat):BMat = BMat(sgMatOp(b, f, out)) + + def bbMatOpScalar(b: Byte, f:(Byte, Byte) => Byte, out:Mat):BMat = BMat(sgMatOpScalar(b, f, out)) + + def bbReduceOp(n:Int, f1:(Byte) => Byte, f2:(Byte, Byte) => Byte) = IMat(sgReduceOp(n, f1, f2, null)) + + def toCSMat:CSMat = { + val out = CSMat(ncols, 1) + val ioff = Mat.ioneBased + var i = 0 + while (i < ncols) { + out.data(i) = new String(data, jc(i)-ioff, jc(i+1)-jc(i), BMat.encoding) + i += 1 + } + out + } + + override def toString:String = { + val somespaces = " " + val ioff = Mat.ioneBased + val ss = new StringBuilder + val nChars = Mat.terminalWidth-4 + val totchars = 10*nChars + var nelems = 0 + var maxlen = 0 
+ val lbuf = new scala.collection.mutable.ListBuffer[String] + while (maxlen * nelems < totchars && nelems < ncols) { + val str = new String(data, jc(nelems)-ioff, jc(nelems+1)-jc(nelems), BMat.encoding) + lbuf.append(str) + maxlen = math.max(maxlen, 1+str.length) + nelems += 1 + } + nelems -= 1 + var i = 0 + var thisrow = 0 + lbuf.forall((str:String) => { + ss.append(str + somespaces.substring(0, maxlen - str.length)) + thisrow += 1 + if ((thisrow + 1) * maxlen >= nChars) { + ss.append("\n") + thisrow = 0 + } + true + }) + if (nelems < ncols) { + ss.append("...") + } + ss.toString + } + + def > (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x > y) 1 else 0, null) + def < (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x < y) 1 else 0, null) + def == (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x == y) 1 else 0, null) + def === (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x == y) 1 else 0, null) + def >= (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x >= y) 1 else 0, null) + def <= (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x <= y) 1 else 0, null) + def != (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x != y) 1 else 0, null) + + override def \ (b: Mat) = b match { + case fb:BMat => horzcat(fb) + } + + override def on (b: Mat) = b match { + case fb:BMat => vertcat(fb) + } + + override def ~ (b: Mat):Pair = + b match { + case db:BMat => new BPair(this, db) + case _ => throw new RuntimeException("mismatched types for operator ~") + } +} + +class BPair (val omat:Mat, val mat:BMat) extends Pair { + + + def > (b : Byte) = mat.bbMatOpScalar(b, (x:Byte, y:Byte) => if (x > y) 1 else 0, omat) + def < (b : Byte) = mat.bbMatOpScalar(b, (x:Byte, y:Byte) => if (x < y) 1 else 0, omat) + def == (b : Byte) = mat.bbMatOpScalar(b, (x:Byte, y:Byte) => if (x == y) 1 else 0, omat) + def >= (b : Byte) = mat.bbMatOpScalar(b, (x:Byte, y:Byte) => if (x >= y) 1 else 0, omat) + def <= (b : Byte) = mat.bbMatOpScalar(b, (x:Byte, y:Byte) 
=> if (x <= y) 1 else 0, omat) + def != (b : Byte) = mat.bbMatOpScalar(b, (x:Byte, y:Byte) => if (x != y) 1 else 0, omat) +} + +object BMat { + + def apply(nr:Int, nc:Int, nnz0:Int):BMat = new BMat(nr, nc, nnz0, new Array[Int](nnz0), new Array[Int](nc+1), new Array[Byte](nnz0)) + + def apply(a:SparseMat[Byte]):BMat = new BMat(a.nrows, a.ncols, a.nnz, a.ir, a.jc, a.data) + + def SnoRows(nr:Int, nc:Int, nnz0:Int):BMat = new BMat(nr, nc, nnz0, null, new Array[Int](nc+1), new Array[Byte](nnz0)) + + var encoding = "UTF8" +// var encoding = "UTF_16LE" + + def apply(cc:CSMat):BMat = { + val ioff = Mat.ioneBased + val ncolsx = cc.length + var nrowsx = 0 + var nnzx = 0 + var i = 0 + while (i < ncolsx) { + val len = cc(i).getBytes(encoding).length + nnzx += len + nrowsx = math.max(nrowsx, 1+len) + i += 1 + } + val out = SnoRows(nrowsx, ncolsx, nnzx) + nnzx = 0 + i = 0 + while (i < ncolsx) { + out.jc(i) = nnzx + ioff + val bytes = cc(i).getBytes(encoding) + System.arraycopy(bytes, 0, out.data, nnzx, bytes.length) + nnzx += bytes.length + i += 1 + } + out.jc(i) = nnzx + out + } +} + + + + + + diff --git a/src/main/scala/BIDMat/CMat.scala b/src/main/scala/BIDMat/CMat.scala new file mode 100755 index 00000000..081f120a --- /dev/null +++ b/src/main/scala/BIDMat/CMat.scala @@ -0,0 +1,1056 @@ +package BIDMat +import edu.berkeley.bid.CBLAS._ +import edu.berkeley.bid.LAPACK._ +import java.util.Arrays + +case class CMat(nr:Int, nc:Int, data0:Array[Float]) extends DenseMat[Float](nr, nc, data0) { + + def size() = length; + + override def dv:Double = + if (nrows > 1 || ncols > 1) { + throw new RuntimeException("Matrix should be 1x1 to extract value") + } else { + data(0) + } + + override def mytype = "CMat" + + def get(r0:Int, c0:Int):CMat = { + val off = Mat.oneBased + val r = r0 - off + val c = c0 - off + if (r >= nrows || c >= ncols) { + throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") >= ("+nrows+","+ncols+")"); + } else { + val indx = 2*(r+c*nrows) + 
CMat.celem(data(indx), data(indx+1)) + } + } + + def get(i0:Int):CMat = { + val off = Mat.oneBased + val i = i0 - off + if (i < 0 || i >= length) { + throw new IndexOutOfBoundsException(""+(i+off)+" >= ("+nrows+","+ncols+")"); + } else { + CMat.celem(data(2*i), data(2*i+1)) + } + } + + def update(r0:Int, c0:Int, v:CMat):CMat = { + val off = Mat.oneBased + val r = r0 - off + val c = c0 - off + if (r >= nrows || c >= ncols) { + throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") >= ("+nrows+","+ncols+")"); + } else { + val indx = 2*(r+c*nrows) + data(indx) = v.data(0) + data(indx+1) = v.data(1) + } + v + } + + def update(i0:Int, v:CMat):CMat = { + val off = Mat.oneBased + val i = i0 - off + if (i < 0 || i >= length) { + throw new IndexOutOfBoundsException(""+(i+off)+" >= ("+nrows+","+ncols+")"); + } else { + data(2*i) = v.data(0) + data(2*i+1) = v.data(1) + } + v + } + + def t(oldmat:Mat):CMat = { + var out = CMat.newOrCheckCMat(ncols, nrows, oldmat) + var i = 0 + while (i < nrows) { + var j = 0 + while (j < ncols) { + out.data(2*(j+i*ncols)) = data(2*(i+j*nrows)) + out.data(2*(j+i*ncols)+1) = data(2*(i+j*nrows)+1) + j += 1 + } + i += 1 + } + out + } + + override def t:CMat = t(null:CMat) + + def h(oldmat:Mat):CMat = { + var out = CMat.newOrCheckCMat(ncols, nrows, oldmat) + var i = 0 + while (i < nrows) { + var j = 0 + while (j < ncols) { + out.data(2*(j+i*ncols)) = data(2*(i+j*nrows)) + out.data(2*(j+i*ncols)+1) = -data(2*(i+j*nrows)+1) + j += 1 + } + i += 1 + } + out + } + + def h:CMat = h(null:CMat) + + def vertcat(a:CMat):CMat = + if (ncols != a.ncols) { + throw new RuntimeException("ncols must match") + } else { + var out = CMat(nrows+a.nrows, ncols) + var i = 0 + while (i < ncols) { + System.arraycopy(data, 2*i*nrows, out.data, 2*i*(nrows+a.nrows), 2*nrows) + System.arraycopy(a.data, 2*i*a.nrows, out.data, 2*(nrows+i*(nrows+a.nrows)), 2*a.nrows) + i += 1 + } + out + } + + def horzcat(a:CMat):CMat= + if (nrows != a.nrows) { + throw new 
RuntimeException("nrows must match") + } else { + var out = CMat(nrows, ncols+a.ncols) + System.arraycopy(data, 0, out.data, 0, 2*nrows*ncols) + System.arraycopy(a.data, 0, out.data, 2*nrows*ncols, 2*nrows*a.ncols) + out + } + + override def nnz:Int = { + var count:Int = 0 + var i = 0 + while (i < length) { + if (data(2*i) != 0 || data(2*i+1) != 0) { + count += 1 + } + i += 1 + } + count + } + + override def findInds(out:IMat, off:Int):IMat = { + var count = 0 + var i = 0 + while (i < length) { + if (data(2*i) != 0 || data(2*i+1) != 0) { + out.data(count) = i + off + count += 1 + } + i += 1 + } + out + } + + def find3:(IMat, IMat, CMat) = { + val off = Mat.oneBased + val iout = IMat(nnz, 1) + val jout = IMat(nnz, 1) + val vout = CMat(nnz, 1) + findInds(iout, 0) + var i = 0 + while (i < iout.length) { + val ival:Int = iout.data(i) + vout.data(2*i) = data(2*ival) + vout.data(2*i+1) = data(2*ival+1) + jout.data(i) = (ival / nrows) + off + iout.data(i) = (ival % nrows) + off + i += 1 + } + (iout, jout, vout) + } + + override def apply(iv:IMat):CMat = + iv match { + case aa:MatrixWildcard => { + val out = CMat(length, 1) + System.arraycopy(data, 0, out.data, 0, 2*out.length) + out + } + case _ => { + val off = Mat.oneBased + val out = CMat(iv.nrows, iv.ncols) + var i = 0 + while (i < out.length) { + val ind = iv.data(i) - off + if (ind < 0 || ind >= length) { + throw new RuntimeException("bad linear index "+(ind+off)+" vs "+length) + } else { + out.data(2*i) = data(2*ind) + out.data(2*i+1) = data(2*ind+1) + } + i += 1 + } + out + } + } + + def update(iv:IMat, b:CMat) = + iv match { + case aaa:MatrixWildcard => { + if (length != b.length || b.ncols != 1) { + if (b.length == 1) { + var i = 0 + val b0 = b.data(0) + val b1 = b.data(1) + while (i < length) { + data(2*i) = b0 + data(2*i+1) = b1 + i += 1 + } + } else throw new RuntimeException("dims mismatch") + } else { + System.arraycopy(b.data, 0, data, 0, 2*length) + } + } + case _ => { + val off = Mat.oneBased + if 
(iv.nrows != b.nrows || iv.ncols != b.ncols) { + if (b.length == 1) { + val b0 = b.data(0) + val b1 = b.data(1) + var i = 0 + while (i < iv.length) { + val ind = iv.data(i) - off + if (ind < 0 || ind >= length) { + throw new RuntimeException("bad linear index "+(ind+off)+" vs "+length) + } else { + data(2*ind) = b0 + data(2*ind+1) = b1 + } + i += 1 + } + } else throw new RuntimeException("dims mismatch") + } else { + var i = 0 + while (i < iv.length) { + val ind = iv.data(i) - off + if (ind < 0 || ind >= length) { + throw new RuntimeException("bad linear index "+(ind+off)+" vs "+length) + } else { + data(2*ind) = b.data(2*i) + data(2*ind+1) = b.data(2*i+1) + } + i += 1 + } + } + } + } + + override def apply(iv:IMat, jv:IMat):CMat = { + val off = Mat.oneBased + val rowinds = DenseMat.getInds(iv, nrows) + val colinds = DenseMat.getInds(jv, ncols) + val out = CMat(rowinds.length, colinds.length) + var i = 0 + while (i < out.ncols) { + var j = 0 + val c = colinds(i) - off + while (j < out.nrows) { + val r = rowinds(j) - off + out.data(2*(j+i*out.nrows)) = data(2*(r+nrows*c)) + out.data(2*(j+i*out.nrows)+1) = data(2*(r+nrows*c)+1) + j += 1 + } + i += 1 + } + out + } + + override def apply(iv:IMat, j:Int):CMat = { + apply(iv, IMat.ielem(j)) + } + + override def apply(i:Int, jv:IMat):CMat = { + apply(IMat.ielem(i), jv) + } + + def update(iv:IMat, jv:IMat, b:CMat):CMat = { + val off = Mat.oneBased + val rowinds = DenseMat.getInds(iv, nrows) + val colinds = DenseMat.getInds(jv, ncols) + if (rowinds.length != b.nrows || colinds.length != b.ncols) { + if (b.length == 1) { + val b0 = b.data(0) + val b1 = b.data(1) + var i = 0 + while (i < b.ncols) { + val c = colinds(i) - off + var j = 0 + while (j < b.nrows) { + val r = rowinds(j) - off + data(2*(r+nrows*c)) = b0 + data(2*(r+nrows*c)+1) = b1 + j += 1 + } + i += 1 + } + } else throw new RuntimeException("dims mismatch in assignment") + } else { + var i = 0 + while (i < b.ncols) { + val c = colinds(i) - off + var j = 0 + while 
(j < b.nrows) { + val r = rowinds(j) - off + data(2*(r+nrows*c)) = b.data(2*(j+i*b.nrows)) + data(2*(r+nrows*c)+1) = b.data(2*(j+i*b.nrows)+1) + j += 1 + } + i += 1 + } + } + b + } + + def update(iv:IMat, j:Int, b:CMat):CMat = { + update(iv, IMat.ielem(j), b) + } + + def update(i:Int, jv:IMat, b:CMat):CMat = { + update(IMat.ielem(i), jv, b) + } + + /* + * Implement sliced assignment, a(iv,jv) = b:T where iv and jv are vectors, using ? as wildcard + */ + + def ccMatOp(a:Mat, op2:(Float,Float,Float,Float) => (Float,Float), oldmat:Mat):CMat = { + a match { + case aa:CMat => { + if (nrows==a.nrows && ncols==1) { + val out = CMat.newOrCheckCMat(nrows, a.ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < a.ncols) { + var j = 0 + while (j < nrows) { + val (v0, v1) = op2(data(2*j), data(2*j), aa.data(2*(j+i*a.nrows)), aa.data(2*(j+i*a.nrows)+1)) + out.data(2*(j+i*nrows)) = v0 + out.data(2*(j+i*nrows)+1) = v1 + j += 1 + } + i += 1 + } + out + } else if (ncols==a.ncols && nrows==1) { + val out = CMat.newOrCheckCMat(a.nrows, ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < ncols) { + var j = 0 + while (j < a.nrows) { + val (v0, v1) = op2(data(2*i), data(2*i+1), aa.data(2*(j+i*a.nrows)), aa.data(2*(j+i*a.nrows)+1)) + out.data(2*(j+i*a.nrows)) = v0 + out.data(2*(j+i*a.nrows)+1) = v1 + j += 1 + } + i += 1 + } + out + } else if (nrows==a.nrows && a.ncols==1) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + var j = 0 + while (j < nrows) { + val (v0, v1) = op2(data(2*(j+i*nrows)), data(2*(j+i*nrows)+1), aa.data(2*j), aa.data(2*j+1)) + out.data(2*(j+i*nrows)) = v0 + out.data(2*(j+i*nrows)+1) = v1 + j += 1 + } + i += 1 + } + out + } else if (ncols==a.ncols && a.nrows==1) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + var j = 0 + while (j < nrows) { + val (v0, v1) = op2(data(2*(j+i*nrows)), data(2*(j+i*nrows)+1), 
aa.data(2*i), aa.data(2*i+1)) + out.data(2*(j+i*nrows)) = v0 + out.data(2*(j+i*nrows)+1) = v1 + j += 1 + } + i += 1 + } + out + } else ccMatOpStrict(a, op2, oldmat) + } + case _ => throw new RuntimeException("arg must be dense") + } + } + /* + * This version applies the operator op2 with stricter dimension checking, + * either dims must match or one arg must be scalar + */ + def ccMatOpStrict(a:Mat, op2:(Float,Float,Float,Float) => (Float,Float), oldmat:Mat):CMat = + a match { + case aa:CMat => { + if (nrows==a.nrows && ncols==a.ncols) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < aa.length) { + val (v0, v1) = op2(data(2*i), data(2*i+1), aa.data(2*i), aa.data(2*i+1)) + out.data(2*i) = v0 + out.data(2*i+1) = v1 + i += 1 + } + out + } else if (a.nrows == 1 && a.ncols == 1) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + val a0 = aa.data(0) + val a1 = aa.data(1) + var i = 0 + while (i < length) { + val (v0, v1) = op2(data(2*i), data(2*i+1), a0, a1) + out.data(2*i) = v0 + out.data(2*i+1) = v1 + i += 1 + } + out + } else if (nrows == 1 && ncols == 1) { + val out = CMat.newOrCheckCMat(a.nrows, a.ncols, oldmat) + Mat.nflops += aa.length + val a0 = aa.data(0) + val a1 = aa.data(1) + var i = 0 + while (i < aa.length) { + val (v0, v1) = op2(a0, a1, aa.data(2*i), aa.data(2*i+1)) + out.data(2*i) = v0 + out.data(2*i+1) = v1 + i += 1 + } + out + } else throw new RuntimeException("dims incompatible") + } + case _ => throw new RuntimeException("arg must be dense") + } + + def ccMatOpv(a:Mat, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, oldmat:Mat):CMat = { + a match { + case aa:CMat => { + if (nrows==a.nrows && ncols==1) { + val out = CMat.newOrCheckCMat(nrows, a.ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < a.ncols) { + opv(data, 0, 1, aa.data, i*a.nrows, 1, out.data, i*nrows, 1, nrows) + i += 1 + } + out + } else if (ncols==a.ncols 
&& nrows==1) { + val out = CMat.newOrCheckCMat(a.nrows, ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < ncols) { + opv(data, i, 0, aa.data, i*a.nrows, 1, out.data, i*a.nrows, 1, a.nrows) + i += 1 + } + out + } else if (nrows==a.nrows && a.ncols==1) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + opv(data, i*nrows, 1, aa.data, 0, 1, out.data, i*nrows, 1, nrows) + i += 1 + } + out + } else if (ncols==a.ncols && a.nrows==1) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + opv(data, i*nrows, 1, aa.data, i, 0, out.data, i*nrows, 1, a.nrows) + i += 1 + } + out + } else ccMatOpStrictv(a, opv, oldmat) + } + case _ => throw new RuntimeException("arg must be dense") + } + } + + def ccMatOpStrictv(a:Mat, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, oldmat:Mat):CMat = + a match { + case aa:CMat => { + if (nrows==a.nrows && ncols==a.ncols) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + opv(data, 0, 1, aa.data, 0, 1, out.data, 0, 1, aa.length) + out + } else if (a.nrows == 1 && a.ncols == 1) { + val out = CMat.newOrCheckCMat(nrows, ncols, oldmat) + Mat.nflops += length + opv(data, 0, 1, aa.data, 0, 0, out.data, 0, 1, length) + out + } else if (nrows == 1 && ncols == 1) { + val out = CMat.newOrCheckCMat(a.nrows, a.ncols, oldmat) + Mat.nflops += aa.length + opv(data, 0, 0, aa.data, 0, 1, out.data, 0, 1, aa.length) + out + } else throw new RuntimeException("dims incompatible") + } + case _ => throw new RuntimeException("arg must be dense") + } + + def ccMatOpScalarv(a0:Float, a1:Float, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, omat:Mat):CMat = { + val out = CMat.newOrCheckCMat(nrows, ncols, omat) + Mat.nflops += length + val aa = new Array[Float](2) + aa(0) = a0 + aa(1) = a1 + opv(data, 0, 1, aa, 0, 0, out.data, 0, 1, 
length) + out + } + + def ffReduceOp(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, out:Mat) = + CMat(ggReduceOp(n, f1, f2, out)) + + def ffReduceOpv(n:Int, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) = + CMat(ggReduceOpv(n, f, out)) + + def ccReduceOpv(dim0:Int, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, oldmat:Mat):CMat = { + var dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0) + if (dim == 1) { + val out = CMat.newOrCheckCMat(1, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + out.data(i) = data(i*nrows) + opv(data, i*nrows+1, 1, out.data, i, 0, out.data, i, 0, nrows-1) + i += 1 + } + out + } else if (dim == 2) { + val out = CMat.newOrCheckCMat(nrows, 1, oldmat) + Mat.nflops += length + var j = 0 + while (j < 2*nrows) { + out.data(j) = data(j) + j += 1 + } + var i = 1 + while (i < ncols) { + opv(data, i*nrows, 1, out.data, 0, 1, out.data, 0, 1, nrows) + i += 1 + } + out + } else + throw new RuntimeException("index must 1 or 2"); + } + + def ffReduceAll(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, out:Mat) = + CMat(ggReduceAll(n, f1, f2, out)) + + def ffReduceAllv(n:Int, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) = + CMat(ggReduceAllv(n, f, out)) + + override def printOne(i:Int):String = { + val u = data(2*i) + val v = data(2*i+1) + val s0 = if (u % 1 == 0 && math.abs(u) < 1e10) { + "%d" format u.intValue + } else { + "%.5g" format u + } + if (v == 0) { + s0 + } else { + val s1 = if (v % 1 == 0 && math.abs(v) < 1e10) { + "%d" format v.intValue + } else { + "%.5g" format v + } + if (u == 0) { + s1+"i" + } else if (v > 0) { + s0+"+"+s1+"i" + } else { + s0+s1+"i" + } + } + } + + override def copyTo(out:Mat) = { + out match { + case cout:CMat => System.arraycopy(data, 0, cout.data, 0, 2*length) + } + out + } + + override def copy = { + val out = CMat(nrows, ncols) + 
System.arraycopy(data, 0, out.data, 0, 2*length) + out + } + + override def zeros(nr:Int, nc:Int) = { + CMat(nr, nc) + } + + override def ones(nr:Int, nc:Int) = { + val out = CMat(nr, nc) + var i = 0 + while (i < out.length) { + out(2*i) = 1 + i += 1 + } + out + } + + def fDMult(aa:CMat, outmat:Mat):CMat = { + if (ncols == aa.nrows) { + val out = CMat.newOrCheckCMat(nrows, aa.ncols, outmat) + Mat.nflops += 2L * length * aa.ncols + if (Mat.noMKL) { + if (outmat.asInstanceOf[AnyRef] != null) out.clear + var i = 0 + while (i < aa.ncols) { + var j = 0 + while (j < aa.nrows) { + var k = 0 + val u0 = aa.data(2*(j + i*ncols)) + val u1 = aa.data(2*(j + i*ncols)+1) + while (k < nrows) { + val v0 = data(2*(k+j*nrows)) + val v1 = data(2*(k+j*nrows)+1) + out.data(2*(k+i*nrows)) += u0*v0-u1*v1 + out.data(2*(k+i*nrows)+1) += u1*v0+u0*v1 + k += 1 + } + j += 1 + } + i += 1 + } + } else { + val alpha = List(1.0f,0f).toArray + val beta = List(0f,0f).toArray + if (nrows == 1) { + cgemv(ORDER.ColMajor, TRANSPOSE.Trans, aa.nrows, aa.ncols, alpha, aa.data, aa.nrows, data, 1, beta, out.data, 1) + } else if (aa.ncols == 1) { + cgemv(ORDER.ColMajor, TRANSPOSE.NoTrans, nrows, ncols, alpha, data, nrows, aa.data, 1, beta, out.data, 1) + } else { + cgemm(ORDER.ColMajor, TRANSPOSE.NoTrans, TRANSPOSE.NoTrans, + nrows, aa.ncols, ncols, alpha, data, nrows, aa.data, aa.nrows, beta, out.data, nrows) + } + } + out + } else if (ncols == 1 && nrows == 1){ + val out = CMat.newOrCheckCMat(aa.nrows, aa.ncols, outmat) + Mat.nflops += aa.length + var i = 0 + val u0 = data(0) + val u1 = data(1) + while (i < aa.length) { + val v0 = aa.data(2*i) + val v1 = aa.data(2*i+1) + out.data(2*i) = u0*v0-u1*v1 + out.data(2*i+1) = u0*v1+u1*v0 + i += 1 + } + out + } else if (aa.ncols == 1 && aa.nrows == 1){ + val out = CMat.newOrCheckCMat(nrows, ncols, outmat) + Mat.nflops += length + var i = 0 + val u0 = aa.data(0) + val u1 = aa.data(1) + while (i < length) { + val v0 = data(2*i) + val v1 = data(2*i+1) + out.data(2*i) = 
u0*v0-u1*v1 + out.data(2*i+1) = u0*v1+u1*v0 + i += 1 + } + out + } else throw new RuntimeException("dimensions mismatch") + } + + + /* + * Sum of the element-wise complex products of two vectors with the same number of elements + * (no conjugation is applied). Accumulates in Double and returns the result as a 1x1 CMat. + */ + def dot (b : CMat):CMat = + if (math.min(nrows, ncols) != 1 || math.min(b.nrows,b.ncols) != 1 || length != b.length) { + throw new RuntimeException("vector dims not compatible") + } else { + Mat.nflops += 2 * length + var w0 = 0.0 + var w1 = 0.0 + var i = 0 + while (i < length){ + val u0 = data(2*i) + val u1 = data(2*i+1) + val v0 = b.data(2*i) + val v1 = b.data(2*i+1) + w0 += u0*v0-u1*v1 + w1 += u0*v1+u1*v0 + i += 1 + } + CMat.celem(w0.asInstanceOf[Float], w1.asInstanceOf[Float]) + } + + /* + * LU-factors a copy of a (cgetrf) and solves against a copy of this matrix (cgetrs), both with + * RowMajor ordering. a must be square with a.nrows == ncols, otherwise RuntimeException is thrown. + */ + def solvel(a0:Mat):CMat = + a0 match { + case a:CMat => { + Mat.nflops += 2L*a.nrows*a.nrows*a.nrows/3 + 2L*nrows*a.nrows*a.nrows + if (a.nrows != a.ncols || ncols != a.nrows) { + throw new RuntimeException("solve needs a square matrix") + } else { + val out = CMat(nrows, ncols) + /* the workspace receives a copy of a, so it is sized from a (2 floats per complex element), not from this matrix */ + val tmp = new Array[Float](2*a.length) + System.arraycopy(a.data, 0, tmp, 0, 2*a.length) + System.arraycopy(data, 0, out.data, 0, 2*length) + val ipiv = new Array[Int](ncols) + cgetrf(ORDER.RowMajor, ncols, ncols, tmp, ncols, ipiv) + cgetrs(ORDER.RowMajor, "N", ncols, nrows, tmp, ncols, ipiv, out.data, nrows) + out + } + } + case _ => throw new RuntimeException("unsupported arg to / "+a0) + } + + /* + * LU-factors a copy of this matrix (cgetrf) and solves against a copy of a (cgetrs), both with + * ColMajor ordering. This matrix must be square with ncols == a.nrows, otherwise RuntimeException is thrown. + */ + def solver(a0:Mat):CMat = + a0 match { + case a:CMat => { + Mat.nflops += 2L*nrows*nrows*nrows/3 + 2L*nrows*nrows*a.ncols + if (nrows != ncols || ncols != a.nrows) { + throw new RuntimeException("solve needs a square matrix") + } else { + val out = CMat(a.nrows, a.ncols) + val tmp = new Array[Float](2*length) + System.arraycopy(data, 0, tmp, 0, 2*length) + System.arraycopy(a.data, 0, out.data, 0, 2*a.length) + val ipiv = new Array[Int](ncols) + cgetrf(ORDER.ColMajor, ncols, ncols, tmp, ncols, ipiv) + cgetrs(ORDER.ColMajor, "N", ncols, a.ncols, tmp, nrows, ipiv, out.data, nrows) + out + } + } + case _ => throw new RuntimeException("unsupported arg to \\ "+a0) + } + + def inv:CMat = { + 
import edu.berkeley.bid.LAPACK._ + if (nrows != ncols) { + throw new RuntimeException("inv method needs a square matrix") + } else { + val out = CMat(nrows, ncols) + /* data holds 2*length floats (interleaved real/imaginary parts); copy all of them before the in-place cgetrf/cgetri calls */ + System.arraycopy(data, 0, out.data, 0, 2*length) + val ipiv = new Array[Int](nrows) + cgetrf(ORDER.ColMajor, nrows, ncols, out.data, nrows, ipiv) + cgetri(ORDER.ColMajor, nrows, out.data, nrows, ipiv) + out + } + } + + /* Zeroes all 2*length floats of this matrix in place and returns this. */ + override def clear = { + Arrays.fill(this.data,0,2*length,0) + this + } + + /* + * Zeroes, in place, the entries (j, i) with j < i+off in every column i >= 1 of a square matrix. + */ + override def clearUpper(off:Int) = { + if (nrows != ncols) { + throw new RuntimeException("clearUpper assumes a square matrix") + } else { + var i = 1 + while (i < ncols) { + var j = 0 + while (j < i+off) { + data(2*(j + i*nrows)) = 0 + data(2*(j + i*nrows)+1) = 0 + j += 1 + } + i += 1 + } + this + } + } + override def clearUpper = clearUpper(0) + + /* + * Zeroes, in place, the entries (j, i) with j >= i+1+off in every column i < ncols-1 of a square matrix. + */ + override def clearLower(off:Int):CMat = { + if (nrows != ncols) { + throw new RuntimeException("clearLower assumes a square matrix") + } else { + var i = 0 + while (i < ncols-1) { + var j = i+1+off + while (j < nrows) { + data(2*(j + i*nrows)) = 0 + data(2*(j + i*nrows)+1) = 0 + j += 1 + } + i += 1 + } + } + this + } + + override def clearLower:CMat = clearLower(0) + + /* Builds an n x n matrix whose diagonal is this vector (n = max(nrows, ncols)); throws unless this is a vector. */ + override def mkdiag = { + if (math.min(nrows, ncols) > 1) { + throw new RuntimeException("mkdiag needs a vector input") + } + val n = math.max(nrows, ncols) + val out = CMat(n,n) + var i = 0 + while (i < n) { + out.data(2*i*(n+1)) = data(2*i) + out.data(2*i*(n+1)+1) = data(2*i+1) + i += 1 + } + out + } + + /* Returns the first min(nrows, ncols) diagonal entries as an n x 1 matrix. */ + override def getdiag = { + val n = math.min(nrows, ncols) + val out = CMat(n,1) + var i = 0 + while (i < n) { + out.data(2*i) = data(2*i*(nrows+1)) + out.data(2*i+1) = data(2*i*(nrows+1)+1) + i += 1 + } + out + } + + def * (b : CMat) = fDMult(b, null) + def + (b : CMat) = ccMatOpv(b, CMat.vecAdd _, null) + def - (b : CMat) = ccMatOpv(b, CMat.vecSub _, null) + def *@ (b : CMat) = ccMatOpv(b, CMat.vecMul _, null) + def /@ (b : CMat) = ccMatOpv(b, CMat.vecDiv _, null) + def / (b : CMat) = solvel(b) + def 
\\ (b : CMat) = solver(b) + + def == (b : CMat) = ccMatOp(b, (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null) + def != (b : CMat) = ccMatOp(b, (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null) + + override def * (b : Float) = ccMatOpScalarv(b, 0, CMat.vecMul _, null) + override def + (b : Float) = ccMatOpScalarv(b, 0, CMat.vecAdd _, null) + override def - (b : Float) = ccMatOpScalarv(b, 0, CMat.vecSub _, null) + override def *@ (b : Float) = ccMatOpScalarv(b, 0, CMat.vecMul _, null) + override def /@ (b : Float) = ccMatOpScalarv(b, 0, CMat.vecDiv _, null) + + override def == (b : Float) = ccMatOp(CMat.celem(b, 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null) + override def != (b : Float) = ccMatOp(CMat.celem(b, 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null) + + override def * (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null) + override def + (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecAdd _, null) + override def - (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecSub _, null) + override def *@ (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null) + override def /@ (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecDiv _, null) + + override def == (b : Double) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null) + override def != (b : Double) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null) + + override def * (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null) + override def + (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecAdd _, null) 
+ override def - (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecSub _, null) + override def *@ (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null) + override def /@ (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecDiv _, null) + + override def == (b : Int) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null) + override def != (b : Int) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null) + + def \ (b: CMat) = horzcat(b) + def on (b: CMat) = vertcat(b) + + def ~ (b : CMat):CPair = new CPair(this, b) + + override def ~ (b: Mat):Pair = + b match { + case db:CMat => new CPair(this, db) + case _ => throw new RuntimeException("mismatched types for operator ~") + } + + /* + * Operators whose second arg is generic. + */ + import Operator._ + override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus) + override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus) + override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times) + override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div) + override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve) + override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes) + override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv) + override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat) + override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat) + + override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ) + override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE) + + override def recycle(nr:Int, nc:Int, nnz:Int):CMat = { + if (nrows == nr && nc == ncols) { + this + } else if (data.size >= 2*nr*nc) { + new CMat(nr, nc, data) + } else { + CMat(nr, nc) + } + } +} + +class CPair (val omat:Mat, val mat:CMat) extends Pair { + + override 
def t:CMat = CMat(mat.gt(omat)) + + def * (b : CMat) = mat.fDMult(b, omat) + def + (b : CMat) = mat.ccMatOpv(b, CMat.vecAdd _, omat) + def - (b : CMat) = mat.ccMatOpv(b, CMat.vecSub _, omat) + def *@ (b : CMat) = mat.ccMatOpv(b, CMat.vecMul _, omat) + def /@ (b : CMat) = mat.ccMatOpv(b, CMat.vecDiv _, omat) +// override def ^ (b : Mat) = mat.ccMatOp(b, (x:Float, y:Float) => math.pow(x,y).toFloat, null) + + def == (b : CMat) = mat.ccMatOp(b, (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), omat) + def != (b : CMat) = mat.ccMatOp(b, (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), omat) + + override def * (b : Float) = mat.ccMatOpScalarv(b, 0, CMat.vecMul _, omat) + override def + (b : Float) = mat.ccMatOpScalarv(b, 0, CMat.vecAdd _, omat) + override def - (b : Float) = mat.ccMatOpScalarv(b, 0, CMat.vecSub _, omat) + override def *@ (b : Float) = mat.ccMatOpScalarv(b, 0, CMat.vecMul _, omat) + override def /@ (b : Float) = mat.ccMatOpScalarv(b, 0, CMat.vecDiv _, omat) + + + override def == (b : Float) = mat.ccMatOp(CMat.celem(b, 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), omat) + override def != (b : Float) = mat.ccMatOp(CMat.celem(b, 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), omat) + + override def * (b : Double) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, omat) + override def + (b : Double) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecAdd _, omat) + override def - (b : Double) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecSub _, omat) + override def *@ (b : Double) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, omat) + override def /@ (b : Double) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecDiv _, omat) + + override def == (b : Double) = mat.ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, 
br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), omat) + override def != (b : Double) = mat.ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), omat) + + override def * (b : Int) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, omat) + override def + (b : Int) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecAdd _, omat) + override def - (b : Int) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecSub _, omat) + override def *@ (b : Int) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, omat) + override def /@ (b : Int) = mat.ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecDiv _, omat) + + override def == (b : Int) = mat.ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), omat) + override def != (b : Int) = mat.ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), omat) + +} + +object CMat { + + def apply(nr:Int, nc:Int) = new CMat(nr, nc, new Array[Float](2*nr*nc)) + + def real(a:FMat):CMat = { + val out = CMat(a.nrows, a.ncols) + var i = 0 + while (i < a.length) { + out.data(2*i) = a.data(i) + i += 1 + } + out + } + + def imag(a:FMat):CMat = { + val out = CMat(a.nrows, a.ncols) + var i = 0 + while (i < a.length) { + out.data(2*i+1) = a.data(i) + i += 1 + } + out + } + + def apply(x:Mat):CMat = { + x match { + case dd:DMat => real(FMat(dd)) + case cc:CMat => {val out = CMat(x.nrows, x.ncols); System.arraycopy(cc.data, 0, out.data, 0, 2*cc.length); out} + case ii:IMat => real(FMat(ii)) + case ff:FMat => real(ff) +// case xx:DenseMat[Float] => new CMat(xx.nrows, xx.ncols, xx.data) + case _ => throw new RuntimeException("Unsupported source type") + } + } + + def celem(x:Float, y:Float) = { + val out = CMat(1,1) + out.data(0) = x + out.data(1) = y + 
out + } + + /* + * Strided element-wise kernels over interleaved complex data: complex element k of an array occupies + * slots 2*k (real part) and 2*k+1 (imaginary part). a0/b0/c0 are starting element offsets and + * ainc/binc/cinc are the per-step element increments (an increment of 0 re-reads the same element). + * Each loop runs while the output cursor is below c0+n, and every kernel returns 0. + */ + def vecAdd(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(2*ci) = a(2*ai) + b(2*bi) + c(2*ci+1) = a(2*ai+1) + b(2*bi+1) + ai += ainc; bi += binc; ci += cinc + } + 0 + } + + def vecSub(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(2*ci) = a(2*ai) - b(2*bi) + c(2*ci+1) = a(2*ai+1) - b(2*bi+1) + ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /* + * Complex product c = a*b. b is read through its own cursor bi (not ai), so broadcast calls + * (binc == 0) and calls with different a/b offsets pick up the right operand. + */ + def vecMul(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + val u0 = a(2*ai) + val u1 = a(2*ai+1) + val v0 = b(2*bi) + val v1 = b(2*bi+1) + c(2*ci) = u0*v0-u1*v1 + c(2*ci+1) = u0*v1+v0*u1 + ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /* + * Complex quotient c = a/b, computed as a*conj(b)/|b|^2. b is read through its own cursor bi (not ai). + */ + def vecDiv(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + val u0 = a(2*ai) + val u1 = a(2*ai+1) + val v0 = b(2*bi) + val v1 = b(2*bi+1) + val denom = v0*v0 + v1*v1 + c(2*ci) = (u0*v0+u1*v1)/denom + c(2*ci+1) = (u1*v0-v1*u0)/denom + ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /* + * Returns a CMat of size nr x nc to write results into: a fresh one when outmat is null or 0x0, + * outmat itself when its dims already match, otherwise whatever outmat.recycle(nr, nc, 0) returns. + */ + def newOrCheckCMat(nr:Int, nc:Int, outmat:Mat):CMat = { + if (outmat.asInstanceOf[AnyRef] == null || (outmat.nrows == 0 && outmat.ncols == 0)) { + CMat(nr, nc) + } else { + if (outmat.nrows != nr || outmat.ncols != nc) { + outmat.recycle(nr, nc, 0).asInstanceOf[CMat] + } else { + outmat.asInstanceOf[CMat] + } + } + } +} + + + + + + diff --git a/src/main/scala/BIDMat/CSMat.scala b/src/main/scala/BIDMat/CSMat.scala new file mode 100755 index 00000000..b1c0a1b8 --- /dev/null +++ 
b/src/main/scala/BIDMat/CSMat.scala @@ -0,0 +1,165 @@ +package BIDMat +import Mat._ + +case class CSMat(override val nrows:Int, override val ncols:Int, override val data:Array[String]) extends DenseMat[String](nrows, ncols, data) { + + def size() = length; + + override def t:CSMat = CSMat(gt(null)) + + override def mytype = "CSMat" + + def horzcat(b: CSMat) = CSMat(ghorzcat(b)) + + def vertcat(b: CSMat) = CSMat(gvertcat(b)) + + def find3:(IMat, IMat, CSMat) = { val vv = gfind3 ; (IMat(vv._1), IMat(vv._2), CSMat(vv._3)) } + + override def apply(a:IMat):CSMat = CSMat(gapply(a)) + + override def apply(a:IMat, b:IMat):CSMat = CSMat(gapply(a, b)) + + override def apply(a:Int, b:IMat):CSMat = CSMat(gapply(a, b)) + + override def apply(a:IMat, b:Int):CSMat = CSMat(gapply(a, b)) + + def ccMatOp(b: CSMat, f:(String, String) => String, old:CSMat) = CSMat(ggMatOp(b, f, old)) + + def ccMatOpScalar(b: String, f:(String, String) => String, old:CSMat) = CSMat(ggMatOpScalar(b, f, old)) + + def ccReduceOp(n:Int, f1:(String) => String, f2:(String, String) => String, old:CSMat) = CSMat(ggReduceOp(n, f1, f2, old)) + + override def printOne(i:Int):String = { + val v = data(i) + if (v != null) + v.toString() + else + "NULL" + } + + /* + * Trait to implement binary operations on dense matrices + */ + trait DCSMatOp { + @inline def op1(x:String):String = x; + def op2(x:String, y:String):String; + + def dCSMatOp(a:CSMat):CSMat = + if (nrows==a.nrows && ncols==1) { + val out = CSMat(nrows, a.ncols) + for (i <- 0 until a.ncols) { + for (j <- 0 until nrows) { + out.data(j+i*nrows) = op2(data(j), a.data(j+i*a.nrows)) + } + } + out + } else if (ncols==a.ncols && nrows==1) { + val out = CSMat(a.nrows, ncols) + for (i <- 0 until ncols) { + for (j <- 0 until a.nrows) { + out.data(j+i*a.nrows) = op2(data(i), a.data(j+i*a.nrows)) + } + } + out + } else if (nrows==a.nrows && a.ncols==1) { + val out = CSMat(nrows, ncols) + for (i <- 0 until ncols) { + for (j <- 0 until nrows) { + out.data(j+i*nrows) = 
op2(data(j+i*nrows), a.data(j)) + } + } + out + } else if (ncols==a.ncols && a.nrows==1) { + val out = CSMat(nrows, ncols) + for (i <- 0 until ncols) { + for (j <- 0 until nrows) { + out.data(j+i*nrows) = op2(data(j+i*nrows), a.data(i)) + } + } + out + } else dCSMatOpStrict(a) + + def dCSMatOpStrict(a:CSMat):CSMat = + if (nrows==a.nrows && ncols==a.ncols) { + val out = CSMat(nrows, ncols) + var i = 0 + while (i < a.length) { + out.data(i) = op2(data(i), a.data(i)) + i += 1 + } + out + } else if (a.nrows == 1 && a.ncols == 1) { + val out = CSMat(nrows, ncols) + val aval = a.data(0) + for (i <- 0 until length) { + out.data(i) = op2(data(i), aval) + } + out + } else if (nrows == 1 && ncols == 1) { + val out = CSMat(a.nrows, a.ncols) + val aval = data(0) + for (i <- 0 until a.length) { + out.data(i) = op2(aval, a.data(i)) + } + out + } else throw new RuntimeException("dims incompatible") + + def dCSMatReduceOp(dim:Int):CSMat = + if (dim == 1) { + val out = CSMat(1, ncols) + for (i <- 0 until ncols) { + var j = 1 + var acc = op1(data(i*nrows)) + while (j < nrows) { + acc = op2(acc, data(j+i*nrows)) + j += 1 + } + out.data(i) = acc + } + out + } else if (dim == 2) { + val out = CSMat(nrows, 1) + var j = 0 + while (j < nrows) { + out.data(j) = op1(data(j)) + j += 1 + } + for (i <- 1 until ncols) { + var j = 0 + while (j < nrows) { + out.data(j) = op2(out.data(j), data(j+i*nrows)) + j += 1 + } + } + out + } else + throw new RuntimeException("index must 1 or 2") + } + + def + (b : CSMat) = ccMatOp(b, (x:String, y:String) => x + y, null) + + def \ (b: CSMat) = horzcat(b) + def \ (b: String) = horzcat(CSMat.cselem(b)) + def on (b: CSMat) = vertcat(b) + def on (b: String) = vertcat(CSMat.cselem(b)) +} + +object CSMat { + + def apply(nr:Int, nc:Int):CSMat = new CSMat(nr, nc, new Array[String](nr*nc)) + + def apply(a:DenseMat[String]):CSMat = new CSMat(a.nrows, a.ncols, a.data) + + def cselem(x:String) = { + val out = CSMat(1,1) + out.data(0) = x + out + } + +} + + + + + + diff 
--git a/src/main/scala/BIDMat/Copyright.txt b/src/main/scala/BIDMat/Copyright.txt new file mode 100755 index 00000000..21326596 --- /dev/null +++ b/src/main/scala/BIDMat/Copyright.txt @@ -0,0 +1,25 @@ +Copyright (c) 2012, Regents of the University of California +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/src/main/scala/BIDMat/DMat.scala b/src/main/scala/BIDMat/DMat.scala new file mode 100755 index 00000000..00928b4d --- /dev/null +++ b/src/main/scala/BIDMat/DMat.scala @@ -0,0 +1,668 @@ +package BIDMat + +import edu.berkeley.bid.CBLAS._ +import edu.berkeley.bid.LAPACK._ +import edu.berkeley.bid.SPBLAS._ +import java.util.Arrays + +case class DMat(nr:Int, nc:Int, data0:Array[Double]) extends DenseMat[Double](nr, nc, data0) { + + def size() = length; + + def getdata() = data + + override def set(v:Float):DMat = { + Arrays.fill(data,0,length,v) + this + } + + override def t:DMat = if (Mat.noMKL) { + DMat(gt(null)) + } else { + val out = DMat(ncols, nrows) + domatcopy("C", "T", nrows, ncols, 1.0, data, nrows, out.data, ncols) + out + } + + override def dv:Double = + if (nrows > 1 || ncols > 1) { + throw new RuntimeException("Matrix should be 1x1 to extract value") + } else { + data(0) + } + + override def mytype = "DMat" + + def horzcat(b: DMat) = DMat(ghorzcat(b)) + + def vertcat(b: DMat) = DMat(gvertcat(b)) + + def find3:(IMat, IMat, DMat) = { val (ii, jj, vv) = gfind3 ; (ii, jj, DMat(vv)) } + + override def apply(a:IMat):DMat = DMat(gapply(a)) + + override def apply(a:IMat, b:IMat):DMat = DMat(gapply(a, b)) + + override def apply(a:IMat, b:Int):DMat = DMat(gapply(a, b)) + + override def apply(a:Int, b:IMat):DMat = DMat(gapply(a, b)) + + def update(iv:IMat, jv:IMat, b:DMat):DMat = DMat(_update(iv, jv, b)) + + def update(iv:IMat, j:Int, b:DMat):DMat = DMat(_update(iv, IMat.ielem(j), b)) + + def update(i:Int, jv:IMat, b:DMat):DMat = DMat(_update(IMat.ielem(i), jv, b)) + + def ddMatOp(b: Mat, f:(Double, Double) => Double, out:Mat) = + b match { + case bb:DMat => DMat(ggMatOp(bb, f, out)) + case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b) + } + + def ddMatOpv(b: Mat, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) = + b match { + case bb:DMat => DMat(ggMatOpv(bb, f, out)) + 
case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b) + } + + def ddMatOpScalar(b: Double, f:(Double, Double) => Double, out:Mat) = DMat(ggMatOpScalar(b, f, out)) + + def ddMatOpScalarv(b: Double, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) = + DMat(ggMatOpScalarv(b, f, out)) + + def ddReduceOp(n:Int, f1:(Double) => Double, f2:(Double, Double) => Double, out:Mat) = DMat(ggReduceOp(n, f1, f2, out)) + + def ddReduceOpv(n:Int, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) = + DMat(ggReduceOpv(n, f, out)) + + def ddReduceAll(n:Int, f1:(Double) => Double, f2:(Double, Double) => Double, out:Mat) = + DMat(ggReduceAll(n, f1, f2, out)) + + def ddReduceAllv(n:Int, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) = + DMat(ggReduceAllv(n, f, out)) + + override def printOne(i:Int):String = { + val v = data(i) + if (v % 1 == 0 && math.abs(v) < 1e10) { + "%d" format v.intValue + } else { + "%.5g" format v + } + } + + override def copyTo(a:Mat) = { + a match { + case out:DMat => System.arraycopy(data, 0, out.data, 0, length) + } + a + } + + override def copy = { + val out = DMat(nrows, ncols) + System.arraycopy(data, 0, out.data, 0, length) + out + } + + override def zeros(nr:Int, nc:Int) = { + DMat(nr, nc) + } + + override def ones(nr:Int, nc:Int) = { + val out = DMat(nr, nc) + var i = 0 + while (i < out.length) { + out(i) = 1 + i += 1 + } + out + } + + override def clearUpper(off:Int) = setUpper(0, off) + override def clearUpper = setUpper(0, 0) + + override def clearLower(off:Int) = setLower(0, off) + override def clearLower = setLower(0, 0) + + + def fDMult(aa:DMat, outmat:Mat):DMat = { + if (ncols == aa.nrows) { + val out = DMat.newOrCheckDMat(nrows, aa.ncols, outmat) + Mat.nflops += 2 * length.toLong * aa.ncols.toLong + if (Mat.noMKL) { + out.clear + var i = 0 + while (i < aa.ncols) { + var j = 0 + 
while (j < aa.nrows) { + var k = 0 + val dval = aa.data(j + i*ncols) + while (k < nrows) { + out.data(k+i*nrows) += data(k+j*nrows)*dval + k += 1 + } + j += 1 + } + i += 1 + } + } else { + if (nrows == 1) { + dgemv(ORDER.ColMajor, TRANSPOSE.Trans, aa.nrows, aa.ncols, 1.0, aa.data, aa.nrows, data, 1, 0, out.data, 1) + } else if (aa.ncols == 1) { + dgemv(ORDER.ColMajor, TRANSPOSE.NoTrans, nrows, ncols, 1.0, data, nrows, aa.data, 1, 0, out.data, 1) + } else { + dgemm(ORDER.ColMajor, TRANSPOSE.NoTrans, TRANSPOSE.NoTrans, + nrows, aa.ncols, ncols, 1.0, data, nrows, aa.data, aa.nrows, 0, out.data, nrows) + } + } + out + } else if (ncols == 1 && nrows == 1) { + val out = DMat.newOrCheckDMat(aa.nrows, aa.ncols, outmat) + Mat.nflops += aa.length + var i = 0 + val dvar = data(0) + while (i < aa.length) { + out.data(i) = dvar * aa.data(i) + i += 1 + } + out + } else if (aa.ncols == 1 && aa.nrows == 1) { + val out = DMat.newOrCheckDMat(nrows, ncols, outmat) + Mat.nflops += length + var i = 0 + val dvar = aa.data(0) + while (i < length) { + out.data(i) = dvar * data(i) + i += 1 + } + out + } else throw new RuntimeException("dimensions mismatch") + } + + def fSMult(ss:SDMat, outmat:Mat):DMat = { + if (ncols != ss.nrows) { + throw new RuntimeException("dimensions mismatch") + } else { + val out = DMat.newOrCheckDMat(nrows, ss.ncols, outmat) + Mat.nflops += 2 * nrows.toLong * ss.nnz + val ioff = Mat.ioneBased; + val nr = ss.nrows + val nc = ss.ncols + val kk = ncols + var jc0:Array[Int] = null + var ir0:Array[Int] = null + if (ioff == 0) { + jc0 = SparseMat.incInds(ss.jc) + ir0 = SparseMat.incInds(ss.ir) + } else { + jc0 = ss.jc + ir0 = ss.ir + } + if (nrows == 1 && !Mat.noMKL) { + dcscmv("T", nr, nc, 1.0, "GLNF", ss.data, ir0, jc0, data, 0.0, out.data) + out + } else { + out.clear + if (nrows < 20 || Mat.noMKL) { + var i = 0 + while (i < ss.ncols) { + var j = ss.jc(i) - ioff + while (j < ss.jc(i+1)-ioff) { + val dval = ss.data(j) + val ival = ss.ir(j) - ioff + var k = 0 + while 
(k < nrows) { + out.data(k+i*nrows) += data(k+ival*nrows)*dval + k += 1 + } + j += 1 + } + i += 1 + } + } else { + dmcscm(nrows, ss.ncols, data, nrows, ss.data, ss.ir, ss.jc, out.data, nrows) + // dcsrmm("N", ss.ncols, nrows, ncols, 1.0, "GLNF", ss.data, ss.ir, ss.jc, data, ncols, 0, out.data, out.ncols) + } + } + out + } + } + + def multT(a:SDMat, outmat:Mat):DMat = { + import edu.berkeley.bid.CBLAS._ + if (ncols == a.nrows) { + val out = DMat.newOrCheckDMat(nrows, a.ncols, outmat) + if (outmat.asInstanceOf[AnyRef] != null) out.clear + dmcsrm(nrows, a.ncols, data, nrows, a.data, a.ir, a.jc, out.data, nrows) + Mat.nflops += 2L * a.nnz * nrows + out + } else { + throw new RuntimeException("xT dimensions mismatch") + } + } + + /* + * Very slow, row-and-column multiply + */ + def sDMult(a:Mat):DMat = + a match { + case aa:DMat => { + if (ncols == a.nrows) { + val out = DMat(nrows, a.ncols) + var i = 0 + while (i < a.ncols) { + var j = 0 + while (j < nrows) { + var k = 0 + var sum = 0.0 + while (k < ncols) { + sum += data(j+k*nrows) * aa.data(k+i*a.nrows) + k += 1 + } + out.data(j + i*out.nrows) = sum + j += 1 + } + i += 1 + } + out + } else throw new RuntimeException("dimensions mismatch") + } + case _ => throw new RuntimeException("argument must be dense") + } + + /* + * Weka multiply: out(i,j) = sum over k of this(i,k) * a(k,j), computed one row of this matrix at a time. + * Row i is first copied into tmp; then, for each column j of a, the dot product of tmp with that column + * is stored at row i, column j of the column-major result. + * Throws RuntimeException when ncols != a.nrows or when a is not a DMat. + */ + + def wDMult(a:Mat, omat:Mat):DMat = + a match { + case aa:DMat => { + if (ncols == a.nrows) { + val out = DMat.newOrCheckDMat(nrows, a.ncols, omat) + val tmp = new Array[Double](ncols) + var i = 0 + while (i < nrows) { + var j = 0 + while (j < ncols) { + tmp(j) = data(i+j*nrows) + j += 1 + } + j = 0 + while (j < a.ncols) { + var k = 0 + var sum = 0.0 + while (k < ncols) { + /* column j of a starts at offset j*a.nrows */ + sum += tmp(k) * aa.data(k+j*a.nrows) + k += 1 + } + /* row i, column j of the column-major output */ + out.data(i + j*out.nrows) = sum + j += 1 + } + i += 1 + } + out + } else throw new RuntimeException("dimensions mismatch") + } + case _ => throw new RuntimeException("argument must be dense") + } + + def dot(a:DMat):Double = super.dot(a) + + override def 
dot(a:Mat):Double = super.dot(a.asInstanceOf[DMat]) + + /* + * LU-factors a copy of a (dgetrf) and solves against a copy of this matrix (dgetrs), both with + * RowMajor ordering. a must be square with a.nrows == ncols, otherwise RuntimeException is thrown. + */ + def solvel(a0:Mat):DMat = + a0 match { + case a:DMat => { + Mat.nflops += 2L*a.nrows*a.nrows*a.nrows/3 + 2L*nrows*a.nrows*a.nrows + if (a.nrows != a.ncols || ncols != a.nrows) { + throw new RuntimeException("solve needs a square matrix") + } else { + val out = DMat(nrows, ncols) + val tmp = new Array[Double](ncols*ncols) + System.arraycopy(a.data, 0, tmp, 0, a.length) + System.arraycopy(data, 0, out.data, 0, length) + val ipiv = new Array[Int](ncols) + dgetrf(ORDER.RowMajor, ncols, ncols, tmp, ncols, ipiv) + dgetrs(ORDER.RowMajor, "N", ncols, nrows, tmp, ncols, ipiv, out.data, nrows) + out + } + } + case _ => throw new RuntimeException("unsupported arg to / "+a0) + } + + /* + * LU-factors a copy of this matrix (dgetrf) and solves against a copy of a (dgetrs), both with + * ColMajor ordering. This matrix must be square with ncols == a.nrows, otherwise RuntimeException is thrown. + * The fallback error names the \\ operator, matching the CMat version of this method. + */ + def solver(a0:Mat):DMat = + a0 match { + case a:DMat => { + Mat.nflops += 2L*nrows*nrows*nrows/3 + 2L*nrows*nrows*a.ncols + if (nrows != ncols || ncols != a.nrows) { + throw new RuntimeException("solve needs a square matrix") + } else { + val out = DMat(a.nrows, a.ncols) + val tmp = new Array[Double](ncols*ncols) + System.arraycopy(data, 0, tmp, 0, length) + System.arraycopy(a.data, 0, out.data, 0, a.length) + val ipiv = new Array[Int](ncols) + dgetrf(ORDER.ColMajor, ncols, ncols, tmp, ncols, ipiv) + dgetrs(ORDER.ColMajor, "N", ncols, a.ncols, tmp, nrows, ipiv, out.data, nrows) + out + } + } + case _ => throw new RuntimeException("unsupported arg to \\ "+a0) + } + + /* Zeroes the first length entries of data in place and returns this. */ + override def clear = { + Arrays.fill(this.data,0,length,0) + this + } + + /* + * Returns a DMat of size nr x nc: this matrix when the dims already match, a new DMat wrapping the + * existing data array when that array has at least nr*nc entries, otherwise a freshly allocated DMat. + */ + override def recycle(nr:Int, nc:Int, nnz:Int):DMat = { + if (nrows == nr && nc == ncols) { + this + } else if (data.size >= nr*nc) { + new DMat(nr, nc, data) + } else { + DMat(nr, nc) + } + } + /* + * Routines to operate on two DMats. These are the compute routines. 
*/
  // Matrix products and solves.
  def *  (b : DMat)  = fDMult(b, null)
  def *  (b : SDMat) = fSMult(b, null)
  def xT (b : SDMat) = multT(b, null)
  def /  (b : DMat)  = solvel(b)
  def \\ (b : DMat)  = solver(b)
  def ^  (b : DMat)  = ddMatOp(b, (base:Double, expo:Double) => math.pow(base,expo), null)

  // Element-wise arithmetic, implemented with the vector primitives in object DMat.
  def +  (b : DMat) = ddMatOpv(b, DMat.vecAdd _, null)
  def -  (b : DMat) = ddMatOpv(b, DMat.vecSub _, null)
  def *@ (b : DMat) = ddMatOpv(b, DMat.vecMul _, null)
  def /@ (b : DMat) = ddMatOpv(b, DMat.dVecDiv _, null)

  // Element-wise comparisons; each produces 1.0 where the relation holds and 0.0 elsewhere.
  def >   (b : DMat) = ddMatOp(b, (u:Double, w:Double) => if (u > w) 1.0 else 0.0, null)
  def <   (b : DMat) = ddMatOp(b, (u:Double, w:Double) => if (u < w) 1.0 else 0.0, null)
  def ==  (b : DMat) = ddMatOp(b, (u:Double, w:Double) => if (u == w) 1.0 else 0.0, null)
  def === (b : DMat) = ddMatOp(b, (u:Double, w:Double) => if (u == w) 1.0 else 0.0, null)
  def >=  (b : DMat) = ddMatOp(b, (u:Double, w:Double) => if (u >= w) 1.0 else 0.0, null)
  def <=  (b : DMat) = ddMatOp(b, (u:Double, w:Double) => if (u <= w) 1.0 else 0.0, null)
  def !=  (b : DMat) = ddMatOp(b, (u:Double, w:Double) => if (u != w) 1.0 else 0.0, null)

  // Double scalar on the right-hand side.
  override def *  (b : Double) = fDMult(DMat.elem(b), null)
  override def +  (b : Double) = ddMatOpScalarv(b, DMat.vecAdd _, null)
  override def -  (b : Double) = ddMatOpScalarv(b, DMat.vecSub _, null)
  override def *@ (b : Double) = ddMatOpScalarv(b, DMat.vecMul _, null)
  override def /@ (b : Double) = ddMatOpScalarv(b, DMat.dVecDiv _, null)
  override def ^  (b : Double) = ddMatOpScalar(b, (base:Double, expo:Double) => math.pow(base,expo), null)

  override def >  (b : Double) = ddMatOpScalar(b, (u:Double, w:Double) => if (u > w) 1.0 else 0.0, null)
  override def <  (b : Double) = ddMatOpScalar(b, (u:Double, w:Double) => if (u < w) 1.0 else 0.0, null)
  override def == (b : Double) = ddMatOpScalar(b, (u:Double, w:Double) => if (u == w) 1.0 else 0.0, null)
  override def >= (b : Double) = ddMatOpScalar(b, (u:Double, w:Double) => if (u >= w) 1.0 else 0.0, null)
  override def <= (b : Double) = ddMatOpScalar(b, (u:Double, w:Double) => if (u <= w) 1.0 else 0.0, null)
  override def != (b : Double) = ddMatOpScalar(b, (u:Double, w:Double) => if (u != w) 1.0 else 0.0, null)

  // Float scalar on the right-hand side; the argument widens to Double at the call.
  override def *  (b : Float) = fDMult(DMat.elem(b), null)
  override def +  (b : Float) = ddMatOpScalarv(b, DMat.vecAdd _, null)
  override def -  (b : Float) = ddMatOpScalarv(b, DMat.vecSub _, null)
  override def *@ (b : Float) = ddMatOpScalarv(b, DMat.vecMul _, null)
  override def /@ (b : Float) = ddMatOpScalarv(b, DMat.dVecDiv _, null)
  override def ^  (b : Float) = ddMatOpScalar(b, (base:Double, expo:Double) => math.pow(base,expo), null)

  override def >  (b : Float) = ddMatOpScalar(b, (u:Double, w:Double) => if (u > w) 1.0 else 0.0, null)
  override def <  (b : Float) = ddMatOpScalar(b, (u:Double, w:Double) => if (u < w) 1.0 else 0.0, null)
  override def == (b : Float) = ddMatOpScalar(b, (u:Double, w:Double) => if (u == w) 1.0 else 0.0, null)
  override def >= (b : Float) = ddMatOpScalar(b, (u:Double, w:Double) => if (u >= w) 1.0 else 0.0, null)
  override def <= (b : Float) = ddMatOpScalar(b, (u:Double, w:Double) => if (u <= w) 1.0 else 0.0, null)
  override def != (b : Float) = ddMatOpScalar(b, (u:Double, w:Double) => if (u != w) 1.0 else 0.0, null)

  // Horizontal (\) and vertical (on) concatenation.
  def \ (b: DMat) = DMat(ghorzcat(b))
  def \ (b:Double) = DMat(ghorzcat(DMat.elem(b)))

  def on (b: DMat) = DMat(gvertcat(b))
  def on (b: Double) = vertcat(DMat.elem(b))

  // Pair this matrix with a right operand; the resulting pair type selects the operator set.
  def ~ (b : DMat):DPair = new DPair(this, b)
  def ~ (b : SDMat):SDPair = new SDPair(this, b)

  override def ~ (b: Mat):Pair = b match {
    case dense:DMat => new DPair(this, dense)
    case sparse:SDMat => new SDPair(this, sparse)
    case _ => throw new RuntimeException("wrong types for operator ~ ")
  }
  /*
   * Specialize to IMats to help the type system.
*/
  // Each IMat operand is converted with DMat(b) and then routed to the DMat overload.
  def +  (b : IMat):DMat = this + DMat(b)
  def -  (b : IMat):DMat = this - DMat(b)
  def *  (b : IMat):DMat = this * DMat(b)
  def /  (b : IMat):DMat = this / DMat(b)
  def \\ (b : IMat):DMat = this \\ DMat(b)
  def *@ (b : IMat):DMat = this *@ DMat(b)
  def /@ (b : IMat):DMat = this /@ DMat(b)
  def \  (b : IMat):DMat = this \ DMat(b)
  def on (b : IMat):DMat = this on DMat(b)

  def >   (b : IMat):DMat = this > DMat(b)
  def <   (b : IMat):DMat = this < DMat(b)
  def >=  (b : IMat):DMat = this >= DMat(b)
  def <=  (b : IMat):DMat = this <= DMat(b)
  def ==  (b : IMat):DMat = this == DMat(b)
  def === (b : IMat):DMat = this === DMat(b)
  def !=  (b : IMat):DMat = this != DMat(b)

  /*
   * Specialize to FMats to help the type system.
   * Same scheme as for IMat: convert with DMat(b), then use the DMat overload.
   */
  def +  (b : FMat):DMat = this + DMat(b)
  def -  (b : FMat):DMat = this - DMat(b)
  def *  (b : FMat):DMat = this * DMat(b)
  def /  (b : FMat):DMat = this / DMat(b)
  def \\ (b : FMat):DMat = this \\ DMat(b)
  def *@ (b : FMat):DMat = this *@ DMat(b)
  def /@ (b : FMat):DMat = this /@ DMat(b)
  def \  (b : FMat):DMat = this \ DMat(b)
  def on (b : FMat):DMat = this on DMat(b)

  def >   (b : FMat):DMat = this > DMat(b)
  def <   (b : FMat):DMat = this < DMat(b)
  def >=  (b : FMat):DMat = this >= DMat(b)
  def <=  (b : FMat):DMat = this <= DMat(b)
  def ==  (b : FMat):DMat = this == DMat(b)
  def === (b : FMat):DMat = this === DMat(b)
  def !=  (b : FMat):DMat = this != DMat(b)

  /*
   * Specialize to CMats to help the type system.
   * Here the conversion goes the other way: this matrix is wrapped with CMat(this)
   * and the CMat operator is applied, so the result is a CMat.
   */
  def +  (b : CMat):CMat = CMat(this) + b
  def -  (b : CMat):CMat = CMat(this) - b
  def *  (b : CMat):CMat = CMat(this) * b
  def /  (b : CMat):CMat = CMat(this) / b
  def \\ (b : CMat):CMat = CMat(this) \\ b
  def *@ (b : CMat):CMat = CMat(this) *@ b
  def /@ (b : CMat):CMat = CMat(this) /@ b
  def \  (b : CMat):CMat = CMat(this) \ b
  def on (b : CMat):CMat = CMat(this) on b

  /*
   * Operators whose second arg is generic.
*/
  import Operator._
  // Every generic operator dispatches through applyMat with no output matrix (null).
  override def +  (b : Mat):Mat = applyMat(this, b, null, Mop_Plus)
  override def -  (b : Mat):Mat = applyMat(this, b, null, Mop_Minus)
  override def *  (b : Mat):Mat = applyMat(this, b, null, Mop_Times)
  override def /  (b : Mat):Mat = applyMat(this, b, null, Mop_Div)
  override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve)
  override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes)
  override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv)
  override def \  (b : Mat):Mat = applyMat(this, b, null, Mop_HCat)
  override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat)

  override def >   (b : Mat):Mat = applyMat(this, b, null, Mop_GT)
  override def <   (b : Mat):Mat = applyMat(this, b, null, Mop_LT)
  override def >=  (b : Mat):Mat = applyMat(this, b, null, Mop_GE)
  override def <=  (b : Mat):Mat = applyMat(this, b, null, Mop_LE)
  override def ==  (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
  override def === (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
  override def !=  (b : Mat):Mat = applyMat(this, b, null, Mop_NE)

}

/*
 * Pairs an output matrix (omat) with a DMat operand (mat). Each operator applies the
 * corresponding DMat routine to mat and hands omat over as the destination.
 */
class DPair (val omat:Mat, val mat:DMat) extends Pair{
  /* Transpose of mat, written into omat: generic gt when Mat.noMKL is set, domatcopy otherwise. */
  override def t:DMat = if (Mat.noMKL) {
    DMat(mat.gt(omat))
  } else {
    val out = DMat.newOrCheckDMat(mat.ncols, mat.nrows, omat)
    domatcopy("C", "T", mat.nrows, mat.ncols, 1.0, mat.data, mat.nrows, out.data, mat.ncols)
    out
  }

  def *  (b : DMat)  = mat.fDMult(b, omat)
  def *  (b : SDMat) = mat.fSMult(b, omat)
  def xT (b : SDMat) = mat.multT(b, omat)
  def +  (b : DMat)  = mat.ddMatOpv(b, DMat.vecAdd _, omat)
  def -  (b : DMat)  = mat.ddMatOpv(b, DMat.vecSub _, omat)
  def *@ (b : DMat)  = mat.ddMatOpv(b, DMat.vecMul _, omat)
  def /@ (b : DMat)  = mat.ddMatOpv(b, DMat.dVecDiv _, omat)
  // Pass omat like every other operator of the pair so the supplied output matrix is used.
  def ^  (b : DMat)  = mat.ddMatOp(b, (x:Double, y:Double) => math.pow(x,y), omat)

  def >   (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, omat)
  def <   (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, omat)
  def ==  (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
  def === (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
  def >=  (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, omat)
  def <=  (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, omat)
  def !=  (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, omat)

  override def *  (b : Double) = mat.fDMult(DMat.elem(b), omat)
  override def *  (b : Float)  = mat.fDMult(DMat.elem(b), omat)
  override def +  (b : Double) = mat.ddMatOpScalarv(b, DMat.vecAdd _, omat)
  override def -  (b : Double) = mat.ddMatOpScalarv(b, DMat.vecSub _, omat)
  override def *@ (b : Double) = mat.ddMatOpScalarv(b, DMat.vecMul _, omat)
  override def /@ (b : Double) = mat.ddMatOpScalarv(b, DMat.dVecDiv _, omat)
  override def ^  (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => math.pow(x,y), omat)

  override def >   (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, omat)
  override def <   (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, omat)
  override def ==  (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
  override def === (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
  override def >=  (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, omat)
  override def <=  (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, omat)
  override def !=  (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, omat)

  import Operator._
  override def +  (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus)
  override def -  (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus)
  override def *  (b : Mat):Mat = applyMat(mat, b, omat, Mop_Times)
  override def /  (b : Mat):Mat = applyMat(mat, b, omat, Mop_Div)
  override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve)
  override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes)
  override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv)
  override def \  (b : Mat):Mat = applyMat(mat, b, omat, Mop_HCat)
  override def on (b : Mat):Mat = applyMat(mat, b, omat, Mop_VCat)

  override def >   (b : Mat):Mat = applyMat(mat, b, omat, Mop_GT)
  override def <   (b : Mat):Mat = applyMat(mat, b, omat, Mop_LT)
  override def >=  (b : Mat):Mat = applyMat(mat, b, omat, Mop_GE)
  override def <=  (b : Mat):Mat = applyMat(mat, b, omat, Mop_LE)
  override def ==  (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
  override def === (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
  override def !=  (b : Mat):Mat = applyMat(mat, b, omat, Mop_NE)
}

object DMat {

  /*
   * Strided vector primitives. Each one performs exactly n steps of
   *   c(c0 + i*cinc) = op(a(a0 + i*ainc), b(b0 + i*binc))
   * and returns 0. The loops count steps rather than comparing the output index with
   * c0 + n, so a zero stride (an operand or the destination held fixed) terminates and a
   * stride larger than one still visits n elements.
   */
  def dVecDiv(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
    var ai = a0; var bi = b0; var ci = c0; var i = 0
    while (i < n) {
      c(ci) = a(ai) / b(bi); ai += ainc; bi += binc; ci += cinc; i += 1
    }
    0
  }

  /*
   * Return a DMat of shape nr x nc for use as an output. A null or 0x0 omat yields a fresh
   * matrix; a DMat of the right shape is returned as is; a DMat of another shape goes through
   * recycle. Any other matrix type throws RuntimeException.
   */
  def newOrCheckDMat(nr:Int, nc:Int, omat:Mat):DMat = {
    if (omat.asInstanceOf[AnyRef] == null || (omat.nrows == 0 && omat.ncols == 0)) {
      DMat(nr, nc)
    } else {
      omat match {
        case outmat:DMat =>
          if (outmat.nrows != nr || outmat.ncols != nc) {
            outmat.recycle(nr, nc, 0)
          } else {
            outmat
          }
        case _ => throw new RuntimeException("wrong type for out matrix "+omat)
      }
    }
  }

  /* Allocate an nr x nc matrix over a new array. */
  def apply(nr:Int, nc:Int) = new DMat(nr, nc, new Array[Double](nr*nc))

  /* Wrap a DenseMat[Double]; the backing array is shared, not copied. */
  def apply(a:DenseMat[Double]):DMat = new DMat(a.nrows, a.ncols, a.data)

  /*
   * Build a DMat from a DMat, FMat, IMat (element data copied into a new array) or an
   * SDMat (via ss.full). Any other type throws RuntimeException.
   */
  def apply(x:Mat):DMat = {
    var out:DMat = null
    x match {
      case dd:DMat => {out = DMat(x.nrows, x.ncols); System.arraycopy(dd.data, 0, out.data, 0, dd.length)}
      case ff:FMat => {out = DMat(x.nrows, x.ncols); Mat.copyToDoubleArray(ff.data, 0, out.data, 0, ff.length)}
      case ii:IMat => {out = DMat(x.nrows, x.ncols); Mat.copyToDoubleArray(ii.data, 0, out.data, 0, ii.length)}
      case ss:SDMat => out = DMat(ss.full)
      case _ => throw new RuntimeException("Unsupported source type")
    }
    out
  }


  def vecAdd(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
    var ai = a0; var bi = b0; var ci = c0; var i = 0
    while (i < n) {
      c(ci) = a(ai) + b(bi); ai += ainc; bi += binc; ci += cinc; i += 1
    }
    0
  }

  def vecSub(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
    var ai = a0; var bi = b0; var ci = c0; var i = 0
    while (i < n) {
      c(ci) = a(ai) - b(bi); ai += ainc; bi += binc; ci += cinc; i += 1
    }
    0
  }

  def vecMul(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
    var ai = a0; var bi = b0; var ci = c0; var i = 0
    while (i < n) {
      c(ci) = a(ai) * b(bi); ai += ainc; bi += binc; ci += cinc; i += 1
    }
    0
  }

  def vecMax(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
    var ai = a0; var bi = b0; var ci = c0; var i = 0
    while (i < n) {
      c(ci) = math.max(a(ai), b(bi)); ai += ainc; bi += binc; ci += cinc; i += 1
    }
    0
  }

  def vecMin(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
    var ai = a0; var bi = b0; var ci = c0; var i = 0
    while (i < n) {
      c(ci) = math.min(a(ai), b(bi)); ai += ainc; bi += binc; ci += cinc; i += 1
    }
    0
  }


  /* A 1x1 matrix holding x. */
  def elem(x:Double) = {
    val out = DMat(1,1)
    out.data(0) = x
    out
  }

}






diff --git a/src/main/scala/BIDMat/DenseMat.scala b/src/main/scala/BIDMat/DenseMat.scala new file mode 100755 index 00000000..54f1077a --- /dev/null +++ b/src/main/scala/BIDMat/DenseMat.scala @@ -0,0 
+1,1313 @@
package BIDMat
import scala.math.Numeric._
import java.util.Arrays
import java.util.Comparator
import scala.actors._
import scala.actors.Actor._

/*
 * Dense matrix over element type T. Elements are addressed as data(r + c*nrows),
 * i.e. element (r,c) of the matrix lives at that offset of the backing array.
 */
class DenseMat[@specialized(Double,Float,Int,Byte) T]
(nr: Int, nc: Int, val data:Array[T])(implicit manifest:ClassManifest[T]) extends Mat(nr, nc) {

  /* Allocate a new backing array of nr*nc elements. */
  def this(nr:Int, nc:Int)(implicit manifest:ClassManifest[T]) = this(nr, nc, new Array[T](nr*nc))

  /*
   * Return the (0,0) value as a scalar.
   * Throws RuntimeException when the matrix has more than one row or column.
   */
  def v:T =
    if (nrows > 1 || ncols > 1) {
      throw new RuntimeException("Matrix should be 1x1 to extract value")
    } else {
      data(0)
    }

  override def mytype = "DenseMat"
  /*
   * Test if this matrix is a row or column vector
   */
  def isvector(): Boolean = {
    if (nrows == 1 || ncols == 1) {
      true
    } else {
      false
    }
  }
  /*
   * Bounds-checked matrix access, 0- or 1-based (the base is Mat.oneBased).
   * Throws IndexOutOfBoundsException for an index outside the matrix.
   */
  def apply(r0:Int, c0:Int):T = {
    val off = Mat.oneBased
    val r = r0 - off
    val c = c0 - off
    if (r < 0 || r >= nrows || c < 0 || c >= ncols) {
      throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") vs ("+nrows+","+ncols+")");
    } else {
      data(r+c*nrows)
    }
  }
  /*
   * Bounds-checked linear access, 0- or 1-based
   */
  def apply(i0:Int):T = {
    val off = Mat.oneBased
    val i = i0 - off
    if (i < 0 || i >= length) {
      throw new IndexOutOfBoundsException(""+(i+off)+" >= ("+length+")");
    } else {
      data(i)
    }
  }
  /*
   * Unchecked 0-based matrix access
   */
  def get_(r:Int, c:Int):T = {
    data(r+c*nrows)
  }

  /*
   * Update a matrix value, m(r,c) = v, 0- or 1-based. Returns v.
   */
  def update(r0:Int, c0:Int, v:T):T = {
    val off = Mat.oneBased
    val r = r0 - off
    val c = c0 - off
    if (r < 0 || r >= nrows || c < 0 || c >= ncols) {
      throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") vs ("+nrows+","+ncols+")");
    } else {
      data(r+c*nrows) = v
    }
    v
  }
  /*
   * Update a matrix value with linear access, m(i) = v. Returns v.
   */
  def update(i0:Int, v:T):T = {
    val off = Mat.oneBased
    val i = i0 - off
    if (i < 0 || i >= length) {
      throw new IndexOutOfBoundsException(""+(i+off)+" vs ("+length+")");
    } else {
      data(i) = v
    }
    v
  }
  /*
   * Unchecked 0-based set
   */
  def set_(r:Int, c:Int, v:T):T = {
    data(r+c*nrows) = v
    v
  }
  /*
   * Transpose. The ncols x nrows result is obtained from DenseMat.newOrCheck, so oldmat
   * is reused as the destination when it is suitable.
   */
  def gt(oldmat:Mat):DenseMat[T]  = {
    var out:DenseMat[T] = DenseMat.newOrCheck(ncols, nrows, oldmat)
    var i = 0
    while (i < nrows) {
      var j = 0
      while (j < ncols) {
        out.data(j+i*ncols) = data(i+j*nrows)
        j += 1
      }
      i += 1
    }
    out
  }
  /*
   * Stack matrices vertically. Throws RuntimeException when the column counts differ.
   */
  def gvertcat(a:DenseMat[T]):DenseMat[T] =
    if (ncols != a.ncols) {
      throw new RuntimeException("ncols must match")
    } else {
      var out = new DenseMat[T](nrows+a.nrows, ncols)
      var i = 0
      while (i < ncols) {
        // Column i of the result is column i of this followed by column i of a.
        System.arraycopy(data, i*nrows, out.data, i*(nrows+a.nrows), nrows)
        System.arraycopy(a.data, i*a.nrows, out.data, nrows+i*(nrows+a.nrows), a.nrows)
        i += 1
      }
      out
    }
  /*
   * Stack matrices horizontally. Throws RuntimeException when the row counts differ.
   */
  def ghorzcat(a:DenseMat[T]):DenseMat[T]=
    if (nrows != a.nrows) {
      throw new RuntimeException("nrows must match")
    } else {
      var out = new DenseMat[T](nrows, ncols+a.ncols)
      System.arraycopy(data, 0, out.data, 0, nrows*ncols)
      System.arraycopy(a.data, 0, out.data, nrows*ncols, nrows*a.ncols)
      out
    }
  /*
   * Count number of non-zero entries
   */
  override def nnz:Int = {
    var count:Int = 0
    var i = 0
    while (i < length) {
      if (data(i) != 0) {
        count += 1
      }
      i += 1
    }
    count
  }
  /*
   * Helper function for find functions.
   * Writes the linear index of every non-zero element into out, shifted by off (pass
   * Mat.oneBased for user-facing indices, 0 for internal ones), and returns out.
   * The backing array is always scanned with 0-based positions; only the reported
   * index carries the offset.
   */
  def findInds(out:IMat, off:Int):IMat = {
    var count = 0
    var i = 0
    while (i < length) {
      if (data(i) != 0) {
        out.data(count) = i + off
        count += 1
      }
      i += 1
    }
    out
  }
  /*
   * Find indices (linear) for all non-zeros elements
   */
  def find:IMat = {
    var out = IMat(nnz, 1)
    findInds(out, Mat.oneBased)
  }
  /*
   * Find indices (i,j) for non-zero elements
   */
  def find2:(IMat, IMat) = {
    val iout = IMat(nnz, 1)
    val jout = IMat(nnz, 1)
    // Collect 0-based linear indices first, then split them into row and column.
    findInds(iout, 0)
    val off = Mat.oneBased
    var i = 0
    while (i < iout.length) {
      val ival:Int = iout.data(i)
      jout.data(i) = (ival / nrows) + off
      iout.data(i) = (ival % nrows) + off
      i += 1
    }
    (iout, jout)
  }
  /*
   * Find tuples (i,j,v) for non-zero elements
   */
  def gfind3:(IMat, IMat, DenseMat[T]) = {
    val iout = IMat(nnz, 1)
    val jout = IMat(nnz, 1)
    val vout = new DenseMat[T](nnz, 1)
    findInds(iout, 0)
    val off = Mat.oneBased
    var i = 0
    while (i < iout.length) {
      val ival:Int = iout.data(i)
      vout.data(i) = data(ival)
      jout.data(i) = (ival / nrows) + off
      iout.data(i) = (ival % nrows) + off
      i += 1
    }
    (iout, jout, vout)
  }
  /*
   * Return a(im) where im is a matrix of indices. A MatrixWildcard selects every element
   * as a length x 1 column; otherwise the result has the shape of im.
   * Throws RuntimeException for an index outside 0..length-1 (after removing the base).
   */
  def gapply(im:IMat):DenseMat[T] =
    im match {
      case aa:MatrixWildcard => {
        val out = new DenseMat[T](length, 1)
        System.arraycopy(data, 0, out.data, 0, out.length)
        out
      }
      case _ => {
        val out = new DenseMat[T](im.nrows, im.ncols)
        var i = 0
        val off = Mat.oneBased
        while (i < out.length) {
          val ind = im.data(i) - off
          if (ind < 0 || ind >= length) {
            throw new RuntimeException("bad linear index "+(ind+off)+" vs "+length)
          } else {
            out.data(i) = data(ind)
          }
          i += 1
        }
        out
      }
    }

  /*
   * Implement a(im) = b where im is a matrix of indices to a and im and b are same-sized.
   * Returns b. Throws RuntimeException on a shape mismatch or a bad index.
   */
  def update(im:IMat, b:DenseMat[T]):DenseMat[T] =
    im match {
      case aaa:MatrixWildcard => {
        if (length != b.length || b.ncols != 1) {
          throw new RuntimeException("dims mismatch")
        } else {
          System.arraycopy(b.data, 0, data, 0, length)
        }
        b
      }
      case _ => {
        if (im.nrows != b.nrows || im.ncols != b.ncols) {
          throw new RuntimeException("dims mismatch")
        } else {
          val off = Mat.oneBased
          var i = 0
          while (i < im.length) {
            val ind = im.data(i) - off
            if (ind < 0 || ind >= length) {
              throw new RuntimeException("bad linear index "+(ind+off)+" vs "+length)
            } else {
              data(ind) = b.data(i)
            }
            i += 1
          }
        }
        b
      }
    }

  /*
   *
Implement a(im) = b where im is a matrix of indices to a, and b is a constant
   */
  def update(a:IMat, b:T):T = {
    a match {
      case wild:MatrixWildcard => {
        // Wildcard: every element receives the constant.
        var pos = 0
        while (pos < length) {
          data(pos) = b
          pos += 1
        }
      }
      case _ => {
        val base = Mat.oneBased
        var pos = 0
        while (pos < a.length) {
          val target = a.data(pos) - base
          if (target >= 0 && target < length) {
            data(target) = b
          } else {
            throw new RuntimeException("bad linear index "+(target+base)+" vs "+length)
          }
          pos += 1
        }
      }
    }
    b
  }
  /*
   * Implement slicing, a(iv,jv) where iv and jv are vectors, using ? as wildcard.
   * The index vectors are expanded by DenseMat.getInds; the result has one row per
   * row index and one column per column index.
   */
  def gapply(iv:IMat, jv:IMat):DenseMat[T] = {
    val rows = DenseMat.getInds(iv, nrows)
    val cols = DenseMat.getInds(jv, ncols)
    val result = new DenseMat[T](rows.length, cols.length)
    val base = Mat.oneBased
    var ci = 0
    while (ci < result.ncols) {
      var ri = 0
      val srcCol = cols(ci) - base
      while (ri < result.nrows) {
        result.data(ri+ci*result.nrows) = data(rows(ri)-base+nrows*srcCol)
        ri += 1
      }
      ci += 1
    }
    result
  }
  /*
   * Implement slicing, a(iv,j) where iv a vector, j an integer, using ? as wildcard
   */
  def gapply(iv:IMat, jv:Int):DenseMat[T] = {
    val col = IMat.ielem(jv)
    gapply(iv, col)
  }
  /*
   * Implement slicing, a(i,jv) where i integer, jv a vector, using ? as wildcard
   */
  def gapply(i:Int, jv:IMat):DenseMat[T] = {
    val row = IMat.ielem(i)
    gapply(row, jv)
  }

  /*
   * Implement sliced assignment, a(iv,jv) = b where iv and jv are vectors, using ?
as wildcard
   * Returns b. Throws RuntimeException when b's shape differs from the selected region.
   */
  def _update(iv:IMat, jv:IMat, b:DenseMat[T]):DenseMat[T] = {
    val rowinds = DenseMat.getInds(iv, nrows)
    val colinds = DenseMat.getInds(jv, ncols)
    if (rowinds.length != b.nrows || colinds.length != b.ncols) {
      throw new RuntimeException("dims mismatch in assignment")
    } else {
      val off = Mat.oneBased
      var i = 0
      while (i < b.ncols) {
        val c = colinds(i) - off
        var j = 0
        while (j < b.nrows) {
          data(rowinds(j)-off+nrows*c) = b.data(j+i*b.nrows)
          j += 1
        }
        i += 1
      }
    }
    b
  }

  /*
   * Generic sliced assignment: dispatch to the typed update of the concrete matrix class
   * when this matrix and b have the same concrete type (FMat, DMat, IMat or CMat).
   * Any other combination throws RuntimeException with a descriptive message instead of
   * failing with a bare MatchError.
   */
  override def update(iv:IMat, jv:IMat, b:Mat):Mat = {
    (this, b) match {
      case (me:FMat, bb:FMat) => me.update(iv, jv, bb):FMat
      case (me:DMat, bb:DMat) => me.update(iv, jv, bb):DMat
      case (me:IMat, bb:IMat) => me.update(iv, jv, bb):IMat
      case (me:CMat, bb:CMat) => me.update(iv, jv, bb):CMat
      case _ => throw new RuntimeException("unsupported matrix types for sliced assignment")
    }
  }

  /*
   * Implement sliced assignment, a(iv,jv) = b:T where iv and jv are vectors, using ? as wildcard.
   * Every selected element receives the constant b, which is also the return value.
   */
  def update(iv:IMat, jv:IMat, b:T):T = {
    val rowinds = DenseMat.getInds(iv, nrows)
    val colinds = DenseMat.getInds(jv, ncols)
    val off = Mat.oneBased
    var i = 0
    while (i < colinds.length) {
      val c = colinds(i) - off
      var j = 0
      while (j < rowinds.length) {
        val r = rowinds(j) - off
        data(r+nrows*c) = b
        j += 1
      }
      i += 1
    }
    b
  }
  /*
   * Implement sliced assignment, a(iv,j) = b where iv is a vector, j an integer, using ? as wildcard
   */
  def update(iv:IMat, j:Int, b:T):T = {
    update(iv, IMat.ielem(j), b)
  }
  /*
   * Implement sliced assignment, a(i,jv) = b where jv a vector, using ?
as wildcard + */ + def update(i:Int, jv:IMat, b:T):T = { + update(IMat.ielem(i), jv, b) + } + + def printOne(i:Int):String = " " + + override def toString:String = { + val nChars = Mat.terminalWidth-4 + val maxRows = 640/nChars + var maxCols = nChars + var fieldWidth = 4 + var icols = 0 + while (icols < math.min(ncols, maxCols)) { + var newWidth = fieldWidth + for (j <- 0 until math.min(nrows,maxRows)) newWidth = math.max(newWidth, 2+(printOne(j+nrows*icols).length)) + if ((icols+1)*newWidth < nChars) { + fieldWidth = newWidth + icols += 1 + } else { + maxCols = icols + } + } + val sb:StringBuilder = new StringBuilder + val somespaces = " " + for (i <- 0 until math.min(nrows, maxRows)) { + for (j <- 0 until math.min(ncols, icols)) { + val str = printOne(i+j*nrows) + sb.append(somespaces.substring(0,fieldWidth-str.length)+str) + } + if (ncols > icols) { + sb.append("...") + } + sb.append("\n") + } + if (nrows > maxRows) { + for (j <- 0 until math.min(ncols, maxCols)) { + sb.append(somespaces.substring(0, fieldWidth-2)+"..") + } + sb.append("\n") + } + sb.toString() + } + + override def clear:DenseMat[T] ={ + if (length == 0) { + this + } else { + val v = data(0) + v match { + case a:Float => Arrays.fill(data.asInstanceOf[Array[Float]], 0, length, 0) + case a:Double => Arrays.fill(data.asInstanceOf[Array[Double]], 0, length, 0) + case a:Int => Arrays.fill(data.asInstanceOf[Array[Int]], 0, length, 0) + case a:AnyRef => Arrays.fill(data.asInstanceOf[Array[AnyRef]], 0, length, null) + } + } + this + } + + def setUpper(v:T, off:Int) = { + var i = 0 + while (i < ncols) { + var j = 0 + while (j < i+off) { + data(j + i*nrows) = v + j += 1 + } + i += 1 + } + this + } + + def setLower(v:T, off:Int) = { + var i = 0 + while (i < ncols) { + var j = math.max(0,i+1+off) + while (j < nrows) { + data(j + i*nrows) = v + j += 1 + } + i += 1 + } + this + } + + /* + * General operation between two matrices. Apply op2 to corresponding elements from the input matrices. 
+ */ + def ggMatOp(aa:DenseMat[T], op2:(T,T) => T, oldmat:Mat):DenseMat[T] = { + if (nrows==aa.nrows && ncols==1) { + val out = DenseMat.newOrCheck(nrows, aa.ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < aa.ncols) { + var j = 0 + while (j < nrows) { + out.data(j+i*nrows) = op2(data(j), aa.data(j+i*aa.nrows)) + j += 1 + } + i += 1 + } + out + } else if (ncols==aa.ncols && nrows==1) { + val out = DenseMat.newOrCheck[T](aa.nrows, ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < ncols) { + var j = 0 + while (j < aa.nrows) { + out.data(j+i*aa.nrows) = op2(data(i), aa.data(j+i*aa.nrows)) + j += 1 + } + i += 1 + } + out + } else if (nrows==aa.nrows && aa.ncols==1) { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + var j = 0 + while (j < nrows) { + out.data(j+i*nrows) = op2(data(j+i*nrows), aa.data(j)) + j += 1 + } + i += 1 + } + out + } else if (ncols==aa.ncols && aa.nrows==1) { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + var j = 0 + while (j < nrows) { + out.data(j+i*nrows) = op2(data(j+i*nrows), aa.data(i)) + j += 1 + } + i += 1 + } + out + } else ggMatOpStrict(aa, op2, oldmat) + } + + /* + * This version applies the operator op2 with stricter dimension checking, + * either dims must match or one arg must be scalar + */ + def ggMatOpStrict(aa:DenseMat[T], op2:(T,T) => T, oldmat:Mat):DenseMat[T] = + if (nrows==aa.nrows && ncols==aa.ncols) { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < aa.length) { + out.data(i) = op2(data(i), aa.data(i)) + i += 1 + } + out + } else if (aa.nrows == 1 && aa.ncols == 1) { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + val aval = aa.data(0) + var i = 0 + while (i < length) { + out.data(i) = op2(data(i), aval) + i += 1 + } + out + } else if (nrows == 1 && ncols == 1) { + val 
out = DenseMat.newOrCheck[T](aa.nrows, aa.ncols, oldmat) + Mat.nflops += aa.length + val aval = data(0) + var i = 0 + while (i < aa.length) { + out.data(i) = op2(aval, aa.data(i)) + i += 1 + } + out + } else throw new RuntimeException("dims incompatible"); + + /* + * Apply the binary operation op2 to the matrix and a scalar argument + */ + def ggMatOpScalar(a:T, op2:(T,T) => T, oldmat:Mat):DenseMat[T] = { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < length) { + out.data(i) = op2(data(i), a) + i += 1 + } + out + } + /* + * General operation between two matrices. Apply op2 to corresponding elements from the input matrices. + * Implemented with vector operation primitives. + */ + def ggMatOpv(aa:DenseMat[T], opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = + if (nrows==aa.nrows && ncols==1) { + val out = DenseMat.newOrCheck[T](nrows, aa.ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < aa.ncols) { + opv(data, 0, 1, aa.data, i*aa.nrows, 1, out.data, i*nrows, 1, nrows) + i += 1 + } + out + } else if (ncols==aa.ncols && nrows==1) { + val out = DenseMat.newOrCheck[T](aa.nrows, ncols, oldmat) + Mat.nflops += aa.length + var i = 0 + while (i < ncols) { + opv(data, i, 0, aa.data, i*aa.nrows, 1, out.data, i*aa.nrows, 1, aa.nrows) + i += 1 + } + out + } else if (nrows==aa.nrows && aa.ncols==1) { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + opv(data, i*nrows, 1, aa.data, 0, 1, out.data, i*nrows, 1, nrows) + i += 1 + } + out + } else if (ncols==aa.ncols && aa.nrows==1) { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + opv(data, i*nrows, 1, aa.data, i, 0, out.data, i*nrows, 1, nrows) + i += 1 + } + out + } else ggMatOpStrictv(aa, opv, oldmat); + + + def ggMatOpStrictv(aa:DenseMat[T], 
opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = { + var out:DenseMat[T] = null + var mylen = 0 + if ((nrows==aa.nrows && ncols==aa.ncols) || (aa.nrows == 1 && aa.ncols == 1)) { + out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + mylen = length + } else if (nrows == 1 && ncols == 1) { + val out = DenseMat.newOrCheck[T](aa.nrows, aa.ncols, oldmat) + mylen = aa.length + } else throw new RuntimeException("dims incompatible") + if (mylen > 100000 && Mat.numThreads > 1) { + val done = IMat(1, Mat.numThreads) + for (ithread<- 0 until Mat.numThreads) { + val istart = ithread*mylen/Mat.numThreads + val len = (ithread+1)*mylen/Mat.numThreads - istart + actor { + if (nrows==aa.nrows && ncols==aa.ncols) { + opv(data, istart, 1, aa.data, istart, 1, out.data, istart, 1, len) + } else if (aa.nrows == 1 && aa.ncols == 1) { + opv(data, istart, 1, aa.data, 0, 0, out.data, istart, 1, len) + } else { + opv(data, 0, 0, aa.data, istart, 1, out.data, istart, 1, len) + } + done(ithread) = 1 + } + } + while (SciFunctions.sum(done).v < Mat.numThreads) {Thread.`yield`()} + } else if (nrows==aa.nrows && ncols==aa.ncols) { + opv(data, 0, 1, aa.data, 0, 1, out.data, 0, 1, aa.length) + } else if (aa.nrows == 1 && aa.ncols == 1) { + opv(data, 0, 1, aa.data, 0, 0, out.data, 0, 1, length) + } else if (nrows == 1 && ncols == 1) { + opv(data, 0, 0, aa.data, 0, 1, out.data, 0, 1, aa.length) + } + Mat.nflops += mylen + out + } + + def ggMatOpScalarv(a:T, opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = { + val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat) + Mat.nflops += length + val aa = new Array[T](1) + aa(0) = a + opv(data, 0, 1, aa, 0, 0, out.data, 0, 1, length) + out + } + + def ggReduceOp(dim0:Int, op1:(T) => T, op2:(T,T) => T, oldmat:Mat):DenseMat[T] = { + var dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0) + if (dim == 1) { + val out = DenseMat.newOrCheck[T](1, ncols, oldmat) + 
Mat.nflops += length + var i = 0 + while (i < ncols) { + var j = 1 + var acc = op1(data(i*nrows)) + while (j < nrows) { + acc = op2(acc, data(j+i*nrows)) + j += 1 + } + out.data(i) = acc + i += 1 + } + out + } else if (dim == 2) { + val out = DenseMat.newOrCheck[T](nrows, 1, oldmat) + Mat.nflops += length + var j = 0 + while (j < nrows) { + out.data(j) = op1(data(j)) + j += 1 + } + var i = 1 + while (i < ncols) { + var j = 0 + while (j < nrows) { + out.data(j) = op2(out.data(j), data(j+i*nrows)) + j += 1 + } + i += 1 + } + out + } else + throw new RuntimeException("index must 1 or 2"); + } + + def ggOpt2(dim0:Int, op2:(T,T) => Boolean):(DenseMat[T],IMat) = { + var dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0) + if (dim == 1) { + val out = new DenseMat[T](1, ncols) + val iout = IMat(1, ncols) + Mat.nflops += length + var i = 0 + while (i < ncols) { + var j = 1 + var acc = data(i*nrows) + var iacc = 0 + while (j < nrows) { + val v = data(j+i*nrows) + if (op2(v, acc)) { + acc = v + iacc = j + } + j += 1 + } + out.data(i) = acc + iout.data(i) = iacc + i += 1 + } + (out, iout) + } else if (dim == 2) { + val out = new DenseMat[T](nrows, 1) + val iout = IMat(nrows, 1) + Mat.nflops += length + var j = 0 + while (j < nrows) { + out.data(j) = data(j) + iout.data(j) = 0 + j += 1 + } + var i = 1 + while (i < ncols) { + var j = 0 + while (j < nrows) { + val v = data(j+i*nrows) + if (op2(v, out.data(j))) { + out.data(j) = v + iout.data(j) = i + } + j += 1 + } + i += 1 + } + (out, iout) + } else + throw new RuntimeException("index must 1 or 2"); + } + + def ggReduceOpv(dim0:Int, opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = { + var dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0) + if (dim == 1) { + val out = DenseMat.newOrCheck[T](1, ncols, oldmat) + Mat.nflops += length + var i = 0 + while (i < ncols) { + out.data(i) = data(i*nrows) + opv(data, i*nrows+1, 1, out.data, i, 0, out.data, i, 0, nrows-1) + i += 1 
/**
 * Running (cumulative) reduction along one dimension; the result has the same shape as this
 * matrix.
 *
 * The first element of each column (dimension 1) or row (dimension 2) is mapped through
 * `op1`; every later output is `op2(previous output, current input)`.
 *
 * @param dim0   1 accumulates down columns, 2 accumulates across rows; 0 resolves to 2 for a
 *               single-row matrix and to 1 otherwise.
 * @param op1    applied to the first element of each scanned line.
 * @param op2    combines the running value (first argument) with the next input element.
 * @param oldmat candidate output storage, handed to `DenseMat.newOrCheck`.
 * @throws RuntimeException if the resolved dimension is greater than 2.
 */
def ggReduceAll(dim0:Int, op1:(T) => T, op2:(T,T) => T, oldmat:Mat):DenseMat[T] = {
  val dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0)
  if (dim > 2) throw new RuntimeException("index must 1 or 2")
  val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
  Mat.nflops += length
  if (dim == 1) {
    var col = 0
    while (col < ncols) {
      val base = col*nrows
      var running = op1(data(base))
      out.data(base) = running
      var row = 1
      while (row < nrows) {
        running = op2(running, data(base+row))
        out.data(base+row) = running
        row += 1
      }
      col += 1
    }
  } else {
    // First column goes through op1 ...
    var row = 0
    while (row < nrows) {
      out.data(row) = op1(data(row))
      row += 1
    }
    // ... and every later entry combines with the output one column to its left, which sits
    // exactly nrows positions earlier in column-major storage.
    val total = nrows*ncols
    var pos = nrows
    while (pos < total) {
      out.data(pos) = op2(out.data(pos-nrows), data(pos))
      pos += 1
    }
  }
  out
}
/**
 * Strided element-wise three-way comparison.
 *
 * For k in [0, n): compares `a(a0 + k*ainc)` with `b(b0 + k*binc)` and stores
 * `xmap(0)`, `xmap(1)` or `xmap(2)` into `c(c0 + k*cinc)` when the first operand is
 * respectively smaller than, equal to, or greater than the second.
 *
 * Two details matter for correctness:
 *  - `numeric.compare` only promises the sign of its result (Byte ordering, for example, may
 *    return the raw difference), so the result is reduced with `signum` before it is used as
 *    an index into the three-entry `xmap`.
 *  - Iterations are counted explicitly rather than by testing the output cursor against
 *    `c0 + n`, so the kernel performs exactly `n` steps for any stride, including 0.
 *
 * @param xmap three output values for less / equal / greater.
 * @return `numeric.zero`; the useful result is the contents of `c`.
 */
def vecCmp[@specialized(Double, Float, Int, Byte) T](xmap:Array[T])(
    a:Array[T], a0:Int, ainc:Int,
    b:Array[T], b0:Int, binc:Int,
    c:Array[T], c0:Int, cinc:Int, n:Int)(implicit numeric:Numeric[T]):T = {
  var ai = a0
  var bi = b0
  var ci = c0
  var k = 0
  while (k < n) {
    val sign = math.signum(numeric.compare(a(ai), b(bi)))
    c(ci) = xmap(sign+1)
    ai += ainc
    bi += binc
    ci += cinc
    k += 1
  }
  numeric.zero
}
/**
 * Validates a sequence of indices against an axis of extent `n` and returns them as an array.
 *
 * Each entry is checked after subtracting `Mat.oneBased`; the returned array holds the
 * entries exactly as supplied (not offset-corrected), matching what `getInds` returns for a
 * non-wildcard index matrix. Earlier the result array was allocated but never filled, and its
 * length was capped at `n`, so callers received zeros and lost entries when indices repeated.
 *
 * @throws RuntimeException if any index falls outside the axis.
 */
def getSInds(in:Seq[Int], n:Int):Array[Int] = {
  val off = Mat.oneBased
  val inds = new Array[Int](in.length)
  // Iterate rather than index so that linear sequences are not traversed repeatedly.
  val it = in.iterator
  var i = 0
  while (it.hasNext) {
    val raw = it.next()
    val ind = raw - off
    if (ind < 0 || ind >= n) {
      throw new RuntimeException("index out of range "+(ind+off)+" vs "+n)
    }
    inds(i) = raw
    i += 1
  }
  inds
}

/**
 * Sorts `a(from)` up to but excluding `a(to)` in place, ascending, by dispatching on the
 * runtime array type to `java.util.Arrays.sort`. Only Double, Float, Int and Byte arrays
 * have a case; any other array type matches nothing and raises a MatchError.
 */
def genSort[@specialized(Double, Float, Int, Byte) T](a:Array[T],from:Int,to:Int):Unit = {
  a match {
    case aa:Array[Double] => {
      Arrays.sort(aa, from, to)
    }
    case aa:Array[Float] => {
      Arrays.sort(aa, from, to)
    }
    case aa:Array[Int] => {
      Arrays.sort(aa, from, to)
    }
    case aa:Array[Byte] => {
      Arrays.sort(aa, from, to)
    }
  }
}

/** Sorts the whole array in place, ascending. */
def genSort[@specialized(Double, Float, Int, Byte) T](a:Array[T]):Unit = {
  genSort(a, 0, a.size)
}

/** Reverses `a(from)` up to but excluding `a(to)` in place by swapping from both ends. */
def reverse[@specialized(Double, Float, Int, Byte) T](a:Array[T],from:Int,to:Int) = {
  var i = 0
  var n = to - from
  while (2*i < n-1) {
    val tmp = a(i+from)
    a(i+from) = a(to-i-1)
    a(to-i-1) = tmp
    i += 1
  }
}

/** Reverses the whole array in place. */
def reverse[@specialized(Double, Float, Int, Byte) T](a:Array[T]):Unit = {
  reverse(a, 0, a.size)
}

/**
 * Returns a sorted copy of `a`; the input is left untouched.
 *
 * A matrix with a single row or a single column is sorted as one sequence regardless of
 * `ik0`. Otherwise each column (dimension 1) or each row (any other dimension) is sorted
 * independently.
 *
 * @param ik0 dimension to sort along; 0 resolves to 2 for a single-row matrix, else 1.
 * @param asc true for ascending order, false for descending.
 */
def sort[@specialized(Double, Float, Int, Byte) T](a:DenseMat[T], ik0:Int, asc:Boolean)
(implicit classManifest:ClassManifest[T], ordering:Ordering[T]):DenseMat[T] = {
  import BIDMat.Sorting._
  val out = new DenseMat[T](a.nrows, a.ncols)
  val ik = if (ik0 != 0) ik0 else if (a.nrows == 1) 2 else 1
  if (a.nrows == 1 || a.ncols == 1) {
    System.arraycopy(a.data, 0, out.data, 0, a.length)
    genSort(out.data)
    if (!asc) {
      // Reverse the sorted copy. Reversing a.data here (as before) left the result ascending
      // and scrambled the caller's input.
      reverse(out.data)
    }
    out
  } else if (ik == 1) {
    val thiscol = new Array[T](a.nrows)
    var i = 0
    while (i < a.ncols) {
      var j = 0
      while (j < a.nrows) {
        thiscol(j) = a.data(j+i*a.nrows)
        j += 1
      }
      genSort(thiscol)
      j = 0
      if (asc) {
        while (j < a.nrows) {
          out.data(j+i*a.nrows) = thiscol(j)
          j += 1
        }
      } else {
        // Descending: read the ascending scratch column back to front.
        while (j < a.nrows) {
          out.data(j+i*a.nrows) = thiscol(a.nrows-j-1)
          j += 1
        }
      }
      i += 1
    }
    out
  } else {
    val thisrow = new Array[T](a.ncols)
    var i = 0
    while (i < a.nrows) {
      var j = 0
      // Gather row i, whose entries are nrows apart in column-major storage.
      while (j < a.ncols) {
        thisrow(j) = a.data(i+j*a.nrows)
        j += 1
      }
      genSort(thisrow)
      j = 0
      if (asc) {
        while (j < a.ncols) {
          out.data(i+j*out.nrows) = thisrow(j)
          j += 1
        }
      } else {
        while (j < a.ncols) {
          out.data(i+j*out.nrows) = thisrow(a.ncols-j-1)
          j += 1
        }
      }
      i += 1
    }
    out
  }
}
/**
 * Groups equal elements of `a` after an ascending `sort2`.
 *
 * @return `(bptrs, iptrs)`: `iptrs(k)` is the zero-based group number of element `k`, with
 *         groups numbered in ascending value order; `bptrs(g)` is the smallest element
 *         position belonging to group `g`. Both are empty (0 x 1) for an empty input, which
 *         previously failed while reading the first sorted index.
 *
 * NOTE(review): relies on the two-argument `sort2`, which sorts each column separately when
 * `a` has more than one row — presumably callers pass vectors; confirm.
 */
def unique2[@specialized(Double, Float, Int) T](a:DenseMat[T])
(implicit manifest:Manifest[T], numeric:Numeric[T], ord:Ordering[T]):(IMat, IMat) = {
  if (a.length == 0) {
    (IMat(0,1), IMat(0,1))
  } else {
    val (vss, iss) = sort2(a, true)
    val iptrs = IMat(a.length,1)
    var lastpos = 0
    iptrs.data(iss.data(0)) = lastpos
    var i = 1
    while (i < iss.length) {
      // A change between neighbouring sorted values opens a new group.
      if (vss.data(i-1) != vss.data(i)) {
        lastpos += 1
      }
      iptrs.data(iss.data(i)) = lastpos
      i += 1
    }
    val bptrs = IMat(lastpos+1,1)
    // Walk positions from last to first so the final write per group is its smallest position.
    i = iss.length
    while (i > 0) {
      bptrs.data(iptrs.data(i-1)) = i-1
      i = i - 1
    }
    (bptrs, iptrs)
  }
}
/**
 * Builds an `nr` x `nc` matrix by summing values into the cells named by `inds`.
 *
 * With a one-column `inds`, each entry is used directly as a position in the output's
 * storage array. With two columns, the first is the row and the second the column, and the
 * position is `row + nr*col`. Indices are used as given, without any `Mat.oneBased`
 * adjustment. `vals` supplies one value per index row, or a single value applied to all.
 *
 * The two-column path now rejects negative indices as well as too-large ones: a negative row
 * combined with a column of 1 or more used to map onto a valid but wrong cell.
 *
 * @throws RuntimeException if `inds` has more than two columns, if `vals` has more than one
 *         element but a different row count than `inds`, or if a (row, column) pair lies
 *         outside the output.
 */
def accum[@specialized(Double, Float, Int) T](inds:IMat, vals:DenseMat[T], nr:Int, nc:Int)
(implicit numeric:Numeric[T], classManifest:ClassManifest[T]):DenseMat[T] = {
  if (inds.ncols > 2 || (vals.length > 1 && (inds.nrows != vals.nrows)))
    throw new RuntimeException("mismatch in array dimensions")
  val out = new DenseMat[T](nr, nc)
  Mat.nflops += inds.nrows
  val n = inds.nrows
  val perEntry = vals.length > 1
  var i = 0
  if (inds.ncols == 1) {
    while (i < n) {
      val indx = inds.data(i)
      val v = if (perEntry) vals.data(i) else vals.data(0)
      out.data(indx) = numeric.plus(out.data(indx), v)
      i += 1
    }
  } else {
    while (i < n) {
      val r = inds.data(i)
      val c = inds.data(i+n)
      if (r < 0 || r >= nr || c < 0 || c >= nc)
        throw new RuntimeException("indices out of bounds "+r+" "+c)
      val indx = r + nr*c
      val v = if (perEntry) vals.data(i) else vals.data(0)
      out.data(indx) = numeric.plus(out.data(indx), v)
      i += 1
    }
  }
  out
}
/**
 * The single element of a 1x1 matrix, widened to Double.
 *
 * Any other shape is rejected, including shapes with a zero dimension. Those used to slip
 * past the `> 1` test and read `data(0)`, which either fails or, when the matrix shares a
 * larger recycled storage array, returns an unrelated value.
 *
 * @throws RuntimeException if the matrix is not exactly 1x1.
 */
override def dv:Double =
  if (nrows != 1 || ncols != 1) {
    throw new RuntimeException("Matrix should be 1x1 to extract value")
  } else {
    data(0)
  }
/**
 * Formats the element at storage position `i` for display.
 *
 * Whole numbers with magnitude below 1e10 print as plain integers; everything else uses five
 * significant digits. The integer path converts through Long: the threshold admits values
 * beyond the Int range, and converting those to Int would print a clamped number.
 */
override def printOne(i:Int):String = {
  val v = data(i)
  if (v % 1 == 0 && math.abs(v) < 1e10) {
    "%d" format v.toLong
  } else {
    "%.5g" format v
  }
}
/**
 * Accumulates this (dense) times `a` (sparse, read through `jc`, `ir` and `data`) into
 * `out` for output columns `istart` up to but excluding `iend`.
 *
 * For every stored entry of a sparse column, the dense column selected by the entry's row
 * index is scaled by the entry's value and added to the matching output column. `out` is
 * only added to, never reset, so the caller must supply it already cleared.
 *
 * @param ioff offset subtracted from the sparse index arrays before they are used.
 */
def fSMultHelper(a:SMat, out:FMat, istart:Int, iend:Int, ioff:Int) = {
  var col = istart
  while (col < iend) {
    val outBase = col*nrows
    var p = a.jc(col) - ioff
    while (p < a.jc(col+1) - ioff) {
      val scale = a.data(p)
      val srcBase = (a.ir(p) - ioff)*nrows
      if (!Mat.noMKL && nrows >= 220) {
        // Tall columns with native support available: hand the scaled add to the native routine.
        saxpyxx(nrows, scale, data, srcBase, out.data, outBase)
      } else {
        var r = 0
        while (r < nrows) {
          out.data(outBase+r) += data(srcBase+r)*scale
          r += 1
        }
      }
      p += 1
    }
    col += 1
  }
}
/**
 * Dense product of this matrix with the transpose of `a`: the result is nrows x a.nrows with
 * entry (i, j) equal to the sum over k of this(i, k) * a(j, k).
 *
 * When `Mat.noMKL` is set the product is computed in plain Scala, in line with `fDMult`;
 * otherwise it is delegated to `sgemm` with the second operand flagged as transposed.
 *
 * @param outmat candidate output storage, handed to `FMat.newOrCheckFMat`. It should not
 *               share storage with either operand, since the fallback clears it first.
 * @throws RuntimeException if the two matrices have different column counts.
 */
def multT(a:FMat, outmat:Mat):FMat = {
  import edu.berkeley.bid.CBLAS._
  if (ncols == a.ncols) {
    val out = FMat.newOrCheckFMat(nrows, a.nrows, outmat)
    if (Mat.noMKL) {
      out.clear
      // Column-streaming order: for each shared index k, add column k of this, scaled by
      // a(j, k), into output column j.
      var k = 0
      while (k < ncols) {
        var j = 0
        while (j < a.nrows) {
          val aval = a.data(j + k*a.nrows)
          var i = 0
          while (i < nrows) {
            out.data(i + j*nrows) += data(i + k*nrows)*aval
            i += 1
          }
          j += 1
        }
        k += 1
      }
    } else {
      sgemm(ORDER.ColMajor, TRANSPOSE.NoTrans, TRANSPOSE.Trans,
            nrows, a.nrows, ncols, 1.0f, data, nrows, a.data, a.nrows, 0, out.data, nrows)
    }
    Mat.nflops += 2L * length * a.nrows
    out
  } else {
    throw new RuntimeException("xT dimensions mismatch")
  }
}
/**
 * Reference dense product this * aa, computed one output entry at a time as the dot product
 * of a row of this matrix with a column of `aa`.
 *
 * Every output entry is assigned, so the output storage does not need clearing first.
 *
 * @param omat candidate output storage, handed to `FMat.newOrCheckFMat`.
 * @throws RuntimeException if this matrix's column count differs from `aa`'s row count.
 */
def sDMult(aa:FMat, omat:Mat):FMat = {
  if (ncols != aa.nrows) throw new RuntimeException("dimensions mismatch")
  val out = FMat.newOrCheckFMat(nrows, aa.ncols, omat)
  var col = 0
  while (col < aa.ncols) {
    val rhsBase = col*aa.nrows
    val outBase = col*out.nrows
    var row = 0
    while (row < nrows) {
      var acc = 0f
      var k = 0
      while (k < ncols) {
        acc += data(row+k*nrows) * aa.data(k+rhsBase)
        k += 1
      }
      out.data(row+outBase) = acc
      row += 1
    }
    col += 1
  }
  out
}
/**
 * Returns a matrix of the requested shape, reusing this one where possible.
 *
 * Same shape: this matrix itself. Different shape but enough storage: a new FMat wrapping
 * the same storage array. Otherwise: a freshly allocated matrix. `nnz` is not used here.
 */
override def recycle(nr:Int, nc:Int, nnz:Int):FMat = {
  val needed = nr*nc
  if (nrows != nr || ncols != nc) {
    if (data.size < needed) FMat(nr, nc) else new FMat(nr, nc, data)
  } else {
    this
  }
}
/*
 * Arithmetic with a Double scalar: the scalar is narrowed to Float and the work is done by
 * the Float code paths. `*` goes through fDMult with a scalar wrapped by FMat.felem; the
 * others apply the matching element-wise kernel.
 */
override def *  (b : Double):FMat = fDMult(FMat.felem(b.toFloat), null)
override def +  (b : Double):FMat = ffMatOpScalarv(b.toFloat, FMat.vecAdd _, null)
override def -  (b : Double):FMat = ffMatOpScalarv(b.toFloat, FMat.vecSub _, null)
override def *@ (b : Double):FMat = ffMatOpScalarv(b.toFloat, FMat.vecMul _, null)
override def /@ (b : Double):FMat = ffMatOpScalarv(b.toFloat, FMat.fVecDiv _, null)
/**
 * Pairs this matrix with `b` when only the general Mat type is known statically, choosing
 * the pair class from the runtime type of `b` (FPair for FMat, SPair for SMat).
 *
 * @throws RuntimeException for any other kind of matrix.
 */
override def ~ (b: Mat):Pair = b match {
  case dense:FMat  => new FPair(this, dense)
  case sparse:SMat => new SPair(this, sparse)
  case _ => throw new RuntimeException("mismatched types for operator ~")
}
/*
 * Mixed FMat-with-CMat operators: this matrix is first converted with CMat(...), then the
 * CMat operator of the same name does the work, so the result is always a CMat.
 */
def +  (b : CMat):CMat = CMat(this).+(b)
def -  (b : CMat):CMat = CMat(this).-(b)
def *  (b : CMat):CMat = CMat(this).*(b)
def /  (b : CMat):CMat = CMat(this)./(b)
def \\ (b : CMat):CMat = CMat(this).\\(b)
def *@ (b : CMat):CMat = CMat(this).*@(b)
def /@ (b : CMat):CMat = CMat(this)./@(b)
def \  (b : CMat):CMat = CMat(this).\(b)
def on (b : CMat):CMat = CMat(this).on(b)
/**
 * Product with the transpose of `b` when only the general Mat type is known statically;
 * dispatches to the SMat or FMat overload of multT with no preallocated output.
 *
 * @throws RuntimeException for any other kind of matrix. The match used to be
 *         non-exhaustive, so unsupported operands surfaced as a bare MatchError.
 */
override def xT (b : Mat) = b match {
  case bb:SMat => multT(bb, null)
  case bb:FMat => multT(bb, null)
  case _ => throw new RuntimeException("unsupported arg to xT "+b)
}
/*
 * Arithmetic between the paired matrix and a scalar, with results written through `omat`.
 * A Double scalar is narrowed to Float first. `^` raises each element to the given power via
 * math.pow and narrows the result back to Float.
 */
override def *  (b : Float):FMat  = mat.fDMult(FMat.felem(b), omat)
override def *  (b : Double):FMat = mat.fDMult(FMat.felem(b.toFloat), omat)
override def +  (b : Float):FMat  = mat.ffMatOpScalarv(b, FMat.vecAdd _, omat)
override def -  (b : Float):FMat  = mat.ffMatOpScalarv(b, FMat.vecSub _, omat)
override def *@ (b : Float):FMat  = mat.ffMatOpScalarv(b, FMat.vecMul _, omat)
override def /@ (b : Float):FMat  = mat.ffMatOpScalarv(b, FMat.fVecDiv _, omat)
override def ^  (b : Float):FMat  =
  mat.ffMatOpScalar(b, (base:Float, exponent:Float) => math.pow(base, exponent).toFloat, omat)
/@ (b : Int) = mat.ffMatOpScalarv(b, FMat.fVecDiv _, omat) + override def ^ (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat) + + override def > (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x > y) 1.0f else 0.0f, omat) + override def < (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x < y) 1.0f else 0.0f, omat) + override def == (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x == y) 1.0f else 0.0f, omat) + override def >= (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0.0f, omat) + override def <= (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0.0f, omat) + override def != (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x != y) 1.0f else 0.0f, omat) + + import Operator._ + override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus) + override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus) + override def * (b : Mat):Mat = applyMat(mat, b, omat, Mop_Times) + override def xT (b : Mat) = b match { + case bb:SMat => mat.multT(bb, omat) + case bb:FMat => mat.multT(bb, omat) + } + override def / (b : Mat):Mat = applyMat(mat, b, omat, Mop_Div) + override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve) + override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes) + override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv) + override def \ (b : Mat):Mat = applyMat(mat, b, omat, Mop_HCat) + override def on (b : Mat):Mat = applyMat(mat, b, omat, Mop_VCat) + + override def > (b : Mat):Mat = applyMat(mat, b, omat, Mop_GT) + override def < (b : Mat):Mat = applyMat(mat, b, omat, Mop_LT) + override def >= (b : Mat):Mat = applyMat(mat, b, omat, Mop_GE) + override def <= (b : Mat):Mat = applyMat(mat, b, omat, Mop_LE) + override def == (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ) + override def === (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ) + override def != (b : Mat):Mat = applyMat(mat, b, omat, 
Mop_NE) +} + +object FMat { + + def fVecDiv(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = a(ai) / b(bi); ai += ainc; bi += binc; ci += cinc + } + 0f + } + + def apply(nr:Int, nc:Int) = new FMat(nr, nc, new Array[Float](nr*nc)) + + def apply(a:DenseMat[Float]):FMat = new FMat(a.nrows, a.ncols, a.data) + + def apply(x:Mat):FMat = { + var out:FMat = null + x match { + case dd:DMat => {out = FMat(x.nrows, x.ncols); Mat.copyToFloatArray(dd.data, 0, out.data, 0, dd.length)} + case ff:FMat => {out = FMat(x.nrows, x.ncols); System.arraycopy(ff.data, 0, out.data, 0, ff.length)} + case ii:IMat => {out = FMat(x.nrows, x.ncols); Mat.copyToFloatArray(ii.data, 0, out.data, 0, ii.length)} + case ss:SMat => out = FMat(ss.full) + case gg:GMat => out = gg.toFMat + case _ => throw new RuntimeException("Unsupported source type") + } + out + } + + def vecAdd(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = a(ai) + b(bi); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + def vecSub(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = a(ai) - b(bi); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + def vecMul(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = a(ai) * b(bi); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + def vecMax(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = { + var 
/**
 * Dense integer matrix whose elements are held behind a device pointer.
 *
 * @param nr       number of rows
 * @param nc       number of columns
 * @param data     device pointer to the elements
 * @param realsize number of elements the buffer behind `data` was allocated for;
 *                 `recycle` uses it to decide whether the buffer can be reused
 */
class GIMat(nr:Int, nc:Int, val data:Pointer, val realsize:Int) extends Mat(nr, nc) {

  /** Copies at most the leading 10 x 50 corner to a host IMat and prints that. */
  override def toString:String = {
    val nr = scala.math.min(nrows,10)
    val nc = scala.math.min(ncols,50)
    val tmpMat = IMat(nr, nc)
    JCublas.cublasGetMatrix(nr, nc, Sizeof.INT, data, nrows, Pointer.to(tmpMat.data), nr)
    tmpMat.toString
  }

  override def mytype = "GIMat"

  /**
   * Applies the element-wise binary operation `op` to this matrix and `a`.
   *
   * Shapes are accepted when they are equal, when one operand is a row or
   * column vector matching the other operand along the shared dimension, or
   * when either operand is 1x1.
   *
   * @param a      right-hand operand
   * @param oldmat optional output matrix to reuse; null allocates a new one
   * @param op     operation code handed to CUMAT.applyiop (0..9, see the operators below)
   * @return the result matrix
   * @throws RuntimeException("dimensions mismatch") for incompatible shapes
   */
  def GIop(a:GIMat, oldmat:GIMat, op:Int):GIMat = {
    val compatible =
      (nrows == a.nrows && ncols == a.ncols) ||
      (nrows == a.nrows && (a.ncols == 1 || ncols == 1)) ||
      (ncols == a.ncols && (a.nrows == 1 || nrows == 1)) ||
      (a.ncols == 1 && a.nrows == 1) ||
      (ncols == 1 && nrows == 1)
    if (compatible) {
      // The result takes the larger extent along each dimension, as GMat.gOp does.
      // Sizing it as (nrows, a.ncols) left the output too small whenever the
      // smaller operand was on the left (rows) or on the right (columns).
      val out = GIMat.newOrCheckGIMat(scala.math.max(nrows, a.nrows), scala.math.max(ncols, a.ncols), oldmat)
      Mat.nflops += scala.math.max(length, a.length)
      CUMAT.applyiop(data, nrows, ncols, a.data, a.nrows, a.ncols, out.data, op)
      JCuda.cudaDeviceSynchronize()
      out
    } else throw new RuntimeException("dimensions mismatch")
  }

  /** Copies all nrows*ncols elements into a newly allocated host IMat. */
  def toIMat():IMat = {
    val out = IMat(nrows, ncols)
    JCublas.cublasGetVector(nrows*ncols, Sizeof.INT, data, 1, Pointer.to(out.data), 1);
    out
  }

  /** Releases the device buffer; returns the status reported by cublasFree. */
  def free() = {
    JCublas.cublasFree(data)
  }

  def + (a : GIMat) = GIop(a, null, 0)
  def - (a : GIMat) = GIop(a, null, 1)
  def *@ (a : GIMat) = GIop(a, null, 2)
  def /@ (a : GIMat) = GIop(a, null, 3)
  def > (b : GIMat) = GIop(b, null, 4)
  def < (b : GIMat) = GIop(b, null, 5)
  def == (b : GIMat) = GIop(b, null, 6)
  def === (b : GIMat) = GIop(b, null, 6)
  def >= (b : GIMat) = GIop(b, null, 7)
  def <= (b : GIMat) = GIop(b, null, 8)
  def != (b : GIMat) = GIop(b, null, 9)

  /** Pairs this matrix (as the output target) with `b` (as the left operand). */
  def ~ (b: GIMat) = new GIPair(this, b)

  /**
   * Returns a matrix of shape nr x nc, reusing this one where possible:
   * `this` when the shape already matches, a new GIMat over the same device
   * pointer when `realsize` is large enough, otherwise this buffer is freed
   * and a new one is allocated. `nnz` is not used here.
   */
  override def recycle(nr:Int, nc:Int, nnz:Int):GIMat = {
    if (nrows == nr && nc == ncols) {
      this
    } else if (realsize >= nr*nc) {
      new GIMat(nr, nc, data, realsize)
    } else {
      free
      GIMat(nr, nc)
    }
  }
}

/**
 * Output/operand pair produced by `omat ~ mat`: each operator computes
 * `mat op a` through GIop, passing `omat` as the matrix to reuse for the result.
 */
class GIPair (val omat:GIMat, val mat:GIMat){

  def + (a : GIMat) = mat.GIop(a, omat, 0)
  def - (a : GIMat) = mat.GIop(a, omat, 1)
  def *@ (a : GIMat) = mat.GIop(a, omat, 2)
  def /@ (a : GIMat) = mat.GIop(a, omat, 3)
  def > (b : GIMat) = mat.GIop(b, omat, 4)
  def < (b : GIMat) = mat.GIop(b, omat, 5)
  def == (b : GIMat) = mat.GIop(b, omat, 6)
  def === (b : GIMat) = mat.GIop(b, omat, 6)
  def >= (b : GIMat) = mat.GIop(b, omat, 7)
  def <= (b : GIMat) = mat.GIop(b, omat, 8)
  def != (b : GIMat) = mat.GIop(b, omat, 9)
}
/**
 * Dense single-precision matrix whose elements are held behind a device pointer.
 *
 * @param nr       number of rows
 * @param nc       number of columns
 * @param data     device pointer to the elements
 * @param realsize number of elements the buffer behind `data` was allocated for;
 *                 `recycle` uses it to decide whether the buffer can be reused
 */
class GMat(nr:Int, nc:Int, val data:Pointer, val realsize:Int) extends Mat(nr, nc) {

  /** Value of a 1x1 matrix, fetched through a host copy. */
  override def dv:Double =
    if (nrows > 1 || ncols > 1) {
      throw new RuntimeException("Matrix should be 1x1 to extract value")
    } else {
      toFMat.data(0)
    }

  override def mytype = "GMat"

  override def nnz = length

  /** Zero-fills the device buffer and returns this matrix. */
  override def clear = {
    cudaMemset(data, 0, Sizeof.FLOAT*length)
    cudaDeviceSynchronize
    this
  }

  /** Transpose into a newly allocated ncols x nrows matrix. */
  override def t = {
    val out = GMat(ncols, nrows)
    CUMAT.transpose(this.data, nrows, out.data, ncols, nrows, ncols)
    cudaDeviceSynchronize()
    out
  }

  /** Sets every element to `v`; the host source is read with increment 0. */
  override def set(v:Float):GMat = {
    val a = MatFunctions.row(v)
    JCublas.cublasSetVector(length, Sizeof.FLOAT, Pointer.to(a.data), 0, data, 1);
    cudaDeviceSynchronize()
    this
  }


  /** Copies at most the leading 10 x 50 corner to a host FMat and prints that. */
  override def toString:String = {
    val nr = scala.math.min(nrows,10)
    val nc = scala.math.min(ncols,50)
    val tmpMat = FMat(nr, nc)
    cublasGetMatrix(nr, nc, Sizeof.FLOAT, data, nrows, Pointer.to(tmpMat.data), nr)
    cudaDeviceSynchronize()
    tmpMat.toString
  }

  override def zeros(nr:Int, nc:Int) = GMat.gzeros(nr, nc)

  // The first parameter is spelled `nt`; the body used to pass `nr`, which
  // resolved to the constructor argument, so the requested row count was ignored.
  override def ones(nt:Int, nc:Int) = GMat.gones(nt, nc)

  /**
   * Matrix product this * a. Also accepts a 1x1 operand on either side, in
   * which case the other operand is scaled by that value.
   *
   * @param a      right-hand operand
   * @param oldmat optional output matrix to reuse; null allocates a new one
   * @throws RuntimeException on a shape mismatch or a reported cuBLAS error
   */
  def GMult(a:GMat, oldmat:Mat):GMat = {
    if (ncols == a.nrows) {
      val out = GMat.newOrCheckGMat(nrows, a.ncols, oldmat)
      Mat.nflops += 2L * length * a.ncols
      cublasSgemm('n', 'n', nrows, a.ncols, ncols, 1.0f, data, nrows, a.data, a.nrows, 0f, out.data, nrows)
      cudaDeviceSynchronize()
      // Read the status once, as GMultT does: a second cublasGetError call for
      // the message would report the already-reset status instead of the error.
      val ee = cublasGetError
      if (ee != 0) {
        println("device is %d" format SciFunctions.device)
        throw new RuntimeException("Cublas error in * "+ee)
      }
      out
    } else if (ncols == 1 && nrows == 1) {
      val out = GMat.newOrCheckGMat(a.nrows, a.ncols, oldmat)
      Mat.nflops += 1L * a.length
      out.clear
      // out was just zeroed, so out += scalar * a leaves scalar * a.
      cublasSaxpy(a.length, this.dv.asInstanceOf[Float], a.data, 1, out.data, 1)
      cudaDeviceSynchronize()
      out
    } else if (a.ncols == 1 && a.nrows == 1) {
      val out = GMat.newOrCheckGMat(nrows, ncols, oldmat)
      Mat.nflops += 1L * length
      out.clear
      cublasSaxpy(length, a.dv.asInstanceOf[Float], data, 1, out.data, 1)
      cudaDeviceSynchronize()
      out
    } else throw new RuntimeException("dimensions mismatch")
  }

  /** Product with the transpose of `a` (this * a^T); requires equal column counts. */
  def GMultT(a:GMat, oldmat:Mat):GMat = {
    if (ncols == a.ncols) {
      val out = GMat.newOrCheckGMat(nrows, a.nrows, oldmat)
      Mat.nflops += 2L * length * a.nrows
      cublasSgemm('n', 't', nrows, a.nrows, ncols, 1.0f, data, nrows, a.data, a.nrows, 0f, out.data, nrows)
      cudaDeviceSynchronize()
      val ee = cublasGetError
      if (ee != 0) {
        println("device is %d" format SciFunctions.device)
        throw new RuntimeException("Cublas error in xT "+ee)
      }
      out
    } else throw new RuntimeException("dimensions mismatch")
  }

  /** Product with a sparse right-hand operand, computed by CUMAT.dsmult into a zeroed output. */
  def GSMult(a:GSMat, oldmat:Mat):GMat = {
    if (ncols == a.nrows) {
      val out = GMat.newOrCheckGMat(nrows, a.ncols, oldmat)
      Mat.nflops += 2L * nrows * a.nnz
      out.clear
      CUMAT.dsmult(nrows, ncols, a.nnz, data, a.data, a.ir, a.ic, out.data)
      cudaDeviceSynchronize()
      out
    } else throw new RuntimeException("dimensions mismatch")
  }

  /** Product with the transpose of a sparse operand, computed by CUMAT.dsmultT. */
  def GSMultT(a:GSMat, oldmat:Mat):GMat = {
    if (ncols == a.ncols) {
      val out = GMat.newOrCheckGMat(nrows, a.nrows, oldmat)
      Mat.nflops += 2L * nrows * a.nnz
      out.clear
      CUMAT.dsmultT(nrows, ncols, a.nnz, data, a.data, a.ir, a.ic, out.data)
      cudaDeviceSynchronize()
      out
    } else throw new RuntimeException("dimensions mismatch")
  }

  /**
   * Element-wise binary operation with vector/scalar broadcasting.
   *
   * @param a      right-hand operand
   * @param oldmat optional output matrix to reuse; null allocates a new one
   * @param op     one of the GMat.BinOp codes
   * @throws RuntimeException("dimensions mismatch") for incompatible shapes
   */
  def gOp(a:GMat, oldmat:Mat, op:Int):GMat = {
    if ((nrows == a.nrows && ncols == a.ncols) ||
        (nrows == a.nrows && (a.ncols == 1 || ncols == 1)) ||
        (ncols == a.ncols && (a.nrows == 1 || nrows == 1)) ||
        (a.ncols == 1 && a.nrows == 1) ||
        (ncols == 1 && nrows == 1)) {
      val out = GMat.newOrCheckGMat(math.max(nrows, a.nrows), math.max(ncols, a.ncols), oldmat)
      Mat.nflops += scala.math.max(length, a.length)
      CUMAT.applyop(data, nrows, ncols, a.data, a.nrows, a.ncols, out.data, op)
      cudaDeviceSynchronize()
      out
    } else throw new RuntimeException("dimensions mismatch")
  }

  /** Dot product of all elements; both matrices must have the same shape. */
  def dot (a : GMat):Double =
    if (nrows != a.nrows || ncols != a.ncols) {
      throw new RuntimeException("dot dims not compatible")
    } else {
      cublasSdot(length, data, 1, a.data, 1)
    }

  /** Dot product against a generic Mat; only a GMat argument is matched. */
  override def dot (a : Mat):Double =
    if (nrows != a.nrows || ncols != a.ncols) {
      throw new RuntimeException("dot dims not compatible")
    } else {
      a match {
        case aa:GMat => cublasSdot(length, data, 1, aa.data, 1)
      }
    }

  /**
   * Reduction with operation code `op`. dir 1 (or dir 0 when nrows > 1)
   * produces a 1 x ncols result via CUMAT.reduce1op; dir 2 (or dir 0
   * otherwise) produces an nrows x 1 result via CUMAT.reduce2op.
   */
  def reduceOp(oldmat:Mat, dir:Int, op:Int):GMat = {
    if (dir == 1 || (dir == 0 && nrows > 1)) {
      val out = GMat.newOrCheckGMat(1, ncols, oldmat)
      out.clear
      CUMAT.reduce1op(nrows, ncols, data, out.data, op)
      Mat.nflops += length
      cudaDeviceSynchronize()
      out
    } else if (dir == 2 || dir == 0) {
      val out = GMat.newOrCheckGMat(nrows, 1, oldmat)
      out.clear
      CUMAT.reduce2op(nrows, ncols, data, out.data, op)
      Mat.nflops += length
      cudaDeviceSynchronize()
      out
    } else {
      throw new RuntimeException("dimension must be 1 or 2")
    }
  }

  /** Copies all elements into a newly allocated host FMat. */
  def toFMat():FMat = {
    val out = FMat(nrows, ncols)
    cublasGetVector(nrows*ncols, Sizeof.FLOAT, data, 1, Pointer.to(out.data), 1)
    cudaDeviceSynchronize()
    out
  }

  /** Copies all elements into `out` recycled to this shape; returns the recycled matrix. */
  def copyTo(out:FMat):FMat = {
    val a = out.recycle(nrows, ncols, 0)
    cublasGetVector(nrows*ncols, Sizeof.FLOAT, data, 1, Pointer.to(a.data), 1)
    cudaDeviceSynchronize()
    a
  }

  /** Overwrites this matrix with nrows*ncols elements read from `in`; no shape check is made. */
  def copyFrom(in:FMat):GMat = {
    cublasSetVector(nrows*ncols, Sizeof.FLOAT, Pointer.to(in.data), 1, data, 1)
    cudaDeviceSynchronize()
    this
  }

  /** Device-to-device copy into `out` recycled to this shape. */
  def copyTo(out:GMat):GMat = {
    val a = out.recycle(nrows, ncols, 0)
    cudaMemcpy(a.data, data, length*Sizeof.FLOAT, cudaMemcpyKind.cudaMemcpyDeviceToDevice)
    cudaDeviceSynchronize()
    a
  }

  override def copyTo(out:Mat):Mat = {
    out match {
      case a:FMat => copyTo(a)
      case a:GMat => copyTo(a)
    }
  }

  /** Releases the device buffer; returns the status reported by cublasFree. */
  def free() = {
    JCublas.cublasFree(data)
  }

  import GMat.BinOp._
  def * (a : GMat) = GMult(a, null)
  def * (a : GSMat) = GSMult(a, null)
  def xT (a : GMat) = GMultT(a, null)
  def xT (a : GSMat) = GSMultT(a, null)
  def + (a : GMat) = gOp(a, null, op_add)
  def - (a : GMat) = gOp(a, null, op_sub)
  def *@ (a : GMat) = gOp(a, null, op_mul)
  def /@ (a : GMat) = gOp(a, null, op_div)

  def > (b : GMat) = gOp(b, null, op_gt)
  def < (b : GMat) = gOp(b, null, op_lt)
  def == (b : GMat) = gOp(b, null, op_eq)
  def === (b : GMat) = gOp(b, null, op_eq)
  def >= (b : GMat) = gOp(b, null, op_ge)
  def <= (b : GMat) = gOp(b, null, op_le)
  def != (b : GMat) = gOp(b, null, op_ne)

  // Scalar forms upload the scalar as a 1x1 GMat and rely on gOp broadcasting.
  // NOTE(review): that temporary is never freed here - confirm whether that is intended.
  override def + (b : Float):Mat = gOp(GMat(b), null, op_add)
  override def - (b : Float):Mat = gOp(GMat(b), null, op_sub)
  override def *@ (b : Float):Mat = gOp(GMat(b), null, op_mul)
  override def /@ (b : Float):Mat = gOp(GMat(b), null, op_div)

  override def > (b : Float) = gOp(GMat(b), null, op_gt)
  override def < (b : Float) = gOp(GMat(b), null, op_lt)
  override def == (b : Float) = gOp(GMat(b), null, op_eq)
  override def === (b : Float) = gOp(GMat(b), null, op_eq)
  override def >= (b : Float) = gOp(GMat(b), null, op_ge)
  override def <= (b : Float) = gOp(GMat(b), null, op_le)
  override def != (b : Float) = gOp(GMat(b), null, op_ne)

  /** Pairs this matrix (as the output target) with `b` (as the left operand). */
  def ~ (b: GMat) = new GPair(this, b)
  def ~ (b: GSMat) = new GSPair(this, b)
  override def ~ (b: Mat):Pair = b match {
    case bb:GMat => new GPair(this, bb)
    case bb:GSMat => new GSPair(this, bb)
  }

  import Operator._
  override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus)
  override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus)
  override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times)
  override def * (b : Float):Mat = applyMat(this, GMat(FMat.felem(b)), null, Mop_Times)
  override def * (b : Int):Mat = applyMat(this, GMat(FMat.felem(b)), null, Mop_Times)
  override def * (b : Double):Mat = applyMat(this, GMat(FMat.felem(b.asInstanceOf[Float])), null, Mop_Times)
  override def xT (b : Mat) = b match {
    case bb:GSMat => GSMultT(bb, null)
    case bb:GMat => GMultT(bb, null)
  }
  override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div)
  override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve)
  override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes)
  override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv)
  override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat)
  override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat)

  override def > (b : Mat):Mat = applyMat(this, b, null, Mop_GT)
  override def < (b : Mat):Mat = applyMat(this, b, null, Mop_LT)
  override def >= (b : Mat):Mat = applyMat(this, b, null, Mop_GE)
  override def <= (b : Mat):Mat = applyMat(this, b, null, Mop_LE)
  override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
  override def === (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
  override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE)

  /**
   * Returns a matrix of shape nr x nc, reusing this one where possible:
   * `this` when the shape already matches, a new GMat over the same device
   * pointer when `realsize` is large enough, otherwise this buffer is freed
   * and a new one is allocated. `nnz` is not used here.
   */
  override def recycle(nr:Int, nc:Int, nnz:Int):GMat = {
    if (nrows == nr && nc == ncols) {
      this
    } else if (realsize >= nr*nc) {
      new GMat(nr, nc, data, realsize)
    } else {
      free
      GMat(nr, nc)
    }
  }
}
: GMat) = mat.gOp(a, omat, op_add) + def - (a : GMat) = mat.gOp(a, omat, op_sub) + def *@ (a : GMat) = mat.gOp(a, omat, op_mul) + def /@ (a : GMat) = mat.gOp(a, omat, op_div) + def > (b : GMat) = mat.gOp(b, omat, op_gt) + def < (b : GMat) = mat.gOp(b, omat, op_lt) + def == (b : GMat) = mat.gOp(b, omat, op_eq) + def === (b : GMat) = mat.gOp(b, omat, op_eq) + def >= (b : GMat) = mat.gOp(b, omat, op_ge) + def <= (b : GMat) = mat.gOp(b, omat, op_le) + def != (b : GMat) = mat.gOp(b, omat, op_ne) + + override def + (b : Float):Mat = mat.gOp(GMat(b), omat, op_add) + override def - (b : Float):Mat = mat.gOp(GMat(b), omat, op_sub) + override def *@ (b : Float):Mat = mat.gOp(GMat(b), omat, op_mul) + override def * (b : Float):Mat = mat.gOp(GMat(b), omat, op_mul) + override def /@ (b : Float):Mat = mat.gOp(GMat(b), omat, op_div) + + override def > (b : Float) = mat.gOp(GMat(b), omat, op_gt) + override def < (b : Float) = mat.gOp(GMat(b), omat, op_lt) + override def == (b : Float) = mat.gOp(GMat(b), omat, op_eq) + override def === (b : Float) = mat.gOp(GMat(b), omat, op_eq) + override def >= (b : Float) = mat.gOp(GMat(b), omat, op_ge) + override def <= (b : Float) = mat.gOp(GMat(b), omat, op_le) + override def != (b : Float) = mat.gOp(GMat(b), omat, op_ne) + + def * (a : GMat) = mat.GMult(a, omat) + def * (a : GSMat) = mat.GSMult(a, omat) + + override def * (b: Mat):Mat = b match { + case bb:GMat => mat.GMult(bb, omat) + case bb:GSMat => mat.GSMult(bb, omat) + } + + def xT (a : GSMat) = mat.GSMultT(a, omat) + def xT (a : GMat) = mat.GMultT(a, omat) + override def xT (b: Mat):Mat = b match { + case bb:GSMat => mat.GSMultT(bb, omat) + case bb:GMat => mat.GMultT(bb, omat) + } + + import Operator._ + override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus) + override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus) + override def / (b : Mat):Mat = applyMat(mat, b, omat, Mop_Div) + override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve) + override def *@ (b 
/** Factories, operation codes and helpers for GMat. */
object GMat {

  /** Operation codes passed to CUMAT.applyop by GMat.gOp. */
  object BinOp {
    val op_add=0
    val op_sub=1
    val op_mul=2
    val op_div=3
    val op_gt=4
    val op_lt=5
    val op_eq=6
    val op_ge=7
    val op_le=8
    val op_ne=9
    val op_max=10
    val op_min=11
  }

  /** Codes for one-argument element-wise functions. */
  object TransF {
    val abs=0
    val exp=1
    val expm1=2
    val sqrt=3
    val ln=4
    val log10=5
    val log1p=6
    val cos=7
    val sin=8
    val tan=9
    val cosh=10
    val sinh=11
    val tanh=12
    val acos=13
    val asin=14
    val atan=15
    val acosh=16
    val asinh=17
    val atanh=18
    val erf=19
    val erfinv=20
    val erfc=21
    val erfcinv=22
    val gammaln=23
    val gamma=24
    val ceil=25
    val floor=26
    val round=27
    val trunc=28
    val sign=29
    val exppsi=34
  }

  /** Codes for two-argument element-wise functions. */
  object TransF2 {
    val atan2=0
    val pow=1
  }

  /** New nr x nc matrix with every element set to zero. */
  def gzeros(nr:Int, nc:Int) = {
    val out = GMat(nr, nc)
    cudaMemset(out.data, 0, Sizeof.FLOAT*out.length)
    cudaDeviceSynchronize()
    out
  }

  /** New nr x nc matrix with every element set to one. */
  def gones(nr:Int, nc:Int) = {
    val out = GMat(nr, nc)
    val one = GMat(FMat.felem(1))
    // Source increment 0 re-reads the single element for every destination slot.
    cublasScopy(out.length, one.data, 0, out.data, 1)
    cudaDeviceSynchronize()
    // The 1x1 source is only needed for the copy above; release it instead of leaking it.
    one.free()
    out
  }

  /**
   * Allocates an uninitialised nr x nc matrix on the device.
   * @throws RuntimeException when cublasAlloc does not report success
   */
  def apply(nr:Int, nc:Int):GMat = {
//  println("nr, nc = %d,%d" format (nr,nc))
    val retv = new GMat(nr, nc, new Pointer(), nr*nc)
    val status = cublasAlloc(nr*nc, Sizeof.FLOAT, retv.data)
    if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA alloc failed "+status)
    retv
  }

  def toFMat(a:GMat):FMat = a.toFMat()

  /** Uploads a host FMat into a newly allocated GMat of the same shape. */
  def apply(a:FMat):GMat = {
    val rsize = a.nrows*a.ncols
    val retv = GMat(a.nrows, a.ncols)
    JCublas.cublasSetVector(rsize, Sizeof.FLOAT, Pointer.to(a.data), 1, retv.data, 1);
    cudaDeviceSynchronize()
    retv
  }

  /**
   * Converts a generic Mat: a GMat is returned as-is, an FMat is uploaded and a
   * DMat is converted to FMat first.
   * @throws RuntimeException for any other matrix type
   */
  def apply(a:Mat):GMat = a match {
    case aa:GMat => aa
    case aa:FMat => GMat(aa)
    case aa:DMat => GMat(FMat(aa))
    case _ => throw new RuntimeException("Unsupported source type")
  }

  /** Uploads a single value as a 1x1 matrix. */
  def apply(a:Float):GMat = {
    GMat(FMat.felem(a))
  }

  /** Uploads `a` into `b` recycled to the shape of `a`; returns the recycled matrix. */
  def fromFMat(a:FMat, b:GMat):GMat = {
    val bb = b.recycle(a.nrows, a.ncols, 0)
    JCublas.cublasSetVector(a.length, Sizeof.FLOAT, Pointer.to(a.data), 1, bb.data, 1)
    cudaDeviceSynchronize()
    bb
  }

  /**
   * Runs CUMAT.dds on A and B at the nonzero positions of C and returns a
   * sparse result with the same row/column indices as C.
   *
   * @throws RuntimeException("dimensions mismatch") unless A.nrows == B.nrows,
   *         C.nrows == A.ncols and C.ncols == B.ncols
   */
  def DDS(A:GMat, B:GMat, C:GSMat, oldmat:Mat):GSMat = {
    if (A.nrows != B.nrows || C.nrows != A.ncols || C.ncols != B.ncols) {
      throw new RuntimeException("dimensions mismatch")
    }
    val out = GSMat.newOrCheckGSMat(C, oldmat)
    cudaMemcpy(out.ir, C.ir, Sizeof.INT * C.nnz, cudaMemcpyKind.cudaMemcpyDeviceToDevice)
    cudaMemcpy(out.ic, C.ic, Sizeof.INT * C.nnz, cudaMemcpyKind.cudaMemcpyDeviceToDevice)
    CUMAT.dds(A.nrows, C.nnz, A.data, B.data, C.ir, C.ic, out.data)
    cudaDeviceSynchronize()
    Mat.nflops += 2L * C.nnz * A.nrows
    out
  }

  /**
   * Multiplies `a` by the `ncols` columns of `b` starting at `firstCol` on the
   * device selected for the calling thread and stores the product in the same
   * columns of `out`. Device buffers are released even when a step fails.
   */
  private def multiplyColumnSlice(a:FMat, b:FMat, out:FMat, firstCol:Int, ncols:Int):Unit = {
    def check(status:Int, msg:String) {
      if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException(msg+status)
    }
    def alloc(n:Int):Pointer = {
      val p = new Pointer
      check(cublasAlloc(n, Sizeof.FLOAT, p), "CUDA alloc failed ")
      p
    }
    // Byte offsets are computed as Long so large matrices do not overflow Int.
    val boffset = Sizeof.FLOAT.toLong * firstCol * b.nrows
    val coffset = Sizeof.FLOAT.toLong * firstCol * a.nrows
    val aa = alloc(a.nrows*a.ncols)
    try {
      val bb = alloc(b.nrows*ncols)
      try {
        val cc = alloc(a.nrows*ncols)
        try {
          var status = cublasSetVector(a.nrows*a.ncols, Sizeof.FLOAT, Pointer.to(a.data), 1, aa, 1)
          cudaDeviceSynchronize
          check(status, "CUDA copy a failed ")
          status = cublasSetVector(b.nrows*ncols, Sizeof.FLOAT, Pointer.to(b.data).withByteOffset(boffset), 1, bb, 1)
          cudaDeviceSynchronize
          check(status, "CUDA copy b failed ")
          cublasSgemm('n', 'n', a.nrows, ncols, a.ncols, 1.0f, aa, a.nrows, bb, b.nrows, 0f, cc, a.nrows)
          cudaDeviceSynchronize
          val err = cublasGetError
          if (err != 0) throw new RuntimeException("Cublas error in xG, sgemm "+err)
          status = cublasGetVector(a.nrows*ncols, Sizeof.FLOAT, cc, 1, Pointer.to(out.data).withByteOffset(coffset), 1)
          cudaDeviceSynchronize
          check(status, "CUDA copy c failed ")
        } finally {
          cublasFree(cc)
        }
      } finally {
        cublasFree(bb)
      }
    } finally {
      cublasFree(aa)
    }
  }

  /**
   * Host-to-host product a * b computed on Mat.hasCUDA devices, one actor per
   * device, each handling a contiguous slice of the columns of `b`.
   *
   * The last worker also takes the columns left over when b.ncols is not a
   * multiple of the worker count. A worker failure is recorded and rethrown
   * here once all workers have finished, instead of leaving this method
   * waiting forever on a completion flag that is never set.
   *
   * @param omat optional output matrix to reuse
   * @throws RuntimeException on a shape mismatch or when any worker failed
   */
  def GPUmult(a:FMat, b:FMat, omat:Mat):FMat =
    if (a.ncols != b.nrows) {
      throw new RuntimeException("dimensions mismatch in xG")
    } else {
      val out = FMat.newOrCheckFMat(a.nrows, b.ncols, omat)
      val nthreads = Mat.hasCUDA
      val done = IMat(nthreads,1)
      val failures = new Array[Throwable](nthreads)
      val nncols = b.ncols/nthreads
      for (i <- 0 until nthreads) {
        actor {
          try {
            if (SciFunctions.device(i) == 0) {
              val firstCol = i*nncols
              val mycols = if (i == nthreads-1) b.ncols - firstCol else nncols
              if (mycols > 0) multiplyColumnSlice(a, b, out, firstCol, mycols)
            } else {
              throw new RuntimeException("Couldnt set device "+i)
            }
          } catch {
            case e:Exception => failures(i) = e
          } finally {
            // Always signal completion so the wait loop below terminates.
            done(i) = 1
          }
        }
      }
      while (SciFunctions.sum(done,1).dv < nthreads) {Thread.`yield`};
      failures.find(_ != null) match {
        case Some(e) => throw new RuntimeException("GPUmult worker failed: "+e, e)
        case None =>
      }
      Mat.nflops += 2L * a.nrows * a.ncols * b.ncols
      out
    }

  /**
   * Returns an nr x nc GMat to write a result into: a new one when `outmat` is
   * null or 0x0, `outmat` itself when its shape matches, otherwise `outmat`
   * recycled to the requested shape.
   * @throws RuntimeException when `outmat` is not a GMat
   */
  def newOrCheckGMat(nr:Int, nc:Int, outmat:Mat):GMat = {
    if (outmat.asInstanceOf[AnyRef] == null || (outmat.nrows == 0 && outmat.ncols == 0)) {
      GMat(nr, nc)
    } else {
      outmat match {
        case omat:GMat => if (omat.nrows != nr || omat.ncols != nc) {
          omat.recycle(nr, nc, 0)
        } else {
          omat
        }
        case _ => throw new RuntimeException("output matrix must be a GMat")
      }
    }
  }
}
/**
 * Sparse single-precision matrix held on the device as three parallel arrays:
 * row indices (`ir`), column indices (`ic`) and values (`data`).
 *
 * @param nr      number of rows
 * @param nc      number of columns
 * @param nnz0    number of stored nonzeros
 * @param ir      device pointer to the row indices
 * @param ic      device pointer to the column indices
 * @param data    device pointer to the values
 * @param realnnz number of entries the buffers were allocated for; `recycle`
 *                uses it to decide whether they can be reused
 */
case class GSMat(nr:Int, nc:Int, val nnz0:Int, val ir:Pointer, val ic:Pointer, val data:Pointer, val realnnz:Int) extends Mat(nr, nc) {

  def getdata() = data;

  override def mytype = "GSMat"

  override def nnz = nnz0

  /** The stored values viewed as an nnz x 1 GMat over the same device pointer. */
  override def contents:GMat = new GMat(nnz, 1, data, realnnz)

  /**
   * Prints at most the first 12 stored entries by copying them into a small
   * host SMat. A matrix without stored entries gets a short description
   * instead, because the code below reads the last copied column index.
   */
  override def toString:String = {
    val nshow = scala.math.min(nnz,12)
    if (nshow <= 0) {
      "GSMat(%d,%d) with no stored elements" format (nrows, ncols)
    } else {
      val tmpMat = SMat(nshow, nshow, nshow)
      val tmpcols = new Array[Int](nshow)
      JCublas.cublasGetVector(nshow, Sizeof.INT, ir, 1, Pointer.to(tmpMat.ir), 1)
      JCublas.cublasGetVector(nshow, Sizeof.FLOAT, data, 1, Pointer.to(tmpMat.data), 1)
      JCublas.cublasGetVector(nshow, Sizeof.INT, ic, 1, Pointer.to(tmpcols), 1)
      SparseMat.compressInds(tmpcols, math.min(ncols, tmpcols(nshow-1)+1), tmpMat.jc, nshow)
      if (Mat.ioneBased == 1) {
        SparseMat.incInds(tmpMat.ir, tmpMat.ir)
      }
      tmpMat.toString
    }
  }

  /** Copies the matrix to a host SMat, compressing the column indices into `jc`. */
  def toSMat():SMat = {
    val out = SMat(nrows, ncols, nnz)
    val tmpcols = new Array[Int](nnz)
    JCublas.cublasGetVector(nnz, Sizeof.INT, ir, 1, Pointer.to(out.ir), 1)
    JCublas.cublasGetVector(nnz, Sizeof.FLOAT, data, 1, Pointer.to(out.data), 1)
    JCublas.cublasGetVector(nnz, Sizeof.INT, ic, 1, Pointer.to(tmpcols), 1)
    SparseMat.compressInds(tmpcols, ncols, out.jc, nnz)
    if (Mat.ioneBased == 1) {
      SparseMat.incInds(out.ir, out.ir)
    }
    out
  }

  /** Releases all three device buffers. */
  def free() = {
    JCublas.cublasFree(data)
    JCublas.cublasFree(ic)
    JCublas.cublasFree(ir)
  }

  /**
   * Returns a GSMat of the requested shape and nonzero count: a new header
   * over the existing buffers when they hold at least `nnz` entries,
   * otherwise the buffers are freed and new ones are allocated.
   */
  override def recycle(nr:Int, nc:Int, nnz:Int):GSMat = {
    if (realnnz >= nnz) {
      new GSMat(nr, nc, nnz, ir, ic, data, realnnz)
    } else {
      free
      GSMat(nr, nc, nnz)
    }
  }
}

/** Output/operand pair produced by `omat ~ mat` for a sparse operand; adds no operations of its own. */
class GSPair (val omat:GMat, val mat:GSMat) extends Pair {

}

object GSMat {

  /** Allocates device buffers for an nr x nc matrix with `nnz0` stored entries. */
  def apply(nr:Int, nc:Int, nnz0:Int):GSMat = {
//  println("nr, nc, nnz = %d,%d,%d" format (nr,nc,nnz0))
    val out = new GSMat(nr, nc, nnz0, new Pointer(), new Pointer(), new Pointer(), nnz0)
    JCublas.cublasAlloc(out.nnz, Sizeof.INT, out.ir)
    JCublas.cublasAlloc(out.nnz, Sizeof.INT, out.ic)
    JCublas.cublasAlloc(out.nnz, Sizeof.FLOAT, out.data)
    out
  }

  /**
   * Copies values, row indices and expanded column indices of `a` into the
   * device buffers of `out`. Row indices go through SparseMat.decInds first
   * when Mat.ioneBased == 1. Shared by apply(SMat) and fromSMat.
   */
  private def upload(a:SMat, out:GSMat):GSMat = {
    JCublas.cublasSetVector(a.nnz, Sizeof.FLOAT, Pointer.to(a.data), 1, out.data, 1)
    val rowInds = if (Mat.ioneBased == 1) SparseMat.decInds(a.ir) else a.ir
    JCublas.cublasSetVector(a.nnz, Sizeof.INT, Pointer.to(rowInds), 1, out.ir, 1)
    JCublas.cublasSetVector(a.nnz, Sizeof.INT, Pointer.to(SparseMat.uncompressInds(a.jc, a.ir)), 1, out.ic, 1)
    out
  }

  /** Uploads a host SMat into newly allocated device buffers. */
  def apply(a:SMat):GSMat = upload(a, GSMat(a.nrows, a.ncols, a.nnz))

  /** Uploads `a` into `b` recycled to the shape and nonzero count of `a`. */
  def fromSMat(a:SMat, b:GSMat):GSMat = upload(a, b.recycle(a.nrows, a.ncols, a.nnz))

  /**
   * Returns a GSMat shaped like `mat` to write a result into: a new one when
   * `oldmat` is null or 0x0, `oldmat` itself when its shape and nonzero count
   * match, otherwise `oldmat` recycled.
   * @throws RuntimeException when `oldmat` is not a GSMat
   */
  def newOrCheckGSMat(mat:GSMat, oldmat:Mat):GSMat = {
    if (oldmat.asInstanceOf[AnyRef] == null || (oldmat.nrows ==0 && oldmat.ncols == 0)) {
      GSMat(mat.nrows, mat.ncols, mat.nnz)
    } else {
      oldmat match {
        case omat:GSMat => if (oldmat.nrows == mat.nrows && oldmat.ncols == mat.ncols && oldmat.nnz == mat.nnz) {
          omat
        } else {
          omat.recycle(mat.nrows, mat.ncols, mat.nnz)
        }
        case _ => throw new RuntimeException("output matrix must be a GSMat")
      }
    }
  }
}
/**
 * Matrix whose contents are spread over several files and loaded on demand.
 *
 * @param nr       number of rows
 * @param nc       number of columns
 * @param fileList files holding the blocks, indexed by block number
 * @param varname  variable name passed to MatHDF5.hload for every file
 * @param blkinds  boundaries used to map a column index to a block
 *                 (presumably the first index of each block plus one final
 *                 end marker - TODO confirm against the code that builds it)
 * @param catdim   not read anywhere in this class
 */
case class HMat(nr:Int, nc:Int, fileList:List[String], varname:String, blkinds:Array[Int], catdim:Int) extends Mat(nr, nc) {

  // Name of the file whose contents are currently held in fmatCache (null before the first load).
  var fnameCache:String = null

  // Contents of the most recently loaded file.
  var fmatCache:Mat = null

  override def mytype = "HMat"

  // Slicing for a disk-backed matrix: the requested columns `b` are split into
  // runs that map to the same block, each run is sliced out of that block's
  // file (loaded through the one-file cache above) and the slices are joined
  // with the `\` operator.
  override def apply(a:IMat, b:IMat):Mat = {
    // Block position remembered between lookups, so consecutive indices start
    // the search from the previous hit.
    var ilast:Int = 0
    def findindx(ind:Int):Int = {
      // NOTE(review): this loop can step ilast down to -1, after which the
      // next loop reads blkinds(-1) - confirm that ind < blkinds(0) never occurs.
      while (ilast >= 0 && ind < blkinds(ilast)) ilast -= 1
      // NOTE(review): this loop only stops once blkinds(ilast) > ind or
      // ilast == blkinds.length, so the `blkinds(ilast) <= ind` test below
      // looks like it can never hold (or indexes past the end) - verify.
      while (ilast < blkinds.length && ind >= blkinds(ilast)) ilast += 1
      if (ilast >= 0 && blkinds(ilast) <= ind && blkinds(ilast+1) > ind) {
        ilast
      } else {
        -1
      }
    }

    // locs(i) holds the block number found for b(i).
    val locs = IMat(1,b.length)
    var i = 0
    var iblk = 0       // start of the run currently being collected
    var out:Mat = null
    // Runs one step past the end so the final run is flushed as well.
    while (i <= b.length) {
      if (i < b.length) locs(i) = findindx(b(i))
      if (i == b.length || locs(i) != locs(iblk)) {
        // Reload only when the run lives in a different file than the cached one.
        if (fnameCache == null || fileList(locs(iblk)) != fnameCache) {
          fmatCache = MatHDF5.hload(fileList(locs(iblk)), varname).asInstanceOf[Mat]
          fnameCache = fileList(locs(iblk))
        }
        // NOTE(review): the column indices passed here are the caller's
        // indices, not offsets within the block - confirm that is intended.
        val newmat = fmatCache(a, b(MatFunctions.irow(iblk->i)))
        if (out.asInstanceOf[AnyRef] != null) {
          out = out \ newmat
        } else {
          out = newmat
        }
        // NOTE(review): the element at position i opened the new run, yet the
        // next run is started at i + 1 - check whether this should be `i`.
        iblk = i + 1
      }
      i += 1
    }
    out
  }
}
/**
 * Opens `fname` for reading with a 1 MB buffer.
 *
 * @param fname      path of the file to open
 * @param compressed when true the file is read through a GZIPInputStream,
 *                   otherwise through a BufferedInputStream
 * @return the opened stream; the caller is responsible for closing it
 * @throws IOException when the file cannot be opened or, for compressed
 *                     input, when the gzip header cannot be read
 */
def getInputStream(fname:String, compressed:Boolean):InputStream = {
  val fin = new FileInputStream(fname)
  if (compressed) {
    try {
      new GZIPInputStream(fin, 1024*1024)
    } catch {
      // The GZIPInputStream constructor reads the header and throws on a file
      // that is not gzip data; close the raw stream so the handle is not leaked.
      case e:IOException => {
        fin.close
        throw e
      }
    }
  } else {
    new BufferedInputStream(fin, 1024*1024)
  }
}
= hints(2) + val out = IMat(nrows, ncols) + readSomeInts(gin, out.data, buff, ncols*nrows) + gin.close + out + } + + def loadDMat(fname:String, compressed:Boolean=true):DMat = { + val gin = getInputStream(fname, compressed) + val buff = new Array[Byte](1024*1024) + val hints = new Array[Int](4) + readSomeInts(gin, hints, buff, 4) + val ftype = hints(0) + val nrows = hints(1) + val ncols = hints(2) + val out = DMat(nrows, ncols) + readSomeDoubles(gin, out.data, buff, ncols*nrows) + gin.close + out + } + + def saveFMat(fname:String, m:FMat, compressed:Boolean=true):Unit = { + val gout = getOutputStream(fname, compressed) + val hints = new Array[Int](4) + val tbuf = new Array[Byte](16) + hints(0) = 130 // 1=dense, 3=float + hints(1) = m.nrows + hints(2) = m.ncols + hints(3) = 0 + writeSomeInts(gout, hints, tbuf, 4) + val buff = new Array[Byte](math.min(1024*1024, 4*m.ncols*m.nrows)) + writeSomeFloats(gout, m.data, buff, m.nrows*m.ncols) + gout.close + } + + def saveIMat(fname:String, m:IMat, compressed:Boolean=true):Unit = { + val gout = getOutputStream(fname, compressed) + val hints = new Array[Int](4) + val tbuf = new Array[Byte](16) + hints(0) = 110 // 1=dense, 1=int + hints(1) = m.nrows + hints(2) = m.ncols + hints(3) = 0 + writeSomeInts(gout, hints, tbuf, 4) + val buff = new Array[Byte](math.min(1024*1024, 4*m.ncols*m.nrows)) + writeSomeInts(gout, m.data, buff, m.nrows*m.ncols) + gout.close + } + + def saveDMat(fname:String, m:DMat, compressed:Boolean=true):Unit = { + val gout = getOutputStream(fname, compressed) + val hints = new Array[Int](4) + val tbuf = new Array[Byte](16) + hints(0) = 140 // 1=dense, 4=double + hints(1) = m.nrows + hints(2) = m.ncols + hints(3) = 0 + writeSomeInts(gout, hints, tbuf, 4) + val buff = new Array[Byte](math.min(1024*1024, 4*m.ncols*m.nrows)) + writeSomeDoubles(gout, m.data, buff, m.nrows*m.ncols) + gout.close + } + + def loadSMat(fname:String, compressed:Boolean=true):SMat = { + val gin = getInputStream(fname, compressed) + val 
buff = new Array[Byte](1024*1024) + val hints = new Array[Int](4) + readSomeInts(gin, hints, buff, 4) + val ftype = hints(0) + val nrows = hints(1) + val ncols = hints(2) + val nnz = hints(3) + val out = SMat(nrows, ncols, nnz) + readSomeInts(gin, out.jc, buff, ncols+1) + readSomeInts(gin, out.ir, buff, nnz) + readSomeFloats(gin, out.data, buff, nnz) + MatHDF5.addOne(out.jc) + MatHDF5.addOne(out.ir) + gin.close + out + } + + def saveSMat(fname:String, m:SMat, compressed:Boolean=true):Unit = { + val gout = getOutputStream(fname, compressed) + val hints = new Array[Int](4) + val tbuf = new Array[Byte](16) + hints(0) = 231 // 2=sparse, 3=float, 1=int + hints(1) = m.nrows + hints(2) = m.ncols + hints(3) = m.nnz + writeSomeInts(gout, hints, tbuf, 4) + val buff = new Array[Byte](math.min(1024*1024, 4*math.max(m.ncols+1, m.nnz))) + try { + MatHDF5.subOne(m.jc) + MatHDF5.subOne(m.ir) + writeSomeInts(gout, m.jc, buff, m.ncols+1) + writeSomeInts(gout, m.ir, buff, m.nnz) + writeSomeFloats(gout, m.data, buff, m.nnz) + } catch { + case e:Exception => { + MatHDF5.addOne(m.jc) + MatHDF5.addOne(m.ir) + throw new RuntimeException("Exception in saveSMat "+e) + } + case _ => { + MatHDF5.addOne(m.jc) + MatHDF5.addOne(m.ir) + throw new RuntimeException("Problem in saveSMat") + } + } + MatHDF5.addOne(m.jc) + MatHDF5.addOne(m.ir) + gout.close + } + + def testLoad(fname:String, varname:String, n:Int) = { + val a = new Array[SMat](n) + var ndone = izeros(n,1) + for (i <- 0 until n) { + actor { + a(i) = loadSMat(("/disk%02d/" format i)+fname) + ndone(i) = 1 + } + } + while (SciFunctions.sum(ndone).v < n) {Thread.sleep(10)} + a + } + + def apply(dirname:String, filepat:String, varname:String, catd:Int) { + var files:ListBuffer[String] = new ListBuffer[String] + val dir:File = new File(dirname) + val slen = dir.getName.length + 1 + + def searchDir(dir:File) { + for (f <- dir.listFiles) { + if (f.isDirectory) { + searchDir(f) + } else { + if (f.getName.substring(slen).matches(filepat)) { + 
files.append(f.getName) + } + } + } + } + + searchDir(dir) + val blkinds = new Array[Int](files.length+1) + var i = 0 + var nrows = -1 + var ncols = -1 + files.foreach((fn:String) => { + val (nr, nc) = MatHDF5.readMatDims(fn, varname) + if (catd == 2) { + if (nrows >= 0) { + if (nr != nrows) { + throw new RuntimeException("incorrect number of rows in file "+fn) + } + } else { + nrows = nr.asInstanceOf[Int] + } + blkinds(i+1) = blkinds(i) + nc.asInstanceOf[Int] + i += 1 + } else if (catd == 1) { + if (ncols >= 0) { + if (nc != 1) { + throw new RuntimeException("incorrect number of cols in file "+fn) + } + } else { + ncols = 1 + } + blkinds(i+1) = blkinds(i) + nr.asInstanceOf[Int] + i += 1 + } + }) + if (catd == 2) { + HMat(nrows, blkinds(files.length), files.toList, varname, blkinds, 2) + } else { + if (catd == 1) { + HMat(blkinds(files.length), ncols, files.toList, varname, blkinds, 1) + } else { + throw new RuntimeException("cat dimension must be 1 or 2") + } + } + } +} + + + + + + diff --git a/src/main/scala/BIDMat/IMat.scala b/src/main/scala/BIDMat/IMat.scala new file mode 100755 index 00000000..206d3eaa --- /dev/null +++ b/src/main/scala/BIDMat/IMat.scala @@ -0,0 +1,433 @@ +package BIDMat + +import java.util.Arrays + +case class IMat(nr:Int, nc:Int, data0:Array[Int]) extends DenseMat[Int](nr, nc, data0) { + + def size() = length; + + override def t:IMat = IMat(gt(null)) + + override def dv:Double = + if (nrows > 1 || ncols > 1) { + throw new RuntimeException("Matrix should be 1x1 to extract value") + } else { + data(0) + } + + override def mytype = "IMat" + + override def set(v:Float):IMat = { + Arrays.fill(data,0,length,v.asInstanceOf[Int]) + this + } + + def horzcat(b: IMat) = IMat(ghorzcat(b)) + + def vertcat(b: IMat) = IMat(gvertcat(b)) + + def find3:(IMat, IMat, IMat) = { val (ii, jj, vv) = gfind3 ; (ii, jj, IMat(vv)) } + + override def apply(a:IMat):IMat = IMat(gapply(a)) + + override def apply(a:IMat, b:IMat):IMat = IMat(gapply(a, b)) + + override def 
apply(a:IMat, b:Int):IMat = IMat(gapply(a, b)) + + override def apply(a:Int, b:IMat):IMat = IMat(gapply(a, b)) + + /** Block update through _update; the overloads taking an Int index wrap it with IMat.ielem first. */ def update(iv:IMat, jv:IMat, b:IMat):IMat = IMat(_update(iv, jv, b)) + + def update(iv:IMat, j:Int, b:IMat):IMat = IMat(_update(iv, IMat.ielem(j), b)) + + def update(i:Int, jv:IMat, b:IMat):IMat = IMat(_update(IMat.ielem(i), jv, b)) + + /** Forwards f to ggMatOp when b is an IMat; any other Mat subtype raises RuntimeException. */ def iiMatOp(b: Mat, f:(Int, Int) => Int, old:Mat):IMat = + b match { + case bb:IMat => IMat(ggMatOp(bb, f, old)) + case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b) + } + + /** Same dispatch as iiMatOp, for the vector-kernel form of f (forwarded to ggMatOpv). */ def iiMatOpv(b: Mat, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat):IMat = + b match { + case bb:IMat => IMat(ggMatOpv(bb, f, old)) + case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b) + } + + def iiMatOpScalar(b: Int, f:(Int, Int) => Int, old:Mat) = IMat(ggMatOpScalar(b, f, old)) + + def iiMatOpScalarv(b: Int, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat) = IMat(ggMatOpScalarv(b, f, old)) + + def iiReduceOp(n:Int, f1:(Int) => Int, f2:(Int, Int) => Int, old:Mat) = IMat(ggReduceOp(n, f1, f2, old)) + + def iiReduceOpv(n:Int, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat) = IMat(ggReduceOpv(n, f, old)) + + def iiReduceAll(n:Int, f1:(Int) => Int, f2:(Int, Int) => Int, old:Mat) = IMat(ggReduceAll(n, f1, f2, old)) + + def iiReduceAllv(n:Int, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat) = IMat(ggReduceAllv(n, f, old)) + + /** Formats element i of data with "%d". */ override def printOne(i:Int):String = { + val v = data(i) + "%d" format v + } + + /** Copies length elements of this matrix's data into a (an IMat) and returns a. NOTE(review): the match has no fallback case, so a non-IMat argument fails with a MatchError. */ override def copyTo(a:Mat) = { + a match { + case out:IMat => System.arraycopy(data, 0, out.data, 0, length) + } + a + } + + /** Returns a new IMat with the same dimensions and a copy of the data. */ override def copy = { + val out = IMat(nrows, ncols) + System.arraycopy(data, 0, out.data, 0, length) + out + } + + override def zeros(nr:Int, nc:Int) = { + IMat(nr, nc) + } + + override def ones(nr:Int, nc:Int) = { + val
out = IMat(nr, nc) + var i = 0 + while (i < out.length) { + out(i) = 1 + i += 1 + } + out + } + + override def clearUpper(off:Int) = setUpper(0, off) + override def clearUpper = setUpper(0, 0) + + override def clearLower(off:Int) = setLower(0, off) + override def clearLower = setLower(0, 0) + + + /** Integer matrix product with three cases: ncols == a.nrows (full product, accumulated into the matrix returned by IMat.newOrCheckIMat for omat), this is 1x1, or a is 1x1 (scalar scaling into a freshly allocated IMat; omat is not used). Other shapes throw "dimensions mismatch", and a non-IMat argument throws "unsupported arg to * ". Mat.nflops is incremented in every successful case. */ def iMult(a0:Mat, omat:Mat):IMat = + a0 match { + case a:IMat => + if (ncols == a.nrows) { + val out = IMat.newOrCheckIMat(nrows, a.ncols, omat) + out.clear + Mat.nflops += 2L * length * a.ncols + for (i <- 0 until a.ncols) + for (j <- 0 until a.nrows) { + var k = 0 + /* element (j, i) of a: storage is column-major, and a.nrows == ncols in this branch */ val dval = a.data(j + i*ncols) + while (k < nrows) { + out.data(k+i*nrows) += data(k+j*nrows)*dval + k += 1 + } + } + out + } else if (ncols == 1 && nrows == 1) { + val out = IMat(a.nrows, a.ncols) + Mat.nflops += a.length + var i = 0 + val dvar = data(0) + while (i < a.length) { + out.data(i) = dvar * a.data(i) + i += 1 + } + out + } else if (a.ncols == 1 && a.nrows == 1) { + val out = IMat(nrows, ncols) + Mat.nflops += length + var i = 0 + val dvar = a.data(0) + while (i < length) { + out.data(i) = dvar * data(i) + i += 1 + } + out + } else throw new RuntimeException("dimensions mismatch") + case _ => throw new RuntimeException("unsupported arg to * "+a0) + } + + def dot(a:IMat):Double = super.dot(a) + + /** Casts a to IMat before delegating to super.dot; a non-IMat argument fails with a ClassCastException. */ override def dot(a:Mat):Double = super.dot(a.asInstanceOf[IMat]) + + /** IMat-IMat arithmetic: * goes through iMult; +, -, *@ and /@ pass the IMat.vecAdd, vecSub, vecMul and iVecDiv kernels to iiMatOpv. No output matrix is supplied (null). */ def * (b : IMat) = iMult(b, null) + def + (b : IMat) = iiMatOpv(b, IMat.vecAdd _, null) + def - (b : IMat) = iiMatOpv(b, IMat.vecSub _, null) + def *@ (b : IMat) = iiMatOpv(b, IMat.vecMul _, null) + def /@ (b : IMat) = iiMatOpv(b, IMat.iVecDiv _, null) + + override def + (b : Int) = iiMatOpScalarv(b, IMat.vecAdd _, null) + override def - (b : Int) = iiMatOpScalarv(b, IMat.vecSub _, null) + override def *@ (b : Int) = iiMatOpScalarv(b, IMat.vecMul _, null) + override def /@ (b : Int) = iiMatOpScalarv(b, IMat.iVecDiv _, null) + + /** Comparisons against an IMat: the supplied function yields 1 where the relation holds and 0 otherwise. */ def > (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x > y) 1 else 0, null) + def < (b : IMat) = iiMatOp(b,
(x:Int, y:Int) => if (x < y) 1 else 0, null) + /** == and === use the same element-wise equality function. */ def == (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, null) + def === (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, null) + def >= (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x >= y) 1 else 0, null) + def <= (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x <= y) 1 else 0, null) + def != (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x != y) 1 else 0, null) + + /** Comparisons against an Int scalar: the supplied function yields 1 where the relation holds and 0 otherwise. */ override def > (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x > y) 1 else 0, null) + override def < (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x < y) 1 else 0, null) + override def == (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x == y) 1 else 0, null) + override def === (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x == y) 1 else 0, null) + override def >= (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x >= y) 1 else 0, null) + override def <= (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x <= y) 1 else 0, null) + override def != (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x != y) 1 else 0, null) + + /** \ delegates to horzcat and on delegates to vertcat; an Int operand is first wrapped in a 1x1 matrix with IMat.ielem. */ def \ (b: IMat) = horzcat(b) + def \ (b: Int) = horzcat(IMat.ielem(b)) + def on (b: IMat) = vertcat(b) + def on (b: Int) = vertcat(IMat.ielem(b)) + + /* + * Specialize to FMats to help the type system.
+ */ + /** Each FMat overload converts this matrix with FMat(this) and applies the FMat operator of the same name. */ def + (b : FMat):FMat = FMat(this) + b + def - (b : FMat):FMat = FMat(this) - b + def * (b : FMat):FMat = FMat(this) * b + def / (b : FMat):FMat = FMat(this) / b + def \\ (b : FMat):FMat = FMat(this) \\ b + def *@ (b : FMat):FMat = FMat(this) *@ b + def /@ (b : FMat):FMat = FMat(this) /@ b + def \ (b : FMat):FMat = FMat(this) \ b + def on (b : FMat):FMat = FMat(this) on b + + def > (b : FMat):FMat = FMat(this) > b + def < (b : FMat):FMat = FMat(this) < b + def >= (b : FMat):FMat = FMat(this) >= b + def <= (b : FMat):FMat = FMat(this) <= b + def == (b : FMat):FMat = FMat(this) == b + def === (b : FMat):FMat = FMat(this) === b + def != (b : FMat):FMat = FMat(this) != b + + /* + * Specialize to DMats to help the type system. + */ + /** Each DMat overload converts this matrix with DMat(this) and applies the DMat operator of the same name. */ def + (b : DMat):DMat = DMat(this) + b + def - (b : DMat):DMat = DMat(this) - b + def * (b : DMat):DMat = DMat(this) * b + def / (b : DMat):DMat = DMat(this) / b + def \\ (b : DMat):DMat = DMat(this) \\ b + def *@ (b : DMat):DMat = DMat(this) *@ b + def /@ (b : DMat):DMat = DMat(this) /@ b + def \ (b : DMat):DMat = DMat(this) \ b + def on (b : DMat):DMat = DMat(this) on b + + def > (b : DMat):DMat = DMat(this) > b + def < (b : DMat):DMat = DMat(this) < b + def >= (b : DMat):DMat = DMat(this) >= b + def <= (b : DMat):DMat = DMat(this) <= b + def == (b : DMat):DMat = DMat(this) == b + def === (b : DMat):DMat = DMat(this) === b + def != (b : DMat):DMat = DMat(this) != b + /* + * Specialize to CMats to help the type system. + */ + /** Each CMat overload converts this matrix with CMat(this) and applies the CMat operator of the same name; no comparison overloads are defined for CMat. */ def + (b : CMat):CMat = CMat(this) + b + def - (b : CMat):CMat = CMat(this) - b + def * (b : CMat):CMat = CMat(this) * b + def / (b : CMat):CMat = CMat(this) / b + def \\ (b : CMat):CMat = CMat(this) \\ b + def *@ (b : CMat):CMat = CMat(this) *@ b + def /@ (b : CMat):CMat = CMat(this) /@ b + def \ (b : CMat):CMat = CMat(this) \ b + def on (b : CMat):CMat = CMat(this) on b + /* + * Operators whose second arg is generic.
+ */ + import Operator._ + /** Generic-Mat operators dispatch through applyMat with a null output argument and the matching Mop_* operator object. */ override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus) + override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus) + override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times) + override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div) + override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve) + override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes) + override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv) + override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat) + override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat) + + override def > (b : Mat):Mat = applyMat(this, b, null, Mop_GT) + override def < (b : Mat):Mat = applyMat(this, b, null, Mop_LT) + override def >= (b : Mat):Mat = applyMat(this, b, null, Mop_GE) + override def <= (b : Mat):Mat = applyMat(this, b, null, Mop_LE) + override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ) + override def === (b : Mat):Mat = applyMat(this, b, null, Mop_EQ) + override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE) + + /** Pairs this matrix with b: this becomes the IPair's omat and b its mat. */ def ~ (b : IMat):IPair = new IPair(this, b) + + /** Generic form of ~: accepts only an IMat and throws RuntimeException for any other Mat subtype. */ override def ~ (b: Mat):Pair = + b match { + case db:IMat => new IPair(this, db) + case _ => throw new RuntimeException("mismatched types for operator ~") + } + + /** Fills the first length entries of data with 0 and returns this. */ override def clear = { + Arrays.fill(this.data,0,length,0) + this + } + + /** Returns this when the dimensions already match, a new IMat sharing this data array when it holds at least nr*nc elements, or a freshly allocated IMat otherwise; nnz is not used. */ override def recycle(nr:Int, nc:Int, nnz:Int):IMat = { + if (nrows == nr && nc == ncols) { + this + } else if (data.size >= nr*nc) { + new IMat(nr, nc, data) + } else { + IMat(nr, nc) + } + } +} + +/** Pairs an output matrix omat with an operand mat; the operators below forward to the corresponding mat operation, passing omat along as the output argument. */ class IPair(val omat:Mat, val mat:IMat) extends Pair { + + override def t:IMat = IMat(mat.gt(omat)) + + def * (b : IMat) = mat.iMult(b, omat) + /** NOTE(review): iMult only matches IMat arguments, so this overload appears to always throw "unsupported arg to * " for an SMat - confirm intent. */ def * (b : SMat) = mat.iMult(b, omat) +// def xT (b : SMat) = mat.multT(b, omat) + def + (b : IMat) = mat.iiMatOpv(b, IMat.vecAdd _, omat) + def - (b : IMat) = mat.iiMatOpv(b, IMat.vecSub _, omat) + def *@ (b : IMat) = mat.iiMatOpv(b, IMat.vecMul _, omat) +// def /@
(b : IMat) = mat.iiMatOpv(b, IMat.fVecDiv _, omat) +// def ^ (b : IMat) = mat.iiMatOp(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat) + + /** Comparisons against an IMat: the function yields 1 where the relation holds and 0 otherwise, and omat is passed as iiMatOp's old argument. */ def > (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x > y) 1 else 0, omat) + def < (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x < y) 1 else 0, omat) + def == (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, omat) + def === (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, omat) + def >= (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x >= y) 1 else 0, omat) + def <= (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x <= y) 1 else 0, omat) + def != (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x != y) 1 else 0, omat) + + + /** Int-scalar forms: * wraps the scalar with IMat.ielem and goes through iMult; +, - and *@ use the vec kernels through iiMatOpScalarv. */ override def * (b : Int) = mat.iMult(IMat.ielem(b), omat) + override def + (b : Int) = mat.iiMatOpScalarv(b, IMat.vecAdd _, omat) + override def - (b : Int) = mat.iiMatOpScalarv(b, IMat.vecSub _, omat) + override def *@ (b : Int) = mat.iiMatOpScalarv(b, IMat.vecMul _, omat) +// override def /@ (b : Int) = mat.iiMatOpScalarv(b, IMat.fVecDiv _, omat) +// override def ^ (b : Int) = mat.iiMatOpScalar(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat) + + /** Int-scalar comparisons with the same 1/0 result; no === overload is defined in this group. */ override def > (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x > y) 1 else 0, omat) + override def < (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x < y) 1 else 0, omat) + override def == (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x == y) 1 else 0, omat) + override def >= (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x >= y) 1 else 0, omat) + override def <= (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x <= y) 1 else 0, omat) + override def != (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x != y) 1 else 0, omat) + + import Operator._ + /** Generic-Mat forms dispatch through applyMat with omat as the third argument. */ override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus) + override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus) + override def * (b : Mat):Mat = applyMat(mat, b, omat, Mop_Times) + override def /
(b : Mat):Mat = applyMat(mat, b, omat, Mop_Div) + override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve) + override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes) + override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv) + override def \ (b : Mat):Mat = applyMat(mat, b, omat, Mop_HCat) + override def on (b : Mat):Mat = applyMat(mat, b, omat, Mop_VCat) + + override def > (b : Mat):Mat = applyMat(mat, b, omat, Mop_GT) + override def < (b : Mat):Mat = applyMat(mat, b, omat, Mop_LT) + override def >= (b : Mat):Mat = applyMat(mat, b, omat, Mop_GE) + override def <= (b : Mat):Mat = applyMat(mat, b, omat, Mop_LE) + override def == (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ) + override def === (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ) + override def != (b : Mat):Mat = applyMat(mat, b, omat, Mop_NE) +} + + +object IMat { + + /** Strided integer-division kernel: stores a(ai) / b(bi) into c(ci) while ci < c0 + n, advancing the three indices by ainc, binc and cinc per step; always returns 0. A zero divisor raises ArithmeticException, as for any JVM integer division. */ def iVecDiv(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = a(ai) / b(bi); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /** Allocates an nr x nc IMat backed by a new Int array. */ def apply(nr:Int, nc:Int) = new IMat(nr, nc, new Array[Int](nr*nc)) + + /** Wraps a DenseMat[Int] as an IMat without copying; the result shares a.data. */ def apply(a:DenseMat[Int]):IMat = new IMat(a.nrows, a.ncols, a.data) + + /** Converts a DMat, FMat, IMat or GIMat to an IMat: the first three are copied into a newly allocated IMat, a GIMat goes through gg.toIMat. Any other type throws RuntimeException("Unsupported source type"). */ def apply(x:Mat):IMat = { + var out:IMat = null + x match { + case dd:DMat => {out = IMat(x.nrows, x.ncols) ; Mat.copyToIntArray(dd.data, 0, out.data, 0, dd.length)} + case ff:FMat => {out = IMat(x.nrows, x.ncols); Mat.copyToIntArray(ff.data, 0, out.data, 0, ff.length)} + case ii:IMat => {out = IMat(x.nrows, x.ncols); System.arraycopy(ii.data, 0, out.data, 0, ii.length)} + case gg:GIMat => out = gg.toIMat + case _ => throw new RuntimeException("Unsupported source type") + } + out + } + + /** Strided addition kernel with the same index scheme as iVecDiv; always returns 0. */ def vecAdd(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) =
a(ai) + b(bi); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /** Strided subtraction kernel: c(ci) = a(ai) - b(bi) while ci < c0 + n; always returns 0. */ def vecSub(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = a(ai) - b(bi); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /** Strided multiplication kernel: c(ci) = a(ai) * b(bi) while ci < c0 + n; always returns 0. */ def vecMul(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = a(ai) * b(bi); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /** Strided maximum kernel: c(ci) = math.max(a(ai), b(bi)) while ci < c0 + n; always returns 0. */ def vecMax(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = math.max(a(ai), b(bi)); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + /** Strided minimum kernel: c(ci) = math.min(a(ai), b(bi)) while ci < c0 + n; always returns 0. */ def vecMin(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = { + var ai = a0; var bi = b0; var ci = c0; var cend = c0 + n + while (ci < cend) { + c(ci) = math.min(a(ai), b(bi)); ai += ainc; bi += binc; ci += cinc + } + 0 + } + + + /** Builds a 1x1 IMat holding x. */ def ielem(x:Int) = { + val out = IMat(1,1) + out.data(0) = x + out + } + + /** Chooses the output matrix for an nr x nc result: a new IMat when omat is null or 0x0, omat itself when its dimensions already match, otherwise omat.recycle(nr, nc, 0). NOTE(review): the inner match has no fallback case, so a non-null, non-IMat omat fails with a MatchError. */ def newOrCheckIMat(nr:Int, nc:Int, omat:Mat):IMat = { + if (omat.asInstanceOf[AnyRef] == null || (omat.nrows == 0 && omat.ncols == 0)) { + IMat(nr, nc) + } else { + omat match { + case outmat:IMat => if (outmat.nrows != nr || outmat.ncols != nc) { + outmat.recycle(nr, nc, 0) + } else { + outmat + } + } + } + } +} + + + + + + diff --git a/src/main/scala/BIDMat/Mat.scala b/src/main/scala/BIDMat/Mat.scala new file mode 100755 index 00000000..a4f29f3a --- /dev/null +++ b/src/main/scala/BIDMat/Mat.scala @@ -0,0 +1,445 @@ +package BIDMat + +/** Base matrix class holding the dimensions nrows and ncols; most operations defined on it throw a RuntimeException through notImplemented0/notImplemented1 and are meant to be overridden. */ class Mat(nr:Int, nc:Int) { + val nrows = nr + val ncols = nc + + def length = nr*nc + + /** Always throws RuntimeException naming operator s and this.mytype; the Mat result type only lets it stand in for a method body. */ def notImplemented0(s:String):Mat = { + throw new RuntimeException("operator "+s+" not
implemented for "+this.mytype) + } + + /** Always throws RuntimeException naming operator s and the mytype of both operands. */ def notImplemented1(s:String,that:Mat):Mat = { + throw new RuntimeException("operator "+s+" not implemented for "+this.mytype+" and "+that.mytype) + } + + /** Default implementations: each member below throws through notImplemented0/notImplemented1, naming its own operation in the message, unless a subclass overrides it. */ def t = notImplemented0("t") + def dv:Double = throw new RuntimeException("operator dv not implemented for "+this.mytype) + + def mytype = "Mat" + def copyTo(a:Mat) = notImplemented0("copyTo"); + def copy = notImplemented0("copy"); + def set(v:Float) = notImplemented0("set") + def zeros(nr:Int, nc:Int) = notImplemented0("zeros"); + def ones(nr:Int, nc:Int) = notImplemented0("ones"); + def clearUpper(i:Int) = notImplemented0("clearUpper"); + def clearLower(i:Int) = notImplemented0("clearLower"); + def clearUpper = notImplemented0("clearUpper"); + def clearLower = notImplemented0("clearLower"); + + def nnz:Int = {notImplemented0("nnz"); 0} + def clear = notImplemented0("clear"); + /** Ignores nnz and delegates to zeros(nr, nc). */ def zeros(nr:Int, nc:Int, nnz:Int):Mat = zeros(nr, nc) + def recycle(nr:Int, nc:Int, nnz:Int):Mat = notImplemented0("recycle"); + def contents:Mat = notImplemented0("contents"); + + def apply(a:IMat):Mat = notImplemented0("linear array access"); + def apply(a:IMat, b:IMat):Mat = notImplemented0("block array access"); + def apply(a:IMat, b:Int):Mat = notImplemented0("block array access"); + def apply(a:Int, b:IMat):Mat = notImplemented0("block array access"); + + def update(a:IMat, b:Mat) = notImplemented0("linear update"); + def update(a:IMat, b:IMat, m:Mat) = notImplemented0("block update"); + def update(a:IMat, b:Int, m:Mat) = notImplemented0("block update"); + def update(a:Int, b:IMat, m:Mat) = notImplemented0("block update"); + + /** Mat-Mat operators; the message names the operator and both operand types. */ def + (b : Mat):Mat = notImplemented1("+", b) + def - (b : Mat):Mat = notImplemented1("-", b) + def * (b : Mat):Mat = notImplemented1("*", b) + def xT (b : Mat):Mat = notImplemented1("xT", b) + def Tx (b : Mat):Mat = notImplemented1("Tx", b) + def / (b : Mat):Mat = notImplemented1("/", b) + def *@ (b : Mat):Mat = notImplemented1("*@", b) + def /@ (b : Mat):Mat =
notImplemented1("/@", b) + def \\ (b : Mat):Mat = notImplemented1("\\\\", b) + def ^ (b : Mat):Mat = notImplemented1("^", b) + + def > (b : Mat):Mat = notImplemented1(">", b) + def < (b : Mat):Mat = notImplemented1("<", b) + def >= (b : Mat):Mat = notImplemented1(">=", b) + def <= (b : Mat):Mat = notImplemented1("<=", b) + def == (b : Mat):Mat = notImplemented1("==", b) + def === (b : Mat):Mat = notImplemented1("===", b) + def != (b : Mat):Mat = notImplemented1("!=", b) + + /** Copies b into this matrix by calling b.copyTo(this) and returns that call's result. */ def <-- (b : Mat):Mat = b.copyTo(this) + + /** Int-scalar operators: not implemented in the base class; each throws through notImplemented0. */ def + (b : Int):Mat = notImplemented0("+") + def - (b : Int):Mat = notImplemented0("-") + def * (b : Int):Mat = notImplemented0("*") + def / (b : Int):Mat = notImplemented0("/") + def *@ (b : Int):Mat = notImplemented0("*@") + def /@ (b : Int):Mat = notImplemented0("/@") + def \\ (b : Int):Mat = notImplemented0("\\\\") + def ^ (b : Int):Mat = notImplemented0("^") + + def > (b : Int):Mat = notImplemented0(">") + def < (b : Int):Mat = notImplemented0("<") + def >= (b : Int):Mat = notImplemented0(">=") + def <= (b : Int):Mat = notImplemented0("<=") + def == (b : Int):Mat = notImplemented0("==") + def === (b : Int):Mat = notImplemented0("===") + def != (b : Int):Mat = notImplemented0("!=") + + /** Float-scalar operators: not implemented in the base class; each throws through notImplemented0. */ def + (b : Float):Mat = notImplemented0("+") + def - (b : Float):Mat = notImplemented0("-") + def * (b : Float):Mat = notImplemented0("*") + def / (b : Float):Mat = notImplemented0("/") + def *@ (b : Float):Mat = notImplemented0("*@") + def /@ (b : Float):Mat = notImplemented0("/@") + def \\ (b : Float):Mat = notImplemented0("\\\\") + def ^ (b : Float):Mat = notImplemented0("^") + + def > (b : Float):Mat = notImplemented0(">") + def < (b : Float):Mat = notImplemented0("<") + def >= (b : Float):Mat = notImplemented0(">=") + def <= (b : Float):Mat = notImplemented0("<=") + def == (b : Float):Mat = notImplemented0("==") + def === (b : Float):Mat = notImplemented0("===") + def != (b : Float):Mat = notImplemented0("!=") + + /** Double-scalar operators: not implemented in the base class; each throws through notImplemented0. */ def + (b : Double):Mat = notImplemented0("+") +
/**
 * Pairs this matrix with `b`, choosing the Pair subclass from the runtime
 * type of `b`.
 *
 * @param b the right-hand matrix; its concrete type selects the Pair class
 * @return a new FPair, DPair, IPair, SPair, CPair or GPair wrapping (this, b)
 * @throws RuntimeException if `b` is null or of a type with no Pair class,
 *         using the same "operator ... not implemented" wording as the
 *         other stubs instead of an anonymous MatchError
 */
def ~ (b : Mat):Pair = b match {
  case bb:FMat => new FPair(this, bb)
  case bb:DMat => new DPair(this, bb)
  case bb:IMat => new IPair(this, bb)
  case bb:SMat => new SPair(this, bb)
//  case bb:SDMat => new SDPair(this, bb)
  case bb:CMat => new CPair(this, bb)
  case bb:GMat => new GPair(this, bb)
  case _ => {
    // Mat overloads ==, so the null test goes through AnyRef reference equality.
    val other = if (b.asInstanceOf[AnyRef] eq null) "null" else b.mytype
    throw new RuntimeException("operator ~ not implemented for "+this.mytype+" and "+other)
  }
}
Mat):Mat = notImplemented1(">", b) + def < (b : Mat):Mat = notImplemented1("<", b) + def >= (b : Mat):Mat = notImplemented1(">=", b) + def <= (b : Mat):Mat = notImplemented1("<=", b) + def == (b : Mat):Mat = notImplemented1("==", b) + def === (b : Mat):Mat = notImplemented1("===", b) + def != (b : Mat):Mat = notImplemented1("!=", b) + + def \ (b : Mat):Mat = notImplemented1("\\", b) + def on (b : Mat):Mat = notImplemented1("on", b) + + def + (b : Int):Mat = notImplemented0("+") + def - (b : Int):Mat = notImplemented0("-") + def * (b : Int):Mat = notImplemented0("*") + def / (b : Int):Mat = notImplemented0("/") + def *@ (b : Int):Mat = notImplemented0("*@") + def /@ (b : Int):Mat = notImplemented0("/@") + def \\ (b : Int):Mat = notImplemented0("\\\\") + def ^ (b : Int):Mat = notImplemented0("^") + + def > (b : Int):Mat = notImplemented0(">") + def < (b : Int):Mat = notImplemented0("<") + def >= (b : Int):Mat = notImplemented0(">=") + def <= (b : Int):Mat = notImplemented0("<=") + def == (b : Int):Mat = notImplemented0("==") + def === (b : Int):Mat = notImplemented0("===") + def != (b : Int):Mat = notImplemented0("!=") + + def + (b : Float):Mat = notImplemented0("+") + def - (b : Float):Mat = notImplemented0("-") + def * (b : Float):Mat = notImplemented0("*") + def / (b : Float):Mat = notImplemented0("/") + def *@ (b : Float):Mat = notImplemented0("*@") + def /@ (b : Float):Mat = notImplemented0("/@") + def \\ (b : Float):Mat = notImplemented0("\\\\") + def ^ (b : Float):Mat = notImplemented0("^") + + def > (b : Float):Mat = notImplemented0(">") + def < (b : Float):Mat = notImplemented0("<") + def >= (b : Float):Mat = notImplemented0(">=") + def <= (b : Float):Mat = notImplemented0("<=") + def == (b : Float):Mat = notImplemented0("==") + def === (b : Float):Mat = notImplemented0("===") + def != (b : Float):Mat = notImplemented0("!=") + + def + (b : Double):Mat = notImplemented0("+") + def - (b : Double):Mat = notImplemented0("-") + def * (b : Double):Mat = 
notImplemented0("*") + def / (b : Double):Mat = notImplemented0("/") + def *@ (b : Double):Mat = notImplemented0("*@") + def /@ (b : Double):Mat = notImplemented0("/@") + def \\ (b : Double):Mat = notImplemented0("\\\\") + def ^ (b : Double):Mat = notImplemented0("^") + + def > (b : Double):Mat = notImplemented0(">") + def < (b : Double):Mat = notImplemented0("<") + def >= (b : Double):Mat = notImplemented0(">=") + def <= (b : Double):Mat = notImplemented0("<=") + def == (b : Double):Mat = notImplemented0("==") + def === (b : Double):Mat = notImplemented0("===") + def != (b : Double):Mat = notImplemented0("!=") +} + +object Mat { + import Ordered._ + import scala.tools.jline.TerminalFactory + + var compressType = 1 // 0=none, 1=zlib, 2=szip + + var compressionLevel = 3 // for zlib + + var chunkSize = 1024*1024 // for either method + + var szipBlock = 32 // szip block size + + var numThreads = 8 + + var noMKL:Boolean = false + + var nflops = 0L + + var oneBased = 0 + + var ioneBased = 1 + + var hasCUDA = 0 + + def checkCUDA:Unit = { + if (hasCUDA == 0) { + try { + val os = System.getProperty("os.name") + if (os.equals("Linux")) { + System.loadLibrary("cudart") + } else { + try { + System.loadLibrary("cudart64_50_35") + } catch { + case _ => try { + System.loadLibrary("cudart64_42_9") + } + } + } + } catch { + case _ => { + println("Cant find CUDA SDK") + hasCUDA = -1 + } + } + } + if (hasCUDA >= 0) { + try { + var cudanum = new Array[Int](1) + jcuda.runtime.JCuda.cudaGetDeviceCount(cudanum) + hasCUDA = cudanum(0) + printf("%d CUDA device%s found", hasCUDA, if (hasCUDA == 1) "" else "s") + if (hasCUDA > 0) { + jcuda.runtime.JCuda.cudaRuntimeGetVersion(cudanum) + println(", CUDA version %d.%d" format (cudanum(0)/1000, (cudanum(0)%100) / 10)) + } else { + println("") + } + } catch { + case e:NoClassDefFoundError => println("Couldn't load the JCUDA driver") + case e:Exception => println("Exception while initializing JCUDA driver") + case _ => println("Something went 
/**
 * Binary search for `v` in the ascending slice x[istartp, iendp).
 *
 * @param v       value to look for
 * @param x       array whose slice [istartp, iendp) is sorted ascending
 * @param istartp first index of the slice (inclusive)
 * @param iendp   end of the slice (exclusive)
 * @return an index `i` in the slice with x(i) == v (the last such index when
 *         the value is repeated), or -1 if `v` is not present
 *
 * An empty slice (iendp <= istartp) returns -1 without touching `x`.
 * Previously the element at `istartp` was still read in that case, which
 * could report a match outside the slice or index past the end of the array.
 */
def ibinsearch(v:Int, x:Array[Int], istartp:Int, iendp:Int):Int = {
  if (iendp <= istartp) {
    -1
  } else {
    var lo = istartp
    var hi = iendp
    // Invariant: if v is present, it lies in [lo, hi).
    while (hi - lo > 1) {
      // lo + (hi - lo)/2 cannot overflow, unlike (lo + hi)/2.
      val mid = lo + (hi - lo)/2
      if (v < x(mid)) hi = mid else lo = mid
    }
    if (v == x(lo)) lo else -1
  }
}
/**
 * Computes an index permutation that orders positions 0 until n
 * lexicographically by the key arrays a(0), a(1), ... (a(0) is the most
 * significant key). Positions whose keys are all equal are ordered by their
 * original index. The key arrays themselves are not modified.
 *
 * @param a non-empty list of key arrays; n is taken from a(0).length
 * @return the permutation array `ind` after sorting
 */
def ilexsort(a:List[Array[Int]]):Array[Int] = {
  val n = a(0).length
  // Copy the list into an array once: List.length and List.apply(k) are
  // linear-time, and the comparator below runs for every comparison.
  val keys = a.toArray
  val nkeys = keys.length
  val ind = new Array[Int](n)
  var i = 0; while(i < n) {ind(i) = i; i += 1}
  // Compares the elements currently at sort positions i and j.
  def comp(i:Int, j:Int):Int = {
    val ip = ind(i)
    val jp = ind(j)
    var c0 = 0
    var k = 0
    while (k < nkeys && c0 == 0) {
      val key = keys(k)
      c0 = key(ip) compare key(jp)
      k += 1
    }
    // Fall back to the original index so equal keys get a definite order.
    if (c0 != 0) c0 else ip compare jp
  }
  // Only the permutation is rearranged; keys are always read through it.
  def swap(i:Int, j:Int):Unit = {
    val tmp = ind(i)
    ind(i) = ind(j)
    ind(j) = tmp
  }
  BIDMat.Sorting.quickSort(comp, swap, 0, n)
  ind
}
/**
 * Returns one dimension of `a`.
 *
 * @param a the matrix to inspect
 * @param n 1 for the row count, 2 for the column count
 * @throws RuntimeException if n is neither 1 nor 2
 */
def size(a:Mat, n:Int):Int = n match {
  case 1 => a.nrows
  case 2 => a.ncols
  case _ => throw new RuntimeException("size arg must be 1 or 2")
}
/**
 * Obtains an nr x nc result matrix (with room for nnz entries), reusing `a`
 * when possible.
 *
 * If `a` is null or has zero rows and zero columns, a fresh matrix is
 * requested from `b` via b.zeros(nr, nc, nnz); otherwise a.recycle(nr, nc, nnz)
 * is returned.
 */
def recycleTry(a:Mat, nr:Int, nc:Int, b:Mat, nnz:Int):Mat = {
  // Mat overloads ==, so the null test is made on the AnyRef view of `a`.
  val missing = a.asInstanceOf[AnyRef] == null
  val reusable = !missing && !(a.nrows == 0 && a.ncols == 0)
  if (reusable) a.recycle(nr, nc, nnz) else b.zeros(nr, nc, nnz)
}
math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[IMat]; + + def recycleTry(a:Mat, b:SMat, c:SMat):SMat = + recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[SMat]; + + def recycleTry(a:Mat, b:SDMat, c:SDMat):SDMat = + recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[SDMat]; + + def recycleTry(a:Mat, b:GMat, c:GMat):GMat = + recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[GMat]; + + def recycleTry(a:Mat, b:GIMat, c:GIMat):GIMat = + recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[GIMat]; + + def recycleTry(a:Mat, b:GSMat, c:GSMat):GSMat = + recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[GSMat]; + + def recycleTry(a:Mat, b:Mat, c:Mat):Mat = recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b, b.nnz) + + def find(a:DMat) = a.find + def find2(a:DMat) = a.find2 + def find3(a:DMat) = a.find3 + def accum(inds:IMat, vals:DMat, nr:Int, nc:Int) = DMat(DenseMat.accum(inds, vals, nr, nc)) + def accum(inds:IMat, vals:DMat, nr:Int) = DMat(DenseMat.accum(inds, vals, nr, 1)) + def sort(a:DMat, ind:Int):DMat = DMat(DenseMat.sort(a, ind, true)) + def sort(a:DMat):DMat = DMat(DenseMat.sort(a, 0, true)) + def sort2(a:DMat):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (DMat(d), i)} + def sort2(a:DMat,dir:Int):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, true); (DMat(d), i)} + def sortdown(a:DMat, ind:Int):DMat = DMat(DenseMat.sort(a, ind, false)) + def sortdown(a:DMat):DMat = DMat(DenseMat.sort(a, 0, false)) + def sortdown2(a:DMat):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (DMat(d), i)} + def sortdown2(a:DMat, dir:Int):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, false); (DMat(d), i)} + def sortrows(a:DMat):(DMat, IMat) = { val ii = DenseMat.sortlex(a, true); 
(a(ii,?), ii) } + def sortrowsdown(a:DMat):(DMat, IMat) = { val ii = DenseMat.sortlex(a, false); (a(ii,?), ii) } + def sortlex(a:DMat):IMat = DenseMat.sortlex(a, true) + def sortlexdown(a:DMat):IMat = DenseMat.sortlex(a, false) + def uniquerows(a:DMat):(DMat, IMat, IMat) = { val (ii, jj) = DenseMat.uniquerows2(a) ; (a(ii,?), ii, jj)} + def unique(a:DMat):(DMat, IMat, IMat) = {val (ii, jj) = DenseMat.unique2(if (math.min(a.nrows,a.ncols)==1) a else a(?)) ; (a(ii,?), ii, jj)} + + def find(a:FMat) = a.find + def find2(a:FMat) = a.find2 + def find3(a:FMat) = a.find3 + def accum(inds:IMat, vals:FMat, nr:Int, nc:Int) = FMat(DenseMat.accum(inds, vals, nr, nc)) + def accum(inds:IMat, vals:FMat, nr:Int) = FMat(DenseMat.accum(inds, vals, nr, 1)) + def sort(a:FMat, ind:Int):FMat = FMat(DenseMat.sort(a, ind, true)) + def sort(a:FMat):FMat = FMat(DenseMat.sort(a, 0, true)) + def sort2(a:FMat):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (FMat(d), i)} + def sort2(a:FMat,dir:Int):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, true); (FMat(d), i)} + def sortdown(a:FMat, ind:Int):FMat = FMat(DenseMat.sort(a, ind, false)) + def sortdown(a:FMat):FMat = FMat(DenseMat.sort(a, 0, false)) + def sortdown2(a:FMat):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (FMat(d), i)} + def sortdown2(a:FMat, dir:Int):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, false); (FMat(d), i)} + def sortrows(a:FMat):(FMat, IMat) = { val ii = DenseMat.sortlex(a, true); (a(ii,?), ii) } + def sortrowsdown(a:FMat):(FMat, IMat) = { val ii = DenseMat.sortlex(a, false); (a(ii,?), ii) } + def sortlex(a:FMat):IMat = DenseMat.sortlex(a, true) + def sortlexdown(a:FMat):IMat = DenseMat.sortlex(a, false) + def uniquerows(a:FMat):(FMat, IMat, IMat) = { val (ii, jj) = DenseMat.uniquerows2(a) ; (a(ii,?), ii, jj)} + def unique(a:FMat):(FMat, IMat, IMat) = {val (ii, jj) = DenseMat.unique2(if (math.min(a.nrows,a.ncols)==1) a else a(?)) ; (a(ii,?), ii, jj)} + + def find(a:IMat) = a.find + def find2(a:IMat) 
= a.find2 + def find3(a:IMat) = a.find3 + def accum(inds:IMat, vals:IMat, nr:Int, nc:Int) = IMat(DenseMat.accum(inds, vals, nr, nc)) + def accum(inds:IMat, vals:IMat, nr:Int) = IMat(DenseMat.accum(inds, vals, nr, 1)) + def sort(a:IMat, ind:Int):IMat = IMat(DenseMat.sort(a, ind, true)) + def sort(a:IMat):IMat = IMat(DenseMat.sort(a, 0, true)) + def sort2(a:IMat):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (IMat(d), i)} + def sort2(a:IMat,dir:Int):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, true); (IMat(d), i)} + def sortdown(a:IMat, ind:Int):IMat = IMat(DenseMat.sort(a, ind, false)) + def sortdown(a:IMat):IMat = IMat(DenseMat.sort(a, 0, false)) + def sortdown2(a:IMat):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (IMat(d), i)} + def sortdown2(a:IMat, dir:Int):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, false); (IMat(d), i)} + def sortrows(a:IMat):(IMat, IMat) = { val ii = DenseMat.sortlex(a, true); (a(ii,?), ii) } + def sortrowsdown(a:IMat):(IMat, IMat) = { val ii = DenseMat.sortlex(a, false); (a(ii,?), ii) } + def sortlex(a:IMat):IMat = DenseMat.sortlex[Int](a, true) + def sortlexdown(a:IMat):IMat = DenseMat.sortlex(a, false) + def uniquerows(a:IMat):(IMat, IMat, IMat) = { val (ii, jj) = DenseMat.uniquerows2(a) ; (a(ii,?), ii, jj)} + def unique(a:IMat):(IMat, IMat, IMat) = {val (ii, jj) = DenseMat.unique2(if (math.min(a.nrows,a.ncols)==1) a else a(?)) ; (a(ii,?), ii, jj)} + + def find(a:CSMat) = a.find + def find2(a:CSMat) = a.find2 + def find3(a:CSMat) = a.find3 + def sort(a:CSMat, ind:Int):CSMat = CSMat(DenseMat.sort(a, ind, true)) + def sort(a:CSMat):CSMat = CSMat(DenseMat.sort(a, 0, true)) + def sort2(a:CSMat):(CSMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (CSMat(d), i)} + def sortdown(a:CSMat, ind:Int):CSMat = CSMat(DenseMat.sort(a, ind, false)) + def sortdown(a:CSMat):CSMat = CSMat(DenseMat.sort(a, 0, false)) + def sortdown2(a:CSMat):(CSMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (CSMat(d), i)} + def 
/**
 * Inverts the permutation stored in each column of `a`.
 *
 * Within every column, if entry r holds value p then the output holds r at
 * offset p of the same column. A matrix with a single row is treated as one
 * column of length a.ncols. Entries are used directly as zero-based offsets
 * into their column.
 *
 * @return a new IMat with the same dimensions as `a`
 */
def invperm(a:IMat):IMat = {
  val out = IMat(a.nrows, a.ncols)
  val singleRow = a.nrows == 1
  val len = if (singleRow) a.ncols else a.nrows
  val ncolumns = if (singleRow) 1 else a.ncols
  var c = 0
  while (c < ncolumns) {
    val base = c*len
    var r = 0
    while (r < len) {
      out.data(a.data(r + base) + base) = r
      r += 1
    }
    c += 1
  }
  out
}
/**
 * Creates an nr x nc FMat with every element set to 1.
 */
def ones(nr:Int, nc:Int):FMat = {
  val result = FMat(nr,nc)
  val cells = result.data
  val total = result.length
  for (k <- 0 until total) cells(k) = 1
  result
}
/**
 * Converts a dense FMat into an SMat.
 *
 * The (row, column, value) triples come from a.find3. Row indices are
 * shifted by Mat.ioneBased when written to the output's `ir` array, the
 * column indices are packed into `jc` by SparseMat.compressInds, and the
 * values are copied as-is.
 */
def sparse(a:FMat):SMat = {
  val (rows, cols, vals) = a.find3
  val count = rows.nrows
  val result = SMat(a.nrows, a.ncols, count)
  val base = Mat.ioneBased
  for (k <- 0 until count) result.ir(k) = rows.data(k) + base
  SparseMat.compressInds(cols.data, a.ncols, result.jc, a.nnz)
  System.arraycopy(vals.data, 0, result.data, 0, count)
  result
}
/**
 * Evaluates the product A' * B only at the stored positions of `c`.
 *
 * For every stored entry (r, i) of `c`, the output holds the dot product of
 * column r of `a` with column i of `b`; the output copies the `ir`/`jc`
 * structure of `c`. The work is done by DDShelper, over all columns on the
 * calling thread, or split into column ranges run in actors when `c` has
 * more than 100000 stored entries and Mat.numThreads > 1.
 *
 * @param a    dense matrix; a.nrows must equal b.nrows
 * @param b    dense matrix
 * @param c    sparse pattern; must be a.ncols x b.ncols
 * @param omat candidate output, passed to SMat.newOrCheckSMat
 * @return the sparse result
 * @throws RuntimeException on a dimension mismatch; a failure raised inside
 *         a worker is rethrown on the calling thread
 */
def DDS(a:FMat,b:FMat,c:SMat,omat:Mat):SMat = {
  if (a.nrows != b.nrows) {
    throw new RuntimeException("nrows of dense A and B must match")
  } else if (c.nrows != a.ncols || c.ncols != b.ncols) {
    throw new RuntimeException("dims of C must match A'*B")
  } else {
    val out = SMat.newOrCheckSMat(c, omat)
    Mat.nflops += 2L * c.nnz * a.nrows
    val ioff = Mat.ioneBased
    out.jc(0) = ioff
    // Read the thread count once so the partitioning and the wait agree even
    // if Mat.numThreads is changed while this call is running.
    val nthreads = Mat.numThreads
    if (c.nnz > 100000 && nthreads > 1) {
      // A latch replaces the previous spin-wait on a plain array: it blocks
      // instead of burning a core and makes the workers' writes to `out`
      // visible to the caller once await() returns.
      val latch = new java.util.concurrent.CountDownLatch(nthreads)
      val failure = new java.util.concurrent.atomic.AtomicReference[Throwable](null)
      for (i <- 0 until nthreads) {
        actor {
          try {
            // Long arithmetic keeps i*ncols from overflowing Int.
            val istart = (i.toLong*c.ncols/nthreads).toInt
            val iend = ((i+1).toLong*c.ncols/nthreads).toInt
            DDShelper(a, b, c, out, istart, iend, ioff)
          } catch {
            // Keep the first failure so the caller can rethrow it instead of
            // waiting forever on a worker that died.
            case e:Throwable => failure.compareAndSet(null, e)
          } finally {
            latch.countDown()
          }
        }
      }
      latch.await()
      if (failure.get != null) throw failure.get
    } else {
      DDShelper(a, b, c, out, 0, c.ncols, ioff)
    }
    out
  }
}
/**
 * Configures chunking and compression on a dataset-creation property list.
 *
 * Nothing is changed when Mat.compressType is 0 or when the dataset has
 * 1024 elements or fewer. Otherwise a chunk shape is set and then either the
 * deflate filter (compressType == 1, level Mat.compressionLevel) or the szip
 * filter (block size Mat.szipBlock) is enabled.
 *
 * @param dplist_id dataset-creation property list handle
 * @param dims      dataset dimensions; a length of 1 is treated as a
 *                  vector, anything else as a 2-D dataset using dims(0)
 *                  and dims(1)
 */
def setCompressionPlist(dplist_id:Int, dims:Array[Long]) = {
  if (Mat.compressType > 0) {
    if (dims.length == 1) {
      if (dims(0) > 1024) {
        val cdims = new Array[Long](1)
        cdims(0) = math.max(1, math.min(dims(0), Mat.chunkSize))
        H5Pset_chunk(dplist_id, 1, cdims)
        if (Mat.compressType == 1) {
          H5Pset_deflate(dplist_id, Mat.compressionLevel)
        } else {
          H5Pset_szip(dplist_id, H5_SZIP_EC_OPTION_MASK, Mat.szipBlock)
        }
      }
    } else {
      if (dims(0)*dims(1) > 1024) {
        val cdims = new Array[Long](2)
        // Limit the first dimension so a chunk holds about Mat.chunkSize
        // elements; the second dimension is kept whole.
        cdims(0) = math.max(1, math.min(dims(0), 1+Mat.chunkSize/dims(1)))
        cdims(1) = math.max(1, dims(1))
        // The chunk shape was computed but never applied here; HDF5 filters
        // only work on chunked datasets, so set it before enabling one.
        H5Pset_chunk(dplist_id, 2, cdims)
        if (Mat.compressType == 1) {
          H5Pset_deflate(dplist_id, Mat.compressionLevel)
        } else {
          H5Pset_szip(dplist_id, H5_SZIP_EC_OPTION_MASK, Mat.szipBlock)
        }
      }
    }
  }
}
/**
 * Reads the attribute `attr_name` of object `obj_name` (resolved relative to
 * `id`) and returns its first 8-byte element as a Long.
 *
 * The read buffer holds (attribute type size / 8) Longs, so an attribute
 * whose type is narrower than 8 bytes gives an empty buffer and the final
 * element access fails.
 *
 * The attribute and datatype handles are closed in `finally` blocks, so they
 * are released even if the read or the element access throws.
 */
def getLongAttr(id:Int, obj_name:String, attr_name:String):Long = {
  val attr_id = H5Aopen_by_name(id, obj_name, attr_name, H5P_DEFAULT, H5P_DEFAULT)
  try {
    val attr_type_id = H5Aget_type(attr_id)
    try {
      val attr_type_size = H5Tget_size(attr_type_id)
      val sbuf = new Array[Long](attr_type_size/8)
      H5Aread(attr_id, attr_type_id, sbuf)
      sbuf(0)
    } finally {
      H5Tclose(attr_type_id)
    }
  } finally {
    H5Aclose(attr_id)
  }
}
/**
 * Read a dense dataset into a freshly allocated `DenseMat[T]`.
 *
 * The stored datatype must have class `h5class` and element size `dsize`; otherwise a
 * RuntimeException is thrown. The matrix is created with the two stored extents swapped
 * (`dims(1)` rows, `dims(0)` columns).
 *
 * @param fid     handle the dataset name is resolved against
 * @param varname dataset name
 * @param h5class expected HDF5 datatype class
 * @param dsize   expected element size in bytes
 * @return the populated matrix
 * @throws RuntimeException when the stored class or element size does not match
 */
def getDenseMat[T : ClassManifest](fid:Int, varname:String, h5class:Int, dsize:Int):DenseMat[T] = {
  val data_id = H5Dopen(fid, varname, H5P_DEFAULT)
  try {
    val data_type_id = H5Dget_type(data_id)
    try {
      val data_class = H5Tget_class(data_type_id)
      val data_size = H5Tget_size(data_type_id)
      // Validate before allocating so a mismatch costs nothing.
      if (data_class != h5class || data_size != dsize) {
        throw new RuntimeException("Bad class or data size "+data_class+" "+data_size)
      }
      val dims = getMatDims(data_id)
      val mdata = new DenseMat[T](dims(1).intValue, dims(0).intValue)
      H5Dread(data_id, data_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, mdata.data)
      mdata
    } finally {
      // Previously skipped whenever the mismatch exception was thrown.
      H5Tclose(data_type_id)
    }
  } finally {
    H5Dclose(data_id)
  }
}
/**
 * Read a sparse matrix stored as a group with `jc`, `data` and an optional `ir` dataset.
 *
 * The row count comes from the `MATLAB_sparse` attribute, the column count from the
 * length of `jc` minus one, and the non-zero count from the length of `data`. When
 * `ir` cannot be opened the matrix is built with `SparseMat.noRows`. Index arrays are
 * passed through `addOne` after reading.
 *
 * @param fid     handle the group name is resolved against
 * @param varname group name
 * @return the populated sparse matrix
 */
def getSparseMat[T](fid:Int, varname:String)(implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] = {
  val nrows = getLongAttr(fid, varname, "MATLAB_sparse").intValue
  val jc_id = H5Dopen(fid, varname+"/jc", H5P_DEFAULT)
  // Handles start at -1 so the cleanup below only closes what was actually opened.
  var data_id = -1
  var data_type_id = -1
  var ir_id = -1
  var convert_ints = -1
  try {
    val ncols = getMatDims(jc_id)(0).intValue - 1
    data_id = H5Dopen(fid, varname+"/data", H5P_DEFAULT)
    data_type_id = H5Dget_type(data_id)
    val nnz = getMatDims(data_id)(0).intValue
    try {
      ir_id = H5Dopen(fid, varname+"/ir", H5P_DEFAULT)
    } catch {
      // A missing "ir" dataset is expected; only ordinary exceptions are absorbed so
      // that VM errors are no longer silently discarded.
      case e:Exception => {}
    }
    val sdata = if (ir_id >= 0) {
      SparseMat(nrows, ncols, nnz)
    } else {
      SparseMat.noRows(nrows, ncols, nnz)
    }
    convert_ints = H5Tcopy(H5T_NATIVE_INT)
    H5Dread_int(jc_id, convert_ints, H5S_ALL, H5S_ALL, H5P_DEFAULT, sdata.jc)
    addOne(sdata.jc)
    if (ir_id >= 0) {
      H5Dread_int(ir_id, convert_ints, H5S_ALL, H5S_ALL, H5P_DEFAULT, sdata.ir)
      addOne(sdata.ir)
    }
    H5Dread(data_id, data_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, sdata.data)
    sdata
  } finally {
    if (convert_ints >= 0) H5Tclose(convert_ints)
    if (ir_id >= 0) H5Dclose(ir_id)
    if (data_type_id >= 0) H5Tclose(data_type_id)
    if (data_id >= 0) H5Dclose(data_id)
    H5Dclose(jc_id)
  }
}
FMat(getDenseMat[Float](fid, varname, H5T_FLOAT, 4)) + } + } else if (attr_class.equals("int32")) { + if (H5Aexists_by_name(fid, varname, "MATLAB_sparse", H5P_DEFAULT)) { + throw new RuntimeException("Sparse arrays of ints unsupported") + } else { + IMat(getDenseMat[Int](fid, varname, H5T_INTEGER, 4)) + } + } else if (attr_class.equals("int8")) { + if (H5Aexists_by_name(fid, varname, "MATLAB_sparse", H5P_DEFAULT)) { + BMat(getSparseMat[Byte](fid, varname)) + } else { + throw new RuntimeException("Dense arrays of bytes unsupported") + } + } else if (attr_class.equals("char")) { + if (H5Aexists_by_name(fid, varname, "MATLAB_sparse", H5P_DEFAULT)) { + throw new RuntimeException("Sparse arrays of char unsupported") + } else { + getMatString(fid, varname) + } + } else if (attr_class.equals("cell")) { + if (H5Aexists_by_name(fid, varname, "MATLAB_sparse", H5P_DEFAULT)) { + throw new RuntimeException("Sparse cell arrays unsupported") + } else { + getCellMat(fid, varname) + } + } else throw new RuntimeException("Couldnt read storage class "+attr_class) + } else throw new RuntimeException("Couldnt find matlab var named "+varname) + } + + def writeMatHeader(fname:String) = { + val ff = new java.io.RandomAccessFile(fname,"rws") + val sp = new scala.sys.SystemProperties() + val hstring = "MATLAB 7.3 MAT-file, Platform: "+sp.get("os.arch").get+" "+sp.get("os.name").get+" "+sp.get("os.version").get+ " "+ + "Created by BIDMat on "+(new java.text.SimpleDateFormat("EEE MMM d HH:mm:ss yyyy")).format(new java.util.Date())+ + " HDF5 Schema 1.0 ." 
/**
 * Create the placeholder dataset named "0" under `id` and return an object reference to it.
 *
 * The dataset has a 1-D dataspace of two unsigned 64-bit elements and carries the
 * attributes `MATLAB_class = "canonical empty"` and `MATLAB_empty = 1`.
 *
 * @param id group handle the dataset is created in
 * @return the object reference produced by `H5Rcreate`
 */
def putEmptyRef(id:Int):Array[Byte] = {
  val dims = new Array[Long](1)
  dims(0) = 2
  // The dataspace holds two elements, so the source buffer must too. The old
  // `Array[Long](2)` built a ONE-element array containing the value 2.
  val zeros = new Array[Long](2)
  val dmatspace_id = H5Screate_simple(1, dims, null)
  try {
    val dmat_id = H5Dcreate(id, "0", H5T_NATIVE_ULLONG, dmatspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)
    try {
      H5Dwrite(dmat_id, H5T_NATIVE_ULLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, zeros)
      putStringAttr(dmat_id, "MATLAB_class", "canonical empty")
      putByteAttr(dmat_id, "MATLAB_empty", 1)
      H5Rcreate(dmat_id, ".", H5R_OBJECT, -1)
    } finally {
      H5Dclose(dmat_id)
    }
  } finally {
    H5Sclose(dmatspace_id)
  }
}
/**
 * Write a sparse matrix as a group named `"/"+varname` containing `jc`, an optional
 * `ir`, and `data` datasets, and return an object reference to the group.
 *
 * The group is tagged with `MATLAB_class = className` and `MATLAB_sparse = a.nrows`.
 * Index arrays are shifted with `subOne` for the write and always shifted back with
 * `addOne`, whether or not the write succeeds. `ir` is skipped when `a.ir` is null.
 *
 * @param fid         handle the group is created under
 * @param a           matrix to write
 * @param varname     group name (a leading "/" is prepended)
 * @param nativeClass HDF5 type used for both the file and memory type of `data`
 * @param className   value stored in the `MATLAB_class` attribute
 * @return the object reference produced by `H5Rcreate`
 * @throws RuntimeException when writing an index array fails
 */
def putSparseMat[T](fid:Int, a:SparseMat[T], varname:String, nativeClass:Int, className:String):Array[Byte] = {
  // Writes one index array as a 64-bit dataset, converting from the Int buffer via memtype_id.
  def putIndices(group_id:Int, name:String, dims:Array[Long], dplist_id:Int, memtype_id:Int, idx:Array[Int]):Unit = {
    val space_id = H5Screate_simple(1, dims, null)
    try {
      val set_id = H5Dcreate(group_id, name, H5T_NATIVE_LLONG, space_id, H5P_DEFAULT, dplist_id, H5P_DEFAULT)
      try {
        subOne(idx)
        try {
          H5Dwrite(set_id, memtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, idx)
        } catch {
          case e:Exception => throw new RuntimeException("Error writing sparse mat "+e)
        } finally {
          // Single restore point replaces the duplicated addOne calls.
          addOne(idx)
        }
      } finally {
        H5Dclose(set_id)
      }
    } finally {
      H5Sclose(space_id)
    }
  }

  val group_id = H5Gcreate(fid, "/"+varname, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)
  try {
    putStringAttr(group_id, "MATLAB_class", className)
    putLongAttr(group_id, "MATLAB_sparse", a.nrows)
    val convert_ints = H5Tcopy(H5T_NATIVE_INT)
    try {
      val dims = new Array[Long](1)

      dims(0) = a.ncols + 1
      val jc_plist = H5Pcreate(H5P_DATASET_CREATE)
      try {
        setCompressionPlist(jc_plist, dims)
        putIndices(group_id, "jc", dims, jc_plist, convert_ints, a.jc)
      } finally {
        H5Pclose(jc_plist)
      }

      // "ir" and "data" share one extent (nnz) and one property list.
      dims(0) = a.nnz
      val nz_plist = H5Pcreate(H5P_DATASET_CREATE)
      try {
        setCompressionPlist(nz_plist, dims)
        if (a.ir != null) {
          putIndices(group_id, "ir", dims, nz_plist, convert_ints, a.ir)
        }
        val dataspace_id = H5Screate_simple(1, dims, null)
        try {
          val data_id = H5Dcreate(group_id, "data", nativeClass, dataspace_id, H5P_DEFAULT, nz_plist, H5P_DEFAULT)
          try {
            H5Dwrite(data_id, nativeClass, H5S_ALL, H5S_ALL, H5P_DEFAULT, a.data)
          } finally {
            H5Dclose(data_id)
          }
        } finally {
          H5Sclose(dataspace_id)
        }
      } finally {
        H5Pclose(nz_plist)
      }
    } finally {
      H5Tclose(convert_ints)
    }
    H5Rcreate(group_id, ".", H5R_OBJECT, -1)
  } finally {
    H5Gclose(group_id)
  }
}
/**
 * Write a flat list of (matrix, name) pairs into an open file.
 *
 * Arguments are consumed two at a time: a `Mat` followed by the `String` name it is
 * stored under. Pairs that precede a malformed entry have already been written when
 * the exception is raised.
 *
 * @param fid     open file handle passed to `putMat`
 * @param argList alternating matrices and variable names
 * @throws RuntimeException when an entry has the wrong type or the final matrix has no name
 */
def saveAsImpl(fid:Int, argList:List[AnyRef]) = {
  // Walk the list by its tail; indexed access on a List is linear per lookup.
  var rest = argList
  while (!rest.isEmpty) {
    rest match {
      case (a:Mat) :: (str:String) :: tail => {
        putMat(fid, a, str)
        rest = tail
      }
      // A trailing matrix used to surface as an IndexOutOfBoundsException.
      case (a:Mat) :: Nil => throw new RuntimeException("last Mat argument has no variable name following it")
      case (a:Mat) :: _ => throw new RuntimeException("odd numbered args must be String variable names")
      case _ => throw new RuntimeException("even numbered args must be Mat variables")
    }
  }
}
/**
 * Selection-sort the strided slice `lo, lo+stride, ...` of `a` in place, applying the
 * same swaps to `ii`.
 *
 * Elements are ordered by value; equal values are ordered by their `ii` entry. The
 * slice covers every position strictly before `hi` in the direction of `stride`, so
 * `hi - lo` no longer has to be an exact multiple of `stride` and a zero stride is a
 * no-op rather than an endless loop.
 *
 * @param a      values to sort
 * @param ii     companion array permuted together with `a`
 * @param lo     first position of the slice
 * @param hi     exclusive end of the slice
 * @param stride step between slice positions; may be negative
 */
def isort(a:Array[Float], ii:Array[Int], lo:Int, hi:Int, stride:Int):Unit = {
  val dir = math.signum(stride)
  var i = lo
  // (hi - i)*dir > 0 means "i has not yet reached hi" for either stride direction.
  while ((hi - i)*dir > 0) {
    var imin = i
    var vmin = a(i)
    var j = i + stride
    while ((hi - j)*dir > 0) {
      if (a(j) <= vmin && ((a(j) < vmin) || ii(j) < ii(imin))) {
        vmin = a(j)
        imin = j
      }
      j += stride
    }
    a(imin) = a(i)
    a(i) = vmin
    val itmp = ii(imin)
    ii(imin) = ii(i)
    ii(i) = itmp
    i += stride
  }
}
/**
 * Pick a pivot position as the median of three `med3` samples.
 *
 * Candidates are compared by value first and by their `ii` entry on ties.
 *
 * @return the position (an index into `a`) of the middle candidate
 */
def med9(a:Array[Float], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
  // Sampling order is kept: each med3 call draws from the random source.
  val first = med3(a, ii, lo, hi, stride)
  val second = med3(a, ii, lo, hi, stride)
  val third = med3(a, ii, lo, hi, stride)
  val fv = a(first)
  val sv = a(second)
  val tv = a(third)
  val ft = ii(first)
  val st = ii(second)
  val tt = ii(third)
  // "x ranks strictly after y" under the (value, tag) ordering.
  def after(xv:Float, xt:Int, yv:Float, yt:Int):Boolean = (xv >= yv) && ((xv > yv) || xt > yt)
  if (after(sv, st, fv, ft)) {
    if (after(tv, tt, sv, st)) second
    else if (after(tv, tt, fv, ft)) third
    else first
  } else {
    if (after(tv, tt, fv, ft)) first
    else if (after(tv, tt, sv, st)) third
    else second
  }
}
/**
 * Sort the strided slice `[lo, hi)` of `a` in place, permuting `ii` identically.
 *
 * Slices of at most 16 elements go to `isort`. Larger slices are partitioned and the
 * halves sorted recursively; when `nthreads > 1` and the slice has more than 400
 * elements the two halves run concurrently in actors.
 *
 * The caller now blocks on a latch instead of spinning on two plain local flags.
 * Those flags were written by other threads without any synchronisation, so the
 * spinning thread was not guaranteed to observe them, and a failure in either half
 * left the caller spinning forever. A failure is now rethrown in the caller.
 *
 * @param nthreads parallelism budget; halved at each recursion level
 */
def quickSort2(a:Array[Double], ii:Array[Int], lo:Int, hi:Int, stride:Int, nthreads:Int):Unit = {
  val nvals = (hi - lo)/stride
  if (nvals > 0) {
    if (nvals <= 16) {
      isort(a, ii, lo, hi, stride)
    } else {
      val ip = partition(a, ii, lo, hi, stride)
      if (nthreads > 1 && nvals > 400) {
        val pending = new java.util.concurrent.CountDownLatch(2)
        val failure = new java.util.concurrent.atomic.AtomicReference[Throwable](null)
        def spawn(from:Int, to:Int) = actor {
          try {
            quickSort2(a, ii, from, to, stride, nthreads/2)
          } catch {
            // Captured only to be rethrown on the waiting thread below.
            case t:Throwable => failure.compareAndSet(null, t)
          } finally {
            pending.countDown()
          }
        }
        spawn(lo, ip)
        spawn(ip, hi)
        // await() also makes the workers' array writes visible to this thread.
        pending.await()
        if (failure.get != null) throw failure.get
      } else {
        quickSort2(a, ii, lo, ip, stride, nthreads/2)
        quickSort2(a, ii, ip, hi, stride, nthreads/2)
      }
    }
  }
}
/**
 * Choose a pivot position as the middle of three `med3` picks.
 *
 * Ordering is by value, with the `ii` entry breaking ties.
 *
 * @return the position (an index into `a`) of the middle pick
 */
def med9(a:Array[Double], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
  // The three draws stay in their original order because med3 consumes randomness.
  val p1 = med3(a, ii, lo, hi, stride)
  val p2 = med3(a, ii, lo, hi, stride)
  val p3 = med3(a, ii, lo, hi, stride)
  val (x1, x2, x3) = (a(p1), a(p2), a(p3))
  val (t1, t2, t3) = (ii(p1), ii(p2), ii(p3))
  val twoAfterOne = (x2 >= x1) && ((x2 > x1) || t2 > t1)
  if (twoAfterOne) {
    val threeAfterTwo = (x3 >= x2) && ((x3 > x2) || t3 > t2)
    if (threeAfterTwo) {
      p2
    } else {
      val threeAfterOne = (x3 >= x1) && ((x3 > x1) || t3 > t1)
      if (threeAfterOne) p3 else p1
    }
  } else {
    val threeAfterOne = (x3 >= x1) && ((x3 > x1) || t3 > t1)
    if (threeAfterOne) {
      p1
    } else {
      val threeAfterTwo = (x3 >= x2) && ((x3 > x2) || t3 > t2)
      if (threeAfterTwo) p3 else p2
    }
  }
}
/**
 * Selection-sort the strided slice `lo, lo+stride, ...` of `a` in place, applying the
 * same swaps to `ii`.
 *
 * Elements are ordered by value; equal values are ordered by their `ii` entry. The
 * slice covers every position strictly before `hi` in the direction of `stride`, so
 * `hi - lo` no longer has to be an exact multiple of `stride` and a zero stride is a
 * no-op rather than an endless loop.
 *
 * @param a      values to sort
 * @param ii     companion array permuted together with `a`
 * @param lo     first position of the slice
 * @param hi     exclusive end of the slice
 * @param stride step between slice positions; may be negative
 */
def isort(a:Array[Int], ii:Array[Int], lo:Int, hi:Int, stride:Int):Unit = {
  val dir = math.signum(stride)
  var i = lo
  // (hi - i)*dir > 0 means "i has not yet reached hi" for either stride direction.
  while ((hi - i)*dir > 0) {
    var imin = i
    var vmin = a(i)
    var j = i + stride
    while ((hi - j)*dir > 0) {
      if (a(j) <= vmin && ((a(j) < vmin) || ii(j) < ii(imin))) {
        vmin = a(j)
        imin = j
      }
      j += stride
    }
    a(imin) = a(i)
    a(i) = vmin
    val itmp = ii(imin)
    ii(imin) = ii(i)
    ii(i) = itmp
    i += stride
  }
}
/**
 * Return the position of the middle of three `med3` samples, comparing by value and
 * breaking ties on the `ii` entry.
 *
 * @return an index into `a`
 */
def med9(a:Array[Int], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
  // Evaluation order of the three samples is unchanged; each call is randomised.
  val s1 = med3(a, ii, lo, hi, stride)
  val s2 = med3(a, ii, lo, hi, stride)
  val s3 = med3(a, ii, lo, hi, stride)
  val k1 = a(s1)
  val k2 = a(s2)
  val k3 = a(s3)
  val g1 = ii(s1)
  val g2 = ii(s2)
  val g3 = ii(s3)
  // Strict "ranks after" test on (key, tag) pairs.
  def after(xk:Int, xg:Int, yk:Int, yg:Int):Boolean = (xk >= yk) && ((xk > yk) || xg > yg)
  if (after(k2, g2, k1, g1)) {
    if (after(k3, g3, k2, g2)) s2
    else if (after(k3, g3, k1, g1)) s3
    else s1
  } else {
    if (after(k3, g3, k1, g1)) s1
    else if (after(k3, g3, k2, g2)) s3
    else s2
  }
}
/**
 * Sort `a` in place using the supplied ordering.
 *
 * The array is exposed to `sort1` only through an index comparator and an index
 * swapper, over the range starting at 0 with length `a.length`.
 */
def quickSort[@specialized(Double, Float, Int, Byte) T](a:Array[T])(implicit ord:Ordering[T]) = {
  // Compare the elements currently held at two positions.
  val compareAt = (p:Int, q:Int) => ord.compare(a(p), a(q))
  // Exchange the elements at two positions.
  val exchange = (p:Int, q:Int) => {
    val held = a(p)
    a(p) = a(q)
    a(q) = held
  }
  sort1(compareAt, exchange, 0, a.length)
}
/**
 * Apply `op` to a single-precision left operand and a right operand of any matrix type.
 *
 * The right operand's runtime type selects the operator entry point:
 * FMat and SMat use `fop`, IMat is converted to FMat first, DMat promotes the left
 * operand to DMat and uses `dop`, CMat promotes it to CMat and uses `cop`.
 *
 * @param a  left operand
 * @param b  right operand
 * @param c  passed through unchanged to the operator
 * @param op operator to apply
 * @return the operator's result
 * @throws RuntimeException when `b` is null or of a type with no rule here
 */
def applyMat(a:FMat, b:Mat, c:Mat, op:Mop):Mat = {
  b match {
    case fb:FMat => op.fop(a, fb, c)
    case sb:SMat => op.fop(a, sb, c)
    case db:DMat => op.dop(DMat(a), db, c)
    case ib:IMat => op.fop(a, FMat(ib), c)
    case cb:CMat => op.cop(CMat(a), cb, c)
    // Unsupported operands previously escaped as a bare MatchError.
    case _ => {
      val rtype = if (b.asInstanceOf[AnyRef] == null) "null" else b.mytype
      throw new RuntimeException("operator not implemented for "+a.mytype+" and "+rtype)
    }
  }
}
/**
 * Pair the left operand `a` with an output matrix for a product of `a` and `b`.
 *
 * When `c` is null a new FMat sized `multDim1(a,b)` by `multDim2(a,b)` is used as the
 * output; otherwise `c` itself is used.
 */
def getFPair(c:Mat, a:FMat, b:FMat):FPair = {
  // The null test goes through AnyRef, as elsewhere in this object.
  val missing = c.asInstanceOf[AnyRef] == null
  if (missing) {
    val fresh = FMat(multDim1(a,b), multDim2(a,b))
    new FPair(fresh, a)
  } else {
    new FPair(c, a)
  }
}
/**
 * A binary matrix operator with one entry point per operand-type combination.
 *
 * Each entry point receives the two operands plus a third matrix `c`, which the
 * implementations visible in this file hand to `Operator.get*Pair`.
 */
trait Mop {
  /** FMat with FMat. */
  def fop(a:FMat, b:FMat, c:Mat):FMat
  /** FMat with SMat. */
  def fop(a:FMat, b:SMat, c:Mat):FMat
  /** DMat with DMat. */
  def dop(a:DMat, b:DMat, c:Mat):DMat
  /** IMat with IMat. */
  def iop(a:IMat, b:IMat, c:Mat):IMat
  /** CMat with CMat. */
  def cop(a:CMat, b:CMat, c:Mat):CMat
  /** GMat with GMat. */
  def gop(a:GMat, b:GMat, c:Mat):GMat
  /** SMat with SMat. */
  def sop(a:SMat, b:SMat, c:Mat):SMat

  /**
   * Always throws: reports that operator `s` has no implementation for the type of `m`.
   *
   * @throws RuntimeException unconditionally
   */
  def notImplemented0(s:String, m:Mat):Mat = {
    val reason = "operator "+s+" not implemented for "+m.mytype
    throw new RuntimeException(reason)
  }
}
/**
 * The `/` operator. Implemented for FMat/FMat, DMat/DMat and CMat/CMat; every other
 * combination reports "not implemented". The third argument `c` is not used here.
 */
object Mop_Div extends Mop {
  // Supported combinations delegate straight to the operand's own `/`.
  override def fop(a:FMat, b:FMat, c:Mat):FMat = {
    a / b
  }
  override def dop(a:DMat, b:DMat, c:Mat):DMat = {
    a / b
  }
  override def cop(a:CMat, b:CMat, c:Mat):CMat = {
    a / b
  }

  // Unsupported combinations: notImplemented0 throws, the trailing operand only
  // satisfies the declared return type.
  override def fop(a:FMat, b:SMat, c:Mat):FMat = {
    notImplemented0("/", a)
    a
  }
  override def iop(a:IMat, b:IMat, c:Mat):IMat = {
    notImplemented0("/", a)
    a
  }
  override def gop(a:GMat, b:GMat, c:Mat):GMat = {
    notImplemented0("/", a)
    a
  }
  override def sop(a:SMat, b:SMat, c:Mat):SMat = {
    notImplemented0("/", a)
    a
  }
}
+object Mop_EDiv extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = Operator.getFPair(c, a) /@ b + override def fop(a:FMat, b:SMat, c:Mat):FMat = Operator.getFPair(c, a) /@ full(b) + override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a) /@ b + override def iop(a:IMat, b:IMat, c:Mat):IMat = {notImplemented0("/@", a); a} + override def cop(a:CMat, b:CMat, c:Mat):CMat = Operator.getCPair(c, a) /@ b + override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a) /@ b + override def sop(a:SMat, b:SMat, c:Mat):SMat = Operator.getSPair(c, a) /@ b +} + +object Mop_HCat extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = a \ b + override def fop(a:FMat, b:SMat, c:Mat):FMat = {notImplemented0("\\", a); a} + override def dop(a:DMat, b:DMat, c:Mat):DMat = a \ b + override def iop(a:IMat, b:IMat, c:Mat):IMat = a \ b + override def cop(a:CMat, b:CMat, c:Mat):CMat = a \ b + override def gop(a:GMat, b:GMat, c:Mat):GMat = {notImplemented0("\\", a); a} + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0("\\", a); a} +} + +object Mop_VCat extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = a on b + override def fop(a:FMat, b:SMat, c:Mat):FMat = {notImplemented0("on", a); a} + override def dop(a:DMat, b:DMat, c:Mat):DMat = a on b + override def iop(a:IMat, b:IMat, c:Mat):IMat = a on b + override def cop(a:CMat, b:CMat, c:Mat):CMat = a on b + override def gop(a:GMat, b:GMat, c:Mat):GMat = {notImplemented0("on", a); a} + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0("on", a); a} +} + +object Mop_LT extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = Operator.getFPair(c, a) < b + override def fop(a:FMat, b:SMat, c:Mat):FMat = Operator.getFPair(c, a) < full(b) + override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a) < b + override def iop(a:IMat, b:IMat, c:Mat):IMat = Operator.getIPair(c, a) < b + override def cop(a:CMat, b:CMat, c:Mat):CMat = {notImplemented0("<", a); a} 
+ override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a) < b + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0("<", a); a} +} + +object Mop_GT extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = Operator.getFPair(c, a) > b + override def fop(a:FMat, b:SMat, c:Mat):FMat = Operator.getFPair(c, a) > full(b) + override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a) > b + override def iop(a:IMat, b:IMat, c:Mat):IMat = Operator.getIPair(c, a) > b + override def cop(a:CMat, b:CMat, c:Mat):CMat = {notImplemented0(">", a); a} + override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a) > b + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0(">", a); a} +} + +object Mop_LE extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = Operator.getFPair(c, a) <= b + override def fop(a:FMat, b:SMat, c:Mat):FMat = Operator.getFPair(c, a) <= full(b) + override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a) <= b + override def iop(a:IMat, b:IMat, c:Mat):IMat = Operator.getIPair(c, a) <= b + override def cop(a:CMat, b:CMat, c:Mat):CMat = {notImplemented0("<=", a); a} + override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a) <= b + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0("<=", a); a} +} + +object Mop_GE extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = Operator.getFPair(c, a) >= b + override def fop(a:FMat, b:SMat, c:Mat):FMat = Operator.getFPair(c, a) >= full(b) + override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a) >= b + override def iop(a:IMat, b:IMat, c:Mat):IMat = Operator.getIPair(c, a) >= b + override def cop(a:CMat, b:CMat, c:Mat):CMat = {notImplemented0(">=", a); a} + override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a) >= b + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0(">=", a); a} +} + +object Mop_EQ extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = 
Operator.getFPair(c, a) == b + override def fop(a:FMat, b:SMat, c:Mat):FMat = Operator.getFPair(c, a) == full(b) + override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a) == b + override def iop(a:IMat, b:IMat, c:Mat):IMat = Operator.getIPair(c, a) == b + override def cop(a:CMat, b:CMat, c:Mat):CMat = Operator.getCPair(c, a) == b + override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a) == b + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0("==", a); a} +} + +object Mop_NE extends Mop { + override def fop(a:FMat, b:FMat, c:Mat):FMat = Operator.getFPair(c, a) != b + override def fop(a:FMat, b:SMat, c:Mat):FMat = Operator.getFPair(c, a) != full(b) + override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a) != b + override def iop(a:IMat, b:IMat, c:Mat):IMat = Operator.getIPair(c, a) != b + override def cop(a:CMat, b:CMat, c:Mat):CMat = Operator.getCPair(c, a) != b + override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a) != b + override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0("!=", a); a} +} diff --git a/src/main/scala/BIDMat/Plotting.scala b/src/main/scala/BIDMat/Plotting.scala new file mode 100755 index 00000000..44e33edf --- /dev/null +++ b/src/main/scala/BIDMat/Plotting.scala @@ -0,0 +1,642 @@ +package BIDMat +import ptolemy.plot._ +import java.awt._ +import java.awt.geom.AffineTransform +import java.awt.image.BufferedImage +import javax.swing._ +import javax.imageio.stream.FileImageOutputStream +import javax.imageio.ImageIO +import java.io._ + +object Plotting { + var ifigure:Int = 1 + + def _plot(mats:Mat*)(xlog:Boolean=false, ylog:Boolean=false, isconnected:Boolean=true):Plot = { + var p:Plot = new Plot + p.setXLog(xlog) + p.setYLog(ylog) + val dataset = 0 + if (mats.length == 1) { + val m = mats(0) + if (m.nrows == 1 || m.ncols == 1) { + m match { + case mf:FMat => for (i <- 0 until m.length) p.addPoint(dataset, i, mf(i), isconnected) + case md:DMat => for (i <- 0 until 
m.length) p.addPoint(dataset, i, md(i), isconnected) + case mi:IMat => for (i <- 0 until m.length) p.addPoint(dataset, i, mi(i), isconnected) + } + } else { + for (i <- 0 until m.ncols) { + m match { + case mf:FMat => for (j <- 0 until m.nrows) p.addPoint(i, j, mf(j,i), isconnected) + case md:DMat => for (j <- 0 until m.nrows) p.addPoint(i, j, md(j,i), isconnected) + case mi:IMat => for (j <- 0 until m.nrows) p.addPoint(i, j, mi(j,i), isconnected) + } + } + } + } else { + var i = 0 + while (i*2 < mats.length) { + (mats(2*i), mats(2*i+1)) match { + case (a:FMat, b:FMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:FMat, b:DMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:DMat, b:FMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:DMat, b:DMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:FMat, b:IMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:DMat, b:IMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:IMat, b:FMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:IMat, b:DMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + case (a:IMat, b:IMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected) + } + i += 1 + } + } + var pframe:PlotFrame = new PlotFrame("Figure "+ifigure, p) + ifigure += 1 + pframe.setVisible(true) + p + } + + def plot(mats:Mat*) = _plot(mats: _*)() + + def loglog(mats:Mat*) = _plot(mats: _*)(xlog=true, ylog=true) + + def semilogx(mats:Mat*) = _plot(mats: _*)(xlog=true) + + def semilogy(mats:Mat*) = _plot(mats: _*)(ylog=true) + + def p_plot(mats:Mat*) = _plot(mats: _*)(isconnected=false) + + def ploglog(mats:Mat*) = _plot(mats: _*)(xlog=true, ylog=true, isconnected=false) + + def psemilogx(mats:Mat*) = _plot(mats: _*)(xlog=true, isconnected=false) + + def 
psemilogy(mats:Mat*) = _plot(mats: _*)(ylog=true, isconnected=false) + + + def hist(m:Mat, nbars:Int=10) = { + import SciFunctions._ + var p:Histogram = new Histogram + val dataset = 0 + if (m.nrows == 1 || m.ncols == 1) { + m match { + case mf:FMat => { + var vmax = maxi(mf,0).v + var vmin = mini(mf,0).v + p.setBinWidth((vmax-vmin)/nbars) + for (i <- 0 until m.length) p.addPoint(dataset, mf(i)) + } + case md:DMat => { + var vmax = maxi(md,0).v + var vmin = mini(md,0).v + p.setBinWidth((vmax-vmin)/nbars) + for (i <- 0 until m.length) p.addPoint(dataset, md(i)) + } + case mi:IMat => { + var vmax = maxi(mi,0).v.asInstanceOf[Double] + var vmin = mini(mi,0).v + p.setBinWidth((vmax-vmin)/nbars) + for (i <- 0 until m.length) p.addPoint(dataset, mi(i)) + } + } + } + var pframe:PlotFrame = new PlotFrame("Figure "+ifigure, p) + ifigure += 1 + pframe.setVisible(true) + } + + def heatmap(m:Mat) = { + val hc:HeatChart = new HeatChart(m) + val img:BufferedImage = hc.getChartImage(true) + hc.saveToFile(new File("heat_map_"+ifigure+".jpg")) + val jl:JLabel = new JLabel(new ImageIcon(img)) + val jp:JPanel = new JPanel + jp.add(jl) + val jsp:JScrollPane = new JScrollPane(jp) + val jFrame:JFrame = new JFrame("Figure "+ifigure) + jFrame.getContentPane().add( jsp ) + jFrame.setSize(800, 600) + ifigure += 1 + jFrame.setVisible(true) + } +} + +class HeatChart(mat:Mat) { + private var xValues:Array[Double] = new Array[Double](mat.ncols) + private var yValues:Array[Double] = new Array[Double](mat.nrows) + setXValues(0, 1) + setYValues(0, 1) + private var xValuesHorizontal:Boolean = false + private var yValuesHorizontal:Boolean = true + + + private var cellSize:Dimension = new Dimension(10,10) + private var margin:Int = 20 + private var backgroundColor = Color.WHITE + + private var chartSize:Dimension = new Dimension(100,100) + + private var highValueColor:Color = Color.BLUE + private var lowValueColor:Color = Color.WHITE + + private var colorValueDistance:Int = 1 + private var 
colorScale:Double = 1.0 + + private var heatMapSize:Dimension = new Dimension(1000,1000) + + private var heatMapTL:Point = new Point(0, 0) + private var heatMapBR:Point = new Point(800, 800) + private var heatMapC:Point = new Point(400, 400) + + private var axisThickness:Int = 2 + private var axisColor:Color = Color.BLACK + private var axisLabelsFont:Font = new Font("Sans-Serif", Font.PLAIN, 12) + private var axisLabelColor:Color = Color.BLACK + private var xAxisLabel:String = "X Label" + private var yAxisLabel:String = "Y Label" + private var axisValuesColor:Color = Color.BLACK + private var axisValuesFont:Font = new Font("Sans-Serif", Font.PLAIN, 10) + private var xAxisValuesFrequency:Int = 1 + private var yAxisValuesFrequency:Int = 1 + private var showXAxisValues:Boolean = true + private var showYAxisValues:Boolean = true + + private var xAxisValuesHeight:Int = 0 + private var xAxisValuesWidthMax:Int = 0 + + private var yAxisValuesHeight:Int = 0 + private var yAxisValuesAscent:Int = 0 + private var yAxisValuesWidthMax:Int = 0 + + private var xAxisLabelSize:Dimension = new Dimension(0,0) + private var xAxisLabelDescent:Int = 0 + + private var yAxisLabelSize:Dimension = new Dimension(0,0) + private var yAxisLabelAscent:Int = 0 + + + private var lowValue:Double = min(mat) + private var highValue:Double = max(mat) + + + updateColorDistance() + + + //should be replaced by built-in min and max to speed up + private def min(mat:Mat):Double = { + mat match { + case mi:IMat => IMin(mi).toDouble + case mf:FMat => FMin(mf).toDouble + case md:DMat => DMin(md) + } + } + + + private def max(mat:Mat):Double = { + mat match { + case mi:IMat => IMax(mi).toDouble + case mf:FMat => FMax(mf).toDouble + case md:DMat => DMax(md) + } + } + + private def IMin(mat:IMat):Int = { + var minV:Int = mat(0) + for (i:Int <- 0 until mat.length) { + if (mat(i) < minV) + minV = mat(i) + } + minV + } + + private def FMin(mat:FMat):Float = { + var minV:Float = mat(0) + for (i:Int <- 0 until 
mat.length) { + if (mat(i) < minV) + minV = mat(i) + } + minV + } + + private def DMin(mat:DMat):Double = { + var minV:Double = mat(0) + for (i:Int <- 0 until mat.length) { + if (mat(i) < minV) + minV = mat(i) + } + minV + } + + private def IMax(mat:IMat):Int = { + var maxV:Int = mat(0) + for (i:Int <- 0 until mat.length) { + if (mat(i) > maxV) + maxV = mat(i) + } + maxV + } + + private def FMax(mat:FMat):Float = { + var maxV:Float = mat(0) + for (i:Int <- 0 until mat.length) { + if (mat(i) > maxV) + maxV = mat(i) + } + maxV + } + + private def DMax(mat:DMat):Double = { + var maxV:Double = mat(0) + for (i:Int <- 0 until mat.length) { + if (mat(i) > maxV) + maxV = mat(i) + } + maxV + } + + def getChartImage(alpha:Boolean):BufferedImage = { + measureComponents() + updateCoordinates() + var imageType:Int = if ( alpha ) BufferedImage.TYPE_4BYTE_ABGR else BufferedImage.TYPE_3BYTE_BGR + + var chartImage:BufferedImage = new BufferedImage(chartSize.width, chartSize.height, imageType) + + var chartGraphics:Graphics2D = chartImage.createGraphics() + + chartGraphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, + RenderingHints.VALUE_ANTIALIAS_ON) + + chartGraphics.setColor(backgroundColor) + chartGraphics.fillRect(0, 0, chartSize.width, chartSize.height) + + drawHeatMap(chartGraphics, mat) + + drawXLabel(chartGraphics) + drawYLabel(chartGraphics) + + drawAxisBars(chartGraphics) + + drawXValues(chartGraphics) + drawYValues(chartGraphics) + chartImage + } + + private def drawHeatMap(chartGraphics:Graphics2D, mat:Mat) = { + + var noYCells:Int = mat.nrows + var noXCells:Int = mat.ncols + + var heatMapImage:BufferedImage = new BufferedImage(heatMapSize.width, heatMapSize.height, BufferedImage.TYPE_INT_ARGB) + var heatMapGraphics:Graphics2D = heatMapImage.createGraphics() + + mat match { + case mi:IMat => + for ( x <- 0 until noXCells ) { + for ( y <- 0 until noYCells ) { + heatMapGraphics.setColor( getCellColor( mi(y+x*noYCells).toDouble, lowValue, highValue ) ) + + var 
cellX:Int = x*cellSize.width + var cellY:Int = y*cellSize.height + + heatMapGraphics.fillRect(cellX, cellY, cellSize.width, cellSize.height) + } + } + case mf:FMat => + for ( x <- 0 until noXCells ) { + for ( y <- 0 until noYCells ) { + heatMapGraphics.setColor( getCellColor( mf(y+x*noYCells).toDouble, lowValue, highValue ) ) + + var cellX:Int = x*cellSize.width + var cellY:Int = y*cellSize.height + + heatMapGraphics.fillRect(cellX, cellY, cellSize.width, cellSize.height) + } + } + case md:DMat => + for ( x <- 0 until noXCells ) { + for ( y <- 0 until noYCells ) { + heatMapGraphics.setColor( getCellColor( md(y+x*noYCells), lowValue, highValue ) ) + + var cellX:Int = x*cellSize.width + var cellY:Int = y*cellSize.height + + heatMapGraphics.fillRect(cellX, cellY, cellSize.width, cellSize.height) + } + } + } + + chartGraphics.drawImage(heatMapImage, heatMapTL.x, heatMapTL.y, heatMapSize.width, heatMapSize.height, null) + } + + + + private def getCellColor(data:Double, min:Double, max:Double):Color = { + var range:Double = max - min + var position:Double = data - min + + var percentPosition:Double = position / range + + var colorPosition:Int = getColorPosition(percentPosition) + var r:Int = lowValueColor.getRed() + var g:Int = lowValueColor.getGreen + var b:Int = lowValueColor.getBlue() + + for ( i <- 0 until colorPosition ) { + var rDistance:Int = r - highValueColor.getRed() + var gDistance:Int = g - highValueColor.getGreen() + var bDistance:Int = b - highValueColor.getBlue() + + if ( (math.abs(rDistance) >= math.abs(gDistance) ) + && (math.abs(rDistance) >= math.abs(bDistance) ) ) { + r = changeColorValue(r, rDistance) + } else if (math.abs(gDistance) >= math.abs(bDistance)) { + g = changeColorValue(g, gDistance) + } else { + b = changeColorValue(b, bDistance) + } + } + new Color(r, g, b) + } + + + private def getColorPosition(percentPosition:Double):Int = { + math.round( colorValueDistance * math.pow(percentPosition, colorScale) ).toInt + } + + private def 
updateColorDistance() = { + var r1:Int = lowValueColor.getRed() + var g1:Int = lowValueColor.getGreen() + var b1:Int = lowValueColor.getBlue() + var r2:Int = highValueColor.getRed() + var g2:Int = highValueColor.getGreen() + var b2:Int = highValueColor.getBlue() + + colorValueDistance = math.abs(r1 - r2) + colorValueDistance += math.abs(g1 - g2) + colorValueDistance += math.abs(b1 - b2) + } + + private def changeColorValue(colorValue:Int, colorDistance:Int):Int = { + if (colorDistance < 0) { + colorValue+1 + } else if (colorDistance > 0) { + colorValue-1 + } else { + colorValue + } + } + + private def measureComponents() = { + var chartImage:BufferedImage = new BufferedImage(1, 1, BufferedImage.TYPE_INT_ARGB) + var tempGraphics:Graphics2D = chartImage.createGraphics() + + + if (xAxisLabel != null) { + tempGraphics.setFont(axisLabelsFont) + var metrics:FontMetrics = tempGraphics.getFontMetrics() + xAxisLabelSize = new Dimension(metrics.stringWidth(xAxisLabel), metrics.getHeight()) + xAxisLabelDescent = metrics.getDescent() + } else { + xAxisLabelSize = new Dimension(0, 0) + } + + + if (yAxisLabel != null) { + tempGraphics.setFont(axisLabelsFont) + var metrics:FontMetrics = tempGraphics.getFontMetrics() + yAxisLabelSize = new Dimension(metrics.stringWidth(yAxisLabel), metrics.getHeight()) + yAxisLabelAscent = metrics.getAscent() + } else { + yAxisLabelSize = new Dimension(0, 0) + } + + + if (showXAxisValues) { + tempGraphics.setFont(axisValuesFont) + var metrics:FontMetrics = tempGraphics.getFontMetrics() + xAxisValuesHeight = metrics.getHeight() + xAxisValuesWidthMax = 0 + + for (i <- 0 until xValues.length) { + var w:Int = metrics.stringWidth(xValues(i).toString()) + if (w > xAxisValuesWidthMax) { + xAxisValuesWidthMax = w + } + } + } else { + xAxisValuesHeight = 0 + } + + if (showYAxisValues) { + tempGraphics.setFont(axisValuesFont) + var metrics:FontMetrics = tempGraphics.getFontMetrics() + yAxisValuesHeight = metrics.getHeight() + yAxisValuesAscent = 
metrics.getAscent() + yAxisValuesWidthMax = 0 + + for (i <-0 until yValues.length) { + var w:Int = metrics.stringWidth(yValues(i).toString()) + if (w > yAxisValuesWidthMax) { + yAxisValuesWidthMax = w + } + } + } else { + yAxisValuesHeight = 0 + } + + + var heatMapWidth:Int = ( mat.ncols * cellSize.width) + var heatMapHeight:Int = ( mat.nrows * cellSize.height) + heatMapSize = new Dimension(heatMapWidth, heatMapHeight) + + var yValuesHorizontalSize:Int = 0 + + if (yValuesHorizontal) { + yValuesHorizontalSize = yAxisValuesWidthMax + } else { + yValuesHorizontalSize = yAxisValuesHeight + } + + var xValuesVerticalSize:Int = 0 + if (xValuesHorizontal) { + xValuesVerticalSize = xAxisValuesHeight + } else { + xValuesVerticalSize = xAxisValuesWidthMax + } + + var chartWidth:Int = heatMapWidth + (2 * margin) + yAxisLabelSize.height + yValuesHorizontalSize + axisThickness + var chartHeight:Int = heatMapHeight + (2 * margin) + xAxisLabelSize.height + xValuesVerticalSize + axisThickness + chartSize = new Dimension(chartWidth, chartHeight) + } + + private def updateCoordinates() { + var x:Int = margin + axisThickness + yAxisLabelSize.height + if (yValuesHorizontal) x+=yAxisValuesWidthMax else x+=yAxisValuesHeight + var y:Int = margin + heatMapTL = new Point(x, y) + + x = heatMapTL.x + heatMapSize.width + y = heatMapTL.y + heatMapSize.height + heatMapBR = new Point(x, y) + + x = heatMapTL.x + (heatMapSize.width / 2) + y = heatMapTL.y + (heatMapSize.height / 2) + heatMapC = new Point(x, y) + } + + private def drawXLabel(chartGraphics:Graphics2D) = { + if (xAxisLabel != null) { + var yPosXAxisLabel:Int = chartSize.height - (margin / 2) - xAxisLabelDescent + var xPosXAxisLabel:Int = heatMapC.x - (xAxisLabelSize.width / 2) + + chartGraphics.setFont(axisLabelsFont) + chartGraphics.setColor(axisLabelColor) + chartGraphics.drawString(xAxisLabel, xPosXAxisLabel, yPosXAxisLabel) + } + } + + private def drawYLabel(chartGraphics:Graphics2D) = { + if (yAxisLabel != null) { + var 
yPosYAxisLabel:Int = heatMapC.y + (yAxisLabelSize.width / 2) + var xPosYAxisLabel:Int = (margin / 2) + yAxisLabelAscent + + chartGraphics.setFont(axisLabelsFont) + chartGraphics.setColor(axisLabelColor) + + var transform:AffineTransform = chartGraphics.getTransform() + var originalTransform:AffineTransform = transform.clone().asInstanceOf[AffineTransform] + transform.rotate(math.toRadians(270), xPosYAxisLabel, yPosYAxisLabel) + chartGraphics.setTransform(transform) + + chartGraphics.drawString(yAxisLabel, xPosYAxisLabel, yPosYAxisLabel) + + chartGraphics.setTransform(originalTransform) + } + } + + + private def drawAxisBars(chartGraphics:Graphics2D) = { + if (axisThickness > 0) { + chartGraphics.setColor(axisColor) + + var x:Int = heatMapTL.x - axisThickness + var y:Int = heatMapBR.y + var width:Int = heatMapSize.width + axisThickness + var height:Int = axisThickness + chartGraphics.fillRect(x, y, width, height) + + x = heatMapTL.x - axisThickness + y = heatMapTL.y + width = axisThickness + height = heatMapSize.height + chartGraphics.fillRect(x, y, width, height) + } + } + + + private def drawXValues(chartGraphics:Graphics2D) = { + if (showXAxisValues) { + + chartGraphics.setColor(axisValuesColor) + + for (i <- 0 until mat.ncols) { + if (i % xAxisValuesFrequency == 0) { + var xValueStr:String = xValues(i).toString() + chartGraphics.setFont(axisValuesFont) + var metrics:FontMetrics = chartGraphics.getFontMetrics() + var valueWidth:Int = metrics.stringWidth(xValueStr) + + if (xValuesHorizontal) { + var valueXPos:Int = (i * cellSize.width) + ((cellSize.width / 2) - (valueWidth / 2)) + valueXPos += heatMapTL.x + var valueYPos:Int = heatMapBR.y + metrics.getAscent() + 1 + + chartGraphics.drawString(xValueStr, valueXPos, valueYPos) + } else { + var valueXPos:Int = heatMapTL.x + (i * cellSize.width) + ((cellSize.width / 2) + (xAxisValuesHeight / 2)) + var valueYPos:Int = heatMapBR.y + axisThickness + valueWidth + + var transform:AffineTransform = 
chartGraphics.getTransform() + var originalTransform:AffineTransform = transform.clone().asInstanceOf[AffineTransform] + transform.rotate(math.toRadians(270), valueXPos, valueYPos) + chartGraphics.setTransform(transform) + + chartGraphics.drawString(xValueStr, valueXPos, valueYPos) + + chartGraphics.setTransform(originalTransform) + } + } + } + } + } + + + private def drawYValues(chartGraphics:Graphics2D) = { + if (showYAxisValues) { + chartGraphics.setColor(axisValuesColor) + for (i <- 0 until mat.nrows ) { + if (i % yAxisValuesFrequency == 0) { + var yValueStr:String = yValues(i).toString() + chartGraphics.setFont(axisValuesFont) + var metrics:FontMetrics = chartGraphics.getFontMetrics() + var valueWidth:Int = metrics.stringWidth(yValueStr) + if (yValuesHorizontal) { + var valueXPos:Int = margin + yAxisLabelSize.height + (yAxisValuesWidthMax - valueWidth) + var valueYPos:Int = heatMapTL.y + (i * cellSize.height) + (cellSize.height/2) + (yAxisValuesAscent/2) + + chartGraphics.drawString(yValueStr, valueXPos, valueYPos) + } else { + var valueXPos:Int = margin + yAxisLabelSize.height + yAxisValuesAscent + var valueYPos:Int = heatMapTL.y + (i * cellSize.height) + (cellSize.height/2) + (valueWidth/2) + + var transform:AffineTransform = chartGraphics.getTransform() + var originalTransform:AffineTransform = transform.clone().asInstanceOf[AffineTransform] + transform.rotate(math.toRadians(270), valueXPos, valueYPos) + chartGraphics.setTransform(transform) + + chartGraphics.drawString(yValueStr, valueXPos, valueYPos) + + chartGraphics.setTransform(originalTransform) + } + } + } + } + } + + + def setXValues(xOffset:Double, xInterval:Double) = { + for (i <- 0 until mat.ncols) { + xValues(i) = xOffset + (i * xInterval) + } + } + + def setYValues(yOffset:Double, yInterval:Double) = { + for (i <- 0 until mat.nrows) { + yValues(i) = yOffset + (i * yInterval) + } + } + + + def saveToFile(outputFile:File) = { + var filename:String = outputFile.getName() + var extPoint:Int = 
filename.lastIndexOf('.') + + if (extPoint < 0) { + throw new IOException("Illegal filename: need a extension.") + } + + var ext:String = filename.substring(extPoint + 1) + + if (ext.toLowerCase().equals("jpg") || ext.toLowerCase().equals("jpeg")) { + var chart:BufferedImage = getChartImage(false) + + ImageIO.write(chart, ext, outputFile) + } else { + var chart:BufferedImage = getChartImage(true) + + ImageIO.write(chart, ext, outputFile) + } + } + +} + + diff --git a/src/main/scala/BIDMat/SDMat.scala b/src/main/scala/BIDMat/SDMat.scala new file mode 100755 index 00000000..d6cdcb62 --- /dev/null +++ b/src/main/scala/BIDMat/SDMat.scala @@ -0,0 +1,225 @@ +package BIDMat + +import edu.berkeley.bid.SPBLAS._ + +case class SDMat(nr:Int, nc:Int, nnz1:Int, ir0:Array[Int], jc0:Array[Int], data0:Array[Double]) extends SparseMat[Double](nr, nc, nnz1, ir0, jc0, data0) { + + def getdata() = data; + + override def t:SDMat = SDMat(gt) + + override def mytype = "SDMat" + + def horzcat(b: SDMat) = SDMat(super.horzcat(b)) + + def vertcat(b: SDMat) = SDMat(super.vertcat(b)) + + def find:IMat = IMat(gfind) + + def find2:(IMat, IMat) = { val (ii, jj) = gfind2 ; (IMat(ii), IMat(jj)) } + + def find3:(IMat, IMat, DMat) = { val (ii, jj, vv) = gfind3 ; (IMat(ii), IMat(jj), DMat(vv)) } + + override def apply(a:IMat, b:IMat):SDMat = SDMat(gapply(a, b)) + + def ssMatOp(b: SDMat, f:(Double, Double) => Double, omat:Mat) = SDMat(sgMatOp(b, f, omat)) + + def ssMatOpScalar(b: Double, f:(Double, Double) => Double, omat:Mat) = SDMat(sgMatOpScalar(b, f, omat)) + + def ssReduceOp(n:Int, f1:(Double) => Double, f2:(Double, Double) => Double, omat:Mat) = DMat(sgReduceOp(n, f1, f2, omat)) + + def horzcat(a:DMat):DMat = MatFunctions.full(this).horzcat(a) + + def vertcat(a:DMat):DMat = MatFunctions.full(this).vertcat(a) + + def SMult(a:Mat, omat:DMat):DMat = { + val ioff = Mat.ioneBased + if (ncols != a.nrows) { + throw new RuntimeException("dimensions mismatch") + } else { + a match { + case aa:SDMat => { + 
val out = DMat.newOrCheckDMat(nrows, a.ncols, omat)
+          if (omat.asInstanceOf[AnyRef] != null) out.clear
+          var i = 0
+          while (i < a.ncols) {
+            var j =aa.jc(i)-ioff
+            while (j < aa.jc(i+1)-ioff) {
+              val dval = aa.data(j)
+              var k = jc(aa.ir(j)-ioff)-ioff
+              while (k < jc(aa.ir(j)+1-ioff)-ioff) {
+                out.data(ir(k)-ioff+nrows*i) += data(k) * dval
+                k += 1
+              }
+              j += 1
+            }
+            i += 1
+          }
+          out
+        }
+        case dd:DMat => {
+          val out = DMat.newOrCheckDMat(nrows, a.ncols, omat)
+          if (omat.asInstanceOf[AnyRef] != null) out.clear
+          Mat.nflops += 2L * nnz * a.ncols
+          if (Mat.noMKL) {
+            var i = 0
+            while (i < dd.ncols) {
+              var j = 0
+              while (j < ncols) {
+                val dval = dd.data(j + i*dd.nrows)
+                var k = jc(j)-ioff
+                while (k < jc(j+1)-ioff) {
+                  out.data(ir(k)-ioff + i*nrows) += dval * data(k);
+                  k += 1
+                }
+                j += 1
+              }
+              i += 1
+            }
+          } else {
+            val nc = dd.ncols
+            var jc0 = jc
+            var ir0 = ir
+            if (ioff == 0) {
+              jc0 = SparseMat.incInds(jc)
+              ir0 = SparseMat.incInds(ir)
+            }
+            // if (dd.ncols == 1) {
+            // Seg faults in Linux and Windows:
+            // dcscmv("N", nrows, ncols, 1.0, "GLNF", data, ir, jc, dd.data, 0.0, out.data)
+            // } else {
+            dcscmm("N", nrows, nc, ncols, 1.0, "GLNF", data, ir0, jc0, dd.data, ncols, 0.0, out.data, nr)
+            // }
+          }
+          out
+        }
+        case _ => throw new RuntimeException("unsupported arg")
+      }
+    }
+  }
+
+  /**
+   * Transposed sparse-times-dense product: multiplies the transpose of this
+   * matrix by the dense matrix `a`, giving an `ncols` x `a.ncols` result.
+   *
+   * @param a    dense right-hand operand; must have `nrows` rows
+   * @param omat optional output matrix handed to `DMat.newOrCheckDMat`; may be null
+   * @return the dense product
+   * @throws RuntimeException if `nrows != a.nrows`
+   */
+  def Tmult(a:DMat, omat:DMat):DMat = {
+    // Same guard SMult applies: without it a mismatched operand would be
+    // passed straight to the native routine.
+    if (nrows != a.nrows) {
+      throw new RuntimeException("dimensions mismatch")
+    }
+    val out = DMat.newOrCheckDMat(ncols, a.ncols, omat)
+    if (omat.asInstanceOf[AnyRef] != null) out.clear
+    if (Mat.noMKL) {
+      // Pure-JVM path, mirroring the Mat.noMKL branch of SMult.
+      // out(j, i) = sum over the stored entries of column j of this matrix
+      // of value * a(row, i).
+      val ioff = Mat.ioneBased
+      var i = 0
+      while (i < a.ncols) {
+        var j = 0
+        while (j < ncols) {
+          var sum = 0.0
+          var k = jc(j)-ioff
+          while (k < jc(j+1)-ioff) {
+            sum += data(k) * a.data(ir(k)-ioff + i*a.nrows)
+            k += 1
+          }
+          out.data(j + i*out.nrows) = sum
+          j += 1
+        }
+        i += 1
+      }
+    } else {
+      var jc0 = jc
+      var ir0 = ir
+      // NOTE(review): the native call appears to expect one-based indices
+      // (as in SMult), so zero-based index arrays are shifted first - confirm.
+      if (Mat.ioneBased == 0) {
+        jc0 = SparseMat.incInds(jc)
+        ir0 = SparseMat.incInds(ir)
+      }
+      dcscmm("T", nrows, a.ncols, ncols, 1.0, "GLNF", data, ir0, jc0, a.data, a.nrows, 0.0, out.data, out.nrows)
+    }
+    Mat.nflops += 2L * nnz * a.ncols
+    out
+  }
+
+  def SSMult(a:SDMat):SDMat =
+    if (ncols != a.nrows) {
+      throw new RuntimeException("dimensions mismatch")
+    } else {
+      val ioff = Mat.ioneBased
+      var numnz = 0
+      var i = 0
+      while (i < a.ncols) {
+        var j = a.jc(i)-ioff
+        while (j < a.jc(i+1)-ioff) {
+          numnz +=
jc(a.ir(j)-ioff+1) - jc(a.ir(j)-ioff) + j += 1 + } + i += 1 + } + val ii = new Array[Int](numnz) + val jj = new Array[Int](numnz) + val vv = new Array[Double](numnz) + numnz = 0 + i = 0 + while (i < a.ncols) { + var j = a.jc(i)-ioff + while (j < a.jc(i+1)-ioff) { + val dval = a.data(j) + var k = jc(a.ir(j)-ioff)-ioff + while (k < jc(a.ir(j)-ioff+1)-ioff) { + vv(numnz) = data(k) * dval + ii(numnz) = ir(k)-ioff + jj(numnz) = i + numnz += 1 + k += 1 + } + j += 1 + } + i += 1 + } + SDMat(SparseMat.sparseImpl[Double](ii, jj, vv, nrows, a.ncols)) + } + + def + (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x + y, null) + def - (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x - y, null) + def * (b : DMat):DMat = SMult(b, null) + def Tx (b : DMat):DMat = Tmult(b, null) + override def * (b : Mat):DMat = SMult(b, null) + def *! (b : SDMat) = SSMult(b) + def *@ (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x * y, null) + def /@ (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x / y, null) + + def > (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, null) + def < (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, null) + def == (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null) + def === (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null) + def >= (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, null) + def <= (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, null) + def != (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, null) + + override def + (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x + y, null) + override def - (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x - y, null) + override def *@ (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x * y, null) + override def /@ (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x / y, null) + + override def > (b 
: Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, null)
+  override def < (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, null)
+  override def == (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+  override def >= (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, null)
+  override def <= (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, null)
+  override def != (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, null)
+
+  def \ (b: SDMat) = horzcat(b)
+  def on (b: SDMat) = vertcat(b)
+
+  def toSMat:SMat = {
+    val out = SMat(nrows, ncols, nnz)
+    System.arraycopy(jc, 0, out.jc, 0, ncols+1)
+    System.arraycopy(ir, 0, out.ir, 0, nnz)
+    Mat.copyToFloatArray(data, 0, out.data, 0, nnz)
+    out
+  }
+
+  override def zeros(nr:Int, nc:Int, nnz:Int) = SDMat(nr, nc, nnz)
+
+  /**
+   * Builds an `nr` x `nc` matrix with room for `nnz` entries, reusing this
+   * matrix's `jc`, `ir` and `data` arrays whenever each is already large
+   * enough and allocating a fresh array otherwise.
+   *
+   * @param nr  row count of the returned matrix
+   * @param nc  column count of the returned matrix
+   * @param nnz number of stored entries the returned matrix must hold
+   * @return a matrix that may share backing arrays with this one
+   */
+  override def recycle(nr:Int, nc:Int, nnz:Int):SDMat = {
+    val jc0 = if (jc.size >= nc+1) jc else new Array[Int](nc+1)
+    val ir0 = if (ir.size >= nnz) ir else new Array[Int](nnz)
+    val data0 = if (data.size >= nnz) data else new Array[Double](nnz)
+    // Constructor order is (nr, nc, nnz, ir, jc, data): row indices come
+    // before column pointers, so ir0 must precede jc0 here.
+    new SDMat(nr, nc, nnz, ir0, jc0, data0)
+  }
+}
+
+class SDPair (val omat:DMat, val mat:SDMat) extends Pair{
+  def * (b : DMat):DMat = mat.SMult(b, omat)
+  def Tx (b : DMat):DMat = mat.Tmult(b, omat)
+  override def * (b : Mat):DMat = mat.SMult(b, omat)
+}
+
+object SDMat {
+
+  def apply(nr:Int, nc:Int, nnz0:Int):SDMat = new SDMat(nr, nc, nnz0, new Array[Int](nnz0), new Array[Int](nc+1), new Array[Double](nnz0))
+
+  def apply(a:SparseMat[Double]):SDMat = new SDMat(a.nrows, a.ncols, a.nnz, a.ir, a.jc, a.data)
+
+  def apply(a:SMat) = a.toSDMat
+
+  def SDnoRows(nr:Int, nc:Int, nnz0:Int):SDMat = new SDMat(nr, nc, nnz0, null, new Array[Int](nc+1), new Array[Double](nnz0))
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/SMat.scala b/src/main/scala/BIDMat/SMat.scala
new file mode 100755
index 00000000..8ed7164f --- /dev/null +++ b/src/main/scala/BIDMat/SMat.scala @@ -0,0 +1,269 @@ +package BIDMat + +import edu.berkeley.bid.SPBLAS._ + +case class SMat(nr:Int, nc:Int, nnz1:Int, ir0:Array[Int], jc0:Array[Int], data0:Array[Float]) extends SparseMat[Float](nr, nc, nnz1, ir0, jc0, data0) { + + def getdata() = data; + + override def t:SMat = SMat(gt) + + override def mytype = "SMat" + + def horzcat(b: SMat) = SMat(super.horzcat(b)) + + def vertcat(b: SMat) = SMat(super.vertcat(b)) + + def find:IMat = IMat(gfind) + + def find2:(IMat, IMat) = { val (ii, jj) = gfind2 ; (IMat(ii), IMat(jj)) } + + def find3:(IMat, IMat, FMat) = { val (ii, jj, vv) = gfind3 ; (IMat(ii), IMat(jj), FMat(vv)) } + + override def contents:FMat = FMat(nnz, 1, data) + + override def apply(a:IMat, b:IMat):SMat = SMat(gapply(a, b)) + + def ssMatOp(b: SMat, f:(Float, Float) => Float, omat:Mat) = SMat(sgMatOp(b, f, omat)) + + def ssMatOpScalar(b: Float, f:(Float, Float) => Float, omat:Mat) = SMat(sgMatOpScalar(b, f, omat)) + + def ssReduceOp(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, omat:Mat) = FMat(sgReduceOp(n, f1, f2, omat)) + + def horzcat(a:FMat):FMat = FMat(MatFunctions.full(this).ghorzcat(a)) + + def vertcat(a:FMat):FMat = FMat(MatFunctions.full(this).gvertcat(a)) + + def SMult(a:Mat, omat:Mat):FMat = { + val ioff = Mat.ioneBased + if (ncols != a.nrows) { + throw new RuntimeException("dimensions mismatch") + } else { + a match { + case aa:SMat => { + val out = FMat.newOrCheckFMat(nrows, a.ncols, omat) + if (omat.asInstanceOf[AnyRef] != null) out.clear + var i = 0 + while (i < a.ncols) { + var j =aa.jc(i)-ioff + while (j < aa.jc(i+1)-ioff) { + val dval = aa.data(j) + var k = jc(aa.ir(j)-ioff)-ioff + while (k < jc(aa.ir(j)+1-ioff)-ioff) { + out.data(ir(k)-ioff+nrows*i) += data(k) * dval + k += 1 + } + j += 1 + } + i += 1 + } + out + } + case dd:FMat => { + val out = FMat.newOrCheckFMat(nrows, a.ncols, omat) + if (omat.asInstanceOf[AnyRef] != null) out.clear + Mat.nflops 
+= 2L * nnz * a.ncols + if (Mat.noMKL) { + var i = 0 + while (i < dd.ncols) { + var j = 0 + while (j < ncols) { + val dval = dd.data(j + i*dd.nrows) + var k = jc(j)-ioff + while (k < jc(j+1)-ioff) { + out.data(ir(k)-ioff + i*nrows) += dval * data(k); + k += 1 + } + j += 1 + } + i += 1 + } + } else { + val nc = dd.ncols + var jc0 = jc + var ir0 = ir + if (ioff == 0) { + jc0 = SparseMat.incInds(jc) + ir0 = SparseMat.incInds(ir) + } + // if (dd.ncols == 1) { + // Seg faults in linux and windows + // scscmv("N", nrows, ncols, 1.0f, "GLNF", data, ir, jc, dd.data, 0f, out.data) + // } else { + scscmm("N", nrows, nc, ncols, 1.0f, "GLNF", data, ir0, jc0, dd.data, ncols, 0f, out.data, out.nrows) + // } + } + out + } + case _ => throw new RuntimeException("unsupported arg") + } + } + } + + def Tmult(a:FMat, omat:Mat):FMat = { + val out = FMat.newOrCheckFMat(ncols, a.ncols, omat) + if (omat.asInstanceOf[AnyRef] != null) out.clear + var jc0 = jc + var ir0 = ir + if (Mat.ioneBased == 0) { + jc0 = SparseMat.incInds(jc) + ir0 = SparseMat.incInds(ir) + } + scscmm("T", nrows, a.ncols, ncols, 1.0f, "GLNF", data, ir0, jc0, a.data, a.nrows, 0f, out.data, out.nrows) + Mat.nflops += 2L * nnz * a.ncols + out + } + + def SSMult(a:SMat):SMat = + if (ncols != a.nrows) { + throw new RuntimeException("dimensions mismatch") + } else { + val ioff = Mat.ioneBased + var numnz = 0 + var i = 0 + while (i < a.ncols) { + var j = a.jc(i)-ioff + while (j < a.jc(i+1)-ioff) { + numnz += jc(a.ir(j)-ioff+1) - jc(a.ir(j)-ioff) + j += 1 + } + i += 1 + } + val ii = new Array[Int](numnz) + val jj = new Array[Int](numnz) + val vv = new Array[Float](numnz) + numnz = 0 + i = 0 + while (i < a.ncols) { + var j = a.jc(i)-ioff + while (j < a.jc(i+1)-ioff) { + val dval = a.data(j) + var k = jc(a.ir(j)-ioff)-ioff + while (k < jc(a.ir(j)-ioff+1)-ioff) { + vv(numnz) = data(k) * dval + ii(numnz) = ir(k)-ioff + jj(numnz) = i + numnz += 1 + k += 1 + } + j += 1 + } + i += 1 + } + SMat(SparseMat.sparseImpl[Float](ii, jj, vv, 
nrows, a.ncols)) + } + + def + (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x + y, null) + def - (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x - y, null) + def * (b : FMat):FMat = SMult(b, null) + def Tx (b : FMat):FMat = Tmult(b, null) + def *! (b : SMat) = SSMult(b) + def *@ (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x * y, null) + def /@ (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x / y, null) + + def > (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x > y) 1.0f else 0f, null) + def < (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x < y) 1.0f else 0f, null) + def == (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x == y) 1.0f else 0f, null) + def === (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x == y) 1.0f else 0f, null) + def >= (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0f, null) + def <= (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0f, null) + def != (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x != y) 1.0f else 0f, null) + + override def + (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x + y, null) + override def - (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x - y, null) + override def *@ (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x * y, null) + override def /@ (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x / y, null) + + override def > (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x > y) 1.0f else 0f, null) + override def < (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x < y) 1.0f else 0f, null) + override def == (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x == y) 1.0f else 0f, null) + override def >= (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0f, null) + override def <= (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0f, null) + override def != (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x != y) 1.0f else 0f, null) + + override def * (b : 
Mat):FMat = SMult(b, null) + override def Tx (b : Mat):Mat = b match {case bb:FMat => Tmult(bb, null)} + + def \ (b: SMat) = horzcat(b) + def on (b: SMat) = vertcat(b) + + def ~ (b : SMat):SPair = new SPair(this, b) + + override def ~ (b: Mat):Pair = + b match { + case sb:SMat => new SPair(this, sb) + case _ => throw new RuntimeException("mismatched types for operator ~") + } + + def toSDMat:SDMat = { + val out = SDMat(nrows, ncols, nnz) + System.arraycopy(jc, 0, out.jc, 0, ncols+1) + System.arraycopy(ir, 0, out.ir, 0, nnz) + Mat.copyToDoubleArray(data, 0, out.data, 0, nnz) + out + } + + override def zeros(nr:Int, nc:Int, nnz:Int) = SMat(nr, nc, nnz) + + override def recycle(nr:Int, nc:Int, nnz:Int):SMat = { + val jc0 = if (jc.size >= nc+1) jc else new Array[Int](nc+1) + val ir0 = if (ir.size >= nnz) ir else new Array[Int](nnz) + val data0 = if (data.size >= nnz) data else new Array[Float](nnz) + new SMat(nr, nc, nnz, ir0, jc0, data0) + } +} + +class SPair (val omat:Mat, val mat:SMat) extends Pair{ + def * (b : FMat):FMat = mat.SMult(b, omat) + def Tx (b : FMat):FMat = mat.Tmult(b, omat) + override def * (b : Mat):FMat = mat.SMult(b, omat) + override def Tx (b : Mat):Mat = b match {case bb:FMat => mat.Tmult(bb, omat)} + + def + (b : SMat) = mat.ssMatOp(b, (x:Float, y:Float) => x + y, omat) + def - (b : SMat) = mat.ssMatOp(b, (x:Float, y:Float) => x - y, omat) + def *@ (b : SMat) = mat.ssMatOp(b, (x:Float, y:Float) => x * y, omat) + def /@ (b : SMat) = mat.ssMatOp(b, (x:Float, y:Float) => x / y, omat) + + import Operator._ + override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus) + override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus) + override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes) + override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv) +} + +object SMat { + + def apply(nr:Int, nc:Int, nnz0:Int):SMat = new SMat(nr, nc, nnz0, new Array[Int](nnz0), new Array[Int](nc+1), new Array[Float](nnz0)) + + def 
apply(a:SparseMat[Float]):SMat = new SMat(a.nrows, a.ncols, a.nnz, a.ir, a.jc, a.data) + + def apply(a:SDMat) = a.toSMat + + def apply(a:Mat) = a match { + case aa:SMat => aa + case aa:GSMat => aa.toSMat + case aa:SDMat => aa.toSMat + } + + def SnoRows(nr:Int, nc:Int, nnz0:Int):SMat = new SMat(nr, nc, nnz0, null, new Array[Int](nc+1), new Array[Float](nnz0)) + + def newOrCheckSMat(mat:SMat, oldmat:Mat):SMat = { + if (oldmat.asInstanceOf[AnyRef] == null || (oldmat.nrows == 0 && oldmat.ncols == 0)) { + SMat(mat.nrows, mat.ncols, mat.nnz) + } else { + oldmat match { + case omat:SMat => if (oldmat.nrows == mat.nrows && oldmat.ncols == mat.ncols && oldmat.nnz == mat.nnz) { + omat + } else { + omat.recycle(mat.nrows, mat.ncols, mat.nnz) + } + } + } + } +} + + + + + + diff --git a/src/main/scala/BIDMat/SciFunctions.scala b/src/main/scala/BIDMat/SciFunctions.scala new file mode 100755 index 00000000..bbcf73fb --- /dev/null +++ b/src/main/scala/BIDMat/SciFunctions.scala @@ -0,0 +1,1556 @@ +package BIDMat + +import edu.berkeley.bid.VML._ +import edu.berkeley.bid.VSL +import edu.berkeley.bid.VSL._ +import edu.berkeley.bid.CBLAS._ +import jcuda._; +import jcuda.jcublas.JCublas; +import jcuda.runtime.JCuda; +import jcuda.jcurand.JCurand._; +import jcuda.jcurand.curandGenerator; +import jcuda.jcurand.curandRngType._; +import edu.berkeley.bid.CUMAT; +import java.util.Random._; +import MatFunctions._ + +object SciFunctions { + final val SEED:Int = 1452462553 + // Java initialization + final val myrand = new java.util.Random(SEED) + // VSL random number generator initialization + final val BRNG:Int = BRNG_MCG31 + final val METHOD:Int = 0 + final val stream = new VSL(); + final val errcode = vslNewStream(stream, BRNG, SEED) + // VML mode control, controlled with setVMLmode() + final val VMLdefault = VMLMODE.VML_ERRMODE_DEFAULT | VMLMODE.VML_HA // Default + final val VMLfast = VMLMODE.VML_ERRMODE_DEFAULT | VMLMODE.VML_LA // Faster, Low accuracy, default error handling + final val 
VMLturbo = VMLMODE.VML_ERRMODE_DEFAULT | VMLMODE.VML_EP // Fastest, Lower accuracy, default error handling + // Curand initialization + var cudarng:curandGenerator = null + if (Mat.hasCUDA > 0) { + jcuda.runtime.JCuda.initialize + cudarng = new curandGenerator + curandCreateGenerator(cudarng, CURAND_RNG_PSEUDO_DEFAULT) + curandSetPseudoRandomGeneratorSeed(cudarng, SEED) + } + + def resetCUDA = JCuda.cudaDeviceReset + + def device(i:Int) = JCuda.cudaSetDevice(i) + + def device:Int = { + val ar = Array[Int](1) + JCuda.cudaGetDevice(ar) + ar(0) + } + + def connect(i:Int) = { + val v0 = JCuda.cudaDeviceEnablePeerAccess(i,0) + val j = device + device(i) + val v1 = JCuda.cudaDeviceEnablePeerAccess(j,0) + device(j) + (v0, v1) + } + + def disconnect(i:Int) = { + val v0 = JCuda.cudaDeviceDisablePeerAccess(i) + val j = device + device(i) + val v1 = JCuda.cudaDeviceDisablePeerAccess(j) + device(j) + (v0, v1) + } + + def canconnect(i:Int) = { + val ar = Array[Int](1) + val j = device + JCuda.cudaDeviceCanAccessPeer(ar, i, j) + val v0 = ar(0) + JCuda.cudaDeviceCanAccessPeer(ar, j, i) + (v0, ar(0)) + } + + def norm(a:FMat) = math.sqrt(sdot(a.length, a.data, 1, a.data, 1)).asInstanceOf[Float] + + def norm(a:DMat) = math.sqrt(ddot(a.length, a.data, 1, a.data, 1)) + + def norm(a:GMat) = math.sqrt(JCublas.cublasSdot(a.length, a.data, 1, a.data, 1)) + + def norm (a:Mat):Double = { + a match { + case aa:FMat => norm(aa) + case aa:DMat => norm(aa) + case aa:GMat => norm(aa) + } + } + + + def drand(minv:Double, maxv:Double, out:DMat):DMat = { + if (Mat.noMKL) { + var i = 0; val len = out.length; val odata = out.data; + while (i < len) {odata(i) = myrand.nextDouble; i += 1} + } else { + vdRngUniform( METHOD, stream, out.length, out.data, minv, maxv ) + } + Mat.nflops += 10L*out.nrows*out.ncols + out + } + + def drand(m:Int, n:Int, minv:Double, maxv:Double):DMat = drand(minv, maxv, DMat(m, n)) + + def drand(m:Int, n:Int):DMat = drand(m, n, 0, 1) + + def drand(out:DMat):DMat = drand(0.0, 
1.0, out) + + def rand(minv:Float, maxv:Float, out:FMat):FMat = { + if (Mat.noMKL) { + var i = 0; val len = out.length; val odata = out.data; + while (i < len) {odata(i) = myrand.nextFloat; i += 1} + } else { + vsRngUniform( METHOD, stream, out.length, out.data, minv, maxv ) + } + Mat.nflops += 10L*out.nrows*out.ncols + out + } + + def rand(m:Int, n:Int, minv:Float, maxv:Float):FMat = rand(minv, maxv, FMat(m, n)) + + def rand(m:Int, n:Int):FMat = rand(m, n, 0, 1) + + def rand(out:FMat):FMat = rand(0.0f, 1.0f, out) + + def grand(out:GMat, nr:Int, nc:Int):GMat = { + Mat.nflops += 10L*out.length + curandGenerateUniform(cudarng, out.data, out.length) + JCuda.cudaDeviceSynchronize() + out + } + + def grand(out:GMat):GMat = grand(out, out.nrows, out.ncols) + + def grand(nr:Int, nc:Int):GMat = { + val out = GMat(nr, nc) + grand(out) + } + + def normrnd(mu:Float, sig:Float, out:FMat):FMat = { + if (Mat.noMKL) { + var i = 0; val len = out.length; val odata = out.data; + while (i < len) {odata(i) = mu + sig*myrand.nextGaussian.asInstanceOf[Float]; i += 1} + } else { + vsRngGaussian(METHOD, stream, out.length, out.data, mu, sig ) + } + Mat.nflops += 10L*out.length + out + } + + def normrnd(mu:Float, sig:Float, m:Int, n:Int):FMat = { + normrnd(mu, sig, FMat(m, n)) + } + + def cnormrnd(mu:Float, sig:Float, out:CMat):CMat = { + if (Mat.noMKL) { + var i = 0; val len = out.length; val odata = out.data; + while (i < 2*len) {odata(i) = mu + sig*myrand.nextGaussian.asInstanceOf[Float]; i += 1} + } else { + vsRngGaussian(METHOD, stream, 2*out.length, out.data, mu, sig ) + } + Mat.nflops += 10L*out.length + out + } + + def cnormrnd(mu:Float, sig:Float, m:Int, n:Int):CMat = { + cnormrnd(mu, sig, CMat(m, n)) + } + + def gnormrnd(mu:Float, sig:Float, out:GMat, nr:Int, nc:Int):GMat = { + Mat.nflops += 10L*out.length + curandGenerateNormal(cudarng, out.data, out.length, mu, sig) + JCuda.cudaDeviceSynchronize() + out + } + + def gnormrnd(mu:Float, sig:Float, out:GMat):GMat = gnormrnd(mu, 
sig, out, out.nrows, out.ncols) + + def gnormrnd(mu:Float, sig:Float, nr:Int, nc:Int):GMat = { + val out = GMat(nr, nc) + gnormrnd(mu, sig, out) + } + + def gamrnd(shape:Float, scale:Float, out:FMat):FMat = { + vsRngGamma( METHOD, stream, out.length, out.data, shape, 0, scale ) + Mat.nflops += 20L*out.length + out + } + + def gamrnd(shape:Float, scale:Float, m:Int, n:Int):FMat = { + gamrnd(shape, scale, FMat(m, n)) + } + + def laprnd(a:Float, b:Float, out:FMat):FMat = { + vsRngLaplace( METHOD, stream, out.length, out.data, a, b ) + Mat.nflops += 20L*out.length + out + } + + def laprnd(a:Float, b:Float, m:Int, n:Int):FMat = { + laprnd(a, b, FMat(m, n)) + } + + def cauchyrnd(a:Float, b:Float, out:FMat):FMat = { + vsRngCauchy( METHOD, stream, out.length, out.data, a, b ) + Mat.nflops += 20L*out.length + out + } + + def cauchyrnd(a:Float, b:Float, m:Int, n:Int):FMat = { + cauchyrnd(a, b, FMat(m, n)) + } + + def exprnd(a:Float, b:Float, out:FMat):FMat = { + vsRngExponential( METHOD, stream, out.length, out.data, a, b ) + Mat.nflops += 20L*out.length + out + } + + def exprnd(a:Float, m:Int, n:Int):FMat = { + exprnd(a, 1, FMat(m, n)) + } + + def exprnd(a:Float, b:Float, m:Int, n:Int):FMat = { + exprnd(a, b, FMat(m, n)) + } + + def exprnd(a:Float, out:FMat):FMat = { + exprnd(a, 1, out) + } + + def betarnd(p:Float, q:Float, out:FMat):FMat = { + vsRngBeta( METHOD, stream, out.length, out.data, p, q, 0, 1 ) + Mat.nflops += 20L*out.length + out + } + + def betarnd(p:Float, q:Float, m:Int, n:Int):FMat = { + betarnd(p, q, FMat(m, n)) + } + + def poissrnd(lambda:FMat, out:IMat):IMat = { + checkSizes(lambda, out) + viRngPoissonV( METHOD, stream, out.length, out.data, DMat(lambda).data ) + Mat.nflops += 20L*out.length + out + } + + def poissrnd(lambda:FMat):IMat = { + poissrnd(lambda, IMat(lambda.nrows, lambda.ncols)) + } + + def dnormrnd(mu:Double, sig:Double, out:DMat):DMat = { + if (Mat.noMKL) { + var i = 0; val len = out.length; val odata = out.data; + while (i < len) {odata(i) 
= mu + sig*myrand.nextGaussian; i += 1} + } else { + vdRngGaussian( METHOD, stream, out.length, out.data, mu, sig ) + } + Mat.nflops += 10L*out.length + out + } + + def dnormrnd(mu:Double, sig:Double, m:Int, n:Int):DMat = { + dnormrnd(mu, sig, DMat(m, n)) + } + + def dgamrnd(shape:Double, scale:Double, out:DMat):DMat = { + vdRngGamma( METHOD, stream, out.length, out.data, shape, 0, scale ) + Mat.nflops += 20L*out.length + out + } + + def dgamrnd(shape:Double, scale:Double, m:Int, n:Int):DMat = { + dgamrnd(shape, scale, DMat(m, n)) + } + + def dlaprnd(a:Double, b:Double, out:DMat):DMat = { + vdRngLaplace( METHOD, stream, out.length, out.data, a, b ) + Mat.nflops += 20L*out.length + out + } + + def dlaprnd(a:Double, b:Double, m:Int, n:Int):DMat = { + dlaprnd(a, b, DMat(m, n)) + } + + def dcauchyrnd(a:Double, b:Double, out:DMat):DMat = { + vdRngCauchy( METHOD, stream, out.length, out.data, a, b ) + Mat.nflops += 20L*out.length + out + } + + def dcauchyrnd(a:Double, b:Double, m:Int, n:Int):DMat = { + dcauchyrnd(a, b, DMat(m, n)) + } + + def dexprnd(a:Double, b:Double, out:DMat):DMat = { + vdRngExponential( METHOD, stream, out.length, out.data, a, b ) + Mat.nflops += 20L*out.length + out + } + + def dexprnd(a:Double, m:Int, n:Int):DMat = { + dexprnd(a, 1, DMat(m, n)) + } + + def dexprnd(a:Double, b:Double, m:Int, n:Int):DMat = { + dexprnd(a, b, DMat(m, n)) + } + + def dexprnd(a:Double, out:DMat):DMat = { + dexprnd(a, 1, out) + } + + def dbetarnd(p:Double, q:Double, out:DMat):DMat = { + vdRngBeta( METHOD, stream, out.length, out.data, p, q, 0, 1 ) + Mat.nflops += 20L*out.length + out + } + + def dbetarnd(p:Double, q:Double, m:Int, n:Int):DMat = { + dbetarnd(p, q, DMat(m, n)) + } + + def binornd(k:Int, p:Double, out:IMat):IMat = { + viRngBinomial( METHOD, stream, out.length, out.data, k, p ) + Mat.nflops += 20L*out.length + out + } + + def binornd(k:Int, p:Double, m:Int, n:Int):IMat = { + binornd(k, p, IMat(m, n)) + } + + def bernrnd(p:Double, out:IMat):IMat = { + 
viRngBernoulli( METHOD, stream, out.length, out.data, p ) + Mat.nflops += 20L*out.length + out + } + + def bernrnd(p:Double, m:Int, n:Int):IMat = { + bernrnd(p, IMat(m, n)) + } + + def geornd(p:Double, out:IMat):IMat = { + viRngGeometric( METHOD, stream, out.length, out.data, p ) + Mat.nflops += 20L*out.length + out + } + + def geornd(p:Double, m:Int, n:Int):IMat = { + geornd(p, IMat(m, n)) + } + + def nbinrnd(a:Double, p:Double, out:IMat):IMat = { + viRngNegbinomial( METHOD, stream, out.length, out.data, a, p ) + Mat.nflops += 20L*out.length + out + } + + def nbinrnd(a:Double, p:Double, m:Int, n:Int):IMat = { + nbinrnd(a, p, IMat(m, n)) + } + + def poissrnd(lambda:Double, out:IMat):IMat = { + viRngPoisson( METHOD, stream, out.length, out.data, lambda ) + Mat.nflops += 20L*out.length + out + } + + def poissrnd(lambda:Double, m:Int, n:Int):IMat = { + poissrnd(lambda, IMat(m, n)) + } + + def poissrnd(lambda:DMat, out:IMat):IMat = { + checkSizes(lambda, out) + viRngPoissonV( METHOD, stream, out.length, out.data, lambda.data ) + Mat.nflops += 20L*out.length + out + } + + def poissrnd(lambda:DMat):IMat = { + poissrnd(lambda, IMat(lambda.nrows, lambda.ncols)) + } + + def min(a:DMat, b:DMat) = a.ddMatOp(b, (x:Double, y:Double) => math.min(x,y), null) + def max(a:DMat, b:DMat) = a.ddMatOp(b, (x:Double, y:Double) => math.max(x,y), null) + def sum(a:DMat, n:Int) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, null) + def cumsum(a:DMat, n:Int) = a.ddReduceAll(n, (x:Double) => x, (x:Double, y:Double) => x+y, null) + def maxi(a:DMat, n:Int) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), null) + def mini(a:DMat, n:Int):DMat = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null) + def sum(a:DMat) = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, null) + def cumsum(a:DMat) = a.ddReduceAll(0, (x:Double) => x, (x:Double, y:Double) => x+y, null) + def maxi(a:DMat) = a.ddReduceOp(0, (x:Double) => x, 
(x:Double, y:Double) => math.max(x,y), null) + def mini(a:DMat):DMat = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null) + def maxi2(a:DMat,d:Int):(DMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Double,y:Double)=>(x>y)); (DMat(m), ii)} + def mini2(a:DMat,d:Int):(DMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Double,y:Double)=>(x(x>y)); (DMat(m), ii)} + def mini2(a:DMat):(DMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Double,y:Double)=>(x math.min(x,y), out) + def max(a:DMat, b:DMat, out:Mat) = a.ddMatOp(b, (x:Double, y:Double) => math.max(x,y), out) + def sum(a:DMat, n:Int, out:Mat) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, out) + def cumsum(a:DMat, n:Int, out:Mat) = a.ddReduceAll(n, (x:Double) => x, (x:Double, y:Double) => x+y, out) + def maxi(a:DMat, n:Int, out:Mat) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), out) + def mini(a:DMat, n:Int, out:Mat):DMat = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), out) + def sum(a:DMat, out:Mat) = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, out) + def cumsum(a:DMat, out:Mat) = a.ddReduceAll(0, (x:Double) => x, (x:Double, y:Double) => x+y, out) + def maxi(a:DMat, out:Mat) = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), out) + def mini(a:DMat, out:Mat):DMat = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), out) + + def min(a:FMat, b:FMat) = a.ffMatOp(b, (x:Float, y:Float) => math.min(x,y), null) + def max(a:FMat, b:FMat) = a.ffMatOp(b, (x:Float, y:Float) => math.max(x,y), null) + def sum(a:FMat, n:Int) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, null) + def cumsum(a:FMat, n:Int) = a.ffReduceAll(n, (x:Float) => x, (x:Float, y:Float) => x+y, null) + def maxi(a:FMat, n:Int) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null) + def mini(a:FMat, n:Int):FMat = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null) + def 
sum(a:FMat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, null) + def cumsum(a:FMat) = a.ffReduceAll(0, (x:Float) => x, (x:Float, y:Float) => x+y, null) + def maxi(a:FMat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null) + def mini(a:FMat):FMat = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null) + def maxi2(a:FMat,d:Int):(FMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Float,y:Float)=>(x>y)); (FMat(m), ii)} + def mini2(a:FMat,d:Int):(FMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Float,y:Float)=>(x(x>y)); (FMat(m), ii)} + def mini2(a:FMat):(FMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Float,y:Float)=>(x math.min(x,y), out) + def max(a:FMat, b:FMat, out:Mat) = a.ffMatOp(b, (x:Float, y:Float) => math.max(x,y), out) + def sum(a:FMat, n:Int, out:Mat) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, out) + def cumsum(a:FMat, n:Int, out:Mat) = a.ffReduceAll(n, (x:Float) => x, (x:Float, y:Float) => x+y, out) + def maxi(a:FMat, n:Int, out:Mat) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), out) + def mini(a:FMat, n:Int, out:Mat):FMat = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), out) + def sum(a:FMat, out:Mat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, out) + def cumsum(a:FMat, out:Mat) = a.ffReduceAll(0, (x:Float) => x, (x:Float, y:Float) => x+y, out) + def maxi(a:FMat, out:Mat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), out) + def mini(a:FMat, out:Mat):FMat = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), out) + + def min (a:IMat, b:IMat) = a.iiMatOp(b, (x:Int, y:Int) => math.min(x,y), null) + def max (a:IMat, b:IMat) = a.iiMatOp(b, (x:Int, y:Int) => math.max(x,y), null) + def sum(a:IMat, n:Int) = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => x+y, null) + def cumsum(a:IMat, n:Int) = a.iiReduceAll(n, (x:Int) => x, (x:Int, y:Int) => x+y, null) + def maxi(a:IMat, n:Int) = a.iiReduceOp(n, (x:Int) => x, 
(x:Int, y:Int) => math.max(x,y), null) + def mini(a:IMat, n:Int):IMat = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), null) + def sum(a:IMat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => x+y, null) + def cumsum(a:IMat) = a.iiReduceAll(0, (x:Int) => x, (x:Int, y:Int) => x+y, null) + def maxi(a:IMat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), null) + def mini(a:IMat):IMat = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), null) + def maxi2(a:IMat,d:Int):(IMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Int,y:Int)=>(x>y)); (IMat(m), ii)} + def mini2(a:IMat,d:Int):(IMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Int,y:Int)=>(x(x>y)); (IMat(m), ii)} + def mini2(a:IMat):(IMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Int,y:Int)=>(x math.min(x,y), out) + def max (a:IMat, b:IMat, out:Mat) = a.iiMatOp(b, (x:Int, y:Int) => math.max(x,y), out) + def sum(a:IMat, n:Int, out:Mat) = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => x+y, out) + def cumsum(a:IMat, n:Int, out:Mat) = a.iiReduceAll(n, (x:Int) => x, (x:Int, y:Int) => x+y, out) + def maxi(a:IMat, n:Int, out:Mat) = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), out) + def mini(a:IMat, n:Int, out:Mat):IMat = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), out) + def sum(a:IMat, out:Mat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => x+y, out) + def cumsum(a:IMat, out:Mat) = a.iiReduceAll(0, (x:Int) => x, (x:Int, y:Int) => x+y, out) + def maxi(a:IMat, out:Mat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), out) + def mini(a:IMat, out:Mat):IMat = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), out) + + def min(a:SDMat, b:SDMat) = a.ssMatOp(b, (x:Double, y:Double) => math.min(x,y), null) + def max(a:SDMat, b:SDMat) = a.ssMatOp(b, (x:Double, y:Double) => math.max(x,y), null) + def sum(a:SDMat, n:Int) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, null) + def maxi(a:SDMat, n:Int) = a.ssReduceOp(n, (x:Double) => x, 
(x:Double, y:Double) => math.max(x,y), null) + def mini(a:SDMat, n:Int) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null) + def sum(a:SDMat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, null) + def maxi(a:SDMat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), null) + def mini(a:SDMat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null) + + def sum(a:SDMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, omat) + def maxi(a:SDMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), omat) + def mini(a:SDMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), omat) + def sum(a:SDMat, omat:Mat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, omat) + def maxi(a:SDMat, omat:Mat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), omat) + def mini(a:SDMat, omat:Mat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), omat) + + def min(a:SMat, b:SMat) = a.ssMatOp(b, (x:Float, y:Float) => math.min(x,y), null) + def max(a:SMat, b:SMat) = a.ssMatOp(b, (x:Float, y:Float) => math.max(x,y), null) + def sum(a:SMat, n:Int) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, null) + def maxi(a:SMat, n:Int) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null) + def mini(a:SMat, n:Int) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null) + def sum(a:SMat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, null) + def maxi(a:SMat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null) + def mini(a:SMat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null) + def min(a:SMat, b:Float) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), null) + def max(a:SMat, b:Float) = a.ssMatOpScalar(b, (x:Float, 
y:Float) => math.max(x,y), null) + def min(b:Float, a:SMat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), null) + def max(b:Float, a:SMat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.max(x,y), null) + def min(a:SMat, b:Float, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), omat) + def max(a:SMat, b:Float, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.max(x,y), omat) + def min(b:Float, a:SMat, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), omat) + def max(b:Float, a:SMat, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.max(x,y), omat) + + def sum(a:SMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, omat) + def maxi(a:SMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), omat) + def mini(a:SMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), omat) + def sum(a:SMat, omat:Mat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, omat) + def maxi(a:SMat, omat:Mat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), omat) + def mini(a:SMat, omat:Mat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), omat) + def min(a:SDMat, b:Double) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.min(x,y), null) + def max(a:SDMat, b:Double) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.max(x,y), null) + def min(b:Double, a:SDMat) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.min(x,y), null) + def max(b:Double, a:SDMat) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.max(x,y), null) + + def sum(a:CMat, n:Int) = a.ccReduceOpv(n, CMat.vecAdd _, null) + def sum(a:CMat, n:Int, c:Mat) = a.ccReduceOpv(n, CMat.vecAdd _, c) + + def max(a:Mat, b:Mat):Mat = { + (a, b) match { + case (aa:FMat, bb:FMat) => max(aa, bb):FMat + case (aa:IMat, bb:IMat) => max(aa, bb):IMat + case (aa:DMat, bb:DMat) => max(aa, bb):DMat + case (aa:GMat, bb:GMat) => max(aa, 
bb):GMat + } + } + + def min(a:Mat, b:Mat):Mat = { + (a, b) match { + case (aa:FMat, bb:FMat) => min(aa, bb):FMat + case (aa:IMat, bb:IMat) => min(aa, bb):IMat + case (aa:DMat, bb:DMat) => min(aa, bb):DMat + case (aa:GMat, bb:GMat) => min(aa, bb):GMat + } + } + + def max(a:Mat, b:Mat, c:Mat):Mat = { + (a, b) match { + case (aa:FMat, bb:FMat) => max(aa, bb, c):FMat + case (aa:IMat, bb:IMat) => max(aa, bb, c):IMat + case (aa:DMat, bb:DMat) => max(aa, bb, c):DMat + case (aa:GMat, bb:GMat) => max(aa, bb, c):GMat + } + } + + def min(a:Mat, b:Mat, c:Mat):Mat = { + (a, b) match { + case (aa:FMat, bb:FMat) => min(aa, bb, c):FMat + case (aa:IMat, bb:IMat) => min(aa, bb, c):IMat + case (aa:DMat, bb:DMat) => min(aa, bb, c):DMat + case (aa:GMat, bb:GMat) => min(aa, bb, c):GMat + } + } + + def max(a:Float, b:Mat, c:Mat):Mat = { + b match { + case bb:FMat => max(a, bb, c):FMat + case bb:IMat => max(a.asInstanceOf[Int], bb, c):IMat + case bb:DMat => max(DMat(a), bb, c):DMat + case bb:GMat => max(GMat(a), bb, c):GMat + case bb:SMat => max(a, bb, c):SMat + } + } + + def min(a:Float, b:Mat, c:Mat):Mat = { + b match { + case bb:FMat=> min(a, bb, c):FMat + case bb:IMat=> min(a.asInstanceOf[Int], bb, c):IMat + case bb:DMat => min(DMat(a), bb, c):DMat + case bb:GMat => min(GMat(a), bb, c):GMat + case bb:SMat => min(a, bb, c):SMat + } + } + + def max(b:Mat, a:Float, c:Mat):Mat = { + b match { + case bb:FMat => max(a, bb, c):FMat + case bb:IMat => max(a.asInstanceOf[Int], bb, c):IMat + case bb:DMat => max(DMat(a), bb, c):DMat + case bb:GMat => max(GMat(a), bb, c):GMat + case bb:SMat => max(a, bb, c):SMat + } + } + + def min(b:Mat, a:Float, c:Mat):Mat = { + b match { + case bb:FMat=> min(a, bb, c):FMat + case bb:IMat=> min(a.asInstanceOf[Int], bb, c):IMat + case bb:DMat => min(DMat(a), bb, c):DMat + case bb:GMat => min(GMat(a), bb, c):GMat + case bb:SMat => min(a, bb, c):SMat + } + } + + def max(a:Double, b:Mat, c:Mat):Mat = { + b match { + case bb:FMat => max(a.asInstanceOf[Float], bb, 
c):FMat + case bb:IMat => max(a.asInstanceOf[Int], bb, c):IMat + case bb:DMat => max(DMat(a), bb, c):DMat + case bb:GMat => max(GMat(a), bb, c):GMat + case bb:SMat => max(a.asInstanceOf[Float], bb, c):SMat + } + } + + def min(a:Double, b:Mat, c:Mat):Mat = { + b match { + case bb:FMat => min(a.asInstanceOf[Float], bb, c):FMat + case bb:IMat => min(a.asInstanceOf[Int], bb, c):IMat + case bb:DMat=> min(DMat(a), bb, c):DMat + case bb:GMat => min(GMat(a), bb, c):GMat + case bb:SMat => min(a.asInstanceOf[Float], bb, c):SMat + } + } + + def max(a:Mat, b:Double, c:Mat):Mat = { + a match { + case aa:FMat => max(aa, b.asInstanceOf[Float], c):FMat + case aa:IMat => max(aa, b.asInstanceOf[Int], c):IMat + case aa:DMat => max(aa, DMat(b), c):DMat + case aa:GMat => max(aa, GMat(b), c):GMat + case aa:SMat => max(b.asInstanceOf[Float], aa, c):SMat + } + } + + def min(a:Mat, b:Double, c:Mat):Mat = { + a match { + case aa:FMat => min(aa, b.asInstanceOf[Float], c):FMat + case aa:IMat => min(aa, b.asInstanceOf[Int], c):IMat + case aa:DMat => min(aa, DMat(b), c):DMat + case aa:GMat => min(aa, GMat(b), c):GMat + case aa:SMat => min(b.asInstanceOf[Float], aa, c):SMat + } + } + + def mini(a:Mat, b:Int):Mat = { + a match { + case aa:FMat => mini(aa, b):FMat + case aa:IMat => mini(aa, b):IMat + case aa:DMat => mini(aa, b):DMat + case aa:GMat => mini(aa, b):GMat + } + } + + def maxi(a:Mat, b:Int):Mat = { + a match { + case aa:FMat => maxi(aa, b):FMat + case aa:IMat => maxi(aa, b):IMat + case aa:DMat => maxi(aa, b):DMat + case aa:GMat => maxi(aa, b):GMat + } + } + + def sum(a:Mat, b:Int):Mat = { + a match { + case aa:FMat => sum(aa, b):FMat + case aa:IMat => sum(aa, b):IMat + case aa:DMat => sum(aa, b):DMat + case aa:CMat => sum(aa, b):CMat + case aa:SMat => sum(aa, b):FMat + case aa:GMat => sum(aa, b):GMat + } + } + + def sum(a:Mat, b:Int, c:Mat):Mat = { + a match { + case aa:FMat => sum(aa, b, c):FMat + case aa:IMat => sum(aa, b, c):IMat + case aa:DMat=> sum(aa, b, c):DMat + case aa:SMat=> 
sum(aa, b, c):FMat + case aa:CMat => sum(aa, b, c):CMat + case aa:GMat => sum(aa, b, c):GMat + } + } + + def mean(a:FMat, dim0:Int):FMat = { + _mean(a, dim0).asInstanceOf[FMat] + } + + def mean(a:FMat):FMat = { + _mean(a, 0).asInstanceOf[FMat] + } + + def mean(a:DMat, dim0:Int):DMat = { + _mean(a, dim0).asInstanceOf[DMat] + } + + def mean(a:DMat):DMat = { + _mean(a, 0).asInstanceOf[DMat] + } + + def mean(a:IMat, dim0:Int):FMat = { + _mean(a, dim0).asInstanceOf[FMat] + } + + def mean(a:IMat):FMat = { + _mean(a, 0).asInstanceOf[FMat] + } + + def mean(a:CMat, dim0:Int):CMat = { + _mean(a, dim0).asInstanceOf[CMat] + } + + def mean(a:CMat):CMat = { + _mean(a, 0).asInstanceOf[CMat] + } + + def mean(a:GMat, dim0:Int):GMat = { + _mean(a, dim0).asInstanceOf[GMat] + } + + def mean(a:GMat):GMat = { + _mean(a, 0).asInstanceOf[GMat] + } + + def mean(a:Mat, b:Int):Mat = _mean(a,b) + + def mean(a:Mat):Mat = _mean(a, 0):Mat + + def _mean(a:Mat, dim0:Int):Mat = { + val dim = if (a.nrows == 1 && dim0 == 0) 2 else math.max(1, dim0) + if (dim == 1) { + sum(a, 1)*(1.0f/a.nrows) + } else { + sum(a, 2)*(1.0f/a.ncols) + } + } + + def variance(a:FMat, dim0:Int):FMat = { + _variance(a, dim0).asInstanceOf[FMat] + } + + def variance(a:FMat):FMat = { + _variance(a, 0).asInstanceOf[FMat] + } + + def variance(a:DMat, dim0:Int):DMat = { + _variance(a, dim0).asInstanceOf[DMat] + } + + def variance(a:DMat):DMat = { + _variance(a, 0).asInstanceOf[DMat] + } + + def variance(a:IMat, dim0:Int):FMat = { + _variance(a, dim0).asInstanceOf[FMat] + } + + def variance(a:IMat):FMat = { + _variance(a, 0).asInstanceOf[FMat] + } + + def variance(a:CMat, dim0:Int):CMat = { + _variance(a, dim0).asInstanceOf[CMat] + } + + def variance(a:CMat):CMat = { + _variance(a, 0).asInstanceOf[CMat] + } + + def variance(a:GMat, dim0:Int):GMat = { + _variance(a, dim0).asInstanceOf[GMat] + } + + def variance(a:GMat):GMat = { + _variance(a, 0).asInstanceOf[GMat] + } + + def variance(a:Mat, dim:Int) = _variance(a, dim) + + def 
variance(a:Mat):Mat = _variance(a, 0) + + def _variance(a:Mat, dim0:Int):Mat = { + val dim = if (a.nrows == 1 && dim0 == 0) 2 else math.max(1, dim0) + if (dim == 1) { + val m = mean(a, 1) + sum(a *@ a, 1)*(1.0f/a.nrows) - m *@ m + } else { + val m = mean(a, 2) + sum(a *@ a, 2)*(1.0f/a.ncols) - m *@ m + } + } + + + def applyDFun(a:DMat, omat:Mat, vfn:(Int, Array[Double], Array[Double])=>Unit, efn:(Double)=>Double, nflops:Long) ={ + val out = recycleTry(omat, a) + if (Mat.noMKL || vfn == null) { + if (efn == null) { + throw new RuntimeException("no Scala builtin version of this math function, sorry") + } + var i = 0; val len = a.length; val odata = out.data; val adata = a.data + while (i < len) {odata(i) = efn(adata(i)); i += 1} + } else { + vfn(a.length, a.data, out.data) + } + Mat.nflops += nflops*a.length + out + } + + def applyDFunV(a:DMat, omat:Mat, vfn:(Int, Array[Double], Array[Double])=>Unit, + efn:(Int, Array[Double], Array[Double])=>Unit, nflops:Long) = { + val out = recycleTry(omat, a) + if (Mat.noMKL) { + if (efn == null) { + throw new RuntimeException("no Scala builtin version of this math function, sorry") + } + efn(a.length, a.data, out.data) + } else { + vfn(a.length, a.data, out.data) + } + Mat.nflops += nflops*a.length + out + } + + def applySFun(a:FMat, omat:Mat, vfn:(Int, Array[Float], Array[Float])=>Unit, efn:(Float)=>Float, nflops:Long) ={ + val out = recycleTry(omat, a) + if (Mat.noMKL || vfn == null) { + if (efn == null) { + throw new RuntimeException("no Scala builtin version of this math function, sorry") + } + var i = 0; val len = a.length; val odata = out.data; val adata = a.data + while (i < len) {odata(i) = efn(adata(i)); i += 1} + } else { + vfn(a.length, a.data, out.data) + } + Mat.nflops += nflops*a.length + out + } + + def applySFunV(a:FMat, omat:Mat, vfn:(Int, Array[Float], Array[Float])=>Unit, + efn:(Int, Array[Float], Array[Float])=>Unit, nflops:Long) ={ + val out = recycleTry(omat, a) + if (Mat.noMKL) { + if (efn == null) { + 
throw new RuntimeException("no Scala builtin version of this math function, sorry") + } + efn(a.length, a.data, out.data) + } else { + vfn(a.length, a.data, out.data) + } + Mat.nflops += nflops*a.length + out + } + + def applyD2Fun(a:DMat, b:DMat, omat:Mat, + vfn:(Int, Array[Double], Array[Double], Array[Double]) => Unit, + efn:(Double, Double)=>Double, nflops:Long):DMat = { + val out = recycleTry(omat, a, b) + if (Mat.noMKL) { + if (efn == null) { + throw new RuntimeException("no Scala builtin version of this math function, sorry") + } + var i = 0; val len = a.length; val odata = out.data; val adata = a.data; val bdata = b.data + while (i < len) {odata(i) = efn(adata(i), bdata(i)); i += 1} + } else { + vfn(a.length, a.data, b.data, out.data) + } + Mat.nflops += nflops*a.length + out + } + + def sign(a:DMat, out:Mat) = applyDFun(a, out, null, math.signum _, 1L) + def sign(a:DMat):DMat = sign(a, DMat(a.nrows, a.ncols)) + + def abs(a:DMat, out:Mat) = applyDFun(a, out, vdAbs _, math.abs _, 1L) + def abs(a:DMat):DMat = abs(a, DMat(a.nrows, a.ncols)) + + def _vdexp(n:Int, a:Array[Double], b:Array[Double]) = {var i=0 ; while (i(math.floor(x+0.5)), 1L) + def round(a:DMat):DMat = round(a, DMat(a.nrows, a.ncols)) + + def trunc(a:DMat, out:Mat) = applyDFun(a, out, vdTrunc _, null, 1L) + def trunc(a:DMat):DMat = trunc(a, DMat(a.nrows, a.ncols)) + + def atan2(a:DMat, b:DMat, out:Mat) = applyD2Fun(a, b, out, vdAtan2 _, math.atan2, 10L) + def atan2(a:DMat, b:DMat):DMat = atan2(a, b, DMat(a.nrows, a.ncols)) + + def pow(a:DMat, b:DMat, out:Mat) = applyD2Fun(a, b, out, vdPow _, math.pow, 10L) + def pow(a:DMat, b:DMat):DMat = pow(a, b, DMat(a.nrows, a.ncols)) + + def exppsi(a:DMat, out:Mat) = applyDFun(a, out, null, (x:Double)=>if (x<1.0) 0.5*x*x else x-0.5, 1L) + def exppsi(a:DMat):DMat = exppsi(a, DMat(a.nrows, a.ncols)) + + + def sdev(a:DMat, dim0:Int):DMat = sqrt(variance(a, dim0)) + def sdev(a:DMat):DMat = sdev(a, 0) + + def sdev(a:FMat, dim0:Int):FMat = sqrt(variance(a, dim0)) 
// Standard deviation of a single-precision matrix with the default dimension
// argument (0), forwarded to sdev(a, dim0).
def sdev(a:FMat):FMat = sdev(a, 0)

// ---------------------------------------------------------------------------
// Element-wise single-precision (FMat) math functions.
//
// Every function comes as a pair:
//   f(a, out) - forwards to applySFun with a vectorized `vs*` routine, a
//               per-element Scala fallback, and a per-element flop estimate.
//   f(a)      - passes a new FMat(a.nrows, a.ncols) as `out` to f(a, out).
//
// applySFun uses the Scala fallback when Mat.noMKL is set or when the
// vectorized routine is null, and the vectorized routine otherwise.
// ---------------------------------------------------------------------------

// sign has no vectorized routine (null), so the Scala fallback is always used.
def sign(a:FMat, out:Mat) = applySFun(a, out, null, math.signum _, 1L)
def sign(a:FMat):FMat = sign(a, FMat(a.nrows, a.ncols))

def abs(a:FMat, out:Mat) = applySFun(a, out, vsAbs _, math.abs _, 1L)
def abs(a:FMat):FMat = abs(a, FMat(a.nrows, a.ncols))

// NOTE(review): in this copy of the source the text of `_vsexp` and of the
// header of `exp(a, out)` was lost (everything between a `<` and the next `>`
// was stripped). Both definitions below are reconstructed from the surviving
// fragments and from the neighbouring expm1 definition - confirm against the
// original file. In particular `vsExp` is assumed to exist alongside `vsExpm1`.

// Array-level Scala implementation of exp: writes exp(a(i)) into b(i) for the
// first n elements.
def _vsexp(n:Int, a:Array[Float], b:Array[Float]) = {
  var i = 0
  while (i < n) {
    b(i) = math.exp(a(i)).toFloat
    i += 1
  }
}

// The surviving fragment of this definition called math.expm1 in the scalar
// fallback; exp must compute e^x, so math.exp is used here.
def exp(a:FMat, out:Mat) = applySFun(a, out, vsExp _, (x:Float) => math.exp(x).asInstanceOf[Float], 10L)
def exp(a:FMat):FMat = exp(a, FMat(a.nrows, a.ncols))

def expm1(a:FMat, out:Mat) = applySFun(a, out, vsExpm1 _, (x:Float) => math.expm1(x).asInstanceOf[Float], 10L)
def expm1(a:FMat):FMat = expm1(a, FMat(a.nrows, a.ncols))

def sqrt(a:FMat, out:Mat) = applySFun(a, out, vsSqrt _, (x:Float) => math.sqrt(x).asInstanceOf[Float], 10L)
def sqrt(a:FMat):FMat = sqrt(a, FMat(a.nrows, a.ncols))

// Natural logarithm.
def ln(a:FMat, out:Mat) = applySFun(a, out, vsLn _, (x:Float) => math.log(x).asInstanceOf[Float], 10L)
def ln(a:FMat):FMat = ln(a, FMat(a.nrows, a.ncols))

def log10(a:FMat, out:Mat) = applySFun(a, out, vsLog10 _, (x:Float) => math.log10(x).asInstanceOf[Float], 10L)
def log10(a:FMat):FMat = log10(a, FMat(a.nrows, a.ncols))

def log1p(a:FMat, out:Mat) = applySFun(a, out, vsLog1p _, (x:Float) => math.log1p(x).asInstanceOf[Float], 10L)
def log1p(a:FMat):FMat = log1p(a, FMat(a.nrows, a.ncols))

def cos(a:FMat, out:Mat) = applySFun(a, out, vsCos _, (x:Float) => math.cos(x).asInstanceOf[Float], 10L)
def cos(a:FMat):FMat = cos(a, FMat(a.nrows, a.ncols))

def sin(a:FMat, out:Mat) = applySFun(a, out, vsSin _, (x:Float) => math.sin(x).asInstanceOf[Float], 10L)
def sin(a:FMat):FMat = sin(a, FMat(a.nrows, a.ncols))

def tan(a:FMat, out:Mat) = applySFun(a, out, vsTan _, (x:Float) => math.tan(x).asInstanceOf[Float], 10L)
def tan(a:FMat):FMat = tan(a, FMat(a.nrows, a.ncols))

def cosh(a:FMat, out:Mat) = applySFun(a, out, vsCosh _, (x:Float) => math.cosh(x).asInstanceOf[Float], 10L)
def cosh(a:FMat):FMat = cosh(a, FMat(a.nrows, a.ncols))
// ---------------------------------------------------------------------------
// Element-wise single-precision (FMat) math functions, continued.
//
// f(a, out) forwards to applySFun with a vectorized `vs*` routine, an optional
// per-element Scala fallback, and a per-element flop estimate; f(a) passes a
// new FMat(a.nrows, a.ncols) as `out`.
//
// Where the fallback is null, applySFun throws a RuntimeException
// ("no Scala builtin version of this math function, sorry") if Mat.noMKL is
// set, because only the vectorized routine is available.
// ---------------------------------------------------------------------------

def sinh(a:FMat, out:Mat) = applySFun(a, out, vsSinh _, (x:Float) => math.sinh(x).asInstanceOf[Float], 10L)
def sinh(a:FMat):FMat = sinh(a, FMat(a.nrows, a.ncols))

def tanh(a:FMat, out:Mat) = applySFun(a, out, vsTanh _, (x:Float) => math.tanh(x).asInstanceOf[Float], 10L)
def tanh(a:FMat):FMat = tanh(a, FMat(a.nrows, a.ncols))

def acos(a:FMat, out:Mat) = applySFun(a, out, vsAcos _, (x:Float) => math.acos(x).asInstanceOf[Float], 10L)
def acos(a:FMat):FMat = acos(a, FMat(a.nrows, a.ncols))

def asin(a:FMat, out:Mat) = applySFun(a, out, vsAsin _, (x:Float) => math.asin(x).asInstanceOf[Float], 10L)
def asin(a:FMat):FMat = asin(a, FMat(a.nrows, a.ncols))

def atan(a:FMat, out:Mat) = applySFun(a, out, vsAtan _, (x:Float) => math.atan(x).asInstanceOf[Float], 10L)
def atan(a:FMat):FMat = atan(a, FMat(a.nrows, a.ncols))

// Inverse hyperbolic functions. These previously passed vsCosh / vsSinh, which
// made acosh compute cosh and asinh compute sinh; they now use the inverse
// routines, consistent with atanh -> vsAtanh below.
// NOTE(review): assumes vsAcosh and vsAsinh are exported by the same VML
// binding that provides vsAtanh - confirm against the file's imports.
def acosh(a:FMat, out:Mat) = applySFun(a, out, vsAcosh _, null, 10L)
def acosh(a:FMat):FMat = acosh(a, FMat(a.nrows, a.ncols))

def asinh(a:FMat, out:Mat) = applySFun(a, out, vsAsinh _, null, 10L)
def asinh(a:FMat):FMat = asinh(a, FMat(a.nrows, a.ncols))

def atanh(a:FMat, out:Mat) = applySFun(a, out, vsAtanh _, null, 10L)
def atanh(a:FMat):FMat = atanh(a, FMat(a.nrows, a.ncols))

// Error-function family (vectorized routine only, no Scala fallback).
def erf(a:FMat, out:Mat) = applySFun(a, out, vsErf _, null, 10L)
def erf(a:FMat):FMat = erf(a, FMat(a.nrows, a.ncols))

def erfinv(a:FMat, out:Mat) = applySFun(a, out, vsErfInv _, null, 10L)
def erfinv(a:FMat):FMat = erfinv(a, FMat(a.nrows, a.ncols))

def erfc(a:FMat, out:Mat) = applySFun(a, out, vsErfc _, null, 10L)
def erfc(a:FMat):FMat = erfc(a, FMat(a.nrows, a.ncols))

def erfcinv(a:FMat, out:Mat) = applySFun(a, out, vsErfcInv _, null, 10L)
def erfcinv(a:FMat):FMat = erfcinv(a, FMat(a.nrows, a.ncols))

// Normal-distribution CDF and its inverse (vectorized routine only).
def normcdf(a:FMat, out:Mat) = applySFun(a, out, vsCdfNorm _, null, 10L)
def normcdf(a:FMat):FMat = normcdf(a, FMat(a.nrows, a.ncols))

def norminv(a:FMat, out:Mat) = applySFun(a, out, vsCdfNormInv _, null, 10L)
def norminv(a:FMat):FMat = norminv(a, FMat(a.nrows, a.ncols)) + + def gammaln(a:FMat, out:Mat) = applySFun(a, out, vsLGamma _, null, 10L) + def gammaln(a:FMat):FMat = gammaln(a, FMat(a.nrows, a.ncols)) + + def gamma(a:FMat, out:Mat) = applySFun(a, out, vsTGamma _, null, 10L) + def gamma(a:FMat):FMat = gamma(a, FMat(a.nrows, a.ncols)) + + def ceil(a:FMat, out:Mat) = applySFun(a, out, vsCeil _, (x:Float) => math.ceil(x).asInstanceOf[Float], 1L) + def ceil(a:FMat):FMat = ceil(a, FMat(a.nrows, a.ncols)) + + def floor(a:FMat, out:Mat) = applySFun(a, out, vsFloor _, (x:Float) => math.floor(x).asInstanceOf[Float], 1L) + def floor(a:FMat):FMat = floor(a, FMat(a.nrows, a.ncols)) + + def round(a:FMat, out:Mat) = applySFun(a, out, vsRound _, (x:Float)=>math.floor(x+0.5).asInstanceOf[Float], 1L) + def round(a:FMat):FMat = round(a, FMat(a.nrows, a.ncols)) + + def trunc(a:FMat, out:Mat) = applySFun(a, out, vsTrunc _, null, 1L) + def trunc(a:FMat):FMat = trunc(a, FMat(a.nrows, a.ncols)) + + def exppsi(a:FMat, out:Mat) = applySFun(a, out, null, (x:Float)=>if (x<1.0f) 0.5f*x*x else x-0.5f, 1L) + def exppsi(a:FMat):FMat = exppsi(a, FMat(a.nrows, a.ncols)) + + def setVMLmode(n:Int) = { + vmlSetMode(n) + } + + def getVMLmode():Int = { + vmlGetMode() + } + + private def checkSizes(a:Mat, b:Mat) = { + if (a.nrows != b.nrows || a.ncols != b.ncols) { + throw new RuntimeException("argument dims mismatch") + } + } + + private def checkSizes(a:Mat, b:Mat, c:DMat) = { + if (a.nrows != b.nrows || a.ncols != b.ncols || a.nrows != c.nrows || a.ncols != c.ncols) { + throw new RuntimeException("argument dims mismatch") + } + } + + def sprand(nrows:Int, ncols:Int, v:Double):SMat = { + val ioff = Mat.ioneBased + val out = SMat(nrows, ncols, math.max(math.min(nrows*ncols, 200),(1.5*v*nrows*ncols).intValue)) + Mat.nflops += (5L*nrows*ncols*v).toLong + val vec = geornd(v, 1, out.nnz) + val vals = rand(1, out.nnz) + var irow = vec.data(0).intValue + var ipos = 0 + var i = 0 + out.jc(0) = ioff + while (i 
< ncols) { + while (irow < nrows && ipos < out.nnz-1) { + out.data(ipos) = vals.data(ipos) + out.ir(ipos) = irow+ioff + ipos += 1 + irow += 1 + vec.data(ipos).intValue + } + irow = irow - nrows + out.jc(i+1) = ipos+ioff + i += 1 + } + SMat(out.sparseTrim) + } + + def histc(a:DMat, b:DMat):IMat = { + val out = IMat(b.length, 1) + var i = 0 + var hc = 0 + var j = 0 + while (j < a.length) { + if (i >= b.length-1 || a.data(j) < b.data(i+1)) { + hc += 1 + } else { + out.data(i) = hc + hc = 0 + i += 1 + }; + j += 1 + } + out.data(b.length-1) = hc + out + } + + def roc(score0:DMat, vpos0:DMat, vneg0:DMat, nxvals:Int):DMat = { + import BIDMat.MatFunctions._ + var score:DMat = null + if (size(score0,2) > size(score0,1)) { + score = score0.t + } else { + score = score0 + }; + var (vv, ii) = sortdown2(score); + var vpos = vpos0(ii); + var vneg = vneg0(ii); + var n = length(vpos); + if (size(vpos,2) > 1) { + vpos = vpos.t + }; + if (size(vneg,2) > 1) { + vneg = vneg.t; + }; + if (nnz(vneg < 0.0) + nnz(vpos < 0.0) > 0) { + sys.error("ROCcurve assumes vneg & vpos >= 0"); + }; + + var tp = cumsum(vpos); + var fp = cumsum(vneg); + var npos = tp(n-1); + var nneg = fp(n-1); + var xvals:FMat = row(1 to nxvals)*(1.0*nneg/nxvals) + var nc:IMat = histc(fp, 0.0f \ xvals); + var loci = max(cumsum(nc(0 until nxvals)), 1); + val curve = (0.0 on tp(loci-1, 0))*(1.0/npos) + curve + } + + def applyGfun(in:GMat, omat:Mat, opn:Int, kflops:Long):GMat = { + val out = recycleTry(omat, in) + CUMAT.applygfun(in.data, out.data, in.nrows*in.ncols, opn) + JCuda.cudaDeviceSynchronize() + Mat.nflops += kflops*in.length + out + } + + def applyGfun(in:GMat, opn:Int, kflops:Long):GMat = { + val out = GMat(in.nrows, in.ncols) + CUMAT.applygfun(in.data, out.data, in.nrows*in.ncols, opn) + JCuda.cudaDeviceSynchronize() + Mat.nflops += kflops*in.length + out + } + + def applyGfun2(a:GMat, b:GMat, omat:Mat, opn:Int, kflops:Long):GMat = { + if (a.nrows == b.nrows && a.ncols == b.ncols) { + val out = GMat(a.nrows, 
a.ncols) + CUMAT.applygfun2(a.data, b.data, out.data, a.nrows*a.ncols, opn) + JCuda.cudaDeviceSynchronize() + Mat.nflops += kflops*a.length + out + } else { + throw new RuntimeException("Dimensions mismatch") + } + } + + def applyGfun2(a:GMat, b:GMat, opn:Int, kflops:Long):GMat = { + if (a.nrows == b.nrows && a.ncols == b.ncols) { + val out = GMat(a.nrows, a.ncols) + CUMAT.applygfun2(a.data, b.data, out.data, a.nrows*a.ncols, opn) + JCuda.cudaDeviceSynchronize() + Mat.nflops += kflops*a.length + out + } else { + throw new RuntimeException("Dimensions mismatch") + } + } + import GMat.TransF + + def abs(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.abs, 1L) + def exp(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.exp, 10L) + def expm1(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.expm1, 10L) + def sqrt(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sqrt, 10L) + def ln(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.ln, 10L) + def log10(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.log10, 10L) + def log1p(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.log1p, 10L) + def cos(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.cos, 10L) + def sin(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sin, 10L) + def tan(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.tan, 10L) + def cosh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.cosh, 10L) + def sinh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sinh, 10L) + def tanh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.tanh, 10L) + def acos(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.acos, 10L) + def asin(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.asin, 10L) + def atan(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.atan, 10L) + def acosh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.acosh, 10L) + def asinh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.asinh, 10L) + def atanh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.atanh, 10L) + def 
erf(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erf, 10L) + def erfinv(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erfinv, 10L) + def erfc(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erfc, 10L) + def ercinv(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erfcinv, 10L) + def gammaln(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.gammaln, 10L) + def gamma(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.gamma, 10L) + def ceil(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.ceil, 10L) + def floor(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.floor, 10L) + def round(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.round, 10L) + def trunc(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.trunc, 10L) + def sign(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sign, 1L) + def exppsi(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.exppsi, 1L) + + import GMat.TransF2 + + def atan2(a:GMat, b:GMat, out:Mat):GMat = applyGfun2(a, b, out, TransF2.atan2, 10L) + def pow(a:GMat, b:GMat, out:Mat):GMat = applyGfun2(a, b, out, TransF2.pow, 10L) + + def abs(in:GMat):GMat = applyGfun(in, TransF.abs, 10L) + def exp(in:GMat):GMat = applyGfun(in, TransF.exp, 10L) + def expm1(in:GMat):GMat = applyGfun(in, TransF.expm1, 10L) + def sqrt(in:GMat):GMat = applyGfun(in, TransF.sqrt, 10L) + def ln(in:GMat):GMat = applyGfun(in, TransF.ln, 10L) + def log10(in:GMat):GMat = applyGfun(in, TransF.log10, 10L) + def log1p(in:GMat):GMat = applyGfun(in, TransF.log1p, 10L) + def cos(in:GMat):GMat = applyGfun(in, TransF.cos, 10L) + def sin(in:GMat):GMat = applyGfun(in, TransF.sin, 10L) + def tan(in:GMat):GMat = applyGfun(in, TransF.tan, 10L) + def cosh(in:GMat):GMat = applyGfun(in, TransF.cosh, 10L) + def sinh(in:GMat):GMat = applyGfun(in, TransF.sinh, 10L) + def tanh(in:GMat):GMat = applyGfun(in, TransF.tanh, 10L) + def acos(in:GMat):GMat = applyGfun(in, TransF.acos, 10L) + def asin(in:GMat):GMat = applyGfun(in, TransF.asin, 10L) + def atan(in:GMat):GMat = 
applyGfun(in, TransF.atan, 10L) + def acosh(in:GMat):GMat = applyGfun(in, TransF.acosh, 10L) + def asinh(in:GMat):GMat = applyGfun(in, TransF.asinh, 10L) + def atanh(in:GMat):GMat = applyGfun(in, TransF.atanh, 10L) + def erf(in:GMat):GMat = applyGfun(in, TransF.erf, 10L) + def erfinv(in:GMat):GMat = applyGfun(in, TransF.erfinv, 10L) + def erfc(in:GMat):GMat = applyGfun(in, TransF.erfc, 10L) + def ercinv(in:GMat):GMat = applyGfun(in, TransF.erfcinv, 10L) + def gammaln(in:GMat):GMat = applyGfun(in, TransF.gammaln, 10L) + def gamma(in:GMat):GMat = applyGfun(in, TransF.gamma, 10L) + def ceil(in:GMat):GMat = applyGfun(in, TransF.ceil, 10L) + def floor(in:GMat):GMat = applyGfun(in, TransF.floor, 10L) + def round(in:GMat):GMat = applyGfun(in, TransF.round, 10L) + def trunc(in:GMat):GMat = applyGfun(in, TransF.trunc, 10L) + def sign(in:GMat):GMat = applyGfun(in, TransF.sign, 1L) + def exppsi(in:GMat):GMat = applyGfun(in, TransF.exppsi, 1L) + + def atan2(a:GMat, b:GMat):GMat = applyGfun2(a, b, TransF2.atan2, 10L) + def pow(a:GMat, b:GMat):GMat = applyGfun2(a, b, TransF2.pow, 10L) + + import GMat.BinOp + def max(a:GMat, b:GMat):GMat = a.gOp(b, null, BinOp.op_max) + def min(a:GMat, b:GMat):GMat = a.gOp(b, null, BinOp.op_min) + def maxi(a:GMat, dir:Int):GMat = a.reduceOp(null, dir, BinOp.op_max) + def mini(a:GMat, dir:Int):GMat = a.reduceOp(null, dir, BinOp.op_min) + def sum(a:GMat, dir:Int):GMat = a.reduceOp(null, dir, BinOp.op_add) + def maxi(a:GMat):GMat = a.reduceOp(null, 0, BinOp.op_max) + def mini(a:GMat):GMat = a.reduceOp(null, 0, BinOp.op_min) + def sum(a:GMat):GMat = a.reduceOp(null, 0, BinOp.op_add) + + def max(a:GMat, b:GMat, out:Mat):GMat = a.gOp(b, out, BinOp.op_max) + def min(a:GMat, b:GMat, out:Mat):GMat = a.gOp(b, out, BinOp.op_min) + def maxi(a:GMat, dir:Int, out:Mat):GMat = a.reduceOp(out, dir, BinOp.op_max) + def mini(a:GMat, dir:Int, out:Mat):GMat = a.reduceOp(out, dir, BinOp.op_min) + def sum(a:GMat, dir:Int, out:Mat):GMat = a.reduceOp(out, dir, 
BinOp.op_add) + def maxi(a:GMat, out:Mat):GMat = a.reduceOp(out, 0, BinOp.op_max) + def mini(a:GMat, out:Mat):GMat = a.reduceOp(out, 0, BinOp.op_min) + def sum(a:GMat, out:Mat):GMat = a.reduceOp(out, 0, BinOp.op_add) + + def abs(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => abs(aa, b):FMat + case aa:DMat => abs(aa, b):DMat + case aa:GMat => abs(aa, b):GMat + } + } + + def sign(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => sign(aa, b) + case aa:DMat => sign(aa, b) + case aa:GMat => sign(aa, b) + } + } + + def sqrt(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => sqrt(aa, b) + case aa:DMat => sqrt(aa, b) + case aa:GMat => sqrt(aa, b) + } + } + + def exp(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => exp(aa, b) + case aa:DMat => exp(aa, b) + case aa:GMat => exp(aa, b) + } + } + + def expm1(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => expm1(aa, b) + case aa:DMat => expm1(aa, b) + case aa:GMat => expm1(aa, b) + } + } + + def ln(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => ln(aa, b) + case aa:DMat => ln(aa, b) + case aa:GMat => ln(aa, b) + } + } + + def log10(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => log10(aa, b) + case aa:DMat => log10(aa, b) + case aa:GMat => log10(aa, b) + } + } + + def log1p(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => log1p(aa, b) + case aa:DMat => log1p(aa, b) + case aa:GMat => log1p(aa, b) + } + } + + def cos(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => cos(aa, b) + case aa:DMat => cos(aa, b) + case aa:GMat => cos(aa, b) + } + } + + def sin(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => sin(aa, b) + case aa:DMat => sin(aa, b) + case aa:GMat => sin(aa, b) + } + } + + def tan(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => tan(aa, b) + case aa:DMat => tan(aa, b) + case aa:GMat => tan(aa, b) + } + } + + def cosh(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => cosh(aa, b) + case aa:DMat => cosh(aa, b) + case aa:GMat => cosh(aa, b) + } + } + + def sinh(a:Mat, b:Mat):Mat = { + a 
match { + case aa:FMat => sinh(aa, b) + case aa:DMat => sinh(aa, b) + case aa:GMat => sinh(aa, b) + } + } + + def tanh(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => tanh(aa, b) + case aa:DMat => tanh(aa, b) + case aa:GMat => tanh(aa, b) + } + } + + def acos(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => acos(aa, b) + case aa:DMat => acos(aa, b) + case aa:GMat => acos(aa, b) + } + } + + def asin(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => asin(aa, b) + case aa:DMat => asin(aa, b) + case aa:GMat => asin(aa, b) + } + } + + def atan(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => atan(aa, b) + case aa:DMat => atan(aa, b) + case aa:GMat => atan(aa, b) + } + } + + def acosh(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => acosh(aa, b) + case aa:DMat => acosh(aa, b) + case aa:GMat => acosh(aa, b) + } + } + + def asinh(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => asinh(aa, b) + case aa:DMat => asinh(aa, b) + case aa:GMat => asinh(aa, b) + } + } + + def erf(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => erf(aa, b) + case aa:DMat => erf(aa, b) + case aa:GMat => erf(aa, b) + } + } + + def erfinv(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => erfinv(aa, b) + case aa:DMat => erfinv(aa, b) + case aa:GMat => erfinv(aa, b) + } + } + + def erfc(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => erfc(aa, b) + case aa:DMat => erfc(aa, b) + case aa:GMat => erfc(aa, b) + } + } + + def erfcinv(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => erfcinv(aa, b) + case aa:DMat => erfcinv(aa, b) + case aa:GMat => erfcinv(aa, b) + } + } + + def gamma(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => gamma(aa, b) + case aa:DMat => gamma(aa, b) + case aa:GMat => gamma(aa, b) + } + } + + def gammaln(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => gammaln(aa, b) + case aa:DMat => gammaln(aa, b) + case aa:GMat => gammaln(aa, b) + } + } + + def floor(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => floor(aa, b) + case aa:DMat => floor(aa, b) + 
case aa:GMat => floor(aa, b) + } + } + + def ceil(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => ceil(aa, b) + case aa:DMat => ceil(aa, b) + case aa:GMat => ceil(aa, b) + } + } + + def round(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => round(aa, b) + case aa:DMat => round(aa, b) + case aa:GMat => round(aa, b) + } + } + + def trunc(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => trunc(aa, b) + case aa:DMat => trunc(aa, b) + case aa:GMat => trunc(aa, b) + } + } + + def exppsi(a:Mat, b:Mat):Mat = { + a match { + case aa:FMat => exppsi(aa, b) + case aa:DMat => exppsi(aa, b) + case aa:GMat => exppsi(aa, b) + } + } + + def atan2(a:Mat, b:Mat, c:Mat):Mat = { + (a, b) match { + case (aa:FMat, bb:FMat) => atan2(aa, bb, c) + case (aa:DMat, bb:DMat) => atan2(aa, bb, c) + case (aa:GMat, bb:GMat) => atan2(aa, bb, c) + } + } + + def pow(a:Mat, b:Mat, c:Mat):Mat = { + (a, b) match { + case (aa:FMat, bb:FMat) => pow(aa, bb, c) + case (aa:DMat, bb:DMat) => pow(aa, bb, c) + case (aa:GMat, bb:GMat) => pow(aa, bb, c) + } + } +} + + + + + + diff --git a/src/main/scala/BIDMat/Solvers.scala b/src/main/scala/BIDMat/Solvers.scala new file mode 100755 index 00000000..00cbc126 --- /dev/null +++ b/src/main/scala/BIDMat/Solvers.scala @@ -0,0 +1,334 @@ +package BIDMat +import edu.berkeley.bid.CBLAS._ +import edu.berkeley.bid.LAPACK._ +import MatFunctions._ +import SciFunctions._ + +object Solvers { + + def inv(a:FMat):FMat = _inv(a).asInstanceOf[FMat] + def inv(a:DMat):DMat = _inv(a).asInstanceOf[DMat] + def inv(a:CMat):CMat = _inv(a).asInstanceOf[CMat] + def inv(a:Mat):Mat = _inv(a) + + def _inv(a:Mat):Mat = { + Mat.nflops += 4L*a.nrows*a.nrows*a.nrows/3 + if (a.nrows != a.ncols) { + throw new RuntimeException("inv needs a square matrix") + } else { + val out = a.copy + val ipiv = new Array[Int](a.nrows) + out match { + case dout:DMat => { + dgetrf(ORDER.ColMajor, a.nrows, a.ncols, dout.data, a.nrows, ipiv) + dgetri(ORDER.ColMajor, a.nrows, dout.data, a.nrows, ipiv) + } + case 
fout:FMat => { + sgetrf(ORDER.ColMajor, a.nrows, a.ncols, fout.data, a.nrows, ipiv) + sgetri(ORDER.ColMajor, a.nrows, fout.data, a.nrows, ipiv) + } + case dout:CMat => { + cgetrf(ORDER.ColMajor, a.nrows, a.ncols, dout.data, a.nrows, ipiv) + cgetri(ORDER.ColMajor, a.nrows, dout.data, a.nrows, ipiv) + } + } + out + } + } + + def seig(a:FMat, getVecs:Boolean):(FMat, FMat) = {val (d,out) = _seig(a, getVecs); (d.asInstanceOf[FMat], out.asInstanceOf[FMat])} + def seig(a:DMat, getVecs:Boolean):(DMat, DMat) = {val (d,out) = _seig(a, getVecs); (d.asInstanceOf[DMat], out.asInstanceOf[DMat])} + def seig(a:Mat, getVecs:Boolean):(Mat, Mat) = _seig(a, getVecs) + + def seig(a:FMat):(FMat, FMat) = {val (d,out) = _seig(a, true); (d.asInstanceOf[FMat], out.asInstanceOf[FMat])} + def seig(a:DMat):(DMat, DMat) = {val (d,out) = _seig(a, true); (d.asInstanceOf[DMat], out.asInstanceOf[DMat])} + def seig(a:Mat):(Mat, Mat) = _seig(a, true) + + def _seig(a:Mat, getVecs:Boolean):(Mat, Mat) = { + Mat.nflops += 6L*a.nrows*a.nrows*a.nrows + if (a.nrows != a.ncols) { + throw new RuntimeException("eig needs a square matrix") + } else { + val out = a.copy + val d = a.zeros(a.nrows,1) + val e = a.zeros(a.nrows,1) + val tau = a.zeros(a.nrows,1) + (out, d, e, tau) match { + case (dout:DMat, dd:DMat, de:DMat, dtau:DMat) => { + dsytrd(ORDER.ColMajor, "U", a.nrows, dout.data, a.nrows, dd.data, de.data, dtau.data) + dorgtr(ORDER.ColMajor, "U", a.nrows, dout.data, a.nrows, dtau.data) + dsteqr(ORDER.ColMajor, if (getVecs) "V" else "N", a.nrows, dd.data, de.data, dout.data, a.nrows) + } + case (fout:FMat, fd:FMat, fe:FMat, ftau:FMat) => { + ssytrd(ORDER.ColMajor, "U", a.nrows, fout.data, a.nrows, fd.data, fe.data, ftau.data) + sorgtr(ORDER.ColMajor, "U", a.nrows, fout.data, a.nrows, ftau.data) + ssteqr(ORDER.ColMajor, if (getVecs) "V" else "N", a.nrows, fd.data, fe.data, fout.data, a.nrows) + } + } + (d, out) + } + } + + def geig(a:Mat):(CMat, CMat) = geig(a, true) + + def geig(in:Mat, 
getVecs:Boolean):(CMat, CMat) = { + Mat.nflops += 10L*in.nrows*in.nrows*in.nrows + if (in.nrows != in.ncols) { + throw new RuntimeException("eig needs a square matrix") + } else { + val ilo = new Array[Int](1) + val ihi = new Array[Int](1) + val a = CMat(in) + val scale = ones(a.nrows,1) + val tau = a.zeros(a.nrows,1) + val w = a.zeros(a.nrows, 1) + val mm = a.nrows + ilo(0) = 1; ihi(0) = a.nrows; + cgebal(ORDER.ColMajor, "S", a.nrows, a.data, a.nrows, ilo, ihi, scale.data) + cgehrd(ORDER.ColMajor, a.nrows, ilo(0), ihi(0), a.data, a.nrows, tau.data) + val q = a.copy + cunghr(ORDER.ColMajor, a.nrows, ilo(0), ihi(0), q.data, a.nrows, tau.data) + val z = q.copy + chseqr(ORDER.ColMajor, "S", "I", a.nrows, ilo(0), ihi(0), a.data, a.nrows, w.data, z.data, a.nrows) + if (getVecs) { + Mat.nflops += 50L*in.nrows*in.nrows*in.nrows + val m = new Array[Int](1) + val select = IMat(in.nrows, 1) + val vl = a.zeros(a.nrows, 1) + val ee = z.copy + ctrevc(ORDER.ColMajor, "R", "A", select.data, a.nrows, a.data, a.nrows, vl.data, 1, ee.data, a.nrows, mm, m) + cgebak(ORDER.ColMajor, "S", "R", a.nrows, ilo(0), ihi(0), scale.data, mm, ee.data, a.nrows); + z ~ q * (z * ee); + } + (w, z) + } + } + + def feig(a:FMat):(FMat, FMat) = {val (w,out) = _feig(a) ; (w.asInstanceOf[FMat], out.asInstanceOf[FMat])} + def feig(a:DMat):(DMat, DMat) = {val (w,out) = _feig(a) ; (w.asInstanceOf[DMat], out.asInstanceOf[DMat])} + def feig(a:Mat):(Mat, Mat) = _feig(a) + + def _feig(a:Mat):(Mat, Mat) = { // Faster, divide and conquer algorithm for pos definite matrices + Mat.nflops += 3L*a.nrows*a.nrows*a.nrows + if (a.nrows != a.ncols) { + throw new RuntimeException("feig needs a square matrix") + } else { + val out = a.copy + val w = a.zeros(a.nrows,1) + (out, w) match { + case (dout:DMat, dw:DMat) => dsyevd(ORDER.ColMajor, "V", "U", a.nrows, dout.data, a.nrows, dw.data) + case (sout:FMat, sw:FMat) => ssyevd(ORDER.ColMajor, "V", "U", a.nrows, sout.data, a.nrows, sw.data) + } + (w, out) + } + } + /* + * 
Standard QR decomposition. Given m x n input A, return m x m orthonormal Q and m x n upper-triangular R. + */ + + def QRdecomp(a:FMat):(FMat, FMat) = {val (q,r) = _QRdecomp(a); (q.asInstanceOf[FMat], r.asInstanceOf[FMat])} + def QRdecomp(a:DMat):(DMat, DMat) = {val (q,r) = _QRdecomp(a); (q.asInstanceOf[DMat], r.asInstanceOf[DMat])} + def QRdecomp(a:CMat):(CMat, CMat) = {val (q,r) = _QRdecomp(a); (q.asInstanceOf[CMat], r.asInstanceOf[CMat])} + def QRdecomp(a:Mat):(Mat, Mat) = _QRdecomp(a) + + def _QRdecomp(a:Mat):(Mat, Mat) = { + Mat.nflops += 4L*a.nrows*a.ncols*math.min(a.nrows, a.ncols) + val m = a.nrows + val n = a.ncols + val r = a.copy + val q = a.zeros(m,m) + val tau = a.zeros(math.max(a.nrows, a.ncols), 1) + (r, q, tau) match { + case (fr:FMat, fq:FMat, ftau:FMat) => { + sgeqrf(ORDER.ColMajor, m, n, fr.data, m, ftau.data) + fq(?,0->n) = fr + sorgqr(ORDER.ColMajor, m, m, n, fq.data, m, ftau.data) + } + case (dr:DMat, dq:DMat, dtau:DMat) => { + dgeqrf(ORDER.ColMajor, m, n, dr.data, m, dtau.data) + dq(?,0->n) = dr + dorgqr(ORDER.ColMajor, m, m, n, dq.data, m, dtau.data) + } + case (cr:CMat, cq:CMat, ctau:CMat) => { + cgeqrf(ORDER.ColMajor, m, n, cr.data, m, ctau.data) + cq(?,0->n) = cr + cungqr(ORDER.ColMajor, m, m, n, cq.data, m, ctau.data) + } + } + r.clearLower + (q, r) + } + + /* + * Thin QR decomposition. Given m x n input A, return m x n orthonormal Q and n x n upper triangular R. 
+ */ + + def QRdecompt(a:FMat):(FMat, FMat) = {val (q,r) = _QRdecompt(a); (q.asInstanceOf[FMat], r.asInstanceOf[FMat])} + def QRdecompt(a:DMat):(DMat, DMat) = {val (q,r) = _QRdecompt(a); (q.asInstanceOf[DMat], r.asInstanceOf[DMat])} + def QRdecompt(a:CMat):(CMat, CMat) = {val (q,r) = _QRdecompt(a); (q.asInstanceOf[CMat], r.asInstanceOf[CMat])} + def QRdecompt(a:Mat):(Mat, Mat) = _QRdecompt(a) + + def _QRdecompt(a:Mat):(Mat, Mat) = { + val m = a.nrows + val n = a.ncols + val a2 = a.zeros(a.ncols, a.ncols) + (a, a2) match { + case (fa:FMat, fa2:FMat) => sgemm(ORDER.ColMajor, TRANSPOSE.Trans, TRANSPOSE.NoTrans, n, n, m, 1f, fa.data, m, fa.data, m, 0f, fa2.data, n) + case (da:DMat, da2:DMat) => dgemm(ORDER.ColMajor, TRANSPOSE.Trans, TRANSPOSE.NoTrans, n, n, m, 1f, da.data, m, da.data, m, 0f, da2.data, n) + case (ca:CMat, ca2:CMat) => { + val cone = CMat.celem(1,0) + val czero = CMat.celem(0,0) + cgemm(ORDER.ColMajor, TRANSPOSE.Trans, TRANSPOSE.NoTrans, n, n, m, cone.data, ca.data, m, ca.data, m, czero.data, ca2.data, n) + } + } + Mat.nflops += 2L*a.ncols*a.ncols*a.nrows + val r = chol(a2).t + val q = a * inv(r) + (q, r) + } + + def chol(a:FMat):FMat = _chol(a).asInstanceOf[FMat] + def chol(a:DMat):DMat = _chol(a).asInstanceOf[DMat] + def chol(a:CMat):CMat = _chol(a).asInstanceOf[CMat] + def chol(a:Mat):Mat = _chol(a) + + def _chol(a:Mat):Mat = { // Cholesky factorization + Mat.nflops += 1L*a.nrows*a.nrows*a.nrows/3 + if (a.nrows != a.ncols) { + throw new RuntimeException("chol needs a square matrix") + } else { + val out = a.copy + out match { + case dout:DMat => dpotrf(ORDER.ColMajor, "L", a.nrows, dout.data, a.nrows) + case fout:FMat => spotrf(ORDER.ColMajor, "L", a.nrows, fout.data, a.nrows) + case cout:CMat => cpotrf(ORDER.ColMajor, "L", a.nrows, cout.data, a.nrows) + } + out.clearUpper + out + } + } + + /* + * Trisolve solves A x = r, for triangular A. Mode string argument is 3 characters. + * Char1 = "U" or "L" for upper or lower-triangular input. 
+ * Char2 = "N", "T" or "C" for A not-transposed, transposed or conjugate respectively. + * Char3 = "N" or "U" whether the leading diagonal is non-unit "N" or unit "U" respectively. + */ + def trisolve(a:DMat, r:DMat, mode:String):DMat = _trisolve(a, r, mode).asInstanceOf[DMat] + def trisolve(a:FMat, r:FMat, mode:String):FMat = _trisolve(a, r, mode).asInstanceOf[FMat] + def trisolve(a:CMat, r:CMat, mode:String):CMat = _trisolve(a, r, mode).asInstanceOf[CMat] + def trisolve(a:Mat, r:Mat, mode:String):Mat = _trisolve(a, r, mode) + + def _trisolve(a:Mat, r:Mat, mode:String):Mat = { + if (a.nrows != a.ncols) { + throw new RuntimeException("tsolve a must be square") + } + if (a.ncols != r.nrows) { + throw new RuntimeException("tsolve matrix and rhs must have same ncols") + } + val out = r.copy + Mat.nflops += 1L*a.nrows*a.nrows*r.ncols + (a, out) match { + case (da:DMat, dout:DMat) => dtrtrs(ORDER.ColMajor, mode, a.nrows, r.ncols, da.data, a.nrows, dout.data, out.nrows) + case (fa:FMat, fout:FMat) => strtrs(ORDER.ColMajor, mode, a.nrows, r.ncols, fa.data, a.nrows, fout.data, out.nrows) + case (ca:CMat, cout:CMat) => ctrtrs(ORDER.ColMajor, mode, a.nrows, r.ncols, ca.data, a.nrows, cout.data, out.nrows) + } + out + } + + def trisolve(a:DMat, r:DMat):DMat = _trisolve(a, r, "UNN").asInstanceOf[DMat] + def trisolve(a:FMat, r:FMat):FMat = _trisolve(a, r, "UNN").asInstanceOf[FMat] + def trisolve(a:CMat, r:CMat):CMat = _trisolve(a, r, "UNN").asInstanceOf[CMat] + def trisolve(a:Mat, r:Mat):Mat = _trisolve(a, r, "UNN") + + def shiftLeft(mat:FMat, step:Int) = { + var i = step + while (i < mat.ncols) { + System.arraycopy(mat.data, i*mat.nrows, mat.data, (i-step)*mat.nrows, mat.nrows) + i += 1 + } + } + + def shiftRight(mat:FMat, step:Int) = { + var i = mat.ncols - 1 + while (i >= step) { + System.arraycopy(mat.data, (i-step)*mat.nrows, mat.data, i*mat.nrows, mat.nrows) + i -= 1 + } + } + + def blgmres(A:FMat, b:FMat, nrst:Int, m:Int, s:Int, tol:Float) = { + val n = A.nrows + val R 
= normrnd(0, 1, n, s) + val H = A.zeros(s*(m+1), s*m) + val V = A.zeros(n, s*(m+1)) + val e1 = A.zeros(s*(m+1),1) + e1(0,0) = 1 + val rots = new Array[FMat](m) + val bnorm = norm(b) + var x = R(?,0) + var done = false + + def blk(i:Int) = i*s->(i+1)*s + def blk2(i:Int) = i*s->(i+2)*s + + var irestart = 0 + while (irestart < nrst && !done) { + val res = b - A*x + R(?,0) = res + var (vj, r) = QRdecompt(R) + V(?, 0 -> s) = vj + var ex = r(0,0)*e1 + var j = 0 + while (j < m && !done) { + var Uj = A * vj + var k = 0 + while (k <= j) { + val Vl = V(?, blk(k)) + val Hj = Vl.t * Uj + Uj = Uj - Vl * Hj + H(blk(k), blk(j)) = Hj + k += 1 + } + val (vjp, hjp) = QRdecompt(Uj) + H(blk(j+1), blk(j)) = hjp + V(?, blk(j+1)) = vjp + vj = vjp + k = 0 + while (k < j) { // Apply blocked Givens rotations + H(blk2(k), blk(j)) = rots(k) * H(blk2(k), blk(j)) + k += 1 + } + var (rot, tri) = QRdecomp(H(blk2(j), blk(j))) + H(blk2(j), blk(j)) = tri + rots(j) = rot.t + ex(blk2(j),0) = rots(j) * ex(blk2(j),0) + k = 0 + while (k < s && !done) { + val ihere = j*s+k + printf("%f ", ex(ihere,0)); + if (math.abs(ex(ihere,0))/bnorm < tol) { + val ym = trisolve(H(0->ihere, 0->ihere), ex(0->ihere,0)) + x = x + V(?,0->ihere) * ym + done = true; + } + k += 1 + } + printf("\n"); + j += 1 + } + if (!done) { + val ym = trisolve(H(0->s*m,?), ex(0->s*m,0)) + val zi = V(?,0->s*m) * ym + x = x + zi + if (s > 1) { + shiftRight(R, 1) + R(?, 1) = zi + } + } + irestart += 1 + } + (x, R, H, V) + } + +} diff --git a/src/main/scala/BIDMat/SparseMat.scala b/src/main/scala/BIDMat/SparseMat.scala new file mode 100755 index 00000000..5a4996ac --- /dev/null +++ b/src/main/scala/BIDMat/SparseMat.scala @@ -0,0 +1,764 @@ +package BIDMat + +class SparseMat[@specialized(Double,Float) T] +(nr: Int, nc: Int, var nnz0:Int, var ir:Array[Int], val jc:Array[Int], val data:Array[T]) +(implicit manifest:Manifest[T], numeric:Numeric[T]) extends Mat(nr, nc) { + + override def nnz = nnz0 + + /* + * Bounds-checked matrix access + */ + def 
apply(r0:Int, c0:Int):T = {
+    val off = Mat.oneBased
+    val r = r0 - off
+    val c = c0 - off
+    if (r < 0 || r >= nrows || c < 0 || c >= ncols) {
+      throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") vs ("+nrows+","+ncols+")");
+    } else {
+      get_(r, c);
+    }
+  }
+  /*
+   * Internal (unchecked) accessor
+   */
+  def get_(r:Int, c:Int):T = {
+    val ioff = Mat.ioneBased
+    var ix = 0
+    if (ir != null) {
+      ix = Mat.ibinsearch(r+ioff, ir, jc(c)-ioff, jc(c+1)-ioff)
+    } else {
+      ix = r+ioff - jc(c)
+    }
+    if (ix >= 0) data(ix) else numeric.zero
+  }
+  /*
+   * Update a matrix value, m(r,c) = v
+   */
+  def update(r0:Int, c0:Int, v:T):T = {
+    val off = Mat.oneBased
+    val r = r0 - off
+    val c = c0 - off
+    if (r < 0 || r >= nrows || c < 0 || c >= ncols) {
+      throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") vs ("+nrows+","+ncols+")");
+    } else {
+      set_(r, c, v);
+    }
+    v
+  }
+  /*
+   * Internal (unchecked) setter
+   */
+  def set_(r:Int, c:Int, v:T) = {
+    val ioff = Mat.ioneBased
+    var ix = 0
+    if (ir != null) {
+      ix = Mat.ibinsearch(r+ioff, ir, jc(c)-ioff, jc(c+1)-ioff)
+    } else {
+      ix = r+ioff - jc(c)
+    }
+    if (ix >= 0) data(ix) = v
+    else throw new RuntimeException("Can't set missing values")
+  }
+
+  def explicitInds = { // when ir is null, allocate it and give the k-th entry of every column the row index k+ioff
+    if (ir == null) {
+      val ioff = Mat.ioneBased
+      ir = new Array[Int](nnz)
+      var i = 0
+      while (i < ncols) {
+        var j = 0
+        while (j + jc(i) < jc(i+1)) { // stop at the start of column i+1 (was jc(i)+1: exactly one slot per column)
+          ir(j+jc(i)-ioff) = j+ioff
+          j += 1
+        }
+        i += 1
+      }
+    }
+  }
+  /*
+   * Transpose
+   */
+  def gt:SparseMat[T] = {
+    explicitInds
+    SparseMat.sparseImpl[T](SparseMat.uncompressInds(jc, ir),
+        if (Mat.ioneBased==1) SparseMat.decInds(ir) else ir, data, ncols, nrows)
+  }
+  /*
+   * Stack matrices vertically
+   */
+  def vertcat(a:SparseMat[T]):SparseMat[T] =
+    if (ncols != a.ncols) {
+      throw new RuntimeException("ncols must match")
+    } else {
+      if (ir != null) a.explicitInds
+      if (a.ir != null) explicitInds
+      val out = if (ir != null) {
+        SparseMat[T](nrows+a.nrows, ncols, nnz+a.nnz)
+      } else {
+
SparseMat.noRows[T](nrows+a.nrows, ncols, nnz+a.nnz)
+      }
+      val ioff = Mat.ioneBased
+      var ip = 0
+      var i = 0
+      out.jc(0) = ioff
+      while (i < ncols) {
+        var j = jc(i)-ioff
+        while (j < jc(i+1)-ioff) {
+          if (out.ir != null) out.ir(ip) = ir(j)
+          out.data(ip) = data(j)
+          ip += 1
+          j += 1
+        }
+        j = a.jc(i)-ioff
+        while (j < a.jc(i+1)-ioff) {
+          if (out.ir != null) out.ir(ip) = a.ir(j) + nrows
+          out.data(ip) = a.data(j)
+          ip += 1
+          j += 1
+        }
+        out.jc(i+1) = ip+ioff
+        i += 1
+      }
+      out
+    }
+
+  /*
+   * Stack matrices horizontally: the result has nrows rows and ncols+a.ncols columns
+   */
+
+  def horzcat(a:SparseMat[T]):SparseMat[T] =
+    if (nrows != a.nrows) {
+      throw new RuntimeException("nrows must match")
+    } else {
+      if (ir != null) a.explicitInds
+      if (a.ir != null) explicitInds
+      val out = if (ir != null) { // columns add up, rows do not (was vertcat's shape, leaving out.jc too short)
+        SparseMat[T](nrows, ncols+a.ncols, nnz+a.nnz)
+      } else {
+        SparseMat.noRows[T](nrows, ncols+a.ncols, nnz+a.nnz)
+      }
+      var ip = 0
+      System.arraycopy(data, 0, out.data, 0, nnz)
+      System.arraycopy(a.data, 0, out.data, nnz, a.nnz)
+      if (out.ir != null) {
+        System.arraycopy(ir, 0, out.ir, 0, nnz)
+        System.arraycopy(a.ir, 0, out.ir, nnz, a.nnz)
+      }
+      System.arraycopy(jc, 0, out.jc, 0, ncols+1)
+      for (i <- 1 to a.ncols) { // column pointers of a, shifted past this matrix's nnz entries
+        out.jc(i+ncols) = a.jc(i) + nnz
+      }
+      out
+    }
+
+  /*
+   * Find indices (single) for all non-zeros elements
+   */
+  def gfind:IMat = {
+    var out = IMat(nnz, 1)
+    val ioff = Mat.ioneBased
+    val off = Mat.oneBased
+    var i = 0
+    while (i < ncols) {
+      var j = jc(i)-ioff
+      if (ir != null) {
+        while (j < jc(i+1)-ioff) {
+          out.data(j) = ir(j)-ioff+off + i*nrows
+          j += 1
+        }
+      } else {
+        while (j < jc(i+1)-ioff) {
+          out.data(j) = j-jc(i)+ioff+off + i*nrows
+          j += 1
+        }
+      }
+      i += 1
+    }
+    out
+  }
+  /*
+   * Find indices (i,j) for non-zero elements
+   */
+  def gfind2:(IMat, IMat) = {
+    var iout = IMat(nnz, 1)
+    var jout = IMat(nnz, 1)
+    val ioff = Mat.ioneBased
+    val off = Mat.oneBased
+    var i = 0
+    while (i < ncols) {
+      var j = jc(i)-ioff
+      if (ir != null) {
+        while (j < jc(i+1)-ioff) {
+          iout.data(j) = ir(j)-ioff+off
+
j += 1 + } + } else { + while (j < jc(i+1)-ioff) { + iout.data(j) = j-jc(i)+ioff+off + j += 1 + } + } + i += 1 + } + if (off == 0) { + System.arraycopy(SparseMat.uncompressInds(jc, ir), 0, jout.data, 0, nnz) + } else { + SparseMat.incInds(SparseMat.uncompressInds(jc, ir), jout.data) + } + (iout, jout) + } + /* + * Find indices and values (i,j,v) for non-zero elements + */ + def gfind3:(IMat, IMat, DenseMat[T]) = { + val vout = new DenseMat[T](nnz,1) + val (iout, jout) = gfind2 + System.arraycopy(data, 0, vout.data, 0, nnz) + (iout, jout, vout) + } + /* + * Implement a(im) = b where im is a matrix of indices to a and im and b are same-sized + */ + def update(im:IMat, b:SparseMat[T]) = { + } + + /* + * Implement slicing, a(iv,jv) where iv and jv are vectors, using ? as wildcard + */ + def gapply(iv:IMat, jv:IMat):SparseMat[T] = + iv match { + case aa:MatrixWildcard => { + val colinds = DenseMat.getInds(jv, ncols) + val ioff = Mat.ioneBased + val off = Mat.oneBased + var tnnz = 0 + for (i <- 0 until colinds.length) tnnz += jc(colinds(i)-off+1) - jc(colinds(i)-off) + val out = if (ir != null) { + SparseMat[T](nrows, colinds.length, tnnz) + } else { + SparseMat.noRows[T](nrows, colinds.length, tnnz) + } + var inext = 0 + var i = 0 + out.jc(0) = ioff + while (i < out.ncols) { + val istep = jc(colinds(i)-off+1) - jc(colinds(i)-off) + if (ir != null) System.arraycopy(ir, jc(colinds(i)-off)-ioff, out.ir, inext, istep) + System.arraycopy(data, jc(colinds(i)-off)-ioff, out.data, inext, istep) + inext += istep + out.jc(i+1) = inext+ioff + i += 1 + } + out + } + case _ => { + explicitInds + val off = Mat.oneBased + val rowinds = if (off == 0) DenseMat.getInds(iv, nrows) else SparseMat.decInds(DenseMat.getInds(iv, nrows)); + val smat = SparseMat.sparseImpl[Int]((0 until iv.length).toArray, rowinds, Array.fill[Int](iv.length)(1), iv.length, nrows) + val colinds = DenseMat.getInds(jv, ncols) + val ioff = Mat.ioneBased + var tnnz = 0 + var i = 0 + while (i < colinds.length) { + var 
j = jc(colinds(i)-off)-ioff + while (j < jc(colinds(i)-off+1)-ioff) { + tnnz += smat.jc(ir(j)+1-ioff) - smat.jc(ir(j)-ioff) + j += 1 + } + i += 1 + } + val out = SparseMat[T](iv.length, colinds.length, tnnz) + tnnz = 0 + i = 0 + out.jc(0) = ioff + while (i < colinds.length) { + var j = jc(colinds(i)-off)-ioff + while (j < jc(colinds(i)-off+1)-ioff) { + val dval = data(j) + var k = smat.jc(ir(j)-ioff) - ioff + while (k < smat.jc(ir(j)+1-ioff)-ioff) { + out.ir(tnnz) = smat.ir(k) + out.data(tnnz) = dval + tnnz += 1 + k += 1 + } + j += 1 + } + out.jc(i+1) = tnnz+ioff + i += 1 + } + out + } + } + + private def printOne(a:T):String = + a match { + case v:Double => { + if (v % 1 == 0 && math.abs(v) < 1e10) { + "%d" format v.intValue + } else { + "%.5g" format v + } + } + case v:Float => { + if (v % 1 == 0 && math.abs(v) < 1e5) { + "%d" format v.intValue + } else { + "%.5g" format v + } + } + case _ => "" + } + + private def printOne(v0:Int):String = { + val v = v0 + Mat.oneBased + "%d" format v + } + + + override def toString:String = { + val ioff = Mat.ioneBased + val maxRows = 8 + var fieldWidth = 4 + val sb:StringBuilder = new StringBuilder + val somespaces = " " + var innz = 0 + var icol = 0 + while (innz < math.min(nnz, maxRows)) { + while (innz >= jc(icol+1)-ioff) icol += 1 + fieldWidth = math.max(fieldWidth, 2+printOne(ir(innz)).length) + fieldWidth = math.max(fieldWidth, 2+printOne(icol).length) + fieldWidth = math.max(fieldWidth, 2+printOne(data(innz)).length) + innz += 1 + } + innz = 0 + icol = 0 + while (innz < math.min(nnz, maxRows)) { + while (innz >= jc(icol+1)-ioff) icol += 1 + var str = printOne(ir(innz)-ioff) + sb.append("("+somespaces.substring(0,fieldWidth-str.length)+str) + str = printOne(icol) + sb.append(","+somespaces.substring(0,fieldWidth-str.length)+str) + str = printOne(data(innz)) + sb.append(")"+somespaces.substring(0,fieldWidth-str.length)+str+"\n") + innz += 1 + } + if (nnz > maxRows) { + for (j <- 0 until 3) { + 
sb.append(somespaces.substring(0, fieldWidth-2)+"...")
+      }
+      sb.append("\n")
+    }
+    sb.toString()
+  }
+
+  def gsMult(a:SparseMat[T]):DenseMat[T] = // sparse * sparse product returned as a dense nrows x a.ncols matrix
+    if (ncols != a.nrows)
+      throw new RuntimeException("dims mismatch")
+    else {
+      explicitInds
+      a.explicitInds
+      val out = new DenseMat[T](nrows, a.ncols)
+      val ioff = Mat.ioneBased
+      var i = 0
+      while (i < a.ncols) {
+        val i0 = nrows*i // offset of output column i in out.data
+        var j = a.jc(i)-ioff
+        while (j < a.jc(i+1)-ioff) {
+          val ind = a.ir(j)-ioff
+          val tval = a.data(j)
+          var k = jc(ind)-ioff
+          while (k < jc(ind+1)-ioff) {
+            val indx = ir(k)-ioff + i0
+            out.data(indx) = numeric.plus(out.data(indx), numeric.times(tval, data(k))) // accumulate in out, not in this.data
+            k += 1
+          }
+          j += 1
+        }
+        i += 1
+      }
+      out
+    }
+
+  def sgMatOp(b:SparseMat[T], op2:(T,T) => T, omat:Mat):SparseMat[T] =
+    if (nrows==b.nrows && ncols==b.ncols) {
+      if (ir != null) b.explicitInds
+      if (b.ir != null) explicitInds
+      if (ir == null) {
+        sgMatOpNR(b,op2,omat)
+      } else {
+        val out = SparseMat.newOrCheck(nrows, ncols, nnz+b.nnz, omat)
+        val ioff = Mat.ioneBased
+        var nzc = 0
+        out.jc(0) = ioff
+        var i = 0
+        while (i < ncols) {
+          var ia = jc(i)-ioff
+          var ib = b.jc(i)-ioff
+          while (ia < jc(i+1)-ioff && ib < b.jc(i+1)-ioff) {
+            if (ir(ia) < b.ir(ib)) {
+              out.ir(nzc) = ir(ia)
+              out.data(nzc) = op2(data(ia), numeric.zero)
+              ia += 1
+            } else if (ir(ia) > b.ir(ib)) {
+              out.ir(nzc) = b.ir(ib)
+              out.data(nzc) = op2(numeric.zero, b.data(ib))
+              ib += 1
+            } else {
+              out.ir(nzc) = ir(ia)
+              out.data(nzc) = op2(data(ia), b.data(ib))
+              ia += 1
+              ib += 1
+            }
+            nzc += 1
+          }
+          while (ia < jc(i+1)-ioff) {
+            out.ir(nzc) = ir(ia)
+            out.data(nzc) = op2(data(ia), numeric.zero)
+            ia += 1
+            nzc += 1
+          }
+          while (ib < b.jc(i+1)-ioff) {
+            out.ir(nzc) = b.ir(ib)
+            out.data(nzc) = op2(numeric.zero, b.data(ib))
+            ib += 1
+            nzc += 1
+          }
+          out.jc(i+1) = nzc+ioff
+          i += 1
+        }
+        out.sparseTrim
+      }
+    } else {
+      throw new RuntimeException("dimensions mismatch")
+    }
+
+
+  def sgMatOpNR(b:SparseMat[T], op2:(T,T) => T, omat:Mat):SparseMat[T] = {
+    val out =
SparseMat.newOrCheck(nrows, ncols, nnz+b.nnz, omat, true) + val ioff = Mat.ioneBased + var nzc = 0 + out.jc(0) = ioff + for (i <- 0 until ncols) { + var ia = jc(i)-ioff + var ib = b.jc(i)-ioff + while (ia < jc(i+1)-ioff && ib < b.jc(i+1)-ioff) { + out.data(nzc) = op2(data(ia), b.data(ib)) + ia += 1 + ib += 1 + nzc += 1 + } + while (ia < jc(i+1)-ioff) { + out.data(nzc) = op2(data(ia), numeric.zero) + ia += 1 + nzc += 1 + } + while (ib < b.jc(i+1)-ioff) { + out.data(nzc) = op2(numeric.zero, b.data(ib)) + ib += 1 + nzc += 1 + } + out.jc(i+1) = nzc+ioff + } + out.sparseTrim + } + + def sgReduceOp(dim:Int, op1:(T) => T, op2:(T,T) => T, omat:Mat):DenseMat[T] = { + val ioff = Mat.ioneBased + if (dim == 0) { + if (nrows > 1 && ncols > 1) { + throw new RuntimeException("must be a vector") + } else { + val out = DenseMat.newOrCheck(1, 1, omat) + var j = 0 + var acc = op1(numeric.zero) + while (j < nnz) { + acc = op2(acc, data(j)) + j += 1 + } + out.data(0) = acc + out + } + } else if (dim == 1) { + val out = DenseMat.newOrCheck(1, ncols, omat) + var i = 0 + while (i < ncols) { + var acc = op1(numeric.zero) + var j = jc(i)-ioff + while (j < jc(i+1)-ioff) { + acc = op2(acc, data(j)) + j += 1 + } + out.data(i) = acc + i += 1 + } + out + } else if (dim == 2) { + val out = DenseMat.newOrCheck(nrows, 1, omat) + out.clear + if (ir != null) { + var j = 0 + while (j < nnz) { + out.data(ir(j)-ioff) = op2(out.data(ir(j)-ioff), data(j)) + j += 1 + } + } else { + var i = 0 + while (i < ncols) { + var j = jc(i) + while (j < jc(i+1)) { + out.data(j-jc(i)) = op2(out.data(j-jc(i)), data(j-ioff)) + j += 1 + } + i += 1 + } + } + out + } else + throw new RuntimeException("index must 1 or 2") + } + + def ssMatOpOne(b:DenseMat[T], op2:(T,T) => T, omat:Mat):SparseMat[T] = + if (b.nrows == 1 && b.ncols == 1) { + sgMatOpScalar(b.data(0), op2, omat) + } else throw new RuntimeException("dims incompatible") + + def sgMatOpScalar(b:T, op2:(T,T) => T, outmat:Mat):SparseMat[T] = { + val out = 
SparseMat.newOrCheck(nrows, ncols, nnz, outmat, (ir == null))
+    var i = 0
+    out.jc(0) = jc(0)
+    while (i < nnz) { // apply op2(entry, b) to every stored entry, keeping its row index
+      out.data(i) = op2(data(i), b)
+      if (ir != null) out.ir(i) = ir(i)
+      i += 1
+    }
+    i = 0
+    while (i <= ncols) { // jc has ncols+1 entries; stopping at ncols-1 left out.jc(ncols) unset
+      out.jc(i) = jc(i)
+      i += 1
+    }
+    out.sparseTrim
+  }
+
+  def sparseTrim:SparseMat[T] = {
+    val ioff = Mat.ioneBased
+    var i = 0
+    var nzc = 0
+    while (i < ncols) {
+      var j = jc(i)
+      while (j < jc(i+1)) {
+        if (numeric.signum(data(j-ioff)) != 0) nzc += 1
+        j += 1
+      }
+      i += 1
+    }
+    if (nzc == nnz) {
+      this
+    } else {
+      var out = this
+      nzc = 0
+      var lastjc = 0
+      var i = 0
+      out.jc(0) = ioff
+      while (i < ncols) {
+        var j = lastjc
+        while (j < jc(i+1)-ioff) {
+          if (numeric.signum(data(j)) != 0) {
+            out.data(nzc) = data(j)
+            if (ir != null) out.ir(nzc) = ir(j)
+            nzc += 1
+          }
+          j += 1
+        }
+        lastjc = jc(i+1)-ioff
+        out.jc(i+1) = nzc+ioff
+        i += 1
+      }
+      nnz0 = nzc
+      out
+    }
+  }
+
+  def check = {
+    val ioff = Mat.ioneBased
+    var i = 0
+    if (jc(0) != ioff) {
+      throw new RuntimeException("jc(0) should be "+ioff)
+    }
+    while (i < ncols) {
+      var j = jc(i)-ioff
+      if (jc(i) > jc(i+1)) {
+        throw new RuntimeException("jc(i) out of order " + i + " " + jc(i) + " " + jc(i+1))
+      }
+      if (ir != null) {
+        while (j < jc(i+1)-ioff-1) {
+          if (ir(j+1) <= ir(j)) {
+            throw new RuntimeException("ir(j) out of order "+j+" "+ir(j)+" "+ir(j+1))
+          }
+          if (ir(j) < ioff) {
+            throw new RuntimeException("ir("+j+")="+ir(j)+" too small")
+          }
+          if (ir(j+1) >= nrows+ioff) {
+            throw new RuntimeException("ir("+(j+1)+")="+ir(j+1)+" out of range "+(nrows+ioff))
+          }
+          j += 1
+        }
+      }
+      i += 1
+    }
+    if (jc(ncols) != nnz+ioff) {
+      throw new RuntimeException("jc(ncols) should be "+nnz)
+    }
+  }
+
+  def full:DenseMat[T] = {
+    val out = new DenseMat[T](nrows, ncols)
+    val ioff = Mat.ioneBased
+    if (ir != null) {
+      val cols = SparseMat.uncompressInds(jc, ir)
+      var i = 0
+      while (i < nnz) {
+        out.data(ir(i)-ioff + nrows*cols(i)) = data(i)
+        i += 1
+      }
+    } else {
+      var i = 0
+      while (i < ncols) {
+
var j = jc(i)-ioff
+        while (j < jc(i+1)-ioff) {
+          out.data(j-jc(i)+ioff + nrows*i) = data(j)
+          j += 1
+        }
+        i += 1
+      }
+    }
+    out
+  }
+
+  override def recycle(nr:Int, nc:Int, nnz:Int):SparseMat[T] = {
+    val jc0 = if (jc.size >= nc+1) jc else new Array[Int](nc+1)
+    val ir0 = if (ir.size >= nnz) ir else new Array[Int](nnz)
+    val data0 = if (data.size >= nnz) data else new Array[T](nnz)
+    new SparseMat[T](nr, nc, nnz, ir0, jc0, data0)
+  }
+
+}
+
+
+object SparseMat {
+
+  def apply[T](nr:Int, nc:Int, nnz0:Int)
+  (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] =
+    new SparseMat[T](nr, nc, nnz0, new Array[Int](nnz0), new Array[Int](nc+1), new Array[T](nnz0))
+
+  def noRows[T](nr:Int, nc:Int, nnz0:Int)
+  (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] =
+    new SparseMat[T](nr, nc, nnz0, null, new Array[Int](nc+1), new Array[T](nnz0))
+
+  def sparseImpl[@specialized(Double, Float) T](rows:Array[Int], cols:Array[Int], vals:Array[T], nrows:Int, ncols:Int) // build from (row, col, value) triples; repeated (row, col) pairs are summed
+  (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] = {
+    val ioff = Mat.ioneBased
+    val out = SparseMat[T](nrows, ncols, rows.length)
+    val orows = out.ir
+    val ocols = new Array[Int](rows.length)
+    var i = 0
+    while (i < cols.length) {
+      ocols(i) = cols(i)
+      orows(i) = rows(i) + ioff
+      i += 1
+    }
+    val isort = BIDMat.Mat.ilexsort2(ocols, orows)
+    i = 0
+    var igood = 0 // number of distinct (row, col) entries written so far
+    while (i < cols.length) {
+      if (i == 0 || orows(i) != orows(i-1) || ocols(i) != ocols(i-1)) {
+        ocols(igood) = ocols(i)
+        orows(igood) = orows(i)
+        out.data(igood) = vals(isort(i))
+        igood += 1
+      } else {
+        out.data(igood-1) = numeric.plus(out.data(igood-1), vals(isort(i))) // add to the entry just stored (slot igood is the next free one)
+      }
+      i += 1
+    }
+    SparseMat.compressInds(ocols, ncols, out.jc, igood)
+    out.sparseTrim
+  }
+
+  def compressInds(coli:Array[Int], ncols:Int, out:Array[Int], nnz0:Int):Array[Int] = {
+    val ioff = Mat.ioneBased
+    out(0) = ioff
+    var j = 0
+    var i = 0
+    while (i < ncols) {
+      while (j < nnz0 && coli(j) <= i) j+= 1
+      out(i+1) = j+ioff
+      i += 1
+    }
+
out + } + + def uncompressInds(coli:Array[Int], rowi:Array[Int]):Array[Int] = { + val ioff = Mat.ioneBased + val out = new Array[Int](rowi.length) + var i = 0 + while (i < (coli.length-1)) { + var j = coli(i)-ioff + while (j < coli(i+1)-ioff) { + out(j) = i + j+= 1 + } + i += 1 + } + out + } + + def incInds(inds:Array[Int], out:Array[Int]):Array[Int] = { + var i = 0 + while (i < inds.length) { + out(i) = inds(i) + 1 + i += 1 + } + out + } + + def incInds(inds:Array[Int]):Array[Int] = { + val out = new Array[Int](inds.length) + incInds(inds, out) + } + + def decInds(inds:Array[Int]):Array[Int] = { + val out = new Array[Int](inds.length) + var i = 0 + while (i < inds.length) { + out(i) = inds(i) - 1 + i += 1 + } + out + } + + def newOrCheck[T](nr:Int, nc:Int, nnz:Int, oldmat:Mat, norows:Boolean = false) + (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] = { + if (oldmat.asInstanceOf[AnyRef] == null || (oldmat.nrows == 0 && oldmat.ncols == 0)) { + if (norows) + SparseMat.noRows(nr, nc, nnz) + else + SparseMat(nr, nc, nnz) + } else { + val omat = oldmat.asInstanceOf[SparseMat[T]]; + if (omat.nrows == nr && omat.ncols == nc && omat.nnz == nnz) { + omat + } else { + omat.recycle(nr, nc, nnz) + } + } + } +} + + + + + + diff --git a/src/main/scala/BIDMat/Translators.scala b/src/main/scala/BIDMat/Translators.scala new file mode 100755 index 00000000..9760b84a --- /dev/null +++ b/src/main/scala/BIDMat/Translators.scala @@ -0,0 +1,147 @@ +package BIDMat +import scala.util.parsing.combinator._ + +class MKLdeclarationsParser extends JavaTokenParsers { + var otype:Int = 3 + var prefix:String = "LAPACK" + def fdecl: Parser[String] = ftype~ident~"("~decls~")"~""";*""".r ^^ { + case a~b~c~d~e~f => { + val b0 = b.split("_"); val b1 = b0(b0.length-1) + val toreturn:Boolean = (a != fvoid) + otype match { + case 0 => "JNIEXPORT "+a("")+"JNICALL Java_edu_berkeley_bid_"+prefix+"_"+b1+ + "\n(JNIEnv * env, jobject calling_obj, "+d+") {\n" + case 1 => d + case 2 => " "+(if 
(toreturn) a("")+"retval = " else "") +b+c+d+e+";\n" + case 3 => d+(if (toreturn) " return retval;\n}\n" else "}\n") + case 4 => " public static native "+a(b1)+c+d+e+";\n" + } + } + } + + def decls: Parser[String] = repsep(adecl, ",") ^^ { + case a:List[String] => { + var ll = "" + for (el <- a) { + ll = ll + el + if (otype == 0 || otype == 2 || otype == 4) { + if (el != a.last) ll = ll+", " + } + } + ll + } + } + + def adecl: Parser[String] = ftype~ident ^^ { case a~b => a(b) } + + def ftype: Parser[String=>String] = fvoidp | fvoid | flogicalp | flogical | fintp | fint | fuintp | fuint | fchar | + ffloatp | ffloat | ffcomplexp | ffcomplex | fdoublep | fdouble | fdcomplexp | fdcomplex + + def sstring(y:String, wrap:Int, p1:String, p2:String, p3:String, p4:String) = { + otype match { + case 0 => p1 + " " + (if (wrap > 0) "j_"+y else y) + case 1 => if (wrap == 1) " "+p2+" "+y+" = (*env)->GetPrimitiveArrayCritical(env, j_"+y+", JNI_FALSE);\n" else + if (wrap == 2) " char * "+y+" = (char *)(*env)->GetStringUTFChars(env, j_"+y+", JNI_FALSE);\n" else "" + case 2 => if (y.length == 0) p2+" " else p3+y + case 3 => if (wrap == 1) " (*env)->ReleasePrimitiveArrayCritical(env, j_"+y+", "+y+", 0);\n" else + if (wrap == 2) " (*env)->ReleaseStringUTFChars(env, j_"+y+", "+y+");\n" else "" + case 4 => p4+" "+y + } + } + + def fvoid: Parser[String=>String] = "void" ^^ (x => (y=> + sstring(y, 0, "void", "void", "", "void"))) + + def fenum: Parser[String=>String] = ("const"~"enum"~ident | "enum"~ident) ^^ { + case a~b~c => (y => sstring(y, 0, "jint", "jint", "("+c+")", "int")) + case b~c => (y => sstring(y, 0, "jint", "jint", "("+c+")", "int")) + } + + def flogical: Parser[String=>String] = ("const"~"lapack_logical" | "lapack_logical") ^^ (x => (y=> + sstring(y, 1, "int", "jint *", "(lapack_logical)", "int"))) + + def fchar: Parser[String=>String] = ("const"~"char" | "char") ^^ (x => (y=> + sstring(y, 2, "jstring", "char *", "*", "String"))) + + def fint: Parser[String=>String] = 
("const"~"int" | "int" | "const"~"lapack_int" | "lapack_int" | "MKL_INT") ^^ (x => (y=> + sstring(y, 0, "jint", "jint", "", "int"))) + + def fuint: Parser[String=>String] = ("const"~"MKL_UINT" | "MKL_UINT"| "const"~"unsigned"~"int" | "unsigned"~"int")^^ (x => (y => + sstring(y, 0, "jint", "jint", "(unsigned int)", "int"))) + + def ffloat: Parser[String=>String] = ("const"~"float" | "float" | "lapack_float") ^^ (x => (y => + sstring(y, 0, "jfloat", "jfloat", "", "float"))) + + def fdouble: Parser[String=>String] = ("const"~"double" | "double" | "lapack_double") ^^ (x => (y => + sstring(y, 0, "jdouble", "jdouble", "", "double"))) + + def ffcomplex: Parser[String=>String] = ("const"~"lapack_complex_float" | "lapack_complex_float" | "MKL_complex8") ^^ (x => (y => + sstring(y, 1, "jfloatArray", "jfloat *", "*", "float []"))) + + def fdcomplex: Parser[String=>String] = ("const"~"lapack_complex_double" | "lapack_complex_double" | "MKL_complex16") ^^ (x => (y => + sstring(y, 1, "jdoubleArray", "jdouble *", "*", "double []"))) + + def fvoidp: Parser[String=>String] = ("const"~"void"~"*" | "void"~"*") ^^ (x => (y => + sstring(y, 1, "jfloatArray", "jfloat *", "(void *)", "float []"))) + + def flogicalp: Parser[String=>String] = ("const"~"lapack_logical"~"*" | "lapack_logical"~"*") ^^ (x => (y=> + sstring(y, 1, "jintArray", "jint *", "(lapack_logical *)", "int []"))) + + def fintp: Parser[String=>String] = ("const"~"int"~"*" | "int"~"*" | "MKL_INT"~"*" | "lapack_int"~"*") ^^ (x => (y=> + sstring(y, 1, "jintArray", "jint *", "", "int []"))) + + def fuintp: Parser[String=>String] = ("const"~"MKL_UINT"~"*" | "MKL_UINT"~"*" | "const"~"unsigned"~"int"~"*" | "unsigned"~"int"~"*") ^^ (x => (y => + sstring(y, 1, "jintArray ", "jint *", "", "int []"))) + + def ffloatp: Parser[String=>String] = ("const"~"float"~"*" | "float"~"*") ^^ (x => (y=> + sstring(y, 1, "jfloatArray", "jfloat *", "", "float []"))) + + def fdoublep: Parser[String=>String] = ("const"~"double"~"*" | "double"~"*") ^^ 
(x => (y=> + sstring(y, 1, "jdoubleArray", "jdouble *", "", "double []"))) + + def ffcomplexp: Parser[String=>String] = ("const"~"lapack_complex_float"~"*" | "lapack_complex_float"~"*" | "MKL_complex8"~"*") ^^ { + case x0~x1~x2 => (y => sstring(y, 1, "jfloatArray", "jfloat *", "("+x1+" *)", "float []")) + case x1~x2 => (y => sstring(y, 1, "jfloatArray", "jfloat *", "("+x1+" *)", "float []")) + } + + def fdcomplexp: Parser[String=>String] = ("const"~"lapack_complex_double"~"*" | "lapack_complex_double"~"*" | "MKL_complex16"~"*") ^^ { + case x0~x1~x2 => (y=> sstring(y, 1, "jdoubleArray", "jdouble *", "("+x1+" *)", "double []")) + case x1~x2 => (y=> sstring(y, 1, "jdoubleArray", "jdouble *", "("+x1+" *)", "double []")) + } + +} + +object translateTester { + def main(args: Array[String]) { + val p = new MKLdeclarationsParser + val s = scala.io.Source.fromFile(args(0)) + if (args.length == 1) { + s.getLines.foreach((line) => { + if (line.length > 1) { + p.otype = 0 + println(p.parseAll(p.fdecl, line).get) + p.otype = 1 + println(p.parseAll(p.fdecl, line).get) + p.otype = 2 + println(p.parseAll(p.fdecl, line).get) + p.otype = 3 + println(p.parseAll(p.fdecl, line).get) + } + }) + } else { + s.getLines.foreach((line) => { + if (line.length > 1) { + p.otype = 4 + println(p.parseAll(p.fdecl, line).get) + } + }) + } + } +} + + + +object Translators { + + +} diff --git a/src/test/scala/BIDMat/Copyright.txt b/src/test/scala/BIDMat/Copyright.txt new file mode 100755 index 00000000..21326596 --- /dev/null +++ b/src/test/scala/BIDMat/Copyright.txt @@ -0,0 +1,25 @@ +Copyright (c) 2012, Regents of the University of California +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the <organization> nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ diff --git a/src/test/scala/BIDMat/DMatTest.scala b/src/test/scala/BIDMat/DMatTest.scala new file mode 100755 index 00000000..8d013c15 --- /dev/null +++ b/src/test/scala/BIDMat/DMatTest.scala @@ -0,0 +1,130 @@ +package BIDMat + +import Mat._ +import MatFunctions._ +import org.scalatest._; +import org.scalatest.junit._; +import org.scalatest.prop._; +import org.junit.runner.RunWith + +@RunWith(classOf[JUnitRunner]) +class DMatTest extends FunSuite with Checkers { + val x = DMat(2,3) + val xvalues = List(1.0,2.0,3.0,4.0,5.0,6.0).toArray + System.arraycopy(xvalues, 0, x.data, 0, 6) + val y = DMat(1,3) + val yvalues = List(7.0,8.0,9.0).toArray + System.arraycopy(yvalues, 0, y.data, 0, 3) + val z = DMat(2,1) + val zvalues = List(10.0,11.0).toArray + System.arraycopy(zvalues, 0, z.data, 0, 2) + val xx = DMat(3,4) + val xxvalues = List(1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0).toArray + System.arraycopy(xxvalues, 0, xx.data, 0, 12) + + test("DMat fill") { + assert(x(0,0) == 1.0); + assert(x(1,0) == 2.0); + assert(x(0,1) == 3.0); + assert(x(1,1) == 4.0); + assert(x(0,2) == 5.0); + assert(x(1,2) == 6.0); + } + + test("DMat on") { + val a = x on y; + assert(a(0,0) == 1.0); + assert(a(1,0) == 2.0); + assert(a(0,1) == 3.0); + assert(a(1,1) == 4.0); + assert(a(0,2) == 5.0); + assert(a(1,2) == 6.0); + + assert(a(2,0) == 7.0); + assert(a(2,1) == 8.0); + assert(a(2,2) == 9.0); + } + + test("DMat \\") { + val a = x \ z; + assert(a(0,0) == 1.0); + assert(a(1,0) == 2.0); + assert(a(0,1) == 3.0); + assert(a(1,1) == 4.0); + assert(a(0,2) == 5.0); + assert(a(1,2) == 6.0); + assert(a(0,3) == 10.0); + assert(a(1,3) == 11.0); + } + + test("DMat t") { + val a = x.t; + assert(a(0,0) == 1.0); + assert(a(0,1) == 2.0); + assert(a(1,0) == 3.0); + assert(a(1,1) == 4.0); + assert(a(2,0) == 5.0); + assert(a(2,1) == 6.0); + } + + test("DMat slice 1") { + val a = xx(?,1) + assert(a(0,0) == 4.0); + assert(a(1,0) == 5.0); + assert(a(2,0) == 6.0); + } + + test("DMat slice 2") { + val a = 
xx(?,1 to 2) + assert(a(0,0) == 4.0); + assert(a(1,0) == 5.0); + assert(a(2,0) == 6.0); + assert(a(0,1) == 7.0); + assert(a(1,1) == 8.0); + assert(a(2,1) == 9.0); + } + + test("DMat slice 3") { + val a = xx(1,?) + assert(a(0,0) == 2.0); + assert(a(0,1) == 5.0); + assert(a(0,2) == 8.0); + assert(a(0,3) == 11.0); + } + + test("DMat slice 4") { + val a = xx(0 to 1,?) + assert(a(0,0) == 1.0); + assert(a(0,1) == 4.0); + assert(a(0,2) == 7.0); + assert(a(0,3) == 10.0); + assert(a(1,0) == 2.0); + assert(a(1,1) == 5.0); + assert(a(1,2) == 8.0); + assert(a(1,3) == 11.0); + } + + test("DMat slice 5") { + val a = xx(?,?) + assert(a(0,0) == 1.0); + assert(a(0,1) == 4.0); + assert(a(0,2) == 7.0); + assert(a(0,3) == 10.0); + assert(a(1,0) == 2.0); + assert(a(1,1) == 5.0); + assert(a(1,2) == 8.0); + assert(a(1,3) == 11.0); + assert(a(2,0) == 3.0); + assert(a(2,1) == 6.0); + assert(a(2,2) == 9.0); + assert(a(2,3) == 12.0); + } + + test("DMat slice 6") { + val a = xx(0 to 1, 2 to 3) + assert(a(0,0) == 7.0); + assert(a(1,0) == 8.0); + assert(a(0,1) == 10.0); + assert(a(1,1) == 11.0); + } +} \ No newline at end of file diff --git a/src/test/scala/BIDMat/TestDops.scala b/src/test/scala/BIDMat/TestDops.scala new file mode 100755 index 00000000..b8e9b85a --- /dev/null +++ b/src/test/scala/BIDMat/TestDops.scala @@ -0,0 +1,25 @@ +package BIDMat + + +import DMat._ +import IMat._ +import FMat._ +import scala.compat.Platform._ + + +object TestDops { + def main(args: Array[String]): Unit = { + val n = 2000 + val a = IMat(n,n) + val b = IMat(n,n) + val t0 = currentTime + val m = 1000 + println("starting up") + for (i <- 0 until m) { + val c = a + b + } + val t1 = currentTime - t0 + println("time="+t1+" msec, gflops="+(n.doubleValue*n*m/t1/1e6)) + } +} + diff --git a/src/test/scala/BIDMat/TestDops2.scala b/src/test/scala/BIDMat/TestDops2.scala new file mode 100755 index 00000000..6af1030d --- /dev/null +++ b/src/test/scala/BIDMat/TestDops2.scala @@ -0,0 +1,29 @@ +package BIDMat + +import Mat._ 
+import DMat._ +import FMat._ +import scala.compat.Platform._ + + +object TestDops2 { + def main(args: Array[String]): Unit = { + val n = 50000 + val k = 20 + val l = 1 + val a = FMat(l,n) + val b = FMat(n,k) + val d = FMat(k,n) + val e = FMat(n,l) + val m = 30000 + val t0 = currentTime + println("Starting up") + for (i <- 0 until m) { + val c = a * b +// val c = d * e + } + val t1 = currentTime - t0 + println("time="+t1+"msec, gflops="+(2.0*k*n*l*m/t1/1e6)) + } +} + diff --git a/src/test/scala/BIDMat/TestDops3.scala b/src/test/scala/BIDMat/TestDops3.scala new file mode 100755 index 00000000..17616ac1 --- /dev/null +++ b/src/test/scala/BIDMat/TestDops3.scala @@ -0,0 +1,30 @@ +package BIDMat + +import Mat._ +import DMat._ +import FMat._ +import scala.compat.Platform._ + + +object TestDops3 { + def main(args: Array[String]): Unit = { + val n = 50000 + val k = 10 + val l = 1 + val a = FMat(n,k) +// val a2 = FMat(k,n) + val b = FMat(l,n) + val d = FMat(k,l) + val m = 30000 + val t0 = currentTime + println("Starting up") + for (i <- 0 until m) { +// val c = b * a +// val e = a * d + val c = a t + } + val t1 = currentTime - t0 + println("time="+t1+"msec, gflops="+(2.0*k*n*l*m/t1/1e6)) + } +} + diff --git a/src/test/scala/BIDMat/TestHDF5.scala b/src/test/scala/BIDMat/TestHDF5.scala new file mode 100755 index 00000000..8abed633 --- /dev/null +++ b/src/test/scala/BIDMat/TestHDF5.scala @@ -0,0 +1,39 @@ +package BIDMat + + +import MatFunctions._ +import SciFunctions._ +import CMat._ + + + +object TestHDF5 { + def main(args: Array[String]) : Unit = { + val n = 50000 + val k = 10 + val l = 1 + val a = rand(n,k) + val b = IMat(l,n) + val c = sprand(10,10,0.1) + val d = CSMat(1,2) + d(0,0) = "test" + d(0,1) = "try" + val fname = "d:\\sentiment\\tmp\\mtest.mat" +// println(a.toString) +// println(b.toString) + saveAs(fname, d, "c") +/* val fid = new java.io.FileInputStream(fname) + var next:Int = 0 + var i = 0 + while (next >= 0) { + var next = fid.read() + if (next > 0) 
println("buf("+i+")=("+(next + (if (next > 127) -256 else 0))+")") + i += 1 + } + fid.close() */ + val e:CMat = load(fname, "c").asInstanceOf[CMat] + println(e(0,0).asInstanceOf[String]) + println(e(0,1).asInstanceOf[String]) + } +} +