>>(nrows, ncols, A, B, opn);
+ cudaDeviceSynchronize();
+ cudaError_t err = cudaGetLastError();
+ return err;
+}
+
+
+#ifdef TEST
+// Aborts the test with a diagnostic when a CUDA runtime call did not succeed.
+static void testCudaCheck(cudaError_t err, const char *what) {
+  if (cudaSuccess != err) {
+    fprintf(stderr, "CUDA error %d (%s) during %s\n", (int)err, cudaGetErrorString(err), what);
+    exit(1);
+  }
+}
+
+// Smoke test, compiled only when TEST is defined.
+// Usage: <prog> [opn [m [n]]]   (defaults: opn=0, m=8, n=8)
+// Fills A with 1.0f and B with 2.0f, copies both to the device, calls
+// MatKernel on the device buffers and prints the first four entries of
+// A, B and the result C. Exits with status 1 on any allocation or CUDA error.
+int main(int argc, char **argv) {
+  int m = 8, n = 8, opn = 0;
+  // Initialised to NULL so the cleanup at the end is well defined.
+  float *dA = NULL, *dB = NULL, *dC = NULL;
+  float *A = NULL, *B = NULL, *C = NULL;
+
+  // An argument that does not parse as an integer leaves the default in place.
+  if (argc > 1) sscanf(argv[1], "%d", &opn);
+  if (argc > 2) sscanf(argv[2], "%d", &m);
+  if (argc > 3) sscanf(argv[3], "%d", &n);
+
+  if (m < 1 || n < 1) {
+    fprintf(stderr, "m and n must be positive (got %d x %d)\n", m, n);
+    exit(1);
+  }
+  // Sizes are computed in size_t so that m*n cannot overflow int.
+  const size_t nelems = (size_t)m * (size_t)n;
+  const size_t nbytes = nelems * sizeof(float);
+  // The first four entries of every matrix are printed below; anything
+  // smaller would be read out of bounds.
+  if (nelems < 4) {
+    fprintf(stderr, "m*n must be at least 4 (got %d x %d)\n", m, n);
+    exit(1);
+  }
+
+  A = (float *)malloc(nbytes);
+  B = (float *)malloc(nbytes);
+  C = (float *)malloc(nbytes);
+  if (A == NULL || B == NULL || C == NULL) {
+    fprintf(stderr, "host allocation of %d x %d floats failed\n", m, n);
+    exit(1);
+  }
+  testCudaCheck(cudaMalloc((void**)&dA, nbytes), "cudaMalloc(dA)");
+  testCudaCheck(cudaMalloc((void**)&dB, nbytes), "cudaMalloc(dB)");
+  testCudaCheck(cudaMalloc((void**)&dC, nbytes), "cudaMalloc(dC)");
+
+  for (size_t i = 0; i < nelems; i++) {
+    A[i] = 1.0f;
+    B[i] = 2.0f;
+  }
+
+  testCudaCheck(cudaMemcpy(dA, A, nbytes, cudaMemcpyHostToDevice), "copy of A to device");
+  testCudaCheck(cudaMemcpy(dB, B, nbytes, cudaMemcpyHostToDevice), "copy of B to device");
+
+  printf("A %f %f %f %f\n", A[0], A[1], A[2], A[3]);
+  printf("B %f %f %f %f\n", B[0], B[1], B[2], B[3]);
+
+  // NOTE(review): MatKernel is not declared in the visible part of this file;
+  // apply_binop in MatKernel.hpp takes the same argument list — confirm which
+  // entry point is intended here.
+  MatKernel(dA, m, n, dB, m, n, dC, opn);
+  testCudaCheck(cudaGetLastError(), "MatKernel");
+
+  // Blocking copy: also reports errors raised while the kernel was running.
+  testCudaCheck(cudaMemcpy(C, dC, nbytes, cudaMemcpyDeviceToHost), "copy of C to host");
+
+  printf("C %f %f %f %f\n", C[0], C[1], C[2], C[3]);
+  printf("A %f %f %f %f\n", A[0], A[1], A[2], A[3]);
+  printf("B %f %f %f %f\n", B[0], B[1], B[2], B[3]);
+
+  cudaFree(dA);
+  cudaFree(dB);
+  cudaFree(dC);
+  // All three host buffers are released (the original freed only C).
+  free(A);
+  free(B);
+  free(C);
+  return 0;
+}
+#endif
diff --git a/jni/src/MatKernel.hpp b/jni/src/MatKernel.hpp
new file mode 100755
index 00000000..ef1acdf6
--- /dev/null
+++ b/jni/src/MatKernel.hpp
@@ -0,0 +1,20 @@
+
+int apply_binop(float *nativeA, int Anrows, int Ancols, float *nativeB, int Bnrows, int Bncols, float *nativeC, int opn);
+
+int apply_biniop(int *nativeA, int Anrows, int Ancols, int *nativeB, int Bnrows, int Bncols, int *nativeC, int opn);
+
+int apply_gfun(float *nativeA, float *nativeB, int N, int opn);
+
+int apply_gfun2(float *nativeA, float *nativeB, float *nativeC, int N, int opn);
+
+int dsmult(int nrows, int ncols, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C);
+
+int dsmultT(int nrows, int ncols, int nnz, float *A, float *Bdata, int *Bir, int *Bic, float *C);
+
+int dds(int nrows, int nnz, float *A, float *B, int *Cir, int *Cic, float *P);
+
+int reduce1op(int nrows, int ncols, float *A, float *B, int opn);
+
+int reduce2op(int nrows, int ncols, float *A, float *B, int opn);
+
+int transpose(float *in, int instride, float *out, int outstride, int nrows, int ncols);
diff --git a/jni/src/configure b/jni/src/configure
new file mode 100755
index 00000000..586c4ce1
--- /dev/null
+++ b/jni/src/configure
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+OS=`uname`
+PARLIB=$1
+ARCH="x86_64"
+
+VERSION="dev"
+
+# standardise the OS and ARCH names
+if [ "$OS" = "Darwin" ] ; then
+ OS="apple"
+elif [ "$OS" = "Linux" ] ; then
+ OS="linux"
+elif [ "$OS" = "SunOS" ] ; then
+ OS="sun"
+elif [[ "$OS" == CYGWIN* ]] ; then
+ OS="windows"
+else
+ echo "OS not supported" $OS
+ exit 1
+fi
+
+if [ "$ARCH" = "x86" ] || [ "$ARCH" = "i686" ] || [ "$ARCH" = "i586" ] \
+ || [ "$ARCH" = "i486" ] || [ "$ARCH" = "i386" ] ; then
+ ARCH="x86"
+elif [ "$ARCH" = "Power Macintosh" ] ; then
+ ARCH="ppc"
+elif [ "$ARCH" = "amd64" ] || [ "$ARCH" = "x86_64" ] ; then
+ ARCH="x86_64"
+elif [ "$ARCH" = "sun4u" ] ; then
+ ARCH="sparc"
+else
+ echo "ARCH not supported"
+ exit 1
+fi
+
+# Per-OS toolchain settings. Each branch assigns the variables that are later
+# written to Makefile.incl. A variable a branch does not assign is written
+# with whatever value it inherited from the environment (empty if unset).
+# Note: OS="sun" is accepted by the normalisation above but has no branch
+# here, so it ends in the final "OS not supported" arm.
+if [ "$OS" = "apple" ] ; then
+# macOS: gcc against the JavaVM framework, producing a .jnilib.
+# This branch does not assign GCC, NVCC, NVCCFLAGS, SUBLIB, GLD, CUDA_LIBS
+# or JCUDA_HOME.
+ CC="gcc -Wall"
+ OBJ="o"
+ OUTFLG="-o "
+ CPPFLAGS="$CPPFLAGS -I/System/Library/Frameworks/JavaVM.framework/Home/include"
+ CFLAGS="-fPIC -fno-common $CFLAGS"
+ LB="ar rc"
+ LD="gcc -dynamiclib"
+ LDFLAGS="$LDFLAGS -framework JavaVM"
+ LIBPREPEND="lib"
+ LIBAPPEND="-apple-"${ARCH}".jnilib"
+ FC="g95"
+ FFLAGS="$CFLAGS"
+ LAPACK_INCLUDES="-I/System/Library/Frameworks/vecLib.framework/Headers"
+ FORTRAN_LIBS="-lg95 -Wl,-single_module"
+ MKL_LIBS="-framework veclib"
+elif [ "$OS" = "linux" ] ; then
+# Linux: icc for the MKL wrapper, gcc/nvcc for the CUDA wrapper.
+# The four install roots below are assigned unconditionally, so any values
+# inherited from the environment are overwritten.
+# NOTE(review): JCUDA_HOME points into a specific user's home directory —
+# confirm before building on another machine.
+ MKL_ROOT="/opt/intel/mkl"
+ JAVA_HOME="/usr/java/default"
+ CUDA_HOME="/usr/local/cuda"
+ JCUDA_HOME="/home/jfc/code/JCUDA5"
+ CC="icc"
+ GCC="gcc"
+ NVCC="nvcc"
+# nvcc: compile only (-c), 64-bit, host-compiler options passed via -Xcompiler.
+ NVCCFLAGS="-c -arch=compute_20 -code=sm_20,sm_30 --machine 64 -Xcompiler \"-fPIC -c -O2 -DNDEBUG\""
+ SUBLIB=linux64
+ OBJ="o"
+ OUTFLG="-o "
+ CPPFLAGS="$CPPFLAGS -I$JAVA_HOME/include -I$JAVA_HOME/include/linux -I$MKL_ROOT/include \
+ -I$MKL_ROOT/include/intel64/lp64 -I$JCUDA_HOME/CommonJNI/src -I$CUDA_HOME/include"
+ CFLAGS="-fPIC -c -O2 -DNDEBUG -std=c99 $CFLAGS"
+ LB="ar rc"
+ GLD="gcc -shared"
+ LD="icc -shared -static-intel"
+ LDFLAGS="$LDFLAGS"
+ LIBPREPEND="lib"
+ LIBAPPEND=".so"
+ FC="gfortran"
+ FFLAGS="$CFLAGS"
+ LAPACK_INCLUDES=""
+ FORTRAN_LIBS="-lgfortran"
+# PARLIB (first script argument) equal to "threaded" links libmkl_intel_thread;
+# any other value links libmkl_sequential. Both variants link the static MKL
+# archives and -liomp5.
+ if [ "$PARLIB" = "threaded" ] ; then
+ MKL_LIBS="-L$JAVA_HOME/lib -L/opt/intel/composerxe/lib/intel64 $MKL_ROOT/lib/intel64/libmkl_intel_lp64.a -Wl,--start-group \
+ $MKL_ROOT/lib/intel64/libmkl_intel_thread.a $MKL_ROOT/lib/intel64/libmkl_core.a \
+ -Wl,--end-group -liomp5 -lpthread -lm"
+ else
+ MKL_LIBS="-L$JAVA_HOME/lib -L/opt/intel/composerxe/lib/intel64 $MKL_ROOT/lib/intel64/libmkl_intel_lp64.a -Wl,--start-group \
+ $MKL_ROOT/lib/intel64/libmkl_sequential.a $MKL_ROOT/lib/intel64/libmkl_core.a \
+ -Wl,--end-group -liomp5 -lpthread -lm"
+ fi
+ CUDA_LIBS="-L${CUDA_HOME}/lib64 -L${JCUDA_HOME}/lib -lcudart -lCommonJNI"
+elif [ "$OS" = "windows" ] ; then
+# Windows (Cygwin shell): Intel icl / MSVC link, producing a .dll.
+# Install roots are again hard-coded; the commented lines are earlier
+# alternatives kept for reference.
+ MKL_ROOT="c:/Intel/MKL"
+# JAVA_HOME=""
+# CUDA_HOME="C:/Progra~1/NVIDIA~2/CUDA/v4.2"
+# JCUDA_HOME="/code/JCUDA"
+ CUDA_HOME="C:/Progra~1/NVIDIA~2/CUDA/v5.0"
+ JCUDA_HOME="/code/JCUDA5"
+ JAVA_HOME="C:/Progra~1/Java/jdk1.6.0_29"
+ CC="icl"
+ GCC="icl"
+ NVCC="nvcc"
+ SUBLIB=win64
+ OBJ="obj"
+ OUTFLG="/OUT:"
+# CPPFLAGS is cleared here; header search paths go through INCLUDE below.
+ CPPFLAGS=""
+ NVCCFLAGS="-c -arch=compute_20 -code=sm_20,sm_30 --machine 64 -Xcompiler \"/EHsc /W3 /nologo /O2 /Zi /MT\""
+# NVCCFLAGS="-c -arch=compute_30 -code=sm_30 --machine 64 -Xcompiler \"/EHsc /W3 /nologo /O2 /Zi /MT\""
+ CFLAGS="/c /MT /DNDEBUG /O2 /Qstd=c99 $CFLAGS" # static linking
+# CFLAGS="/c /MT /DMKL_ILP64 /DNDEBUG /O2 /Qstd=c99 $CFLAGS" # static link, 64bit ints
+ LB="lib"
+ LD="link"
+ GLD="link"
+ LDFLAGS="/DLL /MACHINE:AMD64 $LDFLAGS"
+ LIBPREPEND=""
+ LIBAPPEND=".dll"
+ FC="ifort"
+ FFLAGS="-c $FFLAGS"
+ LAPACK_INCLUDES=""
+ FORTRAN_LIBS=""
+# Same PARLIB switch as on Linux: threaded vs. sequential MKL, static link.
+ if [ "$PARLIB" = "threaded" ] ; then
+# MKL_LIBS="mkl_intel_lp64_dll.lib mkl_intel_thread_dll.lib mkl_core_dll.lib" # threaded, 32bit ints, dll
+ MKL_LIBS="mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib" # threaded, 32bit integer, static link
+ else
+# MKL_LIBS="mkl_intel_lp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib" # sequential, 32bit ints, dll
+ MKL_LIBS="mkl_intel_lp64.lib mkl_sequential.lib mkl_core.lib libiomp5md.lib" # sequential, 32bit int, static link
+ fi
+# MKL_LIBS="mkl_intel_ilp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib" # threaded, 64bit integer, static link
+# MKL_LIBS="mkl_intel_ilp64.lib mkl_sequential.lib mkl_core.lib libiomp5md.lib" # sequential, 64bit integer, static link
+ CUDA_LIBS="cudart.lib CommonJNI.lib"
+# Semicolon-separated search paths, with any inherited LIB / INCLUDE appended.
+# INCLUDE spells out c:/Intel/MKL/mkl/include and c:/codeh/BIDMat/jni/include
+# literally instead of deriving them from the variables above.
+ LIB="$MKL_ROOT/mkl/lib/intel64;$MKL_ROOT/compiler/lib/intel64;$JAVA_HOME/lib;$CUDA_HOME/lib/x64;$JCUDA_HOME/lib;$LIB"
+ INCLUDE="$JAVA_HOME/include;$JAVA_HOME/include/win32;c:/Intel/MKL/mkl/include;c:/codeh/BIDMat/jni/include;$JCUDA_HOME/CommonJNI/src;$CUDA_HOME/include;$INCLUDE"
+else
+ echo "OS not supported"
+ exit 1
+fi
+
+echo "Creating config for $OS $ARCH"
+
+echo "CC=$CC" > Makefile.incl
+echo "GCC=$GCC" >> Makefile.incl
+echo "NVCC=$NVCC" >> Makefile.incl
+echo "NVCCFLAGS=$NVCCFLAGS" >> Makefile.incl
+echo "SUBLIB=$SUBLIB" >> Makefile.incl
+echo "OBJ=$OBJ" >> Makefile.incl
+echo "OUTFLG=$OUTFLG" >> Makefile.incl
+echo "CPPFLAGS=$CPPFLAGS" >> Makefile.incl
+echo "CFLAGS=$CFLAGS" >> Makefile.incl
+echo "LB=$LB" >> Makefile.incl
+echo "LD=$LD" >> Makefile.incl
+echo "GLD=$GLD" >> Makefile.incl
+echo "LDFLAGS=$LDFLAGS" >> Makefile.incl
+echo "LIBPREPEND=$LIBPREPEND" >> Makefile.incl
+echo "LIBAPPEND=$LIBAPPEND" >> Makefile.incl
+echo "LAPACK_INCLUDES=$LAPACK_INCLUDES" >> Makefile.incl
+echo "MKL_LIBS=$MKL_LIBS" >> Makefile.incl
+echo "CUDA_LIBS=$CUDA_LIBS" >> Makefile.incl
+echo "FORTRAN_LIBS=$FORTRAN_LIBS" >> Makefile.incl
+echo "FC=$FC" >> Makefile.incl
+echo "FFLAGS=$FFLAGS" >> Makefile.incl
+echo "LIB=$LIB" >> Makefile.incl
+echo "INCLUDE=$INCLUDE" >> Makefile.incl
+echo "JCUDA_COMMON=$JCUDA_HOME/CommonJNI/src" >> Makefile.incl
+
+
diff --git a/lib/HDF5_Copyright.html b/lib/HDF5_Copyright.html
new file mode 100644
index 00000000..07a71f45
--- /dev/null
+++ b/lib/HDF5_Copyright.html
@@ -0,0 +1,160 @@
+
+
+
+ HDF5 Copyright Notice and License Terms
+
+
+
+
+
+
+
+
+
+Copyright Notice and License Terms for
+
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+
+
+
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+Copyright 2006-2012 by The HDF Group.
+
+NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+Copyright 1998-2006 by the Board of Trustees of the University of Illinois.
+
+All rights reserved.
+
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted for any purpose (including commercial purposes)
+provided that the following conditions are met:
+
+
+
+-
+Redistributions of source code must retain the above copyright notice,
+this list of conditions, and the following disclaimer.
+
+
-
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions, and the following disclaimer in the documentation
+and/or materials provided with the distribution.
+
+
-
+In addition, redistributions of modified forms of the source or binary code
+must carry prominent notices stating that the original code was changed and
+the date of the change.
+
+
-
+All publications or advertising materials mentioning features or use of this
+software are asked, but not required, to acknowledge that it was developed
+by The HDF Group and by the National Center for Supercomputing Applications
+at the University of Illinois at Urbana-Champaign and credit the contributors.
+
+
-
+Neither the name of The HDF Group, the name of the University, nor the name
+of any Contributor may be used to endorse or promote products derived from
+this software without specific prior written permission from The HDF Group,
+the University, or the Contributor, respectively.
+
+
+
+DISCLAIMER:
+THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS
+"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED.
+In no event shall The HDF Group or the Contributors be liable for any damages
+suffered by the users arising out of the use of this software, even if advised
+of the possibility of such damage.
+
+
+
+
+
+
+Contributors: National Center for Supercomputing Applications (NCSA) at
+the University of Illinois, Fortner Software, Unidata Program Center (netCDF),
+The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip),
+and Digital Equipment Corporation (DEC).
+
+
+
+
+Portions of HDF5 were developed with support from the Lawrence Berkeley
+National Laboratory (LBNL) and the United States Department of Energy
+under Prime Contract No. DE-AC02-05CH11231.
+
+
+
+
+Portions of HDF5 were developed with support from the University of
+California, Lawrence Livermore National Laboratory (UC LLNL).
+The following statement applies to those portions of the product and must
+be retained in any redistribution of source code, binaries, documentation,
+and/or accompanying materials:
+
+ This work was partially produced at the University of California,
+ Lawrence Livermore National Laboratory (UC LLNL) under contract
+ no. W-7405-ENG-48 (Contract 48) between the U.S. Department of
+ Energy (DOE) and The Regents of the University of California
+ (University) for the operation of UC LLNL.
+
+ DISCLAIMER:
+ This work was prepared as an account of work sponsored by an agency
+ of the United States Government. Neither the United States Government
+ nor the University of California nor any of their employees, makes
+ any warranty, express or implied, or assumes any liability or
+ responsibility for the accuracy, completeness, or usefulness of any
+ information, apparatus, product, or process disclosed, or represents
+ that its use would not infringe privately- owned rights. Reference
+ herein to any specific commercial products, process, or service by
+ trade name, trademark, manufacturer, or otherwise, does not
+ necessarily constitute or imply its endorsement, recommendation, or
+ favoring by the United States Government or the University of
+ California. The views and opinions of authors expressed herein do not
+ necessarily state or reflect those of the United States Government or
+ the University of California, and shall not be used for advertising
+ or product endorsement purposes.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The HDF Group Help Desk:
+
+ Describes HDF5 Release 1.8.9, May 2012.
+
+ | |
+
+ Copyright by
+ The HDF Group
+
+ and the Board of Trustees of the University of Illinois
+ |
+
+
+
+Last modified: 5 March 2012
+
+
+
+
+
+
+
diff --git a/lib/JCUDA_Copyright.txt b/lib/JCUDA_Copyright.txt
new file mode 100644
index 00000000..a47ba681
--- /dev/null
+++ b/lib/JCUDA_Copyright.txt
@@ -0,0 +1,24 @@
+JCuda - Java bindings for NVIDIA CUDA
+
+Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/PtPlot_Copyright.txt b/lib/PtPlot_Copyright.txt
new file mode 100755
index 00000000..7da2f50e
--- /dev/null
+++ b/lib/PtPlot_Copyright.txt
@@ -0,0 +1,27 @@
+Below is the copyright agreement for the Ptolemy II system.
+Version: $Id: copyright.txt 57469 2010-03-10 22:04:46Z cxh $
+
+Copyright (c) 1995-2010 The Regents of the University of California.
+All rights reserved.
+
+Permission is hereby granted, without written agreement and without
+license or royalty fees, to use, copy, modify, and distribute this
+software and its documentation for any purpose, provided that the above
+copyright notice and the following two paragraphs appear in all copies
+of this software.
+
+IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
+THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
+PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
+CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+ENHANCEMENTS, OR MODIFICATIONS.
+
+Ptolemy II includes the work of others, to see those copyrights, follow
+the copyright link on the splash page or see copyright.htm.
diff --git a/lib/bidmat_init.scala b/lib/bidmat_init.scala
new file mode 100755
index 00000000..94a40ca5
--- /dev/null
+++ b/lib/bidmat_init.scala
@@ -0,0 +1,7 @@
+import BIDMat.{Mat, FMat, DMat, IMat, CMat, BMat, CSMat, SMat, SDMat, GMat, GIMat, GSMat, HMat}
+import BIDMat.MatFunctions._
+import BIDMat.SciFunctions._
+import BIDMat.Solvers._
+import BIDMat.Plotting._
+
+// NOTE(review): presumably probes for a usable CUDA runtime/device once the imports above are loaded — confirm against BIDMat.Mat.
+Mat.checkCUDA
diff --git a/lib/jcublas-0.4.2.jar b/lib/jcublas-0.4.2.jar
new file mode 100644
index 00000000..8183047a
Binary files /dev/null and b/lib/jcublas-0.4.2.jar differ
diff --git a/lib/jcublas-0.5.0RC.jar b/lib/jcublas-0.5.0RC.jar
new file mode 100755
index 00000000..139ed978
Binary files /dev/null and b/lib/jcublas-0.5.0RC.jar differ
diff --git a/lib/jcuda-0.4.2.jar b/lib/jcuda-0.4.2.jar
new file mode 100644
index 00000000..6b73cf5b
Binary files /dev/null and b/lib/jcuda-0.4.2.jar differ
diff --git a/lib/jcuda-0.5.0RC.jar b/lib/jcuda-0.5.0RC.jar
new file mode 100755
index 00000000..b0e89f1a
Binary files /dev/null and b/lib/jcuda-0.5.0RC.jar differ
diff --git a/lib/jcudpp-0.4.2.jar b/lib/jcudpp-0.4.2.jar
new file mode 100644
index 00000000..3c329bf0
Binary files /dev/null and b/lib/jcudpp-0.4.2.jar differ
diff --git a/lib/jcufft-0.4.2.jar b/lib/jcufft-0.4.2.jar
new file mode 100644
index 00000000..4f55ba01
Binary files /dev/null and b/lib/jcufft-0.4.2.jar differ
diff --git a/lib/jcufft-0.5.0RC.jar b/lib/jcufft-0.5.0RC.jar
new file mode 100755
index 00000000..5a26a2e4
Binary files /dev/null and b/lib/jcufft-0.5.0RC.jar differ
diff --git a/lib/jcurand-0.4.2.jar b/lib/jcurand-0.4.2.jar
new file mode 100644
index 00000000..69b674ca
Binary files /dev/null and b/lib/jcurand-0.4.2.jar differ
diff --git a/lib/jcurand-0.5.0RC.jar b/lib/jcurand-0.5.0RC.jar
new file mode 100755
index 00000000..1399969d
Binary files /dev/null and b/lib/jcurand-0.5.0RC.jar differ
diff --git a/lib/jcusparse-0.4.2.jar b/lib/jcusparse-0.4.2.jar
new file mode 100644
index 00000000..c58917db
Binary files /dev/null and b/lib/jcusparse-0.4.2.jar differ
diff --git a/lib/jcusparse-0.5.0RC.jar b/lib/jcusparse-0.5.0RC.jar
new file mode 100755
index 00000000..80be5937
Binary files /dev/null and b/lib/jcusparse-0.5.0RC.jar differ
diff --git a/lib/jhdf5.jar b/lib/jhdf5.jar
new file mode 100644
index 00000000..9d15b7d1
Binary files /dev/null and b/lib/jhdf5.jar differ
diff --git a/lib/linux64/HDF5_Copyright.html b/lib/linux64/HDF5_Copyright.html
new file mode 100755
index 00000000..07a71f45
--- /dev/null
+++ b/lib/linux64/HDF5_Copyright.html
@@ -0,0 +1,160 @@
+
+
+
+ HDF5 Copyright Notice and License Terms
+
+
+
+
+
+
+
+
+
+Copyright Notice and License Terms for
+
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+
+
+
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+Copyright 2006-2012 by The HDF Group.
+
+NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+Copyright 1998-2006 by the Board of Trustees of the University of Illinois.
+
+All rights reserved.
+
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted for any purpose (including commercial purposes)
+provided that the following conditions are met:
+
+
+
+-
+Redistributions of source code must retain the above copyright notice,
+this list of conditions, and the following disclaimer.
+
+
-
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions, and the following disclaimer in the documentation
+and/or materials provided with the distribution.
+
+
-
+In addition, redistributions of modified forms of the source or binary code
+must carry prominent notices stating that the original code was changed and
+the date of the change.
+
+
-
+All publications or advertising materials mentioning features or use of this
+software are asked, but not required, to acknowledge that it was developed
+by The HDF Group and by the National Center for Supercomputing Applications
+at the University of Illinois at Urbana-Champaign and credit the contributors.
+
+
-
+Neither the name of The HDF Group, the name of the University, nor the name
+of any Contributor may be used to endorse or promote products derived from
+this software without specific prior written permission from The HDF Group,
+the University, or the Contributor, respectively.
+
+
+
+DISCLAIMER:
+THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS
+"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED.
+In no event shall The HDF Group or the Contributors be liable for any damages
+suffered by the users arising out of the use of this software, even if advised
+of the possibility of such damage.
+
+
+
+
+
+
+Contributors: National Center for Supercomputing Applications (NCSA) at
+the University of Illinois, Fortner Software, Unidata Program Center (netCDF),
+The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip),
+and Digital Equipment Corporation (DEC).
+
+
+
+
+Portions of HDF5 were developed with support from the Lawrence Berkeley
+National Laboratory (LBNL) and the United States Department of Energy
+under Prime Contract No. DE-AC02-05CH11231.
+
+
+
+
+Portions of HDF5 were developed with support from the University of
+California, Lawrence Livermore National Laboratory (UC LLNL).
+The following statement applies to those portions of the product and must
+be retained in any redistribution of source code, binaries, documentation,
+and/or accompanying materials:
+
+ This work was partially produced at the University of California,
+ Lawrence Livermore National Laboratory (UC LLNL) under contract
+ no. W-7405-ENG-48 (Contract 48) between the U.S. Department of
+ Energy (DOE) and The Regents of the University of California
+ (University) for the operation of UC LLNL.
+
+ DISCLAIMER:
+ This work was prepared as an account of work sponsored by an agency
+ of the United States Government. Neither the United States Government
+ nor the University of California nor any of their employees, makes
+ any warranty, express or implied, or assumes any liability or
+ responsibility for the accuracy, completeness, or usefulness of any
+ information, apparatus, product, or process disclosed, or represents
+ that its use would not infringe privately- owned rights. Reference
+ herein to any specific commercial products, process, or service by
+ trade name, trademark, manufacturer, or otherwise, does not
+ necessarily constitute or imply its endorsement, recommendation, or
+ favoring by the United States Government or the University of
+ California. The views and opinions of authors expressed herein do not
+ necessarily state or reflect those of the United States Government or
+ the University of California, and shall not be used for advertising
+ or product endorsement purposes.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The HDF Group Help Desk:
+
+ Describes HDF5 Release 1.8.9, May 2012.
+
+ | |
+
+ Copyright by
+ The HDF Group
+
+ and the Board of Trustees of the University of Illinois
+ |
+
+
+
+Last modified: 5 March 2012
+
+
+
+
+
+
+
diff --git a/lib/linux64/JCUDA4.2/libJCublas-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCublas-linux-x86_64.so
new file mode 100755
index 00000000..50637794
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCublas-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libJCublas2-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCublas2-linux-x86_64.so
new file mode 100755
index 00000000..ff0797ba
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCublas2-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libJCudaDriver-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCudaDriver-linux-x86_64.so
new file mode 100755
index 00000000..d197de2d
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCudaDriver-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libJCudaRuntime-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCudaRuntime-linux-x86_64.so
new file mode 100755
index 00000000..9a5aa6a0
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCudaRuntime-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libJCufft-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCufft-linux-x86_64.so
new file mode 100755
index 00000000..750b0f6b
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCufft-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libJCurand-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCurand-linux-x86_64.so
new file mode 100755
index 00000000..5db8c4ed
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCurand-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libJCusparse-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCusparse-linux-x86_64.so
new file mode 100755
index 00000000..215ebae6
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCusparse-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libJCusparse2-linux-x86_64.so b/lib/linux64/JCUDA4.2/libJCusparse2-linux-x86_64.so
new file mode 100755
index 00000000..b20485ce
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libJCusparse2-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA4.2/libbidmatcuda.so b/lib/linux64/JCUDA4.2/libbidmatcuda.so
new file mode 100755
index 00000000..1b2c0e0b
Binary files /dev/null and b/lib/linux64/JCUDA4.2/libbidmatcuda.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCublas-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCublas-linux-x86_64.so
new file mode 100755
index 00000000..cf3aeb39
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCublas-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCublas2-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCublas2-linux-x86_64.so
new file mode 100755
index 00000000..11d1ee13
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCublas2-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCudaDriver-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCudaDriver-linux-x86_64.so
new file mode 100755
index 00000000..bd6dfa53
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCudaDriver-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCudaRuntime-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCudaRuntime-linux-x86_64.so
new file mode 100755
index 00000000..6bfbdbcf
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCudaRuntime-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCufft-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCufft-linux-x86_64.so
new file mode 100755
index 00000000..90499083
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCufft-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCurand-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCurand-linux-x86_64.so
new file mode 100755
index 00000000..396e9274
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCurand-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCusparse-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCusparse-linux-x86_64.so
new file mode 100755
index 00000000..d6b8b827
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCusparse-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libJCusparse2-linux-x86_64.so b/lib/linux64/JCUDA5.0/libJCusparse2-linux-x86_64.so
new file mode 100755
index 00000000..917b3bba
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libJCusparse2-linux-x86_64.so differ
diff --git a/lib/linux64/JCUDA5.0/libbidmatcuda.so b/lib/linux64/JCUDA5.0/libbidmatcuda.so
new file mode 100755
index 00000000..cfc001f0
Binary files /dev/null and b/lib/linux64/JCUDA5.0/libbidmatcuda.so differ
diff --git a/lib/linux64/JCUDA_Copyright.txt b/lib/linux64/JCUDA_Copyright.txt
new file mode 100755
index 00000000..a47ba681
--- /dev/null
+++ b/lib/linux64/JCUDA_Copyright.txt
@@ -0,0 +1,24 @@
+JCuda - Java bindings for NVIDIA CUDA
+
+Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/linux64/libbidmatmkl.so b/lib/linux64/libbidmatmkl.so
new file mode 100755
index 00000000..c9c0e293
Binary files /dev/null and b/lib/linux64/libbidmatmkl.so differ
diff --git a/lib/linux64/libhdf4.settings b/lib/linux64/libhdf4.settings
new file mode 100644
index 00000000..6f6e7cb6
--- /dev/null
+++ b/lib/linux64/libhdf4.settings
@@ -0,0 +1,35 @@
+ SUMMARY OF THE HDF4 CONFIGURATION
+ =================================
+
+General Information:
+-------------------
+ HDF4 Version: 4.2.6-post2
+ Configured on: Tue Dec 13 17:07:13 CST 2011
+ Configured by: hdftest@koala
+ Configure mode: production
+ Host system: x86_64-unknown-linux-gnu
+ Uname information: Linux koala 2.6.18-274.12.1.el5 #1 SMP Tue Nov 29 13:37:46 EST 2011 x86_64 x86_64 x86_64 GNU/Linux
+ Libraries:
+ Installation point: /mnt/scr1/pre-release/hdf4/vdev/koalajava
+
+Compiling Options:
+------------------
+ Compilation Mode: production
+ C compiler: /usr/bin/gcc ( gcc (GCC) 4.1.2 20080704 )
+ CFLAGS: -fPIC -O3 -fomit-frame-pointer
+ CPPFLAGS: -I/usr/include/rpc -I/mnt/hdf/packages/jpeg-PIC/Linux2.6-x86_64-gcc/include -I/mnt/hdf/packages/szip/static/encoder/Linux2.6-x86_64-gcc/include -DBIG_LONGS -DSWAP
+ Shared Libraries: no
+ Static Libraries: yes
+ LDFLAGS: -L/mnt/hdf/packages/jpeg-PIC/Linux2.6-x86_64-gcc/lib -L/mnt/hdf/packages/szip/static/encoder/Linux2.6-x86_64-gcc/lib
+ Extra libraries: -lsz -ljpeg -lz -lm
+ Archiver: ar
+ Ranlib: ranlib
+
+Languages:
+----------
+ Fortran: no
+
+Features:
+---------
+ SZIP compression: enabled with encoder
+ Support for netCDF API 2.3.2: yes
diff --git a/lib/linux64/libhdf5.settings b/lib/linux64/libhdf5.settings
new file mode 100644
index 00000000..afaae7fa
--- /dev/null
+++ b/lib/linux64/libhdf5.settings
@@ -0,0 +1,62 @@
+ SUMMARY OF THE HDF5 CONFIGURATION
+ =================================
+
+General Information:
+-------------------
+ HDF5 Version: 1.8.8
+ Configured on: Wed Nov 16 17:48:07 CST 2011
+ Configured by: hdftest@koala
+ Configure mode: production
+ Host system: x86_64-unknown-linux-gnu
+ Uname information: Linux koala 2.6.18-274.7.1.el5 #1 SMP Thu Oct 20 16:21:01 EDT 2011 x86_64 x86_64 x86_64 GNU/Linux
+ Byte sex: little-endian
+ Libraries:
+ Installation point: /mnt/scr1/pre-release/hdf5/v188/koalajava
+
+Compiling Options:
+------------------
+ Compilation Mode: production
+ C Compiler: /usr/bin/ gcc -fPIC ( gcc (GCC) 4.1.2 20080704 )
+ CFLAGS:
+ H5_CFLAGS: -std=c99 -pedantic -Wall -Wextra -Wundef -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wwrite-strings -Wconversion -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wfloat-equal -Wmissing-format-attribute -Wmissing-noreturn -Wpacked -Wdisabled-optimization -Wformat=2 -Wunreachable-code -Wendif-labels -Wdeclaration-after-statement -Wold-style-definition -Winvalid-pch -Wvariadic-macros -Wnonnull -Winit-self -Wmissing-include-dirs -Wswitch-default -Wswitch-enum -Wunused-macros -Wunsafe-loop-optimizations -Wc++-compat -Wvolatile-register-var -O3 -fomit-frame-pointer -finline-functions
+ AM_CFLAGS:
+ CPPFLAGS:
+ H5_CPPFLAGS: -D_POSIX_C_SOURCE=199506L -DNDEBUG -UH5_DEBUG_API
+ AM_CPPFLAGS: -I/mnt/hdf/packages/szip-PIC/static/encoder/Linux2.6-x86_64-gcc/include -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_BSD_SOURCE
+ Shared C Library: no
+ Static C Library: yes
+ Statically Linked Executables: no
+ LDFLAGS:
+ H5_LDFLAGS:
+ AM_LDFLAGS: -L/mnt/hdf/packages/szip-PIC/static/encoder/Linux2.6-x86_64-gcc/lib
+ Extra libraries: -lsz -lz -lrt -lm
+ Archiver: ar
+ Ranlib: ranlib
+ Debugged Packages:
+ API Tracing: no
+
+Languages:
+----------
+ Fortran: no
+
+ C++: no
+
+Features:
+---------
+ Parallel HDF5: no
+ High Level library: yes
+ Threadsafety: no
+ Default API Mapping: v18
+ With Deprecated Public Symbols: yes
+ I/O filters (external): deflate(zlib),szip(encoder)
+ I/O filters (internal): shuffle,fletcher32,nbit,scaleoffset
+ MPE: no
+ Direct VFD: no
+ dmalloc: no
+Clear file buffers before write: yes
+ Using memory checker: no
+ Function Stack Tracing: no
+ GPFS: no
+ Strict File Format Checks: no
+ Optimization Instrumentation: no
+ Large File Support (LFS): yes
diff --git a/lib/linux64/libiomp5.so b/lib/linux64/libiomp5.so
new file mode 100755
index 00000000..3b9e7257
Binary files /dev/null and b/lib/linux64/libiomp5.so differ
diff --git a/lib/linux64/libjhdf.so b/lib/linux64/libjhdf.so
new file mode 100755
index 00000000..ff6304c3
Binary files /dev/null and b/lib/linux64/libjhdf.so differ
diff --git a/lib/linux64/libjhdf5.so b/lib/linux64/libjhdf5.so
new file mode 100755
index 00000000..c3dcb2d3
Binary files /dev/null and b/lib/linux64/libjhdf5.so differ
diff --git a/lib/ptplot.jar b/lib/ptplot.jar
new file mode 100644
index 00000000..9582f1cb
Binary files /dev/null and b/lib/ptplot.jar differ
diff --git a/lib/ptplotapplication.jar b/lib/ptplotapplication.jar
new file mode 100755
index 00000000..cc32dd0c
Binary files /dev/null and b/lib/ptplotapplication.jar differ
diff --git a/lib/win64/HDF5_Copyright.html b/lib/win64/HDF5_Copyright.html
new file mode 100755
index 00000000..07a71f45
--- /dev/null
+++ b/lib/win64/HDF5_Copyright.html
@@ -0,0 +1,160 @@
+
+
+
+ HDF5 Copyright Notice and License Terms
+
+
+
+
+
+
+
+
+
+Copyright Notice and License Terms for
+
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+
+
+
+HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+Copyright 2006-2012 by The HDF Group.
+
+NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities
+
+Copyright 1998-2006 by the Board of Trustees of the University of Illinois.
+
+All rights reserved.
+
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted for any purpose (including commercial purposes)
+provided that the following conditions are met:
+
+
+
+-
+Redistributions of source code must retain the above copyright notice,
+this list of conditions, and the following disclaimer.
+
+
+-
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions, and the following disclaimer in the documentation
+and/or materials provided with the distribution.
+
+
+-
+In addition, redistributions of modified forms of the source or binary code
+must carry prominent notices stating that the original code was changed and
+the date of the change.
+
+
+-
+All publications or advertising materials mentioning features or use of this
+software are asked, but not required, to acknowledge that it was developed
+by The HDF Group and by the National Center for Supercomputing Applications
+at the University of Illinois at Urbana-Champaign and credit the contributors.
+
+
+-
+Neither the name of The HDF Group, the name of the University, nor the name
+of any Contributor may be used to endorse or promote products derived from
+this software without specific prior written permission from The HDF Group,
+the University, or the Contributor, respectively.
+
+
+
+DISCLAIMER:
+THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS
+"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED.
+In no event shall The HDF Group or the Contributors be liable for any damages
+suffered by the users arising out of the use of this software, even if advised
+of the possibility of such damage.
+
+
+
+
+
+
+Contributors: National Center for Supercomputing Applications (NCSA) at
+the University of Illinois, Fortner Software, Unidata Program Center (netCDF),
+The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip),
+and Digital Equipment Corporation (DEC).
+
+
+
+
+Portions of HDF5 were developed with support from the Lawrence Berkeley
+National Laboratory (LBNL) and the United States Department of Energy
+under Prime Contract No. DE-AC02-05CH11231.
+
+
+
+
+Portions of HDF5 were developed with support from the University of
+California, Lawrence Livermore National Laboratory (UC LLNL).
+The following statement applies to those portions of the product and must
+be retained in any redistribution of source code, binaries, documentation,
+and/or accompanying materials:
+
+ This work was partially produced at the University of California,
+ Lawrence Livermore National Laboratory (UC LLNL) under contract
+ no. W-7405-ENG-48 (Contract 48) between the U.S. Department of
+ Energy (DOE) and The Regents of the University of California
+ (University) for the operation of UC LLNL.
+
+ DISCLAIMER:
+ This work was prepared as an account of work sponsored by an agency
+ of the United States Government. Neither the United States Government
+ nor the University of California nor any of their employees, makes
+ any warranty, express or implied, or assumes any liability or
+ responsibility for the accuracy, completeness, or usefulness of any
+ information, apparatus, product, or process disclosed, or represents
+ that its use would not infringe privately- owned rights. Reference
+ herein to any specific commercial products, process, or service by
+ trade name, trademark, manufacturer, or otherwise, does not
+ necessarily constitute or imply its endorsement, recommendation, or
+ favoring by the United States Government or the University of
+ California. The views and opinions of authors expressed herein do not
+ necessarily state or reflect those of the United States Government or
+ the University of California, and shall not be used for advertising
+ or product endorsement purposes.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The HDF Group Help Desk:
+
+ Describes HDF5 Release 1.8.9, May 2012.
+
+ | |
+
+ Copyright by
+ The HDF Group
+
+ and the Board of Trustees of the University of Illinois
+ |
+
+
+
+Last modified: 5 March 2012
+
+
+
+
+
+
+
diff --git a/lib/win64/JCUDA4.2/JCublas-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCublas-windows-x86_64.dll
new file mode 100755
index 00000000..812bf249
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCublas-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/JCublas2-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCublas2-windows-x86_64.dll
new file mode 100755
index 00000000..66d70142
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCublas2-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/JCudaDriver-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCudaDriver-windows-x86_64.dll
new file mode 100755
index 00000000..5993832c
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCudaDriver-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/JCudaRuntime-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCudaRuntime-windows-x86_64.dll
new file mode 100755
index 00000000..3aba265f
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCudaRuntime-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/JCufft-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCufft-windows-x86_64.dll
new file mode 100755
index 00000000..7fbad0dd
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCufft-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/JCurand-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCurand-windows-x86_64.dll
new file mode 100755
index 00000000..f189d062
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCurand-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/JCusparse-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCusparse-windows-x86_64.dll
new file mode 100755
index 00000000..0f483793
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCusparse-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/JCusparse2-windows-x86_64.dll b/lib/win64/JCUDA4.2/JCusparse2-windows-x86_64.dll
new file mode 100755
index 00000000..5b66a121
Binary files /dev/null and b/lib/win64/JCUDA4.2/JCusparse2-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA4.2/bidmatcuda.dll b/lib/win64/JCUDA4.2/bidmatcuda.dll
new file mode 100755
index 00000000..7a506749
Binary files /dev/null and b/lib/win64/JCUDA4.2/bidmatcuda.dll differ
diff --git a/lib/win64/JCUDA5.0/JCublas-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCublas-windows-x86_64.dll
new file mode 100755
index 00000000..e8f812aa
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCublas-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/JCublas2-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCublas2-windows-x86_64.dll
new file mode 100755
index 00000000..bb0cff7b
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCublas2-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/JCudaDriver-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCudaDriver-windows-x86_64.dll
new file mode 100755
index 00000000..cc72206e
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCudaDriver-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/JCudaRuntime-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCudaRuntime-windows-x86_64.dll
new file mode 100755
index 00000000..35715c0d
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCudaRuntime-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/JCufft-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCufft-windows-x86_64.dll
new file mode 100755
index 00000000..060f337a
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCufft-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/JCurand-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCurand-windows-x86_64.dll
new file mode 100755
index 00000000..f248ec61
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCurand-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/JCusparse-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCusparse-windows-x86_64.dll
new file mode 100755
index 00000000..5ecef03b
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCusparse-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/JCusparse2-windows-x86_64.dll b/lib/win64/JCUDA5.0/JCusparse2-windows-x86_64.dll
new file mode 100755
index 00000000..854747cf
Binary files /dev/null and b/lib/win64/JCUDA5.0/JCusparse2-windows-x86_64.dll differ
diff --git a/lib/win64/JCUDA5.0/bidmatcuda.dll b/lib/win64/JCUDA5.0/bidmatcuda.dll
new file mode 100755
index 00000000..37ce271e
Binary files /dev/null and b/lib/win64/JCUDA5.0/bidmatcuda.dll differ
diff --git a/lib/win64/JCUDA_Copyright.txt b/lib/win64/JCUDA_Copyright.txt
new file mode 100755
index 00000000..a47ba681
--- /dev/null
+++ b/lib/win64/JCUDA_Copyright.txt
@@ -0,0 +1,24 @@
+JCuda - Java bindings for NVIDIA CUDA
+
+Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/win64/bidmatmkl.dll b/lib/win64/bidmatmkl.dll
new file mode 100755
index 00000000..f616fe52
Binary files /dev/null and b/lib/win64/bidmatmkl.dll differ
diff --git a/lib/win64/jhdf.dll b/lib/win64/jhdf.dll
new file mode 100755
index 00000000..fd03758e
Binary files /dev/null and b/lib/win64/jhdf.dll differ
diff --git a/lib/win64/jhdf5.dll b/lib/win64/jhdf5.dll
new file mode 100755
index 00000000..3d47abfb
Binary files /dev/null and b/lib/win64/jhdf5.dll differ
diff --git a/lib/win64/libiomp5md.dll b/lib/win64/libiomp5md.dll
new file mode 100755
index 00000000..faf9ebcc
Binary files /dev/null and b/lib/win64/libiomp5md.dll differ
diff --git a/project/plugins.sbt b/project/plugins.sbt
new file mode 100755
index 00000000..bf5cb709
--- /dev/null
+++ b/project/plugins.sbt
@@ -0,0 +1,7 @@
+
+libraryDependencies <+= sbtVersion(v => "com.github.siasia" %% "xsbt-proguard-plugin" % (v+"-0.1.1")) // ProGuard plugin; its version string is the running sbt version followed by "-0.1.1"
+
+resolvers += "Proguard plugin repo" at "http://siasia.github.com/maven2" // extra repository, named for the ProGuard plugin dependency above
+
+
+
diff --git a/src/main/java/edu/berkeley/bid/CBLAS.java b/src/main/java/edu/berkeley/bid/CBLAS.java
new file mode 100755
index 00000000..fba33ec2
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/CBLAS.java
@@ -0,0 +1,73 @@
+package edu.berkeley.bid;
+
+public final class CBLAS {
+ // JNI declarations for dense BLAS-style routines; the natives are resolved from the "bidmatmkl" library loaded below.
+ private CBLAS() {}
+ // Load the native library once, when this class is initialized.
+ static {
+ System.loadLibrary("bidmatmkl");
+ }
+ // Integer codes for the order/transpose/uplo/diag/side arguments (presumably the C CBLAS enum values - confirm against cblas.h).
+ public final static class ORDER {
+ private ORDER() {}
+ public final static int RowMajor=101;
+ public final static int ColMajor=102;
+ }
+
+ public final static class TRANSPOSE {
+ private TRANSPOSE() {}
+ public final static int NoTrans =111;
+ public final static int Trans =112;
+ public final static int ConjTrans=113;
+ }
+
+ public final static class UPLO {
+ private UPLO() {}
+ public final static int Upper=121;
+ public final static int Lower=122;
+ }
+
+ public final static class DIAG {
+ private DIAG() {}
+ public final static int NonUnit=131;
+ public final static int Unit =132;
+ }
+
+ public final static class SIDE {
+ private SIDE() {}
+ public final static int Left =141;
+ public final static int Right=142;
+ }
+ // double[] routines; the *xx variants take start offsets (startX/startY) where the plain ones take strides (incX/incY). NOTE(review): the axpy natives are declared to return double - confirm what the JNI side returns.
+ public static native double ddot( int N, double [] X, int incX, double [] Y, int incY);
+ public static native double ddotxx( int N, double [] X, int startX, double [] Y, int startY);
+ public static native double daxpy( int N, double a, double [] X, int incX, double [] Y, int incY);
+ public static native double daxpyxx( int N, double a, double [] X, int startX, double [] Y, int startY);
+ public static native void dgemv( int order, int TransA, int M, int N, double alpha, double [] A, int lda,
+ double [] X, int incX, double beta, double [] Y, int incY);
+ public static native void dgemm( int Order, int TransA, int TransB, int M, int N, int K, double alpha,
+ double [] A, int lda, double [] B, int ldb, double beta, double [] C, int ldc);
+ public static native void domatcopy( String Order, String TransA, int M, int N, double alpha, double [] A, int lda, double [] B, int ldb);
+ public static native void dmcscm( int m, int n, double [] a, int lda, double [] b, int [] ir, int [] jc, double [] c, int ldc);
+ public static native void dmcsrm( int m, int n, double [] a, int lda, double [] b, int [] ir, int [] jc, double [] c, int ldc);
+ // float[] counterparts of the double[] routines above.
+ public static native float sdot( int N, float [] X, int incX, float [] Y, int incY);
+ public static native float sdotxx( int N, float [] X, int startX, float [] Y, int startY);
+ public static native double saxpy( int N, float a, float [] X, int incX, float [] Y, int incY);
+ public static native double saxpyxx( int N, float a, float [] X, int startX, float [] Y, int startY);
+ public static native void sgemv( int order, int TransA, int M, int N, float alpha, float [] A, int lda,
+ float [] X, int incX, float beta, float [] Y, int incY);
+ public static native void sgemm( int Order, int TransA, int TransB, int M, int N, int K, float alpha,
+ float [] A, int lda, float [] B, int ldb, float beta, float [] C, int ldc);
+ public static native void somatcopy( String Order, String TransA, int M, int N, float alpha, float [] A, int lda, float [] B, int ldb);
+ // c-prefixed routines: scalars (a, alpha, beta) and data are all float[] (presumably interleaved real/imaginary parts - TODO confirm).
+ public static native double caxpy( int N, float [] a, float [] X, int incX, float [] Y, int incY);
+ public static native double caxpyxx( int N, float [] a, float [] X, int startX, float [] Y, int startY);
+ public static native void cgemv( int order, int TransA, int M, int N, float [] alpha, float [] A, int lda,
+ float [] X, int incX, float [] beta, float [] Y, int incY);
+ public static native void cgemm( int Order, int TransA, int TransB, int M, int N, int K, float [] alpha,
+ float [] A, int lda, float [] B, int ldb, float [] beta, float [] C, int ldc);
+ // float[] versions of dmcscm/dmcsrm: array a with lda, array b with int index arrays ir/jc, array c with ldc (presumably a dense-by-sparse product - TODO confirm).
+ public static native void smcscm( int m, int n, float [] a, int lda, float [] b, int [] ir, int [] jc, float [] c, int ldc);
+ public static native void smcsrm( int m, int n, float [] a, int lda, float [] b, int [] ir, int [] jc, float [] c, int ldc);
+}
\ No newline at end of file
diff --git a/src/main/java/edu/berkeley/bid/CUMAT.java b/src/main/java/edu/berkeley/bid/CUMAT.java
new file mode 100755
index 00000000..9c8fcdd9
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/CUMAT.java
@@ -0,0 +1,32 @@
+package edu.berkeley.bid;
+import jcuda.*;
+import jcuda.runtime.*;
+
+public final class CUMAT {
+ // JNI declarations resolved from the "bidmatcuda" native library; matrix arguments are passed as JCuda Pointer objects.
+ private CUMAT() {}
+ // Load the native library once, when this class is initialized.
+ static {
+ System.loadLibrary("bidmatcuda");
+ }
+ // Every native returns an int (presumably a CUDA error code, 0 on success - confirm in the JNI wrapper). opn selects the operation; its codes are defined on the native side.
+ public static native int applyop(Pointer A, int Anrows, int Ancols, Pointer B, int Bnrows, int Bncols, Pointer C, int opn);
+
+ public static native int applyiop(Pointer A, int Anrows, int Ancols, Pointer B, int Bnrows, int Bncols, Pointer C, int opn);
+
+ public static native int applygfun(Pointer A, Pointer B, int N, int opn);
+
+ public static native int applygfun2(Pointer A, Pointer B, Pointer C, int N, int opn);
+
+ public static native int reduce1op(int nr, int nc, Pointer A, Pointer B, int opn);
+
+ public static native int reduce2op(int nr, int nc, Pointer A, Pointer B, int opn);
+
+ public static native int dsmult(int nr, int nc, int nnz, Pointer A, Pointer Bdata, Pointer Bir, Pointer Bic, Pointer C);
+
+ public static native int dsmultT(int nr, int nc, int nnz, Pointer A, Pointer Bdata, Pointer Bir, Pointer Bic, Pointer C);
+
+ public static native int dds(int nr, int nnz, Pointer A, Pointer B, Pointer Cir, Pointer Cic, Pointer P);
+
+ public static native int transpose(Pointer A, int lda, Pointer B, int ldb, int nr, int nc);
+}
diff --git a/src/main/java/edu/berkeley/bid/Copyright.txt b/src/main/java/edu/berkeley/bid/Copyright.txt
new file mode 100755
index 00000000..21326596
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/Copyright.txt
@@ -0,0 +1,25 @@
+Copyright (c) 2012, Regents of the University of California
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/src/main/java/edu/berkeley/bid/LAPACK.java b/src/main/java/edu/berkeley/bid/LAPACK.java
new file mode 100755
index 00000000..0cea7828
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/LAPACK.java
@@ -0,0 +1,102 @@
+package edu.berkeley.bid;
+
+public final class LAPACK {
+ // JNI declarations named after LAPACK routines (s/d/c/z prefix = float/double/complex float/complex double). The group comments give the standard purpose of the same-named LAPACK routine; the native code itself is in "bidmatmkl" and not visible here.
+ private LAPACK() {}
+
+ static {
+ System.loadLibrary("bidmatmkl");
+ }
+ // The ORDER constants are commented out in this class; CBLAS.ORDER declares the same two values.
+/* public final static class ORDER {
+ private ORDER() {}
+ public final static int RowMajor=101;
+ public final static int ColMajor=102;
+ } */
+ // getrf: LU factorization; pivot indices come back in ipiv.
+ public static native int sgetrf( int order, int M, int N, float [] A, int lda, int [] ipiv);
+ public static native int dgetrf( int order, int M, int N, double [] A, int lda, int [] ipiv);
+ public static native int cgetrf( int order, int M, int N, float [] A, int lda, int [] ipiv);
+ public static native int zgetrf( int order, int M, int N, double [] A, int lda, int [] ipiv);
+ // getri: matrix inverse computed from a getrf factorization.
+ public static native int sgetri( int order, int N, float [] A, int lda, int [] ipiv);
+ public static native int dgetri( int order, int N, double [] A, int lda, int [] ipiv);
+ public static native int cgetri( int order, int N, float [] A, int lda, int [] ipiv);
+ public static native int zgetri( int order, int N, double [] A, int lda, int [] ipiv);
+ // getrs: solve a linear system with nrhs right-hand sides using a getrf factorization.
+ public static native int sgetrs( int order, String trans, int N, int nrhs, float [] A, int lda, int [] ipiv, float [] b, int ldb);
+ public static native int dgetrs( int order, String trans, int N, int nrhs, double [] A, int lda, int [] ipiv, double [] b, int ldb);
+ public static native int cgetrs( int order, String trans, int N, int nrhs, float [] A, int lda, int [] ipiv, float [] b, int ldb);
+ public static native int zgetrs( int order, String trans, int N, int nrhs, double [] A, int lda, int [] ipiv, double [] b, int ldb);
+ // trtrs: solve a triangular system (NOTE(review): the encoding of the mdata string is defined on the native side).
+ public static native int strtrs( int order, String mdata, int n, int nrhs, float [] A, int lda, float [] b, int ldb);
+ public static native int dtrtrs( int order, String mdata, int n, int nrhs, double [] A, int lda, double [] b, int ldb);
+ public static native int ctrtrs( int order, String mdata, int n, int nrhs, float [] A, int lda, float [] b, int ldb);
+ public static native int ztrtrs( int order, String mdata, int n, int nrhs, double [] A, int lda, double [] b, int ldb);
+ // steqr: eigenvalues/eigenvectors of a symmetric tridiagonal matrix (d = diagonal, e = off-diagonal).
+ public static native int ssteqr( int order, String compz, int n, float [] d, float [] e, float [] z, int ldz );
+ public static native int dsteqr( int order, String compz, int n, double [] d, double [] e, double [] z, int ldz );
+ public static native int csteqr( int order, String compz, int n, float [] d, float [] e, float [] z, int ldz );
+ public static native int zsteqr( int order, String compz, int n, double [] d, double [] e, double [] z, int ldz );
+ // sytrd: reduce a symmetric matrix to tridiagonal form.
+ public static native int ssytrd( int order, String uplo, int n, float [] a, int lda, float [] d, float [] e, float [] tau );
+ public static native int dsytrd( int order, String uplo, int n, double [] a, int lda, double [] d, double [] e, double [] tau );
+ // orgtr: form the orthogonal matrix defined by a sytrd reduction.
+ public static native int sorgtr( int order, String uplo, int n, float [] a, int lda, float [] tau );
+ public static native int dorgtr( int order, String uplo, int n, double [] a, int lda, double [] tau );
+ // stedc: symmetric tridiagonal eigenproblem, divide-and-conquer variant.
+ public static native int sstedc( int order, String compz, int n, float [] d, float [] e, float [] z, int ldz );
+ public static native int dstedc( int order, String compz, int n, double [] d, double [] e, double [] z, int ldz );
+ // syevd: eigenvalues (w) and, depending on jobz, eigenvectors of a symmetric matrix.
+ public static native int ssyevd( int order, String jobz, String uplo, int n, float [] a, int lda, float [] w );
+ public static native int dsyevd( int order, String jobz, String uplo, int n, double [] a, int lda, double [] w );
+ // potrf: Cholesky factorization.
+ public static native int spotrf( int order, String uplo, int n, float [] a, int lda);
+ public static native int dpotrf( int order, String uplo, int n, double [] a, int lda);
+ public static native int cpotrf( int order, String uplo, int n, float [] a, int lda);
+ public static native int zpotrf( int order, String uplo, int n, double [] a, int lda);
+ // gebal: balance a general matrix; ilo/ihi are int arrays (presumably one-element outputs - confirm).
+ public static native int sgebal(int matrix_order, String job, int n, float [] a, int lda, int [] ilo, int [] ihi, float [] scale);
+ public static native int dgebal(int matrix_order, String job, int n, double [] a, int lda, int [] ilo, int [] ihi, double [] scale);
+ public static native int cgebal(int matrix_order, String job, int n, float [] a, int lda, int [] ilo, int [] ihi, float [] scale);
+ public static native int zgebal(int matrix_order, String job, int n, double [] a, int lda, int [] ilo, int [] ihi, double [] scale);
+ // unghr: form the unitary matrix defined by a gehrd reduction (complex types only here).
+ public static native int cunghr(int matrix_order, int n, int ilo, int ihi, float [] a, int lda, float [] tau);
+ public static native int zunghr(int matrix_order, int n, int ilo, int ihi, double [] a, int lda, double [] tau);
+ // trevc: eigenvectors of a (quasi-)triangular matrix.
+ public static native int strevc(int matrix_order, String side, String howmny, int [] select, int n, float [] t, int ldt, float [] vl, int ldvl, float [] vr, int ldvr, int mm, int [] m);
+ public static native int dtrevc(int matrix_order, String side, String howmny, int [] select, int n, double [] t, int ldt, double [] vl, int ldvl, double [] vr, int ldvr, int mm, int [] m);
+ public static native int ctrevc(int matrix_order, String side, String howmny, int [] select, int n, float [] t, int ldt, float [] vl, int ldvl, float [] vr, int ldvr, int mm, int [] m);
+ public static native int ztrevc(int matrix_order, String side, String howmny, int [] select, int n, double [] t, int ldt, double [] vl, int ldvl, double [] vr, int ldvr, int mm, int [] m);
+ // gehrd: reduce a general matrix to upper Hessenberg form.
+ public static native int sgehrd(int matrix_order, int n, int ilo, int ihi, float [] a, int lda, float [] tau);
+ public static native int dgehrd(int matrix_order, int n, int ilo, int ihi, double [] a, int lda, double [] tau);
+ public static native int cgehrd(int matrix_order, int n, int ilo, int ihi, float [] a, int lda, float [] tau);
+ public static native int zgehrd(int matrix_order, int n, int ilo, int ihi, double [] a, int lda, double [] tau);
+ // hseqr: eigenvalues / Schur form of a Hessenberg matrix; the real variants take separate wr/wi arrays, the complex ones a single w.
+ public static native int shseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, float [] h, int ldh, float [] wr, float [] wi, float [] z, int ldz);
+ public static native int dhseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, double [] h, int ldh, double [] wr, double [] wi, double [] z, int ldz);
+ public static native int chseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, float [] h, int ldh, float [] w, float [] z, int ldz);
+ public static native int zhseqr(int matrix_order, String job, String compz, int n, int ilo, int ihi, double [] h, int ldh, double [] w, double [] z, int ldz);
+ // gebak: back-transform eigenvectors of a matrix that was balanced by gebal.
+ public static native int sgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, float [] scale, int m, float [] v, int ldv);
+ public static native int dgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, double [] scale, int m, double [] v, int ldv);
+ public static native int cgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, float [] scale, int m, float [] v, int ldv);
+ public static native int zgebak(int matrix_order, String job, String side, int n, int ilo, int ihi, double [] scale, int m, double [] v, int ldv);
+ // geqrf: QR factorization.
+ public static native int sgeqrf(int matrix_order, int m, int n, float [] a, int lda, float [] tau);
+ public static native int dgeqrf(int matrix_order, int m, int n, double [] a, int lda, double [] tau);
+ public static native int cgeqrf(int matrix_order, int m, int n, float [] a, int lda, float [] tau);
+ public static native int zgeqrf(int matrix_order, int m, int n, double [] a, int lda, double [] tau);
+ // geqp3: QR factorization with column pivoting (jpvt).
+ public static native int sgeqp3(int matrix_order, int m, int n, float [] a, int lda, int [] jpvt, float [] tau);
+ public static native int dgeqp3(int matrix_order, int m, int n, double [] a, int lda, int [] jpvt, double [] tau);
+ public static native int cgeqp3(int matrix_order, int m, int n, float [] a, int lda, int [] jpvt, float [] tau);
+ public static native int zgeqp3(int matrix_order, int m, int n, double [] a, int lda, int [] jpvt, double [] tau);
+ // orgqr: form the orthogonal Q of a geqrf factorization (real types).
+ public static native int sorgqr(int matrix_order, int m, int n, int k, float [] a, int lda, float [] tau);
+ public static native int dorgqr(int matrix_order, int m, int n, int k, double [] a, int lda, double [] tau);
+ // ungqr: complex counterpart of orgqr.
+ public static native int cungqr(int matrix_order, int m, int n, int k, float [] a, int lda, float [] tau);
+ public static native int zungqr(int matrix_order, int m, int n, int k, double [] a, int lda, double [] tau);
+}
\ No newline at end of file
diff --git a/src/main/java/edu/berkeley/bid/SPBLAS.java b/src/main/java/edu/berkeley/bid/SPBLAS.java
new file mode 100755
index 00000000..6effca69
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/SPBLAS.java
@@ -0,0 +1,35 @@
+package edu.berkeley.bid;
+
+public final class SPBLAS {
+ // JNI declarations for sparse-matrix products, resolved from the "bidmatmkl" native library loaded below.
+ private SPBLAS() {}
+
+ static {
+ System.loadLibrary("bidmatmkl");
+ }
+ // float versions. Names presumably follow the MKL Sparse BLAS scheme (csr/csc = compressed sparse row/column; mm = times matrix b into c, mv = times vector x into y) - confirm in the JNI source. val/ir/jc carry the sparse operand.
+ public static native void scsrmm(String transa, int m, int n, int k, float alpha, String matdescra,
+ float [] val, int [] ir, int [] jc, float [] b, int ldb, float beta, float [] c, int ldc);
+
+ public static native void scscmm(String transa, int m, int n, int k, float alpha, String matdescra,
+ float [] val, int [] ir, int [] jc, float [] b, int ldb, float beta, float [] c, int ldc);
+
+ public static native void scsrmv (String transa, int m, int k, float alpha, String matdescra,
+ float [] val, int [] ir, int [] jc, float [] x, float beta, float [] y);
+
+ public static native void scscmv (String transa, int m, int k, float alpha, String matdescra,
+ float [] val, int [] ir, int [] jc, float [] x, float beta, float [] y);
+ // double versions of the four routines above, with identical parameter lists apart from the element type.
+ public static native void dcsrmm(String transa, int m, int n, int k, double alpha, String matdescra,
+ double [] val, int [] ir, int [] jc, double [] b, int ldb, double beta, double [] c, int ldc);
+
+ public static native void dcscmm(String transa, int m, int n, int k, double alpha, String matdescra,
+ double [] val, int [] ir, int [] jc, double [] b, int ldb, double beta, double [] c, int ldc);
+
+ public static native void dcsrmv (String transa, int m, int k, double alpha, String matdescra,
+ double [] val, int [] ir, int [] jc, double [] x, double beta, double [] y);
+
+ public static native void dcscmv (String transa, int m, int k, double alpha, String matdescra,
+ double [] val, int [] ir, int [] jc, double [] x, double beta, double [] y);
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/berkeley/bid/UTILS.java b/src/main/java/edu/berkeley/bid/UTILS.java
new file mode 100755
index 00000000..5a20f2cd
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/UTILS.java
@@ -0,0 +1,41 @@
+package edu.berkeley.bid;
+import java.io.*;
+import java.util.zip.*;
+
+public final class UTILS {
+
+ private UTILS() {}
+
+ static {
+ System.loadLibrary("bidmatmkl");
+ }
+
+ public static native void memcpybi( int n, byte [] a, int startA, int [] b, int startB );
+ public static native void memcpybf( int n, byte [] a, int startA, float [] b, int startB );
+ public static native void memcpybd( int n, byte [] a, int startA, double [] b, int startB );
+
+ public static native void memcpyib( int n, int [] a, int startA, byte [] b, int startB );
+ public static native void memcpyfb( int n, float [] a, int startA, byte [] b, int startB );
+ public static native void memcpydb( int n, double [] a, int startA, byte [] b, int startB );
+
+ public static OutputStream _getOutputStream(String fname, Boolean compressed, int compressionLevel) throws IOException {
+ FileOutputStream fout = new FileOutputStream(fname);
+ if (compressed) {
+ switch (compressionLevel) {
+ case 1: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(1);}};
+ case 2: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(2);}};
+ case 3: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(3);}};
+ case 4: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(4);}};
+ case 5: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(5);}};
+ case 6: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(6);}};
+ case 7: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(7);}};
+ case 8: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(8);}};
+ case 9: return new GZIPOutputStream(fout, 1024*1024){{def.setLevel(9);}};
+ default: throw new RuntimeException("Unsupported compression level "+compressionLevel);
+ }
+ } else {
+ return new BufferedOutputStream(fout, 1024*1024);
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/berkeley/bid/VML.java b/src/main/java/edu/berkeley/bid/VML.java
new file mode 100755
index 00000000..8bb0b482
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/VML.java
@@ -0,0 +1,144 @@
+package edu.berkeley.bid;
+
+/**
+ * Static JNI declarations for element-wise vector math routines implemented in
+ * the native "bidmatmkl" library. The class holds no state and cannot be
+ * instantiated.
+ *
+ * Naming visible in the signatures: a "vs" prefix takes float arrays, a "vd"
+ * prefix takes double arrays; n is passed first, inputs next, result array(s) last.
+ * NOTE(review): the names match Intel MKL VML functions -- presumably each
+ * forwards to the MKL routine of the same name; confirm against the JNI source.
+ */
+public final class VML {
+
+ // Private constructor: only static members are exposed.
+ private VML() {}
+
+ // Load the native library that provides the implementations declared below.
+ static {
+ System.loadLibrary("bidmatmkl");
+ }
+
+ /**
+  * Integer flag constants, grouped by name into accuracy (LA/HA/EP), error
+  * handling (ERRMODE_*) and FTZDAZ flags.
+  * NOTE(review): presumably intended for vmlSetMode/vmlGetMode below and
+  * mirroring the MKL header values -- confirm.
+  */
+ public final static class VMLMODE {
+ private VMLMODE() {}
+ public final static int VML_LA = 0x00000001;
+ public final static int VML_HA = 0x00000002;
+ public final static int VML_EP = 0x00000003;
+
+ public final static int VML_ERRMODE_IGNORE = 0x00000100;
+ public final static int VML_ERRMODE_ERRNO = 0x00000200;
+ public final static int VML_ERRMODE_STDERR = 0x00000400;
+ public final static int VML_ERRMODE_EXCEPT = 0x00000800;
+ public final static int VML_ERRMODE_CALLBACK = 0x00001000;
+ // Default is the bitwise OR of the ERRNO, CALLBACK and EXCEPT flags.
+ public final static int VML_ERRMODE_DEFAULT = VML_ERRMODE_ERRNO | VML_ERRMODE_CALLBACK | VML_ERRMODE_EXCEPT;
+
+ public final static int VML_FTZDAZ_ON = 0x00280000;
+ public final static int VML_FTZDAZ_OFF = 0x00140000;
+ }
+
+
+ // ---- element-wise functions: (n, input array(s), result array) ----
+ public static native void vsAbs ( int n, float [] a, float [] r);
+ public static native void vdAbs ( int n, double [] a, double [] r);
+ public static native void vsAdd ( int n, float [] a, float [] b, float [] r);
+ public static native void vdAdd ( int n, double [] a, double [] b, double [] r);
+ public static native void vsSub ( int n, float [] a, float [] b, float [] r);
+ public static native void vdSub ( int n, double [] a, double [] b, double [] r);
+ public static native void vsInv ( int n, float [] a, float [] r);
+ public static native void vdInv ( int n, double [] a, double [] r);
+ public static native void vsSqrt ( int n, float [] a, float [] r);
+ public static native void vdSqrt ( int n, double [] a, double [] r);
+ public static native void vsExp ( int n, float [] a, float [] r);
+ public static native void vdExp ( int n, double [] a, double [] r);
+ public static native void vsExpm1 ( int n, float [] a, float [] r);
+ public static native void vdExpm1 ( int n, double [] a, double [] r);
+ public static native void vsLn ( int n, float [] a, float [] r);
+ public static native void vdLn ( int n, double [] a, double [] r);
+ public static native void vsLog10 ( int n, float [] a, float [] r);
+ public static native void vdLog10 ( int n, double [] a, double [] r);
+ public static native void vsLog1p ( int n, float [] a, float [] r);
+ public static native void vdLog1p ( int n, double [] a, double [] r);
+ public static native void vsCos ( int n, float [] a, float [] r);
+ public static native void vdCos ( int n, double [] a, double [] r);
+ public static native void vsSin ( int n, float [] a, float [] r);
+ public static native void vdSin ( int n, double [] a, double [] r);
+ public static native void vsTan ( int n, float [] a, float [] r);
+ public static native void vdTan ( int n, double [] a, double [] r);
+ public static native void vsCosh ( int n, float [] a, float [] r);
+ public static native void vdCosh ( int n, double [] a, double [] r);
+ public static native void vsSinh ( int n, float [] a, float [] r);
+ public static native void vdSinh ( int n, double [] a, double [] r);
+ public static native void vsTanh ( int n, float [] a, float [] r);
+ public static native void vdTanh ( int n, double [] a, double [] r);
+ public static native void vsAcos ( int n, float [] a, float [] r);
+ public static native void vdAcos ( int n, double [] a, double [] r);
+ public static native void vsAsin ( int n, float [] a, float [] r);
+ public static native void vdAsin ( int n, double [] a, double [] r);
+ public static native void vsAtan ( int n, float [] a, float [] r);
+ public static native void vdAtan ( int n, double [] a, double [] r);
+ public static native void vsAcosh ( int n, float [] a, float [] r);
+ public static native void vdAcosh ( int n, double [] a, double [] r);
+ public static native void vsAsinh ( int n, float [] a, float [] r);
+ public static native void vdAsinh ( int n, double [] a, double [] r);
+ public static native void vsAtanh ( int n, float [] a, float [] r);
+ public static native void vdAtanh ( int n, double [] a, double [] r);
+ public static native void vsErf ( int n, float [] a, float [] r);
+ public static native void vdErf ( int n, double [] a, double [] r);
+ public static native void vsErfInv ( int n, float [] a, float [] r);
+ public static native void vdErfInv ( int n, double [] a, double [] r);
+ public static native void vsHypot ( int n, float [] a, float [] b, float [] r);
+ public static native void vdHypot ( int n, double [] a, double [] b, double [] r);
+ public static native void vsErfc ( int n, float [] a, float [] r);
+ public static native void vdErfc ( int n, double [] a, double [] r);
+ public static native void vsErfcInv ( int n, float [] a, float [] r);
+ public static native void vdErfcInv ( int n, double [] a, double [] r);
+ public static native void vsCdfNorm ( int n, float [] a, float [] r);
+ public static native void vdCdfNorm ( int n, double [] a, double [] r);
+ public static native void vsCdfNormInv ( int n, float [] a, float [] r);
+ public static native void vdCdfNormInv ( int n, double [] a, double [] r);
+ public static native void vsLGamma ( int n, float [] a, float [] r);
+ public static native void vdLGamma ( int n, double [] a, double [] r);
+ public static native void vsTGamma ( int n, float [] a, float [] r);
+ public static native void vdTGamma ( int n, double [] a, double [] r);
+ public static native void vsAtan2 ( int n, float [] a, float [] b, float [] r);
+ public static native void vdAtan2 ( int n, double [] a, double [] b, double [] r);
+ public static native void vsMul ( int n, float [] a, float [] b, float [] r);
+ public static native void vdMul ( int n, double [] a, double [] b, double [] r);
+ public static native void vsDiv ( int n, float [] a, float [] b, float [] r);
+ public static native void vdDiv ( int n, double [] a, double [] b, double [] r);
+ public static native void vsPow ( int n, float [] a, float [] b, float [] r);
+ public static native void vdPow ( int n, double [] a, double [] b, double [] r);
+ public static native void vsPow3o2 ( int n, float [] a, float [] r);
+ public static native void vdPow3o2 ( int n, double [] a, double [] r);
+ public static native void vsPow2o3 ( int n, float [] a, float [] r);
+ public static native void vdPow2o3 ( int n, double [] a, double [] r);
+ // Powx takes a scalar second operand b rather than an array.
+ public static native void vsPowx ( int n, float [] a, float b, float [] r);
+ public static native void vdPowx ( int n, double [] a, double b, double [] r);
+ // SinCos and Modf fill two result arrays, r1 and r2.
+ public static native void vsSinCos ( int n, float [] a, float [] r1, float [] r2);
+ public static native void vdSinCos ( int n, double [] a, double [] r1, double [] r2);
+ public static native void vsLinearFrac ( int n, float [] a, float [] b, float scalea, float shifta, float scaleb, float shiftb, float [] r);
+ public static native void vdLinearFrac ( int n, double [] a, double [] b, double scalea, double shifta, double scaleb, double shiftb, double [] r);
+ public static native void vsCeil ( int n, float [] a, float [] r);
+ public static native void vdCeil ( int n, double [] a, double [] r);;
+ public static native void vsFloor ( int n, float [] a, float [] r);
+ public static native void vdFloor ( int n, double [] a, double [] r);
+ public static native void vsModf ( int n, float [] a, float [] r1, float [] r2);
+ public static native void vdModf ( int n, double [] a, double [] r1, double [] r2);
+ // The vms/vmd variants take an extra per-call mode argument.
+ public static native void vmsModf ( int n, float [] a, float [] r1, float [] r2, long mode);
+ public static native void vmdModf ( int n, double [] a, double [] r1, double [] r2, long mode);
+ public static native void vsNearbyInt ( int n, float [] a, float [] r);
+ public static native void vdNearbyInt ( int n, double [] a, double [] r);
+ public static native void vsRint ( int n, float [] a, float [] r);
+ public static native void vdRint ( int n, double [] a, double [] r);
+ public static native void vsRound ( int n, float [] a, float [] r);
+ public static native void vdRound ( int n, double [] a, double [] r);
+ public static native void vsTrunc ( int n, float [] a, float [] r);
+ public static native void vdTrunc ( int n, double [] a, double [] r);
+ // ---- pack / unpack: selection by increment (I), index array (V) or mask array (M) ----
+ // NOTE(review): the I/V/M meaning is read off the parameter names (incra, ia, ma) -- confirm.
+ public static native void vsPackI ( int n, float [] a, int incra, float [] y);
+ public static native void vdPackI ( int n, double [] a, int incra, double [] y);
+ public static native void vsPackV ( int n, float [] a, int [] ia, float [] y);
+ public static native void vdPackV ( int n, double [] a, int [] ia, double [] y);
+ public static native void vsPackM ( int n, float [] a, int [] ma, float [] y);
+ public static native void vdPackM ( int n, double [] a, int [] ma, double [] y);
+ public static native void vsUnpackI ( int n, float [] a, float [] y, int incry );
+ public static native void vdUnpackI ( int n, double [] a, double [] y, int incry );
+ public static native void vsUnpackV ( int n, float [] a, float [] y, int [] iy );
+ public static native void vdUnpackV ( int n, double [] a, double [] y, int [] iy );
+ public static native void vsUnpackM ( int n, float [] a, float [] y, int [] my );
+ public static native void vdUnpackM ( int n, double [] a, double [] y, int [] my );
+ // ---- service functions: error status and mode; each returns an int ----
+ public static native int vmlSetErrStatus ( int status);
+ public static native int vmlGetErrStatus ();
+ public static native int vmlClearErrStatus ();
+ public static native int vmlSetMode ( int newmode);
+ public static native int vmlGetMode ();
+ public static native void MKLFreeTls ( int fdwReason);
+
+}
diff --git a/src/main/java/edu/berkeley/bid/VSL.java b/src/main/java/edu/berkeley/bid/VSL.java
new file mode 100755
index 00000000..e4994b8e
--- /dev/null
+++ b/src/main/java/edu/berkeley/bid/VSL.java
@@ -0,0 +1,118 @@
+package edu.berkeley.bid;
+
+/**
+ * Java-side wrapper for a random-number stream managed by the native
+ * "bidmatmkl" library, plus static native generators that take such a stream.
+ *
+ * NOTE(review): the names match Intel MKL VSL routines -- presumably each native
+ * forwards to the MKL function of the same name and the int return value is the
+ * MKL status code; confirm against the JNI source.
+ */
+public final class VSL {
+
+ // Load the native library that provides the implementations declared below.
+ static { System.loadLibrary( "bidmatmkl" ); }
+
+ // Opaque native stream handle; 0 means "no stream". No Java code in this class
+ // assigns a non-zero value -- presumably native code (vslNewStream) sets it; confirm.
+ private long handle = 0;
+
+ public VSL() {}
+
+ // Releases the native stream when this object is collected, if one is attached.
+ protected void finalize() {
+ if (handle != 0) {
+ vslDeleteStream(this);
+ handle = 0;
+ }
+ }
+
+ // ---- stream lifecycle ----
+ public static native int vslNewStream(VSL stream, int brng, int seed);
+
+ public static native int vslDeleteStream(VSL stream);
+
+ // ---- continuous distributions: vd* fill a double array r, vs* fill a float array r ----
+ public static native int vdRngCauchy(int method, VSL stream, int n, double[] r, double a, double beta);
+
+ public static native int vsRngCauchy(int method, VSL stream, int n, float[] r, float a, float beta);
+
+ public static native int vdRngUniform(int method, VSL stream, int n, double[] r, double a, double b);
+
+ public static native int vsRngUniform(int method, VSL stream, int n, float[] r, float a, float b);
+
+ public static native int vdRngGaussian(int method, VSL stream, int n, double[] r, double a, double sigma);
+
+ public static native int vsRngGaussian(int method, VSL stream, int n, float[] r, float a, float sigma);
+
+ public static native int vdRngGaussianMV(int method, VSL stream, int n, double[] r, int dimen, int mstorage, double[] a, double[] t);
+
+ public static native int vsRngGaussianMV(int method, VSL stream, int n, float[] r, int dimen, int mstorage, float[] a, float[] t);
+
+ public static native int vdRngExponential(int method, VSL stream, int n, double[] r, double a, double beta);
+
+ public static native int vsRngExponential(int method, VSL stream, int n, float[] r, float a, float beta);
+
+ public static native int vdRngLaplace(int method, VSL stream, int n, double[] r, double a, double beta);
+
+ public static native int vsRngLaplace(int method, VSL stream, int n, float[] r, float a, float beta);
+
+ public static native int vdRngWeibull(int method, VSL stream, int n, double[] r, double alpha, double a, double beta);
+
+ public static native int vsRngWeibull(int method, VSL stream, int n, float[] r, float alpha, float a, float beta);
+
+ public static native int vdRngRayleigh(int method, VSL stream, int n, double[] r, double a, double beta);
+
+ public static native int vsRngRayleigh(int method, VSL stream, int n, float[] r, float a, float beta);
+
+ public static native int vdRngLognormal(int method, VSL stream, int n, double[] r, double a, double sigma, double b, double beta);
+
+ public static native int vsRngLognormal(int method, VSL stream, int n, float[] r, float a, float sigma, float b, float beta);
+
+ public static native int vdRngGumbel(int method, VSL stream, int n, double[] r, double a, double beta);
+
+ public static native int vsRngGumbel(int method, VSL stream, int n, float[] r, float a, float beta);
+
+ public static native int vdRngGamma(int method, VSL stream, int n, double[] r, double alpha, double a, double beta);
+
+ public static native int vsRngGamma(int method, VSL stream, int n, float[] r, float alpha, float a, float beta);
+
+ public static native int vdRngBeta(int method, VSL stream, int n, double[] r, double p, double q, double a, double beta);
+
+ public static native int vsRngBeta(int method, VSL stream, int n, float[] r, float p, float q, float a, float beta);
+
+ // ---- discrete distributions: vi* fill an int array r ----
+ public static native int viRngBernoulli(int method, VSL stream, int n, int[] r, double p);
+
+ public static native int viRngUniform(int method, VSL stream, int n, int[] r, int a, int b);
+
+ public static native int viRngUniformBits(int method, VSL stream, int n, int[] r);
+
+ public static native int viRngGeometric(int method, VSL stream, int n, int[] r, double p);
+
+ public static native int viRngBinomial(int method, VSL stream, int n, int[] r, int ntrial, double p);
+
+ public static native int viRngHypergeometric(int method, VSL stream, int n, int[] r, int l, int s, int m);
+
+ public static native int viRngNegbinomial(int method, VSL stream, int n, int[] r, double a, double p);
+
+ public static native int viRngPoisson(int method, VSL stream, int n, int[] r, double lambda);
+
+ public static native int viRngPoissonV(int method, VSL stream, int n, int[] r, double[] lambda);
+
+ // ---- stream utilities ----
+ public static native int vslSkipAheadStream(VSL stream, int nskip);
+
+ public static native int vslGetStreamStateBrng(VSL stream);
+
+ public static native int vslGetNumRegBrngs();
+
+ // Basic generator identifiers, spaced 0x100000 apart.
+ // NOTE(review): presumably the values accepted as `brng` by vslNewStream -- confirm.
+ public final static int BRNG_MCG31 = 0x100000;
+
+ public final static int BRNG_R250 = 0x200000;
+
+ public final static int BRNG_MRG32K3A = 0x300000;
+
+ public final static int BRNG_MCG59 = 0x400000;
+
+ public final static int BRNG_WH = 0x500000;
+
+ public final static int BRNG_SOBOL = 0x600000;
+
+ public final static int BRNG_NIEDERR = 0x700000;
+
+ public final static int BRNG_MT19937 = 0x800000;
+
+ public final static int BRNG_MT2203 = 0x900000;
+
+ public final static int BRNG_IABSTRACT = 0xa00000;
+
+ public final static int BRNG_DABSTRACT = 0xb00000;
+
+ public final static int BRNG_SABSTRACT = 0xc00000;
+
+}
diff --git a/src/main/scala/BIDMat/BMat.scala b/src/main/scala/BIDMat/BMat.scala
new file mode 100755
index 00000000..443a1e84
--- /dev/null
+++ b/src/main/scala/BIDMat/BMat.scala
@@ -0,0 +1,172 @@
+package BIDMat
+import edu.berkeley.bid.CBLAS._
+import edu.berkeley.bid.LAPACK._
+
+/**
+ * Sparse matrix of Bytes built on SparseMat[Byte].
+ *
+ * toCSMat and toString decode the bytes of each column (delimited by consecutive
+ * jc entries, adjusted by Mat.ioneBased) as one string in BMat.encoding.
+ */
+case class BMat(nr:Int, nc:Int, nnz1:Int, ir0:Array[Int], jc0:Array[Int], data0:Array[Byte]) extends SparseMat[Byte](nr, nc, nnz1, ir0, jc0, data0) {
+
+  def size() = length;
+
+  /** Casts m to BMat, or throws RuntimeException naming the operator s. */
+  def tryForBMat(m:Mat, s:String):BMat =
+    m match {
+      case mm:BMat => mm
+      case _ => throw new RuntimeException("wrong type for operator "+s+" arg "+m)
+    }
+
+  /** Casts an output matrix to BMat; null passes through, other types throw RuntimeException. */
+  def tryForOutBMat(out:Mat):BMat =
+    if (out.asInstanceOf[AnyRef] == null) {
+      null
+    } else {
+      out match {
+        case outmat:BMat => outmat
+        case _ => throw new RuntimeException("wrong type for LHS matrix "+out)
+      }
+    }
+
+  override def mytype = "BMat"
+
+  override def t:BMat = BMat(gt)
+
+  def horzcat(b: BMat) = BMat(super.horzcat(b))
+
+  def vertcat(b: BMat) = BMat(super.vertcat(b))
+
+  /** Returns (row indices, column indices, values) with the byte values widened into an IMat. */
+  def find3:(IMat, IMat, IMat) = {
+    val (ii, jj, vv) = gfind3
+    val vi = IMat(vv.length, 1)
+    Mat.copyToIntArray(vv.data, 0, vi.data, 0, vv.length)
+    (IMat(ii), IMat(jj), vi)
+  }
+
+  override def apply(a:IMat, b:IMat):BMat = BMat(gapply(a, b))
+
+  override def apply(a:IMat, b:Int):BMat = BMat(gapply(a, IMat.ielem(b)))
+
+  override def apply(a:Int, b:IMat):BMat = BMat(gapply(IMat.ielem(a), b))
+
+  def bbMatOp(b: BMat, f:(Byte, Byte) => Byte, out:Mat):BMat = BMat(sgMatOp(b, f, out))
+
+  def bbMatOpScalar(b: Byte, f:(Byte, Byte) => Byte, out:Mat):BMat = BMat(sgMatOpScalar(b, f, out))
+
+  def bbReduceOp(n:Int, f1:(Byte) => Byte, f2:(Byte, Byte) => Byte) = IMat(sgReduceOp(n, f1, f2, null))
+
+  /** Decodes every column into a string and returns them as an ncols x 1 CSMat. */
+  def toCSMat:CSMat = {
+    val out = CSMat(ncols, 1)
+    val ioff = Mat.ioneBased
+    var i = 0
+    while (i < ncols) {
+      out.data(i) = new String(data, jc(i)-ioff, jc(i+1)-jc(i), BMat.encoding)
+      i += 1
+    }
+    out
+  }
+
+  /**
+   * Renders the leading columns as strings laid out in fixed-width cells, wrapping
+   * lines at the terminal width. "..." is appended only when some columns were
+   * left out because the character budget (10 terminal lines) ran out.
+   */
+  override def toString:String = {
+    val ioff = Mat.ioneBased
+    val ss = new StringBuilder
+    val nChars = Mat.terminalWidth-4
+    val totchars = 10*nChars
+    var nelems = 0
+    var maxlen = 0
+    val lbuf = new scala.collection.mutable.ListBuffer[String]
+    while (maxlen * nelems < totchars && nelems < ncols) {
+      val str = new String(data, jc(nelems)-ioff, jc(nelems+1)-jc(nelems), BMat.encoding)
+      lbuf.append(str)
+      maxlen = math.max(maxlen, 1+str.length)
+      nelems += 1
+    }
+    // nelems now counts the columns actually collected; it is compared with ncols below.
+    var thisrow = 0
+    lbuf.foreach((str:String) => {
+      ss.append(str)
+      // Pad to the cell width character by character so the width is not capped
+      // by the length of a pre-built blank string.
+      var pad = maxlen - str.length
+      while (pad > 0) {
+        ss.append(' ')
+        pad -= 1
+      }
+      thisrow += 1
+      if ((thisrow + 1) * maxlen >= nChars) {
+        ss.append("\n")
+        thisrow = 0
+      }
+    })
+    if (nelems < ncols) {
+      ss.append("...")
+    }
+    ss.toString
+  }
+
+  // Scalar comparisons: each yields a BMat holding 1 where the predicate holds and 0 elsewhere.
+  def > (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x > y) 1 else 0, null)
+  def < (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x < y) 1 else 0, null)
+  def == (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x == y) 1 else 0, null)
+  def === (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x == y) 1 else 0, null)
+  def >= (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x >= y) 1 else 0, null)
+  def <= (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x <= y) 1 else 0, null)
+  def != (b : Byte) = bbMatOpScalar(b, (x:Byte, y:Byte) => if (x != y) 1 else 0, null)
+
+  /** Horizontal concatenation; throws RuntimeException when b is not a BMat. */
+  override def \ (b: Mat) = b match {
+    case fb:BMat => horzcat(fb)
+    case _ => throw new RuntimeException("mismatched types for operator \\")
+  }
+
+  /** Vertical concatenation; throws RuntimeException when b is not a BMat. */
+  override def on (b: Mat) = b match {
+    case fb:BMat => vertcat(fb)
+    case _ => throw new RuntimeException("mismatched types for operator on")
+  }
+
+  /** Pairs this matrix (as output) with the BMat operand b. */
+  override def ~ (b: Mat):Pair =
+    b match {
+      case db:BMat => new BPair(this, db)
+      case _ => throw new RuntimeException("mismatched types for operator ~")
+    }
+}
+
+/**
+ * Couples a BMat operand `mat` with a matrix `omat` that is handed to
+ * bbMatOpScalar as its output argument for every scalar comparison.
+ */
+class BPair (val omat:Mat, val mat:BMat) extends Pair {
+
+  // Shared driver: evaluate the predicate per element and encode the outcome as 1 or 0.
+  private def flagOp(b:Byte, holds:(Byte, Byte) => Boolean) =
+    mat.bbMatOpScalar(b, (x:Byte, y:Byte) => if (holds(x, y)) 1 else 0, omat)
+
+  def > (b : Byte) = flagOp(b, (x:Byte, y:Byte) => x > y)
+  def < (b : Byte) = flagOp(b, (x:Byte, y:Byte) => x < y)
+  def == (b : Byte) = flagOp(b, (x:Byte, y:Byte) => x == y)
+  def >= (b : Byte) = flagOp(b, (x:Byte, y:Byte) => x >= y)
+  def <= (b : Byte) = flagOp(b, (x:Byte, y:Byte) => x <= y)
+  def != (b : Byte) = flagOp(b, (x:Byte, y:Byte) => x != y)
+}
+
+/** Factories for BMat and the character encoding used when converting to and from strings. */
+object BMat {
+
+  /** Allocates an empty nr x nc matrix with room for nnz0 entries. */
+  def apply(nr:Int, nc:Int, nnz0:Int):BMat = new BMat(nr, nc, nnz0, new Array[Int](nnz0), new Array[Int](nc+1), new Array[Byte](nnz0))
+
+  /** Wraps the arrays of an existing SparseMat[Byte] without copying them. */
+  def apply(a:SparseMat[Byte]):BMat = new BMat(a.nrows, a.ncols, a.nnz, a.ir, a.jc, a.data)
+
+  /** Like apply(nr, nc, nnz0) but with a null row-index array. */
+  def SnoRows(nr:Int, nc:Int, nnz0:Int):BMat = new BMat(nr, nc, nnz0, null, new Array[Int](nc+1), new Array[Byte](nnz0))
+
+  var encoding = "UTF8"
+// var encoding = "UTF_16LE"
+
+  /**
+   * Packs the strings of cc into one BMat, one column per string, each column
+   * holding the string's bytes in `encoding`. nrows is one more than the longest
+   * byte length. Every jc entry, including the final end marker, carries the
+   * Mat.ioneBased offset so jc(i+1)-jc(i) is the byte length of column i.
+   */
+  def apply(cc:CSMat):BMat = {
+    val ioff = Mat.ioneBased
+    val ncolsx = cc.length
+    // Encode each string once and reuse the bytes for both sizing and copying.
+    val encoded = new Array[Array[Byte]](ncolsx)
+    var nrowsx = 0
+    var nnzx = 0
+    var i = 0
+    while (i < ncolsx) {
+      val bytes = cc(i).getBytes(encoding)
+      encoded(i) = bytes
+      nnzx += bytes.length
+      nrowsx = math.max(nrowsx, 1+bytes.length)
+      i += 1
+    }
+    val out = SnoRows(nrowsx, ncolsx, nnzx)
+    nnzx = 0
+    i = 0
+    while (i < ncolsx) {
+      out.jc(i) = nnzx + ioff
+      val bytes = encoded(i)
+      System.arraycopy(bytes, 0, out.data, nnzx, bytes.length)
+      nnzx += bytes.length
+      i += 1
+    }
+    // The end marker needs the same base offset as the column starts; without it
+    // the last column would appear ioff bytes shorter than it is.
+    out.jc(i) = nnzx + ioff
+    out
+  }
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/CMat.scala b/src/main/scala/BIDMat/CMat.scala
new file mode 100755
index 00000000..081f120a
--- /dev/null
+++ b/src/main/scala/BIDMat/CMat.scala
@@ -0,0 +1,1056 @@
+package BIDMat
+import edu.berkeley.bid.CBLAS._
+import edu.berkeley.bid.LAPACK._
+import java.util.Arrays
+
+case class CMat(nr:Int, nc:Int, data0:Array[Float]) extends DenseMat[Float](nr, nc, data0) {
+
+ /** Element count of this matrix, as reported by `length`. */
+ def size() = length
+
+ /**
+  * First stored float (data(0)) widened to Double.
+  * Throws RuntimeException when the matrix has more than one row or column.
+  */
+ override def dv:Double = {
+   val atMostOneCell = nrows <= 1 && ncols <= 1
+   if (atMostOneCell) {
+     data(0)
+   } else {
+     throw new RuntimeException("Matrix should be 1x1 to extract value")
+   }
+ }
+
+ // Name of this matrix type as a string.
+ override def mytype = "CMat"
+
+ /**
+  * Reads the element at row r0, column c0 (both relative to Mat.oneBased) and
+  * returns it via CMat.celem. Each element occupies two consecutive floats of
+  * `data`, laid out column by column.
+  * Throws IndexOutOfBoundsException when either index lies outside the matrix,
+  * including below the base index.
+  */
+ def get(r0:Int, c0:Int):CMat = {
+   val off = Mat.oneBased
+   val r = r0 - off
+   val c = c0 - off
+   // Negative offsets must be rejected as well: e.g. r = -1 with c = 1 would
+   // otherwise land on a valid but unrelated slot of the previous column.
+   if (r < 0 || c < 0 || r >= nrows || c >= ncols) {
+     throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") >= ("+nrows+","+ncols+")");
+   } else {
+     val indx = 2*(r+c*nrows)
+     CMat.celem(data(indx), data(indx+1))
+   }
+ }
+
+ /**
+  * Reads the element at linear position i0 (relative to Mat.oneBased) and
+  * returns it via CMat.celem from the float pair at 2*i and 2*i+1.
+  * Throws IndexOutOfBoundsException when the position is outside [0, length).
+  */
+ def get(i0:Int):CMat = {
+   val base = Mat.oneBased
+   val pos = i0 - base
+   val inRange = pos >= 0 && pos < length
+   if (inRange) {
+     val slot = 2*pos
+     CMat.celem(data(slot), data(slot+1))
+   } else {
+     throw new IndexOutOfBoundsException(""+(pos+base)+" >= ("+nrows+","+ncols+")");
+   }
+ }
+
+ /**
+  * Stores the first element of v (v.data(0), v.data(1)) at row r0, column c0,
+  * both relative to Mat.oneBased, and returns v.
+  * Throws IndexOutOfBoundsException when either index lies outside the matrix,
+  * including below the base index.
+  */
+ def update(r0:Int, c0:Int, v:CMat):CMat = {
+   val off = Mat.oneBased
+   val r = r0 - off
+   val c = c0 - off
+   // Reject negative offsets too, so a bad index can never overwrite a
+   // neighbouring element of another column.
+   if (r < 0 || c < 0 || r >= nrows || c >= ncols) {
+     throw new IndexOutOfBoundsException("("+(r+off)+","+(c+off)+") >= ("+nrows+","+ncols+")");
+   } else {
+     val indx = 2*(r+c*nrows)
+     data(indx) = v.data(0)
+     data(indx+1) = v.data(1)
+   }
+   v
+ }
+
+ /**
+  * Stores the first element of v (v.data(0), v.data(1)) at linear position i0
+  * (relative to Mat.oneBased) and returns v.
+  * Throws IndexOutOfBoundsException when the position is outside [0, length).
+  */
+ def update(i0:Int, v:CMat):CMat = {
+   val base = Mat.oneBased
+   val pos = i0 - base
+   if (pos >= 0 && pos < length) {
+     val slot = 2*pos
+     data(slot) = v.data(0)
+     data(slot+1) = v.data(1)
+   } else {
+     throw new IndexOutOfBoundsException(""+(pos+base)+" >= ("+nrows+","+ncols+")");
+   }
+   v
+ }
+
+ /**
+  * Transpose: copies element (row, col) of this matrix to (col, row) of the
+  * ncols x nrows matrix obtained from CMat.newOrCheckCMat, and returns it.
+  */
+ def t(oldmat:Mat):CMat = {
+   val result = CMat.newOrCheckCMat(ncols, nrows, oldmat)
+   var row = 0
+   while (row < nrows) {
+     var col = 0
+     while (col < ncols) {
+       val src = 2*(row+col*nrows)
+       val dst = 2*(col+row*ncols)
+       result.data(dst) = data(src)
+       result.data(dst+1) = data(src+1)
+       col += 1
+     }
+     row += 1
+   }
+   result
+ }
+
+ // Transpose with no caller-supplied output: forwards a null CMat to t(oldmat).
+ override def t:CMat = t(null:CMat)
+
+ /**
+  * Conjugate transpose: like t(oldmat), but the second float of every pair
+  * (the imaginary part) is negated while being copied.
+  */
+ def h(oldmat:Mat):CMat = {
+   val result = CMat.newOrCheckCMat(ncols, nrows, oldmat)
+   var row = 0
+   while (row < nrows) {
+     var col = 0
+     while (col < ncols) {
+       val src = 2*(row+col*nrows)
+       val dst = 2*(col+row*ncols)
+       result.data(dst) = data(src)
+       result.data(dst+1) = -data(src+1)
+       col += 1
+     }
+     row += 1
+   }
+   result
+ }
+
+ // Conjugate transpose with no caller-supplied output: forwards a null CMat to h(oldmat).
+ def h:CMat = h(null:CMat)
+
+ /**
+  * Stacks a below this matrix. For every column the floats of this matrix are
+  * copied first, followed by the floats of a.
+  * Throws RuntimeException when the column counts differ.
+  */
+ def vertcat(a:CMat):CMat = {
+   if (ncols != a.ncols) {
+     throw new RuntimeException("ncols must match")
+   }
+   val tall = nrows + a.nrows
+   val out = CMat(tall, ncols)
+   var col = 0
+   while (col < ncols) {
+     // Start of this output column, in floats (two per element).
+     val dst = 2*col*tall
+     System.arraycopy(data, 2*col*nrows, out.data, dst, 2*nrows)
+     System.arraycopy(a.data, 2*col*a.nrows, out.data, dst + 2*nrows, 2*a.nrows)
+     col += 1
+   }
+   out
+ }
+
+ /**
+  * Places a to the right of this matrix: all floats of this matrix are copied
+  * first, then all floats of a.
+  * Throws RuntimeException when the row counts differ.
+  */
+ def horzcat(a:CMat):CMat = {
+   if (nrows != a.nrows) {
+     throw new RuntimeException("nrows must match")
+   }
+   val out = CMat(nrows, ncols+a.ncols)
+   val ownFloats = 2*nrows*ncols
+   System.arraycopy(data, 0, out.data, 0, ownFloats)
+   System.arraycopy(a.data, 0, out.data, ownFloats, 2*nrows*a.ncols)
+   out
+ }
+
+ /** Counts the elements whose float pair is not (0, 0). */
+ override def nnz:Int = {
+   var nonzeros = 0
+   var k = 0
+   while (k < length) {
+     val bothZero = data(2*k) == 0 && data(2*k+1) == 0
+     if (!bothZero) {
+       nonzeros += 1
+     }
+     k += 1
+   }
+   nonzeros
+ }
+
+ /**
+  * Writes into out.data, in increasing order, the linear position plus `off`
+  * of every element whose float pair is not (0, 0); returns out.
+  */
+ override def findInds(out:IMat, off:Int):IMat = {
+   var written = 0
+   var k = 0
+   while (k < length) {
+     val bothZero = data(2*k) == 0 && data(2*k+1) == 0
+     if (!bothZero) {
+       out.data(written) = k + off
+       written += 1
+     }
+     k += 1
+   }
+   out
+ }
+
+ /**
+  * Returns (row indices, column indices, values) of the elements whose float
+  * pair is not (0, 0). Row and column indices are offset by Mat.oneBased.
+  */
+ def find3:(IMat, IMat, CMat) = {
+   val off = Mat.oneBased
+   // nnz scans the whole matrix, so evaluate it once and reuse the count.
+   val count = nnz
+   val iout = IMat(count, 1)
+   val jout = IMat(count, 1)
+   val vout = CMat(count, 1)
+   // Fill iout with zero-based linear positions; they are split into row/column below.
+   findInds(iout, 0)
+   var i = 0
+   while (i < iout.length) {
+     val ival:Int = iout.data(i)
+     vout.data(2*i) = data(2*ival)
+     vout.data(2*i+1) = data(2*ival+1)
+     jout.data(i) = (ival / nrows) + off
+     iout.data(i) = (ival % nrows) + off
+     i += 1
+   }
+   (iout, jout, vout)
+ }
+
+ /**
+  * Linear indexing. A MatrixWildcard yields all elements as a length x 1 copy;
+  * any other IMat yields a matrix of iv's shape holding the selected elements.
+  * Indices are relative to Mat.oneBased; an index outside [0, length) throws
+  * RuntimeException.
+  */
+ override def apply(iv:IMat):CMat =
+   iv match {
+     case _:MatrixWildcard => {
+       val everything = CMat(length, 1)
+       System.arraycopy(data, 0, everything.data, 0, 2*everything.length)
+       everything
+     }
+     case _ => {
+       val base = Mat.oneBased
+       val picked = CMat(iv.nrows, iv.ncols)
+       var k = 0
+       while (k < picked.length) {
+         val src = iv.data(k) - base
+         if (src < 0 || src >= length) {
+           throw new RuntimeException("bad linear index "+(src+base)+" vs "+length)
+         }
+         picked.data(2*k) = data(2*src)
+         picked.data(2*k+1) = data(2*src+1)
+         k += 1
+       }
+       picked
+     }
+   }
+
+ /**
+  * Linear-index assignment.
+  * With a MatrixWildcard, b must either be a single column with `length`
+  * elements (copied wholesale) or hold one element (broadcast everywhere).
+  * Otherwise b must either have iv's shape (element-wise assignment) or hold
+  * one element (broadcast to every listed position).
+  * Throws RuntimeException("dims mismatch") for other shapes and
+  * RuntimeException("bad linear index ...") for a position outside [0, length).
+  */
+ def update(iv:IMat, b:CMat) =
+   iv match {
+     case _:MatrixWildcard => {
+       if (length == b.length && b.ncols == 1) {
+         System.arraycopy(b.data, 0, data, 0, 2*length)
+       } else if (b.length == 1) {
+         var k = 0
+         val re = b.data(0)
+         val im = b.data(1)
+         while (k < length) {
+           data(2*k) = re
+           data(2*k+1) = im
+           k += 1
+         }
+       } else throw new RuntimeException("dims mismatch")
+     }
+     case _ => {
+       val base = Mat.oneBased
+       if (iv.nrows == b.nrows && iv.ncols == b.ncols) {
+         var k = 0
+         while (k < iv.length) {
+           val dst = iv.data(k) - base
+           if (dst < 0 || dst >= length) {
+             throw new RuntimeException("bad linear index "+(dst+base)+" vs "+length)
+           }
+           data(2*dst) = b.data(2*k)
+           data(2*dst+1) = b.data(2*k+1)
+           k += 1
+         }
+       } else if (b.length == 1) {
+         val re = b.data(0)
+         val im = b.data(1)
+         var k = 0
+         while (k < iv.length) {
+           val dst = iv.data(k) - base
+           if (dst < 0 || dst >= length) {
+             throw new RuntimeException("bad linear index "+(dst+base)+" vs "+length)
+           }
+           data(2*dst) = re
+           data(2*dst+1) = im
+           k += 1
+         }
+       } else throw new RuntimeException("dims mismatch")
+     }
+   }
+
+ /**
+  * Two-dimensional slice: returns the sub-matrix formed by the rows selected by
+  * iv and the columns selected by jv (both resolved through DenseMat.getInds,
+  * indices relative to Mat.oneBased).
+  */
+ override def apply(iv:IMat, jv:IMat):CMat = {
+   val base = Mat.oneBased
+   val rowinds = DenseMat.getInds(iv, nrows)
+   val colinds = DenseMat.getInds(jv, ncols)
+   val out = CMat(rowinds.length, colinds.length)
+   var oc = 0
+   while (oc < out.ncols) {
+     var or = 0
+     val srcCol = colinds(oc) - base
+     while (or < out.nrows) {
+       val srcRow = rowinds(or) - base
+       val src = 2*(srcRow+nrows*srcCol)
+       val dst = 2*(or+oc*out.nrows)
+       out.data(dst) = data(src)
+       out.data(dst+1) = data(src+1)
+       or += 1
+     }
+     oc += 1
+   }
+   out
+ }
+
+ /** Slice with a single column index: wraps j with IMat.ielem and delegates to apply(iv, jv). */
+ override def apply(iv:IMat, j:Int):CMat = {
+   val jv = IMat.ielem(j)
+   apply(iv, jv)
+ }
+
+ /** Slice with a single row index: wraps i with IMat.ielem and delegates to apply(iv, jv). */
+ override def apply(i:Int, jv:IMat):CMat = {
+   val iv = IMat.ielem(i)
+   apply(iv, jv)
+ }
+
+ /**
+  * Two-dimensional slice assignment: writes b into the rows selected by iv and
+  * the columns selected by jv (both resolved through DenseMat.getInds, indices
+  * relative to Mat.oneBased) and returns b.
+  *
+  * b must either match the slice shape exactly, or hold a single element, in
+  * which case that element is written to every cell of the slice.
+  * Throws RuntimeException("dims mismatch in assignment") otherwise.
+  */
+ def update(iv:IMat, jv:IMat, b:CMat):CMat = {
+   val off = Mat.oneBased
+   val rowinds = DenseMat.getInds(iv, nrows)
+   val colinds = DenseMat.getInds(jv, ncols)
+   if (rowinds.length == b.nrows && colinds.length == b.ncols) {
+     var i = 0
+     while (i < b.ncols) {
+       val c = colinds(i) - off
+       var j = 0
+       while (j < b.nrows) {
+         val r = rowinds(j) - off
+         data(2*(r+nrows*c)) = b.data(2*(j+i*b.nrows))
+         data(2*(r+nrows*c)+1) = b.data(2*(j+i*b.nrows)+1)
+         j += 1
+       }
+       i += 1
+     }
+   } else if (b.length == 1) {
+     val b0 = b.data(0)
+     val b1 = b.data(1)
+     // Iterate over the selected slice, not over b: b is 1x1 here, so bounding
+     // the loops by b's shape would only ever fill one cell.
+     var i = 0
+     while (i < colinds.length) {
+       val c = colinds(i) - off
+       var j = 0
+       while (j < rowinds.length) {
+         val r = rowinds(j) - off
+         data(2*(r+nrows*c)) = b0
+         data(2*(r+nrows*c)+1) = b1
+         j += 1
+       }
+       i += 1
+     }
+   } else throw new RuntimeException("dims mismatch in assignment")
+   b
+ }
+
+ /** Slice assignment with a single column index: wraps j with IMat.ielem and delegates. */
+ def update(iv:IMat, j:Int, b:CMat):CMat = {
+   val jv = IMat.ielem(j)
+   update(iv, jv, b)
+ }
+
+ /** Slice assignment with a single row index: wraps i with IMat.ielem and delegates. */
+ def update(i:Int, jv:IMat, b:CMat):CMat = {
+   val iv = IMat.ielem(i)
+   update(iv, jv, b)
+ }
+
+ /*
+ * Implement sliced assignment, a(iv,jv) = b:T where iv and jv are vectors, using ? as wildcard
+ */
+
+ /**
+  * Applies the binary operator op2 element-wise to this matrix and a, with
+  * row/column broadcasting.
+  *
+  * op2 receives (re1, im1, re2, im2) -- the float pair of the left operand
+  * followed by the float pair of the right operand -- and returns the result pair.
+  *
+  * Shapes handled here, tested in this order:
+  *   1. this is a column vector with a's row count: reused against every column of a
+  *   2. this is a row vector with a's column count: entry i reused down column i of a
+  *   3. a is a column vector with this matrix's row count
+  *   4. a is a row vector with this matrix's column count
+  * Anything else is passed to ccMatOpStrict.
+  * The result is written to the matrix obtained from CMat.newOrCheckCMat(..., oldmat).
+  * Throws RuntimeException when a is not a CMat.
+  */
+ def ccMatOp(a:Mat, op2:(Float,Float,Float,Float) => (Float,Float), oldmat:Mat):CMat = {
+   a match {
+     case aa:CMat => {
+       if (nrows==a.nrows && ncols==1) {
+         val out = CMat.newOrCheckCMat(nrows, a.ncols, oldmat)
+         Mat.nflops += aa.length
+         var i = 0
+         while (i < a.ncols) {
+           var j = 0
+           while (j < nrows) {
+             // Left operand is element j of this column vector: real part at 2*j,
+             // imaginary part at 2*j+1.
+             val (v0, v1) = op2(data(2*j), data(2*j+1), aa.data(2*(j+i*a.nrows)), aa.data(2*(j+i*a.nrows)+1))
+             out.data(2*(j+i*nrows)) = v0
+             out.data(2*(j+i*nrows)+1) = v1
+             j += 1
+           }
+           i += 1
+         }
+         out
+       } else if (ncols==a.ncols && nrows==1) {
+         val out = CMat.newOrCheckCMat(a.nrows, ncols, oldmat)
+         Mat.nflops += aa.length
+         var i = 0
+         while (i < ncols) {
+           var j = 0
+           while (j < a.nrows) {
+             val (v0, v1) = op2(data(2*i), data(2*i+1), aa.data(2*(j+i*a.nrows)), aa.data(2*(j+i*a.nrows)+1))
+             out.data(2*(j+i*a.nrows)) = v0
+             out.data(2*(j+i*a.nrows)+1) = v1
+             j += 1
+           }
+           i += 1
+         }
+         out
+       } else if (nrows==a.nrows && a.ncols==1) {
+         val out = CMat.newOrCheckCMat(nrows, ncols, oldmat)
+         Mat.nflops += length
+         var i = 0
+         while (i < ncols) {
+           var j = 0
+           while (j < nrows) {
+             val (v0, v1) = op2(data(2*(j+i*nrows)), data(2*(j+i*nrows)+1), aa.data(2*j), aa.data(2*j+1))
+             out.data(2*(j+i*nrows)) = v0
+             out.data(2*(j+i*nrows)+1) = v1
+             j += 1
+           }
+           i += 1
+         }
+         out
+       } else if (ncols==a.ncols && a.nrows==1) {
+         val out = CMat.newOrCheckCMat(nrows, ncols, oldmat)
+         Mat.nflops += length
+         var i = 0
+         while (i < ncols) {
+           var j = 0
+           while (j < nrows) {
+             val (v0, v1) = op2(data(2*(j+i*nrows)), data(2*(j+i*nrows)+1), aa.data(2*i), aa.data(2*i+1))
+             out.data(2*(j+i*nrows)) = v0
+             out.data(2*(j+i*nrows)+1) = v1
+             j += 1
+           }
+           i += 1
+         }
+         out
+       } else ccMatOpStrict(a, op2, oldmat)
+     }
+     case _ => throw new RuntimeException("arg must be dense")
+   }
+ }
+ /*
+  * This version applies the operator op2 with stricter dimension checking,
+  * either dims must match or one arg must be scalar.
+  *
+  * op2 receives (re1, im1, re2, im2): the float pair of the left operand (this
+  * matrix) followed by the float pair of the right operand (a), and returns the
+  * result pair. The result is written to the matrix obtained from
+  * CMat.newOrCheckCMat(..., oldmat).
+  * Throws RuntimeException("dims incompatible") for any other shape combination
+  * and RuntimeException("arg must be dense") when a is not a CMat.
+  */
+ def ccMatOpStrict(a:Mat, op2:(Float,Float,Float,Float) => (Float,Float), oldmat:Mat):CMat =
+   a match {
+     case aa:CMat => {
+       if (nrows==a.nrows && ncols==a.ncols) {
+         val out = CMat.newOrCheckCMat(nrows, ncols, oldmat)
+         Mat.nflops += length
+         var i = 0
+         while (i < aa.length) {
+           val (v0, v1) = op2(data(2*i), data(2*i+1), aa.data(2*i), aa.data(2*i+1))
+           out.data(2*i) = v0
+           out.data(2*i+1) = v1
+           i += 1
+         }
+         out
+       } else if (a.nrows == 1 && a.ncols == 1) {
+         // a is the scalar: it stays on the right-hand side of op2.
+         val out = CMat.newOrCheckCMat(nrows, ncols, oldmat)
+         Mat.nflops += length
+         val a0 = aa.data(0)
+         val a1 = aa.data(1)
+         var i = 0
+         while (i < length) {
+           val (v0, v1) = op2(data(2*i), data(2*i+1), a0, a1)
+           out.data(2*i) = v0
+           out.data(2*i+1) = v1
+           i += 1
+         }
+         out
+       } else if (nrows == 1 && ncols == 1) {
+         // This matrix is the scalar: take the left operand from our own data,
+         // not from aa, and combine it with every element of aa.
+         val out = CMat.newOrCheckCMat(a.nrows, a.ncols, oldmat)
+         Mat.nflops += aa.length
+         val s0 = data(0)
+         val s1 = data(1)
+         var i = 0
+         while (i < aa.length) {
+           val (v0, v1) = op2(s0, s1, aa.data(2*i), aa.data(2*i+1))
+           out.data(2*i) = v0
+           out.data(2*i+1) = v1
+           i += 1
+         }
+         out
+       } else throw new RuntimeException("dims incompatible")
+     }
+     case _ => throw new RuntimeException("arg must be dense")
+   }
+
+ /*
+  * Applies the vector kernel opv to this matrix and a, broadcasting a row or
+  * column vector across the other operand when the shapes allow it.
+  *
+  * opv is called as opv(a, a0, ainc, b, b0, binc, c, c0, cinc, n); offsets,
+  * increments and n are counted in complex elements. An increment of 0 keeps
+  * re-reading the same element, which is how a row vector is broadcast down
+  * a column. Shapes that fit none of the four broadcast rules are passed to
+  * ccMatOpStrictv.
+  *
+  * Throws RuntimeException("arg must be dense") when a is not a CMat.
+  */
+ def ccMatOpv(a:Mat, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, oldmat:Mat):CMat = {
+   a match {
+     case aa:CMat => {
+       if (nrows==a.nrows && ncols==1) {
+         // this is a column vector: reuse it against every column of a
+         val out = CMat.newOrCheckCMat(nrows, a.ncols, oldmat)
+         Mat.nflops += aa.length
+         var i = 0
+         while (i < a.ncols) {
+           opv(data, 0, 1, aa.data, i*a.nrows, 1, out.data, i*nrows, 1, nrows)
+           i += 1
+         }
+         out
+       } else if (ncols==a.ncols && nrows==1) {
+         // this is a row vector: element i is held fixed (ainc = 0) down column i of a
+         val out = CMat.newOrCheckCMat(a.nrows, ncols, oldmat)
+         Mat.nflops += aa.length
+         var i = 0
+         while (i < ncols) {
+           opv(data, i, 0, aa.data, i*a.nrows, 1, out.data, i*a.nrows, 1, a.nrows)
+           i += 1
+         }
+         out
+       } else if (nrows==a.nrows && a.ncols==1) {
+         // a is a column vector: reuse it against every column of this
+         val out = CMat.newOrCheckCMat(nrows, ncols, oldmat)
+         Mat.nflops += length
+         var i = 0
+         while (i < ncols) {
+           opv(data, i*nrows, 1, aa.data, 0, 1, out.data, i*nrows, 1, nrows)
+           i += 1
+         }
+         out
+       } else if (ncols==a.ncols && a.nrows==1) {
+         // a is a row vector: element i is held fixed (binc = 0) down column i of this.
+         // The count is this matrix's row count; a.nrows is 1 here and would
+         // leave all but the first row of each column untouched.
+         val out = CMat.newOrCheckCMat(nrows, ncols, oldmat)
+         Mat.nflops += length
+         var i = 0
+         while (i < ncols) {
+           opv(data, i*nrows, 1, aa.data, i, 0, out.data, i*nrows, 1, nrows)
+           i += 1
+         }
+         out
+       } else ccMatOpStrictv(a, opv, oldmat)
+     }
+     case _ => throw new RuntimeException("arg must be dense")
+   }
+ }
+
+ /*
+  * Strict-shape variant of the vector-kernel operator: the operands must have
+  * identical dimensions, or one of them must be 1x1. A 1x1 operand is fed to
+  * opv with increment 0 so the same element is reused for every output slot.
+  *
+  * Throws RuntimeException("dims incompatible") / ("arg must be dense").
+  */
+ def ccMatOpStrictv(a:Mat, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, oldmat:Mat):CMat =
+   a match {
+     case other:CMat =>
+       val sameShape = nrows==a.nrows && ncols==a.ncols
+       val argIsScalar = a.nrows == 1 && a.ncols == 1
+       val selfIsScalar = nrows == 1 && ncols == 1
+       // (output rows, output cols, flop count, step for this, step for arg, element count)
+       val (outRows, outCols, flops, selfStep, argStep, count) =
+         if (sameShape) (nrows, ncols, length, 1, 1, other.length)
+         else if (argIsScalar) (nrows, ncols, length, 1, 0, length)
+         else if (selfIsScalar) (a.nrows, a.ncols, other.length, 0, 1, other.length)
+         else throw new RuntimeException("dims incompatible")
+       val result = CMat.newOrCheckCMat(outRows, outCols, oldmat)
+       Mat.nflops += flops
+       opv(data, 0, selfStep, other.data, 0, argStep, result.data, 0, 1, count)
+       result
+     case _ => throw new RuntimeException("arg must be dense")
+   }
+
+ /*
+  * Applies the vector kernel opv between every element of this matrix and the
+  * complex scalar (a0, a1). The scalar is placed in a two-float array and read
+  * with increment 0 so it is reused for each of the length elements.
+  */
+ def ccMatOpScalarv(a0:Float, a1:Float, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, omat:Mat):CMat = {
+   val result = CMat.newOrCheckCMat(nrows, ncols, omat)
+   Mat.nflops += length
+   val scalar = Array[Float](a0, a1)
+   opv(data, 0, 1, scalar, 0, 0, result.data, 0, 1, length)
+   result
+ }
+
+ // Delegates to ggReduceOp(n, f1, f2, out) and passes the result through CMat(...).
+ def ffReduceOp(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, out:Mat) =
+ CMat(ggReduceOp(n, f1, f2, out))
+
+ // Vector-kernel counterpart: delegates to ggReduceOpv(n, f, out) and passes the result through CMat(...).
+ def ffReduceOpv(n:Int, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) =
+ CMat(ggReduceOpv(n, f, out))
+
+ /*
+  * Reduces this complex matrix along one dimension with the vector kernel opv.
+  *
+  * dim0 == 1 (or 0 on a matrix with more than one row) folds each column to a
+  * single element and yields a 1 x ncols result; dim0 == 2 (or 0 on a single-row
+  * matrix) folds all columns together and yields an nrows x 1 result.
+  * The accumulator is seeded with the first element(s) and then updated as
+  * acc = opv(element, acc).
+  *
+  * Throws RuntimeException for any other dimension index.
+  */
+ def ccReduceOpv(dim0:Int, opv:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, oldmat:Mat):CMat = {
+   val dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0)
+   if (dim == 1) {
+     val out = CMat.newOrCheckCMat(1, ncols, oldmat)
+     Mat.nflops += length
+     var i = 0
+     while (i < ncols) {
+       // seed with the first complex element of column i (both real and imaginary floats)
+       out.data(2*i) = data(2*i*nrows)
+       out.data(2*i+1) = data(2*i*nrows+1)
+       // Fold the remaining rows one element at a time. Each call uses n = 1 with
+       // unit increments: the kernels in object CMat advance on the output index,
+       // so an output increment of 0 would never reach their loop bound.
+       var j = 1
+       while (j < nrows) {
+         opv(data, j+i*nrows, 1, out.data, i, 1, out.data, i, 1, 1)
+         j += 1
+       }
+       i += 1
+     }
+     out
+   } else if (dim == 2) {
+     val out = CMat.newOrCheckCMat(nrows, 1, oldmat)
+     Mat.nflops += length
+     // seed with the whole first column (2 floats per element)
+     var j = 0
+     while (j < 2*nrows) {
+       out.data(j) = data(j)
+       j += 1
+     }
+     // fold every further column into the accumulator column
+     var i = 1
+     while (i < ncols) {
+       opv(data, i*nrows, 1, out.data, 0, 1, out.data, 0, 1, nrows)
+       i += 1
+     }
+     out
+   } else
+     throw new RuntimeException("index must 1 or 2");
+ }
+
+ // Delegates to ggReduceAll(n, f1, f2, out) and passes the result through CMat(...).
+ def ffReduceAll(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, out:Mat) =
+ CMat(ggReduceAll(n, f1, f2, out))
+
+ // Vector-kernel counterpart: delegates to ggReduceAllv(n, f, out) and passes the result through CMat(...).
+ def ffReduceAllv(n:Int, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) =
+ CMat(ggReduceAllv(n, f, out))
+
+ /*
+  * Formats complex element i as text.
+  *
+  * Each part is printed as an integer when it has no fractional part and its
+  * magnitude is below 1e10, otherwise with "%.5g". A zero imaginary part is
+  * omitted; a zero real part leaves only the imaginary term. The imaginary
+  * term carries an "i" suffix and an explicit "+" when positive.
+  */
+ override def printOne(i:Int):String = {
+   val u = data(2*i)
+   val v = data(2*i+1)
+   // Integral values are converted with toLong: the 1e10 cut-off is above
+   // Int.MaxValue, and an Int conversion would saturate at 2147483647.
+   val s0 = if (u % 1 == 0 && math.abs(u) < 1e10) {
+     "%d" format u.toLong
+   } else {
+     "%.5g" format u
+   }
+   if (v == 0) {
+     s0
+   } else {
+     val s1 = if (v % 1 == 0 && math.abs(v) < 1e10) {
+       "%d" format v.toLong
+     } else {
+       "%.5g" format v
+     }
+     if (u == 0) {
+       s1+"i"
+     } else if (v > 0) {
+       s0+"+"+s1+"i"
+     } else {
+       // a negative s1 already starts with "-"
+       s0+s1+"i"
+     }
+   }
+ }
+
+ // Copies this matrix's 2*length floats into the destination's backing array and returns out.
+ // Only a CMat destination is matched; the match has no other case, so any other
+ // Mat subtype fails with a scala.MatchError.
+ // NOTE(review): the destination's size is not checked here — confirm callers pass a large enough CMat.
+ override def copyTo(out:Mat) = {
+ out match {
+ case cout:CMat => System.arraycopy(data, 0, cout.data, 0, 2*length)
+ }
+ out
+ }
+
+ // Returns a new CMat of the same shape holding a copy of all 2*length floats.
+ override def copy = {
+   val duplicate = CMat(nrows, ncols)
+   val floatCount = 2*length
+   System.arraycopy(data, 0, duplicate.data, 0, floatCount)
+   duplicate
+ }
+
+ // Returns a newly allocated nr x nc CMat; nothing is written to it after allocation.
+ override def zeros(nr:Int, nc:Int) = {
+ CMat(nr, nc)
+ }
+
+ /*
+  * Returns a new nr x nc complex matrix whose elements are all 1 + 0i.
+  *
+  * The real part of element i lives at float index 2*i of the backing array.
+  * It is written through out.data, like every other method of this class,
+  * rather than through the element-level update path with a doubled index.
+  */
+ override def ones(nr:Int, nc:Int) = {
+   val out = CMat(nr, nc)
+   var i = 0
+   while (i < out.length) {
+     out.data(2*i) = 1f
+     i += 1
+   }
+   out
+ }
+
+ /*
+ * Complex matrix product this * aa, with scalar fallbacks.
+ * - ncols == aa.nrows: full product into an nrows x aa.ncols output.
+ * - this is 1x1: every element of aa is multiplied by this scalar.
+ * - aa is 1x1: every element of this is multiplied by that scalar.
+ * Anything else throws RuntimeException("dimensions mismatch").
+ * outmat is handed to CMat.newOrCheckCMat to obtain the output matrix.
+ */
+ def fDMult(aa:CMat, outmat:Mat):CMat = {
+ if (ncols == aa.nrows) {
+ val out = CMat.newOrCheckCMat(nrows, aa.ncols, outmat)
+ Mat.nflops += 2L * length * aa.ncols
+ if (Mat.noMKL) {
+ // The loop below accumulates with +=, so a caller-supplied output is cleared first.
+ // NOTE(review): a null outmat is assumed to yield a freshly allocated (zero) output — confirm.
+ if (outmat.asInstanceOf[AnyRef] != null) out.clear
+ var i = 0
+ while (i < aa.ncols) {
+ var j = 0
+ while (j < aa.nrows) {
+ var k = 0
+ // (u0, u1) = element (j, i) of aa; ncols equals aa.nrows in this branch
+ val u0 = aa.data(2*(j + i*ncols))
+ val u1 = aa.data(2*(j + i*ncols)+1)
+ while (k < nrows) {
+ // (v0, v1) = element (k, j) of this; accumulate the complex product into out(k, i)
+ val v0 = data(2*(k+j*nrows))
+ val v1 = data(2*(k+j*nrows)+1)
+ out.data(2*(k+i*nrows)) += u0*v0-u1*v1
+ out.data(2*(k+i*nrows)+1) += u1*v0+u0*v1
+ k += 1
+ }
+ j += 1
+ }
+ i += 1
+ }
+ } else {
+ // alpha = 1 + 0i and beta = 0 + 0i, each passed as a two-float array
+ val alpha = List(1.0f,0f).toArray
+ val beta = List(0f,0f).toArray
+ if (nrows == 1) {
+ // row vector times matrix, expressed as a transposed matrix-vector product on aa
+ cgemv(ORDER.ColMajor, TRANSPOSE.Trans, aa.nrows, aa.ncols, alpha, aa.data, aa.nrows, data, 1, beta, out.data, 1)
+ } else if (aa.ncols == 1) {
+ // matrix times column vector
+ cgemv(ORDER.ColMajor, TRANSPOSE.NoTrans, nrows, ncols, alpha, data, nrows, aa.data, 1, beta, out.data, 1)
+ } else {
+ cgemm(ORDER.ColMajor, TRANSPOSE.NoTrans, TRANSPOSE.NoTrans,
+ nrows, aa.ncols, ncols, alpha, data, nrows, aa.data, aa.nrows, beta, out.data, nrows)
+ }
+ }
+ out
+ } else if (ncols == 1 && nrows == 1){
+ // this is a scalar: scale every element of aa by (u0, u1)
+ val out = CMat.newOrCheckCMat(aa.nrows, aa.ncols, outmat)
+ Mat.nflops += aa.length
+ var i = 0
+ val u0 = data(0)
+ val u1 = data(1)
+ while (i < aa.length) {
+ val v0 = aa.data(2*i)
+ val v1 = aa.data(2*i+1)
+ out.data(2*i) = u0*v0-u1*v1
+ out.data(2*i+1) = u0*v1+u1*v0
+ i += 1
+ }
+ out
+ } else if (aa.ncols == 1 && aa.nrows == 1){
+ // aa is a scalar: scale every element of this by (u0, u1)
+ val out = CMat.newOrCheckCMat(nrows, ncols, outmat)
+ Mat.nflops += length
+ var i = 0
+ val u0 = aa.data(0)
+ val u1 = aa.data(1)
+ while (i < length) {
+ val v0 = data(2*i)
+ val v1 = data(2*i+1)
+ out.data(2*i) = u0*v0-u1*v1
+ out.data(2*i+1) = u0*v1+u1*v0
+ i += 1
+ }
+ out
+ } else throw new RuntimeException("dimensions mismatch")
+ }
+
+
+ /*
+  * Dot product of two complex vectors of equal length, returned as a 1x1 CMat.
+  * Neither operand is conjugated: the result is the sum of the plain complex
+  * products. Partial sums are accumulated in double precision.
+  *
+  * Throws RuntimeException when either operand is not a vector or the lengths differ.
+  */
+ def dot (b : CMat):CMat = {
+   val bothVectors = math.min(nrows, ncols) == 1 && math.min(b.nrows,b.ncols) == 1
+   if (!bothVectors || length != b.length) {
+     throw new RuntimeException("vector dims not compatible")
+   }
+   Mat.nflops += 2 * length
+   var sumRe = 0.0
+   var sumIm = 0.0
+   var k = 0
+   while (k < length) {
+     val re = 2*k
+     val im = re + 1
+     val ar = data(re)
+     val ai = data(im)
+     val br = b.data(re)
+     val bi = b.data(im)
+     sumRe += ar*br-ai*bi
+     sumIm += ar*bi+ai*br
+     k += 1
+   }
+   CMat.celem(sumRe.toFloat, sumIm.toFloat)
+ }
+
+ /*
+  * Backs the "/" operator: solves against the square matrix a0 with an LU
+  * factorisation (cgetrf) followed by a triangular solve (cgetrs).
+  *
+  * a0 must be a square CMat whose order equals this matrix's column count.
+  * Neither operand is modified: a is factorised in a scratch buffer and the
+  * solve runs in place on a copy of this matrix, which is returned.
+  *
+  * Throws RuntimeException for a non-square / mismatched a0 or a non-CMat argument.
+  */
+ def solvel(a0:Mat):CMat =
+   a0 match {
+     case a:CMat => {
+       Mat.nflops += 2L*a.nrows*a.nrows*a.nrows/3 + 2L*nrows*a.nrows*a.nrows
+       if (a.nrows != a.ncols || ncols != a.nrows) {
+         throw new RuntimeException("solve needs a square matrix")
+       } else {
+         val out = CMat(nrows, ncols)
+         // The scratch buffer receives a copy of a, so it is sized from a. This
+         // matrix may have fewer rows than a, in which case 2*length floats
+         // would be too small for the copy below.
+         val tmp = new Array[Float](2*a.length)
+         System.arraycopy(a.data, 0, tmp, 0, 2*a.length)
+         System.arraycopy(data, 0, out.data, 0, 2*length)
+         val ipiv = new Array[Int](ncols)
+         cgetrf(ORDER.RowMajor, ncols, ncols, tmp, ncols, ipiv)
+         cgetrs(ORDER.RowMajor, "N", ncols, nrows, tmp, ncols, ipiv, out.data, nrows)
+         out
+       }
+     }
+     case _ => throw new RuntimeException("unsupported arg to / "+a0)
+   }
+
+ /*
+ * Backs the "\\" operator. This matrix must be square and its order must equal a0's row count.
+ * A copy of this matrix is LU-factorised (cgetrf) and the solve (cgetrs) runs in place on a
+ * copy of a0, which is returned; neither operand is modified.
+ * NOTE(review): presumably returns x with this * x = a0 — confirm against the LAPACK wrapper.
+ * Throws RuntimeException for a non-square / mismatched shape or a non-CMat argument.
+ */
+ def solver(a0:Mat):CMat =
+ a0 match {
+ case a:CMat => {
+ // flop estimate is recorded before the shape check
+ Mat.nflops += 2L*nrows*nrows*nrows/3 + 2L*nrows*nrows*a.ncols
+ if (nrows != ncols || ncols != a.nrows) {
+ throw new RuntimeException("solve needs a square matrix")
+ } else {
+ val out = CMat(a.nrows, a.ncols)
+ // tmp holds the copy of this matrix that cgetrf overwrites with its factors
+ val tmp = new Array[Float](2*length)
+ System.arraycopy(data, 0, tmp, 0, 2*length)
+ System.arraycopy(a.data, 0, out.data, 0, 2*a.length)
+ val ipiv = new Array[Int](ncols)
+ cgetrf(ORDER.ColMajor, ncols, ncols, tmp, ncols, ipiv)
+ cgetrs(ORDER.ColMajor, "N", ncols, a.ncols, tmp, nrows, ipiv, out.data, nrows)
+ out
+ }
+ }
+ case _ => throw new RuntimeException("unsupported arg to \\ "+a0)
+ }
+
+ /*
+  * Returns the inverse of this square complex matrix, computed on a copy with
+  * an LU factorisation (cgetrf) followed by cgetri. This matrix is not modified.
+  *
+  * Throws RuntimeException when the matrix is not square.
+  */
+ def inv:CMat = {
+   import edu.berkeley.bid.LAPACK._
+   if (nrows != ncols) {
+     throw new RuntimeException("inv method needs a square matrix")
+   } else {
+     val out = CMat(nrows, ncols)
+     // Complex storage is 2 floats per element, so the whole matrix is
+     // 2*length floats; copying only length floats would leave the second
+     // half of the working copy zero.
+     System.arraycopy(data, 0, out.data, 0, 2*length)
+     val ipiv = new Array[Int](nrows)
+     cgetrf(ORDER.ColMajor, nrows, ncols, out.data, nrows, ipiv)
+     cgetri(ORDER.ColMajor, nrows, out.data, nrows, ipiv)
+     out
+   }
+ }
+
+ // Sets the first 2*length floats of the backing array to 0 and returns this.
+ override def clear = {
+ Arrays.fill(this.data,0,2*length,0)
+ this
+ }
+
+ /*
+ * Zeroes entries above the diagonal of a square matrix in place and returns this.
+ * For every column i >= 1, rows 0 until i+off are cleared (real and imaginary parts).
+ * Throws RuntimeException when the matrix is not square.
+ * NOTE(review): i+off is not clamped to nrows, so a positive off can index past the end of a
+ * column, and column 0 is never visited — confirm the intended range of off.
+ */
+ override def clearUpper(off:Int) = {
+ if (nrows != ncols) {
+ throw new RuntimeException("clearUpper assumes a square matrix")
+ } else {
+ var i = 1
+ while (i < ncols) {
+ var j = 0
+ while (j < i+off) {
+ data(2*(j + i*nrows)) = 0
+ data(2*(j + i*nrows)+1) = 0
+ j += 1
+ }
+ i += 1
+ }
+ this
+ }
+ }
+ // Same as clearUpper(0).
+ override def clearUpper = clearUpper(0)
+
+ /*
+ * Zeroes entries below the diagonal of a square matrix in place and returns this.
+ * For every column i < ncols-1, rows i+1+off until nrows are cleared (real and imaginary parts).
+ * Throws RuntimeException when the matrix is not square.
+ * NOTE(review): a negative off makes the starting row negative for small i — confirm the
+ * intended range of off.
+ */
+ override def clearLower(off:Int):CMat = {
+ if (nrows != ncols) {
+ throw new RuntimeException("clearLower assumes a square matrix")
+ } else {
+ var i = 0
+ while (i < ncols-1) {
+ var j = i+1+off
+ while (j < nrows) {
+ data(2*(j + i*nrows)) = 0
+ data(2*(j + i*nrows)+1) = 0
+ j += 1
+ }
+ i += 1
+ }
+ }
+ this
+ }
+
+ // Same as clearLower(0).
+ override def clearLower:CMat = clearLower(0)
+
+ /*
+  * Builds a square matrix with this vector on its diagonal.
+  * The result has side max(nrows, ncols); off-diagonal entries are left as allocated.
+  *
+  * Throws RuntimeException when this matrix is not a row or column vector.
+  */
+ override def mkdiag = {
+   if (math.min(nrows, ncols) > 1) {
+     throw new RuntimeException("mkdiag needs a vector input")
+   }
+   val side = math.max(nrows, ncols)
+   val diagonal = CMat(side, side)
+   for (k <- 0 until side) {
+     val src = 2*k
+     val dst = 2*k*(side+1)   // float offset of diagonal element (k, k)
+     diagonal.data(dst) = data(src)
+     diagonal.data(dst+1) = data(src+1)
+   }
+   diagonal
+ }
+
+ /*
+  * Extracts the main diagonal as a column vector with min(nrows, ncols) elements.
+  */
+ override def getdiag = {
+   val count = math.min(nrows, ncols)
+   val diagonal = CMat(count, 1)
+   for (k <- 0 until count) {
+     val src = 2*k*(nrows+1)   // float offset of element (k, k) in this matrix
+     val dst = 2*k
+     diagonal.data(dst) = data(src)
+     diagonal.data(dst+1) = data(src+1)
+   }
+   diagonal
+ }
+
+ // Operators with a CMat right-hand side. A new output is allocated for each (null output argument).
+ // * is the matrix product (fDMult); +, -, *@ and /@ are elementwise via the vector kernels;
+ // / and \\ go to solvel and solver.
+ def * (b : CMat) = fDMult(b, null)
+ def + (b : CMat) = ccMatOpv(b, CMat.vecAdd _, null)
+ def - (b : CMat) = ccMatOpv(b, CMat.vecSub _, null)
+ def *@ (b : CMat) = ccMatOpv(b, CMat.vecMul _, null)
+ def /@ (b : CMat) = ccMatOpv(b, CMat.vecDiv _, null)
+ def / (b : CMat) = solvel(b)
+ def \\ (b : CMat) = solver(b)
+
+ // Elementwise comparisons: the result element is 1+0i where the test holds and 0+0i otherwise.
+ // Two elements are equal only when both real and imaginary parts are equal.
+ def == (b : CMat) = ccMatOp(b, (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null)
+ def != (b : CMat) = ccMatOp(b, (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null)
+
+ // Float scalar operand, treated as the complex value b + 0i. Both * and *@ use vecMul.
+ override def * (b : Float) = ccMatOpScalarv(b, 0, CMat.vecMul _, null)
+ override def + (b : Float) = ccMatOpScalarv(b, 0, CMat.vecAdd _, null)
+ override def - (b : Float) = ccMatOpScalarv(b, 0, CMat.vecSub _, null)
+ override def *@ (b : Float) = ccMatOpScalarv(b, 0, CMat.vecMul _, null)
+ override def /@ (b : Float) = ccMatOpScalarv(b, 0, CMat.vecDiv _, null)
+
+ override def == (b : Float) = ccMatOp(CMat.celem(b, 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null)
+ override def != (b : Float) = ccMatOp(CMat.celem(b, 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null)
+
+ // Double scalar operand: narrowed to Float, then handled like the Float forms.
+ override def * (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null)
+ override def + (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecAdd _, null)
+ override def - (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecSub _, null)
+ override def *@ (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null)
+ override def /@ (b : Double) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecDiv _, null)
+
+ override def == (b : Double) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null)
+ override def != (b : Double) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null)
+
+ // Int scalar operand: converted to Float, then handled like the Float forms.
+ override def * (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null)
+ override def + (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecAdd _, null)
+ override def - (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecSub _, null)
+ override def *@ (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecMul _, null)
+ override def /@ (b : Int) = ccMatOpScalarv(b.asInstanceOf[Float], 0, CMat.vecDiv _, null)
+
+ override def == (b : Int) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f), null)
+ override def != (b : Int) = ccMatOp(CMat.celem(b.asInstanceOf[Float], 0), (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f), null)
+
+ // Concatenation: \ delegates to horzcat, on delegates to vertcat.
+ def \ (b: CMat) = horzcat(b)
+ def on (b: CMat) = vertcat(b)
+
+ // Pairs this matrix with b in a CPair: this becomes the pair's omat and b its mat, so
+ // operators applied to the pair compute on b and pass this as the output argument.
+ def ~ (b : CMat):CPair = new CPair(this, b)
+
+ // Generic form of ~: only a CMat right-hand side is accepted; anything else throws.
+ override def ~ (b: Mat):Pair =
+ b match {
+ case db:CMat => new CPair(this, db)
+ case _ => throw new RuntimeException("mismatched types for operator ~")
+ }
+
+ /*
+ * Operators whose second arg is generic.
+ * Each one forwards to applyMat with this matrix, the argument, a null output and the
+ * Mop_* constant for the operation (both brought into scope by the Operator._ import).
+ */
+ import Operator._
+ override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus)
+ override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus)
+ override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times)
+ override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div)
+ override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve)
+ override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes)
+ override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv)
+ override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat)
+ override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat)
+
+ // Elementwise comparisons, dispatched the same way.
+ override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE)
+
+ /*
+  * Returns a CMat of shape nr x nc, reusing storage where possible:
+  * this matrix itself when the shape already matches, a new view over the
+  * existing backing array when it holds at least 2*nr*nc floats, and a fresh
+  * allocation otherwise. nnz is not used.
+  */
+ override def recycle(nr:Int, nc:Int, nnz:Int):CMat = {
+   val shapeMatches = nrows == nr && nc == ncols
+   if (shapeMatches) this
+   else if (data.size < 2*nr*nc) CMat(nr, nc)
+   else new CMat(nr, nc, data)
+ }
+}
+
+/*
+ * Pairs an output matrix (omat) with a complex operand (mat). Every operator
+ * computes on mat and passes omat as the output argument of the underlying
+ * CMat routine.
+ */
+class CPair (val omat:Mat, val mat:CMat) extends Pair {
+
+  // Elementwise predicates: 1+0i where the test holds, 0+0i otherwise.
+  private val sameValue = (ar:Float, ai:Float, br:Float, bi:Float) => if (ar == br && ai == bi) (1f, 0f) else (0f, 0f)
+  private val otherValue = (ar:Float, ai:Float, br:Float, bi:Float) => if (ar != br || ai != bi) (1f, 0f) else (0f, 0f)
+
+  // Applies a vector kernel between mat and the real scalar re (imaginary part 0).
+  private def withScalar(re:Float, kernel:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float) =
+    mat.ccMatOpScalarv(re, 0, kernel, omat)
+
+  // Compares mat elementwise against the real scalar re (imaginary part 0).
+  private def compareScalar(re:Float, pred:(Float,Float,Float,Float) => (Float,Float)) =
+    mat.ccMatOp(CMat.celem(re, 0), pred, omat)
+
+  override def t:CMat = CMat(mat.gt(omat))
+
+  // CMat right-hand side
+  def * (b : CMat) = mat.fDMult(b, omat)
+  def + (b : CMat) = mat.ccMatOpv(b, CMat.vecAdd _, omat)
+  def - (b : CMat) = mat.ccMatOpv(b, CMat.vecSub _, omat)
+  def *@ (b : CMat) = mat.ccMatOpv(b, CMat.vecMul _, omat)
+  def /@ (b : CMat) = mat.ccMatOpv(b, CMat.vecDiv _, omat)
+//  override def ^ (b : Mat) = mat.ccMatOp(b, (x:Float, y:Float) => math.pow(x,y).toFloat, null)
+
+  def == (b : CMat) = mat.ccMatOp(b, sameValue, omat)
+  def != (b : CMat) = mat.ccMatOp(b, otherValue, omat)
+
+  // Float right-hand side
+  override def * (b : Float) = withScalar(b, CMat.vecMul _)
+  override def + (b : Float) = withScalar(b, CMat.vecAdd _)
+  override def - (b : Float) = withScalar(b, CMat.vecSub _)
+  override def *@ (b : Float) = withScalar(b, CMat.vecMul _)
+  override def /@ (b : Float) = withScalar(b, CMat.vecDiv _)
+
+  override def == (b : Float) = compareScalar(b, sameValue)
+  override def != (b : Float) = compareScalar(b, otherValue)
+
+  // Double right-hand side, narrowed to Float first
+  override def * (b : Double) = withScalar(b.toFloat, CMat.vecMul _)
+  override def + (b : Double) = withScalar(b.toFloat, CMat.vecAdd _)
+  override def - (b : Double) = withScalar(b.toFloat, CMat.vecSub _)
+  override def *@ (b : Double) = withScalar(b.toFloat, CMat.vecMul _)
+  override def /@ (b : Double) = withScalar(b.toFloat, CMat.vecDiv _)
+
+  override def == (b : Double) = compareScalar(b.toFloat, sameValue)
+  override def != (b : Double) = compareScalar(b.toFloat, otherValue)
+
+  // Int right-hand side, converted to Float first
+  override def * (b : Int) = withScalar(b.toFloat, CMat.vecMul _)
+  override def + (b : Int) = withScalar(b.toFloat, CMat.vecAdd _)
+  override def - (b : Int) = withScalar(b.toFloat, CMat.vecSub _)
+  override def *@ (b : Int) = withScalar(b.toFloat, CMat.vecMul _)
+  override def /@ (b : Int) = withScalar(b.toFloat, CMat.vecDiv _)
+
+  override def == (b : Int) = compareScalar(b.toFloat, sameValue)
+  override def != (b : Int) = compareScalar(b.toFloat, otherValue)
+
+}
+
+/*
+ * Factory methods and vector kernels for CMat.
+ *
+ * Storage convention used throughout: element k occupies floats 2*k (real part)
+ * and 2*k+1 (imaginary part) of the backing array.
+ */
+object CMat {
+
+  // Allocates an nr x nc complex matrix backed by a new array of 2*nr*nc floats.
+  def apply(nr:Int, nc:Int) = new CMat(nr, nc, new Array[Float](2*nr*nc))
+
+  // Builds a complex matrix whose real parts come from a; imaginary parts are left as allocated.
+  def real(a:FMat):CMat = {
+    val out = CMat(a.nrows, a.ncols)
+    var i = 0
+    while (i < a.length) {
+      out.data(2*i) = a.data(i)
+      i += 1
+    }
+    out
+  }
+
+  // Builds a complex matrix whose imaginary parts come from a; real parts are left as allocated.
+  def imag(a:FMat):CMat = {
+    val out = CMat(a.nrows, a.ncols)
+    var i = 0
+    while (i < a.length) {
+      out.data(2*i+1) = a.data(i)
+      i += 1
+    }
+    out
+  }
+
+  /*
+   * Converts a matrix to CMat. A CMat source is copied; DMat and IMat sources go
+   * through FMat and become the real part; an FMat source becomes the real part
+   * directly. Any other type throws RuntimeException.
+   */
+  def apply(x:Mat):CMat = {
+    x match {
+      case dd:DMat => real(FMat(dd))
+      case cc:CMat => {val out = CMat(x.nrows, x.ncols); System.arraycopy(cc.data, 0, out.data, 0, 2*cc.length); out}
+      case ii:IMat => real(FMat(ii))
+      case ff:FMat => real(ff)
+//    case xx:DenseMat[Float] => new CMat(xx.nrows, xx.ncols, xx.data)
+      case _ => throw new RuntimeException("Unsupported source type")
+    }
+  }
+
+  // Builds the 1x1 complex matrix x + y*i.
+  def celem(x:Float, y:Float) = {
+    val out = CMat(1,1)
+    out.data(0) = x
+    out.data(1) = y
+    out
+  }
+
+  /*
+   * Vector kernels. All four share one calling convention:
+   *   a, b   input arrays;  c  output array
+   *   a0, b0, c0      starting offsets, in complex elements
+   *   ainc, binc, cinc  per-step increments, in complex elements
+   *   n      length of the output range: the loop runs while the output index is below c0 + n
+   * An input increment of 0 keeps re-reading one element (scalar / broadcast use).
+   * Because the loop bound is on the output index, cinc must be positive.
+   * The Float return value is always 0.
+   */
+
+  // c = a + b
+  def vecAdd(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    var ai = a0; var bi = b0; var ci = c0; val cend = c0 + n
+    while (ci < cend) {
+      c(2*ci) = a(2*ai) + b(2*bi)
+      c(2*ci+1) = a(2*ai+1) + b(2*bi+1)
+      ai += ainc; bi += binc; ci += cinc
+    }
+    0
+  }
+
+  // c = a - b
+  def vecSub(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    var ai = a0; var bi = b0; var ci = c0; val cend = c0 + n
+    while (ci < cend) {
+      c(2*ci) = a(2*ai) - b(2*bi)
+      c(2*ci+1) = a(2*ai+1) - b(2*bi+1)
+      ai += ainc; bi += binc; ci += cinc
+    }
+    0
+  }
+
+  // c = a * b (complex product)
+  def vecMul(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    var ai = a0; var bi = b0; var ci = c0; val cend = c0 + n
+    while (ci < cend) {
+      val u0 = a(2*ai)
+      val u1 = a(2*ai+1)
+      // b is read at its own index bi: with a scalar or broadcast operand
+      // (binc = 0, or b0 different from a0) ai and bi point at different elements
+      val v0 = b(2*bi)
+      val v1 = b(2*bi+1)
+      c(2*ci) = u0*v0-u1*v1
+      c(2*ci+1) = u0*v1+v0*u1
+      ai += ainc; bi += binc; ci += cinc
+    }
+    0
+  }
+
+  // c = a / b (complex quotient: a times the conjugate of b, over |b|^2)
+  def vecDiv(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    var ai = a0; var bi = b0; var ci = c0; val cend = c0 + n
+    while (ci < cend) {
+      val u0 = a(2*ai)
+      val u1 = a(2*ai+1)
+      // b is read at its own index bi, for the same reason as in vecMul
+      val v0 = b(2*bi)
+      val v1 = b(2*bi+1)
+      val denom = v0*v0 + v1*v1
+      c(2*ci) = (u0*v0+u1*v1)/denom
+      c(2*ci+1) = (u1*v0-v1*u0)/denom
+      ai += ainc; bi += binc; ci += cinc
+    }
+    0
+  }
+
+  /*
+   * Chooses the output matrix for an operation producing an nr x nc result:
+   * a new CMat when outmat is null or 0x0, outmat itself when its shape already
+   * matches, and outmat.recycle(nr, nc, 0) otherwise.
+   */
+  def newOrCheckCMat(nr:Int, nc:Int, outmat:Mat):CMat = {
+    if (outmat.asInstanceOf[AnyRef] == null || (outmat.nrows == 0 && outmat.ncols == 0)) {
+      CMat(nr, nc)
+    } else {
+      if (outmat.nrows != nr || outmat.ncols != nc) {
+        outmat.recycle(nr, nc, 0).asInstanceOf[CMat]
+      } else {
+        outmat.asInstanceOf[CMat]
+      }
+    }
+  }
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/CSMat.scala b/src/main/scala/BIDMat/CSMat.scala
new file mode 100755
index 00000000..b1c0a1b8
--- /dev/null
+++ b/src/main/scala/BIDMat/CSMat.scala
@@ -0,0 +1,165 @@
+package BIDMat
+import Mat._
+
+// Dense matrix of Strings, built on DenseMat[String]. Most members wrap the generic
+// g*/gg* routines of the superclass and re-wrap the result as a CSMat.
+case class CSMat(override val nrows:Int, override val ncols:Int, override val data:Array[String]) extends DenseMat[String](nrows, ncols, data) {
+
+ // Number of elements, as reported by length.
+ def size() = length;
+
+ // Transpose via the generic gt, with no output matrix supplied.
+ override def t:CSMat = CSMat(gt(null))
+
+ override def mytype = "CSMat"
+
+ def horzcat(b: CSMat) = CSMat(ghorzcat(b))
+
+ def vertcat(b: CSMat) = CSMat(gvertcat(b))
+
+ // Wraps the three components returned by gfind3 as (IMat, IMat, CSMat).
+ def find3:(IMat, IMat, CSMat) = { val vv = gfind3 ; (IMat(vv._1), IMat(vv._2), CSMat(vv._3)) }
+
+ // Indexing overloads: each delegates to the matching gapply and wraps the result.
+ override def apply(a:IMat):CSMat = CSMat(gapply(a))
+
+ override def apply(a:IMat, b:IMat):CSMat = CSMat(gapply(a, b))
+
+ override def apply(a:Int, b:IMat):CSMat = CSMat(gapply(a, b))
+
+ override def apply(a:IMat, b:Int):CSMat = CSMat(gapply(a, b))
+
+ // Binary, scalar and reduction operators with a String combiner, delegated to the generic versions.
+ def ccMatOp(b: CSMat, f:(String, String) => String, old:CSMat) = CSMat(ggMatOp(b, f, old))
+
+ def ccMatOpScalar(b: String, f:(String, String) => String, old:CSMat) = CSMat(ggMatOpScalar(b, f, old))
+
+ def ccReduceOp(n:Int, f1:(String) => String, f2:(String, String) => String, old:CSMat) = CSMat(ggReduceOp(n, f1, f2, old))
+
+ // Text for element i; a null entry prints as "NULL".
+ override def printOne(i:Int):String = {
+ val v = data(i)
+ if (v != null)
+ v.toString()
+ else
+ "NULL"
+ }
+
+ /*
+ * Trait to implement binary operations on dense matrices
+ * Implementors supply op2 (the binary combiner); op1 is applied to the first element of a
+ * reduction and defaults to the identity. The methods read the enclosing CSMat's
+ * nrows, ncols and data.
+ */
+ trait DCSMatOp {
+ @inline def op1(x:String):String = x;
+ def op2(x:String, y:String):String;
+
+ // Applies op2 elementwise, broadcasting a row or column vector on either side;
+ // shapes that fit none of the four broadcast rules go to dCSMatOpStrict.
+ def dCSMatOp(a:CSMat):CSMat =
+ if (nrows==a.nrows && ncols==1) {
+ // this is a column vector, reused against every column of a
+ val out = CSMat(nrows, a.ncols)
+ for (i <- 0 until a.ncols) {
+ for (j <- 0 until nrows) {
+ out.data(j+i*nrows) = op2(data(j), a.data(j+i*a.nrows))
+ }
+ }
+ out
+ } else if (ncols==a.ncols && nrows==1) {
+ // this is a row vector, element i reused down column i of a
+ val out = CSMat(a.nrows, ncols)
+ for (i <- 0 until ncols) {
+ for (j <- 0 until a.nrows) {
+ out.data(j+i*a.nrows) = op2(data(i), a.data(j+i*a.nrows))
+ }
+ }
+ out
+ } else if (nrows==a.nrows && a.ncols==1) {
+ // a is a column vector, reused against every column of this
+ val out = CSMat(nrows, ncols)
+ for (i <- 0 until ncols) {
+ for (j <- 0 until nrows) {
+ out.data(j+i*nrows) = op2(data(j+i*nrows), a.data(j))
+ }
+ }
+ out
+ } else if (ncols==a.ncols && a.nrows==1) {
+ // a is a row vector, element i reused down column i of this
+ val out = CSMat(nrows, ncols)
+ for (i <- 0 until ncols) {
+ for (j <- 0 until nrows) {
+ out.data(j+i*nrows) = op2(data(j+i*nrows), a.data(i))
+ }
+ }
+ out
+ } else dCSMatOpStrict(a)
+
+ // Applies op2 elementwise when the shapes are identical or one operand is 1x1;
+ // any other combination throws RuntimeException("dims incompatible").
+ def dCSMatOpStrict(a:CSMat):CSMat =
+ if (nrows==a.nrows && ncols==a.ncols) {
+ val out = CSMat(nrows, ncols)
+ var i = 0
+ while (i < a.length) {
+ out.data(i) = op2(data(i), a.data(i))
+ i += 1
+ }
+ out
+ } else if (a.nrows == 1 && a.ncols == 1) {
+ // a is the scalar: it is the right-hand operand of every op2 call
+ val out = CSMat(nrows, ncols)
+ val aval = a.data(0)
+ for (i <- 0 until length) {
+ out.data(i) = op2(data(i), aval)
+ }
+ out
+ } else if (nrows == 1 && ncols == 1) {
+ // this is the scalar: it is the left-hand operand of every op2 call
+ val out = CSMat(a.nrows, a.ncols)
+ val aval = data(0)
+ for (i <- 0 until a.length) {
+ out.data(i) = op2(aval, a.data(i))
+ }
+ out
+ } else throw new RuntimeException("dims incompatible")
+
+ // Reduces with op1/op2: dim == 1 folds each column to one element (1 x ncols result),
+ // dim == 2 folds the columns together (nrows x 1 result); other values throw.
+ def dCSMatReduceOp(dim:Int):CSMat =
+ if (dim == 1) {
+ val out = CSMat(1, ncols)
+ for (i <- 0 until ncols) {
+ var j = 1
+ // seed the accumulator with op1 of the column's first element
+ var acc = op1(data(i*nrows))
+ while (j < nrows) {
+ acc = op2(acc, data(j+i*nrows))
+ j += 1
+ }
+ out.data(i) = acc
+ }
+ out
+ } else if (dim == 2) {
+ val out = CSMat(nrows, 1)
+ // seed the accumulator column with op1 of the first column
+ var j = 0
+ while (j < nrows) {
+ out.data(j) = op1(data(j))
+ j += 1
+ }
+ for (i <- 1 until ncols) {
+ var j = 0
+ while (j < nrows) {
+ out.data(j) = op2(out.data(j), data(j+i*nrows))
+ j += 1
+ }
+ }
+ out
+ } else
+ throw new RuntimeException("index must 1 or 2")
+ }
+
+ // Elementwise string concatenation.
+ def + (b : CSMat) = ccMatOp(b, (x:String, y:String) => x + y, null)
+
+ // Horizontal (\) and vertical (on) concatenation; a String operand is first wrapped as a 1x1 CSMat.
+ def \ (b: CSMat) = horzcat(b)
+ def \ (b: String) = horzcat(CSMat.cselem(b))
+ def on (b: CSMat) = vertcat(b)
+ def on (b: String) = vertcat(CSMat.cselem(b))
+}
+
+// Factory methods for CSMat.
+object CSMat {
+
+  // Allocates an nr x nc string matrix backed by a new array of nr*nc entries.
+  def apply(nr:Int, nc:Int):CSMat = {
+    val cells = new Array[String](nr*nc)
+    new CSMat(nr, nc, cells)
+  }
+
+  // Wraps an existing dense string matrix, sharing its backing array.
+  def apply(a:DenseMat[String]):CSMat = new CSMat(a.nrows, a.ncols, a.data)
+
+  // Builds the 1x1 matrix holding x.
+  def cselem(x:String) = {
+    val single = CSMat(1,1)
+    single.data.update(0, x)
+    single
+  }
+
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/Copyright.txt b/src/main/scala/BIDMat/Copyright.txt
new file mode 100755
index 00000000..21326596
--- /dev/null
+++ b/src/main/scala/BIDMat/Copyright.txt
@@ -0,0 +1,25 @@
+Copyright (c) 2012, Regents of the University of California
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/src/main/scala/BIDMat/DMat.scala b/src/main/scala/BIDMat/DMat.scala
new file mode 100755
index 00000000..00928b4d
--- /dev/null
+++ b/src/main/scala/BIDMat/DMat.scala
@@ -0,0 +1,668 @@
+package BIDMat
+
+import edu.berkeley.bid.CBLAS._
+import edu.berkeley.bid.LAPACK._
+import edu.berkeley.bid.SPBLAS._
+import java.util.Arrays
+
+case class DMat(nr:Int, nc:Int, data0:Array[Double]) extends DenseMat[Double](nr, nc, data0) {
+
+ // Number of elements, as reported by length.
+ def size() = length;
+
+ // Returns the backing array itself (not a copy).
+ def getdata() = data
+
+ // Fills the first length entries of the backing array with v and returns this.
+ override def set(v:Float):DMat = {
+ Arrays.fill(data,0,length,v)
+ this
+ }
+
+ // Transpose. With Mat.noMKL set, the generic gt is used; otherwise a new ncols x nrows
+ // matrix is filled by domatcopy called with the "C" and "T" flags and a scale of 1.0.
+ override def t:DMat = if (Mat.noMKL) {
+ DMat(gt(null))
+ } else {
+ val out = DMat(ncols, nrows)
+ domatcopy("C", "T", nrows, ncols, 1.0, data, nrows, out.data, ncols)
+ out
+ }
+
+ // Returns the single value of a matrix with at most one row and one column;
+ // throws RuntimeException for anything larger.
+ override def dv:Double = {
+   val holdsOneValue = nrows <= 1 && ncols <= 1
+   if (holdsOneValue) data(0)
+   else throw new RuntimeException("Matrix should be 1x1 to extract value")
+ }
+
+ override def mytype = "DMat"
+
+ // Concatenation: delegates to the generic ghorzcat / gvertcat and wraps the result as DMat.
+ def horzcat(b: DMat) = DMat(ghorzcat(b))
+
+ def vertcat(b: DMat) = DMat(gvertcat(b))
+
+ // Returns the three components of gfind3, with the third wrapped as DMat.
+ def find3:(IMat, IMat, DMat) = { val (ii, jj, vv) = gfind3 ; (ii, jj, DMat(vv)) }
+
+ // Indexing overloads: each delegates to the matching gapply and wraps the result.
+ override def apply(a:IMat):DMat = DMat(gapply(a))
+
+ override def apply(a:IMat, b:IMat):DMat = DMat(gapply(a, b))
+
+ override def apply(a:IMat, b:Int):DMat = DMat(gapply(a, b))
+
+ override def apply(a:Int, b:IMat):DMat = DMat(gapply(a, b))
+
+ // Indexed assignment overloads: a single Int index is wrapped with IMat.ielem before calling _update.
+ def update(iv:IMat, jv:IMat, b:DMat):DMat = DMat(_update(iv, jv, b))
+
+ def update(iv:IMat, j:Int, b:DMat):DMat = DMat(_update(iv, IMat.ielem(j), b))
+
+ def update(i:Int, jv:IMat, b:DMat):DMat = DMat(_update(IMat.ielem(i), jv, b))
+
+ // Binary operator with a scalar function f: accepts only a DMat argument, delegates to
+ // ggMatOp and wraps the result; any other Mat type throws RuntimeException.
+ def ddMatOp(b: Mat, f:(Double, Double) => Double, out:Mat) =
+ b match {
+ case bb:DMat => DMat(ggMatOp(bb, f, out))
+ case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b)
+ }
+
+ // Same as ddMatOp but with a vector kernel f, delegating to ggMatOpv.
+ def ddMatOpv(b: Mat, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) =
+ b match {
+ case bb:DMat => DMat(ggMatOpv(bb, f, out))
+ case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b)
+ }
+
+ // Scalar-operand forms: delegate to ggMatOpScalar / ggMatOpScalarv.
+ def ddMatOpScalar(b: Double, f:(Double, Double) => Double, out:Mat) = DMat(ggMatOpScalar(b, f, out))
+
+ def ddMatOpScalarv(b: Double, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) =
+ DMat(ggMatOpScalarv(b, f, out))
+
+ // Reductions: delegate to ggReduceOp / ggReduceOpv and wrap the result.
+ def ddReduceOp(n:Int, f1:(Double) => Double, f2:(Double, Double) => Double, out:Mat) = DMat(ggReduceOp(n, f1, f2, out))
+
+ def ddReduceOpv(n:Int, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) =
+ DMat(ggReduceOpv(n, f, out))
+
+ // "All" reductions: delegate to ggReduceAll / ggReduceAllv and wrap the result.
+ def ddReduceAll(n:Int, f1:(Double) => Double, f2:(Double, Double) => Double, out:Mat) =
+ DMat(ggReduceAll(n, f1, f2, out))
+
+ def ddReduceAllv(n:Int, f:(Array[Double],Int,Int,Array[Double],Int,Int,Array[Double],Int,Int,Int) => Double, out:Mat) =
+ DMat(ggReduceAllv(n, f, out))
+
+ /*
+  * Formats element i as text: as an integer when the value has no fractional
+  * part and its magnitude is below 1e10, otherwise with "%.5g".
+  */
+ override def printOne(i:Int):String = {
+   val v = data(i)
+   if (v % 1 == 0 && math.abs(v) < 1e10) {
+     // toLong, not an Int conversion: the 1e10 cut-off is above Int.MaxValue,
+     // and an Int conversion would saturate at 2147483647 for such values
+     "%d" format v.toLong
+   } else {
+     "%.5g" format v
+   }
+ }
+
+ // Copies this matrix's length values into the destination's backing array and returns a.
+ // Only a DMat destination is matched; the match has no other case, so any other
+ // Mat subtype fails with a scala.MatchError.
+ // NOTE(review): the destination's size is not checked here — confirm callers pass a large enough DMat.
+ override def copyTo(a:Mat) = {
+ a match {
+ case out:DMat => System.arraycopy(data, 0, out.data, 0, length)
+ }
+ a
+ }
+
+ // Returns a new DMat of the same shape holding a copy of all length values.
+ override def copy = {
+   val duplicate = DMat(nrows, ncols)
+   val valueCount = length
+   System.arraycopy(data, 0, duplicate.data, 0, valueCount)
+   duplicate
+ }
+
+ // Returns the result of DMat(nr, nc); nothing further is written to it here.
+ override def zeros(nr:Int, nc:Int) = {
+ DMat(nr, nc)
+ }
+
+ // Returns a new nr x nc matrix with every element set to 1 through the element update path.
+ override def ones(nr:Int, nc:Int) = {
+   val filled = DMat(nr, nc)
+   for (k <- 0 until filled.length) {
+     filled(k) = 1
+   }
+   filled
+ }
+
+ // Clearing the upper / lower triangle is expressed as setUpper / setLower with the value 0;
+ // the no-argument forms use an offset of 0.
+ override def clearUpper(off:Int) = setUpper(0, off)
+ override def clearUpper = setUpper(0, 0)
+
+ override def clearLower(off:Int) = setLower(0, off)
+ override def clearLower = setLower(0, 0)
+
+
+ /*
+ * Matrix product this * aa, with scalar fallbacks.
+ * - ncols == aa.nrows: full product into an nrows x aa.ncols output.
+ * - this is 1x1: every element of aa is multiplied by this scalar.
+ * - aa is 1x1: every element of this is multiplied by that scalar.
+ * Anything else throws RuntimeException("dimensions mismatch").
+ * outmat is handed to DMat.newOrCheckDMat to obtain the output matrix.
+ */
+ def fDMult(aa:DMat, outmat:Mat):DMat = {
+ if (ncols == aa.nrows) {
+ val out = DMat.newOrCheckDMat(nrows, aa.ncols, outmat)
+ Mat.nflops += 2 * length.toLong * aa.ncols.toLong
+ if (Mat.noMKL) {
+ // the loop below accumulates with +=, so the output is zeroed first
+ out.clear
+ var i = 0
+ while (i < aa.ncols) {
+ var j = 0
+ while (j < aa.nrows) {
+ var k = 0
+ // dval = element (j, i) of aa; ncols equals aa.nrows in this branch
+ val dval = aa.data(j + i*ncols)
+ while (k < nrows) {
+ out.data(k+i*nrows) += data(k+j*nrows)*dval
+ k += 1
+ }
+ j += 1
+ }
+ i += 1
+ }
+ } else {
+ if (nrows == 1) {
+ // row vector times matrix, expressed as a transposed matrix-vector product on aa
+ dgemv(ORDER.ColMajor, TRANSPOSE.Trans, aa.nrows, aa.ncols, 1.0, aa.data, aa.nrows, data, 1, 0, out.data, 1)
+ } else if (aa.ncols == 1) {
+ // matrix times column vector
+ dgemv(ORDER.ColMajor, TRANSPOSE.NoTrans, nrows, ncols, 1.0, data, nrows, aa.data, 1, 0, out.data, 1)
+ } else {
+ dgemm(ORDER.ColMajor, TRANSPOSE.NoTrans, TRANSPOSE.NoTrans,
+ nrows, aa.ncols, ncols, 1.0, data, nrows, aa.data, aa.nrows, 0, out.data, nrows)
+ }
+ }
+ out
+ } else if (ncols == 1 && nrows == 1) {
+ // this is a scalar: scale every element of aa
+ val out = DMat.newOrCheckDMat(aa.nrows, aa.ncols, outmat)
+ Mat.nflops += aa.length
+ var i = 0
+ val dvar = data(0)
+ while (i < aa.length) {
+ out.data(i) = dvar * aa.data(i)
+ i += 1
+ }
+ out
+ } else if (aa.ncols == 1 && aa.nrows == 1) {
+ // aa is a scalar: scale every element of this
+ val out = DMat.newOrCheckDMat(nrows, ncols, outmat)
+ Mat.nflops += length
+ var i = 0
+ val dvar = aa.data(0)
+ while (i < length) {
+ out.data(i) = dvar * data(i)
+ i += 1
+ }
+ out
+ } else throw new RuntimeException("dimensions mismatch")
+ }
+
+ /*
+ * Product of this dense matrix with the sparse matrix ss, returned as an nrows x ss.ncols DMat.
+ * Throws RuntimeException("dimensions mismatch") unless ncols == ss.nrows.
+ * Three paths are used: dcscmv for a single-row left operand when MKL is available, an
+ * explicit loop over the stored entries of ss when nrows < 20 or MKL is unavailable, and
+ * dmcscm otherwise.
+ */
+ def fSMult(ss:SDMat, outmat:Mat):DMat = {
+ if (ncols != ss.nrows) {
+ throw new RuntimeException("dimensions mismatch")
+ } else {
+ val out = DMat.newOrCheckDMat(nrows, ss.ncols, outmat)
+ Mat.nflops += 2 * nrows.toLong * ss.nnz
+ // ioff is the index base of ss.jc / ss.ir as stored (Mat.ioneBased)
+ val ioff = Mat.ioneBased;
+ val nr = ss.nrows
+ val nc = ss.ncols
+ val kk = ncols
+ var jc0:Array[Int] = null
+ var ir0:Array[Int] = null
+ // jc0 / ir0 are the index arrays handed to dcscmv: passed through SparseMat.incInds
+ // when the stored base is 0, used as-is otherwise.
+ // NOTE(review): presumably incInds yields one-based copies — confirm.
+ if (ioff == 0) {
+ jc0 = SparseMat.incInds(ss.jc)
+ ir0 = SparseMat.incInds(ss.ir)
+ } else {
+ jc0 = ss.jc
+ ir0 = ss.ir
+ }
+ if (nrows == 1 && !Mat.noMKL) {
+ dcscmv("T", nr, nc, 1.0, "GLNF", ss.data, ir0, jc0, data, 0.0, out.data)
+ out
+ } else {
+ // both remaining paths start from a zeroed output
+ out.clear
+ if (nrows < 20 || Mat.noMKL) {
+ // for column i of ss, walk its stored entries (row ival, value dval) and add
+ // dval times column ival of this matrix into column i of the output
+ var i = 0
+ while (i < ss.ncols) {
+ var j = ss.jc(i) - ioff
+ while (j < ss.jc(i+1)-ioff) {
+ val dval = ss.data(j)
+ val ival = ss.ir(j) - ioff
+ var k = 0
+ while (k < nrows) {
+ out.data(k+i*nrows) += data(k+ival*nrows)*dval
+ k += 1
+ }
+ j += 1
+ }
+ i += 1
+ }
+ } else {
+ // NOTE(review): this call receives ss.ir / ss.jc rather than ir0 / jc0 — confirm the
+ // index base dmcscm expects.
+ dmcscm(nrows, ss.ncols, data, nrows, ss.data, ss.ir, ss.jc, out.data, nrows)
+ // dcsrmm("N", ss.ncols, nrows, ncols, 1.0, "GLNF", ss.data, ss.ir, ss.jc, data, ncols, 0, out.data, out.ncols)
+ }
+ }
+ out
+ }
+ }
+
+ /*
+  * Product of this dense matrix with the sparse matrix a via the native dmcsrm routine
+  * (exposed as the xT operator). A caller-supplied output matrix is cleared first.
+  * NOTE(review): the check below requires ncols == a.nrows and sizes the result as
+  * nrows x a.ncols; for a product with the transpose of a one would expect
+  * ncols == a.ncols and an nrows x a.nrows result -- confirm against dmcsrm's contract.
+  */
+ def multT(a:SDMat, outmat:Mat):DMat = {
+   import edu.berkeley.bid.CBLAS._
+   if (ncols != a.nrows) {
+     throw new RuntimeException("xT dimensions mismatch")
+   } else {
+     val result = DMat.newOrCheckDMat(nrows, a.ncols, outmat)
+     val reusingCallerMatrix = outmat.asInstanceOf[AnyRef] != null
+     if (reusingCallerMatrix) result.clear
+     dmcsrm(nrows, a.ncols, data, nrows, a.data, a.ir, a.jc, result.data, nrows)
+     Mat.nflops += 2L * a.nnz * nrows
+     result
+   }
+ }
+
+ /*
+  * Very slow, row-and-column multiply: every output element is an explicit dot product
+  * of a row of this matrix with a column of a. Only dense (DMat) arguments are accepted.
+  */
+ def sDMult(a:Mat):DMat =
+   a match {
+     case aa:DMat => {
+       if (ncols != a.nrows) throw new RuntimeException("dimensions mismatch")
+       val out = DMat(nrows, a.ncols)
+       var col = 0
+       while (col < a.ncols) {
+         var row = 0
+         while (row < nrows) {
+           var acc = 0.0
+           var k = 0
+           while (k < ncols) {
+             acc += data(row+k*nrows) * aa.data(k+col*a.nrows)
+             k += 1
+           }
+           out.data(row + col*out.nrows) = acc
+           row += 1
+         }
+         col += 1
+       }
+       out
+     }
+     case _ => throw new RuntimeException("argument must be dense")
+   }
+
+ /*
+  * Weka multiply: copies each row of this matrix into a contiguous buffer and
+  * forms its dot product with every column of a.
+  * The result is nrows x a.ncols and is taken from DMat.newOrCheckDMat(…, omat).
+  * Throws RuntimeException when a is not a DMat or when ncols != a.nrows.
+  */
+
+ def wDMult(a:Mat, omat:Mat):DMat =
+   a match {
+     case aa:DMat => {
+       if (ncols == a.nrows) {
+         val out = DMat.newOrCheckDMat(nrows, a.ncols, omat)
+         val tmp = new Array[Double](ncols)
+         var i = 0
+         while (i < nrows) {
+           // Gather row i of this (column-major) matrix.
+           var j = 0
+           while (j < ncols) {
+             tmp(j) = data(i+j*nrows)
+             j += 1
+           }
+           j = 0
+           while (j < a.ncols) {
+             var k = 0
+             var sum = 0.0
+             while (k < ncols) {
+               // Column j of aa; the earlier code indexed column i here.
+               sum += tmp(k) * aa.data(k+j*a.nrows)
+               k += 1
+             }
+             // Element (i,j) of the column-major result.
+             out.data(i + j*out.nrows) = sum
+             j += 1
+           }
+           i += 1
+         }
+         out
+       } else throw new RuntimeException("dimensions mismatch")
+     }
+     case _ => throw new RuntimeException("argument must be dense")
+   }
+
+ // Dot product with another DMat; forwards to the superclass implementation.
+ def dot(a:DMat):Double = super.dot(a)
+
+ // Generic-argument form: the argument is cast to DMat, so a non-DMat argument fails with ClassCastException.
+ override def dot(a:Mat):Double = super.dot(a.asInstanceOf[DMat])
+
+ /*
+  * Backs the "/" operator: LU-factors a copy of a (dgetrf) and solves with a copy of
+  * this matrix as the right-hand side (dgetrs). Neither operand is modified.
+  * Requires a to be square with a.nrows == ncols, otherwise throws RuntimeException.
+  * NOTE(review): ORDER.RowMajor is passed although data is indexed column-major
+  * elsewhere in this file -- presumably to solve the transposed system; confirm.
+  */
+ def solvel(a0:Mat):DMat =
+   a0 match {
+     case a:DMat => {
+       Mat.nflops += 2L*a.nrows*a.nrows*a.nrows/3 + 2L*nrows*a.nrows*a.nrows
+       if (a.nrows == a.ncols && ncols == a.nrows) {
+         val solution = DMat(nrows, ncols)
+         val factors = new Array[Double](ncols*ncols)
+         val pivots = new Array[Int](ncols)
+         System.arraycopy(a.data, 0, factors, 0, a.length)
+         System.arraycopy(data, 0, solution.data, 0, length)
+         dgetrf(ORDER.RowMajor, ncols, ncols, factors, ncols, pivots)
+         dgetrs(ORDER.RowMajor, "N", ncols, nrows, factors, ncols, pivots, solution.data, nrows)
+         solution
+       } else {
+         throw new RuntimeException("solve needs a square matrix")
+       }
+     }
+     case _ => throw new RuntimeException("unsupported arg to / "+a0)
+   }
+
+ /*
+  * Backs the "\\" operator: LU-factors a copy of this matrix (dgetrf) and solves with
+  * a copy of a as the right-hand side (dgetrs). Neither operand is modified.
+  * Requires this matrix to be square with ncols == a.nrows, otherwise throws RuntimeException.
+  */
+ def solver(a0:Mat):DMat =
+   a0 match {
+     case a:DMat => {
+       Mat.nflops += 2L*nrows*nrows*nrows/3 + 2L*nrows*nrows*a.ncols
+       if (nrows == ncols && ncols == a.nrows) {
+         val solution = DMat(a.nrows, a.ncols)
+         val factors = new Array[Double](ncols*ncols)
+         val pivots = new Array[Int](ncols)
+         System.arraycopy(data, 0, factors, 0, length)
+         System.arraycopy(a.data, 0, solution.data, 0, a.length)
+         dgetrf(ORDER.ColMajor, ncols, ncols, factors, ncols, pivots)
+         dgetrs(ORDER.ColMajor, "N", ncols, a.ncols, factors, nrows, pivots, solution.data, nrows)
+         solution
+       } else {
+         throw new RuntimeException("solve needs a square matrix")
+       }
+     }
+     case _ => throw new RuntimeException("unsupported arg to / "+a0)
+   }
+
+ // Zero the first `length` entries of the backing array and return this matrix.
+ override def clear = {
+   Arrays.fill(data, 0, length, 0.0)
+   this
+ }
+
+ /*
+  * Return a DMat of shape nr x nc: this matrix when the shape already matches,
+  * a new DMat wrapping the same backing array when that array is large enough,
+  * otherwise a freshly allocated matrix. The nnz argument is not used here.
+  */
+ override def recycle(nr:Int, nc:Int, nnz:Int):DMat = {
+   val sameShape = nrows == nr && nc == ncols
+   if (sameShape) this
+   else if (data.size >= nr*nc) new DMat(nr, nc, data)
+   else DMat(nr, nc)
+ }
+ /*
+ * Routines to operate on two DMats. These are the compute routines.
+ * Every operator here passes null as the output matrix to its helper.
+ */
+ // Matrix products and linear solves.
+ def * (b : DMat) = fDMult(b, null)
+ def * (b : SDMat) = fSMult(b, null)
+ def xT (b : SDMat) = multT(b, null)
+ def / (b : DMat) = solvel(b)
+ def \\ (b : DMat) = solver(b)
+ def ^ (b : DMat) = ddMatOp(b, (x:Double, y:Double) => math.pow(x,y), null)
+
+ // Arithmetic routed through ddMatOpv with the vector kernels from the DMat companion object.
+ def + (b : DMat) = ddMatOpv(b, DMat.vecAdd _, null)
+ def - (b : DMat) = ddMatOpv(b, DMat.vecSub _, null)
+ def *@ (b : DMat) = ddMatOpv(b, DMat.vecMul _, null)
+ def /@ (b : DMat) = ddMatOpv(b, DMat.dVecDiv _, null)
+
+ // Comparisons: the supplied function yields 1.0 where the relation holds and 0.0 otherwise.
+ def > (b : DMat) = ddMatOp(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, null)
+ def < (b : DMat) = ddMatOp(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, null)
+ def == (b : DMat) = ddMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+ def === (b : DMat) = ddMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+ def >= (b : DMat) = ddMatOp(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, null)
+ def <= (b : DMat) = ddMatOp(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, null)
+ def != (b : DMat) = ddMatOp(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, null)
+
+ // Operators taking a Double scalar. `*` wraps the scalar in a 1x1 DMat and reuses fDMult;
+ // the others go through ddMatOpScalarv / ddMatOpScalar with a null output matrix.
+ override def * (b : Double) = fDMult(DMat.elem(b), null)
+ override def + (b : Double) = ddMatOpScalarv(b, DMat.vecAdd _, null)
+ override def - (b : Double) = ddMatOpScalarv(b, DMat.vecSub _, null)
+ override def *@ (b : Double) = ddMatOpScalarv(b, DMat.vecMul _, null)
+ override def /@ (b : Double) = ddMatOpScalarv(b, DMat.dVecDiv _, null)
+ override def ^ (b : Double) = ddMatOpScalar(b, (x:Double, y:Double) => math.pow(x,y), null)
+
+ // Comparisons against a Double scalar: the supplied function yields 1.0 or 0.0.
+ override def > (b : Double) = ddMatOpScalar(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, null)
+ override def < (b : Double) = ddMatOpScalar(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, null)
+ override def == (b : Double) = ddMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+ override def >= (b : Double) = ddMatOpScalar(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, null)
+ override def <= (b : Double) = ddMatOpScalar(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, null)
+ override def != (b : Double) = ddMatOpScalar(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, null)
+
+ // Operators taking a Float scalar; they mirror the Double-scalar operators and call the same helpers.
+ override def * (b : Float) = fDMult(DMat.elem(b), null)
+ override def + (b : Float) = ddMatOpScalarv(b, DMat.vecAdd _, null)
+ override def - (b : Float) = ddMatOpScalarv(b, DMat.vecSub _, null)
+ override def *@ (b : Float) = ddMatOpScalarv(b, DMat.vecMul _, null)
+ override def /@ (b : Float) = ddMatOpScalarv(b, DMat.dVecDiv _, null)
+ override def ^ (b : Float) = ddMatOpScalar(b, (x:Double, y:Double) => math.pow(x,y), null)
+
+ // Comparisons against a Float scalar: the supplied function yields 1.0 or 0.0.
+ override def > (b : Float) = ddMatOpScalar(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, null)
+ override def < (b : Float) = ddMatOpScalar(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, null)
+ override def == (b : Float) = ddMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+ override def >= (b : Float) = ddMatOpScalar(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, null)
+ override def <= (b : Float) = ddMatOpScalar(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, null)
+ override def != (b : Float) = ddMatOpScalar(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, null)
+
+ // Horizontal concatenation: wraps the generic ghorzcat result as a DMat.
+ def \ (b: DMat) = DMat(ghorzcat(b))
+ def \ (b:Double) = DMat(ghorzcat(DMat.elem(b)))
+
+ // Vertical concatenation: wraps the generic gvertcat result as a DMat.
+ def on (b: DMat) = DMat(gvertcat(b))
+ // NOTE(review): this overload calls vertcat rather than DMat(gvertcat(...)) like its siblings; vertcat is defined outside this view.
+ def on (b: Double) = vertcat(DMat.elem(b))
+
+ // Pair this matrix with a right-hand operand; in the resulting pair this matrix is the first constructor argument.
+ def ~ (b : DMat):DPair = new DPair(this, b)
+ def ~ (b : SDMat):SDPair = new SDPair(this, b)
+
+ // Generic form: dispatches on the runtime type of b and rejects anything that is not a DMat or SDMat.
+ override def ~ (b: Mat):Pair = b match {
+ case db:DMat => new DPair(this, db)
+ case sb:SDMat => new SDPair(this, sb)
+ case _ => throw new RuntimeException("wrong types for operator ~ ")
+ }
+ /*
+ * Specialize to IMats to help the type system.
+ * Each operator converts the IMat argument with DMat(b) and reuses the DMat overload.
+ */
+ def + (b : IMat):DMat = this + DMat(b)
+ def - (b : IMat):DMat = this - DMat(b)
+ def * (b : IMat):DMat = this * DMat(b)
+ def / (b : IMat):DMat = this / DMat(b)
+ def \\ (b : IMat):DMat = this \\ DMat(b)
+ def *@ (b : IMat):DMat = this *@ DMat(b)
+ def /@ (b : IMat):DMat = this /@ DMat(b)
+ def \ (b : IMat):DMat = this \ DMat(b)
+ def on (b : IMat):DMat = this on DMat(b)
+
+ // Comparisons with an IMat, again via conversion to DMat.
+ def > (b : IMat):DMat = this > DMat(b)
+ def < (b : IMat):DMat = this < DMat(b)
+ def >= (b : IMat):DMat = this >= DMat(b)
+ def <= (b : IMat):DMat = this <= DMat(b)
+ def == (b : IMat):DMat = this == DMat(b)
+ def === (b : IMat):DMat = this === DMat(b)
+ def != (b : IMat):DMat = this != DMat(b)
+
+ /*
+ * Specialize to FMats to help the type system.
+ * Each operator converts the FMat argument with DMat(b) and reuses the DMat overload.
+ */
+ def + (b : FMat):DMat = this + DMat(b)
+ def - (b : FMat):DMat = this - DMat(b)
+ def * (b : FMat):DMat = this * DMat(b)
+ def / (b : FMat):DMat = this / DMat(b)
+ def \\ (b : FMat):DMat = this \\ DMat(b)
+ def *@ (b : FMat):DMat = this *@ DMat(b)
+ def /@ (b : FMat):DMat = this /@ DMat(b)
+ def \ (b : FMat):DMat = this \ DMat(b)
+ def on (b : FMat):DMat = this on DMat(b)
+
+ // Comparisons with an FMat, again via conversion to DMat.
+ def > (b : FMat):DMat = this > DMat(b)
+ def < (b : FMat):DMat = this < DMat(b)
+ def >= (b : FMat):DMat = this >= DMat(b)
+ def <= (b : FMat):DMat = this <= DMat(b)
+ def == (b : FMat):DMat = this == DMat(b)
+ def === (b : FMat):DMat = this === DMat(b)
+ def != (b : FMat):DMat = this != DMat(b)
+
+ /*
+ * Specialize to CMats to help the type system.
+ * Here this matrix is converted with CMat(this) and the CMat operator is applied, so results are CMats.
+ */
+ def + (b : CMat):CMat = CMat(this) + b
+ def - (b : CMat):CMat = CMat(this) - b
+ def * (b : CMat):CMat = CMat(this) * b
+ def / (b : CMat):CMat = CMat(this) / b
+ def \\ (b : CMat):CMat = CMat(this) \\ b
+ def *@ (b : CMat):CMat = CMat(this) *@ b
+ def /@ (b : CMat):CMat = CMat(this) /@ b
+ def \ (b : CMat):CMat = CMat(this) \ b
+ def on (b : CMat):CMat = CMat(this) on b
+
+ /*
+ * Operators whose second arg is generic.
+ * Each forwards to applyMat with this matrix, the argument, a null output matrix and an Mop_* operator tag.
+ */
+ import Operator._
+ override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus)
+ override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus)
+ override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times)
+ override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div)
+ override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve)
+ override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes)
+ override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv)
+ override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat)
+ override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat)
+
+ // Comparisons; note that == and === both use Mop_EQ.
+ override def > (b : Mat):Mat = applyMat(this, b, null, Mop_GT)
+ override def < (b : Mat):Mat = applyMat(this, b, null, Mop_LT)
+ override def >= (b : Mat):Mat = applyMat(this, b, null, Mop_GE)
+ override def <= (b : Mat):Mat = applyMat(this, b, null, Mop_LE)
+ override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def === (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE)
+
+}
+
+/*
+ * Pairs an output matrix (omat) with a DMat operand (mat). Every operator applies the
+ * corresponding DMat routine to mat and passes omat as the output matrix.
+ */
+class DPair (val omat:Mat, val mat:DMat) extends Pair{
+  /*
+   * Transpose of mat. Uses the generic gt when Mat.noMKL is set, otherwise domatcopy
+   * into a matrix obtained from DMat.newOrCheckDMat(…, omat).
+   */
+  override def t:DMat = if (Mat.noMKL) {
+    DMat(mat.gt(omat))
+  } else {
+    val out = DMat.newOrCheckDMat(mat.ncols, mat.nrows, omat)
+    domatcopy("C", "T", mat.nrows, mat.ncols, 1.0, mat.data, mat.nrows, out.data, mat.ncols)
+    out
+  }
+
+  // Matrix products and arithmetic with omat as the output matrix.
+  def * (b : DMat) = mat.fDMult(b, omat)
+  def * (b : SDMat) = mat.fSMult(b, omat)
+  def xT (b : SDMat) = mat.multT(b, omat)
+  def + (b : DMat) = mat.ddMatOpv(b, DMat.vecAdd _, omat)
+  def - (b : DMat) = mat.ddMatOpv(b, DMat.vecSub _, omat)
+  def *@ (b : DMat) = mat.ddMatOpv(b, DMat.vecMul _, omat)
+  def /@ (b : DMat) = mat.ddMatOpv(b, DMat.dVecDiv _, omat)
+  // Passes omat like every other operator in this class (it previously passed null).
+  def ^ (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => math.pow(x,y), omat)
+
+  // Comparisons: the supplied function yields 1.0 where the relation holds and 0.0 otherwise.
+  def > (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, omat)
+  def < (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, omat)
+  def == (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
+  def === (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
+  def >= (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, omat)
+  def <= (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, omat)
+  def != (b : DMat) = mat.ddMatOp(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, omat)
+
+  // Scalar operands.
+  override def * (b : Double) = mat.fDMult(DMat.elem(b), omat)
+  override def * (b : Float) = mat.fDMult(DMat.elem(b), omat)
+  override def + (b : Double) = mat.ddMatOpScalarv(b, DMat.vecAdd _, omat)
+  override def - (b : Double) = mat.ddMatOpScalarv(b, DMat.vecSub _, omat)
+  override def *@ (b : Double) = mat.ddMatOpScalarv(b, DMat.vecMul _, omat)
+  override def /@ (b : Double) = mat.ddMatOpScalarv(b, DMat.dVecDiv _, omat)
+  override def ^ (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => math.pow(x,y), omat)
+
+  override def > (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, omat)
+  override def < (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, omat)
+  override def == (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
+  override def === (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, omat)
+  override def >= (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, omat)
+  override def <= (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, omat)
+  override def != (b : Double) = mat.ddMatOpScalar(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, omat)
+
+  // Generic operands: forwarded to applyMat with omat and an Mop_* operator tag.
+  import Operator._
+  override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus)
+  override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus)
+  override def * (b : Mat):Mat = applyMat(mat, b, omat, Mop_Times)
+  override def / (b : Mat):Mat = applyMat(mat, b, omat, Mop_Div)
+  override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve)
+  override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes)
+  override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv)
+  override def \ (b : Mat):Mat = applyMat(mat, b, omat, Mop_HCat)
+  override def on (b : Mat):Mat = applyMat(mat, b, omat, Mop_VCat)
+
+  override def > (b : Mat):Mat = applyMat(mat, b, omat, Mop_GT)
+  override def < (b : Mat):Mat = applyMat(mat, b, omat, Mop_LT)
+  override def >= (b : Mat):Mat = applyMat(mat, b, omat, Mop_GE)
+  override def <= (b : Mat):Mat = applyMat(mat, b, omat, Mop_LE)
+  override def == (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+  override def === (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+  override def != (b : Mat):Mat = applyMat(mat, b, omat, Mop_NE)
+}
+
+/*
+ * Companion object for DMat: factory methods, output-matrix checking, and the strided
+ * vector kernels used by the element-wise operators.
+ *
+ * Every vec* / dVecDiv kernel walks a, b and c from offsets a0, b0, c0 with strides
+ * ainc, binc, cinc, stops once the c index reaches c0 + n, and returns 0.
+ */
+object DMat {
+
+  // c = a / b
+  def dVecDiv(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) / b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  /*
+   * Return a DMat of shape nr x nc to write results into. A null or 0x0 omat yields a new
+   * matrix; a DMat is returned as is when its shape matches and recycled otherwise;
+   * any other matrix type throws RuntimeException.
+   */
+  def newOrCheckDMat(nr:Int, nc:Int, omat:Mat):DMat = {
+    val noUsableOutput = omat.asInstanceOf[AnyRef] == null || (omat.nrows == 0 && omat.ncols == 0)
+    if (noUsableOutput) {
+      DMat(nr, nc)
+    } else {
+      omat match {
+        case outmat:DMat =>
+          if (outmat.nrows == nr && outmat.ncols == nc) outmat
+          else outmat.recycle(nr, nc, 0)
+        case _ => throw new RuntimeException("wrong type for out matrix "+omat)
+      }
+    }
+  }
+
+  // New nr x nc matrix backed by a fresh array.
+  def apply(nr:Int, nc:Int) = new DMat(nr, nc, new Array[Double](nr*nc))
+
+  // Wrap a DenseMat[Double] as a DMat; the backing array is shared, not copied.
+  def apply(a:DenseMat[Double]):DMat = new DMat(a.nrows, a.ncols, a.data)
+
+  // Convert a DMat, FMat, IMat or SDMat into a DMat; other types throw RuntimeException.
+  def apply(x:Mat):DMat = {
+    x match {
+      case dd:DMat => {
+        val copy = DMat(x.nrows, x.ncols)
+        System.arraycopy(dd.data, 0, copy.data, 0, dd.length)
+        copy
+      }
+      case ff:FMat => {
+        val copy = DMat(x.nrows, x.ncols)
+        Mat.copyToDoubleArray(ff.data, 0, copy.data, 0, ff.length)
+        copy
+      }
+      case ii:IMat => {
+        val copy = DMat(x.nrows, x.ncols)
+        Mat.copyToDoubleArray(ii.data, 0, copy.data, 0, ii.length)
+        copy
+      }
+      case ss:SDMat => DMat(ss.full)
+      case _ => throw new RuntimeException("Unsupported source type")
+    }
+  }
+
+
+  // c = a + b
+  def vecAdd(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) + b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  // c = a - b
+  def vecSub(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) - b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  // c = a * b
+  def vecMul(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) * b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  // c = max(a, b)
+  def vecMax(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = math.max(a(ia), b(ib))
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  // c = min(a, b)
+  def vecMin(a:Array[Double], a0:Int, ainc:Int, b:Array[Double], b0:Int, binc:Int, c:Array[Double], c0:Int, cinc:Int, n:Int):Double = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = math.min(a(ia), b(ib))
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+
+  // 1x1 matrix holding x.
+  def elem(x:Double) = {
+    val single = DMat(1,1)
+    single.data(0) = x
+    single
+  }
+
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/DenseMat.scala b/src/main/scala/BIDMat/DenseMat.scala
new file mode 100755
index 00000000..54f1077a
--- /dev/null
+++ b/src/main/scala/BIDMat/DenseMat.scala
@@ -0,0 +1,1313 @@
+package BIDMat
+import scala.math.Numeric._
+import java.util.Arrays
+import java.util.Comparator
+import scala.actors._
+import scala.actors.Actor._
+
+class DenseMat[@specialized(Double,Float,Int,Byte) T]
+(nr: Int, nc: Int, val data:Array[T])(implicit manifest:ClassManifest[T]) extends Mat(nr, nc) {
+
+ // Auxiliary constructor: allocates a fresh backing array of nr*nc elements.
+ def this(nr:Int, nc:Int)(implicit manifest:ClassManifest[T]) = this(nr, nc, new Array[T](nr*nc))
+
+ /*
+  * Return the (0,0) value as a scalar.
+  * Throws RuntimeException when either dimension is greater than 1.
+  */
+ def v:T = {
+   if (nrows <= 1 && ncols <= 1) {
+     data(0)
+   } else {
+     throw new RuntimeException("Matrix should be 1x1 to extract value")
+   }
+ }
+
+ // Type name reported for this matrix class.
+ override def mytype = "DenseMat"
+ /*
+  * True when the matrix has exactly one row or exactly one column.
+  */
+ def isvector(): Boolean = nrows == 1 || ncols == 1
+ /*
+  * Bounds-checked element access by (row, column). Mat.oneBased is subtracted from both
+  * indices first; an index outside the matrix throws IndexOutOfBoundsException.
+  */
+ def apply(r0:Int, c0:Int):T = {
+   val base = Mat.oneBased
+   val row = r0 - base
+   val col = c0 - base
+   val inRange = row >= 0 && row < nrows && col >= 0 && col < ncols
+   if (inRange) {
+     data(row+col*nrows)
+   } else {
+     throw new IndexOutOfBoundsException("("+(row+base)+","+(col+base)+") vs ("+nrows+","+ncols+")");
+   }
+ }
+ /*
+  * Bounds-checked element access by linear index. Mat.oneBased is subtracted from the
+  * index first; an index outside [0, length) throws IndexOutOfBoundsException.
+  */
+ def apply(i0:Int):T = {
+   val base = Mat.oneBased
+   val idx = i0 - base
+   if (idx >= 0 && idx < length) {
+     data(idx)
+   } else {
+     throw new IndexOutOfBoundsException(""+(idx+base)+" >= ("+length+")");
+   }
+ }
+ /*
+  * Element at 0-based (r, c), read straight from the column-major backing array
+  * with no range check.
+  */
+ def get_(r:Int, c:Int):T = data(r+c*nrows)
+
+
+ /*
+  * Assignment m(r,c) = v. Mat.oneBased is subtracted from both indices first; an index
+  * outside the matrix throws IndexOutOfBoundsException. Returns the stored value.
+  */
+ def update(r0:Int, c0:Int, v:T):T = {
+   val base = Mat.oneBased
+   val row = r0 - base
+   val col = c0 - base
+   val inRange = row >= 0 && row < nrows && col >= 0 && col < ncols
+   if (!inRange) {
+     throw new IndexOutOfBoundsException("("+(row+base)+","+(col+base)+") vs ("+nrows+","+ncols+")");
+   }
+   data(row+col*nrows) = v
+   v
+ }
+ /*
+  * Assignment m(i) = v by linear index. Mat.oneBased is subtracted from the index first;
+  * an index outside [0, length) throws IndexOutOfBoundsException. Returns the stored value.
+  */
+ def update(i0:Int, v:T):T = {
+   val base = Mat.oneBased
+   val idx = i0 - base
+   if (idx >= 0 && idx < length) {
+     data(idx) = v
+   } else {
+     throw new IndexOutOfBoundsException(""+(idx+base)+" vs ("+length+")");
+   }
+   v
+ }
+ /*
+  * Store v at 0-based (r, c) with no range check and return v.
+  */
+ def set_(r:Int, c:Int, v:T):T = {
+   val pos = r+c*nrows
+   data(pos) = v
+   v
+ }
+ /*
+  * Transpose into an ncols x nrows matrix obtained from DenseMat.newOrCheck(…, oldmat).
+  */
+ def gt(oldmat:Mat):DenseMat[T] = {
+   val transposed:DenseMat[T] = DenseMat.newOrCheck(ncols, nrows, oldmat)
+   var row = 0
+   while (row < nrows) {
+     var col = 0
+     while (col < ncols) {
+       // element (row, col) of this becomes element (col, row) of the result
+       transposed.data(col+row*ncols) = data(row+col*nrows)
+       col += 1
+     }
+     row += 1
+   }
+   transposed
+ }
+ /*
+  * Stack a below this matrix. Throws RuntimeException unless both have the same
+  * number of columns.
+  */
+ def gvertcat(a:DenseMat[T]):DenseMat[T] =
+   if (ncols == a.ncols) {
+     val totalRows = nrows+a.nrows
+     val stacked = new DenseMat[T](totalRows, ncols)
+     var col = 0
+     while (col < ncols) {
+       // each output column is this matrix's column followed by a's column
+       System.arraycopy(data, col*nrows, stacked.data, col*totalRows, nrows)
+       System.arraycopy(a.data, col*a.nrows, stacked.data, nrows+col*totalRows, a.nrows)
+       col += 1
+     }
+     stacked
+   } else {
+     throw new RuntimeException("ncols must match")
+   }
+ /*
+  * Place a to the right of this matrix. Throws RuntimeException unless both have the
+  * same number of rows.
+  */
+ def ghorzcat(a:DenseMat[T]):DenseMat[T]=
+   if (nrows == a.nrows) {
+     val joined = new DenseMat[T](nrows, ncols+a.ncols)
+     val ownCount = nrows*ncols
+     // column-major storage: the result is this matrix's data followed by a's data
+     System.arraycopy(data, 0, joined.data, 0, ownCount)
+     System.arraycopy(a.data, 0, joined.data, ownCount, nrows*a.ncols)
+     joined
+   } else {
+     throw new RuntimeException("nrows must match")
+   }
+ /*
+  * Number of entries among the first `length` elements that compare unequal to 0.
+  */
+ override def nnz:Int = {
+   var nonzeros:Int = 0
+   var idx = 0
+   while (idx < length) {
+     if (data(idx) != 0) nonzeros += 1
+     idx += 1
+   }
+   nonzeros
+ }
+ /*
+  * Helper function for find functions.
+  * Writes the linear index of every non-zero element, shifted by off, into out.data
+  * in increasing order and returns out. out must have room for nnz entries.
+  */
+ def findInds(out:IMat, off:Int):IMat = {
+   var count = 0
+   var i = 0
+   while (i < length) {
+     // Read with the 0-based position and shift only the reported index;
+     // reading data(i) with i already shifted skips element 0 and runs past the end when off is 1.
+     if (data(i) != 0) {
+       out.data(count) = i + off
+       count += 1
+     }
+     i += 1
+   }
+   out
+ }
+ /*
+  * Linear indices of all non-zero elements, as an nnz x 1 IMat; findInds is called
+  * with Mat.oneBased as the offset.
+  */
+ def find:IMat = findInds(IMat(nnz, 1), Mat.oneBased)
+
+ /*
+  * Find indices (i,j) for non-zero elements.
+  * Returns two nnz x 1 IMats holding row and column indices, each shifted by Mat.oneBased.
+  */
+ def find2:(IMat, IMat) = {
+   // nnz scans the whole matrix, so evaluate it once for both outputs.
+   val count = nnz
+   val iout = IMat(count, 1)
+   val jout = IMat(count, 1)
+   findInds(iout, 0)
+   val off = Mat.oneBased
+   var i = 0
+   while (i < iout.length) {
+     // Split the 0-based linear index into column-major row and column.
+     val ival:Int = iout.data(i)
+     jout.data(i) = (ival / nrows) + off
+     iout.data(i) = (ival % nrows) + off
+     i += 1
+   }
+   (iout, jout)
+ }
+ /*
+  * Find tuples (i,j,v) for non-zero elements.
+  * Returns row indices, column indices (both shifted by Mat.oneBased) and the matching
+  * values, each as an nnz x 1 matrix.
+  */
+ def gfind3:(IMat, IMat, DenseMat[T]) = {
+   // nnz scans the whole matrix, so evaluate it once for all three outputs.
+   val count = nnz
+   val iout = IMat(count, 1)
+   val jout = IMat(count, 1)
+   val vout = new DenseMat[T](count, 1)
+   findInds(iout, 0)
+   val off = Mat.oneBased
+   var i = 0
+   while (i < iout.length) {
+     // Split the 0-based linear index into column-major row and column.
+     val ival:Int = iout.data(i)
+     vout.data(i) = data(ival)
+     jout.data(i) = (ival / nrows) + off
+     iout.data(i) = (ival % nrows) + off
+     i += 1
+   }
+   (iout, jout, vout)
+ }
+ /*
+  * Return a(im) where im is a matrix of linear indices. A MatrixWildcard selects every
+  * element as a length x 1 column; otherwise the result has the shape of im.
+  * Mat.oneBased is subtracted from each index; an index outside [0, length) throws RuntimeException.
+  */
+ def gapply(im:IMat):DenseMat[T] =
+   im match {
+     case _:MatrixWildcard => {
+       val everything = new DenseMat[T](length, 1)
+       System.arraycopy(data, 0, everything.data, 0, everything.length)
+       everything
+     }
+     case _ => {
+       val picked = new DenseMat[T](im.nrows, im.ncols)
+       val base = Mat.oneBased
+       var pos = 0
+       while (pos < picked.length) {
+         val src = im.data(pos) - base
+         if (src >= 0 && src < length) {
+           picked.data(pos) = data(src)
+         } else {
+           throw new RuntimeException("bad linear index "+(src+base)+" vs "+length)
+         }
+         pos += 1
+       }
+       picked
+     }
+   }
+
+
+ /*
+  * Implement a(im) = b where im holds linear indices into this matrix and b has the same
+  * shape as im. With a MatrixWildcard, b must be a single column of `length` elements and
+  * replaces all data. Shape mismatches and out-of-range indices throw RuntimeException.
+  * Returns b.
+  */
+ def update(im:IMat, b:DenseMat[T]):DenseMat[T] =
+   im match {
+     case _:MatrixWildcard => {
+       if (length == b.length && b.ncols == 1) {
+         System.arraycopy(b.data, 0, data, 0, length)
+       } else {
+         throw new RuntimeException("dims mismatch")
+       }
+       b
+     }
+     case _ => {
+       if (im.nrows == b.nrows && im.ncols == b.ncols) {
+         val base = Mat.oneBased
+         var pos = 0
+         while (pos < im.length) {
+           val dest = im.data(pos) - base
+           if (dest >= 0 && dest < length) {
+             data(dest) = b.data(pos)
+           } else {
+             throw new RuntimeException("bad linear index "+(dest+base)+" vs "+length)
+           }
+           pos += 1
+         }
+       } else {
+         throw new RuntimeException("dims mismatch")
+       }
+       b
+     }
+   }
+
+
+ /*
+  * Implement a(im) = b where b is a constant. A MatrixWildcard sets every element;
+  * otherwise each linear index in a (after subtracting Mat.oneBased) is set, and an index
+  * outside [0, length) throws RuntimeException. Returns b.
+  */
+ def update(a:IMat, b:T):T = {
+   a match {
+     case _:MatrixWildcard => {
+       var pos = 0
+       while (pos < length) {
+         data(pos) = b
+         pos += 1
+       }
+     }
+     case _ => {
+       val base = Mat.oneBased
+       var pos = 0
+       while (pos < a.length) {
+         val dest = a.data(pos) - base
+         if (dest >= 0 && dest < length) {
+           data(dest) = b
+         } else {
+           throw new RuntimeException("bad linear index "+(dest+base)+" vs "+length)
+         }
+         pos += 1
+       }
+     }
+   }
+   b
+ }
+ /*
+  * Implement slicing, a(iv,jv) where iv and jv are vectors, using ? as wildcard.
+  * Row and column index lists come from DenseMat.getInds; Mat.oneBased is subtracted
+  * from each index when reading. No range check is done in this method.
+  */
+ def gapply(iv:IMat, jv:IMat):DenseMat[T] = {
+   val rowinds = DenseMat.getInds(iv, nrows)
+   val colinds = DenseMat.getInds(jv, ncols)
+   val slice = new DenseMat[T](rowinds.length, colinds.length)
+   val base = Mat.oneBased
+   var outCol = 0
+   while (outCol < slice.ncols) {
+     val srcCol = colinds(outCol) - base
+     var outRow = 0
+     while (outRow < slice.nrows) {
+       slice.data(outRow+outCol*slice.nrows) = data(rowinds(outRow)-base+nrows*srcCol)
+       outRow += 1
+     }
+     outCol += 1
+   }
+   slice
+ }
+ /*
+ * Implement slicing, a(iv,j) where iv a vector, j an integer, using ? as wildcard
+ * The integer is wrapped with IMat.ielem and the two-vector gapply does the work.
+ */
+ def gapply(iv:IMat, jv:Int):DenseMat[T] = {
+ gapply(iv, IMat.ielem(jv))
+ }
+ /*
+ * Implement slicing, a(i,jv) where i integer, jv a vector, using ? as wildcard
+ * The integer is wrapped with IMat.ielem and the two-vector gapply does the work.
+ */
+ def gapply(i:Int, jv:IMat):DenseMat[T] = {
+ gapply(IMat.ielem(i), jv)
+ }
+
+ /*
+  * Implement sliced assignment, a(iv,jv) = b where iv and jv are vectors, using ? as wildcard.
+  * b must have one row per selected row and one column per selected column, otherwise
+  * RuntimeException is thrown. Returns b.
+  */
+ def _update(iv:IMat, jv:IMat, b:DenseMat[T]):DenseMat[T] = {
+   val rowinds = DenseMat.getInds(iv, nrows)
+   val colinds = DenseMat.getInds(jv, ncols)
+   if (rowinds.length == b.nrows && colinds.length == b.ncols) {
+     val base = Mat.oneBased
+     var srcCol = 0
+     while (srcCol < b.ncols) {
+       val destCol = colinds(srcCol) - base
+       var srcRow = 0
+       while (srcRow < b.nrows) {
+         data(rowinds(srcRow)-base+nrows*destCol) = b.data(srcRow+srcCol*b.nrows)
+         srcRow += 1
+       }
+       srcCol += 1
+     }
+   } else {
+     throw new RuntimeException("dims mismatch in assignment")
+   }
+   b
+ }
+
+
+ /*
+  * Sliced assignment with a generic right-hand side. Dispatches to the typed update when
+  * this matrix and b are the same concrete kind (FMat, DMat, IMat or CMat); any other
+  * pairing throws RuntimeException with an explicit message instead of a bare MatchError.
+  */
+ override def update(iv:IMat, jv:IMat, b:Mat):Mat = {
+   (this, b) match {
+     case (me:FMat, bb:FMat) => me.update(iv, jv, bb):FMat
+     case (me:DMat, bb:DMat) => me.update(iv, jv, bb):DMat
+     case (me:IMat, bb:IMat) => me.update(iv, jv, bb):IMat
+     case (me:CMat, bb:CMat) => me.update(iv, jv, bb):CMat
+     case _ => throw new RuntimeException("wrong types for sliced update")
+   }
+ }
+
+ /*
+  * Implement sliced assignment, a(iv,jv) = b:T where iv and jv are vectors, using ? as wildcard.
+  * Every selected (row, column) position is set to b. Returns b.
+  */
+ def update(iv:IMat, jv:IMat, b:T):T = {
+   val rowinds = DenseMat.getInds(iv, nrows)
+   val colinds = DenseMat.getInds(jv, ncols)
+   val base = Mat.oneBased
+   var ci = 0
+   while (ci < colinds.length) {
+     val colStart = nrows*(colinds(ci) - base)
+     var ri = 0
+     while (ri < rowinds.length) {
+       data((rowinds(ri) - base)+colStart) = b
+       ri += 1
+     }
+     ci += 1
+   }
+   b
+ }
+ /*
+ * Implement sliced assignment, a(iv,j) = b where iv is a vector, j an integer, using ? as wildcard
+ * The integer is wrapped with IMat.ielem and the two-vector update does the work.
+ */
+ def update(iv:IMat, j:Int, b:T):T = {
+ update(iv, IMat.ielem(j), b)
+ }
+ /*
+ * Implement sliced assignment, a(i,jv) = b where i is an integer, jv a vector, using ? as wildcard
+ * The integer is wrapped with IMat.ielem and the two-vector update does the work.
+ */
+ def update(i:Int, jv:IMat, b:T):T = {
+ update(IMat.ielem(i), jv, b)
+ }
+
+ // Text for the element at linear index i as used by toString; this base version returns a fixed blank string.
+ def printOne(i:Int):String = " "
+
+ /*
+ * Render the matrix as text, one line per row, using printOne for each element.
+ * Output is limited to what fits in Mat.terminalWidth; truncated columns are marked
+ * with "..." and truncated rows with a final line of "..".
+ */
+ override def toString:String = {
+ val nChars = Mat.terminalWidth-4
+ val maxRows = 640/nChars
+ var maxCols = nChars
+ var fieldWidth = 4
+ var icols = 0
+ // Widen the field to fit the widest printed element and count how many columns fit on a line.
+ while (icols < math.min(ncols, maxCols)) {
+ var newWidth = fieldWidth
+ for (j <- 0 until math.min(nrows,maxRows)) newWidth = math.max(newWidth, 2+(printOne(j+nrows*icols).length))
+ if ((icols+1)*newWidth < nChars) {
+ fieldWidth = newWidth
+ icols += 1
+ } else {
+ // This column does not fit: stop here.
+ maxCols = icols
+ }
+ }
+ val sb:StringBuilder = new StringBuilder
+ // Source of padding characters; substring fails if fieldWidth-str.length exceeds this string's length.
+ val somespaces = " "
+ for (i <- 0 until math.min(nrows, maxRows)) {
+ for (j <- 0 until math.min(ncols, icols)) {
+ val str = printOne(i+j*nrows)
+ sb.append(somespaces.substring(0,fieldWidth-str.length)+str)
+ }
+ if (ncols > icols) {
+ sb.append("...")
+ }
+ sb.append("\n")
+ }
+ // Mark omitted rows with a trailing line of ".." entries.
+ if (nrows > maxRows) {
+ for (j <- 0 until math.min(ncols, maxCols)) {
+ sb.append(somespaces.substring(0, fieldWidth-2)+"..")
+ }
+ sb.append("\n")
+ }
+ sb.toString()
+ }
+
+ /*
+  * Zero the first `length` elements (null for reference element types) and return this matrix.
+  * Dispatches on the runtime type of the backing array rather than on a boxed sample
+  * element, so Byte data is handled and a null first element is not a problem.
+  */
+ override def clear:DenseMat[T] ={
+   if (length > 0) {
+     data.asInstanceOf[AnyRef] match {
+       case fa:Array[Float] => Arrays.fill(fa, 0, length, 0f)
+       case da:Array[Double] => Arrays.fill(da, 0, length, 0.0)
+       case ia:Array[Int] => Arrays.fill(ia, 0, length, 0)
+       case ba:Array[Byte] => Arrays.fill(ba, 0, length, 0.toByte)
+       case ra:Array[AnyRef] => Arrays.fill(ra, 0, length, null)
+     }
+   }
+   this
+ }
+
+ /*
+  * Set rows 0 until i+off of every column i to v and return this matrix.
+  * The row bound is clamped to nrows so that a large off, or a matrix with more columns
+  * than rows, cannot write into later columns or past the end of the array.
+  */
+ def setUpper(v:T, off:Int) = {
+   var i = 0
+   while (i < ncols) {
+     val jend = math.min(nrows, i+off)
+     var j = 0
+     while (j < jend) {
+       data(j + i*nrows) = v
+       j += 1
+     }
+     i += 1
+   }
+   this
+ }
+
+ /*
+  * Set rows max(0, i+1+off) until nrows of every column i to v and return this matrix.
+  */
+ def setLower(v:T, off:Int) = {
+   var col = 0
+   while (col < ncols) {
+     val colStart = col*nrows
+     var row = math.max(0,col+1+off)
+     while (row < nrows) {
+       data(row + colStart) = v
+       row += 1
+     }
+     col += 1
+   }
+   this
+ }
+
+ /*
+  * General operation between two matrices: apply op2 to corresponding elements.
+  * A column vector or row vector on either side is paired with every column / row of the
+  * other operand; all remaining shapes are handled by ggMatOpStrict.
+  */
+ def ggMatOp(aa:DenseMat[T], op2:(T,T) => T, oldmat:Mat):DenseMat[T] = {
+   if (nrows==aa.nrows && ncols==1) {
+     // this is a column vector: combine it with each column of aa
+     val out = DenseMat.newOrCheck(nrows, aa.ncols, oldmat)
+     Mat.nflops += aa.length
+     var col = 0
+     while (col < aa.ncols) {
+       var row = 0
+       while (row < nrows) {
+         out.data(row+col*nrows) = op2(data(row), aa.data(row+col*aa.nrows))
+         row += 1
+       }
+       col += 1
+     }
+     out
+   } else if (ncols==aa.ncols && nrows==1) {
+     // this is a row vector: combine element col with every entry of aa's column col
+     val out = DenseMat.newOrCheck[T](aa.nrows, ncols, oldmat)
+     Mat.nflops += aa.length
+     var col = 0
+     while (col < ncols) {
+       var row = 0
+       while (row < aa.nrows) {
+         out.data(row+col*aa.nrows) = op2(data(col), aa.data(row+col*aa.nrows))
+         row += 1
+       }
+       col += 1
+     }
+     out
+   } else if (nrows==aa.nrows && aa.ncols==1) {
+     // aa is a column vector: combine each column of this with it
+     val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+     Mat.nflops += length
+     var col = 0
+     while (col < ncols) {
+       var row = 0
+       while (row < nrows) {
+         out.data(row+col*nrows) = op2(data(row+col*nrows), aa.data(row))
+         row += 1
+       }
+       col += 1
+     }
+     out
+   } else if (ncols==aa.ncols && aa.nrows==1) {
+     // aa is a row vector: combine every entry of column col with aa's element col
+     val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+     Mat.nflops += length
+     var col = 0
+     while (col < ncols) {
+       var row = 0
+       while (row < nrows) {
+         out.data(row+col*nrows) = op2(data(row+col*nrows), aa.data(col))
+         row += 1
+       }
+       col += 1
+     }
+     out
+   } else ggMatOpStrict(aa, op2, oldmat)
+ }
+
+
+ /*
+  * Apply op2 with stricter dimension checking: either both operands have the same shape
+  * or one of them is 1x1 and is used as a scalar. Anything else throws
+  * RuntimeException("dims incompatible").
+  */
+ def ggMatOpStrict(aa:DenseMat[T], op2:(T,T) => T, oldmat:Mat):DenseMat[T] =
+   if (nrows==aa.nrows && ncols==aa.ncols) {
+     val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+     Mat.nflops += length
+     var idx = 0
+     while (idx < aa.length) {
+       out.data(idx) = op2(data(idx), aa.data(idx))
+       idx += 1
+     }
+     out
+   } else if (aa.nrows == 1 && aa.ncols == 1) {
+     // aa is the scalar, on the right-hand side of op2
+     val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+     Mat.nflops += length
+     val scalar = aa.data(0)
+     var idx = 0
+     while (idx < length) {
+       out.data(idx) = op2(data(idx), scalar)
+       idx += 1
+     }
+     out
+   } else if (nrows == 1 && ncols == 1) {
+     // this is the scalar, on the left-hand side of op2
+     val out = DenseMat.newOrCheck[T](aa.nrows, aa.ncols, oldmat)
+     Mat.nflops += aa.length
+     val scalar = data(0)
+     var idx = 0
+     while (idx < aa.length) {
+       out.data(idx) = op2(scalar, aa.data(idx))
+       idx += 1
+     }
+     out
+   } else throw new RuntimeException("dims incompatible");
+
+
+ /*
+  * Apply the binary operation op2 to every element of this matrix (left argument)
+  * and the scalar a (right argument).
+  */
+ def ggMatOpScalar(a:T, op2:(T,T) => T, oldmat:Mat):DenseMat[T] = {
+   val result = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+   Mat.nflops += length
+   var idx = 0
+   while (idx < length) {
+     result.data(idx) = op2(data(idx), a)
+     idx += 1
+   }
+   result
+ }
+ /*
+  * General operation between two matrices, implemented with a vector primitive.
+  * opv takes (a, a0, ainc, b, b0, binc, c, c0, cinc, n) and is called once per column;
+  * a stride of 0 reuses a single element for the whole column. Row/column-vector operands
+  * are handled here; all other shapes go to ggMatOpStrictv.
+  */
+ def ggMatOpv(aa:DenseMat[T], opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] =
+   if (nrows==aa.nrows && ncols==1) {
+     // this is a column vector: combine it with each column of aa
+     val out = DenseMat.newOrCheck[T](nrows, aa.ncols, oldmat)
+     Mat.nflops += aa.length
+     var col = 0
+     while (col < aa.ncols) {
+       opv(data, 0, 1, aa.data, col*aa.nrows, 1, out.data, col*nrows, 1, nrows)
+       col += 1
+     }
+     out
+   } else if (ncols==aa.ncols && nrows==1) {
+     // this is a row vector: element col is held fixed (stride 0) over aa's column col
+     val out = DenseMat.newOrCheck[T](aa.nrows, ncols, oldmat)
+     Mat.nflops += aa.length
+     var col = 0
+     while (col < ncols) {
+       opv(data, col, 0, aa.data, col*aa.nrows, 1, out.data, col*aa.nrows, 1, aa.nrows)
+       col += 1
+     }
+     out
+   } else if (nrows==aa.nrows && aa.ncols==1) {
+     // aa is a column vector: combine each column of this with it
+     val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+     Mat.nflops += length
+     var col = 0
+     while (col < ncols) {
+       opv(data, col*nrows, 1, aa.data, 0, 1, out.data, col*nrows, 1, nrows)
+       col += 1
+     }
+     out
+   } else if (ncols==aa.ncols && aa.nrows==1) {
+     // aa is a row vector: its element col is held fixed (stride 0) over this column
+     val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+     Mat.nflops += length
+     var col = 0
+     while (col < ncols) {
+       opv(data, col*nrows, 1, aa.data, col, 0, out.data, col*nrows, 1, nrows)
+       col += 1
+     }
+     out
+   } else ggMatOpStrictv(aa, opv, oldmat);
+
+
+
+ /*
+  * Vector-primitive version of the strict element-wise operation: the operands must have equal
+  * dimensions, or one of them must be 1x1 (it is then read with increment 0 so its single
+  * element is combined with every element of the other operand).
+  * opv is called as opv(a, a0, ainc, b, b0, binc, c, c0, cinc, n) with this matrix as the
+  * first operand and aa as the second.
+  * When the result has more than 100000 elements and Mat.numThreads > 1 the index range is
+  * split into Mat.numThreads contiguous slices, each processed by its own actor; the caller
+  * spins (yielding) until every slice has flagged completion.
+  * Throws RuntimeException("dims incompatible") for any other combination of shapes.
+  */
+ def ggMatOpStrictv(aa:DenseMat[T], opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = {
+   var out:DenseMat[T] = null
+   var mylen = 0
+   if ((nrows==aa.nrows && ncols==aa.ncols) || (aa.nrows == 1 && aa.ncols == 1)) {
+     out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+     mylen = length
+   } else if (nrows == 1 && ncols == 1) {
+     // Assign the outer variable. A shadowing `val out` here used to leave the result null,
+     // so a 1x1 receiver combined with a larger matrix failed when out.data was dereferenced.
+     out = DenseMat.newOrCheck[T](aa.nrows, aa.ncols, oldmat)
+     mylen = aa.length
+   } else throw new RuntimeException("dims incompatible")
+   if (mylen > 100000 && Mat.numThreads > 1) {
+     // One completion flag per worker; the sum reaches numThreads when all slices are done.
+     val done = IMat(1, Mat.numThreads)
+     for (ithread<- 0 until Mat.numThreads) {
+       val istart = ithread*mylen/Mat.numThreads
+       val len = (ithread+1)*mylen/Mat.numThreads - istart
+       actor {
+         if (nrows==aa.nrows && ncols==aa.ncols) {
+           opv(data, istart, 1, aa.data, istart, 1, out.data, istart, 1, len)
+         } else if (aa.nrows == 1 && aa.ncols == 1) {
+           opv(data, istart, 1, aa.data, 0, 0, out.data, istart, 1, len)
+         } else {
+           opv(data, 0, 0, aa.data, istart, 1, out.data, istart, 1, len)
+         }
+         done(ithread) = 1
+       }
+     }
+     while (SciFunctions.sum(done).v < Mat.numThreads) {Thread.`yield`()}
+   } else if (nrows==aa.nrows && ncols==aa.ncols) {
+     opv(data, 0, 1, aa.data, 0, 1, out.data, 0, 1, aa.length)
+   } else if (aa.nrows == 1 && aa.ncols == 1) {
+     opv(data, 0, 1, aa.data, 0, 0, out.data, 0, 1, length)
+   } else if (nrows == 1 && ncols == 1) {
+     opv(data, 0, 0, aa.data, 0, 1, out.data, 0, 1, aa.length)
+   }
+   Mat.nflops += mylen
+   out
+ }
+
+ /*
+  * Combine every element of this matrix with the scalar a using the vector primitive opv.
+  * The scalar is placed in a one-element array that is passed with increment 0, so the same
+  * value is the second operand for all `length` elements.
+  */
+ def ggMatOpScalarv(a:T, opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = {
+   val result = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+   Mat.nflops += length
+   val scalarArg = new Array[T](1)
+   scalarArg(0) = a
+   opv(data, 0, 1, scalarArg, 0, 0, result.data, 0, 1, length)
+   result
+ }
+
+ /*
+  * Reduce this matrix along one dimension.
+  * dim0 == 0 selects dimension 2 when the matrix has a single row and dimension 1 otherwise;
+  * values below 1 are otherwise clamped to 1.
+  * Dimension 1 folds each column into one value (1 x ncols result); dimension 2 folds each
+  * row into one value (nrows x 1 result).
+  * op1 is applied to the first element of each column/row to seed the accumulator; op2
+  * combines the accumulator with each later element.
+  * Throws RuntimeException for any dimension other than 1 or 2.
+  */
+ def ggReduceOp(dim0:Int, op1:(T) => T, op2:(T,T) => T, oldmat:Mat):DenseMat[T] = {
+   val dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0)
+   dim match {
+     case 1 => {
+       val result = DenseMat.newOrCheck[T](1, ncols, oldmat)
+       Mat.nflops += length
+       var col = 0
+       while (col < ncols) {
+         var row = 1
+         var total = op1(data(col*nrows))
+         while (row < nrows) {
+           total = op2(total, data(row+col*nrows))
+           row += 1
+         }
+         result.data(col) = total
+         col += 1
+       }
+       result
+     }
+     case 2 => {
+       val result = DenseMat.newOrCheck[T](nrows, 1, oldmat)
+       Mat.nflops += length
+       // Seed every row accumulator from the first column.
+       var row = 0
+       while (row < nrows) {
+         result.data(row) = op1(data(row))
+         row += 1
+       }
+       // Fold the remaining columns in, one column at a time.
+       var col = 1
+       while (col < ncols) {
+         row = 0
+         while (row < nrows) {
+           result.data(row) = op2(result.data(row), data(row+col*nrows))
+           row += 1
+         }
+         col += 1
+       }
+       result
+     }
+     case _ =>
+       throw new RuntimeException("index must 1 or 2")
+   }
+ }
+
+ /*
+  * Select one element per column (dimension 1) or per row (dimension 2) and report where it was found.
+  * dim0 is normalised as in the reduce methods: 0 means dimension 2 for a single-row matrix and 1 otherwise.
+  * The current best is replaced by a candidate v whenever op2(v, best) is true, so with a
+  * strict comparison the earliest best element wins.
+  * Returns (values, positions); positions are zero-based row indices for dimension 1 and
+  * zero-based column indices for dimension 2.
+  * Throws RuntimeException for any dimension other than 1 or 2.
+  */
+ def ggOpt2(dim0:Int, op2:(T,T) => Boolean):(DenseMat[T],IMat) = {
+   val dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0)
+   dim match {
+     case 1 => {
+       val best = new DenseMat[T](1, ncols)
+       val where = IMat(1, ncols)
+       Mat.nflops += length
+       var col = 0
+       while (col < ncols) {
+         var row = 1
+         var cur = data(col*nrows)
+         var curRow = 0
+         while (row < nrows) {
+           val cand = data(row+col*nrows)
+           if (op2(cand, cur)) {
+             cur = cand
+             curRow = row
+           }
+           row += 1
+         }
+         best.data(col) = cur
+         where.data(col) = curRow
+         col += 1
+       }
+       (best, where)
+     }
+     case 2 => {
+       val best = new DenseMat[T](nrows, 1)
+       val where = IMat(nrows, 1)
+       Mat.nflops += length
+       // Start with the first column as the best seen so far for every row.
+       var row = 0
+       while (row < nrows) {
+         best.data(row) = data(row)
+         where.data(row) = 0
+         row += 1
+       }
+       var col = 1
+       while (col < ncols) {
+         row = 0
+         while (row < nrows) {
+           val cand = data(row+col*nrows)
+           if (op2(cand, best.data(row))) {
+             best.data(row) = cand
+             where.data(row) = col
+           }
+           row += 1
+         }
+         col += 1
+       }
+       (best, where)
+     }
+     case _ =>
+       throw new RuntimeException("index must 1 or 2")
+   }
+ }
+
+ /*
+ * Reduce this matrix along one dimension using the vector primitive opv.
+ * dim0 == 0 selects dimension 2 for a single-row matrix and dimension 1 otherwise; smaller values are clamped to 1.
+ * Dimension 1 produces a 1 x ncols result, dimension 2 an nrows x 1 result.
+ * NOTE(review): opv is assumed to take (a, a0, ainc, b, b0, binc, c, c0, cinc, n) and to process
+ * exactly n elements even when an increment is 0 -- confirm against the primitives passed in.
+ * Throws RuntimeException for any dimension other than 1 or 2.
+ */
+ def ggReduceOpv(dim0:Int, opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = {
+ var dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0)
+ if (dim == 1) {
+ val out = DenseMat.newOrCheck[T](1, ncols, oldmat)
+ Mat.nflops += length
+ var i = 0
+ while (i < ncols) {
+ // Seed the accumulator for column i with the column's first element.
+ out.data(i) = data(i*nrows)
+ // Combine the remaining nrows-1 elements with out.data(i); the second operand and the
+ // destination are both out.data(i), addressed with increment 0.
+ opv(data, i*nrows+1, 1, out.data, i, 0, out.data, i, 0, nrows-1)
+ i += 1
+ }
+ out
+ } else if (dim == 2) {
+ val out = DenseMat.newOrCheck[T](nrows, 1, oldmat)
+ Mat.nflops += length
+ // Seed the per-row accumulators with the first column.
+ var j = 0
+ while (j < nrows) {
+ out.data(j) = data(j)
+ j += 1
+ }
+ // Combine each later column, as a whole vector, with the accumulator column in place.
+ var i = 1
+ while (i < ncols) {
+ opv(data, i*nrows, 1, out.data, 0, 1, out.data, 0, 1, nrows)
+ i += 1
+ }
+ out
+ } else
+ throw new RuntimeException("index must 1 or 2");
+ }
+
+ /*
+  * Running (cumulative) reduction along one dimension; the result has the same shape as this matrix.
+  * dim0 == 0 selects dimension 2 for a single-row matrix and dimension 1 otherwise.
+  * Dimension 1 accumulates down each column, dimension 2 accumulates across each row.
+  * op1 transforms the first element of each column/row; op2 combines the running value with
+  * the next element.
+  * Throws RuntimeException for any dimension other than 1 or 2.
+  */
+ def ggReduceAll(dim0:Int, op1:(T) => T, op2:(T,T) => T, oldmat:Mat):DenseMat[T] = {
+   val dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0)
+   dim match {
+     case 1 => {
+       val result = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+       Mat.nflops += length
+       var col = 0
+       while (col < ncols) {
+         val base = col*nrows
+         var row = 1
+         var running = op1(data(base))
+         result.data(base) = running
+         while (row < nrows) {
+           running = op2(running, data(row+base))
+           result.data(row+base) = running
+           row += 1
+         }
+         col += 1
+       }
+       result
+     }
+     case 2 => {
+       val result = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+       Mat.nflops += length
+       // The first output column is op1 applied to the first input column.
+       var row = 0
+       while (row < nrows) {
+         result.data(row) = op1(data(row))
+         row += 1
+       }
+       // Each later output column combines the previous output column with the current input column.
+       var col = 1
+       while (col < ncols) {
+         val base = col*nrows
+         row = 0
+         while (row < nrows) {
+           result.data(row+base) = op2(result.data(row+base-nrows), data(row+base))
+           row += 1
+         }
+         col += 1
+       }
+       result
+     }
+     case _ =>
+       throw new RuntimeException("index must 1 or 2")
+   }
+ }
+
+ /*
+ * Running (cumulative) reduction along one dimension using the vector primitive opv;
+ * the result has the same shape as this matrix.
+ * dim0 == 0 selects dimension 2 for a single-row matrix and dimension 1 otherwise.
+ * NOTE(review): the dimension-1 call reads out.data at i0.. and writes out.data at i0+1.. in the
+ * same opv invocation, so it only yields a running result if opv processes elements strictly
+ * in increasing order -- confirm for the primitives passed in.
+ * Throws RuntimeException for any dimension other than 1 or 2.
+ */
+ def ggReduceAllv(dim0:Int, opv:(Array[T],Int,Int,Array[T],Int,Int,Array[T],Int,Int,Int) => T, oldmat:Mat):DenseMat[T] = {
+ var dim = if (nrows == 1 && dim0 == 0) 2 else math.max(1, dim0)
+ if (dim == 1) {
+ val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+ Mat.nflops += length
+ var i = 0
+ while (i < ncols) {
+ val i0 = i*nrows
+ // First element of the column is copied unchanged.
+ out.data(i0) = data(i0)
+ // Element k of the output is opv(input element k, output element k-1) for k = 1 .. nrows-1.
+ opv(data, i0+1, 1, out.data, i0, 1, out.data, i0+1, 1, nrows-1)
+ i += 1
+ }
+ out
+ } else if (dim == 2) {
+ val out = DenseMat.newOrCheck[T](nrows, ncols, oldmat)
+ Mat.nflops += length
+ // First column is copied unchanged.
+ var j = 0
+ while (j < nrows) {
+ out.data(j) = data(j)
+ j += 1
+ }
+ // Each later output column combines the current input column with the previous output column.
+ var i = 1
+ while (i < ncols) {
+ val i0 = i*nrows
+ opv(data, i0, 1, out.data, i0-nrows, 1, out.data, i0, 1, nrows)
+ i += 1
+ }
+ out
+ } else
+ throw new RuntimeException("index must 1 or 2")
+ }
+
+ /*
+  * Sum of element-wise products of this matrix and a, returned as a Double.
+  * Each product is formed in T (via numeric.times) and then widened to Double before being added.
+  * Throws RuntimeException("dot dims not compatible") unless both matrices have identical dimensions.
+  */
+ def dot (a : DenseMat[T])(implicit numeric:Numeric[T]):Double = {
+   if (nrows != a.nrows || ncols != a.ncols) {
+     throw new RuntimeException("dot dims not compatible")
+   }
+   Mat.nflops += 2 * length
+   var total = 0.0
+   var k = 0
+   while (k < length) {
+     val product = numeric.times(data(k), a.data(k))
+     total += numeric.toDouble(product)
+     k += 1
+   }
+   total
+ }
+
+ /*
+  * Build an n x n matrix whose diagonal holds the elements of this vector (n is the vector's length);
+  * only the diagonal entries are written.
+  * Throws RuntimeException("mkdiag needs a vector input") when both dimensions exceed 1.
+  */
+ def mkdiag = {
+   if (math.min(nrows, ncols) > 1) {
+     throw new RuntimeException("mkdiag needs a vector input")
+   }
+   val n = math.max(nrows, ncols)
+   val diag = new DenseMat[T](n,n)
+   // Consecutive diagonal entries are n+1 apart in the column-major backing array.
+   val stride = n+1
+   var k = 0
+   while (k < n) {
+     diag.data(k*stride) = data(k)
+     k += 1
+   }
+   diag
+ }
+
+ /*
+  * Extract the main diagonal as a column vector of length min(nrows, ncols).
+  */
+ def getdiag = {
+   val n = math.min(nrows, ncols)
+   val diag = new DenseMat[T](n,1)
+   // Consecutive diagonal entries are nrows+1 apart in the column-major backing array.
+   val stride = nrows+1
+   var k = 0
+   while (k < n) {
+     diag.data(k) = data(k*stride)
+     k += 1
+   }
+   diag
+ }
+
+}
+
+object DenseMat {
+
+ /*
+  * Strided element-wise comparison primitive.
+  * For k = 0 until n it compares a(a0 + k*ainc) with b(b0 + k*binc) and stores
+  * xmap(0), xmap(1) or xmap(2) into c(c0 + k*cinc) when the a element is respectively
+  * less than, equal to, or greater than the b element.
+  * The comparison result is reduced to its sign before indexing xmap, because
+  * Ordering.compare only guarantees the sign of its result (some orderings return a difference).
+  * Exactly n elements are processed whatever the increments are, so a zero or non-unit
+  * cinc neither loops forever nor cuts the run short.
+  * Always returns numeric.zero; the useful output is in c.
+  */
+ def vecCmp[@specialized(Double, Float, Int, Byte) T](xmap:Array[T])(a:Array[T], a0:Int, ainc:Int, b:Array[T], b0:Int, binc:Int, c:Array[T], c0:Int, cinc:Int, n:Int)
+ (implicit numeric:Numeric[T]):T = {
+   var ai = a0; var bi = b0; var ci = c0
+   var k = 0
+   while (k < n) {
+     val sign = math.signum(numeric.compare(a(ai), b(bi)))
+     c(ci) = xmap(sign+1)
+     ai += ainc; bi += binc; ci += cinc
+     k += 1
+   }
+   numeric.zero
+ }
+
+
+ /*
+  * Return an nr x nc DenseMat, reusing oldmat where possible:
+  *  - oldmat null or 0x0: a fresh matrix is allocated;
+  *  - oldmat already nr x nc: oldmat itself is returned;
+  *  - oldmat's backing array holds at least nr*nc elements: a new nr x nc matrix sharing that array is returned;
+  *  - otherwise a fresh matrix is allocated.
+  * oldmat is cast to DenseMat[T] without a type check.
+  */
+ def newOrCheck[T](nr:Int, nc:Int, oldmat:Mat)
+ (implicit classManifest:ClassManifest[T]):DenseMat[T] = {
+   val nothingToReuse = oldmat.asInstanceOf[AnyRef] == null || (oldmat.nrows == 0 && oldmat.ncols == 0)
+   if (nothingToReuse) {
+     new DenseMat[T](nr, nc)
+   } else {
+     val omat = oldmat.asInstanceOf[DenseMat[T]]
+     if (oldmat.nrows == nr && oldmat.ncols == nc) {
+       omat
+     } else if (nr*nc <= omat.data.size) {
+       new DenseMat[T](nr, nc, omat.data)
+     } else {
+       new DenseMat[T](nr, nc)
+     }
+   }
+ }
+
+ /*
+  * Resolve an index matrix against a dimension of size n.
+  * A MatrixWildcard expands to the n consecutive indices starting at Mat.oneBased.
+  * Any other IMat is validated -- every entry minus Mat.oneBased must lie in [0, n) -- and its
+  * backing array is returned as is (no copy, no base adjustment).
+  * Throws RuntimeException("index out of range ...") on the first invalid entry.
+  */
+ def getInds(ii:IMat, n:Int):Array[Int] = {
+   val off = Mat.oneBased
+   ii match {
+     case _:MatrixWildcard => {
+       val all = new Array[Int](n)
+       var k = 0
+       while (k < n) {
+         all(k) = k + off
+         k += 1
+       }
+       all
+     }
+     case _ => {
+       var k = 0
+       while (k < ii.length) {
+         val zeroBased = ii.data(k) - off
+         if (zeroBased < 0 || zeroBased >= n) {
+           throw new RuntimeException("index out of range "+(zeroBased+off)+" vs "+n)
+         }
+         k += 1
+       }
+       ii.data
+     }
+   }
+ }
+
+ /*
+  * Validate a sequence of indices against a dimension of size n and return them as an array.
+  * Every entry minus Mat.oneBased must lie in [0, n); the returned array has one slot per
+  * input entry and holds the entries unchanged, in the same base as the input (this mirrors
+  * getInds, which returns the caller's index array as is).
+  * Previously the result array was allocated but never filled, and was truncated to
+  * min(in.length, n) entries, so callers always received zeros.
+  * Throws RuntimeException("index out of range ...") on the first invalid entry.
+  */
+ def getSInds(in:Seq[Int], n:Int):Array[Int] = {
+   val inds = new Array[Int](in.length)
+   val off = Mat.oneBased
+   // Walk the sequence with an iterator: positional access would be linear per element for a List.
+   val it = in.iterator
+   var i = 0
+   while (it.hasNext) {
+     val raw = it.next()
+     val ind = raw - off
+     if (ind < 0 || ind >= n) {
+       throw new RuntimeException("index out of range "+(ind+off)+" vs "+n)
+     }
+     inds(i) = raw
+     i += 1
+   }
+   inds
+ }
+
+ /*
+  * Sort the slice [from, to) of a in place, ascending, by dispatching on the runtime element
+  * type to the matching java.util.Arrays.sort overload.
+  * Only Double, Float, Int and Byte arrays are handled; any other array type fails the match.
+  */
+ def genSort[@specialized(Double, Float, Int, Byte) T](a:Array[T],from:Int,to:Int):Unit = a match {
+   case doubles:Array[Double] => Arrays.sort(doubles, from, to)
+   case floats:Array[Float] => Arrays.sort(floats, from, to)
+   case ints:Array[Int] => Arrays.sort(ints, from, to)
+   case bytes:Array[Byte] => Arrays.sort(bytes, from, to)
+ }
+
+ /*
+  * Sort the whole array in place, ascending.
+  */
+ def genSort[@specialized(Double, Float, Int, Byte) T](a:Array[T]):Unit =
+   genSort(a, 0, a.size)
+
+ /*
+  * Reverse the slice [from, to) of a in place by swapping elements from both ends towards the middle.
+  */
+ def reverse[@specialized(Double, Float, Int, Byte) T](a:Array[T],from:Int,to:Int) = {
+   var lo = from
+   var hi = to - 1
+   while (lo < hi) {
+     val held = a(lo)
+     a(lo) = a(hi)
+     a(hi) = held
+     lo += 1
+     hi -= 1
+   }
+ }
+
+ /*
+  * Reverse the whole array in place.
+  */
+ def reverse[@specialized(Double, Float, Int, Byte) T](a:Array[T]):Unit = {
+   reverse(a, 0, a.size)
+ }
+
+ /*
+  * Return a sorted copy of a; the input matrix is left untouched.
+  * ik0 picks the dimension to sort along: 1 sorts within each column, 2 within each row, and
+  * 0 means "rows for a single-row matrix, columns otherwise". Vectors (one row or one column)
+  * are always sorted as a whole regardless of ik0.
+  * asc selects ascending (true) or descending (false) order.
+  * Sorting goes through genSort, so only the element types genSort supports are handled.
+  */
+ def sort[@specialized(Double, Float, Int, Byte) T](a:DenseMat[T], ik0:Int, asc:Boolean)
+ (implicit classManifest:ClassManifest[T], ordering:Ordering[T]):DenseMat[T] = {
+   import BIDMat.Sorting._
+   val out = new DenseMat[T](a.nrows, a.ncols)
+   var ik = ik0
+   if (ik0 == 0) {
+     if (a.nrows == 1) {
+       ik = 2
+     } else {
+       ik = 1
+     }
+   }
+   if (a.nrows == 1 || a.ncols == 1) {
+     System.arraycopy(a.data, 0, out.data, 0, a.length)
+     genSort(out.data)
+     if (!asc) {
+       // Reverse the sorted copy. Reversing a.data here used to scramble the caller's
+       // input while still returning ascending output.
+       reverse(out.data)
+     }
+     out
+   } else if (ik == 1) {
+     // Sort each column through a scratch buffer, then write it back forwards or backwards.
+     val thiscol = new Array[T](a.nrows)
+     var i = 0
+     while (i < a.ncols) {
+       var j = 0
+       while (j < a.nrows) {
+         thiscol(j) = a.data(j+i*a.nrows)
+         j += 1
+       }
+       genSort(thiscol)
+       j = 0
+       if (asc) {
+         while (j < a.nrows) {
+           out.data(j+i*a.nrows) = thiscol(j)
+           j += 1
+         }
+       } else {
+         while (j < a.nrows) {
+           out.data(j+i*a.nrows) = thiscol(a.nrows-j-1)
+           j += 1
+         }
+       }
+       i += 1
+     }
+     out
+   } else {
+     // Sort each row: gather the strided row into a scratch buffer, sort, scatter back.
+     val thisrow = new Array[T](a.ncols)
+     var i = 0
+     while (i < a.nrows) {
+       var j = 0
+       while (j < a.ncols) {
+         thisrow(j) = a.data(i+j*a.nrows)
+         j += 1
+       }
+       genSort(thisrow)
+       j = 0
+       if (asc) {
+         while (j < a.ncols) {
+           out.data(i+j*out.nrows) = thisrow(j)
+           j += 1
+         }
+       } else {
+         while (j < a.ncols) {
+           out.data(i+j*out.nrows) = thisrow(a.ncols-j-1)
+           j += 1
+         }
+       }
+       i += 1
+     }
+     out
+   }
+ }
+
+ /*
+  * Comparator over positions of the array a: two positions are ordered by the values stored
+  * there, and ties are broken by the positions themselves.
+  */
+ class MyComparator[@specialized(Double, Float, Int, Byte) T](a:Array[T])
+ (implicit ordering:Ordering[T]) extends java.util.Comparator[Int] {
+   def compare(ii:Int, jj:Int):Int = {
+     val byValue = ordering.compare(a(ii), a(jj))
+     if (byValue == 0) (ii compare jj) else byValue
+   }
+ }
+
+ /*
+  * Sort with index tracking along the default dimension: along rows (dimension 2) for a
+  * single-row matrix, along columns (dimension 1) otherwise.
+  */
+ def sort2[@specialized(Double, Float, Int, Byte) T](a:DenseMat[T], asc:Boolean)
+ (implicit classManifest:ClassManifest[T], ord:Ordering[T]): (DenseMat[T], IMat) = {
+   val dim = if (a.nrows == 1) 2 else 1
+   sort2(a, dim, asc)
+ }
+
+ /*
+ * Sort a along dimension ik and also return, for every output element, the zero-based
+ * position it had within its column (ik == 1) or row (any other ik) before sorting.
+ * NOTE(review): descending order is requested by passing quickSort2 the end points swapped
+ * (last index first, one-before-first index second) with stride -1; this presumably makes it
+ * walk the range backwards -- confirm against BIDMat.Sorting.quickSort2.
+ */
+ def sort2[@specialized(Double, Float, Int, Byte) T](a:DenseMat[T], ik:Int, asc:Boolean)
+ (implicit classManifest:ClassManifest[T], ord:Ordering[T]):(DenseMat[T], IMat) = {
+ import BIDMat.Sorting._
+ val out = new DenseMat[T](a.nrows, a.ncols)
+ val iout = IMat(a.nrows, a.ncols)
+ if (ik == 1) {
+ // Copy the values and initialise each column's index list to 0 .. nrows-1.
+ var i = 0
+ while (i < a.ncols) {
+ var j = 0
+ while (j < a.nrows) {
+ iout.data(j+i*a.nrows) = j
+ out.data(j+i*a.nrows) = a.data(j+i*a.nrows)
+ j += 1
+ }
+ i += 1
+ }
+ // Sort every column's slice of out.data, permuting the matching slice of iout.data alongside.
+ i = 0
+ while (i < a.ncols) {
+ if (asc) {
+ quickSort2(out.data, iout.data, i*a.nrows, (i+1)*a.nrows, 1)
+ } else {
+ quickSort2(out.data, iout.data, (i+1)*a.nrows-1, i*a.nrows-1, -1)
+ }
+ i += 1
+ }
+ (out, iout)
+ } else {
+ // Rows are strided in the backing array, so each row is gathered into scratch buffers,
+ // sorted there together with its column indices, and scattered back.
+ val vcols = new Array[T](a.ncols)
+ val icols = new Array[Int](a.ncols)
+ var i = 0
+ while (i < a.nrows) {
+ var j = 0
+ while (j < a.ncols) {
+ vcols(j) = a.data(i + j*a.nrows)
+ icols(j) = j
+ j += 1
+ }
+ if (asc) {
+ quickSort2(vcols, icols, 0, icols.length, 1)
+ } else {
+ quickSort2(vcols, icols, icols.length-1, -1, -1)
+ }
+ j = 0
+ while (j < a.ncols) {
+ out.data(i+j*out.nrows) = vcols(j)
+ iout.data(i+j*iout.nrows) = icols(j)
+ j += 1
+ }
+ i += 1
+ }
+ (out, iout)
+ }
+ }
+
+ /*
+ * Lexicographic row sort. Returns an nrows x 1 IMat holding a permutation of the zero-based
+ * row indices of a; a itself is not modified.
+ * Rows are compared column by column from the first column; rows that are equal in every
+ * column are ordered by their original row index.
+ * asc == false sorts with the comparison's arguments swapped.
+ */
+ def sortlex[@specialized(Double, Float, Int, Byte) T](a:DenseMat[T], asc:Boolean)(implicit ordering:Ordering[T]):IMat = {
+ import BIDMat.Sorting._
+ val out = IMat(a.nrows,1)
+ // ii aliases out.data: comp and swap below operate directly on the result permutation.
+ val ii = out.data
+ val aa = a.data
+ val nr = a.nrows
+ // Start from the identity permutation.
+ var i = 0
+ while (i < a.nrows) {
+ out.data(i) = i
+ i += 1
+ }
+ // Compare the rows currently stored at permutation slots i and j.
+ def comp(i:Int, j:Int):Int = {
+ var k = 0
+ val ip = ii(i)
+ val jp = ii(j)
+ var c0 = 0
+ // Stop at the first column where the two rows differ.
+ while (k < a.ncols && c0 == 0) {
+ c0 = ordering.compare(aa(ip+k*nr), aa(jp+k*nr))
+ k += 1
+ }
+ if (c0 != 0) {
+ c0
+ } else {
+ // Identical rows: fall back to the original row index.
+ ip compare jp
+ }
+ }
+ // Exchange two slots of the permutation.
+ def swap(i:Int, j:Int):Unit = {
+ val tmp = ii(i)
+ ii(i) = ii(j)
+ ii(j) = tmp
+ }
+ if (asc) {
+ quickSort(comp, swap, 0, a.nrows)
+ } else {
+ quickSort((i:Int,j:Int)=>comp(j,i), swap, 0, a.nrows)
+ }
+ out
+ }
+
+ /*
+ * Group equal elements of a. Returns (bptrs, iptrs):
+ * iptrs has one entry per element of a giving the zero-based rank of that element's value
+ * among the distinct values in ascending order; bptrs has one entry per distinct value
+ * giving the smallest position in a where that value occurs.
+ * NOTE(review): the positions come from sort2(a, true), which sorts per column for a
+ * multi-column matrix, so this looks intended for vector input only -- confirm with callers.
+ * An empty input fails on the first access to iss.data(0).
+ */
+ def unique2[@specialized(Double, Float, Int) T](a:DenseMat[T])
+ (implicit manifest:Manifest[T], numeric:Numeric[T], ord:Ordering[T]):(IMat, IMat) = {
+ val (vss, iss) = sort2(a, true)
+ val iptrs = IMat(a.length,1)
+ var lastpos = 0
+ iptrs.data(iss.data(0)) = lastpos
+ var i = 1
+ // Walk the sorted values; each change of value starts a new group number.
+ while (i < iss.length) {
+ if (vss.data(i-1) != vss.data(i)) {
+ lastpos += 1
+ }
+ iptrs.data(iss.data(i)) = lastpos
+ i += 1
+ }
+ val bptrs = IMat(lastpos+1,1)
+ i = iss.length
+ // Walk backwards so that the last write for each group is its smallest position.
+ while (i > 0) {
+ bptrs.data(iptrs.data(i-1)) = i-1
+ i = i - 1
+ }
+ (bptrs, iptrs)
+ }
+
+ /*
+  * Group identical rows of a. Returns (bptrs, iptrs):
+  * iptrs has one entry per row giving the zero-based rank of that row among the distinct rows
+  * in lexicographic order; bptrs has one entry per distinct row giving the smallest row index
+  * at which it occurs.
+  * Row order comes from sortlex; rows are considered identical when ordering.equiv holds in every column.
+  */
+ def uniquerows2[@specialized(Double, Float, Int) T](a:DenseMat[T])(implicit ordering:Ordering[T]):(IMat, IMat) = {
+   val order = sortlex(a, true)
+   // True when rows r1 and r2 agree in every column.
+   def sameRow(r1:Int, r2:Int):Boolean = {
+     var col:Int = 0
+     while (col < a.ncols && ordering.equiv(a(r1,col):T, a(r2,col):T)) {
+       col += 1
+     }
+     col == a.ncols
+   }
+   val iptrs = IMat(a.nrows, 1)
+   var group = 0
+   iptrs.data(order.data(0)) = group
+   var k = 1
+   // Walk the sorted rows; each change of row content starts a new group number.
+   while (k < order.length) {
+     if (!sameRow(order.data(k-1), order.data(k))) {
+       group += 1
+     }
+     iptrs.data(order.data(k)) = group
+     k += 1
+   }
+   val bptrs = IMat(group+1,1)
+   // Walk backwards so that the last write for each group is its smallest row index.
+   k = order.length
+   while (k > 0) {
+     bptrs.data(iptrs.data(k-1)) = k-1
+     k -= 1
+   }
+   (bptrs, iptrs)
+ }
+
+ /*
+  * Accumulate values into a freshly allocated nr x nc matrix at the positions given by inds.
+  * With one index column each entry is a linear position into the result's backing array;
+  * otherwise the first column is the row and the second the column of the target cell.
+  * vals supplies one value per index row, or a single value that is added at every position.
+  * Values landing on the same position are summed with numeric.plus.
+  * Throws RuntimeException("mismatch in array dimensions") when inds has more than two columns
+  * or vals (if it has more than one element) has a different row count from inds, and
+  * RuntimeException("indices out of bounds ...") when a row/column pair is >= nr / nc.
+  * Linear positions are not range-checked here.
+  */
+ def accum[@specialized(Double, Float, Int) T](inds:IMat, vals:DenseMat[T], nr:Int, nc:Int)
+ (implicit numeric:Numeric[T], classManifest:ClassManifest[T]):DenseMat[T] = {
+   if (inds.ncols > 2 || (vals.length > 1 && (inds.nrows != vals.nrows))) {
+     throw new RuntimeException("mismatch in array dimensions")
+   }
+   val out = new DenseMat[T](nr, nc)
+   Mat.nflops += inds.nrows
+   // true: vals holds one value per index row; false: its single value is reused everywhere.
+   val perEntry = vals.length > 1
+   var k = 0
+   if (inds.ncols == 1) {
+     while (k < inds.nrows) {
+       out.data(inds.data(k)) = numeric.plus(out.data(inds.data(k)), if (perEntry) vals.data(k) else vals.data(0))
+       k += 1
+     }
+   } else {
+     while (k < inds.nrows) {
+       if (inds.data(k) >= nr || inds.data(k+inds.nrows) >= nc) {
+         throw new RuntimeException("indices out of bounds "+inds.data(k)+" "+inds.data(k+inds.nrows))
+       }
+       val pos = inds.data(k) + nr*inds.data(k+inds.nrows)
+       out.data(pos) = numeric.plus(out.data(pos), if (perEntry) vals.data(k) else vals.data(0))
+       k += 1
+     }
+   }
+   out
+ }
+
+}
+
+// Marker trait for "all indices" arguments: DenseMat.getInds expands an index matrix of this
+// type to every index along the dimension instead of reading its contents.
+trait MatrixWildcard extends Mat
+
diff --git a/src/main/scala/BIDMat/FMat.scala b/src/main/scala/BIDMat/FMat.scala
new file mode 100755
index 00000000..868172fe
--- /dev/null
+++ b/src/main/scala/BIDMat/FMat.scala
@@ -0,0 +1,720 @@
+package BIDMat
+import edu.berkeley.bid.CBLAS._
+import edu.berkeley.bid.LAPACK._
+import edu.berkeley.bid.SPBLAS._
+import scala.actors.Actor._
+import java.util.Arrays
+
+
+case class FMat(nr:Int, nc:Int, data0:Array[Float]) extends DenseMat[Float](nr, nc, data0) {
+
+ // Number of elements; simply returns `length`.
+ def size() = length;
+
+ // Wraps the result of the inherited gt(null) in an FMat.
+ // NOTE(review): gt is presumably the generic transpose, with null meaning "allocate the output" -- confirm in DenseMat.
+ override def t:FMat = FMat(gt(null))
+
+ // Value of a single-element matrix as a Double.
+ // Throws RuntimeException when the matrix has more than one row or more than one column.
+ override def dv:Double =
+ if (nrows > 1 || ncols > 1) {
+ throw new RuntimeException("Matrix should be 1x1 to extract value")
+ } else {
+ data(0)
+ }
+
+ // Type tag string for this matrix class.
+ override def mytype = "FMat"
+
+ // Delegates to CMat.imag(this).
+ // NOTE(review): presumably builds a complex matrix with this matrix as the imaginary part -- confirm in CMat.
+ def i:CMat = CMat.imag(this)
+
+ // Typed wrappers: each calls the generic routine inherited from DenseMat and re-wraps the result as an FMat.
+ def horzcat(b: FMat) = FMat(ghorzcat(b))
+
+ def vertcat(b: FMat) = FMat(gvertcat(b))
+
+ // Re-wraps the three results of gfind3 as (IMat, IMat, FMat).
+ def find3:(IMat, IMat, FMat) = { val (ii, jj, vv) = gfind3 ; (IMat(ii), IMat(jj), FMat(vv)) }
+
+ // Indexed reads, delegated to the generic gapply overloads.
+ override def apply(a:IMat):FMat = FMat(gapply(a))
+
+ override def apply(a:IMat, b:IMat):FMat = FMat(gapply(a, b))
+
+ override def apply(a:IMat, b:Int):FMat = FMat(gapply(a, b))
+
+ override def apply(a:Int, b:IMat):FMat = FMat(gapply(a, b))
+
+ // Indexed writes, delegated to the generic _update; a scalar index is first wrapped with IMat.ielem.
+ def update(iv:IMat, jv:IMat, b:FMat):FMat = FMat(_update(iv, jv, b))
+
+ def update(iv:IMat, j:Int, b:FMat):FMat = FMat(_update(iv, IMat.ielem(j), b))
+
+ def update(i:Int, jv:IMat, b:FMat):FMat = FMat(_update(IMat.ielem(i), jv, b))
+
+ // Float-typed front ends for the generic gg* routines of DenseMat. Each one forwards its
+ // arguments unchanged and wraps the DenseMat[Float] result as an FMat; `out` is the optional
+ // matrix handed to the generic routine for output reuse.
+
+ // Element-wise binary op with a scalar function; b must be an FMat, otherwise RuntimeException.
+ def ffMatOp(b: Mat, f:(Float, Float) => Float, out:Mat):FMat =
+ b match {
+ case bb:FMat => FMat(ggMatOp(bb, f, out))
+ case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b)
+ }
+
+ // Element-wise binary op with a vector primitive; b must be an FMat, otherwise RuntimeException.
+ def ffMatOpv(b: Mat, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) =
+ b match {
+ case bb:FMat => FMat(ggMatOpv(bb, f, out))
+ case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b)
+ }
+
+ // Matrix-with-scalar op, scalar-function form.
+ def ffMatOpScalar(b: Float, f:(Float, Float) => Float, out:Mat):FMat = FMat(ggMatOpScalar(b, f, out))
+
+ // Matrix-with-scalar op, vector-primitive form.
+ def ffMatOpScalarv(b: Float, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) =
+ FMat(ggMatOpScalarv(b, f, out))
+
+ // Reduction along dimension n, scalar-function form.
+ def ffReduceOp(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, out:Mat) =
+ FMat(ggReduceOp(n, f1, f2, out))
+
+ // Reduction along dimension n, vector-primitive form.
+ def ffReduceOpv(n:Int, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) =
+ FMat(ggReduceOpv(n, f, out))
+
+ // Running (cumulative) reduction along dimension n, scalar-function form.
+ def ffReduceAll(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, out:Mat) =
+ FMat(ggReduceAll(n, f1, f2, out))
+
+ // Running (cumulative) reduction along dimension n, vector-primitive form.
+ def ffReduceAllv(n:Int, f:(Array[Float],Int,Int,Array[Float],Int,Int,Array[Float],Int,Int,Int) => Float, out:Mat) =
+ FMat(ggReduceAllv(n, f, out))
+
+ /*
+  * Format element i for display.
+  * Whole numbers with magnitude below 1e10 are printed as plain integers; everything else
+  * (fractions, very large values, NaN, infinities) is printed with five significant digits.
+  * The integer path converts through Long: converting to Int saturates at Int.MaxValue, which
+  * made every whole value between 2^31 and 1e10 print as 2147483647.
+  */
+ override def printOne(i:Int):String = {
+   val v = data(i)
+   if (v % 1 == 0 && math.abs(v) < 1e10) {
+     "%d" format v.toLong
+   } else {
+     "%.5g" format v
+   }
+ }
+
+ /*
+  * Return a new FMat with the same dimensions and a private copy of the first `length` elements.
+  */
+ override def copy = {
+   val duplicate = FMat(nrows, ncols)
+   System.arraycopy(data, 0, duplicate.data, 0, length)
+   duplicate
+ }
+
+ /*
+  * Copy this matrix's contents into a, resized through a.recycle to this matrix's dimensions.
+  * Returns the matrix that received the data, which is whatever recycle returned and may be
+  * a different object from a.
+  */
+ def copyTo(a:FMat) = {
+   val target = a.recycle(nrows, ncols, 0)
+   System.arraycopy(data, 0, target.data, 0, length)
+   target
+ }
+
+ /*
+  * Overwrite the first `length` elements with v, in place, and return this matrix.
+  */
+ override def set(v:Float):FMat = {
+   val count = length
+   Arrays.fill(data, 0, count, v)
+   this
+ }
+
+ /*
+  * Copy this matrix into a, dispatching on a's runtime type, and return the matrix that holds the copy.
+  * For an FMat target the typed copyTo is used and ITS result is returned: it goes through
+  * recycle, which may hand back a different FMat (new dimensions or new storage) from the one
+  * passed in, so returning `a` unconditionally could hand the caller a matrix that does not
+  * contain the copied data.
+  * For a GMat target the copy is done by GMat.copyFrom and a itself is returned.
+  * Any other target type fails the match.
+  */
+ override def copyTo(a:Mat):Mat = {
+   a match {
+     case out:FMat => copyTo(out):FMat
+     case aa:GMat => {
+       aa.copyFrom(this)
+       a
+     }
+   }
+ }
+
+ // New nr x nc FMat straight from the FMat(nr, nc) factory.
+ // NOTE(review): relies on the factory returning zero-filled storage -- confirm in object FMat.
+ override def zeros(nr:Int, nc:Int) = {
+ FMat(nr, nc)
+ }
+
+ // New nr x nc FMat with every element set to 1.
+ override def ones(nr:Int, nc:Int) = {
+ val out = FMat(nr, nc)
+ var i = 0
+ while (i < out.length) {
+ // Goes through the matrix's indexed update rather than writing out.data directly.
+ out(i) = 1
+ i += 1
+ }
+ out
+ }
+
+ // Clearing a triangle is setting it to 0; off is passed through to setUpper/setLower
+ // (0 when the argument-less form is used).
+ override def clearUpper(off:Int) = setUpper(0, off)
+ override def clearUpper = setUpper(0, 0)
+
+ override def clearLower(off:Int) = setLower(0, off)
+ override def clearLower = setLower(0, 0)
+
+
+ /*
+ * Dense matrix product this * a, with scalar fall-backs.
+ * - ncols == a.nrows: true matrix product (nrows x a.ncols result). With Mat.noMKL set a plain
+ * triple loop is used; otherwise sgemv handles the cases where this is a single row or a is a
+ * single column, and sgemm handles everything else.
+ * - this is 1x1, or a is 1x1: every element of the other operand is multiplied by that scalar.
+ * - anything else: RuntimeException("dimensions mismatch").
+ * outmat is offered to FMat.newOrCheckFMat for output reuse.
+ */
+ def fDMult(a:FMat, outmat:Mat):FMat = {
+ if (ncols == a.nrows) {
+ val out = FMat.newOrCheckFMat(nrows, a.ncols, outmat)
+ Mat.nflops += 2L * length * a.ncols
+ if (Mat.noMKL) {
+ // The loop below accumulates with +=, so the (possibly reused) output must start at zero.
+ out.clear
+ var i = 0
+ while (i < a.ncols) {
+ var j = 0
+ while (j < a.nrows) {
+ var k = 0
+ // Element (j, i) of a; ncols equals a.nrows in this branch, so i*ncols is a's column offset.
+ val dval = a.data(j + i*ncols)
+ // Add dval times column j of this matrix to column i of the output.
+ while (k < nrows) {
+ out.data(k+i*nrows) += data(k+j*nrows)*dval
+ k += 1
+ }
+ j += 1
+ }
+ i += 1
+ }
+ } else if (nrows == 1) {
+ // Row vector times matrix, computed as transpose(a) times this vector.
+ sgemv(ORDER.ColMajor, TRANSPOSE.Trans, a.nrows, a.ncols, 1.0f, a.data, a.nrows, data, 1, 0, out.data, 1)
+ } else if (a.ncols == 1) {
+ // Matrix times column vector.
+ sgemv(ORDER.ColMajor, TRANSPOSE.NoTrans, nrows, ncols, 1.0f, data, nrows, a.data, 1, 0, out.data, 1)
+ } else {
+ sgemm(ORDER.ColMajor, TRANSPOSE.NoTrans, TRANSPOSE.NoTrans,
+ nrows, a.ncols, ncols, 1.0f, data, nrows, a.data, a.nrows, 0, out.data, nrows)
+ }
+ out
+ } else if (ncols == 1 && nrows == 1){
+ // This matrix is a scalar: scale a.
+ val out = FMat.newOrCheckFMat(a.nrows, a.ncols, outmat)
+ Mat.nflops += a.length
+ var i = 0
+ val dvar = data(0)
+ while (i < a.length) {
+ out.data(i) = dvar * a.data(i)
+ i += 1
+ }
+ out
+ } else if (a.ncols == 1 && a.nrows == 1){
+ // a is a scalar: scale this matrix.
+ val out = FMat.newOrCheckFMat(nrows, ncols, outmat)
+ Mat.nflops += length
+ var i = 0
+ val dvar = a.data(0)
+ while (i < length) {
+ out.data(i) = dvar * data(i)
+ i += 1
+ }
+ out
+ } else throw new RuntimeException("dimensions mismatch")
+ }
+
+ /*
+  * Dense-times-sparse kernel for output columns istart until iend.
+  * For every stored entry of column `col` of a (value v at row r), v times column r of this
+  * matrix is added to column `col` of out. ioff is subtracted from the jc and ir entries to
+  * obtain zero-based positions.
+  * The scaled column add is a plain loop when Mat.noMKL is set or nrows < 220, and a call to
+  * saxpyxx otherwise.
+  * out is accumulated into, so the caller must have cleared it.
+  */
+ def fSMultHelper(a:SMat, out:FMat, istart:Int, iend:Int, ioff:Int) = {
+   var col = istart
+   while (col < iend) {
+     var p = a.jc(col) - ioff
+     while (p < a.jc(col+1)-ioff) {
+       val v = a.data(p)
+       val r = a.ir(p) - ioff
+       if (Mat.noMKL || nrows < 220) {
+         var row = 0
+         while (row < nrows) {
+           out.data(row+col*nrows) += data(row+r*nrows)*v
+           row += 1
+         }
+       } else {
+         saxpyxx(nrows, v, data, r*nrows, out.data, col*nrows)
+       }
+       p += 1
+     }
+     col += 1
+   }
+ }
+
+ /*
+  * Dense-times-sparse kernel restricted to output rows istart until iend.
+  * Every column of a is visited; for each stored entry (value v at row r of column `col`),
+  * v times rows istart until iend of column r of this matrix is added to the same rows of
+  * column `col` of out. ioff is subtracted from the jc and ir entries to obtain zero-based positions.
+  * out is accumulated into, so the caller must have cleared it.
+  */
+ def fSMultHelper2(a:SMat, out:FMat, istart:Int, iend:Int, ioff:Int) = {
+   var col = 0
+   while (col < a.ncols) {
+     var p = a.jc(col) - ioff
+     while (p < a.jc(col+1)-ioff) {
+       val v = a.data(p)
+       val r = a.ir(p) - ioff
+       var row = istart
+       while (row < iend) {
+         out.data(row+col*nrows) += data(row+r*nrows)*v
+         row += 1
+       }
+       p += 1
+     }
+     col += 1
+   }
+ }
+
+ /*
+ * Product of this dense matrix with the sparse matrix a (nrows x a.ncols result).
+ * Throws RuntimeException("dimensions mismatch") unless ncols == a.nrows.
+ * Path selection:
+ * - Mat.noMKL set, or Mat.numThreads > 1: the Scala kernel fSMultHelper is used; when
+ * nrows*nnz exceeds 100000 and there is more than one thread, a's columns are split into
+ * Mat.numThreads contiguous ranges, one actor per range, and the caller spins (yielding)
+ * until all have flagged completion.
+ * - otherwise: native sparse routines (scscmv for a single-row left operand, smcscm in general).
+ */
+ def fSMult(a:SMat, outmat:Mat):FMat = {
+ if (ncols != a.nrows) {
+ throw new RuntimeException("dimensions mismatch")
+ } else {
+ val out = FMat.newOrCheckFMat(nrows, a.ncols, outmat)
+ // The helpers accumulate into out, so a reused output must start at zero.
+ out.clear
+ Mat.nflops += 2L * nrows * a.nnz
+ val ioff = Mat.ioneBased;
+ if (Mat.noMKL || Mat.numThreads > 1) {
+ if (1L*nrows*a.nnz > 100000L && Mat.numThreads > 1) {
+ // One completion flag per worker; the sum reaches numThreads when all ranges are done.
+ val done = IMat(1,Mat.numThreads)
+ for (ithread <- 0 until Mat.numThreads) {
+ val istart = ithread*a.ncols/Mat.numThreads
+ val iend = (ithread+1)*a.ncols/Mat.numThreads
+ actor {
+ fSMultHelper(a, out, istart, iend, ioff)
+ done(ithread) = 1
+ }
+ }
+ while (SciFunctions.sum(done).v < Mat.numThreads) {Thread.`yield`()}
+ } else {
+ fSMultHelper(a, out, 0, a.ncols, ioff)
+ }
+ } else {
+ // NOTE(review): the native routines presumably expect one-based indices, hence the
+ // incInds conversion when the library runs zero-based -- confirm against SparseMat.incInds.
+ var jc0 = if (ioff == 0) SparseMat.incInds(a.jc) else a.jc
+ var ir0 = if (ioff == 0) SparseMat.incInds(a.ir) else a.ir
+ if (nrows == 1) {
+ scscmv("T", a.nrows, a.ncols, 1.0f, "GLNF", a.data, ir0, jc0, data, 0f, out.data)
+ } else {
+ smcscm(nrows, a.ncols, data, nrows, a.data, ir0, jc0, out.data, nrows)
+ }
+ }
+ out
+ }
+ }
+
+ /*
+ * Product of this dense matrix with the transpose of the sparse matrix a (nrows x a.nrows result),
+ * computed by the native smcsrm routine into a cleared output.
+ * Throws RuntimeException("xT dimensions mismatch") unless ncols == a.ncols.
+ * NOTE(review): a.ir and a.jc are passed to the native routine as they are, whereas fSMult
+ * converts them with SparseMat.incInds when Mat.ioneBased is 0 -- confirm which index base
+ * smcsrm expects.
+ */
+ def multT(a:SMat, outmat:Mat):FMat = {
+ import edu.berkeley.bid.CBLAS._
+ if (ncols == a.ncols) {
+ val out = FMat.newOrCheckFMat(nrows, a.nrows, outmat)
+ out.clear
+ smcsrm(nrows, a.ncols, data, nrows, a.data, a.ir, a.jc, out.data, nrows)
+ Mat.nflops += 2L * a.nnz * nrows
+ out
+ } else {
+ throw new RuntimeException("xT dimensions mismatch")
+ }
+ }
+
+ /*
+  * Product of this matrix with the transpose of the dense matrix a (nrows x a.nrows result),
+  * computed with a single sgemm call (second operand transposed).
+  * Throws RuntimeException("xT dimensions mismatch") unless ncols == a.ncols.
+  */
+ def multT(a:FMat, outmat:Mat):FMat = {
+   import edu.berkeley.bid.CBLAS._
+   if (ncols != a.ncols) {
+     throw new RuntimeException("xT dimensions mismatch")
+   }
+   val result = FMat.newOrCheckFMat(nrows, a.nrows, outmat)
+   sgemm(ORDER.ColMajor, TRANSPOSE.NoTrans, TRANSPOSE.Trans,
+     nrows, a.nrows, ncols, 1.0f, data, nrows, a.data, a.nrows, 0, result.data, nrows)
+   Mat.nflops += 2L * length * a.nrows
+   result
+ }
+ /*
+  * Column-based (streaming) multiply in plain Scala: for every element (j, i) of aa, that
+  * value times column j of this matrix is added to column i of the output.
+  * Throws RuntimeException("dimensions mismatch") unless ncols == aa.nrows.
+  */
+
+ def DMult(aa:FMat, omat:Mat):FMat = {
+   if (ncols != aa.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val result = FMat.newOrCheckFMat(nrows, aa.ncols, omat)
+   // The loop accumulates with +=, so a reused output must start at zero.
+   result.clear
+   var col = 0
+   while (col < aa.ncols) {
+     var inner = 0
+     while (inner < aa.nrows) {
+       var row = 0
+       // ncols equals aa.nrows here, so col*ncols is aa's column offset.
+       val scale = aa.data(inner + col*ncols)
+       while (row < nrows) {
+         result.data(row+col*nrows) += data(row+inner*nrows)*scale
+         row += 1
+       }
+       inner += 1
+     }
+     col += 1
+   }
+   result
+ }
+
+ /*
+  * Very slow row-and-column multiply in plain Scala: each output element is computed as the
+  * dot product of one row of this matrix with one column of aa and then stored.
+  * Throws RuntimeException("dimensions mismatch") unless ncols == aa.nrows.
+  */
+
+ def sDMult(aa:FMat, omat:Mat):FMat = {
+   if (ncols != aa.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val result = FMat.newOrCheckFMat(nrows, aa.ncols, omat)
+   var col = 0
+   while (col < aa.ncols) {
+     var row = 0
+     while (row < nrows) {
+       var inner = 0
+       var acc = 0f
+       while (inner < ncols) {
+         acc += data(row+inner*nrows) * aa.data(inner+col*aa.nrows)
+         inner += 1
+       }
+       // Every element is assigned (not accumulated), so the output needs no clearing.
+       result.data(row + col*result.nrows) = acc
+       row += 1
+     }
+     col += 1
+   }
+   result
+ }
+
+ // Matrix product delegated to GMat.GPUmult(this, b, out).
+ def GPUmult(b:FMat, out:Mat) = GMat.GPUmult(this, b, out)
+
+ // Sum of element-wise products with another FMat; uses the generic DenseMat.dot.
+ def dot(a:FMat):Double = super.dot(a)
+
+ // Generic-argument form: a is cast to FMat without a type check (ClassCastException otherwise).
+ override def dot(a:Mat):Double = super.dot(a.asInstanceOf[FMat])
+
+ /*
+ * Linear solve behind the `/` operator (this / a0); the result has this matrix's dimensions.
+ * a0 must be an FMat that is square with a.nrows == ncols, otherwise a RuntimeException is thrown.
+ * The work is done by an LU factorisation (sgetrf) of a copy of a followed by sgetrs on a copy
+ * of this matrix; neither operand is modified.
+ * NOTE(review): both LAPACK calls pass ORDER.RowMajor over the column-major buffers, which
+ * presumably makes them operate on the transposes and so solve X * a = this -- confirm.
+ * Mat.nflops is incremented before the dimension check.
+ */
+ def solvel(a0:Mat):FMat =
+ a0 match {
+ case a:FMat => {
+ Mat.nflops += 2L*a.nrows*a.nrows*a.nrows/3 + 2L*nrows*a.nrows*a.nrows
+ if (a.nrows != a.ncols || ncols != a.nrows) {
+ throw new RuntimeException("solve needs a square matrix")
+ } else {
+ val out = FMat(nrows, ncols)
+ // Work on copies: sgetrf overwrites tmp with the factors and sgetrs overwrites out with the solution.
+ val tmp = new Array[Float](ncols*ncols)
+ System.arraycopy(a.data, 0, tmp, 0, a.length)
+ System.arraycopy(data, 0, out.data, 0, length)
+ val ipiv = new Array[Int](ncols)
+ sgetrf(ORDER.RowMajor, ncols, ncols, tmp, ncols, ipiv)
+ sgetrs(ORDER.RowMajor, "N", ncols, nrows, tmp, ncols, ipiv, out.data, nrows)
+ out
+ }
+ }
+ case _ => throw new RuntimeException("unsupported arg to / "+a0)
+ }
+
+ /*
+ * Linear solve behind the `\\` operator (this \\ a0); the result has a0's dimensions.
+ * This matrix must be square with ncols == a.nrows and a0 must be an FMat, otherwise a
+ * RuntimeException is thrown.
+ * The work is done by an LU factorisation (sgetrf) of a copy of this matrix followed by
+ * sgetrs on a copy of a; neither operand is modified.
+ * NOTE(review): presumably solves this * X = a0 -- confirm.
+ * Mat.nflops is incremented before the dimension check.
+ */
+ def solver(a0:Mat):FMat =
+ a0 match {
+ case a:FMat => {
+ Mat.nflops += 2L*nrows*nrows*nrows/3 + 2L*nrows*nrows*a.ncols
+ if (nrows != ncols || ncols != a.nrows) {
+ throw new RuntimeException("solve needs a square matrix")
+ } else {
+ val out = FMat(a.nrows, a.ncols)
+ // Work on copies: sgetrf overwrites tmp with the factors and sgetrs overwrites out with the solution.
+ val tmp = new Array[Float](ncols*ncols)
+ System.arraycopy(data, 0, tmp, 0, length)
+ System.arraycopy(a.data, 0, out.data, 0, a.length)
+ val ipiv = new Array[Int](ncols)
+ sgetrf(ORDER.ColMajor, ncols, ncols, tmp, ncols, ipiv)
+ sgetrs(ORDER.ColMajor, "N", ncols, a.ncols, tmp, nrows, ipiv, out.data, nrows)
+ out
+ }
+ }
+ case _ => throw new RuntimeException("unsupported arg to \\ "+a0)
+ }
+
+ /*
+  * Matrix inverse via LAPACK: the matrix is copied, LU-factorised in the copy with sgetrf and
+  * inverted in place with sgetri. This matrix is not modified.
+  * Throws RuntimeException("inv method needs a square matrix") when nrows != ncols.
+  */
+ def inv:FMat = {
+   import edu.berkeley.bid.LAPACK._
+   if (nrows != ncols) {
+     throw new RuntimeException("inv method needs a square matrix")
+   }
+   val result = FMat(nrows, ncols)
+   System.arraycopy(data, 0, result.data, 0, length)
+   val pivots = new Array[Int](nrows)
+   sgetrf(ORDER.ColMajor, nrows, ncols, result.data, nrows, pivots)
+   sgetri(ORDER.ColMajor, nrows, result.data, nrows, pivots)
+   result
+ }
+
+ /*
+  * Set the first `length` elements to zero, in place, and return this matrix.
+  */
+ override def clear = {
+   Arrays.fill(data, 0, length, 0f)
+   this
+ }
+
+ /*
+  * Return an nr x nc FMat, reusing this matrix where possible:
+  * this matrix itself when the dimensions already match, a new FMat sharing this matrix's
+  * backing array when that array is large enough, and a freshly allocated FMat otherwise.
+  * nnz is not used by this implementation.
+  */
+ override def recycle(nr:Int, nc:Int, nnz:Int):FMat = {
+   val sameShape = nrows == nr && nc == ncols
+   if (sameShape) {
+     this
+   } else {
+     val needed = nr*nc
+     if (data.size >= needed) new FMat(nr, nc, data) else FMat(nr, nc)
+   }
+ }
+
+ /*
+ * Basic operators on pairs of FMats. These are the compute routines.
+ * Every operator here allocates its result (the output argument passed down is null).
+ * +, -, *@ and /@ are element-wise and go through the vector primitives in object FMat;
+ * * is the matrix product (dense or sparse right operand); xT multiplies by the transpose of
+ * the right operand; / and \\ are the linear solves solvel and solver; xG is the GPU product.
+ */
+ def xG (b :FMat) = GPUmult(b, null)
+ def + (b : FMat) = ffMatOpv(b, FMat.vecAdd _, null)
+ def - (b : FMat) = ffMatOpv(b, FMat.vecSub _, null)
+ def * (b : FMat) = fDMult(b, null)
+ def * (b : SMat) = fSMult(b, null)
+ def xT (b : SMat) = multT(b, null)
+ def xT (b : FMat) = multT(b, null)
+ def / (b : FMat) = solvel(b)
+ def \\ (b : FMat) = solver(b)
+ def *@ (b : FMat) = ffMatOpv(b, FMat.vecMul _, null)
+ def /@ (b : FMat) = ffMatOpv(b, FMat.fVecDiv _, null)
+
+ // Float scalar on the right. `*` wraps the scalar with FMat.felem and reuses fDMult's scalar path.
+ override def * (b : Float) = fDMult(FMat.felem(b), null)
+ override def + (b : Float) = ffMatOpScalarv(b, FMat.vecAdd _, null)
+ override def - (b : Float) = ffMatOpScalarv(b, FMat.vecSub _, null)
+ override def *@ (b : Float) = ffMatOpScalarv(b, FMat.vecMul _, null)
+ override def /@ (b : Float) = ffMatOpScalarv(b, FMat.fVecDiv _, null)
+
+ // Int scalar on the right; the Int is widened to Float at the call.
+ override def * (b : Int) = fDMult(FMat.felem(b), null)
+ override def + (b : Int) = ffMatOpScalarv(b, FMat.vecAdd _, null)
+ override def - (b : Int) = ffMatOpScalarv(b, FMat.vecSub _, null)
+ override def *@ (b : Int) = ffMatOpScalarv(b, FMat.vecMul _, null)
+ override def /@ (b : Int) = ffMatOpScalarv(b, FMat.fVecDiv _, null)
+
+ // Double scalar on the right; the Double is narrowed to Float first, so the result stays an FMat.
+ override def * (b : Double) = fDMult(FMat.felem(b.asInstanceOf[Float]), null)
+ override def + (b : Double) = ffMatOpScalarv(b.asInstanceOf[Float], FMat.vecAdd _, null)
+ override def - (b : Double) = ffMatOpScalarv(b.asInstanceOf[Float], FMat.vecSub _, null)
+ override def *@ (b : Double) = ffMatOpScalarv(b.asInstanceOf[Float], FMat.vecMul _, null)
+ override def /@ (b : Double) = ffMatOpScalarv(b.asInstanceOf[Float], FMat.fVecDiv _, null)
+
+ // Element-wise comparisons against another FMat. The result is an FMat holding 1f where the
+ // comparison is true and 0f where it is false. == and === are defined identically.
+ def > (b : FMat) = ffMatOp(b, (x:Float, y:Float) => if (x > y) 1f else 0f, null)
+ def < (b : FMat) = ffMatOp(b, (x:Float, y:Float) => if (x < y) 1f else 0f, null)
+ def == (b : FMat) = ffMatOp(b, (x:Float, y:Float) => if (x == y) 1f else 0f, null)
+ def === (b : FMat) = ffMatOp(b, (x:Float, y:Float) => if (x == y) 1f else 0f, null)
+ def >= (b : FMat) = ffMatOp(b, (x:Float, y:Float) => if (x >= y) 1f else 0f, null)
+ def <= (b : FMat) = ffMatOp(b, (x:Float, y:Float) => if (x <= y) 1f else 0f, null)
+ def != (b : FMat) = ffMatOp(b, (x:Float, y:Float) => if (x != y) 1f else 0f, null)
+
+ // Element-wise comparisons against a Double scalar; the scalar is narrowed to Float before comparing.
+ override def > (b : Double) = ffMatOpScalar(b.asInstanceOf[Float], (x:Float, y:Float) => if (x > y) 1f else 0f, null)
+ override def < (b : Double) = ffMatOpScalar(b.asInstanceOf[Float], (x:Float, y:Float) => if (x < y) 1f else 0f, null)
+ override def == (b : Double) = ffMatOpScalar(b.asInstanceOf[Float], (x:Float, y:Float) => if (x == y) 1f else 0f, null)
+ override def === (b : Double) = ffMatOpScalar(b.asInstanceOf[Float], (x:Float, y:Float) => if (x == y) 1f else 0f, null)
+ override def >= (b : Double) = ffMatOpScalar(b.asInstanceOf[Float], (x:Float, y:Float) => if (x >= y) 1f else 0f, null)
+ override def <= (b : Double) = ffMatOpScalar(b.asInstanceOf[Float], (x:Float, y:Float) => if (x <= y) 1f else 0f, null)
+ override def != (b : Double) = ffMatOpScalar(b.asInstanceOf[Float], (x:Float, y:Float) => if (x != y) 1f else 0f, null)
+
+ // Element-wise comparisons against an Int scalar; the Int is widened to Float at the call.
+ override def > (b : Int) = ffMatOpScalar(b, (x:Float, y:Float) => if (x > y) 1f else 0f, null)
+ override def < (b : Int) = ffMatOpScalar(b, (x:Float, y:Float) => if (x < y) 1f else 0f, null)
+ override def == (b : Int) = ffMatOpScalar(b, (x:Float, y:Float) => if (x == y) 1f else 0f, null)
+ override def === (b : Int) = ffMatOpScalar(b, (x:Float, y:Float) => if (x == y) 1f else 0f, null)
+ override def >= (b : Int) = ffMatOpScalar(b, (x:Float, y:Float) => if (x >= y) 1f else 0f, null)
+ override def <= (b : Int) = ffMatOpScalar(b, (x:Float, y:Float) => if (x <= y) 1f else 0f, null)
+ override def != (b : Int) = ffMatOpScalar(b, (x:Float, y:Float) => if (x != y) 1f else 0f, null)
+
+ // Horizontal concatenation; a Float operand is first wrapped with FMat.felem.
+ def \ (b: FMat) = horzcat(b)
+ def \ (b: Float) = horzcat(FMat.felem(b))
+
+ // Vertical concatenation; a Float operand is first wrapped with FMat.felem.
+ def on (b: FMat) = vertcat(b)
+ def on (b: Float) = vertcat(FMat.felem(b))
+
+ // Pairing operator: bundles this matrix with b into a pair object.
+ // NOTE(review): the pair classes presumably carry an output matrix for in-place operators -- confirm in FPair/SPair.
+ def ~ (b : FMat):FPair = new FPair(this, b)
+ def ~ (b : SMat):SPair = new SPair(this, b)
+
+ // Generic-argument form; only FMat and SMat partners are accepted, anything else throws RuntimeException.
+ override def ~ (b: Mat):Pair =
+ b match {
+ case db:FMat => new FPair(this, db)
+ case sb:SMat => new SPair(this, sb)
+ case _ => throw new RuntimeException("mismatched types for operator ~")
+ }
+
+ /*
+ * Specialize to IMats to help the type system.
+ * The IMat operand is converted with FMat(b) and the FMat operator of the same name is applied,
+ * so every result is an FMat.
+ */
+ def + (b : IMat):FMat = this + FMat(b)
+ def - (b : IMat):FMat = this - FMat(b)
+ def * (b : IMat):FMat = this * FMat(b)
+ def / (b : IMat):FMat = this / FMat(b)
+ def \\ (b : IMat):FMat = this \\ FMat(b)
+ def *@ (b : IMat):FMat = this *@ FMat(b)
+ def /@ (b : IMat):FMat = this /@ FMat(b)
+ def \ (b : IMat):FMat = this \ FMat(b)
+ def on (b : IMat):FMat = this on FMat(b)
+
+ // Comparisons against an IMat, again by converting the operand to FMat first.
+ def > (b : IMat):FMat = this > FMat(b)
+ def < (b : IMat):FMat = this < FMat(b)
+ def >= (b : IMat):FMat = this >= FMat(b)
+ def <= (b : IMat):FMat = this <= FMat(b)
+ def == (b : IMat):FMat = this == FMat(b)
+ def === (b : IMat):FMat = this === FMat(b)
+ def != (b : IMat):FMat = this != FMat(b)
+
+ /*
+ * Specialize to DMats to help the type system.
+ */
+ def + (b : DMat):DMat = DMat(this) + b
+ def - (b : DMat):DMat = DMat(this) - b
+ def * (b : DMat):DMat = DMat(this) * b
+ def / (b : DMat):DMat = DMat(this) / b
+ def \\ (b : DMat):DMat = DMat(this) \\ b
+ def *@ (b : DMat):DMat = DMat(this) *@ b
+ def /@ (b : DMat):DMat = DMat(this) /@ b
+ def \ (b : DMat):DMat = DMat(this) \ b
+ def on (b : DMat):DMat = DMat(this) on b
+
+ def > (b : DMat):DMat = DMat(this) > b
+ def < (b : DMat):DMat = DMat(this) < b
+ def >= (b : DMat):DMat = DMat(this) >= b
+ def <= (b : DMat):DMat = DMat(this) <= b
+ def == (b : DMat):DMat = DMat(this) == b
+ def === (b : DMat):DMat = DMat(this) === b
+ def != (b : DMat):DMat = DMat(this) != b
+
+ /*
+ * Specialize to CMats to help the type system.
+ */
+ def + (b : CMat):CMat = CMat(this) + b
+ def - (b : CMat):CMat = CMat(this) - b
+ def * (b : CMat):CMat = CMat(this) * b
+ def / (b : CMat):CMat = CMat(this) / b
+ def \\ (b : CMat):CMat = CMat(this) \\ b
+ def *@ (b : CMat):CMat = CMat(this) *@ b
+ def /@ (b : CMat):CMat = CMat(this) /@ b
+ def \ (b : CMat):CMat = CMat(this) \ b
+ def on (b : CMat):CMat = CMat(this) on b
+
+ /*
+ * Operators whose second arg is generic.
+ */
+ import Operator._
+ // Operators taking an untyped Mat: each forwards (this, b, null, opcode) to applyMat,
+ // which is imported from Operator and resolves the concrete implementation.
+ override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus)
+ override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus)
+ override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times)
+ // xT dispatches directly on the runtime type of `b`. Unsupported types are rejected with a
+ // descriptive RuntimeException (same style as `~`) instead of an anonymous MatchError.
+ override def xT (b : Mat) = b match {
+   case bb:SMat => multT(bb, null)
+   case bb:FMat => multT(bb, null)
+   case _ => throw new RuntimeException("mismatched types for operator xT")
+ }
+ override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div)
+ override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve)
+ override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes)
+ override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv)
+ override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat)
+ override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat)
+
+ // `==` and `===` both map to Mop_EQ.
+ override def > (b : Mat):Mat = applyMat(this, b, null, Mop_GT)
+ override def < (b : Mat):Mat = applyMat(this, b, null, Mop_LT)
+ override def >= (b : Mat):Mat = applyMat(this, b, null, Mop_GE)
+ override def <= (b : Mat):Mat = applyMat(this, b, null, Mop_LE)
+ override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def === (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE)
+
+}
+
+/**
+ * Result of `omat ~ mat` for an FMat right-hand side. Every operator applies the operation
+ * to `mat` and the argument, passing `omat` as the output argument of the underlying FMat
+ * routine (the same slot that the plain FMat operators fill with null).
+ */
+class FPair(val omat:Mat, val mat:FMat) extends Pair {
+
+  override def t:FMat = FMat(mat.gt(omat))
+
+  // Matrix products and element-wise arithmetic with an FMat/SMat argument.
+  def xG (b :FMat) = mat.GPUmult(b, omat)
+  def * (b : FMat) = mat.fDMult(b, omat)
+  def * (b : SMat) = mat.fSMult(b, omat)
+  def xT (b : SMat) = mat.multT(b, omat)
+  def xT (b : FMat) = mat.multT(b, omat)
+  def + (b : FMat) = mat.ffMatOpv(b, FMat.vecAdd _, omat)
+  def - (b : FMat) = mat.ffMatOpv(b, FMat.vecSub _, omat)
+  def *@ (b : FMat) = mat.ffMatOpv(b, FMat.vecMul _, omat)
+  def /@ (b : FMat) = mat.ffMatOpv(b, FMat.fVecDiv _, omat)
+  def ^ (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat)
+
+  // Element-wise comparisons: 1.0f where the predicate holds, 0.0f otherwise.
+  def > (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => if (x > y) 1.0f else 0.0f, omat)
+  def < (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => if (x < y) 1.0f else 0.0f, omat)
+  def == (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => if (x == y) 1.0f else 0.0f, omat)
+  def === (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => if (x == y) 1.0f else 0.0f, omat)
+  def >= (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0.0f, omat)
+  def <= (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0.0f, omat)
+  def != (b : FMat) = mat.ffMatOp(b, (x:Float, y:Float) => if (x != y) 1.0f else 0.0f, omat)
+
+  // Float / Double scalar arguments. `*` wraps the scalar in a 1x1 FMat and uses fDMult.
+  override def * (b : Float) = mat.fDMult(FMat.felem(b), omat)
+  override def * (b : Double) = mat.fDMult(FMat.felem(b.asInstanceOf[Float]), omat)
+  override def + (b : Float) = mat.ffMatOpScalarv(b, FMat.vecAdd _, omat)
+  override def - (b : Float) = mat.ffMatOpScalarv(b, FMat.vecSub _, omat)
+  override def *@ (b : Float) = mat.ffMatOpScalarv(b, FMat.vecMul _, omat)
+  override def /@ (b : Float) = mat.ffMatOpScalarv(b, FMat.fVecDiv _, omat)
+  override def ^ (b : Float) = mat.ffMatOpScalar(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat)
+
+  override def > (b : Float) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x > y) 1.0f else 0.0f, omat)
+  override def < (b : Float) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x < y) 1.0f else 0.0f, omat)
+  override def == (b : Float) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x == y) 1.0f else 0.0f, omat)
+  override def >= (b : Float) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0.0f, omat)
+  override def <= (b : Float) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0.0f, omat)
+  override def != (b : Float) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x != y) 1.0f else 0.0f, omat)
+
+  // Int scalar arguments (widened to Float by the callee's parameter type).
+  override def * (b : Int) = mat.fDMult(FMat.felem(b), omat)
+  override def + (b : Int) = mat.ffMatOpScalarv(b, FMat.vecAdd _, omat)
+  override def - (b : Int) = mat.ffMatOpScalarv(b, FMat.vecSub _, omat)
+  override def *@ (b : Int) = mat.ffMatOpScalarv(b, FMat.vecMul _, omat)
+  override def /@ (b : Int) = mat.ffMatOpScalarv(b, FMat.fVecDiv _, omat)
+  override def ^ (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat)
+
+  override def > (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x > y) 1.0f else 0.0f, omat)
+  override def < (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x < y) 1.0f else 0.0f, omat)
+  override def == (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x == y) 1.0f else 0.0f, omat)
+  override def >= (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0.0f, omat)
+  override def <= (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0.0f, omat)
+  override def != (b : Int) = mat.ffMatOpScalar(b, (x:Float, y:Float) => if (x != y) 1.0f else 0.0f, omat)
+
+  // Untyped Mat arguments are routed through Operator.applyMat with the matching opcode.
+  import Operator._
+  override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus)
+  override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus)
+  override def * (b : Mat):Mat = applyMat(mat, b, omat, Mop_Times)
+  // Unsupported argument types get a descriptive RuntimeException rather than a MatchError.
+  override def xT (b : Mat) = b match {
+    case bb:SMat => mat.multT(bb, omat)
+    case bb:FMat => mat.multT(bb, omat)
+    case _ => throw new RuntimeException("mismatched types for operator xT")
+  }
+  override def / (b : Mat):Mat = applyMat(mat, b, omat, Mop_Div)
+  override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve)
+  override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes)
+  override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv)
+  override def \ (b : Mat):Mat = applyMat(mat, b, omat, Mop_HCat)
+  override def on (b : Mat):Mat = applyMat(mat, b, omat, Mop_VCat)
+
+  override def > (b : Mat):Mat = applyMat(mat, b, omat, Mop_GT)
+  override def < (b : Mat):Mat = applyMat(mat, b, omat, Mop_LT)
+  override def >= (b : Mat):Mat = applyMat(mat, b, omat, Mop_GE)
+  override def <= (b : Mat):Mat = applyMat(mat, b, omat, Mop_LE)
+  override def == (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+  override def === (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+  override def != (b : Mat):Mat = applyMat(mat, b, omat, Mop_NE)
+}
+
+/**
+ * Companion of FMat: constructors/converters plus the strided element-wise kernels that
+ * the FMat operators pass around as function values (e.g. `FMat.vecAdd _`).
+ *
+ * All vec* kernels share one contract: starting at offsets a0/b0/c0 they write
+ * c(ci) = op(a(ai), b(bi)), advancing the three cursors by ainc/binc/cinc, and stop as
+ * soon as the output cursor reaches c0 + n. The Float return value is always zero.
+ */
+object FMat {
+
+  /** New nr x nc matrix backed by a freshly allocated (zero-filled) Float array. */
+  def apply(nr:Int, nc:Int) = new FMat(nr, nc, new Array[Float](nr*nc))
+
+  /** Wraps an existing dense Float matrix; the data array is shared, not copied. */
+  def apply(a:DenseMat[Float]):FMat = new FMat(a.nrows, a.ncols, a.data)
+
+  /** Converts any supported matrix type to an FMat; other types raise a RuntimeException. */
+  def apply(x:Mat):FMat = x match {
+    case dd:DMat =>
+      val converted = FMat(x.nrows, x.ncols)
+      Mat.copyToFloatArray(dd.data, 0, converted.data, 0, dd.length)
+      converted
+    case ff:FMat =>
+      val duplicate = FMat(x.nrows, x.ncols)
+      System.arraycopy(ff.data, 0, duplicate.data, 0, ff.length)
+      duplicate
+    case ii:IMat =>
+      val converted = FMat(x.nrows, x.ncols)
+      Mat.copyToFloatArray(ii.data, 0, converted.data, 0, ii.length)
+      converted
+    case ss:SMat => FMat(ss.full)
+    case gg:GMat => gg.toFMat
+    case _ => throw new RuntimeException("Unsupported source type")
+  }
+
+  /** 1x1 matrix holding `x`. */
+  def felem(x:Float) = {
+    val single = FMat(1,1)
+    single.data(0) = x
+    single
+  }
+
+  /** c = a / b, element-wise over the strided ranges. */
+  def fVecDiv(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    val limit = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < limit) {
+      c(ic) = a(ia) / b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0f
+  }
+
+  /** c = a + b, element-wise over the strided ranges. */
+  def vecAdd(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    val limit = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < limit) {
+      c(ic) = a(ia) + b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0f
+  }
+
+  /** c = a - b, element-wise over the strided ranges. */
+  def vecSub(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    val limit = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < limit) {
+      c(ic) = a(ia) - b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0f
+  }
+
+  /** c = a * b, element-wise over the strided ranges. */
+  def vecMul(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    val limit = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < limit) {
+      c(ic) = a(ia) * b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0f
+  }
+
+  /** c = max(a, b), element-wise over the strided ranges. */
+  def vecMax(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    val limit = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < limit) {
+      c(ic) = math.max(a(ia), b(ib))
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0f
+  }
+
+  /** c = min(a, b), element-wise over the strided ranges. */
+  def vecMin(a:Array[Float], a0:Int, ainc:Int, b:Array[Float], b0:Int, binc:Int, c:Array[Float], c0:Int, cinc:Int, n:Int):Float = {
+    val limit = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < limit) {
+      c(ic) = math.min(a(ia), b(ib))
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0f
+  }
+
+  /**
+   * Returns an nr x nc FMat to write a result into: a new matrix when `outmat` is null or
+   * 0x0, `outmat` itself when it already has the requested shape, and otherwise whatever
+   * `outmat.recycle(nr, nc, 0)` produces.
+   */
+  def newOrCheckFMat(nr:Int, nc:Int, outmat:Mat):FMat = {
+    val absent = outmat.asInstanceOf[AnyRef] == null || (outmat.nrows == 0 && outmat.ncols == 0)
+    if (absent) {
+      FMat(nr, nc)
+    } else if (outmat.nrows == nr && outmat.ncols == nc) {
+      outmat.asInstanceOf[FMat]
+    } else {
+      outmat.recycle(nr, nc, 0).asInstanceOf[FMat]
+    }
+  }
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/GIMat.scala b/src/main/scala/BIDMat/GIMat.scala
new file mode 100755
index 00000000..cdf1eb18
--- /dev/null
+++ b/src/main/scala/BIDMat/GIMat.scala
@@ -0,0 +1,120 @@
+package BIDMat
+import jcuda._;
+import jcuda.jcublas.JCublas;
+import jcuda.runtime.JCuda;
+import edu.berkeley.bid.CUMAT;
+
+/**
+ * Integer matrix whose elements live in a cuBLAS-allocated device buffer.
+ *
+ * @param data     device pointer to the element buffer
+ * @param realsize element capacity recorded for the buffer; `recycle` compares it with
+ *                 nr*nc to decide whether the buffer can be reused for a new shape
+ */
+class GIMat(nr:Int, nc:Int, val data:Pointer, val realsize:Int) extends Mat(nr, nc) {
+
+  /** Copies at most the leading 10 rows x 50 columns to the host and prints that IMat. */
+  override def toString:String = {
+    val nr = scala.math.min(nrows,10)
+    val nc = scala.math.min(ncols,50)
+    val tmpMat = IMat(nr, nc)
+    JCublas.cublasGetMatrix(nr, nc, Sizeof.INT, data, nrows, Pointer.to(tmpMat.data), nr)
+    tmpMat.toString
+  }
+
+  override def mytype = "GIMat"
+
+  /**
+   * Element-wise binary operation `op` (codes 0..9, see the operators below) between this
+   * matrix and `a`. Shapes must be equal, or one operand must be a scalar, or they must agree
+   * in one dimension while one of them has extent 1 in the other.
+   *
+   * The result takes the larger extent in each dimension, so broadcast results are sized
+   * like the larger operand (the same rule GMat.gOp uses). The previous (nrows, a.ncols)
+   * sizing under-allocated the output when `a` was a column vector or `this` was a scalar.
+   *
+   * @param oldmat optional matrix to reuse for the result; null allocates a new one
+   * @throws RuntimeException when the shapes are not compatible
+   */
+  def GIop(a:GIMat, oldmat:GIMat, op:Int):GIMat = {
+    if ((nrows == a.nrows && ncols == a.ncols) ||
+        (nrows == a.nrows && (a.ncols == 1 || ncols == 1)) ||
+        (ncols == a.ncols && (a.nrows == 1 || nrows == 1)) ||
+        (a.ncols == 1 && a.nrows == 1) ||
+        (ncols == 1 && nrows == 1)) {
+      val out = GIMat.newOrCheckGIMat(scala.math.max(nrows, a.nrows), scala.math.max(ncols, a.ncols), oldmat)
+      Mat.nflops += scala.math.max(length, a.length)
+      CUMAT.applyiop(data, nrows, ncols, a.data, a.nrows, a.ncols, out.data, op)
+      JCuda.cudaDeviceSynchronize()
+      out
+    } else throw new RuntimeException("dimensions mismatch")
+  }
+
+  /** Copies all nrows*ncols elements into a new host IMat. */
+  def toIMat():IMat = {
+    val out = IMat(nrows, ncols)
+    JCublas.cublasGetVector(nrows*ncols, Sizeof.INT, data, 1, Pointer.to(out.data), 1);
+    out
+  }
+
+  /** Releases the device buffer. Matrices produced by `recycle` may share this buffer. */
+  def free() = {
+    JCublas.cublasFree(data)
+  }
+
+  def + (a : GIMat) = GIop(a, null, 0)
+  def - (a : GIMat) = GIop(a, null, 1)
+  def *@ (a : GIMat) = GIop(a, null, 2)
+  def /@ (a : GIMat) = GIop(a, null, 3)
+  def > (b : GIMat) = GIop(b, null, 4)
+  def < (b : GIMat) = GIop(b, null, 5)
+  def == (b : GIMat) = GIop(b, null, 6)
+  def === (b : GIMat) = GIop(b, null, 6)
+  def >= (b : GIMat) = GIop(b, null, 7)
+  def <= (b : GIMat) = GIop(b, null, 8)
+  def != (b : GIMat) = GIop(b, null, 9)
+
+  def ~ (b: GIMat) = new GIPair(this, b)
+
+  /**
+   * Returns a matrix of shape nr x nc: this one if the shape already matches, a new view on
+   * the same buffer if it is large enough, otherwise this buffer is freed and a new matrix
+   * is allocated. `nnz` is not used.
+   */
+  override def recycle(nr:Int, nc:Int, nnz:Int):GIMat = {
+    if (nrows == nr && nc == ncols) {
+      this
+    } else if (realsize >= nr*nc) {
+      new GIMat(nr, nc, data, realsize)
+    } else {
+      free
+      GIMat(nr, nc)
+    }
+  }
+}
+
+// Result of `omat ~ mat` for GIMats: each operator runs GIop on `mat` and the argument,
+// passing `omat` as the matrix to reuse for the output. Opcodes match the GIMat operators
+// (0 add, 1 sub, 2 mul, 3 div, 4 gt, 5 lt, 6 eq, 7 ge, 8 le, 9 ne).
+class GIPair (val omat:GIMat, val mat:GIMat){
+
+ def + (a : GIMat) = mat.GIop(a, omat, 0)
+ def - (a : GIMat) = mat.GIop(a, omat, 1)
+ def *@ (a : GIMat) = mat.GIop(a, omat, 2)
+ def /@ (a : GIMat) = mat.GIop(a, omat, 3)
+ def > (b : GIMat) = mat.GIop(b, omat, 4)
+ def < (b : GIMat) = mat.GIop(b, omat, 5)
+ def == (b : GIMat) = mat.GIop(b, omat, 6)
+ def === (b : GIMat) = mat.GIop(b, omat, 6)
+ def >= (b : GIMat) = mat.GIop(b, omat, 7)
+ def <= (b : GIMat) = mat.GIop(b, omat, 8)
+ def != (b : GIMat) = mat.GIop(b, omat, 9)
+}
+
+
+/** Factory methods for device-resident integer matrices. */
+object GIMat {
+  import jcuda.jcublas.cublasStatus
+
+  /**
+   * Allocates an nr x nc device matrix (contents are whatever cublasAlloc returns).
+   * A failed allocation now raises a RuntimeException instead of silently yielding a
+   * matrix with an unusable device pointer.
+   */
+  def apply(nr:Int, nc:Int):GIMat = {
+    val retv = new GIMat(nr, nc, new Pointer(), nr*nc)
+    val status = JCublas.cublasAlloc(nr*nc, Sizeof.INT, retv.data)
+    if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA alloc failed "+status)
+    retv
+  }
+
+  /** Allocates a device matrix shaped like `a` and uploads a's elements into it. */
+  def apply(a:IMat):GIMat = {
+    val retv = new GIMat(a.nrows, a.ncols, new Pointer(), a.length)
+    val rsize = a.nrows*a.ncols
+    val status = JCublas.cublasAlloc(rsize, Sizeof.INT, retv.data)
+    if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA alloc failed "+status)
+    JCublas.cublasSetVector(rsize, Sizeof.INT, Pointer.to(a.data), 1, retv.data, 1);
+    retv
+  }
+
+  /**
+   * Picks the output matrix for an nr x nc result: a new allocation when `oldmat` is null,
+   * `oldmat` itself when its shape matches, otherwise `oldmat.recycle(nr, nc, 0)`.
+   */
+  def newOrCheckGIMat(nr:Int, nc:Int, oldmat:GIMat):GIMat = {
+    if (oldmat.asInstanceOf[AnyRef] == null) {
+      GIMat(nr, nc)
+    } else {
+      if (oldmat.nrows != nr || oldmat.ncols != nc) {
+        oldmat.recycle(nr, nc, 0)
+      } else {
+        oldmat
+      }
+    }
+  }
+}
+
+
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/GMat.scala b/src/main/scala/BIDMat/GMat.scala
new file mode 100755
index 00000000..0daab9a8
--- /dev/null
+++ b/src/main/scala/BIDMat/GMat.scala
@@ -0,0 +1,545 @@
+package BIDMat
+import jcuda._
+import jcuda.jcublas._
+import jcuda.jcublas.JCublas._
+import jcuda.runtime.JCuda._
+import jcuda.runtime._
+import scala.actors.Actor._
+import edu.berkeley.bid.CUMAT
+
+
+class GMat(nr:Int, nc:Int, val data:Pointer, val realsize:Int) extends Mat(nr, nc) {
+
+ // Scalar value of this matrix: downloads it to the host and returns element 0.
+ // Any matrix with more than one row or more than one column is rejected.
+ override def dv:Double = {
+   if (nrows <= 1 && ncols <= 1) {
+     toFMat.data(0)
+   } else {
+     throw new RuntimeException("Matrix should be 1x1 to extract value")
+   }
+ }
+
+ override def mytype = "GMat"
+
+ // Dense matrix: every element counts as a non-zero.
+ override def nnz = length
+
+ // Zero-fills the device buffer (length floats), waits for the device, and returns this.
+ override def clear = {
+ cudaMemset(data, 0, Sizeof.FLOAT*length)
+ cudaDeviceSynchronize
+ this
+ }
+
+ // Transpose into a newly allocated ncols x nrows device matrix via CUMAT.transpose,
+ // then wait for the device before returning it.
+ override def t = {
+ val out = GMat(ncols, nrows)
+ CUMAT.transpose(this.data, nrows, out.data, ncols, nrows, ncols)
+ cudaDeviceSynchronize()
+ out
+ }
+
+ // Fills every element of this device matrix with `v` and returns this.
+ // The previous version uploaded a one-element host array with a source stride of 0;
+ // cuBLAS documents non-positive increments for cublasSetVector as an invalid value, so the
+ // fill is now done from a fully populated host buffer with unit strides.
+ override def set(v:Float):GMat = {
+   val host = new Array[Float](length)
+   java.util.Arrays.fill(host, v)
+   JCublas.cublasSetVector(length, Sizeof.FLOAT, Pointer.to(host), 1, data, 1);
+   cudaDeviceSynchronize()
+   this
+ }
+
+
+ // Prints the leading corner of the matrix: at most 10 rows and 50 columns are copied
+ // from the device into a host FMat, whose own toString supplies the text.
+ override def toString:String = {
+   val shownRows = scala.math.min(nrows,10)
+   val shownCols = scala.math.min(ncols,50)
+   val hostCorner = FMat(shownRows, shownCols)
+   cublasGetMatrix(shownRows, shownCols, Sizeof.FLOAT, data, nrows, Pointer.to(hostCorner.data), shownRows)
+   cudaDeviceSynchronize()
+   hostCorner.toString
+ }
+
+ // New nr x nc device matrix filled with zeros.
+ override def zeros(nr:Int, nc:Int) = GMat.gzeros(nr, nc)
+
+ // New nt x nc device matrix filled with ones. The row count comes from the `nt`
+ // argument; previously the body referred to the constructor parameter `nr`, so the
+ // requested row count was ignored and this matrix's own row count was used instead.
+ override def ones(nt:Int, nc:Int) = GMat.gones(nt, nc)
+
+ // Product this * a on the device.
+ //  - ncols == a.nrows: dense matrix product through cublasSgemm.
+ //  - this is 1x1:      a scaled by this scalar (output cleared, then saxpy).
+ //  - a is 1x1:         this scaled by a's scalar.
+ // `oldmat` is the optional matrix to reuse for the result (null allocates).
+ // Throws RuntimeException on incompatible shapes or when cuBLAS reports an error.
+ def GMult(a:GMat, oldmat:Mat):GMat = {
+   if (ncols == a.nrows) {
+     val out = GMat.newOrCheckGMat(nrows, a.ncols, oldmat)
+     Mat.nflops += 2L * length * a.ncols
+     cublasSgemm('n', 'n', nrows, a.ncols, ncols, 1.0f, data, nrows, a.data, a.nrows, 0f, out.data, nrows)
+     cudaDeviceSynchronize()
+     // Read the error exactly once: cublasGetError resets the stored status, so a second
+     // read inside the message would always report 0.
+     val ee = cublasGetError
+     if (ee != 0) {
+       println("device is %d" format SciFunctions.device)
+       throw new RuntimeException("Cublas error in * "+ee)
+     }
+     out
+   } else if (ncols == 1 && nrows == 1) {
+     val out = GMat.newOrCheckGMat(a.nrows, a.ncols, oldmat)
+     Mat.nflops += 1L * a.length
+     out.clear
+     cublasSaxpy(a.length, this.dv.asInstanceOf[Float], a.data, 1, out.data, 1)
+     cudaDeviceSynchronize()
+     out
+   } else if (a.ncols == 1 && a.nrows == 1) {
+     val out = GMat.newOrCheckGMat(nrows, ncols, oldmat)
+     Mat.nflops += 1L * length
+     out.clear
+     cublasSaxpy(length, a.dv.asInstanceOf[Float], data, 1, out.data, 1)
+     cudaDeviceSynchronize()
+     out
+   } else throw new RuntimeException("dimensions mismatch")
+ }
+
+ // Product this * a^T on the device (cublasSgemm with the second operand transposed).
+ // Requires equal column counts; the result is nrows x a.nrows and is written into
+ // `oldmat` when one is supplied. A cuBLAS error is reported as a RuntimeException.
+ def GMultT(a:GMat, oldmat:Mat):GMat = {
+   if (ncols != a.ncols) throw new RuntimeException("dimensions mismatch")
+   val product = GMat.newOrCheckGMat(nrows, a.nrows, oldmat)
+   Mat.nflops += 2L * length * a.nrows
+   cublasSgemm('n', 't', nrows, a.nrows, ncols, 1.0f, data, nrows, a.data, a.nrows, 0f, product.data, nrows)
+   cudaDeviceSynchronize()
+   val blasError = cublasGetError
+   if (blasError != 0) {
+     println("device is %d" format SciFunctions.device)
+     throw new RuntimeException("Cublas error in xT "+blasError)
+   }
+   product
+ }
+
+ // Dense * sparse product on the device. Requires ncols == a.nrows; the nrows x a.ncols
+ // output is cleared first and then filled by CUMAT.dsmult (presumably an accumulating
+ // kernel, hence the clear -- confirm in the native code).
+ def GSMult(a:GSMat, oldmat:Mat):GMat = {
+ if (ncols == a.nrows) {
+ val out = GMat.newOrCheckGMat(nrows, a.ncols, oldmat)
+ Mat.nflops += 2L * nrows * a.nnz
+ out.clear
+ CUMAT.dsmult(nrows, ncols, a.nnz, data, a.data, a.ir, a.ic, out.data)
+ cudaDeviceSynchronize()
+ out
+ } else throw new RuntimeException("dimensions mismatch")
+ }
+
+ // Dense * sparse-transposed product. Requires ncols == a.ncols; the nrows x a.nrows
+ // output is cleared and then filled by CUMAT.dsmultT.
+ def GSMultT(a:GSMat, oldmat:Mat):GMat = {
+ if (ncols == a.ncols) {
+ val out = GMat.newOrCheckGMat(nrows, a.nrows, oldmat)
+ Mat.nflops += 2L * nrows * a.nnz
+ out.clear
+ CUMAT.dsmultT(nrows, ncols, a.nnz, data, a.data, a.ir, a.ic, out.data)
+ cudaDeviceSynchronize()
+ out
+ } else throw new RuntimeException("dimensions mismatch")
+ }
+
+ // Element-wise binary operation `op` (a GMat.BinOp code) between this matrix and `a`.
+ // Accepted shape combinations: identical shapes, a shared row or column count where one
+ // operand has extent 1 in the other dimension, or either operand being 1x1.
+ // The result has the larger extent in each dimension and reuses `oldmat` when supplied.
+ def gOp(a:GMat, oldmat:Mat, op:Int):GMat = {
+   val sameShape = nrows == a.nrows && ncols == a.ncols
+   val sharedRows = nrows == a.nrows && (a.ncols == 1 || ncols == 1)
+   val sharedCols = ncols == a.ncols && (a.nrows == 1 || nrows == 1)
+   val argIsScalar = a.ncols == 1 && a.nrows == 1
+   val selfIsScalar = ncols == 1 && nrows == 1
+   if (!(sameShape || sharedRows || sharedCols || argIsScalar || selfIsScalar)) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val result = GMat.newOrCheckGMat(math.max(nrows, a.nrows), math.max(ncols, a.ncols), oldmat)
+   Mat.nflops += scala.math.max(length, a.length)
+   CUMAT.applyop(data, nrows, ncols, a.data, a.nrows, a.ncols, result.data, op)
+   cudaDeviceSynchronize()
+   result
+ }
+
+ // Dot product of two device matrices of identical shape, treating both as flat vectors
+ // of `length` floats (cublasSdot). The Float result is widened to Double.
+ def dot (a : GMat):Double =
+   if (nrows != a.nrows || ncols != a.ncols) {
+     throw new RuntimeException("dot dims not compatible")
+   } else {
+     cublasSdot(length, data, 1, a.data, 1)
+   }
+
+ // Untyped variant: only GMat arguments are supported. Other matrix types now fail with a
+ // descriptive RuntimeException instead of an anonymous MatchError.
+ override def dot (a : Mat):Double =
+   if (nrows != a.nrows || ncols != a.ncols) {
+     throw new RuntimeException("dot dims not compatible")
+   } else {
+     a match {
+       case aa:GMat => cublasSdot(length, data, 1, aa.data, 1)
+       case _ => throw new RuntimeException("dot: unsupported argument type "+a.mytype)
+     }
+   }
+
+ // Reduces the matrix with operation `op` along one dimension.
+ //   dir == 1, or dir == 0 with more than one row: 1 x ncols result via CUMAT.reduce1op
+ //   dir == 2, or dir == 0 otherwise:              nrows x 1 result via CUMAT.reduce2op
+ // Any other `dir` raises a RuntimeException. The output (reusing `oldmat` when given) is
+ // cleared before the kernel runs.
+ def reduceOp(oldmat:Mat, dir:Int, op:Int):GMat = {
+   val collapseRows = dir == 1 || (dir == 0 && nrows > 1)
+   val collapseCols = !collapseRows && (dir == 2 || dir == 0)
+   if (!collapseRows && !collapseCols) {
+     throw new RuntimeException("dimension must be 1 or 2")
+   }
+   val reduced =
+     if (collapseRows) GMat.newOrCheckGMat(1, ncols, oldmat)
+     else GMat.newOrCheckGMat(nrows, 1, oldmat)
+   reduced.clear
+   if (collapseRows) {
+     CUMAT.reduce1op(nrows, ncols, data, reduced.data, op)
+   } else {
+     CUMAT.reduce2op(nrows, ncols, data, reduced.data, op)
+   }
+   Mat.nflops += length
+   cudaDeviceSynchronize()
+   reduced
+ }
+
+ // Downloads all nrows*ncols floats into a newly allocated host FMat.
+ def toFMat():FMat = {
+ val out = FMat(nrows, ncols)
+ cublasGetVector(nrows*ncols, Sizeof.FLOAT, data, 1, Pointer.to(out.data), 1)
+ cudaDeviceSynchronize()
+ out
+ }
+
+ // Downloads into `out` after recycling it to this matrix's shape; returns the recycled
+ // matrix, which is the object callers should use afterwards.
+ def copyTo(out:FMat):FMat = {
+ val a = out.recycle(nrows, ncols, 0)
+ cublasGetVector(nrows*ncols, Sizeof.FLOAT, data, 1, Pointer.to(a.data), 1)
+ cudaDeviceSynchronize()
+ a
+ }
+
+ // Uploads nrows*ncols floats from `in` into this matrix's buffer and returns this.
+ // NOTE(review): the shape/length of `in` is not checked against this matrix.
+ def copyFrom(in:FMat):GMat = {
+ cublasSetVector(nrows*ncols, Sizeof.FLOAT, Pointer.to(in.data), 1, data, 1)
+ cudaDeviceSynchronize()
+ this
+ }
+
+ // Device-to-device copy into `out` (recycled to this shape); returns the recycled matrix.
+ def copyTo(out:GMat):GMat = {
+ val a = out.recycle(nrows, ncols, 0)
+ cudaMemcpy(a.data, data, length*Sizeof.FLOAT, cudaMemcpyKind.cudaMemcpyDeviceToDevice)
+ cudaDeviceSynchronize()
+ a
+ }
+
+ // Untyped dispatch to the two typed overloads above.
+ // NOTE(review): the match has no default case, so other Mat types raise a MatchError.
+ override def copyTo(out:Mat):Mat = {
+ out match {
+ case a:FMat => copyTo(a)
+ case a:GMat => copyTo(a)
+ }
+ }
+
+ // Releases the device buffer. `recycle` can hand the same `data` pointer to another GMat,
+ // so freeing one of them invalidates the other.
+ def free() = {
+ JCublas.cublasFree(data)
+ }
+
+ import GMat.BinOp._
+ // Typed operators. Each allocates a fresh result (the output argument is null) and
+ // delegates to the multiply routines or to gOp with the matching GMat.BinOp code.
+ def * (a : GMat) = GMult(a, null)
+ def * (a : GSMat) = GSMult(a, null)
+ def xT (a : GMat) = GMultT(a, null)
+ def xT (a : GSMat) = GSMultT(a, null)
+ def + (a : GMat) = gOp(a, null, op_add)
+ def - (a : GMat) = gOp(a, null, op_sub)
+ def *@ (a : GMat) = gOp(a, null, op_mul)
+ def /@ (a : GMat) = gOp(a, null, op_div)
+
+ // `==` and `===` both use op_eq.
+ def > (b : GMat) = gOp(b, null, op_gt)
+ def < (b : GMat) = gOp(b, null, op_lt)
+ def == (b : GMat) = gOp(b, null, op_eq)
+ def === (b : GMat) = gOp(b, null, op_eq)
+ def >= (b : GMat) = gOp(b, null, op_ge)
+ def <= (b : GMat) = gOp(b, null, op_le)
+ def != (b : GMat) = gOp(b, null, op_ne)
+
+ // Float scalars are uploaded as a 1x1 GMat (GMat(b)) on every call and then handled by
+ // gOp's scalar-broadcast case.
+ override def + (b : Float):Mat = gOp(GMat(b), null, op_add)
+ override def - (b : Float):Mat = gOp(GMat(b), null, op_sub)
+ override def *@ (b : Float):Mat = gOp(GMat(b), null, op_mul)
+ override def /@ (b : Float):Mat = gOp(GMat(b), null, op_div)
+
+ override def > (b : Float) = gOp(GMat(b), null, op_gt)
+ override def < (b : Float) = gOp(GMat(b), null, op_lt)
+ override def == (b : Float) = gOp(GMat(b), null, op_eq)
+ override def === (b : Float) = gOp(GMat(b), null, op_eq)
+ override def >= (b : Float) = gOp(GMat(b), null, op_ge)
+ override def <= (b : Float) = gOp(GMat(b), null, op_le)
+ override def != (b : Float) = gOp(GMat(b), null, op_ne)
+
+ // `a ~ b` pairs this matrix (first slot) with the operand `b`.
+ // NOTE(review): the untyped variant has no default case; other Mat types raise MatchError.
+ def ~ (b: GMat) = new GPair(this, b)
+ def ~ (b: GSMat) = new GSPair(this, b)
+ override def ~ (b: Mat):Pair = b match {
+ case bb:GMat => new GPair(this, bb)
+ case bb:GSMat => new GSPair(this, bb)
+ }
+
+ import Operator._
+ // Operators taking an untyped Mat: forwarded to Operator.applyMat with the opcode.
+ // Scalar `*` first uploads the scalar as a 1x1 GMat.
+ override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus)
+ override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus)
+ override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times)
+ override def * (b : Float):Mat = applyMat(this, GMat(FMat.felem(b)), null, Mop_Times)
+ override def * (b : Int):Mat = applyMat(this, GMat(FMat.felem(b)), null, Mop_Times)
+ override def * (b : Double):Mat = applyMat(this, GMat(FMat.felem(b.asInstanceOf[Float])), null, Mop_Times)
+ // NOTE(review): no default case -- argument types other than GSMat/GMat raise MatchError.
+ override def xT (b : Mat) = b match {
+ case bb:GSMat => GSMultT(bb, null)
+ case bb:GMat => GMultT(bb, null)
+ }
+ override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div)
+ override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve)
+ override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes)
+ override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv)
+ override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat)
+ override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat)
+
+ // `==` and `===` both map to Mop_EQ.
+ override def > (b : Mat):Mat = applyMat(this, b, null, Mop_GT)
+ override def < (b : Mat):Mat = applyMat(this, b, null, Mop_LT)
+ override def >= (b : Mat):Mat = applyMat(this, b, null, Mop_GE)
+ override def <= (b : Mat):Mat = applyMat(this, b, null, Mop_LE)
+ override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def === (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE)
+
+ // Produces an nr x nc matrix while reusing storage where possible:
+ //   - same shape already: this matrix is returned as is;
+ //   - buffer capacity (realsize) suffices: a new GMat header over the same device buffer;
+ //   - otherwise: this buffer is freed and a new matrix is allocated.
+ // The `nnz` argument is not consulted.
+ override def recycle(nr:Int, nc:Int, nnz:Int):GMat = {
+   val shapeUnchanged = nrows == nr && nc == ncols
+   if (shapeUnchanged) {
+     this
+   } else if (nr*nc <= realsize) {
+     new GMat(nr, nc, data, realsize)
+   } else {
+     free
+     GMat(nr, nc)
+   }
+ }
+}
+
+/**
+ * Result of `omat ~ mat` for a GMat operand: every operator applies the operation to `mat`
+ * and the argument, handing `omat` to the GMat routine as the matrix to reuse for output.
+ */
+class GPair(val omat:Mat, val mat:GMat) extends Pair{
+  import GMat.BinOp._
+
+  /**
+   * Transpose of `mat` written into `omat` (recycled to ncols x nrows if needed).
+   * Waits for the device before returning, matching GMat.t; previously the result was
+   * handed back while the transpose kernel could still be running.
+   */
+  override def t = {
+    val out = GMat.newOrCheckGMat(mat.ncols, mat.nrows, omat)
+    CUMAT.transpose(mat.data, mat.nrows, out.data, mat.ncols, mat.nrows, mat.ncols)
+    cudaDeviceSynchronize()
+    out
+  }
+
+  // Element-wise operators with a GMat argument.
+  def + (a : GMat) = mat.gOp(a, omat, op_add)
+  def - (a : GMat) = mat.gOp(a, omat, op_sub)
+  def *@ (a : GMat) = mat.gOp(a, omat, op_mul)
+  def /@ (a : GMat) = mat.gOp(a, omat, op_div)
+  def > (b : GMat) = mat.gOp(b, omat, op_gt)
+  def < (b : GMat) = mat.gOp(b, omat, op_lt)
+  def == (b : GMat) = mat.gOp(b, omat, op_eq)
+  def === (b : GMat) = mat.gOp(b, omat, op_eq)
+  def >= (b : GMat) = mat.gOp(b, omat, op_ge)
+  def <= (b : GMat) = mat.gOp(b, omat, op_le)
+  def != (b : GMat) = mat.gOp(b, omat, op_ne)
+
+  // Float scalars are uploaded as a 1x1 GMat; note that `*` with a scalar is element-wise.
+  override def + (b : Float):Mat = mat.gOp(GMat(b), omat, op_add)
+  override def - (b : Float):Mat = mat.gOp(GMat(b), omat, op_sub)
+  override def *@ (b : Float):Mat = mat.gOp(GMat(b), omat, op_mul)
+  override def * (b : Float):Mat = mat.gOp(GMat(b), omat, op_mul)
+  override def /@ (b : Float):Mat = mat.gOp(GMat(b), omat, op_div)
+
+  override def > (b : Float) = mat.gOp(GMat(b), omat, op_gt)
+  override def < (b : Float) = mat.gOp(GMat(b), omat, op_lt)
+  override def == (b : Float) = mat.gOp(GMat(b), omat, op_eq)
+  override def === (b : Float) = mat.gOp(GMat(b), omat, op_eq)
+  override def >= (b : Float) = mat.gOp(GMat(b), omat, op_ge)
+  override def <= (b : Float) = mat.gOp(GMat(b), omat, op_le)
+  override def != (b : Float) = mat.gOp(GMat(b), omat, op_ne)
+
+  // Matrix products (dense and sparse right-hand sides).
+  def * (a : GMat) = mat.GMult(a, omat)
+  def * (a : GSMat) = mat.GSMult(a, omat)
+
+  override def * (b: Mat):Mat = b match {
+    case bb:GMat => mat.GMult(bb, omat)
+    case bb:GSMat => mat.GSMult(bb, omat)
+  }
+
+  def xT (a : GSMat) = mat.GSMultT(a, omat)
+  def xT (a : GMat) = mat.GMultT(a, omat)
+  override def xT (b: Mat):Mat = b match {
+    case bb:GSMat => mat.GSMultT(bb, omat)
+    case bb:GMat => mat.GMultT(bb, omat)
+  }
+
+  // Remaining untyped operators go through Operator.applyMat.
+  import Operator._
+  override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus)
+  override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus)
+  override def / (b : Mat):Mat = applyMat(mat, b, omat, Mop_Div)
+  override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve)
+  override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes)
+  override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv)
+  override def \ (b : Mat):Mat = applyMat(mat, b, omat, Mop_HCat)
+  override def on (b : Mat):Mat = applyMat(mat, b, omat, Mop_VCat)
+
+  override def > (b : Mat):Mat = applyMat(mat, b, omat, Mop_GT)
+  override def < (b : Mat):Mat = applyMat(mat, b, omat, Mop_LT)
+  override def >= (b : Mat):Mat = applyMat(mat, b, omat, Mop_GE)
+  override def <= (b : Mat):Mat = applyMat(mat, b, omat, Mop_LE)
+  override def == (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+  override def === (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+  override def != (b : Mat):Mat = applyMat(mat, b, omat, Mop_NE)
+
+}
+
+
+object GMat {
+
+ // Integer codes for element-wise binary operations; GMat.gOp passes them as the `op`
+ // argument of CUMAT.applyop. The values presumably have to match the native kernel's
+ // operator table -- confirm before renumbering.
+ object BinOp {
+ val op_add=0
+ val op_sub=1
+ val op_mul=2
+ val op_div=3
+ val op_gt=4
+ val op_lt=5
+ val op_eq=6
+ val op_ge=7
+ val op_le=8
+ val op_ne=9
+ val op_max=10
+ val op_min=11
+ }
+
+ // Integer codes for unary element-wise functions. Not referenced in the code visible
+ // here; presumably consumed by a native apply-function entry point -- confirm.
+ // NOTE(review): the numbering jumps from sign=29 to exppsi=34.
+ object TransF {
+ val abs=0
+ val exp=1
+ val expm1=2
+ val sqrt=3
+ val ln=4
+ val log10=5
+ val log1p=6
+ val cos=7
+ val sin=8
+ val tan=9
+ val cosh=10
+ val sinh=11
+ val tanh=12
+ val acos=13
+ val asin=14
+ val atan=15
+ val acosh=16
+ val asinh=17
+ val atanh=18
+ val erf=19
+ val erfinv=20
+ val erfc=21
+ val erfcinv=22
+ val gammaln=23
+ val gamma=24
+ val ceil=25
+ val floor=26
+ val round=27
+ val trunc=28
+ val sign=29
+ val exppsi=34
+ }
+
+ // Integer codes for two-argument element-wise functions (not referenced in visible code).
+ object TransF2 {
+ val atan2=0
+ val pow=1
+ }
+
+ // New nr x nc device matrix with every byte of its buffer set to zero.
+ def gzeros(nr:Int, nc:Int) = {
+ val out = GMat(nr, nc)
+ cudaMemset(out.data, 0, Sizeof.FLOAT*out.length)
+ cudaDeviceSynchronize()
+ out
+ }
+
+ // New nr x nc device matrix filled with 1: a 1x1 device matrix holding 1 is copied
+ // into every element by cublasScopy using a source increment of 0.
+ // NOTE(review): the temporary `one` is never freed here.
+ def gones(nr:Int, nc:Int) = {
+ val out = GMat(nr, nc)
+ val one = GMat(FMat.felem(1))
+ cublasScopy(out.length, one.data, 0, out.data, 1)
+ cudaDeviceSynchronize()
+ out
+ }
+
+ // Allocates an uninitialised nr x nc device matrix; throws when cublasAlloc fails.
+ def apply(nr:Int, nc:Int):GMat = {
+// println("nr, nc = %d,%d" format (nr,nc))
+ val retv = new GMat(nr, nc, new Pointer(), nr*nc)
+ val status = cublasAlloc(nr*nc, Sizeof.FLOAT, retv.data)
+ if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA alloc failed "+status)
+ retv
+ }
+
+ // Function-style alias for a.toFMat().
+ def toFMat(a:GMat):FMat = a.toFMat()
+
+ // Uploads a host FMat into a newly allocated device matrix of the same shape.
+ def apply(a:FMat):GMat = {
+ val rsize = a.nrows*a.ncols
+ val retv = GMat(a.nrows, a.ncols)
+ JCublas.cublasSetVector(rsize, Sizeof.FLOAT, Pointer.to(a.data), 1, retv.data, 1);
+ cudaDeviceSynchronize()
+ retv
+ }
+
+ // Untyped conversion: a GMat is returned unchanged (no copy), an FMat is uploaded, a DMat
+ // is converted to FMat first. NOTE(review): other types raise a MatchError.
+ def apply(a:Mat):GMat = a match {
+ case aa:GMat => aa
+ case aa:FMat => GMat(aa)
+ case aa:DMat => GMat(FMat(aa))
+ }
+
+ // 1x1 device matrix holding the scalar `a`.
+ def apply(a:Float):GMat = {
+ GMat(FMat.felem(a))
+ }
+
+ // Uploads `a` into `b`, recycling `b` to a's shape first; returns the recycled matrix.
+ def fromFMat(a:FMat, b:GMat):GMat = {
+ val bb = b.recycle(a.nrows, a.ncols, 0)
+ JCublas.cublasSetVector(a.length, Sizeof.FLOAT, Pointer.to(a.data), 1, bb.data, 1)
+ cudaDeviceSynchronize()
+ bb
+ }
+
+ // Runs CUMAT.dds on dense A and B using the row/column index arrays of sparse C.
+ // Requires A.nrows == B.nrows, C.nrows == A.ncols and C.ncols == B.ncols. The output
+ // sparse matrix (obtained from GSMat.newOrCheckGSMat) receives a copy of C's index arrays
+ // and the values written by the kernel.
+ def DDS(A:GMat, B:GMat, C:GSMat, oldmat:Mat):GSMat = {
+   val shapesAgree = A.nrows == B.nrows && C.nrows == A.ncols && C.ncols == B.ncols
+   if (!shapesAgree) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val result = GSMat.newOrCheckGSMat(C, oldmat)
+   val indexBytes = Sizeof.INT * C.nnz
+   cudaMemcpy(result.ir, C.ir, indexBytes, cudaMemcpyKind.cudaMemcpyDeviceToDevice)
+   cudaMemcpy(result.ic, C.ic, indexBytes, cudaMemcpyKind.cudaMemcpyDeviceToDevice)
+   CUMAT.dds(A.nrows, C.nnz, A.data, B.data, C.ir, C.ic, result.data)
+   cudaDeviceSynchronize()
+   Mat.nflops += 2L * C.nnz * A.nrows
+   result
+ }
+
+ // Host-side product a * b computed on all CUDA devices: b is split into column slices,
+ // one actor per device uploads a and its slice, runs cublasSgemm and downloads the
+ // matching slice of the result into `out`.
+ //
+ // Changes relative to the earlier version:
+ //  - the last device also takes the b.ncols % nthreads leftover columns, which used to be
+ //    left uncomputed;
+ //  - each worker always marks itself done and frees its device buffers, and the first
+ //    worker failure is rethrown on the calling thread (a failing worker used to leave the
+ //    caller spinning forever);
+ //  - having no CUDA device is reported explicitly instead of as a division by zero.
+ def GPUmult(a:FMat, b:FMat, omat:Mat):FMat =
+   if (a.ncols != b.nrows) {
+     throw new RuntimeException("dimensions mismatch in xG")
+   } else {
+     val out = FMat.newOrCheckFMat(a.nrows, b.ncols, omat)
+     val nthreads = Mat.hasCUDA
+     if (nthreads < 1) throw new RuntimeException("xG needs at least one CUDA device")
+     val done = IMat(nthreads,1)
+     val failures = new Array[Throwable](nthreads)
+     val nncols = b.ncols/nthreads
+     for (i <- 0 until nthreads) {
+       val firstCol = i * nncols
+       // The last worker absorbs the remainder of the integer division.
+       val mycols = if (i == nthreads - 1) b.ncols - firstCol else nncols
+       actor {
+         var buffers = List[Pointer]()
+         // Allocates n floats on the current device and remembers the buffer for cleanup.
+         def alloc(n:Int):Pointer = {
+           val p = new Pointer
+           val status = cublasAlloc(n, Sizeof.FLOAT, p)
+           if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA alloc failed "+status)
+           buffers = p :: buffers
+           p
+         }
+         try {
+           if (SciFunctions.device(i) != 0) throw new RuntimeException("Couldnt set device "+i)
+           if (mycols > 0) {
+             val aa = alloc(a.nrows*a.ncols)
+             val bb = alloc(b.nrows*mycols)
+             val cc = alloc(a.nrows*mycols)
+             var status = cublasSetVector(a.nrows*a.ncols, Sizeof.FLOAT, Pointer.to(a.data), 1, aa, 1)
+             cudaDeviceSynchronize
+             if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA copy a failed "+status)
+             // Byte offsets are computed in Long so large matrices cannot overflow Int.
+             status = cublasSetVector(b.nrows*mycols, Sizeof.FLOAT, Pointer.to(b.data).withByteOffset(1L*Sizeof.FLOAT*firstCol*b.nrows), 1, bb, 1)
+             cudaDeviceSynchronize
+             if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA copy b failed "+status)
+             cublasSgemm('n', 'n', a.nrows, mycols, a.ncols, 1.0f, aa, a.nrows, bb, b.nrows, 0f, cc, a.nrows)
+             cudaDeviceSynchronize
+             val err = cublasGetError
+             if (err != 0) throw new RuntimeException("Cublas error in xG, sgemm "+err)
+             status = cublasGetVector(a.nrows*mycols, Sizeof.FLOAT, cc, 1, Pointer.to(out.data).withByteOffset(1L*Sizeof.FLOAT*firstCol*a.nrows), 1)
+             cudaDeviceSynchronize
+             if (status != cublasStatus.CUBLAS_STATUS_SUCCESS) throw new RuntimeException("CUDA copy c failed "+status)
+           }
+         } catch {
+           case e:Throwable => failures(i) = e
+         } finally {
+           // Most recently allocated first, i.e. cc, bb, aa.
+           buffers.foreach(p => cublasFree(p))
+           done(i) = 1
+         }
+       }
+     }
+     while (SciFunctions.sum(done,1).dv < nthreads) {Thread.`yield`};
+     failures.find(_ != null).foreach(e => throw e)
+     Mat.nflops += 2L * a.nrows * a.ncols * b.ncols
+     out
+   }
+
+ // Picks the device matrix that will receive an nr x nc result:
+ //   - `outmat` null or 0x0: a new allocation;
+ //   - `outmat` is a GMat of the right shape: `outmat` itself;
+ //   - `outmat` is a GMat of another shape: `outmat.recycle(nr, nc, 0)`.
+ // A non-GMat output matrix is rejected with a descriptive RuntimeException (it used to
+ // surface as an anonymous MatchError).
+ def newOrCheckGMat(nr:Int, nc:Int, outmat:Mat):GMat = {
+   if (outmat.asInstanceOf[AnyRef] == null || (outmat.nrows == 0 && outmat.ncols == 0)) {
+     GMat(nr, nc)
+   } else {
+     outmat match {
+       case omat:GMat =>
+         if (omat.nrows != nr || omat.ncols != nc) {
+           omat.recycle(nr, nc, 0)
+         } else {
+           omat
+         }
+       case _ => throw new RuntimeException("output matrix must be a GMat, found "+outmat.mytype)
+     }
+   }
+ }
+}
+
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/GSMat.scala b/src/main/scala/BIDMat/GSMat.scala
new file mode 100755
index 00000000..dffa3cb4
--- /dev/null
+++ b/src/main/scala/BIDMat/GSMat.scala
@@ -0,0 +1,120 @@
+package BIDMat
+import jcuda._
+import jcuda.jcublas.JCublas
+import jcuda.runtime.JCuda
+import jcuda.runtime._
+import edu.berkeley.bid.CUMAT
+
+/**
+ * Sparse float matrix whose nonzeros are referenced through three jcuda Pointers:
+ * ir (row indices, Int), ic (column indices, Int) and data (values, Float).
+ * nnz0 is the number of entries in use; realnnz is the capacity the buffers were
+ * created with (see recycle).
+ */
+case class GSMat(nr:Int, nc:Int, val nnz0:Int, val ir:Pointer, val ic:Pointer, val data:Pointer, val realnnz:Int) extends Mat(nr, nc) {
+
+  def getdata() = data;
+
+  override def mytype = "GSMat"
+
+  override def nnz = nnz0
+
+  // The value buffer viewed as an nnz x 1 GMat; no copy is made here.
+  override def contents:GMat = new GMat(nnz, 1, data, realnnz)
+
+  /**
+   * Render at most the first 12 nonzeros by copying them into a temporary SMat.
+   * A matrix with no nonzeros is reported with a short fixed description, because
+   * the preview path needs the last copied column index and would otherwise read
+   * index -1 of an empty array.
+   */
+  override def toString:String = {
+    val nshow = scala.math.min(nnz, 12)
+    if (nshow <= 0) {
+      "GSMat(%d,%d) with no nonzeros" format (nrows, ncols)
+    } else {
+      val tmpMat = SMat(nshow, nshow, nshow)
+      val tmpcols = new Array[Int](nshow)
+      JCublas.cublasGetVector(nshow, Sizeof.INT, ir, 1, Pointer.to(tmpMat.ir), 1)
+      JCublas.cublasGetVector(nshow, Sizeof.FLOAT, data, 1, Pointer.to(tmpMat.data), 1)
+      JCublas.cublasGetVector(nshow, Sizeof.INT, ic, 1, Pointer.to(tmpcols), 1)
+      // Only as many columns as the preview actually touches are compressed.
+      SparseMat.compressInds(tmpcols, math.min(ncols, tmpcols(nshow-1)+1), tmpMat.jc, nshow)
+      if (Mat.ioneBased == 1) {
+        SparseMat.incInds(tmpMat.ir, tmpMat.ir)
+      }
+      tmpMat.toString
+    }
+  }
+
+  /**
+   * Copy all nnz entries into a new SMat: row indices and values are copied
+   * directly, column indices are copied to a scratch array and compressed into
+   * out.jc. Row indices go through SparseMat.incInds when Mat.ioneBased == 1.
+   * NOTE(review): the cublasGetVector return codes are ignored.
+   */
+  def toSMat():SMat = {
+    val out = SMat(nrows, ncols, nnz)
+    val tmpcols = new Array[Int](nnz)
+    JCublas.cublasGetVector(nnz, Sizeof.INT, ir, 1, Pointer.to(out.ir), 1)
+    JCublas.cublasGetVector(nnz, Sizeof.FLOAT, data, 1, Pointer.to(out.data), 1)
+    JCublas.cublasGetVector(nnz, Sizeof.INT, ic, 1, Pointer.to(tmpcols), 1)
+    SparseMat.compressInds(tmpcols, ncols, out.jc, nnz)
+    if (Mat.ioneBased == 1) {
+      SparseMat.incInds(out.ir, out.ir)
+    }
+    out
+  }
+
+  // Release the three buffers with cublasFree; this object must not be used afterwards.
+  def free() = {
+    JCublas.cublasFree(data)
+    JCublas.cublasFree(ic)
+    JCublas.cublasFree(ir)
+  }
+
+  /**
+   * Obtain a GSMat of the requested shape. When the existing capacity (realnnz)
+   * suffices, the result is a new header sharing this object's pointers;
+   * otherwise the buffers are freed and a new GSMat is allocated.
+   */
+  override def recycle(nr:Int, nc:Int, nnz:Int):GSMat = {
+    if (realnnz >= nnz) {
+      new GSMat(nr, nc, nnz, ir, ic, data, realnnz)
+    } else {
+      free
+      GSMat(nr, nc, nnz)
+    }
+  }
+}
+
+// Holds a GMat (omat) together with a GSMat (mat) as a Pair; it declares no
+// operations of its own in this file.
+class GSPair (val omat:GMat, val mat:GSMat) extends Pair {
+
+}
+
+object GSMat {
+
+  /**
+   * Create a GSMat with capacity for nnz0 entries, allocating the row-index,
+   * column-index and value buffers with cublasAlloc.
+   * NOTE(review): the cublasAlloc return codes are not checked here.
+   */
+  def apply(nr:Int, nc:Int, nnz0:Int):GSMat = {
+    val out = new GSMat(nr, nc, nnz0, new Pointer(), new Pointer(), new Pointer(), nnz0)
+    JCublas.cublasAlloc(out.nnz, Sizeof.INT, out.ir)
+    JCublas.cublasAlloc(out.nnz, Sizeof.INT, out.ic)
+    JCublas.cublasAlloc(out.nnz, Sizeof.FLOAT, out.data)
+    out
+  }
+
+  /**
+   * Copy the contents of host matrix a into the buffers of out and return out.
+   * Values are sent first, then row indices (through SparseMat.decInds when
+   * Mat.ioneBased == 1), then column indices expanded by SparseMat.uncompressInds.
+   * Shared by apply(a:SMat) and fromSMat, which previously repeated this sequence.
+   */
+  private def upload(a:SMat, out:GSMat):GSMat = {
+    JCublas.cublasSetVector(a.nnz, Sizeof.FLOAT, Pointer.to(a.data), 1, out.data, 1)
+    if (Mat.ioneBased == 1) {
+      JCublas.cublasSetVector(a.nnz, Sizeof.INT, Pointer.to(SparseMat.decInds(a.ir)), 1, out.ir, 1)
+    } else {
+      JCublas.cublasSetVector(a.nnz, Sizeof.INT, Pointer.to(a.ir), 1, out.ir, 1)
+    }
+    JCublas.cublasSetVector(a.nnz, Sizeof.INT, Pointer.to(SparseMat.uncompressInds(a.jc, a.ir)), 1, out.ic, 1)
+    out
+  }
+
+  // Build a new GSMat holding a copy of the host sparse matrix a.
+  def apply(a:SMat):GSMat = upload(a, GSMat(a.nrows, a.ncols, a.nnz))
+
+  // Copy a into b's storage, recycling b to a's shape first.
+  def fromSMat(a:SMat, b:GSMat):GSMat = upload(a, b.recycle(a.nrows, a.ncols, a.nnz))
+
+  /**
+   * Return a GSMat shaped like mat to be used as an output buffer. A null or 0x0
+   * oldmat yields a new allocation; a GSMat with identical rows, columns and nnz
+   * is returned as is; any other GSMat is recycled. Only GSMat is matched, so
+   * another non-empty Mat type fails the match.
+   */
+  def newOrCheckGSMat(mat:GSMat, oldmat:Mat):GSMat = {
+    if (oldmat.asInstanceOf[AnyRef] == null || (oldmat.nrows ==0 && oldmat.ncols == 0)) {
+      GSMat(mat.nrows, mat.ncols, mat.nnz)
+    } else {
+      oldmat match {
+        case omat:GSMat => if (oldmat.nrows == mat.nrows && oldmat.ncols == mat.ncols && oldmat.nnz == mat.nnz) {
+          omat
+        } else {
+          omat.recycle(mat.nrows, mat.ncols, mat.nnz)
+        }
+      }
+    }
+  }
+}
+
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/HMat.scala b/src/main/scala/BIDMat/HMat.scala
new file mode 100755
index 00000000..1b604107
--- /dev/null
+++ b/src/main/scala/BIDMat/HMat.scala
@@ -0,0 +1,344 @@
+package BIDMat
+
+import java.io._
+import java.util.zip._
+import scala.util.matching.Regex
+import Regex._
+import scala.collection.mutable._
+import scala.actors._
+import scala.actors.Actor._
+import MatFunctions._
+import MatHDF5._
+import edu.berkeley.bid.UTILS._
+
+/**
+ * Matrix whose contents live in a list of files. blkinds holds cumulative
+ * offsets: file k covers global indices blkinds(k) until blkinds(k+1) along the
+ * concatenation dimension. The most recently loaded file is cached in
+ * fnameCache / fmatCache.
+ */
+case class HMat(nr:Int, nc:Int, fileList:List[String], varname:String, blkinds:Array[Int], catdim:Int) extends Mat(nr, nc) {
+
+  var fnameCache:String = null
+
+  var fmatCache:Mat = null
+
+  override def mytype = "HMat"
+
+  /**
+   * Slice from the on-disk matrix. Consecutive entries of b that fall in the
+   * same file are served by one load of that file; the per-file results are
+   * joined with the \ operator in the order of b.
+   *
+   * Fixes relative to the earlier version: the block search now really returns
+   * the block containing an index, a new run starts at the element that changed
+   * block (it used to be skipped), indices are shifted by the block's start
+   * before being applied to the per-file matrix, and an empty b no longer
+   * indexes an empty array (null is returned, as nothing was selected).
+   *
+   * NOTE(review): pieces are always joined horizontally and b is always treated
+   * as the concatenated dimension, regardless of catdim - confirm for catdim == 1.
+   *
+   * @throws RuntimeException if an entry of b lies outside every block
+   */
+  override def apply(a:IMat, b:IMat):Mat = {
+    val nblocks = blkinds.length - 1
+    var ilast:Int = 0
+    // Find k with blkinds(k) <= ind < blkinds(k+1), starting from the previous
+    // answer so that sorted queries are cheap. Returns -1 when no block matches.
+    def findindx(ind:Int):Int = {
+      while (ilast > 0 && ind < blkinds(ilast)) ilast -= 1
+      while (ilast < nblocks - 1 && ind >= blkinds(ilast+1)) ilast += 1
+      if (ilast < nblocks && blkinds(ilast) <= ind && ind < blkinds(ilast+1)) {
+        ilast
+      } else {
+        -1
+      }
+    }
+
+    val locs = IMat(1,b.length)
+    var i = 0
+    var iblk = 0                      // first position of the current run
+    var out:Mat = null
+    while (i <= b.length) {
+      if (i < b.length) {
+        locs(i) = findindx(b(i))
+        if (locs(i) < 0) {
+          throw new RuntimeException("HMat index "+b(i)+" is outside all blocks")
+        }
+      }
+      // Flush the run [iblk, i) when the block changes or the input ends.
+      if (i > iblk && (i == b.length || locs(i) != locs(iblk))) {
+        val blk = locs(iblk)
+        if (fnameCache == null || fileList(blk) != fnameCache) {
+          fmatCache = MatHDF5.hload(fileList(blk), varname).asInstanceOf[Mat]
+          fnameCache = fileList(blk)
+        }
+        // Convert global indices to indices local to this file.
+        val local = b(MatFunctions.irow(iblk->i)) - blkinds(blk)
+        val newmat = fmatCache(a, local)
+        if (out.asInstanceOf[AnyRef] != null) {
+          out = out \ newmat
+        } else {
+          out = newmat
+        }
+        iblk = i
+      }
+      i += 1
+    }
+    out
+  }
+}
+
+object HMat {
+
+  /**
+   * Fill a with n Ints (4*n bytes) read from din, staging the bytes through buf
+   * and handing each chunk to memcpybi together with the running byte count.
+   *
+   * @throws EOFException if the stream ends before 4*n bytes were read
+   *         (previously the -1 returned by read was passed on as a length and
+   *         the loop never terminated)
+   */
+  def readSomeInts(din:InputStream, a:Array[Int], buf:Array[Byte], n:Int) {
+    var nread = 0
+    while (nread < 4*n) {
+      val readnow = din.read(buf, 0, math.min(buf.length, 4*n-nread))
+      if (readnow < 0) {
+        throw new EOFException("readSomeInts: end of stream after "+nread+" of "+(4*n)+" bytes")
+      }
+      memcpybi(readnow, buf, 0, a, nread)
+      nread += readnow
+    }
+  }
+
+  /**
+   * Fill a with n Floats (4*n bytes) read from din, staging the bytes through buf
+   * and handing each chunk to memcpybf together with the running byte count.
+   *
+   * @throws EOFException if the stream ends before 4*n bytes were read
+   *         (previously the -1 returned by read was passed on as a length and
+   *         the loop never terminated)
+   */
+  def readSomeFloats(din:InputStream, a:Array[Float], buf:Array[Byte], n:Int) {
+    var nread = 0
+    while (nread < 4*n) {
+      val readnow = din.read(buf, 0, math.min(buf.length, 4*n-nread))
+      if (readnow < 0) {
+        throw new EOFException("readSomeFloats: end of stream after "+nread+" of "+(4*n)+" bytes")
+      }
+      memcpybf(readnow, buf, 0, a, nread)
+      nread += readnow
+    }
+  }
+
+  /**
+   * Fill a with n Doubles (8*n bytes) read from din, staging the bytes through buf
+   * and handing each chunk to memcpybd together with the running byte count.
+   *
+   * @throws EOFException if the stream ends before 8*n bytes were read
+   *         (previously the -1 returned by read was passed on as a length and
+   *         the loop never terminated)
+   */
+  def readSomeDoubles(din:InputStream, a:Array[Double], buf:Array[Byte], n:Int) {
+    var nread = 0
+    while (nread < 8*n) {
+      val readnow = din.read(buf, 0, math.min(buf.length, 8*n-nread))
+      if (readnow < 0) {
+        throw new EOFException("readSomeDoubles: end of stream after "+nread+" of "+(8*n)+" bytes")
+      }
+      memcpybd(readnow, buf, 0, a, nread)
+      nread += readnow
+    }
+  }
+
+  /**
+   * Write n Ints (4*n bytes) from a to dout in chunks of at most buf.length bytes.
+   * Each chunk is staged into buf by memcpyib, which receives the chunk size and
+   * the running count of bytes already written.
+   */
+  def writeSomeInts(dout:OutputStream, a:Array[Int], buf:Array[Byte], n:Int) {
+    var done = 0
+    while (done < 4*n) {
+      val chunk = math.min(4*n-done, buf.length)
+      memcpyib(chunk, a, done, buf, 0)
+      dout.write(buf, 0, chunk)
+      done += chunk
+    }
+  }
+
+  /**
+   * Write n Floats (4*n bytes) from a to dout in chunks of at most buf.length bytes.
+   * Each chunk is staged into buf by memcpyfb, which receives the chunk size and
+   * the running count of bytes already written.
+   */
+  def writeSomeFloats(dout:OutputStream, a:Array[Float], buf:Array[Byte], n:Int) {
+    var done = 0
+    while (done < 4*n) {
+      val chunk = math.min(4*n-done, buf.length)
+      memcpyfb(chunk, a, done, buf, 0)
+      dout.write(buf, 0, chunk)
+      done += chunk
+    }
+  }
+
+  /**
+   * Write n Doubles (8*n bytes) from a to dout in chunks of at most buf.length bytes.
+   * Each chunk is staged into buf by memcpydb, which receives the chunk size and
+   * the running count of bytes already written.
+   */
+  def writeSomeDoubles(dout:OutputStream, a:Array[Double], buf:Array[Byte], n:Int) {
+    var done = 0
+    while (done < 8*n) {
+      val chunk = math.min(8*n-done, buf.length)
+      memcpydb(chunk, a, done, buf, 0)
+      dout.write(buf, 0, chunk)
+      done += chunk
+    }
+  }
+
+  /**
+   * Open fname for reading with a 1 MB buffer, through GZIPInputStream when
+   * compressed is true and BufferedInputStream otherwise.
+   * If building the wrapper fails (GZIPInputStream reads the gzip header in its
+   * constructor), the underlying file stream is closed before the exception is
+   * rethrown, so the file handle is not leaked.
+   */
+  def getInputStream(fname:String, compressed:Boolean):InputStream = {
+    val fin = new FileInputStream(fname)
+    try {
+      if (compressed) {
+        new GZIPInputStream(fin, 1024*1024)
+      } else {
+        new BufferedInputStream(fin, 1024*1024)
+      }
+    } catch {
+      case e:Throwable => {
+        fin.close
+        throw e
+      }
+    }
+  }
+
+ // Open fname for writing by delegating to _getOutputStream from
+ // edu.berkeley.bid.UTILS, passing the compressed flag and Mat.compressionLevel.
+ def getOutputStream(fname:String, compressed:Boolean):OutputStream = {
+ import edu.berkeley.bid.UTILS._
+ _getOutputStream(fname, compressed, Mat.compressionLevel)
+ }
+
+  /**
+   * Read a dense float matrix from fname. The file starts with four Ints
+   * (type code, rows, columns, unused) followed by rows*columns Floats.
+   * The type code in hints(0) is read but not validated.
+   * The input stream is closed even when reading fails.
+   */
+  def loadFMat(fname:String, compressed:Boolean=true):FMat = {
+    val gin = getInputStream(fname, compressed)
+    try {
+      val buff = new Array[Byte](1024*1024)
+      val hints = new Array[Int](4)
+      readSomeInts(gin, hints, buff, 4)
+      val nrows = hints(1)
+      val ncols = hints(2)
+      val out = FMat(nrows, ncols)
+      readSomeFloats(gin, out.data, buff, ncols*nrows)
+      out
+    } finally {
+      gin.close
+    }
+  }
+
+  /**
+   * Read a dense Int matrix from fname. The file starts with four Ints
+   * (type code, rows, columns, unused) followed by rows*columns Ints.
+   * The type code in hints(0) is read but not validated.
+   * The input stream is closed even when reading fails.
+   */
+  def loadIMat(fname:String, compressed:Boolean=true):IMat = {
+    val gin = getInputStream(fname, compressed)
+    try {
+      val buff = new Array[Byte](1024*1024)
+      val hints = new Array[Int](4)
+      readSomeInts(gin, hints, buff, 4)
+      val nrows = hints(1)
+      val ncols = hints(2)
+      val out = IMat(nrows, ncols)
+      readSomeInts(gin, out.data, buff, ncols*nrows)
+      out
+    } finally {
+      gin.close
+    }
+  }
+
+  /**
+   * Read a dense double matrix from fname. The file starts with four Ints
+   * (type code, rows, columns, unused) followed by rows*columns Doubles.
+   * The type code in hints(0) is read but not validated.
+   * The input stream is closed even when reading fails.
+   */
+  def loadDMat(fname:String, compressed:Boolean=true):DMat = {
+    val gin = getInputStream(fname, compressed)
+    try {
+      val buff = new Array[Byte](1024*1024)
+      val hints = new Array[Int](4)
+      readSomeInts(gin, hints, buff, 4)
+      val nrows = hints(1)
+      val ncols = hints(2)
+      val out = DMat(nrows, ncols)
+      readSomeDoubles(gin, out.data, buff, ncols*nrows)
+      out
+    } finally {
+      gin.close
+    }
+  }
+
+  /**
+   * Write m to fname as a four-Int header (130, rows, columns, 0) followed by the
+   * Float data. The output stream is closed even when writing fails, and the
+   * staging-buffer size is computed in Long arithmetic so that a large matrix
+   * cannot overflow it into a negative array length.
+   */
+  def saveFMat(fname:String, m:FMat, compressed:Boolean=true):Unit = {
+    val gout = getOutputStream(fname, compressed)
+    try {
+      val hints = new Array[Int](4)
+      val tbuf = new Array[Byte](16)
+      hints(0) = 130 // 1=dense, 3=float
+      hints(1) = m.nrows
+      hints(2) = m.ncols
+      hints(3) = 0
+      writeSomeInts(gout, hints, tbuf, 4)
+      val buff = new Array[Byte](math.min(1024L*1024L, 4L*m.ncols*m.nrows).toInt)
+      writeSomeFloats(gout, m.data, buff, m.nrows*m.ncols)
+    } finally {
+      gout.close
+    }
+  }
+
+  /**
+   * Write m to fname as a four-Int header (110, rows, columns, 0) followed by the
+   * Int data. The output stream is closed even when writing fails, and the
+   * staging-buffer size is computed in Long arithmetic so that a large matrix
+   * cannot overflow it into a negative array length.
+   */
+  def saveIMat(fname:String, m:IMat, compressed:Boolean=true):Unit = {
+    val gout = getOutputStream(fname, compressed)
+    try {
+      val hints = new Array[Int](4)
+      val tbuf = new Array[Byte](16)
+      hints(0) = 110 // 1=dense, 1=int
+      hints(1) = m.nrows
+      hints(2) = m.ncols
+      hints(3) = 0
+      writeSomeInts(gout, hints, tbuf, 4)
+      val buff = new Array[Byte](math.min(1024L*1024L, 4L*m.ncols*m.nrows).toInt)
+      writeSomeInts(gout, m.data, buff, m.nrows*m.ncols)
+    } finally {
+      gout.close
+    }
+  }
+
+  /**
+   * Write m to fname as a four-Int header (140, rows, columns, 0) followed by the
+   * Double data. The output stream is closed even when writing fails. The staging
+   * buffer is sized for 8-byte elements (it used 4 bytes per element) and the
+   * size is computed in Long arithmetic so it cannot overflow.
+   */
+  def saveDMat(fname:String, m:DMat, compressed:Boolean=true):Unit = {
+    val gout = getOutputStream(fname, compressed)
+    try {
+      val hints = new Array[Int](4)
+      val tbuf = new Array[Byte](16)
+      hints(0) = 140 // 1=dense, 4=double
+      hints(1) = m.nrows
+      hints(2) = m.ncols
+      hints(3) = 0
+      writeSomeInts(gout, hints, tbuf, 4)
+      val buff = new Array[Byte](math.min(1024L*1024L, 8L*m.ncols*m.nrows).toInt)
+      writeSomeDoubles(gout, m.data, buff, m.nrows*m.ncols)
+    } finally {
+      gout.close
+    }
+  }
+
+  /**
+   * Read a sparse float matrix from fname. The file starts with four Ints
+   * (type code, rows, columns, nnz), followed by columns+1 column pointers,
+   * nnz row indices and nnz Float values. Both index arrays are passed through
+   * MatHDF5.addOne after reading. The type code in hints(0) is not validated.
+   * The input stream is closed even when reading fails.
+   */
+  def loadSMat(fname:String, compressed:Boolean=true):SMat = {
+    val gin = getInputStream(fname, compressed)
+    try {
+      val buff = new Array[Byte](1024*1024)
+      val hints = new Array[Int](4)
+      readSomeInts(gin, hints, buff, 4)
+      val nrows = hints(1)
+      val ncols = hints(2)
+      val nnz = hints(3)
+      val out = SMat(nrows, ncols, nnz)
+      readSomeInts(gin, out.jc, buff, ncols+1)
+      readSomeInts(gin, out.ir, buff, nnz)
+      readSomeFloats(gin, out.data, buff, nnz)
+      MatHDF5.addOne(out.jc)
+      MatHDF5.addOne(out.ir)
+      out
+    } finally {
+      gin.close
+    }
+  }
+
+  /**
+   * Write m to fname as a four-Int header (231, rows, columns, nnz) followed by
+   * the column pointers, row indices and Float values.
+   *
+   * m.jc and m.ir are passed through MatHDF5.subOne for the duration of the
+   * write and through MatHDF5.addOne afterwards; the restore now sits in a
+   * single finally clause instead of being repeated on every exit path.
+   * The output stream is closed even when writing fails.
+   *
+   * @throws RuntimeException wrapping any failure during the index/data write,
+   *         with the same messages as before
+   */
+  def saveSMat(fname:String, m:SMat, compressed:Boolean=true):Unit = {
+    val gout = getOutputStream(fname, compressed)
+    try {
+      val hints = new Array[Int](4)
+      val tbuf = new Array[Byte](16)
+      hints(0) = 231 // 2=sparse, 3=float, 1=int
+      hints(1) = m.nrows
+      hints(2) = m.ncols
+      hints(3) = m.nnz
+      writeSomeInts(gout, hints, tbuf, 4)
+      val buff = new Array[Byte](math.min(1024L*1024L, 4L*math.max(m.ncols+1, m.nnz)).toInt)
+      try {
+        MatHDF5.subOne(m.jc)
+        MatHDF5.subOne(m.ir)
+        writeSomeInts(gout, m.jc, buff, m.ncols+1)
+        writeSomeInts(gout, m.ir, buff, m.nnz)
+        writeSomeFloats(gout, m.data, buff, m.nnz)
+      } catch {
+        case e:Exception => throw new RuntimeException("Exception in saveSMat "+e)
+        case _:Throwable => throw new RuntimeException("Problem in saveSMat")
+      } finally {
+        // Undo the index shift whether or not the write succeeded.
+        MatHDF5.addOne(m.jc)
+        MatHDF5.addOne(m.ir)
+      }
+    } finally {
+      gout.close
+    }
+  }
+
+  /**
+   * Load n sparse matrices concurrently, one actor per file, from the paths
+   * "/disk00/"+fname, "/disk01/"+fname, ... and return them as an array.
+   * The varname parameter is not used by this method.
+   *
+   * Each actor marks its slot as finished in a finally clause, so a failed load
+   * leaves a null entry in the result instead of making the wait loop below
+   * spin forever.
+   */
+  def testLoad(fname:String, varname:String, n:Int) = {
+    val a = new Array[SMat](n)
+    val ndone = izeros(n,1)
+    for (i <- 0 until n) {
+      actor {
+        try {
+          a(i) = loadSMat(("/disk%02d/" format i)+fname)
+        } finally {
+          ndone(i) = 1
+        }
+      }
+    }
+    // Poll until every actor has reported completion.
+    while (SciFunctions.sum(ndone).v < n) {Thread.sleep(10)}
+    a
+  }
+
+  /**
+   * Build an HMat from every file under dirname (searched recursively) whose
+   * path relative to dirname matches the regular expression filepat.
+   *
+   * @param varname variable whose dimensions are read from each file
+   * @param catd    2: files are joined along columns and must agree on rows;
+   *                1: files are joined along rows and must agree on columns
+   * @return the HMat describing the concatenation (the earlier version used
+   *         procedure syntax, so the constructed HMat was discarded)
+   * @throws RuntimeException if catd is not 1 or 2, or file shapes disagree
+   *
+   * Other fixes: names are matched and stored as paths (bare file names were
+   * sliced with a directory-prefix length and lost their directory), an
+   * unreadable directory is skipped instead of failing on a null listing, and
+   * the catd == 1 case compares against the first file's column count instead
+   * of the constant 1.
+   */
+  def apply(dirname:String, filepat:String, varname:String, catd:Int):HMat = {
+    if (catd != 1 && catd != 2) {
+      throw new RuntimeException("cat dimension must be 1 or 2")
+    }
+    val files:ListBuffer[String] = new ListBuffer[String]
+    val dir:File = new File(dirname)
+    val prefix = dir.getPath
+
+    // Path of f below dir, without a leading separator.
+    def relativeName(f:File):String = {
+      val tail = f.getPath.substring(prefix.length)
+      if (tail.startsWith(File.separator)) tail.substring(File.separator.length) else tail
+    }
+
+    def searchDir(d:File) {
+      val entries = d.listFiles   // null when d is not a readable directory
+      if (entries != null) {
+        for (f <- entries) {
+          if (f.isDirectory) {
+            searchDir(f)
+          } else {
+            if (relativeName(f).matches(filepat)) {
+              files.append(f.getPath)
+            }
+          }
+        }
+      }
+    }
+
+    searchDir(dir)
+    val blkinds = new Array[Int](files.length+1)
+    var i = 0
+    var nrows = -1
+    var ncols = -1
+    files.foreach((fn:String) => {
+      val (nr, nc) = MatHDF5.readMatDims(fn, varname)
+      if (catd == 2) {
+        if (nrows >= 0) {
+          if (nr != nrows) {
+            throw new RuntimeException("incorrect number of rows in file "+fn)
+          }
+        } else {
+          nrows = nr.asInstanceOf[Int]
+        }
+        blkinds(i+1) = blkinds(i) + nc.asInstanceOf[Int]
+      } else {
+        if (ncols >= 0) {
+          if (nc != ncols) {
+            throw new RuntimeException("incorrect number of cols in file "+fn)
+          }
+        } else {
+          ncols = nc.asInstanceOf[Int]
+        }
+        blkinds(i+1) = blkinds(i) + nr.asInstanceOf[Int]
+      }
+      i += 1
+    })
+    if (catd == 2) {
+      HMat(nrows, blkinds(files.length), files.toList, varname, blkinds, 2)
+    } else {
+      HMat(blkinds(files.length), ncols, files.toList, varname, blkinds, 1)
+    }
+  }
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/IMat.scala b/src/main/scala/BIDMat/IMat.scala
new file mode 100755
index 00000000..206d3eaa
--- /dev/null
+++ b/src/main/scala/BIDMat/IMat.scala
@@ -0,0 +1,433 @@
+package BIDMat
+
+import java.util.Arrays
+
+case class IMat(nr:Int, nc:Int, data0:Array[Int]) extends DenseMat[Int](nr, nc, data0) {
+
+ def size() = length;
+
+ override def t:IMat = IMat(gt(null))
+
+  /**
+   * The single value of a 1x1 matrix, as a Double.
+   * Any other shape, including an empty matrix, is rejected with the same
+   * RuntimeException; previously an empty matrix slipped past the check and
+   * failed on data(0) instead.
+   */
+  override def dv:Double =
+    if (nrows != 1 || ncols != 1) {
+      throw new RuntimeException("Matrix should be 1x1 to extract value")
+    } else {
+      data(0)
+    }
+
+ override def mytype = "IMat"
+
+ override def set(v:Float):IMat = {
+ Arrays.fill(data,0,length,v.asInstanceOf[Int])
+ this
+ }
+
+ def horzcat(b: IMat) = IMat(ghorzcat(b))
+
+ def vertcat(b: IMat) = IMat(gvertcat(b))
+
+ def find3:(IMat, IMat, IMat) = { val (ii, jj, vv) = gfind3 ; (ii, jj, IMat(vv)) }
+
+ override def apply(a:IMat):IMat = IMat(gapply(a))
+
+ override def apply(a:IMat, b:IMat):IMat = IMat(gapply(a, b))
+
+ override def apply(a:IMat, b:Int):IMat = IMat(gapply(a, b))
+
+ override def apply(a:Int, b:IMat):IMat = IMat(gapply(a, b))
+
+ def update(iv:IMat, jv:IMat, b:IMat):IMat = IMat(_update(iv, jv, b))
+
+ def update(iv:IMat, j:Int, b:IMat):IMat = IMat(_update(iv, IMat.ielem(j), b))
+
+ def update(i:Int, jv:IMat, b:IMat):IMat = IMat(_update(IMat.ielem(i), jv, b))
+
+ def iiMatOp(b: Mat, f:(Int, Int) => Int, old:Mat):IMat =
+ b match {
+ case bb:IMat => IMat(ggMatOp(bb, f, old))
+ case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b)
+ }
+
+ def iiMatOpv(b: Mat, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat):IMat =
+ b match {
+ case bb:IMat => IMat(ggMatOpv(bb, f, old))
+ case _ => throw new RuntimeException("unsupported operation "+f+" on "+this+" and "+b)
+ }
+
+ def iiMatOpScalar(b: Int, f:(Int, Int) => Int, old:Mat) = IMat(ggMatOpScalar(b, f, old))
+
+ def iiMatOpScalarv(b: Int, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat) = IMat(ggMatOpScalarv(b, f, old))
+
+ def iiReduceOp(n:Int, f1:(Int) => Int, f2:(Int, Int) => Int, old:Mat) = IMat(ggReduceOp(n, f1, f2, old))
+
+ def iiReduceOpv(n:Int, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat) = IMat(ggReduceOpv(n, f, old))
+
+ def iiReduceAll(n:Int, f1:(Int) => Int, f2:(Int, Int) => Int, old:Mat) = IMat(ggReduceAll(n, f1, f2, old))
+
+ def iiReduceAllv(n:Int, f:(Array[Int],Int,Int,Array[Int],Int,Int,Array[Int],Int,Int,Int) => Int, old:Mat) = IMat(ggReduceAllv(n, f, old))
+
+ override def printOne(i:Int):String = {
+ val v = data(i)
+ "%d" format v
+ }
+
+ // Copy the first `length` entries of this matrix's data into a's data array
+ // and return a. Only an IMat destination is matched; the match has no default
+ // case, and the destination's capacity is not checked here.
+ override def copyTo(a:Mat) = {
+ a match {
+ case out:IMat => System.arraycopy(data, 0, out.data, 0, length)
+ }
+ a
+ }
+
+  // A new IMat of the same shape holding a copy of this matrix's entries.
+  override def copy = {
+    val dup = IMat(nrows, ncols)
+    System.arraycopy(data, 0, dup.data, 0, length)
+    dup
+  }
+
+ override def zeros(nr:Int, nc:Int) = {
+ IMat(nr, nc)
+ }
+
+  // A new nr x nc IMat with every entry assigned 1.
+  override def ones(nr:Int, nc:Int) = {
+    val filled = IMat(nr, nc)
+    for (idx <- 0 until filled.length) {
+      filled(idx) = 1
+    }
+    filled
+  }
+
+ override def clearUpper(off:Int) = setUpper(0, off)
+ override def clearUpper = setUpper(0, 0)
+
+ override def clearLower(off:Int) = setLower(0, off)
+ override def clearLower = setLower(0, 0)
+
+
+  /**
+   * Integer product of this matrix with a0.
+   *  - inner dimensions agree: matrix product, accumulated column by column
+   *    into a buffer obtained from newOrCheckIMat(nrows, a.ncols, omat);
+   *  - this is 1x1: every entry of a0 scaled by that value (new matrix);
+   *  - a0 is 1x1: every entry of this scaled by that value (new matrix).
+   * Mat.nflops is advanced in each case. omat is only consulted in the first case.
+   * Throws RuntimeException for incompatible shapes or a non-IMat argument.
+   */
+  def iMult(a0:Mat, omat:Mat):IMat =
+    a0 match {
+      case a:IMat =>
+        if (ncols == a.nrows) {
+          val prod = IMat.newOrCheckIMat(nrows, a.ncols, omat)
+          prod.clear
+          Mat.nflops += 2L * length * a.ncols
+          var col = 0
+          while (col < a.ncols) {
+            var inner = 0
+            while (inner < a.nrows) {
+              val scale = a.data(inner + col*ncols)
+              var row = 0
+              while (row < nrows) {
+                prod.data(row+col*nrows) += data(row+inner*nrows)*scale
+                row += 1
+              }
+              inner += 1
+            }
+            col += 1
+          }
+          prod
+        } else if (ncols == 1 && nrows == 1) {
+          val scaled = IMat(a.nrows, a.ncols)
+          Mat.nflops += a.length
+          val factor = data(0)
+          var idx = 0
+          while (idx < a.length) {
+            scaled.data(idx) = factor * a.data(idx)
+            idx += 1
+          }
+          scaled
+        } else if (a.ncols == 1 && a.nrows == 1) {
+          val scaled = IMat(nrows, ncols)
+          Mat.nflops += length
+          val factor = a.data(0)
+          var idx = 0
+          while (idx < length) {
+            scaled.data(idx) = factor * data(idx)
+            idx += 1
+          }
+          scaled
+        } else throw new RuntimeException("dimensions mismatch")
+      case _ => throw new RuntimeException("unsupported arg to * "+a0)
+    }
+
+ def dot(a:IMat):Double = super.dot(a)
+
+ override def dot(a:Mat):Double = super.dot(a.asInstanceOf[IMat])
+
+ def * (b : IMat) = iMult(b, null)
+ def + (b : IMat) = iiMatOpv(b, IMat.vecAdd _, null)
+ def - (b : IMat) = iiMatOpv(b, IMat.vecSub _, null)
+ def *@ (b : IMat) = iiMatOpv(b, IMat.vecMul _, null)
+ def /@ (b : IMat) = iiMatOpv(b, IMat.iVecDiv _, null)
+
+ override def + (b : Int) = iiMatOpScalarv(b, IMat.vecAdd _, null)
+ override def - (b : Int) = iiMatOpScalarv(b, IMat.vecSub _, null)
+ override def *@ (b : Int) = iiMatOpScalarv(b, IMat.vecMul _, null)
+ override def /@ (b : Int) = iiMatOpScalarv(b, IMat.iVecDiv _, null)
+
+ def > (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x > y) 1 else 0, null)
+ def < (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x < y) 1 else 0, null)
+ def == (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, null)
+ def === (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, null)
+ def >= (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x >= y) 1 else 0, null)
+ def <= (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x <= y) 1 else 0, null)
+ def != (b : IMat) = iiMatOp(b, (x:Int, y:Int) => if (x != y) 1 else 0, null)
+
+ override def > (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x > y) 1 else 0, null)
+ override def < (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x < y) 1 else 0, null)
+ override def == (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x == y) 1 else 0, null)
+ override def === (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x == y) 1 else 0, null)
+ override def >= (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x >= y) 1 else 0, null)
+ override def <= (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x <= y) 1 else 0, null)
+ override def != (b : Int) = iiMatOpScalar(b, (x:Int, y:Int) => if (x != y) 1 else 0, null)
+
+ def \ (b: IMat) = horzcat(b)
+ def \ (b: Int) = horzcat(IMat.ielem(b))
+ def on (b: IMat) = vertcat(b)
+ def on (b: Int) = vertcat(IMat.ielem(b))
+
+ /*
+ * Specialize to FMats to help the type system.
+ */
+ def + (b : FMat):FMat = FMat(this) + b
+ def - (b : FMat):FMat = FMat(this) - b
+ def * (b : FMat):FMat = FMat(this) * b
+ def / (b : FMat):FMat = FMat(this) / b
+ def \\ (b : FMat):FMat = FMat(this) \\ b
+ def *@ (b : FMat):FMat = FMat(this) *@ b
+ def /@ (b : FMat):FMat = FMat(this) /@ b
+ def \ (b : FMat):FMat = FMat(this) \ b
+ def on (b : FMat):FMat = FMat(this) on b
+
+ def > (b : FMat):FMat = FMat(this) > b
+ def < (b : FMat):FMat = FMat(this) < b
+ def >= (b : FMat):FMat = FMat(this) >= b
+ def <= (b : FMat):FMat = FMat(this) <= b
+ def == (b : FMat):FMat = FMat(this) == b
+ def === (b : FMat):FMat = FMat(this) === b
+ def != (b : FMat):FMat = FMat(this) != b
+
+ /*
+ * Specialize to DMats to help the type system.
+ */
+ def + (b : DMat):DMat = DMat(this) + b
+ def - (b : DMat):DMat = DMat(this) - b
+ def * (b : DMat):DMat = DMat(this) * b
+ def / (b : DMat):DMat = DMat(this) / b
+ def \\ (b : DMat):DMat = DMat(this) \\ b
+ def *@ (b : DMat):DMat = DMat(this) *@ b
+ def /@ (b : DMat):DMat = DMat(this) /@ b
+ def \ (b : DMat):DMat = DMat(this) \ b
+ def on (b : DMat):DMat = DMat(this) on b
+
+ def > (b : DMat):DMat = DMat(this) > b
+ def < (b : DMat):DMat = DMat(this) < b
+ def >= (b : DMat):DMat = DMat(this) >= b
+ def <= (b : DMat):DMat = DMat(this) <= b
+ def == (b : DMat):DMat = DMat(this) == b
+ def === (b : DMat):DMat = DMat(this) === b
+ def != (b : DMat):DMat = DMat(this) != b
+ /*
+ * Specialize to CMats to help the type system.
+ */
+ def + (b : CMat):CMat = CMat(this) + b
+ def - (b : CMat):CMat = CMat(this) - b
+ def * (b : CMat):CMat = CMat(this) * b
+ def / (b : CMat):CMat = CMat(this) / b
+ def \\ (b : CMat):CMat = CMat(this) \\ b
+ def *@ (b : CMat):CMat = CMat(this) *@ b
+ def /@ (b : CMat):CMat = CMat(this) /@ b
+ def \ (b : CMat):CMat = CMat(this) \ b
+ def on (b : CMat):CMat = CMat(this) on b
+ /*
+ * Operators whose second arg is generic.
+ */
+ import Operator._
+ override def + (b : Mat):Mat = applyMat(this, b, null, Mop_Plus)
+ override def - (b : Mat):Mat = applyMat(this, b, null, Mop_Minus)
+ override def * (b : Mat):Mat = applyMat(this, b, null, Mop_Times)
+ override def / (b : Mat):Mat = applyMat(this, b, null, Mop_Div)
+ override def \\ (b : Mat):Mat = applyMat(this, b, null, Mop_RSolve)
+ override def *@ (b : Mat):Mat = applyMat(this, b, null, Mop_ETimes)
+ override def /@ (b : Mat):Mat = applyMat(this, b, null, Mop_EDiv)
+ override def \ (b : Mat):Mat = applyMat(this, b, null, Mop_HCat)
+ override def on (b : Mat):Mat = applyMat(this, b, null, Mop_VCat)
+
+ override def > (b : Mat):Mat = applyMat(this, b, null, Mop_GT)
+ override def < (b : Mat):Mat = applyMat(this, b, null, Mop_LT)
+ override def >= (b : Mat):Mat = applyMat(this, b, null, Mop_GE)
+ override def <= (b : Mat):Mat = applyMat(this, b, null, Mop_LE)
+ override def == (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def === (b : Mat):Mat = applyMat(this, b, null, Mop_EQ)
+ override def != (b : Mat):Mat = applyMat(this, b, null, Mop_NE)
+
+ def ~ (b : IMat):IPair = new IPair(this, b)
+
+ override def ~ (b: Mat):Pair =
+ b match {
+ case db:IMat => new IPair(this, db)
+ case _ => throw new RuntimeException("mismatched types for operator ~")
+ }
+
+ override def clear = {
+ Arrays.fill(this.data,0,length,0)
+ this
+ }
+
+  /**
+   * An IMat of shape nr x nc that reuses this matrix's storage when possible:
+   * this itself if the shape already matches, a new header over the same data
+   * array if that array is large enough, otherwise a new allocation.
+   * The nnz argument is not used.
+   */
+  override def recycle(nr:Int, nc:Int, nnz:Int):IMat = {
+    val sameShape = nrows == nr && nc == ncols
+    if (sameShape) {
+      this
+    } else {
+      if (data.size >= nr*nc) new IMat(nr, nc, data) else IMat(nr, nc)
+    }
+  }
+}
+
+class IPair(val omat:Mat, val mat:IMat) extends Pair {
+
+ override def t:IMat = IMat(mat.gt(omat))
+
+ def * (b : IMat) = mat.iMult(b, omat)
+ def * (b : SMat) = mat.iMult(b, omat)
+// def xT (b : SMat) = mat.multT(b, omat)
+ def + (b : IMat) = mat.iiMatOpv(b, IMat.vecAdd _, omat)
+ def - (b : IMat) = mat.iiMatOpv(b, IMat.vecSub _, omat)
+ def *@ (b : IMat) = mat.iiMatOpv(b, IMat.vecMul _, omat)
+// def /@ (b : IMat) = mat.iiMatOpv(b, IMat.fVecDiv _, omat)
+// def ^ (b : IMat) = mat.iiMatOp(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat)
+
+ def > (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x > y) 1 else 0, omat)
+ def < (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x < y) 1 else 0, omat)
+ def == (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, omat)
+ def === (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x == y) 1 else 0, omat)
+ def >= (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x >= y) 1 else 0, omat)
+ def <= (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x <= y) 1 else 0, omat)
+ def != (b : IMat) = mat.iiMatOp(b, (x:Int, y:Int) => if (x != y) 1 else 0, omat)
+
+
+ override def * (b : Int) = mat.iMult(IMat.ielem(b), omat)
+ override def + (b : Int) = mat.iiMatOpScalarv(b, IMat.vecAdd _, omat)
+ override def - (b : Int) = mat.iiMatOpScalarv(b, IMat.vecSub _, omat)
+ override def *@ (b : Int) = mat.iiMatOpScalarv(b, IMat.vecMul _, omat)
+// override def /@ (b : Int) = mat.iiMatOpScalarv(b, IMat.fVecDiv _, omat)
+// override def ^ (b : Int) = mat.iiMatOpScalar(b, (x:Float, y:Float) => math.pow(x,y).toFloat, omat)
+
+ override def > (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x > y) 1 else 0, omat)
+ override def < (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x < y) 1 else 0, omat)
+ override def == (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x == y) 1 else 0, omat)
+ override def >= (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x >= y) 1 else 0, omat)
+ override def <= (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x <= y) 1 else 0, omat)
+ override def != (b : Int) = mat.iiMatOpScalar(b, (x:Int, y:Int) => if (x != y) 1 else 0, omat)
+
+ import Operator._
+ override def + (b : Mat):Mat = applyMat(mat, b, omat, Mop_Plus)
+ override def - (b : Mat):Mat = applyMat(mat, b, omat, Mop_Minus)
+ override def * (b : Mat):Mat = applyMat(mat, b, omat, Mop_Times)
+ override def / (b : Mat):Mat = applyMat(mat, b, omat, Mop_Div)
+ override def \\ (b : Mat):Mat = applyMat(mat, b, omat, Mop_RSolve)
+ override def *@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_ETimes)
+ override def /@ (b : Mat):Mat = applyMat(mat, b, omat, Mop_EDiv)
+ override def \ (b : Mat):Mat = applyMat(mat, b, omat, Mop_HCat)
+ override def on (b : Mat):Mat = applyMat(mat, b, omat, Mop_VCat)
+
+ override def > (b : Mat):Mat = applyMat(mat, b, omat, Mop_GT)
+ override def < (b : Mat):Mat = applyMat(mat, b, omat, Mop_LT)
+ override def >= (b : Mat):Mat = applyMat(mat, b, omat, Mop_GE)
+ override def <= (b : Mat):Mat = applyMat(mat, b, omat, Mop_LE)
+ override def == (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+ override def === (b : Mat):Mat = applyMat(mat, b, omat, Mop_EQ)
+ override def != (b : Mat):Mat = applyMat(mat, b, omat, Mop_NE)
+}
+
+
+object IMat {
+
+  /**
+   * Strided element-wise integer division: c(ci) = a(ai) / b(bi), with ai, bi, ci
+   * starting at a0, b0, c0 and advancing by ainc, binc, cinc. The loop runs
+   * while ci < c0 + n. Always returns 0.
+   */
+  def iVecDiv(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) / b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+ def apply(nr:Int, nc:Int) = new IMat(nr, nc, new Array[Int](nr*nc))
+
+ def apply(a:DenseMat[Int]):IMat = new IMat(a.nrows, a.ncols, a.data)
+
+  /**
+   * Convert x to an IMat. DMat and FMat data go through Mat.copyToIntArray,
+   * an IMat is copied element for element, and a GIMat is converted with toIMat.
+   * Any other type raises RuntimeException("Unsupported source type").
+   */
+  def apply(x:Mat):IMat =
+    x match {
+      case dd:DMat => {
+        val res = IMat(x.nrows, x.ncols)
+        Mat.copyToIntArray(dd.data, 0, res.data, 0, dd.length)
+        res
+      }
+      case ff:FMat => {
+        val res = IMat(x.nrows, x.ncols)
+        Mat.copyToIntArray(ff.data, 0, res.data, 0, ff.length)
+        res
+      }
+      case ii:IMat => {
+        val res = IMat(x.nrows, x.ncols)
+        System.arraycopy(ii.data, 0, res.data, 0, ii.length)
+        res
+      }
+      case gg:GIMat => gg.toIMat
+      case _ => throw new RuntimeException("Unsupported source type")
+    }
+
+  /**
+   * Strided element-wise sum: c(ci) = a(ai) + b(bi), with ai, bi, ci starting at
+   * a0, b0, c0 and advancing by ainc, binc, cinc. The loop runs while
+   * ci < c0 + n. Always returns 0.
+   */
+  def vecAdd(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) + b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  /**
+   * Strided element-wise difference: c(ci) = a(ai) - b(bi), with ai, bi, ci
+   * starting at a0, b0, c0 and advancing by ainc, binc, cinc. The loop runs
+   * while ci < c0 + n. Always returns 0.
+   */
+  def vecSub(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) - b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  /**
+   * Strided element-wise product: c(ci) = a(ai) * b(bi), with ai, bi, ci starting
+   * at a0, b0, c0 and advancing by ainc, binc, cinc. The loop runs while
+   * ci < c0 + n. Always returns 0.
+   */
+  def vecMul(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = a(ia) * b(ib)
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  /**
+   * Strided element-wise maximum: c(ci) = max(a(ai), b(bi)), with ai, bi, ci
+   * starting at a0, b0, c0 and advancing by ainc, binc, cinc. The loop runs
+   * while ci < c0 + n. Always returns 0.
+   */
+  def vecMax(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = math.max(a(ia), b(ib))
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+  /**
+   * Strided element-wise minimum: c(ci) = min(a(ai), b(bi)), with ai, bi, ci
+   * starting at a0, b0, c0 and advancing by ainc, binc, cinc. The loop runs
+   * while ci < c0 + n. Always returns 0.
+   */
+  def vecMin(a:Array[Int], a0:Int, ainc:Int, b:Array[Int], b0:Int, binc:Int, c:Array[Int], c0:Int, cinc:Int, n:Int):Int = {
+    val stop = c0 + n
+    var ia = a0
+    var ib = b0
+    var ic = c0
+    while (ic < stop) {
+      c(ic) = math.min(a(ia), b(ib))
+      ia += ainc
+      ib += binc
+      ic += cinc
+    }
+    0
+  }
+
+
+  // Wrap a single Int in a 1x1 IMat.
+  def ielem(x:Int) = {
+    val cell = new Array[Int](1)
+    cell(0) = x
+    new IMat(1, 1, cell)
+  }
+
+  /**
+   * Return an IMat of shape nr x nc to be used as an output buffer.
+   * A null or 0x0 omat yields a freshly constructed IMat; an IMat of the right
+   * shape is returned unchanged; an IMat of another shape is passed through recycle.
+   * Only IMat is matched, so any other non-empty Mat type fails the match.
+   */
+  def newOrCheckIMat(nr:Int, nc:Int, omat:Mat):IMat = {
+    val noBuffer = omat.asInstanceOf[AnyRef] == null || (omat.nrows == 0 && omat.ncols == 0)
+    if (noBuffer) {
+      IMat(nr, nc)
+    } else {
+      omat match {
+        case buf:IMat =>
+          if (buf.nrows == nr && buf.ncols == nc) buf else buf.recycle(nr, nc, 0)
+      }
+    }
+  }
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/Mat.scala b/src/main/scala/BIDMat/Mat.scala
new file mode 100755
index 00000000..a4f29f3a
--- /dev/null
+++ b/src/main/scala/BIDMat/Mat.scala
@@ -0,0 +1,445 @@
+package BIDMat
+
+/**
+ * Base matrix type. It records the dimensions and declares the operator surface; every
+ * operation defined here throws a RuntimeException that names the operator and the operand
+ * type(s). Presumably concrete matrix classes override the operations they support.
+ */
+class Mat(nr:Int, nc:Int) {
+  val nrows = nr
+  val ncols = nc
+
+  def length = nr*nc
+
+  /** Fails with "operator s not implemented for <mytype>". Declared as Mat so it fits any operator body. */
+  def notImplemented0(s:String):Mat = {
+    throw new RuntimeException("operator "+s+" not implemented for "+this.mytype)
+  }
+
+  /** Same as notImplemented0 but also reports the type of the right-hand operand. */
+  def notImplemented1(s:String,that:Mat):Mat = {
+    throw new RuntimeException("operator "+s+" not implemented for "+this.mytype+" and "+that.mytype)
+  }
+
+  def t = notImplemented0("t")
+  def dv:Double = throw new RuntimeException("operator dv not implemented for "+this.mytype)
+
+  /** Type name used in the error messages above. */
+  def mytype = "Mat"
+  // The message names the operation that was actually invoked (it previously said "copy").
+  def copyTo(a:Mat) = notImplemented0("copyTo");
+  def copy = notImplemented0("copy");
+  def set(v:Float) = notImplemented0("set")
+  def zeros(nr:Int, nc:Int) = notImplemented0("zeros");
+  def ones(nr:Int, nc:Int) = notImplemented0("ones");
+  def clearUpper(i:Int) = notImplemented0("clearUpper");
+  def clearLower(i:Int) = notImplemented0("clearLower");
+  def clearUpper = notImplemented0("clearUpper");
+  def clearLower = notImplemented0("clearLower");
+
+  // The trailing 0 only satisfies the Int return type; notImplemented0 always throws first.
+  def nnz:Int = {notImplemented0("nnz"); 0}
+  def clear = notImplemented0("clear");
+  /** The nnz argument is ignored here; this forwards to the two-argument zeros. */
+  def zeros(nr:Int, nc:Int, nnz:Int):Mat = zeros(nr, nc)
+  def recycle(nr:Int, nc:Int, nnz:Int):Mat = notImplemented0("recycle");
+  // The message names the operation that was actually invoked (it previously said "recycle").
+  def contents:Mat = notImplemented0("contents");
+
+  def apply(a:IMat):Mat = notImplemented0("linear array access");
+  def apply(a:IMat, b:IMat):Mat = notImplemented0("block array access");
+  def apply(a:IMat, b:Int):Mat = notImplemented0("block array access");
+  def apply(a:Int, b:IMat):Mat = notImplemented0("block array access");
+
+  def update(a:IMat, b:Mat) = notImplemented0("linear update");
+  def update(a:IMat, b:IMat, m:Mat) = notImplemented0("block update");
+  def update(a:IMat, b:Int, m:Mat) = notImplemented0("block update");
+  def update(a:Int, b:IMat, m:Mat) = notImplemented0("block update");
+
+  // Binary operators against another Mat.
+  def + (b : Mat):Mat = notImplemented1("+", b)
+  def - (b : Mat):Mat = notImplemented1("-", b)
+  def * (b : Mat):Mat = notImplemented1("*", b)
+  // xT and Tx report their own names (they previously reported "*"), matching class Pair.
+  def xT (b : Mat):Mat = notImplemented1("xT", b)
+  def Tx (b : Mat):Mat = notImplemented1("Tx", b)
+  def / (b : Mat):Mat = notImplemented1("/", b)
+  def *@ (b : Mat):Mat = notImplemented1("*@", b)
+  def /@ (b : Mat):Mat = notImplemented1("/@", b)
+  def \\ (b : Mat):Mat = notImplemented1("\\\\", b)
+  def ^ (b : Mat):Mat = notImplemented1("^", b)
+
+  def > (b : Mat):Mat = notImplemented1(">", b)
+  def < (b : Mat):Mat = notImplemented1("<", b)
+  def >= (b : Mat):Mat = notImplemented1(">=", b)
+  def <= (b : Mat):Mat = notImplemented1("<=", b)
+  def == (b : Mat):Mat = notImplemented1("==", b)
+  def === (b : Mat):Mat = notImplemented1("===", b)
+  def != (b : Mat):Mat = notImplemented1("!=", b)
+
+  /** Assignment-style operator: asks b to copy itself into this matrix. */
+  def <-- (b : Mat):Mat = b.copyTo(this)
+
+  // Scalar right-hand sides. The scalar is not part of the error message.
+  def + (b : Int):Mat = notImplemented0("+")
+  def - (b : Int):Mat = notImplemented0("-")
+  def * (b : Int):Mat = notImplemented0("*")
+  def / (b : Int):Mat = notImplemented0("/")
+  def *@ (b : Int):Mat = notImplemented0("*@")
+  def /@ (b : Int):Mat = notImplemented0("/@")
+  def \\ (b : Int):Mat = notImplemented0("\\\\")
+  def ^ (b : Int):Mat = notImplemented0("^")
+
+  def > (b : Int):Mat = notImplemented0(">")
+  def < (b : Int):Mat = notImplemented0("<")
+  def >= (b : Int):Mat = notImplemented0(">=")
+  def <= (b : Int):Mat = notImplemented0("<=")
+  def == (b : Int):Mat = notImplemented0("==")
+  def === (b : Int):Mat = notImplemented0("===")
+  def != (b : Int):Mat = notImplemented0("!=")
+
+  def + (b : Float):Mat = notImplemented0("+")
+  def - (b : Float):Mat = notImplemented0("-")
+  def * (b : Float):Mat = notImplemented0("*")
+  def / (b : Float):Mat = notImplemented0("/")
+  def *@ (b : Float):Mat = notImplemented0("*@")
+  def /@ (b : Float):Mat = notImplemented0("/@")
+  def \\ (b : Float):Mat = notImplemented0("\\\\")
+  def ^ (b : Float):Mat = notImplemented0("^")
+
+  def > (b : Float):Mat = notImplemented0(">")
+  def < (b : Float):Mat = notImplemented0("<")
+  def >= (b : Float):Mat = notImplemented0(">=")
+  def <= (b : Float):Mat = notImplemented0("<=")
+  def == (b : Float):Mat = notImplemented0("==")
+  def === (b : Float):Mat = notImplemented0("===")
+  def != (b : Float):Mat = notImplemented0("!=")
+
+  def + (b : Double):Mat = notImplemented0("+")
+  def - (b : Double):Mat = notImplemented0("-")
+  def * (b : Double):Mat = notImplemented0("*")
+  def / (b : Double):Mat = notImplemented0("/")
+  def *@ (b : Double):Mat = notImplemented0("*@")
+  def /@ (b : Double):Mat = notImplemented0("/@")
+  def \\ (b : Double):Mat = notImplemented0("\\\\")
+  def ^ (b : Double):Mat = notImplemented0("^")
+
+  def > (b : Double):Mat = notImplemented0(">")
+  def < (b : Double):Mat = notImplemented0("<")
+  def >= (b : Double):Mat = notImplemented0(">=")
+  def <= (b : Double):Mat = notImplemented0("<=")
+  def == (b : Double):Mat = notImplemented0("==")
+  def === (b : Double):Mat = notImplemented0("===")
+  def != (b : Double):Mat = notImplemented0("!=")
+
+  def \ (b : Mat):Mat = notImplemented1("\\", b)
+  def on (b : Mat):Mat = notImplemented1("on", b)
+  /**
+   * Pairs this matrix with b, choosing the Pair subclass from b's runtime type.
+   * A right-hand type that is not listed below fails with a MatchError.
+   */
+  def ~ (b : Mat):Pair = b match {
+    case bb:FMat => new FPair(this, bb)
+    case bb:DMat => new DPair(this, bb)
+    case bb:IMat => new IPair(this, bb)
+    case bb:SMat => new SPair(this, bb)
+//    case bb:SDMat => new SDPair(this, bb)
+    case bb:CMat => new CPair(this, bb)
+    case bb:GMat => new GPair(this, bb)
+  }
+
+  // The trailing 0 only satisfies the Double return type; notImplemented1 always throws first.
+  def dot (b : Mat):Double = {notImplemented1("dot", b); 0}
+
+}
+
+// Operator surface for the objects returned by Mat's ~ operator. Every operator declared here
+// throws a RuntimeException; presumably the concrete pairs (FPair, DPair, ...) override the
+// ones they support. Unlike Mat, the messages embed this and that via their toString.
+abstract class Pair {
+
+ // Fails with "operator s not implemented for <this>"; declared as Mat so it fits any operator body.
+ def notImplemented0(s:String):Mat = {
+ throw new RuntimeException("operator "+s+" not implemented for "+this)
+ }
+ // Same as notImplemented0 but also reports the right-hand operand.
+ def notImplemented1(s:String,that:Mat):Mat = {
+ throw new RuntimeException("operator "+s+" not implemented for "+this+" and "+that)
+ }
+
+ def t = notImplemented0("t")
+
+ // Binary operators against a Mat.
+ def + (b : Mat):Mat = notImplemented1("+", b)
+ def - (b : Mat):Mat = notImplemented1("-", b)
+ def * (b : Mat):Mat = notImplemented1("*", b)
+ def xT (b : Mat):Mat = notImplemented1("xT", b)
+ def Tx (b : Mat):Mat = notImplemented1("Tx", b)
+ def / (b : Mat):Mat = notImplemented1("/", b)
+ def *@ (b : Mat):Mat = notImplemented1("*@", b)
+ def /@ (b : Mat):Mat = notImplemented1("/@", b)
+ def \\ (b : Mat):Mat = notImplemented1("\\\\", b)
+ def ^ (b : Mat):Mat = notImplemented1("^", b)
+
+ def > (b : Mat):Mat = notImplemented1(">", b)
+ def < (b : Mat):Mat = notImplemented1("<", b)
+ def >= (b : Mat):Mat = notImplemented1(">=", b)
+ def <= (b : Mat):Mat = notImplemented1("<=", b)
+ def == (b : Mat):Mat = notImplemented1("==", b)
+ def === (b : Mat):Mat = notImplemented1("===", b)
+ def != (b : Mat):Mat = notImplemented1("!=", b)
+
+ def \ (b : Mat):Mat = notImplemented1("\\", b)
+ def on (b : Mat):Mat = notImplemented1("on", b)
+
+ // Scalar right-hand sides (Int, Float, Double). The scalar is not part of the error message.
+ def + (b : Int):Mat = notImplemented0("+")
+ def - (b : Int):Mat = notImplemented0("-")
+ def * (b : Int):Mat = notImplemented0("*")
+ def / (b : Int):Mat = notImplemented0("/")
+ def *@ (b : Int):Mat = notImplemented0("*@")
+ def /@ (b : Int):Mat = notImplemented0("/@")
+ def \\ (b : Int):Mat = notImplemented0("\\\\")
+ def ^ (b : Int):Mat = notImplemented0("^")
+
+ def > (b : Int):Mat = notImplemented0(">")
+ def < (b : Int):Mat = notImplemented0("<")
+ def >= (b : Int):Mat = notImplemented0(">=")
+ def <= (b : Int):Mat = notImplemented0("<=")
+ def == (b : Int):Mat = notImplemented0("==")
+ def === (b : Int):Mat = notImplemented0("===")
+ def != (b : Int):Mat = notImplemented0("!=")
+
+ def + (b : Float):Mat = notImplemented0("+")
+ def - (b : Float):Mat = notImplemented0("-")
+ def * (b : Float):Mat = notImplemented0("*")
+ def / (b : Float):Mat = notImplemented0("/")
+ def *@ (b : Float):Mat = notImplemented0("*@")
+ def /@ (b : Float):Mat = notImplemented0("/@")
+ def \\ (b : Float):Mat = notImplemented0("\\\\")
+ def ^ (b : Float):Mat = notImplemented0("^")
+
+ def > (b : Float):Mat = notImplemented0(">")
+ def < (b : Float):Mat = notImplemented0("<")
+ def >= (b : Float):Mat = notImplemented0(">=")
+ def <= (b : Float):Mat = notImplemented0("<=")
+ def == (b : Float):Mat = notImplemented0("==")
+ def === (b : Float):Mat = notImplemented0("===")
+ def != (b : Float):Mat = notImplemented0("!=")
+
+ def + (b : Double):Mat = notImplemented0("+")
+ def - (b : Double):Mat = notImplemented0("-")
+ def * (b : Double):Mat = notImplemented0("*")
+ def / (b : Double):Mat = notImplemented0("/")
+ def *@ (b : Double):Mat = notImplemented0("*@")
+ def /@ (b : Double):Mat = notImplemented0("/@")
+ def \\ (b : Double):Mat = notImplemented0("\\\\")
+ def ^ (b : Double):Mat = notImplemented0("^")
+
+ def > (b : Double):Mat = notImplemented0(">")
+ def < (b : Double):Mat = notImplemented0("<")
+ def >= (b : Double):Mat = notImplemented0(">=")
+ def <= (b : Double):Mat = notImplemented0("<=")
+ def == (b : Double):Mat = notImplemented0("==")
+ def === (b : Double):Mat = notImplemented0("===")
+ def != (b : Double):Mat = notImplemented0("!=")
+}
+
+object Mat {
+ import Ordered._
+ import scala.tools.jline.TerminalFactory
+
+ // Global, mutable settings shared by the library. The first four are read by
+ // MatHDF5.setCompressionPlist when it configures an HDF5 dataset property list.
+ var compressType = 1 // 0=none, 1=zlib, 2=szip
+
+ var compressionLevel = 3 // for zlib
+
+ var chunkSize = 1024*1024 // for either method
+
+ var szipBlock = 32 // szip block size
+
+ // Number of worker actors MatFunctions.DDS starts for large inputs.
+ var numThreads = 8
+
+ // When true, DDShelper/DDSQ use a plain Scala loop instead of the native sdotxx call.
+ var noMKL:Boolean = false
+
+ // Running operation counter; DDS/DDSQ add to it and MatFunctions.flip/flop read it.
+ var nflops = 0L
+
+ // NOTE(review): not read anywhere in the visible code; presumably an index base for dense access.
+ var oneBased = 0
+
+ // Offset added to the row/column indices stored in sparse matrices (see MatFunctions.sparse).
+ var ioneBased = 1
+
+ // Set by checkCUDA: 0 = not probed yet, -1 = CUDA runtime library not found, otherwise the device count.
+ var hasCUDA = 0
+
+ /**
+  * Probes for CUDA and records the outcome in hasCUDA.
+  *
+  * While hasCUDA is 0 the CUDA runtime library is loaded first ("cudart" when os.name is
+  * "Linux", otherwise "cudart64_50_35" with "cudart64_42_9" as fallback); a failure prints a
+  * message and sets hasCUDA to -1. Unless hasCUDA is negative, JCuda is then asked for the
+  * device count, which is stored in hasCUDA and printed together with the runtime version.
+  * Failures of the JCuda calls are reported on stdout and leave hasCUDA unchanged.
+  */
+ def checkCUDA:Unit = {
+   if (hasCUDA == 0) {
+     try {
+       val os = System.getProperty("os.name")
+       if (os.equals("Linux")) {
+         System.loadLibrary("cudart")
+       } else {
+         try {
+           System.loadLibrary("cudart64_50_35")
+         } catch {
+           // A failure of the fallback load propagates to the outer handler below.
+           case _:Throwable => System.loadLibrary("cudart64_42_9")
+         }
+       }
+     } catch {
+       // loadLibrary fails with UnsatisfiedLinkError (an Error), hence Throwable.
+       case _:Throwable => {
+         println("Cant find CUDA SDK")
+         hasCUDA = -1
+       }
+     }
+   }
+   if (hasCUDA >= 0) {
+     try {
+       val cudanum = new Array[Int](1)
+       jcuda.runtime.JCuda.cudaGetDeviceCount(cudanum)
+       hasCUDA = cudanum(0)
+       printf("%d CUDA device%s found", hasCUDA, if (hasCUDA == 1) "" else "s")
+       if (hasCUDA > 0) {
+         jcuda.runtime.JCuda.cudaRuntimeGetVersion(cudanum)
+         // The runtime version is encoded as 1000*major + 10*minor, so the minor part is
+         // (v % 1000) / 10; taking v % 100 would drop the tens digit of a two-digit minor.
+         println(", CUDA version %d.%d" format (cudanum(0)/1000, (cudanum(0)%1000) / 10))
+       } else {
+         println("")
+       }
+     } catch {
+       case e:NoClassDefFoundError => println("Couldn't load the JCUDA driver")
+       case e:Exception => println("Exception while initializing JCUDA driver")
+       case _:Throwable => println("Something went wrong while loading JCUDA driver")
+     }
+   }
+ }
+
+ // jline terminal handle, created once when the Mat object is initialized.
+ var terminal = TerminalFactory.create
+
+ // Width reported by the terminal, but never less than 80.
+ def terminalWidth = math.max(terminal.getWidth,80)
+
+ /**
+  * Converts n consecutive elements of data, starting at i0, to Int with numeric.toInt and
+  * stores them in idata starting at d0.
+  */
+ def copyToIntArray[@specialized(Double, Float) T](data:Array[T], i0:Int, idata:Array[Int], d0:Int, n:Int)
+ (implicit numeric : Numeric[T]) = {
+   var src = i0
+   var dst = d0
+   var remaining = n
+   while (remaining > 0) {
+     idata(dst) = numeric.toInt(data(src))
+     src += 1
+     dst += 1
+     remaining -= 1
+   }
+ }
+
+ /**
+  * Converts n consecutive elements of data, starting at i0, to Double with numeric.toDouble
+  * and stores them in ddata starting at d0.
+  */
+ def copyToDoubleArray[@specialized(Int, Float) T](data:Array[T], i0:Int, ddata:Array[Double], d0:Int, n:Int)
+ (implicit numeric : Numeric[T]) = {
+   var src = i0
+   var dst = d0
+   var remaining = n
+   while (remaining > 0) {
+     ddata(dst) = numeric.toDouble(data(src))
+     src += 1
+     dst += 1
+     remaining -= 1
+   }
+ }
+
+ /**
+  * Converts n consecutive elements of data, starting at i0, to Float with numeric.toFloat
+  * and stores them in fdata starting at d0.
+  */
+ def copyToFloatArray[@specialized(Int, Double) T](data:Array[T], i0:Int, fdata:Array[Float], d0:Int, n:Int)
+ (implicit numeric : Numeric[T]) = {
+   var src = i0
+   var dst = d0
+   var remaining = n
+   while (remaining > 0) {
+     fdata(dst) = numeric.toFloat(data(src))
+     src += 1
+     dst += 1
+     remaining -= 1
+   }
+ }
+
+ /**
+  * Converts every element of a to Float with numeric.toFloat and stores the results in b
+  * from index 0 onwards. b must hold at least as many elements as a.
+  */
+ def copyListToFloatArray[T](a:List[T], b:Array[Float])(implicit numeric : Numeric[T]) = {
+   var i = 0
+   val todo = a.iterator
+   // Drive the loop from the iterator: List.length walks the whole list, so testing it on
+   // every iteration made the copy quadratic in the list size.
+   while (todo.hasNext) {
+     b(i) = numeric.toFloat(todo.next)
+     i += 1
+   }
+ }
+
+ /**
+  * Binary search for v in x over the index range [istartp, iendp), which must be sorted in
+  * ascending order.
+  *
+  * @return the index of v (the last one when v occurs several times), or -1 when v is not
+  *         present or the range is empty
+  */
+ def ibinsearch(v:Int, x:Array[Int], istartp:Int, iendp:Int):Int = {
+   if (iendp <= istartp) {
+     // Nothing to search; the final x(istart) probe below would otherwise read outside the range.
+     -1
+   } else {
+     var istart = istartp
+     var iend = iendp
+     while (iend - istart > 1) {
+       // Written this way so that istart + iend cannot overflow for large index values.
+       val mid = istart + (iend - istart)/2
+       if (v < x(mid)) iend = mid else istart = mid
+     }
+     if (v == x(istart)) istart else -1
+   }
+ }
+
+ /**
+  * Binary search for v in x over the index range [istartp, iendp), which must be sorted in
+  * ascending order according to the implicit Ordering.
+  *
+  * @return the index of v (the last one when v occurs several times), or -1 when v is not
+  *         present or the range is empty
+  */
+ def binsearch[T : Ordering](v:T, x:Array[T], istartp:Int, iendp:Int):Int = {
+   if (iendp <= istartp) {
+     // Nothing to search; the final x(istart) probe below would otherwise read outside the range.
+     -1
+   } else {
+     val ord = implicitly[Ordering[T]]
+     var istart = istartp
+     var iend = iendp
+     while (iend - istart > 1) {
+       // Written this way so that istart + iend cannot overflow for large index values.
+       val mid = istart + (iend - istart)/2
+       if (ord.compare(v, x(mid)) < 0) iend = mid else istart = mid
+     }
+     if (v == x(istart)) istart else -1
+   }
+ }
+
+ // Lexicographic sort over parallel key arrays: a(0) is the most significant key, a(1) the
+ // next, and so on. The key arrays are not modified; the result is the index array ind,
+ // rearranged by BIDMat.Sorting.quickSort through the comp/swap callbacks below.
+ // All arrays are read at the positions 0 until a(0).length.
+ def lexsort[T :Ordering](a:List[Array[T]]):Array[Int] = {
+ val n = a(0).length
+ val ind = new Array[Int](n)
+ var i = 0; while(i < n) {ind(i) = i; i += 1}
+ // Compares the elements currently at positions i and j of ind, key by key.
+ def comp(i:Int, j:Int):Int = {
+ val alen = a.length;
+ val ip = ind(i)
+ val jp = ind(j)
+ var c0 = 0
+ var k = 0;
+ // Stop at the first key array that tells the two rows apart.
+ while (k < alen && c0 == 0) {
+ c0 = a(k)(ip) compare a(k)(jp)
+ k += 1
+ }
+ if (c0 != 0) {
+ c0
+ } else {
+ // All keys equal: fall back to the original positions so the order is deterministic.
+ ip compare jp
+ }
+ }
+ // Only the index array is permuted.
+ def swap(i:Int, j:Int):Unit = {
+ val tmp = ind(i)
+ ind(i) = ind(j)
+ ind(j) = tmp
+ }
+ BIDMat.Sorting.quickSort(comp, swap, 0, n)
+ ind
+ }
+
+ // Int version of lexsort: lexicographic sort over parallel key arrays with a(0) as the most
+ // significant key. The key arrays are not modified; the result is the index array ind,
+ // rearranged by BIDMat.Sorting.quickSort through the comp/swap callbacks below.
+ def ilexsort(a:List[Array[Int]]):Array[Int] = {
+ val n = a(0).length
+ val ind = new Array[Int](n)
+ var i = 0; while(i < n) {ind(i) = i; i += 1}
+ // Compares the elements currently at positions i and j of ind, key by key.
+ def comp(i:Int, j:Int):Int = {
+ var k = 0;
+ val alen = a.length;
+ var c0 = 0
+ val ip = ind(i)
+ val jp = ind(j)
+ // Stop at the first key array that tells the two rows apart.
+ while (k < alen && c0 == 0) {
+ c0 = a(k)(ip) compare a(k)(jp)
+ k += 1
+ }
+ if (c0 != 0) {
+ c0
+ } else {
+ // All keys equal: fall back to the original positions so the order is deterministic.
+ ip compare jp
+ }
+ }
+ // Only the index array is permuted.
+ def swap(i:Int, j:Int):Unit = {
+ val tmp = ind(i)
+ ind(i) = ind(j)
+ ind(j) = tmp
+ }
+ BIDMat.Sorting.quickSort(comp, swap, 0, n)
+ ind
+ }
+
+ // Two-key lexicographic sort that works IN PLACE: a (primary key) and b (secondary key) are
+ // themselves reordered, and the returned array ind holds, for each final position, the
+ // position that element had before the sort. Sorting covers the positions 0 until a.length.
+ def ilexsort2(a:Array[Int], b:Array[Int]):Array[Int] = {
+ val n = a.length
+ val ind = new Array[Int](n)
+ var i = 0; while(i < n) {ind(i) = i; i += 1}
+ // Keys are read directly at i and j because swap moves the keys along with the indices.
+ def comp(i:Int, j:Int):Int = {
+ val c0 = a(i) compare a(j)
+ if (c0 != 0) {
+ c0
+ } else {
+ val c1 = b(i) compare b(j)
+ if (c1 != 0) {
+ c1
+ } else {
+ // Both keys equal: order by original position.
+ ind(i) compare ind(j)
+ }
+ }
+ }
+ // Swaps the two keys and the index entry together so the three arrays stay aligned.
+ def swap(i:Int, j:Int):Unit = {
+ val tmpa = a(i)
+ a(i) = a(j)
+ a(j) = tmpa
+ val tmpb = b(i)
+ b(i) = b(j)
+ b(j) = tmpb
+ val tmpi = ind(i)
+ ind(i) = ind(j)
+ ind(j) = tmpi
+ }
+ BIDMat.Sorting.quickSort(comp, swap, 0, n)
+ ind
+ }
+
+ // Varargs convenience form: the key arrays are passed on as a List, first argument most significant.
+ def ilexsort(args:Array[Int]*):Array[Int] = {
+ ilexsort(args.toList)
+ }
+
+ // Varargs convenience form of the generic lexsort.
+ def lexsort[T : Ordering](args:Array[T]*):Array[Int] = {
+ lexsort(args.toList)
+ }
+
+}
diff --git a/src/main/scala/BIDMat/MatFunctions.scala b/src/main/scala/BIDMat/MatFunctions.scala
new file mode 100755
index 00000000..36ec790a
--- /dev/null
+++ b/src/main/scala/BIDMat/MatFunctions.scala
@@ -0,0 +1,657 @@
+package BIDMat
+
+import scala.compat.Platform._
+import edu.berkeley.bid.CBLAS._
+import edu.berkeley.bid.LAPACK._
+import scala.actors.Actor._
+
+// Type of the `?` value defined at the end of MatFunctions: a 0x0 IMat with no data array
+// that is tagged with MatrixWildcard. It is used as an index argument, e.g. a(ii,?).
+class IMatWildcard extends IMat(0,0,null) with MatrixWildcard
+
+object MatFunctions {
+
+ // Timestamp stored by tic (value of scala.compat.Platform.currentTime).
+ var currentTimeWasThen:Long = 0
+
+ // Value of Mat.nflops stored by flip.
+ var lastFlops:Long = 0
+
+ // Starts the stopwatch.
+ def tic = { currentTimeWasThen = currentTime }
+
+ // Time since the last tic: the currentTime difference divided by 1000 (seconds, if currentTime is in ms).
+ def toc:Float = {(currentTime - currentTimeWasThen)/1000.0f}
+
+ // Starts a flop measurement: remembers the current flop counter and restarts the stopwatch.
+ def flip = { lastFlops = Mat.nflops ; tic }
+
+ // Returns (flops counted since flip divided by the elapsed time, elapsed time).
+ def flop:(Float, Float) = { val t1 = toc; ( (Mat.nflops -lastFlops)/t1, t1 ) }
+
+ // Same as flop with the rate additionally divided by 1e9.
+ def gflop:(Float, Float) = { val t1 = toc; ( (Mat.nflops -lastFlops)/t1/1e9f, t1 ) }
+
+ /** Dimensions of a as a (rows, columns) pair. */
+ def size(a:Mat):(Int, Int) = (a.nrows, a.ncols)
+
+ /**
+  * One dimension of a: n == 1 selects the row count, n == 2 the column count.
+  * Any other n is rejected with a RuntimeException.
+  */
+ def size(a:Mat, n:Int):Int = n match {
+   case 1 => a.nrows
+   case 2 => a.ncols
+   case _ => throw new RuntimeException("size arg must be 1 or 2")
+ }
+
+ // Function-style accessors that forward to the matrix's own length member.
+ def length(a:DMat):Int = a.length
+
+ def length(a:FMat):Int = a.length
+
+ def length(a:IMat):Int = a.length
+
+ // Function-style accessors that forward to the matrix's own nnz member.
+ def nnz(a:DMat):Int = a.nnz
+
+ def nnz(a:FMat):Int = a.nnz
+
+ def nnz(a:IMat):Int = a.nnz
+
+ def nnz(a:SMat):Int = a.nnz
+
+ def nnz(a:SDMat):Int = a.nnz
+
+ // Implicit conversions from scalars, ranges and tuples to matrices.
+ // A scalar becomes a one-element row.
+ implicit def flt2FMat(x:Float):FMat = row(x)
+
+ // The Double is narrowed to Float on the way in (row goes through Mat.copyListToFloatArray).
+ implicit def dbl2FMat(x:Double):FMat = row(x)
+
+ implicit def int2IMat(x:Int):IMat = irow(x)
+
+// implicit def dbl2CMat(x:Double):CMat = CMat.celem(x.asInstanceOf[Float],0)
+
+ // A Range becomes a row holding the range's elements.
+ implicit def range2IMat(x:Range):IMat = irow(x)
+
+ // A pair (lo, hi) becomes the row lo until hi, i.e. hi itself is excluded.
+ implicit def tuple2IMat(x:Tuple2[Int,Int]):IMat = irow(x._1 until x._2)
+
+ // Widening conversion: builds a DMat of the same shape and copies every element as Double.
+ implicit def fMat2DMat(x:FMat):DMat = {
+ val out = DMat(x.nrows, x.ncols)
+ Mat.copyToDoubleArray(x.data, 0, out.data, 0, x.length)
+ out
+ }
+
+ // Builds an FMat of the same shape and copies every element as Float.
+ implicit def iMat2FMat(x:IMat):FMat = {
+ val out = FMat(x.nrows, x.ncols)
+ Mat.copyToFloatArray(x.data, 0, out.data, 0, x.length)
+ out
+ }
+
+ // recycleTry obtains an output matrix of a requested shape, reusing `a` when possible.
+ // All the logic lives in the Mat-typed five-argument overload further down; the typed
+ // overloads only forward to it (passing b as a plain Mat) and cast the result back to b's
+ // static type.
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:FMat, nnz:Int):FMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[FMat]
+
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:DMat, nnz:Int):DMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[DMat]
+
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:IMat, nnz:Int):IMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[IMat]
+
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:SMat, nnz:Int):SMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[SMat]
+
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:SDMat, nnz:Int):SDMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[SDMat]
+
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:GMat, nnz:Int):GMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[GMat]
+
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:GIMat, nnz:Int):GIMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[GIMat]
+
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:GSMat, nnz:Int):GSMat = recycleTry(a, nr, nc, b:Mat, nnz).asInstanceOf[GSMat]
+
+ // Core implementation: when a is null or 0x0, b is asked for a zero matrix of the requested
+ // size (b only serves as the factory); otherwise a.recycle(nr, nc, nnz) is returned.
+ // The AnyRef cast keeps the null test on reference equality rather than Mat's == overload.
+ def recycleTry(a:Mat, nr:Int, nc:Int, b:Mat, nnz:Int):Mat = {
+ if (a.asInstanceOf[AnyRef] == null || (a.nrows == 0 && a.ncols == 0)) {
+ b.zeros(nr, nc, nnz)
+ } else {
+ a.recycle(nr, nc, nnz)
+ }
+ }
+
+ // Two-argument forms: the requested shape and nnz are taken from b.
+ def recycleTry(a:Mat, b:FMat):FMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[FMat]
+
+ def recycleTry(a:Mat, b:DMat):DMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[DMat]
+
+ def recycleTry(a:Mat, b:IMat):IMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[IMat]
+
+ def recycleTry(a:Mat, b:SMat):SMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[SMat]
+
+ def recycleTry(a:Mat, b:SDMat):SDMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[SDMat]
+
+ def recycleTry(a:Mat, b:GMat):GMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[GMat]
+
+ def recycleTry(a:Mat, b:GIMat):GIMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[GIMat]
+
+ def recycleTry(a:Mat, b:GSMat):GSMat = recycleTry(a, b.nrows, b.ncols, b:Mat, b.nnz).asInstanceOf[GSMat]
+
+ def recycleTry(a:Mat, b:Mat):Mat = recycleTry(a, b.nrows, b.ncols, b, b.nnz)
+
+ // Three-argument forms: the requested shape is the per-dimension maximum of b and c
+ // (presumably for binary operations where one operand is expanded); nnz comes from b only.
+ def recycleTry(a:Mat, b:FMat, c:FMat):FMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[FMat];
+
+ def recycleTry(a:Mat, b:DMat, c:DMat):DMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[DMat];
+
+ def recycleTry(a:Mat, b:IMat, c:IMat):IMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[IMat];
+
+ def recycleTry(a:Mat, b:SMat, c:SMat):SMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[SMat];
+
+ def recycleTry(a:Mat, b:SDMat, c:SDMat):SDMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[SDMat];
+
+ def recycleTry(a:Mat, b:GMat, c:GMat):GMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[GMat];
+
+ def recycleTry(a:Mat, b:GIMat, c:GIMat):GIMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[GIMat];
+
+ def recycleTry(a:Mat, b:GSMat, c:GSMat):GSMat =
+ recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b:Mat, b.nnz).asInstanceOf[GSMat];
+
+ def recycleTry(a:Mat, b:Mat, c:Mat):Mat = recycleTry(a, math.max(b.nrows, c.nrows), math.max(b.ncols, c.ncols), b, b.nnz)
+
+ // DMat search/sort helpers. Each forwards to the matrix or to DenseMat and wraps the
+ // result as a DMat. The Boolean passed to DenseMat.sort/sort2/sortlex is true for the plain
+ // variants and false for the *down variants (presumably ascending vs. descending).
+ def find(a:DMat) = a.find
+ def find2(a:DMat) = a.find2
+ def find3(a:DMat) = a.find3
+ // The three-argument accum is the four-argument one with nc fixed to 1.
+ def accum(inds:IMat, vals:DMat, nr:Int, nc:Int) = DMat(DenseMat.accum(inds, vals, nr, nc))
+ def accum(inds:IMat, vals:DMat, nr:Int) = DMat(DenseMat.accum(inds, vals, nr, 1))
+ // When ind is omitted, 0 is passed to DenseMat.sort.
+ def sort(a:DMat, ind:Int):DMat = DMat(DenseMat.sort(a, ind, true))
+ def sort(a:DMat):DMat = DMat(DenseMat.sort(a, 0, true))
+ // The *2 variants return the sorted values together with an index matrix.
+ def sort2(a:DMat):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (DMat(d), i)}
+ def sort2(a:DMat,dir:Int):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, true); (DMat(d), i)}
+ def sortdown(a:DMat, ind:Int):DMat = DMat(DenseMat.sort(a, ind, false))
+ def sortdown(a:DMat):DMat = DMat(DenseMat.sort(a, 0, false))
+ def sortdown2(a:DMat):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (DMat(d), i)}
+ def sortdown2(a:DMat, dir:Int):(DMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, false); (DMat(d), i)}
+ // Row sorting: sortlex yields the row order ii, which is then used to select the rows of a.
+ def sortrows(a:DMat):(DMat, IMat) = { val ii = DenseMat.sortlex(a, true); (a(ii,?), ii) }
+ def sortrowsdown(a:DMat):(DMat, IMat) = { val ii = DenseMat.sortlex(a, false); (a(ii,?), ii) }
+ def sortlex(a:DMat):IMat = DenseMat.sortlex(a, true)
+ def sortlexdown(a:DMat):IMat = DenseMat.sortlex(a, false)
+ def uniquerows(a:DMat):(DMat, IMat, IMat) = { val (ii, jj) = DenseMat.uniquerows2(a) ; (a(ii,?), ii, jj)}
+ // NOTE(review): unique2 runs on a itself when a has a single row or column and on a(?)
+ // otherwise, yet ii is then applied as a ROW selector (a(ii,?)). Confirm this is intended
+ // for inputs that are not column vectors.
+ def unique(a:DMat):(DMat, IMat, IMat) = {val (ii, jj) = DenseMat.unique2(if (math.min(a.nrows,a.ncols)==1) a else a(?)) ; (a(ii,?), ii, jj)}
+
+ // FMat search/sort helpers. Each forwards to the matrix or to DenseMat and wraps the
+ // result as an FMat. The Boolean passed to DenseMat.sort/sort2/sortlex is true for the plain
+ // variants and false for the *down variants (presumably ascending vs. descending).
+ def find(a:FMat) = a.find
+ def find2(a:FMat) = a.find2
+ def find3(a:FMat) = a.find3
+ // The three-argument accum is the four-argument one with nc fixed to 1.
+ def accum(inds:IMat, vals:FMat, nr:Int, nc:Int) = FMat(DenseMat.accum(inds, vals, nr, nc))
+ def accum(inds:IMat, vals:FMat, nr:Int) = FMat(DenseMat.accum(inds, vals, nr, 1))
+ // When ind is omitted, 0 is passed to DenseMat.sort.
+ def sort(a:FMat, ind:Int):FMat = FMat(DenseMat.sort(a, ind, true))
+ def sort(a:FMat):FMat = FMat(DenseMat.sort(a, 0, true))
+ // The *2 variants return the sorted values together with an index matrix.
+ def sort2(a:FMat):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (FMat(d), i)}
+ def sort2(a:FMat,dir:Int):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, true); (FMat(d), i)}
+ def sortdown(a:FMat, ind:Int):FMat = FMat(DenseMat.sort(a, ind, false))
+ def sortdown(a:FMat):FMat = FMat(DenseMat.sort(a, 0, false))
+ def sortdown2(a:FMat):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (FMat(d), i)}
+ def sortdown2(a:FMat, dir:Int):(FMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, false); (FMat(d), i)}
+ // Row sorting: sortlex yields the row order ii, which is then used to select the rows of a.
+ def sortrows(a:FMat):(FMat, IMat) = { val ii = DenseMat.sortlex(a, true); (a(ii,?), ii) }
+ def sortrowsdown(a:FMat):(FMat, IMat) = { val ii = DenseMat.sortlex(a, false); (a(ii,?), ii) }
+ def sortlex(a:FMat):IMat = DenseMat.sortlex(a, true)
+ def sortlexdown(a:FMat):IMat = DenseMat.sortlex(a, false)
+ def uniquerows(a:FMat):(FMat, IMat, IMat) = { val (ii, jj) = DenseMat.uniquerows2(a) ; (a(ii,?), ii, jj)}
+ // NOTE(review): unique2 runs on a itself when a has a single row or column and on a(?)
+ // otherwise, yet ii is then applied as a ROW selector (a(ii,?)). Confirm this is intended
+ // for inputs that are not column vectors.
+ def unique(a:FMat):(FMat, IMat, IMat) = {val (ii, jj) = DenseMat.unique2(if (math.min(a.nrows,a.ncols)==1) a else a(?)) ; (a(ii,?), ii, jj)}
+
+ // IMat search/sort helpers. Each forwards to the matrix or to DenseMat and wraps the
+ // result as an IMat. The Boolean passed to DenseMat.sort/sort2/sortlex is true for the plain
+ // variants and false for the *down variants (presumably ascending vs. descending).
+ def find(a:IMat) = a.find
+ def find2(a:IMat) = a.find2
+ def find3(a:IMat) = a.find3
+ // The three-argument accum is the four-argument one with nc fixed to 1.
+ def accum(inds:IMat, vals:IMat, nr:Int, nc:Int) = IMat(DenseMat.accum(inds, vals, nr, nc))
+ def accum(inds:IMat, vals:IMat, nr:Int) = IMat(DenseMat.accum(inds, vals, nr, 1))
+ // When ind is omitted, 0 is passed to DenseMat.sort.
+ def sort(a:IMat, ind:Int):IMat = IMat(DenseMat.sort(a, ind, true))
+ def sort(a:IMat):IMat = IMat(DenseMat.sort(a, 0, true))
+ // The *2 variants return the sorted values together with an index matrix.
+ def sort2(a:IMat):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (IMat(d), i)}
+ def sort2(a:IMat,dir:Int):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, true); (IMat(d), i)}
+ def sortdown(a:IMat, ind:Int):IMat = IMat(DenseMat.sort(a, ind, false))
+ def sortdown(a:IMat):IMat = IMat(DenseMat.sort(a, 0, false))
+ def sortdown2(a:IMat):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (IMat(d), i)}
+ def sortdown2(a:IMat, dir:Int):(IMat, IMat) = {val (d,i) = DenseMat.sort2(a, dir, false); (IMat(d), i)}
+ // Row sorting: sortlex yields the row order ii, which is then used to select the rows of a.
+ def sortrows(a:IMat):(IMat, IMat) = { val ii = DenseMat.sortlex(a, true); (a(ii,?), ii) }
+ def sortrowsdown(a:IMat):(IMat, IMat) = { val ii = DenseMat.sortlex(a, false); (a(ii,?), ii) }
+ // The element type is spelled out here, unlike in the sibling overloads.
+ def sortlex(a:IMat):IMat = DenseMat.sortlex[Int](a, true)
+ def sortlexdown(a:IMat):IMat = DenseMat.sortlex(a, false)
+ def uniquerows(a:IMat):(IMat, IMat, IMat) = { val (ii, jj) = DenseMat.uniquerows2(a) ; (a(ii,?), ii, jj)}
+ // NOTE(review): unique2 runs on a itself when a has a single row or column and on a(?)
+ // otherwise, yet ii is then applied as a ROW selector (a(ii,?)). Confirm this is intended
+ // for inputs that are not column vectors.
+ def unique(a:IMat):(IMat, IMat, IMat) = {val (ii, jj) = DenseMat.unique2(if (math.min(a.nrows,a.ncols)==1) a else a(?)) ; (a(ii,?), ii, jj)}
+
+ // CSMat (string matrix) search/sort helpers; same pattern as the numeric families, but
+ // without accum, the dir-taking sort2 variants and unique.
+ def find(a:CSMat) = a.find
+ def find2(a:CSMat) = a.find2
+ def find3(a:CSMat) = a.find3
+ def sort(a:CSMat, ind:Int):CSMat = CSMat(DenseMat.sort(a, ind, true))
+ def sort(a:CSMat):CSMat = CSMat(DenseMat.sort(a, 0, true))
+ def sort2(a:CSMat):(CSMat, IMat) = {val (d,i) = DenseMat.sort2(a, true); (CSMat(d), i)}
+ def sortdown(a:CSMat, ind:Int):CSMat = CSMat(DenseMat.sort(a, ind, false))
+ def sortdown(a:CSMat):CSMat = CSMat(DenseMat.sort(a, 0, false))
+ def sortdown2(a:CSMat):(CSMat, IMat) = {val (d,i) = DenseMat.sort2(a, false); (CSMat(d), i)}
+ def sortrows(a:CSMat):(CSMat, IMat) = { val ii = DenseMat.sortlex(a, true); (a(ii,?), ii) }
+ def sortrowsdown(a:CSMat):(CSMat, IMat) = { val ii = DenseMat.sortlex(a, false); (a(ii,?), ii) }
+ def sortlex(a:CSMat):IMat = DenseMat.sortlex(a, true)
+ def sortlexdown(a:CSMat):IMat = DenseMat.sortlex(a, false)
+ def uniquerows(a:CSMat):(CSMat, IMat, IMat) = { val (ii, jj) = DenseMat.uniquerows2(a) ; (a(ii,?), ii, jj)}
+
+ // Sparse matrices only get the find family, forwarded to the matrix itself.
+ def find(a:SDMat) = a.find
+ def find2(a:SDMat) = a.find2
+ def find3(a:SDMat) = a.find3
+
+ def find(a:SMat) = a.find
+ def find2(a:SMat) = a.find2
+ def find3(a:SMat) = a.find3
+
+ /**
+  * Inverts the permutation(s) stored in a. A single-row input is treated as one permutation
+  * of length a.ncols; otherwise every column of a is inverted on its own. For each
+  * permutation, position a(k) of the result receives k. The result has the shape of a.
+  */
+ def invperm(a:IMat):IMat = {
+   val result = IMat(a.nrows, a.ncols)
+   val singleRow = a.nrows == 1
+   val permLen = if (singleRow) a.ncols else a.nrows
+   val permCount = if (singleRow) 1 else a.ncols
+   var p = 0
+   while (p < permCount) {
+     // Offset of permutation p inside the data array.
+     val base = p*permLen
+     var k = 0
+     while (k < permLen) {
+       result.data(a.data(k + base) + base) = k
+       k += 1
+     }
+     p += 1
+   }
+   result
+ }
+
+ // DMat constructors. drow builds a 1 x n row, dcol an n x 1 column; in both cases the
+ // values are copied into the new matrix's data array in order.
+ def drow(x:Array[Double]):DMat = {
+ val mat = DMat(1,x.length)
+ System.arraycopy(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ def drow(x:List[Double]):DMat = {
+ val mat = DMat(1,x.length)
+ x.copyToArray(mat.data)
+ mat
+ }
+
+ def drow(args:Double*):DMat = drow(args.toArray)
+
+ // The Int elements of the range are widened to Double on assignment.
+ def drow(x:Range):DMat = {
+ val mat = DMat(1,x.length)
+ for (i <- 0 until x.length)
+ mat.data(i) = x(i)
+ mat
+ }
+
+ def dcol(x:Range):DMat = {
+ val mat = DMat(x.length,1)
+ for (i <- 0 until x.length)
+ mat.data(i) = x(i)
+ mat
+ }
+
+ def dcol(x:List[Double]):DMat = {
+ val mat = DMat(x.length,1)
+ x.copyToArray(mat.data)
+ mat
+ }
+
+ def dcol(args:Double*):DMat = {
+ dcol(args.toList)
+ }
+
+ // Returns DMat(nr,nc) unchanged; presumably a new matrix starts out zero-filled.
+ def dzeros(nr:Int, nc:Int):DMat = {
+ DMat(nr,nc)
+ }
+
+ // nr x nc matrix with every element set to 1.
+ def dones(nr:Int, nc:Int):DMat = {
+ val out = DMat(nr,nc)
+ var i = 0
+ while (i < out.length) {
+ out.data(i) = 1
+ i += 1
+ }
+ out
+ }
+
+ // FMat constructors. row builds a 1 x n row, col an n x 1 column. Float input is copied
+ // directly; other numeric input is converted to Float element by element.
+ def row(x:Array[Float]):FMat = {
+ val mat = FMat(1,x.length)
+ System.arraycopy(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ def row(x:Array[Double]):FMat = {
+ val mat = FMat(1,x.length)
+ Mat.copyToFloatArray(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ def row(x:Array[Int]):FMat = {
+ val mat = FMat(1,x.length)
+ Mat.copyToFloatArray(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ // Accepts a list of any element type that has a Numeric instance.
+ def row[T](x:List[T])(implicit numeric : Numeric[T]):FMat = {
+ val mat = FMat(1, x.length)
+ Mat.copyListToFloatArray(x, mat.data)
+ mat
+ }
+
+ // Varargs form; this is the overload the scalar implicit conversions call.
+ def row[T](x:T*)(implicit numeric : Numeric[T]):FMat = row(x.toList)
+
+ def row(x:Range):FMat = {
+ val mat = FMat(1,x.length)
+ for (i <- 0 until x.length)
+ mat.data(i) = x(i)
+ mat
+ }
+
+ def col(x:Array[Float]):FMat = {
+ val mat = FMat(x.length, 1)
+ System.arraycopy(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ def col(x:Array[Double]):FMat = {
+ val mat = FMat(x.length, 1)
+ Mat.copyToFloatArray(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ def col(x:Array[Int]):FMat = {
+ val mat = FMat(x.length, 1)
+ Mat.copyToFloatArray(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ def col[T](x:List[T])(implicit numeric : Numeric[T]):FMat = {
+ val mat = FMat(x.length, 1)
+ Mat.copyListToFloatArray(x, mat.data)
+ mat
+ }
+
+ def col[T](x:T*)(implicit numeric : Numeric[T]):FMat = col(x.toList)
+
+ def col(x:Range):FMat = {
+ val mat = FMat(x.length,1)
+ for (i <- 0 until x.length)
+ mat.data(i) = x(i)
+ mat
+ }
+
+ // Returns FMat(nr,nc) unchanged; presumably a new matrix starts out zero-filled.
+ def zeros(nr:Int, nc:Int):FMat = FMat(nr,nc)
+
+ // nr x nc matrix with every element set to 1.
+ def ones(nr:Int, nc:Int):FMat = {
+ val out = FMat(nr,nc)
+ var i = 0
+ while (i < out.length) {
+ out.data(i) = 1
+ i += 1
+ }
+ out
+ }
+
+ // IMat constructors. irow builds a 1 x n row, icol an n x 1 column; the values are
+ // copied into the new matrix's data array in order.
+ def irow(x:Range):IMat = {
+ val mat = IMat(1,x.length)
+ for (i <- 0 until x.length)
+ mat.data(i) = x(i)
+ mat
+ }
+
+ // A pair (lo, hi) stands for the range lo until hi, i.e. hi itself is excluded.
+ def irow(x:Tuple2[Int,Int]):IMat = irow(x._1 until x._2)
+
+ def irow(x:Array[Int]):IMat = {
+ val mat = IMat(1,x.length)
+ System.arraycopy(x, 0, mat.data, 0, x.length)
+ mat
+ }
+
+ def irow(x:List[Int]):IMat = {
+ val mat = IMat(1,x.length)
+ x.copyToArray(mat.data)
+ mat
+ }
+
+ def irow(args:Int*):IMat = {
+ irow(args.toList)
+ }
+
+ def icol(x:Range):IMat = {
+ val mat = IMat(x.length,1)
+ for (i <- 0 until x.length)
+ mat.data(i) = x(i)
+ mat
+ }
+
+ // A pair (lo, hi) stands for the range lo until hi, i.e. hi itself is excluded.
+ def icol(x:Tuple2[Int,Int]):IMat = icol(x._1 until x._2)
+
+ def icol(x:List[Int]):IMat = {
+ val mat = IMat(x.length,1)
+ x.copyToArray(mat.data)
+ mat
+ }
+
+ def icol(args:Int*):IMat = {
+ icol(args.toList)
+ }
+
+ // Returns IMat(nr,nc) unchanged; presumably a new matrix starts out zero-filled.
+ def izeros(nr:Int, nc:Int):IMat = {
+ IMat(nr,nc)
+ }
+
+ // nr x nc matrix with every element set to 1.
+ def iones(nr:Int, nc:Int):IMat = {
+ val out = IMat(nr,nc)
+ var i = 0
+ while (i < out.length) {
+ out.data(i) = 1
+ i += 1
+ }
+ out
+ }
+
+ // CSMat (string matrix) constructors: crow builds a 1 x n row, ccol an n x 1 column.
+ def crow(x:List[String]):CSMat = {
+ val mat = CSMat(1, x.length)
+ x.copyToArray(mat.data)
+ mat
+ }
+
+ def crow(args:String*):CSMat = {
+ crow(args.toList)
+ }
+
+ def ccol(x:List[String]):CSMat = {
+ val mat = CSMat(x.length,1)
+ x.copyToArray(mat.data)
+ mat
+ }
+
+ def ccol(args:String*):CSMat = {
+ ccol(args.toList)
+ }
+
+ // "Blank" matrices: 0x0 instances of each matrix type, built with null storage arrays.
+ // recycleTry treats a 0x0 matrix as "no output supplied". Each call returns a new instance.
+ def blank = new Mat(0,0)
+
+ def fblank = new FMat(0,0,null)
+
+ def dblank = new DMat(0,0,null)
+
+ def cblank = new CMat(0,0,null)
+
+ def iblank = new IMat(0,0,null)
+
+ def sblank = new SMat(0,0,0,null,null,null)
+
+ def sdblank = new SDMat(0,0,0,null,null,null)
+
+ def gblank = new GMat(0,0,null,0)
+
+ def giblank = new GIMat(0,0,null,0)
+
+ def gsblank = new GSMat(0,0,0,null,null,null,0)
+
+
+ // Dense-to-sparse conversion for DMat. find3 supplies the row indices, column indices and
+ // values of the entries to keep (presumably the non-zeros); one sparse slot is allocated per entry.
+ def sparse(a:DMat):SDMat = {
+ val (ii, jj, vv) = a.find3
+ val out = SDMat(a.nrows, a.ncols, ii.nrows)
+ var i = 0
+ val ioff = Mat.ioneBased
+ // Stored row indices are shifted by the global index offset Mat.ioneBased.
+ while (i < ii.nrows) {out.ir(i) = ii.data(i) + ioff; i+= 1}
+ // Fills the column-pointer array out.jc from the per-entry column indices.
+ // NOTE(review): the entry count passed here is a.nnz while the rest of the function uses ii.nrows.
+ SparseMat.compressInds(jj.data, a.ncols, out.jc, a.nnz)
+ System.arraycopy(vv.data, 0, out.data, 0, ii.nrows)
+ out
+ }
+
+ // Dense-to-sparse conversion for FMat; same steps as the DMat version above.
+ def sparse(a:FMat):SMat = {
+ val (ii, jj, vv) = a.find3
+ val out = SMat(a.nrows, a.ncols, ii.nrows)
+ var i = 0
+ val ioff = Mat.ioneBased
+ while (i < ii.nrows) {out.ir(i) = ii.data(i) + ioff; i+= 1}
+ SparseMat.compressInds(jj.data, a.ncols, out.jc, a.nnz)
+ System.arraycopy(vv.data, 0, out.data, 0, ii.nrows)
+ out
+ }
+
+ // Builds an nr x nc sparse matrix from (row, column, value) triples.
+ def sparse(ii:IMat, jj:IMat, vv:DMat, nr:Int, nc:Int):SDMat = {
+ SDMat(SparseMat.sparseImpl[Double](ii.data, jj.data, vv.data, nr, nc))
+ }
+
+ // Max-reduction over an IMat via iiReduceOp; callers read the scalar result through .v.
+ def _maxi(a:IMat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), null)
+
+ // As above with the shape inferred as (largest row index + 1) x (largest column index + 1),
+ // i.e. the indices are taken to be zero-based.
+ def sparse(ii:IMat, jj:IMat, vv:DMat):SDMat = {
+ SDMat(SparseMat.sparseImpl[Double](ii.data, jj.data, vv.data, _maxi(ii).v+1, _maxi(jj).v+1))
+ }
+
+ // Float versions of the two triple-based constructors.
+ def sparse(ii:IMat, jj:IMat, vv:FMat, nr:Int, nc:Int):SMat = {
+ SMat(SparseMat.sparseImpl[Float](ii.data, jj.data, vv.data, nr, nc))
+ }
+
+ def sparse(ii:IMat, jj:IMat, vv:FMat):SMat = {
+ SMat(SparseMat.sparseImpl[Float](ii.data, jj.data, vv.data, _maxi(ii).v+1, _maxi(jj).v+1))
+ }
+
+ // full converts a matrix to dense form. Dense inputs are returned as is (same instance).
+ def full(a:DMat):DMat = a
+
+ def full(a:FMat):FMat = a
+
+ // Sparse inputs are expanded through their own full member and wrapped in the dense type.
+ def full(sd:SDMat):DMat = DMat(sd.full)
+
+ def full(ss:SMat):FMat = FMat(ss.full)
+
+ // Runtime-typed variant: dispatches on the dynamic type of a. Types other than the five
+ // listed are not matched and fail with a MatchError.
+ def full(a:Mat):Mat = a match {
+ case aa:DMat => a
+ case aa:FMat => a
+ case aa:IMat => a
+ case aa:SMat => full(aa):FMat
+ case aa:SDMat => full(aa):DMat
+ }
+
+ // Worker for DDS: processes columns istart until iend of the sparse pattern c.
+ // For every stored entry j in column i of c, with stored row index r = c.ir(j)-ioff, it
+ // computes the dot product of column r of a with column i of b (both addressed as
+ // column*a.nrows in the flat data arrays) and stores it in out.data(j). The index structure
+ // of c (ir entries and jc(i+1)) is copied to out; out.jc(0) is left to the caller.
+ // ioff is the index offset used in c.ir and c.jc.
+ def DDShelper(a:FMat, b:FMat, c:SMat, out:SMat, istart:Int, iend:Int, ioff:Int) = {
+ var i = istart
+ while (i < iend) {
+ var j = c.jc(i)-ioff
+ while (j < c.jc(i+1)-ioff) {
+ var dsum = 0.0f
+ val a0 = (c.ir(j)-ioff)*a.nrows
+ // b's column offset uses a.nrows; DDS has verified that a.nrows == b.nrows.
+ val b0 = i*a.nrows
+ // Columns shorter than 256 (or Mat.noMKL set) use the plain loop instead of native sdotxx.
+ if (Mat.noMKL || a.nrows < 256) {
+ var k = 0
+ while (k < a.nrows) {
+ dsum += a.data(k + a0) * b.data(k + b0)
+ k += 1
+ }
+ } else {
+ dsum = sdotxx(a.nrows, a.data, a0, b.data, b0)
+ }
+ out.data(j) = dsum
+ out.ir(j) = c.ir(j)
+ j += 1
+ }
+ out.jc(i+1) = c.jc(i+1)
+ i += 1
+ }
+ }
+
+ /**
+  * Evaluates A'*B only at the positions stored in the sparse matrix c.
+  *
+  * The result has the index structure of c; each stored entry (r, i) holds the dot product
+  * of column r of a and column i of b (see DDShelper). The output matrix is obtained from
+  * SMat.newOrCheckSMat(c, omat). Mat.nflops is increased by 2 * c.nnz * a.nrows.
+  *
+  * When c has more than 100000 stored entries and Mat.numThreads > 1, the columns of c are
+  * split into Mat.numThreads contiguous slices that are processed by separate actors; the
+  * call returns once all of them have finished.
+  *
+  * Throws RuntimeException when a.nrows != b.nrows, when c is not a.ncols x b.ncols, or
+  * when one of the worker actors failed.
+  */
+ def DDS(a:FMat,b:FMat,c:SMat,omat:Mat):SMat = {
+   if (a.nrows != b.nrows) {
+     throw new RuntimeException("nrows of dense A and B must match")
+   } else if (c.nrows != a.ncols || c.ncols != b.ncols) {
+     throw new RuntimeException("dims of C must match A'*B")
+   } else {
+     val out = SMat.newOrCheckSMat(c, omat)
+     Mat.nflops += 2L * c.nnz * a.nrows
+     val ioff = Mat.ioneBased
+     out.jc(0) = ioff
+     // Read the setting once so that every slice is computed from the same thread count.
+     val nthreads = Mat.numThreads
+     if (c.nnz > 100000 && nthreads > 1) {
+       // A latch replaces the former spin loop over a plain array of flags: it gives the
+       // required visibility guarantees for the workers' writes to out, and counting down in
+       // finally means a failing worker can no longer leave the caller waiting forever.
+       val latch = new java.util.concurrent.CountDownLatch(nthreads)
+       val failure = new java.util.concurrent.atomic.AtomicReference[Throwable](null)
+       for (i <- 0 until nthreads) {
+         actor {
+           try {
+             // Long arithmetic: i*c.ncols can exceed the Int range for very wide matrices.
+             val istart = (i.toLong*c.ncols/nthreads).toInt
+             val iend = ((i+1).toLong*c.ncols/nthreads).toInt
+             DDShelper(a, b, c, out, istart, iend, ioff)
+           } catch {
+             case e:Throwable => failure.compareAndSet(null, e)
+           } finally {
+             latch.countDown()
+           }
+         }
+       }
+       latch.await()
+       val err = failure.get
+       if (err != null) {
+         throw new RuntimeException("DDS worker failed", err)
+       }
+     } else {
+       DDShelper(a, b, c, out, 0, c.ncols, ioff)
+     }
+     out
+   }
+ }
+
+ // GPU matrix overload: forwards to GMat.DDS.
+ def DDS(a:GMat,b:GMat,c:GSMat,omat:Mat):GSMat = GMat.DDS(a,b,c,omat)
+
+ // Runtime-typed entry point: picks the CPU or GPU implementation from the dynamic types of
+ // the three operands. Any other combination is not matched and fails with a MatchError.
+ // omat is optional and defaults to null.
+ def DDS(a:Mat, b:Mat, c:Mat, omat:Mat=null):Mat = {
+ (a, b, c) match {
+ case (a:FMat, b:FMat, c:SMat) => DDS(a, b, c, omat):SMat
+ case (a:GMat, b:GMat, c:GSMat) => GMat.DDS(a, b, c, omat):GSMat
+ }
+ }
+
+ /**
+  * Variant of DDS that stores s / max(veps, s) instead of the dot product s itself, where s
+  * is the dot product of column r of a and column i of b for each stored entry (r, i) of c.
+  * The result is a new SMat with the index structure of c. Runs single-threaded.
+  * Mat.nflops is increased by c.nnz * a.nrows.
+  *
+  * Throws RuntimeException when a.nrows != b.nrows or when c is not a.ncols x b.ncols.
+  */
+ def DDSQ(a:FMat,b:FMat,c:SMat, veps:Float):SMat = {
+   if (a.nrows != b.nrows) {
+     throw new RuntimeException("nrows of dense A and B must match")
+   } else if (c.nrows != a.ncols || c.ncols != b.ncols) {
+     throw new RuntimeException("dims of C must match A'*B")
+   } else {
+     val out = SMat(c.nrows,c.ncols,c.nnz)
+     // Multiply as Long: the Int product c.nnz * a.nrows wraps around for large inputs
+     // before it is added to the Long counter.
+     // NOTE(review): DDS counts 2 * nnz * nrows for the same dot products; confirm which
+     // convention is intended here.
+     Mat.nflops += c.nnz.toLong * a.nrows
+     val ioff = Mat.ioneBased
+     var i = 0
+     out.jc(0) = ioff
+     while (i < c.ncols) {
+       var j = c.jc(i)-ioff
+       while (j < c.jc(i+1)-ioff) {
+         var dsum = 0.0f
+         var k = 0
+         // Offsets of the two columns in the flat data arrays.
+         val a0 = (c.ir(j)-ioff)*a.nrows
+         val b0 = i*a.nrows
+         if (Mat.noMKL) {
+           while (k < a.nrows) {
+             dsum += a.data(k + a0) * b.data(k + b0)
+             k += 1
+           }
+         } else {
+           dsum = sdotxx(a.nrows, a.data, a0, b.data, b0)
+         }
+         // Yields 1 when dsum >= veps and dsum/veps otherwise.
+         out.data(j) = dsum / math.max(veps, dsum)
+         out.ir(j) = c.ir(j)
+         j += 1
+       }
+       out.jc(i+1) = c.jc(i+1)
+       i += 1
+     }
+     out
+   }
+ }
+
+ // mkdiag/getdiag forward to the matrix's own member and wrap the result in the same type.
+ def mkdiag(a:DMat) = DMat(a.mkdiag)
+ def mkdiag(a:FMat) = FMat(a.mkdiag)
+ def mkdiag(a:IMat) = IMat(a.mkdiag)
+ def mkdiag(a:CMat) = CMat(a.mkdiag)
+
+ def getdiag(a:DMat) = DMat(a.getdiag)
+ def getdiag(a:FMat) = FMat(a.getdiag)
+ def getdiag(a:IMat) = IMat(a.getdiag)
+ def getdiag(a:CMat) = CMat(a.getdiag)
+
+ // Loads one named variable through MatHDF5.hload. The cast to T is unchecked, so a wrong
+ // type argument only shows up when the value is used.
+ def load[T](fname:String, vname:String):T = MatHDF5.hload(fname, vname).asInstanceOf[T]
+
+ // Loads two named variables in one call and returns them as a pair (unchecked casts).
+ def load[A,B](fname:String, v1:String, v2:String):(A,B) = {
+ val a = MatHDF5.hload(fname, List(v1, v2));
+ (a(0).asInstanceOf[A], a(1).asInstanceOf[B])
+ }
+
+ // Loads any number of named variables; the results are returned untyped, in argument order.
+ def loadx(fname:String, vnames:String*):List[AnyRef] = MatHDF5.hload(fname, vnames.toList)
+
+ // Both save functions pass the argument list through to MatHDF5 unchanged.
+ def saveAsHDF5(fname:String, args:AnyRef*) = MatHDF5.hsaveAsHDF5(fname, args.toList)
+
+ def saveAs(fname:String, args:AnyRef*) = MatHDF5.hsaveAs(fname, args.toList)
+
+ // Wildcard index value, used in this file as a(ii,?) and a(?).
+ final val ? = new IMatWildcard
+}
+
+
diff --git a/src/main/scala/BIDMat/MatHDF5.scala b/src/main/scala/BIDMat/MatHDF5.scala
new file mode 100755
index 00000000..c6fe3f0a
--- /dev/null
+++ b/src/main/scala/BIDMat/MatHDF5.scala
@@ -0,0 +1,510 @@
+package BIDMat
+import ncsa.hdf.hdf5lib.structs._
+import ncsa.hdf.hdf5lib.H5._
+import ncsa.hdf.hdf5lib.HDF5Constants._
+
+object MatHDF5 {
+ // Counter used by putCellMat to name entries under "/#refs#". hsaveAs/hsaveAsHDF5 reset it
+ // to -1; a negative value makes putCellMat create the group and set the counter to 1.
+ var refcount:Long = -1
+
+ // Configure chunking plus a compression filter on the dataset-creation property list
+ // dplist_id, for a dataset with extents dims.
+ //
+ // Nothing is done unless Mat.compressType > 0 and the dataset has more than 1024 elements.
+ // Mat.compressType == 1 selects deflate at Mat.compressionLevel; any other positive value
+ // selects szip with Mat.szipBlock.
+ //
+ // HDF5 only accepts filters on chunked datasets, so the chunk shape must be installed in
+ // both the 1-D and the 2-D case. The 2-D branch previously computed the chunk dims but
+ // never passed them to H5Pset_chunk.
+ def setCompressionPlist(dplist_id:Int, dims:Array[Long]) = {
+   if (Mat.compressType > 0) {
+     if (dims.length == 1) {
+       if (dims(0) > 1024) {
+         val cdims = new Array[Long](1)
+         cdims(0) = math.max(1, math.min(dims(0), Mat.chunkSize))
+         H5Pset_chunk(dplist_id, 1, cdims)
+         if (Mat.compressType == 1) {
+           H5Pset_deflate(dplist_id, Mat.compressionLevel)
+         } else {
+           H5Pset_szip(dplist_id, H5_SZIP_EC_OPTION_MASK, Mat.szipBlock)
+         }
+       }
+     } else {
+       if (dims(0)*dims(1) > 1024) {
+         val cdims = new Array[Long](2)
+         // a chunk spans whole rows of length dims(1), roughly Mat.chunkSize elements in total
+         cdims(0) = math.max(1, math.min(dims(0), 1+Mat.chunkSize/dims(1)))
+         cdims(1) = math.max(1, dims(1))
+         H5Pset_chunk(dplist_id, 2, cdims)
+         if (Mat.compressType == 1) {
+           H5Pset_deflate(dplist_id, Mat.compressionLevel)
+         } else {
+           H5Pset_szip(dplist_id, H5_SZIP_EC_OPTION_MASK, Mat.szipBlock)
+         }
+       }
+     }
+   }
+ }
+
+ // Read attribute attr_name of the object obj_name (looked up relative to id) as a string.
+ // The raw bytes are decoded with the platform default charset and trimmed.
+ def getStringAttr(id:Int, obj_name:String, attr_name:String):String = {
+ val attr_id = H5Aopen_by_name(id, obj_name, attr_name, H5P_DEFAULT, H5P_DEFAULT)
+ val attr_type_id = H5Aget_type(attr_id)
+ val attr_type_size = H5Tget_size(attr_type_id)
+ // one spare byte beyond the attribute's type size; trim() below drops the unused tail
+ val sbuf = new Array[Byte](attr_type_size + 1)
+ H5Aread(attr_id, attr_type_id, sbuf)
+ H5Tclose(attr_type_id)
+ H5Aclose(attr_id)
+ new String(sbuf).trim()
+ }
+
+ // Attach a scalar string attribute attr_name with value attr_val to the object id.
+ // NOTE(review): the type size is attr_val.length() (characters) while the data written is
+ // attr_val.getBytes() (platform default charset); the two agree only when every character
+ // encodes to a single byte.
+ def putStringAttr(id:Int, attr_name:String, attr_val:String) = {
+ val space_id = H5Screate(H5S_SCALAR)
+ val memtype_id = H5Tcopy(H5T_FORTRAN_S1)
+ H5Tset_size(memtype_id, attr_val.length())
+ val attr_id = H5Acreate(id, attr_name, memtype_id, space_id, H5P_DEFAULT, H5P_DEFAULT)
+ H5Awrite(attr_id, memtype_id, attr_val.getBytes())
+ H5Tclose(memtype_id)
+ H5Aclose(attr_id)
+ H5Sclose(space_id)
+ }
+
+ // Read attribute attr_name of the object obj_name (relative to id) and return its first
+ // 8-byte element as a Long.
+ // NOTE(review): the buffer holds attr_type_size/8 longs, so an attribute type narrower than
+ // 8 bytes yields an empty buffer and sbuf(0) would fail.
+ def getLongAttr(id:Int, obj_name:String, attr_name:String):Long = {
+ val attr_id = H5Aopen_by_name(id, obj_name, attr_name, H5P_DEFAULT, H5P_DEFAULT)
+ val attr_type_id = H5Aget_type(attr_id)
+ val attr_type_size = H5Tget_size(attr_type_id)
+ val sbuf = new Array[Long](attr_type_size/8)
+ H5Aread(attr_id, attr_type_id, sbuf)
+ H5Tclose(attr_type_id)
+ H5Aclose(attr_id)
+ sbuf(0)
+ }
+
+ // Attach a scalar native-int attribute attr_name with value attr_val to the object id.
+ def putIntAttr(id:Int, attr_name:String, attr_val:Int) = {
+   val scalarSpace = H5Screate(H5S_SCALAR)
+   val attr = H5Acreate(id, attr_name, H5T_NATIVE_INT, scalarSpace, H5P_DEFAULT, H5P_DEFAULT)
+   // one-element buffer carrying the value to write
+   val payload = Array[Int](attr_val)
+   H5Awrite(attr, H5T_NATIVE_INT, payload)
+   H5Aclose(attr)
+   H5Sclose(scalarSpace)
+ }
+
+ // Attach a scalar native-long-long attribute attr_name with value attr_val to the object id.
+ def putLongAttr(id:Int, attr_name:String, attr_val:Long) = {
+   val scalarSpace = H5Screate(H5S_SCALAR)
+   val attr = H5Acreate(id, attr_name, H5T_NATIVE_LLONG, scalarSpace, H5P_DEFAULT, H5P_DEFAULT)
+   // one-element buffer carrying the value to write
+   val payload = Array[Long](attr_val)
+   H5Awrite(attr, H5T_NATIVE_LLONG, payload)
+   H5Aclose(attr)
+   H5Sclose(scalarSpace)
+ }
+
+ // Attach a scalar unsigned-char attribute attr_name with value attr_val to the object id.
+ def putByteAttr(id:Int, attr_name:String, attr_val:Byte) = {
+   val scalarSpace = H5Screate(H5S_SCALAR)
+   val attr = H5Acreate(id, attr_name, H5T_NATIVE_UCHAR, scalarSpace, H5P_DEFAULT, H5P_DEFAULT)
+   // one-element buffer carrying the value to write
+   val payload = Array[Byte](attr_val)
+   H5Awrite(attr, H5T_NATIVE_UCHAR, payload)
+   H5Aclose(attr)
+   H5Sclose(scalarSpace)
+ }
+
+ // Return the extents of dataset data_id in a two-element array (entries the dataspace does
+ // not fill stay 0).
+ def getMatDims(data_id:Int):Array[Long] = {
+   val extents = new Array[Long](2)
+   val space = H5Dget_space(data_id)
+   H5Sget_simple_extent_dims(space, extents, null)
+   H5Sclose(space)
+   extents
+ }
+
+ // Open fname read-only, look up dataset varname and return its two extents.
+ // The stored pair is swapped on return unless the second stored extent is 0.
+ def readMatDims(fname:String, varname:String):(Long, Long) = {
+   val file = H5Fopen(fname,H5F_ACC_RDONLY,H5P_DEFAULT)
+   val dataset = H5Dopen(file, varname, H5P_DEFAULT)
+   val extents = getMatDims(dataset)
+   H5Dclose(dataset)
+   H5Fclose(file)
+   extents(1) match {
+     case 0L => (extents(0), 0L)
+     case second => (second, extents(0))
+   }
+ }
+
+ // Read dataset varname from fid into a new DenseMat[T].
+ //
+ // h5class and dsize are the HDF5 type class and element size (bytes) the stored data must
+ // have; a mismatch raises RuntimeException. The matrix is created with dims(1) rows and
+ // dims(0) columns, i.e. the stored extents are swapped.
+ //
+ // The dataset and datatype handles are released in finally blocks so they no longer leak
+ // when the type check (or the read) throws.
+ def getDenseMat[T : ClassManifest](fid:Int, varname:String, h5class:Int, dsize:Int):DenseMat[T] = {
+   val data_id = H5Dopen(fid, varname, H5P_DEFAULT)
+   try {
+     val data_type_id = H5Dget_type(data_id)
+     try {
+       val data_class = H5Tget_class(data_type_id)
+       val data_size = H5Tget_size(data_type_id)
+       val dims = getMatDims(data_id)
+       if (data_class != h5class || data_size != dsize) {
+         throw new RuntimeException("Bad class or data size "+data_class+" "+data_size)
+       }
+       val mdata = new DenseMat[T](dims(1).intValue, dims(0).intValue)
+       H5Dread(data_id, data_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, mdata.data)
+       mdata
+     } finally {
+       H5Tclose(data_type_id)
+     }
+   } finally {
+     H5Dclose(data_id)
+   }
+ }
+
+ // Read dataset varname from fid as a matrix of object references and load each referenced
+ // item through getMat, storing it as a String in a CSMat of dims(1) x dims(0).
+ // Each item is cast to String, so a referenced object of any other kind fails the cast.
+ def getCellMat(fid:Int, varname:String):CSMat = {
+ val data_id = H5Dopen(fid, varname, H5P_DEFAULT)
+ val data_type_id = H5Dget_type(data_id)
+ val data_class = H5Tget_class(data_type_id)
+ val data_size = H5Tget_size(data_type_id)
+ val dims = getMatDims(data_id)
+ var mdata:CSMat = null
+ mdata = CSMat(dims(1).intValue, dims(0).intValue)
+ // one data_size-byte buffer per stored reference
+ val bdata = new Array[Array[Byte]]((dims(0)*dims(1)).intValue)
+ for (i <- 0 until bdata.length) {
+ bdata(i) = new Array[Byte](data_size)
+ }
+ H5Dread(data_id, data_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, bdata)
+ // obj_type_out and data_class are not used further in this method
+ val obj_type_out = new Array[Int](1)
+ obj_type_out(0) = 1
+ for (i <- 0 until bdata.length) {
+ val item_id = H5Rdereference(fid, H5R_OBJECT, bdata(i))
+ // "." addresses the dereferenced object itself
+ mdata.data(i) = getMat(item_id, ".").asInstanceOf[String]
+ H5Oclose(item_id)
+ }
+ H5Tclose(data_type_id)
+ H5Dclose(data_id)
+ mdata
+ }
+
+ // Read dataset varname from fid into a byte buffer of data_size*dims(0)*dims(1) bytes and
+ // decode it as UTF_16LE text; the decoded string is trimmed.
+ def getMatString(fid:Int, varname:String):String = {
+ val data_id = H5Dopen(fid, varname, H5P_DEFAULT)
+ val data_type_id = H5Dget_type(data_id)
+ val data_class = H5Tget_class(data_type_id)
+ val data_size = H5Tget_size(data_type_id)
+ val dims = getMatDims(data_id)
+ val nrows = dims(0).intValue
+ val ncols = dims(1).intValue
+ val sbuf = new Array[Byte](data_size*nrows*ncols)
+ H5Dread(data_id, data_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, sbuf)
+ H5Tclose(data_type_id)
+ H5Dclose(data_id)
+ new String(sbuf, "UTF_16LE").trim()
+ }
+
+ // Read the sparse matrix stored under group varname (datasets "jc", "data" and optionally
+ // "ir") into a SparseMat[T].
+ // nrows comes from the "MATLAB_sparse" attribute, ncols is the length of jc minus one and
+ // nnz is the length of data. jc/ir are read as native ints and then passed through addOne.
+ def getSparseMat[T](fid:Int, varname:String)(implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] = {
+ val nrows = getLongAttr(fid, varname, "MATLAB_sparse").intValue
+ val jc_id = H5Dopen(fid, varname+"/jc", H5P_DEFAULT)
+ val ncols = getMatDims(jc_id)(0).intValue - 1
+ val data_id = H5Dopen(fid, varname+"/data", H5P_DEFAULT)
+ val data_type_id = H5Dget_type(data_id)
+ val nnz = getMatDims(data_id)(0).intValue
+ // "ir" is optional: any failure to open it is swallowed and leaves ir_id negative
+ var ir_id = -1
+ try {
+ ir_id = H5Dopen(fid, varname+"/ir", H5P_DEFAULT)
+ } catch {
+ case _ => {}
+ }
+ val sdata = if (ir_id >= 0) {
+ SparseMat(nrows, ncols, nnz)
+ } else {
+ SparseMat.noRows(nrows, ncols, nnz)
+ }
+ // memory type used for reading both index arrays
+ val convert_ints = H5Tcopy(H5T_NATIVE_INT)
+ H5Dread_int(jc_id, convert_ints, H5S_ALL, H5S_ALL, H5P_DEFAULT, sdata.jc)
+ addOne(sdata.jc)
+ H5Dclose(jc_id)
+ if (ir_id >= 0) {
+ H5Dread_int(ir_id, convert_ints, H5S_ALL, H5S_ALL, H5P_DEFAULT, sdata.ir)
+ addOne(sdata.ir)
+ H5Dclose(ir_id)
+ }
+ H5Tclose(convert_ints)
+ H5Dread(data_id, data_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, sdata.data)
+ H5Tclose(data_type_id)
+ H5Dclose(data_id)
+ sdata
+ }
+
+ // Load the variable varname from fid, choosing the reader from its "MATLAB_class" attribute
+ // and from whether a "MATLAB_sparse" attribute is present.
+ // Throws RuntimeException when the variable (or its class attribute) is missing, when the
+ // class is not recognised, or when the dense/sparse combination is unsupported.
+ def getMat(fid:Int, varname:String):AnyRef = {
+   if (fid <= 0 || !H5Aexists_by_name(fid, varname, "MATLAB_class", H5P_DEFAULT)) {
+     throw new RuntimeException("Couldnt find matlab var named "+varname)
+   }
+   val matlabClass = getStringAttr(fid, varname, "MATLAB_class")
+   // evaluated lazily, and only for recognised classes
+   def isSparse = H5Aexists_by_name(fid, varname, "MATLAB_sparse", H5P_DEFAULT)
+   matlabClass match {
+     case "double" =>
+       if (isSparse) SDMat(getSparseMat[Double](fid, varname))
+       else DMat(getDenseMat[Double](fid, varname, H5T_FLOAT, 8))
+     case "single" =>
+       if (isSparse) SMat(getSparseMat[Float](fid, varname))
+       else FMat(getDenseMat[Float](fid, varname, H5T_FLOAT, 4))
+     case "int32" =>
+       if (isSparse) throw new RuntimeException("Sparse arrays of ints unsupported")
+       else IMat(getDenseMat[Int](fid, varname, H5T_INTEGER, 4))
+     case "int8" =>
+       if (isSparse) BMat(getSparseMat[Byte](fid, varname))
+       else throw new RuntimeException("Dense arrays of bytes unsupported")
+     case "char" =>
+       if (isSparse) throw new RuntimeException("Sparse arrays of char unsupported")
+       else getMatString(fid, varname)
+     case "cell" =>
+       if (isSparse) throw new RuntimeException("Sparse cell arrays unsupported")
+       else getCellMat(fid, varname)
+     case other =>
+       throw new RuntimeException("Couldnt read storage class "+other)
+   }
+ }
+
+ // Write a 512-byte header block at the start of file fname.
+ // The first 116 bytes hold a space-padded description string (truncated to 116 bytes);
+ // byte 125 is set to 2 and bytes 126/127 to 0x49/0x4D ('I','M'); everything else is zero.
+ def writeMatHeader(fname:String) = {
+ val ff = new java.io.RandomAccessFile(fname,"rws")
+ val sp = new scala.sys.SystemProperties()
+ val hstring = "MATLAB 7.3 MAT-file, Platform: "+sp.get("os.arch").get+" "+sp.get("os.name").get+" "+sp.get("os.version").get+ " "+
+ "Created by BIDMat on "+(new java.text.SimpleDateFormat("EEE MMM d HH:mm:ss yyyy")).format(new java.util.Date())+
+ " HDF5 Schema 1.0 ."
+ val hb = hstring.getBytes()
+ val hbytes = new Array[Byte](512)
+ // 32 is the ASCII space used as padding for the text area
+ for (i <- 0 until 116) hbytes(i) = 32
+ System.arraycopy(hb, 0, hbytes, 0, math.min(hstring.length(), 116))
+ val version:Byte = 2
+ hbytes(125) = version
+ hbytes(126) = 0x49
+ hbytes(127) = 0x4D
+ ff.write(hbytes)
+ // ff.write(emptyHDF5file)
+ ff.close()
+ }
+
+ // Write dense matrix a as dataset "/"+aname under fid with HDF5 type h5class, tag it with
+ // the "MATLAB_class" attribute matclass, and return an object reference to the dataset.
+ // The dataspace extents are (ncols, nrows). Compression setup is currently commented out.
+ def putDenseMat[T](fid:Int, a:DenseMat[T], aname:String, h5class:Int, matclass:String):Array[Byte] = {
+ val dims = new Array[Long](2)
+ dims(0) = a.ncols
+ dims(1) = a.nrows
+ val filespace_id = H5Screate_simple(2, dims, null)
+ val dplist_id = H5Pcreate(H5P_DATASET_CREATE)
+// setCompressionPlist(dplist_id, dims)
+ val dataset_id = H5Dcreate(fid, "/"+aname, h5class, filespace_id, H5P_DEFAULT, dplist_id, H5P_DEFAULT)
+ H5Dwrite(dataset_id, h5class, H5S_ALL, H5S_ALL, H5P_DEFAULT, a.data)
+ H5Pclose(dplist_id)
+ putStringAttr(dataset_id, "MATLAB_class", matclass)
+ val ref = H5Rcreate(dataset_id, ".", H5R_OBJECT, -1)
+ H5Dclose(dataset_id)
+ H5Sclose(filespace_id)
+ ref
+ }
+
+ // Create the dataset "0" under id: two unsigned 64-bit zeros, tagged with
+ // MATLAB_class = "canonical empty" and MATLAB_empty = 1. Returns an object reference to it.
+ //
+ // The write buffer must hold as many elements as the dataspace (2). The previous
+ // `Array[Long](2)` built a ONE-element array containing the value 2, so H5Dwrite was handed
+ // a buffer half the size of the dataset.
+ def putEmptyRef(id:Int):Array[Byte] = {
+   val dims = new Array[Long](1)
+   dims(0) = 2
+   // two zero-initialised elements, matching dims(0)
+   val tmp = new Array[Long](2)
+   val dmatspace_id = H5Screate_simple(1, dims, null)
+   val dmat_id = H5Dcreate(id, "0", H5T_NATIVE_ULLONG, dmatspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)
+   H5Dwrite(dmat_id, H5T_NATIVE_ULLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, tmp)
+   putStringAttr(dmat_id, "MATLAB_class", "canonical empty")
+   putByteAttr(dmat_id, "MATLAB_empty", 1)
+   val ref = H5Rcreate(dmat_id, ".", H5R_OBJECT, -1)
+   H5Dclose(dmat_id)
+   H5Sclose(dmatspace_id)
+   ref
+ }
+
+ // Write the cell matrix a as dataset varname under fid.
+ // Each element is stored through putMat inside the "/#refs#" group under a hexadecimal name
+ // taken from refcount; varname itself becomes an (ncols x nrows) dataset of object
+ // references tagged MATLAB_class = "cell". Returns an object reference to that dataset.
+ def putCellMat(fid:Int, varname:String, a:CSMat) = {
+ var group_id = 0
+ // first cell matrix written since refcount was reset: create the group and its "0" entry
+ if (refcount < 0) {
+ group_id = H5Gcreate(fid, "/#refs#", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)
+ putEmptyRef(group_id)
+ refcount = 1
+ } else {
+ group_id = H5Gopen(fid, "/#refs#", H5P_DEFAULT)
+ }
+ val refIds = new Array[Array[Byte]](a.length)
+ for (i <- 0 until a.length) {
+ val newname = "%x" format refcount
+ refcount += 1
+ refIds(i) = putMat(group_id, a.data(i), newname)
+ }
+ val dims = new Array[Long](2)
+ dims(0) = a.ncols
+ dims(1) = a.nrows
+ val dplist_id = H5Pcreate(H5P_DATASET_CREATE)
+ setCompressionPlist(dplist_id, dims)
+ val refspace_id = H5Screate_simple(2, dims, null)
+ val refs_id = H5Dcreate(fid, varname, H5T_STD_REF_OBJ, refspace_id, H5P_DEFAULT, dplist_id, H5P_DEFAULT)
+ H5Dwrite(refs_id, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, refIds)
+ putStringAttr(refs_id, "MATLAB_class", "cell")
+ val ref = H5Rcreate(refs_id, ".", H5R_OBJECT, -1)
+ H5Dclose(refs_id)
+ H5Sclose(refspace_id)
+ H5Pclose(dplist_id)
+ H5Gclose(group_id)
+ ref
+ }
+
+ // Write sparse matrix a as group "/"+varname under fid, containing datasets "jc", "ir"
+ // (only when a.ir is non-null) and "data". The group carries MATLAB_class = className and
+ // MATLAB_sparse = a.nrows. Returns an object reference to the group.
+ // a.jc and a.ir are passed through subOne before being written and restored with addOne
+ // afterwards, including on the error path.
+ def putSparseMat[T](fid:Int, a:SparseMat[T], varname:String, nativeClass:Int, className:String):Array[Byte] = {
+ val dims = new Array[Long](1)
+ val group_id = H5Gcreate(fid, "/"+varname, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)
+ putStringAttr(group_id, "MATLAB_class", className)
+ putLongAttr(group_id, "MATLAB_sparse", a.nrows)
+ // memory type of the index arrays; the file datasets are created as H5T_NATIVE_LLONG
+ val convert_ints = H5Tcopy(H5T_NATIVE_INT)
+ dims(0) = a.ncols + 1
+ var dplist_id = H5Pcreate(H5P_DATASET_CREATE)
+ setCompressionPlist(dplist_id, dims)
+ val jcs_id = H5Screate_simple(1, dims, null)
+ val jc_id = H5Dcreate(group_id, "jc", H5T_NATIVE_LLONG, jcs_id, H5P_DEFAULT, dplist_id, H5P_DEFAULT)
+ subOne(a.jc)
+ try {
+ H5Dwrite(jc_id, convert_ints, H5S_ALL, H5S_ALL, H5P_DEFAULT, a.jc)
+ } catch {
+ case e => {
+ // undo the in-place adjustment before reporting the failure
+ addOne(a.jc)
+ throw new RuntimeException("Error writing sparse mat "+e)
+ }
+ }
+ addOne(a.jc)
+ H5Dclose(jc_id)
+ H5Sclose(jcs_id)
+ H5Pclose(dplist_id)
+
+ // "ir" and "data" both have a.nnz elements and share one property list
+ dims(0) = a.nnz
+ dplist_id = H5Pcreate(H5P_DATASET_CREATE)
+ setCompressionPlist(dplist_id, dims)
+ if (a.ir != null) {
+ val irs_id = H5Screate_simple(1, dims, null)
+ val ir_id = H5Dcreate(group_id, "ir", H5T_NATIVE_LLONG, irs_id, H5P_DEFAULT, dplist_id, H5P_DEFAULT)
+ subOne(a.ir)
+ try {
+ H5Dwrite(ir_id, convert_ints, H5S_ALL, H5S_ALL, H5P_DEFAULT, a.ir)
+ } catch {
+ case e => {
+ addOne(a.ir)
+ throw new RuntimeException("Error writing sparse mat "+e)
+ }
+ }
+ addOne(a.ir)
+ H5Dclose(ir_id)
+ H5Sclose(irs_id)
+ }
+
+ val dataspace_id = H5Screate_simple(1, dims, null)
+ val data_id = H5Dcreate(group_id, "data", nativeClass, dataspace_id, H5P_DEFAULT, dplist_id, H5P_DEFAULT)
+ H5Dwrite(data_id, nativeClass, H5S_ALL, H5S_ALL, H5P_DEFAULT, a.data)
+ H5Dclose(data_id)
+ H5Sclose(dataspace_id)
+ H5Pclose(dplist_id)
+ H5Tclose(convert_ints)
+ val ref = H5Rcreate(group_id, ".", H5R_OBJECT, -1)
+ H5Gclose(group_id)
+ ref
+ }
+
+ // Write str as dataset varname under id: a (str.length x 1) array of unsigned shorts filled
+ // from the string's UTF_16LE bytes, tagged MATLAB_class = "char" and MATLAB_int_decode = 2.
+ // Returns an object reference to the dataset.
+ def putMatString(id:Int, varname:String, str:String):Array[Byte] = {
+ val dims = new Array[Long](2)
+ dims(0) = str.length
+ dims(1) = 1
+ val dplist_id = H5Pcreate(H5P_DATASET_CREATE)
+ setCompressionPlist(dplist_id, dims)
+ val sbytes = str.getBytes("UTF_16LE")
+ val strspace_id = H5Screate_simple(2, dims, null)
+ val str_id = H5Dcreate(id, varname, H5T_NATIVE_USHORT, strspace_id, H5P_DEFAULT, dplist_id, H5P_DEFAULT)
+ putStringAttr(str_id, "MATLAB_class", "char")
+ putIntAttr(str_id, "MATLAB_int_decode", 2)
+ H5Dwrite(str_id, H5T_NATIVE_USHORT, H5S_ALL, H5S_ALL, H5P_DEFAULT, sbytes)
+ val ref = H5Rcreate(str_id, ".", H5R_OBJECT, -1)
+ H5Dclose(str_id)
+ H5Sclose(strspace_id)
+ H5Pclose(dplist_id)
+ ref
+ }
+
+ // Write a under fid with the name aname, choosing the writer from a's runtime type.
+ // Returns the object reference produced by the chosen writer; any type not listed raises
+ // RuntimeException.
+ def putMat(fid:Int, a:AnyRef, aname:String):Array[Byte] = {
+ a match {
+ case aa:DMat => putDenseMat[Double](fid, aa, aname, H5T_NATIVE_DOUBLE, "double")
+ case aa:FMat => putDenseMat[Float](fid, aa, aname, H5T_NATIVE_FLOAT, "single")
+ case aa:IMat => putDenseMat[Int](fid, aa, aname, H5T_NATIVE_INT, "int32")
+ case aa:BMat => putSparseMat[Byte](fid, aa, aname, H5T_NATIVE_CHAR, "int8")
+ case aa:SMat => putSparseMat[Float](fid, aa, aname, H5T_NATIVE_FLOAT, "single")
+ case aa:SDMat => putSparseMat[Double](fid, aa, aname, H5T_NATIVE_DOUBLE, "double")
+ case aa:CSMat => putCellMat(fid, aname, aa)
+ case aa:String => putMatString(fid, aname, aa)
+ case _ => throw new RuntimeException("unsupported matrix type to save")
+ }
+ }
+
+ // Open fname read-only with the stdio file driver and load the single variable vname.
+ //
+ // The access property list and the file handle are released in finally blocks, so neither
+ // leaks when opening the file or reading the variable throws (getMat raises
+ // RuntimeException for missing or unsupported variables).
+ def hload(fname:String, vname:String):AnyRef = {
+   val fapl = H5Pcreate(H5P_FILE_ACCESS)
+   // H5Pset_fapl_core(fapl, 16*1024*1024, false); println("core driver")
+   val fid = try {
+     H5Pset_fapl_stdio(fapl) //println("stdio driver")
+     H5Fopen(fname,H5F_ACC_RDONLY,fapl)
+   } finally {
+     H5Pclose(fapl)
+   }
+   try {
+     getMat(fid, vname)
+   } finally {
+     H5Fclose(fid)
+   }
+ }
+
+ // Open fname read-only with the stdio file driver and load every variable in vnames, in
+ // order.
+ //
+ // The access property list and the file handle are released in finally blocks, so neither
+ // leaks when opening the file or reading any variable throws.
+ def hload(fname:String, vnames:List[String]):List[AnyRef] = {
+   val fapl = H5Pcreate(H5P_FILE_ACCESS)
+   // H5Pset_fapl_core(fapl, 32*1024*1024, false); println("core driver")
+   val fid = try {
+     H5Pset_fapl_stdio(fapl) //println("stdio driver")
+     H5Fopen(fname,H5F_ACC_RDONLY,fapl)
+   } finally {
+     H5Pclose(fapl)
+   }
+   try {
+     vnames.map((vname) => getMat(fid, vname))
+   } finally {
+     H5Fclose(fid)
+   }
+ }
+
+ // Create (truncating) the HDF5 file fname with the stdio driver and write the
+ // (matrix, name) pairs in args via saveAsImpl. Returns the result of H5Fclose.
+ //
+ // refcount is reset so the first cell matrix written recreates "/#refs#".
+ // The access property list is always released, and the file is closed before a failure
+ // from saveAsImpl is re-thrown, so no handle leaks on error.
+ def hsaveAsHDF5(fname:String, args:List[AnyRef]) = {
+   refcount = -1
+   val fapl_id = H5Pcreate (H5P_FILE_ACCESS)
+   val fid = try {
+     H5Pset_fapl_stdio(fapl_id)
+     H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id)
+   } finally {
+     H5Pclose(fapl_id)
+   }
+   try {
+     saveAsImpl(fid, args)
+   } catch {
+     case e => {
+       // best-effort close; the original failure is the one worth reporting
+       try { H5Fclose(fid) } catch { case _ => {} }
+       throw e
+     }
+   }
+   H5Fclose(fid)
+ }
+
+ // Create (truncating) the file fname with a 512-byte user block, write the (matrix, name)
+ // pairs in args via saveAsImpl, close the file, then fill the user block with
+ // writeMatHeader.
+ //
+ // refcount is reset so the first cell matrix written recreates "/#refs#".
+ // Both property lists are always released, and the file is closed before a failure from
+ // saveAsImpl is re-thrown; in that case the header is not written.
+ def hsaveAs(fname:String, args:List[AnyRef]) = {
+   refcount = -1
+   val fapl_id = H5Pcreate (H5P_FILE_ACCESS)
+   val fid = try {
+     H5Pset_fapl_stdio(fapl_id)
+     val fcplist_id = H5Pcreate(H5P_FILE_CREATE)
+     try {
+       H5Pset_userblock(fcplist_id, 512)
+       H5Fcreate(fname, H5F_ACC_TRUNC, fcplist_id, fapl_id)
+     } finally {
+       H5Pclose(fcplist_id)
+     }
+   } finally {
+     H5Pclose(fapl_id)
+   }
+   try {
+     saveAsImpl(fid, args)
+   } catch {
+     case e => {
+       // best-effort close; the original failure is the one worth reporting
+       try { H5Fclose(fid) } catch { case _ => {} }
+       throw e
+     }
+   }
+   H5Fclose(fid)
+   writeMatHeader(fname)
+ }
+
+ // Write the alternating (Mat, String name) pairs of argList to fid through putMat.
+ //
+ // Throws RuntimeException when an even-position argument is not a Mat, or when the
+ // following argument is not a String. A trailing Mat with no name is reported the same way
+ // instead of failing with an index error. The list is walked once rather than indexed by
+ // position on every step.
+ def saveAsImpl(fid:Int, argList:List[AnyRef]) = {
+   var rest = argList
+   while (!rest.isEmpty) {
+     rest.head match {
+       case a:Mat => {
+         rest.tail match {
+           case (str:String) :: _ => putMat(fid, a, str)
+           // covers both a non-String name and a missing name at the end of the list
+           case _ => throw new RuntimeException("odd numbered args must be String variable names")
+         }
+       }
+       case _ => throw new RuntimeException("even numbered args must be Mat variables")
+     }
+     // a pair was consumed above, so the tail is non-empty here
+     rest = rest.tail.tail
+   }
+ }
+
+ // Increment every element of ii in place, but only when Mat.ioneBased == 1.
+ def addOne(ii:Array[Int]) = {
+   // a count of 0 leaves the array untouched (and ii is not dereferenced at all)
+   val count = if (Mat.ioneBased == 1) ii.length else 0
+   var k = 0
+   while (k < count) {
+     ii(k) = ii(k) + 1
+     k += 1
+   }
+ }
+
+ // Decrement every element of ii in place, but only when Mat.ioneBased == 1.
+ def subOne(ii:Array[Int]) = {
+   // a count of 0 leaves the array untouched (and ii is not dereferenced at all)
+   val count = if (Mat.ioneBased == 1) ii.length else 0
+   var k = 0
+   while (k < count) {
+     ii(k) -= 1
+     k += 1
+   }
+ }
+}
diff --git a/src/main/scala/BIDMat/MySorting.scala b/src/main/scala/BIDMat/MySorting.scala
new file mode 100755
index 00000000..e881fe11
--- /dev/null
+++ b/src/main/scala/BIDMat/MySorting.scala
@@ -0,0 +1,497 @@
+package BIDMat
+
+import scala.reflect.ClassManifest
+import scala.math.Ordering
+import scala.actors.Actor._
+
+object Sorting {
+
+ // Generic entry point: dispatches on the runtime element type of ga to the Float, Double or
+ // Int implementation, passing Mat.numThreads/2 as the thread budget.
+ // The match has no default case, so any other element type raises MatchError.
+ def quickSort2[T](ga:Array[T], ii:Array[Int], lo:Int, hi:Int, stride:Int):Unit = {
+ ga match {
+ case a:Array[Float] => quickSort2(a, ii, lo, hi, stride, Mat.numThreads/2)
+ case a:Array[Double] => quickSort2(a, ii, lo, hi, stride, Mat.numThreads/2)
+ case a:Array[Int] => quickSort2(a, ii, lo, hi, stride, Mat.numThreads/2)
+ }
+ }
+
+ // Sort the elements a(lo), a(lo+stride), ... (up to but excluding hi) in place, applying
+ // the same permutation to ii.
+ //
+ // Ranges of at most 16 elements go to isort; larger ones are split by partition and the two
+ // halves are sorted recursively. When nthreads > 1 and the range has more than 400 elements
+ // the halves run in two actors and the caller spins until both finish.
+ //
+ // The completion flags are @volatile: they are written by the actor threads and polled by
+ // the caller, so a plain captured var gives no guarantee that the loop ever observes the
+ // update or that the actors' array writes are visible afterwards.
+ def quickSort2(a:Array[Float], ii:Array[Int], lo:Int, hi:Int, stride:Int, nthreads:Int):Unit = {
+   if ((hi - lo)/stride > 0) {
+     if ((hi - lo)/stride <= 16) {
+       isort(a, ii, lo, hi, stride)
+     } else {
+       val ip = partition(a, ii, lo, hi, stride)
+       if (nthreads > 1 && (hi-lo)/stride > 400) {
+         @volatile var done0 = false
+         @volatile var done1 = false
+         actor { quickSort2(a, ii, lo, ip, stride, nthreads/2); done0 = true }
+         actor { quickSort2(a, ii, ip, hi, stride, nthreads/2); done1 = true }
+         while (!done0 || !done1) {Thread.`yield`}
+       } else {
+         quickSort2(a, ii, lo, ip, stride, nthreads/2)
+         quickSort2(a, ii, ip, hi, stride, nthreads/2)
+       }
+     }
+   }
+ }
+
+ // Selection sort of the strided range lo, lo+stride, ... (excluding hi) of a, carrying ii
+ // along. Equal values are ordered by their ii entry. The loops stop on equality with hi.
+ def isort(a:Array[Float], ii:Array[Int], lo:Int, hi:Int, stride:Int):Unit = {
+   var cur = lo
+   while (cur != hi) {
+     // find the smallest (value, index) pair in the unsorted tail
+     var best = cur
+     var bestVal = a(cur)
+     var probe = cur + stride
+     while (probe != hi) {
+       val cand = a(probe)
+       if (cand < bestVal || (cand == bestVal && ii(probe) < ii(best))) {
+         bestVal = cand
+         best = probe
+       }
+       probe += stride
+     }
+     // move it to the front of the tail, swapping the paired index as well
+     a(best) = a(cur)
+     a(cur) = bestVal
+     val savedIdx = ii(best)
+     ii(best) = ii(cur)
+     ii(cur) = savedIdx
+     cur += stride
+   }
+ }
+
+ // Draw three random positions from the strided range [lo, hi) and return the position whose
+ // (value, ii) pair is the median of the three; ties on value are broken by the ii entry.
+ // The three draws are independent, so positions may repeat.
+ def med3(a:Array[Float], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ val nv = (hi - lo)/stride
+ val i1 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val i2 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val i3 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val v1 = a(i1)
+ val v2 = a(i2)
+ val v3 = a(i3)
+ val ii1 = ii(i1)
+ val ii2 = ii(i2)
+ val ii3 = ii(i3)
+ // each test below reads "x is after y" in (value, index) order
+ if ((v2 >= v1) && ((v2 > v1) || ii2 > ii1)) {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i2 else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i3 else i1
+ }
+ } else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i1 else {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i3 else i2
+ }
+ }
+ }
+
+ // Call med3 three times on the same range and return the position whose (value, ii) pair is
+ // the median of those three results; ties on value are broken by the ii entry.
+ def med9(a:Array[Float], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ val i1 = med3(a, ii, lo, hi, stride)
+ val i2 = med3(a, ii, lo, hi, stride)
+ val i3 = med3(a, ii, lo, hi, stride)
+ val v1 = a(i1)
+ val v2 = a(i2)
+ val v3 = a(i3)
+ val ii1 = ii(i1)
+ val ii2 = ii(i2)
+ val ii3 = ii(i3)
+ // each test below reads "x is after y" in (value, index) order
+ if ((v2 >= v1) && ((v2 > v1) || ii2 > ii1)) {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i2 else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i3 else i1
+ }
+ } else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i1 else {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i3 else i2
+ }
+ }
+ }
+
+ // Partition the strided range [lo, hi) of a (and ii alongside it) around a pivot and return
+ // the position where the second part starts (j + stride).
+ // Pivot choice depends on the element count: med9 above 600, med3 above 100, otherwise one
+ // random position. Elements are compared as (value, ii) pairs.
+ def partition(a:Array[Float], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ // sign of stride, so the bound tests below work for either direction
+ val sstride = math.signum(stride)
+ val nvals = (hi - lo)/stride
+ val im = if (nvals > 600) {
+ med9(a, ii, lo, hi, stride)
+ } else if (nvals > 100) {
+ med3(a, ii, lo, hi, stride)
+ } else {
+ lo + stride*(math.floor(nvals*java.lang.Math.random()).asInstanceOf[Int])
+ }
+ var v = a(im)
+ var iv = ii(im)
+ var done = false
+ // i and j start one step outside the range and move towards each other
+ var i = lo - stride
+ var j = hi
+ while (! done) {
+ i += stride
+ j -= stride
+ // advance i past pairs <= pivot, stopping at the last position of the range
+ while ((hi-i)*sstride > sstride*stride && ((a(i) <= v) && ((a(i) < v) || ii(i) <= iv))) {i += stride}
+ // retreat j past pairs > pivot; this scan carries no explicit bound check
+ while ( ((a(j) >= v) && ((a(j) > v) || ii(j) > iv))) {j -= stride}
+ if ((i - j)*sstride >= 0) {
+ done = true
+ } else {
+ val atmp = a(i)
+ a(i) = a(j)
+ a(j) = atmp
+ val itmp = ii(i)
+ ii(i) = ii(j)
+ ii(j) = itmp
+ }
+ }
+ j + stride
+ }
+
+ // Sort the elements a(lo), a(lo+stride), ... (up to but excluding hi) in place, applying
+ // the same permutation to ii.
+ //
+ // Ranges of at most 16 elements go to isort; larger ones are split by partition and the two
+ // halves are sorted recursively. When nthreads > 1 and the range has more than 400 elements
+ // the halves run in two actors and the caller spins until both finish.
+ //
+ // The completion flags are @volatile: they are written by the actor threads and polled by
+ // the caller, so a plain captured var gives no guarantee that the loop ever observes the
+ // update or that the actors' array writes are visible afterwards.
+ def quickSort2(a:Array[Double], ii:Array[Int], lo:Int, hi:Int, stride:Int, nthreads:Int):Unit = {
+   if ((hi - lo)/stride > 0) {
+     if ((hi - lo)/stride <= 16) {
+       isort(a, ii, lo, hi, stride)
+     } else {
+       val ip = partition(a, ii, lo, hi, stride)
+       if (nthreads > 1 && (hi-lo)/stride > 400) {
+         @volatile var done0 = false
+         @volatile var done1 = false
+         actor { quickSort2(a, ii, lo, ip, stride, nthreads/2); done0 = true }
+         actor { quickSort2(a, ii, ip, hi, stride, nthreads/2); done1 = true }
+         while (!done0 || !done1) {Thread.`yield`}
+       } else {
+         quickSort2(a, ii, lo, ip, stride, nthreads/2)
+         quickSort2(a, ii, ip, hi, stride, nthreads/2)
+       }
+     }
+   }
+ }
+
+ // Selection sort of the strided range lo, lo+stride, ... (excluding hi) of a, carrying ii
+ // along. Equal values are ordered by their ii entry. The loops stop on equality with hi.
+ def isort(a:Array[Double], ii:Array[Int], lo:Int, hi:Int, stride:Int):Unit = {
+   var cur = lo
+   while (cur != hi) {
+     // find the smallest (value, index) pair in the unsorted tail
+     var best = cur
+     var bestVal = a(cur)
+     var probe = cur + stride
+     while (probe != hi) {
+       val cand = a(probe)
+       if (cand < bestVal || (cand == bestVal && ii(probe) < ii(best))) {
+         bestVal = cand
+         best = probe
+       }
+       probe += stride
+     }
+     // move it to the front of the tail, swapping the paired index as well
+     a(best) = a(cur)
+     a(cur) = bestVal
+     val savedIdx = ii(best)
+     ii(best) = ii(cur)
+     ii(cur) = savedIdx
+     cur += stride
+   }
+ }
+
+ // Draw three random positions from the strided range [lo, hi) and return the position whose
+ // (value, ii) pair is the median of the three; ties on value are broken by the ii entry.
+ // The three draws are independent, so positions may repeat.
+ def med3(a:Array[Double], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ val nv = (hi - lo)/stride
+ val i1 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val i2 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val i3 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val v1 = a(i1)
+ val v2 = a(i2)
+ val v3 = a(i3)
+ val ii1 = ii(i1)
+ val ii2 = ii(i2)
+ val ii3 = ii(i3)
+ // each test below reads "x is after y" in (value, index) order
+ if ((v2 >= v1) && ((v2 > v1) || ii2 > ii1)) {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i2 else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i3 else i1
+ }
+ } else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i1 else {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i3 else i2
+ }
+ }
+ }
+
+ // Call med3 three times on the same range and return the position whose (value, ii) pair is
+ // the median of those three results; ties on value are broken by the ii entry.
+ def med9(a:Array[Double], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ val i1 = med3(a, ii, lo, hi, stride)
+ val i2 = med3(a, ii, lo, hi, stride)
+ val i3 = med3(a, ii, lo, hi, stride)
+ val v1 = a(i1)
+ val v2 = a(i2)
+ val v3 = a(i3)
+ val ii1 = ii(i1)
+ val ii2 = ii(i2)
+ val ii3 = ii(i3)
+ // each test below reads "x is after y" in (value, index) order
+ if ((v2 >= v1) && ((v2 > v1) || ii2 > ii1)) {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i2 else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i3 else i1
+ }
+ } else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i1 else {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i3 else i2
+ }
+ }
+ }
+
+ // Partition the strided range [lo, hi) of a (and ii alongside it) around a pivot and return
+ // the position where the second part starts (j + stride).
+ // Pivot choice depends on the element count: med9 above 600, med3 above 100, otherwise one
+ // random position. Elements are compared as (value, ii) pairs.
+ def partition(a:Array[Double], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ // sign of stride, so the bound tests below work for either direction
+ val sstride = math.signum(stride)
+ val nvals = (hi - lo)/stride
+ val im = if (nvals > 600) {
+ med9(a, ii, lo, hi, stride)
+ } else if (nvals > 100) {
+ med3(a, ii, lo, hi, stride)
+ } else {
+ lo + stride*(math.floor(nvals*java.lang.Math.random()).asInstanceOf[Int])
+ }
+ var v = a(im)
+ var iv = ii(im)
+ var done = false
+ // i and j start one step outside the range and move towards each other
+ var i = lo - stride
+ var j = hi
+ while (! done) {
+ i += stride
+ j -= stride
+ // advance i past pairs <= pivot, stopping at the last position of the range
+ while ((hi-i)*sstride > sstride*stride && ((a(i) <= v) && ((a(i) < v) || ii(i) <= iv))) {i += stride}
+ // retreat j past pairs > pivot; this scan carries no explicit bound check
+ while ( ((a(j) >= v) && ((a(j) > v) || ii(j) > iv))) {j -= stride}
+ if ((i - j)*sstride >= 0) {
+ done = true
+ } else {
+ val atmp = a(i)
+ a(i) = a(j)
+ a(j) = atmp
+ val itmp = ii(i)
+ ii(i) = ii(j)
+ ii(j) = itmp
+ }
+ }
+ j + stride
+ }
+
+
+
+ // Sort the elements a(lo), a(lo+stride), ... (up to but excluding hi) in place, applying
+ // the same permutation to ii.
+ //
+ // Ranges of at most 16 elements go to isort; larger ones are split by partition and the two
+ // halves are sorted recursively. When nthreads > 1 and the range has more than 400 elements
+ // the halves run in two actors and the caller spins until both finish.
+ //
+ // The completion flags are @volatile: they are written by the actor threads and polled by
+ // the caller, so a plain captured var gives no guarantee that the loop ever observes the
+ // update or that the actors' array writes are visible afterwards.
+ def quickSort2(a:Array[Int], ii:Array[Int], lo:Int, hi:Int, stride:Int, nthreads:Int):Unit = {
+   if ((hi - lo)/stride > 0) {
+     if ((hi - lo)/stride <= 16) {
+       isort(a, ii, lo, hi, stride)
+     } else {
+       val ip = partition(a, ii, lo, hi, stride)
+       if (nthreads > 1 && (hi-lo)/stride > 400) {
+         @volatile var done0 = false
+         @volatile var done1 = false
+         actor { quickSort2(a, ii, lo, ip, stride, nthreads/2); done0 = true }
+         actor { quickSort2(a, ii, ip, hi, stride, nthreads/2); done1 = true }
+         while (!done0 || !done1) {Thread.`yield`}
+       } else {
+         quickSort2(a, ii, lo, ip, stride, nthreads/2)
+         quickSort2(a, ii, ip, hi, stride, nthreads/2)
+       }
+     }
+   }
+ }
+
+ // Selection sort of the strided range lo, lo+stride, ... (excluding hi) of a, carrying ii
+ // along. Equal values are ordered by their ii entry. The loops stop on equality with hi.
+ def isort(a:Array[Int], ii:Array[Int], lo:Int, hi:Int, stride:Int):Unit = {
+   var cur = lo
+   while (cur != hi) {
+     // find the smallest (value, index) pair in the unsorted tail
+     var best = cur
+     var bestVal = a(cur)
+     var probe = cur + stride
+     while (probe != hi) {
+       val cand = a(probe)
+       if (cand < bestVal || (cand == bestVal && ii(probe) < ii(best))) {
+         bestVal = cand
+         best = probe
+       }
+       probe += stride
+     }
+     // move it to the front of the tail, swapping the paired index as well
+     a(best) = a(cur)
+     a(cur) = bestVal
+     val savedIdx = ii(best)
+     ii(best) = ii(cur)
+     ii(cur) = savedIdx
+     cur += stride
+   }
+ }
+
+ // Draw three random positions from the strided range [lo, hi) and return the position whose
+ // (value, ii) pair is the median of the three; ties on value are broken by the ii entry.
+ // The three draws are independent, so positions may repeat.
+ def med3(a:Array[Int], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ val nv = (hi - lo)/stride
+ val i1 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val i2 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val i3 = lo + stride*(math.floor(nv*java.lang.Math.random()).asInstanceOf[Int])
+ val v1 = a(i1)
+ val v2 = a(i2)
+ val v3 = a(i3)
+ val ii1 = ii(i1)
+ val ii2 = ii(i2)
+ val ii3 = ii(i3)
+ // each test below reads "x is after y" in (value, index) order
+ if ((v2 >= v1) && ((v2 > v1) || ii2 > ii1)) {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i2 else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i3 else i1
+ }
+ } else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i1 else {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i3 else i2
+ }
+ }
+ }
+
+ // Call med3 three times on the same range and return the position whose (value, ii) pair is
+ // the median of those three results; ties on value are broken by the ii entry.
+ def med9(a:Array[Int], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ val i1 = med3(a, ii, lo, hi, stride)
+ val i2 = med3(a, ii, lo, hi, stride)
+ val i3 = med3(a, ii, lo, hi, stride)
+ val v1 = a(i1)
+ val v2 = a(i2)
+ val v3 = a(i3)
+ val ii1 = ii(i1)
+ val ii2 = ii(i2)
+ val ii3 = ii(i3)
+ // each test below reads "x is after y" in (value, index) order
+ if ((v2 >= v1) && ((v2 > v1) || ii2 > ii1)) {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i2 else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i3 else i1
+ }
+ } else {
+ if ((v3 >= v1) && ((v3 > v1) || ii3 > ii1)) i1 else {
+ if ((v3 >= v2) && ((v3 > v2) || ii3 > ii2)) i3 else i2
+ }
+ }
+ }
+
+ // Partition the strided range [lo, hi) of a (and ii alongside it) around a pivot and return
+ // the position where the second part starts (j + stride).
+ // Pivot choice depends on the element count: med9 above 600, med3 above 100, otherwise one
+ // random position. Elements are compared as (value, ii) pairs.
+ def partition(a:Array[Int], ii:Array[Int], lo:Int, hi:Int, stride:Int):Int = {
+ // sign of stride, so the bound tests below work for either direction
+ val sstride = math.signum(stride)
+ val nvals = (hi - lo)/stride
+ val im = if (nvals > 600) {
+ med9(a, ii, lo, hi, stride)
+ } else if (nvals > 100) {
+ med3(a, ii, lo, hi, stride)
+ } else {
+ lo + stride*(math.floor(nvals*java.lang.Math.random()).asInstanceOf[Int])
+ }
+ var v = a(im)
+ var iv = ii(im)
+ var done = false
+ // i and j start one step outside the range and move towards each other
+ var i = lo - stride
+ var j = hi
+ while (! done) {
+ i += stride
+ j -= stride
+ // advance i past pairs <= pivot, stopping at the last position of the range
+ while ((hi-i)*sstride > sstride*stride && ((a(i) <= v) && ((a(i) < v) || ii(i) <= iv))) {i += stride}
+ // retreat j past pairs > pivot; this scan carries no explicit bound check
+ while ( ((a(j) >= v) && ((a(j) > v) || ii(j) > iv))) {j -= stride}
+ if ((i - j)*sstride >= 0) {
+ done = true
+ } else {
+ val atmp = a(i)
+ a(i) = a(j)
+ a(j) = atmp
+ val itmp = ii(i)
+ ii(i) = ii(j)
+ ii(j) = itmp
+ }
+ }
+ j + stride
+ }
+
+
+ // Sort the whole array a in place according to ord, by handing index-based compare and
+ // swap functions over a to sort1.
+ def quickSort[@specialized(Double, Float, Int, Byte) T](a:Array[T])(implicit ord:Ordering[T]) = {
+   val compareAt = (i:Int, j:Int) => ord.compare(a(i), a(j))
+   val exchange = (i:Int, j:Int) => {
+     val held = a(i)
+     a(i) = a(j)
+     a(j) = held
+   }
+   sort1(compareAt, exchange, 0, a.length)
+ }
+
+ // Sort len positions starting at start, using only the caller's index-based comp and swap.
+ def quickSort(comp:(Int,Int)=>Int, swap: (Int,Int) => Unit, start:Int, len:Int) { sort1(comp, swap, start, len) }
+
+ // Index-based quicksort over the positions off .. off+len-1.
+ // The data is never touched directly: comp(i, j) compares the elements at two positions and
+ // swap(i, j) exchanges them. Ranges shorter than 7 are insertion-sorted; otherwise the range
+ // is split into less-than / equal / greater-than regions around a pivot and the two outer
+ // regions are sorted recursively.
+ private def sort1(comp: (Int, Int) => Int, swap: (Int,Int) => Unit, off: Int, len: Int) {
+
+ // exchange the n positions starting at _a with the n positions starting at _b
+ def vecswap(_a: Int, _b: Int, n: Int) {
+ var a = _a
+ var b = _b
+ var i = 0
+ while (i < n) {
+ swap(a, b)
+ i += 1
+ a += 1
+ b += 1
+ }
+ }
+ // position of the median of the elements at a, b and c
+ def med3(a: Int, b: Int, c: Int) = {
+ if (comp(a,b) < 0) {
+ if (comp(b,c) < 0) b else if (comp(a,c) < 0) c else a
+ } else {
+ if (comp(b,c) > 0) b else if (comp(a,c) > 0) c else a
+ }
+ }
+ def sort2(off: Int, len: Int) {
+ if (len < 7) {
+ // insertion sort for very short ranges
+ var i = off
+ while (i < len + off) {
+ var j = i
+ while (j > off && comp(j-1,j) > 0) {
+ swap(j, j-1)
+ j -= 1
+ }
+ i += 1
+ }
+ } else {
+ // pivot position m: middle element, median of 3 above 30, medians of medians above 300
+ var m = off + (len >> 1)
+ if (len > 30) {
+ var l = off
+ var n = off + len - 1
+ if (len > 300) {
+ val s = len / 8
+ l = med3(l, l+s, l+2*s)
+ m = med3(m-s, m, m+s)
+ n = med3(n-2*s, n-s, n)
+ }
+ m = med3(l, m, n)
+ }
+
+ // elements equal to the pivot are parked at both ends: [off, a) and (d, off+len-1]
+ var a = off
+ var b = a
+ var c = off + len - 1
+ var d = c
+ var done = false
+ while (!done) {
+ var pp = -1
+ while (b <= c && pp <= 0) {
+ pp = comp(b, m)
+ if (pp == 0) {
+ swap(a, b)
+ // the pivot's position is tracked as equal elements are moved
+ m = a
+ a += 1
+ }
+ if (pp <= 0) b += 1
+ }
+ pp = 1
+ while (c >= b && pp >= 0) {
+ pp = comp(c, m)
+ if (pp == 0) {
+ swap(c, d)
+ m = d
+ d -= 1
+ }
+ if (pp >= 0) c -= 1
+ }
+ if (b > c) {
+ done = true
+ } else {
+ swap(b, c)
+ c -= 1
+ b += 1
+ }
+ }
+
+ // move the parked equal elements from the ends into the middle
+ val n = off + len
+ var s = math.min(a-off, b-a)
+ vecswap(off, b-s, s)
+ s = math.min(d-c, n-d-1)
+ vecswap(b, n-s, s)
+
+ // recurse on the less-than and greater-than regions
+ s = b - a
+ if (s > 1)
+ sort2(off, s)
+ s = d - c
+ if (s > 1)
+ sort2(n-s, s)
+ }
+ }
+ sort2(off, len)
+ }
+
+ // Smoke test: sorts n random values (n = first command-line argument) with quickSort2 and
+ // prints how many adjacent pairs are out of order after sorting.
+ def main(args:Array[String]) = {
+ import BIDMat.SciFunctions._
+ import BIDMat.MatFunctions._
+ val n = args(0).toInt
+ val a = SciFunctions.rand(n, 1)
+ val ii = MatFunctions.icol(0->n)
+ quickSort2(a.data, ii.data, 0, n, 1)
+ println("check %d" format find(a(1->n,0) < a(0->(n-1),0)).length)
+ }
+}
diff --git a/src/main/scala/BIDMat/Operators.scala b/src/main/scala/BIDMat/Operators.scala
new file mode 100644
index 00000000..1876d9a4
--- /dev/null
+++ b/src/main/scala/BIDMat/Operators.scala
@@ -0,0 +1,320 @@
+package BIDMat
+import MatFunctions._
+
+object Operator {
+  // Dispatch and output-allocation helpers shared by the Mop operator objects.
+  //
+  // applyMat: each overload fixes the static type of the left operand a, inspects the
+  // runtime type of the right operand b, converts an operand where a case calls for it,
+  // and forwards to the corresponding Mop method together with the caller's output c.
+  // A right operand whose type has no case raises scala.MatchError.
+
+  // Left operand FMat.
+  def applyMat(a:FMat, b:Mat, c:Mat, op:Mop):Mat = b match {
+    case rhs:FMat => op.fop(a, rhs, c)
+    case rhs:SMat => op.fop(a, rhs, c)
+    case rhs:DMat => op.dop(DMat(a), rhs, c)
+    case rhs:IMat => op.fop(a, FMat(rhs), c)
+    case rhs:CMat => op.cop(CMat(a), rhs, c)
+  }
+
+  // Left operand DMat.
+  def applyMat(a:DMat, b:Mat, c:Mat, op:Mop):Mat = b match {
+    case rhs:FMat => op.dop(a, DMat(rhs), c)
+    case rhs:DMat => op.dop(a, rhs, c)
+    case rhs:IMat => op.dop(a, DMat(rhs), c)
+    case rhs:CMat => op.cop(CMat(a), rhs, c)
+  }
+
+  // Left operand IMat.
+  def applyMat(a:IMat, b:Mat, c:Mat, op:Mop):Mat = b match {
+    case rhs:FMat => op.fop(FMat(a), rhs, c)
+    case rhs:DMat => op.dop(DMat(a), rhs, c)
+    case rhs:IMat => op.iop(a, rhs, c)
+    case rhs:CMat => op.cop(CMat(a), rhs, c)
+  }
+
+  // Left operand CMat.
+  def applyMat(a:CMat, b:Mat, c:Mat, op:Mop):Mat = b match {
+    case rhs:FMat => op.cop(a, CMat(rhs), c)
+    case rhs:DMat => op.cop(a, CMat(rhs), c)
+    case rhs:IMat => op.cop(a, CMat(rhs), c)
+    case rhs:CMat => op.cop(CMat(a), rhs, c)
+  }
+
+  // Left operand GMat: only a GMat right operand has a case.
+  def applyMat(a:GMat, b:Mat, c:Mat, op:Mop):Mat = b match {
+    case rhs:GMat => op.gop(a, rhs, c)
+  }
+
+  // Left operand SMat: only an SMat right operand has a case.
+  def applyMat(a:SMat, b:Mat, c:Mat, op:Mop):Mat = b match {
+    case rhs:SMat => op.sop(a, rhs, c)
+  }
+
+  // Row count used for a fresh product output: b.nrows when a is 1x1, otherwise a.nrows.
+  def multDim1(a:Mat, b:Mat):Int = {
+    val aIsScalar = a.nrows == 1 && a.ncols == 1
+    if (aIsScalar) b.nrows else a.nrows
+  }
+
+  // Column count used for a fresh product output: a.ncols when b is 1x1, otherwise b.ncols.
+  def multDim2(a:Mat, b:Mat):Int = {
+    val bIsScalar = b.nrows == 1 && b.ncols == 1
+    if (bIsScalar) a.ncols else b.ncols
+  }
+
+  // Pairs a with an output: c when it is non-null, otherwise a new FMat with a's dimensions.
+  def getFPair(c:Mat, a:FMat):FPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else FMat(a.nrows, a.ncols)
+    new FPair(out, a)
+  }
+
+  // Pairs a with an output: c when non-null, otherwise a new FMat of multDim1(a,b) x multDim2(a,b).
+  def getFPair(c:Mat, a:FMat, b:FMat):FPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else FMat(multDim1(a,b), multDim2(a,b))
+    new FPair(out, a)
+  }
+
+  // Pairs a with an output: c when it is non-null, otherwise a new DMat with a's dimensions.
+  def getDPair(c:Mat, a:DMat):DPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else DMat(a.nrows, a.ncols)
+    new DPair(out, a)
+  }
+
+  // Pairs a with an output: c when non-null, otherwise a new DMat of multDim1(a,b) x multDim2(a,b).
+  def getDPair(c:Mat, a:DMat, b:DMat):DPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else DMat(multDim1(a,b), multDim2(a,b))
+    new DPair(out, a)
+  }
+
+  // Pairs a with an output: c when it is non-null, otherwise a new IMat with a's dimensions.
+  def getIPair(c:Mat, a:IMat):IPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else IMat(a.nrows, a.ncols)
+    new IPair(out, a)
+  }
+
+  // Pairs a with an output: c when non-null, otherwise a new IMat of multDim1(a,b) x multDim2(a,b).
+  def getIPair(c:Mat, a:IMat, b:IMat):IPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else IMat(multDim1(a,b), multDim2(a,b))
+    new IPair(out, a)
+  }
+
+  // Pairs a with an output: c when it is non-null, otherwise a new CMat with a's dimensions.
+  def getCPair(c:Mat, a:CMat):CPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else CMat(a.nrows, a.ncols)
+    new CPair(out, a)
+  }
+
+  // Pairs a with an output: c when non-null, otherwise a new CMat of multDim1(a,b) x multDim2(a,b).
+  def getCPair(c:Mat, a:CMat, b:CMat):CPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else CMat(multDim1(a,b), multDim2(a,b))
+    new CPair(out, a)
+  }
+
+  // Pairs a with an output: c when it is non-null, otherwise a new GMat with a's dimensions.
+  def getGPair(c:Mat, a:GMat):GPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else GMat(a.nrows, a.ncols)
+    new GPair(out, a)
+  }
+
+  // Pairs a with an output: c when non-null, otherwise a new GMat of multDim1(a,b) x multDim2(a,b).
+  def getGPair(c:Mat, a:GMat, b:GMat):GPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else GMat(multDim1(a,b), multDim2(a,b))
+    new GPair(out, a)
+  }
+
+  // Pairs a with an output: c when non-null, otherwise a new SMat built from a's nrows, ncols and nnz.
+  def getSPair(c:Mat, a:SMat):SPair = {
+    val reuse = c.asInstanceOf[AnyRef] != null
+    val out = if (reuse) c else SMat(a.nrows, a.ncols, a.nnz)
+    new SPair(out, a)
+  }
+}
+
+trait Mop {
+  // One abstract slot per operand-type combination; c is the caller-supplied output matrix and is passed through as given.
+  def fop(a:FMat, b:FMat, c:Mat):FMat
+  def fop(a:FMat, b:SMat, c:Mat):FMat
+  def dop(a:DMat, b:DMat, c:Mat):DMat
+  def iop(a:IMat, b:IMat, c:Mat):IMat
+  def cop(a:CMat, b:CMat, c:Mat):CMat
+  def gop(a:GMat, b:GMat, c:Mat):GMat
+  def sop(a:SMat, b:SMat, c:Mat):SMat
+  // Always throws RuntimeException naming the operator s and m.mytype; the Mat result type only lets it sit in expression position.
+  def notImplemented0(s:String, m:Mat):Mat = throw new RuntimeException("operator "+s+" not implemented for "+m.mytype)
+}
+
+object Mop_Plus extends Mop { // Binds every Mop slot to `+` on the pair wrapper built from (c, a); a sparse right operand of fop is expanded with full(b).
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs + b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs + full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs + b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs + b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { val lhs = Operator.getCPair(c, a); lhs + b }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs + b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { val lhs = Operator.getSPair(c, a); lhs + b }
+}
+
+object Mop_Minus extends Mop { // Binds every Mop slot to `-` on the pair wrapper built from (c, a); a sparse right operand of fop is expanded with full(b).
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs - b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs - full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs - b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs - b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { val lhs = Operator.getCPair(c, a); lhs - b }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs - b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { val lhs = Operator.getSPair(c, a); lhs - b }
+}
+
+object Mop_Times extends Mop { // Binds the Mop slots to `*`; when c is null a fresh output is sized multDim1(a,b) x multDim2(a,b). sop is not implemented.
+  override def fop(a:FMat, b:FMat, c:Mat):FMat = Operator.getFPair(c, a, b) * b
+  override def fop(a:FMat, b:SMat, c:Mat):FMat = (new FPair(if (c.asInstanceOf[AnyRef] != null) c else FMat(Operator.multDim1(a, b), Operator.multDim2(a, b)), a)) * b // was getFPair(c, a): a fresh output took a's column count instead of b's, unlike every other product slot here
+  override def dop(a:DMat, b:DMat, c:Mat):DMat = Operator.getDPair(c, a, b) * b
+  override def iop(a:IMat, b:IMat, c:Mat):IMat = Operator.getIPair(c, a, b) * b
+  override def cop(a:CMat, b:CMat, c:Mat):CMat = Operator.getCPair(c, a, b) * b
+  override def gop(a:GMat, b:GMat, c:Mat):GMat = Operator.getGPair(c, a, b) * b
+  override def sop(a:SMat, b:SMat, c:Mat):SMat = {notImplemented0("*", a); a}
+}
+
+object Mop_Div extends Mop { // Binds the FMat, DMat and CMat slots to `/` applied directly to a (c is not used); the remaining slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = a / b
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { notImplemented0("/", a); a }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = a / b
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { notImplemented0("/", a); a }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = a / b
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { notImplemented0("/", a); a }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("/", a); a }
+}
+
+object Mop_RSolve extends Mop { // Binds the FMat, DMat and CMat slots to the double-backslash operator applied directly to a (c is not used); the remaining slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = a \\ b
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { notImplemented0("\\\\", a); a }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = a \\ b
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { notImplemented0("\\\\", a); a }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = a \\ b
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { notImplemented0("\\\\", a); a }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("\\\\", a); a }
+}
+
+object Mop_ETimes extends Mop { // Binds every Mop slot to `*@` on the pair wrapper built from (c, a); a sparse right operand of fop is expanded with full(b).
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs *@ b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs *@ full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs *@ b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs *@ b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { val lhs = Operator.getCPair(c, a); lhs *@ b }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs *@ b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { val lhs = Operator.getSPair(c, a); lhs *@ b }
+}
+
+object Mop_EDiv extends Mop { // Binds the Mop slots to `/@` on the pair wrapper built from (c, a); the IMat slot throws via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs /@ b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs /@ full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs /@ b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { notImplemented0("/@", a); a }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { val lhs = Operator.getCPair(c, a); lhs /@ b }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs /@ b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { val lhs = Operator.getSPair(c, a); lhs /@ b }
+}
+
+object Mop_HCat extends Mop { // Binds the FMat, DMat, IMat and CMat slots to the single-backslash operator applied directly to a (c is not used); the remaining slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = a \ b
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { notImplemented0("\\", a); a }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = a \ b
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = a \ b
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = a \ b
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { notImplemented0("\\", a); a }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("\\", a); a }
+}
+
+object Mop_VCat extends Mop { // Binds the FMat, DMat, IMat and CMat slots to `on` applied directly to a (c is not used); the remaining slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = a on b
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { notImplemented0("on", a); a }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = a on b
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = a on b
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = a on b
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { notImplemented0("on", a); a }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("on", a); a }
+}
+
+object Mop_LT extends Mop { // Binds the Mop slots to `<` on the pair wrapper built from (c, a); the CMat and SMat slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs < b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs < full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs < b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs < b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { notImplemented0("<", a); a }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs < b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("<", a); a }
+}
+
+object Mop_GT extends Mop { // Binds the Mop slots to `>` on the pair wrapper built from (c, a); the CMat and SMat slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs > b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs > full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs > b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs > b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { notImplemented0(">", a); a }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs > b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0(">", a); a }
+}
+
+object Mop_LE extends Mop { // Binds the Mop slots to `<=` on the pair wrapper built from (c, a); the CMat and SMat slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs <= b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs <= full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs <= b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs <= b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { notImplemented0("<=", a); a }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs <= b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("<=", a); a }
+}
+
+object Mop_GE extends Mop { // Binds the Mop slots to `>=` on the pair wrapper built from (c, a); the CMat and SMat slots throw via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs >= b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs >= full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs >= b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs >= b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { notImplemented0(">=", a); a }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs >= b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0(">=", a); a }
+}
+
+object Mop_EQ extends Mop { // Binds the Mop slots to `==` on the pair wrapper built from (c, a); the SMat slot throws via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs == b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs == full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs == b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs == b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { val lhs = Operator.getCPair(c, a); lhs == b }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs == b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("==", a); a }
+}
+
+object Mop_NE extends Mop { // Binds the Mop slots to `!=` on the pair wrapper built from (c, a); the SMat slot throws via notImplemented0.
+  override def fop(a: FMat, b: FMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs != b }
+  override def fop(a: FMat, b: SMat, c: Mat): FMat = { val lhs = Operator.getFPair(c, a); lhs != full(b) }
+  override def dop(a: DMat, b: DMat, c: Mat): DMat = { val lhs = Operator.getDPair(c, a); lhs != b }
+  override def iop(a: IMat, b: IMat, c: Mat): IMat = { val lhs = Operator.getIPair(c, a); lhs != b }
+  override def cop(a: CMat, b: CMat, c: Mat): CMat = { val lhs = Operator.getCPair(c, a); lhs != b }
+  override def gop(a: GMat, b: GMat, c: Mat): GMat = { val lhs = Operator.getGPair(c, a); lhs != b }
+  override def sop(a: SMat, b: SMat, c: Mat): SMat = { notImplemented0("!=", a); a }
+}
diff --git a/src/main/scala/BIDMat/Plotting.scala b/src/main/scala/BIDMat/Plotting.scala
new file mode 100755
index 00000000..44e33edf
--- /dev/null
+++ b/src/main/scala/BIDMat/Plotting.scala
@@ -0,0 +1,642 @@
+package BIDMat
+import ptolemy.plot._
+import java.awt._
+import java.awt.geom.AffineTransform
+import java.awt.image.BufferedImage
+import javax.swing._
+import javax.imageio.stream.FileImageOutputStream
+import javax.imageio.ImageIO
+import java.io._
+
+object Plotting {
+  // Number shown in the title of the next figure window; incremented each time a figure is created.
+  var ifigure:Int = 1
+
+  // Builds a ptolemy Plot from mats, shows it in a new PlotFrame titled "Figure <ifigure>" and returns it.
+  //   one vector (a single row or column): values against their index, all in dataset 0
+  //   one matrix: every column is its own dataset, plotted against the row index
+  //   several matrices: read as consecutive (x, y) pairs, pair i becoming dataset i
+  // Only FMat, DMat and IMat arguments have cases; any other type raises scala.MatchError.
+  // More than one matrix with an odd count is rejected with IllegalArgumentException up front
+  // (it used to fail part-way through with an index error when the missing y was fetched).
+  def _plot(mats:Mat*)(xlog:Boolean=false, ylog:Boolean=false, isconnected:Boolean=true):Plot = {
+    if (mats.length > 1 && mats.length % 2 != 0) {
+      throw new IllegalArgumentException("plot expects (x, y) matrix pairs but got "+mats.length+" matrices")
+    }
+    val p:Plot = new Plot
+    p.setXLog(xlog)
+    p.setYLog(ylog)
+    if (mats.length == 1) {
+      val m = mats(0)
+      if (m.nrows == 1 || m.ncols == 1) {
+        val dataset = 0
+        m match {
+          case mf:FMat => for (i <- 0 until m.length) p.addPoint(dataset, i, mf(i), isconnected)
+          case md:DMat => for (i <- 0 until m.length) p.addPoint(dataset, i, md(i), isconnected)
+          case mi:IMat => for (i <- 0 until m.length) p.addPoint(dataset, i, mi(i), isconnected)
+        }
+      } else {
+        for (i <- 0 until m.ncols) {
+          m match {
+            case mf:FMat => for (j <- 0 until m.nrows) p.addPoint(i, j, mf(j,i), isconnected)
+            case md:DMat => for (j <- 0 until m.nrows) p.addPoint(i, j, md(j,i), isconnected)
+            case mi:IMat => for (j <- 0 until m.nrows) p.addPoint(i, j, mi(j,i), isconnected)
+          }
+        }
+      }
+    } else {
+      for (i <- 0 until mats.length / 2) {
+        (mats(2*i), mats(2*i+1)) match {
+          case (a:FMat, b:FMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:FMat, b:DMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:DMat, b:FMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:DMat, b:DMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:FMat, b:IMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:DMat, b:IMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:IMat, b:FMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:IMat, b:DMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+          case (a:IMat, b:IMat) => for (j <- 0 until a.length) p.addPoint(i, a(j), b(j), isconnected)
+        }
+      }
+    }
+    val pframe:PlotFrame = new PlotFrame("Figure "+ifigure, p)
+    ifigure += 1
+    pframe.setVisible(true)
+    p
+  }
+
+  // Convenience wrappers around _plot: the plain names connect points, the p-prefixed ones do not; log variants set xlog and/or ylog.
+  def plot(mats:Mat*) = _plot(mats: _*)()
+  def loglog(mats:Mat*) = _plot(mats: _*)(xlog=true, ylog=true)
+  def semilogx(mats:Mat*) = _plot(mats: _*)(xlog=true)
+  def semilogy(mats:Mat*) = _plot(mats: _*)(ylog=true)
+  def p_plot(mats:Mat*) = _plot(mats: _*)(isconnected=false)
+  def ploglog(mats:Mat*) = _plot(mats: _*)(xlog=true, ylog=true, isconnected=false)
+  def psemilogx(mats:Mat*) = _plot(mats: _*)(xlog=true, isconnected=false)
+  def psemilogy(mats:Mat*) = _plot(mats: _*)(ylog=true, isconnected=false)
+
+  // Shows a ptolemy Histogram of the vector m (FMat, DMat or IMat) in a new frame.
+  // The bin width is (largest - smallest) / nbars, with the extremes taken from maxi/mini.
+  // When m is neither a single row nor a single column no points are added, but the
+  // (empty) frame is still shown and ifigure is still incremented.
+  def hist(m:Mat, nbars:Int=10) = {
+    import SciFunctions._
+    val p:Histogram = new Histogram
+    val dataset = 0
+    if (m.nrows == 1 || m.ncols == 1) {
+      m match {
+        case mf:FMat =>
+          p.setBinWidth((maxi(mf,0).v - mini(mf,0).v)/nbars)
+          for (i <- 0 until m.length) p.addPoint(dataset, mf(i))
+        case md:DMat =>
+          p.setBinWidth((maxi(md,0).v - mini(md,0).v)/nbars)
+          for (i <- 0 until m.length) p.addPoint(dataset, md(i))
+        case mi:IMat =>
+          p.setBinWidth((maxi(mi,0).v.asInstanceOf[Double] - mini(mi,0).v)/nbars)
+          for (i <- 0 until m.length) p.addPoint(dataset, mi(i))
+      }
+    }
+    val pframe:PlotFrame = new PlotFrame("Figure "+ifigure, p)
+    ifigure += 1
+    pframe.setVisible(true)
+  }
+
+  // Renders m with HeatChart, saves the chart to the relative path heat_map_<ifigure>.jpg,
+  // and shows the rendered image inside a scroll pane in a new 800x600 window titled
+  // "Figure <ifigure>". ifigure is incremented afterwards.
+  def heatmap(m:Mat) = {
+    val hc:HeatChart = new HeatChart(m)
+    val img:BufferedImage = hc.getChartImage(true)
+    hc.saveToFile(new File("heat_map_"+ifigure+".jpg"))
+    val jl:JLabel = new JLabel(new ImageIcon(img))
+    val jp:JPanel = new JPanel
+    jp.add(jl)
+    val jsp:JScrollPane = new JScrollPane(jp)
+    val jFrame:JFrame = new JFrame("Figure "+ifigure)
+    jFrame.getContentPane().add( jsp )
+    jFrame.setSize(800, 600)
+    ifigure += 1
+    jFrame.setVisible(true)
+  }
+}
+
+class HeatChart(mat:Mat) {
+ private var xValues:Array[Double] = new Array[Double](mat.ncols) // axis value printed for each matrix column
+ private var yValues:Array[Double] = new Array[Double](mat.nrows) // axis value printed for each matrix row
+ setXValues(0, 1) // default x values 0, 1, 2, ... (must run after xValues is allocated)
+ setYValues(0, 1) // default y values 0, 1, 2, ... (must run after yValues is allocated)
+ private var xValuesHorizontal:Boolean = false // false: x-axis values are drawn rotated (see drawXValues)
+ private var yValuesHorizontal:Boolean = true // true: y-axis values are drawn upright (see drawYValues)
+
+
+ private var cellSize:Dimension = new Dimension(10,10) // size of the rectangle painted for one matrix element
+ private var margin:Int = 20 // empty border used when sizing the chart and placing labels
+ private var backgroundColor = Color.WHITE // fill colour of the whole chart image
+
+ private var chartSize:Dimension = new Dimension(100,100) // placeholder; recomputed by measureComponents
+
+ private var highValueColor:Color = Color.BLUE // colour of the largest matrix value
+ private var lowValueColor:Color = Color.WHITE // colour of the smallest matrix value
+
+ private var colorValueDistance:Int = 1 // placeholder; set by updateColorDistance below
+ private var colorScale:Double = 1.0 // exponent applied to the relative value in getColorPosition
+
+ private var heatMapSize:Dimension = new Dimension(1000,1000) // placeholder; recomputed by measureComponents
+
+ private var heatMapTL:Point = new Point(0, 0) // top-left corner of the cell area; recomputed by updateCoordinates
+ private var heatMapBR:Point = new Point(800, 800) // bottom-right corner of the cell area; recomputed by updateCoordinates
+ private var heatMapC:Point = new Point(400, 400) // centre of the cell area; recomputed by updateCoordinates
+
+ private var axisThickness:Int = 2 // thickness of the two axis bars; bars are skipped when not positive
+ private var axisColor:Color = Color.BLACK
+ private var axisLabelsFont:Font = new Font("Sans-Serif", Font.PLAIN, 12)
+ private var axisLabelColor:Color = Color.BLACK
+ private var xAxisLabel:String = "X Label" // a null label is skipped when measuring and drawing
+ private var yAxisLabel:String = "Y Label" // a null label is skipped when measuring and drawing
+ private var axisValuesColor:Color = Color.BLACK
+ private var axisValuesFont:Font = new Font("Sans-Serif", Font.PLAIN, 10)
+ private var xAxisValuesFrequency:Int = 1 // only columns whose index is a multiple of this get a value drawn
+ private var yAxisValuesFrequency:Int = 1 // only rows whose index is a multiple of this get a value drawn
+ private var showXAxisValues:Boolean = true
+ private var showYAxisValues:Boolean = true
+
+ private var xAxisValuesHeight:Int = 0 // the measurements below are filled in by measureComponents
+ private var xAxisValuesWidthMax:Int = 0
+
+ private var yAxisValuesHeight:Int = 0
+ private var yAxisValuesAscent:Int = 0
+ private var yAxisValuesWidthMax:Int = 0
+
+ private var xAxisLabelSize:Dimension = new Dimension(0,0)
+ private var xAxisLabelDescent:Int = 0
+
+ private var yAxisLabelSize:Dimension = new Dimension(0,0)
+ private var yAxisLabelAscent:Int = 0
+
+
+ private var lowValue:Double = min(mat) // smallest matrix element; mapped to lowValueColor
+ private var highValue:Double = max(mat) // largest matrix element; mapped to highValueColor
+
+
+ updateColorDistance() // derive colorValueDistance from the two colours initialised above
+
+
+ // TODO: replace the hand-written scans below with the library's built-in min and max to speed this up.
+ private def min(mat:Mat):Double = mat match {
+   // Smallest element of mat, widened to Double. Only IMat, FMat and DMat have a case;
+   // any other matrix type falls through the match and raises scala.MatchError.
+   case ints:IMat => IMin(ints).toDouble
+   case floats:FMat => FMin(floats).toDouble
+   case doubles:DMat => DMin(doubles)
+ }
+
+
+ private def max(mat:Mat):Double = mat match {
+   // Largest element of mat, widened to Double. Only IMat, FMat and DMat have a case;
+   // any other matrix type falls through the match and raises scala.MatchError.
+   case ints:IMat => IMax(ints).toDouble
+   case floats:FMat => FMax(floats).toDouble
+   case doubles:DMat => DMax(doubles)
+ }
+
+ private def IMin(mat:IMat):Int = {
+   // Linear scan over all mat.length entries for the smallest one.
+   // The running value is seeded from mat(0), so the matrix is expected to hold
+   // at least one element.
+   var smallest:Int = mat(0)
+   for (k <- 0 until mat.length) if (mat(k) < smallest) smallest = mat(k)
+   smallest
+ }
+
+ private def FMin(mat:FMat):Float = {
+   // Linear scan over all mat.length entries for the smallest one.
+   // The running value is seeded from mat(0), so the matrix is expected to hold
+   // at least one element.
+   var smallest:Float = mat(0)
+   for (k <- 0 until mat.length) if (mat(k) < smallest) smallest = mat(k)
+   smallest
+ }
+
+ private def DMin(mat:DMat):Double = {
+   // Linear scan over all mat.length entries for the smallest one.
+   // The running value is seeded from mat(0), so the matrix is expected to hold
+   // at least one element.
+   var smallest:Double = mat(0)
+   for (k <- 0 until mat.length) if (mat(k) < smallest) smallest = mat(k)
+   smallest
+ }
+
+ private def IMax(mat:IMat):Int = {
+   // Linear scan over all mat.length entries for the largest one.
+   // The running value is seeded from mat(0), so the matrix is expected to hold
+   // at least one element.
+   var largest:Int = mat(0)
+   for (k <- 0 until mat.length) if (mat(k) > largest) largest = mat(k)
+   largest
+ }
+
+ private def FMax(mat:FMat):Float = {
+   // Linear scan over all mat.length entries for the largest one.
+   // The running value is seeded from mat(0), so the matrix is expected to hold
+   // at least one element.
+   var largest:Float = mat(0)
+   for (k <- 0 until mat.length) if (mat(k) > largest) largest = mat(k)
+   largest
+ }
+
+ private def DMax(mat:DMat):Double = {
+   // Linear scan over all mat.length entries for the largest one.
+   // The running value is seeded from mat(0), so the matrix is expected to hold
+   // at least one element.
+   var largest:Double = mat(0)
+   for (k <- 0 until mat.length) if (mat(k) > largest) largest = mat(k)
+   largest
+ }
+
+ def getChartImage(alpha:Boolean):BufferedImage = {
+   // Re-measures the layout and renders the complete chart into a new image on every call.
+   // alpha = true produces a TYPE_4BYTE_ABGR image, alpha = false a TYPE_3BYTE_BGR one.
+   measureComponents()
+   updateCoordinates()
+
+   val imageType:Int =
+     if (alpha) BufferedImage.TYPE_4BYTE_ABGR
+     else BufferedImage.TYPE_3BYTE_BGR
+   val canvas:BufferedImage = new BufferedImage(chartSize.width, chartSize.height, imageType)
+   val g:Graphics2D = canvas.createGraphics()
+   g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON)
+
+   // Background first, covering the whole chart.
+   g.setColor(backgroundColor)
+   g.fillRect(0, 0, chartSize.width, chartSize.height)
+
+   // Then the layers, in this fixed order: cells, axis labels, axis bars, axis values.
+   drawHeatMap(g, mat)
+   drawXLabel(g)
+   drawYLabel(g)
+   drawAxisBars(g)
+   drawXValues(g)
+   drawYValues(g)
+   canvas
+ }
+
+ private def drawHeatMap(chartGraphics:Graphics2D, mat:Mat) = {
+   // Paints one cellSize rectangle per element of mat (IMat, FMat or DMat) into an off-screen image, reading element (row, col) at linear index row + col*nrows, then draws that image at heatMapTL.
+   val rows:Int = mat.nrows
+   val cols:Int = mat.ncols
+   val cellImage:BufferedImage = new BufferedImage(heatMapSize.width, heatMapSize.height, BufferedImage.TYPE_INT_ARGB)
+   val cellGraphics:Graphics2D = cellImage.createGraphics()
+   def paintCell(col:Int, row:Int, value:Double) {
+     cellGraphics.setColor(getCellColor(value, lowValue, highValue))
+     cellGraphics.fillRect(col*cellSize.width, row*cellSize.height, cellSize.width, cellSize.height)
+   }
+
+   mat match {
+     case mi:IMat =>
+       var col = 0
+       while (col < cols) {
+         var row = 0
+         while (row < rows) {
+           paintCell(col, row, mi(row+col*rows).toDouble)
+           row += 1
+         }
+         col += 1
+       }
+     case mf:FMat =>
+       var col = 0
+       while (col < cols) {
+         var row = 0
+         while (row < rows) {
+           paintCell(col, row, mf(row+col*rows).toDouble)
+           row += 1
+         }
+         col += 1
+       }
+     case md:DMat =>
+       var col = 0
+       while (col < cols) {
+         var row = 0
+         while (row < rows) {
+           paintCell(col, row, md(row+col*rows))
+           row += 1
+         }
+         col += 1
+       }
+   }
+
+   chartGraphics.drawImage(cellImage, heatMapTL.x, heatMapTL.y, heatMapSize.width, heatMapSize.height, null)
+ }
+
+
+
+ private def getCellColor(data:Double, min:Double, max:Double):Color = {
+   // Maps data, taken relative to the range [min, max], to a colour between lowValueColor
+   // and highValueColor. The relative position is turned into a number of unit steps by
+   // getColorPosition; starting from lowValueColor, each step moves the channel that is
+   // currently furthest from highValueColor (red wins ties, then green) one unit closer.
+   val steps:Int = getColorPosition((data - min) / (max - min))
+   var r:Int = lowValueColor.getRed()
+   var g:Int = lowValueColor.getGreen
+   var b:Int = lowValueColor.getBlue()
+
+   var step = 0
+   while (step < steps) {
+     val dr:Int = r - highValueColor.getRed()
+     val dg:Int = g - highValueColor.getGreen()
+     val db:Int = b - highValueColor.getBlue()
+     val redIsFurthest = math.abs(dr) >= math.abs(dg) && math.abs(dr) >= math.abs(db)
+     if (redIsFurthest) {
+       r = changeColorValue(r, dr)
+     } else if (math.abs(dg) >= math.abs(db)) {
+       g = changeColorValue(g, dg)
+     } else {
+       b = changeColorValue(b, db)
+     }
+     step += 1
+   }
+   new Color(r, g, b)
+ }
+
+
+ private def getColorPosition(percentPosition:Double):Int =
+   // Number of unit colour steps for a relative position: colorValueDistance * percentPosition^colorScale, rounded.
+   math.round(math.pow(percentPosition, colorScale) * colorValueDistance).toInt
+
+ private def updateColorDistance() = {
+   // Recomputes colorValueDistance as the sum of the absolute red, green and blue
+   // differences between lowValueColor and highValueColor, i.e. the number of
+   // single-channel unit steps that separate the two colours.
+   val lo:Color = lowValueColor
+   val hi:Color = highValueColor
+   val redGap:Int = math.abs(lo.getRed() - hi.getRed())
+   val greenGap:Int = math.abs(lo.getGreen() - hi.getGreen())
+   val blueGap:Int = math.abs(lo.getBlue() - hi.getBlue())
+
+   colorValueDistance = redGap + greenGap + blueGap
+ }
+
+ private def changeColorValue(colorValue:Int, colorDistance:Int):Int = {
+   // One unit step for a colour channel: a negative colorDistance steps the value up,
+   // a positive one steps it down, and zero leaves it unchanged.
+   colorDistance match {
+     case d if d < 0 => colorValue + 1
+     case d if d > 0 => colorValue - 1
+     case _ => colorValue
+   }
+ }
+
+ private def measureComponents() = {
+   // Measures the label and axis-value text on a throwaway 1x1 image, then derives
+   // heatMapSize (the cells alone) and chartSize (cells plus margins, labels, axis
+   // values and the axis bar). All results are stored in the corresponding fields;
+   // getChartImage calls this before every rendering.
+   val scratch:BufferedImage = new BufferedImage(1, 1, BufferedImage.TYPE_INT_ARGB)
+   val g:Graphics2D = scratch.createGraphics()
+
+   // X axis label: text width and line height, plus the font descent.
+   // A null label measures as 0 x 0.
+   if (xAxisLabel == null) {
+     xAxisLabelSize = new Dimension(0, 0)
+   } else {
+     g.setFont(axisLabelsFont)
+     val fm:FontMetrics = g.getFontMetrics()
+     xAxisLabelSize = new Dimension(fm.stringWidth(xAxisLabel), fm.getHeight())
+     xAxisLabelDescent = fm.getDescent()
+   }
+
+   // Y axis label: same measurements, but the ascent is kept instead of the descent.
+   if (yAxisLabel == null) {
+     yAxisLabelSize = new Dimension(0, 0)
+   } else {
+     g.setFont(axisLabelsFont)
+     val fm:FontMetrics = g.getFontMetrics()
+     yAxisLabelSize = new Dimension(fm.stringWidth(yAxisLabel), fm.getHeight())
+     yAxisLabelAscent = fm.getAscent()
+   }
+
+   // X axis values: line height and the widest rendered value.
+   // When they are hidden only the height is reset.
+   if (!showXAxisValues) {
+     xAxisValuesHeight = 0
+   } else {
+     g.setFont(axisValuesFont)
+     val fm:FontMetrics = g.getFontMetrics()
+     xAxisValuesHeight = fm.getHeight()
+     xAxisValuesWidthMax = 0
+     for (v <- xValues) {
+       xAxisValuesWidthMax = math.max(xAxisValuesWidthMax, fm.stringWidth(v.toString()))
+     }
+   }
+
+   // Y axis values: line height, ascent and the widest rendered value.
+   // When they are hidden only the height is reset.
+   if (!showYAxisValues) {
+     yAxisValuesHeight = 0
+   } else {
+     g.setFont(axisValuesFont)
+     val fm:FontMetrics = g.getFontMetrics()
+     yAxisValuesHeight = fm.getHeight()
+     yAxisValuesAscent = fm.getAscent()
+     yAxisValuesWidthMax = 0
+     for (v <- yValues) {
+       yAxisValuesWidthMax = math.max(yAxisValuesWidthMax, fm.stringWidth(v.toString()))
+     }
+   }
+
+   // Heat map area: one cellSize rectangle per matrix element.
+   val heatMapWidth:Int = mat.ncols * cellSize.width
+   val heatMapHeight:Int = mat.nrows * cellSize.height
+   heatMapSize = new Dimension(heatMapWidth, heatMapHeight)
+
+   // Horizontal room taken by the y-axis values: their widest text when drawn
+   // upright, their line height when drawn rotated.
+   val yValuesHorizontalSize:Int =
+     if (yValuesHorizontal) yAxisValuesWidthMax
+     else yAxisValuesHeight
+
+   // Vertical room taken by the x-axis values, by the same rule.
+   val xValuesVerticalSize:Int =
+     if (xValuesHorizontal) xAxisValuesHeight
+     else xAxisValuesWidthMax
+
+   // Whole chart: heat map, a margin on both sides, the label's line height, the
+   // axis values and the axis bar.
+   val chartWidth:Int =
+     heatMapWidth + (2 * margin) + yAxisLabelSize.height + yValuesHorizontalSize + axisThickness
+   val chartHeight:Int =
+     heatMapHeight + (2 * margin) + xAxisLabelSize.height + xValuesVerticalSize + axisThickness
+   chartSize = new Dimension(chartWidth, chartHeight)
+ }
+
+ private def updateCoordinates() {
+   // Positions the cell area inside the chart. Its left edge comes after the margin,
+   // the axis bar, the y label's line height and the room for the y-axis values; its
+   // top edge is the margin. Bottom-right and centre follow from heatMapSize, so
+   // measureComponents must have run first.
+   val valuesRoom:Int = if (yValuesHorizontal) yAxisValuesWidthMax else yAxisValuesHeight
+   val left:Int = margin + axisThickness + yAxisLabelSize.height + valuesRoom
+   val top:Int = margin
+
+   heatMapTL = new Point(left, top)
+   heatMapBR = new Point(left + heatMapSize.width, top + heatMapSize.height)
+   heatMapC = new Point(left + (heatMapSize.width / 2), top + (heatMapSize.height / 2))
+
+ }
+
+ private def drawXLabel(chartGraphics:Graphics2D) = {
+   // Draws xAxisLabel centred under the cell area, half a margin plus the font descent above the bottom edge; a null label is skipped.
+   if (xAxisLabel != null) {
+     chartGraphics.setFont(axisLabelsFont)
+     chartGraphics.setColor(axisLabelColor)
+     val baselineY:Int = chartSize.height - (margin / 2) - xAxisLabelDescent
+     val startX:Int = heatMapC.x - (xAxisLabelSize.width / 2)
+     chartGraphics.drawString(xAxisLabel, startX, baselineY)
+   }
+ }
+
+ private def drawYLabel(chartGraphics:Graphics2D) = {
+   // Draws yAxisLabel rotated by 270 degrees about its anchor point, vertically centred
+   // on the cell area and half a margin plus the font ascent from the left edge; a null label is skipped.
+   if (yAxisLabel != null) {
+     val pivotX:Int = (margin / 2) + yAxisLabelAscent
+     val pivotY:Int = heatMapC.y + (yAxisLabelSize.width / 2)
+     chartGraphics.setFont(axisLabelsFont)
+     chartGraphics.setColor(axisLabelColor)
+
+     // Rotate for this one string only, then put the saved transform back.
+     val rotated:AffineTransform = chartGraphics.getTransform()
+     val saved:AffineTransform = rotated.clone().asInstanceOf[AffineTransform]
+     rotated.rotate(math.toRadians(270), pivotX, pivotY)
+     chartGraphics.setTransform(rotated)
+     chartGraphics.drawString(yAxisLabel, pivotX, pivotY)
+     chartGraphics.setTransform(saved)
+   }
+ }
+
+
+ private def drawAxisBars(chartGraphics:Graphics2D) = {
+   // Draws the two axis bars in axisColor, each axisThickness thick; nothing is drawn
+   // when axisThickness is zero or negative.
+   if (axisThickness > 0) {
+     chartGraphics.setColor(axisColor)
+     val barLeft:Int = heatMapTL.x - axisThickness
+
+     // Bar under the cell area; it starts axisThickness to the left of the cells so the
+     // two bars meet at the corner.
+     val barWidth:Int = heatMapSize.width + axisThickness
+     chartGraphics.fillRect(barLeft, heatMapBR.y, barWidth, axisThickness)
+
+     // Bar along the left edge of the cell area.
+     val barHeight:Int = heatMapSize.height
+     chartGraphics.fillRect(barLeft, heatMapTL.y, axisThickness, barHeight)
+   }
+ }
+
+
+ private def drawXValues(chartGraphics:Graphics2D) = {
+   // Draws the x-axis value of every column whose index is a multiple of
+   // xAxisValuesFrequency, below the cell area. Nothing is drawn when
+   // showXAxisValues is false.
+   if (showXAxisValues) {
+     chartGraphics.setColor(axisValuesColor)
+
+     for (col <- 0 until mat.ncols if col % xAxisValuesFrequency == 0) {
+       val text:String = xValues(col).toString()
+       chartGraphics.setFont(axisValuesFont)
+       val fm:FontMetrics = chartGraphics.getFontMetrics()
+       val textWidth:Int = fm.stringWidth(text)
+       val cellLeft:Int = col * cellSize.width
+
+       if (xValuesHorizontal) {
+         // Upright text, horizontally centred on its column.
+         val textX:Int = cellLeft + ((cellSize.width / 2) - (textWidth / 2)) + heatMapTL.x
+         val textY:Int = heatMapBR.y + fm.getAscent() + 1
+         chartGraphics.drawString(text, textX, textY)
+       } else {
+         // Text rotated by 270 degrees about its anchor point, placed below the axis bar.
+         val textX:Int = heatMapTL.x + cellLeft + ((cellSize.width / 2) + (xAxisValuesHeight / 2))
+         val textY:Int = heatMapBR.y + axisThickness + textWidth
+
+         val rotated:AffineTransform = chartGraphics.getTransform()
+         val saved:AffineTransform = rotated.clone().asInstanceOf[AffineTransform]
+         rotated.rotate(math.toRadians(270), textX, textY)
+         chartGraphics.setTransform(rotated)
+         chartGraphics.drawString(text, textX, textY)
+         chartGraphics.setTransform(saved)
+       }
+     }
+
+   }
+ }
+
+
+ private def drawYValues(chartGraphics:Graphics2D) = {
+   // Draws the y-axis value of every row whose index is a multiple of yAxisValuesFrequency,
+   // left of the cell area. Nothing is drawn when showYAxisValues is false.
+   if (showYAxisValues) {
+     chartGraphics.setColor(axisValuesColor)
+     for (row <- 0 until mat.nrows if row % yAxisValuesFrequency == 0) {
+       val text:String = yValues(row).toString()
+       chartGraphics.setFont(axisValuesFont)
+       val fm:FontMetrics = chartGraphics.getFontMetrics()
+       val textWidth:Int = fm.stringWidth(text)
+       val cellMiddle:Int = heatMapTL.y + (row * cellSize.height) + (cellSize.height/2)
+       if (yValuesHorizontal) {
+         // Upright text, right-aligned within the widest value's width.
+         val textX:Int = margin + yAxisLabelSize.height + (yAxisValuesWidthMax - textWidth)
+         val textY:Int = cellMiddle + (yAxisValuesAscent/2)
+         chartGraphics.drawString(text, textX, textY)
+       } else {
+         // Text rotated by 270 degrees about its anchor point.
+         val textX:Int = margin + yAxisLabelSize.height + yAxisValuesAscent
+         val textY:Int = cellMiddle + (textWidth/2)
+
+         val rotated:AffineTransform = chartGraphics.getTransform()
+         val saved:AffineTransform = rotated.clone().asInstanceOf[AffineTransform]
+         rotated.rotate(math.toRadians(270), textX, textY)
+         chartGraphics.setTransform(rotated)
+         chartGraphics.drawString(text, textX, textY)
+         chartGraphics.setTransform(saved)
+       }
+     }
+   }
+ }
+
+
+ def setXValues(xOffset:Double, xInterval:Double) = {
+   // Sets the axis value of column i to xOffset + i * xInterval for every matrix column.
+   val cols = 0 until mat.ncols
+   cols.foreach { col => xValues(col) = xOffset + (col * xInterval) }
+ }
+
+ def setYValues(yOffset:Double, yInterval:Double) = {
+   // Sets the axis value of row i to yOffset + i * yInterval for every matrix row.
+   val rows = 0 until mat.nrows
+   rows.foreach { row => yValues(row) = yOffset + (row * yInterval) }
+ }
+
+
+ def saveToFile(outputFile:File) = {
+   // Renders the chart and writes it to outputFile; the image format is the file name's
+   // extension (the text after the last '.'). "jpg"/"jpeg" in any letter case is rendered
+   // without an alpha channel, every other extension with one.
+   // Throws IOException when the name has no '.', or when ImageIO has no writer for the
+   // extension (ImageIO.write reports that by returning false, which was previously
+   // ignored so a failed save went unnoticed). Returns true on success.
+   val filename:String = outputFile.getName()
+   val extPoint:Int = filename.lastIndexOf('.')
+   if (extPoint < 0) {
+     throw new IOException("Illegal filename: need a extension.")
+   }
+
+   val ext:String = filename.substring(extPoint + 1)
+   val lowered:String = ext.toLowerCase()
+   val isJpeg:Boolean = lowered.equals("jpg") || lowered.equals("jpeg")
+   val written:Boolean = ImageIO.write(getChartImage(!isJpeg), ext, outputFile)
+   if (!written) throw new IOException("No image writer found for format: "+ext)
+   written
+ }
+
+}
+
+
diff --git a/src/main/scala/BIDMat/SDMat.scala b/src/main/scala/BIDMat/SDMat.scala
new file mode 100755
index 00000000..d6cdcb62
--- /dev/null
+++ b/src/main/scala/BIDMat/SDMat.scala
@@ -0,0 +1,225 @@
+package BIDMat
+
+import edu.berkeley.bid.SPBLAS._
+
+case class SDMat(nr:Int, nc:Int, nnz1:Int, ir0:Array[Int], jc0:Array[Int], data0:Array[Double]) extends SparseMat[Double](nr, nc, nnz1, ir0, jc0, data0) {
+
+ def getdata() = data;
+
+ override def t:SDMat = SDMat(gt)
+
+ override def mytype = "SDMat"
+
+ def horzcat(b: SDMat) = SDMat(super.horzcat(b))
+
+ def vertcat(b: SDMat) = SDMat(super.vertcat(b))
+
+ def find:IMat = IMat(gfind)
+
+ def find2:(IMat, IMat) = { val (ii, jj) = gfind2 ; (IMat(ii), IMat(jj)) }
+
+ def find3:(IMat, IMat, DMat) = { val (ii, jj, vv) = gfind3 ; (IMat(ii), IMat(jj), DMat(vv)) }
+
+ override def apply(a:IMat, b:IMat):SDMat = SDMat(gapply(a, b))
+
+ def ssMatOp(b: SDMat, f:(Double, Double) => Double, omat:Mat) = SDMat(sgMatOp(b, f, omat))
+
+ def ssMatOpScalar(b: Double, f:(Double, Double) => Double, omat:Mat) = SDMat(sgMatOpScalar(b, f, omat))
+
+ def ssReduceOp(n:Int, f1:(Double) => Double, f2:(Double, Double) => Double, omat:Mat) = DMat(sgReduceOp(n, f1, f2, omat))
+
+ def horzcat(a:DMat):DMat = MatFunctions.full(this).horzcat(a)
+
+ def vertcat(a:DMat):DMat = MatFunctions.full(this).vertcat(a)
+
+ /**
+  * Multiplies this sparse matrix by a, returning a dense DMat with nrows rows and a.ncols columns.
+  *
+  * Supported right operands:
+  *  - SDMat: the product is accumulated entry by entry into the dense output.
+  *  - DMat: a hand-written loop when Mat.noMKL is set, otherwise the native dcscmm routine.
+  *
+  * @param a    right operand; a.nrows must equal ncols
+  * @param omat optional output matrix handed to DMat.newOrCheckDMat; when it is non-null
+  *             the result matrix is cleared before accumulation
+  * @throws RuntimeException on a dimension mismatch or an unsupported operand type
+  */
+ def SMult(a:Mat, omat:DMat):DMat = {
+   // Index base used by jc/ir; subtracting it gives zero-based array positions.
+   val ioff = Mat.ioneBased
+   if (ncols != a.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   } else {
+     a match {
+       case aa:SDMat => {
+         val out = DMat.newOrCheckDMat(nrows, a.ncols, omat)
+         if (omat.asInstanceOf[AnyRef] != null) out.clear
+         var i = 0
+         while (i < a.ncols) {
+           // Walk the stored entries of column i of aa.
+           var j =aa.jc(i)-ioff
+           while (j < aa.jc(i+1)-ioff) {
+             val dval = aa.data(j)
+             // The row index of the aa entry selects the column of this matrix to scale.
+             var k = jc(aa.ir(j)-ioff)-ioff
+             while (k < jc(aa.ir(j)+1-ioff)-ioff) {
+               out.data(ir(k)-ioff+nrows*i) += data(k) * dval
+               k += 1
+             }
+             j += 1
+           }
+           i += 1
+         }
+         out
+       }
+       case dd:DMat => {
+         val out = DMat.newOrCheckDMat(nrows, a.ncols, omat)
+         if (omat.asInstanceOf[AnyRef] != null) out.clear
+         Mat.nflops += 2L * nnz * a.ncols
+         if (Mat.noMKL) {
+           var i = 0
+           while (i < dd.ncols) {
+             var j = 0
+             while (j < ncols) {
+               val dval = dd.data(j + i*dd.nrows)
+               var k = jc(j)-ioff
+               while (k < jc(j+1)-ioff) {
+                 out.data(ir(k)-ioff + i*nrows) += dval * data(k);
+                 k += 1
+               }
+               j += 1
+             }
+             i += 1
+           }
+         } else {
+           val nc = dd.ncols
+           var jc0 = jc
+           var ir0 = ir
+           // Zero-based index arrays are passed through SparseMat.incInds before the native call
+           // (presumably producing one-based copies -- confirm against SparseMat.incInds).
+           if (ioff == 0) {
+             jc0 = SparseMat.incInds(jc)
+             ir0 = SparseMat.incInds(ir)
+           }
+           // if (dd.ncols == 1) {
+           // Seg faults in Linux and Windows:
+           // dcscmv("N", nrows, ncols, 1.0, "GLNF", data, ir, jc, dd.data, 0.0, out.data)
+           // } else {
+           // The output leading dimension is taken from out itself, matching SMat.SMult and Tmult.
+           dcscmm("N", nrows, nc, ncols, 1.0, "GLNF", data, ir0, jc0, dd.data, ncols, 0.0, out.data, out.nrows)
+           // }
+         }
+         out
+       }
+       case _ => throw new RuntimeException("unsupported arg")
+     }
+   }
+ }
+
+ /**
+  * Transposed product via the native dcscmm routine with operation "T"; the result is a
+  * dense DMat with ncols rows and a.ncols columns.
+  *
+  * @param a    dense right operand; a.nrows must equal nrows of this matrix
+  * @param omat optional output matrix handed to DMat.newOrCheckDMat; when it is non-null
+  *             the result matrix is cleared first
+  * @throws RuntimeException if a.nrows differs from nrows
+  */
+ def Tmult(a:DMat, omat:DMat):DMat = {
+   // Reject mismatched shapes here: the native routine is told that a has nrows rows and
+   // would otherwise read a.data with the wrong extent.
+   if (nrows != a.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val out = DMat.newOrCheckDMat(ncols, a.ncols, omat)
+   if (omat.asInstanceOf[AnyRef] != null) out.clear
+   var jc0 = jc
+   var ir0 = ir
+   // Zero-based index arrays are passed through SparseMat.incInds before the native call
+   // (presumably producing one-based copies -- confirm against SparseMat.incInds).
+   if (Mat.ioneBased == 0) {
+     jc0 = SparseMat.incInds(jc)
+     ir0 = SparseMat.incInds(ir)
+   }
+   dcscmm("T", nrows, a.ncols, ncols, 1.0, "GLNF", data, ir0, jc0, a.data, a.nrows, 0.0, out.data, out.nrows)
+   Mat.nflops += 2L * nnz * a.ncols
+   out
+ }
+
+ /**
+  * Sparse-times-sparse product returning an SDMat.
+  *
+  * Runs two passes over the stored entries of a: the first counts how many partial
+  * products will be generated so the triplet arrays can be sized exactly, the second
+  * fills them. The (row, column, value) triplets are then handed to SparseMat.sparseImpl
+  * (which presumably merges entries sharing a position -- confirm there).
+  *
+  * @throws RuntimeException if ncols differs from a.nrows
+  */
+ def SSMult(a:SDMat):SDMat = {
+   if (ncols != a.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val base = Mat.ioneBased
+   // Pass 1: count the partial products.
+   var count = 0
+   var col = 0
+   while (col < a.ncols) {
+     var p = a.jc(col)-base
+     val pEnd = a.jc(col+1)-base
+     while (p < pEnd) {
+       val src = a.ir(p)-base
+       count += jc(src+1) - jc(src)
+       p += 1
+     }
+     col += 1
+   }
+   val rowIdx = new Array[Int](count)
+   val colIdx = new Array[Int](count)
+   val vals = new Array[Double](count)
+   // Pass 2: emit one triplet per partial product.
+   var pos = 0
+   col = 0
+   while (col < a.ncols) {
+     var p = a.jc(col)-base
+     val pEnd = a.jc(col+1)-base
+     while (p < pEnd) {
+       val scale = a.data(p)
+       val src = a.ir(p)-base
+       var q = jc(src)-base
+       val qEnd = jc(src+1)-base
+       while (q < qEnd) {
+         vals(pos) = data(q) * scale
+         rowIdx(pos) = ir(q)-base
+         colIdx(pos) = col
+         pos += 1
+         q += 1
+       }
+       p += 1
+     }
+     col += 1
+   }
+   SDMat(SparseMat.sparseImpl[Double](rowIdx, colIdx, vals, nrows, a.ncols))
+ }
+
+ def + (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x + y, null)
+ def - (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x - y, null)
+ def * (b : DMat):DMat = SMult(b, null)
+ def Tx (b : DMat):DMat = Tmult(b, null)
+ override def * (b : Mat):DMat = SMult(b, null)
+ def *! (b : SDMat) = SSMult(b)
+ def *@ (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x * y, null)
+ def /@ (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => x / y, null)
+
+ def > (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, null)
+ def < (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, null)
+ def == (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+ def === (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+ def >= (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, null)
+ def <= (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, null)
+ def != (b : SDMat) = ssMatOp(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, null)
+
+ override def + (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x + y, null)
+ override def - (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x - y, null)
+ override def *@ (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x * y, null)
+ override def /@ (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => x / y, null)
+
+ override def > (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x > y) 1.0 else 0.0, null)
+ override def < (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x < y) 1.0 else 0.0, null)
+ override def == (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x == y) 1.0 else 0.0, null)
+ override def >= (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x >= y) 1.0 else 0.0, null)
+ override def <= (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x <= y) 1.0 else 0.0, null)
+ override def != (b : Double) = ssMatOpScalar(b, (x:Double, y:Double) => if (x != y) 1.0 else 0.0, null)
+
+ def \ (b: SDMat) = horzcat(b)
+ def on (b: SDMat) = vertcat(b)
+
+ /**
+  * Converts to a single-precision SMat of the same shape: the first ncols+1 column
+  * pointers and the first nnz row indices are copied, and the first nnz values go
+  * through Mat.copyToFloatArray.
+  */
+ def toSMat:SMat = {
+   val converted = SMat(nrows, ncols, nnz)
+   System.arraycopy(jc, 0, converted.jc, 0, ncols+1)
+   System.arraycopy(ir, 0, converted.ir, 0, nnz)
+   Mat.copyToFloatArray(data, 0, converted.data, 0, nnz)
+   converted
+ }
+
+ override def zeros(nr:Int, nc:Int, nnz:Int) = SDMat(nr, nc, nnz)
+
+ /**
+  * Builds an SDMat of the requested shape, reusing this matrix's jc, ir and data arrays
+  * when they are already large enough and allocating fresh ones otherwise.
+  *
+  * @param nr  number of rows of the new matrix
+  * @param nc  number of columns; jc must hold at least nc+1 entries to be reused
+  * @param nnz number of stored entries; ir and data must hold at least nnz to be reused
+  */
+ override def recycle(nr:Int, nc:Int, nnz:Int):SDMat = {
+   val jc0 = if (jc.size >= nc+1) jc else new Array[Int](nc+1)
+   val ir0 = if (ir.size >= nnz) ir else new Array[Int](nnz)
+   val data0 = if (data.size >= nnz) data else new Array[Double](nnz)
+   // The constructor takes (nr, nc, nnz, ir, jc, data): row indices first, then column
+   // pointers. Passing them the other way round swaps the two index arrays.
+   new SDMat(nr, nc, nnz, ir0, jc0, data0)
+ }
+}
+
+ /** Couples an SDMat operand with a DMat that is passed as the output argument of its products. */
+ class SDPair (val omat:DMat, val mat:SDMat) extends Pair {
+   /** mat * b, with omat passed as the output matrix. */
+   def * (b : DMat):DMat = {
+     mat.SMult(b, omat)
+   }
+   /** Transposed product of mat with b, with omat passed as the output matrix. */
+   def Tx (b : DMat):DMat = {
+     mat.Tmult(b, omat)
+   }
+   /** mat * b for a generic Mat operand, with omat passed as the output matrix. */
+   override def * (b : Mat):DMat = {
+     mat.SMult(b, omat)
+   }
+ }
+
+ /** Factory methods for SDMat. */
+ object SDMat {
+
+   /** Allocates an nr x nc SDMat with nnz0 row indices, nc+1 column pointers and nnz0 values. */
+   def apply(nr:Int, nc:Int, nnz0:Int):SDMat = {
+     val rowInds = new Array[Int](nnz0)
+     val colPtrs = new Array[Int](nc+1)
+     val values = new Array[Double](nnz0)
+     new SDMat(nr, nc, nnz0, rowInds, colPtrs, values)
+   }
+
+   /** Wraps a generic SparseMat[Double] as an SDMat that shares its ir, jc and data arrays. */
+   def apply(a:SparseMat[Double]):SDMat = {
+     new SDMat(a.nrows, a.ncols, a.nnz, a.ir, a.jc, a.data)
+   }
+
+   /** Converts a single-precision SMat through its toSDMat method. */
+   def apply(a:SMat) = {
+     a.toSDMat
+   }
+
+   /** Like apply(nr, nc, nnz0), but the row-index array is left null. */
+   def SDnoRows(nr:Int, nc:Int, nnz0:Int):SDMat = {
+     val colPtrs = new Array[Int](nc+1)
+     val values = new Array[Double](nnz0)
+     new SDMat(nr, nc, nnz0, null, colPtrs, values)
+   }
+ }
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/SMat.scala b/src/main/scala/BIDMat/SMat.scala
new file mode 100755
index 00000000..8ed7164f
--- /dev/null
+++ b/src/main/scala/BIDMat/SMat.scala
@@ -0,0 +1,269 @@
+package BIDMat
+
+import edu.berkeley.bid.SPBLAS._
+
+case class SMat(nr:Int, nc:Int, nnz1:Int, ir0:Array[Int], jc0:Array[Int], data0:Array[Float]) extends SparseMat[Float](nr, nc, nnz1, ir0, jc0, data0) {
+
+ def getdata() = data;
+
+ override def t:SMat = SMat(gt)
+
+ override def mytype = "SMat"
+
+ def horzcat(b: SMat) = SMat(super.horzcat(b))
+
+ def vertcat(b: SMat) = SMat(super.vertcat(b))
+
+ def find:IMat = IMat(gfind)
+
+ def find2:(IMat, IMat) = { val (ii, jj) = gfind2 ; (IMat(ii), IMat(jj)) }
+
+ def find3:(IMat, IMat, FMat) = { val (ii, jj, vv) = gfind3 ; (IMat(ii), IMat(jj), FMat(vv)) }
+
+ override def contents:FMat = FMat(nnz, 1, data)
+
+ override def apply(a:IMat, b:IMat):SMat = SMat(gapply(a, b))
+
+ def ssMatOp(b: SMat, f:(Float, Float) => Float, omat:Mat) = SMat(sgMatOp(b, f, omat))
+
+ def ssMatOpScalar(b: Float, f:(Float, Float) => Float, omat:Mat) = SMat(sgMatOpScalar(b, f, omat))
+
+ def ssReduceOp(n:Int, f1:(Float) => Float, f2:(Float, Float) => Float, omat:Mat) = FMat(sgReduceOp(n, f1, f2, omat))
+
+ def horzcat(a:FMat):FMat = FMat(MatFunctions.full(this).ghorzcat(a))
+
+ def vertcat(a:FMat):FMat = FMat(MatFunctions.full(this).gvertcat(a))
+
+ /**
+  * Multiplies this sparse matrix by a, returning a dense FMat with nrows rows and a.ncols columns.
+  *
+  * Supported right operands:
+  *  - SMat: the product is accumulated entry by entry into the dense output.
+  *  - FMat: a hand-written loop when Mat.noMKL is set, otherwise the native scscmm routine.
+  *
+  * @param a    right operand; a.nrows must equal ncols
+  * @param omat optional output matrix handed to FMat.newOrCheckFMat; when it is non-null
+  *             the result matrix is cleared before accumulation
+  * @throws RuntimeException on a dimension mismatch or an unsupported operand type
+  */
+ def SMult(a:Mat, omat:Mat):FMat = {
+   // Index base used by jc/ir; subtracting it gives zero-based array positions.
+   val base = Mat.ioneBased
+   if (ncols != a.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   a match {
+     case sp:SMat => {
+       val result = FMat.newOrCheckFMat(nrows, a.ncols, omat)
+       if (omat.asInstanceOf[AnyRef] != null) result.clear
+       var col = 0
+       while (col < a.ncols) {
+         // Walk the stored entries of column col of sp.
+         var p = sp.jc(col)-base
+         while (p < sp.jc(col+1)-base) {
+           val scale = sp.data(p)
+           // The row index of the sp entry selects the column of this matrix to scale.
+           var q = jc(sp.ir(p)-base)-base
+           while (q < jc(sp.ir(p)+1-base)-base) {
+             result.data(ir(q)-base+nrows*col) += data(q) * scale
+             q += 1
+           }
+           p += 1
+         }
+         col += 1
+       }
+       result
+     }
+     case dense:FMat => {
+       val result = FMat.newOrCheckFMat(nrows, a.ncols, omat)
+       if (omat.asInstanceOf[AnyRef] != null) result.clear
+       Mat.nflops += 2L * nnz * a.ncols
+       if (Mat.noMKL) {
+         var col = 0
+         while (col < dense.ncols) {
+           var inner = 0
+           while (inner < ncols) {
+             val scale = dense.data(inner + col*dense.nrows)
+             var q = jc(inner)-base
+             while (q < jc(inner+1)-base) {
+               result.data(ir(q)-base + col*nrows) += scale * data(q);
+               q += 1
+             }
+             inner += 1
+           }
+           col += 1
+         }
+       } else {
+         val outCols = dense.ncols
+         var colPtrs = jc
+         var rowInds = ir
+         if (base == 0) {
+           colPtrs = SparseMat.incInds(jc)
+           rowInds = SparseMat.incInds(ir)
+         }
+         // if (dd.ncols == 1) {
+         // Seg faults in linux and windows
+         // scscmv("N", nrows, ncols, 1.0f, "GLNF", data, ir, jc, dd.data, 0f, out.data)
+         // } else {
+         scscmm("N", nrows, outCols, ncols, 1.0f, "GLNF", data, rowInds, colPtrs, dense.data, ncols, 0f, result.data, result.nrows)
+         // }
+       }
+       result
+     }
+     case _ => throw new RuntimeException("unsupported arg")
+   }
+ }
+
+ /**
+  * Transposed product via the native scscmm routine with operation "T"; the result is a
+  * dense FMat with ncols rows and a.ncols columns.
+  *
+  * @param a    dense right operand; a.nrows must equal nrows of this matrix
+  * @param omat optional output matrix handed to FMat.newOrCheckFMat; when it is non-null
+  *             the result matrix is cleared first
+  * @throws RuntimeException if a.nrows differs from nrows
+  */
+ def Tmult(a:FMat, omat:Mat):FMat = {
+   // Reject mismatched shapes here: the native routine is told that a has nrows rows and
+   // would otherwise read a.data with the wrong extent.
+   if (nrows != a.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val out = FMat.newOrCheckFMat(ncols, a.ncols, omat)
+   if (omat.asInstanceOf[AnyRef] != null) out.clear
+   var jc0 = jc
+   var ir0 = ir
+   // Zero-based index arrays are passed through SparseMat.incInds before the native call
+   // (presumably producing one-based copies -- confirm against SparseMat.incInds).
+   if (Mat.ioneBased == 0) {
+     jc0 = SparseMat.incInds(jc)
+     ir0 = SparseMat.incInds(ir)
+   }
+   scscmm("T", nrows, a.ncols, ncols, 1.0f, "GLNF", data, ir0, jc0, a.data, a.nrows, 0f, out.data, out.nrows)
+   Mat.nflops += 2L * nnz * a.ncols
+   out
+ }
+
+ /**
+  * Sparse-times-sparse product returning an SMat.
+  *
+  * Runs two passes over the stored entries of a: the first counts how many partial
+  * products will be generated so the triplet arrays can be sized exactly, the second
+  * fills them. The (row, column, value) triplets are then handed to SparseMat.sparseImpl
+  * (which presumably merges entries sharing a position -- confirm there).
+  *
+  * @throws RuntimeException if ncols differs from a.nrows
+  */
+ def SSMult(a:SMat):SMat = {
+   if (ncols != a.nrows) {
+     throw new RuntimeException("dimensions mismatch")
+   }
+   val base = Mat.ioneBased
+   // Pass 1: count the partial products.
+   var count = 0
+   var col = 0
+   while (col < a.ncols) {
+     var p = a.jc(col)-base
+     val pEnd = a.jc(col+1)-base
+     while (p < pEnd) {
+       val src = a.ir(p)-base
+       count += jc(src+1) - jc(src)
+       p += 1
+     }
+     col += 1
+   }
+   val rowIdx = new Array[Int](count)
+   val colIdx = new Array[Int](count)
+   val vals = new Array[Float](count)
+   // Pass 2: emit one triplet per partial product.
+   var pos = 0
+   col = 0
+   while (col < a.ncols) {
+     var p = a.jc(col)-base
+     val pEnd = a.jc(col+1)-base
+     while (p < pEnd) {
+       val scale = a.data(p)
+       val src = a.ir(p)-base
+       var q = jc(src)-base
+       val qEnd = jc(src+1)-base
+       while (q < qEnd) {
+         vals(pos) = data(q) * scale
+         rowIdx(pos) = ir(q)-base
+         colIdx(pos) = col
+         pos += 1
+         q += 1
+       }
+       p += 1
+     }
+     col += 1
+   }
+   SMat(SparseMat.sparseImpl[Float](rowIdx, colIdx, vals, nrows, a.ncols))
+ }
+
+ def + (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x + y, null)
+ def - (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x - y, null)
+ def * (b : FMat):FMat = SMult(b, null)
+ def Tx (b : FMat):FMat = Tmult(b, null)
+ def *! (b : SMat) = SSMult(b)
+ def *@ (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x * y, null)
+ def /@ (b : SMat) = ssMatOp(b, (x:Float, y:Float) => x / y, null)
+
+ def > (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x > y) 1.0f else 0f, null)
+ def < (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x < y) 1.0f else 0f, null)
+ def == (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x == y) 1.0f else 0f, null)
+ def === (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x == y) 1.0f else 0f, null)
+ def >= (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0f, null)
+ def <= (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0f, null)
+ def != (b : SMat) = ssMatOp(b, (x:Float, y:Float) => if (x != y) 1.0f else 0f, null)
+
+ override def + (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x + y, null)
+ override def - (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x - y, null)
+ override def *@ (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x * y, null)
+ override def /@ (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => x / y, null)
+
+ override def > (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x > y) 1.0f else 0f, null)
+ override def < (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x < y) 1.0f else 0f, null)
+ override def == (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x == y) 1.0f else 0f, null)
+ override def >= (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x >= y) 1.0f else 0f, null)
+ override def <= (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x <= y) 1.0f else 0f, null)
+ override def != (b : Float) = ssMatOpScalar(b, (x:Float, y:Float) => if (x != y) 1.0f else 0f, null)
+
+ override def * (b : Mat):FMat = SMult(b, null)
+ override def Tx (b : Mat):Mat = b match {case bb:FMat => Tmult(bb, null)}
+
+ def \ (b: SMat) = horzcat(b)
+ def on (b: SMat) = vertcat(b)
+
+ def ~ (b : SMat):SPair = new SPair(this, b)
+
+ override def ~ (b: Mat):Pair =
+ b match {
+ case sb:SMat => new SPair(this, sb)
+ case _ => throw new RuntimeException("mismatched types for operator ~")
+ }
+
+ /**
+  * Converts to a double-precision SDMat of the same shape: the first ncols+1 column
+  * pointers and the first nnz row indices are copied, and the first nnz values go
+  * through Mat.copyToDoubleArray.
+  */
+ def toSDMat:SDMat = {
+   val converted = SDMat(nrows, ncols, nnz)
+   System.arraycopy(jc, 0, converted.jc, 0, ncols+1)
+   System.arraycopy(ir, 0, converted.ir, 0, nnz)
+   Mat.copyToDoubleArray(data, 0, converted.data, 0, nnz)
+   converted
+ }
+
+ override def zeros(nr:Int, nc:Int, nnz:Int) = SMat(nr, nc, nnz)
+
+ /**
+  * Builds an SMat of the requested shape, reusing this matrix's jc, ir and data arrays
+  * when they are already large enough and allocating fresh ones otherwise.
+  */
+ override def recycle(nr:Int, nc:Int, nnz:Int):SMat = {
+   val colPtrs = if (jc.size < nc+1) new Array[Int](nc+1) else jc
+   val rowInds = if (ir.size < nnz) new Array[Int](nnz) else ir
+   val values = if (data.size < nnz) new Array[Float](nnz) else data
+   // Constructor order is (nr, nc, nnz, ir, jc, data).
+   new SMat(nr, nc, nnz, rowInds, colPtrs, values)
+ }
+}
+
+ /** Couples an SMat operand with a Mat that is passed as the output argument of its operations. */
+ class SPair (val omat:Mat, val mat:SMat) extends Pair {
+   // Products: each forwards to mat with omat as the output matrix.
+   def * (b : FMat):FMat = { mat.SMult(b, omat) }
+   def Tx (b : FMat):FMat = { mat.Tmult(b, omat) }
+   override def * (b : Mat):FMat = { mat.SMult(b, omat) }
+   override def Tx (b : Mat):Mat = b match {
+     case bb:FMat => mat.Tmult(bb, omat)
+   }
+
+   // Element-wise sparse/sparse operations, written into omat.
+   def + (b : SMat) = mat.ssMatOp(b, (l:Float, r:Float) => l + r, omat)
+   def - (b : SMat) = mat.ssMatOp(b, (l:Float, r:Float) => l - r, omat)
+   def *@ (b : SMat) = mat.ssMatOp(b, (l:Float, r:Float) => l * r, omat)
+   def /@ (b : SMat) = mat.ssMatOp(b, (l:Float, r:Float) => l / r, omat)
+
+   // Generic-operand versions dispatch through Operator.applyMat.
+   import Operator._
+   override def + (b : Mat):Mat = { applyMat(mat, b, omat, Mop_Plus) }
+   override def - (b : Mat):Mat = { applyMat(mat, b, omat, Mop_Minus) }
+   override def *@ (b : Mat):Mat = { applyMat(mat, b, omat, Mop_ETimes) }
+   override def /@ (b : Mat):Mat = { applyMat(mat, b, omat, Mop_EDiv) }
+ }
+
+ /** Factory methods for SMat. */
+ object SMat {
+
+   /** Allocates an nr x nc SMat with nnz0 row indices, nc+1 column pointers and nnz0 values. */
+   def apply(nr:Int, nc:Int, nnz0:Int):SMat = new SMat(nr, nc, nnz0, new Array[Int](nnz0), new Array[Int](nc+1), new Array[Float](nnz0))
+
+   /** Wraps a generic SparseMat[Float] as an SMat that shares its ir, jc and data arrays. */
+   def apply(a:SparseMat[Float]):SMat = new SMat(a.nrows, a.ncols, a.nnz, a.ir, a.jc, a.data)
+
+   /** Converts a double-precision SDMat through its toSMat method. */
+   def apply(a:SDMat) = a.toSMat
+
+   /**
+    * Converts a generic Mat to an SMat: an SMat is returned as-is, GSMat and SDMat go
+    * through their toSMat methods.
+    *
+    * @throws RuntimeException for any other matrix type
+    */
+   def apply(a:Mat) = a match {
+     case aa:SMat => aa
+     case aa:GSMat => aa.toSMat
+     case aa:SDMat => aa.toSMat
+     // Fail with a descriptive message instead of a bare MatchError.
+     case _ => throw new RuntimeException("SMat.apply: unsupported matrix type")
+   }
+
+   /** Like apply(nr, nc, nnz0), but the row-index array is left null. */
+   def SnoRows(nr:Int, nc:Int, nnz0:Int):SMat = new SMat(nr, nc, nnz0, null, new Array[Int](nc+1), new Array[Float](nnz0))
+
+   /**
+    * Returns an SMat with the shape and nnz of mat to be used as an output container.
+    *
+    *  - oldmat null or 0x0: a fresh SMat is allocated.
+    *  - oldmat is an SMat whose nrows, ncols and nnz all match: it is returned unchanged.
+    *  - oldmat is an SMat with a different shape or nnz: the result of oldmat.recycle is returned.
+    *
+    * @throws RuntimeException if oldmat is non-null, non-empty and not an SMat
+    */
+   def newOrCheckSMat(mat:SMat, oldmat:Mat):SMat = {
+     if (oldmat.asInstanceOf[AnyRef] == null || (oldmat.nrows == 0 && oldmat.ncols == 0)) {
+       SMat(mat.nrows, mat.ncols, mat.nnz)
+     } else {
+       oldmat match {
+         case omat:SMat => if (oldmat.nrows == mat.nrows && oldmat.ncols == mat.ncols && oldmat.nnz == mat.nnz) {
+           omat
+         } else {
+           omat.recycle(mat.nrows, mat.ncols, mat.nnz)
+         }
+         // Fail with a descriptive message instead of a bare MatchError.
+         case _ => throw new RuntimeException("newOrCheckSMat: output matrix must be an SMat")
+       }
+     }
+   }
+ }
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/SciFunctions.scala b/src/main/scala/BIDMat/SciFunctions.scala
new file mode 100755
index 00000000..bbcf73fb
--- /dev/null
+++ b/src/main/scala/BIDMat/SciFunctions.scala
@@ -0,0 +1,1556 @@
+package BIDMat
+
+import edu.berkeley.bid.VML._
+import edu.berkeley.bid.VSL
+import edu.berkeley.bid.VSL._
+import edu.berkeley.bid.CBLAS._
+import jcuda._;
+import jcuda.jcublas.JCublas;
+import jcuda.runtime.JCuda;
+import jcuda.jcurand.JCurand._;
+import jcuda.jcurand.curandGenerator;
+import jcuda.jcurand.curandRngType._;
+import edu.berkeley.bid.CUMAT;
+import java.util.Random._;
+import MatFunctions._
+
+object SciFunctions {
+ final val SEED:Int = 1452462553
+ // Java initialization
+ final val myrand = new java.util.Random(SEED)
+ // VSL random number generator initialization
+ final val BRNG:Int = BRNG_MCG31
+ final val METHOD:Int = 0
+ final val stream = new VSL();
+ final val errcode = vslNewStream(stream, BRNG, SEED)
+ // VML mode control, controlled with setVMLmode()
+ final val VMLdefault = VMLMODE.VML_ERRMODE_DEFAULT | VMLMODE.VML_HA // Default
+ final val VMLfast = VMLMODE.VML_ERRMODE_DEFAULT | VMLMODE.VML_LA // Faster, Low accuracy, default error handling
+ final val VMLturbo = VMLMODE.VML_ERRMODE_DEFAULT | VMLMODE.VML_EP // Fastest, Lower accuracy, default error handling
+ // Curand initialization
+ var cudarng:curandGenerator = null
+ if (Mat.hasCUDA > 0) {
+ jcuda.runtime.JCuda.initialize
+ cudarng = new curandGenerator
+ curandCreateGenerator(cudarng, CURAND_RNG_PSEUDO_DEFAULT)
+ curandSetPseudoRandomGeneratorSeed(cudarng, SEED)
+ }
+
+ def resetCUDA = JCuda.cudaDeviceReset
+
+ def device(i:Int) = JCuda.cudaSetDevice(i)
+
+ /** Returns the device id that JCuda.cudaGetDevice reports as currently selected. */
+ def device:Int = {
+   // One-element buffer that cudaGetDevice writes its answer into.
+   val current = Array[Int](1)
+   JCuda.cudaGetDevice(current)
+   current(0)
+ }
+
+ /**
+  * Enables peer access in both directions between the current device and device i.
+  * The current device is restored before returning.
+  *
+  * @return the two cudaDeviceEnablePeerAccess return codes: first for the call made on the
+  *         original device, second for the call made on device i
+  */
+ def connect(i:Int) = {
+   val fromHere = JCuda.cudaDeviceEnablePeerAccess(i,0)
+   val home = device
+   // Switch to the peer so the reverse direction can be enabled from its side.
+   device(i)
+   val fromPeer = JCuda.cudaDeviceEnablePeerAccess(home,0)
+   device(home)
+   (fromHere, fromPeer)
+ }
+
+ /**
+  * Disables peer access in both directions between the current device and device i.
+  * The current device is restored before returning.
+  *
+  * @return the two cudaDeviceDisablePeerAccess return codes: first for the call made on the
+  *         original device, second for the call made on device i
+  */
+ def disconnect(i:Int) = {
+   val fromHere = JCuda.cudaDeviceDisablePeerAccess(i)
+   val home = device
+   // Switch to the peer so the reverse direction can be disabled from its side.
+   device(i)
+   val fromPeer = JCuda.cudaDeviceDisablePeerAccess(home)
+   device(home)
+   (fromHere, fromPeer)
+ }
+
+ /**
+  * Queries cudaDeviceCanAccessPeer for both directions between device i and the current device.
+  *
+  * @return a pair of the values written by the two queries: first the (i, current) query,
+  *         then the (current, i) query
+  */
+ def canconnect(i:Int) = {
+   // One-element buffer that each query writes its answer into.
+   val answer = Array[Int](1)
+   val home = device
+   JCuda.cudaDeviceCanAccessPeer(answer, i, home)
+   val peerToHome = answer(0)
+   JCuda.cudaDeviceCanAccessPeer(answer, home, i)
+   (peerToHome, answer(0))
+ }
+
+ def norm(a:FMat) = math.sqrt(sdot(a.length, a.data, 1, a.data, 1)).asInstanceOf[Float]
+
+ def norm(a:DMat) = math.sqrt(ddot(a.length, a.data, 1, a.data, 1))
+
+ def norm(a:GMat) = math.sqrt(JCublas.cublasSdot(a.length, a.data, 1, a.data, 1))
+
+ def norm (a:Mat):Double = {
+ a match {
+ case aa:FMat => norm(aa)
+ case aa:DMat => norm(aa)
+ case aa:GMat => norm(aa)
+ }
+ }
+
+
+ /**
+  * Fills out with uniform random doubles in the range given by minv and maxv, and returns it.
+  *
+  * When Mat.noMKL is set the values come from the java.util.Random instance myrand,
+  * scaled into the requested range; otherwise vdRngUniform is called with the same bounds.
+  *
+  * @param minv lower bound of the range
+  * @param maxv upper bound of the range
+  * @param out  matrix whose data array is overwritten
+  */
+ def drand(minv:Double, maxv:Double, out:DMat):DMat = {
+   if (Mat.noMKL) {
+     // nextDouble is uniform on [0,1); rescale it so this path honours minv/maxv like
+     // the MKL path does. For the default (0, 1) range the values are unchanged.
+     val span = maxv - minv
+     var i = 0; val len = out.length; val odata = out.data;
+     while (i < len) {odata(i) = minv + span*myrand.nextDouble; i += 1}
+   } else {
+     vdRngUniform( METHOD, stream, out.length, out.data, minv, maxv )
+   }
+   Mat.nflops += 10L*out.nrows*out.ncols
+   out
+ }
+
+ def drand(m:Int, n:Int, minv:Double, maxv:Double):DMat = drand(minv, maxv, DMat(m, n))
+
+ def drand(m:Int, n:Int):DMat = drand(m, n, 0, 1)
+
+ def drand(out:DMat):DMat = drand(0.0, 1.0, out)
+
+ /**
+  * Fills out with uniform random floats in the range given by minv and maxv, and returns it.
+  *
+  * When Mat.noMKL is set the values come from the java.util.Random instance myrand,
+  * scaled into the requested range; otherwise vsRngUniform is called with the same bounds.
+  *
+  * @param minv lower bound of the range
+  * @param maxv upper bound of the range
+  * @param out  matrix whose data array is overwritten
+  */
+ def rand(minv:Float, maxv:Float, out:FMat):FMat = {
+   if (Mat.noMKL) {
+     // nextFloat is uniform on [0,1); rescale it so this path honours minv/maxv like
+     // the MKL path does. For the default (0, 1) range the values are unchanged.
+     val span = maxv - minv
+     var i = 0; val len = out.length; val odata = out.data;
+     while (i < len) {odata(i) = minv + span*myrand.nextFloat; i += 1}
+   } else {
+     vsRngUniform( METHOD, stream, out.length, out.data, minv, maxv )
+   }
+   Mat.nflops += 10L*out.nrows*out.ncols
+   out
+ }
+
+ def rand(m:Int, n:Int, minv:Float, maxv:Float):FMat = rand(minv, maxv, FMat(m, n))
+
+ def rand(m:Int, n:Int):FMat = rand(m, n, 0, 1)
+
+ def rand(out:FMat):FMat = rand(0.0f, 1.0f, out)
+
+ def grand(out:GMat, nr:Int, nc:Int):GMat = {
+ Mat.nflops += 10L*out.length
+ curandGenerateUniform(cudarng, out.data, out.length)
+ JCuda.cudaDeviceSynchronize()
+ out
+ }
+
+ def grand(out:GMat):GMat = grand(out, out.nrows, out.ncols)
+
+ def grand(nr:Int, nc:Int):GMat = {
+ val out = GMat(nr, nc)
+ grand(out)
+ }
+
+ def normrnd(mu:Float, sig:Float, out:FMat):FMat = {
+ if (Mat.noMKL) {
+ var i = 0; val len = out.length; val odata = out.data;
+ while (i < len) {odata(i) = mu + sig*myrand.nextGaussian.asInstanceOf[Float]; i += 1}
+ } else {
+ vsRngGaussian(METHOD, stream, out.length, out.data, mu, sig )
+ }
+ Mat.nflops += 10L*out.length
+ out
+ }
+
+ def normrnd(mu:Float, sig:Float, m:Int, n:Int):FMat = {
+ normrnd(mu, sig, FMat(m, n))
+ }
+
+ def cnormrnd(mu:Float, sig:Float, out:CMat):CMat = {
+ if (Mat.noMKL) {
+ var i = 0; val len = out.length; val odata = out.data;
+ while (i < 2*len) {odata(i) = mu + sig*myrand.nextGaussian.asInstanceOf[Float]; i += 1}
+ } else {
+ vsRngGaussian(METHOD, stream, 2*out.length, out.data, mu, sig )
+ }
+ Mat.nflops += 10L*out.length
+ out
+ }
+
+ def cnormrnd(mu:Float, sig:Float, m:Int, n:Int):CMat = {
+ cnormrnd(mu, sig, CMat(m, n))
+ }
+
+ def gnormrnd(mu:Float, sig:Float, out:GMat, nr:Int, nc:Int):GMat = {
+ Mat.nflops += 10L*out.length
+ curandGenerateNormal(cudarng, out.data, out.length, mu, sig)
+ JCuda.cudaDeviceSynchronize()
+ out
+ }
+
+ def gnormrnd(mu:Float, sig:Float, out:GMat):GMat = gnormrnd(mu, sig, out, out.nrows, out.ncols)
+
+ def gnormrnd(mu:Float, sig:Float, nr:Int, nc:Int):GMat = {
+ val out = GMat(nr, nc)
+ gnormrnd(mu, sig, out)
+ }
+
+ def gamrnd(shape:Float, scale:Float, out:FMat):FMat = {
+ vsRngGamma( METHOD, stream, out.length, out.data, shape, 0, scale )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def gamrnd(shape:Float, scale:Float, m:Int, n:Int):FMat = {
+ gamrnd(shape, scale, FMat(m, n))
+ }
+
+ def laprnd(a:Float, b:Float, out:FMat):FMat = {
+ vsRngLaplace( METHOD, stream, out.length, out.data, a, b )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def laprnd(a:Float, b:Float, m:Int, n:Int):FMat = {
+ laprnd(a, b, FMat(m, n))
+ }
+
+ def cauchyrnd(a:Float, b:Float, out:FMat):FMat = {
+ vsRngCauchy( METHOD, stream, out.length, out.data, a, b )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def cauchyrnd(a:Float, b:Float, m:Int, n:Int):FMat = {
+ cauchyrnd(a, b, FMat(m, n))
+ }
+
+ def exprnd(a:Float, b:Float, out:FMat):FMat = {
+ vsRngExponential( METHOD, stream, out.length, out.data, a, b )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def exprnd(a:Float, m:Int, n:Int):FMat = {
+ exprnd(a, 1, FMat(m, n))
+ }
+
+ def exprnd(a:Float, b:Float, m:Int, n:Int):FMat = {
+ exprnd(a, b, FMat(m, n))
+ }
+
+ def exprnd(a:Float, out:FMat):FMat = {
+ exprnd(a, 1, out)
+ }
+
+ def betarnd(p:Float, q:Float, out:FMat):FMat = {
+ vsRngBeta( METHOD, stream, out.length, out.data, p, q, 0, 1 )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def betarnd(p:Float, q:Float, m:Int, n:Int):FMat = {
+ betarnd(p, q, FMat(m, n))
+ }
+
+ def poissrnd(lambda:FMat, out:IMat):IMat = {
+ checkSizes(lambda, out)
+ viRngPoissonV( METHOD, stream, out.length, out.data, DMat(lambda).data )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def poissrnd(lambda:FMat):IMat = {
+ poissrnd(lambda, IMat(lambda.nrows, lambda.ncols))
+ }
+
+ def dnormrnd(mu:Double, sig:Double, out:DMat):DMat = {
+ if (Mat.noMKL) {
+ var i = 0; val len = out.length; val odata = out.data;
+ while (i < len) {odata(i) = mu + sig*myrand.nextGaussian; i += 1}
+ } else {
+ vdRngGaussian( METHOD, stream, out.length, out.data, mu, sig )
+ }
+ Mat.nflops += 10L*out.length
+ out
+ }
+
+ def dnormrnd(mu:Double, sig:Double, m:Int, n:Int):DMat = {
+ dnormrnd(mu, sig, DMat(m, n))
+ }
+
+ def dgamrnd(shape:Double, scale:Double, out:DMat):DMat = {
+ vdRngGamma( METHOD, stream, out.length, out.data, shape, 0, scale )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def dgamrnd(shape:Double, scale:Double, m:Int, n:Int):DMat = {
+ dgamrnd(shape, scale, DMat(m, n))
+ }
+
+ def dlaprnd(a:Double, b:Double, out:DMat):DMat = {
+ vdRngLaplace( METHOD, stream, out.length, out.data, a, b )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def dlaprnd(a:Double, b:Double, m:Int, n:Int):DMat = {
+ dlaprnd(a, b, DMat(m, n))
+ }
+
+ def dcauchyrnd(a:Double, b:Double, out:DMat):DMat = {
+ vdRngCauchy( METHOD, stream, out.length, out.data, a, b )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def dcauchyrnd(a:Double, b:Double, m:Int, n:Int):DMat = {
+ dcauchyrnd(a, b, DMat(m, n))
+ }
+
+ def dexprnd(a:Double, b:Double, out:DMat):DMat = {
+ vdRngExponential( METHOD, stream, out.length, out.data, a, b )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def dexprnd(a:Double, m:Int, n:Int):DMat = {
+ dexprnd(a, 1, DMat(m, n))
+ }
+
+ def dexprnd(a:Double, b:Double, m:Int, n:Int):DMat = {
+ dexprnd(a, b, DMat(m, n))
+ }
+
+ def dexprnd(a:Double, out:DMat):DMat = {
+ dexprnd(a, 1, out)
+ }
+
+ def dbetarnd(p:Double, q:Double, out:DMat):DMat = {
+ vdRngBeta( METHOD, stream, out.length, out.data, p, q, 0, 1 )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def dbetarnd(p:Double, q:Double, m:Int, n:Int):DMat = {
+ dbetarnd(p, q, DMat(m, n))
+ }
+
+ def binornd(k:Int, p:Double, out:IMat):IMat = {
+ viRngBinomial( METHOD, stream, out.length, out.data, k, p )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def binornd(k:Int, p:Double, m:Int, n:Int):IMat = {
+ binornd(k, p, IMat(m, n))
+ }
+
+ def bernrnd(p:Double, out:IMat):IMat = {
+ viRngBernoulli( METHOD, stream, out.length, out.data, p )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def bernrnd(p:Double, m:Int, n:Int):IMat = {
+ bernrnd(p, IMat(m, n))
+ }
+
+ def geornd(p:Double, out:IMat):IMat = {
+ viRngGeometric( METHOD, stream, out.length, out.data, p )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def geornd(p:Double, m:Int, n:Int):IMat = {
+ geornd(p, IMat(m, n))
+ }
+
+ def nbinrnd(a:Double, p:Double, out:IMat):IMat = {
+ viRngNegbinomial( METHOD, stream, out.length, out.data, a, p )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def nbinrnd(a:Double, p:Double, m:Int, n:Int):IMat = {
+ nbinrnd(a, p, IMat(m, n))
+ }
+
+ def poissrnd(lambda:Double, out:IMat):IMat = {
+ viRngPoisson( METHOD, stream, out.length, out.data, lambda )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def poissrnd(lambda:Double, m:Int, n:Int):IMat = {
+ poissrnd(lambda, IMat(m, n))
+ }
+
+ def poissrnd(lambda:DMat, out:IMat):IMat = {
+ checkSizes(lambda, out)
+ viRngPoissonV( METHOD, stream, out.length, out.data, lambda.data )
+ Mat.nflops += 20L*out.length
+ out
+ }
+
+ def poissrnd(lambda:DMat):IMat = {
+ poissrnd(lambda, IMat(lambda.nrows, lambda.ncols))
+ }
+
+ def min(a:DMat, b:DMat) = a.ddMatOp(b, (x:Double, y:Double) => math.min(x,y), null)
+ def max(a:DMat, b:DMat) = a.ddMatOp(b, (x:Double, y:Double) => math.max(x,y), null)
+ def sum(a:DMat, n:Int) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, null)
+ def cumsum(a:DMat, n:Int) = a.ddReduceAll(n, (x:Double) => x, (x:Double, y:Double) => x+y, null)
+ def maxi(a:DMat, n:Int) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), null)
+ def mini(a:DMat, n:Int):DMat = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null)
+ def sum(a:DMat) = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, null)
+ def cumsum(a:DMat) = a.ddReduceAll(0, (x:Double) => x, (x:Double, y:Double) => x+y, null)
+ def maxi(a:DMat) = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), null)
+ def mini(a:DMat):DMat = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null)
+ // maxi2/mini2 call ggOpt2 with a "greater than" / "less than" comparator and return a
+ // (DMat, IMat) pair (presumably the selected values and their indices -- confirm in ggOpt2).
+ // The two-argument forms pass d through; the one-argument forms pass 0.
+ def maxi2(a:DMat,d:Int):(DMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Double,y:Double)=>(x>y)); (DMat(m), ii)}
+ def mini2(a:DMat,d:Int):(DMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Double,y:Double)=>(x<y)); (DMat(m), ii)}
+ def maxi2(a:DMat):(DMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Double,y:Double)=>(x>y)); (DMat(m), ii)}
+ def mini2(a:DMat):(DMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Double,y:Double)=>(x<y)); (DMat(m), ii)}
+
+ // Element-wise minimum of a and b, with out passed as the output matrix.
+ def min(a:DMat, b:DMat, out:Mat) = a.ddMatOp(b, (x:Double, y:Double) => math.min(x,y), out)
+ def max(a:DMat, b:DMat, out:Mat) = a.ddMatOp(b, (x:Double, y:Double) => math.max(x,y), out)
+ def sum(a:DMat, n:Int, out:Mat) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, out)
+ def cumsum(a:DMat, n:Int, out:Mat) = a.ddReduceAll(n, (x:Double) => x, (x:Double, y:Double) => x+y, out)
+ def maxi(a:DMat, n:Int, out:Mat) = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), out)
+ def mini(a:DMat, n:Int, out:Mat):DMat = a.ddReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), out)
+ def sum(a:DMat, out:Mat) = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, out)
+ def cumsum(a:DMat, out:Mat) = a.ddReduceAll(0, (x:Double) => x, (x:Double, y:Double) => x+y, out)
+ def maxi(a:DMat, out:Mat) = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), out)
+ def mini(a:DMat, out:Mat):DMat = a.ddReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), out)
+
+ def min(a:FMat, b:FMat) = a.ffMatOp(b, (x:Float, y:Float) => math.min(x,y), null)
+ def max(a:FMat, b:FMat) = a.ffMatOp(b, (x:Float, y:Float) => math.max(x,y), null)
+ def sum(a:FMat, n:Int) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, null)
+ def cumsum(a:FMat, n:Int) = a.ffReduceAll(n, (x:Float) => x, (x:Float, y:Float) => x+y, null)
+ def maxi(a:FMat, n:Int) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null)
+ def mini(a:FMat, n:Int):FMat = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null)
+ def sum(a:FMat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, null)
+ def cumsum(a:FMat) = a.ffReduceAll(0, (x:Float) => x, (x:Float, y:Float) => x+y, null)
+ def maxi(a:FMat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null)
+ def mini(a:FMat):FMat = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null)
+ // maxi2/mini2 call ggOpt2 with a "greater than" / "less than" comparator and return a
+ // (FMat, IMat) pair (presumably the selected values and their indices -- confirm in ggOpt2).
+ // The two-argument forms pass d through; the one-argument forms pass 0.
+ def maxi2(a:FMat,d:Int):(FMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Float,y:Float)=>(x>y)); (FMat(m), ii)}
+ def mini2(a:FMat,d:Int):(FMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Float,y:Float)=>(x<y)); (FMat(m), ii)}
+ def maxi2(a:FMat):(FMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Float,y:Float)=>(x>y)); (FMat(m), ii)}
+ def mini2(a:FMat):(FMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Float,y:Float)=>(x<y)); (FMat(m), ii)}
+
+ // Element-wise minimum of a and b, with out passed as the output matrix.
+ def min(a:FMat, b:FMat, out:Mat) = a.ffMatOp(b, (x:Float, y:Float) => math.min(x,y), out)
+ def max(a:FMat, b:FMat, out:Mat) = a.ffMatOp(b, (x:Float, y:Float) => math.max(x,y), out)
+ def sum(a:FMat, n:Int, out:Mat) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, out)
+ def cumsum(a:FMat, n:Int, out:Mat) = a.ffReduceAll(n, (x:Float) => x, (x:Float, y:Float) => x+y, out)
+ def maxi(a:FMat, n:Int, out:Mat) = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), out)
+ def mini(a:FMat, n:Int, out:Mat):FMat = a.ffReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), out)
+ def sum(a:FMat, out:Mat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, out)
+ def cumsum(a:FMat, out:Mat) = a.ffReduceAll(0, (x:Float) => x, (x:Float, y:Float) => x+y, out)
+ def maxi(a:FMat, out:Mat) = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), out)
+ def mini(a:FMat, out:Mat):FMat = a.ffReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), out)
+
+ def min (a:IMat, b:IMat) = a.iiMatOp(b, (x:Int, y:Int) => math.min(x,y), null)
+ def max (a:IMat, b:IMat) = a.iiMatOp(b, (x:Int, y:Int) => math.max(x,y), null)
+ def sum(a:IMat, n:Int) = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => x+y, null)
+ def cumsum(a:IMat, n:Int) = a.iiReduceAll(n, (x:Int) => x, (x:Int, y:Int) => x+y, null)
+ def maxi(a:IMat, n:Int) = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), null)
+ def mini(a:IMat, n:Int):IMat = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), null)
+ def sum(a:IMat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => x+y, null)
+ def cumsum(a:IMat) = a.iiReduceAll(0, (x:Int) => x, (x:Int, y:Int) => x+y, null)
+ def maxi(a:IMat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), null)
+ def mini(a:IMat):IMat = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), null)
+ // maxi2/mini2 call ggOpt2 with a "greater than" / "less than" comparator and return an
+ // (IMat, IMat) pair (presumably the selected values and their indices -- confirm in ggOpt2).
+ // The two-argument forms pass d through; the one-argument forms pass 0.
+ def maxi2(a:IMat,d:Int):(IMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Int,y:Int)=>(x>y)); (IMat(m), ii)}
+ def mini2(a:IMat,d:Int):(IMat,IMat) = {val (m,ii)=a.ggOpt2(d,(x:Int,y:Int)=>(x<y)); (IMat(m), ii)}
+ def maxi2(a:IMat):(IMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Int,y:Int)=>(x>y)); (IMat(m), ii)}
+ def mini2(a:IMat):(IMat,IMat) = {val (m,ii)=a.ggOpt2(0,(x:Int,y:Int)=>(x<y)); (IMat(m), ii)}
+
+ // Element-wise minimum of a and b, with out passed as the output matrix.
+ def min (a:IMat, b:IMat, out:Mat) = a.iiMatOp(b, (x:Int, y:Int) => math.min(x,y), out)
+ // Integer-matrix max and reductions that take a caller-supplied `out` matrix.
+ // Same forwarding as the null-output forms: iiMatOp for max, iiReduceOp for sum/maxi/mini,
+ // iiReduceAll for cumsum; the forms without `n` pass 0 as the direction argument.
+ def max (a:IMat, b:IMat, out:Mat) = a.iiMatOp(b, (x:Int, y:Int) => math.max(x,y), out)
+ def sum(a:IMat, n:Int, out:Mat) = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => x+y, out)
+ def cumsum(a:IMat, n:Int, out:Mat) = a.iiReduceAll(n, (x:Int) => x, (x:Int, y:Int) => x+y, out)
+ def maxi(a:IMat, n:Int, out:Mat) = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), out)
+ def mini(a:IMat, n:Int, out:Mat):IMat = a.iiReduceOp(n, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), out)
+ def sum(a:IMat, out:Mat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => x+y, out)
+ def cumsum(a:IMat, out:Mat) = a.iiReduceAll(0, (x:Int) => x, (x:Int, y:Int) => x+y, out)
+ def maxi(a:IMat, out:Mat) = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.max(x,y), out)
+ def mini(a:IMat, out:Mat):IMat = a.iiReduceOp(0, (x:Int) => x, (x:Int, y:Int) => math.min(x,y), out)
+
+ // SDMat (Double-valued) min/max and reductions, forwarded to ssMatOp / ssReduceOp.
+ // This first group passes null as the output argument; the forms without `n` pass 0
+ // as the direction argument.
+ def min(a:SDMat, b:SDMat) = a.ssMatOp(b, (x:Double, y:Double) => math.min(x,y), null)
+ def max(a:SDMat, b:SDMat) = a.ssMatOp(b, (x:Double, y:Double) => math.max(x,y), null)
+ def sum(a:SDMat, n:Int) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, null)
+ def maxi(a:SDMat, n:Int) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), null)
+ def mini(a:SDMat, n:Int) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null)
+ def sum(a:SDMat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, null)
+ def maxi(a:SDMat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), null)
+ def mini(a:SDMat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), null)
+
+ // Same SDMat reductions, taking a caller-supplied `omat` as the output argument.
+ def sum(a:SDMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => x+y, omat)
+ def maxi(a:SDMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), omat)
+ def mini(a:SDMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), omat)
+ def sum(a:SDMat, omat:Mat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => x+y, omat)
+ def maxi(a:SDMat, omat:Mat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.max(x,y), omat)
+ def mini(a:SDMat, omat:Mat) = a.ssReduceOp(0, (x:Double) => x, (x:Double, y:Double) => math.min(x,y), omat)
+
+ // SMat (Float-valued) min/max and reductions, forwarded to ssMatOp / ssReduceOp with a
+ // null output argument. The forms without `n` pass 0 as the direction argument.
+ def min(a:SMat, b:SMat) = a.ssMatOp(b, (x:Float, y:Float) => math.min(x,y), null)
+ def max(a:SMat, b:SMat) = a.ssMatOp(b, (x:Float, y:Float) => math.max(x,y), null)
+ def sum(a:SMat, n:Int) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, null)
+ def maxi(a:SMat, n:Int) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null)
+ def mini(a:SMat, n:Int) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null)
+ def sum(a:SMat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, null)
+ def maxi(a:SMat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), null)
+ def mini(a:SMat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), null)
+ // Matrix-vs-scalar min/max via ssMatOpScalar; both argument orders are accepted and
+ // always call the method on the matrix with the scalar as its argument.
+ def min(a:SMat, b:Float) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), null)
+ def max(a:SMat, b:Float) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.max(x,y), null)
+ def min(b:Float, a:SMat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), null)
+ def max(b:Float, a:SMat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.max(x,y), null)
+ def min(a:SMat, b:Float, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), omat)
+ def max(a:SMat, b:Float, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.max(x,y), omat)
+ def min(b:Float, a:SMat, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.min(x,y), omat)
+ def max(b:Float, a:SMat, omat:Mat) = a.ssMatOpScalar(b, (x:Float, y:Float) => math.max(x,y), omat)
+
+ // SMat reductions taking a caller-supplied `omat` as the output argument.
+ def sum(a:SMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => x+y, omat)
+ def maxi(a:SMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), omat)
+ def mini(a:SMat, n:Int, omat:Mat) = a.ssReduceOp(n, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), omat)
+ def sum(a:SMat, omat:Mat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => x+y, omat)
+ def maxi(a:SMat, omat:Mat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.max(x,y), omat)
+ def mini(a:SMat, omat:Mat) = a.ssReduceOp(0, (x:Float) => x, (x:Float, y:Float) => math.min(x,y), omat)
+ // SDMat-vs-scalar min/max (Double), null output argument, both argument orders.
+ def min(a:SDMat, b:Double) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.min(x,y), null)
+ def max(a:SDMat, b:Double) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.max(x,y), null)
+ def min(b:Double, a:SDMat) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.min(x,y), null)
+ def max(b:Double, a:SDMat) = a.ssMatOpScalar(b, (x:Double, y:Double) => math.max(x,y), null)
+
+ // Complex-matrix sum: forwards to ccReduceOpv with CMat.vecAdd as the vector combiner.
+ // The two-argument form passes null as the output argument; the other passes `c`.
+ def sum(a:CMat, n:Int) = a.ccReduceOpv(n, CMat.vecAdd _, null)
+ def sum(a:CMat, n:Int, c:Mat) = a.ccReduceOpv(n, CMat.vecAdd _, c)
+
+ // Generic element-wise max/min. Each function inspects the runtime types of both operands
+ // and forwards to the typed overload; only same-typed FMat, IMat, DMat and GMat pairs are
+ // matched, so any other pairing fails the match (scala.MatchError).
+ def max(a:Mat, b:Mat):Mat = (a, b) match {
+ case (fa:FMat, fb:FMat) => max(fa, fb):FMat
+ case (ia:IMat, ib:IMat) => max(ia, ib):IMat
+ case (da:DMat, db:DMat) => max(da, db):DMat
+ case (ga:GMat, gb:GMat) => max(ga, gb):GMat
+ }
+
+ def min(a:Mat, b:Mat):Mat = (a, b) match {
+ case (fa:FMat, fb:FMat) => min(fa, fb):FMat
+ case (ia:IMat, ib:IMat) => min(ia, ib):IMat
+ case (da:DMat, db:DMat) => min(da, db):DMat
+ case (ga:GMat, gb:GMat) => min(ga, gb):GMat
+ }
+
+ // Same dispatch, forwarding `c` as the output argument of the typed overload.
+ def max(a:Mat, b:Mat, c:Mat):Mat = (a, b) match {
+ case (fa:FMat, fb:FMat) => max(fa, fb, c):FMat
+ case (ia:IMat, ib:IMat) => max(ia, ib, c):IMat
+ case (da:DMat, db:DMat) => max(da, db, c):DMat
+ case (ga:GMat, gb:GMat) => max(ga, gb, c):GMat
+ }
+
+ def min(a:Mat, b:Mat, c:Mat):Mat = (a, b) match {
+ case (fa:FMat, fb:FMat) => min(fa, fb, c):FMat
+ case (ia:IMat, ib:IMat) => min(ia, ib, c):IMat
+ case (da:DMat, db:DMat) => min(da, db, c):DMat
+ case (ga:GMat, gb:GMat) => min(ga, gb, c):GMat
+ }
+
+ // Float-scalar vs. matrix max/min with output `c`, dispatched on the matrix's runtime type.
+ // The scalar is converted to Int for IMat and wrapped via DMat(a) / GMat(a) for those
+ // types. Types other than FMat, IMat, DMat, GMat, SMat fail the match (scala.MatchError).
+ def max(a:Float, b:Mat, c:Mat):Mat = b match {
+ case fm:FMat => max(a, fm, c):FMat
+ case im:IMat => max(a.asInstanceOf[Int], im, c):IMat
+ case dm:DMat => max(DMat(a), dm, c):DMat
+ case gm:GMat => max(GMat(a), gm, c):GMat
+ case sm:SMat => max(a, sm, c):SMat
+ }
+
+ def min(a:Float, b:Mat, c:Mat):Mat = b match {
+ case fm:FMat => min(a, fm, c):FMat
+ case im:IMat => min(a.asInstanceOf[Int], im, c):IMat
+ case dm:DMat => min(DMat(a), dm, c):DMat
+ case gm:GMat => min(GMat(a), gm, c):GMat
+ case sm:SMat => min(a, sm, c):SMat
+ }
+
+ // Matrix-first argument order; the typed overloads are still called scalar-first.
+ def max(b:Mat, a:Float, c:Mat):Mat = b match {
+ case fm:FMat => max(a, fm, c):FMat
+ case im:IMat => max(a.asInstanceOf[Int], im, c):IMat
+ case dm:DMat => max(DMat(a), dm, c):DMat
+ case gm:GMat => max(GMat(a), gm, c):GMat
+ case sm:SMat => max(a, sm, c):SMat
+ }
+
+ def min(b:Mat, a:Float, c:Mat):Mat = b match {
+ case fm:FMat => min(a, fm, c):FMat
+ case im:IMat => min(a.asInstanceOf[Int], im, c):IMat
+ case dm:DMat => min(DMat(a), dm, c):DMat
+ case gm:GMat => min(GMat(a), gm, c):GMat
+ case sm:SMat => min(a, sm, c):SMat
+ }
+
+ // Double-scalar vs. matrix max/min with output `c`, dispatched on the matrix's runtime type.
+ // The scalar is narrowed to Float for FMat/SMat and to Int for IMat, and wrapped via
+ // DMat(..) / GMat(..) for those types. Other matrix types fail the match (scala.MatchError).
+ def max(a:Double, b:Mat, c:Mat):Mat = b match {
+ case fm:FMat => max(a.asInstanceOf[Float], fm, c):FMat
+ case im:IMat => max(a.asInstanceOf[Int], im, c):IMat
+ case dm:DMat => max(DMat(a), dm, c):DMat
+ case gm:GMat => max(GMat(a), gm, c):GMat
+ case sm:SMat => max(a.asInstanceOf[Float], sm, c):SMat
+ }
+
+ def min(a:Double, b:Mat, c:Mat):Mat = b match {
+ case fm:FMat => min(a.asInstanceOf[Float], fm, c):FMat
+ case im:IMat => min(a.asInstanceOf[Int], im, c):IMat
+ case dm:DMat => min(DMat(a), dm, c):DMat
+ case gm:GMat => min(GMat(a), gm, c):GMat
+ case sm:SMat => min(a.asInstanceOf[Float], sm, c):SMat
+ }
+
+ // Matrix-first argument order. The SMat case calls the scalar-first typed overload.
+ def max(a:Mat, b:Double, c:Mat):Mat = a match {
+ case fm:FMat => max(fm, b.asInstanceOf[Float], c):FMat
+ case im:IMat => max(im, b.asInstanceOf[Int], c):IMat
+ case dm:DMat => max(dm, DMat(b), c):DMat
+ case gm:GMat => max(gm, GMat(b), c):GMat
+ case sm:SMat => max(b.asInstanceOf[Float], sm, c):SMat
+ }
+
+ def min(a:Mat, b:Double, c:Mat):Mat = a match {
+ case fm:FMat => min(fm, b.asInstanceOf[Float], c):FMat
+ case im:IMat => min(im, b.asInstanceOf[Int], c):IMat
+ case dm:DMat => min(dm, DMat(b), c):DMat
+ case gm:GMat => min(gm, GMat(b), c):GMat
+ case sm:SMat => min(b.asInstanceOf[Float], sm, c):SMat
+ }
+
+ // Generic reductions: forward to the typed overload chosen by the matrix's runtime type,
+ // with `b` as the direction argument. Unlisted matrix types fail the match (scala.MatchError).
+ def mini(a:Mat, b:Int):Mat = a match {
+ case fm:FMat => mini(fm, b):FMat
+ case im:IMat => mini(im, b):IMat
+ case dm:DMat => mini(dm, b):DMat
+ case gm:GMat => mini(gm, b):GMat
+ }
+
+ def maxi(a:Mat, b:Int):Mat = a match {
+ case fm:FMat => maxi(fm, b):FMat
+ case im:IMat => maxi(im, b):IMat
+ case dm:DMat => maxi(dm, b):DMat
+ case gm:GMat => maxi(gm, b):GMat
+ }
+
+ // sum additionally handles CMat and SMat; the SMat result is ascribed as FMat.
+ def sum(a:Mat, b:Int):Mat = a match {
+ case fm:FMat => sum(fm, b):FMat
+ case im:IMat => sum(im, b):IMat
+ case dm:DMat => sum(dm, b):DMat
+ case cm:CMat => sum(cm, b):CMat
+ case sm:SMat => sum(sm, b):FMat
+ case gm:GMat => sum(gm, b):GMat
+ }
+
+ // As above, forwarding `c` as the output argument.
+ def sum(a:Mat, b:Int, c:Mat):Mat = a match {
+ case fm:FMat => sum(fm, b, c):FMat
+ case im:IMat => sum(im, b, c):IMat
+ case dm:DMat => sum(dm, b, c):DMat
+ case sm:SMat => sum(sm, b, c):FMat
+ case cm:CMat => sum(cm, b, c):CMat
+ case gm:GMat => sum(gm, b, c):GMat
+ }
+
+ // Typed front-ends for _mean: each calls _mean and casts the result to the declared
+ // return type. The single-argument forms pass 0 as the dimension. IMat input is cast to FMat.
+ def mean(a:FMat, dim0:Int):FMat = _mean(a, dim0).asInstanceOf[FMat]
+
+ def mean(a:FMat):FMat = _mean(a, 0).asInstanceOf[FMat]
+
+ def mean(a:DMat, dim0:Int):DMat = _mean(a, dim0).asInstanceOf[DMat]
+
+ def mean(a:DMat):DMat = _mean(a, 0).asInstanceOf[DMat]
+
+ def mean(a:IMat, dim0:Int):FMat = _mean(a, dim0).asInstanceOf[FMat]
+
+ def mean(a:IMat):FMat = _mean(a, 0).asInstanceOf[FMat]
+
+ def mean(a:CMat, dim0:Int):CMat = _mean(a, dim0).asInstanceOf[CMat]
+
+ def mean(a:CMat):CMat = _mean(a, 0).asInstanceOf[CMat]
+
+ def mean(a:GMat, dim0:Int):GMat = _mean(a, dim0).asInstanceOf[GMat]
+
+ def mean(a:GMat):GMat = _mean(a, 0).asInstanceOf[GMat]
+
+ def mean(a:Mat, b:Int):Mat = _mean(a, b)
+
+ def mean(a:Mat):Mat = _mean(a, 0)
+
+ // Mean along one dimension, computed as sum(a, dim) scaled by the reciprocal of the
+ // extent summed over. dim0 == 0 on a single-row matrix selects dimension 2; otherwise the
+ // dimension is max(1, dim0), and any value other than 1 is treated as 2.
+ def _mean(a:Mat, dim0:Int):Mat = {
+ val axis = if (dim0 == 0 && a.nrows == 1) 2 else math.max(1, dim0)
+ axis match {
+ case 1 => sum(a, 1)*(1.0f/a.nrows)
+ case _ => sum(a, 2)*(1.0f/a.ncols)
+ }
+ }
+
+ // Typed front-ends for _variance: each calls _variance and casts the result to the
+ // declared return type. The single-argument forms pass 0 as the dimension.
+ def variance(a:FMat, dim0:Int):FMat = _variance(a, dim0).asInstanceOf[FMat]
+
+ def variance(a:FMat):FMat = _variance(a, 0).asInstanceOf[FMat]
+
+ def variance(a:DMat, dim0:Int):DMat = _variance(a, dim0).asInstanceOf[DMat]
+
+ def variance(a:DMat):DMat = _variance(a, 0).asInstanceOf[DMat]
+
+ def variance(a:IMat, dim0:Int):FMat = _variance(a, dim0).asInstanceOf[FMat]
+
+ def variance(a:IMat):FMat = _variance(a, 0).asInstanceOf[FMat]
+
+ def variance(a:CMat, dim0:Int):CMat = _variance(a, dim0).asInstanceOf[CMat]
+
+ def variance(a:CMat):CMat = _variance(a, 0).asInstanceOf[CMat]
+
+ def variance(a:GMat, dim0:Int):GMat = _variance(a, dim0).asInstanceOf[GMat]
+
+ def variance(a:GMat):GMat = _variance(a, 0).asInstanceOf[GMat]
+
+ def variance(a:Mat, dim:Int) = _variance(a, dim)
+
+ def variance(a:Mat):Mat = _variance(a, 0)
+
+ // Variance along one dimension as E[x*x] - E[x]*E[x], dividing by the full extent
+ // (nrows or ncols), with no n-1 correction. Dimension selection matches _mean: dim0 == 0
+ // on a single-row matrix selects dimension 2, otherwise max(1, dim0), and anything other
+ // than 1 is treated as 2.
+ def _variance(a:Mat, dim0:Int):Mat = {
+ val axis = if (dim0 == 0 && a.nrows == 1) 2 else math.max(1, dim0)
+ val direction = if (axis == 1) 1 else 2
+ val extent = if (axis == 1) a.nrows else a.ncols
+ val mu = mean(a, direction)
+ sum(a *@ a, direction)*(1.0f/extent) - mu *@ mu
+ }
+
+
+ /**
+  * Apply a unary function to every element of a DMat.
+  *
+  * @param a      input matrix
+  * @param omat   output candidate handed to recycleTry together with `a`
+  * @param vfn    whole-array routine (length, input, output); may be null
+  * @param efn    per-element Scala fallback; may be null
+  * @param nflops flops charged per element to Mat.nflops
+  * @return the matrix obtained from recycleTry, filled with the results
+  * @throws RuntimeException when the fallback is needed (Mat.noMKL set or vfn null) but efn is null
+  */
+ def applyDFun(a:DMat, omat:Mat, vfn:(Int, Array[Double], Array[Double])=>Unit, efn:(Double)=>Double, nflops:Long) ={
+ val out = recycleTry(omat, a)
+ val useVector = !Mat.noMKL && vfn != null
+ if (useVector) {
+ vfn(a.length, a.data, out.data)
+ } else {
+ if (efn == null) {
+ throw new RuntimeException("no Scala builtin version of this math function, sorry")
+ }
+ val n = a.length
+ val dst = out.data
+ val src = a.data
+ var k = 0
+ while (k < n) {
+ dst(k) = efn(src(k))
+ k += 1
+ }
+ }
+ Mat.nflops += nflops*a.length
+ out
+ }
+
+ /**
+  * Apply a unary whole-array function to a DMat, with a whole-array Scala fallback.
+  *
+  * @param a      input matrix
+  * @param omat   output candidate handed to recycleTry together with `a`
+  * @param vfn    preferred routine (length, input, output); may be null
+  * @param efn    fallback routine with the same shape; may be null
+  * @param nflops flops charged per element to Mat.nflops
+  * @return the matrix obtained from recycleTry, filled with the results
+  * @throws RuntimeException when the fallback is needed but efn is null
+  */
+ def applyDFunV(a:DMat, omat:Mat, vfn:(Int, Array[Double], Array[Double])=>Unit,
+ efn:(Int, Array[Double], Array[Double])=>Unit, nflops:Long) = {
+ val out = recycleTry(omat, a)
+ // A null vfn is treated like noMKL (as applyDFun does) so it falls back to efn
+ // instead of failing with a NullPointerException.
+ if (Mat.noMKL || vfn == null) {
+ if (efn == null) {
+ throw new RuntimeException("no Scala builtin version of this math function, sorry")
+ }
+ efn(a.length, a.data, out.data)
+ } else {
+ vfn(a.length, a.data, out.data)
+ }
+ Mat.nflops += nflops*a.length
+ out
+ }
+
+ /**
+  * Apply a unary function to every element of an FMat.
+  *
+  * @param a      input matrix
+  * @param omat   output candidate handed to recycleTry together with `a`
+  * @param vfn    whole-array routine (length, input, output); may be null
+  * @param efn    per-element Scala fallback; may be null
+  * @param nflops flops charged per element to Mat.nflops
+  * @return the matrix obtained from recycleTry, filled with the results
+  * @throws RuntimeException when the fallback is needed (Mat.noMKL set or vfn null) but efn is null
+  */
+ def applySFun(a:FMat, omat:Mat, vfn:(Int, Array[Float], Array[Float])=>Unit, efn:(Float)=>Float, nflops:Long) ={
+ val out = recycleTry(omat, a)
+ val useVector = !Mat.noMKL && vfn != null
+ if (useVector) {
+ vfn(a.length, a.data, out.data)
+ } else {
+ if (efn == null) {
+ throw new RuntimeException("no Scala builtin version of this math function, sorry")
+ }
+ val n = a.length
+ val dst = out.data
+ val src = a.data
+ var k = 0
+ while (k < n) {
+ dst(k) = efn(src(k))
+ k += 1
+ }
+ }
+ Mat.nflops += nflops*a.length
+ out
+ }
+
+ /**
+  * Apply a unary whole-array function to an FMat, with a whole-array Scala fallback.
+  *
+  * @param a      input matrix
+  * @param omat   output candidate handed to recycleTry together with `a`
+  * @param vfn    preferred routine (length, input, output); may be null
+  * @param efn    fallback routine with the same shape; may be null
+  * @param nflops flops charged per element to Mat.nflops
+  * @return the matrix obtained from recycleTry, filled with the results
+  * @throws RuntimeException when the fallback is needed but efn is null
+  */
+ def applySFunV(a:FMat, omat:Mat, vfn:(Int, Array[Float], Array[Float])=>Unit,
+ efn:(Int, Array[Float], Array[Float])=>Unit, nflops:Long) ={
+ val out = recycleTry(omat, a)
+ // A null vfn is treated like noMKL (as applySFun does) so it falls back to efn
+ // instead of failing with a NullPointerException.
+ if (Mat.noMKL || vfn == null) {
+ if (efn == null) {
+ throw new RuntimeException("no Scala builtin version of this math function, sorry")
+ }
+ efn(a.length, a.data, out.data)
+ } else {
+ vfn(a.length, a.data, out.data)
+ }
+ Mat.nflops += nflops*a.length
+ out
+ }
+
+ /**
+  * Apply a binary function element-wise to two DMats.
+  *
+  * @param a      first input; its length drives the loop and the flop count
+  * @param b      second input, read at the same indices as `a`
+  * @param omat   output candidate handed to recycleTry together with `a` and `b`
+  * @param vfn    whole-array routine (length, a, b, output); may be null
+  * @param efn    per-element Scala fallback; may be null
+  * @param nflops flops charged per element to Mat.nflops
+  * @return the matrix obtained from recycleTry, filled with the results
+  * @throws RuntimeException when the fallback is needed but efn is null
+  */
+ def applyD2Fun(a:DMat, b:DMat, omat:Mat,
+ vfn:(Int, Array[Double], Array[Double], Array[Double]) => Unit,
+ efn:(Double, Double)=>Double, nflops:Long):DMat = {
+ val out = recycleTry(omat, a, b)
+ // A null vfn is treated like noMKL (as applyDFun does) so it falls back to efn
+ // instead of failing with a NullPointerException.
+ if (Mat.noMKL || vfn == null) {
+ if (efn == null) {
+ throw new RuntimeException("no Scala builtin version of this math function, sorry")
+ }
+ var i = 0; val len = a.length; val odata = out.data; val adata = a.data; val bdata = b.data
+ while (i < len) {odata(i) = efn(adata(i), bdata(i)); i += 1}
+ } else {
+ vfn(a.length, a.data, b.data, out.data)
+ }
+ Mat.nflops += nflops*a.length
+ out
+ }
+
+ // Element-wise sign and absolute value for DMat, 1 flop per element.
+ // sign passes null as the vector routine, so applyDFun always uses math.signum per element.
+ // The one-argument forms allocate DMat(a.nrows, a.ncols) as the output.
+ def sign(a:DMat, out:Mat) = applyDFun(a, out, null, math.signum _, 1L)
+ def sign(a:DMat):DMat = sign(a, DMat(a.nrows, a.ncols))
+
+ def abs(a:DMat, out:Mat) = applyDFun(a, out, vdAbs _, math.abs _, 1L)
+ def abs(a:DMat):DMat = abs(a, DMat(a.nrows, a.ncols))
+
+ def _vdexp(n:Int, a:Array[Double], b:Array[Double]) = {var i=0 ; while (i(math.floor(x+0.5)), 1L)
+ // One-argument forms below allocate DMat(a.nrows, a.ncols) and call the form that takes `out`.
+ def round(a:DMat):DMat = round(a, DMat(a.nrows, a.ncols))
+
+ // trunc has no Scala fallback (efn is null): applyDFun throws RuntimeException when
+ // Mat.noMKL is set.
+ def trunc(a:DMat, out:Mat) = applyDFun(a, out, vdTrunc _, null, 1L)
+ def trunc(a:DMat):DMat = trunc(a, DMat(a.nrows, a.ncols))
+
+ // Binary element-wise functions via applyD2Fun, charged 10 flops per element.
+ def atan2(a:DMat, b:DMat, out:Mat) = applyD2Fun(a, b, out, vdAtan2 _, math.atan2, 10L)
+ def atan2(a:DMat, b:DMat):DMat = atan2(a, b, DMat(a.nrows, a.ncols))
+
+ def pow(a:DMat, b:DMat, out:Mat) = applyD2Fun(a, b, out, vdPow _, math.pow, 10L)
+ def pow(a:DMat, b:DMat):DMat = pow(a, b, DMat(a.nrows, a.ncols))
+
+ // Piecewise function: 0.5*x*x for x < 1.0, otherwise x - 0.5. Scalar path only (vfn is null).
+ def exppsi(a:DMat, out:Mat) = applyDFun(a, out, null, (x:Double)=>if (x<1.0) 0.5*x*x else x-0.5, 1L)
+ def exppsi(a:DMat):DMat = exppsi(a, DMat(a.nrows, a.ncols))
+
+
+ // Standard deviation: element-wise square root of variance(a, dim0).
+ // The one-argument forms pass 0 as the dimension.
+ def sdev(a:DMat, dim0:Int):DMat = sqrt(variance(a, dim0))
+ def sdev(a:DMat):DMat = sdev(a, 0)
+
+ def sdev(a:FMat, dim0:Int):FMat = sqrt(variance(a, dim0))
+ def sdev(a:FMat):FMat = sdev(a, 0)
+
+ // Element-wise sign and absolute value for FMat, 1 flop per element.
+ // sign passes null as the vector routine, so applySFun always uses math.signum per element.
+ // The one-argument forms allocate FMat(a.nrows, a.ncols) as the output.
+ def sign(a:FMat, out:Mat) = applySFun(a, out, null, math.signum _, 1L)
+ def sign(a:FMat):FMat = sign(a, FMat(a.nrows, a.ncols))
+
+ def abs(a:FMat, out:Mat) = applySFun(a, out, vsAbs _, math.abs _, 1L)
+ def abs(a:FMat):FMat = abs(a, FMat(a.nrows, a.ncols))
+
+ def _vsexp(n:Int, a:Array[Float], b:Array[Float]) = {var i=0 ; while (i math.expm1(x).asInstanceOf[Float], 10L)
+ // Element-wise math functions for FMat. Each form taking `out` hands applySFun a vs*
+ // vector routine plus a per-element Scala fallback that evaluates the scala.math function
+ // and casts the result back to Float; each is charged 10 flops per element.
+ // The one-argument forms allocate FMat(a.nrows, a.ncols) as the output.
+ def exp(a:FMat):FMat = exp(a, FMat(a.nrows, a.ncols))
+
+ def expm1(a:FMat, out:Mat) = applySFun(a, out, vsExpm1 _, (x:Float) => math.expm1(x).asInstanceOf[Float], 10L)
+ def expm1(a:FMat):FMat = expm1(a, FMat(a.nrows, a.ncols))
+
+ def sqrt(a:FMat, out:Mat) = applySFun(a, out, vsSqrt _, (x:Float) => math.sqrt(x).asInstanceOf[Float], 10L)
+ def sqrt(a:FMat):FMat = sqrt(a, FMat(a.nrows, a.ncols))
+
+ def ln(a:FMat, out:Mat) = applySFun(a, out, vsLn _, (x:Float) => math.log(x).asInstanceOf[Float], 10L)
+ def ln(a:FMat):FMat = ln(a, FMat(a.nrows, a.ncols))
+
+ def log10(a:FMat, out:Mat) = applySFun(a, out, vsLog10 _, (x:Float) => math.log10(x).asInstanceOf[Float], 10L)
+ def log10(a:FMat):FMat = log10(a, FMat(a.nrows, a.ncols))
+
+ def log1p(a:FMat, out:Mat) = applySFun(a, out, vsLog1p _, (x:Float) => math.log1p(x).asInstanceOf[Float], 10L)
+ def log1p(a:FMat):FMat = log1p(a, FMat(a.nrows, a.ncols))
+
+ def cos(a:FMat, out:Mat) = applySFun(a, out, vsCos _, (x:Float) => math.cos(x).asInstanceOf[Float], 10L)
+ def cos(a:FMat):FMat = cos(a, FMat(a.nrows, a.ncols))
+
+ def sin(a:FMat, out:Mat) = applySFun(a, out, vsSin _, (x:Float) => math.sin(x).asInstanceOf[Float], 10L)
+ def sin(a:FMat):FMat = sin(a, FMat(a.nrows, a.ncols))
+
+ def tan(a:FMat, out:Mat) = applySFun(a, out, vsTan _, (x:Float) => math.tan(x).asInstanceOf[Float], 10L)
+ def tan(a:FMat):FMat = tan(a, FMat(a.nrows, a.ncols))
+
+ def cosh(a:FMat, out:Mat) = applySFun(a, out, vsCosh _, (x:Float) => math.cosh(x).asInstanceOf[Float], 10L)
+ def cosh(a:FMat):FMat = cosh(a, FMat(a.nrows, a.ncols))
+
+ def sinh(a:FMat, out:Mat) = applySFun(a, out, vsSinh _, (x:Float) => math.sinh(x).asInstanceOf[Float], 10L)
+ def sinh(a:FMat):FMat = sinh(a, FMat(a.nrows, a.ncols))
+
+ def tanh(a:FMat, out:Mat) = applySFun(a, out, vsTanh _, (x:Float) => math.tanh(x).asInstanceOf[Float], 10L)
+ def tanh(a:FMat):FMat = tanh(a, FMat(a.nrows, a.ncols))
+
+ def acos(a:FMat, out:Mat) = applySFun(a, out, vsAcos _, (x:Float) => math.acos(x).asInstanceOf[Float], 10L)
+ def acos(a:FMat):FMat = acos(a, FMat(a.nrows, a.ncols))
+
+ def asin(a:FMat, out:Mat) = applySFun(a, out, vsAsin _, (x:Float) => math.asin(x).asInstanceOf[Float], 10L)
+ def asin(a:FMat):FMat = asin(a, FMat(a.nrows, a.ncols))
+
+ def atan(a:FMat, out:Mat) = applySFun(a, out, vsAtan _, (x:Float) => math.atan(x).asInstanceOf[Float], 10L)
+ def atan(a:FMat):FMat = atan(a, FMat(a.nrows, a.ncols))
+
+ // Inverse hyperbolic cosine / sine for FMat, 10 flops per element.
+ // These must use the inverse routines vsAcosh / vsAsinh; the forward vsCosh / vsSinh
+ // routines would silently compute cosh / sinh instead.
+ // No Scala fallback is supplied (efn is null), so applySFun throws RuntimeException when
+ // Mat.noMKL is set. The one-argument forms allocate FMat(a.nrows, a.ncols) as the output.
+ def acosh(a:FMat, out:Mat) = applySFun(a, out, vsAcosh _, null, 10L)
+ def acosh(a:FMat):FMat = acosh(a, FMat(a.nrows, a.ncols))
+
+ def asinh(a:FMat, out:Mat) = applySFun(a, out, vsAsinh _, null, 10L)
+ def asinh(a:FMat):FMat = asinh(a, FMat(a.nrows, a.ncols))
+
+ // FMat functions with no Scala fallback (efn is null): atanh through gamma below run only
+ // through their vs* vector routine, and applySFun throws RuntimeException when Mat.noMKL
+ // is set. The one-argument forms allocate FMat(a.nrows, a.ncols) as the output.
+ def atanh(a:FMat, out:Mat) = applySFun(a, out, vsAtanh _, null, 10L)
+ def atanh(a:FMat):FMat = atanh(a, FMat(a.nrows, a.ncols))
+
+ def erf(a:FMat, out:Mat) = applySFun(a, out, vsErf _, null, 10L)
+ def erf(a:FMat):FMat = erf(a, FMat(a.nrows, a.ncols))
+
+ def erfinv(a:FMat, out:Mat) = applySFun(a, out, vsErfInv _, null, 10L)
+ def erfinv(a:FMat):FMat = erfinv(a, FMat(a.nrows, a.ncols))
+
+ def erfc(a:FMat, out:Mat) = applySFun(a, out, vsErfc _, null, 10L)
+ def erfc(a:FMat):FMat = erfc(a, FMat(a.nrows, a.ncols))
+
+ def erfcinv(a:FMat, out:Mat) = applySFun(a, out, vsErfcInv _, null, 10L)
+ def erfcinv(a:FMat):FMat = erfcinv(a, FMat(a.nrows, a.ncols))
+
+ def normcdf(a:FMat, out:Mat) = applySFun(a, out, vsCdfNorm _, null, 10L)
+ def normcdf(a:FMat):FMat = normcdf(a, FMat(a.nrows, a.ncols))
+
+ def norminv(a:FMat, out:Mat) = applySFun(a, out, vsCdfNormInv _, null, 10L)
+ def norminv(a:FMat):FMat = norminv(a, FMat(a.nrows, a.ncols))
+
+ def gammaln(a:FMat, out:Mat) = applySFun(a, out, vsLGamma _, null, 10L)
+ def gammaln(a:FMat):FMat = gammaln(a, FMat(a.nrows, a.ncols))
+
+ def gamma(a:FMat, out:Mat) = applySFun(a, out, vsTGamma _, null, 10L)
+ def gamma(a:FMat):FMat = gamma(a, FMat(a.nrows, a.ncols))
+
+ // Rounding functions, 1 flop per element. ceil/floor/round have Scala fallbacks; the
+ // round fallback is floor(x + 0.5).
+ def ceil(a:FMat, out:Mat) = applySFun(a, out, vsCeil _, (x:Float) => math.ceil(x).asInstanceOf[Float], 1L)
+ def ceil(a:FMat):FMat = ceil(a, FMat(a.nrows, a.ncols))
+
+ def floor(a:FMat, out:Mat) = applySFun(a, out, vsFloor _, (x:Float) => math.floor(x).asInstanceOf[Float], 1L)
+ def floor(a:FMat):FMat = floor(a, FMat(a.nrows, a.ncols))
+
+ def round(a:FMat, out:Mat) = applySFun(a, out, vsRound _, (x:Float)=>math.floor(x+0.5).asInstanceOf[Float], 1L)
+ def round(a:FMat):FMat = round(a, FMat(a.nrows, a.ncols))
+
+ // trunc has no Scala fallback.
+ def trunc(a:FMat, out:Mat) = applySFun(a, out, vsTrunc _, null, 1L)
+ def trunc(a:FMat):FMat = trunc(a, FMat(a.nrows, a.ncols))
+
+ // Piecewise function: 0.5*x*x for x < 1, otherwise x - 0.5. Scalar path only (vfn is null).
+ def exppsi(a:FMat, out:Mat) = applySFun(a, out, null, (x:Float)=>if (x<1.0f) 0.5f*x*x else x-0.5f, 1L)
+ def exppsi(a:FMat):FMat = exppsi(a, FMat(a.nrows, a.ncols))
+
+ // Thin wrappers over vmlSetMode / vmlGetMode.
+ def setVMLmode(n:Int) = vmlSetMode(n)
+
+ def getVMLmode():Int = vmlGetMode()
+
+ // Throws RuntimeException("argument dims mismatch") unless `a` and `b` have the same
+ // number of rows and columns.
+ private def checkSizes(a:Mat, b:Mat) = {
+ val sameShape = a.nrows == b.nrows && a.ncols == b.ncols
+ if (!sameShape) {
+ throw new RuntimeException("argument dims mismatch")
+ }
+ }
+
+ // Three-matrix form: `b` and `c` must both match the shape of `a`.
+ private def checkSizes(a:Mat, b:Mat, c:DMat) = {
+ val sameShape = a.nrows == b.nrows && a.ncols == b.ncols && a.nrows == c.nrows && a.ncols == c.ncols
+ if (!sameShape) {
+ throw new RuntimeException("argument dims mismatch")
+ }
+ }
+
+ // Build a random sparse nrows x ncols SMat; `v` scales both the allocation and the flop
+ // count (presumably the target fill density -- confirm with callers).
+ // Row positions advance by 1 + a draw from geornd(v, ...) and stored values come from rand(...).
+ def sprand(nrows:Int, ncols:Int, v:Double):SMat = {
+ // Index base added to every stored row index and column pointer.
+ val ioff = Mat.ioneBased
+ // Capacity: 1.5 * v * nrows * ncols, but at least min(nrows*ncols, 200).
+ val out = SMat(nrows, ncols, math.max(math.min(nrows*ncols, 200),(1.5*v*nrows*ncols).intValue))
+ Mat.nflops += (5L*nrows*ncols*v).toLong
+ val vec = geornd(v, 1, out.nnz)
+ val vals = rand(1, out.nnz)
+ var irow = vec.data(0).intValue
+ var ipos = 0
+ var i = 0
+ out.jc(0) = ioff
+ while (i < ncols) {
+ // Fill column i until the row cursor leaves the column or capacity is nearly used up
+ // (the bound is nnz-1 because vec.data(ipos) is read after ipos is incremented).
+ while (irow < nrows && ipos < out.nnz-1) {
+ out.data(ipos) = vals.data(ipos)
+ out.ir(ipos) = irow+ioff
+ ipos += 1
+ irow += 1 + vec.data(ipos).intValue
+ }
+ // Carry the overshoot into the next column.
+ irow = irow - nrows
+ out.jc(i+1) = ipos+ioff
+ i += 1
+ }
+ SMat(out.sparseTrim)
+ }
+
+ /**
+  * Histogram counts of `a` against the bin edges in `b`, computed in a single forward pass.
+  *
+  * Bin i counts the values x with b(i) <= x < b(i+1); the last bin counts every x >= b(last).
+  * Values below b(0) are counted in bin 0. The single pass is only meaningful when both
+  * `a` and `b` are in ascending order (assumed, not checked).
+  *
+  * Each value is counted exactly once, in the bin it belongs to: a value that crosses one or
+  * more edges closes all of those bins first, and the final running count is stored in the
+  * bin that was open when the input ended.
+  *
+  * @param a values to bin
+  * @param b bin edges
+  * @return IMat(b.length, 1) of per-bin counts
+  */
+ def histc(a:DMat, b:DMat):IMat = {
+ val out = IMat(b.length, 1)
+ val last = b.length - 1
+ if (last >= 0) {
+ var i = 0
+ var hc = 0
+ var j = 0
+ while (j < a.length) {
+ // Close every bin whose upper edge is at or below the current value.
+ while (i < last && a.data(j) >= b.data(i+1)) {
+ out.data(i) = hc
+ hc = 0
+ i += 1
+ }
+ hc += 1
+ j += 1
+ }
+ out.data(i) = hc
+ // Bins past the last open one received no values.
+ i += 1
+ while (i <= last) {
+ out.data(i) = 0
+ i += 1
+ }
+ }
+ out
+ }
+
+ // Compute an ROC-style curve with nxvals+1 points from scores and per-item positive /
+ // negative weights. Throws (sys.error) if any weight is negative.
+ // NOTE(review): the meaning of the returned values (true-positive rate at evenly spaced
+ // false-positive levels) is inferred from the variable names -- confirm.
+ def roc(score0:DMat, vpos0:DMat, vneg0:DMat, nxvals:Int):DMat = {
+ import BIDMat.MatFunctions._
+ // Work with the scores as a column: transpose when there are more columns than rows.
+ var score:DMat = null
+ if (size(score0,2) > size(score0,1)) {
+ score = score0.t
+ } else {
+ score = score0
+ };
+ // sortdown2 presumably returns (sorted values, permutation indices) -- verify.
+ var (vv, ii) = sortdown2(score);
+ // Reorder both weight vectors with the indices from the sort.
+ var vpos = vpos0(ii);
+ var vneg = vneg0(ii);
+ var n = length(vpos);
+ if (size(vpos,2) > 1) {
+ vpos = vpos.t
+ };
+ if (size(vneg,2) > 1) {
+ vneg = vneg.t;
+ };
+ if (nnz(vneg < 0.0) + nnz(vpos < 0.0) > 0) {
+ sys.error("ROCcurve assumes vneg & vpos >= 0");
+ };
+
+ // Running totals of positive and negative weight in sorted order; the last entries
+ // are the overall totals.
+ var tp = cumsum(vpos);
+ var fp = cumsum(vneg);
+ var npos = tp(n-1);
+ var nneg = fp(n-1);
+ // nxvals evenly spaced levels from nneg/nxvals up to nneg.
+ var xvals:FMat = row(1 to nxvals)*(1.0*nneg/nxvals)
+ // Bin the cumulative negative weight against those levels (with 0 prepended).
+ var nc:IMat = histc(fp, 0.0f \ xvals);
+ // Cumulative bin counts give positions into tp; clamped below at 1 so loci-1 >= 0.
+ var loci = max(cumsum(nc(0 until nxvals)), 1);
+ // Prepend 0 and divide by the total positive weight.
+ val curve = (0.0 on tp(loci-1, 0))*(1.0/npos)
+ curve
+ }
+
+ /**
+  * Apply the unary GPU function selected by `opn` to every element of `in`.
+  *
+  * @param in     input matrix
+  * @param omat   output candidate handed to recycleTry together with `in`
+  * @param opn    operation code passed through to CUMAT.applygfun
+  * @param kflops flops charged per element to Mat.nflops
+  * @return the matrix obtained from recycleTry
+  */
+ def applyGfun(in:GMat, omat:Mat, opn:Int, kflops:Long):GMat = {
+ val result = recycleTry(omat, in)
+ val nelems = in.nrows*in.ncols
+ CUMAT.applygfun(in.data, result.data, nelems, opn)
+ // Block until the device has finished before returning the result.
+ JCuda.cudaDeviceSynchronize()
+ Mat.nflops += kflops*in.length
+ result
+ }
+
+ /**
+  * Apply the unary GPU function selected by `opn` to every element of `in`, writing into a
+  * newly created GMat(in.nrows, in.ncols).
+  *
+  * @param in     input matrix
+  * @param opn    operation code passed through to CUMAT.applygfun
+  * @param kflops flops charged per element to Mat.nflops
+  * @return the newly created result matrix
+  */
+ def applyGfun(in:GMat, opn:Int, kflops:Long):GMat = {
+ val result = GMat(in.nrows, in.ncols)
+ val nelems = in.nrows*in.ncols
+ CUMAT.applygfun(in.data, result.data, nelems, opn)
+ // Block until the device has finished before returning the result.
+ JCuda.cudaDeviceSynchronize()
+ Mat.nflops += kflops*in.length
+ result
+ }
+
+ /**
+  * Apply the binary GPU function selected by `opn` element-wise to `a` and `b`.
+  *
+  * The result goes into the matrix obtained from recycleTry(omat, a), the same way the
+  * unary applyGfun treats its `omat`, so a caller-supplied output is honoured rather than
+  * ignored in favour of a fresh allocation.
+  *
+  * @param a      first operand
+  * @param b      second operand; must have the same dimensions as `a`
+  * @param omat   output candidate handed to recycleTry together with `a`
+  * @param opn    operation code passed through to CUMAT.applygfun2
+  * @param kflops flops charged per element to Mat.nflops
+  * @return the matrix obtained from recycleTry
+  * @throws RuntimeException if the operand dimensions differ
+  */
+ def applyGfun2(a:GMat, b:GMat, omat:Mat, opn:Int, kflops:Long):GMat = {
+ if (a.nrows == b.nrows && a.ncols == b.ncols) {
+ // a and b have identical dimensions here, so sizing the output from `a` is enough.
+ val out = recycleTry(omat, a)
+ CUMAT.applygfun2(a.data, b.data, out.data, a.nrows*a.ncols, opn)
+ JCuda.cudaDeviceSynchronize()
+ Mat.nflops += kflops*a.length
+ out
+ } else {
+ throw new RuntimeException("Dimensions mismatch")
+ }
+ }
+
+ /**
+  * Apply the binary GPU function selected by `opn` element-wise to `a` and `b`, writing
+  * into a newly created GMat(a.nrows, a.ncols).
+  *
+  * @param a      first operand
+  * @param b      second operand; must have the same dimensions as `a`
+  * @param opn    operation code passed through to CUMAT.applygfun2
+  * @param kflops flops charged per element to Mat.nflops
+  * @return the newly created result matrix
+  * @throws RuntimeException if the operand dimensions differ
+  */
+ def applyGfun2(a:GMat, b:GMat, opn:Int, kflops:Long):GMat = {
+ val sameShape = a.nrows == b.nrows && a.ncols == b.ncols
+ if (!sameShape) {
+ throw new RuntimeException("Dimensions mismatch")
+ }
+ val result = GMat(a.nrows, a.ncols)
+ val nelems = a.nrows*a.ncols
+ CUMAT.applygfun2(a.data, b.data, result.data, nelems, opn)
+ JCuda.cudaDeviceSynchronize()
+ Mat.nflops += kflops*a.length
+ result
+ }
+ import GMat.TransF
+
+ // Unary GPU functions that take an output matrix: each forwards to applyGfun with its
+ // TransF operation code and a per-element flop charge.
+ def abs(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.abs, 1L)
+ def exp(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.exp, 10L)
+ def expm1(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.expm1, 10L)
+ def sqrt(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sqrt, 10L)
+ def ln(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.ln, 10L)
+ def log10(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.log10, 10L)
+ def log1p(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.log1p, 10L)
+ def cos(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.cos, 10L)
+ def sin(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sin, 10L)
+ def tan(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.tan, 10L)
+ def cosh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.cosh, 10L)
+ def sinh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sinh, 10L)
+ def tanh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.tanh, 10L)
+ def acos(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.acos, 10L)
+ def asin(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.asin, 10L)
+ def atan(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.atan, 10L)
+ def acosh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.acosh, 10L)
+ def asinh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.asinh, 10L)
+ def atanh(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.atanh, 10L)
+ def erf(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erf, 10L)
+ def erfinv(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erfinv, 10L)
+ def erfc(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erfc, 10L)
+ // Correctly spelled name, matching the FMat overload and what the generic
+ // erfcinv(Mat, Mat) dispatcher calls for GMat input.
+ def erfcinv(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.erfcinv, 10L)
+ // Misspelled historical name, kept so existing callers still compile.
+ def ercinv(in:GMat, out:Mat):GMat = erfcinv(in, out)
+ def gammaln(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.gammaln, 10L)
+ def gamma(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.gamma, 10L)
+ def ceil(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.ceil, 10L)
+ def floor(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.floor, 10L)
+ def round(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.round, 10L)
+ def trunc(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.trunc, 10L)
+ def sign(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.sign, 1L)
+ def exppsi(in:GMat, out:Mat):GMat = applyGfun(in, out, TransF.exppsi, 1L)
+
+ import GMat.TransF2
+
+ // Binary GPU functions taking an output matrix: forward to applyGfun2 with the TransF2
+ // operation code, charged 10 flops per element.
+ def atan2(a:GMat, b:GMat, out:Mat):GMat = applyGfun2(a, b, out, TransF2.atan2, 10L)
+ def pow(a:GMat, b:GMat, out:Mat):GMat = applyGfun2(a, b, out, TransF2.pow, 10L)
+
+ // Unary GPU functions without an output argument: each forwards to the applyGfun overload
+ // that creates its own result matrix, with its TransF operation code and a per-element
+ // flop charge.
+ // abs is charged 1 flop per element, the same as the abs overload that takes an output.
+ def abs(in:GMat):GMat = applyGfun(in, TransF.abs, 1L)
+ def exp(in:GMat):GMat = applyGfun(in, TransF.exp, 10L)
+ def expm1(in:GMat):GMat = applyGfun(in, TransF.expm1, 10L)
+ def sqrt(in:GMat):GMat = applyGfun(in, TransF.sqrt, 10L)
+ def ln(in:GMat):GMat = applyGfun(in, TransF.ln, 10L)
+ def log10(in:GMat):GMat = applyGfun(in, TransF.log10, 10L)
+ def log1p(in:GMat):GMat = applyGfun(in, TransF.log1p, 10L)
+ def cos(in:GMat):GMat = applyGfun(in, TransF.cos, 10L)
+ def sin(in:GMat):GMat = applyGfun(in, TransF.sin, 10L)
+ def tan(in:GMat):GMat = applyGfun(in, TransF.tan, 10L)
+ def cosh(in:GMat):GMat = applyGfun(in, TransF.cosh, 10L)
+ def sinh(in:GMat):GMat = applyGfun(in, TransF.sinh, 10L)
+ def tanh(in:GMat):GMat = applyGfun(in, TransF.tanh, 10L)
+ def acos(in:GMat):GMat = applyGfun(in, TransF.acos, 10L)
+ def asin(in:GMat):GMat = applyGfun(in, TransF.asin, 10L)
+ def atan(in:GMat):GMat = applyGfun(in, TransF.atan, 10L)
+ def acosh(in:GMat):GMat = applyGfun(in, TransF.acosh, 10L)
+ def asinh(in:GMat):GMat = applyGfun(in, TransF.asinh, 10L)
+ def atanh(in:GMat):GMat = applyGfun(in, TransF.atanh, 10L)
+ def erf(in:GMat):GMat = applyGfun(in, TransF.erf, 10L)
+ def erfinv(in:GMat):GMat = applyGfun(in, TransF.erfinv, 10L)
+ def erfc(in:GMat):GMat = applyGfun(in, TransF.erfc, 10L)
+ // Correctly spelled name, matching the FMat overload.
+ def erfcinv(in:GMat):GMat = applyGfun(in, TransF.erfcinv, 10L)
+ // Misspelled historical name, kept so existing callers still compile.
+ def ercinv(in:GMat):GMat = erfcinv(in)
+ def gammaln(in:GMat):GMat = applyGfun(in, TransF.gammaln, 10L)
+ def gamma(in:GMat):GMat = applyGfun(in, TransF.gamma, 10L)
+ def ceil(in:GMat):GMat = applyGfun(in, TransF.ceil, 10L)
+ def floor(in:GMat):GMat = applyGfun(in, TransF.floor, 10L)
+ def round(in:GMat):GMat = applyGfun(in, TransF.round, 10L)
+ def trunc(in:GMat):GMat = applyGfun(in, TransF.trunc, 10L)
+ def sign(in:GMat):GMat = applyGfun(in, TransF.sign, 1L)
+ def exppsi(in:GMat):GMat = applyGfun(in, TransF.exppsi, 1L)
+
+ // Binary GPU functions without an output argument: forward to the applyGfun2 overload
+ // that creates its own result matrix, charged 10 flops per element.
+ def atan2(a:GMat, b:GMat):GMat = applyGfun2(a, b, TransF2.atan2, 10L)
+ def pow(a:GMat, b:GMat):GMat = applyGfun2(a, b, TransF2.pow, 10L)
+
+ import GMat.BinOp
+ // GMat element-wise max/min via gOp, and reductions via reduceOp, selected by BinOp codes.
+ // This group passes null as the output argument; the reductions without `dir` pass 0.
+ def max(a:GMat, b:GMat):GMat = a.gOp(b, null, BinOp.op_max)
+ def min(a:GMat, b:GMat):GMat = a.gOp(b, null, BinOp.op_min)
+ def maxi(a:GMat, dir:Int):GMat = a.reduceOp(null, dir, BinOp.op_max)
+ def mini(a:GMat, dir:Int):GMat = a.reduceOp(null, dir, BinOp.op_min)
+ def sum(a:GMat, dir:Int):GMat = a.reduceOp(null, dir, BinOp.op_add)
+ def maxi(a:GMat):GMat = a.reduceOp(null, 0, BinOp.op_max)
+ def mini(a:GMat):GMat = a.reduceOp(null, 0, BinOp.op_min)
+ def sum(a:GMat):GMat = a.reduceOp(null, 0, BinOp.op_add)
+
+ // Same operations, passing the caller-supplied `out` as the output argument.
+ def max(a:GMat, b:GMat, out:Mat):GMat = a.gOp(b, out, BinOp.op_max)
+ def min(a:GMat, b:GMat, out:Mat):GMat = a.gOp(b, out, BinOp.op_min)
+ def maxi(a:GMat, dir:Int, out:Mat):GMat = a.reduceOp(out, dir, BinOp.op_max)
+ def mini(a:GMat, dir:Int, out:Mat):GMat = a.reduceOp(out, dir, BinOp.op_min)
+ def sum(a:GMat, dir:Int, out:Mat):GMat = a.reduceOp(out, dir, BinOp.op_add)
+ def maxi(a:GMat, out:Mat):GMat = a.reduceOp(out, 0, BinOp.op_max)
+ def mini(a:GMat, out:Mat):GMat = a.reduceOp(out, 0, BinOp.op_min)
+ def sum(a:GMat, out:Mat):GMat = a.reduceOp(out, 0, BinOp.op_add)
+
+ // Generic unary functions with output `b`: forward to the FMat, DMat or GMat overload
+ // chosen by the runtime type of `a`. Other matrix types fail the match (scala.MatchError).
+ def abs(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => abs(fm, b):FMat
+ case dm:DMat => abs(dm, b):DMat
+ case gm:GMat => abs(gm, b):GMat
+ }
+
+ def sign(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => sign(fm, b)
+ case dm:DMat => sign(dm, b)
+ case gm:GMat => sign(gm, b)
+ }
+
+ def sqrt(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => sqrt(fm, b)
+ case dm:DMat => sqrt(dm, b)
+ case gm:GMat => sqrt(gm, b)
+ }
+
+ // Generic exponential / logarithm functions with output `b`: forward to the FMat, DMat or
+ // GMat overload chosen by the runtime type of `a`. Other types fail the match.
+ def exp(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => exp(fm, b)
+ case dm:DMat => exp(dm, b)
+ case gm:GMat => exp(gm, b)
+ }
+
+ def expm1(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => expm1(fm, b)
+ case dm:DMat => expm1(dm, b)
+ case gm:GMat => expm1(gm, b)
+ }
+
+ def ln(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => ln(fm, b)
+ case dm:DMat => ln(dm, b)
+ case gm:GMat => ln(gm, b)
+ }
+
+ def log10(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => log10(fm, b)
+ case dm:DMat => log10(dm, b)
+ case gm:GMat => log10(gm, b)
+ }
+
+ def log1p(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => log1p(fm, b)
+ case dm:DMat => log1p(dm, b)
+ case gm:GMat => log1p(gm, b)
+ }
+
+ // Generic trigonometric / hyperbolic functions with output `b`: forward to the FMat, DMat
+ // or GMat overload chosen by the runtime type of `a`. Other types fail the match.
+ def cos(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => cos(fm, b)
+ case dm:DMat => cos(dm, b)
+ case gm:GMat => cos(gm, b)
+ }
+
+ def sin(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => sin(fm, b)
+ case dm:DMat => sin(dm, b)
+ case gm:GMat => sin(gm, b)
+ }
+
+ def tan(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => tan(fm, b)
+ case dm:DMat => tan(dm, b)
+ case gm:GMat => tan(gm, b)
+ }
+
+ def cosh(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => cosh(fm, b)
+ case dm:DMat => cosh(dm, b)
+ case gm:GMat => cosh(gm, b)
+ }
+
+ def sinh(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => sinh(fm, b)
+ case dm:DMat => sinh(dm, b)
+ case gm:GMat => sinh(gm, b)
+ }
+
+ def tanh(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => tanh(fm, b)
+ case dm:DMat => tanh(dm, b)
+ case gm:GMat => tanh(gm, b)
+ }
+
+ // Generic inverse trigonometric / hyperbolic functions with output `b`: forward to the
+ // FMat, DMat or GMat overload chosen by the runtime type of `a`. Other types fail the match.
+ def acos(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => acos(fm, b)
+ case dm:DMat => acos(dm, b)
+ case gm:GMat => acos(gm, b)
+ }
+
+ def asin(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => asin(fm, b)
+ case dm:DMat => asin(dm, b)
+ case gm:GMat => asin(gm, b)
+ }
+
+ def atan(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => atan(fm, b)
+ case dm:DMat => atan(dm, b)
+ case gm:GMat => atan(gm, b)
+ }
+
+ def acosh(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => acosh(fm, b)
+ case dm:DMat => acosh(dm, b)
+ case gm:GMat => acosh(gm, b)
+ }
+
+ def asinh(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => asinh(fm, b)
+ case dm:DMat => asinh(dm, b)
+ case gm:GMat => asinh(gm, b)
+ }
+
+ // Generic error-function family with output `b`: forward to the FMat, DMat or GMat
+ // overload chosen by the runtime type of `a`. Other types fail the match.
+ def erf(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => erf(fm, b)
+ case dm:DMat => erf(dm, b)
+ case gm:GMat => erf(gm, b)
+ }
+
+ def erfinv(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => erfinv(fm, b)
+ case dm:DMat => erfinv(dm, b)
+ case gm:GMat => erfinv(gm, b)
+ }
+
+ def erfc(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => erfc(fm, b)
+ case dm:DMat => erfc(dm, b)
+ case gm:GMat => erfc(gm, b)
+ }
+
+ // Generic erfcinv with output `b`: forwards to the FMat, DMat or GMat implementation
+ // chosen by the runtime type of `a`. Other matrix types fail the match (scala.MatchError).
+ def erfcinv(a:Mat, b:Mat):Mat = {
+ a match {
+ case aa:FMat => erfcinv(aa, b)
+ case aa:DMat => erfcinv(aa, b)
+ // The GMat implementation is spelled `ercinv`. Writing `erfcinv(aa, b)` for a GMat would
+ // select this (Mat, Mat) overload again and recurse until the stack overflows.
+ case aa:GMat => ercinv(aa, b)
+ }
+ }
+
+ // Generic gamma / log-gamma with output `b`: forward to the FMat, DMat or GMat overload
+ // chosen by the runtime type of `a`. Other types fail the match.
+ def gamma(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => gamma(fm, b)
+ case dm:DMat => gamma(dm, b)
+ case gm:GMat => gamma(gm, b)
+ }
+
+ def gammaln(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => gammaln(fm, b)
+ case dm:DMat => gammaln(dm, b)
+ case gm:GMat => gammaln(gm, b)
+ }
+
+ // Generic rounding functions with output `b`: forward to the FMat, DMat or GMat overload
+ // chosen by the runtime type of `a`. Other types fail the match.
+ def floor(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => floor(fm, b)
+ case dm:DMat => floor(dm, b)
+ case gm:GMat => floor(gm, b)
+ }
+
+ def ceil(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => ceil(fm, b)
+ case dm:DMat => ceil(dm, b)
+ case gm:GMat => ceil(gm, b)
+ }
+
+ def round(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => round(fm, b)
+ case dm:DMat => round(dm, b)
+ case gm:GMat => round(gm, b)
+ }
+
+ def trunc(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => trunc(fm, b)
+ case dm:DMat => trunc(dm, b)
+ case gm:GMat => trunc(gm, b)
+ }
+
+ // Generic exppsi with output `b`: forwards to the FMat, DMat or GMat overload chosen by
+ // the runtime type of `a`. Other types fail the match.
+ def exppsi(a:Mat, b:Mat):Mat = a match {
+ case fm:FMat => exppsi(fm, b)
+ case dm:DMat => exppsi(dm, b)
+ case gm:GMat => exppsi(gm, b)
+ }
+
+ // Generic binary functions with output `c`: both operands must share one runtime type
+ // (FMat, DMat or GMat). Any other pairing fails the match (scala.MatchError).
+ def atan2(a:Mat, b:Mat, c:Mat):Mat = (a, b) match {
+ case (fa:FMat, fb:FMat) => atan2(fa, fb, c)
+ case (da:DMat, db:DMat) => atan2(da, db, c)
+ case (ga:GMat, gb:GMat) => atan2(ga, gb, c)
+ }
+
+ def pow(a:Mat, b:Mat, c:Mat):Mat = (a, b) match {
+ case (fa:FMat, fb:FMat) => pow(fa, fb, c)
+ case (da:DMat, db:DMat) => pow(da, db, c)
+ case (ga:GMat, gb:GMat) => pow(ga, gb, c)
+ }
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/Solvers.scala b/src/main/scala/BIDMat/Solvers.scala
new file mode 100755
index 00000000..00cbc126
--- /dev/null
+++ b/src/main/scala/BIDMat/Solvers.scala
@@ -0,0 +1,334 @@
+package BIDMat
+import edu.berkeley.bid.CBLAS._
+import edu.berkeley.bid.LAPACK._
+import MatFunctions._
+import SciFunctions._
+
+object Solvers {
+
+ def inv(a:FMat):FMat = _inv(a).asInstanceOf[FMat] // typed wrapper: delegates to _inv and casts the result back to FMat
+ def inv(a:DMat):DMat = _inv(a).asInstanceOf[DMat] // same, for DMat
+ def inv(a:CMat):CMat = _inv(a).asInstanceOf[CMat] // same, for CMat
+ def inv(a:Mat):Mat = _inv(a) // untyped entry point; _inv dispatches on the runtime type of a copy of a
+
+ // Matrix inverse via LU factorisation: getrf factors a copy of a in place and getri turns the factors
+ // into the inverse, so a itself is never written. Throws RuntimeException for non-square input; a type
+ // other than DMat, FMat or CMat raises scala.MatchError.
+ // NOTE(review): the values returned by getrf/getri are not inspected - confirm how singular input surfaces.
+ // n is passed both as the row count and as the leading dimension of the column-major data.
+ def _inv(a:Mat):Mat = {
+ Mat.nflops += 4L*a.nrows*a.nrows*a.nrows/3 // counted before the shape check, as before
+ if (a.nrows != a.ncols) throw new RuntimeException("inv needs a square matrix")
+ val n = a.nrows
+ val result = a.copy
+ val pivots = new Array[Int](n)
+ result match {
+ case dm:DMat =>
+ dgetrf(ORDER.ColMajor, n, a.ncols, dm.data, n, pivots)
+ dgetri(ORDER.ColMajor, n, dm.data, n, pivots)
+ case fm:FMat =>
+ sgetrf(ORDER.ColMajor, n, a.ncols, fm.data, n, pivots)
+ sgetri(ORDER.ColMajor, n, fm.data, n, pivots)
+ case cm:CMat =>
+ cgetrf(ORDER.ColMajor, n, a.ncols, cm.data, n, pivots)
+ cgetri(ORDER.ColMajor, n, cm.data, n, pivots)
+ }
+ result
+ }
+
+ def seig(a:FMat, getVecs:Boolean):(FMat, FMat) = {val (d,out) = _seig(a, getVecs); (d.asInstanceOf[FMat], out.asInstanceOf[FMat])} // typed wrapper: casts both results of _seig back to FMat
+ def seig(a:DMat, getVecs:Boolean):(DMat, DMat) = {val (d,out) = _seig(a, getVecs); (d.asInstanceOf[DMat], out.asInstanceOf[DMat])} // same, for DMat
+ def seig(a:Mat, getVecs:Boolean):(Mat, Mat) = _seig(a, getVecs) // untyped entry point
+
+ def seig(a:FMat):(FMat, FMat) = {val (d,out) = _seig(a, true); (d.asInstanceOf[FMat], out.asInstanceOf[FMat])} // one-argument form: getVecs defaults to true
+ def seig(a:DMat):(DMat, DMat) = {val (d,out) = _seig(a, true); (d.asInstanceOf[DMat], out.asInstanceOf[DMat])} // same, for DMat
+ def seig(a:Mat):(Mat, Mat) = _seig(a, true) // untyped one-argument form
+
+ // Symmetric eigen-solver run on a copy of a in three LAPACK calls, all with uplo "U" where applicable:
+ // sytrd, orgtr, then steqr in mode "V" (getVecs) or "N". Returns (d, out): the n x 1 buffer handed to steqr
+ // as d, and the working copy. Throws RuntimeException unless a is square; non DMat/FMat raises MatchError.
+ def _seig(a:Mat, getVecs:Boolean):(Mat, Mat) = {
+ Mat.nflops += 6L*a.nrows*a.nrows*a.nrows
+ if (a.nrows != a.ncols) throw new RuntimeException("eig needs a square matrix")
+ val n = a.nrows
+ val vecs = a.copy
+ val diag = a.zeros(n,1)
+ val offDiag = a.zeros(n,1)
+ val tau = a.zeros(n,1)
+ val job = if (getVecs) "V" else "N"
+ (vecs, diag, offDiag, tau) match {
+ case (dv:DMat, dd:DMat, de:DMat, dt:DMat) =>
+ dsytrd(ORDER.ColMajor, "U", n, dv.data, n, dd.data, de.data, dt.data)
+ dorgtr(ORDER.ColMajor, "U", n, dv.data, n, dt.data)
+ dsteqr(ORDER.ColMajor, job, n, dd.data, de.data, dv.data, n)
+ case (fv:FMat, fd:FMat, fe:FMat, ft:FMat) =>
+ ssytrd(ORDER.ColMajor, "U", n, fv.data, n, fd.data, fe.data, ft.data)
+ sorgtr(ORDER.ColMajor, "U", n, fv.data, n, ft.data)
+ ssteqr(ORDER.ColMajor, job, n, fd.data, fe.data, fv.data, n)
+ }
+ (diag, vecs)
+ }
+
+ def geig(a:Mat):(CMat, CMat) = geig(a, true) // one-argument form: getVecs defaults to true
+
+ def geig(in:Mat, getVecs:Boolean):(CMat, CMat) = { // Eigen-solver for a general square matrix, run in complex arithmetic on CMat(in); returns (w, z)
+ Mat.nflops += 10L*in.nrows*in.nrows*in.nrows // flop estimate, added before the shape check
+ if (in.nrows != in.ncols) {
+ throw new RuntimeException("eig needs a square matrix")
+ } else {
+ val ilo = new Array[Int](1) // one-element arrays; presumably so cgebal can write ilo/ihi back - TODO confirm
+ val ihi = new Array[Int](1)
+ val a = CMat(in) // complex working matrix, overwritten by the calls below
+ val scale = ones(a.nrows,1) // passed to cgebal and later to cgebak
+ val tau = a.zeros(a.nrows,1) // passed to cgehrd and then to cunghr
+ val w = a.zeros(a.nrows, 1) // passed to chseqr as w; returned as the first result
+ val mm = a.nrows
+ ilo(0) = 1; ihi(0) = a.nrows;
+ cgebal(ORDER.ColMajor, "S", a.nrows, a.data, a.nrows, ilo, ihi, scale.data)
+ cgehrd(ORDER.ColMajor, a.nrows, ilo(0), ihi(0), a.data, a.nrows, tau.data)
+ val q = a.copy // snapshot of a after cgehrd, handed to cunghr together with tau
+ cunghr(ORDER.ColMajor, a.nrows, ilo(0), ihi(0), q.data, a.nrows, tau.data)
+ val z = q.copy
+ chseqr(ORDER.ColMajor, "S", "I", a.nrows, ilo(0), ihi(0), a.data, a.nrows, w.data, z.data, a.nrows)
+ if (getVecs) { // NOTE(review): when getVecs is false, z is returned exactly as chseqr left it - confirm that is intended
+ Mat.nflops += 50L*in.nrows*in.nrows*in.nrows
+ val m = new Array[Int](1)
+ val select = IMat(in.nrows, 1)
+ val vl = a.zeros(a.nrows, 1)
+ val ee = z.copy
+ ctrevc(ORDER.ColMajor, "R", "A", select.data, a.nrows, a.data, a.nrows, vl.data, 1, ee.data, a.nrows, mm, m)
+ cgebak(ORDER.ColMajor, "S", "R", a.nrows, ilo(0), ihi(0), scale.data, mm, ee.data, a.nrows);
+ z ~ q * (z * ee); // stores q * (z * ee) into z
+ }
+ (w, z)
+ }
+ }
+
+ def feig(a:FMat):(FMat, FMat) = {val (w,out) = _feig(a) ; (w.asInstanceOf[FMat], out.asInstanceOf[FMat])} // typed wrapper: casts both results of _feig back to FMat
+ def feig(a:DMat):(DMat, DMat) = {val (w,out) = _feig(a) ; (w.asInstanceOf[DMat], out.asInstanceOf[DMat])} // same, for DMat
+ def feig(a:Mat):(Mat, Mat) = _feig(a) // untyped entry point
+
+ // Faster, divide and conquer algorithm for pos definite matrices: syevd ("V", "U") on a copy of a. Returns (w, out):
+ // the n x 1 buffer given to syevd as w and the overwritten copy. Non-square input throws RuntimeException.
+ def _feig(a:Mat):(Mat, Mat) = {
+ Mat.nflops += 3L*a.nrows*a.nrows*a.nrows
+ if (a.nrows != a.ncols) throw new RuntimeException("feig needs a square matrix")
+ val n = a.nrows
+ val vecs = a.copy
+ val vals = a.zeros(n,1)
+ (vecs, vals) match {
+ case (dv:DMat, dw:DMat) => dsyevd(ORDER.ColMajor, "V", "U", n, dv.data, n, dw.data)
+ case (fv:FMat, fw:FMat) => ssyevd(ORDER.ColMajor, "V", "U", n, fv.data, n, fw.data)
+ }
+ (vals, vecs)
+ }
+ /*
+ * Standard QR decomposition. Given m x n input A, return m x m orthonormal Q and m x n upper-triangular R.
+ */
+
+ def QRdecomp(a:FMat):(FMat, FMat) = {val (q,r) = _QRdecomp(a); (q.asInstanceOf[FMat], r.asInstanceOf[FMat])} // typed wrapper: casts (q, r) from _QRdecomp back to FMat
+ def QRdecomp(a:DMat):(DMat, DMat) = {val (q,r) = _QRdecomp(a); (q.asInstanceOf[DMat], r.asInstanceOf[DMat])} // same, for DMat
+ def QRdecomp(a:CMat):(CMat, CMat) = {val (q,r) = _QRdecomp(a); (q.asInstanceOf[CMat], r.asInstanceOf[CMat])} // same, for CMat
+ def QRdecomp(a:Mat):(Mat, Mat) = _QRdecomp(a) // untyped entry point
+
+ def _QRdecomp(a:Mat):(Mat, Mat) = { // Full QR via geqrf followed by orgqr/ungqr; returns (q, r) with q allocated m x m and r a copy of a with its lower part cleared
+ Mat.nflops += 4L*a.nrows*a.ncols*math.min(a.nrows, a.ncols)
+ val m = a.nrows
+ val n = a.ncols
+ val r = a.copy // geqrf works on this copy, so a is not modified
+ val q = a.zeros(m,m)
+ val tau = a.zeros(math.max(a.nrows, a.ncols), 1) // buffer passed to geqrf and then to orgqr/ungqr
+ (r, q, tau) match { // types other than FMat/DMat/CMat raise scala.MatchError
+ case (fr:FMat, fq:FMat, ftau:FMat) => {
+ sgeqrf(ORDER.ColMajor, m, n, fr.data, m, ftau.data)
+ fq(?,0->n) = fr // copy the factored data into q's first n columns; NOTE(review): q has only m columns, so n > m looks unsupported - confirm
+ sorgqr(ORDER.ColMajor, m, m, n, fq.data, m, ftau.data)
+ }
+ case (dr:DMat, dq:DMat, dtau:DMat) => {
+ dgeqrf(ORDER.ColMajor, m, n, dr.data, m, dtau.data)
+ dq(?,0->n) = dr
+ dorgqr(ORDER.ColMajor, m, m, n, dq.data, m, dtau.data)
+ }
+ case (cr:CMat, cq:CMat, ctau:CMat) => {
+ cgeqrf(ORDER.ColMajor, m, n, cr.data, m, ctau.data)
+ cq(?,0->n) = cr
+ cungqr(ORDER.ColMajor, m, m, n, cq.data, m, ctau.data)
+ }
+ }
+ r.clearLower // what remains in r is returned as the triangular factor
+ (q, r)
+ }
+
+ /*
+ * Thin QR decomposition. Given m x n input A, return m x n orthonormal Q and n x n upper triangular R.
+ */
+
+ def QRdecompt(a:FMat):(FMat, FMat) = {val (q,r) = _QRdecompt(a); (q.asInstanceOf[FMat], r.asInstanceOf[FMat])} // typed wrapper: casts (q, r) from _QRdecompt back to FMat
+ def QRdecompt(a:DMat):(DMat, DMat) = {val (q,r) = _QRdecompt(a); (q.asInstanceOf[DMat], r.asInstanceOf[DMat])} // same, for DMat
+ def QRdecompt(a:CMat):(CMat, CMat) = {val (q,r) = _QRdecompt(a); (q.asInstanceOf[CMat], r.asInstanceOf[CMat])} // same, for CMat
+ def QRdecompt(a:Mat):(Mat, Mat) = _QRdecompt(a) // untyped entry point
+
+ def _QRdecompt(a:Mat):(Mat, Mat) = { // Thin QR obtained from a Cholesky factor: a2 = a^T a, r = chol(a2).t, q = a * inv(r); returns (q, r)
+ val m = a.nrows
+ val n = a.ncols
+ val a2 = a.zeros(a.ncols, a.ncols) // n x n output buffer of the gemm call below
+ (a, a2) match { // types other than FMat/DMat/CMat raise scala.MatchError
+ case (fa:FMat, fa2:FMat) => sgemm(ORDER.ColMajor, TRANSPOSE.Trans, TRANSPOSE.NoTrans, n, n, m, 1f, fa.data, m, fa.data, m, 0f, fa2.data, n) // first operand transposed, second not: a^T * a
+ case (da:DMat, da2:DMat) => dgemm(ORDER.ColMajor, TRANSPOSE.Trans, TRANSPOSE.NoTrans, n, n, m, 1f, da.data, m, da.data, m, 0f, da2.data, n)
+ case (ca:CMat, ca2:CMat) => {
+ val cone = CMat.celem(1,0) // scalars passed as alpha (1,0) and beta (0,0)
+ val czero = CMat.celem(0,0)
+ cgemm(ORDER.ColMajor, TRANSPOSE.Trans, TRANSPOSE.NoTrans, n, n, m, cone.data, ca.data, m, ca.data, m, czero.data, ca2.data, n) // NOTE(review): plain Trans, not a conjugate transpose - for complex input this is a^T a rather than a^H a; confirm before relying on chol below
+ }
+ }
+ Mat.nflops += 2L*a.ncols*a.ncols*a.nrows
+ val r = chol(a2).t // chol returns the "L" factor with its upper part cleared; its transpose is used as r
+ val q = a * inv(r)
+ (q, r)
+ }
+
+ def chol(a:FMat):FMat = _chol(a).asInstanceOf[FMat] // typed wrapper: delegates to _chol and casts the result back to FMat
+ def chol(a:DMat):DMat = _chol(a).asInstanceOf[DMat] // same, for DMat
+ def chol(a:CMat):CMat = _chol(a).asInstanceOf[CMat] // same, for CMat
+ def chol(a:Mat):Mat = _chol(a) // untyped entry point
+
+ // Cholesky factorization: potrf with uplo "L" on a copy of a, after which the copy's upper part is cleared
+ // (clearUpper) and the copy returned. Throws RuntimeException unless a is square.
+ def _chol(a:Mat):Mat = {
+ Mat.nflops += 1L*a.nrows*a.nrows*a.nrows/3
+ if (a.nrows != a.ncols) throw new RuntimeException("chol needs a square matrix")
+ val n = a.nrows
+ val factor = a.copy
+ factor match {
+ case dm:DMat => dpotrf(ORDER.ColMajor, "L", n, dm.data, n)
+ case fm:FMat => spotrf(ORDER.ColMajor, "L", n, fm.data, n)
+ case cm:CMat => cpotrf(ORDER.ColMajor, "L", n, cm.data, n)
+ }
+ factor.clearUpper
+ factor
+ }
+
+ /*
+ * Trisolve solves A x = r, for triangular A. Mode string argument is 3 characters.
+ * Char1 = "U" or "L" for upper or lower-triangular input.
+ * Char2 = "N", "T" or "C" for A not-transposed, transposed or conjugate-transposed respectively.
+ * Char3 = "N" or "U" whether the leading diagonal is non-unit "N" or unit "U" respectively.
+ */
+ def trisolve(a:DMat, r:DMat, mode:String):DMat = _trisolve(a, r, mode).asInstanceOf[DMat] // typed wrapper: delegates to _trisolve and casts the result back to DMat
+ def trisolve(a:FMat, r:FMat, mode:String):FMat = _trisolve(a, r, mode).asInstanceOf[FMat] // same, for FMat
+ def trisolve(a:CMat, r:CMat, mode:String):CMat = _trisolve(a, r, mode).asInstanceOf[CMat] // same, for CMat
+ def trisolve(a:Mat, r:Mat, mode:String):Mat = _trisolve(a, r, mode) // untyped entry point
+
+ // Solves the triangular system A x = r with trtrs on a copy of r, which is returned; a and r are not
+ // modified. mode is handed to trtrs unchanged. Throws RuntimeException when a is not square or when
+ // a.ncols != r.nrows; operand pairs other than matching DMat/FMat/CMat raise scala.MatchError.
+ def _trisolve(a:Mat, r:Mat, mode:String):Mat = {
+ if (a.nrows != a.ncols) throw new RuntimeException("trisolve: a must be square")
+ // The old text named "tsolve" and said "same ncols", but the quantity checked is a.ncols against r.nrows.
+ if (a.ncols != r.nrows) throw new RuntimeException("trisolve: a.ncols ("+a.ncols+") must equal r.nrows ("+r.nrows+")")
+ val out = r.copy
+ Mat.nflops += 1L*a.nrows*a.nrows*r.ncols
+ (a, out) match {
+ case (da:DMat, dout:DMat) => dtrtrs(ORDER.ColMajor, mode, a.nrows, r.ncols, da.data, a.nrows, dout.data, out.nrows)
+ case (fa:FMat, fout:FMat) => strtrs(ORDER.ColMajor, mode, a.nrows, r.ncols, fa.data, a.nrows, fout.data, out.nrows)
+ case (ca:CMat, cout:CMat) => ctrtrs(ORDER.ColMajor, mode, a.nrows, r.ncols, ca.data, a.nrows, cout.data, out.nrows)
+ }
+ out
+ }
+
+ def trisolve(a:DMat, r:DMat):DMat = _trisolve(a, r, "UNN").asInstanceOf[DMat] // two-argument form: mode defaults to "UNN" (upper, not transposed, non-unit diagonal)
+ def trisolve(a:FMat, r:FMat):FMat = _trisolve(a, r, "UNN").asInstanceOf[FMat] // same, for FMat
+ def trisolve(a:CMat, r:CMat):CMat = _trisolve(a, r, "UNN").asInstanceOf[CMat] // same, for CMat
+ def trisolve(a:Mat, r:Mat):Mat = _trisolve(a, r, "UNN") // untyped two-argument form
+
+ // Moves every column with index >= step by step columns towards column 0, in place, one arraycopy per
+ // column in ascending order. Columns that are not a copy target keep their previous contents.
+ def shiftLeft(mat:FMat, step:Int) = {
+ val colLen = mat.nrows
+ for (src <- step until mat.ncols)
+ System.arraycopy(mat.data, src*colLen, mat.data, (src-step)*colLen, colLen)
+ }
+
+ // Moves columns step columns to the right, in place, walking from the last column down to column
+ // step so that, for a positive step, a source column is read before it is overwritten.
+ def shiftRight(mat:FMat, step:Int) = {
+ val colLen = mat.nrows
+ for (dst <- (mat.ncols - 1) to step by -1)
+ System.arraycopy(mat.data, (dst-step)*colLen, mat.data, dst*colLen, colLen)
+ }
+
+ def blgmres(A:FMat, b:FMat, nrst:Int, m:Int, s:Int, tol:Float) = { // Restarted block GMRES-style iteration for A x = b: block width s, m inner steps, at most nrst restarts; returns (x, R, H, V)
+ val n = A.nrows
+ val R = normrnd(0, 1, n, s) // n x s block from normrnd; column 0 is overwritten with the residual on every restart
+ val H = A.zeros(s*(m+1), s*m) // projection coefficients, stored block by block and reduced with QRdecomp below
+ val V = A.zeros(n, s*(m+1)) // basis blocks, s columns per step
+ val e1 = A.zeros(s*(m+1),1)
+ e1(0,0) = 1
+ val rots = new Array[FMat](m) // transposed Q factor of each step's 2s x s block, re-applied to later columns of H
+ val bnorm = norm(b)
+ var x = R(?,0) // starting guess: first column of R
+ var done = false
+
+ def blk(i:Int) = i*s->(i+1)*s // index range of block i
+ def blk2(i:Int) = i*s->(i+2)*s // index range spanning blocks i and i+1
+
+ var irestart = 0
+ while (irestart < nrst && !done) {
+ val res = b - A*x
+ R(?,0) = res
+ var (vj, r) = QRdecompt(R) // thin QR of the residual block gives the first basis block
+ V(?, 0 -> s) = vj
+ var ex = r(0,0)*e1 // right-hand side of the small system, updated by the same rotations as H
+ var j = 0
+ while (j < m && !done) {
+ var Uj = A * vj
+ var k = 0
+ while (k <= j) { // remove the components of Uj along each earlier basis block
+ val Vl = V(?, blk(k))
+ val Hj = Vl.t * Uj
+ Uj = Uj - Vl * Hj
+ H(blk(k), blk(j)) = Hj
+ k += 1
+ }
+ val (vjp, hjp) = QRdecompt(Uj)
+ H(blk(j+1), blk(j)) = hjp
+ V(?, blk(j+1)) = vjp
+ vj = vjp
+ k = 0
+ while (k < j) { // Apply blocked Givens rotations
+ H(blk2(k), blk(j)) = rots(k) * H(blk2(k), blk(j))
+ k += 1
+ }
+ var (rot, tri) = QRdecomp(H(blk2(j), blk(j))) // full QR of the current 2s x s block
+ H(blk2(j), blk(j)) = tri
+ rots(j) = rot.t
+ ex(blk2(j),0) = rots(j) * ex(blk2(j),0)
+ k = 0
+ while (k < s && !done) {
+ val ihere = j*s+k
+ printf("%f ", ex(ihere,0)); // writes the current entry of ex to stdout on every step
+ if (math.abs(ex(ihere,0))/bnorm < tol) { // converged: relative size of this entry fell below tol
+ val ym = trisolve(H(0->ihere, 0->ihere), ex(0->ihere,0)) // NOTE(review): ihere can be 0 here, which makes both ranges empty - confirm
+ x = x + V(?,0->ihere) * ym
+ done = true;
+ }
+ k += 1
+ }
+ printf("\n");
+ j += 1
+ }
+ if (!done) { // inner steps exhausted: apply the full update and restart
+ val ym = trisolve(H(0->s*m,?), ex(0->s*m,0))
+ val zi = V(?,0->s*m) * ym
+ x = x + zi
+ if (s > 1) {
+ shiftRight(R, 1) // keep the latest correction as column 1 of R for the next restart
+ R(?, 1) = zi
+ }
+ }
+ irestart += 1
+ }
+ (x, R, H, V)
+ }
+
+}
diff --git a/src/main/scala/BIDMat/SparseMat.scala b/src/main/scala/BIDMat/SparseMat.scala
new file mode 100755
index 00000000..5a4996ac
--- /dev/null
+++ b/src/main/scala/BIDMat/SparseMat.scala
@@ -0,0 +1,764 @@
+package BIDMat
+
+class SparseMat[@specialized(Double,Float) T]
+(nr: Int, nc: Int, var nnz0:Int, var ir:Array[Int], val jc:Array[Int], val data:Array[T])
+(implicit manifest:Manifest[T], numeric:Numeric[T]) extends Mat(nr, nc) {
+
+ override def nnz = nnz0 // number of stored entries; nnz0 is mutable and is lowered by sparseTrim
+
+ /*
+ * Bounds-checked matrix access
+ */
+ // Reads element (r0, c0), where both indices are offset by Mat.oneBased. Throws IndexOutOfBoundsException
+ // when the position lies outside the matrix; otherwise defers to the unchecked accessor get_.
+ def apply(r0:Int, c0:Int):T = {
+ val base = Mat.oneBased
+ val row = r0 - base
+ val col = c0 - base
+ val inside = row >= 0 && row < nrows && col >= 0 && col < ncols
+ if (!inside) throw new IndexOutOfBoundsException("("+(row+base)+","+(col+base)+") vs ("+nrows+","+ncols+")")
+ get_(row, col)
+ }
+ /*
+ * Internal (unchecked) accessor
+ */
+ // Unchecked read of element (r, c), both zero-based; returns numeric.zero when no entry is stored there.
+ // With explicit row indices the row is located by Mat.ibinsearch inside the column's slice of ir. When ir
+ // is null the k-th entry of a column is row k, so row r sits at data offset jc(c)-ioff+r - the mapping
+ // full and gfind use. The previous r+ioff-jc(c) had the sign flipped and never checked the column length.
+ def get_(r:Int, c:Int):T = {
+ val ioff = Mat.ioneBased
+ val ix = if (ir != null) Mat.ibinsearch(r+ioff, ir, jc(c)-ioff, jc(c+1)-ioff)
+ else if (r < jc(c+1) - jc(c)) jc(c) - ioff + r else -1 // -1: row lies past the stored part of the column
+ if (ix >= 0) data(ix) else numeric.zero
+ }
+ /*
+ * Update a matrix value, m(r,c) = v
+ */
+ // Writes v at (r0, c0), indices offset by Mat.oneBased, and returns v. Throws IndexOutOfBoundsException for
+ // a position outside the matrix; the store is done by set_, which rejects positions without a stored entry.
+ def update(r0:Int, c0:Int, v:T):T = {
+ val base = Mat.oneBased
+ val row = r0 - base
+ val col = c0 - base
+ val inside = row >= 0 && row < nrows && col >= 0 && col < ncols
+ if (!inside) throw new IndexOutOfBoundsException("("+(row+base)+","+(col+base)+") vs ("+nrows+","+ncols+")")
+ set_(row, col, v)
+ v
+ }
+ /*
+ * Internal (unchecked) setter
+ */
+ // Unchecked write of element (r, c), both zero-based. Only positions that already hold a stored entry can
+ // be written; anything else throws RuntimeException. With explicit rows the slot is found by Mat.ibinsearch.
+ // When ir is null the k-th entry of a column is row k, so row r lives at data offset jc(c)-ioff+r (the
+ // mapping full uses); the previous r+ioff-jc(c) had the sign flipped and no column-length check.
+ def set_(r:Int, c:Int, v:T) = {
+ val ioff = Mat.ioneBased
+ val ix = if (ir != null) Mat.ibinsearch(r+ioff, ir, jc(c)-ioff, jc(c+1)-ioff)
+ else if (r < jc(c+1) - jc(c)) jc(c) - ioff + r else -1 // -1: row lies past the stored part of the column
+ if (ix >= 0) data(ix) = v
+ else throw new RuntimeException("Can't set missing values")
+ }
+
+ // Materialises the row-index array when the matrix uses implicit rows (ir == null): within column i the
+ // k-th stored entry is row k, written with the Mat.ioneBased offset. Does nothing when ir already exists.
+ // Fix: the inner bound was jc(i)+1, which filled only the first entry of each column, instead of jc(i+1).
+ def explicitInds = {
+ if (ir == null) {
+ val ioff = Mat.ioneBased
+ ir = new Array[Int](nnz)
+ var i = 0
+ while (i < ncols) {
+ var j = 0
+ while (j + jc(i) < jc(i+1)) { ir(j+jc(i)-ioff) = j+ioff; j += 1 }
+ i += 1
+ }
+ }
+ }
+ /*
+ * Transpose
+ */
+ def gt:SparseMat[T] = { // transpose, built by re-assembling the entries with row and column roles swapped
+ explicitInds // uncompressInds and decInds below read ir, so it must exist
+ SparseMat.sparseImpl[T](SparseMat.uncompressInds(jc, ir), // per-entry column index of this matrix is passed as the rows argument
+ if (Mat.ioneBased==1) SparseMat.decInds(ir) else ir, data, ncols, nrows) // stored row indices, decremented when one-based, are passed as the cols argument; dimensions swapped
+ }
+ /*
+ * Stack matrices vertically
+ */
+ def vertcat(a:SparseMat[T]):SparseMat[T] = // stacks a below this matrix; the result has nrows+a.nrows rows and ncols columns
+ if (ncols != a.ncols) {
+ throw new RuntimeException("ncols must match")
+ } else {
+ if (ir != null) a.explicitInds // if either operand has explicit row indices, give them to the other as well
+ if (a.ir != null) explicitInds
+ val out = if (ir != null) {
+ SparseMat[T](nrows+a.nrows, ncols, nnz+a.nnz)
+ } else {
+ SparseMat.noRows[T](nrows+a.nrows, ncols, nnz+a.nnz) // NOTE(review): with implicit rows the entries of a are simply appended after this column's entries - confirm this is right when a column of this matrix is not full
+ }
+ val ioff = Mat.ioneBased
+ var ip = 0 // next free slot in out.data / out.ir
+ var i = 0
+ out.jc(0) = ioff
+ while (i < ncols) { // per column: first this matrix's entries, then a's
+ var j = jc(i)-ioff
+ while (j < jc(i+1)-ioff) {
+ if (out.ir != null) out.ir(ip) = ir(j)
+ out.data(ip) = data(j)
+ ip += 1
+ j += 1
+ }
+ j = a.jc(i)-ioff
+ while (j < a.jc(i+1)-ioff) {
+ if (out.ir != null) out.ir(ip) = a.ir(j) + nrows // rows of a are shifted down by this matrix's row count
+ out.data(ip) = a.data(j)
+ ip += 1
+ j += 1
+ }
+ out.jc(i+1) = ip+ioff
+ i += 1
+ }
+ out
+ }
+
+ /*
+ * Stack matrices horizontally
+ */
+
+ // Horizontal concatenation [this, a]: the result has nrows rows and ncols + a.ncols columns. Entries of a
+ // are appended after this matrix's entries and a's column pointers are shifted by nnz.
+ // Fix: the result used to be allocated as (nrows+a.nrows) x ncols (copied from vertcat), so its jc array
+ // was too short and writing a's column pointers indexed past its end. The unused local ip is gone too.
+ def horzcat(a:SparseMat[T]):SparseMat[T] =
+ if (nrows != a.nrows) {
+ throw new RuntimeException("nrows must match")
+ } else {
+ if (ir != null) a.explicitInds // if either operand has explicit row indices, give them to the other as well
+ if (a.ir != null) explicitInds
+ val out = if (ir != null) SparseMat[T](nrows, ncols+a.ncols, nnz+a.nnz)
+ else SparseMat.noRows[T](nrows, ncols+a.ncols, nnz+a.nnz)
+ System.arraycopy(data, 0, out.data, 0, nnz)
+ System.arraycopy(a.data, 0, out.data, nnz, a.nnz)
+ if (out.ir != null) {
+ System.arraycopy(ir, 0, out.ir, 0, nnz)
+ System.arraycopy(a.ir, 0, out.ir, nnz, a.nnz)
+ }
+ System.arraycopy(jc, 0, out.jc, 0, ncols+1)
+ for (i <- 1 to a.ncols) {
+ out.jc(i+ncols) = a.jc(i) + nnz // a.jc(i) already carries the index offset
+ }
+ out
+ }
+
+ /*
+ * Find the (single, linear) index of every non-zero element
+ */
+ // Linear index of every stored entry: row + col*nrows, shifted by Mat.oneBased.
+ // Rows come from ir when it exists; otherwise an entry's row is its position within its column.
+ def gfind:IMat = {
+ val result = IMat(nnz, 1)
+ val ioff = Mat.ioneBased
+ val off = Mat.oneBased
+ val explicitRows = ir != null
+ var col = 0
+ while (col < ncols) {
+ val first = jc(col)-ioff
+ val last = jc(col+1)-ioff
+ val colBase = off + col*nrows
+ var p = first
+ while (p < last) {
+ val row = if (explicitRows) ir(p)-ioff else p-first
+ result.data(p) = row + colBase
+ p += 1
+ }
+ col += 1
+ }
+ result
+ }
+ /*
+ * Find indices (i,j) for non-zero elements
+ */
+ // (row, column) index of every stored entry, each shifted by Mat.oneBased (assumed to be 0 or 1).
+ // Fix: columns used to come from SparseMat.uncompressInds(jc, ir), which dereferences ir (NullPointerException
+ // without explicit rows) and yields ir.length values, possibly more than nnz; they are now filled in this sweep.
+ def gfind2:(IMat, IMat) = {
+ val iout = IMat(nnz, 1)
+ val jout = IMat(nnz, 1)
+ val ioff = Mat.ioneBased
+ val off = Mat.oneBased
+ var i = 0
+ while (i < ncols) {
+ var j = jc(i)-ioff
+ if (ir != null) {
+ while (j < jc(i+1)-ioff) {
+ iout.data(j) = ir(j)-ioff+off
+ jout.data(j) = i+off
+ j += 1
+ }
+ } else {
+ while (j < jc(i+1)-ioff) {
+ iout.data(j) = j-jc(i)+ioff+off
+ jout.data(j) = i+off
+ j += 1
+ }
+ }
+ i += 1
+ }
+ (iout, jout)
+ }
+ /*
+ * Find indices and values (i,j,v) for non-zero elements
+ */
+ def gfind3:(IMat, IMat, DenseMat[T]) = { // (row, column, value) triples of the stored entries
+ val vout = new DenseMat[T](nnz,1)
+ val (iout, jout) = gfind2 // index pairs come from gfind2
+ System.arraycopy(data, 0, vout.data, 0, nnz) // values are the first nnz slots of data, in storage order
+ (iout, jout, vout)
+ }
+ /*
+ * Implement a(im) = b where im is a matrix of indices to a and im and b are same-sized
+ */
+ def update(im:IMat, b:SparseMat[T]) = { // NOTE(review): empty body - indexed assignment is not implemented, so a(im) = b silently does nothing
+ }
+
+ /*
+ * Implement slicing, a(iv,jv) where iv and jv are vectors, using ? as wildcard
+ */
+ def gapply(iv:IMat, jv:IMat):SparseMat[T] = // slicing a(iv, jv); a wildcard iv selects whole columns
+ iv match {
+ case aa:MatrixWildcard => { // all rows: copy the selected columns as contiguous runs
+ val colinds = DenseMat.getInds(jv, ncols)
+ val ioff = Mat.ioneBased
+ val off = Mat.oneBased
+ var tnnz = 0
+ for (i <- 0 until colinds.length) tnnz += jc(colinds(i)-off+1) - jc(colinds(i)-off) // total entries in the selected columns
+ val out = if (ir != null) {
+ SparseMat[T](nrows, colinds.length, tnnz)
+ } else {
+ SparseMat.noRows[T](nrows, colinds.length, tnnz)
+ }
+ var inext = 0 // next free slot in the output arrays
+ var i = 0
+ out.jc(0) = ioff
+ while (i < out.ncols) {
+ val istep = jc(colinds(i)-off+1) - jc(colinds(i)-off) // entry count of the source column
+ if (ir != null) System.arraycopy(ir, jc(colinds(i)-off)-ioff, out.ir, inext, istep)
+ System.arraycopy(data, jc(colinds(i)-off)-ioff, out.data, inext, istep)
+ inext += istep
+ out.jc(i+1) = inext+ioff
+ i += 1
+ }
+ out
+ }
+ case _ => { // explicit row selection
+ explicitInds
+ val off = Mat.oneBased
+ val rowinds = if (off == 0) DenseMat.getInds(iv, nrows) else SparseMat.decInds(DenseMat.getInds(iv, nrows));
+ val smat = SparseMat.sparseImpl[Int]((0 until iv.length).toArray, rowinds, Array.fill[Int](iv.length)(1), iv.length, nrows) // selector: column r of smat lists the output rows that pick source row r
+ val colinds = DenseMat.getInds(jv, ncols)
+ val ioff = Mat.ioneBased
+ var tnnz = 0
+ var i = 0
+ while (i < colinds.length) { // first pass: count output entries
+ var j = jc(colinds(i)-off)-ioff
+ while (j < jc(colinds(i)-off+1)-ioff) {
+ tnnz += smat.jc(ir(j)+1-ioff) - smat.jc(ir(j)-ioff)
+ j += 1
+ }
+ i += 1
+ }
+ val out = SparseMat[T](iv.length, colinds.length, tnnz)
+ tnnz = 0
+ i = 0
+ out.jc(0) = ioff
+ while (i < colinds.length) { // second pass: emit each source entry once per output row that selects it
+ var j = jc(colinds(i)-off)-ioff
+ while (j < jc(colinds(i)-off+1)-ioff) {
+ val dval = data(j)
+ var k = smat.jc(ir(j)-ioff) - ioff
+ while (k < smat.jc(ir(j)+1-ioff)-ioff) {
+ out.ir(tnnz) = smat.ir(k) // NOTE(review): entries are emitted in source-row order, so output rows within a column may be unsorted when iv is not increasing - confirm
+ out.data(tnnz) = dval
+ tnnz += 1
+ k += 1
+ }
+ j += 1
+ }
+ out.jc(i+1) = tnnz+ioff
+ i += 1
+ }
+ out
+ }
+ }
+
+ private def printOne(a:T):String = // formats one stored value for toString
+ a match {
+ case v:Double => {
+ if (v % 1 == 0 && math.abs(v) < 1e10) { // whole number below 1e10 in magnitude: print without a fractional part
+ "%d" format v.intValue
+ } else {
+ "%.5g" format v
+ }
+ }
+ case v:Float => {
+ if (v % 1 == 0 && math.abs(v) < 1e5) { // same rule with the threshold 1e5 for Float
+ "%d" format v.intValue
+ } else {
+ "%.5g" format v
+ }
+ }
+ case _ => "" // any other element type prints as an empty string
+ }
+
+ private def printOne(v0:Int):String = { // formats a zero-based index for display
+ val v = v0 + Mat.oneBased // shift into the user-visible index base
+ "%d" format v
+ }
+
+
+ // Renders at most maxRows stored entries, one per line, as "(row,col)value" with every field right-justified
+ // to a common width, followed by a line of "..." when entries were left out.
+ // Fixes: (1) a matrix without explicit rows (ir == null) no longer throws NullPointerException - the row is
+ // the entry's position in its column; (2) the width pass measures the row value that is actually printed (it
+ // skipped the -ioff adjustment); (3) padding uses format/repetition, not slices of a fixed blank string.
+ override def toString:String = {
+ val ioff = Mat.ioneBased
+ val maxRows = 8
+ val nshow = math.min(nnz, maxRows)
+ val sb:StringBuilder = new StringBuilder
+ val fields = new Array[String](3*nshow) // row, column and value text of each shown entry
+ var fieldWidth = 4
+ var innz = 0
+ var icol = 0
+ while (innz < nshow) {
+ while (innz >= jc(icol+1)-ioff) icol += 1 // advance to the column that owns entry innz
+ val irow = if (ir != null) ir(innz)-ioff else innz-(jc(icol)-ioff)
+ fields(3*innz) = printOne(irow)
+ fields(3*innz+1) = printOne(icol)
+ fields(3*innz+2) = printOne(data(innz))
+ for (k <- 0 until 3) fieldWidth = math.max(fieldWidth, 2+fields(3*innz+k).length)
+ innz += 1
+ }
+ val pad = "%"+fieldWidth+"s" // right-justify in fieldWidth characters
+ innz = 0
+ while (innz < nshow) {
+ sb.append("("+pad.format(fields(3*innz))+","+pad.format(fields(3*innz+1))+")"+pad.format(fields(3*innz+2))+"\n")
+ innz += 1
+ }
+ if (nnz > maxRows) {
+ for (j <- 0 until 3) sb.append((" " * (fieldWidth-2))+"...")
+ sb.append("\n")
+ }
+ sb.toString()
+ }
+
+ // Sparse * sparse product returned as a dense nrows x a.ncols matrix; entry (row, i) lives at row + nrows*i.
+ // Both operands get explicit row indices first. Throws RuntimeException when ncols != a.nrows.
+ def gsMult(a:SparseMat[T]):DenseMat[T] =
+ if (ncols != a.nrows) throw new RuntimeException("dims mismatch")
+ else {
+ explicitInds; a.explicitInds
+ val out = new DenseMat[T](nrows, a.ncols)
+ val ioff = Mat.ioneBased
+ var i = 0
+ while (i < a.ncols) {
+ val i0 = nrows*i // offset of output column i
+ var j = a.jc(i)-ioff
+ while (j < a.jc(i+1)-ioff) {
+ val ind = a.ir(j)-ioff // row of a's entry = column of this matrix to scale
+ val tval = a.data(j)
+ var k = jc(ind)-ioff
+ while (k < jc(ind+1)-ioff) {
+ val indx = ir(k)-ioff + i0
+ out.data(indx) = numeric.plus(out.data(indx), numeric.times(tval, data(k))) // fix: used to read and write this matrix's own data array instead of out.data
+ k += 1
+ }
+ j += 1
+ }
+ i += 1
+ }
+ out
+ }
+
+ def sgMatOp(b:SparseMat[T], op2:(T,T) => T, omat:Mat):SparseMat[T] = // element-wise op2 over the union of both sparsity patterns; a missing operand is supplied as numeric.zero
+ if (nrows==b.nrows && ncols==b.ncols) {
+ if (ir != null) b.explicitInds // if either operand has explicit row indices, give them to the other as well
+ if (b.ir != null) explicitInds
+ if (ir == null) {
+ sgMatOpNR(b,op2,omat) // neither operand has row indices: use the positional variant
+ } else {
+ val out = SparseMat.newOrCheck(nrows, ncols, nnz+b.nnz, omat) // sized for the worst case of no shared positions
+ val ioff = Mat.ioneBased
+ var nzc = 0 // entries written so far
+ out.jc(0) = ioff
+ var i = 0
+ while (i < ncols) {
+ var ia = jc(i)-ioff
+ var ib = b.jc(i)-ioff
+ while (ia < jc(i+1)-ioff && ib < b.jc(i+1)-ioff) { // merge the two row lists of column i
+ if (ir(ia) < b.ir(ib)) { // entry only in this matrix
+ out.ir(nzc) = ir(ia)
+ out.data(nzc) = op2(data(ia), numeric.zero)
+ ia += 1
+ } else if (ir(ia) > b.ir(ib)) { // entry only in b
+ out.ir(nzc) = b.ir(ib)
+ out.data(nzc) = op2(numeric.zero, b.data(ib))
+ ib += 1
+ } else { // entry in both
+ out.ir(nzc) = ir(ia)
+ out.data(nzc) = op2(data(ia), b.data(ib))
+ ia += 1
+ ib += 1
+ }
+ nzc += 1
+ }
+ while (ia < jc(i+1)-ioff) { // leftover entries of this matrix
+ out.ir(nzc) = ir(ia)
+ out.data(nzc) = op2(data(ia), numeric.zero)
+ ia += 1
+ nzc += 1
+ }
+ while (ib < b.jc(i+1)-ioff) { // leftover entries of b
+ out.ir(nzc) = b.ir(ib)
+ out.data(nzc) = op2(numeric.zero, b.data(ib))
+ ib += 1
+ nzc += 1
+ }
+ out.jc(i+1) = nzc+ioff
+ i += 1
+ }
+ out.sparseTrim // drops entries that evaluated to zero and corrects nnz
+ }
+ } else {
+ throw new RuntimeException("dimensions mismatch")
+ }
+
+
+ def sgMatOpNR(b:SparseMat[T], op2:(T,T) => T, omat:Mat):SparseMat[T] = { // sgMatOp for operands without row indices: entries are paired by position within each column
+ val out = SparseMat.newOrCheck(nrows, ncols, nnz+b.nnz, omat, true) // true: request a result without a row-index array
+ val ioff = Mat.ioneBased
+ var nzc = 0 // entries written so far
+ out.jc(0) = ioff
+ for (i <- 0 until ncols) {
+ var ia = jc(i)-ioff
+ var ib = b.jc(i)-ioff
+ while (ia < jc(i+1)-ioff && ib < b.jc(i+1)-ioff) { // k-th entry of each column pairs with the k-th entry of the other
+ out.data(nzc) = op2(data(ia), b.data(ib))
+ ia += 1
+ ib += 1
+ nzc += 1
+ }
+ while (ia < jc(i+1)-ioff) { // this column is longer: pair the tail with zero
+ out.data(nzc) = op2(data(ia), numeric.zero)
+ ia += 1
+ nzc += 1
+ }
+ while (ib < b.jc(i+1)-ioff) { // b's column is longer
+ out.data(nzc) = op2(numeric.zero, b.data(ib))
+ ib += 1
+ nzc += 1
+ }
+ out.jc(i+1) = nzc+ioff
+ }
+ out.sparseTrim
+ }
+
+ def sgReduceOp(dim:Int, op1:(T) => T, op2:(T,T) => T, omat:Mat):DenseMat[T] = { // folds the stored entries with op2: dim 0 = whole vector, dim 1 = per column, dim 2 = per row
+ val ioff = Mat.ioneBased
+ if (dim == 0) {
+ if (nrows > 1 && ncols > 1) {
+ throw new RuntimeException("must be a vector")
+ } else {
+ val out = DenseMat.newOrCheck(1, 1, omat)
+ var j = 0
+ var acc = op1(numeric.zero) // accumulator starts from op1 applied to zero
+ while (j < nnz) {
+ acc = op2(acc, data(j))
+ j += 1
+ }
+ out.data(0) = acc
+ out
+ }
+ } else if (dim == 1) { // one result per column: 1 x ncols
+ val out = DenseMat.newOrCheck(1, ncols, omat)
+ var i = 0
+ while (i < ncols) {
+ var acc = op1(numeric.zero)
+ var j = jc(i)-ioff
+ while (j < jc(i+1)-ioff) {
+ acc = op2(acc, data(j))
+ j += 1
+ }
+ out.data(i) = acc
+ i += 1
+ }
+ out
+ } else if (dim == 2) { // one result per row: nrows x 1
+ val out = DenseMat.newOrCheck(nrows, 1, omat)
+ out.clear // NOTE(review): accumulators start from out.clear here but from op1(numeric.zero) in the other branches - confirm this is intended
+ if (ir != null) {
+ var j = 0
+ while (j < nnz) {
+ out.data(ir(j)-ioff) = op2(out.data(ir(j)-ioff), data(j))
+ j += 1
+ }
+ } else {
+ var i = 0
+ while (i < ncols) {
+ var j = jc(i) // here j keeps the index offset; it is removed when indexing data
+ while (j < jc(i+1)) {
+ out.data(j-jc(i)) = op2(out.data(j-jc(i)), data(j-ioff)) // implicit rows: the entry's position in its column is its row
+ j += 1
+ }
+ i += 1
+ }
+ }
+ out
+ } else
+ throw new RuntimeException("index must 1 or 2") // NOTE(review): dim 0 is accepted above, so this message is incomplete
+ }
+
+ def ssMatOpOne(b:DenseMat[T], op2:(T,T) => T, omat:Mat):SparseMat[T] = // applies op2 between every stored entry and a 1 x 1 dense operand
+ if (b.nrows == 1 && b.ncols == 1) {
+ sgMatOpScalar(b.data(0), op2, omat) // unwrap the single element and use the scalar path
+ } else throw new RuntimeException("dims incompatible")
+
+ // Applies op2(x, b) to every stored entry x and returns the result after sparseTrim. The output comes from
+ // SparseMat.newOrCheck, which may hand back outmat or a recycled version of it. Row indices are copied when
+ // this matrix has them.
+ // Fix: only jc(0 .. ncols-1) was copied, so out.jc(ncols) - the end of the last column, which sparseTrim
+ // reads - was never set. All ncols+1 column pointers are copied now.
+ def sgMatOpScalar(b:T, op2:(T,T) => T, outmat:Mat):SparseMat[T] = {
+ val out = SparseMat.newOrCheck(nrows, ncols, nnz, outmat, (ir == null))
+ var i = 0
+ while (i < nnz) {
+ out.data(i) = op2(data(i), b)
+ if (ir != null) out.ir(i) = ir(i)
+ i += 1
+ }
+ System.arraycopy(jc, 0, out.jc, 0, ncols+1)
+ out.sparseTrim
+ }
+
+ def sparseTrim:SparseMat[T] = { // removes stored entries whose value is zero, compacting this matrix in place; returns this
+ val ioff = Mat.ioneBased
+ var i = 0
+ var nzc = 0
+ while (i < ncols) { // first pass: count the non-zero stored values
+ var j = jc(i) // here j keeps the index offset; it is removed when indexing data
+ while (j < jc(i+1)) {
+ if (numeric.signum(data(j-ioff)) != 0) nzc += 1
+ j += 1
+ }
+ i += 1
+ }
+ if (nzc == nnz) { // nothing to drop
+ this
+ } else {
+ var out = this // out aliases this: reads and writes below touch the same arrays
+ nzc = 0
+ var lastjc = 0 // original (pre-compaction) start of the current column
+ var i = 0
+ out.jc(0) = ioff
+ while (i < ncols) {
+ var j = lastjc
+ while (j < jc(i+1)-ioff) { // jc(i+1) still holds its original value here
+ if (numeric.signum(data(j)) != 0) {
+ out.data(nzc) = data(j) // nzc <= j, so the write never overtakes the read position
+ if (ir != null) out.ir(nzc) = ir(j)
+ nzc += 1
+ }
+ j += 1
+ }
+ lastjc = jc(i+1)-ioff // remember the original end before it is overwritten on the next line
+ out.jc(i+1) = nzc+ioff
+ i += 1
+ }
+ nnz0 = nzc // arrays keep their length; only the logical count shrinks
+ out
+ }
+ }
+
+ def check = { // structural self-check of jc and ir; throws RuntimeException on the first inconsistency found
+ val ioff = Mat.ioneBased
+ var i = 0
+ if (jc(0) != ioff) {
+ throw new RuntimeException("jc(0) should be "+ioff)
+ }
+ while (i < ncols) {
+ var j = jc(i)-ioff
+ if (jc(i) > jc(i+1)) { // column pointers must be non-decreasing
+ throw new RuntimeException("jc(i) out of order " + i + " " + jc(i) + " " + jc(i+1))
+ }
+ if (ir != null) {
+ while (j < jc(i+1)-ioff-1) { // walks adjacent pairs; NOTE(review): a column with a single entry never enters this loop, so its row index is not range-checked
+ if (ir(j+1) <= ir(j)) { // rows must be strictly increasing within a column
+ throw new RuntimeException("ir(j) out of order "+j+" "+ir(j)+" "+ir(j+1))
+ }
+ if (ir(j) < ioff) {
+ throw new RuntimeException("ir("+j+")="+ir(j)+" too small")
+ }
+ if (ir(j+1) >= nrows+ioff) {
+ throw new RuntimeException("ir("+(j+1)+")="+ir(j+1)+" out of range "+(nrows+ioff))
+ }
+ j += 1
+ }
+ }
+ i += 1
+ }
+ if (jc(ncols) != nnz+ioff) { // NOTE(review): the message below prints nnz while the comparison uses nnz+ioff
+ throw new RuntimeException("jc(ncols) should be "+nnz)
+ }
+ }
+
+ def full:DenseMat[T] = { // dense copy: each stored entry is written to row + nrows*col of a new nrows x ncols DenseMat
+ val out = new DenseMat[T](nrows, ncols)
+ val ioff = Mat.ioneBased
+ if (ir != null) {
+ val cols = SparseMat.uncompressInds(jc, ir) // column index of every stored entry
+ var i = 0
+ while (i < nnz) {
+ out.data(ir(i)-ioff + nrows*cols(i)) = data(i)
+ i += 1
+ }
+ } else { // implicit rows: an entry's position within its column is its row
+ var i = 0
+ while (i < ncols) {
+ var j = jc(i)-ioff
+ while (j < jc(i+1)-ioff) {
+ out.data(j-jc(i)+ioff + nrows*i) = data(j)
+ j += 1
+ }
+ i += 1
+ }
+ }
+ out
+ }
+
+ override def recycle(nr:Int, nc:Int, nnz:Int):SparseMat[T] = { // new nr x nc matrix that reuses this one's arrays wherever they are large enough
+ val jc0 = if (jc.size >= nc+1) jc else new Array[Int](nc+1)
+ val ir0 = if (ir == null || ir.size >= nnz) ir else new Array[Int](nnz) // fix: ir is null for matrices without explicit rows, where ir.size threw NullPointerException; such a matrix stays without rows
+ val data0 = if (data.size >= nnz) data else new Array[T](nnz)
+ new SparseMat[T](nr, nc, nnz, ir0, jc0, data0) // reused arrays are not cleared
+ }
+
+}
+
+
+object SparseMat {
+
+ def apply[T](nr:Int, nc:Int, nnz0:Int) // allocates an nr x nc matrix with room for nnz0 entries and an explicit row-index array
+ (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] =
+ new SparseMat[T](nr, nc, nnz0, new Array[Int](nnz0), new Array[Int](nc+1), new Array[T](nnz0)) // argument order: ir, jc (nc+1 column pointers), data
+
+ def noRows[T](nr:Int, nc:Int, nnz0:Int) // like apply, but without a row-index array (ir is null)
+ (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] =
+ new SparseMat[T](nr, nc, nnz0, null, new Array[Int](nc+1), new Array[T](nnz0)) // methods in the class treat ir == null as "entry k of a column is row k"
+
+ // Builds a sparse matrix from zero-based (rows, cols, vals) triples; triples naming the same position are
+ // summed. Relies on ilexsort2 ordering ocols/orows in place and returning the permutation - TODO confirm.
+ def sparseImpl[@specialized(Double, Float) T](rows:Array[Int], cols:Array[Int], vals:Array[T], nrows:Int, ncols:Int)
+ (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] = {
+ val ioff = Mat.ioneBased
+ val out = SparseMat[T](nrows, ncols, rows.length)
+ val orows = out.ir
+ val ocols = new Array[Int](rows.length)
+ var i = 0
+ while (i < cols.length) {
+ ocols(i) = cols(i)
+ orows(i) = rows(i) + ioff // stored row indices carry the index offset
+ i += 1
+ }
+ val isort = BIDMat.Mat.ilexsort2(ocols, orows)
+ i = 0
+ var igood = 0 // number of distinct positions kept so far
+ while (i < cols.length) {
+ if (i == 0 || orows(i) != orows(i-1) || ocols(i) != ocols(i-1)) {
+ ocols(igood) = ocols(i)
+ orows(igood) = orows(i)
+ out.data(igood) = vals(isort(i))
+ igood += 1
+ } else out.data(igood-1) = numeric.plus(out.data(igood-1), vals(isort(i))) // fix: the entry being extended sits at igood-1; igood is the next free slot, so the old code lost the duplicate
+ i += 1
+ }
+ SparseMat.compressInds(ocols, ncols, out.jc, igood)
+ out.sparseTrim // drops zeros and lowers nnz to the number of entries kept
+ }
+
+ // Builds column pointers from per-entry column indices: out(0) is Mat.ioneBased and out(c+1) is Mat.ioneBased
+ // plus the scan position after skipping entries with column <= c. Assumes coli is non-decreasing - TODO confirm.
+ def compressInds(coli:Array[Int], ncols:Int, out:Array[Int], nnz0:Int):Array[Int] = {
+ val ioff = Mat.ioneBased
+ out(0) = ioff
+ var consumed = 0
+ for (col <- 0 until ncols) {
+ while (consumed < nnz0 && coli(consumed) <= col) consumed += 1
+ out(col+1) = consumed+ioff
+ }
+ out
+ }
+
+ // Expands column pointers into one column index per entry: every position in [coli(c)-ioff, coli(c+1)-ioff)
+ // receives c. The result has rowi.length slots; rowi is used only for its length.
+ def uncompressInds(coli:Array[Int], rowi:Array[Int]):Array[Int] = {
+ val ioff = Mat.ioneBased
+ val colOf = new Array[Int](rowi.length)
+ var c = 0
+ while (c < coli.length-1) {
+ var p = coli(c)-ioff
+ val stop = coli(c+1)-ioff
+ while (p < stop) { colOf(p) = c; p += 1 }
+ c += 1
+ }
+ colOf
+ }
+
+ // Writes inds(k)+1 into out(k) for every k, in ascending order, and returns out.
+ // out must have at least inds.length slots; extra slots are left untouched.
+ // inds itself is only read.
+ def incInds(inds:Array[Int], out:Array[Int]):Array[Int] = {
+ val n = inds.length
+ for (k <- 0 until n) out(k) = inds(k) + 1
+ out
+ }
+
+ // Allocating variant: returns a fresh array of the same length holding inds(k)+1.
+ // The input array is not modified.
+ def incInds(inds:Array[Int]):Array[Int] =
+ incInds(inds, new Array[Int](inds.length))
+
+ // Returns a fresh array of the same length holding inds(k)-1 for every k.
+ // The input array is not modified.
+ // (Used in this file to turn one-based indices into zero-based ones.)
+ def decInds(inds:Array[Int]):Array[Int] = {
+ val shifted = new Array[Int](inds.length)
+ var k = 0
+ while (k < shifted.length) { shifted(k) = inds(k) - 1; k += 1 }
+ shifted
+ }
+
+ def newOrCheck[T](nr:Int, nc:Int, nnz:Int, oldmat:Mat, norows:Boolean = false) // returns an output matrix of the requested shape, reusing oldmat when one is supplied
+ (implicit manifest:Manifest[T], numeric:Numeric[T]):SparseMat[T] = {
+ if (oldmat.asInstanceOf[AnyRef] == null || (oldmat.nrows == 0 && oldmat.ncols == 0)) { // null or 0 x 0 means no output buffer was supplied
+ if (norows)
+ SparseMat.noRows(nr, nc, nnz)
+ else
+ SparseMat(nr, nc, nnz)
+ } else {
+ val omat = oldmat.asInstanceOf[SparseMat[T]]; // throws ClassCastException if oldmat is not a SparseMat
+ if (omat.nrows == nr && omat.ncols == nc && omat.nnz == nnz) { // exact match: use as is
+ omat
+ } else {
+ omat.recycle(nr, nc, nnz) // otherwise wrap its arrays (or new ones) in a matrix of the requested shape; norows is not consulted on this path
+ }
+ }
+ }
+}
+
+
+
+
+
+
diff --git a/src/main/scala/BIDMat/Translators.scala b/src/main/scala/BIDMat/Translators.scala
new file mode 100755
index 00000000..9760b84a
--- /dev/null
+++ b/src/main/scala/BIDMat/Translators.scala
@@ -0,0 +1,147 @@
+package BIDMat
+import scala.util.parsing.combinator._
+
+// Translates C prototypes written with MKL/LAPACK type names into JNI wrapper fragments (otype 0-3) or Java native declarations (otype 4).
+class MKLdeclarationsParser extends JavaTokenParsers {
+  var otype:Int = 3              // output mode, read while parsing; fdecl and sstring only handle 0 through 4
+  var prefix:String = "LAPACK"   // inserted into the generated symbol Java_edu_berkeley_bid_<prefix>_<name>
+  // Single shared translator for plain `void`, so that fdecl can recognise a void return type by reference.
+  private val voidType: String=>String = y => sstring(y, 0, "void", "void", "", "void")
+  // One prototype `type name(args);`. Mode 0: JNI header, 1: argument acquisition, 2: native call, 3: release + return/closing brace, 4: Java declaration.
+  def fdecl: Parser[String] = ftype~ident~"("~decls~")"~""";*""".r ^^ {
+    case a~b~c~d~e~f => {
+      val b0 = b.split("_"); val b1 = b0(b0.length-1)
+      val toreturn:Boolean = (a ne voidType)   // reference test; comparing the closure with the fvoid parser was always unequal
+      otype match {
+        case 0 => "JNIEXPORT "+a("")+"JNICALL Java_edu_berkeley_bid_"+prefix+"_"+b1+
+          "\n(JNIEnv * env, jobject calling_obj, "+d+") {\n"
+        case 1 => d
+        case 2 => " "+(if (toreturn) a("")+"retval = " else "") +b+c+d+e+";\n"
+        case 3 => d+(if (toreturn) " return retval;\n}\n" else "}\n")
+        case 4 => " public static native "+a(b1)+c+d+e+";\n"
+      }
+    }
+  }
+
+  // Concatenates the translated parameters; ", " is inserted only in the modes that emit an argument list (0, 2, 4).
+  def decls: Parser[String] = repsep(adecl, ",") ^^ { params =>
+    if (otype == 0 || otype == 2 || otype == 4) params.mkString(", ") else params.mkString
+  }
+
+  // One parameter: a type followed by its name, rendered by applying the type's translator to the name.
+  def adecl: Parser[String] = ftype~ident ^^ { case a~b => a(b) }
+
+  // All recognised types. Each pointer form is tried before its scalar form, whose literal is a prefix of it.
+  def ftype: Parser[String=>String] = fvoidp | fvoid | flogicalp | flogical | fintp | fint | fuintp | fuint | fchar |
+    ffloatp | ffloat | ffcomplexp | ffcomplex | fdoublep | fdouble | fdcomplexp | fdcomplex
+
+  // Renders name y for the current mode. wrap: 0 = by value, 1 = pinned primitive array, 2 = string. p1 = JNI type, p2 = C type, p3 = call-site prefix, p4 = Java type.
+  def sstring(y:String, wrap:Int, p1:String, p2:String, p3:String, p4:String) = {
+    otype match {
+      case 0 => p1 + " " + (if (wrap > 0) "j_"+y else y)
+      case 1 => if (wrap == 1) " "+p2+" "+y+" = (*env)->GetPrimitiveArrayCritical(env, j_"+y+", JNI_FALSE);\n" else
+        if (wrap == 2) " char * "+y+" = (char *)(*env)->GetStringUTFChars(env, j_"+y+", JNI_FALSE);\n" else ""
+      case 2 => if (y.length == 0) p2+" " else p3+y
+      case 3 => if (wrap == 1) " (*env)->ReleasePrimitiveArrayCritical(env, j_"+y+", "+y+", 0);\n" else
+        if (wrap == 2) " (*env)->ReleaseStringUTFChars(env, j_"+y+", "+y+");\n" else ""
+      case 4 => p4+" "+y
+    }
+  }
+
+  // Plain `void`; always yields the shared voidType instance so fdecl can detect it.
+  def fvoid: Parser[String=>String] = "void" ^^ (x => voidType)
+
+  // NOTE(review): fenum is not listed in ftype, so enum-typed parameters are not parsed at present -- confirm whether that is intended.
+  def fenum: Parser[String=>String] = ("const"~"enum"~ident | "enum"~ident) ^^ {
+    case a~b~c => (y => sstring(y, 0, "jint", "jint", "("+c+")", "int"))
+    case b~c => (y => sstring(y, 0, "jint", "jint", "("+c+")", "int"))
+  }
+  // Scalar types. NOTE(review): flogical uses wrap=1 (array handling) with JNI type "int", unlike fint -- confirm intended.
+  def flogical: Parser[String=>String] = ("const"~"lapack_logical" | "lapack_logical") ^^ (x => (y=>
+    sstring(y, 1, "int", "jint *", "(lapack_logical)", "int")))
+
+  def fchar: Parser[String=>String] = ("const"~"char" | "char") ^^ (x => (y=>
+    sstring(y, 2, "jstring", "char *", "*", "String")))
+
+  def fint: Parser[String=>String] = ("const"~"int" | "int" | "const"~"lapack_int" | "lapack_int" | "MKL_INT") ^^ (x => (y=>
+    sstring(y, 0, "jint", "jint", "", "int")))
+
+  def fuint: Parser[String=>String] = ("const"~"MKL_UINT" | "MKL_UINT"| "const"~"unsigned"~"int" | "unsigned"~"int")^^ (x => (y =>
+    sstring(y, 0, "jint", "jint", "(unsigned int)", "int")))
+
+  def ffloat: Parser[String=>String] = ("const"~"float" | "float" | "lapack_float") ^^ (x => (y =>
+    sstring(y, 0, "jfloat", "jfloat", "", "float")))
+
+  def fdouble: Parser[String=>String] = ("const"~"double" | "double" | "lapack_double") ^^ (x => (y =>
+    sstring(y, 0, "jdouble", "jdouble", "", "double")))
+
+  def ffcomplex: Parser[String=>String] = ("const"~"lapack_complex_float" | "lapack_complex_float" | "MKL_complex8") ^^ (x => (y =>
+    sstring(y, 1, "jfloatArray", "jfloat *", "*", "float []")))
+
+  def fdcomplex: Parser[String=>String] = ("const"~"lapack_complex_double" | "lapack_complex_double" | "MKL_complex16") ^^ (x => (y =>
+    sstring(y, 1, "jdoubleArray", "jdouble *", "*", "double []")))
+  // Pointer types: received from Java as arrays and accessed through GetPrimitiveArrayCritical (wrap=1).
+  def fvoidp: Parser[String=>String] = ("const"~"void"~"*" | "void"~"*") ^^ (x => (y =>
+    sstring(y, 1, "jfloatArray", "jfloat *", "(void *)", "float []")))
+
+  def flogicalp: Parser[String=>String] = ("const"~"lapack_logical"~"*" | "lapack_logical"~"*") ^^ (x => (y=>
+    sstring(y, 1, "jintArray", "jint *", "(lapack_logical *)", "int []")))
+
+  def fintp: Parser[String=>String] = ("const"~"int"~"*" | "int"~"*" | "MKL_INT"~"*" | "lapack_int"~"*") ^^ (x => (y=>
+    sstring(y, 1, "jintArray", "jint *", "", "int []")))
+
+  def fuintp: Parser[String=>String] = ("const"~"MKL_UINT"~"*" | "MKL_UINT"~"*" | "const"~"unsigned"~"int"~"*" | "unsigned"~"int"~"*") ^^ (x => (y =>
+    sstring(y, 1, "jintArray ", "jint *", "", "int []")))
+
+  def ffloatp: Parser[String=>String] = ("const"~"float"~"*" | "float"~"*") ^^ (x => (y=>
+    sstring(y, 1, "jfloatArray", "jfloat *", "", "float []")))
+
+  def fdoublep: Parser[String=>String] = ("const"~"double"~"*" | "double"~"*") ^^ (x => (y=>
+    sstring(y, 1, "jdoubleArray", "jdouble *", "", "double []")))
+  // Complex pointers keep the matched C type name (x1) so the call site can cast the jfloat/jdouble buffer back to it.
+  def ffcomplexp: Parser[String=>String] = ("const"~"lapack_complex_float"~"*" | "lapack_complex_float"~"*" | "MKL_complex8"~"*") ^^ {
+    case x0~x1~x2 => (y => sstring(y, 1, "jfloatArray", "jfloat *", "("+x1+" *)", "float []"))
+    case x1~x2 => (y => sstring(y, 1, "jfloatArray", "jfloat *", "("+x1+" *)", "float []"))
+  }
+
+  def fdcomplexp: Parser[String=>String] = ("const"~"lapack_complex_double"~"*" | "lapack_complex_double"~"*" | "MKL_complex16"~"*") ^^ {
+    case x0~x1~x2 => (y=> sstring(y, 1, "jdoubleArray", "jdouble *", "("+x1+" *)", "double []"))
+    case x1~x2 => (y=> sstring(y, 1, "jdoubleArray", "jdouble *", "("+x1+" *)", "double []"))
+  }
+
+}
+
+// Command-line driver for MKLdeclarationsParser: reads declarations, one per line, from the file named by
+// args(0) and prints the translated text to standard output.
+object translateTester {
+  // With exactly one argument every line is rendered in modes 0, 1, 2 and 3, in that order.
+  // With two or more arguments only mode 4 (the Java native declaration) is rendered.
+  // Lines of length <= 1 are skipped; a line that fails to parse makes `.get` throw and aborts the run.
+  def main(args: Array[String]): Unit = {
+    val p = new MKLdeclarationsParser
+    val s = scala.io.Source.fromFile(args(0))
+    val modes = if (args.length == 1) List(0, 1, 2, 3) else List(4)
+
+    try {
+      s.getLines.foreach((line) => {
+        if (line.length > 1) {
+          modes.foreach((mode) => {
+            p.otype = mode
+            println(p.parseAll(p.fdecl, line).get)
+          })
+        }
+      })
+    } finally {
+      // The source used to be left open; release the file handle even when a parse error propagates.
+      s.close()
+    }
+
+  }
+}
+
+
+
+object Translators {
+  // No members are defined; the object is empty.
+
+}
diff --git a/src/test/scala/BIDMat/Copyright.txt b/src/test/scala/BIDMat/Copyright.txt
new file mode 100755
index 00000000..21326596
--- /dev/null
+++ b/src/test/scala/BIDMat/Copyright.txt
@@ -0,0 +1,25 @@
+Copyright (c) 2012, Regents of the University of California
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the copyright holder nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/src/test/scala/BIDMat/DMatTest.scala b/src/test/scala/BIDMat/DMatTest.scala
new file mode 100755
index 00000000..8d013c15
--- /dev/null
+++ b/src/test/scala/BIDMat/DMatTest.scala
@@ -0,0 +1,130 @@
+package BIDMat
+
+import Mat._
+import MatFunctions._
+import org.scalatest._;
+import org.scalatest.junit._;
+import org.scalatest.prop._;
+import org.junit.runner.RunWith
+
+// Checks DMat cell access, vertical (on) and horizontal (\) concatenation, transpose and range slicing on small fixtures.
+@RunWith(classOf[JUnitRunner])
+class DMatTest extends FunSuite with Checkers {
+  val x = DMat(2,3)
+  val xvalues = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
+  System.arraycopy(xvalues, 0, x.data, 0, 6)
+  val y = DMat(1,3)
+  val yvalues = Array(7.0, 8.0, 9.0)
+  System.arraycopy(yvalues, 0, y.data, 0, 3)
+  val z = DMat(2,1)
+  val zvalues = Array(10.0, 11.0)
+  System.arraycopy(zvalues, 0, z.data, 0, 2)
+  val xx = DMat(3,4)
+  val xxvalues = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0)
+  System.arraycopy(xxvalues, 0, xx.data, 0, 12)
+  // The data arrays are filled in storage order; the expectations below (e.g. x(1,0) == 2.0) imply column-major layout.
+  test("DMat fill") {
+    assert(x(0,0) == 1.0)
+    assert(x(1,0) == 2.0)
+    assert(x(0,1) == 3.0)
+    assert(x(1,1) == 4.0)
+    assert(x(0,2) == 5.0)
+    assert(x(1,2) == 6.0)
+  }
+
+  test("DMat on") {
+    val stacked = x on y   // rows of x first, then the single row of y
+    assert(stacked(0,0) == 1.0)
+    assert(stacked(1,0) == 2.0)
+    assert(stacked(0,1) == 3.0)
+    assert(stacked(1,1) == 4.0)
+    assert(stacked(0,2) == 5.0)
+    assert(stacked(1,2) == 6.0)
+    assert(stacked(2,0) == 7.0)
+    assert(stacked(2,1) == 8.0)
+    assert(stacked(2,2) == 9.0)
+  }
+
+  test("DMat \\") {
+    val joined = x \ z   // columns of x first, then the single column of z
+    assert(joined(0,0) == 1.0)
+    assert(joined(1,0) == 2.0)
+    assert(joined(0,1) == 3.0)
+    assert(joined(1,1) == 4.0)
+    assert(joined(0,2) == 5.0)
+    assert(joined(1,2) == 6.0)
+    assert(joined(0,3) == 10.0)
+    assert(joined(1,3) == 11.0)
+  }
+
+  test("DMat t") {
+    val transposed = x.t
+    assert(transposed(0,0) == 1.0)
+    assert(transposed(0,1) == 2.0)
+    assert(transposed(1,0) == 3.0)
+    assert(transposed(1,1) == 4.0)
+    assert(transposed(2,0) == 5.0)
+    assert(transposed(2,1) == 6.0)
+  }
+
+  test("DMat slice 1") {
+    val secondCol = xx(?,1)
+    assert(secondCol(0,0) == 4.0)
+    assert(secondCol(1,0) == 5.0)
+    assert(secondCol(2,0) == 6.0)
+  }
+
+  test("DMat slice 2") {
+    val midCols = xx(?,1 to 2)
+    assert(midCols(0,0) == 4.0)
+    assert(midCols(1,0) == 5.0)
+    assert(midCols(2,0) == 6.0)
+    assert(midCols(0,1) == 7.0)
+    assert(midCols(1,1) == 8.0)
+    assert(midCols(2,1) == 9.0)
+  }
+
+  test("DMat slice 3") {
+    val secondRow = xx(1,?)
+    assert(secondRow(0,0) == 2.0)
+    assert(secondRow(0,1) == 5.0)
+    assert(secondRow(0,2) == 8.0)
+    assert(secondRow(0,3) == 11.0)
+  }
+
+  test("DMat slice 4") {
+    val topRows = xx(0 to 1,?)
+    assert(topRows(0,0) == 1.0)
+    assert(topRows(0,1) == 4.0)
+    assert(topRows(0,2) == 7.0)
+    assert(topRows(0,3) == 10.0)
+    assert(topRows(1,0) == 2.0)
+    assert(topRows(1,1) == 5.0)
+    assert(topRows(1,2) == 8.0)
+    assert(topRows(1,3) == 11.0)
+  }
+
+  test("DMat slice 5") {
+    val whole = xx(?,?)
+    assert(whole(0,0) == 1.0)
+    assert(whole(0,1) == 4.0)
+    assert(whole(0,2) == 7.0)
+    assert(whole(0,3) == 10.0)
+    assert(whole(1,0) == 2.0)
+    assert(whole(1,1) == 5.0)
+    assert(whole(1,2) == 8.0)
+    assert(whole(1,3) == 11.0)
+    assert(whole(2,0) == 3.0)
+    assert(whole(2,1) == 6.0)
+    assert(whole(2,2) == 9.0)
+    assert(whole(2,3) == 12.0)
+  }
+
+  test("DMat slice 6") {
+    val corner = xx(0 to 1, 2 to 3)
+    assert(corner(0,0) == 7.0)
+    assert(corner(1,0) == 8.0)
+    assert(corner(0,1) == 10.0)
+    assert(corner(1,1) == 11.0)
+  }
+}
\ No newline at end of file
diff --git a/src/test/scala/BIDMat/TestDops.scala b/src/test/scala/BIDMat/TestDops.scala
new file mode 100755
index 00000000..b8e9b85a
--- /dev/null
+++ b/src/test/scala/BIDMat/TestDops.scala
@@ -0,0 +1,25 @@
+package BIDMat
+
+
+import DMat._
+import IMat._
+import FMat._
+import scala.compat.Platform._
+
+
+// Timing harness: evaluates IMat(n,n) + IMat(n,n) m times and prints the elapsed milliseconds and a rate figure.
+object TestDops {
+  def main(args: Array[String]): Unit = {
+    val size = 2000
+    val lhs = IMat(size,size)
+    val rhs = IMat(size,size)
+    val started = currentTime   // taken before the banner is printed, so the println falls inside the timed span
+    val reps = 1000
+    println("starting up")
+    var iter = 0
+    while (iter < reps) { val added = lhs + rhs; iter += 1 }   // result is discarded; only the elapsed time is used
+    val elapsed = currentTime - started
+    println("time="+elapsed+" msec, gflops="+(size.doubleValue*size*reps/elapsed/1e6))
+  }
+}
+
diff --git a/src/test/scala/BIDMat/TestDops2.scala b/src/test/scala/BIDMat/TestDops2.scala
new file mode 100755
index 00000000..6af1030d
--- /dev/null
+++ b/src/test/scala/BIDMat/TestDops2.scala
@@ -0,0 +1,29 @@
+package BIDMat
+
+import Mat._
+import DMat._
+import FMat._
+import scala.compat.Platform._
+
+
+// Timing harness: evaluates FMat(l,n) * FMat(n,k) m times and prints the elapsed milliseconds and a gflops figure.
+object TestDops2 {
+  def main(args: Array[String]): Unit = {
+    val n = 50000; val k = 20; val l = 1; val reps = 30000
+    val left = FMat(l,n)
+    val right = FMat(n,k)
+    val altLeft = FMat(k,n)    // referenced only by the commented-out alternative product
+    val altRight = FMat(n,l)   // referenced only by the commented-out alternative product
+    val started = currentTime
+    println("Starting up")
+    var iter = 0
+    while (iter < reps) {
+      val prod = left * right
+//      val prod = altLeft * altRight
+      iter += 1
+    }
+    val elapsed = currentTime - started
+    println("time="+elapsed+"msec, gflops="+(2.0*k*n*l*reps/elapsed/1e6))
+  }
+}
+
diff --git a/src/test/scala/BIDMat/TestDops3.scala b/src/test/scala/BIDMat/TestDops3.scala
new file mode 100755
index 00000000..17616ac1
--- /dev/null
+++ b/src/test/scala/BIDMat/TestDops3.scala
@@ -0,0 +1,30 @@
+package BIDMat
+
+import Mat._
+import DMat._
+import FMat._
+import scala.compat.Platform._
+
+
+// Timing harness: transposes FMat(n,k) m times; the printed rate still uses the 2*k*n*l*m formula (NOTE(review): confirm that is meaningful for a transpose).
+object TestDops3 {
+  def main(args: Array[String]): Unit = {
+    val n = 50000; val k = 10; val l = 1; val reps = 30000
+    val source = FMat(n,k)
+//    val a2 = FMat(k,n)
+    val unusedRow = FMat(l,n)     // referenced only by the commented-out products
+    val unusedSmall = FMat(k,l)   // referenced only by the commented-out products
+    val started = currentTime
+    println("Starting up")
+    var iter = 0
+    while (iter < reps) {
+//      val c = unusedRow * source
+//      val e = source * unusedSmall
+      val flipped = source.t
+      iter += 1
+    }
+    val elapsed = currentTime - started
+    println("time="+elapsed+"msec, gflops="+(2.0*k*n*l*reps/elapsed/1e6))
+  }
+}
+
diff --git a/src/test/scala/BIDMat/TestHDF5.scala b/src/test/scala/BIDMat/TestHDF5.scala
new file mode 100755
index 00000000..8abed633
--- /dev/null
+++ b/src/test/scala/BIDMat/TestHDF5.scala
@@ -0,0 +1,39 @@
+package BIDMat
+
+
+import MatFunctions._
+import SciFunctions._
+import CMat._
+
+
+
+// Round trip: saves a CSMat(1,2) holding two strings under the name "c", loads it back and prints both cells.
+object TestHDF5 {
+  def main(args: Array[String]) : Unit = {
+    val n = 50000; val k = 10; val l = 1
+    val randDense = rand(n,k)            // created but not saved; see the commented-out printlns
+    val intRow = IMat(l,n)
+    val randSparse = sprand(10,10,0.1)
+    val strCells = CSMat(1,2)
+    strCells(0,0) = "test"
+    strCells(0,1) = "try"
+    val fname = "d:\\sentiment\\tmp\\mtest.mat"   // hard-coded Windows path
+// println(randDense.toString)
+// println(intRow.toString)
+    saveAs(fname, strCells, "c")
+// Disabled byte-by-byte dump of the saved file:
+/* val fid = new java.io.FileInputStream(fname)
+ var next:Int = 0
+ var i = 0
+ while (next >= 0) {
+ var next = fid.read()
+ if (next > 0) println("buf("+i+")=("+(next + (if (next > 127) -256 else 0))+")")
+ i += 1
+ }
+ fid.close() */
+    val readBack:CMat = load(fname, "c").asInstanceOf[CMat]
+    println(readBack(0,0).asInstanceOf[String])
+    println(readBack(0,1).asInstanceOf[String])
+  }
+}
+