diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..95890d7 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,79 @@ +^appveyor\.yml$ +^azure-pipelines\.yml$ +^bench$ +^.clangd$ +^codecov\.yml$ +^compile_commands.json$ +^CONDUCT\.md$ +^config\.yaml$ +^cran-comments\.md$ +^CRAN-RELEASE$ +^[^/]+\.csv$ +^[^/]+\.sh$ +^data-raw$ +^\.dir-locals\.el$ +^development.md$ +^doc$ +^docker$ +^.dockerignore.*$ +^docs$ +^etc$ +^\.github$ +^\.github/workflows/pkgdown\.yaml$ +^\.github/workflows/pr-commands\.yaml$ +^\.github/workflows/R-CMD-check\.yaml$ +^\.gitlab$ +^home$ +^\.httr-oauth$ +^inst/bench$ +^inst/db$ +^inst/extdata/ext$ +^inst/extdata/int$ +^inst/extdata/tmp$ +^inst/rcon$ +^Jenkinsfile.*$ +^LICENSE\.md$ +.*\.log$ +^logo\.png$ +^logs$ +^maven-project$ +^Makefile.*$ +^Makefile$ +^man-roxygen$ +^Meta$ +^\.mypy_cache$ +^notes$ +^pkgdown$ +^_pkgdown\.yml$ +^poetry\.lock$ +^Project\.toml$ +^pyproject\.toml$ +^\.python-version$ +^rcon$ +^renv$ +^renv\.lock$ +^.renvignore$ +^README-.*\.png$ +^README\.Rmd$ +^resources\.txt$ +^revdep$ +^Rplots.* +^.Rprofile$ +^\.\.Rcheck$ +^.*\.Rproj$ +^\.Rproj\.user$ +^sanitize\.sh$ +^script\.R$ +^setup\.cfg$ +^setup\.py$ +^src/\.clang-format$ +^src/\.ycm_extra_conf\.pyc?$ +^TAGS$ +^_targets$ +^_targets\.R$ +^temp$ +^travis$ +^travis/check_format\.sh$ +^\.travis\.yml$ +^vignettes/rsconnect$ +^.vimrc$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..0ce825c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,39 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto eol=lf + +*.sh text eol=lf +*.bat text eol=crlf +*.cmd text eol=crlf +pom.xml text eol=lf +*.java text eol=lf +*.scala text eol=lf +*.xml text eol=lf +*.properties text eol=lf + +# Denote all files that are truly binary and should not be modified. 
+*.gpg binary +*.png binary +*.jpg binary +*.gif binary +*.jpeg binary +*.zip binary +*.phar binary +*.ttf binary +*.woff binary +*.woff2 binary +*.eot binary +*.ico binary +*.mo binary +*.pdf binary +*.xsd binary +*.ts binary +*.exe binary + +*.pt binary +*.pickle binary + +*.docx binary +*.xlsx binary +*.pptx binary +/NEWS.md merge=union +/.Rbuildignore merge=union diff --git a/.gitignore b/.gitignore index 89ae616..78add95 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,149 @@ -/build -/.classpath -/.project -/.settings -/target +**/.settings/ +**/.classpath +**/.project +**/.metadata +#**/bin/ +**/*.2delete +**/target/ +**/build/ +*.egg-info +.idea/ +*.iml +*.eml + + +# Ignore all dotfiles... +#.* + +.#* + + + + +# except for .gitignore +!.gitignore +!.gitattributes + +!.dockerignore +!.travis.yml + + +!.python-version + + +# python +venv/ + + +################### +# Compiled source # +################### +*.com +*.class +docs +/src/*.o +/src/*.o-* +/src/*.d +/src/*.so + +*.dll +*.exe +*.o +*.so + + +############ +# setup.py # +############ + +/build/ +/dist/ + +*.pyc +*.egg +*.whl + +*.egg-info/ + +########## +# pytest # +########## + +/.pytest_cache/ + +# Packages # +############ +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso + +### *.jar + +*.rar +*.tar +#*.zip + +# Logs and databases # +###################### +*.log +*.tlog +*.sqlite +/logs/ + + +# OS generated files # +###################### +.DS_Store* +ehthumbs.db +Icon? 
+Thumbs.db + + +# general *~ + +# Eclipse +.settings +.project +.classpath + +# IDEA +/.idea +/.idea_modules + +# Sublime +*.sublime-project +*.sublime-workspace + +# SPARK +metastore_db/ +/doc/_build/ +*.~undo-tree~ +.RData +.Rproj.user +..Rcheck .Rhistory -*.class -# Package Files # -*.jar -*.war -*.ear +Rplots.* +inst/doc +.httr-oauth +vignettes/*.R +.DS_Store +/clion-test.R +/BROWSE +/GPATH +/GRTAGS +/GTAGS +/TAGS +/.dir-locals.el +.obsidian/ +/issues +/home +/logs +/temp +/.drake +bench-libs +doc +Meta diff --git a/R-skeleton/DESCRIPTION b/DESCRIPTION similarity index 100% rename from R-skeleton/DESCRIPTION rename to DESCRIPTION diff --git a/R-skeleton/NAMESPACE b/NAMESPACE similarity index 97% rename from R-skeleton/NAMESPACE rename to NAMESPACE index 0ce6687..0a34b4b 100644 --- a/R-skeleton/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,7 @@ -import("rJava") -export(extraTrees, toRMatrix, toJavaMatrix, toJavaCSMatrix, toJavaMatrix2D, selectTrees, setJavaMemory, prepareForSave) -S3method(predict, extraTrees) -S3method(print, extraTrees) -S3method(extraTrees, default) - - +import("rJava") +export(extraTrees, toRMatrix, toJavaMatrix, toJavaCSMatrix, toJavaMatrix2D, selectTrees, setJavaMemory, prepareForSave) +S3method(predict, extraTrees) +S3method(print, extraTrees) +S3method(extraTrees, default) + + diff --git a/R-skeleton/R/extraTrees.R b/R/extraTrees.R similarity index 93% rename from R-skeleton/R/extraTrees.R rename to R/extraTrees.R index 739a925..5c3d1c9 100644 --- a/R-skeleton/R/extraTrees.R +++ b/R/extraTrees.R @@ -1,4 +1,4 @@ -extraTrees <- function(x, ...) UseMethod("extraTrees", x) - - - +extraTrees <- function(x, ...) 
UseMethod("extraTrees", x) + + + diff --git a/R-skeleton/R/extraTrees.default.R b/R/extraTrees.default.R similarity index 100% rename from R-skeleton/R/extraTrees.default.R rename to R/extraTrees.default.R diff --git a/R-skeleton/R/onLoad.R b/R/onLoad.R similarity index 97% rename from R-skeleton/R/onLoad.R rename to R/onLoad.R index d848932..127e7d7 100644 --- a/R-skeleton/R/onLoad.R +++ b/R/onLoad.R @@ -1,3 +1,3 @@ -.onLoad <- function(libname, pkgname) { - .jpackage(pkgname, lib.loc = libname) +.onLoad <- function(libname, pkgname) { + .jpackage(pkgname, lib.loc = libname) } \ No newline at end of file diff --git a/R-skeleton/R/predict.extraTrees.R b/R/predict.extraTrees.R similarity index 100% rename from R-skeleton/R/predict.extraTrees.R rename to R/predict.extraTrees.R diff --git a/R-skeleton/R/print.extraTrees.R b/R/print.extraTrees.R similarity index 100% rename from R-skeleton/R/print.extraTrees.R rename to R/print.extraTrees.R diff --git a/R-skeleton/R/setMemory.R b/R/setMemory.R similarity index 100% rename from R-skeleton/R/setMemory.R rename to R/setMemory.R diff --git a/R-skeleton/inst/CHANGELOG b/inst/CHANGELOG similarity index 100% rename from R-skeleton/inst/CHANGELOG rename to inst/CHANGELOG diff --git a/R-skeleton/inst/CITATION b/inst/CITATION similarity index 100% rename from R-skeleton/inst/CITATION rename to inst/CITATION diff --git a/inst/java/ExtraTrees.jar b/inst/java/ExtraTrees.jar new file mode 100644 index 0000000..509b140 Binary files /dev/null and b/inst/java/ExtraTrees.jar differ diff --git a/R-skeleton/inst/tests/test-all.R b/inst/tests/test-all.R similarity index 100% rename from R-skeleton/inst/tests/test-all.R rename to inst/tests/test-all.R diff --git a/R-skeleton/inst/tests/testthat/test-classification.r b/inst/tests/testthat/test-classification.r similarity index 100% rename from R-skeleton/inst/tests/testthat/test-classification.r rename to inst/tests/testthat/test-classification.r diff --git 
a/R-skeleton/inst/tests/testthat/test-multitask.r b/inst/tests/testthat/test-multitask.r similarity index 100% rename from R-skeleton/inst/tests/testthat/test-multitask.r rename to inst/tests/testthat/test-multitask.r diff --git a/R-skeleton/inst/tests/testthat/test-na.r b/inst/tests/testthat/test-na.r similarity index 100% rename from R-skeleton/inst/tests/testthat/test-na.r rename to inst/tests/testthat/test-na.r diff --git a/R-skeleton/inst/tests/testthat/test-quantile.r b/inst/tests/testthat/test-quantile.r similarity index 100% rename from R-skeleton/inst/tests/testthat/test-quantile.r rename to inst/tests/testthat/test-quantile.r diff --git a/R-skeleton/inst/tests/testthat/test-regression.r b/inst/tests/testthat/test-regression.r similarity index 100% rename from R-skeleton/inst/tests/testthat/test-regression.r rename to inst/tests/testthat/test-regression.r diff --git a/R-skeleton/inst/tests/testthat/test-sparse-input.r b/inst/tests/testthat/test-sparse-input.r similarity index 100% rename from R-skeleton/inst/tests/testthat/test-sparse-input.r rename to inst/tests/testthat/test-sparse-input.r diff --git a/R-skeleton/inst/tests/testthat/test-weights.r b/inst/tests/testthat/test-weights.r similarity index 100% rename from R-skeleton/inst/tests/testthat/test-weights.r rename to inst/tests/testthat/test-weights.r diff --git a/R-skeleton/man/extraTrees.Rd b/man/extraTrees.Rd similarity index 100% rename from R-skeleton/man/extraTrees.Rd rename to man/extraTrees.Rd diff --git a/R-skeleton/man/predict.extraTrees.Rd b/man/predict.extraTrees.Rd similarity index 100% rename from R-skeleton/man/predict.extraTrees.Rd rename to man/predict.extraTrees.Rd diff --git a/R-skeleton/man/prepareForSave.Rd b/man/prepareForSave.Rd similarity index 100% rename from R-skeleton/man/prepareForSave.Rd rename to man/prepareForSave.Rd diff --git a/R-skeleton/man/selectTrees.Rd b/man/selectTrees.Rd similarity index 100% rename from R-skeleton/man/selectTrees.Rd rename to 
man/selectTrees.Rd diff --git a/R-skeleton/man/setJavaMemory.Rd b/man/setJavaMemory.Rd similarity index 96% rename from R-skeleton/man/setJavaMemory.Rd rename to man/setJavaMemory.Rd index d20c02e..3816f24 100644 --- a/R-skeleton/man/setJavaMemory.Rd +++ b/man/setJavaMemory.Rd @@ -1,21 +1,21 @@ -\name{setJavaMemory} -\alias{setJavaMemory} -\title{Utility function for setting Java memory.} -\description{ - Function for setting JVM memory, specified in MB. If you get java.lang.OutOfMemoryError you can use this function to increase the memory available to ExtraTrees. -} -\usage{ - setJavaMemory( memoryInMB ) -} -\arguments{ - \item{memoryInMB}{Integer specifying the amount of memory (MB)} -} -%\value{ -%} -\author{Jaak Simm} -\examples{ - ## use 2G memory - setJavaMemory(2000) -} -\keyword{java.lang.OutOfMemoryError,memory,JVM} - +\name{setJavaMemory} +\alias{setJavaMemory} +\title{Utility function for setting Java memory.} +\description{ + Function for setting JVM memory, specified in MB. If you get java.lang.OutOfMemoryError you can use this function to increase the memory available to ExtraTrees. 
+} +\usage{ + setJavaMemory( memoryInMB ) +} +\arguments{ + \item{memoryInMB}{Integer specifying the amount of memory (MB)} +} +%\value{ +%} +\author{Jaak Simm} +\examples{ + ## use 2G memory + setJavaMemory(2000) +} +\keyword{java.lang.OutOfMemoryError,memory,JVM} + diff --git a/R-skeleton/man/toJavaCSMatrix.Rd b/man/toJavaCSMatrix.Rd similarity index 100% rename from R-skeleton/man/toJavaCSMatrix.Rd rename to man/toJavaCSMatrix.Rd diff --git a/R-skeleton/man/toJavaMatrix.Rd b/man/toJavaMatrix.Rd similarity index 96% rename from R-skeleton/man/toJavaMatrix.Rd rename to man/toJavaMatrix.Rd index 44caa71..a6e3e6a 100644 --- a/R-skeleton/man/toJavaMatrix.Rd +++ b/man/toJavaMatrix.Rd @@ -1,18 +1,18 @@ -\name{toJavaMatrix} -\alias{toJavaMatrix} -\title{Utility function for converting an R matrix (numeric matrix) to Java matrix.} -\description{ - Internal function used for converting an R matrix to a Matrix object in Java. Matrix class is a custom Java class used for storing matrices by the implementation of ExtraTrees in Java. -} -\usage{ - toJavaMatrix( m ) -} -\arguments{ - \item{m}{ matrix of numeric values. } -} -\value{ - reference to Java matrix with the same contents as the input R matrix. -} -\author{Jaak Simm} -\keyword{java,matrix,conversion} - +\name{toJavaMatrix} +\alias{toJavaMatrix} +\title{Utility function for converting an R matrix (numeric matrix) to Java matrix.} +\description{ + Internal function used for converting an R matrix to a Matrix object in Java. Matrix class is a custom Java class used for storing matrices by the implementation of ExtraTrees in Java. +} +\usage{ + toJavaMatrix( m ) +} +\arguments{ + \item{m}{ matrix of numeric values. } +} +\value{ + reference to Java matrix with the same contents as the input R matrix. 
+} +\author{Jaak Simm} +\keyword{java,matrix,conversion} + diff --git a/R-skeleton/man/toJavaMatrix2D.Rd b/man/toJavaMatrix2D.Rd similarity index 97% rename from R-skeleton/man/toJavaMatrix2D.Rd rename to man/toJavaMatrix2D.Rd index 9ba02ed..4769f78 100644 --- a/R-skeleton/man/toJavaMatrix2D.Rd +++ b/man/toJavaMatrix2D.Rd @@ -1,20 +1,20 @@ -\name{toJavaMatrix2D} -\alias{toJavaMatrix2D} -\title{Utility function for converting an R matrix (standard matrix or SparseMatrix) to appropriate Java matrix object.} -\description{ - Internal function used for converting an R matrix to an appropriate object in Java. - It uses toJavaMatrix() and toJavaCSMatrix() underneath and returns a reference to - general matrix representation in Java of type Array2D (interface). -} -\usage{ - toJavaMatrix2D( m ) -} -\arguments{ - \item{m}{ matrix of numeric values. } -} -\value{ - reference to Java matrix (dense or sparse) with the same contents as the input R matrix. -} -\author{Jaak Simm} -\keyword{java,matrix,conversion} - +\name{toJavaMatrix2D} +\alias{toJavaMatrix2D} +\title{Utility function for converting an R matrix (standard matrix or SparseMatrix) to appropriate Java matrix object.} +\description{ + Internal function used for converting an R matrix to an appropriate object in Java. + It uses toJavaMatrix() and toJavaCSMatrix() underneath and returns a reference to + general matrix representation in Java of type Array2D (interface). +} +\usage{ + toJavaMatrix2D( m ) +} +\arguments{ + \item{m}{ matrix of numeric values. } +} +\value{ + reference to Java matrix (dense or sparse) with the same contents as the input R matrix. 
+} +\author{Jaak Simm} +\keyword{java,matrix,conversion} + diff --git a/R-skeleton/man/toRMatrix.Rd b/man/toRMatrix.Rd similarity index 96% rename from R-skeleton/man/toRMatrix.Rd rename to man/toRMatrix.Rd index a50b653..5240fc1 100644 --- a/R-skeleton/man/toRMatrix.Rd +++ b/man/toRMatrix.Rd @@ -1,18 +1,18 @@ -\name{toRMatrix} -\alias{toRMatrix} -\title{Utility function for converting Java matrix to R matrix (matrix of doubles).} -\description{ - Internal function used for converting a Matrix object from Java to an R matrix. Matrix class is a custom Java class used for storing matrices by the implementation of ExtraTrees in Java. -} -\usage{ - toRMatrix( javam ) -} -\arguments{ - \item{javam}{ Java matrix (Matrix class). } -} -\value{ - R (double) matrix with the same contents as the input. -} -\author{Jaak Simm} -\keyword{java,matrix,conversion} - +\name{toRMatrix} +\alias{toRMatrix} +\title{Utility function for converting Java matrix to R matrix (matrix of doubles).} +\description{ + Internal function used for converting a Matrix object from Java to an R matrix. Matrix class is a custom Java class used for storing matrices by the implementation of ExtraTrees in Java. +} +\usage{ + toRMatrix( javam ) +} +\arguments{ + \item{javam}{ Java matrix (Matrix class). } +} +\value{ + R (double) matrix with the same contents as the input. 
+} +\author{Jaak Simm} +\keyword{java,matrix,conversion} + diff --git a/maven-project/.gitignore b/maven-project/.gitignore new file mode 100644 index 0000000..89ae616 --- /dev/null +++ b/maven-project/.gitignore @@ -0,0 +1,12 @@ +/build +/.classpath +/.project +/.settings +/target +*~ +.Rhistory +*.class +# Package Files # +*.jar +*.war +*.ear diff --git a/maven-project/LICENSE b/maven-project/LICENSE new file mode 120000 index 0000000..ea5b606 --- /dev/null +++ b/maven-project/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/maven-project/NOTICE b/maven-project/NOTICE new file mode 120000 index 0000000..7e1b82f --- /dev/null +++ b/maven-project/NOTICE @@ -0,0 +1 @@ +../NOTICE \ No newline at end of file diff --git a/maven-project/R-skeleton/DESCRIPTION b/maven-project/R-skeleton/DESCRIPTION new file mode 120000 index 0000000..5880357 --- /dev/null +++ b/maven-project/R-skeleton/DESCRIPTION @@ -0,0 +1 @@ +../../DESCRIPTION \ No newline at end of file diff --git a/maven-project/R-skeleton/NAMESPACE b/maven-project/R-skeleton/NAMESPACE new file mode 120000 index 0000000..a200c5f --- /dev/null +++ b/maven-project/R-skeleton/NAMESPACE @@ -0,0 +1 @@ +../../NAMESPACE \ No newline at end of file diff --git a/maven-project/R-skeleton/R b/maven-project/R-skeleton/R new file mode 120000 index 0000000..dbe424b --- /dev/null +++ b/maven-project/R-skeleton/R @@ -0,0 +1 @@ +../../R \ No newline at end of file diff --git a/maven-project/R-skeleton/inst b/maven-project/R-skeleton/inst new file mode 120000 index 0000000..d585bfb --- /dev/null +++ b/maven-project/R-skeleton/inst @@ -0,0 +1 @@ +../../inst \ No newline at end of file diff --git a/maven-project/R-skeleton/man b/maven-project/R-skeleton/man new file mode 120000 index 0000000..ee201c1 --- /dev/null +++ b/maven-project/R-skeleton/man @@ -0,0 +1 @@ +../../man \ No newline at end of file diff --git a/maven-project/R-skeleton/vignettes b/maven-project/R-skeleton/vignettes new file mode 120000 index 
0000000..f83fe56 --- /dev/null +++ b/maven-project/R-skeleton/vignettes @@ -0,0 +1 @@ +../../vignettes \ No newline at end of file diff --git a/R-test/basic-MTclassification.R b/maven-project/R-test/basic-MTclassification.R similarity index 100% rename from R-test/basic-MTclassification.R rename to maven-project/R-test/basic-MTclassification.R diff --git a/R-test/basic-MTregression.R b/maven-project/R-test/basic-MTregression.R similarity index 100% rename from R-test/basic-MTregression.R rename to maven-project/R-test/basic-MTregression.R diff --git a/R-test/basic-bagging-classification.R b/maven-project/R-test/basic-bagging-classification.R similarity index 100% rename from R-test/basic-bagging-classification.R rename to maven-project/R-test/basic-bagging-classification.R diff --git a/R-test/basic-classification.R b/maven-project/R-test/basic-classification.R similarity index 100% rename from R-test/basic-classification.R rename to maven-project/R-test/basic-classification.R diff --git a/R-test/basic-input-na-classification.R b/maven-project/R-test/basic-input-na-classification.R similarity index 100% rename from R-test/basic-input-na-classification.R rename to maven-project/R-test/basic-input-na-classification.R diff --git a/R-test/basic-quantile-regression.R b/maven-project/R-test/basic-quantile-regression.R similarity index 100% rename from R-test/basic-quantile-regression.R rename to maven-project/R-test/basic-quantile-regression.R diff --git a/R-test/basic-regression.R b/maven-project/R-test/basic-regression.R similarity index 100% rename from R-test/basic-regression.R rename to maven-project/R-test/basic-regression.R diff --git a/R-test/basic-weighted-classification.R b/maven-project/R-test/basic-weighted-classification.R similarity index 100% rename from R-test/basic-weighted-classification.R rename to maven-project/R-test/basic-weighted-classification.R diff --git a/R-test/basic-weighted-regression.R b/maven-project/R-test/basic-weighted-regression.R 
similarity index 100% rename from R-test/basic-weighted-regression.R rename to maven-project/R-test/basic-weighted-regression.R diff --git a/R-test/data-set-regression.R b/maven-project/R-test/data-set-regression.R similarity index 100% rename from R-test/data-set-regression.R rename to maven-project/R-test/data-set-regression.R diff --git a/R-test/data-set.R b/maven-project/R-test/data-set.R similarity index 100% rename from R-test/data-set.R rename to maven-project/R-test/data-set.R diff --git a/R-test/large-scale-large-dim.R b/maven-project/R-test/large-scale-large-dim.R similarity index 100% rename from R-test/large-scale-large-dim.R rename to maven-project/R-test/large-scale-large-dim.R diff --git a/R-test/large-scale-test.R b/maven-project/R-test/large-scale-test.R similarity index 100% rename from R-test/large-scale-test.R rename to maven-project/R-test/large-scale-test.R diff --git a/maven-project/README.md b/maven-project/README.md new file mode 120000 index 0000000..32d46ee --- /dev/null +++ b/maven-project/README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/maven-project/build.sh b/maven-project/build.sh new file mode 100755 index 0000000..c066ddc --- /dev/null +++ b/maven-project/build.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +die() { + + rc=$? + + echo "ERROR($rc): $@" + exit 1 + +} + +cd $(dirname $0) + + +mvn clean verify || die "mvn build failed!" 
+ +OUT_DIR="R-skeleton/inst/java" + + +rm -rf $OUT_DIR + +mkdir -p $OUT_DIR + +rc=1 + +for f in $(find target -name 'extra*.jar'); do + echo "found: $f" + cp -v $f $OUT_DIR/ExtraTrees.jar || die "cannot cp $f to $OUT_DIR" + rc=0 +done + +ls -l $OUT_DIR + +exit $rc + diff --git a/build.xml b/maven-project/build.xml similarity index 100% rename from build.xml rename to maven-project/build.xml diff --git a/maven-project/development.md b/maven-project/development.md new file mode 120000 index 0000000..e2af6b2 --- /dev/null +++ b/maven-project/development.md @@ -0,0 +1 @@ +../development.md \ No newline at end of file diff --git a/pom.xml b/maven-project/pom.xml similarity index 92% rename from pom.xml rename to maven-project/pom.xml index d470053..ae34d2a 100644 --- a/pom.xml +++ b/maven-project/pom.xml @@ -21,11 +21,11 @@ maven-compiler-plugin 2.1 - 1.6 - 1.6 + 1.7 + 1.7 - \ No newline at end of file + diff --git a/src/main/java/org/extratrees/AbstractBinaryTree.java b/maven-project/src/main/java/org/extratrees/AbstractBinaryTree.java similarity index 100% rename from src/main/java/org/extratrees/AbstractBinaryTree.java rename to maven-project/src/main/java/org/extratrees/AbstractBinaryTree.java diff --git a/src/main/java/org/extratrees/AbstractTrees.java b/maven-project/src/main/java/org/extratrees/AbstractTrees.java similarity index 100% rename from src/main/java/org/extratrees/AbstractTrees.java rename to maven-project/src/main/java/org/extratrees/AbstractTrees.java diff --git a/src/main/java/org/extratrees/Aggregator.java b/maven-project/src/main/java/org/extratrees/Aggregator.java similarity index 100% rename from src/main/java/org/extratrees/Aggregator.java rename to maven-project/src/main/java/org/extratrees/Aggregator.java diff --git a/src/main/java/org/extratrees/BinaryTree.java b/maven-project/src/main/java/org/extratrees/BinaryTree.java similarity index 100% rename from src/main/java/org/extratrees/BinaryTree.java rename to 
maven-project/src/main/java/org/extratrees/BinaryTree.java diff --git a/src/main/java/org/extratrees/ExtraTrees.java b/maven-project/src/main/java/org/extratrees/ExtraTrees.java similarity index 100% rename from src/main/java/org/extratrees/ExtraTrees.java rename to maven-project/src/main/java/org/extratrees/ExtraTrees.java diff --git a/src/main/java/org/extratrees/FactorBinaryTree.java b/maven-project/src/main/java/org/extratrees/FactorBinaryTree.java similarity index 100% rename from src/main/java/org/extratrees/FactorBinaryTree.java rename to maven-project/src/main/java/org/extratrees/FactorBinaryTree.java diff --git a/src/main/java/org/extratrees/FactorExtraTrees.java b/maven-project/src/main/java/org/extratrees/FactorExtraTrees.java similarity index 100% rename from src/main/java/org/extratrees/FactorExtraTrees.java rename to maven-project/src/main/java/org/extratrees/FactorExtraTrees.java diff --git a/src/main/java/org/extratrees/QuantileBinaryTree.java b/maven-project/src/main/java/org/extratrees/QuantileBinaryTree.java similarity index 100% rename from src/main/java/org/extratrees/QuantileBinaryTree.java rename to maven-project/src/main/java/org/extratrees/QuantileBinaryTree.java diff --git a/src/main/java/org/extratrees/QuantileExtraTrees.java b/maven-project/src/main/java/org/extratrees/QuantileExtraTrees.java similarity index 100% rename from src/main/java/org/extratrees/QuantileExtraTrees.java rename to maven-project/src/main/java/org/extratrees/QuantileExtraTrees.java diff --git a/src/main/java/org/extratrees/QuickSelect.java b/maven-project/src/main/java/org/extratrees/QuickSelect.java similarity index 100% rename from src/main/java/org/extratrees/QuickSelect.java rename to maven-project/src/main/java/org/extratrees/QuickSelect.java diff --git a/src/main/java/org/extratrees/ShuffledIterator.java b/maven-project/src/main/java/org/extratrees/ShuffledIterator.java similarity index 100% rename from src/main/java/org/extratrees/ShuffledIterator.java rename 
to maven-project/src/main/java/org/extratrees/ShuffledIterator.java diff --git a/src/main/java/org/extratrees/TaskCutResult.java b/maven-project/src/main/java/org/extratrees/TaskCutResult.java similarity index 100% rename from src/main/java/org/extratrees/TaskCutResult.java rename to maven-project/src/main/java/org/extratrees/TaskCutResult.java diff --git a/src/main/java/org/extratrees/data/Array2D.java b/maven-project/src/main/java/org/extratrees/data/Array2D.java similarity index 100% rename from src/main/java/org/extratrees/data/Array2D.java rename to maven-project/src/main/java/org/extratrees/data/Array2D.java diff --git a/src/main/java/org/extratrees/data/CSparseMatrix.java b/maven-project/src/main/java/org/extratrees/data/CSparseMatrix.java similarity index 100% rename from src/main/java/org/extratrees/data/CSparseMatrix.java rename to maven-project/src/main/java/org/extratrees/data/CSparseMatrix.java diff --git a/src/main/java/org/extratrees/data/Matrix.java b/maven-project/src/main/java/org/extratrees/data/Matrix.java similarity index 100% rename from src/main/java/org/extratrees/data/Matrix.java rename to maven-project/src/main/java/org/extratrees/data/Matrix.java diff --git a/src/main/java/org/extratrees/data/Row.java b/maven-project/src/main/java/org/extratrees/data/Row.java similarity index 100% rename from src/main/java/org/extratrees/data/Row.java rename to maven-project/src/main/java/org/extratrees/data/Row.java diff --git a/src/test/java/org/extratrees/AllFeatureTests.java b/maven-project/src/test/java/org/extratrees/AllFeatureTests.java similarity index 100% rename from src/test/java/org/extratrees/AllFeatureTests.java rename to maven-project/src/test/java/org/extratrees/AllFeatureTests.java diff --git a/src/test/java/org/extratrees/BenchmarkRange.java b/maven-project/src/test/java/org/extratrees/BenchmarkRange.java similarity index 100% rename from src/test/java/org/extratrees/BenchmarkRange.java rename to 
maven-project/src/test/java/org/extratrees/BenchmarkRange.java diff --git a/src/test/java/org/extratrees/BenchmarkTests.java b/maven-project/src/test/java/org/extratrees/BenchmarkTests.java similarity index 100% rename from src/test/java/org/extratrees/BenchmarkTests.java rename to maven-project/src/test/java/org/extratrees/BenchmarkTests.java diff --git a/src/test/java/org/extratrees/ExtraTreeTests.java b/maven-project/src/test/java/org/extratrees/ExtraTreeTests.java similarity index 100% rename from src/test/java/org/extratrees/ExtraTreeTests.java rename to maven-project/src/test/java/org/extratrees/ExtraTreeTests.java diff --git a/src/test/java/org/extratrees/FactorTests.java b/maven-project/src/test/java/org/extratrees/FactorTests.java similarity index 100% rename from src/test/java/org/extratrees/FactorTests.java rename to maven-project/src/test/java/org/extratrees/FactorTests.java diff --git a/src/test/java/org/extratrees/MultitaskTests.java b/maven-project/src/test/java/org/extratrees/MultitaskTests.java similarity index 100% rename from src/test/java/org/extratrees/MultitaskTests.java rename to maven-project/src/test/java/org/extratrees/MultitaskTests.java diff --git a/src/test/java/org/extratrees/NATests.java b/maven-project/src/test/java/org/extratrees/NATests.java similarity index 100% rename from src/test/java/org/extratrees/NATests.java rename to maven-project/src/test/java/org/extratrees/NATests.java diff --git a/src/test/java/org/extratrees/QuantileTests.java b/maven-project/src/test/java/org/extratrees/QuantileTests.java similarity index 100% rename from src/test/java/org/extratrees/QuantileTests.java rename to maven-project/src/test/java/org/extratrees/QuantileTests.java diff --git a/src/test/java/org/extratrees/QuickSelectTests.java b/maven-project/src/test/java/org/extratrees/QuickSelectTests.java similarity index 100% rename from src/test/java/org/extratrees/QuickSelectTests.java rename to 
maven-project/src/test/java/org/extratrees/QuickSelectTests.java diff --git a/src/test/java/org/extratrees/SerializationTests.java b/maven-project/src/test/java/org/extratrees/SerializationTests.java similarity index 100% rename from src/test/java/org/extratrees/SerializationTests.java rename to maven-project/src/test/java/org/extratrees/SerializationTests.java diff --git a/src/test/java/org/extratrees/SetSeedTests.java b/maven-project/src/test/java/org/extratrees/SetSeedTests.java similarity index 100% rename from src/test/java/org/extratrees/SetSeedTests.java rename to maven-project/src/test/java/org/extratrees/SetSeedTests.java diff --git a/src/test/java/org/extratrees/ShuffleTests.java b/maven-project/src/test/java/org/extratrees/ShuffleTests.java similarity index 100% rename from src/test/java/org/extratrees/ShuffleTests.java rename to maven-project/src/test/java/org/extratrees/ShuffleTests.java diff --git a/src/test/java/org/extratrees/SparseMartixTest.java b/maven-project/src/test/java/org/extratrees/SparseMartixTest.java similarity index 100% rename from src/test/java/org/extratrees/SparseMartixTest.java rename to maven-project/src/test/java/org/extratrees/SparseMartixTest.java diff --git a/src/test/java/org/extratrees/SubsetTests.java b/maven-project/src/test/java/org/extratrees/SubsetTests.java similarity index 100% rename from src/test/java/org/extratrees/SubsetTests.java rename to maven-project/src/test/java/org/extratrees/SubsetTests.java diff --git a/src/test/java/org/extratrees/Timer.java b/maven-project/src/test/java/org/extratrees/Timer.java similarity index 100% rename from src/test/java/org/extratrees/Timer.java rename to maven-project/src/test/java/org/extratrees/Timer.java diff --git a/R-skeleton/vignettes/extraTrees.Rnw b/vignettes/extraTrees.Rnw similarity index 97% rename from R-skeleton/vignettes/extraTrees.Rnw rename to vignettes/extraTrees.Rnw index 54cd009..1e612aa 100644 --- a/R-skeleton/vignettes/extraTrees.Rnw +++ 
b/vignettes/extraTrees.Rnw @@ -1,134 +1,134 @@ -\documentclass[a4paper, 11pt]{article} -\usepackage[OT1]{fontenc} -\usepackage{url} -\usepackage{Sweave} -\begin{document} - -% \VignetteIndexEntry{extraTrees} - -\title{Package for ExtraTrees method for classification and regression} -\author{Jaak Simm and Ildefons Magrans de Abril} -\date{2013-10-05} - -\maketitle - -\tableofcontents - -\section{Introduction} - -This document provides detailed guidance on using the package \texttt{extraTrees}. - -\section{Training and predicting} - -Usage of \texttt{extraTrees} was made similar to \texttt{randomForest} package as -ExtraTrees (extremely randomized trees) method is similar RandomForest. The main -difference is that when at each node RandomForest chooses the best cutting threshold -for the feature, ExtraTrees instead chooses the cut (uniformly) randomly. Similarly to -RandomForest the feature with the biggest gain (or best score) is chosen after the cutting -threshold has been fixed. - -This package includes an extension to ExtraTrees that we found useful in some experiments: -instead of a single random cut we choose \textbf{several} random cuts for each feature. -This reduces the probability of making very poor cuts but still maintains the stochastic -cutting approach of ExtraTrees. Using more than one cut (e.g., 3-5 cuts) can improve -the accuracy, usually when the standard ExtraTrees performs worse than RandomTrees. - -A simple usage example is given in Figure \ref{fig:basic.example}. Try changing the value of numRandomCuts to 5 and see how the performance changes. For some data also the value of mtry (the number of chosen features at each node) should be increased. 
-\begin{figure} -\begin{verbatim} -library(extraTrees) -## train and test data: -n <- 1000 -p <- 10 -f <- function(x) { - (x[,1]>0.5) + 0.8*(x[,2]>0.6) + 0.5*(x[,3]>0.4) + 0.2*x[,5] + - 0.1*runif(nrow(x)) -} -x <- matrix(runif(n*p), n, p) -y <- as.numeric(f(x)) -xtest <- matrix(runif(n*p), n, p) -ytest <- f(xtest) - -## extraTrees: -et <- extraTrees(x, y, numRandomCuts=1) -yhat <- predict(et, xtest) -yerr <- mean( (ytest-yhat)^2 ) -print( sprintf("Squared error: %f", yerr) ) -\end{verbatim} -\caption{Example of using \texttt{extraTrees} with 1 cut (the default).} -\label{fig:basic.example} -\end{figure} - - -\paragraph{\texttt{METHODS}} - -There two main methods: -\begin{itemize} -\item \texttt{extraTrees} that does the training, -\item \texttt{predict} that does the prediction after the trees have been trained. -\end{itemize} - -For classification ExtraTrees at each node chooses the cut based on minimizing the Gini impurity index and for regression the variance. - - -\section{Large scale usage} -Although ExtraTrees is quite fast (about 10 faster than randomForest -on the same data and number of trees), there are cases when the -data set is still too big for the default setup. - -\subsection{Increasing allocated memory} -If your data has high number of data points and/or -high number of dimensions, then you can run out of Java memory. -This causes following error: -\begin{verbatim} - java.lang.OutOfMemoryError: Java heap space -\end{verbatim} -To solve that you need to increase the memory by supplying "-Xmx1g" for 1GB -or "-Xmx4g" for 4GB in R's Java options before loading \texttt{extraTrees}: -\begin{verbatim} - options( java.parameters = "-Xmx1g" ) - library(extraTrees) -\end{verbatim} -This is shown in the first lines of Figure \ref{fig:large.scale}. -Make sure your machine -has enough free memory available before you do that. 
- -\subsection{Using multiple cores} -Secondly, if the running time is too long you can use multi-core computation -by increasing the \texttt{numThreads} option (default is 1) in \texttt{extraTrees}. -This is shown the last lines in Figure \ref{fig:large.scale}. - -\begin{figure} -\begin{verbatim} -## To solve the problem give more memory to Java. -## Using 1GB Java memory for extraTrees (specified by 1g): -options( java.parameters = "-Xmx1g" ) -library(extraTrees) -## train and test data: -n <- 1000 -p <- 10 -f <- function(x) { - (x[,1]>0.5) + 0.8*(x[,2]>0.6) + 0.5*(x[,3]>0.4) + 0.2*x[,5] + - 0.1*runif(nrow(x)) -} -x <- matrix(runif(n*p), n, p) -y <- as.numeric(f(x)) -xtest <- matrix(runif(n*p), n, p) -ytest <- f(xtest) - -## extraTrees with 1 CPU thread (the default): -system.time({et <- extraTrees(x, y, numThreads=1)}) -## extraTrees with 2 CPU thread: -system.time({et <- extraTrees(x, y, numThreads=2)}) -\end{verbatim} -\caption{Example of how to use \texttt{extraTrees} in large scale settings.} -\label{fig:large.scale} -\end{figure} - - -%\section{Acknowledgements} - -%We would like thank Ildefons Magrans de Abril for suggesting -%making this R package. - -\end{document} +\documentclass[a4paper, 11pt]{article} +\usepackage[OT1]{fontenc} +\usepackage{url} +\usepackage{Sweave} +\begin{document} + +% \VignetteIndexEntry{extraTrees} + +\title{Package for ExtraTrees method for classification and regression} +\author{Jaak Simm and Ildefons Magrans de Abril} +\date{2013-10-05} + +\maketitle + +\tableofcontents + +\section{Introduction} + +This document provides detailed guidance on using the package \texttt{extraTrees}. + +\section{Training and predicting} + +Usage of \texttt{extraTrees} was made similar to \texttt{randomForest} package as +ExtraTrees (extremely randomized trees) method is similar to RandomForest.
The main +difference is that when at each node RandomForest chooses the best cutting threshold +for the feature, ExtraTrees instead chooses the cut (uniformly) randomly. Similarly to +RandomForest the feature with the biggest gain (or best score) is chosen after the cutting +threshold has been fixed. + +This package includes an extension to ExtraTrees that we found useful in some experiments: +instead of a single random cut we choose \textbf{several} random cuts for each feature. +This reduces the probability of making very poor cuts but still maintains the stochastic +cutting approach of ExtraTrees. Using more than one cut (e.g., 3-5 cuts) can improve +the accuracy, usually when the standard ExtraTrees performs worse than RandomForest. + +A simple usage example is given in Figure \ref{fig:basic.example}. Try changing the value of numRandomCuts to 5 and see how the performance changes. For some data the value of mtry (the number of chosen features at each node) should also be increased. +\begin{figure} +\begin{verbatim} +library(extraTrees) +## train and test data: +n <- 1000 +p <- 10 +f <- function(x) { + (x[,1]>0.5) + 0.8*(x[,2]>0.6) + 0.5*(x[,3]>0.4) + 0.2*x[,5] + + 0.1*runif(nrow(x)) +} +x <- matrix(runif(n*p), n, p) +y <- as.numeric(f(x)) +xtest <- matrix(runif(n*p), n, p) +ytest <- f(xtest) + +## extraTrees: +et <- extraTrees(x, y, numRandomCuts=1) +yhat <- predict(et, xtest) +yerr <- mean( (ytest-yhat)^2 ) +print( sprintf("Squared error: %f", yerr) ) +\end{verbatim} +\caption{Example of using \texttt{extraTrees} with 1 cut (the default).} +\label{fig:basic.example} +\end{figure} + + +\paragraph{\texttt{METHODS}} + +There are two main methods: +\begin{itemize} +\item \texttt{extraTrees} that does the training, +\item \texttt{predict} that does the prediction after the trees have been trained. +\end{itemize} + +For classification ExtraTrees at each node chooses the cut based on minimizing the Gini impurity index and for regression the variance.
+ + +\section{Large scale usage} +Although ExtraTrees is quite fast (about 10 times faster than randomForest +on the same data and number of trees), there are cases when the +data set is still too big for the default setup. + +\subsection{Increasing allocated memory} +If your data has a high number of data points and/or +a high number of dimensions, then you can run out of Java memory. +This causes the following error: +\begin{verbatim} + java.lang.OutOfMemoryError: Java heap space +\end{verbatim} +To solve that you need to increase the memory by supplying "-Xmx1g" for 1GB +or "-Xmx4g" for 4GB in R's Java options before loading \texttt{extraTrees}: +\begin{verbatim} + options( java.parameters = "-Xmx1g" ) + library(extraTrees) +\end{verbatim} +This is shown in the first lines of Figure \ref{fig:large.scale}. +Make sure your machine +has enough free memory available before you do that. + +\subsection{Using multiple cores} +Secondly, if the running time is too long you can use multi-core computation +by increasing the \texttt{numThreads} option (default is 1) in \texttt{extraTrees}. +This is shown in the last lines of Figure \ref{fig:large.scale}. + +\begin{figure} +\begin{verbatim} +## To solve the problem give more memory to Java.
+## Using 1GB Java memory for extraTrees (specified by 1g): +options( java.parameters = "-Xmx1g" ) +library(extraTrees) +## train and test data: +n <- 1000 +p <- 10 +f <- function(x) { + (x[,1]>0.5) + 0.8*(x[,2]>0.6) + 0.5*(x[,3]>0.4) + 0.2*x[,5] + + 0.1*runif(nrow(x)) +} +x <- matrix(runif(n*p), n, p) +y <- as.numeric(f(x)) +xtest <- matrix(runif(n*p), n, p) +ytest <- f(xtest) + +## extraTrees with 1 CPU thread (the default): +system.time({et <- extraTrees(x, y, numThreads=1)}) +## extraTrees with 2 CPU threads: +system.time({et <- extraTrees(x, y, numThreads=2)}) +\end{verbatim} +\caption{Example of how to use \texttt{extraTrees} in large scale settings.} +\label{fig:large.scale} +\end{figure} + + +%\section{Acknowledgements} + +%We would like to thank Ildefons Magrans de Abril for suggesting +%making this R package. + +\end{document}