From 7bf60910b00212ac15dbded1fec8b0555d6c7fbb Mon Sep 17 00:00:00 2001 From: dselivanov Date: Sun, 13 May 2018 17:21:09 +0400 Subject: [PATCH 01/14] initial skeleton for R package --- .gitignore | 6 ++- include/blitzml/base/common.h | 16 ++++--- r-package/.Rbuildignore | 2 + r-package/DESCRIPTION | 14 ++++++ r-package/NAMESPACE | 5 ++ r-package/R/RcppExports.R | 19 ++++++++ r-package/R/solver.R | 14 ++++++ r-package/blitzml.Rproj | 21 +++++++++ r-package/src/Makevars | 14 ++++++ r-package/src/RcppExports.cpp | 66 +++++++++++++++++++++++++++ r-package/src/base | 1 + r-package/src/dataset | 1 + r-package/src/mapped_csc_matrix.h | 32 +++++++++++++ r-package/src/smooth_loss | 1 + r-package/src/sparse_linear | 1 + r-package/src/wrap_sparse_dataset.cpp | 16 +++++++ r-package/src/wrap_sparse_solver.cpp | 35 ++++++++++++++ src/base/scheduler.cpp | 24 +++++----- 18 files changed, 268 insertions(+), 20 deletions(-) create mode 100644 r-package/.Rbuildignore create mode 100644 r-package/DESCRIPTION create mode 100644 r-package/NAMESPACE create mode 100644 r-package/R/RcppExports.R create mode 100644 r-package/R/solver.R create mode 100644 r-package/blitzml.Rproj create mode 100644 r-package/src/Makevars create mode 100644 r-package/src/RcppExports.cpp create mode 120000 r-package/src/base create mode 120000 r-package/src/dataset create mode 100644 r-package/src/mapped_csc_matrix.h create mode 120000 r-package/src/smooth_loss create mode 120000 r-package/src/sparse_linear create mode 100644 r-package/src/wrap_sparse_dataset.cpp create mode 100644 r-package/src/wrap_sparse_solver.cpp diff --git a/.gitignore b/.gitignore index 1e539b7..1bed0ff 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,9 @@ MANIFEST docs/_build docs/_templates docs/*.log -.DS_Store +.DS_Store +.Rproj.user +*.o +*.so +.Rhistory diff --git a/include/blitzml/base/common.h b/include/blitzml/base/common.h index 9014809..9098c8b 100644 --- a/include/blitzml/base/common.h +++ b/include/blitzml/base/common.h @@ -17,16 +17,18 @@ namespace BlitzML { #define LIBRARY_API extern "C" #endif -typedef double value_t; -typedef std::size_t size_t; -typedef int index_t; +typedef double value_t; +//typedef std::size_t size_t; +typedef uint32_t size_t; +typedef uint32_t index_t; +//typedef int index_t; typedef std::vector::iterator index_itr; typedef std::vector::iterator value_itr; typedef std::vector::const_iterator const_index_itr; typedef std::vector::const_iterator const_value_itr; -void assert(bool okay, std::string error_message); +//void assert(bool okay, std::string error_message); void warn_if(bool condition, std::string message); void print(const char* fmt, ...); void debug(const char* fmt, ...); @@ -34,7 +36,7 @@ void debug(const char* fmt, ...); class ObjectiveValues { public: - ObjectiveValues() + ObjectiveValues() : dual_obj_(std::numeric_limits::min()), primal_obj_x_(std::numeric_limits::max()), primal_obj_y_(std::numeric_limits::max()), @@ -82,9 +84,9 @@ class Parameters { virtual ~Parameters() { debug("delete params"); } - private: + private: const value_t *values; - size_t count; + size_t count; Parameters(); }; diff --git a/r-package/.Rbuildignore b/r-package/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/r-package/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION new file mode 100644 index 0000000..9a5bba0 --- /dev/null +++ b/r-package/DESCRIPTION @@ -0,0 +1,14 @@ +Package: blitzml +Type: Package +Title: What the Package Does (Title Case) +Version: 0.1.0 +Author: Who wrote it +Maintainer: The package maintainer +Description: More about what it does (maybe more than one line) + Use four spaces when indenting paragraphs within the Description. +License: What license is it under? +Encoding: UTF-8 +LazyData: true +Imports: Rcpp (>= 0.12.16) +LinkingTo: Rcpp +RoxygenNote: 6.0.1 diff --git a/r-package/NAMESPACE b/r-package/NAMESPACE new file mode 100644 index 0000000..345b4f4 --- /dev/null +++ b/r-package/NAMESPACE @@ -0,0 +1,5 @@ +# Generated by roxygen2: do not edit by hand + +export(get_max_lambda) +importFrom(Rcpp,sourceCpp) +useDynLib("blitzml", .registration=TRUE) diff --git a/r-package/R/RcppExports.R b/r-package/R/RcppExports.R new file mode 100644 index 0000000..98b84f0 --- /dev/null +++ b/r-package/R/RcppExports.R @@ -0,0 +1,19 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +create_solver_sparse_dataset <- function(x, y) { + .Call(`_blitzml_create_solver_sparse_dataset`, x, y) +} + +BlitzML_new_solver <- function() { + .Call(`_blitzml_BlitzML_new_solver`) +} + +BlitzML_new_parameters <- function(params) { + .Call(`_blitzml_BlitzML_new_parameters`, params) +} + +BlitzML_sparse_linear_solver_compute_max_l1_penalty <- function(xptr_solver, xptr_dataset, xptr_params) { + .Call(`_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty`, xptr_solver, xptr_dataset, xptr_params) +} + diff --git a/r-package/R/solver.R b/r-package/R/solver.R new file mode 100644 index 0000000..fab1416 --- /dev/null +++ b/r-package/R/solver.R @@ -0,0 +1,14 @@ +# loss_indices: +# 0 - squared +# 1 - huber +# 2 - logistic + +#' @useDynLib "blitzml", .registration=TRUE +#' @importFrom Rcpp sourceCpp +#' @export +get_max_lambda = function(x, y, params = c(l1_penalty = 1, "na" = 0, include_bias_term = 1, loss_index = 2)) { + solver = BlitzML_new_solver() + dataset = create_solver_sparse_dataset(x, y) + params = BlitzML_new_parameters(params) + BlitzML_sparse_linear_solver_compute_max_l1_penalty(solver, dataset, params) +} diff --git a/r-package/blitzml.Rproj b/r-package/blitzml.Rproj new file mode 100644 index 0000000..270314b --- /dev/null +++ b/r-package/blitzml.Rproj @@ -0,0 +1,21 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/r-package/src/Makevars b/r-package/src/Makevars new file mode 100644 index 0000000..0d22719 --- /dev/null +++ b/r-package/src/Makevars @@ -0,0 +1,14 @@ +CXX_STD = CXX11 + +PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) + +SOURCES=$(wildcard base/*.cpp dataset/*.cpp smooth_loss/*.cpp sparse_linear/*.cpp) + +OBJECTS = wrap_sparse_solver.o wrap_sparse_dataset.o RcppExports.o $(SOURCES:.cpp=.o) + +all: $(SHLIB) + +clean: + @rm -f $(OBJECTS) + +PKG_CXXFLAGS = -I../../include diff --git a/r-package/src/RcppExports.cpp b/r-package/src/RcppExports.cpp new file mode 100644 index 0000000..db5d844 --- /dev/null +++ b/r-package/src/RcppExports.cpp @@ -0,0 +1,66 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// create_solver_sparse_dataset +SEXP create_solver_sparse_dataset(const Rcpp::S4& x, const Rcpp::NumericVector& y); +RcppExport SEXP _blitzml_create_solver_sparse_dataset(SEXP xSEXP, SEXP ySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::S4& >::type x(xSEXP); + Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type y(ySEXP); + rcpp_result_gen = Rcpp::wrap(create_solver_sparse_dataset(x, y)); + return rcpp_result_gen; +END_RCPP +} +// BlitzML_new_solver +SEXP BlitzML_new_solver(); +RcppExport SEXP _blitzml_BlitzML_new_solver() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(BlitzML_new_solver()); + return rcpp_result_gen; +END_RCPP +} +// BlitzML_new_parameters +SEXP BlitzML_new_parameters(const Rcpp::NumericVector params); +RcppExport SEXP _blitzml_BlitzML_new_parameters(SEXP paramsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type params(paramsSEXP); + rcpp_result_gen = Rcpp::wrap(BlitzML_new_parameters(params)); + return rcpp_result_gen; +END_RCPP +} +// BlitzML_sparse_linear_solver_compute_max_l1_penalty +double BlitzML_sparse_linear_solver_compute_max_l1_penalty(SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params); +RcppExport SEXP _blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty(SEXP xptr_solverSEXP, SEXP xptr_datasetSEXP, SEXP xptr_paramsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); + Rcpp::traits::input_parameter< SEXP >::type xptr_dataset(xptr_datasetSEXP); + Rcpp::traits::input_parameter< SEXP >::type xptr_params(xptr_paramsSEXP); + rcpp_result_gen = Rcpp::wrap(BlitzML_sparse_linear_solver_compute_max_l1_penalty(xptr_solver, xptr_dataset, xptr_params)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_blitzml_create_solver_sparse_dataset", (DL_FUNC) &_blitzml_create_solver_sparse_dataset, 2}, + {"_blitzml_BlitzML_new_solver", (DL_FUNC) &_blitzml_BlitzML_new_solver, 0}, + {"_blitzml_BlitzML_new_parameters", (DL_FUNC) &_blitzml_BlitzML_new_parameters, 1}, + {"_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty", (DL_FUNC) &_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty, 3}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_blitzml(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/r-package/src/base b/r-package/src/base new file mode 120000 index 0000000..1210551 --- /dev/null +++ b/r-package/src/base @@ -0,0 +1 @@ +../../src/base/ \ No newline at end of file diff --git a/r-package/src/dataset b/r-package/src/dataset new file mode 120000 index 0000000..c409859 --- /dev/null +++ b/r-package/src/dataset @@ -0,0 +1 @@ +../../src/dataset/ \ No newline at end of file diff --git a/r-package/src/mapped_csc_matrix.h b/r-package/src/mapped_csc_matrix.h new file mode 100644 index 0000000..f3b6931 --- /dev/null +++ b/r-package/src/mapped_csc_matrix.h @@ -0,0 +1,32 @@ +#include + +template< typename T> +class MappedCSC { +public: + MappedCSC(); + MappedCSC(std::uint32_t n_rows, + std::uint32_t n_cols, + uint32_t nnz, + std::uint32_t * row_indices, + std::uint32_t * col_ptrs, + T * values): + n_rows(n_rows), n_cols(n_cols), nnz(nnz), row_indices(row_indices), col_ptrs(col_ptrs), values(values) {}; + const std::uint32_t n_rows; + const std::uint32_t n_cols; + const uint32_t nnz; + const std::uint32_t * row_indices; + const std::uint32_t * col_ptrs; + T * values; +}; + +using dMappedCSC = MappedCSC; + +dMappedCSC extract_mapped_csc(Rcpp::S4 input) { + Rcpp::IntegerVector dim = input.slot("Dim"); + Rcpp::NumericVector values = input.slot("x"); + uint32_t nrows = dim[0]; + uint32_t ncols = dim[1]; + Rcpp::IntegerVector row_indices = input.slot("i"); + Rcpp::IntegerVector col_ptrs = input.slot("p"); + return dMappedCSC(nrows, ncols, values.length(), (uint32_t *)row_indices.begin(), (uint32_t *)col_ptrs.begin(), values.begin()); +} diff --git a/r-package/src/smooth_loss b/r-package/src/smooth_loss new file mode 120000 index 0000000..7a16780 --- /dev/null +++ b/r-package/src/smooth_loss @@ -0,0 +1 @@ +../../src/smooth_loss/ \ No newline at end of file diff --git a/r-package/src/sparse_linear b/r-package/src/sparse_linear new file mode 120000 index 0000000..591e05d --- /dev/null +++ b/r-package/src/sparse_linear @@ -0,0 +1 @@ +../../src/sparse_linear/ \ No newline at end of file diff --git a/r-package/src/wrap_sparse_dataset.cpp b/r-package/src/wrap_sparse_dataset.cpp new file mode 100644 index 0000000..d87d89e --- /dev/null +++ b/r-package/src/wrap_sparse_dataset.cpp @@ -0,0 +1,16 @@ +#include +#include "mapped_csc_matrix.h" +#include + + +// [[Rcpp::export]] +SEXP create_solver_sparse_dataset(const Rcpp::S4 &x, + const Rcpp::NumericVector &y) { + dMappedCSC x_csc = extract_mapped_csc(x); + double *y_ptr = (double *)y.begin(); + BlitzML::SparseDataset* dataset = + new BlitzML::SparseDataset(x_csc.row_indices, x_csc.col_ptrs, x_csc.values, + x_csc.n_rows, x_csc.n_cols, x_csc.nnz, y_ptr, y.size()); + Rcpp::XPtr> ptr(dataset, true); + return ptr; +} diff --git a/r-package/src/wrap_sparse_solver.cpp b/r-package/src/wrap_sparse_solver.cpp new file mode 100644 index 0000000..0a3bbfa --- /dev/null +++ b/r-package/src/wrap_sparse_solver.cpp @@ -0,0 +1,35 @@ +#include + +#include +#include +#include +#include +#include +#include + +using namespace BlitzML; + +// [[Rcpp::export]] +SEXP BlitzML_new_solver() { + BlitzML::SparseLogRegSolver* solver = new BlitzML::SparseLogRegSolver(); + Rcpp::XPtr ptr(solver, true); + return ptr; +} + +// [[Rcpp::export]] +SEXP BlitzML_new_parameters(const Rcpp::NumericVector params) { + BlitzML::Parameters* blitz_params = new BlitzML::Parameters((double*)params.begin(), params.size() ); + Rcpp::XPtr ptr(blitz_params, true); + return ptr; +} + +// [[Rcpp::export]] +double BlitzML_sparse_linear_solver_compute_max_l1_penalty( + SEXP xptr_solver, + SEXP xptr_dataset, + SEXP xptr_params) { + Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::SparseDataset > data(xptr_dataset); + Rcpp::XPtr< BlitzML::Parameters > params(xptr_params); + return solver->compute_max_l1_penalty(data, params); +} diff --git a/src/base/scheduler.cpp b/src/base/scheduler.cpp index 8d58b17..cb04561 100644 --- a/src/base/scheduler.cpp +++ b/src/base/scheduler.cpp @@ -27,7 +27,7 @@ value_t Scheduler::estimate_C_progress() { value_t ret = 1.; if (C_progress_estimates.size() > 0) { ret = median_last_k(C_progress_estimates, 2); - } + } if (ret == ret && ret >= 1.) { return ret; } else { @@ -42,16 +42,16 @@ void Scheduler::set_problem_sizes(const std::vector &problem_sizes) { } -void Scheduler::optimize_xi_and_epsilon(unsigned short &best_capsule_ind, +void Scheduler::optimize_xi_and_epsilon(unsigned short &best_capsule_ind, value_t &best_epsilon, double &best_time_limit) { - assert(gamma > 0, "in scheduler, must set gamma to be > 0"); - assert(xi_candidates.size() > 0, - "in scheduler, must have at least 1 xi candidate"); - assert(epsilon_candidates.size() > 0, - "in scheduler, must have at least 1 epsilon candidate"); - + // assert(gamma > 0, "in scheduler, must set gamma to be > 0"); + // assert(xi_candidates.size() > 0, + // "in scheduler, must have at least 1 xi candidate"); + // assert(epsilon_candidates.size() > 0, + // "in scheduler, must have at least 1 epsilon candidate"); + value_t C_progress = estimate_C_progress(); double T_overhead = overhead_time_estimate(); @@ -98,7 +98,7 @@ void Scheduler::optimize_xi_and_epsilon(unsigned short &best_capsule_ind, } -void Scheduler::record_subproblem_size(value_t xi, +void Scheduler::record_subproblem_size(value_t xi, value_t subproblem_size) { chosen_xi = xi; chosen_subproblem_size = subproblem_size; @@ -133,7 +133,7 @@ void Scheduler::record_subproblem_progress(value_t new_Delta, //C_progress_est = 1.5 / chosen_xi; //} else { C_progress_est = (1 - new_Delta / current_Delta) / (1 - realized_epsilon) / chosen_xi; - //} + //} /* if (C_progress_est != C_progress_est || C_progress_est < 1) { @@ -151,8 +151,8 @@ value_t Scheduler::subproblem_time_complexity(value_t epsilon) { double Scheduler::overhead_time_estimate() { - assert(C_overhead_time_estimates.size() > 0, - "cannot estimate overhead time before calling record_overhead_time"); + // assert(C_overhead_time_estimates.size() > 0, + // "cannot estimate overhead time before calling record_overhead_time"); return median_last_k(C_overhead_time_estimates, 5); } From 8ac8d73bcc537e62188382f644553f2b2f6bd924 Mon Sep 17 00:00:00 2001 From: dselivanov Date: Mon, 14 May 2018 09:17:29 +0400 Subject: [PATCH 02/14] rename assert -> assert_with_error_message --- include/blitzml/base/common.h | 2 +- src/base/common.cpp | 6 +++--- src/base/scheduler.cpp | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/blitzml/base/common.h b/include/blitzml/base/common.h index 9098c8b..b55eb67 100644 --- a/include/blitzml/base/common.h +++ b/include/blitzml/base/common.h @@ -28,7 +28,7 @@ typedef std::vector::const_iterator const_index_itr; typedef std::vector::const_iterator const_value_itr; -//void assert(bool okay, std::string error_message); +void assert_with_error_message(bool okay, std::string error_message); void warn_if(bool condition, std::string message); void print(const char* fmt, ...); void debug(const char* fmt, ...); diff --git a/src/base/common.cpp b/src/base/common.cpp index 95c1a64..eeea6b7 100644 --- a/src/base/common.cpp +++ b/src/base/common.cpp @@ -12,10 +12,10 @@ using std::endl; namespace BlitzML { -void assert(bool okay, string error_message) { +void assert_with_error_message(bool okay, string error_message) { if (!okay) { - cerr << "Program exited with error: " - << error_message + cerr << "Program exited with error: " + << error_message << endl; throw error_message; } diff --git a/src/base/scheduler.cpp b/src/base/scheduler.cpp index cb04561..d64c1f0 100644 --- a/src/base/scheduler.cpp +++ b/src/base/scheduler.cpp @@ -46,11 +46,11 @@ void Scheduler::optimize_xi_and_epsilon(unsigned short &best_capsule_ind, value_t &best_epsilon, double &best_time_limit) { - // assert(gamma > 0, "in scheduler, must set gamma to be > 0"); - // assert(xi_candidates.size() > 0, - // "in scheduler, must have at least 1 xi candidate"); - // assert(epsilon_candidates.size() > 0, - // "in scheduler, must have at least 1 epsilon candidate"); + assert_with_error_message(gamma > 0, "in scheduler, must set gamma to be > 0"); + assert_with_error_message(xi_candidates.size() > 0, + "in scheduler, must have at least 1 xi candidate"); + assert_with_error_message(epsilon_candidates.size() > 0, + "in scheduler, must have at least 1 epsilon candidate"); value_t C_progress = estimate_C_progress(); @@ -151,8 +151,8 @@ value_t Scheduler::subproblem_time_complexity(value_t epsilon) { double Scheduler::overhead_time_estimate() { - // assert(C_overhead_time_estimates.size() > 0, - // "cannot estimate overhead time before calling record_overhead_time"); + assert_with_error_message(C_overhead_time_estimates.size() > 0, + "cannot estimate overhead time before calling record_overhead_time"); return median_last_k(C_overhead_time_estimates, 5); } From 02cf3b6f64810477e0c8a97159030aecc72a83af Mon Sep 17 00:00:00 2001 From: dselivanov Date: Mon, 14 May 2018 10:29:33 +0400 Subject: [PATCH 03/14] working logistic regression on sparse matrices --- r-package/DESCRIPTION | 21 ++++--- r-package/NAMESPACE | 1 + r-package/R/RcppExports.R | 28 +++++++-- r-package/R/solver.R | 55 +++++++++++++++++- r-package/src/RcppExports.cpp | 84 +++++++++++++++++++++++---- r-package/src/wrap_sparse_dataset.cpp | 2 +- r-package/src/wrap_sparse_solver.cpp | 43 +++++++++++++- 7 files changed, 206 insertions(+), 28 deletions(-) diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index 9a5bba0..2aa702f 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -1,14 +1,19 @@ Package: blitzml Type: Package -Title: What the Package Does (Title Case) -Version: 0.1.0 -Author: Who wrote it -Maintainer: The package maintainer -Description: More about what it does (maybe more than one line) - Use four spaces when indenting paragraphs within the Description. -License: What license is it under? +Title: Efficient Solver for Linear Models with Various Loss Functions and L1 penalty +Version: 0.1.0.1 +Authors@R: c( + person("Tyler", "Johnson", role = c("aut"), comment = "backend C++ solver"), + person("Dmitriy", "Selivanov", role = c("aut", "cre"), email = "selivanov.dmitriy@gmail.com", comment = "R package") + ) +Maintainer: Dmitriy Selivanov +Description: Efficient Solver for Linear Models with Various + Loss Functions and L1 penalty +License: GPL (>= 2) Encoding: UTF-8 LazyData: true -Imports: Rcpp (>= 0.12.16) +Imports: + R6, + Rcpp (>= 0.12.0) LinkingTo: Rcpp RoxygenNote: 6.0.1 diff --git a/r-package/NAMESPACE b/r-package/NAMESPACE index 345b4f4..434f32a 100644 --- a/r-package/NAMESPACE +++ b/r-package/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(BlitzMLRegression) export(get_max_lambda) importFrom(Rcpp,sourceCpp) useDynLib("blitzml", .registration=TRUE) diff --git a/r-package/R/RcppExports.R b/r-package/R/RcppExports.R index 98b84f0..a5dd362 100644 --- a/r-package/R/RcppExports.R +++ b/r-package/R/RcppExports.R @@ -1,12 +1,12 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -create_solver_sparse_dataset <- function(x, y) { - .Call(`_blitzml_create_solver_sparse_dataset`, x, y) +BlitzML_new_sparse_dataset <- function(x, y) { + .Call(`_blitzml_BlitzML_new_sparse_dataset`, x, y) } -BlitzML_new_solver <- function() { - .Call(`_blitzml_BlitzML_new_solver`) +BlitzML_new_sparse_logreg_solver <- function() { + .Call(`_blitzml_BlitzML_new_sparse_logreg_solver`) } BlitzML_new_parameters <- function(params) { @@ -17,3 +17,23 @@ BlitzML_sparse_linear_solver_compute_max_l1_penalty <- function(xptr_solver, xpt .Call(`_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty`, xptr_solver, xptr_dataset, xptr_params) } +BlitzML_solve_problem <- function(xptr_solver, xptr_dataset, xptr_params, result, status_buffer, log_dir) { + invisible(.Call(`_blitzml_BlitzML_solve_problem`, xptr_solver, xptr_dataset, xptr_params, result, status_buffer, log_dir)) +} + +BlitzML_set_tolerance <- function(xptr_solver, value) { + invisible(.Call(`_blitzml_BlitzML_set_tolerance`, xptr_solver, value)) +} + +BlitzML_set_max_time <- function(xptr_solver, value) { + invisible(.Call(`_blitzml_BlitzML_set_max_time`, xptr_solver, value)) +} + +BlitzML_set_max_iterations <- function(xptr_solver, value) { + invisible(.Call(`_blitzml_BlitzML_set_max_iterations`, xptr_solver, value)) +} + +BlitzML_set_use_working_sets <- function(xptr_solver, value) { + invisible(.Call(`_blitzml_BlitzML_set_use_working_sets`, xptr_solver, value)) +} + diff --git a/r-package/R/solver.R b/r-package/R/solver.R index fab1416..21560bd 100644 --- a/r-package/R/solver.R +++ b/r-package/R/solver.R @@ -7,8 +7,59 @@ #' @importFrom Rcpp sourceCpp #' @export get_max_lambda = function(x, y, params = c(l1_penalty = 1, "na" = 0, include_bias_term = 1, loss_index = 2)) { - solver = BlitzML_new_solver() - dataset = create_solver_sparse_dataset(x, y) + solver = BlitzML_new_sparse_logreg_solver() + dataset = BlitzML_new_sparse_dataset(x, y) params = BlitzML_new_parameters(params) BlitzML_sparse_linear_solver_compute_max_l1_penalty(solver, dataset, params) } + +#' @export +BlitzMLRegression = R6::R6Class( + "BlitzMLRegression", + public = list( + initialize = function(lambda, + include_bias_term = TRUE, + loss = c("squared", "huber", "logistic"), + log_dir = "/dev/null") { + loss = match.arg(loss) + loss_index = match(loss, private$supported_losses) - 1L + private$log_dir = log_dir + switch (loss, + logistic = {private$solver = BlitzML_new_sparse_logreg_solver()}, + stop(sprintf("solver for '%s' loss function is not supported yet", loss)) + ) + private$params = BlitzML_new_parameters(c(lambda = lambda, + undefined_param = 0, + include_bias_term = as.numeric(include_bias_term), + loss_index = loss_index)) + }, + fit = function(x, y, init = NULL, ...) { + stopifnot(is.vector(y)) + stopifnot(inherits(x, "dgCMatrix")) + stopifnot(nrow(x) == length(y)) + + num_features = ncol(x) + num_examples = length(y) + + result_buffer = numeric(num_features + 1 + num_examples + 2) + if (!is.null(init)) + result_buffer[1:num_features] = init + + status_buffer = paste(rep(" ", 64), collapse = "") + + dataset = BlitzML_new_sparse_dataset(x, y) + + BlitzML_solve_problem(private$solver, dataset, private$params, result_buffer, status_buffer, private$log_dir) + return(list(coef = result_buffer[1:num_features], + bias = result_buffer[[num_features + 1L]], + result = result_buffer, + status = status_buffer)) + } + ), + private = list( + supported_losses = c("squared", "huber", "logistic"), + params = NULL, + solver = NULL, + log_dir = NULL + ) +) diff --git a/r-package/src/RcppExports.cpp b/r-package/src/RcppExports.cpp index db5d844..a0b36cc 100644 --- a/r-package/src/RcppExports.cpp +++ b/r-package/src/RcppExports.cpp @@ -5,25 +5,25 @@ using namespace Rcpp; -// create_solver_sparse_dataset -SEXP create_solver_sparse_dataset(const Rcpp::S4& x, const Rcpp::NumericVector& y); -RcppExport SEXP _blitzml_create_solver_sparse_dataset(SEXP xSEXP, SEXP ySEXP) { +// BlitzML_new_sparse_dataset +SEXP BlitzML_new_sparse_dataset(const Rcpp::S4& x, const Rcpp::NumericVector& y); +RcppExport SEXP _blitzml_BlitzML_new_sparse_dataset(SEXP xSEXP, SEXP ySEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const Rcpp::S4& >::type x(xSEXP); Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type y(ySEXP); - rcpp_result_gen = Rcpp::wrap(create_solver_sparse_dataset(x, y)); + rcpp_result_gen = Rcpp::wrap(BlitzML_new_sparse_dataset(x, y)); return rcpp_result_gen; END_RCPP } -// BlitzML_new_solver -SEXP BlitzML_new_solver(); -RcppExport SEXP _blitzml_BlitzML_new_solver() { +// BlitzML_new_sparse_logreg_solver +SEXP BlitzML_new_sparse_logreg_solver(); +RcppExport SEXP _blitzml_BlitzML_new_sparse_logreg_solver() { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - rcpp_result_gen = Rcpp::wrap(BlitzML_new_solver()); + rcpp_result_gen = Rcpp::wrap(BlitzML_new_sparse_logreg_solver()); return rcpp_result_gen; END_RCPP } @@ -51,12 +51,76 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// BlitzML_solve_problem +void BlitzML_solve_problem(SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params, Rcpp::NumericVector& result, Rcpp::String& status_buffer, const Rcpp::String& log_dir); +RcppExport SEXP _blitzml_BlitzML_solve_problem(SEXP xptr_solverSEXP, SEXP xptr_datasetSEXP, SEXP xptr_paramsSEXP, SEXP resultSEXP, SEXP status_bufferSEXP, SEXP log_dirSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); + Rcpp::traits::input_parameter< SEXP >::type xptr_dataset(xptr_datasetSEXP); + Rcpp::traits::input_parameter< SEXP >::type xptr_params(xptr_paramsSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector& >::type result(resultSEXP); + Rcpp::traits::input_parameter< Rcpp::String& >::type status_buffer(status_bufferSEXP); + Rcpp::traits::input_parameter< const Rcpp::String& >::type log_dir(log_dirSEXP); + BlitzML_solve_problem(xptr_solver, xptr_dataset, xptr_params, result, status_buffer, log_dir); + return R_NilValue; +END_RCPP +} +// BlitzML_set_tolerance +void BlitzML_set_tolerance(SEXP xptr_solver, double value); +RcppExport SEXP _blitzml_BlitzML_set_tolerance(SEXP xptr_solverSEXP, SEXP valueSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); + Rcpp::traits::input_parameter< double >::type value(valueSEXP); + BlitzML_set_tolerance(xptr_solver, value); + return R_NilValue; +END_RCPP +} +// BlitzML_set_max_time +void BlitzML_set_max_time(SEXP xptr_solver, double value); +RcppExport SEXP _blitzml_BlitzML_set_max_time(SEXP xptr_solverSEXP, SEXP valueSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); + Rcpp::traits::input_parameter< double >::type value(valueSEXP); + BlitzML_set_max_time(xptr_solver, value); + return R_NilValue; +END_RCPP +} +// BlitzML_set_max_iterations +void BlitzML_set_max_iterations(SEXP xptr_solver, unsigned value); +RcppExport SEXP _blitzml_BlitzML_set_max_iterations(SEXP xptr_solverSEXP, SEXP valueSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); + Rcpp::traits::input_parameter< unsigned >::type value(valueSEXP); + BlitzML_set_max_iterations(xptr_solver, value); + return R_NilValue; +END_RCPP +} +// BlitzML_set_use_working_sets +void BlitzML_set_use_working_sets(SEXP xptr_solver, unsigned value); +RcppExport SEXP _blitzml_BlitzML_set_use_working_sets(SEXP xptr_solverSEXP, SEXP valueSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); + Rcpp::traits::input_parameter< unsigned >::type value(valueSEXP); + BlitzML_set_use_working_sets(xptr_solver, value); + return R_NilValue; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { - {"_blitzml_create_solver_sparse_dataset", (DL_FUNC) &_blitzml_create_solver_sparse_dataset, 2}, - {"_blitzml_BlitzML_new_solver", (DL_FUNC) &_blitzml_BlitzML_new_solver, 0}, + {"_blitzml_BlitzML_new_sparse_dataset", (DL_FUNC) &_blitzml_BlitzML_new_sparse_dataset, 2}, + {"_blitzml_BlitzML_new_sparse_logreg_solver", (DL_FUNC) &_blitzml_BlitzML_new_sparse_logreg_solver, 0}, {"_blitzml_BlitzML_new_parameters", (DL_FUNC) &_blitzml_BlitzML_new_parameters, 1}, {"_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty", (DL_FUNC) &_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty, 3}, + {"_blitzml_BlitzML_solve_problem", (DL_FUNC) &_blitzml_BlitzML_solve_problem, 6}, + {"_blitzml_BlitzML_set_tolerance", (DL_FUNC) &_blitzml_BlitzML_set_tolerance, 2}, + {"_blitzml_BlitzML_set_max_time", (DL_FUNC) &_blitzml_BlitzML_set_max_time, 2}, + {"_blitzml_BlitzML_set_max_iterations", (DL_FUNC) &_blitzml_BlitzML_set_max_iterations, 2}, + {"_blitzml_BlitzML_set_use_working_sets", (DL_FUNC) &_blitzml_BlitzML_set_use_working_sets, 2}, {NULL, NULL, 0} }; diff --git a/r-package/src/wrap_sparse_dataset.cpp b/r-package/src/wrap_sparse_dataset.cpp index d87d89e..ce084c0 100644 --- a/r-package/src/wrap_sparse_dataset.cpp +++ b/r-package/src/wrap_sparse_dataset.cpp @@ -4,7 +4,7 @@ // [[Rcpp::export]] -SEXP create_solver_sparse_dataset(const Rcpp::S4 &x, +SEXP BlitzML_new_sparse_dataset(const Rcpp::S4 &x, const Rcpp::NumericVector &y) { dMappedCSC x_csc = extract_mapped_csc(x); double *y_ptr = (double *)y.begin(); diff --git a/r-package/src/wrap_sparse_solver.cpp b/r-package/src/wrap_sparse_solver.cpp index 0a3bbfa..2831d1f 100644 --- a/r-package/src/wrap_sparse_solver.cpp +++ b/r-package/src/wrap_sparse_solver.cpp @@ -7,10 +7,8 @@ #include #include -using namespace BlitzML; - // [[Rcpp::export]] -SEXP BlitzML_new_solver() { +SEXP BlitzML_new_sparse_logreg_solver() { BlitzML::SparseLogRegSolver* solver = new BlitzML::SparseLogRegSolver(); Rcpp::XPtr ptr(solver, true); return ptr; @@ -33,3 +31,42 @@ double BlitzML_sparse_linear_solver_compute_max_l1_penalty( Rcpp::XPtr< BlitzML::Parameters > params(xptr_params); return solver->compute_max_l1_penalty(data, params); } + +// [[Rcpp::export]] +void BlitzML_solve_problem(SEXP xptr_solver, + SEXP xptr_dataset, + SEXP xptr_params, + Rcpp::NumericVector &result, + Rcpp::String &status_buffer, + const Rcpp::String &log_dir) { + Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::SparseDataset > data(xptr_dataset); + Rcpp::XPtr< BlitzML::Parameters > params(xptr_params); + double *result_ptr = (double *)result.begin(); + char *buf = (char *)status_buffer.get_cstring(); + // status = + solver->solve(data, params, result_ptr, buf, log_dir.get_cstring()); +} + +// [[Rcpp::export]] +void BlitzML_set_tolerance(SEXP xptr_solver, double value) { + Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + solver->set_tolerance(value); +} +// [[Rcpp::export]] +void BlitzML_set_max_time(SEXP xptr_solver, double value) { + Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + solver->set_max_time(value); +} + +// [[Rcpp::export]] +void BlitzML_set_max_iterations(SEXP xptr_solver, unsigned value) { + Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + solver->set_max_iterations(value); +} + +// [[Rcpp::export]] +void BlitzML_set_use_working_sets(SEXP xptr_solver, unsigned value) { + Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + solver->set_use_working_sets(value); +} From 8e1be8f6bb1ca9032b2780b4bc8e59ab580e81cc Mon Sep 17 00:00:00 2001 From: dselivanov Date: Mon, 14 May 2018 14:02:06 +0400 Subject: [PATCH 04/14] fully working logistic regression with cross-validation --- r-package/DESCRIPTION | 8 +- r-package/NAMESPACE | 8 +- r-package/R/solver.R | 175 +++++++++++++++++++++------ r-package/R/utils.R | 27 +++++ r-package/R/zzz.R | 3 + r-package/src/RcppExports.cpp | 4 +- r-package/src/wrap_sparse_solver.cpp | 4 +- 7 files changed, 184 insertions(+), 45 deletions(-) create mode 100644 r-package/R/utils.R create mode 100644 r-package/R/zzz.R diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index 2aa702f..9185d0e 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -6,14 +6,18 @@ Authors@R: c( person("Tyler", "Johnson", role = c("aut"), comment = "backend C++ solver"), person("Dmitriy", "Selivanov", role = c("aut", "cre"), email = "selivanov.dmitriy@gmail.com", comment = "R package") ) -Maintainer: Dmitriy Selivanov Description: Efficient Solver for Linear Models with Various Loss Functions and L1 penalty License: GPL (>= 2) Encoding: UTF-8 LazyData: true +Depends: + R (>= 3.1.0), + methods Imports: + Matrix, R6, - Rcpp (>= 0.12.0) + Rcpp (>= 0.12.0), + futile.logger LinkingTo: Rcpp RoxygenNote: 6.0.1 diff --git a/r-package/NAMESPACE b/r-package/NAMESPACE index 434f32a..cef907a 100644 --- a/r-package/NAMESPACE +++ b/r-package/NAMESPACE @@ -1,6 +1,10 @@ # Generated by roxygen2: do not edit by hand -export(BlitzMLRegression) -export(get_max_lambda) +export(LassoLogisticRegressionBlitzML) +export(auc) +import(Matrix) importFrom(Rcpp,sourceCpp) +importFrom(futile.logger,flog.debug) +importFrom(futile.logger,flog.info) +importFrom(futile.logger,flog.trace) useDynLib("blitzml", .registration=TRUE) diff --git a/r-package/R/solver.R b/r-package/R/solver.R index 21560bd..f6ecdf3 100644 --- a/r-package/R/solver.R +++ b/r-package/R/solver.R @@ -5,61 +5,162 @@ #' @useDynLib "blitzml", .registration=TRUE #' @importFrom Rcpp sourceCpp -#' @export -get_max_lambda = function(x, y, params = c(l1_penalty = 1, "na" = 0, include_bias_term = 1, loss_index = 2)) { - solver = BlitzML_new_sparse_logreg_solver() - dataset = BlitzML_new_sparse_dataset(x, y) - params = BlitzML_new_parameters(params) - BlitzML_sparse_linear_solver_compute_max_l1_penalty(solver, dataset, params) -} +#' @import Matrix +#' @importFrom futile.logger flog.info flog.debug flog.trace #' @export -BlitzMLRegression = R6::R6Class( +LassoLogisticRegressionBlitzML = R6::R6Class( "BlitzMLRegression", public = list( - initialize = function(lambda, + initialize = function(loss = c("squared", "huber", "logistic"), + lambda = "auto", + n_lambda = 20, + lambda_min_fraction = 0.0001, + lambda_max_fraction = 0.2, include_bias_term = TRUE, - loss = c("squared", "huber", "logistic"), - log_dir = "/dev/null") { - loss = match.arg(loss) - loss_index = match(loss, private$supported_losses) - 1L + log_dir = tempdir()) { + if(loss != "logistic") + stop("only 'logistic' loss supported at the moment") + + stopifnot(is.numeric(lambda) || identical(lambda, "auto")) + if(is.numeric(lambda)) + lambda = sort(lambda) + + private$n_lambda = n_lambda[[1]] + private$lambda_min_fraction = lambda_min_fraction[[1]] + private$lambda_max_fraction = lambda_max_fraction[[1]] + + private$loss = match.arg(loss) + loss_index = match(private$loss, private$supported_losses) - 1L private$log_dir = log_dir - switch (loss, - logistic = {private$solver = BlitzML_new_sparse_logreg_solver()}, - stop(sprintf("solver for '%s' loss function is not supported yet", loss)) - ) - private$params = BlitzML_new_parameters(c(lambda = lambda, - undefined_param = 0, - include_bias_term = as.numeric(include_bias_term), - loss_index = loss_index)) + private$params = list(lambda = lambda, + undefined_param = 0, + include_bias_term = as.numeric(include_bias_term), + loss_index = loss_index) }, - fit = function(x, y, init = NULL, ...) { - stopifnot(is.vector(y)) + fit = function(x, y, ...) { + stopifnot(is.numeric(y)) stopifnot(inherits(x, "dgCMatrix")) stopifnot(nrow(x) == length(y)) + solver = switch (private$loss, + logistic = BlitzML_new_sparse_logreg_solver(), + stop(sprintf("solver for '%s' loss function is not supported yet", private$loss)) + ) num_features = ncol(x) - num_examples = length(y) - + num_examples = nrow(x) + # FIXME + # https://github.com/tbjohns/BlitzML/blob/93f80a83e206001817a0b1a0b81db6455ef44a87/python-package/_sparse_linear.py#L141 result_buffer = numeric(num_features + 1 + num_examples + 2) - if (!is.null(init)) - result_buffer[1:num_features] = init - - status_buffer = paste(rep(" ", 64), collapse = "") + status_buffer = raw(64) dataset = BlitzML_new_sparse_dataset(x, y) - BlitzML_solve_problem(private$solver, dataset, private$params, result_buffer, status_buffer, private$log_dir) - return(list(coef = result_buffer[1:num_features], - bias = result_buffer[[num_features + 1L]], - result = result_buffer, - status = status_buffer)) - } + # str(self$lambda_seq) + if(is.null(self$lambda_seq)) { + if(identical(private$params$lambda, "auto")) { + self$lambda_seq = private$generate_lambda_seq(x, y) + } else { + self$lambda_seq = private$params$lambda + } + } + + res = lapply(self$lambda_seq, function(lambda) { + start = Sys.time() + params = c(lambda = lambda, + undefined_param = private$params$undefined_param, + include_bias_term = private$params$include_bias_term, + loss_index = private$params$loss_index) + params = BlitzML_new_parameters(params) + BlitzML_solve_problem(solver, dataset, params, result_buffer, status_buffer, private$log_dir) + + coefs = result_buffer[1L:num_features] + bias = result_buffer[[num_features + 1L]] + + time_spent = difftime(Sys.time(), start, "sec") + futile.logger::flog.trace("solved in %f sec for lambda %f, %d non-zero coef (status: '%s')", + time_spent, lambda, sum(coefs != 0), rawToChar(status_buffer)) + # put bias followed coefficients + as(c(bias, coefs), "CsparseMatrix") + }) + res = do.call(cbind, res) + colnames(res) = paste("lambda=", self$lambda_seq, sep = "") + cn = colnames(x) + if(is.null(cn)) + cn = paste("V", seq_len(ncol(x)), sep = "") + rownames(res) = c("bias", cn) + self$coef = res + invisible(self$coef) + }, + predict = function(x, ...) { + x = cbind(rep(1, nrow(x)), x) + sigmoid(x %*% self$coef) + }, + cross_validate = function(x, y, + fold_id = NULL, + n_folds = 4, + callbacks = list(auc = blitzml::auc), + cores = parallel::detectCores(), + ...) { + + if(is.null(self$lambda_seq) && identical(private$params$lambda, "auto")) { + self$lambda_seq = private$generate_lambda_seq(x, y) + } + + if (.Platform$OS.type != "unix" && cores > 1) { + warning("Windows detected - setting mc.cores = 1") + cores = 1L + } + + lapply(callbacks, function(f) stopifnot(is.function(f) && length(formals(f)) == 2L)) + + if(is.null(fold_id)) + fold_id = sample.int(n_folds, nrow(x), replace = TRUE) + folds = unique(fold_id) + + res = parallel::mclapply(folds, function(fld) { + i = fold_id == fld + + x_train = x[i, , drop = FALSE] + y_train = y[i] + + x_test = x[-i, , drop = FALSE] + y_test = y[-i] + + coef = self$fit(x_train, y_train) + preds = self$predict(x_test) + flog.trace("executing callbacks") + scores = lapply(callbacks, function(f) { + vapply(1:ncol(preds), function(col) { + f(y_test, preds[, col]) + }, 0.0) + }) + ret = data.frame(fold = fld, scores, list(lambda = self$lambda_seq)) + attr(ret, "coef") = coef + ret + }, mc.cores = cores, ...) + ret = do.call(rbind, res) + attr(ret, "coef") = lapply(res, function(x) attr(x, "coef")) + ret + }, + coef = NULL, + lambda_seq = NULL ), private = list( + generate_lambda_seq = function(x, y) { + lambda_max = find_max_lambda(x, y, params = c(0, 0, private$params$include_bias_term, private$params$loss_index)) + flog.info("found max lambda: %f", lambda_max) + 10**seq(log10(private$lambda_max_fraction * lambda_max), + log10(private$lambda_min_fraction * lambda_max), + length.out = private$n_lambda) + }, + supported_losses = c("squared", "huber", "logistic"), params = NULL, - solver = NULL, - log_dir = NULL + loss = NULL, + log_dir = NULL, + lambda_min_fraction = NULL, + lambda_max_fraction = NULL, + n_lambda = NULL ) ) diff --git a/r-package/R/utils.R b/r-package/R/utils.R new file mode 100644 index 0000000..acf0af3 --- /dev/null +++ b/r-package/R/utils.R @@ -0,0 +1,27 @@ +find_max_lambda = function(x, y, params = c(0, 0, 1, 2)) { + solver = BlitzML_new_sparse_logreg_solver() + dataset = BlitzML_new_sparse_dataset(x, y) + params = BlitzML_new_parameters(params) + BlitzML_sparse_linear_solver_compute_max_l1_penalty(solver, dataset, params) +} + +sigmoid = function(x) { + 1 / (1 + exp(-x)) +} + +fast_rank = function(...) { + if(require(data.table)) + data.table::frank(...) + else + rank(...) +} + +#' @export +auc = function (actual, predicted) { + actual = as.integer(actual > 0) + rprob = fast_rank(predicted) + n1 = sum(actual) + n0 = length(actual) - n1 + u = sum(rprob[actual == 1]) - n1 * (n1 + 1)/2 + exp(log(u) - log(n1) - log(n0)) +} diff --git a/r-package/R/zzz.R b/r-package/R/zzz.R new file mode 100644 index 0000000..03dd43d --- /dev/null +++ b/r-package/R/zzz.R @@ -0,0 +1,3 @@ +.onAttach = function(libname, pkgname) { + suppressPackageStartupMessages(fast_rank(c(1, 2, 3))) +} diff --git a/r-package/src/RcppExports.cpp b/r-package/src/RcppExports.cpp index a0b36cc..6fd79c2 100644 --- a/r-package/src/RcppExports.cpp +++ b/r-package/src/RcppExports.cpp @@ -52,7 +52,7 @@ BEGIN_RCPP END_RCPP } // BlitzML_solve_problem -void BlitzML_solve_problem(SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params, Rcpp::NumericVector& result, Rcpp::String& status_buffer, const Rcpp::String& log_dir); +void BlitzML_solve_problem(SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params, Rcpp::NumericVector& result, Rcpp::RawVector& status_buffer, const Rcpp::String& log_dir); RcppExport SEXP _blitzml_BlitzML_solve_problem(SEXP xptr_solverSEXP, SEXP xptr_datasetSEXP, SEXP xptr_paramsSEXP, SEXP resultSEXP, SEXP status_bufferSEXP, SEXP log_dirSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; @@ -60,7 +60,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< SEXP >::type xptr_dataset(xptr_datasetSEXP); Rcpp::traits::input_parameter< SEXP >::type xptr_params(xptr_paramsSEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector& >::type result(resultSEXP); - Rcpp::traits::input_parameter< Rcpp::String& >::type status_buffer(status_bufferSEXP); + Rcpp::traits::input_parameter< Rcpp::RawVector& >::type status_buffer(status_bufferSEXP); Rcpp::traits::input_parameter< const Rcpp::String& >::type log_dir(log_dirSEXP); BlitzML_solve_problem(xptr_solver, xptr_dataset, xptr_params, result, status_buffer, log_dir); return R_NilValue; diff --git a/r-package/src/wrap_sparse_solver.cpp b/r-package/src/wrap_sparse_solver.cpp index 2831d1f..631c1df 100644 --- a/r-package/src/wrap_sparse_solver.cpp +++ b/r-package/src/wrap_sparse_solver.cpp @@ -37,13 +37,13 @@ void BlitzML_solve_problem(SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params, Rcpp::NumericVector &result, - Rcpp::String &status_buffer, + Rcpp::RawVector &status_buffer, const Rcpp::String &log_dir) { Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); Rcpp::XPtr< BlitzML::SparseDataset > data(xptr_dataset); Rcpp::XPtr< BlitzML::Parameters > params(xptr_params); double *result_ptr = (double *)result.begin(); - char *buf = (char *)status_buffer.get_cstring(); + char *buf = (char *)status_buffer.begin(); // status = solver->solve(data, params, result_ptr, buf, log_dir.get_cstring()); } From f8d72609cb6135fcbe347063a7f2fbe9f4d0057e Mon Sep 17 00:00:00 2001 From: dselivanov Date: Tue, 15 May 2018 12:39:09 +0400 Subject: [PATCH 05/14] add example --- r-package/.Rbuildignore | 1 + r-package/R/solver.R | 74 +++++++++++++++------- r-package/README.md | 54 ++++++++++++++++ r-package/docs/img/1-cross-validation.png | Bin 0 -> 29105 bytes r-package/src/wrap_sparse_solver.cpp | 1 - 5 files changed, 105 insertions(+), 25 deletions(-) create mode 100644 r-package/README.md create mode 100644 r-package/docs/img/1-cross-validation.png diff --git a/r-package/.Rbuildignore b/r-package/.Rbuildignore index 91114bf..e8e0505 100644 --- a/r-package/.Rbuildignore +++ b/r-package/.Rbuildignore @@ -1,2 +1,3 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^docs/* diff --git a/r-package/R/solver.R b/r-package/R/solver.R index f6ecdf3..63cf494 100644 --- a/r-package/R/solver.R +++ b/r-package/R/solver.R @@ -1,8 +1,3 @@ -# loss_indices: -# 0 - squared -# 1 - huber -# 2 - logistic - #' @useDynLib "blitzml", .registration=TRUE #' @importFrom Rcpp sourceCpp #' @import Matrix @@ -10,14 +5,18 @@ #' @export LassoLogisticRegressionBlitzML = R6::R6Class( - "BlitzMLRegression", + "LassoLogisticRegressionBlitzML", public = list( initialize = function(loss = c("squared", "huber", "logistic"), lambda = "auto", n_lambda = 20, lambda_min_fraction = 0.0001, - lambda_max_fraction = 0.2, + lambda_max_fraction = 0.3, include_bias_term = TRUE, + tol = 1e-3, + max_time = 60, + max_iter = 10000, + use_working_sets = TRUE, log_dir = tempdir()) { if(loss != "logistic") stop("only 'logistic' loss supported at the moment") @@ -29,14 +28,21 @@ LassoLogisticRegressionBlitzML = R6::R6Class( private$n_lambda = n_lambda[[1]] private$lambda_min_fraction = lambda_min_fraction[[1]] private$lambda_max_fraction = lambda_max_fraction[[1]] - + private$tol = tol + private$max_iter = max_iter + private$use_working_sets= use_working_sets + private$max_time = max_time private$loss = match.arg(loss) - loss_index = match(private$loss, private$supported_losses) - 1L private$log_dir = log_dir + + # loss_index: + # 0 - squared + # 1 - huber + # 2 - logistic private$params = list(lambda = lambda, undefined_param = 0, include_bias_term = as.numeric(include_bias_term), - loss_index = loss_index) + loss_index = match(private$loss, private$supported_losses) - 1L) }, fit = function(x, y, ...) { stopifnot(is.numeric(y)) @@ -47,16 +53,23 @@ LassoLogisticRegressionBlitzML = R6::R6Class( logistic = BlitzML_new_sparse_logreg_solver(), stop(sprintf("solver for '%s' loss function is not supported yet", private$loss)) ) + # BlitzML requirement - positive classes encoded as 1 and negative as -1 + # FIXME this is for logistic solver only! + y = ifelse(y > 0, 1, -1) + + BlitzML_set_tolerance(solver, private$tol) + BlitzML_set_use_working_sets(solver, private$use_working_sets) + BlitzML_set_max_time(solver, private$max_time) + BlitzML_set_max_iterations(solver, private$max_iter) + num_features = ncol(x) num_examples = nrow(x) # FIXME # https://github.com/tbjohns/BlitzML/blob/93f80a83e206001817a0b1a0b81db6455ef44a87/python-package/_sparse_linear.py#L141 result_buffer = numeric(num_features + 1 + num_examples + 2) - status_buffer = raw(64) dataset = BlitzML_new_sparse_dataset(x, y) - # str(self$lambda_seq) if(is.null(self$lambda_seq)) { if(identical(private$params$lambda, "auto")) { self$lambda_seq = private$generate_lambda_seq(x, y) @@ -66,6 +79,7 @@ LassoLogisticRegressionBlitzML = R6::R6Class( } res = lapply(self$lambda_seq, function(lambda) { + status_buffer = raw(64) start = Sys.time() params = c(lambda = lambda, undefined_param = private$params$undefined_param, @@ -77,10 +91,16 @@ LassoLogisticRegressionBlitzML = R6::R6Class( coefs = result_buffer[1L:num_features] bias = result_buffer[[num_features + 1L]] + status = rawToChar(status_buffer) time_spent = difftime(Sys.time(), start, "sec") - futile.logger::flog.trace("solved in %f sec for lambda %f, %d non-zero coef (status: '%s')", - time_spent, lambda, sum(coefs != 0), rawToChar(status_buffer)) - # put bias followed coefficients + + if(status == 'Exceeded time limit') + flog.warn("haven't converged for lambda %f - exceeded time limit of %f sec", lambda, private$max_time) + + flog.trace("solved in %f sec for lambda %f, %d non-zero coef (status: '%s')", + time_spent, lambda, sum(coefs != 0), status) + + # put bias followed coefficients and convert to sparse matrix (actually vector) as(c(bias, coefs), "CsparseMatrix") }) res = do.call(cbind, res) @@ -93,14 +113,15 @@ LassoLogisticRegressionBlitzML = R6::R6Class( invisible(self$coef) }, predict = function(x, ...) { + # add first dummy column of ones in order to add biases via matrix multiplication x = cbind(rep(1, nrow(x)), x) - sigmoid(x %*% self$coef) + as.matrix(sigmoid(x %*% self$coef)) }, cross_validate = function(x, y, fold_id = NULL, n_folds = 4, callbacks = list(auc = blitzml::auc), - cores = parallel::detectCores(), + cores = getOption("mc.cores", parallel::detectCores()), ...) { if(is.null(self$lambda_seq) && identical(private$params$lambda, "auto")) { @@ -124,8 +145,8 @@ LassoLogisticRegressionBlitzML = R6::R6Class( x_train = x[i, , drop = FALSE] y_train = y[i] - x_test = x[-i, , drop = FALSE] - y_test = y[-i] + x_test = x[!i, , drop = FALSE] + y_test = y[!i] coef = self$fit(x_train, y_train) preds = self$predict(x_test) @@ -136,11 +157,12 @@ LassoLogisticRegressionBlitzML = R6::R6Class( }, 0.0) }) ret = data.frame(fold = fld, scores, list(lambda = self$lambda_seq)) - attr(ret, "coef") = coef + # attr(ret, "coef") = coef ret }, mc.cores = cores, ...) ret = do.call(rbind, res) - attr(ret, "coef") = lapply(res, function(x) attr(x, "coef")) + class(ret) = c("cv.LassoLogisticRegressionBlitzML", class(ret)) + # attr(ret, "coef") = lapply(res, function(x) attr(x, "coef")) ret }, coef = NULL, @@ -150,8 +172,8 @@ LassoLogisticRegressionBlitzML = R6::R6Class( generate_lambda_seq = function(x, y) { lambda_max = find_max_lambda(x, y, params = c(0, 0, private$params$include_bias_term, private$params$loss_index)) flog.info("found max lambda: %f", lambda_max) - 10**seq(log10(private$lambda_max_fraction * lambda_max), - log10(private$lambda_min_fraction * lambda_max), + 10**seq(log10(private$lambda_min_fraction * lambda_max), + log10(private$lambda_max_fraction * lambda_max), length.out = private$n_lambda) }, @@ -161,6 +183,10 @@ LassoLogisticRegressionBlitzML = R6::R6Class( log_dir = NULL, lambda_min_fraction = NULL, lambda_max_fraction = NULL, - n_lambda = NULL + n_lambda = NULL, + tol = NULL, + max_iter = NULL, + max_time = NULL, + use_working_sets = NULL ) ) diff --git a/r-package/README.md b/r-package/README.md new file mode 100644 index 0000000..16051e7 --- /dev/null +++ b/r-package/README.md @@ -0,0 +1,54 @@ +# Work in progress + +## Installation + +```bash +git clone https://github.com/dselivanov/BlitzML.git +R CMD INSTALL r-package +``` + +- At the moment package supports only logistic regressionon on sparse matrices. + +## Example + +Here is exmple on sentiment classification. We perform 4-fold cross-validation and track AUC: +```r +library(blitzml) +library(text2vec) +library(futile.logger) +flog.threshold(WARN, "blitzml") + +it = itoken(movie_review$review, tolower, word_tokenizer) +dtm = create_dtm(it, hash_vectorizer(2**14)) + +system.time({ + set.seed(1) + model = LassoLogisticRegressionBlitzML$new(loss = "logistic", lambda = "auto", + tol = 1e-2, + use_working_sets = F) + cv_stat = model$cross_validate(dtm, movie_review$sentiment, n_folds = 8, cores = 1) +}) +#INFO [2018-05-15 12:31:26] found max lambda: 1699.562800 +# user system elapsed +# 4.956 0.221 5.212 +``` + +And visialization of lambda path: +```r +library(data.table) +library(ggplot2) + +setDT(cv_stat) +cv_stat = cv_stat[, .(cvm = mean(auc), sd = sd(auc)), keyby = .(lambda)] + +ggplot(data = cv_stat, aes(x = lambda, y = cvm, col = 'red')) + + geom_point(size = 2) + + geom_line(size = 1) + + ylab("auc") + + scale_color_discrete(guide=FALSE) + + geom_errorbar(aes(ymin = cvm - sd, ymax = cvm + sd) , width=.01, alpha = 0.3) + + scale_x_log10() + + theme_bw() +``` + +![cv](docs/img/1-cross-validation.png) diff --git a/r-package/docs/img/1-cross-validation.png b/r-package/docs/img/1-cross-validation.png new file mode 100644 index 0000000000000000000000000000000000000000..4810c5292006c86514a48bd2fde0beaee12c2397 GIT binary patch literal 29105 zcmZ_0by!qixCRPHBZ^YeDAFY$ARr|o9nw80C9NQxBO(&gH8e_hhr|HVFrYLH-Q5fg zch8{b{LXXl{l^D*_TFo)ufO;GmVv6uGKBb)_-JTogmSVk)zHvx(W9ZEhvQ-cf6-#Z ze2RvKNnt4^rD`oDBV})6@1Xwvjft6znVp$~rHPu%3pBK6-@a)YzN6M85vy~wV0hf8 zZu8XtV~#i>BVm-*)AWj;K2uWycCm5-6OUF+5!-$7U475^>Mqk&q*Xn6))Pruq1C?h zEJt_YVinvm_>2=n^ekc(Cw{7{!C4y+5k83J&(dkLZO?<`ofMDlT8ka5)!ICr*a#AI zvVGx zzyZfR=huG0m2UOsvHZNKwxnj~)@pve{e2P4rof;2340w5=eFu8`zuT9{0TiCZZ9v+ z>fK}!UJj{3TWg-H2cKg(aV|3BqWwr){l5C7zmi0(%wLsG$`jq9u%cX&^Wlp8aYvtf z!O%8qwO`>jx!@3;0@=nHSS{`PWqGiJZ?P_R{_Bu$DifTw>l>%9+xo~oQ$76!^1(ih zgyncxEb&hQ8t|2LZrw9wk-(lI8+(`QcL$r7q>zLn2$XJ4PPNkXd|$W%0&8w;iN5OW z1o@rT^i{B#28ewkE2F#!Eg%~-+spSiFd-KA<`BdXXQH6LKknCW#4Cr0_`r+)$Svr( z5+aJajM#MNY)sxp+UU99$Yp&9pITO;^psFVU7GPVADg0q`e*^iovJSkJ*?+gh7A|( zpi+Z14#C$|^AyQHwfjy%ryCc%2=)uBj9H&W?Y(M;4!z#GGR0q;Had0 zAZ>wdGzTB{7Parb@{a9-tP~#_KG(J9y7Q_a4(rV7REt9S+slGOzr%nEjs|8gmdo}A zm{+$K%&X6<*$^W_q$hcyagOUfog=6nVRb;MH{(I(_MqBgE!Mg~{vm@6DJ_{pi@@CL zH@y4Iw{cBgydDF`7H*uArE;f;(%dnjTh2Z=XynpD4?Hi$I8?gQ)YMpeAI(amWv}$C zQIa_1v6N{@w1CEOjrKwMqZpMXHA^CO5q09+(jm#a0p(fFX|@>W7c|4Uc4cbo118g6 z7`lVPb{m8+QWu4RsJE2G0&KAf>#JXL9(?Ik41K2_Fk!1lg|#z%7;yNGA;+qe8TB@h!EdzobG&Bh`xtEd}uIQUnIKD~JQyqIV zf&`)()9)~k_O)+}7z>h_KZf=+N^DP~08X=$uy&($;kMCpP()*(Q`$rWU9YoQc zhKorgfrkF?pC)2@w7GBp`w;Mx8Sp1oDt@eo|NSdE?#}9h)DJk2iyBJPPIjGT845!CB`kE)zZ$HsR<58+#sD|)#)zOIl#>qn1&WUt+?vP=+j|WQ>RPn>`Gm0@7Xcp;IydO5b!=LpXI8s9pA=&8={nMW3-fWT21PIIxn3qi z@T)t2PWYUZ7ET71KwZa8eb5o74~jH%`I=YK)b__6qPjAUo4oIA*3xUf^6i&2n{>>a z07cuDrQT9o!aSiSF}H_8$95v}<(=;NE;i;A=B3w<@S~r%mkQd{Eo=2QI9|_=IZB|Z zDUKH{wXLLC8?{Y~91C?$k&F23vnv8$e50H>!A|Z8_qa0TirHU3SaK|JXQ&JC3e(Hi z%uKi|u}NOOyu7Sg88Q0vq4CT!L>!q-9auX!1nutT2 zQ2P~)m8IqDr<7*5Wk1N)#SoG`( zJR0~BE|D(@dIZP+dHV@EMtDoo7{Py+38Dvth554VKMsQiZo$Nrz;}H6|Gi1DJJEZX ze4W)dxh_-9ryYhh65I7|YE0^+N9p(fI-@Ezu+sdqNCJ#X?vnTEYW?Ebz1gW+()zqR z4cUkJ#Tqhbk&2y$#u(qtdh%Mn$lrLBY7cO_N>4OmPS92o(X)Fsp{Ic z7303__!#7L5$NE49KmOtmIU@=%ap9W+33(W38#wZSng%GwbWQ6GUB&p^w zkH6=*iGHN_(r_|lHzDJo&ScR#+1d!ge^C+G#SyE-UD=4f{Qe?s<2s2v`pm7$xm3kC z`{X%?5uG3SRvMz3@5#9%MJ$Svt}bB}dAIVi-wpvXxiO!2M1VSlpZ^i#GkoXX^|scW z#(y0*Fb`LVR(tvKcoFWYfDz6n%-N&e1~ZLz-KCRPaW}Fn&;)P2Vm-X1)7W~< zw!QJR<>9$114ZbO@?DpQ#FX{pk*K3`D@GElw%yD^13X!#E@UR%x~y>~LcVd&|Sn)ArI6(FeB!+4sC% zTB$X0EOyzQwCsT+T3dZwlJUTb&g?J&((5V0{V*!-8C9+iHc`uWP{IU%!U8-i>utw2 z{>i9O2Z6*-kLMl3)xK2rn&#^onc`Qnc!Ps~-Z&W64ra4^pWD(j8gzd<)iFDLasNZ5 zkxS);z8&X`o^>KzZ+X9ff9dnDo}D(6zgk+A036^JiNHf&2}?`^D%swo^uqwU1m6WI zGqE#S3P;_jUPOX?RwG13Y|pM|6i)H{Wh1}Rld>v2`OOAG#gPiIoayP5YX~ZvN=Ok9 z`93{d%ihC{)@m}WA0v2b(-grYN?jEvRjN~s@ARcOf3^t3*O6dscIw+EW_LVE_J?f9 zwYd0j(&KDtWBP{yZLNnRp4L7nOwN4%(~Bq(i>d(x;w+z?dj2-$5LtM|QS6uzF>%_n zdl*rqX|A7;%l;+;41w@Ng5_h3O!D_I`mNqWOL-qt4*|uy0c%qwpib?$&5xZ|go2Uw zzX+G$#-Hb1W>`nD*=k9i_+%+gE3JgZn9#-XoC-|8++*jm)_%KNhr@1+oo*p^Z2+N2 zHQN!t=BOi6m{bgkzj+=}eLUQNw%DN)ziXvS$T>Xd@3!S1+zp+O zOt<&R-fRvi)Oep?Sa~=!Na{T+Mr`WIbhKUgd-|NMVn?t+_c z;-q+!O_Na$4K|T@@65%%)a;6eN&-dfk7FkH>)b!_(kXTcCmKIKn(<*j+F>602qWA- z+Nx9AUUFOKGW>pcuD*DiIwMB3Tm6>2B+?Oc_yLd56EPVg2j9nS-GOR%bWR% zdh_RQ-O(`LuwCRbWV_BvEOfqB+n!C3BT49^{c)IR-&s9uzj)%P@7#B1S-|yO> zDFb)iri$hukN4!sk7_b)#8d2r6^NE~4+%)=T*S_>TVZ6%KklFl7x4jmUh0gM-@l3u z1Po{BI}nmNQ~1|IB<^9|SGiD-J0O6@Z7xYMy_daQk;}}kXNT54?nXF_ffw=>lvBmV zQwSYH8j`x9&0m()zOSy|vHm)`m1%P2^u?okiE0yzsocDaA1pU<7+b^iY;W7vxa{Y2 zhQX`lq;Xo@MD1Gc*9UpV?v~D*$&S)W_gE5qt+79O#9jBs zr?;kFkG*Lua^p0!qhUJmpRnK(-{<;4`GzaIOtNFPnP$7Yy$`Ftsr3-QYZ7DPA@qm! zr@Nz=51sGz8x+f}7YaTeBp!al8<)o^vWJP~JSN$k>=M&oFqE>Rgh;3mhYN&=e0 zpI85ALg1=CkOf{0BGp&)*1*O%6e zzgZuQ_sV&1gt=>}S@b5$3!&NdB+z0nNFa@h$u#j5XO+fTa`nN2++?j|bkd2h z%K=348f@sy-@qW!J(|sXYZzMaB$ z9DfXb8rIij+G8K5s3PBqJ!tNu=7jrF<+SqtwWmg|MXJEyE|=#ufw~3r z(Wxm^HNn3}&%RE&`So%zbcQGq@}a&*bCVK#Io6B%X&B6m4BaDjYP&(J>9*u)nU;IF z=dxv+eIrlZ@X)X1VJmMls9yT7&?lhB<%3^}KFZ3aPAG(^U3(9l50Bi*If758Pc>?l zS`Q8mX0z_iiZ3dsa#?)iCpqhLS)wa_EqB`nZdTM&4Z1&Nw*1q}V@K2|Ytgaay}1KF zc%^dqyVHd~`w+8DN~ZQ$Spsu<3FOYO;+)k;T*=_~Gn?=?IJod5ripDDzIU#trrAFY zohSc962@;Wi6Wd1Qfz;&s(lJuO3zy?z2v~DnjFCVL9jIA8SGhTv-9d*>SeutyCS9W zj0LM>dBZ~^UY~565=;9J;jfMQzb2a%ghRUavG#7NW-u*2k?$eelS_pDsRFH5 zXZw|u{ncD`H5W7At%%G0W3#lkeo-;0aQb$;0}DSYs>S$UYEee3B{i+0Osc%%28)zv#=$?X}OO0<$Zy;)Gu^X~k1-uK66$(tCLkT+z&g&zK;@jGT_TGtv zE$(^}rBf{&i2%ib#KM`ijp0#y0@U4AmeQNxOP9uJ{g+SQ-i0xV3WT-R$_OkwU22d$S z^QyxpDycV5(7Ecm@rI`x?8g%D>Gr++?_8zm3NCsPhdgJ1i?AJ}97W;@+j0jr2rchJ zeQWF4OAd<6&t~T;M|G@Fb%ClQkf4X1yEo%Z_=;Mmdw-A)boXn!e$Z4=L<>LZNzR}6 zacRv&gr6{LntQ7GoIGt2gA9VkNZwgc0=xP&O*ZCAFa+TiV`mn5_3)}goTVg2ccHn8 zKQ-2)e)1P7w3Yk%lhFIq?EKCi?E83w48yO3Kprt_0%&pxV7O69sX^MCeyQ51y@SLf zZks0&ym7-7@zY1Rj(T~Zysb2g^WYld(D>m_!GfUKUq8)OQ-4Zk$pDfNOq2h9;6x-(;DYgzksS&8(n&b_M@o`TraRj&K%oJ7Lj z(!S#&RDhZ?5wU2MiRvgRIV2>MgS22KJGB%tbGCeNzJuii(t>Lfc^huZr*Z!N&`X+P z0SZ2?$7YSxXJ2kd^kd=@#+mnNw0rIy+t&wY}499yZ6fWnD@BIZF>ri)`?9cAnuqNSl!J7j_`s}QH5KZ$Y2c(3W$V$7ovJu zLBLcm(Nb@|1OF7XDP{}DzxYlj2>s3s+q9gnbQB=s0lnX91)}BMEO(%*0zD6P3BFzv z;Ctt#&u3&x&iB9B*rnHBMeFOV{P-@6QE}pB27WMFi5|%+m?u-+}iW92sPU<#M#p5?Rt^7+n1sEdM1vel=1fWjNO$+`Q;?7YKemAsqlxpQeIfbqAkw+6Hm?xXUim0sH6Oodf~#ZgQ(Sn|Xp zfNg=$)Oiv9{?{;_#&^-p*}1HqB8~Yu9JB1{LkN0iv;8(Vw6>`30jprtXb@;eq)+IH zgFdH-_eU|@eMA}5G!ayh*vAhwz+r4ft;(LQ}SHzsj zp;e`u)>9`wjb_a2O2__*cY6qDiTdD|-r~xFh!z|iFaiS+5Ywck?;!}6igQ1ewwDLl zFN;&z#7Ey1(-NLkmWbdDeuOOm_srCG4Z7*Ow9;(QJ3r$QA0puP)<3=p3>=Muimi(q zw0?=i@9#w_aQ)dFFK@UkYCN;Dhw_gaL>*}QVK|#-cCIcc+V^VgjJ{44I*w8F=Brab ztl+CB{o%zR5ILAQ+`#T8#!@GMxrug>rd$7en%DfHV%^xAeV^-?^V1a?!KM(Ya?z4U zXlr8ug(%lw(#7+dsIYWDTg!Ip7d?*O%MhFLI-dG@&?n+FqH9t39-l0M!Y9Rjo%M2o z?t0f<3}adEB9NGQ{bT`VweT3#l&Um&pH>sx>+Y>uDcn$JA_qaQ%wG@Mv({{kR%|YU zyxlBE_LmZ;`=TM+f;H!~nmow|`Qwl%*<+wZBPDd#PXY2_M;+fssry*_Goz9t^7C6j2-p%n-9W6rv>Fi z*dcP9Sw#Zxx!uZ^mTbD85J4BWSk=(m&ZtRpg~x3+Lky(8u-Wx%!_PBvPK%lN>bIH zj)@ScOz8LA=6elEc$G^9Sj>9%jI$cp3O0bRhJ{uGsMD>>peWa>~PPs#&CfyHD$h`?#Uc`hn81hvUc za(x>kpa5%{ad=$!wxO~s{#Xf-`qLp=kIIZ>!BKT6_N@2klQ)j}rmEKq*Xlc^D_#6P z)mt7N{65z}Lxb59dTBFGf4yB7?Y%A@M0aACIXaUjgV}b%lOaPoJDYb>UG+!B?@}^I zLl>NPewm>JW)O$YGp&cKPO*B*IdQY*u83A^C>{l$ zxuW-|Ntmcs2eqT()#+;)noRx?9fs?AwY$8_!5M@So|>xUb!4*c&7S4HQ@`!*zSwz3 zIlVpc(^iyT^NoqS#DMujiMkpPe+c>qC9itj5@FYwG0@LVvp_!>+bV)jW8bv#W!6ex z#LQSc=(fC(K_P%KTwRc6{V0lYkNc6btyIl+I^+}Mc0_vGJG0re4HD!N!;GmpP#L$Z zQDVGU#>vT=nW2a38j5~geY5ZMfzb`V@ut67g73rUt%o4nvg?sCBMSFDErsD2niJ}e z()lOn6FW+dD}9t?+DElV;q1bdx+saQcb$iwZ^$ zi~xuext4Nq10unB!jk#h1D+O-H5~C1u=2CH&oM51GY8s9xXw!}TX$G~eX*fQa&}^u z`m+`MM661zR{xv^A3iX+;FuZDKU{vTzgE9w#JzFpz%}luH8KDvH&>ciAG&JWjD4FN z(6`@j-#^UYlIoLQN*4`-Vnb;76yBUrd9d4FwV(IuNjWhFA&@}#5z7VZziBz7R&Ze3Fxic=`^g6^)6?MGMtUlpa#FV3|6PCE#UY) z+~3^YaZvvZ1MmUI-Ug6%r3(cYN8GD#luO@MP7ViE8m!6bJa;8+p_d4whE1miXDg#b z=gAK;4d>f+)0K^g(+s(&dRqh!9x97FV`9KVGzQzCuUEsRU=pWQYePz5p!pq_{H4vo z828J{$fxa=LET4*Gb8wGoUC$$og~QjjGZvd*3e5$#5Ob4f5?O(xXg51rktEk<^nvN z%GD1-X+%a6x2%$#M*du#?-Z-6hz?WPJZm+;-!D(Sv~jYXbwWV_Y$Wd$iHx;%?{5Cg z*Y#T|+O%5FN&%4JoMDdJ>od#w_;gn$<43_ku?Y#VO?`8Zufh;^0$A2AsoSPgHCU-E^=PX2OF(UWrXtq4Q$E|TR0 z$oagWL4Is-2kYe;b0%BL4IrC`#4lx;o0Em4C>-;@MC+P1$|mG;H^fBdr@h!FFOTK{ z*~krZ&OR0FfgTAsU=tQ?2dUNlBq|v+mI)Pp_2;0YtNM-evJ||eH~N@_%CX8NH&(yP zNqPMR_ShsLwEJChnS?{!n40L0QQOm$E?t`k~& zYEZ#=y^_l6)+KG#aC+PwFO7Xu=2vn8^nh)uxYdj_dN#;sP-F-iEE8krQp}w7aXg9- z0AouUg7_@t%<&C<;PR`YoI};idO<1Pv1;5(S{sHgvIom+fwspE+6FJ?q_n(a5$ zcdgb%3-5L?AUO~cZC~2(dG|;p(E(WZTnPq)On3Eig0#I0fU1yEy9CZNjdd2=*m3D9 zUMwkBCRt-dSI@AyLrlLSt0Gl*pdcThQpZ6448>qV`!J#-<-DoO8fmB``UWFByn(|S z{`&OMX0KmioZ%BmFD@ zfUa_@?7NEs4hqnmte5-2R&ERYSjU%H1A|!!mY&)1($WS(Fr}a*_n#LVMJrn!KB$T~ zPX;*Bx)AvdOJdBAfzJ1=!SW&#p9aCur6{zWX{y#0z{GYilBmWzH)X)5BkDXYWRO)! zsbYjAJ=h)sv>TpN_e*4Dx?1JCaLisYQ$1v=tu8otT*St>N$-rJwk0%yc2i?`EsYmd zl=4ysFY|H;My)Nr6F})IT0bocI<{1Vy>!El1FZ42 z`v&_(Imq;GAJ!xDBv~#-IuDIPZ38Bk@v=##8{mnJ1vrnSXAp|eVF{IZKZFQMkS6f+ z*!j_f5Mgs@3B;$pXaKM!J}Ir9v4a>0KOgiP4r;40NYpe$Bol>XlIS*xd)B-F%_uZa z9jhAGy~g)hsSCPTH|^qxeHUf@DJ+(tUws`W7-YaZj@sWxX&`E_>um<33`HFS906w) zUd9me!?xI%8kjc>jd9&fhwAuHXyc}Q;?%mkV8ekFf77Begd*PU+5+1^QNZt)Z&r?07ZK!tP zD2PFjw*TMOKLA?~+K{tDJK))h{(_D*z-Q47q>%OH2Y0T&FpyLRlND8a zYjG!QUECF9^&X{h;Dcb3WOgB!-9sMOUOp7HD(U^*;_^_>L|d*=1FhkyMXDzfe;oVN zjRR;7bRYjU`07l?e^xt1ym|#%-Hz#ajd~k5TCoX9s8x=x?bQ%?oN6q7CMFeLJU{)J zHRG<%NuHA3TL3w=eU7rJ)t`W@Mq$rTe1DoG<_jF!jLu@^*j`_UL<3^aG5W_x%T$Dq zB=`VK7d(&5Dl^g4D13Kami~!@h;)Ymy~Xb|oBUog9fU(ctI2i253Kps@=29qyU-yo&N!@vsph&fGQH6V8FV}$DFJaJh3gHN-ko@<*`7@}M7 z#QEw$CT!mSW)s7}{tKqRir(PwI+FAi$6`TA&R>QmH*H;3`qgbF{pA~;y7^!Jr2vfp ze{R6j|ND61e~o-Q@oWCkM1G9xtXQpi2<}apX9AY!NF2ZEp3yM!iFXNUtUiSk44>6) z!~hJt{c-E%G27*z05$b;UiIlHASq*bcuA#CIfD$H>q7x!efk;DhE$R{B`P+mvY|6i zWOT}9v+Y&a#dsdRDULs+Dz*sb6MeXSaVoFaAh#XTPkPlnmzjrsiVRu&3{8wW{Em4! zEGQ4I#D8Wk;eHYj!2YrKL$Vgn=_0W_^w=)D_yFbZEExPVw<49Z`DZSEzGN%%ZSWQ{ zTdwM;swpz;f;Cp#>eakYr|wwmPT$mj2{z^JT1oqwVhCf4>T(U>(5vWF6xO^2h1iy) ze`-#nK4{bxstwGDjZcZ_4Nqr>Y8>^SQfnYcmfA@;b<1A%#B%1_m{Hbz6vOdP_B_Aj zIi|@fXRs1O+dIFcd}1tNv1Q^Rh_y*qhebeU3|E4_r6vn*tDpUmTI)U8*e@4zuSD{h zj|rQJy@Jx?`w4>fDQAeS4X@H-10{U+asE`e8RB3<5^cUy^C_if4fZu`-cfbzlzZ3+ zK`@mh=m$%`DW2sl zSdBqSV>!1@_^Xg&C43Ap84}EiS*WxCaL}PxfmiE`HVI$)&p>COK*z@e2BYlcsAWx@o>nI4pd z0j~BEy;S_eL?q3fq>GI#%O&1yRzo8U zWj-UnJX8mKxgqt5dxF<3;8LPT0-Pm{nwdEFx^ouXd%zIg+KP)1}rJ(^x z`TGS4(kg%euZ1iozON)PTQo`7`_&nP^|)yB4-YMgkhbT<2$U?^*-`DNbUIY#Yya&{ zP>S`Z@N}tim`>TtQUxxxvrE>>Nse{o=L!IZvkU`AJ`4e8rEGg2X>0d41L0h_W?Atb#~y&1Zo9T{23%f$>nL%T1< z_8>kc(-2Pp6ciodP#be!T9R)#d>eN#kA_t51fbr$tq@f@)aic`y>~xSYbrRJgTMP> zSJ`1{r*#-ozBKc(d~Q|9sA4?#LF`*y>d2Lxft9RQ3|=FDK?7rxynpdf+S{oNLu51F-s?ieM`J!c z@N@-b%T`EIAhwUp33)oAce3amJK85Fsq7`^qX%0m7nj9B*IU~cjB)j%yx!)~P82FF(t#rbWKYQ*;6IOj;V*pAi zAl(ifjAz=Se2tKC7UXkFx}TOI+g`kQYJ0NQh$u2eSQBD77@cy))9{G!C^W4US0h^@ zO*mh=@W!?!(nK(>Do3>rlMZ@~4};Ba5xM+%(Z)s1;QxY2L?-ZjM{}3zoaJI6)tT8N z6gfEJ&H=QKVR&6gYXEF#{XUR`*RdXk)*eS1vNjAW_QNR;QkYU*4fcE{L549|0zlpS z1UMC;W1i(r)?~m#q8G+-#ZD88izZ<~!-FDAb6CI(ZaQGYcpthbZ@wIa&tW*=wzn42 z`rSYId>0=BD7ye*npT+pomw)(N|N-0w|jq74SV?lKS0D4j4UZ7ECExI-%!{_$plwJ zk-lcIFF7t*j@^3$<}kR{cRWXmbOB6YPmB_b3dmCcZYzn*2uj)pF@^F&9BK~&<-y#{ zO^TwI;+4uLg_9#ARykZwNLNA52!<0YA*(fXOiJ~p(d^;``$naRAEB>T_0RLy_Ss3V z)*{Sb8Fk!}gPwjt7PG*#&^Nr7`S&E!Hi0$Gn6o8r*6e1Vjpvu}+$dzOQ%z5%VXs*c zBspE`QhTzh`Iyf(@2yF{w)sf!zDBPBVu{O$qdGJ5`O<4Z3>qUl^2^ED)+M_c2i!1-&w zbQo9nsB$fPu^h1Pb_u*&Q)}a*DIQ|yj;jDG4~0mtjjB*SNy}IY7Xntz23B=SA-D3s zSydO))B3hL&|@?njr7OHDdrs|;K^T|2M)yS9CY!-`HWKmB^#y6Y)|f!?%xPj^{jVi zhijBV4jI^lltV?W5IzelfN@~Aap|S6rE!`3;BWgJHJ1bkc^(hh_#PHG4!chcm-!7W z{S2D%PUJmUVZUgQ3f3kPLOSBB|6lGT;4WzHu_u7xxv!=N?8D80u^ef0tx$ljSpHkr z0;&t*o_+Z!D9JNnzIy7Bh-k4A0Czofxq7}E;C-$+w}*^03xW%-n3H%w<{A&oZqVn6+3&E{GpwKee)ObjdF=tz9IX3-m_}Z&2C=saE{V2r(PsCy__uL>odnGK zAy)Umt+yS{hy0^)%W8Dxyc_jySqjxrrrdQucN)*@jUr=Q00Ormi9JqW+SO$ILN4=O zQ`J+TB6UJ+jvJ0AdnwR2t)yY*81_08onoa7@u8>k971?#a$j7-CSBD}VW0f}h+K1Y z+*a)O(5AC=h&SDY8&wNV$1b?rcT;I(Msj#z-pQj%{^E&g&l^yIHdrtHt+PiPxv< zT&{%f1~LDBOZL^u%8JeC%i<6tI3qc*y*FlNax!U0bW;s`(|`n+b(em#>CA+f3D)6Q zzT_;sjtzIGH>MsgQ(UHlweV3?`k7{}I@#A~^_7|JbLylmQx!*rUe{L4eQh~dd#7@~ z6MT9;l6m$qTIH5ak14O7`NftC5GOf6QsD^@XrU)Yj#?lOb$%MO@HlQdkzD#^uq&C{mT74SytGYdd*t(8SM|_V_-Y zjfe917WHOakpWr@d9fWI;zU9WN0e#vHTH-&xi2Q9tWqWxVy3Xf71%j5?88?*rgq@t zl-E){$uf%QoEYvYZjir6{28-~YIsXH6PD(ZT`SjEG?EQx9N7jV2n@N<@B)5S!ow+x zg|2ByoG}RSB(!K!NVNK_j>{Uz>>G|^#V!xkixrleap(@+pKXE>bYvp@_FUu5E6%^q z$n=jn`ecv%2x(-^8 zf>&X*g>=^^9eQP^fp^3&-iP%n!#w5#nU_z+fR<>SmYD0ZD{A*|#t0&Kwmh@H3Ryv7 z6XQR}Quamcv@h&k63wI@biCdpeu+ht^AKES@X1W{&a+vxt-77Y>BFBrAl}L;XLw;r zUw|QVtMaJA-wAji0ln{C5|4;d*1QO-W;+`4sqbc_^Zu>R{`mRXGocWboFd{^wnRB< z;Ic#C#ROD`#A;mt3s6w-4oy?GK1V~PwnUUKbEc}M3R zwd$^9aTQMTZ<-SYDay#B3IpF=ll8yJ^teM1UjQkY^e0$MVEq`Je6dknoCC=k?c$`j zA$uNT=p3jP#Y|eqvM+@O76#VH^^1}vzRCAGg}%B<`g313KB2Y7Zc*_*uG5mW^Z!`x zYLu@lDY`c%uC34S3`CiBU_Hx%110O5M5oaN-DASvtcgm>%8JB$*5Hv?`Jyl4I-jB3 zY^?Id_F4J7Pi<7?3;8VNcHE&9AX985^W97l`AQ7qKT8jP1bwaxF5ZbLHrY=NW4Wr^ zSWGZNAy*P;EgmzP_gY$JN<9lxvu`ZjH%|fTvScL0wHBDE zd$W>8l&{a0(_<)ffen%i9^7dh+_Sa;g8xrqKs9yh@_sj}0c^AngUC^c44F7((t6Op z)=km{l9`viOLD>IM3vtXY0MZ*C%c4HBbIl>;8HJM0Jj#1Se~IlgdDR%pYO{~d7p35 z_LiCTR9u~HShsw}ljkXRk*e2y_xMI)Q>5>yWg>{ZA)=Gdjf9ZUu3f#;5)2@Lz``WJ zcZ5zCqxfOHYNY`2?{WLjI}L3^re2CKN+ogMk6n zK`1oojkWAtiS|qL2@O8=5q<~Q#|EHd&9Mi6%9;ZZ1^3O;4%CnXjDPg-rKecx9g)o^ zNZbNo+v4tsoy?xaes{VN7X5D%^lWW==}=K4U`8vdyq(ekcKmfLbAeHW=@MK53k;&izYm^YYy5Ppe(bN|Bt zk0soGKz|=?e!padnn(0uH?V495 z-@pCUn(AA0W+K(aIOZOUM#r&5F1?&Fd+sRf5jqCoih>s|i6`C&U;pzThYpU8@HH2Z z+Y+fEFyeCtv-eh~qpbLM8H$YyVEot})+vI%iK=Y3_&Zqiscx>blU^P{9C}-yAx+8_ z{OMLWXVutuLj}Niv@0trR_BAcM___8liN=JtMjRQyM)y7gfH5|V)RV972Q1^Duug83$P4eC zlc7a9`HQ7KyQMZsz-mM>u#SBdQ=aSQYOsME&&Te_(}{c63zFJjVWFB()JLT6&EG~}`0XZZoZ;jD z5&|yJ=hpPtIjq36=cl1=3J%ng%=CMcf@R>#c zHD?vlGPY|L?hw*G*V!m24;=ydc zXmr=pdTun_XsNU`wAMRf)qF;VAOA>C00UR`DdFCiXKSs*DnqQTZrbL_X7O7q%9`(QOh`bxfkegyp{_#KhvBN!4Y$6gpeG1`bnf|jTXrz6C z(65Ui4gj3777HI=!e+M=c}4W%p#K;<-M$t;?XS|0aEE*)njKfC-~S>!LsUbzIE7t%c{*JE`aJ-MrDAP6>Dh<3 zgimAG{>6a?Jj|*oWndKGduyuwaYAg(wUN**9JU(x_R)yOgrCF|E(x`72qnL~GiUhA zM{l@lH#4JPb0ASaVaUl#nH-)WOCn{vk z+?L0%He(*YX8FuOq#3QM&1p;C&N)^n@oj0mr}+^`%?!BXs?jd0ZPgt@T#`F{k&t|@ zxM;M{=!sE@5PGk2lKBy$`*<9ae!G`K&zy|yqqkdGQWdKQybA&?59%W}`0gy`4wA7@=ifIBba_hF3f zg;1#E+HrPtDK-g!j`RSi97l2UzpHvV3gBIp&}g5HVsp5;IYo$}lRvNd7bFH64FXob zSdK=`Jpg8AMpnXSAnon_-E+e}oO8T@@c#LhSvQBC#&UuxH3o3Wcb67LDFV*^h3I|n zZ8cP=GyOVhG+GcjeU=y;b8;LdFLqoYD9~yMJHsMGmu9%fQYryc`KAAst_(F83%o0Q zfA?rDe>K-4$87R+U?8*G^9te;ML`t_Oa((urG`C!DLM2;R|CnGy7nxHUQq04g@ zgRS>3c?S<8ffVr+fZ+anB2?Kjcrw#j-Ib;GTM^&NqJh`i-_^3~ZCg=;d=X7F(0zfw z6Fecb|0CB$^e=@;Hu0!$3nV)g>E83o0#ZV^u8Uz?yCHpL1q&a5$n4Y@MqG>hBujWf zb1#P#-Ce?J{o`xhe*y8QL=!jj{RkgeC5h17OM6Zt{a5Sa*IN5sKBNuLcweUd0RT+O zOB#uDRO>|~LC;Mz=D_an?dp2vb`dJhdejF;Ve{Y=z|amS!>Rw3gQ1u&&b<`ChR|5s z2wn0FAWcO>MC(HJWYvjs3IN%dU9zGk8-R8zPnG_{N2aTK?kg};&8fMe35}r&)_kLs zdmp2c8352dRzfII6Cyw+3dBk>R*e89zs*{wnT?4CD4fb^*9L<@24wq@g1jNo8}{Sx zUelDZJiX4@90pj;6QtxS&-y{3Q<9zyV0gC)DWez+_DNA4TD-@sBRkg7VVA?W(8@vbo$RQs?VxTEbAouvU z(_28jbCq(zPz|ek4$;^c)@CbzFL_J;w<)GO@~)Jll*2#&5hgFm&yoohkpxKmJ-u}< z9pg{ocn(TWkVh&4w)qhj-0<&6l31HoYXHCy1LS{9UOJzQlfvq!IoPmQY3G13O;s<* z{?+WeC+IV?%|R)yk&DM3oGPNnA@{?c4;kY>CI5eksTuPk(I#*Ao;a*p7|A^Y6N)Gf zr6~_kzSh>S{QoIKMZmKu9;_uRf@N?+?LugsGK7L}Vygoi;9|IeGfjWy{P$>+6;_MP z#mg%t!JcjMU{>d0`z0>@s)>a$bnq*3pqoE*LEE~C0cp)xldc91TyoFMsbv?* zq0vCifk~+ZM5sS~&$e#5Pc$$oW>Q=5QUdL5!I82S#EX0cJ4bb{x87^INPcLJxL_-w*=XYb5Jk_h$i>L zqs8&B!X(lXG3I`R#Q?xq!;dM(;9>XSn0@S87e4VjQfah+Cf4@~2AKW@zpuU~Tr0qM z5{Z!4x!aP3*#A@9SB6E^w(XJ%D1zjJq_m`hloA6A*wLyL3@NT*7N zNOwF)cMr{X4+_5T{*Jx>Zhr7%j#+E&wbpfCabD+nEqK9Tyls@_APxYEAnLv-fvy+a zEM%Ud^b~c<7nKo9KobX}MyHQcL4DirIQTi5=3S*RU^As&XIWP;+ecrY@3(QNc^>JP z1Y$z~$?gX-G`(j7cd*qox$M%m+c5mAS2L#h5Zwz?N5|yRys;qnHgf`=QNri?Pwz|T z0*8QnsJ(t*aO8w=Yc<8GWb-bzp9Dbt&1Xv#$1Y7e4KO*h>98K&Z1f&Y}`JkYc1G7o8&<*1*ZaMh( zo^ARe&plYLx$>CI3K{4{gEr#o2l{X&*fxFTM(xvB*2zM zWKLF!_6PX-4stX&v-QvS3yU!Q9^G#Jfu}LkEEmr!uPAc*WxaBN>QGK&7Sz4@0902o z1-Ly9L-&UVPZWg@(`>5Ocy~rt(gD|2eWPY4#I^og$Yl=#L=QQd&rLBnlrDh;uBRip z?lT3CXSE&KF?pwEI$}O-NGs6OGFhPk2+NrT^a*Cc{TWo(ww}YJ9<-lr4TpjD!&vy} zWC(ltxH1^O(t|91bzMf9##|36nG}=q&QCXd4(0Rb@3b}DoNxCa+7WHnNe;GtAYL64 zyH{@B7Qt}XtuW+ZuwjU`DZm_Lh}esycD;B1l+kFv#2ijWP7vC%bG(&OgJfOrlqpmq ztV|`N7%t!=K=_C+e3C5e_0V>*=459ao!o7BGD4QKR#pG7UBjtQo^J`TvWGy~|6xp= z+WX)mqe8+5A7SHG{3gR9;Jb(v8|vIYvnQEqIJW1mN-= z>!!d_*xbVOaJjOM5Zt-jVC>9?5?qMG#>kHlW?2+NpVIczU?{60E6IC_v4OUv;lrJU zl}V3{J@2LBT1Lc_&p8fiE0B@^JU*Q+R8Sdo8M=h#>ZH*2-y1QZ{$rr-$e5q0YN@iz3V5{`1PG)lWMEVvg>J!=}(2j^MFsAGK$iJySFI|rNz+3w;TiQguAO@TiBuqBh`zU+pcxyTU&*FHk9MQj8Sksqjs)0FSo|(A2UcIsWma}eCuLQk!?F0TL z#z&)Mi+1Kbqyh-2J6$ru+hK6B75j9NBPDZg@4@#P-t73vmHiWZW=dt(>Gf3rJj;Cn z+P8R8c&ru`_brdgk`3y*PMknhUAjVxj9Os79<92C5GwG0tp=q#nx8L6^=ffaY1n1z zfa1qOjAd0H;GayLH;ayN=aCz?udeKG+YKI;Cbc{wZBu?BMqYbf950z$|4qc_$X zi2XyO)!crQO~8;9W&=|<$F4?BN=8Q>spXN(i&V8g2cn4{rX`*_c^Q-;(wJ6-l9Fw5 z)$wrd>Q5TOL69yaI=q_wY39#&lsM58u6Gd5j?UL1k`mk<9Th7jQ|3YRu{a~@MG`}h z*;1CX-}KqkdJQ(?fmz)$PI~f5?0A`4&*Le$27VT@I$0Kx?7z%Umvv~s2nk}rO&a(; zNLTQpNzr^p#%jL6WBX7e&ZeUkeWGf&1g;X3fy!%LxvqZ2UH)ntVPEhro$cZq$U)c! zYsC;>6l}S^c*XWW(uRqFAb7$%my1(jzxapeW}Vg#A(f9ClsVJF$L_3|h%~tn@e~3; zl2R2*<~CEYkmbgbG-ZlEqtg;(t8C(H^fQ47GkB?$d5g$Pd&MO)R?WPD0i1 z3-Vgr1p1uI8VZZwL{O@6H|VLtBQ=F7Z%zC$a>JIOcaQbq-6H7NesRcd$eX^W3|DK+ zgG}er4LG$yaaM~hljwK0S}X)MC>W^Xq->@1US9t{=p-P}C1UY+bp=x#bBik+mVya7 zB_8R@GDsebu3J)Xve#%^xRpf8Hi+`kBG>yb5?$oKR-9pHVJ+=+Hzw9kg|kG%FmfJU zE7%j4F`;uA0Gc;h*|v(T$Hprxl<%PUS%9)AJBC~I504TU0QXu!wNp0NTM`nhe?Ra% zq5Tx9Hv5Hp_U1ofLO(F5-W;z#TlIe+!&jHkmQBUApzfA|&yBLPp-9y*m zXJ_)fti~I}``Dm&v4ng09jfGG9S8(4<9lOGwY_kYvA^DgL{r3njZI8Lgm+%eqcHbu=fnQbv z0k4c@xdX-{38^=)WoKr3>&)B<6GEc|S)9}AH&F8ZPUllNFwo+y3o^E1nGfFA?yMEP zB^h0U{W2NV8)7q&5=%({4`Z%BgYD}3XlM%jshgqaia^sF&24N}cC8>cO|#-Nv~xtp zXZHoBg?RXTKqKnSGEH*1%)h>4AMlN+%9Fw;!@x)*iWbZI$BsdUzc`dm)~jdZ%zdS? zqO?G=-2ZL%5~;#c4fi!N8OAwZYza;hlKAnHrK4r;aSId_4g9z~k zgE2mT&@HSyt%mdH_qCH@fpB8h!{JX6*ka?F+{EM}gDZEp^2_U#&ri$8-FId^%!|dQ z&cR@mqryijdg3W4VBesH4P##3$1F+I7B5AhWwet8 zB0`eGUK1Q&wPkQ=r^)%1wvQj%SU2wn8hbo>H0hU*l?Mn}#(NU0zlm(%ln>we)>6ot zSV_Ig<}u;;nxlk*ob$BG4Xry!{hm`)^j};yS&FG}?s!a)Ngs)mY{ z)Sx9PZOTh>J+n{4`^^FmBUp_Q)4Sy>csrd0%Ljq~GE_h>1Rgv@b{|sLRUQ@>Uu!rq zqW>Wq)^d;AQawAjNbJm|nZvY;XYT%uAsmjO|Yth{G!`eb|;G22c{m4|jm;&7;jsW(PG;Yz3pSzAbQ?{bOY5LYKP@f{n3yL=EWZ zk=$mVW2>vp!m-7g?+H0uV9U^Tz(6LSy-*@McD1``fffv}?b=<-n;r92R@qC$WQDue zH%NrT}z}lOF<3tyc3X7N+BqJy{ zDo!x_X1dB#cCcmpw?{txVPyeS0TVO9OcjZ$`@Cf@DV53`fPF8O5#FiC84OO%)dglX z+Ks>W2ipdC#q)-O?%M)~4-X}6;`7s2SiE;O0HN^>O978sZW_{u?A^Q2^IybD^ow2; zX}z*ROX7C^8zBE@pmJwMB6+ib_~<||{%2DI>vGa=*g7T_UI){5rs$F-X9|6uot$|f z;RRn}(46pFnBZAl7XB*+P*tF?zp$kxQ&2ayNs>8k_{1Z%GVkduW3+c zDWH3DbG@|ObDo>RjQo1UQFml;d`<7G(^j2;zNVT{trhN;>iMase50x{`<(h%$pLdb zk9@4^Mds_iJC$j4U8nJOto;|IgqA700&PEZ0*p2I%+6sM>+lvH4Yl6vDbyDkK?r1X zq46F}+sZ*#t+U@e72BEbQkkmrBEETzRU`ZwCmalNoI?3S46k;z1!E_R)tkMkTiNvJ zeSCIwb03R-$GfGH>f^((eo~@gdoCME%vGN)4(K?m4SNL{UmnFTDWJ<`T@Uk&z_~+ohc5wRQ0g1|HMFkPsP#eB6@l`l=T)%oBfSDL z9R&;IT*PL9p)VIo(+j(-fPhX1kLEyPu5HQ^qRdiXz$;7W-w!V6Iuw79$p%kMz?ukj9w%M#>Rhr7;$So3<4nQ%w_D zGhL|*E!I~#iK@-=xb!=7%5s#GX~Fg*B8ytR^ctubIE0&c)CG$IMzbGE#jHR-1!x|O zqR&=KzHu28#t{K8)7B@Ees%gI{7qc#K8(8yZDbFJEyI9QG9(REfsOVbnbG8%0mQI3 zBoOz~gQ1|gqn6x(Wk=d>hFRfrSv@byg38v86!lWq=r5*d>@BU}$|0;jiR&Z@+DQUu?azyX)xgGtBzZda z$Rqg{z_xOPg;cN7WHUQd@<68;PtWMJcNuZzj)OUet4e%VLFp?dz#Cbd3CxTqJrv=U zj(2pj_Tv=)3+DxlM>>3gqvvh6csFRU?15Tm)qkha|7Gc(3s`>sj=1&J@`DKM;^O*G zUX2l-tl!-RqE)L^;y%J=Z_=hIR>Zc&>W9zfO@K*HBv3xRj8k{B!^8iOGm?DLbS znDQG^9O+&CHAQcmS-5TWYzPr*$UFeb6u5kqmS7Qa7;u(fOic3Gzj5!M-CLc2=vZ{4 zg&7EoUYa&0{qKRoQ8F|YzZ8O#cqA02>hx}}r-}#bI)no}TO)_Itvnu>`$XWc`icTzq5IiiO z;F$U{+jzYncFJR72NMKW9E~)u1R;jMK`jc>fGtTRutkrFICl|7fT2*t$bxTYVWE60 zg$r4l*k`K#T@C?`TRBny^XL4LCuZN9?Zf;%6R6MN+WQg2b1`bl4sS$8xE<+&? zEB-iCgPZ?UN1*oD&DxHXL)Gf!IT*W{Gr~InYr=+DoZeLS`Fnab93bN`zLp7aR-l) zQe0jv3CJ#=c24i=C#E8?(IG~jCCn1N*JKHGcB0zaaG}f65C&ziFFlR@l6Na-rTN)w=&7 zvSI~5?fNVhFR3iJfaejjzq!~*|DvRV2Iv)ejMWZDT9(ii>t_EidLE?&gy+vx*0M?g zX|jKm>{H#}+bIrIY*eX>|8Vnr<>rHYVf{2-%f}}`hg+K@`v5$vCO!tq%cY?B-660` z1^Q&qIRE2Sz@T5QGzGWnZItF0W?%_T$aY!?UFFrI!s`F*%g28zDWVSY7X4ca=ZjCk z&%EnR_Ei4Po?Dkg=&L#<@bS0c^kPNC1X$}^&f?x)8U5qD{9b2*;4z~K*w$)(yfdI* zgh8|Cs$hNvwEIHvK;dPRk+IX#PR7d?FDqEkkk7}uwHYAP8vVo(&2K!wVO$Z;CEk~* zIP_-*&#kv7jZXGmA0E)1k2d(Bm-a}I&n53%mC2x1Rlb3o#?$9+GyqIoCVmpH#X0(5 zYyxyCE5#(K*x9uiUE2&thzAqRzVr`hLeFei(t+I4LF@$}0=8mD?XjM*tPJ+Nh2~ED znXKdSwef1M&MQ1yjZAHuN^$)Y9}ff?xmqcQ-ATN{$Mg+<;_MIYD>DJUcIFIsXa!QU z=FC!qZZPO%8OlFo3Bq_{cvIa0m)6g&iI94+4K>1nbM)s-c6zhG z%fJ_6d!wQ^@9WYHQt7cEAF1OXlqQa`^}}?rcvSdHi$s8npSF}rR+D7-p1O#xlzN!c z+UFc1m`JG(&r6;+k=8yfqlcP~Jxd5H8({+aZ|i(8J4lk(dX!#zu#btAg~b@~lmR<# zhU2B6!|ZD|kUE;#^+I1T{xuewpKsAj?2Z!LdlR^oB1F!7!WCFIE1Z_vNwl>)Esyu} z8UK-nKPVxU?d4esF=|P3-fQRJ<`6o6mzIB`FI^0KV)$TB8pUB8R#UrgY_|DPn)53X z+pludDZaAZkJ>&nAqjvI-jP@zs7%na)p~?+qODrM196i#4KAT)#~y5#@JoKkY^REXcm5 z_{`c#QwWcW{Kx5|^(t-UCc0R@Q+=hc`#v_|9k1$c+gJMk*m(bjYNa?grEM){kWxDx z%&{SMaNnJhW;lqbc_l6$4pUD#yKHgu8L>(-30xgfQ z-d^Q<931=XfaLN>v&it}#?FTaMrn0*lL$Cp4{-x;Rnot+B&*M{nWzV`^?iTQcvhR` zAFdSEOR%zTW2=A<$+2xZ13OGbXCDPbfrxH`w zSfI#-AI*9&HenyfqTz>+_xIcHemtVM-eZ50kMz{zqjeY@e($I}&^Q#O#ZPE`GJjd1 zzv(S0gOJd~9S4K%H|6DQF7+_3-X^FQ^>Zd4FFhimTfNt&XcAA4-hw3@#C9yBG9@To~Z>$JoL$=v`0V;o(w#$`!cXBN5FeCUOFK_$H8>F77 zuWu<2p)QS1%nPBC3wu^9hVogDS3EGg|0}WE(hY6YIU^PbT8X}QgqME1{S$?rhi5PD zx&yy^iGmrhkBC(~T(;WTw7|f;p%(G> zajVEl06Te?c~yN?fj!x1v?y86xJ6hE&W#)C85t4^iH{hxvX+-E?y<0N$yyjK=ezTm z4*{lPotc8?9)=qb>^gu+TL=DlCHVD!({cjEhN@Dl?Su2Ew4U-|b=TN|8$1SW)S^L2 zmmi3rqEc(6ILhEk#n{ z*9NBL<;l%=Cv&3b_Z(niVk#>u8@6Ya&$DxK#^&U(ki8Hu2^Bud(o{z$*4-!_(z{X0 z!FHh+y@>VGs;Da_5*#ZzcdAt)oDVgQrY0jpY%Lod|4pTzTafMbc~@rWD|}e*9lM-) zf)JEG>rh=1+z!BgJ+7`9Vc3u@D|*VfD~*ym7BB2o1$uH_H@Ohh8u#sIo)UVxbvy(^ znSlx@WXWL&dkWlK$LM1aHqMc?aIk!NtWz z+YOj8pEZ3S(D3r1#Q#kIAvEC_6&S4HTJcr?VJikDD5$Tya2w`tFDguL{`*~7TYGGR zw)05?Xgs*A;@9qF`5eu~>Kl)hS#O^rrj8Zoh0sC>u*!f3tM@u~=`m^$DJ?5&C)M}7 z>w#F0-rsjQ*Q}AG$p96rVU;_x4($!VvJ@lX&|#kPLg>?R#g zI&Dsi-eYHv2Ax$=5&-uAA3mvBLp%MCpTY52do(NbaC?<7&Z=D)t8_GTU#8QFU7+5Qgl9BuU1 zRy%8<)RQMqV)r!iwHQD7HtwR`RjXmFYLrn%(`&&fjo<%!^P zPqxGAjYZ1Og9W;*pCsNB#C8xLV`c- z#hrjIGit{cs-GM2L5bJ-!PYO6`l0D3eIS{e1=Itc4L}dD5RS*w=-N^EMeO$+pmc8r zVm|6%v!^K5P(CyZdypb4^wITYU{1}X0|In@*B!=|j*ei`ii&tx&84oJAQ8cydjd^l z-84P+*45R8D7t}gU-)XX6V9EQT-)Z8=Ps)K`)D@hj|~K0EA;E# zPP*~!){K*1F2ZYKqx6-IitA3oy4dLq;PTO*0p%4koeClOIoSYO&$_!IB9m#g)Q5+M zlcC^YO8M2))c7U|Pi(xpNoKeVn{Xa-hPOQUw{@Z+Jen58-nc~h9&lz20%%7>W|UUi zYHCoHQV)bpTUQs4$d7CW9?_Is$cq;r`}_M_vsE(X3iX|xONoey;h2C@z=mw{e);m{ z!`RMpzT2Mk5kOcvN^uW10K5l^Na5Z3Sy@ewA01cDqs}Kq&XMh{Is24 zbF)NSTbsQbdo~rCAKFovXf4JAIyySLpuMG;rne?KulI=}@L!VxN9tyNul-`i!S5>4 z7aijt^ci1?G@SqN!i`7~d(9W3|8(pPsK7zoGJBeHS$%@(kLtYOyR07LdBo} zlKubZ2@N3z6IIT|)?;OQ_ufoe><(z?M@KR#1|^)2l$drs0RA zAOJl4DhPg}7_SWG*D9OfLcsic+S1a}y9++^on$4qP^DUp*^r&bVL&f4^dMBa?A9W^ z7=fg3iwUEGwhB^YPqU0RFfgF3scFs;Uj>*h(7`;-ZP1R7Tk|}ei!HVs;(&8OgKNN+ zO06XVa{+mFVmb9WfG6SEk&%^!PB-|~6%Pyzb(C}qv$lj%$4W;swgJ^ZXJnGIUs)Ni zJ7U5W+7dz;k(|s4iY-L`f>)m|yryIGqOLYb1~kt7krF6!AqAM=r|B3N7`QwS>Wj!~ z1>RfINaDhaTvlVf@4l)V`px|J*XZ2c-BX3AU%q^q;g5@pTMQg3{Qe97#eYIUV0n9u z<+1Df!yO^2&>A4fK^1L%N5*ZuHd<`$cVT0F3K3`zi=q82GP!P-~6UbafLwPr%#N4mMvL&d1xPRaRs=O;TEncvO{^6ZQ!VJqyBh4ZJ|9h@CMKR%fYm!!H4nsqInQL(Wd zz^~7N59dRkPg?Jqz-z{)FXnpCH~@Fhira>iKT7eVQg(ywU@Ijq{<3m)If&dQ?A^PY zNI4sYpxb@gP!N*HfM%SCh^Y1K^f)~yN2a&87v%YBWmY4!BIhS1@WNX9vtQz{61m%I zucI#zN;C|e!O}11bsu;QNPe3genpw@zJwWy!=Bqa%fiD2*~J~DxcTx<08|^lm*~*g!qhb|HHxHW2pcM;I}{{p8x!Cu@d;K?yl$P$iu*{ zzxekJ;OE$cAp?Iu`M;kMg params(xptr_params); double *result_ptr = (double *)result.begin(); char *buf = (char *)status_buffer.begin(); - // status = solver->solve(data, params, result_ptr, buf, log_dir.get_cstring()); } From 71e48ae1abbad3739cad558621783512fc841fa7 Mon Sep 17 00:00:00 2001 From: dselivanov Date: Tue, 15 May 2018 16:07:32 +0400 Subject: [PATCH 06/14] symlink for headers --- r-package/DESCRIPTION | 1 + r-package/inst/include | 1 + r-package/src/Makevars | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) create mode 120000 r-package/inst/include diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index 9185d0e..de9c8dd 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -11,6 +11,7 @@ Description: Efficient Solver for Linear Models with Various License: GPL (>= 2) Encoding: UTF-8 LazyData: true +SystemRequirements: GNU make Depends: R (>= 3.1.0), methods diff --git a/r-package/inst/include b/r-package/inst/include new file mode 120000 index 0000000..3611dd2 --- /dev/null +++ b/r-package/inst/include @@ -0,0 +1 @@ +../../include/ \ No newline at end of file diff --git a/r-package/src/Makevars b/r-package/src/Makevars index 0d22719..fecaffe 100644 --- a/r-package/src/Makevars +++ b/r-package/src/Makevars @@ -11,4 +11,4 @@ all: $(SHLIB) clean: @rm -f $(OBJECTS) -PKG_CXXFLAGS = -I../../include +PKG_CXXFLAGS = -I../inst/include From ae8e54ef9dd63c8a9512dc7781329bf8a79e2a9b Mon Sep 17 00:00:00 2001 From: dselivanov Date: Tue, 15 May 2018 17:39:06 +0400 Subject: [PATCH 07/14] add macro to allow indexing with 4/8 byte words also tweak makevars --- include/blitzml/base/common.h | 9 +++++++-- r-package/DESCRIPTION | 2 ++ r-package/src/Makevars | 4 +++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/blitzml/base/common.h b/include/blitzml/base/common.h index b55eb67..c9b44a2 100644 --- a/include/blitzml/base/common.h +++ b/include/blitzml/base/common.h @@ -18,10 +18,15 @@ namespace BlitzML { #endif typedef double value_t; -//typedef std::size_t size_t; + +#ifdef BLITZML_32BIT_WORD typedef uint32_t size_t; +#else +typedef std::size_t size_t; +#endif + typedef uint32_t index_t; -//typedef int index_t; + typedef std::vector::iterator index_itr; typedef std::vector::iterator value_itr; typedef std::vector::const_iterator const_index_itr; diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index de9c8dd..dec1129 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -20,5 +20,7 @@ Imports: R6, Rcpp (>= 0.12.0), futile.logger +Suggests: + data.table LinkingTo: Rcpp RoxygenNote: 6.0.1 diff --git a/r-package/src/Makevars b/r-package/src/Makevars index fecaffe..33bb164 100644 --- a/r-package/src/Makevars +++ b/r-package/src/Makevars @@ -2,6 +2,8 @@ CXX_STD = CXX11 PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) +# https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Writing-portable-packages +# potentially it can make sense to remove 'wildcard' SOURCES=$(wildcard base/*.cpp dataset/*.cpp smooth_loss/*.cpp sparse_linear/*.cpp) OBJECTS = wrap_sparse_solver.o wrap_sparse_dataset.o RcppExports.o $(SOURCES:.cpp=.o) @@ -11,4 +13,4 @@ all: $(SHLIB) clean: @rm -f $(OBJECTS) -PKG_CXXFLAGS = -I../inst/include +PKG_CXXFLAGS = -I../inst/include -DBLITZML_32BIT_WORD From 0e171c506c51b00b240883091eb5a6778670790b Mon Sep 17 00:00:00 2001 From: dselivanov Date: Wed, 16 May 2018 20:58:34 +0400 Subject: [PATCH 08/14] upd readme --- r-package/README.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/r-package/README.md b/r-package/README.md index 16051e7..3541918 100644 --- a/r-package/README.md +++ b/r-package/README.md @@ -7,7 +7,7 @@ git clone https://github.com/dselivanov/BlitzML.git R CMD INSTALL r-package ``` -- At the moment package supports only logistic regressionon on sparse matrices. +- At the moment package supports only logistic regression on sparse matrices in CSC format. ## Example @@ -16,21 +16,23 @@ Here is exmple on sentiment classification. We perform 4-fold cross-validation a library(blitzml) library(text2vec) library(futile.logger) -flog.threshold(WARN, "blitzml") +flog.threshold(INFO, "blitzml") it = itoken(movie_review$review, tolower, word_tokenizer) dtm = create_dtm(it, hash_vectorizer(2**14)) system.time({ set.seed(1) - model = LassoLogisticRegressionBlitzML$new(loss = "logistic", lambda = "auto", + model = LassoLogisticRegressionBlitzML$new(loss = "logistic", + lambda = "auto", tol = 1e-2, - use_working_sets = F) + use_working_sets = TRUE) cv_stat = model$cross_validate(dtm, movie_review$sentiment, n_folds = 8, cores = 1) }) -#INFO [2018-05-15 12:31:26] found max lambda: 1699.562800 + +#INFO [2018-05-16 20:55:51] found max lambda: 1699.562800 # user system elapsed -# 4.956 0.221 5.212 +# 3.935 0.117 4.091 ``` And visialization of lambda path: From bc0eca4e2bd74f169fe6662aa93d823f89802d84 Mon Sep 17 00:00:00 2001 From: dselivanov Date: Thu, 21 Jun 2018 08:58:52 +0400 Subject: [PATCH 09/14] fix compiler warnings --- src/sparse_linear/logreg_solver.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sparse_linear/logreg_solver.cpp b/src/sparse_linear/logreg_solver.cpp index 4a9c765..a65fd8c 100644 --- a/src/sparse_linear/logreg_solver.cpp +++ b/src/sparse_linear/logreg_solver.cpp @@ -108,11 +108,11 @@ void SparseLogRegSolver::update_bias(int max_newton_itr) { return; } - value_t exp_delta_total = 1.0; + value_t exp_delta_total = 1.0; if (is_vector_const(exp_Aomega)) { // Special case closed-form solution: // (this case occurs when we initialize model as all zeros) - exp_delta_total = (exp_Aomega[0] * num_positive_labels) / + exp_delta_total = (exp_Aomega[0] * num_positive_labels) / (num_examples - num_positive_labels); scale_vector(exp_Aomega, exp_delta_total); max_newton_itr = 0; @@ -148,7 +148,7 @@ void SparseLogRegSolver::update_bias(int max_newton_itr) { value_t change = log(exp_delta_total); bias += change; - + sum_x = 0.; for (index_t j = 0; j < num_examples; ++j) { Aomega[j] += change; @@ -167,7 +167,7 @@ void SparseLogRegSolver::perform_backtracking() { std::vector low_exp_Aomega = exp_Aomega; sum_x = 0.; - for (int j = 0; j < num_examples; ++j) { + for (index_t j = 0; j < num_examples; ++j) { exp_Aomega[j] = exp(Aomega[j] + Delta_Aomega[j] + Delta_bias); x[j] = compute_x_value(j); sum_x += x[j]; @@ -190,7 +190,7 @@ void SparseLogRegSolver::perform_backtracking() { low_exp_Aomega[j] < 1e15 && low_exp_Aomega[j] > 1e-15) { exp_Aomega[j] = sqrt(high_exp_Aomega[j] * low_exp_Aomega[j]); } else { - exp_Aomega[j] = exp(Aomega[j] + + exp_Aomega[j] = exp(Aomega[j] + step_size * (Delta_Aomega[j] + Delta_bias)); } x[j] = compute_x_value(j); @@ -215,8 +215,8 @@ void SparseLogRegSolver::perform_backtracking() { } } - for (const_index_itr ind = ws.begin_indices(); - ind != ws.end_indices(); + for (const_index_itr ind = ws.begin_indices(); + ind != ws.end_indices(); ++ind) { index_t i = ws.ith_member(*ind); omega[i] += step_size * Delta_omega[*ind]; @@ -245,7 +245,7 @@ value_t SparseLogRegSolver::compute_dual_obj() const { } -void SparseLogRegSolver::update_subproblem_obj_vals() { +void SparseLogRegSolver::update_subproblem_obj_vals() { obj_vals.set_dual_obj(compute_dual_obj()); value_t gap = 0.; @@ -256,7 +256,7 @@ void SparseLogRegSolver::update_subproblem_obj_vals() { if (log_kappa != log_kappa) { throw log_kappa; } - for (int j = 0; j < num_examples; ++j) { + for (index_t j = 0; j < num_examples; ++j) { value_t prob, mid_term, last_term; if (is_positive_label[j]) { prob = kappa_x / (1 + exp_Aomega[j]); From c6495534106fe2ba00222a43dc21579bc8a28a7c Mon Sep 17 00:00:00 2001 From: dselivanov Date: Thu, 21 Jun 2018 10:28:24 +0400 Subject: [PATCH 10/14] finish main functionality - added all available loss functions, added dense matrix support --- r-package/DESCRIPTION | 3 +- r-package/NAMESPACE | 2 +- r-package/R/RcppExports.R | 20 ++- r-package/R/dataset.R | 13 ++ r-package/R/solver.R | 119 ++++++++++++------ r-package/R/utils.R | 16 +-- r-package/R/zzz.R | 12 +- r-package/README.md | 2 +- r-package/src/Makevars | 2 +- r-package/src/RcppExports.cpp | 55 ++++++-- ...ap_sparse_dataset.cpp => wrap_dataset.cpp} | 13 +- ...wrap_sparse_solver.cpp => wrap_solver.cpp} | 33 +++-- 12 files changed, 204 insertions(+), 86 deletions(-) create mode 100644 r-package/R/dataset.R rename r-package/src/{wrap_sparse_dataset.cpp => wrap_dataset.cpp} (57%) rename r-package/src/{wrap_sparse_solver.cpp => wrap_solver.cpp} (69%) diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index dec1129..2c47748 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -19,8 +19,7 @@ Imports: Matrix, R6, Rcpp (>= 0.12.0), - futile.logger -Suggests: + futile.logger, data.table LinkingTo: Rcpp RoxygenNote: 6.0.1 diff --git a/r-package/NAMESPACE b/r-package/NAMESPACE index cef907a..23828fc 100644 --- a/r-package/NAMESPACE +++ b/r-package/NAMESPACE @@ -1,6 +1,6 @@ # Generated by roxygen2: do not edit by hand -export(LassoLogisticRegressionBlitzML) +export(BlitzMLLassoRegression) export(auc) import(Matrix) importFrom(Rcpp,sourceCpp) diff --git a/r-package/R/RcppExports.R b/r-package/R/RcppExports.R index a5dd362..63e6cbb 100644 --- a/r-package/R/RcppExports.R +++ b/r-package/R/RcppExports.R @@ -5,16 +5,28 @@ BlitzML_new_sparse_dataset <- function(x, y) { .Call(`_blitzml_BlitzML_new_sparse_dataset`, x, y) } -BlitzML_new_sparse_logreg_solver <- function() { - .Call(`_blitzml_BlitzML_new_sparse_logreg_solver`) +BlitzML_new_dense_dataset <- function(x, y) { + .Call(`_blitzml_BlitzML_new_dense_dataset`, x, y) +} + +BlitzML_new_solver <- function() { + .Call(`_blitzml_BlitzML_new_solver`) +} + +BlitzML_new_logreg_solver <- function() { + .Call(`_blitzml_BlitzML_new_logreg_solver`) +} + +BlitzML_new_linear_solver <- function() { + .Call(`_blitzml_BlitzML_new_linear_solver`) } BlitzML_new_parameters <- function(params) { .Call(`_blitzml_BlitzML_new_parameters`, params) } -BlitzML_sparse_linear_solver_compute_max_l1_penalty <- function(xptr_solver, xptr_dataset, xptr_params) { - .Call(`_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty`, xptr_solver, xptr_dataset, xptr_params) +BlitzML_solver_compute_max_l1_penalty <- function(xptr_solver, xptr_dataset, xptr_params) { + .Call(`_blitzml_BlitzML_solver_compute_max_l1_penalty`, xptr_solver, xptr_dataset, xptr_params) } BlitzML_solve_problem <- function(xptr_solver, xptr_dataset, xptr_params, result, status_buffer, log_dir) { diff --git a/r-package/R/dataset.R b/r-package/R/dataset.R new file mode 100644 index 0000000..33c6604 --- /dev/null +++ b/r-package/R/dataset.R @@ -0,0 +1,13 @@ +BlitzML_create_dataset = function(x, y) { + + if(!is.numeric(y)) stop("'y' should be numeric vector") + + if(!(inherits(x, "matrix") || inherits(x, "dgCMatrix"))) + stop("'x' should inherit from 'matrix' or 'dgCMatrix'") + + if(inherits(x, "dgCMatrix")) { + BlitzML_new_sparse_dataset(x, y) + } else { + BlitzML_new_dense_dataset(x, y) + } +} diff --git a/r-package/R/solver.R b/r-package/R/solver.R index 63cf494..9ac1dc3 100644 --- a/r-package/R/solver.R +++ b/r-package/R/solver.R @@ -1,13 +1,8 @@ -#' @useDynLib "blitzml", .registration=TRUE -#' @importFrom Rcpp sourceCpp -#' @import Matrix -#' @importFrom futile.logger flog.info flog.debug flog.trace - #' @export -LassoLogisticRegressionBlitzML = R6::R6Class( - "LassoLogisticRegressionBlitzML", +BlitzMLLassoRegression = R6::R6Class( + "BlitzMLLassoRegression", public = list( - initialize = function(loss = c("squared", "huber", "logistic"), + initialize = function(loss = c("squared", "logistic"), #c("squared", "huber", "logistic", "squared_hinge", "smoothed_hinge"), lambda = "auto", n_lambda = 20, lambda_min_fraction = 0.0001, @@ -16,49 +11,58 @@ LassoLogisticRegressionBlitzML = R6::R6Class( tol = 1e-3, max_time = 60, max_iter = 10000, - use_working_sets = TRUE, log_dir = tempdir()) { - if(loss != "logistic") - stop("only 'logistic' loss supported at the moment") + + loss = match.arg(loss) + check_loss(loss) + + stopifnot( + is.numeric(lambda_min_fraction) && + is.numeric(lambda_max_fraction) && + is.numeric(n_lambda) && + is.numeric(tol) && + is.numeric(max_time) && + is.numeric(max_iter) && + is.logical(include_bias_term) + ) stopifnot(is.numeric(lambda) || identical(lambda, "auto")) - if(is.numeric(lambda)) - lambda = sort(lambda) + if(is.numeric(lambda)) lambda = sort(lambda) private$n_lambda = n_lambda[[1]] private$lambda_min_fraction = lambda_min_fraction[[1]] private$lambda_max_fraction = lambda_max_fraction[[1]] private$tol = tol private$max_iter = max_iter - private$use_working_sets= use_working_sets private$max_time = max_time - private$loss = match.arg(loss) + private$loss = loss private$log_dir = log_dir - # loss_index: - # 0 - squared - # 1 - huber - # 2 - logistic private$params = list(lambda = lambda, undefined_param = 0, include_bias_term = as.numeric(include_bias_term), - loss_index = match(private$loss, private$supported_losses) - 1L) + loss_index = encode_loss(private$loss), + loss = private$loss) + futile.logger::flog.debug("params : %s", paste(names(private$params), private$params, sep = "=", collapse = ", ")) }, fit = function(x, y, ...) { stopifnot(is.numeric(y)) - stopifnot(inherits(x, "dgCMatrix")) + if(inherits(x, "sparseMatrix")) { + x = as(x, "dgCMatrix") + } else { + if(! inherits(x, "matrix")) + stop("'x' should inherit from 'matrix' or 'Matrix::sparseMatrix'") + } + stopifnot(nrow(x) == length(y)) + solver = BlitzML_create_solver(private$loss) - solver = switch (private$loss, - logistic = BlitzML_new_sparse_logreg_solver(), - stop(sprintf("solver for '%s' loss function is not supported yet", private$loss)) - ) # BlitzML requirement - positive classes encoded as 1 and negative as -1 # FIXME this is for logistic solver only! - y = ifelse(y > 0, 1, -1) + if(private$loss == "logistic") y = ifelse(y > 0, 1, -1) BlitzML_set_tolerance(solver, private$tol) - BlitzML_set_use_working_sets(solver, private$use_working_sets) + BlitzML_set_use_working_sets(solver, TRUE) BlitzML_set_max_time(solver, private$max_time) BlitzML_set_max_iterations(solver, private$max_iter) @@ -68,7 +72,7 @@ LassoLogisticRegressionBlitzML = R6::R6Class( # https://github.com/tbjohns/BlitzML/blob/93f80a83e206001817a0b1a0b81db6455ef44a87/python-package/_sparse_linear.py#L141 result_buffer = numeric(num_features + 1 + num_examples + 2) - dataset = BlitzML_new_sparse_dataset(x, y) + dataset = BlitzML_create_dataset(x, y) if(is.null(self$lambda_seq)) { if(identical(private$params$lambda, "auto")) { @@ -79,9 +83,10 @@ LassoLogisticRegressionBlitzML = R6::R6Class( } res = lapply(self$lambda_seq, function(lambda) { - status_buffer = raw(64) + status_buffer = raw(STATUS_BUFFER_SIZE) start = Sys.time() params = c(lambda = lambda, + # FIXME https://github.com/tbjohns/BlitzML/blob/93f80a83e206001817a0b1a0b81db6455ef44a87/python-package/_sparse_linear.py#L134 undefined_param = private$params$undefined_param, include_bias_term = private$params$include_bias_term, loss_index = private$params$loss_index) @@ -115,7 +120,9 @@ LassoLogisticRegressionBlitzML = R6::R6Class( predict = function(x, ...) { # add first dummy column of ones in order to add biases via matrix multiplication x = cbind(rep(1, nrow(x)), x) - as.matrix(sigmoid(x %*% self$coef)) + res = x %*% self$coef + if(private$loss == "logistic") res = sigmoid(res) + as.matrix(res) }, cross_validate = function(x, y, fold_id = NULL, @@ -161,7 +168,7 @@ LassoLogisticRegressionBlitzML = R6::R6Class( ret }, mc.cores = cores, ...) ret = do.call(rbind, res) - class(ret) = c("cv.LassoLogisticRegressionBlitzML", class(ret)) + class(ret) = c("cv.BlitzMLLassoRegression", class(ret)) # attr(ret, "coef") = lapply(res, function(x) attr(x, "coef")) ret }, @@ -170,14 +177,17 @@ LassoLogisticRegressionBlitzML = R6::R6Class( ), private = list( generate_lambda_seq = function(x, y) { - lambda_max = find_max_lambda(x, y, params = c(0, 0, private$params$include_bias_term, private$params$loss_index)) + # lambda_max = find_max_lambda(x, y, params = c(0, 0, private$params$include_bias_term, private$params$loss_index)) + lambda_max = find_max_lambda(x, y, private$params) flog.info("found max lambda: %f", lambda_max) - 10**seq(log10(private$lambda_min_fraction * lambda_max), - log10(private$lambda_max_fraction * lambda_max), - length.out = private$n_lambda) + lambda_seq = seq( + log10(private$lambda_min_fraction * lambda_max), + log10(private$lambda_max_fraction * lambda_max), + length.out = private$n_lambda + ) + lambda_seq = 10 ** lambda_seq + lambda_seq }, - - supported_losses = c("squared", "huber", "logistic"), params = NULL, loss = NULL, log_dir = NULL, @@ -186,7 +196,38 @@ LassoLogisticRegressionBlitzML = R6::R6Class( n_lambda = NULL, tol = NULL, max_iter = NULL, - max_time = NULL, - use_working_sets = NULL + max_time = NULL ) ) + +BlitzML_create_solver = function(loss) { + check_loss(loss) + switch (loss, + squared = BlitzML_new_linear_solver(), + huber = BlitzML_new_solver(), + logistic = BlitzML_new_logreg_solver(), + squared_hinge = BlitzML_new_solver(), + smoothed_hinge = BlitzML_new_solver(), + stop(sprintf("solver for '%s' loss function is not supported yet", loss)) + ) +} + +check_loss = function(loss) { + if(!(loss %in% IMPLEMENTED_LOSS_FUNCTIONS)) + stop(sprintf("only %s loss supported at the moment", paste(paste("'", IMPLEMENTED_LOSS_FUNCTIONS, "'", sep = ""), collapse = "/"))) + invisible(TRUE) +} + +encode_loss = function(x) { + match(x, AVAILABLE_LOSS_FUNCTIONS) - 1L # loss codes start from 0 in BlitzML +} + +find_max_lambda = function(x, y, params) { + solver = BlitzML_create_solver(params$loss) + dataset = BlitzML_create_dataset(x, y) + # https://github.com/tbjohns/BlitzML/blob/93f80a83e206001817a0b1a0b81db6455ef44a87/python-package/_sparse_linear.py#L42 + params_vec = c(0, 0, params$include_bias_term, params$loss_index) + params_vec = BlitzML_new_parameters(params_vec) + + BlitzML_solver_compute_max_l1_penalty(solver, dataset, params_vec) +} diff --git a/r-package/R/utils.R b/r-package/R/utils.R index acf0af3..9c69b90 100644 --- a/r-package/R/utils.R +++ b/r-package/R/utils.R @@ -1,25 +1,11 @@ -find_max_lambda = function(x, y, params = c(0, 0, 1, 2)) { - solver = BlitzML_new_sparse_logreg_solver() - dataset = BlitzML_new_sparse_dataset(x, y) - params = BlitzML_new_parameters(params) - BlitzML_sparse_linear_solver_compute_max_l1_penalty(solver, dataset, params) -} - sigmoid = function(x) { 1 / (1 + exp(-x)) } -fast_rank = function(...) { - if(require(data.table)) - data.table::frank(...) - else - rank(...) -} - #' @export auc = function (actual, predicted) { actual = as.integer(actual > 0) - rprob = fast_rank(predicted) + rprob = data.table::frank(predicted) n1 = sum(actual) n0 = length(actual) - n1 u = sum(rprob[actual == 1]) - n1 * (n1 + 1)/2 diff --git a/r-package/R/zzz.R b/r-package/R/zzz.R index 03dd43d..b8f6bb6 100644 --- a/r-package/R/zzz.R +++ b/r-package/R/zzz.R @@ -1,3 +1,9 @@ -.onAttach = function(libname, pkgname) { - suppressPackageStartupMessages(fast_rank(c(1, 2, 3))) -} +#' @useDynLib "blitzml", .registration=TRUE +#' @importFrom Rcpp sourceCpp +#' @import Matrix +#' @importFrom futile.logger flog.info flog.debug flog.trace + +STATUS_BUFFER_SIZE = 64L + +AVAILABLE_LOSS_FUNCTIONS = c("squared", "huber", "logistic", "squared_hinge", "smoothed_hinge") +IMPLEMENTED_LOSS_FUNCTIONS = c("squared", "logistic") diff --git a/r-package/README.md b/r-package/README.md index 3541918..15ead0b 100644 --- a/r-package/README.md +++ b/r-package/README.md @@ -23,7 +23,7 @@ dtm = create_dtm(it, hash_vectorizer(2**14)) system.time({ set.seed(1) - model = LassoLogisticRegressionBlitzML$new(loss = "logistic", + model = BlitzMLLassoRegression$new(loss = "logistic", lambda = "auto", tol = 1e-2, use_working_sets = TRUE) diff --git a/r-package/src/Makevars b/r-package/src/Makevars index 33bb164..db37275 100644 --- a/r-package/src/Makevars +++ b/r-package/src/Makevars @@ -6,7 +6,7 @@ PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) # potentially it can make sense to remove 'wildcard' SOURCES=$(wildcard base/*.cpp dataset/*.cpp smooth_loss/*.cpp sparse_linear/*.cpp) -OBJECTS = wrap_sparse_solver.o wrap_sparse_dataset.o RcppExports.o $(SOURCES:.cpp=.o) +OBJECTS = wrap_solver.o wrap_dataset.o RcppExports.o $(SOURCES:.cpp=.o) all: $(SHLIB) diff --git a/r-package/src/RcppExports.cpp b/r-package/src/RcppExports.cpp index 6fd79c2..aff46f7 100644 --- a/r-package/src/RcppExports.cpp +++ b/r-package/src/RcppExports.cpp @@ -17,13 +17,45 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// BlitzML_new_sparse_logreg_solver -SEXP BlitzML_new_sparse_logreg_solver(); -RcppExport SEXP _blitzml_BlitzML_new_sparse_logreg_solver() { +// BlitzML_new_dense_dataset +SEXP BlitzML_new_dense_dataset(const Rcpp::NumericMatrix& x, const Rcpp::NumericVector& y); +RcppExport SEXP _blitzml_BlitzML_new_dense_dataset(SEXP xSEXP, SEXP ySEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - rcpp_result_gen = Rcpp::wrap(BlitzML_new_sparse_logreg_solver()); + Rcpp::traits::input_parameter< const Rcpp::NumericMatrix& >::type x(xSEXP); + Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type y(ySEXP); + rcpp_result_gen = Rcpp::wrap(BlitzML_new_dense_dataset(x, y)); + return rcpp_result_gen; +END_RCPP +} +// BlitzML_new_solver +SEXP BlitzML_new_solver(); +RcppExport SEXP _blitzml_BlitzML_new_solver() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(BlitzML_new_solver()); + return rcpp_result_gen; +END_RCPP +} +// BlitzML_new_logreg_solver +SEXP BlitzML_new_logreg_solver(); +RcppExport SEXP _blitzml_BlitzML_new_logreg_solver() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(BlitzML_new_logreg_solver()); + return rcpp_result_gen; +END_RCPP +} +// BlitzML_new_linear_solver +SEXP BlitzML_new_linear_solver(); +RcppExport SEXP _blitzml_BlitzML_new_linear_solver() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(BlitzML_new_linear_solver()); return rcpp_result_gen; END_RCPP } @@ -38,16 +70,16 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// BlitzML_sparse_linear_solver_compute_max_l1_penalty -double BlitzML_sparse_linear_solver_compute_max_l1_penalty(SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params); -RcppExport SEXP _blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty(SEXP xptr_solverSEXP, SEXP xptr_datasetSEXP, SEXP xptr_paramsSEXP) { +// BlitzML_solver_compute_max_l1_penalty +double BlitzML_solver_compute_max_l1_penalty(SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params); +RcppExport SEXP _blitzml_BlitzML_solver_compute_max_l1_penalty(SEXP xptr_solverSEXP, SEXP xptr_datasetSEXP, SEXP xptr_paramsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); Rcpp::traits::input_parameter< SEXP >::type xptr_dataset(xptr_datasetSEXP); Rcpp::traits::input_parameter< SEXP >::type xptr_params(xptr_paramsSEXP); - rcpp_result_gen = Rcpp::wrap(BlitzML_sparse_linear_solver_compute_max_l1_penalty(xptr_solver, xptr_dataset, xptr_params)); + rcpp_result_gen = Rcpp::wrap(BlitzML_solver_compute_max_l1_penalty(xptr_solver, xptr_dataset, xptr_params)); return rcpp_result_gen; END_RCPP } @@ -113,9 +145,12 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_blitzml_BlitzML_new_sparse_dataset", (DL_FUNC) &_blitzml_BlitzML_new_sparse_dataset, 2}, - {"_blitzml_BlitzML_new_sparse_logreg_solver", (DL_FUNC) &_blitzml_BlitzML_new_sparse_logreg_solver, 0}, + {"_blitzml_BlitzML_new_dense_dataset", (DL_FUNC) &_blitzml_BlitzML_new_dense_dataset, 2}, + {"_blitzml_BlitzML_new_solver", (DL_FUNC) &_blitzml_BlitzML_new_solver, 0}, + {"_blitzml_BlitzML_new_logreg_solver", (DL_FUNC) &_blitzml_BlitzML_new_logreg_solver, 0}, + {"_blitzml_BlitzML_new_linear_solver", (DL_FUNC) &_blitzml_BlitzML_new_linear_solver, 0}, {"_blitzml_BlitzML_new_parameters", (DL_FUNC) &_blitzml_BlitzML_new_parameters, 1}, - {"_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty", (DL_FUNC) &_blitzml_BlitzML_sparse_linear_solver_compute_max_l1_penalty, 3}, + {"_blitzml_BlitzML_solver_compute_max_l1_penalty", (DL_FUNC) &_blitzml_BlitzML_solver_compute_max_l1_penalty, 3}, {"_blitzml_BlitzML_solve_problem", (DL_FUNC) &_blitzml_BlitzML_solve_problem, 6}, {"_blitzml_BlitzML_set_tolerance", (DL_FUNC) &_blitzml_BlitzML_set_tolerance, 2}, {"_blitzml_BlitzML_set_max_time", (DL_FUNC) &_blitzml_BlitzML_set_max_time, 2}, diff --git a/r-package/src/wrap_sparse_dataset.cpp b/r-package/src/wrap_dataset.cpp similarity index 57% rename from r-package/src/wrap_sparse_dataset.cpp rename to r-package/src/wrap_dataset.cpp index ce084c0..3622cbb 100644 --- a/r-package/src/wrap_sparse_dataset.cpp +++ b/r-package/src/wrap_dataset.cpp @@ -1,7 +1,7 @@ #include #include "mapped_csc_matrix.h" #include - +#include // [[Rcpp::export]] SEXP BlitzML_new_sparse_dataset(const Rcpp::S4 &x, @@ -14,3 +14,14 @@ SEXP BlitzML_new_sparse_dataset(const Rcpp::S4 &x, Rcpp::XPtr> ptr(dataset, true); return ptr; } + +// [[Rcpp::export]] +SEXP BlitzML_new_dense_dataset(const Rcpp::NumericMatrix &x, + const Rcpp::NumericVector &y) { + double *x_ptr = (double *)x.begin(); + double *y_ptr = (double *)y.begin(); + BlitzML::DenseDataset* dataset = + new BlitzML::DenseDataset(x_ptr, x.nrow(), x.ncol(), y_ptr, y.size()); + Rcpp::XPtr> ptr(dataset, true); + return ptr; +} diff --git a/r-package/src/wrap_sparse_solver.cpp b/r-package/src/wrap_solver.cpp similarity index 69% rename from r-package/src/wrap_sparse_solver.cpp rename to r-package/src/wrap_solver.cpp index 4712ede..1eabe40 100644 --- a/r-package/src/wrap_sparse_solver.cpp +++ b/r-package/src/wrap_solver.cpp @@ -7,13 +7,28 @@ #include #include + +// [[Rcpp::export]] +SEXP BlitzML_new_solver() { + BlitzML::SparseLinearSolver* solver = new BlitzML::SparseLinearSolver(); + Rcpp::XPtr ptr(solver, true); + return ptr; +} + // [[Rcpp::export]] -SEXP BlitzML_new_sparse_logreg_solver() { +SEXP BlitzML_new_logreg_solver() { BlitzML::SparseLogRegSolver* solver = new BlitzML::SparseLogRegSolver(); Rcpp::XPtr ptr(solver, true); return ptr; } +// [[Rcpp::export]] +SEXP BlitzML_new_linear_solver() { + BlitzML::LassoSolver* solver = new BlitzML::LassoSolver(); + Rcpp::XPtr ptr(solver, true); + return ptr; +} + // [[Rcpp::export]] SEXP BlitzML_new_parameters(const Rcpp::NumericVector params) { BlitzML::Parameters* blitz_params = new BlitzML::Parameters((double*)params.begin(), params.size() ); @@ -22,12 +37,12 @@ SEXP BlitzML_new_parameters(const Rcpp::NumericVector params) { } // [[Rcpp::export]] -double BlitzML_sparse_linear_solver_compute_max_l1_penalty( +double BlitzML_solver_compute_max_l1_penalty( SEXP xptr_solver, SEXP xptr_dataset, SEXP xptr_params) { - Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); - Rcpp::XPtr< BlitzML::SparseDataset > data(xptr_dataset); + Rcpp::XPtr< BlitzML::SparseLinearSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::Dataset> data(xptr_dataset); Rcpp::XPtr< BlitzML::Parameters > params(xptr_params); return solver->compute_max_l1_penalty(data, params); } @@ -39,7 +54,7 @@ void BlitzML_solve_problem(SEXP xptr_solver, Rcpp::NumericVector &result, Rcpp::RawVector &status_buffer, const Rcpp::String &log_dir) { - Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::SparseLinearSolver > solver(xptr_solver); Rcpp::XPtr< BlitzML::SparseDataset > data(xptr_dataset); Rcpp::XPtr< BlitzML::Parameters > params(xptr_params); double *result_ptr = (double *)result.begin(); @@ -49,23 +64,23 @@ void BlitzML_solve_problem(SEXP xptr_solver, // [[Rcpp::export]] void BlitzML_set_tolerance(SEXP xptr_solver, double value) { - Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::SparseLinearSolver > solver(xptr_solver); solver->set_tolerance(value); } // [[Rcpp::export]] void BlitzML_set_max_time(SEXP xptr_solver, double value) { - Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::SparseLinearSolver > solver(xptr_solver); solver->set_max_time(value); } // [[Rcpp::export]] void BlitzML_set_max_iterations(SEXP xptr_solver, unsigned value) { - Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::SparseLinearSolver > solver(xptr_solver); solver->set_max_iterations(value); } // [[Rcpp::export]] void BlitzML_set_use_working_sets(SEXP xptr_solver, unsigned value) { - Rcpp::XPtr< BlitzML::SparseLogRegSolver > solver(xptr_solver); + Rcpp::XPtr< BlitzML::SparseLinearSolver > solver(xptr_solver); solver->set_use_working_sets(value); } From 7b02f84f6999140810c886875ed5de4e3e028ea6 Mon Sep 17 00:00:00 2001 From: dselivanov Date: Thu, 21 Jun 2018 17:14:58 +0400 Subject: [PATCH 11/14] make R CMD check happy - conditionally (macro) change all stdout, stderr to R counterparts --- include/blitzml/base/common.h | 4 +--- r-package/DESCRIPTION | 5 ++--- r-package/NAMESPACE | 1 + r-package/R/zzz.R | 1 + r-package/src/Makevars | 2 +- src/base/common.cpp | 23 ++++++++++++++++++----- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/blitzml/base/common.h b/include/blitzml/base/common.h index c9b44a2..a101066 100644 --- a/include/blitzml/base/common.h +++ b/include/blitzml/base/common.h @@ -19,7 +19,7 @@ namespace BlitzML { typedef double value_t; -#ifdef BLITZML_32BIT_WORD +#ifdef BLITZML_R_WRAPPER typedef uint32_t size_t; #else typedef std::size_t size_t; @@ -32,13 +32,11 @@ typedef std::vector::iterator value_itr; typedef std::vector::const_iterator const_index_itr; typedef std::vector::const_iterator const_value_itr; - void assert_with_error_message(bool okay, std::string error_message); void warn_if(bool condition, std::string message); void print(const char* fmt, ...); void debug(const char* fmt, ...); - class ObjectiveValues { public: ObjectiveValues() diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index 2c47748..1df658c 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -1,13 +1,12 @@ Package: blitzml Type: Package Title: Efficient Solver for Linear Models with Various Loss Functions and L1 penalty -Version: 0.1.0.1 +Version: 0.1.0 Authors@R: c( person("Tyler", "Johnson", role = c("aut"), comment = "backend C++ solver"), person("Dmitriy", "Selivanov", role = c("aut", "cre"), email = "selivanov.dmitriy@gmail.com", comment = "R package") ) -Description: Efficient Solver for Linear Models with Various - Loss Functions and L1 penalty +Description: Efficient Solver for Linear Models with L1 penalty. License: GPL (>= 2) Encoding: UTF-8 LazyData: true diff --git a/r-package/NAMESPACE b/r-package/NAMESPACE index 23828fc..d96204d 100644 --- a/r-package/NAMESPACE +++ b/r-package/NAMESPACE @@ -3,6 +3,7 @@ export(BlitzMLLassoRegression) export(auc) import(Matrix) +import(methods) importFrom(Rcpp,sourceCpp) importFrom(futile.logger,flog.debug) importFrom(futile.logger,flog.info) diff --git a/r-package/R/zzz.R b/r-package/R/zzz.R index b8f6bb6..ccaae20 100644 --- a/r-package/R/zzz.R +++ b/r-package/R/zzz.R @@ -1,5 +1,6 @@ #' @useDynLib "blitzml", .registration=TRUE #' @importFrom Rcpp sourceCpp +#' @import methods #' @import Matrix #' @importFrom futile.logger flog.info flog.debug flog.trace diff --git a/r-package/src/Makevars b/r-package/src/Makevars index db37275..18ba54a 100644 --- a/r-package/src/Makevars +++ b/r-package/src/Makevars @@ -13,4 +13,4 @@ all: $(SHLIB) clean: @rm -f $(OBJECTS) -PKG_CXXFLAGS = -I../inst/include -DBLITZML_32BIT_WORD +PKG_CXXFLAGS = -I../inst/include -DBLITZML_R_WRAPPER -DR_USE_C99_IN_CXX diff --git a/src/base/common.cpp b/src/base/common.cpp index eeea6b7..2fcc09e 100644 --- a/src/base/common.cpp +++ b/src/base/common.cpp @@ -4,26 +4,39 @@ #include #include +#ifdef BLITZML_R_WRAPPER +#include +// https://cran.r-project.org/doc/manuals/r-release/R-exts.html#index-Rvprintf +#include +#define vprintf Rvprintf +#define printf Rprintf +#endif + using std::string; using std::cout; using std::cerr; using std::endl; - namespace BlitzML { void assert_with_error_message(bool okay, string error_message) { if (!okay) { - cerr << "Program exited with error: " - << error_message - << endl; - throw error_message; +#ifdef BLITZML_R_WRAPPER + Rcpp::Rcerr << "Program exited with error: " << error_message << endl; +#else + cerr << "Program exited with error: " << error_message << endl; +#endif } + throw error_message; } void warn_if(bool condition, std::string message) { if (condition) { +#ifdef BLITZML_R_WRAPPER + Rcpp::Rcout << "Warning: " << message << endl; +#else cout << "Warning: " << message << endl; +#endif } } From cedebde824ed256759efa70f1080901bebe8c56c Mon Sep 17 00:00:00 2001 From: dselivanov Date: Sun, 29 Jul 2018 23:15:12 +0400 Subject: [PATCH 12/14] fix assert --- r-package/src/RcppExports.cpp | 4 ++-- r-package/src/wrap_solver.cpp | 2 +- src/base/common.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/r-package/src/RcppExports.cpp b/r-package/src/RcppExports.cpp index aff46f7..6ed291e 100644 --- a/r-package/src/RcppExports.cpp +++ b/r-package/src/RcppExports.cpp @@ -132,12 +132,12 @@ BEGIN_RCPP END_RCPP } // BlitzML_set_use_working_sets -void BlitzML_set_use_working_sets(SEXP xptr_solver, unsigned value); +void BlitzML_set_use_working_sets(SEXP xptr_solver, bool value); RcppExport SEXP _blitzml_BlitzML_set_use_working_sets(SEXP xptr_solverSEXP, SEXP valueSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type xptr_solver(xptr_solverSEXP); - Rcpp::traits::input_parameter< unsigned >::type value(valueSEXP); + Rcpp::traits::input_parameter< bool >::type value(valueSEXP); BlitzML_set_use_working_sets(xptr_solver, value); return R_NilValue; END_RCPP diff --git a/r-package/src/wrap_solver.cpp b/r-package/src/wrap_solver.cpp index 1eabe40..ed0f469 100644 --- a/r-package/src/wrap_solver.cpp +++ b/r-package/src/wrap_solver.cpp @@ -80,7 +80,7 @@ void BlitzML_set_max_iterations(SEXP xptr_solver, unsigned value) { } // [[Rcpp::export]] -void BlitzML_set_use_working_sets(SEXP xptr_solver, unsigned value) { +void BlitzML_set_use_working_sets(SEXP xptr_solver, bool value) { Rcpp::XPtr< BlitzML::SparseLinearSolver > solver(xptr_solver); solver->set_use_working_sets(value); } diff --git a/src/base/common.cpp b/src/base/common.cpp index 2fcc09e..9d29121 100644 --- a/src/base/common.cpp +++ b/src/base/common.cpp @@ -26,8 +26,8 @@ void assert_with_error_message(bool okay, string error_message) { #else cerr << "Program exited with error: " << error_message << endl; #endif + throw error_message; } - throw error_message; } void warn_if(bool condition, std::string message) { From 214c90358d5232c5c903340720044751153fcae0 Mon Sep 17 00:00:00 2001 From: dselivanov Date: Thu, 14 Feb 2019 10:17:07 +0400 Subject: [PATCH 13/14] initial work on cross-validation plots using base graphics --- r-package/R/plot.R | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 r-package/R/plot.R diff --git a/r-package/R/plot.R b/r-package/R/plot.R new file mode 100644 index 0000000..14d2cab --- /dev/null +++ b/r-package/R/plot.R @@ -0,0 +1,28 @@ +error.bars = function (x, upper, lower, width = 0.02, ...) { + xlim <- range(x) + barw <- diff(xlim) * width + graphics::segments(x, upper, x, lower, ...) + graphics::segments(x - barw, upper, x + barw, upper, ...) + graphics::segments(x - barw, lower, x + barw, lower, ...) + range(upper, lower) +} + +plot.cv.BlitzMLLassoRegression = function (x, ...) { + cvobj = x + xlab = "log10(lambda)" + + new.args = list(...) + if (length(new.args)) plot.args[names(new.args)] = new.args + plot.args = list(x = log10(cvobj$lambda), + y = cvobj$cvm, + ylim = range(cvobj$cvup, cvobj$cvlo), + xlab = xlab, + ylab = "auc", + type = "n") + do.call("plot", plot.args) + + error.bars(log10(cvobj$lambda), cvobj$cvup, + cvobj$cvlo, width = 0.01, col = "darkgrey") + graphics::points(log10(cvobj$lambda), cvobj$cvm, pch = 20, col = "red") + invisible() +} From 6ac22d77cd13faa1a48d3350c50c9588d67d36fd Mon Sep 17 00:00:00 2001 From: Dmitriy Selivanov Date: Sun, 9 Feb 2020 09:06:56 +0400 Subject: [PATCH 14/14] - add missing import - add "max non-zero" parameter --- r-package/DESCRIPTION | 4 +-- r-package/NAMESPACE | 1 + r-package/R/solver.R | 74 +++++++++++++++++++++++++++++-------------- r-package/R/zzz.R | 2 +- 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/r-package/DESCRIPTION b/r-package/DESCRIPTION index 1df658c..a29f9d0 100644 --- a/r-package/DESCRIPTION +++ b/r-package/DESCRIPTION @@ -1,7 +1,7 @@ Package: blitzml Type: Package Title: Efficient Solver for Linear Models with Various Loss Functions and L1 penalty -Version: 0.1.0 +Version: 0.1.1 Authors@R: c( person("Tyler", "Johnson", role = c("aut"), comment = "backend C++ solver"), person("Dmitriy", "Selivanov", role = c("aut", "cre"), email = "selivanov.dmitriy@gmail.com", comment = "R package") @@ -21,4 +21,4 @@ Imports: futile.logger, data.table LinkingTo: Rcpp -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.1 diff --git a/r-package/NAMESPACE b/r-package/NAMESPACE index d96204d..be5f960 100644 --- a/r-package/NAMESPACE +++ b/r-package/NAMESPACE @@ -8,4 +8,5 @@ importFrom(Rcpp,sourceCpp) importFrom(futile.logger,flog.debug) importFrom(futile.logger,flog.info) importFrom(futile.logger,flog.trace) +importFrom(futile.logger,flog.warn) useDynLib("blitzml", .registration=TRUE) diff --git a/r-package/R/solver.R b/r-package/R/solver.R index 9ac1dc3..8abe7cf 100644 --- a/r-package/R/solver.R +++ b/r-package/R/solver.R @@ -8,6 +8,7 @@ BlitzMLLassoRegression = R6::R6Class( lambda_min_fraction = 0.0001, lambda_max_fraction = 0.3, include_bias_term = TRUE, + max_nonzero = Inf, tol = 1e-3, max_time = 60, max_iter = 10000, @@ -23,11 +24,12 @@ BlitzMLLassoRegression = R6::R6Class( is.numeric(tol) && is.numeric(max_time) && is.numeric(max_iter) && + is.numeric(max_nonzero) && is.logical(include_bias_term) ) stopifnot(is.numeric(lambda) || identical(lambda, "auto")) - if(is.numeric(lambda)) lambda = sort(lambda) + if (is.numeric(lambda)) lambda = sort(lambda, decreasing = TRUE) private$n_lambda = n_lambda[[1]] private$lambda_min_fraction = lambda_min_fraction[[1]] @@ -42,15 +44,16 @@ BlitzMLLassoRegression = R6::R6Class( undefined_param = 0, include_bias_term = as.numeric(include_bias_term), loss_index = encode_loss(private$loss), - loss = private$loss) + loss = private$loss, + max_nonzero = max_nonzero) futile.logger::flog.debug("params : %s", paste(names(private$params), private$params, sep = "=", collapse = ", ")) }, fit = function(x, y, ...) { stopifnot(is.numeric(y)) - if(inherits(x, "sparseMatrix")) { + if (inherits(x, "sparseMatrix")) { x = as(x, "dgCMatrix") } else { - if(! inherits(x, "matrix")) + if (!inherits(x, "matrix")) stop("'x' should inherit from 'matrix' or 'Matrix::sparseMatrix'") } @@ -59,7 +62,7 @@ BlitzMLLassoRegression = R6::R6Class( # BlitzML requirement - positive classes encoded as 1 and negative as -1 # FIXME this is for logistic solver only! - if(private$loss == "logistic") y = ifelse(y > 0, 1, -1) + if (private$loss == "logistic") y = ifelse(y > 0, 1, -1) BlitzML_set_tolerance(solver, private$tol) BlitzML_set_use_working_sets(solver, TRUE) @@ -74,15 +77,16 @@ BlitzMLLassoRegression = R6::R6Class( dataset = BlitzML_create_dataset(x, y) - if(is.null(self$lambda_seq)) { - if(identical(private$params$lambda, "auto")) { + if (is.null(self$lambda_seq)) { + if (identical(private$params$lambda, "auto")) { self$lambda_seq = private$generate_lambda_seq(x, y) } else { self$lambda_seq = private$params$lambda } } - res = lapply(self$lambda_seq, function(lambda) { + res = list() + for (lambda in self$lambda_seq) { status_buffer = raw(STATUS_BUFFER_SIZE) start = Sys.time() params = c(lambda = lambda, @@ -99,19 +103,26 @@ BlitzMLLassoRegression = R6::R6Class( status = rawToChar(status_buffer) time_spent = difftime(Sys.time(), start, "sec") - if(status == 'Exceeded time limit') + if (status == 'Exceeded time limit') flog.warn("haven't converged for lambda %f - exceeded time limit of %f sec", lambda, private$max_time) flog.trace("solved in %f sec for lambda %f, %d non-zero coef (status: '%s')", time_spent, lambda, sum(coefs != 0), status) + if (sum(coefs != 0) > private$params$max_nonzero) { + break() + } # put bias followed coefficients and convert to sparse matrix (actually vector) - as(c(bias, coefs), "CsparseMatrix") - }) + res[[length(res) + 1L]] = c(bias, coefs) + } + self$lambda_seq = self$lambda_seq[seq_along(res)] + cn = paste("lambda=", self$lambda_seq, sep = "") res = do.call(cbind, res) - colnames(res) = paste("lambda=", self$lambda_seq, sep = "") + colnames(res) = cn + res = as(res, "CsparseMatrix") + cn = colnames(x) - if(is.null(cn)) + if (is.null(cn)) cn = paste("V", seq_len(ncol(x)), sep = "") rownames(res) = c("bias", cn) self$coef = res @@ -121,18 +132,22 @@ BlitzMLLassoRegression = R6::R6Class( # add first dummy column of ones in order to add biases via matrix multiplication x = cbind(rep(1, nrow(x)), x) res = x %*% self$coef - if(private$loss == "logistic") res = sigmoid(res) + if (private$loss == "logistic") res = sigmoid(res) as.matrix(res) }, cross_validate = function(x, y, fold_id = NULL, n_folds = 4, callbacks = list(auc = blitzml::auc), + lambda = "auto", cores = getOption("mc.cores", parallel::detectCores()), ...) { - if(is.null(self$lambda_seq) && identical(private$params$lambda, "auto")) { + if (identical(lambda, "auto")) { + private$params$lambda = lambda self$lambda_seq = private$generate_lambda_seq(x, y) + } else { + self$lambda_seq = lambda } if (.Platform$OS.type != "unix" && cores > 1) { @@ -142,8 +157,15 @@ BlitzMLLassoRegression = R6::R6Class( lapply(callbacks, function(f) stopifnot(is.function(f) && length(formals(f)) == 2L)) - if(is.null(fold_id)) - fold_id = sample.int(n_folds, nrow(x), replace = TRUE) + if (is.null(fold_id)) { + i_neg = y == -1 + i_pos = which(!i_neg) + i_neg = which(i_neg) + fold_id = rep(0, nrow(x)) + fold_id[i_pos] = sample.int(n_folds, length(i_pos), replace = TRUE) + fold_id[i_neg] = sample.int(n_folds, length(i_neg), replace = TRUE) + } + folds = unique(fold_id) res = parallel::mclapply(folds, function(fld) { @@ -180,12 +202,16 @@ BlitzMLLassoRegression = R6::R6Class( # lambda_max = find_max_lambda(x, y, params = c(0, 0, private$params$include_bias_term, private$params$loss_index)) lambda_max = find_max_lambda(x, y, private$params) flog.info("found max lambda: %f", lambda_max) - lambda_seq = seq( - log10(private$lambda_min_fraction * lambda_max), - log10(private$lambda_max_fraction * lambda_max), - length.out = private$n_lambda + if (lambda_max == 0) { + lambda_seq = 0 + } else { + lambda_seq = seq( + log10(private$lambda_max_fraction * lambda_max), + log10(private$lambda_min_fraction * lambda_max), + length.out = private$n_lambda ) - lambda_seq = 10 ** lambda_seq + lambda_seq = 10 ** lambda_seq + } lambda_seq }, params = NULL, @@ -202,7 +228,7 @@ BlitzMLLassoRegression = R6::R6Class( BlitzML_create_solver = function(loss) { check_loss(loss) - switch (loss, + switch(loss, squared = BlitzML_new_linear_solver(), huber = BlitzML_new_solver(), logistic = BlitzML_new_logreg_solver(), @@ -213,7 +239,7 @@ BlitzML_create_solver = function(loss) { } check_loss = function(loss) { - if(!(loss %in% IMPLEMENTED_LOSS_FUNCTIONS)) + if (!(loss %in% IMPLEMENTED_LOSS_FUNCTIONS)) stop(sprintf("only %s loss supported at the moment", paste(paste("'", IMPLEMENTED_LOSS_FUNCTIONS, "'", sep = ""), collapse = "/"))) invisible(TRUE) } diff --git a/r-package/R/zzz.R b/r-package/R/zzz.R index ccaae20..d08e973 100644 --- a/r-package/R/zzz.R +++ b/r-package/R/zzz.R @@ -2,7 +2,7 @@ #' @importFrom Rcpp sourceCpp #' @import methods #' @import Matrix -#' @importFrom futile.logger flog.info flog.debug flog.trace +#' @importFrom futile.logger flog.info flog.debug flog.trace flog.warn STATUS_BUFFER_SIZE = 64L