From 9fce1c73e73000ee973956d3da8e50d1f8dc876c Mon Sep 17 00:00:00 2001 From: peterhurford Date: Sun, 9 Jul 2017 16:55:39 -0500 Subject: [PATCH 1/4] Started mungebits3 --- config/engines.R | 4 +- config/global/modeling/modeling.R | 2 +- lib/controllers/data.R | 8 +-- lib/mungebits/trivial.R | 3 - lib/mungebits/trivial_column_transform.R | 6 -- lib/shared/lexicals.R | 90 ++++++++++++------------ lib/stages/data.R | 45 +++++++----- lockfile.yml | 12 ---- 8 files changed, 77 insertions(+), 93 deletions(-) delete mode 100644 lib/mungebits/trivial.R delete mode 100644 lib/mungebits/trivial_column_transform.R diff --git a/config/engines.R b/config/engines.R index 214257f..5c34389 100644 --- a/config/engines.R +++ b/config/engines.R @@ -1,4 +1,5 @@ -engine("base", type = "github", repo = "syberia/base.sy", mount = TRUE) +engine("base", type = "local", path = "~/dev/base.sy", mount = TRUE) +# engine("base", type = "github", repo = "syberia/base.sy", mount = TRUE) .onAttach <- function(parent_engine) { @@ -22,6 +23,5 @@ engine("base", type = "github", repo = "syberia/base.sy", mount = TRUE) attach(parent_engine$resource('config/global', director = parent_engine, children. = FALSE), name = "syberia:project") } - } diff --git a/config/global/modeling/modeling.R b/config/global/modeling/modeling.R index 2d4f84e..85b2f70 100644 --- a/config/global/modeling/modeling.R +++ b/config/global/modeling/modeling.R @@ -99,7 +99,7 @@ run_model <- Ramd::define("run_model")[[1L]] run <- run_model makeActiveBinding("A", function() last_run()$after$data, env = globalenv()) +makeActiveBinding("df", function() last_run()$after$data, env = globalenv()) makeActiveBinding("B", function() last_run()$before$data, env = globalenv()) makeActiveBinding("M", function() last_run()$after$model_stage$model, env = globalenv()) makeActiveBinding("S", function() active_runner(), env = globalenv()) - diff --git a/lib/controllers/data.R b/lib/controllers/data.R index 3e73575..6256698 100644 --- a/lib/controllers/data.R +++ b/lib/controllers/data.R @@ -1,11 +1,9 @@ # The data controller. preprocessor <- function(director, source_env) { # Add lexicals to local environment. - lexicals <- director$resource('lib/shared/lexicals') - for (x in ls(lexicals)) source_env[[x]] <- lexicals[[x]] - - director$resource('lib/shared/source_mungebits', source_env, director) - + # lexicals <- director$resource('lib/shared/lexicals') + # for (x in ls(lexicals)) source_env[[x]] <- lexicals[[x]] + # director$resource('lib/shared/source_mungebits', source_env, director) source() } diff --git a/lib/mungebits/trivial.R b/lib/mungebits/trivial.R deleted file mode 100644 index 52bce10..0000000 --- a/lib/mungebits/trivial.R +++ /dev/null @@ -1,3 +0,0 @@ -train <- predict <- function(dataframe) { - dataframe -} diff --git a/lib/mungebits/trivial_column_transform.R b/lib/mungebits/trivial_column_transform.R deleted file mode 100644 index 641541c..0000000 --- a/lib/mungebits/trivial_column_transform.R +++ /dev/null @@ -1,6 +0,0 @@ -column_transformation <- TRUE - -train <- predict <- function(x) { - rev(x) -} - diff --git a/lib/shared/lexicals.R b/lib/shared/lexicals.R index 105e002..be512ff 100644 --- a/lib/shared/lexicals.R +++ b/lib/shared/lexicals.R @@ -1,51 +1,51 @@ lexicals <- list() -lexicals$`~` <- (function() { - function(x, y = NULL) { - try_check <- function(el, fn) tryCatch(fn(el), error = function(.) FALSE) - suppressWarnings({ - if ((xlist <- try_check(x, is.list)) || (ylist <- try_check(y, is.list))) { - list(x, y) - } else if ((xfn <- try_check(x, is.function)) || (yfn <- try_check(y, is.function))) { - mungebits2:::mungebit$new(x, y, enforce_train = FALSE) - } else if ((xfn <- try_check(x, is.mungebit)) || (yfn <- try_check(y, is.mungebit))) { - mungebits2:::mungebit$new(x, y, enforce_train = FALSE) - } else { - eval.parent(parse(text = - paste0("(function() { `~` <- .Primitive('~')\n", - 'structure(', paste(deparse(match.call()), collapse = "\n"), - ', .Environment = parent.frame()) })()'))) - } - }) - } -})() +# lexicals$`~` <- (function() { +# function(x, y = NULL) { +# try_check <- function(el, fn) tryCatch(fn(el), error = function(.) FALSE) +# suppressWarnings({ +# if ((xlist <- try_check(x, is.list)) || (ylist <- try_check(y, is.list))) { +# list(x, y) +# } else if ((xfn <- try_check(x, is.function)) || (yfn <- try_check(y, is.function))) { +# mungebits2:::mungebit$new(x, y, enforce_train = FALSE) +# } else if ((xfn <- try_check(x, is.mungebit)) || (yfn <- try_check(y, is.mungebit))) { +# mungebits2:::mungebit$new(x, y, enforce_train = FALSE) +# } else { +# eval.parent(parse(text = +# paste0("(function() { `~` <- .Primitive('~')\n", +# 'structure(', paste(deparse(match.call()), collapse = "\n"), +# ', .Environment = parent.frame()) })()'))) +# } +# }) +# } +# })() -lexicals$`!` <- (function() { - function(x) { - if (deparse(substitute(x))[[1]] == '{') { - fn <- function(x) {} - body(fn) <- substitute(x) - column_transformation(fn) - } else if (is.function(x) && base::`!`(is.transformation(x))) { - column_transformation(x) - } else if (is.transformation(x)) { - # !!{dataframe <- ...} should be a global transformation - fnbody <- body(environment(x)$transformation) - eval(bquote(function(dataframe, ...) eval.parent(substitute(.(fnbody))))) - } else if (is.list(x)) { - lapply(x, function(obj) { - if (is.symbol(obj)) { - get(as.character(obj), envir = parent.frame()) - } else { - obj - } - }) - } else { - base::`!`(x) - } - } -})() +# lexicals$`!` <- (function() { +# function(x) { +# if (deparse(substitute(x))[[1]] == '{') { +# fn <- function(x) {} +# body(fn) <- substitute(x) +# column_transformation(fn) +# } else if (is.function(x) && base::`!`(is.transformation(x))) { +# column_transformation(x) +# } else if (is.transformation(x)) { +# # !!{dataframe <- ...} should be a global transformation +# fnbody <- body(environment(x)$transformation) +# eval(bquote(function(dataframe, ...) eval.parent(substitute(.(fnbody))))) +# } else if (is.list(x)) { +# lapply(x, function(obj) { +# if (is.symbol(obj)) { +# get(as.character(obj), envir = parent.frame()) +# } else { +# obj +# } +# }) +# } else { +# base::`!`(x) +# } +# } +# })() -lexicals$debug <- list(function(.) { browser() }) +# lexicals$debug <- list(function(.) { browser() }) lexicals diff --git a/lib/stages/data.R b/lib/stages/data.R index 020d0ae..d6ee6be 100644 --- a/lib/stages/data.R +++ b/lib/stages/data.R @@ -1,18 +1,18 @@ -simple_deflate <- resource("lib/shared/simple_deflate") +# simple_deflate <- resource("lib/shared/simple_deflate") -preprocess_munge_procedure <- function(munge_procedure) { - # We need to make sure we're not storing the entire parent environment chain - # when later serializing a tundraContainer. - deflate <- function(obj) { - if (is.list(obj)) lapply(obj, deflate) - else if (is.function(obj)) simple_deflate(obj) - else obj - } - for (i in seq_along(munge_procedure)) { - munge_procedure[[i]] <- deflate(munge_procedure[[i]]) - } - munge_procedure -} +# preprocess_munge_procedure <- function(munge_procedure) { +# # We need to make sure we're not storing the entire parent environment chain +# # when later serializing a tundraContainer. +# deflate <- function(obj) { +# if (is.list(obj)) lapply(obj, deflate) +# else if (is.function(obj)) simple_deflate(obj) +# else obj +# } +# for (i in seq_along(munge_procedure)) { +# munge_procedure[[i]] <- deflate(munge_procedure[[i]]) +# } +# munge_procedure +# } #' Data stage for syberia models #' @@ -25,12 +25,19 @@ preprocess_munge_procedure <- function(munge_procedure) { #' The default is \code{TRUE}. #' @export data_stage <- function(modelenv, munge_procedure, remember = TRUE) { - munge_procedure <- preprocess_munge_procedure(munge_procedure) + stages <- lapply(seq_along(munge_procedure), function(index) { + action <- munge_procedure[[index]] + function(modelenv) { modelenv$data <- action(modelenv$data); modelenv } + }) + names(stages) <- names(munge_procedure) + stageRunner$new(modelenv, stages, remember = TRUE) + +# munge_procedure <- preprocess_munge_procedure(munge_procedure) - stagerunner <- mungebits2::munge(modelenv, munge_procedure, - stagerunner = list(remember = remember) - ) +# stagerunner <- mungebits2::munge(modelenv, munge_procedure, +# stagerunner = list(remember = remember) +# ) - stagerunner +# stagerunner } diff --git a/lockfile.yml b/lockfile.yml index a4277dd..9c10730 100644 --- a/lockfile.yml +++ b/lockfile.yml @@ -11,14 +11,6 @@ packages: name: Ramd version: 0.3.8 repo: robertzk/Ramd - - - name: statsUtils - version: 0.1.3 - repo: robertzk/statsUtils - - - name: syberiaMungebits2 - version: 0.1.2.9002 - repo: syberia/syberiaMungebits2 - name: director version: 0.3.0.5.9000 @@ -32,10 +24,6 @@ packages: version: 0.6.1.9009 repo: syberia/syberia ref: 0.6.1.9009 - - - name: mungebits2 - version: 0.1.0.9014 - repo: syberia/mungebits2 - name: gbm version: 2.1.1 From 5971a73c085e1cd6a8ba3be6859e7f2c5e02071c Mon Sep 17 00:00:00 2001 From: peterhurford Date: Sun, 9 Jul 2017 17:11:01 -0500 Subject: [PATCH 2/4] go back to github engine --- config/engines.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/config/engines.R b/config/engines.R index 5c34389..5087d58 100644 --- a/config/engines.R +++ b/config/engines.R @@ -1,5 +1,4 @@ -engine("base", type = "local", path = "~/dev/base.sy", mount = TRUE) -# engine("base", type = "github", repo = "syberia/base.sy", mount = TRUE) +engine("base", type = "github", repo = "syberia/base.sy", mount = TRUE) .onAttach <- function(parent_engine) { From a86f4b2b583e6a4b09c75398d85114471b8d113b Mon Sep 17 00:00:00 2001 From: peterhurford Date: Fri, 4 Aug 2017 12:13:14 -0500 Subject: [PATCH 3/4] Avoid invocation of mungebits --- lib/controllers/mungebits.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/controllers/mungebits.R b/lib/controllers/mungebits.R index 6815a9e..d2fddf9 100644 --- a/lib/controllers/mungebits.R +++ b/lib/controllers/mungebits.R @@ -1,12 +1,12 @@ # This is the mungebits controller. function(input, director) { - simple_deflate <- director$resource("lib/shared/simple_deflate") - if (isTRUE(input$column_transformation)) { - mungebits2::mungebit$new(mungebits2::column_transformation(simple_deflate(input$train)), - mungebits2::column_transformation(simple_deflate(input$predict))) - } else { - mungebits2::mungebit$new(simple_deflate(input$train), simple_deflate(input$predict)) - } + # simple_deflate <- director$resource("lib/shared/simple_deflate") + # if (isTRUE(input$column_transformation)) { + # mungebits2::mungebit$new(mungebits2::column_transformation(simple_deflate(input$train)), + # mungebits2::column_transformation(simple_deflate(input$predict))) + # } else { + # mungebits2::mungebit$new(simple_deflate(input$train), simple_deflate(input$predict)) + # } } From e973d046500c7512945348f34f9a2f8b84c0630a Mon Sep 17 00:00:00 2001 From: peterhurford Date: Sun, 10 Sep 2017 13:52:29 -0500 Subject: [PATCH 4/4] import stage can be a function --- lib/stages/import.R | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/lib/stages/import.R b/lib/stages/import.R index 0865e7d..2124c8a 100644 --- a/lib/stages/import.R +++ b/lib/stages/import.R @@ -14,24 +14,29 @@ build_import_stagerunner <- function(modelenv, import_options) { if (nzchar(Sys.getenv("CI"))) return(list("import" = force)) stages <- Reduce(append, lapply(seq_along(import_options), function(index) { - adapter_name <- names(import_options)[index] %||% default_adapter - adapter_name <- gsub(".", "/", adapter_name, fixed = TRUE) - adapter <- resource(file.path("lib", "adapters", adapter_name)) - opts <- import_options[[index]] - - if (is.function(adapter)) { - # If a raw function, give it the import options and let it generate - # the stage function. This is useful if you need finer control over - # the importing process. - setNames(list(adapter(modelenv, opts)), adapter_name) + # If the import stage is a raw function with no adapter, turn it into a stage. + if (is.function(import_options[[1]])) { + list("function" = function(modelenv) { modelenv$data <- import_options[[1]]() }) } else { - setNames(list(function(modelenv) { - # Only run if data isn"t already loaded - if (!"data" %in% ls(modelenv)) { - modelenv$import_stage$adapter <- adapter - modelenv$data <- adapter$read(opts) - } - }), adapter$.keyword) + adapter_name <- names(import_options)[index] %||% default_adapter + adapter_name <- gsub(".", "/", adapter_name, fixed = TRUE) + adapter <- resource(file.path("lib", "adapters", adapter_name)) + opts <- import_options[[index]] + + if (is.function(adapter)) { + # If the adapter is a raw function, give it the import options and let it generate + # the stage function. This is useful if you need finer control over + # the importing process. + setNames(list(adapter(modelenv, opts)), adapter_name) + } else { + setNames(list(function(modelenv) { + # Only run if data isn"t already loaded + if (!"data" %in% ls(modelenv)) { + modelenv$import_stage$adapter <- adapter + modelenv$data <- adapter$read(opts) + } + }), adapter$.keyword) + } } }))