diff --git a/.Rbuildignore b/.Rbuildignore index 54336f4..44aa68b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,5 @@ Rprof\.out ^scripts$ ^vignettes_src$ ^appveyor\.yml$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/DESCRIPTION b/DESCRIPTION index 4d7b8ef..a3a6179 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,5 +27,5 @@ Suggests: rbenchmark, testthat (>= 1.0.0) VignetteBuilder: knitr -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 88661fb..6e9d7c7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(driver_remote) export(encode64) export(fetch_hook_read) export(join_key_namespace) +export(register_mangler) export(storr) export(storr_dbi) export(storr_environment) diff --git a/NEWS.md b/NEWS.md index 320ed08..f4299aa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -## storr 1.2.1 (2018-??-??) +## storr 1.2.2 (????-??-??) + +- Supply custom key manglers with `register_mangler()` (#88, @wlandau). + +## storr 1.2.1 (2018-10-18) * Avoid a race condition in writing to rds storrs in parallel (#80, reported by @wlandau) diff --git a/R/driver_rds.R b/R/driver_rds.R index bbcbda7..68852f4 100644 --- a/R/driver_rds.R +++ b/R/driver_rds.R @@ -21,14 +21,24 @@ ##' is set. Using \code{mangle_key = NULL} uses whatever mangledness ##' exists (or no mangledness if creating a new storr). ##' +##' If base64 encoding does not meet your use case +##' (e.g. if it is too slow) then you can supply custom functions +##' to encode and decode the keys. +##' First, register your encoder and decoder functions +##' with \code{\link{register_mangler}()}. +##' Next, set \code{mangle_key} equal to the \code{name} +##' you gave \code{\link{register_mangler}()}. +##' ##' @section Corrupt keys: ##' ##' Some file synchronisation utilities like dropbox can create file ##' that confuse an rds storr (e.g., ##' \code{"myobject (Someone's conflicted copy)"}. If -##' \code{mangle_key} is \code{FALSE} these cannot be detected but at +##' \code{mangle_key} is \code{"none"}, +##' these cannot be detected but at ##' the same time are not a real problem for storr. However, if -##' \code{mangle_key} is \code{TRUE} and keys are base64 encoded then +##' \code{mangle_key} is \code{"base64"} +##' and keys are base64 encoded then ##' these conflicted copies can break parts of storr. ##' ##' If you see a warning asking you to deal with these files, please @@ -57,8 +67,11 @@ ##' @param compress Compress the generated file? This saves a small ##' amount of space for a reasonable amount of time. ##' -##' @param mangle_key Mangle keys? If TRUE, then the key is encoded -##' using base64 before saving to the filesystem. See Details. +##' @param mangle_key Mangle keys? If \code{"base64"}, +##' then the key is encoded using base64 before saving to the filesystem. +##' If \code{"none"}, then keys are not encoded. If some other string, +##' \code{storr} will use a custom key mangler set up with +##' \code{\link{register_mangler}()}. See Details. ##' ##' @param mangle_key_pad Logical indicating if the filenames created ##' when using \code{mangle_key} should also be "padded" with the @@ -132,6 +145,7 @@ R6_driver_rds <- R6::R6Class( compress = NULL, mangle_key = NULL, mangle_key_pad = NULL, + mangler = NULL, hash_algorithm = NULL, traits = list(accept = "raw"), @@ -163,10 +177,14 @@ R6_driver_rds <- R6::R6Class( driver_rds_config_file(path, "version")) if (!is.null(mangle_key)) { - assert_scalar_logical(mangle_key) + assert_scalar(mangle_key) + assert_is(mangle_key, c("logical", "character")) } self$mangle_key <- driver_rds_config(path, "mangle_key", mangle_key, - FALSE, TRUE) + "FALSE", TRUE) + if (self$mangle_key %in% c("TRUE", "FALSE")){ + self$mangle_key <- as.logical(self$mangle_key) + } if (!is.null(mangle_key_pad)) { assert_scalar_logical(mangle_key_pad) @@ -186,6 +204,8 @@ R6_driver_rds <- R6::R6Class( } self$hash_algorithm <- driver_rds_config(path, "hash_algorithm", hash_algorithm, "md5", TRUE) + + self$set_mangler(path, is_new) }, type = function() { @@ -201,7 +221,7 @@ R6_driver_rds <- R6::R6Class( }, set_hash = function(key, namespace, hash) { - dir_create(self$name_key("", namespace)) + dir_create(self$key_path("", namespace)) write_lines(hash, self$name_key(key, namespace), scratch_dir = self$path_scratch) }, @@ -244,14 +264,12 @@ R6_driver_rds <- R6::R6Class( list_keys = function(namespace) { path <- file.path(self$path, "keys", namespace) files <- dir(path) - if (self$mangle_key) { - ret <- decode64(files, error = FALSE) + ret <- self$mangler$decode(x = files, error = FALSE) + if (!use_no_mangler(self$mangle_key)) { if (anyNA(ret)) { message_corrupted_rds_keys(namespace, path, files[is.na(ret)]) ret <- ret[!is.na(ret)] } - } else { - ret <- files } ret }, @@ -265,10 +283,10 @@ R6_driver_rds <- R6::R6Class( }, purge_corrupt_keys = function(namespace) { - if (self$mangle_key) { + if (!use_no_mangler(self$mangle_key)) { path <- file.path(self$path, "keys", namespace) files <- dir(path) - i <- is.na(decode64(files, error = FALSE)) + i <- is.na(self$mangler$decode(files, error = FALSE)) if (any(i)) { res <- file.remove(file.path(path, files[i])) message(sprintf("Removed %d of %d corrupt %s", @@ -286,12 +304,40 @@ R6_driver_rds <- R6::R6Class( }, name_key = function(key, namespace) { - if (self$mangle_key) { - key <- encode64(key, pad = self$mangle_key_pad) - } + key <- self$mangler$encode(x = key, pad = self$mangle_key_pad) + self$key_path(key, namespace) + }, + + key_path = function(key, namespace) { file.path(self$path, "keys", namespace, key) - } - )) + }, + + set_mangler = function(path, is_new) { + if (use_no_mangler(self$mangle_key)) { + self$mangler <- list( + name = "none", + encode = function(x, pad) { + x + }, + decode = function(x, error) { + x + } + ) + } else if (use_base64_mangler(self$mangle_key)) { + self$mangler <- list( + name = "base64", + encode = encode64, + decode = decode64 + ) + } else if (is_new){ + self$mangler <- getOption("storr_mangler") + assert_custom_mangler(self$mangler, self$mangle_key) + saveRDS(self$mangler, file.path(self$path, "config", "mangler.rds")) + } else { + self$mangler <- readRDS(file.path(path, "config", "mangler.rds")) + assert_custom_mangler(self$mangler, self$mangle_key) + } + })) ## This attempts to check that we are connecting to a storr of @@ -447,3 +493,57 @@ See 'Corrupt keys' within ?storr_rds for how to proceed" -> fmt message(sprintf(fmt, length(files), namespace, path, files)) corrupt_notices[[path]] <- now } + +##' @title Register a key mangler +##' @description Define custom functinons for mangling \code{storr_rds()} keys. +##' @details +##' \code{storr_rds(mangle_key = "base64")} encodes keys using base64 +##' encoding. This precaution ensures that the names of key files +##' do not have illegal character names such as ":". However, +##' base 64 encoding can be slow for some applications, +##' so you have the option of setting your own custom key mangler. +##' First, create functions to encode and decode keys and register them +##' with \code{\link{register_mangler}()}. +##' Next, set \code{mangle_key} equal to the \code{name} +##' you gave \code{\link{register_mangler}()}. +##' @export +##' @return nothing +##' @param name character scalar, name of the mangler +##' @param encode function to encode keys. Must have arguments \code{x} +##' and \code{pad}. +##' @param decode function to decode keys. Must have arguments \code{x} +##' and \code{error} +##' @param overwrite logical, whether to overwrite a previously +##' registered mangler. +##' @examples +##' register_mangler( +##' "test_mangler", +##' encode = function (x, pad) { +##' paste0("x_", x) +##' }, +##' decode = function(x, error) { +##' substr(x, start = 3, stop = 1e7) +##' } +##' ) +##' s <- storr_rds(tempfile(), mangle_key = "test_mangler") +##' s$set("a", 1) +##' s$get("a") +##' list.files(file.path(s$driver$path, "keys", s$default_namespace)) +##' options(storr_mangler = NULL) +register_mangler <- function(name, encode, decode, overwrite = FALSE) { + current <- getOption("storr_mangler") + if (is.list(current) && !overwrite){ + return() + } + if (overwrite) { + warning( + sprintf( + "Overwriting existing key mangler '%s' with '%s'", + current$name, name + ), + call. = FALSE + ) + } + options(storr_mangler = list(name = name, encode = encode, decode = decode)) + invisible() +} diff --git a/R/driver_remote.R b/R/driver_remote.R index 403e0d2..c5da9c9 100644 --- a/R/driver_remote.R +++ b/R/driver_remote.R @@ -134,7 +134,7 @@ R6_driver_remote <- R6::R6Class( return(character(0)) } ret <- self$ops$list_dir(path) - if (self$rds$mangle_key) decode64(ret, TRUE) else ret + self$rds$mangler$decode(x = ret, error = TRUE) }, ## These functions could be done better if driver_rds takes a @@ -152,7 +152,6 @@ R6_driver_remote <- R6::R6Class( basename(p)) })) - ## It would be really nice to do this as a single operation but that ## probably can't be easily done generally. Quite possibly it would ## be possible to get/fetch an entire directory though. diff --git a/R/utils.R b/R/utils.R index 81e93e2..3d6a54e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -26,7 +26,6 @@ vcapply <- function(X, FUN, ...) { vapply(X, FUN, character(1), ...) } - assert_scalar <- function(x, name = deparse(substitute(x))) { if (length(x) != 1) { stop(sprintf("'%s' must be a scalar", name), call. = FALSE) @@ -116,6 +115,25 @@ assert_probably_storr_driver <- function(x, name = deparse(substitute(x))) { invisible(x) } +assert_custom_mangler <- function(mangler, mangle_key) { + if (is.null(mangler)) { + stop( + sprintf("Mangler '%s' not registered. ", mangle_key), + "Registered it with register_mangler().", + call. = FALSE + ) + } + if (!identical(mangler$name, mangle_key)) { + stop( + sprintf( + "Registered key mangler '%s' conflicts with mangle_key ('%s')", + mangler$name, mangle_key + ), + call. = FALSE + ) + } +} + match_value <- function(x, choices, name = deparse(substitute(x))) { assert_scalar_character(x, name) @@ -179,3 +197,14 @@ file_size <- function(...) { prompt_ask_yes_no <- function(reason) { utils::menu(c("no", "yes"), FALSE, title = reason) == 2 # nocov } + +use_no_mangler <- function(mangle_key) { + is.null(mangle_key) || + identical(mangle_key, FALSE) || + identical(mangle_key, "none") +} + +use_base64_mangler <- function(mangle_key) { + identical(mangle_key, TRUE) || + identical(mangle_key, "base64") +} diff --git a/man/register_mangler.Rd b/man/register_mangler.Rd new file mode 100644 index 0000000..fd6fd44 --- /dev/null +++ b/man/register_mangler.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/driver_rds.R +\name{register_mangler} +\alias{register_mangler} +\title{Register a key mangler} +\usage{ +register_mangler(name, encode, decode, overwrite = FALSE) +} +\arguments{ +\item{name}{character scalar, name of the mangler} + +\item{encode}{function to encode keys. Must have arguments \code{x} +and \code{pad}.} + +\item{decode}{function to decode keys. Must have arguments \code{x} +and \code{error}} + +\item{overwrite}{logical, whether to overwrite a previously +registered mangler.} +} +\value{ +nothing +} +\description{ +Define custom functinons for mangling \code{storr_rds()} keys. +} +\details{ +\code{storr_rds(mangle_key = "base64")} encodes keys using base64 +encoding. This precaution ensures that the names of key files +do not have illegal character names such as ":". However, +base 64 encoding can be slow for some applications, +so you have the option of setting your own custom key mangler. +First, create functions to encode and decode keys and register them +with \code{\link{register_mangler}()}. +Next, set \code{mangle_key} equal to the \code{name} +you gave \code{\link{register_mangler}()}. +} +\examples{ +register_mangler( + "test_mangler", + encode = function (x, pad) { + paste0("x_", x) + }, + decode = function(x, error) { + substr(x, start = 3, stop = 1e7) + } +) +s <- storr_rds(tempfile(), mangle_key = "test_mangler") +s$set("a", 1) +s$get("a") +list.files(file.path(s$driver$path, "keys", s$default_namespace)) +options(storr_mangler = NULL) +} diff --git a/man/storr_rds.Rd b/man/storr_rds.Rd index 4e46782..738137f 100644 --- a/man/storr_rds.Rd +++ b/man/storr_rds.Rd @@ -5,8 +5,9 @@ \alias{driver_rds} \title{rds object cache driver} \usage{ -storr_rds(path, compress = NULL, mangle_key = NULL, mangle_key_pad = NULL, - hash_algorithm = NULL, default_namespace = "objects") +storr_rds(path, compress = NULL, mangle_key = NULL, + mangle_key_pad = NULL, hash_algorithm = NULL, + default_namespace = "objects") driver_rds(path, compress = NULL, mangle_key = NULL, mangle_key_pad = NULL, hash_algorithm = NULL) @@ -19,8 +20,11 @@ might be nice for persistent application data.} \item{compress}{Compress the generated file? This saves a small amount of space for a reasonable amount of time.} -\item{mangle_key}{Mangle keys? If TRUE, then the key is encoded -using base64 before saving to the filesystem. See Details.} +\item{mangle_key}{Mangle keys? If \code{"base64"}, +then the key is encoded using base64 before saving to the filesystem. +If \code{"none"}, then keys are not encoded. If some other string, +\code{storr} will use a custom key mangler set up with +\code{\link{register_mangler}()}. See Details.} \item{mangle_key_pad}{Logical indicating if the filenames created when using \code{mangle_key} should also be "padded" with the @@ -63,6 +67,14 @@ not safe to use the same path for a storr with and without mangling. So once an rds storr has been created its "mangledness" is set. Using \code{mangle_key = NULL} uses whatever mangledness exists (or no mangledness if creating a new storr). + +If base64 encoding does not meet your use case +(e.g. if it is too slow) then you can supply custom functions +to encode and decode the keys. +First, register your encoder and decoder functions +with \code{\link{register_mangler}()}. +Next, set \code{mangle_key} equal to the \code{name} +you gave \code{\link{register_mangler}()}. } \section{Corrupt keys}{ @@ -70,9 +82,11 @@ exists (or no mangledness if creating a new storr). Some file synchronisation utilities like dropbox can create file that confuse an rds storr (e.g., \code{"myobject (Someone's conflicted copy)"}. If -\code{mangle_key} is \code{FALSE} these cannot be detected but at +\code{mangle_key} is \code{"none"}, +these cannot be detected but at the same time are not a real problem for storr. However, if -\code{mangle_key} is \code{TRUE} and keys are base64 encoded then +\code{mangle_key} is \code{"base64"} +and keys are base64 encoded then these conflicted copies can break parts of storr. If you see a warning asking you to deal with these files, please diff --git a/tests/testthat/helper-storr.R b/tests/testthat/helper-storr.R index 02516ee..895f627 100644 --- a/tests/testthat/helper-storr.R +++ b/tests/testthat/helper-storr.R @@ -45,3 +45,28 @@ with_options <- function(opts, code) { on.exit(options(oo)) force(code) } + +test_mangler <- list( + name = "test_mangler", + encode = function(x, pad) { + paste0("test_mangler_", x) + }, + decode = function(x, error) { + gsub(pattern = "^test_mangler_", replacement = "", x = x) + } +) + +test_key <- function(s, raw, encoded) { + key_file <- file.path( + s$driver$path, + "keys", + s$default_namespace, + encoded + ) + testthat::expect_false(file.exists(key_file)) + s$set(raw, "x") + testthat::expect_true(file.exists(key_file)) + testthat::expect_equal(s$get(raw), "x") + testthat::expect_true(s$exists(raw)) + testthat::expect_true(raw %in% s$list()) +} diff --git a/tests/testthat/test-custom-manglers.R b/tests/testthat/test-custom-manglers.R new file mode 100644 index 0000000..c21e58f --- /dev/null +++ b/tests/testthat/test-custom-manglers.R @@ -0,0 +1,98 @@ +context("test-custom-manglers") + +test_that("ignore registered mangler on a new storr", { + on.exit(options(storr_mangler = NULL)) + do.call(register_mangler, test_mangler) + s <- storr_rds(tempfile(), mangle_key = "none") + test_key(s, "a", "a") +}) + +test_that("use registered mangler on a new storr", { + on.exit(options(storr_mangler = NULL)) + do.call(register_mangler, test_mangler) + s <- storr_rds(tempfile(), mangle_key = "test_mangler") + test_key(s, "a", "test_mangler_a") +}) + +test_that("recover an old custom mangler", { + on.exit(options(storr_mangler = NULL)) + do.call(register_mangler, test_mangler) + s <- storr_rds(tempfile(), mangle_key = "test_mangler") + test_key(s, "a", "test_mangler_a") + s <- storr_rds(s$driver$path, mangle_key = "test_mangler") + expect_equal(s$get("a"), "x") + test_key(s, "b", "test_mangler_b") + s <- storr_rds(s$driver$path) + expect_equal(s$get("a"), "x") + expect_equal(s$get("b"), "x") + test_key(s, "c", "test_mangler_c") +}) + +test_that("unregistered mangler and a new storr", { + expect_null(getOption("storr_mangler")) + expect_error( + storr_rds(tempfile(), mangle_key = "test_mangler"), + regex = "not registered" + ) +}) + +test_that("mangle conflicts with mangle_key a new storr", { + on.exit(options(storr_mangler = NULL)) + do.call(register_mangler, test_mangler) + expect_error( + storr_rds(tempfile(), mangle_key = "test_mangler_2"), + regex = "conflicts with mangle_key" + ) +}) + +test_that("overwriting registered manglers", { + on.exit(options(storr_mangler = NULL)) + do.call(register_mangler, test_mangler) + test_mangler_2 <- test_mangler + test_mangler_2$name <- "test_mangler_2" + do.call(register_mangler, test_mangler_2) + s <- storr_rds(tempfile(), mangle_key = "test_mangler") + test_key(s, "a", "test_mangler_a") + test_mangler_2$overwrite = TRUE + expect_warning( + do.call(register_mangler, test_mangler_2), + regex = "Overwriting existing key mangler" + ) + expect_error( + storr_rds(tempfile(), mangle_key = "test_mangler"), + regex = "conflicts with mangle_key" + ) + s <- storr_rds(tempfile(), mangle_key = "test_mangler_2") + test_key(s, "a", "test_mangler_a") +}) + +test_that("new mangler global option conflicts with old stored mangler", { + on.exit(options(storr_mangler = NULL)) + do.call(register_mangler, test_mangler) + s <- storr_rds(tempfile(), mangle_key = "test_mangler") + do.call( + register_mangler, + list(name = "test_mangler_2", encode = I, decode = I) + ) + test_key(s, "a", "test_mangler_a") +}) + +test_that("new custom mangler conflicts with old stored one", { + on.exit(options(storr_mangler = NULL)) + for (mangle_key in c("none", "base64", "test_mangler")) { + if (identical(mangle_key, "test_mangler")) { + do.call(register_mangler, test_mangler) + } + s <- storr_rds(tempfile(), mangle_key = mangle_key) + with_options(list(storr_mangler = test_mangler), { + expect_error( + s <- storr_rds(s$driver$path, mangle_key = "test_mangler_2"), + regex = "Incompatible value for mangle_key" + ) + }) + } + expect_error( + s <- storr_rds(s$driver$path, mangle_key = TRUE), + regex = "Incompatible value for mangle_key" + ) +})