diff --git a/DESCRIPTION b/DESCRIPTION index 306def7..410a0f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,5 +32,5 @@ Suggests: rbenchmark, testthat (>= 1.0.0) VignetteBuilder: knitr -RoxygenNote: 6.1.1 +RoxygenNote: 7.0.0 Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index b5b0ab6..72e8614 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ ## storr 1.2.2 (2018-??-??) * Speed up the `$get_hash()` method of RDS drivers using C code and traits (#96, #98, @wlandau). +* Add a new `use_scratch_keys` argument to `driver_rds()` and `storr_rds()`. Allows users to bypass the `scratch` directory for keys. Speeds up processing on some file systems (#116, @wlandau). ## storr 1.2.1 (2018-10-18) diff --git a/R/driver_rds.R b/R/driver_rds.R index 5a30f59..7a14f5e 100644 --- a/R/driver_rds.R +++ b/R/driver_rds.R @@ -74,6 +74,11 @@ ##' \code{\link{digest}}. If not given, then we will default to ##' "md5". ##' +##' @param use_scratch_keys Logical, whether to create key files +##' in a scratch directory before moving them to their final destinations. +##' Set to \code{TRUE} to ensure atomic reads and writes for keys, or set to +##' \code{FALSE} to increase speed on some platforms (namely Windows). +##' ##' @param default_namespace Default namespace (see ##' \code{\link{storr}}). 
##' @export @@ -110,16 +115,17 @@ ##' st2$destroy() storr_rds <- function(path, compress = NULL, mangle_key = NULL, mangle_key_pad = NULL, hash_algorithm = NULL, - default_namespace = "objects") { - storr(driver_rds(path, compress, mangle_key, mangle_key_pad, hash_algorithm), + default_namespace = "objects", use_scratch_keys = TRUE) { + storr(driver_rds(path, compress, mangle_key, mangle_key_pad, hash_algorithm, use_scratch_keys), default_namespace) } ##' @export ##' @rdname storr_rds driver_rds <- function(path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL) { - R6_driver_rds$new(path, compress, mangle_key, mangle_key_pad, hash_algorithm) + mangle_key_pad = NULL, hash_algorithm = NULL, + use_scratch_keys = TRUE) { + R6_driver_rds$new(path, compress, mangle_key, mangle_key_pad, hash_algorithm, use_scratch_keys) } R6_driver_rds <- R6::R6Class( @@ -129,6 +135,7 @@ R6_driver_rds <- R6::R6Class( ## This needs sorting before anyone writes their own driver! 
path = NULL, path_scratch = NULL, + use_scratch_keys = NULL, compress = NULL, mangle_key = NULL, mangle_key_pad = NULL, @@ -137,16 +144,16 @@ R6_driver_rds <- R6::R6Class( traits = list(accept = "raw", throw_missing = TRUE), initialize = function(path, compress, mangle_key, mangle_key_pad, - hash_algorithm) { + hash_algorithm, use_scratch_keys) { is_new <- !file.exists(file.path(path, "config")) dir_create(path) dir_create(file.path(path, "data")) dir_create(file.path(path, "keys")) dir_create(file.path(path, "config")) self$path <- normalizePath(path, mustWork = TRUE) - self$path_scratch <- file.path(self$path, "scratch") dir_create(self$path_scratch) + self$use_scratch_keys <- use_scratch_keys ## This is a bit of complicated dancing around to mantain ## backward compatibility while allowing better defaults in @@ -187,7 +194,7 @@ R6_driver_rds <- R6::R6Class( } self$hash_algorithm <- driver_rds_config(path, "hash_algorithm", hash_algorithm, "md5", TRUE) - + self$hash_length <- nchar( digest::digest(as.raw(0x00), self$hash_algorithm, serialize = FALSE)) }, @@ -207,7 +214,7 @@ R6_driver_rds <- R6::R6Class( set_hash = function(key, namespace, hash) { dir_create(self$name_key("", namespace)) write_lines(hash, self$name_key(key, namespace), - scratch_dir = self$path_scratch) + scratch_dir = self$path_scratch, use_scratch_keys = self$use_scratch_keys) }, get_object = function(hash) { diff --git a/R/hash.R b/R/hash.R index 80fb64e..a3bd5f0 100644 --- a/R/hash.R +++ b/R/hash.R @@ -85,8 +85,8 @@ write_serialized_rds <- function(value, filename, compress, ## delete the file *after* that. 
try_write_serialized_rds <- function(value, filename, compress, scratch_dir = NULL, long = 2^31 - 2) { - tmp <- tempfile(tmpdir = scratch_dir %||% tempdir()) + tmp <- tempfile(tmpdir = scratch_dir %||% tempdir()) con <- (if (compress) gzfile else file)(tmp, "wb") needs_close <- TRUE on.exit(if (needs_close) close(con), add = TRUE) @@ -106,9 +106,11 @@ try_write_serialized_rds <- function(value, filename, compress, ## Same pattern for write_lines. The difference is that this will ## delete the key on a failed write (otherwise there's a copy ## involved) -write_lines <- function(text, filename, ..., scratch_dir = NULL) { +write_lines <- function(text, filename, ..., + scratch_dir = NULL, use_scratch_keys = TRUE) { withCallingHandlers( - try_write_lines(text, filename, ..., scratch_dir = scratch_dir), + try_write_lines(text, filename, ..., scratch_dir = scratch_dir, + use_scratch_keys = use_scratch_keys), error = function(e) unlink(filename)) } @@ -116,10 +118,12 @@ write_lines <- function(text, filename, ..., scratch_dir = NULL) { ## This implements write-then-move for writeLines, which gives us ## atomic writes and rewrites. If 'scratch' is on the same filesystem ## as dirname(filename), then the os's rename is atomic -try_write_lines <- function(text, filename, ..., scratch_dir) { - tmp <- tempfile(tmpdir = scratch_dir %||% tempdir()) +try_write_lines <- function(text, filename, ..., scratch_dir, use_scratch_keys) { + tmp <- if (use_scratch_keys) tempfile(tmpdir = scratch_dir %||% tempdir()) else filename writeLines(text, tmp, ...) ## Not 100% necessary and strictly makes this nonatomic - unlink(filename) - file.rename(tmp, filename) + if (use_scratch_keys) { + unlink(filename) + file.rename(tmp, filename) + } } diff --git a/man/storr.Rd b/man/storr.Rd index 4d92b76..116857a 100644 --- a/man/storr.Rd +++ b/man/storr.Rd @@ -11,7 +11,7 @@ storr(driver, default_namespace = "objects") \item{default_namespace}{Default namespace to store objects in. 
By default \code{"objects"} is used, but this might be useful to -have two diffent \code{storr} objects pointing at the same +have two different \code{storr} objects pointing at the same underlying storage, but storing things in different namespaces.} } \description{ diff --git a/man/storr_dbi.Rd b/man/storr_dbi.Rd index efba7b3..39ffe9a 100644 --- a/man/storr_dbi.Rd +++ b/man/storr_dbi.Rd @@ -5,11 +5,24 @@ \alias{driver_dbi} \title{DBI storr driver} \usage{ -storr_dbi(tbl_data, tbl_keys, con, args = NULL, binary = NULL, - hash_algorithm = NULL, default_namespace = "objects") - -driver_dbi(tbl_data, tbl_keys, con, args = NULL, binary = NULL, - hash_algorithm = NULL) +storr_dbi( + tbl_data, + tbl_keys, + con, + args = NULL, + binary = NULL, + hash_algorithm = NULL, + default_namespace = "objects" +) + +driver_dbi( + tbl_data, + tbl_keys, + con, + args = NULL, + binary = NULL, + hash_algorithm = NULL +) } \arguments{ \item{tbl_data}{Name for the table that maps hashes to values} diff --git a/man/storr_environment.Rd b/man/storr_environment.Rd index 101c346..7881073 100644 --- a/man/storr_environment.Rd +++ b/man/storr_environment.Rd @@ -5,8 +5,11 @@ \alias{driver_environment} \title{Environment object cache driver} \usage{ -storr_environment(envir = NULL, hash_algorithm = NULL, - default_namespace = "objects") +storr_environment( + envir = NULL, + hash_algorithm = NULL, + default_namespace = "objects" +) driver_environment(envir = NULL, hash_algorithm = NULL) } diff --git a/man/storr_rds.Rd b/man/storr_rds.Rd index a5c4e23..7e4ca56 100644 --- a/man/storr_rds.Rd +++ b/man/storr_rds.Rd @@ -5,12 +5,24 @@ \alias{driver_rds} \title{rds object cache driver} \usage{ -storr_rds(path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL, - default_namespace = "objects") - -driver_rds(path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL) +storr_rds( + path, + compress = NULL, + mangle_key = NULL, + 
mangle_key_pad = NULL, + hash_algorithm = NULL, + default_namespace = "objects", + use_scratch_keys = TRUE +) + +driver_rds( + path, + compress = NULL, + mangle_key = NULL, + mangle_key_pad = NULL, + hash_algorithm = NULL, + use_scratch_keys = TRUE +) } \arguments{ \item{path}{Path for the store. \code{tempdir()} is a good choice @@ -39,6 +51,11 @@ values are "md5", "sha1", and others supported by \item{default_namespace}{Default namespace (see \code{\link{storr}}).} + +\item{use_scratch_keys}{Logical, whether to create key files +in a scratch directory before moving them to their final destinations. +Set to \code{TRUE} to ensure atomic reads and writes for keys, or set to +\code{FALSE} to increase speed on some platforms (namely Windows).} } \description{ Object cache driver that saves objects using R's native diff --git a/tests/testthat/test-driver-rds.R b/tests/testthat/test-driver-rds.R index eaf6063..517f413 100644 --- a/tests/testthat/test-driver-rds.R +++ b/tests/testthat/test-driver-rds.R @@ -381,3 +381,10 @@ test_that("avoid race condition when writing in parallel", { ok <- vlapply(1:10, function(i) racy_write()) expect_true(all(ok)) }) + +test_that("use_scratch_keys = FALSE (#116)", { + st <- storr_rds(tempfile(), use_scratch_keys = FALSE) + st$set("a", "a") + expect_equal(st$get("a"), "a") + expect_false(st$driver$use_scratch_keys) +})