Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Suggests:
DBI (>= 0.6),
RSQLite (>= 1.1-2),
RPostgres,
fst,
knitr,
mockr,
parallel,
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## storr 1.2.2 (2018-??-??)

* Speed up the `$get_hash()` method of RDS drivers using C code and traits (#96, #98, @wlandau).
* Add support for `LZ4` and `ZSTD` compression (via `fst`) for RDS drivers (#110, #111, @wlandau, @MarcusKlik).

## storr 1.2.1 (2018-10-18)

Expand Down
71 changes: 59 additions & 12 deletions R/driver_rds.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,28 @@
##' might be nice for persistent application data.
##'
##' @param compress Compress the generated file? This saves a small
##' amount of space for a reasonable amount of time.
##' amount of space for a reasonable amount of time. Possible values:
##' \describe{
##' \item{"none"}{No compression.}
##' \item{"gzip"}{gzip compression with \code{base::gzfile()} (default).}
##' \item{"lz4"}{lz4 compression via \code{fst::compress_fst()}. Very fast, but lower quality than zstd.}
##' \item{"zstd"}{zstd compression via \code{fst::compress_fst()}. Higher quality but slower than lz4.}
##' }
##' \code{"lz4"} is generally much faster than the other compression methods.
##' \code{"lz4"} and \code{"zstd"} require the \code{fst} package,
##' and they both use more memory (RAM) than the other choices.
##'
##' To preserve compatibility with earlier \code{storr}s,
##' \code{compress} can also be logical:
##' \code{TRUE} for \code{"gzfile"} and \code{FALSE} for \code{"none"}.
##' However, these values are not interchangeable for existings \code{storr}s.
##' For example, if you create a \code{storr} with \code{compress = TRUE},
##' you must continue to use \code{compress = TRUE}
##' and not \code{compress = "gzfile"} when you recover it later.
##'
##' @param compression Numeric compression factor for \code{fst::compress_fst()}.
##' Between 0 and 100: 0 for lowest compression, 100 for maximum compression.
##' Only applies to \code{compress = "lz4"} and \code{compress = "zstd"}.
##'
##' @param mangle_key Mangle keys? If TRUE, then the key is encoded
##' using base64 before saving to the filesystem. See Details.
Expand All @@ -76,6 +97,7 @@
##'
##' @param default_namespace Default namespace (see
##' \code{\link{storr}}).
##'
##' @export
##' @examples
##'
Expand Down Expand Up @@ -108,18 +130,18 @@
##' # Clean up the two storrs:
##' st$destroy()
##' st2$destroy()
storr_rds <- function(path, compress = NULL, mangle_key = NULL,
storr_rds <- function(path, compress = NULL, compression = NULL, mangle_key = NULL,
mangle_key_pad = NULL, hash_algorithm = NULL,
default_namespace = "objects") {
storr(driver_rds(path, compress, mangle_key, mangle_key_pad, hash_algorithm),
storr(driver_rds(path, compress, compression, mangle_key, mangle_key_pad, hash_algorithm),
default_namespace)
}

##' @export
##' @rdname storr_rds
driver_rds <- function(path, compress = NULL, mangle_key = NULL,
driver_rds <- function(path, compress = NULL, compression = NULL, mangle_key = NULL,
mangle_key_pad = NULL, hash_algorithm = NULL) {
R6_driver_rds$new(path, compress, mangle_key, mangle_key_pad, hash_algorithm)
R6_driver_rds$new(path, compress, compression, mangle_key, mangle_key_pad, hash_algorithm)
}

R6_driver_rds <- R6::R6Class(
Expand All @@ -130,13 +152,14 @@ R6_driver_rds <- R6::R6Class(
path = NULL,
path_scratch = NULL,
compress = NULL,
compression = NULL,
mangle_key = NULL,
mangle_key_pad = NULL,
hash_algorithm = NULL,
hash_length = NULL,
traits = list(accept = "raw", throw_missing = TRUE),

initialize = function(path, compress, mangle_key, mangle_key_pad,
initialize = function(path, compress, compression, mangle_key, mangle_key_pad,
hash_algorithm) {
is_new <- !file.exists(file.path(path, "config"))
dir_create(path)
Expand All @@ -154,9 +177,10 @@ R6_driver_rds <- R6::R6Class(
## future versions of driver_rds can use to patch, warn or
## change behaviour with older versions of the storr.
if (!is_new && !file.exists(driver_rds_config_file(path, "version"))) {
write_if_missing("1.0.1", driver_rds_config_file(path, "version"))
write_if_missing("1.2.2", driver_rds_config_file(path, "version"))
write_if_missing("TRUE", driver_rds_config_file(path, "mangle_key_pad"))
write_if_missing("TRUE", driver_rds_config_file(path, "compress"))
write_if_missing("gzip", driver_rds_config_file(path, "compress"))
write_if_missing("0", driver_rds_config_file(path, "compression"))
write_if_missing("md5", driver_rds_config_file(path, "hash_algorithm"))
}
## Then write out the version number:
Expand All @@ -177,10 +201,15 @@ R6_driver_rds <- R6::R6Class(
FALSE, TRUE)

if (!is.null(compress)) {
assert_scalar_logical(compress)
compress <- as.character(compress)
}

self$compress <- driver_rds_config(path, "compress", compress,
TRUE, FALSE)
"TRUE", TRUE)
self$compress <- parse_rds_compress(self$compress)

self$compression <- driver_rds_config(path, "compression", compression,
0, FALSE)

if (!is.null(hash_algorithm)) {
assert_scalar_character(hash_algorithm)
Expand Down Expand Up @@ -211,15 +240,20 @@ R6_driver_rds <- R6::R6Class(
},

get_object = function(hash) {
read_rds(self$name_hash(hash))
out <- read_rds(self$name_hash(hash))
if (self$compress %in% c("lz4", "zstd")) {
out <- fst::decompress_fst(out)
out <- unserialize(out)
}
out
},

set_object = function(hash, value) {
## NOTE: this takes advantage of having the serialized value
## already and avoids seralising twice.
assert_raw(value)
write_serialized_rds(value, self$name_hash(hash), self$compress,
self$path_scratch)
self$compression, self$path_scratch)
},

exists_hash = function(key, namespace) {
Expand Down Expand Up @@ -452,3 +486,16 @@ See 'Corrupt keys' within ?storr_rds for how to proceed" -> fmt
message(sprintf(fmt, length(files), namespace, path, files))
corrupt_notices[[path]] <- now
}

parse_rds_compress <- function(compress) {
if (identical(compress, "TRUE")) {
"gzip"
} else if (identical(compress, "FALSE")) {
"none"
} else if (compress %in% c("lz4", "zstd")) {
loadNamespace("fst")
compress
} else {
compress
}
}
23 changes: 17 additions & 6 deletions R/hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,22 +72,35 @@ serialize_to_raw <- function(x, ascii, xdr) {
## been successful, otherwise the container will claim existence for
## an object which cannot be retrieved later on, causing havoc
## upstream.
write_serialized_rds <- function(value, filename, compress,
write_serialized_rds <- function(value, filename, compress, compression,
scratch_dir = NULL, long = 2^31 - 2) {
withCallingHandlers(
try_write_serialized_rds(value, filename, compress, scratch_dir, long),
try_write_serialized_rds(value, filename, compress, compression, scratch_dir, long),
error = function(e) unlink(filename))
}


## The split here helps keep the order really consistent; we will
## close the connection on exit from try_write_serialized_rds and
## delete the file *after* that.
try_write_serialized_rds <- function(value, filename, compress,
try_write_serialized_rds <- function(value, filename, compress, compression,
scratch_dir = NULL, long = 2^31 - 2) {
tmp <- tempfile(tmpdir = scratch_dir %||% tempdir())
if (compress %in% c("lz4", "zstd")) {
write_tmp_fst(value, tmp, compress, compression)
} else {
write_tmp_default(value, tmp, compress, scratch_dir, long)
}
file.rename(tmp, filename)
}

con <- (if (compress) gzfile else file)(tmp, "wb")
write_tmp_fst <- function(value, filename, compress, compression) {
saveRDS(fst::compress_fst(value, toupper(compress), compression),
filename, compress = FALSE)
}

write_tmp_default <- function(value, filename, compress, scratch_dir, long) {
con <- (if (identical(compress, "gzip")) gzfile else file)(filename, "wb")
needs_close <- TRUE
on.exit(if (needs_close) close(con), add = TRUE)
len <- length(value)
Expand All @@ -99,10 +112,8 @@ try_write_serialized_rds <- function(value, filename, compress,
}
close(con)
needs_close <- FALSE
file.rename(tmp, filename)
}


## Same pattern for write_lines. The difference is that this will
## delete the key on a failed write (otherwise there's a copy
## involved)
Expand Down
1 change: 0 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ assert_probably_storr_driver <- function(x, name = deparse(substitute(x))) {
invisible(x)
}


match_value <- function(x, choices, name = deparse(substitute(x))) {
assert_scalar_character(x, name)
i <- match(x, choices)
Expand Down
2 changes: 1 addition & 1 deletion man/storr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 26 additions & 5 deletions man/storr_rds.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 48 additions & 8 deletions tests/testthat/test-driver-rds.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,25 +143,40 @@ test_that("large vector support", {
})

test_that("compression support", {
skip_if_not_installed("fst")

## some data that will likely compress very well:
data <- rep(1:10, each = 500)

st1 <- storr_rds(tempfile(), TRUE)
st2 <- storr_rds(tempfile(), FALSE)
st1 <- storr_rds(tempfile(), "gzip")
st2 <- storr_rds(tempfile(), "none")
st3 <- storr_rds(tempfile(), "lz4")
st4 <- storr_rds(tempfile(), "zstd")

on.exit({
st1$destroy()
st2$destroy()
st3$destroy()
st4$destroy()
})

h1 <- st1$set("data", data)
h2 <- st2$set("data", data)
h1 <- st1$set("data", data, use_cache = FALSE)
h2 <- st2$set("data", data, use_cache = FALSE)
h3 <- st3$set("data", data, use_cache = FALSE)
h4 <- st4$set("data", data, use_cache = FALSE)

expect_identical(h1, h2)
expect_identical(h1, h2, h3, h4)
expect_gt(file.size(st2$driver$name_hash(h2)),
file.size(st1$driver$name_hash(h1)))

expect_identical(st1$get("data"), data)
expect_identical(st2$get("data"), data)
expect_gt(file.size(st2$driver$name_hash(h2)),
file.size(st3$driver$name_hash(h3)))
expect_gt(file.size(st2$driver$name_hash(h2)),
file.size(st3$driver$name_hash(h4)))

expect_identical(st1$get("data", use_cache = FALSE), data)
expect_identical(st2$get("data", use_cache = FALSE), data)
expect_identical(st3$get("data", use_cache = FALSE), data)
expect_identical(st4$get("data", use_cache = FALSE), data)
})

test_that("backward compatibility", {
Expand All @@ -181,6 +196,31 @@ test_that("backward compatibility", {
class = "ConfigError")
})

test_that("backward compatibility: compression", {
## In version 1.2.1 and earlier, the compress argument was a logical scalar.
path <- copy_to_tmp("v1.2.1_compress_TRUE")
st <- storr_rds(path)
expect_equal(st$list(), "key")
expect_equal(st$get("key"), "value")
expect_silent(st <- storr_rds(path, compress = TRUE))
for (compress in list("none", "gzip", "lz4", "zstd", FALSE)) {
expect_error(st <- storr_rds(path, compress = compress),
"Incompatible value for compress",
class = "ConfigError")
}

path <- copy_to_tmp("v1.2.1_compress_FALSE")
st <- storr_rds(path)
expect_equal(st$list(), "key")
expect_equal(st$get("key"), "value")
expect_silent(st <- storr_rds(path, compress = FALSE))
for (compress in list("none", "gzip", "lz4", "zstd", TRUE)) {
expect_error(st <- storr_rds(path, compress = compress),
"Incompatible value for compress",
class = "ConfigError")
}
})

test_that("mangledness padding backward compatibility", {
## In version 1.0.1 and earlier, mangling was always padded
path <- copy_to_tmp("v1.0.1_mangled")
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_FALSE/config/compress
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FALSE
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_FALSE/config/hash_algorithm
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
md5
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_FALSE/config/mangle_key
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FALSE
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_FALSE/config/mangle_key_pad
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FALSE
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_FALSE/config/version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.2.1
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_FALSE/keys/objects/key
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6abd4188389195dfdfa9a37665e4cca6
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_TRUE/config/compress
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
TRUE
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_TRUE/config/hash_algorithm
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
md5
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_TRUE/config/mangle_key
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FALSE
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_TRUE/config/mangle_key_pad
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FALSE
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_TRUE/config/version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.2.1
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testthat/v1.2.1_compress_TRUE/keys/objects/key
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6abd4188389195dfdfa9a37665e4cca6