diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf..112ad26 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,3 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^\.travis\.yml$ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..f294d6b --- /dev/null +++ b/.travis.yml @@ -0,0 +1,24 @@ +language: r +cache: packages + +# Departure from usual setup because we need docker for testing +sudo: true +services: docker + +# Make tests more reliable by getting this up early +before_script: + - docker pull sickp/alpine-sshd:latest + - docker build tests/testthat/sshd + +env: + - STORR_REMOTE_USE_SSHD=true + +# Not 'addon' based because we're on sudo infrastructure +apt_packages: + - libssh-dev + +# Test coverage +r_packages: + - covr +after_success: + - Rscript -e 'covr::codecov()' diff --git a/DESCRIPTION b/DESCRIPTION index 013acbd..48d061d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,10 +6,13 @@ Description: This package provides remote back ends to the wonderful storr key/val package. Right now AWS S3 is implemented, but we hope it will be easy for others to contribute other backends. Depends: R (>= 3.1.0) -License: What license is it under? 
+License: MIT + file LICENSE Encoding: UTF-8 LazyData: true +Imports: + storr (>= 1.2.0) Suggests: aws.s3, - storr + ssh, + testthat RoxygenNote: 6.0.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7afe5ed --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2018 +COPYRIGHT HOLDER: Ben Gready diff --git a/NAMESPACE b/NAMESPACE index 95bd585..a7121a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,6 @@ # Generated by roxygen2: do not edit by hand -export(storr_rds_s3) +export(driver_s3) +export(driver_ssh) +export(storr_s3) +export(storr_ssh) diff --git a/R/driver_rds_s3.R b/R/driver_rds_s3.R deleted file mode 100644 index d9c43d4..0000000 --- a/R/driver_rds_s3.R +++ /dev/null @@ -1,303 +0,0 @@ -##' @title S3 backend for rds object cache driver -##' @param bucket Name of the S3 bucket for which you wish to create of connect -##' store -##' @inheritParams storr::storr_rds -##' @export -##' @rdname storr_rds_s3 -storr_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL, - default_namespace = "objects") { - - storr::storr(driver_rds_s3(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm), - default_namespace) -} - -driver_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL) { - R6_driver_rds_s3$new(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm) -} - -R6_driver_rds_s3 <- R6::R6Class( - "driver_rds_s3", - public = list( - ## TODO: things like hash_algorithm: do they belong in traits? - ## This needs sorting before anyone writes their own driver! 
- bucket = NULL, - path = NULL, - compress = NULL, - mangle_key = NULL, - mangle_key_pad = NULL, - hash_algorithm = NULL, - traits = list(accept = "raw"), - - initialize = function(bucket, path, compress, mangle_key, mangle_key_pad, - hash_algorithm) { - - is_new <- !s3_object_exists(bucket = bucket, path = file.path(path, "config")) - s3_create_dir(path = path, bucket = bucket) - s3_create_dir(path = file.path(path, "data"), bucket = bucket) - s3_create_dir(path = file.path(path, "keys"), bucket = bucket) - s3_create_dir(path = file.path(path, "config"), bucket = bucket) - self$bucket <- bucket - self$path <- path - - ## This is a bit of complicated dancing around to mantain - ## backward compatibility while allowing better defaults in - ## future versions. I'm writing out a version number here that - ## future versions of driver_rds can use to patch, warn or - ## change behaviour with older versions of the storr. - if (!is_new && !s3_object_exists(path = driver_rds_s3_config_file(path, "version"), bucket = bucket)) { - s3_write_if_missing("1.0.1", bucket = bucket, path = driver_rds_s3_config_file(path, "version")) - s3_write_if_missing("TRUE", bucket = bucket, path = driver_rds_s3_config_file(path, "mangle_key_pad")) - s3_write_if_missing("TRUE", bucket = bucket, path = driver_rds_s3_config_file(path, "compress")) - s3_write_if_missing("md5", bucket = bucket, path = driver_rds_s3_config_file(path, "hash_algorithm")) - } - ## Then write out the version number: - s3_write_if_missing(as.character(packageVersion("storr")), - bucket = bucket, - path = driver_rds_s3_config_file(path, "version")) - - if (!is.null(mangle_key)) { - storr:::assert_scalar_logical(mangle_key) - } - self$mangle_key <- driver_rds_s3_config(bucket, path, "mangle_key", mangle_key, - FALSE, TRUE) - - if (!is.null(mangle_key_pad)) { - storr:::assert_scalar_logical(mangle_key_pad) - } - self$mangle_key_pad <- - driver_rds_s3_config(bucket, path, "mangle_key_pad", mangle_key_pad, - FALSE, TRUE) - - 
if (!is.null(compress)) { - storr:::assert_scalar_logical(compress) - } - self$compress <- driver_rds_s3_config(bucket, path, "compress", compress, - TRUE, FALSE) - - if (!is.null(hash_algorithm)) { - storr:::assert_scalar_character(hash_algorithm) - } - self$hash_algorithm <- driver_rds_s3_config(bucket, path, "hash_algorithm", - hash_algorithm, "md5", TRUE) - }, - - type = function() { - "s3" - }, - - destroy = function() { - s3_delete_recursive(bucket = self$bucket, path = self$path) - }, - - get_hash = function(key, namespace) { - s3_readLines(path = self$name_key(key, namespace), bucket = self$bucket) - }, - - set_hash = function(key, namespace, hash) { - s3_create_dir(path = self$name_key("", namespace), bucket = self$bucket) - s3_writeLines(text = hash, path = self$name_key(key, namespace), bucket = self$bucket) - #*** should be making use of (or making an equivalent version of) the - #write_lines function within the storr package here (I think it deletes - #file if the write fails) - }, - - get_object = function(hash) { - aws.s3::s3readRDS(object = self$name_hash(hash), bucket = self$bucket) - }, - - set_object = function(hash, value) { - ## NOTE: this takes advantage of having the serialized value - ## already and avoids seralising twice. - storr:::assert_raw(value) - - s3_write_serialized_rds(value = value, - filename = self$name_hash(hash), - bucket = self$bucket, - compress = self$compress) - }, - - exists_hash = function(key, namespace) { - s3_object_exists(self$name_key(key, namespace), bucket = self$bucket) - }, - - exists_object = function(hash) { - s3_object_exists(self$name_hash(hash), bucket = self$bucket) - }, - - del_hash = function(key, namespace) { - #s3_delete_file(bucket = self$bucket, path = self$name_key(key, namespace)) - ## above deletes just one file (s3 key). - ## However it will throw an error if the file we are trying to delete - ## looks like a directory. - ## S3 has no actual notion of directory, we just fake it using "/". 
As a - ## result, it's possible to get into a muddle. To play it safe, line below - ## can be uncommented to force it to delete just the path given, but throw - ## a warning, if it does look like a directory can also change to - ## if_dir = "del_recursive" to delete the whole directory with a warning. - ## May never actually show up as an issue, this is just a note. - s3_delete_file(bucket = self$bucket, path = self$name_key(key, namespace), if_dir = "del_only_key") - }, - - del_object = function(hash) { - # see above note which also applies here - s3_delete_file(bucket = self$bucket, path = self$name_hash(key, namespace), if_dir = "del_only_key") - }, - - list_hashes = function() { - sub("\\.rds$", "", s3_list_dir(bucket = self$bucket, path = file.path(self$path, "data"))) - }, - - list_namespaces = function() { - s3_list_dir(bucket = self$bucket, path = file.path(self$path, "keys")) - }, - - list_keys = function(namespace) { - ret <- s3_list_dir(bucket = self$bucket, path = file.path(self$path, "keys", namespace)) - if (self$mangle_key) decode64(ret, TRUE) else ret - }, - - name_hash = function(hash) { - if (length(hash) > 0L) { - file.path(self$path, "data", paste0(hash, ".rds")) - } else { - character(0) - } - }, - - name_key = function(key, namespace) { - if (self$mangle_key) { - key <- encode64(key, pad = self$mangle_key_pad) - } - file.path(self$path, "keys", namespace, key) - } - - )) - -## This attempts to check that we are connecting to a storr of -## appropriate mangledness. There's a lot of logic here, but it's -## actually pretty simple in practice and tested in test-driver-rds.R: -## -## if mangle_key is NULL we take the mangledless of the -## existing storr or set up for no mangling. -## -## if mangle_key is not NULL then it is an error if it differs -## from the existing storr's mangledness. 
-driver_rds_s3_config <- function(bucket, path, name, value, default, must_agree) { - path_opt <- driver_rds_s3_config_file(path, name) - - load_value <- function() { - if (s3_object_exists(bucket, path_opt)) { - value <- s3_readLines(path_opt, bucket) - storage.mode(value) <- storage.mode(default) - } else { - value <- default - } - value - } - - if (is.null(value)) { - value <- load_value() - } else if (must_agree && s3_object_exists(bucket = bucket, path = path_opt)) { - value_prev <- load_value() - if (value != value_prev) { - stop(ConfigError(name, value_prev, value)) - } - } - if (!s3_object_exists(bucket = bucket, path = path_opt)) { - s3_writeLines(text = as.character(value), path = path_opt, bucket = bucket) - } - - value -} - -driver_rds_s3_config_file <- function(path, key) { - file.path(path, "config", key) -} - -s3_write_if_missing <- function(value, bucket, path) { - if (s3_object_exists(bucket, path)) { - s3_writeLines(text, path, bucket) - } -} - -## S3 Helper functions -## -## - -s3_write_serialized_rds <- function(value, filename, bucket, compress) { - # write_serialized_rds(value, self$name_hash(hash), self$compress) - aws.s3::s3write_using(x = value, - FUN = function(v, f) storr:::write_serialized_rds(value = v, filename = f, compress=compress), - object = filename, - bucket = bucket) -} - -s3_create_dir <- function(path, bucket) { - aws.s3::put_folder(folder = path, bucket = bucket) -} - -s3_file_remove <- function(path, bucket) { - - exists <- s3_object_exists(bucket, path) - if (any(exists)) { - objec(path[exists]) - } - invisible(exists) -} - -s3_writeLines <- function(text, path, bucket) { - aws.s3::s3write_using(x = text, FUN = writeLines, object = path, bucket = bucket) -} - -s3_readLines <- function(path, bucket) { - aws.s3::s3read_using(FUN = readLines, object = path, bucket = bucket) -} - -s3_object_exists <- function(bucket, path) { - suppressMessages(aws.s3::head_object(object = path, bucket = bucket)[1]) -} - -s3_list_dir <- 
function(bucket, path) { - - if(substr(path, nchar(path), nchar(path)) != "/") path = paste0(path, "/") - files_table <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf) - keys <- files_table[files_table$Size > 0,]$Key - files <- gsub(pattern = path, replacement = "", x = keys) - split_names <- strsplit(files, "/") - # first element of each split name is the file or directory within path, - # take unique of these, so that directories only appear once - unique(unlist(lapply(split_names, function(x) x[1]))) -} - -s3_delete_recursive <- function(bucket, path, force=FALSE) { - - files <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)[["Key"]] - invisible(lapply(files, function(x) aws.s3::delete_object(x, bucket))) -} - -s3_delete_file <- function(bucket, path, if_dir = c("stop", "del_only_key", "del_recursive")) { - - files <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)[["Key"]] - - if(length(files) > 1){ - if_dir == match.arg(if_dir) # only need this if we get inside this loop - - if(if_dir == "stop"){ - stop("You are trying to delete 1 file, but it looks like it is setup like - a directory") - } else if(if_dir == "del_only_key"){ - warning("You are trying to delete 1 file, but it looks like it is setup - like a directory. Deleted specific path you requested") - invisible(aws.s3::delete_object(object = path, bucket = bucket)) - } else if(if_dir == "del_recursive"){ - warning("You are trying to delete 1 file, but it looks like it is setup - like a directory. 
Deleting recursively everyting below the path - you specified") - s3_delete_recursive(bucket, path) - } - } else{ - invisible(aws.s3::delete_object(object = path, bucket = bucket)) - } -} diff --git a/R/s3.R b/R/s3.R new file mode 100644 index 0000000..329b6b8 --- /dev/null +++ b/R/s3.R @@ -0,0 +1,164 @@ +##' S3 backend for storr +##' +##' @title S3 backend for rds object cache driver +##' +##' @param bucket Name of the S3 bucket for which you wish to create +##' or connect store +##' +##' @param remote_root Base path on the remote bucket +##' +##' @param ... Additional arguments passed through to +##' \code{storr::driver_rds} +##' +##' @param path_local Optional path to a local cache (see +##' \code{storr::driver_remote}) +##' +##' @param default_namespace Default namespace (see \code{storr::storr}) +##' @export +storr_s3 <- function(bucket, remote_root, ..., path_local = NULL, + default_namespace = "objects") { + dr <- driver_s3(bucket, remote_root, ..., path_local = path_local) + storr::storr(dr, default_namespace) +} + + +##' @export +##' @rdname storr_s3 +driver_s3 <- function(bucket, remote_root, ..., path_local = NULL) { + ops <- s3_file_ops(bucket, remote_root) + storr::driver_remote(ops, ..., path_local = path_local) +} + + +s3_file_ops <- function(bucket, root) { + R6_s3_file_ops$new(bucket, root) } + + +R6_s3_file_ops <- R6::R6Class( + "s3_file_ops", + + public = list( + bucket = NULL, + root = NULL, + + initialize = function(bucket, root) { + self$bucket <- bucket + self$root <- root + }, + + type = function() { + "s3" + }, + + destroy = function() { + ## TODO: not sure if this is right?
+ self$delete_dir("") + }, + + create_dir = function(path) { + path_remote <- file.path(self$root, path) + aws.s3::put_folder(folder = path_remote, bucket = self$bucket) + }, + + list_dir = function(path) { + if (substr(path, nchar(path), nchar(path)) != "/") { + path <- paste0(path, "/") + } + path_remote <- file.path(self$root, path) + files_table <- aws.s3::get_bucket_df(bucket = self$bucket, + prefix = path_remote, + max = Inf) + keys <- files_table[files_table$Size > 0,]$Key + ## TODO: this should really be stripped off the front rather + ## than gsub? And probably fixed replacement to avoid + ## regexp-sensitive characters messing things up? + files <- gsub(pattern = path_remote, replacement = "", x = keys) + split_names <- strsplit(files, "/") + ## first element of each split name is the file or directory + ## within path, take unique of these, so that directories only + ## appear once + ## + ## TODO: I _think_ I've done the translation from + ## unique/unlist/lapply to unique/vapply here, but I don't + ## really get what is going on. It's quite likely that what you + ## really want is + ## + ## unique(basename(files)) + ## + ## perhaps? + unique(vapply(split_names, function(x) x[[1L]], character(1))) + }, + + exists = function(path) { + path_remote <- file.path(self$root, path) + ## QUESTION: What messages are produced by this? + suppressMessages( + aws.s3::head_object(object = path_remote, bucket = self$bucket)[[1]]) + }, + + exists_dir = function(path) { + ## TODO: If it is possible to test if the remote thing is a + ## directory (e.g., one of the bits of head_object?) that would + ## be nice to get here too... + self$exists(path) + }, + + delete_file = function(path) { + if (length(path) != 1L) { + ## TODO: this is the most naive way of doing vectorisation, + ## but if aws.s3 can delete multiple objects (the docs suggest + ## it can) you can probably do better.
This function must + ## return a logical vector the same length as "path" + ## indicating if deletion actually happened. + return(vapply(path, self$delete_file, logical(1))) + } + path_remote <- file.path(self$root, path) + ## TODO: I've copied the contents of your previous function + ## here, but stripped it down to ony take the if_dir == + ## "del_only_key" path because that was what looked like what + ## was implemented. I have not tested this! + files <- aws.s3::get_bucket_df(bucket = self$bucket, + prefix = path_remote, + max = Inf)[["Key"]] + aws.s3::delete_object(object = path_remote, bucket = self$bucket) + }, + + delete_dir = function(path) { + path_remote <- file.path(self$root, path) + files <- aws.s3::get_bucket_df(bucket = self$bucket, + prefix = path_remote, max = Inf)[["Key"]] + for (x in files) { + aws.s3::delete_object(x, self$bucket) + } + }, + + upload_file = function(file, dest_dir) { + ## TODO: implement me! Upload a file with full local name + ## `file` into a somewhere with directory prefix `dest_dir` - + ## that's going to be `data` here really. I have taken a guess + ## here. 
+ path_remote <- file.path(self$root, dest_dir, basename(file)) + aws.s3::put_object(file = file, object = path_remote, + bucket = self$bucket) + }, + + download_file = function(file, dest_dir) { + file_remote <- file.path(self$root, file) + dest_file <- file.path(dest_dir, basename(file)) + aws.s3::save_object(file_remote, self$bucket, file = dest_file, + overwrite = TRUE) + }, + + write_string = function(string, dest_file) { + file_remote <- file.path(self$root, dest_file) + aws.s3::s3write_using(x = string, FUN = writeLines, + object = file_remote, bucket = self$bucket) + }, + + read_string = function(file) { + file_remote <- file.path(self$root, file) + aws.s3::s3read_using(FUN = readLines, object = file_remote, + bucket = self$bucket) + } + )) diff --git a/R/ssh.R b/R/ssh.R new file mode 100644 index 0000000..ec57fcd --- /dev/null +++ b/R/ssh.R @@ -0,0 +1,175 @@ +##' ssh backend for storr +##' +##' @title ssh storr +##' +##' @param session A ssh session object (see \code{ssh::ssh_connect}) +##' @inheritParams storr_s3 +##' @export +storr_ssh <- function(session, remote_root, ..., path_local = NULL, + default_namespace = "default_namespace") { + dr <- driver_ssh(session, remote_root, ..., path_local = path_local) + storr::storr(dr, default_namespace) } + + +##' @export +##' @rdname storr_ssh +driver_ssh <- function(session, remote_root, ..., path_local = NULL) { + ops <- ssh_file_ops(session, remote_root) + storr::driver_remote(ops, ..., path_local = path_local) } + + +ssh_file_ops <- function(session, root) { + R6_ssh_file_ops$new(session, root) } + + +R6_ssh_file_ops <- R6::R6Class( + "ssh_file_ops", + + public = list( + session = NULL, + root = NULL, + + initialize = function(session, root) { + assert_is(session, "ssh_session") + assert_scalar_character(root) + self$session <- session + self$root <- root + }, + + ## A human-readable scalar character that storr will use + type = function() { + "ssh" + }, + + ## Totally remove wherever we are storing
files + destroy = function() { + self$delete_dir(NULL) + self$session <- NULL + self$root <- NULL + }, + + ## Create a directory (or something that will act as one) + create_dir = function(path) { + path_remote <- file.path(self$root, path) + ssh::ssh_exec_wait(self$session, + sprintf("mkdir -p %s", shQuote(path_remote))) + }, + + list_dir = function(path) { + path_remote <- file.path(self$root, path) + res <- ssh::ssh_exec_internal(self$session, + sprintf("ls -1 %s", shQuote(path_remote))) + strsplit(rawToChar(res$stdout), "\n", fixed = TRUE)[[1L]] + }, + + exists_remote = function(path, is_directory) { + if (length(path) != 1L) { + ## TODO: vectorise via a remote script? + return(vapply(path, self$exists_remote, logical(1), is_directory, + USE.NAMES = FALSE)) + } + flag <- if (is_directory) "d" else "e" + path_remote <- file.path(self$root, path) + cmd <- sprintf("test -%s %s", flag, shQuote(path_remote)) + ssh::ssh_exec_internal(self$session, cmd, error = FALSE)$status == 0L + }, + + exists = function(path) { + self$exists_remote(path, FALSE) + }, + + exists_dir = function(path) { + self$exists_remote(path, TRUE) + }, + + delete_file = function(path) { + ## NOTE: this should be farmed out to a set of scripts that we + ## move into the remote storr, then we can do this in one single + ## remote call. A bash script can take a path as args and + ## return a vector of 0/1? Path length issues will be a problem + ## and we might need to use a temporary file?
+ exists <- self$exists(path) + for (p in path[exists]) { + path_remote <- file.path(self$root, p) + res <- ssh::ssh_exec_internal( + self$session, + sprintf("rm -f %s", shQuote(path_remote)), + error = FALSE) + } + exists + }, + + delete_dir = function(path) { + path_remote <- + if (is.null(path)) self$root else file.path(self$root, path) + ssh::ssh_exec_wait(self$session, + sprintf("rm -rf %s", shQuote(path_remote))) + }, + + ## local file to remote file + upload_file = function(file, dest_dir) { + if (dest_dir == ".") { + path_remote <- self$root + } else { + path_remote <- file.path(self$root, dest_dir) + self$create_dir(dest_dir) + } + ssh::scp_upload(self$session, file, path_remote, verbose = FALSE) + invisible(file.path(dest_dir, basename(file))) + }, + + download_file = function(file, dest_dir) { + file_remote <- file.path(self$root, file) + tmp <- tempfile() + dir.create(tmp) + on.exit(unlink(tmp, recursive = TRUE)) + file_local <- file.path(tmp, basename(file)) + + ## TODO: This really should be an error on failure in the ssh + ## package imo (or have the option to convert to one) - the file + ## does not exist! I am promoting this to error here, but there + ## may be other warnings that this catches undesirably.
+ res <- tryCatch( + ssh::scp_download(self$session, file_remote, tmp, verbose = FALSE), + warning = identity) + if (!file.exists(file_local)) { + stop("Error downloading file: ", res$message) + } + if (is.null(dest_dir)) { + readBin(file_local, raw(), file.size(file_local)) + } else { + dir.create(dest_dir, FALSE, TRUE) + file.copy(file_local, dest_dir, overwrite = TRUE) + file.path(dest_dir, basename(file)) + } + }, + + write_bytes = function(bytes, dest_file) { + tmp <- tempfile() + dir.create(tmp) + on.exit(unlink(tmp, recursive = TRUE)) + path_local <- file.path(tmp, basename(dest_file)) + writeBin(bytes, path_local) + self$upload_file(path_local, dirname(dest_file)) + }, + + + read_bytes = function(file) { + self$download_file(file, NULL) + }, + + ## NOTE: I am adding a trailing newline here so that later on R's + ## readLines copes with these files better - this means that the + ## remote storr is able to be used as a local storr (except for + ## some issues with configuration). + write_string = function(string, dest_file) { + self$write_bytes(charToRaw(paste0(string, "\n")), dest_file) + }, + + read_string = function(file) { + sub("\n$", "", rawToChar(self$read_bytes(file))) + } + )) diff --git a/R/util_assert.R b/R/util_assert.R new file mode 100644 index 0000000..cf23845 --- /dev/null +++ b/R/util_assert.R @@ -0,0 +1,34 @@ +assert_scalar <- function(x, name = deparse(substitute(x))) { + if (length(x) != 1) { + stop(sprintf("'%s' must be a scalar", name), call. = FALSE) + } +} + + +assert_character <- function(x, name = deparse(substitute(x))) { + if (!is.character(x)) { + stop(sprintf("'%s' must be character", name), call. 
= FALSE) + } +} + + +assert_scalar_character <- function(x, name = deparse(substitute(x))) { + assert_scalar(x, name) + assert_character(x, name) + assert_nonmissing(x, name) +} + + +assert_is <- function(x, what, name = deparse(substitute(x))) { + if (!inherits(x, what)) { + stop(sprintf("'%s' must be a %s", name, + paste(what, collapse = " / ")), call. = FALSE) + } +} + + +assert_nonmissing <- function(x, name = deparse(substitute(x))) { + if (any(is.na(x))) { + stop(sprintf("'%s' must not be NA", name), call. = FALSE) + } +} diff --git a/README.md b/README.md index 3843e82..3708d6d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ # storr.remote -Interface for plugging in remote storage (e.g. AWS S3) to [storr](https://github.com/richfitz/storr) + +[![Travis-CI Build Status](https://travis-ci.org/richfitz/storr.remote.svg?branch=master)](https://travis-ci.org/richfitz/storr.remote) + +Interface for plugging in remote storage (e.g. AWS S3, ssh) to [storr](https://github.com/richfitz/storr) diff --git a/man/storr_rds_s3.Rd b/man/storr_rds_s3.Rd deleted file mode 100644 index da3b162..0000000 --- a/man/storr_rds_s3.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/driver_rds_s3.R -\name{storr_rds_s3} -\alias{storr_rds_s3} -\title{S3 backend for rds object cache driver} -\usage{ -storr_rds_s3(bucket, path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL, - default_namespace = "objects") -} -\arguments{ -\item{bucket}{Name of the S3 bucket for which you wish to create of connect -store} - -\item{path}{Path for the store. \code{tempdir()} is a good choice -for ephemeral storage, The \code{rappdirs} package (on CRAN) -might be nice for persistent application data.} - -\item{compress}{Compress the generated file? This saves a small -amount of space for a reasonable amount of time.} - -\item{mangle_key}{Mangle keys? 
If TRUE, then the key is encoded -using base64 before saving to the filesystem. See Details.} - -\item{mangle_key_pad}{Logical indicating if the filenames created -when using \code{mangle_key} should also be "padded" with the -\code{=} character to make up a round number of bytes. Padding -is required to satisfy the document that describes base64 -encoding (RFC 4648) but can cause problems in some applications -(see \href{{https://github.com/richfitz/storr/issues/43}{this -issue}}. The default is to not pad \emph{new} storr archives. -This should be generally safe to leave alone.} - -\item{hash_algorithm}{Name of the hash algorithm to use. Possible -values are "md5", "sha1", and others supported by -\code{\link{digest}}. If not given, then we will default to -"md5".} - -\item{default_namespace}{Default namespace (see -\code{\link{storr}}).} -} diff --git a/man/storr_s3.Rd b/man/storr_s3.Rd new file mode 100644 index 0000000..8b1b778 --- /dev/null +++ b/man/storr_s3.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/s3.R +\name{storr_s3} +\alias{storr_s3} +\alias{driver_s3} +\title{S3 backend for rds object cache driver} +\usage{ +storr_s3(bucket, remote_root, ..., path_local = NULL, + default_namespace = "objects") + +driver_s3(bucket, remote_root, ..., path_local = NULL) +} +\arguments{ +\item{bucket}{Name of the S3 bucket for which you wish to create +of connect store} + +\item{remote_root}{Base path on the remote bucket} + +\item{...}{Additional arguments passed through to +\code{storr::driver_rds}} + +\item{path_local}{Optional path to a local cache (see +\code{storr::driver_remote})} + +\item{default_namespace}{Default namespace (see \code{storr::storr})} +} +\description{ +S3 backend for storr +} diff --git a/man/storr_ssh.Rd b/man/storr_ssh.Rd new file mode 100644 index 0000000..bd5b97b --- /dev/null +++ b/man/storr_ssh.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit 
documentation in R/ssh.R +\name{storr_ssh} +\alias{storr_ssh} +\alias{driver_ssh} +\title{ssh storr} +\usage{ +storr_ssh(session, remote_root, ..., path_local = NULL, + default_namespace = "default_namespace") + +driver_ssh(session, remote_root, ..., path_local = NULL) +} +\arguments{ +\item{session}{A ssh session object (see \code{ssh::ssh_connect})} + +\item{remote_root}{Base path on the remote bucket} + +\item{...}{Additional arguments passed through to +\code{storr::driver_rds}} + +\item{path_local}{Optional path to a local cache (see +\code{storr::driver_remote})} + +\item{default_namespace}{Default namespace (see \code{storr::storr})} +} +\description{ +ssh backend for storr +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..4337d81 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(storr.remote) + +test_check("storr.remote") diff --git a/tests/testthat/README.md b/tests/testthat/README.md new file mode 100644 index 0000000..8983c68 --- /dev/null +++ b/tests/testthat/README.md @@ -0,0 +1,15 @@ +## storr.remote testing + +### ssh + +You will need `docker` installed and running and, the environment variable `STORR_REMOTE_USE_SSHD` set to the string `true`. For example, add + +``` +STORR_REMOTE_USE_SSHD=true +``` + +to your `~/.Renviron`. This will start a docker container called `storr-remote-sshd` that can be connected to using a set of ssh keys. These keys will be copied to disk (in the `tests/testthat/sshd` directory). + +With recent testthat, the `teardown-ssh.R` script will remove the container once tests have finished. + +If the `ssh` package is not installed the server will not be started, as the tests cannot be run. 
diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R new file mode 100644 index 0000000..f1d080c --- /dev/null +++ b/tests/testthat/helper-ssh.R @@ -0,0 +1,80 @@ +HAS_SSHD_SERVER <- FALSE +HERE <- getwd() + + +start_sshd_server <- function() { + testthat::skip_on_cran() + testthat::skip_if_not_installed("ssh") + if (!identical(Sys.getenv("STORR_REMOTE_USE_SSHD"), "true")) { + skip("Set 'STORR_REMOTE_USE_SSHD' to 'true' to enable sshd tests") + } + + ## Is there an existing server running still? + res <- system3("docker", c("inspect", "storr-remote-sshd")) + if (res$success) { + HAS_SSHD_SERVER <<- TRUE + return(TRUE) + } + + ## Can we start a new one? + res <- system3("sshd/server.sh", check = FALSE) + if (res$success) { + HAS_SSHD_SERVER <<- TRUE + return(TRUE) + } + + FALSE +} + + +stop_sshd_server <- function(force = FALSE) { + if (HAS_SSHD_SERVER || force) { + system3("docker", c("stop", "storr-remote-sshd")) + HAS_SSHD_SERVER <<- FALSE + } +} + + +use_sshd_server <- function() { + if (HAS_SSHD_SERVER || start_sshd_server()) { + return(TRUE) + } + testthat::skip("sshd server not available") +} + + +system3 <- function(command, args = character(), check = FALSE) { + res <- suppressWarnings(system2(command, args, stdout = TRUE, stderr = TRUE)) + status <- attr(res, "status") + code <- if (is.null(status)) 0 else status + attr(res, "status") <- NULL + ret <- list(success = code == 0, + code = code, + output = res) + if (check && !ret$success) { + stop("Command failed: ", paste(ret$output, collapse = "\n")) + } + ret +} + + +test_ssh_connection <- function() { + use_sshd_server() + for (i in 1:10) { + con <- tryCatch( + ssh::ssh_connect("root@127.0.0.1:10022", + file.path(HERE, "sshd/keys/id_rsa")), + error = function(e) NULL) + if (inherits(con, "ssh_session")) { + return(con) + } + ## server may not yet be up: + Sys.sleep(0.1) + } + testthat::skip("Failed to make connection") +} + + +random_path <- function(prefix = "storr_remote", fileext = "") 
{ + basename(tempfile(prefix, fileext = fileext)) +} diff --git a/tests/testthat/sshd/.gitignore b/tests/testthat/sshd/.gitignore new file mode 100644 index 0000000..0bdfd49 --- /dev/null +++ b/tests/testthat/sshd/.gitignore @@ -0,0 +1 @@ +keys diff --git a/tests/testthat/sshd/Dockerfile b/tests/testthat/sshd/Dockerfile new file mode 100644 index 0000000..fe891ea --- /dev/null +++ b/tests/testthat/sshd/Dockerfile @@ -0,0 +1,4 @@ +FROM sickp/alpine-sshd:latest +RUN ssh-keygen -P "" -f /root/.ssh/id_rsa && \ + cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \ + passwd -d root diff --git a/tests/testthat/sshd/server.sh b/tests/testthat/sshd/server.sh new file mode 100755 index 0000000..d2d62c5 --- /dev/null +++ b/tests/testthat/sshd/server.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -ex + +PORT=10022 +NAME_IMAGE=richfitz/alpine-sshd:latest +NAME_CONTAINER=storr-remote-sshd + +HERE=$(dirname $0) +docker build --rm -t $NAME_IMAGE $HERE +docker run -d --rm -p $PORT:22 --name $NAME_CONTAINER $NAME_IMAGE +rm -rf $HERE/keys +docker cp $NAME_CONTAINER:/root/.ssh $HERE/keys diff --git a/tests/testthat/teardown-ssh.R b/tests/testthat/teardown-ssh.R new file mode 100644 index 0000000..af14b2d --- /dev/null +++ b/tests/testthat/teardown-ssh.R @@ -0,0 +1 @@ +stop_sshd_server() diff --git a/tests/testthat/test-driver-ssh.R b/tests/testthat/test-driver-ssh.R new file mode 100644 index 0000000..cd8bcd8 --- /dev/null +++ b/tests/testthat/test-driver-ssh.R @@ -0,0 +1,36 @@ +context("rds over ssh") + +test_that("storr spec", { + create <- function(dr = NULL, ...) { + if (is.null(dr)) { + ops <- ssh_file_ops(test_ssh_connection(), random_path("storr_driver_")) + } else { + ops <- dr$ops + } + storr::driver_remote(ops, ...) 
## The user-facing constructor should wire together the remote driver
## with the ssh file-operations backend and report sensible types.
test_that("user interface", {
  dest <- random_path("storr_remote_")
  st <- storr_ssh(test_ssh_connection(), dest)
  expect_is(st, "storr")
  dr <- st$driver
  expect_is(dr, "driver_remote")
  expect_is(dr$ops, "ssh_file_ops")
  expect_equal(dr$type(), "remote/ssh")
  expect_equal(dr$ops$type(), "ssh")
})


## After deleting an object from the local rds cache, a get() that
## bypasses the cache must refetch it from the remote copy.
test_that("local cache works", {
  store <- storr_ssh(test_ssh_connection(), random_path("storr_remote_"))
  original <- 1:10
  hash <- store$set("a", original)
  ## Drop only the locally cached object; the remote copy survives.
  expect_true(store$driver$rds$del_object(hash))
  expect_identical(store$get("a", use_cache = FALSE), original)
  ## Both local and remote hash listings still know about the object:
  expect_equal(store$list_hashes(), hash)
  expect_equal(store$driver$rds$list_hashes(), hash)
})
test_that("directory operations", {
  ## Not strictly unit tests, but integration tests of basic
  ## operations.
  root <- random_path("storr_ssh_root_")

  ops <- ssh_file_ops(test_ssh_connection(), root)
  on.exit(ops$destroy())

  ## (single assignment here: the original contained a duplicated,
  ## dead 'path <- random_path()' store)
  path <- random_path()
  expect_false(ops$exists(path))
  ops$create_dir(path)
  expect_true(ops$exists(path))
  expect_true(ops$exists_dir(path))
  expect_equal(ops$list_dir(path), character())

  ## delete_file must refuse to remove a directory:
  ops$delete_file(path)
  expect_true(ops$exists_dir(path))
  ops$delete_dir(path)
  expect_false(ops$exists_dir(path))
})


test_that("single file, directoryless io", {
  root <- random_path("storr_ssh_root_")
  path <- random_path()
  bytes <- charToRaw("hello world")

  ops <- ssh_file_ops(test_ssh_connection(), root)
  on.exit(ops$destroy())

  ## Reading a nonexistent file is an error:
  expect_error(ops$read_bytes(path),
               "Error downloading file")
  ops$write_bytes(bytes, path)
  expect_equal(ops$list_dir("."), path)

  expect_true(ops$exists(path))
  expect_equal(ops$read_bytes(path), bytes)

  ## delete_file returns TRUE only when it actually removed a file:
  expect_true(ops$delete_file(path))
  expect_false(ops$exists(path))
  expect_false(ops$delete_file(path))
  expect_equal(ops$list_dir("."), character(0))
})


test_that("directories are created automatically", {
  root <- random_path("storr_ssh_root_")
  path <- file.path("a", "b", "c")
  bytes <- charToRaw("hello world")

  ops <- ssh_file_ops(test_ssh_connection(), root)
  on.exit(ops$destroy())

  expect_error(ops$read_bytes(path),
               "Error downloading file")
  ## Writing to a/b/c must create the intermediate directories:
  ops$write_bytes(bytes, path)

  expect_equal(ops$list_dir("."), "a")
  expect_equal(ops$list_dir("a"), "b")
  expect_equal(ops$list_dir("a/b"), "c")

  expect_true(ops$exists(path))
  expect_equal(ops$read_bytes(path), bytes)

  ## Deleting the file leaves the directory skeleton in place:
  expect_true(ops$delete_file(path))
  expect_false(ops$exists(path))
  expect_false(ops$delete_file(path))
  expect_equal(ops$list_dir("."), "a")
  expect_equal(ops$list_dir("a"), "b")
  expect_equal(ops$list_dir("a/b"), character(0))
})
context("util (assert)")

## Sanity checks for the package's internal assert_* validation
## helpers: each should error on bad input with an informative
## message.
test_that("assertions", {
  ## assert_nonmissing: NA values are rejected
  expect_error(assert_nonmissing(NA), "must not be NA")

  ## assert_is: class membership is enforced
  expect_error(assert_is(1, "foo"), "must be a foo")

  ## assert_character: only character vectors pass
  expect_error(assert_character(1:5), "must be character")

  ## assert_scalar: exactly one element required; NULL and length-2
  ## inputs both fail
  expect_error(assert_scalar(1:2), "must be a scalar")
  expect_error(assert_scalar(NULL), "must be a scalar")
})