From b9c6954640b9ceef30f4ab2566d5bbd407ff08ca Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 1 May 2018 07:40:40 +0100 Subject: [PATCH 01/30] Docker image for testing ssh --- tests/testthat/sshd/.gitignore | 1 + tests/testthat/sshd/Dockerfile | 3 +++ tests/testthat/sshd/server.sh | 12 ++++++++++++ 3 files changed, 16 insertions(+) create mode 100644 tests/testthat/sshd/.gitignore create mode 100644 tests/testthat/sshd/Dockerfile create mode 100755 tests/testthat/sshd/server.sh diff --git a/tests/testthat/sshd/.gitignore b/tests/testthat/sshd/.gitignore new file mode 100644 index 0000000..0bdfd49 --- /dev/null +++ b/tests/testthat/sshd/.gitignore @@ -0,0 +1 @@ +keys diff --git a/tests/testthat/sshd/Dockerfile b/tests/testthat/sshd/Dockerfile new file mode 100644 index 0000000..7d4d088 --- /dev/null +++ b/tests/testthat/sshd/Dockerfile @@ -0,0 +1,3 @@ +FROM sickp/alpine-sshd:latest +RUN ssh-keygen -P "" -f /root/.ssh/id_rsa && \ + cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys diff --git a/tests/testthat/sshd/server.sh b/tests/testthat/sshd/server.sh new file mode 100755 index 0000000..b5631c2 --- /dev/null +++ b/tests/testthat/sshd/server.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -ex + +PORT=10022 +NAME_IMAGE=richfitz/alpine-sshd:latest +NAME_CONTAINER=storr-remote-sshd + +HERE=$(dirname $0) +docker build --rm -t $NAME_IMAGE $HERE +docker run -d --rm -p $PORT:22 --name $NAME_CONTAINER $NAME_IMAGE +rm -rf $HERE/keys +docker cp $NAME_CONTAINER:/root/.ssh keys From 4f8570f8bec2d6c9a754e8339b6dd08a81f01ef7 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 1 May 2018 07:46:10 +0100 Subject: [PATCH 02/30] Add basic test infrastructure --- DESCRIPTION | 4 +++- tests/testthat.R | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/testthat.R diff --git a/DESCRIPTION b/DESCRIPTION index 013acbd..a1d72cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,5 +11,7 @@ Encoding: UTF-8 LazyData: true Suggests: aws.s3, - storr + 
ssh.utils, + storr, + testthat RoxygenNote: 6.0.1 diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..4337d81 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(storr.remote) + +test_check("storr.remote") From ccaa05d6dfae1edd573a2be2ecacf7851f1f53f1 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 1 May 2018 07:46:30 +0100 Subject: [PATCH 03/30] Basic support for creating/removing sshd server --- R/util.R | 3 +++ tests/testthat/helper-ssh.R | 44 +++++++++++++++++++++++++++++++++++ tests/testthat/teardown-ssh.R | 1 + 3 files changed, 48 insertions(+) create mode 100644 R/util.R create mode 100644 tests/testthat/helper-ssh.R create mode 100644 tests/testthat/teardown-ssh.R diff --git a/R/util.R b/R/util.R new file mode 100644 index 0000000..e0c63e1 --- /dev/null +++ b/R/util.R @@ -0,0 +1,3 @@ +`%||%` <- function(a, b) { + if (is.null(a)) b else a +} diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R new file mode 100644 index 0000000..c5cdcb7 --- /dev/null +++ b/tests/testthat/helper-ssh.R @@ -0,0 +1,44 @@ +HAS_SSHD_SERVER <- FALSE + + +start_sshd_server <- function() { + testthat::skip_on_cran() + if (!identical(Sys.getenv("STORR_REMOTE_USE_SSHD"), "true")) { + skip("Set 'STORR_REMOTE_USE_SSHD' to 'true' to enable sshd tests") + } + res <- system3("sshd/server.sh", check = FALSE) + if (res$success) { + HAS_SSHD_SERVER <<- TRUE + } + res$success +} + + +stop_sshd_server <- function() { + if (HAS_SSHD_SERVER) { + system3("docker", c("stop", "storr-remote-sshd")) + HAS_SSHD_SERVER <<- FALSE + } +} + + +use_sshd_server <- function() { + if (HAS_SSHD_SERVER || start_sshd_server()) { + return(TRUE) + } + testthat::skip("sshd server not available") +} + + +system3 <- function(command, args = character(), check = FALSE) { + res <- suppressWarnings(system2(command, args, stdout = TRUE, stderr = TRUE)) + code <- attr(res, "status") %||% 0 + attr(res, "status") <- NULL + ret <- 
list(success = code == 0, + code = code, + output = res) + if (check && !ret$success) { + stop("Command failed: ", paste(ret$output, collapse = "\n")) + } + ret +} diff --git a/tests/testthat/teardown-ssh.R b/tests/testthat/teardown-ssh.R new file mode 100644 index 0000000..af14b2d --- /dev/null +++ b/tests/testthat/teardown-ssh.R @@ -0,0 +1 @@ +stop_sshd_server() From cc4c7a45e945abaf8d0ca645a1fd0858aa3f30f1 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 1 May 2018 07:56:18 +0100 Subject: [PATCH 04/30] Add helper for making connection --- tests/testthat/helper-ssh.R | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R index c5cdcb7..2725ce8 100644 --- a/tests/testthat/helper-ssh.R +++ b/tests/testthat/helper-ssh.R @@ -14,8 +14,8 @@ start_sshd_server <- function() { } -stop_sshd_server <- function() { - if (HAS_SSHD_SERVER) { +stop_sshd_server <- function(force = FALSE) { + if (HAS_SSHD_SERVER || force) { system3("docker", c("stop", "storr-remote-sshd")) HAS_SSHD_SERVER <<- FALSE } @@ -42,3 +42,18 @@ system3 <- function(command, args = character(), check = FALSE) { } ret } + + +test_ssh_connection <- function() { + use_sshd_server() + for (i in 1:10) { + con <- tryCatch(ssh::ssh_connect("root@127.0.0.1:10022", "keys/id_rsa"), + error = function(e) NULL) + if (inherits(con, "ssh_session")) { + return(con) + } + ## server may not yet be up: + Sys.sleep(0.1) + } + testthat::skip("Failed to make connection") +} From bf46bb80d7d2a30a5f4a773987a01fcf4799ff55 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 1 May 2018 07:57:32 +0100 Subject: [PATCH 05/30] Store keys within sshd path --- tests/testthat/helper-ssh.R | 5 +++-- tests/testthat/sshd/server.sh | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R index 2725ce8..f018595 100644 --- a/tests/testthat/helper-ssh.R +++ 
b/tests/testthat/helper-ssh.R @@ -47,8 +47,9 @@ system3 <- function(command, args = character(), check = FALSE) { test_ssh_connection <- function() { use_sshd_server() for (i in 1:10) { - con <- tryCatch(ssh::ssh_connect("root@127.0.0.1:10022", "keys/id_rsa"), - error = function(e) NULL) + con <- tryCatch( + ssh::ssh_connect("root@127.0.0.1:10022", "sshd/keys/id_rsa"), + error = function(e) NULL) if (inherits(con, "ssh_session")) { return(con) } diff --git a/tests/testthat/sshd/server.sh b/tests/testthat/sshd/server.sh index b5631c2..d2d62c5 100755 --- a/tests/testthat/sshd/server.sh +++ b/tests/testthat/sshd/server.sh @@ -9,4 +9,4 @@ HERE=$(dirname $0) docker build --rm -t $NAME_IMAGE $HERE docker run -d --rm -p $PORT:22 --name $NAME_CONTAINER $NAME_IMAGE rm -rf $HERE/keys -docker cp $NAME_CONTAINER:/root/.ssh keys +docker cp $NAME_CONTAINER:/root/.ssh $HERE/keys From bf9ddfb8f48de6531717cf25c2701d8d9d03a94b Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Wed, 2 May 2018 17:46:03 +0100 Subject: [PATCH 06/30] Basic ssh file ops implemented Read and write bytes and files, directory creation, listing, etc --- R/ssh_file_ops.R | 133 +++++++++++++++++++++++++++++ R/util.R | 5 ++ R/util_assert.R | 34 ++++++++ tests/testthat/helper-ssh.R | 18 +++- tests/testthat/test-ssh-file-ops.R | 74 ++++++++++++++++ 5 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 R/ssh_file_ops.R create mode 100644 R/util_assert.R create mode 100644 tests/testthat/test-ssh-file-ops.R diff --git a/R/ssh_file_ops.R b/R/ssh_file_ops.R new file mode 100644 index 0000000..c86bdf7 --- /dev/null +++ b/R/ssh_file_ops.R @@ -0,0 +1,133 @@ +ssh_file_ops <- function(session, root) { + R6_ssh_file_ops$new(session, root) +} + + +## So the issues here are +## +## 1. listing files is annoying and the test-for-existance bit needs +## not doing +## +## 2. tunelling something over ssh will be better! +## +## 3. 
docs for throw/not throw +## +## we can vectorise most of the read/write/exists operations but that +## is going to require a little support. Perhaps the simplest +## approach would be to set up a directory of scripts on the remote +## machine so that we don't have to communicate only with exit codes +## but could work with json directly? +## +## This assumes that we are "in control" of a directory. I'm going to +## assume that nothing else writes to this directory ever - anything +## that does could cause this to fail in unexpected ways! +R6_ssh_file_ops <- R6::R6Class( + "ssh_file_ops", + + public = list( + session = NULL, + root = NULL, + + initialize = function(session, root) { + assert_is(session, "ssh_session") + assert_scalar_character(root) + self$session <- session + self$root <- root + }, + + ## The two use cases that we can do easily are: + ## + ## upload arbitrary bytes to an arbitrary filename + ## upload a file from a path here to an identical path there + + ## local file to remote file + write_file = function(file, dest_dir) { + if (dest_dir == ".") { + path_remote <- self$root + } else { + path_remote <- file.path(self$root, dest_dir) + self$create_dir(dest_dir) + } + ssh::scp_upload(self$session, file, path_remote, verbose = FALSE) + invisible(file.path(dest_dir, basename(file))) + }, + + write_bytes = function(bytes, dest_file) { + tmp <- tempfile() + dir.create(tmp) + on.exit(unlink(tmp, recursive = TRUE)) + path_local <- file.path(tmp, basename(dest_file)) + writeBin(bytes, path_local) + self$write_file(path_local, dirname(dest_file)) + }, + + read_file = function(file, dest) { + file_remote <- file.path(self$root, file) + ## TODO: This really should be an error on failure in the ssh + ## package imo (or have the option to convert to one) - the file + ## does not exist! I am promoting this to error here, but there + ## may be other warnings that this lets through. 
+ tmp <- tempfile() + dir.create(tmp) + on.exit(unlink(tmp, recursive = TRUE)) + file_local <- file.path(tmp, basename(file)) + + res <- tryCatch( + ssh::scp_download(self$session, file_remote, tmp, verbose = FALSE), + warning = identity) + if (!file.exists(file_local)) { + stop("Error downloading file: ", res$message) + } + if (is.null(dest)) { + read_binary(file_local) + } else { + dir.create(dirname(dest), FALSE, TRUE) + file.copy(file_local, dest) + dest + } + }, + + read_bytes = function(file) { + self$read_file(file, NULL) + }, + + ## TODO: not vectorised + exists = function(path, type = c("any", "file", "directory")) { + flag <- c(file = "f", directory = "d", any = "e")[[match.arg(type)]] + path_remote <- file.path(self$root, path) + cmd <- sprintf("test -%s %s", flag, shQuote(path_remote)) + ssh::ssh_exec_internal(self$session, cmd, error = FALSE)$status == 0L + }, + + create_dir = function(path) { + path_remote <- file.path(self$root, path) + ssh::ssh_exec_wait(self$session, + sprintf("mkdir -p %s", shQuote(path_remote))) + }, + + list_dir = function(path) { + path_remote <- file.path(self$root, path) + res <- ssh::ssh_exec_internal(self$session, + sprintf("ls -1 %s", shQuote(path_remote))) + strsplit(rawToChar(res$stdout), "\n", fixed = TRUE)[[1L]] + }, + + delete_file = function(path) { + ## NOTE: This can't notify which were deleted + path_remote <- file.path(self$root, path) + res <- ssh::ssh_exec_internal(self$session, + sprintf("rm -f %s", shQuote(path_remote)), + error = FALSE) + }, + + delete_directory = function(path) { + path_remote <- file.path(self$root, path) + ssh::ssh_exec_wait(self$session, + sprintf("rm -rf %s", shQuote(path_remote))) + }, + + destroy = function() { + ssh::ssh_exec_wait(self$session, + sprintf("rm -rf %s", shQuote(self$root))) + } + )) diff --git a/R/util.R b/R/util.R index e0c63e1..432030e 100644 --- a/R/util.R +++ b/R/util.R @@ -1,3 +1,8 @@ `%||%` <- function(a, b) { if (is.null(a)) b else a } + + +read_binary <- 
function(x) { + readBin(x, raw(), file.size(x)) +} diff --git a/R/util_assert.R b/R/util_assert.R new file mode 100644 index 0000000..cf23845 --- /dev/null +++ b/R/util_assert.R @@ -0,0 +1,34 @@ +assert_scalar <- function(x, name = deparse(substitute(x))) { + if (length(x) != 1) { + stop(sprintf("'%s' must be a scalar", name), call. = FALSE) + } +} + + +assert_character <- function(x, name = deparse(substitute(x))) { + if (!is.character(x)) { + stop(sprintf("'%s' must be character", name), call. = FALSE) + } +} + + +assert_scalar_character <- function(x, name = deparse(substitute(x))) { + assert_scalar(x, name) + assert_character(x, name) + assert_nonmissing(x, name) +} + + +assert_is <- function(x, what, name = deparse(substitute(x))) { + if (!inherits(x, what)) { + stop(sprintf("'%s' must be a %s", name, + paste(what, collapse = " / ")), call. = FALSE) + } +} + + +assert_nonmissing <- function(x, name = deparse(substitute(x))) { + if (any(is.na(x))) { + stop(sprintf("'%s' must not be NA", name), call. = FALSE) + } +} diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R index f018595..7c8cd87 100644 --- a/tests/testthat/helper-ssh.R +++ b/tests/testthat/helper-ssh.R @@ -6,11 +6,22 @@ start_sshd_server <- function() { if (!identical(Sys.getenv("STORR_REMOTE_USE_SSHD"), "true")) { skip("Set 'STORR_REMOTE_USE_SSHD' to 'true' to enable sshd tests") } + + ## Is there an existing server running still? + res <- system3("docker", c("inspect", "storr-remote-sshd")) + if (res$success) { + HAS_SSHD_SERVER <<- TRUE + return(TRUE) + } + + ## Can we start a new one? 
res <- system3("sshd/server.sh", check = FALSE) if (res$success) { HAS_SSHD_SERVER <<- TRUE + return(TRUE) } - res$success + + FALSE } @@ -58,3 +69,8 @@ test_ssh_connection <- function() { } testthat::skip("Failed to make connection") } + + +random_path <- function(prefix = "storr_remote", fileext = "") { + basename(tempfile(prefix, fileext = fileext)) +} diff --git a/tests/testthat/test-ssh-file-ops.R b/tests/testthat/test-ssh-file-ops.R new file mode 100644 index 0000000..986697c --- /dev/null +++ b/tests/testthat/test-ssh-file-ops.R @@ -0,0 +1,74 @@ +context("ssh_file_ops") + +test_that("directory operations", { + ## Not strictly unit tests, but integration tests of basic + ## operations. + + root <- random_path("storr_ssh_root_") + path <- random_path() + + ops <- ssh_file_ops(test_ssh_connection(), root) + on.exit(ops$destroy()) + + path <- random_path() + expect_false(ops$exists(path)) + ops$create_dir(path) + expect_false(ops$exists(path, "file")) + expect_true(ops$exists(path, "any")) + expect_true(ops$exists(path, "directory")) + expect_equal(ops$list_dir(path), character()) + + ops$delete_file(path) + expect_true(ops$exists(path, "directory")) + ops$delete_directory(path) + expect_false(ops$exists(path, "directory")) +}) + + +test_that("single file, directoryless io", { + root <- random_path("storr_ssh_root_") + path <- random_path() + bytes <- charToRaw("hello world") + + ops <- ssh_file_ops(test_ssh_connection(), root) + on.exit(ops$destroy()) + + expect_error(ops$read_bytes(path), + "Error downloading file") + ops$write_bytes(bytes, path) + expect_equal(ops$list_dir("."), path) + + expect_true(ops$exists(path)) + expect_equal(ops$read_bytes(path), bytes) + + ops$delete_file(path) + expect_false(ops$exists(path)) + expect_equal(ops$list_dir("."), character(0)) +}) + + +test_that("directories are created automatically", { + root <- random_path("storr_ssh_root_") + path <- file.path("a", "b", "c") + bytes <- charToRaw("hello world") + + ops <- 
ssh_file_ops(test_ssh_connection(), root) + on.exit(ops$destroy()) + + expect_error(ops$read_bytes(path), + "Error downloading file") + ops$write_bytes(bytes, path) + + expect_equal(ops$list_dir("."), "a") + expect_equal(ops$list_dir("a"), "b") + expect_equal(ops$list_dir("a/b"), "c") + + expect_true(ops$exists(path)) + expect_equal(ops$read_bytes(path), bytes) + + ops$delete_file(path) + expect_false(ops$exists(path)) + expect_equal(ops$list_dir("."), "a") + expect_equal(ops$list_dir("a"), "b") + expect_equal(ops$list_dir("a/b"), character(0)) +}) From d5b9757f0c557838145c47505d5a2c2ee235d8b2 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 08:28:14 +0100 Subject: [PATCH 07/30] Vectorised exists/delete and report exists on delete --- R/ssh_file_ops.R | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/R/ssh_file_ops.R b/R/ssh_file_ops.R index c86bdf7..7351d14 100644 --- a/R/ssh_file_ops.R +++ b/R/ssh_file_ops.R @@ -93,6 +93,10 @@ R6_ssh_file_ops <- R6::R6Class( ## TODO: not vectorised exists = function(path, type = c("any", "file", "directory")) { + if (length(path) != 1L) { + return(vapply(path, self$exists, logical(1), type, + USE.NAMES = FALSE)) + } flag <- c(file = "f", directory = "d", any = "e")[[match.arg(type)]] path_remote <- file.path(self$root, path) cmd <- sprintf("test -%s %s", flag, shQuote(path_remote)) @@ -113,11 +117,20 @@ R6_ssh_file_ops <- R6::R6Class( }, delete_file = function(path) { - ## NOTE: This can't notify which were deleted - path_remote <- file.path(self$root, path) - res <- ssh::ssh_exec_internal(self$session, - sprintf("rm -f %s", shQuote(path_remote)), - error = FALSE) + ## NOTE: this should be farmed out to a set of scripts that we + ## move into the remote storr, then we can do this in one single + ## remote call. A bash script can cake a path as args and + ## return a vector of 0/1? Path length issues will be a problem + ## and we might need to use a temporary file? 
+ exists <- self$exists(path) + for (p in path[exists]) { + path_remote <- file.path(self$root, path) + res <- ssh::ssh_exec_internal( + self$session, + sprintf("rm -f %s", shQuote(path_remote)), + error = FALSE) + } + exists }, delete_directory = function(path) { From 8bc3e5bc4aa34d4000837c2ad1efcef3ee8e1570 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 08:28:41 +0100 Subject: [PATCH 08/30] Human-readable ops type --- R/ssh_file_ops.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ssh_file_ops.R b/R/ssh_file_ops.R index 7351d14..1bdc702 100644 --- a/R/ssh_file_ops.R +++ b/R/ssh_file_ops.R @@ -35,6 +35,10 @@ R6_ssh_file_ops <- R6::R6Class( self$root <- root }, + type = function() { + "ssh" + }, + ## The two use cases that we can do easily are: ## ## upload arbitrary bytes to an arbitrary filename From 8af8cd2b96236359e9559b446bc38d9c820c188b Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 08:44:17 +0100 Subject: [PATCH 09/30] Tidy up the ssh ops --- R/ssh_file_ops.R | 163 ++++++++++++++--------------- tests/testthat/test-ssh-file-ops.R | 8 +- 2 files changed, 84 insertions(+), 87 deletions(-) diff --git a/R/ssh_file_ops.R b/R/ssh_file_ops.R index 1bdc702..1102e27 100644 --- a/R/ssh_file_ops.R +++ b/R/ssh_file_ops.R @@ -3,24 +3,6 @@ ssh_file_ops <- function(session, root) { } -## So the issues here are -## -## 1. listing files is annoying and the test-for-existance bit needs -## not doing -## -## 2. tunelling something over ssh will be better! -## -## 3. docs for throw/not throw -## -## we can vectorise most of the read/write/exists operations but that -## is going to require a little support. Perhaps the simplest -## approach would be to set up a directory of scripts on the remote -## machine so that we don't have to communicate only with exit codes -## but could work with json directly? -## -## This assumes that we are "in control" of a directory. 
I'm going to -## assume that nothing else writes to this directory ever - anything -## that does could cause this to fail in unexpected ways! R6_ssh_file_ops <- R6::R6Class( "ssh_file_ops", @@ -35,17 +17,70 @@ R6_ssh_file_ops <- R6::R6Class( self$root <- root }, + ## A human-readable scalar character that storr will use type = function() { "ssh" }, - ## The two use cases that we can do easily are: - ## - ## upload arbitrary bytes to an arbitrary filename - ## upload a file from a path here to an identical path there + ## Totally remove whereever we are storing files + destroy = function() { + self$delete_dir(NULL) + self$session <- NULL + self$root <- NULL + }, + + ## Create a directory (or something that will act as one) + create_dir = function(path) { + path_remote <- file.path(self$root, path) + ssh::ssh_exec_wait(self$session, + sprintf("mkdir -p %s", shQuote(path_remote))) + }, + + list_dir = function(path) { + path_remote <- file.path(self$root, path) + res <- ssh::ssh_exec_internal(self$session, + sprintf("ls -1 %s", shQuote(path_remote))) + strsplit(rawToChar(res$stdout), "\n", fixed = TRUE)[[1L]] + }, + + exists = function(path, type = c("any", "file", "directory")) { + if (length(path) != 1L) { + ## TODO: vectorise via a remote script? + return(vapply(path, self$exists, logical(1), type, + USE.NAMES = FALSE)) + } + flag <- c(file = "f", directory = "d", any = "e")[[match.arg(type)]] + path_remote <- file.path(self$root, path) + cmd <- sprintf("test -%s %s", flag, shQuote(path_remote)) + ssh::ssh_exec_internal(self$session, cmd, error = FALSE)$status == 0L + }, + + delete_file = function(path) { + ## NOTE: this should be farmed out to a set of scripts that we + ## move into the remote storr, then we can do this in one single + ## remote call. A bash script can cake a path as args and + ## return a vector of 0/1? Path length issues will be a problem + ## and we might need to use a temporary file? 
+ exists <- self$exists(path) + for (p in path[exists]) { + path_remote <- file.path(self$root, path) + res <- ssh::ssh_exec_internal( + self$session, + sprintf("rm -f %s", shQuote(path_remote)), + error = FALSE) + } + exists + }, + + delete_dir = function(path) { + path_remote <- + if (is.null(path)) self$root else file.path(self$root, path) + ssh::ssh_exec_wait(self$session, + sprintf("rm -rf %s", shQuote(path_remote))) + }, ## local file to remote file - write_file = function(file, dest_dir) { + upload_file = function(file, dest_dir) { if (dest_dir == ".") { path_remote <- self$root } else { @@ -56,26 +91,17 @@ R6_ssh_file_ops <- R6::R6Class( invisible(file.path(dest_dir, basename(file))) }, - write_bytes = function(bytes, dest_file) { + download_file = function(file, dest) { + file_remote <- file.path(self$root, file) tmp <- tempfile() dir.create(tmp) on.exit(unlink(tmp, recursive = TRUE)) - path_local <- file.path(tmp, basename(dest_file)) - writeBin(bytes, path_local) - self$write_file(path_local, dirname(dest_file)) - }, + file_local <- file.path(tmp, basename(file)) - read_file = function(file, dest) { - file_remote <- file.path(self$root, file) ## TODO: This really should be an error on failure in the ssh ## package imo (or have the option to convert to one) - the file ## does not exist! I am promoting this to error here, but there - ## may be other warnings that this lets through. - tmp <- tempfile() - dir.create(tmp) - on.exit(unlink(tmp, recursive = TRUE)) - file_local <- file.path(tmp, basename(file)) - + ## may be other warnings that this catches undesirably. 
res <- tryCatch( ssh::scp_download(self$session, file_remote, tmp, verbose = FALSE), warning = identity) @@ -91,60 +117,29 @@ R6_ssh_file_ops <- R6::R6Class( } }, - read_bytes = function(file) { - self$read_file(file, NULL) - }, - - ## TODO: not vectorised - exists = function(path, type = c("any", "file", "directory")) { - if (length(path) != 1L) { - return(vapply(path, self$exists, logical(1), type, - USE.NAMES = FALSE)) - } - flag <- c(file = "f", directory = "d", any = "e")[[match.arg(type)]] - path_remote <- file.path(self$root, path) - cmd <- sprintf("test -%s %s", flag, shQuote(path_remote)) - ssh::ssh_exec_internal(self$session, cmd, error = FALSE)$status == 0L + write_bytes = function(bytes, dest_file) { + tmp <- tempfile() + dir.create(tmp) + on.exit(unlink(tmp, recursive = TRUE)) + path_local <- file.path(tmp, basename(dest_file)) + writeBin(bytes, path_local) + self$upload_file(path_local, dirname(dest_file)) }, - create_dir = function(path) { - path_remote <- file.path(self$root, path) - ssh::ssh_exec_wait(self$session, - sprintf("mkdir -p %s", shQuote(path_remote))) - }, - list_dir = function(path) { - path_remote <- file.path(self$root, path) - res <- ssh::ssh_exec_internal(self$session, - sprintf("ls -1 %s", shQuote(path_remote))) - strsplit(rawToChar(res$stdout), "\n", fixed = TRUE)[[1L]] - }, - - delete_file = function(path) { - ## NOTE: this should be farmed out to a set of scripts that we - ## move into the remote storr, then we can do this in one single - ## remote call. A bash script can cake a path as args and - ## return a vector of 0/1? Path length issues will be a problem - ## and we might need to use a temporary file? 
- exists <- self$exists(path) - for (p in path[exists]) { - path_remote <- file.path(self$root, path) - res <- ssh::ssh_exec_internal( - self$session, - sprintf("rm -f %s", shQuote(path_remote)), - error = FALSE) - } - exists + read_bytes = function(file) { + self$download_file(file, NULL) }, - delete_directory = function(path) { - path_remote <- file.path(self$root, path) - ssh::ssh_exec_wait(self$session, - sprintf("rm -rf %s", shQuote(path_remote))) + ## NOTE: I am adding a trailing newline here so that later on R's + ## readLines copes with these files better - this means that the + ## remote storr is able to be used as a local storr (except for + ## some issues with configuration). + write_string = function(string, dest_file) { + self$write_bytes(charToRaw(paste0(string, "\n")), dest_file) }, - destroy = function() { - ssh::ssh_exec_wait(self$session, - sprintf("rm -rf %s", shQuote(self$root))) + read_string = function(file) { + sub("\n$", "", rawToChar(self$read_bytes(file))) } )) diff --git a/tests/testthat/test-ssh-file-ops.R b/tests/testthat/test-ssh-file-ops.R index 986697c..7e47784 100644 --- a/tests/testthat/test-ssh-file-ops.R +++ b/tests/testthat/test-ssh-file-ops.R @@ -20,7 +20,7 @@ test_that("directory operations", { ops$delete_file(path) expect_true(ops$exists(path, "directory")) - ops$delete_directory(path) + ops$delete_dir(path) expect_false(ops$exists(path, "directory")) }) @@ -41,8 +41,9 @@ test_that("single file, directoryless io", { expect_true(ops$exists(path)) expect_equal(ops$read_bytes(path), bytes) - ops$delete_file(path) + expect_true(ops$delete_file(path)) expect_false(ops$exists(path)) + expect_false(ops$delete_file(path)) expect_equal(ops$list_dir("."), character(0)) }) @@ -66,8 +67,9 @@ test_that("directories are created automatically", { expect_true(ops$exists(path)) expect_equal(ops$read_bytes(path), bytes) - ops$delete_file(path) + expect_true(ops$delete_file(path)) expect_false(ops$exists(path)) + 
expect_false(ops$delete_file(path)) expect_equal(ops$list_dir("."), "a") expect_equal(ops$list_dir("a"), "b") expect_equal(ops$list_dir("a/b"), character(0)) From 39b3ebf4ba44c7eaa9e315b1b36124854308c62f Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 08:44:31 +0100 Subject: [PATCH 10/30] Make key finding more robust in test --- tests/testthat/helper-ssh.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R index 7c8cd87..7d130a2 100644 --- a/tests/testthat/helper-ssh.R +++ b/tests/testthat/helper-ssh.R @@ -1,4 +1,5 @@ HAS_SSHD_SERVER <- FALSE +HERE <- getwd() start_sshd_server <- function() { @@ -59,7 +60,8 @@ test_ssh_connection <- function() { use_sshd_server() for (i in 1:10) { con <- tryCatch( - ssh::ssh_connect("root@127.0.0.1:10022", "sshd/keys/id_rsa"), + ssh::ssh_connect("root@127.0.0.1:10022", + file.path(HERE, "sshd/keys/id_rsa")), error = function(e) NULL) if (inherits(con, "ssh_session")) { return(con) From 2932ad69b4589db2ea4d48b28ed6a200a028cbfb Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 08:51:13 +0100 Subject: [PATCH 11/30] Add generalised remote storr driver --- R/driver_remote.R | 113 +++++++++++++++++++++++++++++ tests/testthat/test-driver-ssh.R | 16 ++++ tests/testthat/test-ssh-file-ops.R | 1 - 3 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 R/driver_remote.R create mode 100644 tests/testthat/test-driver-ssh.R diff --git a/R/driver_remote.R b/R/driver_remote.R new file mode 100644 index 0000000..150d2f6 --- /dev/null +++ b/R/driver_remote.R @@ -0,0 +1,113 @@ +driver_remote <- function(ops, rds) { + R6_driver_remote$new(ops, rds) +} + +R6_driver_remote <- R6::R6Class( + "driver_remote", + + public = list( + ops = NULL, + rds = NULL, + traits = NULL, + hash_algorithm = "md5", + + initialize = function(ops, rds) { + ## For the configuration, we should pass along all dots to the + ## remote storr and check? 
Or do that when controlling the + ## local rds. So this is going to get a lot of dots and do the + ## build itself. + self$ops <- ops + self$rds <- rds + self$ops$create_dir("data") + self$ops$create_dir("keys") + self$traits <- self$rds$traits + ## TODO: deal with this when configuration is done: + self$traits$hash_algorithm <- FALSE + self$hash_algorithm <- "md5" + }, + + type = function() { + sprintf("remote/%s", self$ops$type()) + }, + + destroy = function() { + self$ops$destroy() + self$ops <- NULL + }, + + get_hash = function(key, namespace) { + self$ops$read_string(self$name_key(key, namespace)) + }, + + set_hash = function(key, namespace, hash) { + self$ops$write_string(hash, self$name_key(key, namespace)) + }, + + get_object = function(hash) { + filename_local <- self$rds$name_hash(hash) + if (!self$rds$exists_object(hash)) { + filename_remote <- self$name_hash(hash) + self$ops$download_file(filename_remote, filename_local) + } + readRDS(filename_local) + }, + + set_object = function(hash, value) { + filename_remote <- self$name_hash(hash) + if (!self$ops$exists(filename_remote)) { + filename_local <- self$rds$name_hash(hash) + if (!file.exists(filename_local)) { + self$rds$set_object(hash, value) + } + self$ops$upload_file(filename_local, dirname(filename_remote)) + } + }, + + exists_hash = function(key, namespace) { + self$ops$exists(self$name_key(key, namespace)) + }, + + exists_object = function(hash) { + self$ops$exists(self$name_hash(hash)) + }, + + + del_hash = function(key, namespace) { + self$ops$delete_file(self$name_key(key, namespace)) + }, + + del_object = function(hash) { + self$ops$delete_file(self$name_hash(hash)) + }, + + list_hashes = function() { + sub("\\.rds$", "", self$ops$list_dir("data")) + }, + + list_namespaces = function() { + self$ops$list_dir("keys") + }, + + list_keys = function(namespace) { + path <- file.path("keys", namespace) + if (!self$ops$exists(path, "directory")) { + return(character(0)) + } + ret <- 
self$ops$list_dir(path) + if (self$rds$mangle_key) storr::decode64(ret, TRUE) else ret + }, + + ## These functions could be done better if driver_rds takes a + ## 'relative' argument + name_hash = function(hash) { + p <- self$rds$name_hash(hash) + file.path(basename(dirname(p)), basename(p)) + }, + + name_key = function(key, namespace) { + p <- self$rds$name_key(key, namespace) + file.path( + basename(dirname(dirname(p))), + basename(dirname(p)), + basename(p)) + })) diff --git a/tests/testthat/test-driver-ssh.R b/tests/testthat/test-driver-ssh.R new file mode 100644 index 0000000..b7aa5e8 --- /dev/null +++ b/tests/testthat/test-driver-ssh.R @@ -0,0 +1,16 @@ +context("rds over ssh") + +test_that("storr spec", { + create <- function(dr = NULL, ...) { + if (is.null(dr)) { + ops <- ssh_file_ops(test_ssh_connection(), tempfile()) + rds <- storr::driver_rds(tempfile(), ...) + driver_remote(ops, rds) + } else { + driver_remote(dr$ops, dr$rds) + } + } + + res <- storr::test_driver(create) + expect_equal(sum(res$failed), 0) +}) diff --git a/tests/testthat/test-ssh-file-ops.R b/tests/testthat/test-ssh-file-ops.R index 7e47784..a9dfd23 100644 --- a/tests/testthat/test-ssh-file-ops.R +++ b/tests/testthat/test-ssh-file-ops.R @@ -3,7 +3,6 @@ context("ssh_file_ops") test_that("directory operations", { ## Not strictly unit tests, but integration tests of basic ## operations. 
- root <- random_path("storr_ssh_root_") path <- random_path() From 19a3ddcfade648ca43cca2efd55a655e9218e26d Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 10:30:48 +0100 Subject: [PATCH 12/30] Full rds configuration support --- R/driver_remote.R | 89 +++++++++++++++++++++++++++++--- tests/testthat/test-driver-ssh.R | 7 ++- 2 files changed, 84 insertions(+), 12 deletions(-) diff --git a/R/driver_remote.R b/R/driver_remote.R index 150d2f6..c96077c 100644 --- a/R/driver_remote.R +++ b/R/driver_remote.R @@ -1,5 +1,5 @@ -driver_remote <- function(ops, rds) { - R6_driver_remote$new(ops, rds) +driver_remote <- function(ops, ..., path_local = NULL) { + R6_driver_remote$new(ops, ..., path_local = NULL) } R6_driver_remote <- R6::R6Class( @@ -9,21 +9,26 @@ R6_driver_remote <- R6::R6Class( ops = NULL, rds = NULL, traits = NULL, - hash_algorithm = "md5", + hash_algorithm = NULL, - initialize = function(ops, rds) { + initialize = function(ops, ..., path_local) { ## For the configuration, we should pass along all dots to the ## remote storr and check? Or do that when controlling the ## local rds. So this is going to get a lot of dots and do the ## build itself. self$ops <- ops - self$rds <- rds + + path_local <- path_local %||% tempfile() + + config <- storr_remote_config_get(self$ops) + extra <- storr_remote_config_validate(config, path_local, ...) + storr_remote_config_set(self$ops, extra) self$ops$create_dir("data") self$ops$create_dir("keys") + + self$rds <- storr::driver_rds(path_local, ...) self$traits <- self$rds$traits - ## TODO: deal with this when configuration is done: - self$traits$hash_algorithm <- FALSE - self$hash_algorithm <- "md5" + self$hash_algorithm <- self$rds$hash_algorithm }, type = function() { @@ -111,3 +116,71 @@ R6_driver_remote <- R6::R6Class( basename(dirname(p)), basename(p)) })) + + +## It would be really nice to do this as a single operation but that +## probably can't be easily done generally. 
Quite possibly it would +## be possible to get/fetch an entire directory though. +## +## The other option is using a single remote object, which loses the +## ability to have a remote storr really reflect a local one... +storr_remote_config_get <- function(ops) { + ## NOTE: this is a storr/rds internal + path_config <- "config" + if (ops$exists(path_config, "directory")) { + keys <- ops$list_dir(path_config) + ret <- lapply(file.path(path_config, keys), ops$read_string) + names(ret) <- keys + } else { + ret <- NULL + } + ret +} + + +storr_remote_config_set <- function(ops, data) { + path_config <- "config" + for (key in names(data)) { + ops$write_string(data[[key]], file.path(path_config, key)) + } +} + + +storr_remote_config_validate <- function(prev, path_local, ...) { + path_config <- "config" + + ## This exploits quite a bit of storr's internals: + tmp <- tempfile() + on.exit(unlink(tmp, recursive = TRUE)) + path_config_tmp <- file.path(tmp, path_config) + + ## The storr rds driver requires that the config directory not exist + ## at all + if (!is.null(prev)) { + dir.create(path_config_tmp, FALSE, TRUE) + for (key in names(prev)) { + writeLines(prev[[key]], file.path(path_config_tmp, key)) + } + } + + ## This will error if the options can't be supported, but we never + ## use the driver itself for anything. + dr <- storr::driver_rds(tmp, ...) 
+ + ## These are the configuration elements to set remotely: + extra <- setdiff(dir(path_config_tmp), names(prev)) + if (length(extra) > 0L) { + ret <- lapply(file.path(path_config_tmp, extra), readLines) + names(ret) <- extra + } else { + ret <- NULL + } + + ## Replicate our temporary configuration into the local cache: + path_config_local <- file.path(path_local, path_config) + msg <- setdiff(dir(path_config_tmp), dir(path_config_local)) + dir.create(path_config_local, FALSE, TRUE) + file.copy(file.path(path_config_tmp, msg), path_config_local) + + ret +} diff --git a/tests/testthat/test-driver-ssh.R b/tests/testthat/test-driver-ssh.R index b7aa5e8..76b9168 100644 --- a/tests/testthat/test-driver-ssh.R +++ b/tests/testthat/test-driver-ssh.R @@ -3,11 +3,10 @@ context("rds over ssh") test_that("storr spec", { create <- function(dr = NULL, ...) { if (is.null(dr)) { - ops <- ssh_file_ops(test_ssh_connection(), tempfile()) - rds <- storr::driver_rds(tempfile(), ...) - driver_remote(ops, rds) + ops <- ssh_file_ops(test_ssh_connection(), random_path("storr_driver_")) + driver_remote(ops, ...) } else { - driver_remote(dr$ops, dr$rds) + driver_remote(dr$ops, ...) } } From 3c41d3065a06f02f6e5a9711983c71ac08c39030 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 12:30:42 +0100 Subject: [PATCH 13/30] Big simplification based on storr update --- DESCRIPTION | 5 +- R/driver_remote.R | 186 ------------------------------- R/{ssh_file_ops.R => ssh.R} | 15 ++- R/util.R | 8 -- tests/testthat/helper-ssh.R | 3 +- tests/testthat/test-driver-ssh.R | 4 +- 6 files changed, 21 insertions(+), 200 deletions(-) delete mode 100644 R/driver_remote.R rename R/{ssh_file_ops.R => ssh.R} (90%) delete mode 100644 R/util.R diff --git a/DESCRIPTION b/DESCRIPTION index a1d72cf..41f5ad9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,9 +9,10 @@ Depends: R (>= 3.1.0) License: What license is it under? 
Encoding: UTF-8 LazyData: true +Imports: + storr (>= 1.2.0) Suggests: aws.s3, - ssh.utils, - storr, + ssh, testthat RoxygenNote: 6.0.1 diff --git a/R/driver_remote.R b/R/driver_remote.R deleted file mode 100644 index c96077c..0000000 --- a/R/driver_remote.R +++ /dev/null @@ -1,186 +0,0 @@ -driver_remote <- function(ops, ..., path_local = NULL) { - R6_driver_remote$new(ops, ..., path_local = NULL) -} - -R6_driver_remote <- R6::R6Class( - "driver_remote", - - public = list( - ops = NULL, - rds = NULL, - traits = NULL, - hash_algorithm = NULL, - - initialize = function(ops, ..., path_local) { - ## For the configuration, we should pass along all dots to the - ## remote storr and check? Or do that when controlling the - ## local rds. So this is going to get a lot of dots and do the - ## build itself. - self$ops <- ops - - path_local <- path_local %||% tempfile() - - config <- storr_remote_config_get(self$ops) - extra <- storr_remote_config_validate(config, path_local, ...) - storr_remote_config_set(self$ops, extra) - self$ops$create_dir("data") - self$ops$create_dir("keys") - - self$rds <- storr::driver_rds(path_local, ...) 
- self$traits <- self$rds$traits - self$hash_algorithm <- self$rds$hash_algorithm - }, - - type = function() { - sprintf("remote/%s", self$ops$type()) - }, - - destroy = function() { - self$ops$destroy() - self$ops <- NULL - }, - - get_hash = function(key, namespace) { - self$ops$read_string(self$name_key(key, namespace)) - }, - - set_hash = function(key, namespace, hash) { - self$ops$write_string(hash, self$name_key(key, namespace)) - }, - - get_object = function(hash) { - filename_local <- self$rds$name_hash(hash) - if (!self$rds$exists_object(hash)) { - filename_remote <- self$name_hash(hash) - self$ops$download_file(filename_remote, filename_local) - } - readRDS(filename_local) - }, - - set_object = function(hash, value) { - filename_remote <- self$name_hash(hash) - if (!self$ops$exists(filename_remote)) { - filename_local <- self$rds$name_hash(hash) - if (!file.exists(filename_local)) { - self$rds$set_object(hash, value) - } - self$ops$upload_file(filename_local, dirname(filename_remote)) - } - }, - - exists_hash = function(key, namespace) { - self$ops$exists(self$name_key(key, namespace)) - }, - - exists_object = function(hash) { - self$ops$exists(self$name_hash(hash)) - }, - - - del_hash = function(key, namespace) { - self$ops$delete_file(self$name_key(key, namespace)) - }, - - del_object = function(hash) { - self$ops$delete_file(self$name_hash(hash)) - }, - - list_hashes = function() { - sub("\\.rds$", "", self$ops$list_dir("data")) - }, - - list_namespaces = function() { - self$ops$list_dir("keys") - }, - - list_keys = function(namespace) { - path <- file.path("keys", namespace) - if (!self$ops$exists(path, "directory")) { - return(character(0)) - } - ret <- self$ops$list_dir(path) - if (self$rds$mangle_key) storr::decode64(ret, TRUE) else ret - }, - - ## These functions could be done better if driver_rds takes a - ## 'relative' argument - name_hash = function(hash) { - p <- self$rds$name_hash(hash) - file.path(basename(dirname(p)), basename(p)) - }, - - 
name_key = function(key, namespace) { - p <- self$rds$name_key(key, namespace) - file.path( - basename(dirname(dirname(p))), - basename(dirname(p)), - basename(p)) - })) - - -## It would be really nice to do this as a single operation but that -## probably can't be easily done generally. Quite possibly it would -## be possible to get/fetch an entire directory though. -## -## The other option is using a single remote object, which loses the -## ability to have a remote storr really reflect a local one... -storr_remote_config_get <- function(ops) { - ## NOTE: this is a storr/rds internal - path_config <- "config" - if (ops$exists(path_config, "directory")) { - keys <- ops$list_dir(path_config) - ret <- lapply(file.path(path_config, keys), ops$read_string) - names(ret) <- keys - } else { - ret <- NULL - } - ret -} - - -storr_remote_config_set <- function(ops, data) { - path_config <- "config" - for (key in names(data)) { - ops$write_string(data[[key]], file.path(path_config, key)) - } -} - - -storr_remote_config_validate <- function(prev, path_local, ...) { - path_config <- "config" - - ## This exploits quite a bit of storr's internals: - tmp <- tempfile() - on.exit(unlink(tmp, recursive = TRUE)) - path_config_tmp <- file.path(tmp, path_config) - - ## The storr rds driver requires that the config directory not exist - ## at all - if (!is.null(prev)) { - dir.create(path_config_tmp, FALSE, TRUE) - for (key in names(prev)) { - writeLines(prev[[key]], file.path(path_config_tmp, key)) - } - } - - ## This will error if the options can't be supported, but we never - ## use the driver itself for anything. - dr <- storr::driver_rds(tmp, ...) 
- - ## These are the configuration elements to set remotely: - extra <- setdiff(dir(path_config_tmp), names(prev)) - if (length(extra) > 0L) { - ret <- lapply(file.path(path_config_tmp, extra), readLines) - names(ret) <- extra - } else { - ret <- NULL - } - - ## Replicate our temporary configuration into the local cache: - path_config_local <- file.path(path_local, path_config) - msg <- setdiff(dir(path_config_tmp), dir(path_config_local)) - dir.create(path_config_local, FALSE, TRUE) - file.copy(file.path(path_config_tmp, msg), path_config_local) - - ret -} diff --git a/R/ssh_file_ops.R b/R/ssh.R similarity index 90% rename from R/ssh_file_ops.R rename to R/ssh.R index 1102e27..afcff54 100644 --- a/R/ssh_file_ops.R +++ b/R/ssh.R @@ -1,3 +1,16 @@ +storr_ssh <- function(session, remote_root, ..., path_local = NULL, + default_namespace = "default_namespace") { + dr <- driver_ssh(session, remote_root, ..., path_local = path_local) + storr::storr(dr, default_namespace) +} + + +driver_ssh <- function(session, remote_root, ..., path_local = NULL) { + ops <- ssh_file_ops(session, remote_root) + storr::driver_remote(ops, ..., path_local = path_local) +} + + ssh_file_ops <- function(session, root) { R6_ssh_file_ops$new(session, root) } @@ -109,7 +122,7 @@ R6_ssh_file_ops <- R6::R6Class( stop("Error downloading file: ", res$message) } if (is.null(dest)) { - read_binary(file_local) + readBin(file_local, raw(), file.size(file_local)) } else { dir.create(dirname(dest), FALSE, TRUE) file.copy(file_local, dest) diff --git a/R/util.R b/R/util.R deleted file mode 100644 index 432030e..0000000 --- a/R/util.R +++ /dev/null @@ -1,8 +0,0 @@ -`%||%` <- function(a, b) { - if (is.null(a)) b else a -} - - -read_binary <- function(x) { - readBin(x, raw(), file.size(x)) -} diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R index 7d130a2..6391277 100644 --- a/tests/testthat/helper-ssh.R +++ b/tests/testthat/helper-ssh.R @@ -44,7 +44,8 @@ use_sshd_server <- function() { 
system3 <- function(command, args = character(), check = FALSE) { res <- suppressWarnings(system2(command, args, stdout = TRUE, stderr = TRUE)) - code <- attr(res, "status") %||% 0 + status <- attr(res, "status") + code <- if (is.null(status)) 0 else status attr(res, "status") <- NULL ret <- list(success = code == 0, code = code, diff --git a/tests/testthat/test-driver-ssh.R b/tests/testthat/test-driver-ssh.R index 76b9168..b17f42f 100644 --- a/tests/testthat/test-driver-ssh.R +++ b/tests/testthat/test-driver-ssh.R @@ -4,10 +4,10 @@ test_that("storr spec", { create <- function(dr = NULL, ...) { if (is.null(dr)) { ops <- ssh_file_ops(test_ssh_connection(), random_path("storr_driver_")) - driver_remote(ops, ...) } else { - driver_remote(dr$ops, ...) + ops <- dr$ops } + storr::driver_remote(ops, ...) } res <- storr::test_driver(create) From b3da3f2c7aa63c343c7b091075c3fb8fc30dfd7d Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 14:00:33 +0100 Subject: [PATCH 14/30] Tweaks for changes to storr --- R/ssh.R | 14 +++++++++++--- tests/testthat/test-ssh-file-ops.R | 9 ++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/ssh.R b/R/ssh.R index afcff54..e8eda6d 100644 --- a/R/ssh.R +++ b/R/ssh.R @@ -56,18 +56,26 @@ R6_ssh_file_ops <- R6::R6Class( strsplit(rawToChar(res$stdout), "\n", fixed = TRUE)[[1L]] }, - exists = function(path, type = c("any", "file", "directory")) { + exists_remote = function(path, is_directory) { if (length(path) != 1L) { ## TODO: vectorise via a remote script? 
- return(vapply(path, self$exists, logical(1), type, + return(vapply(path, self$exists_remote, logical(1), is_directory, USE.NAMES = FALSE)) } - flag <- c(file = "f", directory = "d", any = "e")[[match.arg(type)]] + flag <- if (is_directory) "d" else "e" path_remote <- file.path(self$root, path) cmd <- sprintf("test -%s %s", flag, shQuote(path_remote)) ssh::ssh_exec_internal(self$session, cmd, error = FALSE)$status == 0L }, + exists = function(path) { + self$exists_remote(path, FALSE) + }, + + exists_dir = function(path) { + self$exists_remote(path, TRUE) + }, + delete_file = function(path) { ## NOTE: this should be farmed out to a set of scripts that we ## move into the remote storr, then we can do this in one single diff --git a/tests/testthat/test-ssh-file-ops.R b/tests/testthat/test-ssh-file-ops.R index a9dfd23..d30820a 100644 --- a/tests/testthat/test-ssh-file-ops.R +++ b/tests/testthat/test-ssh-file-ops.R @@ -12,15 +12,14 @@ test_that("directory operations", { path <- random_path() expect_false(ops$exists(path)) ops$create_dir(path) - expect_false(ops$exists(path, "file")) - expect_true(ops$exists(path, "any")) - expect_true(ops$exists(path, "directory")) + expect_true(ops$exists(path)) + expect_true(ops$exists_dir(path)) expect_equal(ops$list_dir(path), character()) ops$delete_file(path) - expect_true(ops$exists(path, "directory")) + expect_true(ops$exists_dir(path)) ops$delete_dir(path) - expect_false(ops$exists(path, "directory")) + expect_false(ops$exists_dir(path)) }) From d73c961c132ee46066047d31245e67719e9e62cf Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 14:09:24 +0100 Subject: [PATCH 15/30] Expand testing --- tests/testthat/test-driver-ssh.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/testthat/test-driver-ssh.R b/tests/testthat/test-driver-ssh.R index b17f42f..cd8bcd8 100644 --- a/tests/testthat/test-driver-ssh.R +++ b/tests/testthat/test-driver-ssh.R @@ -13,3 +13,24 @@ test_that("storr spec", 
{ res <- storr::test_driver(create) expect_equal(sum(res$failed), 0) }) + + +test_that("user interface", { + st <- storr_ssh(test_ssh_connection(), random_path("storr_remote_")) + expect_is(st, "storr") + expect_is(st$driver, "driver_remote") + expect_is(st$driver$ops, "ssh_file_ops") + expect_equal(st$driver$type(), "remote/ssh") + expect_equal(st$driver$ops$type(), "ssh") +}) + + +test_that("local cache works", { + st <- storr_ssh(test_ssh_connection(), random_path("storr_remote_")) + value <- 1:10 + h <- st$set("a", value) + expect_true(st$driver$rds$del_object(h)) + expect_identical(st$get("a", use_cache = FALSE), value) + expect_equal(st$list_hashes(), h) + expect_equal(st$driver$rds$list_hashes(), h) +}) From d824c98b157e2ca83d2eb68b88fa621d3e0b783a Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 14:11:56 +0100 Subject: [PATCH 16/30] Tests for assertions --- tests/testthat/test-util-assert.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tests/testthat/test-util-assert.R diff --git a/tests/testthat/test-util-assert.R b/tests/testthat/test-util-assert.R new file mode 100644 index 0000000..f6bf5aa --- /dev/null +++ b/tests/testthat/test-util-assert.R @@ -0,0 +1,12 @@ +context("util (assert)") + +test_that("assertions", { + expect_error(assert_scalar(NULL), "must be a scalar") + expect_error(assert_scalar(1:2), "must be a scalar") + + expect_error(assert_character(1:5), "must be character") + + expect_error(assert_is(1, "foo"), "must be a foo") + + expect_error(assert_nonmissing(NA), "must not be NA") +}) From 72e93b081da57aa6252f000993e2de6ee18fa4b5 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 14:55:15 +0100 Subject: [PATCH 17/30] Minimal roxygen --- NAMESPACE | 2 ++ R/ssh.R | 9 +++++++++ man/storr_ssh.Rd | 26 ++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 man/storr_ssh.Rd diff --git a/NAMESPACE b/NAMESPACE index 95bd585..9d8a305 100644 --- a/NAMESPACE +++ 
b/NAMESPACE @@ -1,3 +1,5 @@ # Generated by roxygen2: do not edit by hand +export(driver_ssh) export(storr_rds_s3) +export(storr_ssh) diff --git a/R/ssh.R b/R/ssh.R index e8eda6d..52ba21a 100644 --- a/R/ssh.R +++ b/R/ssh.R @@ -1,3 +1,10 @@ +##' ssh backend for storr +##' +##' @title ssh storr +##' +##' @param session A ssh session object (see \code{ssh::ssh_connect}) +##' @inheritParams storr_s3 +##' @export storr_ssh <- function(session, remote_root, ..., path_local = NULL, default_namespace = "default_namespace") { dr <- driver_ssh(session, remote_root, ..., path_local = path_local) @@ -5,6 +12,8 @@ storr_ssh <- function(session, remote_root, ..., path_local = NULL, } +##' @export +##' @rdname storr_ssh driver_ssh <- function(session, remote_root, ..., path_local = NULL) { ops <- ssh_file_ops(session, remote_root) storr::driver_remote(ops, ..., path_local = path_local) diff --git a/man/storr_ssh.Rd b/man/storr_ssh.Rd new file mode 100644 index 0000000..c346ba1 --- /dev/null +++ b/man/storr_ssh.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ssh.R +\name{storr_ssh} +\alias{storr_ssh} +\alias{driver_ssh} +\title{ssh storr} +\usage{ +storr_ssh(session, remote_root, ..., path_local = NULL, + default_namespace = "default_namespace") + +driver_ssh(session, remote_root, ..., path_local = NULL) +} +\arguments{ +\item{session}{A ssh session object (see \code{ssh::ssh_connect})} + +\item{remote_root}{Base path on the remote bucket} + +\item{...}{Additional arguments passed through to +\code{storr::driver_rds}} + +\item{path_local}{Optional path to a local cache (see +\code{storr::driver_remote})} +} +\description{ +ssh backend for storr +} From 3f7e6f04ff636d47a9f3b7547b3eef737957b145 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 14:56:10 +0100 Subject: [PATCH 18/30] Untested port of the s3 driver --- NAMESPACE | 3 +- R/driver_rds_s3.R | 303 -------------------------------------------- R/s3.R | 157 
+++++++++++++++++++++++ man/storr_rds_s3.Rd | 41 ------ man/storr_s3.Rd | 23 ++++ 5 files changed, 182 insertions(+), 345 deletions(-) delete mode 100644 R/driver_rds_s3.R create mode 100644 R/s3.R delete mode 100644 man/storr_rds_s3.Rd create mode 100644 man/storr_s3.Rd diff --git a/NAMESPACE b/NAMESPACE index 9d8a305..a7121a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(driver_s3) export(driver_ssh) -export(storr_rds_s3) +export(storr_s3) export(storr_ssh) diff --git a/R/driver_rds_s3.R b/R/driver_rds_s3.R deleted file mode 100644 index d9c43d4..0000000 --- a/R/driver_rds_s3.R +++ /dev/null @@ -1,303 +0,0 @@ -##' @title S3 backend for rds object cache driver -##' @param bucket Name of the S3 bucket for which you wish to create of connect -##' store -##' @inheritParams storr::storr_rds -##' @export -##' @rdname storr_rds_s3 -storr_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL, - default_namespace = "objects") { - - storr::storr(driver_rds_s3(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm), - default_namespace) -} - -driver_rds_s3 <- function(bucket, path, compress = NULL, mangle_key = NULL, - mangle_key_pad = NULL, hash_algorithm = NULL) { - R6_driver_rds_s3$new(bucket, path, compress, mangle_key, mangle_key_pad, hash_algorithm) -} - -R6_driver_rds_s3 <- R6::R6Class( - "driver_rds_s3", - public = list( - ## TODO: things like hash_algorithm: do they belong in traits? - ## This needs sorting before anyone writes their own driver! 
- bucket = NULL, - path = NULL, - compress = NULL, - mangle_key = NULL, - mangle_key_pad = NULL, - hash_algorithm = NULL, - traits = list(accept = "raw"), - - initialize = function(bucket, path, compress, mangle_key, mangle_key_pad, - hash_algorithm) { - - is_new <- !s3_object_exists(bucket = bucket, path = file.path(path, "config")) - s3_create_dir(path = path, bucket = bucket) - s3_create_dir(path = file.path(path, "data"), bucket = bucket) - s3_create_dir(path = file.path(path, "keys"), bucket = bucket) - s3_create_dir(path = file.path(path, "config"), bucket = bucket) - self$bucket <- bucket - self$path <- path - - ## This is a bit of complicated dancing around to mantain - ## backward compatibility while allowing better defaults in - ## future versions. I'm writing out a version number here that - ## future versions of driver_rds can use to patch, warn or - ## change behaviour with older versions of the storr. - if (!is_new && !s3_object_exists(path = driver_rds_s3_config_file(path, "version"), bucket = bucket)) { - s3_write_if_missing("1.0.1", bucket = bucket, path = driver_rds_s3_config_file(path, "version")) - s3_write_if_missing("TRUE", bucket = bucket, path = driver_rds_s3_config_file(path, "mangle_key_pad")) - s3_write_if_missing("TRUE", bucket = bucket, path = driver_rds_s3_config_file(path, "compress")) - s3_write_if_missing("md5", bucket = bucket, path = driver_rds_s3_config_file(path, "hash_algorithm")) - } - ## Then write out the version number: - s3_write_if_missing(as.character(packageVersion("storr")), - bucket = bucket, - path = driver_rds_s3_config_file(path, "version")) - - if (!is.null(mangle_key)) { - storr:::assert_scalar_logical(mangle_key) - } - self$mangle_key <- driver_rds_s3_config(bucket, path, "mangle_key", mangle_key, - FALSE, TRUE) - - if (!is.null(mangle_key_pad)) { - storr:::assert_scalar_logical(mangle_key_pad) - } - self$mangle_key_pad <- - driver_rds_s3_config(bucket, path, "mangle_key_pad", mangle_key_pad, - FALSE, TRUE) - - 
if (!is.null(compress)) { - storr:::assert_scalar_logical(compress) - } - self$compress <- driver_rds_s3_config(bucket, path, "compress", compress, - TRUE, FALSE) - - if (!is.null(hash_algorithm)) { - storr:::assert_scalar_character(hash_algorithm) - } - self$hash_algorithm <- driver_rds_s3_config(bucket, path, "hash_algorithm", - hash_algorithm, "md5", TRUE) - }, - - type = function() { - "s3" - }, - - destroy = function() { - s3_delete_recursive(bucket = self$bucket, path = self$path) - }, - - get_hash = function(key, namespace) { - s3_readLines(path = self$name_key(key, namespace), bucket = self$bucket) - }, - - set_hash = function(key, namespace, hash) { - s3_create_dir(path = self$name_key("", namespace), bucket = self$bucket) - s3_writeLines(text = hash, path = self$name_key(key, namespace), bucket = self$bucket) - #*** should be making use of (or making an equivalent version of) the - #write_lines function within the storr package here (I think it deletes - #file if the write fails) - }, - - get_object = function(hash) { - aws.s3::s3readRDS(object = self$name_hash(hash), bucket = self$bucket) - }, - - set_object = function(hash, value) { - ## NOTE: this takes advantage of having the serialized value - ## already and avoids seralising twice. - storr:::assert_raw(value) - - s3_write_serialized_rds(value = value, - filename = self$name_hash(hash), - bucket = self$bucket, - compress = self$compress) - }, - - exists_hash = function(key, namespace) { - s3_object_exists(self$name_key(key, namespace), bucket = self$bucket) - }, - - exists_object = function(hash) { - s3_object_exists(self$name_hash(hash), bucket = self$bucket) - }, - - del_hash = function(key, namespace) { - #s3_delete_file(bucket = self$bucket, path = self$name_key(key, namespace)) - ## above deletes just one file (s3 key). - ## However it will throw an error if the file we are trying to delete - ## looks like a directory. - ## S3 has no actual notion of directory, we just fake it using "/". 
As a - ## result, it's possible to get into a muddle. To play it safe, line below - ## can be uncommented to force it to delete just the path given, but throw - ## a warning, if it does look like a directory can also change to - ## if_dir = "del_recursive" to delete the whole directory with a warning. - ## May never actually show up as an issue, this is just a note. - s3_delete_file(bucket = self$bucket, path = self$name_key(key, namespace), if_dir = "del_only_key") - }, - - del_object = function(hash) { - # see above note which also applies here - s3_delete_file(bucket = self$bucket, path = self$name_hash(key, namespace), if_dir = "del_only_key") - }, - - list_hashes = function() { - sub("\\.rds$", "", s3_list_dir(bucket = self$bucket, path = file.path(self$path, "data"))) - }, - - list_namespaces = function() { - s3_list_dir(bucket = self$bucket, path = file.path(self$path, "keys")) - }, - - list_keys = function(namespace) { - ret <- s3_list_dir(bucket = self$bucket, path = file.path(self$path, "keys", namespace)) - if (self$mangle_key) decode64(ret, TRUE) else ret - }, - - name_hash = function(hash) { - if (length(hash) > 0L) { - file.path(self$path, "data", paste0(hash, ".rds")) - } else { - character(0) - } - }, - - name_key = function(key, namespace) { - if (self$mangle_key) { - key <- encode64(key, pad = self$mangle_key_pad) - } - file.path(self$path, "keys", namespace, key) - } - - )) - -## This attempts to check that we are connecting to a storr of -## appropriate mangledness. There's a lot of logic here, but it's -## actually pretty simple in practice and tested in test-driver-rds.R: -## -## if mangle_key is NULL we take the mangledless of the -## existing storr or set up for no mangling. -## -## if mangle_key is not NULL then it is an error if it differs -## from the existing storr's mangledness. 
-driver_rds_s3_config <- function(bucket, path, name, value, default, must_agree) { - path_opt <- driver_rds_s3_config_file(path, name) - - load_value <- function() { - if (s3_object_exists(bucket, path_opt)) { - value <- s3_readLines(path_opt, bucket) - storage.mode(value) <- storage.mode(default) - } else { - value <- default - } - value - } - - if (is.null(value)) { - value <- load_value() - } else if (must_agree && s3_object_exists(bucket = bucket, path = path_opt)) { - value_prev <- load_value() - if (value != value_prev) { - stop(ConfigError(name, value_prev, value)) - } - } - if (!s3_object_exists(bucket = bucket, path = path_opt)) { - s3_writeLines(text = as.character(value), path = path_opt, bucket = bucket) - } - - value -} - -driver_rds_s3_config_file <- function(path, key) { - file.path(path, "config", key) -} - -s3_write_if_missing <- function(value, bucket, path) { - if (s3_object_exists(bucket, path)) { - s3_writeLines(text, path, bucket) - } -} - -## S3 Helper functions -## -## - -s3_write_serialized_rds <- function(value, filename, bucket, compress) { - # write_serialized_rds(value, self$name_hash(hash), self$compress) - aws.s3::s3write_using(x = value, - FUN = function(v, f) storr:::write_serialized_rds(value = v, filename = f, compress=compress), - object = filename, - bucket = bucket) -} - -s3_create_dir <- function(path, bucket) { - aws.s3::put_folder(folder = path, bucket = bucket) -} - -s3_file_remove <- function(path, bucket) { - - exists <- s3_object_exists(bucket, path) - if (any(exists)) { - objec(path[exists]) - } - invisible(exists) -} - -s3_writeLines <- function(text, path, bucket) { - aws.s3::s3write_using(x = text, FUN = writeLines, object = path, bucket = bucket) -} - -s3_readLines <- function(path, bucket) { - aws.s3::s3read_using(FUN = readLines, object = path, bucket = bucket) -} - -s3_object_exists <- function(bucket, path) { - suppressMessages(aws.s3::head_object(object = path, bucket = bucket)[1]) -} - -s3_list_dir <- 
function(bucket, path) { - - if(substr(path, nchar(path), nchar(path)) != "/") path = paste0(path, "/") - files_table <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf) - keys <- files_table[files_table$Size > 0,]$Key - files <- gsub(pattern = path, replacement = "", x = keys) - split_names <- strsplit(files, "/") - # first element of each split name is the file or directory within path, - # take unique of these, so that directories only appear once - unique(unlist(lapply(split_names, function(x) x[1]))) -} - -s3_delete_recursive <- function(bucket, path, force=FALSE) { - - files <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)[["Key"]] - invisible(lapply(files, function(x) aws.s3::delete_object(x, bucket))) -} - -s3_delete_file <- function(bucket, path, if_dir = c("stop", "del_only_key", "del_recursive")) { - - files <- aws.s3::get_bucket_df(bucket = bucket, prefix = path, max = Inf)[["Key"]] - - if(length(files) > 1){ - if_dir == match.arg(if_dir) # only need this if we get inside this loop - - if(if_dir == "stop"){ - stop("You are trying to delete 1 file, but it looks like it is setup like - a directory") - } else if(if_dir == "del_only_key"){ - warning("You are trying to delete 1 file, but it looks like it is setup - like a directory. Deleted specific path you requested") - invisible(aws.s3::delete_object(object = path, bucket = bucket)) - } else if(if_dir == "del_recursive"){ - warning("You are trying to delete 1 file, but it looks like it is setup - like a directory. 
Deleting recursively everyting below the path - you specified") - s3_delete_recursive(bucket, path) - } - } else{ - invisible(aws.s3::delete_object(object = path, bucket = bucket)) - } -} diff --git a/R/s3.R b/R/s3.R new file mode 100644 index 0000000..d88ef7f --- /dev/null +++ b/R/s3.R @@ -0,0 +1,157 @@ +##' @title S3 backend for rds object cache driver +##' +##' @param bucket Name of the S3 bucket for which you wish to create +##' of connect store +##' +##' @param remote_root Base path on the remote bucket +##' +##' @param ... Additional arguments passed through to +##' \code{storr::driver_rds} +##' +##' @param path_local Optional path to a local cache (see +##' \code{storr::driver_remote}) +##' @export +storr_s3 <- function(bucket, remote_root, ..., path_local = NULL) { + dr <- driver_s3(bucket, remote_root, ..., path_local = path_local) + storr::storr(dr, default_namespace) +} + + +##' @export +##' @rdname storr_s3 +driver_s3 <- function(bucket, remote_root, ..., path_local = NULL) { + ops <- s3_file_ops(bucket, remote_root) + storr::driver_remote(ops, ..., path_local = path_local) +} + + +s3_file_ops <- function(bucket, root) { + R6_s3_file_ops$new(bucket, root) +} + + +R6_s3_file_ops <- R6::R6Class( + "s3_file_ops", + + public = list( + bucket = NULL, + + initialize = function(bucket, root) { + self$bucket <- bucket + self$root <- root + }, + + type = function() { + "s3" + }, + + destroy = function() { + ## TODO: not sure if this is right? 
+      self$delete_dir("")
+    },
+
+    create_dir = function(path) {
+      path_remote <- file.path(self$root, path)
+      aws.s3::put_folder(folder = path_remote, bucket = self$bucket)
+    },
+
+    list_dir = function(path) {
+      if (substr(path, nchar(path), nchar(path)) != "/") {
+        path <- paste0(path, "/")
+      }
+      path_remote <- file.path(self$root, path)
+      files_table <- aws.s3::get_bucket_df(bucket = self$bucket,
+                                           prefix = path_remote,
+                                           max = Inf)
+      keys <- files_table[files_table$Size > 0,]$Key
+      ## Strip the prefix off the front; keys come from a prefix query
+      ## so the first match is the leading one, and fixed = TRUE keeps
+      ## regexp-sensitive characters in the path from messing things up
+      files <- sub(path_remote, "", keys, fixed = TRUE)
+      split_names <- strsplit(files, "/")
+      ## first element of each split name is the file or directory
+      ## within path, take unique of these, so that directories only
+      ## appear once
+      ##
+      ## TODO: I _think_ I've done the translation from
+      ## unique/unlist/lapply to unique/vapply here, but I don't
+      ## really get what is going on. It's quite likely that what you
+      ## really want is
+      ##
+      ## unique(basename(files))
+      ##
+      ## perhaps?
+      unique(vapply(split_names, function(x) x[[1L]], character(1)))
+    },
+
+    exists = function(path) {
+      path_remote <- file.path(self$root, path)
+      ## QUESTION: What messages are produced by this?
+      suppressMessages(
+        aws.s3::head_object(object = path_remote, bucket = self$bucket)[[1]])
+    },
+
+    exists_dir = function(path) {
+      ## TODO: If it is possible to test if the remote thing is a
+      ## directory (e.g., one of the bits of head_object?) that would
+      ## be nice to get here too...
+      self$exists(path)
+    },
+
+    delete_file = function(path) {
+      if (length(path) != 1L) {
+        ## TODO: this is the most naive way of doing vectorisation,
+        ## but if aws.s3 can delete multiple objects (the docs suggest
+        ## it can) you can probably do better. 
This function must + ## return a logical vector the same length as "path" + ## indicating if deletion actually happened. + return(vapply(path, self$delete_file, logical(1))) + } + path_remote <- file.path(self$root, path) + ## TODO: I've copied the contents of your previous function + ## here, but stripped it down to ony take the if_dir == + ## "del_only_key" path because that was what looked like what + ## was implemented. I have not tested this! + files <- aws.s3::get_bucket_df(bucket = self$bucket, + prefix = path_remote, + max = Inf)[["Key"]] + aws.s3::delete_object(object = path_remote, bucket = self$bucket) + }, + + delete_dir = function(path) { + path_remote <- file.path(self$root, path) + files <- aws.s3::get_bucket_df(bucket = self$bucket, + prefix = path_remote, max = Inf)[["Key"]] + for (x in files) { + aws.s3::delete_object(x, self$bucket) + } + }, + + upload_file = function(file, dest_dir) { + ## TODO: implement me! Upload a file with full local name + ## `file` into a somewhere with directory prefix `dest_dir` - + ## that's going to be `data` here really. I have taken a guess + ## here. 
+ path_remote <- file.path(self$root, dest_dir, basename(file))
+ aws.s3::put_object(file = file, object = path_remote,
+ bucket = self$bucket)
+ },
+
+ download_file = function(file, dest) {
+ file_remote <- file.path(self$root, file)
+ aws.s3::save_object(file_remote, self$bucket, file = dest,
+ overwrite = TRUE)
+ },
+
+ write_string = function(string, dest_file) {
+ file_remote <- file.path(self$root, dest_file)
+ aws.s3::s3write_using(x = string, FUN = writeLines,
+ object = file_remote, bucket = self$bucket)
+ },
+
+ read_string = function(file) {
+ file_remote <- file.path(self$root, file)
+ aws.s3::s3read_using(FUN = readLines, object = file_remote,
+ bucket = self$bucket)
+ }
+ ))
diff --git a/man/storr_rds_s3.Rd b/man/storr_rds_s3.Rd
deleted file mode 100644
index da3b162..0000000
--- a/man/storr_rds_s3.Rd
+++ /dev/null
@@ -1,41 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/driver_rds_s3.R
-\name{storr_rds_s3}
-\alias{storr_rds_s3}
-\title{S3 backend for rds object cache driver}
-\usage{
-storr_rds_s3(bucket, path, compress = NULL, mangle_key = NULL,
- mangle_key_pad = NULL, hash_algorithm = NULL,
- default_namespace = "objects")
-}
-\arguments{
-\item{bucket}{Name of the S3 bucket for which you wish to create of connect
-store}
-
-\item{path}{Path for the store. \code{tempdir()} is a good choice
-for ephemeral storage, The \code{rappdirs} package (on CRAN)
-might be nice for persistent application data.}
-
-\item{compress}{Compress the generated file? This saves a small
-amount of space for a reasonable amount of time.}
-
-\item{mangle_key}{Mangle keys? If TRUE, then the key is encoded
-using base64 before saving to the filesystem. See Details.}
-
-\item{mangle_key_pad}{Logical indicating if the filenames created
-when using \code{mangle_key} should also be "padded" with the
-\code{=} character to make up a round number of bytes. 
Padding -is required to satisfy the document that describes base64 -encoding (RFC 4648) but can cause problems in some applications -(see \href{{https://github.com/richfitz/storr/issues/43}{this -issue}}. The default is to not pad \emph{new} storr archives. -This should be generally safe to leave alone.} - -\item{hash_algorithm}{Name of the hash algorithm to use. Possible -values are "md5", "sha1", and others supported by -\code{\link{digest}}. If not given, then we will default to -"md5".} - -\item{default_namespace}{Default namespace (see -\code{\link{storr}}).} -} diff --git a/man/storr_s3.Rd b/man/storr_s3.Rd new file mode 100644 index 0000000..e6cecb4 --- /dev/null +++ b/man/storr_s3.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/s3.R +\name{storr_s3} +\alias{storr_s3} +\alias{driver_s3} +\title{S3 backend for rds object cache driver} +\usage{ +storr_s3(bucket, remote_root, ..., path_local = NULL) + +driver_s3(bucket, remote_root, ..., path_local = NULL) +} +\arguments{ +\item{bucket}{Name of the S3 bucket for which you wish to create +of connect store} + +\item{remote_root}{Base path on the remote bucket} + +\item{...}{Additional arguments passed through to +\code{storr::driver_rds}} + +\item{path_local}{Optional path to a local cache (see +\code{storr::driver_remote})} +} From fab9f3fd467833e06cac303017211a96b0c90fb2 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:04:03 +0100 Subject: [PATCH 19/30] Disable root login --- tests/testthat/sshd/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/sshd/Dockerfile b/tests/testthat/sshd/Dockerfile index 7d4d088..fe891ea 100644 --- a/tests/testthat/sshd/Dockerfile +++ b/tests/testthat/sshd/Dockerfile @@ -1,3 +1,4 @@ FROM sickp/alpine-sshd:latest RUN ssh-keygen -P "" -f /root/.ssh/id_rsa && \ - cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys + cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \ 
+ passwd -d root From acc3af0b2389b7600f9114720c25c1538325682e Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:11:37 +0100 Subject: [PATCH 20/30] Don't start ssh server if ssh package not found --- tests/testthat/helper-ssh.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/helper-ssh.R b/tests/testthat/helper-ssh.R index 6391277..f1d080c 100644 --- a/tests/testthat/helper-ssh.R +++ b/tests/testthat/helper-ssh.R @@ -4,6 +4,7 @@ HERE <- getwd() start_sshd_server <- function() { testthat::skip_on_cran() + testthat::skip_if_not_installed("ssh") if (!identical(Sys.getenv("STORR_REMOTE_USE_SSHD"), "true")) { skip("Set 'STORR_REMOTE_USE_SSHD' to 'true' to enable sshd tests") } From d2d31cd69eb619c7288c069960ad96175d97eb28 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:11:51 +0100 Subject: [PATCH 21/30] Docs for running tests --- tests/testthat/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/testthat/README.md diff --git a/tests/testthat/README.md b/tests/testthat/README.md new file mode 100644 index 0000000..8983c68 --- /dev/null +++ b/tests/testthat/README.md @@ -0,0 +1,15 @@ +## storr.remote testing + +### ssh + +You will need `docker` installed and running and, the environment variable `STORR_REMOTE_USE_SSHD` set to the string `true`. For example, add + +``` +STORR_REMOTE_USE_SSHD=true +``` + +to your `~/.Renviron`. This will start a docker container called `storr-remote-sshd` that can be connected to using a set of ssh keys. These keys will be copied to disk (in the `tests/testthat/sshd` directory). + +With recent testthat, the `teardown-ssh.R` script will remove the container once tests have finished. + +If the `ssh` package is not installed the server will not be started, as the tests cannot be run. 
From 6b254525e28b3f3ac98071b2f94710eecc2bfea0 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:24:46 +0100 Subject: [PATCH 22/30] Update to use dest_dir not dest for download_file --- R/s3.R | 5 +++-- R/ssh.R | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/s3.R b/R/s3.R index d88ef7f..534a656 100644 --- a/R/s3.R +++ b/R/s3.R @@ -137,9 +137,10 @@ R6_s3_file_ops <- R6::R6Class( bucket = self$bucket) }, - download_file = function(file, dest) { + download_file = function(file, dest_dir) { file_remote <- file.path(self$root, file) - aws.s3::save_object(file_remote, self$bucket, file = dest, + dest_file <- file.path(dest_dir, basename(file)) + aws.s3::save_object(file_remote, self$bucket, file = dest_file, overwrite = TRUE) }, diff --git a/R/ssh.R b/R/ssh.R index 52ba21a..ec57fcd 100644 --- a/R/ssh.R +++ b/R/ssh.R @@ -121,7 +121,7 @@ R6_ssh_file_ops <- R6::R6Class( invisible(file.path(dest_dir, basename(file))) }, - download_file = function(file, dest) { + download_file = function(file, dest_dir) { file_remote <- file.path(self$root, file) tmp <- tempfile() dir.create(tmp) @@ -138,12 +138,12 @@ R6_ssh_file_ops <- R6::R6Class( if (!file.exists(file_local)) { stop("Error downloading file: ", res$message) } - if (is.null(dest)) { + if (is.null(dest_dir)) { readBin(file_local, raw(), file.size(file_local)) } else { - dir.create(dirname(dest), FALSE, TRUE) - file.copy(file_local, dest) - dest + dir.create(dest_dir, FALSE, TRUE) + file.copy(file_local, dest_dir, overwrite = TRUE) + file.path(dest_dir, basename(file)) } }, From 4761f46abd97b4d29b2160f9ce0fcb0b672192df Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:35:25 +0100 Subject: [PATCH 23/30] Add travis (just on my fork for now) --- .Rbuildignore | 1 + .travis.yml | 25 +++++++++++++++++++++++++ README.md | 5 ++++- 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 .travis.yml diff --git a/.Rbuildignore b/.Rbuildignore index 
91114bf..112ad26 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,3 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^\.travis\.yml$ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..d565bbd --- /dev/null +++ b/.travis.yml @@ -0,0 +1,25 @@ +language: r +cache: packages + +# Departure from usual setup because we need docker for testing +sudo: true +service: docker + +# Make tests more reliable by getting this up early +before_script: + - docker pull sickp/alpine-sshd:latest + - docker build tests/testthat/sshd + +# Not 'addon' based because we're on sudo infrastructure +apt_packages: + - libssh-dev + +# Could be done with Remotes: but I don't love that approach +r_github_packages: + - ropensci/ssh + +# Test coverage +r_packages: + - covr +after_success: + - Rscript -e 'covr::codecov()' diff --git a/README.md b/README.md index 3843e82..3708d6d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ # storr.remote -Interface for plugging in remote storage (e.g. AWS S3) to [storr](https://github.com/richfitz/storr) + +[![Travis-CI Build Status](https://travis-ci.org/richfitz/storr.remote.svg?branch=master)](https://travis-ci.org/richfitz/storr.remote) + +Interface for plugging in remote storage (e.g. 
AWS S3, ssh) to [storr](https://github.com/richfitz/storr) From fa2923f621a347a690f160bba8f09a8041a9fdf8 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:44:23 +0100 Subject: [PATCH 24/30] Update storr reference --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index d565bbd..8cbba45 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,7 @@ apt_packages: # Could be done with Remotes: but I don't love that approach r_github_packages: - ropensci/ssh + - richfitz/storr@i61_remote # Test coverage r_packages: From 60f7bbd8231cfccc07a3cf5a173874c803401198 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:53:41 +0100 Subject: [PATCH 25/30] Add default_argument documentation --- R/s3.R | 5 ++++- man/storr_s3.Rd | 5 ++++- man/storr_ssh.Rd | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/R/s3.R b/R/s3.R index 534a656..018bff2 100644 --- a/R/s3.R +++ b/R/s3.R @@ -10,8 +10,11 @@ ##' ##' @param path_local Optional path to a local cache (see ##' \code{storr::driver_remote}) +##' +##' @param default_namespace Default namespace (see \code{storr::storr}) ##' @export -storr_s3 <- function(bucket, remote_root, ..., path_local = NULL) { +storr_s3 <- function(bucket, remote_root, ..., path_local = NULL, + default_namesopace = "objects") { dr <- driver_s3(bucket, remote_root, ..., path_local = path_local) storr::storr(dr, default_namespace) } diff --git a/man/storr_s3.Rd b/man/storr_s3.Rd index e6cecb4..4d4507e 100644 --- a/man/storr_s3.Rd +++ b/man/storr_s3.Rd @@ -5,7 +5,8 @@ \alias{driver_s3} \title{S3 backend for rds object cache driver} \usage{ -storr_s3(bucket, remote_root, ..., path_local = NULL) +storr_s3(bucket, remote_root, ..., path_local = NULL, + default_namesopace = "objects") driver_s3(bucket, remote_root, ..., path_local = NULL) } @@ -20,4 +21,6 @@ of connect store} \item{path_local}{Optional path to a local cache (see \code{storr::driver_remote})} + 
+\item{default_namespace}{Default namespace (see \code{storr::storr})} } diff --git a/man/storr_ssh.Rd b/man/storr_ssh.Rd index c346ba1..bd5b97b 100644 --- a/man/storr_ssh.Rd +++ b/man/storr_ssh.Rd @@ -20,6 +20,8 @@ driver_ssh(session, remote_root, ..., path_local = NULL) \item{path_local}{Optional path to a local cache (see \code{storr::driver_remote})} + +\item{default_namespace}{Default namespace (see \code{storr::storr})} } \description{ ssh backend for storr From dc0a7bac95d552be43b249525e4d999d03d35a9f Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 15:54:48 +0100 Subject: [PATCH 26/30] Add a licence @ben-gready - you need to be happy with what is selected! If you'd rather something else that's totally fine. I am duplicating licence information from storr itself but you may have different opinions. Please see https://choosealicense.com if this is something you do not yet have strong opinions on --- DESCRIPTION | 2 +- LICENSE | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 LICENSE diff --git a/DESCRIPTION b/DESCRIPTION index 41f5ad9..48d061d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,7 +6,7 @@ Description: This package provides remote back ends to the wonderful storr key/val package. Right now AWS S3 is implemented, but we hope it will be easy for others to contribute other backends. Depends: R (>= 3.1.0) -License: What license is it under? 
+License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Imports: diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7afe5ed --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2018 +COPYRIGHT HOLDER: Ben Gready From f1b85436b6932551835200a91250177ca342f2b4 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 16:05:18 +0100 Subject: [PATCH 27/30] Fix typo --- R/s3.R | 2 +- man/storr_s3.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/s3.R b/R/s3.R index 018bff2..d8ddc03 100644 --- a/R/s3.R +++ b/R/s3.R @@ -14,7 +14,7 @@ ##' @param default_namespace Default namespace (see \code{storr::storr}) ##' @export storr_s3 <- function(bucket, remote_root, ..., path_local = NULL, - default_namesopace = "objects") { + default_namespace = "objects") { dr <- driver_s3(bucket, remote_root, ..., path_local = path_local) storr::storr(dr, default_namespace) } diff --git a/man/storr_s3.Rd b/man/storr_s3.Rd index 4d4507e..20095a0 100644 --- a/man/storr_s3.Rd +++ b/man/storr_s3.Rd @@ -6,7 +6,7 @@ \title{S3 backend for rds object cache driver} \usage{ storr_s3(bucket, remote_root, ..., path_local = NULL, - default_namesopace = "objects") + default_namespace = "objects") driver_s3(bucket, remote_root, ..., path_local = NULL) } From 50f7ba072b842f9eedcc852ffcab4c9897b427ed Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 16:06:17 +0100 Subject: [PATCH 28/30] Enable tests on travis --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 8cbba45..520ac87 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,9 @@ before_script: - docker pull sickp/alpine-sshd:latest - docker build tests/testthat/sshd +env: + - STORR_REMOTE_USE_SSHD=true + # Not 'addon' based because we're on sudo infrastructure apt_packages: - libssh-dev From 8058f65a6308c2888a21f258ca39231442c91381 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 3 May 2018 16:10:27 +0100 Subject: [PATCH 29/30] 
More QA tweaks --- R/s3.R | 2 ++ man/storr_s3.Rd | 3 +++ 2 files changed, 5 insertions(+) diff --git a/R/s3.R b/R/s3.R index d8ddc03..329b6b8 100644 --- a/R/s3.R +++ b/R/s3.R @@ -1,3 +1,5 @@ +##' S3 backend for storr +##' ##' @title S3 backend for rds object cache driver ##' ##' @param bucket Name of the S3 bucket for which you wish to create diff --git a/man/storr_s3.Rd b/man/storr_s3.Rd index 20095a0..8b1b778 100644 --- a/man/storr_s3.Rd +++ b/man/storr_s3.Rd @@ -24,3 +24,6 @@ of connect store} \item{default_namespace}{Default namespace (see \code{storr::storr})} } +\description{ +S3 backend for storr +} From 1aec88c5c4bfb92a1220b4cb69bbab591a996d56 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 12 Jun 2018 11:25:57 +0100 Subject: [PATCH 30/30] Everything on cran now --- .travis.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 520ac87..f294d6b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,11 +17,6 @@ env: apt_packages: - libssh-dev -# Could be done with Remotes: but I don't love that approach -r_github_packages: - - ropensci/ssh - - richfitz/storr@i61_remote - # Test coverage r_packages: - covr