diff --git a/NEWS.md b/NEWS.md index f9362e0f..28e52c71 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ * Make error messages more consistent (#146). * Restore access to Estonian data. +* Implement reading `vpts` data from a local directory (#135). * Prevent leaving temporary files for downloading Dutch polar volume data (#148). * Denmark does not require authentication anymore, removed from the package (#154). diff --git a/R/get_vpts.R b/R/get_vpts.R index f49b42e0..9b3826c0 100644 --- a/R/get_vpts.R +++ b/R/get_vpts.R @@ -7,6 +7,23 @@ #' @details #' For more details on supported sources, see `vignette("supported_sources")`. #' +#' In that case data is read from the directory, file in the directory +#' should be structures like they are in the monthly folders of the aloft +#' repository. To specify an alternative structure the +#' `"getRad.vpts_local_path_format"` option can be used. This can, for +#' example, be used to read daily data. Some example options for the glue +#' formatters are: +#' +#' * `"{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"`: The default format, +#' the same structure as the monthly directories in the aloft repository. Or as +#' contained in the `tgz` files in the aloft zenodo repository. +#' * `"{substr(radar, 1,2)}/{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"`: +#' The format as in the files in the zenodo aloft repository +#' * `"{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv"`: The format as daily +#' data is stored in aloft data +#' +#' Besides the examples above there is a `date` object available for formatting. +#' #' @inheritParams get_pvol #' @inherit get_vpts_aloft details #' @param datetime Either: @@ -19,7 +36,8 @@ #' - A [lubridate::interval()], between which all data files are downloaded. #' @param source Source of the data. One of `"baltrad"`, `"uva"`, `"ecog-04003"` #' or `"rmi"`. Only one source can be queried at a time. If not provided, -#' `"baltrad"` is used. +#' `"baltrad"` is used. Alternatively a local directory can be specified, +#' see details for an explanation of the file format. #' @param return_type Type of object that should be returned. Either: #' - `"vpts"`: vpts object(s) (default). #' - `"tibble"`: a [dplyr::tibble()]. @@ -98,11 +116,12 @@ get_vpts <- function( # Get the default value of the source arg, even if the user provided # a different value. - if (!source %in% supported_sources) { + supported_sources <- eval(formals()$source) + if (!(source %in% supported_sources | dir.exists(source))) { cli::cli_abort( c( "{.arg source} {.val {source}} is invalid.", - "i" = "Supported sources: {.val {supported_sources}}." + "i" = "Supported sources: {.val {supported_sources}} or a local directory." ), class = "getRad_error_source_invalid" ) @@ -190,7 +209,9 @@ get_vpts <- function( switch( dplyr::case_when( source == "rmi" ~ "rmi", - source %in% eval(formals("get_vpts_aloft")$source) ~ "aloft" + source %in% eval(formals("get_vpts_aloft")$source) ~ "aloft", + # this is the last option to avoid using a local source if an online exists + dir.exists(source) ~ "local" ), rmi = purrr::map( radar, @@ -205,7 +226,8 @@ get_vpts <- function( source = source ), .purrr_error_call = cl - ) + ), + local = get_vpts_local(radar, rounded_interval, directory = source) ) |> radar_to_name() diff --git a/R/get_vpts_local.R b/R/get_vpts_local.R new file mode 100644 index 00000000..83debd8a --- /dev/null +++ b/R/get_vpts_local.R @@ -0,0 +1,68 @@ +get_vpts_local <- function( + radar, + rounded_interval, + directory, + ..., + call = rlang::caller_env() +) { + dates <- as.Date(seq( + lubridate::int_start(rounded_interval), + lubridate::int_end(rounded_interval), + "day" + )) + file_paths <- radar |> + purrr::map( + ~ unique(glue::glue( + getOption( + "getRad.vpts_local_path_format", + default = "{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz" + ), + radar = .x, + year = lubridate::year(dates), + month = sprintf("%02i", lubridate::month(dates)), + day = sprintf("%02i", lubridate::day(dates)), + date = dates + )) + ) |> + purrr::set_names(radar) + full_paths <- purrr::map(file_paths, ~ file.path(directory, .x)) + s <- purrr::map(full_paths, file.exists) + if (all(!unlist(s))) { + cli::cli_abort( + c( + x = "None of the expected files are in the source directory ({.file {directory}}).", + i = "The following files were expected: {.file {unlist(full_paths)}}." + ), + class = "getRad_error_files_not_in_source_dir", + call = call + ) + } + if (any(!unlist(s))) { + missing_files <- unlist(purrr::map2(full_paths, s, ~ .x[!.y])) + cli::cli_warn( + c( + x = "Some of the expected files are in the source directory ({.file {directory}}).", + i = "The following files were expected but not found: {.file {missing_files}}.", + i = "These files are considered missing data and therefore omitted from the results." + ), + missing_files = missing_files, + class = "getRad_warning_some_files_not_in_source_dir", + call = call + ) + } + any_file <- purrr::map_lgl(s, any) + purrr::map2( + full_paths[any_file], + s[any_file], + ~ vroom::vroom( + .x[.y], + col_types = getOption( + "getRad.vpts_col_types" + ), + show_col_types = NULL, + progress = FALSE + ) |> + tibble::add_column(source = directory) |> + dplyr::mutate(dplyr::across("radar", as.character)) + ) +} diff --git a/R/utils_vpts_aloft.R b/R/utils_vpts_aloft.R index 8e9fec1d..9e8a5e6a 100644 --- a/R/utils_vpts_aloft.R +++ b/R/utils_vpts_aloft.R @@ -35,33 +35,8 @@ read_vpts_from_url <- function(urls, use_cache = TRUE) { vroom::vroom( delim = ",", I(.x), - col_types = list( - radar = vroom::col_factor(), - datetime = vroom::col_datetime(), - height = vroom::col_integer(), - u = vroom::col_double(), - v = vroom::col_double(), - w = vroom::col_double(), - ff = vroom::col_double(), - dd = vroom::col_double(), - sd_vvp = vroom::col_double(), - gap = vroom::col_logical(), - eta = vroom::col_double(), - dens = vroom::col_double(), - dbz = vroom::col_double(), - dbz_all = vroom::col_double(), - n = vroom::col_integer(), - n_dbz = vroom::col_integer(), - n_all = vroom::col_integer(), - n_dbz_all = vroom::col_integer(), - rcs = vroom::col_double(), - sd_vvp_threshold = vroom::col_double(), - vcp = vroom::col_integer(), - radar_longitude = vroom::col_double(), - radar_latitude = vroom::col_double(), - radar_height = vroom::col_integer(), - radar_wavelength = vroom::col_double(), - source_file = vroom::col_character() + col_types = getOption( + "getRad.vpts_col_types" ), show_col_types = NULL, progress = FALSE diff --git a/R/zzz.R b/R/zzz.R index 676f966d..c5bfef36 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -24,6 +24,34 @@ getRad.cache = cachem::cache_mem( max_size = 128 * 1024^2, max_age = 60^2 * 24 + ), + getRad.vpts_col_types = list( + radar = vroom::col_factor(), + datetime = vroom::col_datetime(), + height = vroom::col_integer(), + u = vroom::col_double(), + v = vroom::col_double(), + w = vroom::col_double(), + ff = vroom::col_double(), + dd = vroom::col_double(), + sd_vvp = vroom::col_double(), + gap = vroom::col_logical(), + eta = vroom::col_double(), + dens = vroom::col_double(), + dbz = vroom::col_double(), + dbz_all = vroom::col_double(), + n = vroom::col_integer(), + n_dbz = vroom::col_integer(), + n_all = vroom::col_integer(), + n_dbz_all = vroom::col_integer(), + rcs = vroom::col_double(), + sd_vvp_threshold = vroom::col_double(), + vcp = vroom::col_integer(), + radar_longitude = vroom::col_double(), + radar_latitude = vroom::col_double(), + radar_height = vroom::col_integer(), + radar_wavelength = vroom::col_double(), + source_file = vroom::col_character() ) ) toset <- !(names(op.getRad) %in% names(op)) diff --git a/man/get_vpts.Rd b/man/get_vpts.Rd index 1ce6d197..dd6512a7 100644 --- a/man/get_vpts.Rd +++ b/man/get_vpts.Rd @@ -28,7 +28,8 @@ downloaded. \item{source}{Source of the data. One of \code{"baltrad"}, \code{"uva"}, \code{"ecog-04003"} or \code{"rmi"}. Only one source can be queried at a time. If not provided, -\code{"baltrad"} is used.} +\code{"baltrad"} is used. Alternatively a local directory can be specified, +see details for an explanation of the file format.} \item{return_type}{Type of object that should be returned. Either: \itemize{ @@ -47,6 +48,24 @@ as a (list of) of \link[bioRad:summary.vpts]{vpts objects} or a } \details{ For more details on supported sources, see \code{vignette("supported_sources")}. + +In that case data is read from the directory, file in the directory +should be structures like they are in the monthly folders of the aloft +repository. To specify an alternative structure the +\code{"getRad.vpts_local_path_format"} option can be used. This can, for +example, be used to read daily data. Some example options for the glue +formatters are: +\itemize{ +\item \code{"{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"}: The default format, +the same structure as the monthly directories in the aloft repository. Or as +contained in the \code{tgz} files in the aloft zenodo repository. +\item \code{"{substr(radar, 1,2)}/{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"}: +The format as in the files in the zenodo aloft repository +\item \code{"{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv"}: The format as daily +data is stored in aloft data +} + +Besides the examples above there is a \code{date} object available for formatting. } \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/tests/testthat/test-get_vpts_local.R b/tests/testthat/test-get_vpts_local.R new file mode 100644 index 00000000..8271b604 --- /dev/null +++ b/tests/testthat/test-get_vpts_local.R @@ -0,0 +1,109 @@ +dir <- tempdir() +skip_if_offline("aloftdata.s3-eu-west-1.amazonaws.com") +dir.create(file.path(dir, "local_tests", "bewid", "2016"), recursive = T) +dir.create(file.path(dir, "local_tests", "bejab", "2016"), recursive = T) +local_dir <- file.path(dir, "local_tests") +files <- c( + "bewid/2016/bewid_vpts_20160201.csv", + "bewid/2016/bewid_vpts_20160202.csv", + "bejab/2016/bejab_vpts_20160201.csv", + "bejab/2016/bejab_vpts_20160202.csv" +) +for (i in files) { + download.file( + paste0("https://aloftdata.s3-eu-west-1.amazonaws.com/uva/daily/", i), + file.path(dir, "local_tests", i), + quiet = TRUE + ) +} +test_that("can read data", { + withr::with_options( + c( + "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv" + ), + { + expect_s3_class( + get_vpts("bewid", as.Date("2016-2-1"), source = local_dir), + class = "vpts" + ) + expect_type( + ret <- get_vpts( + c("bewid", "bejab"), + as.Date("2016-2-1"), + source = local_dir + ), + "list" + ) + expect_length(ret, 2L) + expect_s3_class(ret[[1]], class = "vpts") + expect_s3_class(ret[[2]], class = "vpts") + } + ) +}) + + +test_that("errors outside of range or directory with corect format", { + withr::with_options( + c( + "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv" + ), + { + expect_error( + get_vpts("bewid", as.Date("2016-2-1"), source = dir), + class = "getRad_error_files_not_in_source_dir" + ) + expect_error( + get_vpts("bewid", as.Date("2016-3-1"), source = local_dir), + class = "getRad_error_files_not_in_source_dir" + ) + expect_warning( + ret <- get_vpts( + "bewid", + as.Date("2016-2-1") + -3:1, + source = local_dir + ), + class = "getRad_warning_some_files_not_in_source_dir" + ) + expect_s3_class(ret, "vpts") + + expect_warning( + ret <- get_vpts( + c("bewid", "behav"), + as.Date("2016-2-1"), + source = local_dir + ), + class = "getRad_warning_some_files_not_in_source_dir" + ) + expect_s3_class(ret, "vpts") + + expect_true(all(grepl( + "local_tests/behav/2016/behav_vpts_2016020[1|2].csv", + rlang::catch_cnd( + get_vpts( + c("bewid", "behav"), + as.Date("2016-2-1"), + source = local_dir + ), + class = "getRad_warning_some_files_not_in_source_dir" + )$missing_files + ))) + } + ) +}) + +test_that("error on non existing files", { + expect_error( + get_vpts("bewid", as.Date("2016-2-1"), source = local_dir), + class = "getRad_error_files_not_in_source_dir" + ) + expect_error( + get_vpts("bewid", as.Date("2016-2-10"), source = local_dir), + class = "getRad_error_files_not_in_source_dir" + ) + # test if by default the aloft file structure is used + expect_error( + get_vpts("bewid", as.Date("2016-2-10"), source = local_dir), + "local_tests/bewid/2016/bewid_vpts_201602.csv.gz" + ) +}) +unlink(local_dir)