From 4ac688a2edc2953c6386c5d61edcfbdd861e1d32 Mon Sep 17 00:00:00 2001 From: Bart Date: Tue, 18 Nov 2025 17:13:12 +0100 Subject: [PATCH 1/4] fix #135 --- NEWS.md | 2 + R/get_vpts.R | 17 +++-- R/get_vpts_local.R | 64 ++++++++++++++++ R/utils_vpts_aloft.R | 29 +------ R/zzz.R | 28 +++++++ man/get_vpts.Rd | 6 +- tests/testthat/test-get_vpts_local.R | 109 +++++++++++++++++++++++++++ 7 files changed, 222 insertions(+), 33 deletions(-) create mode 100644 R/get_vpts_local.R create mode 100644 tests/testthat/test-get_vpts_local.R diff --git a/NEWS.md b/NEWS.md index 1b22ce92..c6ad26fb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # getRad (development version) +* Implement reading vpts data from a local directory (#135). + # getRad 0.2.3 * Improve error for requesting German data out of temporal restrictions (#131). diff --git a/R/get_vpts.R b/R/get_vpts.R index 5a526b8f..d95c3f2b 100644 --- a/R/get_vpts.R +++ b/R/get_vpts.R @@ -19,7 +19,11 @@ #' - A [lubridate::interval()], between which all data files are downloaded. #' @param source Source of the data. One of `"baltrad"`, `"uva"`, `"ecog-04003"` #' or `"rmi"`. Only one source can be queried at a time. If not provided, -#' `"baltrad"` is used. +#' `"baltrad"` is used. Alternatively a local directory can be specified. +#' In that case data is read from the directory, file in the directory +#' should be structures like they are in the monthly folders of the aloft +#' repository. To specify an alternative structure the +#' `"getRad.vpts_local_path_format"` option can be used. #' @param return_type Type of object that should be returned. Either: #' - `"vpts"`: vpts object(s) (default). #' - `"tibble"`: a [dplyr::tibble()]. @@ -102,11 +106,11 @@ get_vpts <- function( # Get the default value of the source arg, even if the user provided # a different value. supported_sources <- eval(formals()$source) - if (!source %in% supported_sources) { + if (!(source %in% supported_sources | dir.exists(source))) { cli::cli_abort( glue::glue( "Invalid source {glue::backtick(source)} provided. Possible values are: - {possible_sources}.", + {possible_sources} or a local directory.", possible_sources = glue::glue_collapse( glue::backtick(supported_sources), sep = ", ", @@ -198,7 +202,9 @@ get_vpts <- function( switch( dplyr::case_when( source == "rmi" ~ "rmi", - source %in% eval(formals("get_vpts_aloft")$source) ~ "aloft" + source %in% eval(formals("get_vpts_aloft")$source) ~ "aloft", + # this is the last option to avoid using a local source if an online exists + dir.exists(source) ~ "local" ), rmi = purrr::map( radar, @@ -213,7 +219,8 @@ get_vpts <- function( source = source ), .purrr_error_call = cl - ) + ), + local = get_vpts_local(radar, rounded_interval, directory = source) ) |> radar_to_name() diff --git a/R/get_vpts_local.R b/R/get_vpts_local.R new file mode 100644 index 00000000..4d1e616a --- /dev/null +++ b/R/get_vpts_local.R @@ -0,0 +1,64 @@ +get_vpts_local <- function( + radar, + rounded_interval, + directory +) { + dates <- as.Date(seq( + lubridate::int_start(rounded_interval), + lubridate::int_end(rounded_interval), + "day" + )) + file_paths <- radar |> + purrr::map( + ~ unique(glue::glue( + getOption( + "getRad.vpts_local_path_format", + default = "{radar}/{year}/{radar}_vpts_{year}{sprintf('%02i',month)}.csv.gz" + ), + radar = .x, + year = lubridate::year(dates), + month = lubridate::month(dates), + day = lubridate::day(dates), + datetime = strftime(dates, "%Y%m%d%H%M", tz = "UTC") + )) + ) |> + purrr::set_names(radar) + full_paths <- purrr::map(file_paths, ~ file.path(directory, .x)) + s <- purrr::map(full_paths, file.exists) + if (all(!unlist(s))) { + cli::cli_abort( + c( + x = "None of the expected files are in the source directory ({.file {directory}}).", + i = "The following files were expected: {.file {unlist(full_paths)}}." + ), + class = "getRad_error_files_not_in_source_dir" + ) + } + if (any(!unlist(s))) { + missing_files <- unlist(purrr::map2(full_paths, s, ~ .x[!.y])) + cli::cli_warn( + c( + x = "Some of the expected files are in the source directory ({.file {directory}}).", + i = "The following files were expected but not found: {.file {missing_files}}.", + i = "These files are considered missing data and therefore omitted from the results." + ), + missing_files = missing_files, + class = "getRad_warning_some_files_not_in_source_dir" + ) + } + any_file <- purrr::map_lgl(s, any) + purrr::map2( + full_paths[any_file], + s[any_file], + ~ vroom::vroom( + .x[.y], + col_types = getOption( + "getRad.vpts_col_types" + ), + show_col_types = NULL, + progress = FALSE + ) |> + tibble::add_column(source = directory) |> + dplyr::mutate(dplyr::across("radar", as.character)) + ) +} diff --git a/R/utils_vpts_aloft.R b/R/utils_vpts_aloft.R index 8e9fec1d..9e8a5e6a 100644 --- a/R/utils_vpts_aloft.R +++ b/R/utils_vpts_aloft.R @@ -35,33 +35,8 @@ read_vpts_from_url <- function(urls, use_cache = TRUE) { vroom::vroom( delim = ",", I(.x), - col_types = list( - radar = vroom::col_factor(), - datetime = vroom::col_datetime(), - height = vroom::col_integer(), - u = vroom::col_double(), - v = vroom::col_double(), - w = vroom::col_double(), - ff = vroom::col_double(), - dd = vroom::col_double(), - sd_vvp = vroom::col_double(), - gap = vroom::col_logical(), - eta = vroom::col_double(), - dens = vroom::col_double(), - dbz = vroom::col_double(), - dbz_all = vroom::col_double(), - n = vroom::col_integer(), - n_dbz = vroom::col_integer(), - n_all = vroom::col_integer(), - n_dbz_all = vroom::col_integer(), - rcs = vroom::col_double(), - sd_vvp_threshold = vroom::col_double(), - vcp = vroom::col_integer(), - radar_longitude = vroom::col_double(), - radar_latitude = vroom::col_double(), - radar_height = vroom::col_integer(), - radar_wavelength = vroom::col_double(), - source_file = vroom::col_character() + col_types = getOption( + "getRad.vpts_col_types" ), show_col_types = NULL, progress = FALSE diff --git a/R/zzz.R b/R/zzz.R index 676f966d..c5bfef36 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -24,6 +24,34 @@ getRad.cache = cachem::cache_mem( max_size = 128 * 1024^2, max_age = 60^2 * 24 + ), + getRad.vpts_col_types = list( + radar = vroom::col_factor(), + datetime = vroom::col_datetime(), + height = vroom::col_integer(), + u = vroom::col_double(), + v = vroom::col_double(), + w = vroom::col_double(), + ff = vroom::col_double(), + dd = vroom::col_double(), + sd_vvp = vroom::col_double(), + gap = vroom::col_logical(), + eta = vroom::col_double(), + dens = vroom::col_double(), + dbz = vroom::col_double(), + dbz_all = vroom::col_double(), + n = vroom::col_integer(), + n_dbz = vroom::col_integer(), + n_all = vroom::col_integer(), + n_dbz_all = vroom::col_integer(), + rcs = vroom::col_double(), + sd_vvp_threshold = vroom::col_double(), + vcp = vroom::col_integer(), + radar_longitude = vroom::col_double(), + radar_latitude = vroom::col_double(), + radar_height = vroom::col_integer(), + radar_wavelength = vroom::col_double(), + source_file = vroom::col_character() ) ) toset <- !(names(op.getRad) %in% names(op)) diff --git a/man/get_vpts.Rd b/man/get_vpts.Rd index 1ce6d197..9c325258 100644 --- a/man/get_vpts.Rd +++ b/man/get_vpts.Rd @@ -28,7 +28,11 @@ downloaded. \item{source}{Source of the data. One of \code{"baltrad"}, \code{"uva"}, \code{"ecog-04003"} or \code{"rmi"}. Only one source can be queried at a time. If not provided, -\code{"baltrad"} is used.} +\code{"baltrad"} is used. Alternatively a local directory can be specified. +In that case data is read from the directory, file in the directory +should be structures like they are in the monthly folders of the aloft +repository. To specify an alternative structure the +\code{"getRad.vpts_local_path_format"} option can be used.} \item{return_type}{Type of object that should be returned. Either: \itemize{ diff --git a/tests/testthat/test-get_vpts_local.R b/tests/testthat/test-get_vpts_local.R new file mode 100644 index 00000000..6d081a6c --- /dev/null +++ b/tests/testthat/test-get_vpts_local.R @@ -0,0 +1,109 @@ +dir <- tempdir() +skip_if_offline("aloftdata.s3-eu-west-1.amazonaws.com") +dir.create(file.path(dir, "local_tests", "bewid", "2016"), recursive = T) +dir.create(file.path(dir, "local_tests", "bejab", "2016"), recursive = T) +local_dir <- file.path(dir, "local_tests") +files <- c( + "bewid/2016/bewid_vpts_20160201.csv", + "bewid/2016/bewid_vpts_20160202.csv", + "bejab/2016/bejab_vpts_20160201.csv", + "bejab/2016/bejab_vpts_20160202.csv" +) +for (i in files) { + download.file( + paste0("https://aloftdata.s3-eu-west-1.amazonaws.com/uva/daily/", i), + file.path(dir, "local_tests", i), + quiet = TRUE + ) +} +test_that("can read data", { + withr::with_options( + c( + "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{sprintf('%02i',month)}{sprintf('%02i',day)}.csv" + ), + { + expect_s3_class( + get_vpts("bewid", as.Date("2016-2-1"), source = local_dir), + class = "vpts" + ) + expect_type( + ret <- get_vpts( + c("bewid", "bejab"), + as.Date("2016-2-1"), + source = local_dir + ), + "list" + ) + expect_length(ret, 2L) + expect_s3_class(ret[[1]], class = "vpts") + expect_s3_class(ret[[2]], class = "vpts") + } + ) +}) + + +test_that("errors outside of range or directory with corect format", { + withr::with_options( + c( + "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{sprintf('%02i',month)}{sprintf('%02i',day)}.csv" + ), + { + expect_error( + get_vpts("bewid", as.Date("2016-2-1"), source = dir), + class = "getRad_error_files_not_in_source_dir" + ) + expect_error( + get_vpts("bewid", as.Date("2016-3-1"), source = local_dir), + class = "getRad_error_files_not_in_source_dir" + ) + expect_warning( + ret <- get_vpts( + "bewid", + as.Date("2016-2-1") + -3:1, + source = local_dir + ), + class = "getRad_warning_some_files_not_in_source_dir" + ) + expect_s3_class(ret, "vpts") + + expect_warning( + ret <- get_vpts( + c("bewid", "behav"), + as.Date("2016-2-1"), + source = local_dir + ), + class = "getRad_warning_some_files_not_in_source_dir" + ) + expect_s3_class(ret, "vpts") + + expect_true(all(grepl( + "local_tests/behav/2016/behav_vpts_2016020[1|2].csv", + rlang::catch_cnd( + get_vpts( + c("bewid", "behav"), + as.Date("2016-2-1"), + source = local_dir + ), + class = "getRad_warning_some_files_not_in_source_dir" + )$missing_files + ))) + } + ) +}) + +test_that("error on non existing files", { + expect_error( + get_vpts("bewid", as.Date("2016-2-1"), source = local_dir), + class = "getRad_error_files_not_in_source_dir" + ) + expect_error( + get_vpts("bewid", as.Date("2016-2-10"), source = local_dir), + class = "getRad_error_files_not_in_source_dir" + ) + # test if by default the aloft file structure is used + expect_error( + get_vpts("bewid", as.Date("2016-2-10"), source = local_dir), + "local_tests/bewid/2016/bewid_vpts_201602.csv.gz" + ) +}) +unlink(local_dir) From a087b2f969f79def1e41842a4a232b4fa34a988c Mon Sep 17 00:00:00 2001 From: Bart Date: Tue, 18 Nov 2025 17:47:15 +0100 Subject: [PATCH 2/4] more documentation on formatter --- NEWS.md | 2 +- R/get_vpts.R | 24 +++++++++++++++++++----- R/get_vpts_local.R | 8 ++++---- man/get_vpts.Rd | 25 ++++++++++++++++++++----- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/NEWS.md b/NEWS.md index c6ad26fb..c501d270 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # getRad (development version) -* Implement reading vpts data from a local directory (#135). +* Implement reading `vpts` data from a local directory (#135). # getRad 0.2.3 diff --git a/R/get_vpts.R b/R/get_vpts.R index d95c3f2b..6b1409a6 100644 --- a/R/get_vpts.R +++ b/R/get_vpts.R @@ -7,6 +7,23 @@ #' @details #' For more details on supported sources, see `vignette("supported_sources")`. #' +#' In that case data is read from the directory, file in the directory +#' should be structures like they are in the monthly folders of the aloft +#' repository. To specify an alternative structure the +#' `"getRad.vpts_local_path_format"` option can be used. This can, for +#' example, be used to read daily data. Some example options for the glue +#' formatters are: +#' +#' * `"{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"`: The default format, +#' the same structure as the monthly directories in the aloft repository. Or as +#' contained in the `tgz` files in the aloft zenodo repository. +#' * `"{substr(radar, 1,2)}/{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"`: +#' The format as in the files in the zenodo aloft repository +#' * `"{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv"`: The format as daily +#' data is stored in aloft data +#' +#' Besides the examples above there is a `date` object available for formatting. +#' #' @inheritParams get_pvol #' @inherit get_vpts_aloft details #' @param datetime Either: @@ -19,11 +36,8 @@ #' - A [lubridate::interval()], between which all data files are downloaded. #' @param source Source of the data. One of `"baltrad"`, `"uva"`, `"ecog-04003"` #' or `"rmi"`. Only one source can be queried at a time. If not provided, -#' `"baltrad"` is used. Alternatively a local directory can be specified. -#' In that case data is read from the directory, file in the directory -#' should be structures like they are in the monthly folders of the aloft -#' repository. To specify an alternative structure the -#' `"getRad.vpts_local_path_format"` option can be used. +#' `"baltrad"` is used. Alternatively a local directory can be specified, +#' see details for an explanation of the file format. #' @param return_type Type of object that should be returned. Either: #' - `"vpts"`: vpts object(s) (default). #' - `"tibble"`: a [dplyr::tibble()]. diff --git a/R/get_vpts_local.R b/R/get_vpts_local.R index 4d1e616a..cc79ba72 100644 --- a/R/get_vpts_local.R +++ b/R/get_vpts_local.R @@ -13,13 +13,13 @@ get_vpts_local <- function( ~ unique(glue::glue( getOption( "getRad.vpts_local_path_format", - default = "{radar}/{year}/{radar}_vpts_{year}{sprintf('%02i',month)}.csv.gz" + default = "{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz" ), radar = .x, year = lubridate::year(dates), - month = lubridate::month(dates), - day = lubridate::day(dates), - datetime = strftime(dates, "%Y%m%d%H%M", tz = "UTC") + month = sprintf("%02i", lubridate::month(dates)), + day = sprintf("%02i", lubridate::day(dates)), + date = dates )) ) |> purrr::set_names(radar) diff --git a/man/get_vpts.Rd b/man/get_vpts.Rd index 9c325258..dd6512a7 100644 --- a/man/get_vpts.Rd +++ b/man/get_vpts.Rd @@ -28,11 +28,8 @@ downloaded. \item{source}{Source of the data. One of \code{"baltrad"}, \code{"uva"}, \code{"ecog-04003"} or \code{"rmi"}. Only one source can be queried at a time. If not provided, -\code{"baltrad"} is used. Alternatively a local directory can be specified. -In that case data is read from the directory, file in the directory -should be structures like they are in the monthly folders of the aloft -repository. To specify an alternative structure the -\code{"getRad.vpts_local_path_format"} option can be used.} +\code{"baltrad"} is used. Alternatively a local directory can be specified, +see details for an explanation of the file format.} \item{return_type}{Type of object that should be returned. Either: \itemize{ @@ -51,6 +48,24 @@ as a (list of) of \link[bioRad:summary.vpts]{vpts objects} or a } \details{ For more details on supported sources, see \code{vignette("supported_sources")}. + +In that case data is read from the directory, file in the directory +should be structures like they are in the monthly folders of the aloft +repository. To specify an alternative structure the +\code{"getRad.vpts_local_path_format"} option can be used. This can, for +example, be used to read daily data. Some example options for the glue +formatters are: +\itemize{ +\item \code{"{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"}: The default format, +the same structure as the monthly directories in the aloft repository. Or as +contained in the \code{tgz} files in the aloft zenodo repository. +\item \code{"{substr(radar, 1,2)}/{radar}/{year}/{radar}_vpts_{year}{month}.csv.gz"}: +The format as in the files in the zenodo aloft repository +\item \code{"{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv"}: The format as daily +data is stored in aloft data +} + +Besides the examples above there is a \code{date} object available for formatting. } \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} From 98d57a74c65b052ccb0d137ab320103e2c068d58 Mon Sep 17 00:00:00 2001 From: Bart Date: Tue, 18 Nov 2025 18:01:00 +0100 Subject: [PATCH 3/4] correct caller env --- R/get_vpts_local.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/get_vpts_local.R b/R/get_vpts_local.R index cc79ba72..83debd8a 100644 --- a/R/get_vpts_local.R +++ b/R/get_vpts_local.R @@ -1,7 +1,9 @@ get_vpts_local <- function( radar, rounded_interval, - directory + directory, + ..., + call = rlang::caller_env() ) { dates <- as.Date(seq( lubridate::int_start(rounded_interval), @@ -31,7 +33,8 @@ get_vpts_local <- function( x = "None of the expected files are in the source directory ({.file {directory}}).", i = "The following files were expected: {.file {unlist(full_paths)}}." ), - class = "getRad_error_files_not_in_source_dir" + class = "getRad_error_files_not_in_source_dir", + call = call ) } if (any(!unlist(s))) { @@ -43,7 +46,8 @@ get_vpts_local <- function( i = "These files are considered missing data and therefore omitted from the results." ), missing_files = missing_files, - class = "getRad_warning_some_files_not_in_source_dir" + class = "getRad_warning_some_files_not_in_source_dir", + call = call ) } any_file <- purrr::map_lgl(s, any) From ff7634c1194605eb2ee4e99ea19ef72a9ae81257 Mon Sep 17 00:00:00 2001 From: Bart Date: Thu, 20 Nov 2025 10:54:36 +0100 Subject: [PATCH 4/4] test fix --- tests/testthat/test-get_vpts_local.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-get_vpts_local.R b/tests/testthat/test-get_vpts_local.R index 6d081a6c..8271b604 100644 --- a/tests/testthat/test-get_vpts_local.R +++ b/tests/testthat/test-get_vpts_local.R @@ -19,7 +19,7 @@ for (i in files) { test_that("can read data", { withr::with_options( c( - "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{sprintf('%02i',month)}{sprintf('%02i',day)}.csv" + "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv" ), { expect_s3_class( @@ -45,7 +45,7 @@ test_that("can read data", { test_that("errors outside of range or directory with corect format", { withr::with_options( c( - "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{sprintf('%02i',month)}{sprintf('%02i',day)}.csv" + "getRad.vpts_local_path_format" = "{radar}/{year}/{radar}_vpts_{year}{month}{day}.csv" ), { expect_error(