diff --git a/.Rbuildignore b/.Rbuildignore index bf5b60a..8206eb8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,4 +19,5 @@ ^CODE_OF_CONDUCT\.md$ ^src/.*\.o$ ^dev\.R$ -^paper$ \ No newline at end of file +^paper$ +^\.cursor$ \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md similarity index 100% rename from CODE_OF_CONDUCT.md rename to .github/CODE_OF_CONDUCT.md diff --git a/.gitignore b/.gitignore index 8b89e3a..622fb3b 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,5 @@ docs *.so src/tidyhydro.dll tidyhydro.md + +.cursor/ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index b1cc2b3..6d5840e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidyhydro Type: Package Title: Tidy Metrics for Assessing Hydrological Models Performance -Version: 0.1.2 +Version: 0.1.2.9000 Authors@R: person(given = "Anatoly", family = "Tsyplenkov", email = "atsyplenkov@fastmail.com", role = c("cre", "aut", "cph"), comment = c(ORCID = "0000-0003-4144-8402")) Maintainer: Anatoly Tsyplenkov @@ -9,10 +9,11 @@ Description: Provides tidy tools to measure the characteristics of hydrological License: MIT + file LICENSE Depends: R (>= 4.1.0) Imports: + cli, + checkmate (>= 2.3.1), Rcpp (>= 1.0.12), rlang (>= 1.1.0), yardstick (>= 1.3.1), - checkmate (>= 2.3.1) LinkingTo: Rcpp Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 @@ -21,7 +22,8 @@ Language: en-US Suggests: hydroGOF, testthat (>= 3.0.0), - quickcheck (>= 0.1.3) + quickcheck (>= 0.1.3), + dplyr Config/testthat/edition: 3 URL: https://github.com/atsyplenkov/tidyhydro, https://atsyplenkov.github.io/tidyhydro/ BugReports: https://github.com/atsyplenkov/tidyhydro/issues diff --git a/NAMESPACE b/NAMESPACE index ed7190e..bc3c789 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,8 @@ importFrom(Rcpp, evalCpp) S3method(print, measure) S3method(format, measure) +S3method(print, measure_set) +S3method(format, measure_set) # general functions export(nse) @@ -19,6 +21,7 @@ export(press) export(sfe) export(cv) export(gm) +export(measure_set) # data.frame methods S3method(nse, data.frame) diff --git a/NEWS.md b/NEWS.md index 6c81bf3..2b8ef60 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# tidyhydro (development version) + +## New features +- Added `measure_set` function to combine different descriptive statistics, of class `measure` + # tidyhydro 0.1.2 ## New features diff --git a/R/aaa-new.R b/R/aaa-new.R index b821a24..607ac92 100644 --- a/R/aaa-new.R +++ b/R/aaa-new.R @@ -1,7 +1,3 @@ -# TODO: -# - Add hyperlink to `measure_set` -# - Add tests - # Modified after https://github.com/tidymodels/yardstick/blob/main/R/aaa-new.R #' Construct a new measure function @@ -9,17 +5,18 @@ #' #' @description #' These functions provide convenient wrappers to create the three types of -#' measure functions in `tidyhydro`: measures of central tendency, variability -#' and symmetry. They add a measure-specific class to `fn` and -#' mimic a behaviour of [metric_set][yardstick::metric_set]. These features -#' are used by measure_set. +#' descriptive statistics functions in `tidyhydro`: measures of central +#' tendency, variability and symmetry. They add a descriptive +#' statistics-specific class to `fn` and mimic a behaviour of +#' [metrics][yardstick::metrics] from `yardstick`, while are not +#' directly compatible with [metric_set][yardstick::metric_set]. #' -#' See [Custom performance -#' metrics](https://www.tidymodels.org/learn/develop/metrics/) for more -#' information about creating custom metrics. +#' In order to create a measure set, one can use [measure_set]. #' #' @param fn A function. The measure function to attach a measure-specific class #' +#' @seealso [measure_set] +#' #' @name new-measure NULL @@ -70,8 +67,261 @@ format.measure <- function(x, ...) { "tendency_measure" = "Measure of Central Tendency", "var_measure" = "Measure of Variability", "sym_measure" = "Measure of Distribution Symmetry", - "measure" + "measure" = "Measure" + ) + + paste("A", measure_type) +} + +# Measure set ------------------------------------------------------------ + +#' Combine multiple measures into a single function +#' @keywords summary_stats +#' @family descriptive statistics +#' +#' @description +#' This function proposes a convenient wrapper to create a measure set, +#' mimicking a behaviour of [metric_set][yardstick::metric_set]. +#' +#' @param ... The bare names of the functions to be included in the measure set. +#' +#' @details +#' All functions must be valid measure functions, i.e. they must be of +#' class `tendency_measure`, `var_measure` or `sym_measure`. Or created with +#' [new_tendency_measure], [new_var_measure] or [new_sym_measure]. +#' +#' Alike with [metric_set], where it is not allowed to mix different metric +#' classes, it is allowed to mix different measure classes in [measure_set]. +#' For example, [gm()] can be used with [cv()] because they +#' are valid measure functions even though first one is of class +#' `tendency_measure` and the second one is of class `var_measure`. +#' +#' @examples +#' \dontrun{ +#' library(tidyhydro) +#' +#' # Multiple descriptive statistics +#' multi_measure <- measure_set(gm, cv) +#' +#' # The returned function has arguments: +#' # fn(data, truth, na_rm = TRUE, ...) +#' multi_measure(avacha, obs) +#' +#' avacha |> +#' group_by(month = format(date, "%b")) |> +#' multi_measure(obs) +#' } +#' +#' @export +measure_set <- function(...) { + quo_fns <- rlang::enquos(...) + validate_not_empty(quo_fns) + + # Get values and check that they are fns + fns <- lapply(quo_fns, rlang::eval_tidy) + validate_inputs_are_functions(fns) + + # Add on names, and then check that + # all fns are of the same function class + names(fns) <- vapply(quo_fns, get_quo_label, character(1)) + validate_function_class(fns) + + fn_cls <- class(fns[[1]])[[1]] + + # All measure functions have the same signature + if ( + fn_cls %in% c("tendency_measure", "var_measure", "sym_measure", "measure") + ) { + make_measure_function(fns) + } else { + cli::cli_abort( + "{.fn validate_function_class} should have errored on unknown classes.", + .internal = TRUE + ) + } +} + +#' @export +print.measure_set <- function(x, ...) { + cat(format(x), sep = "\n") + invisible(x) +} + +#' @export +format.measure_set <- function(x, ...) { + measures <- attr(x, "measures") + measure_names <- names(measures) + + cli::cli_format_method({ + cli::cli_text("A measure set, consisting of:") + + for (i in seq_along(measures)) { + measure_format <- format(measures[[i]]) + cli::cli_text("- {.fun {measure_names[i]}}: {measure_format}") + } + }) +} + +validate_not_empty <- function(x, call = rlang::caller_env()) { + if (rlang::is_empty(x)) { + cli::cli_abort( + "At least 1 function must be supplied to {.code ...}.", + call = call ) + } +} + +validate_inputs_are_functions <- function(fns, call = rlang::caller_env()) { + is_fun_vec <- vapply(fns, rlang::is_function, logical(1)) + all_fns <- all(is_fun_vec) + + if (!all_fns) { + not_fn <- which(!is_fun_vec) + cli::cli_abort( + "All inputs to {.fn measure_set} must be functions.", + "These inputs are not: {not_fn}.", + call = call + ) + } +} + +# Validate that all metric functions inherit from valid function classes or +# combinations of classes +validate_function_class <- function(fns) { + fn_cls <- vapply(fns, function(fn) class(fn)[1], character(1)) + fn_cls_unique <- unique(fn_cls) + n_unique <- length(fn_cls_unique) + + if (n_unique == 0L) { + return(invisible(fns)) + } + + valid_cls <- c( + "tendency_measure", + "var_measure", + "sym_measure", + "measure" + ) + + # Allow mixing of different measure types + if (all(fn_cls_unique %in% valid_cls)) { + return(invisible(fns)) + } + + # Error handling for invalid classes + fn_bad_names <- lapply(fn_cls_unique, function(x) { + names(fns)[fn_cls == x] + }) + + # clean up for nicer printing + fn_cls_unique <- gsub("_measure", "", fn_cls_unique) + fn_cls_unique <- gsub("function", "other", fn_cls_unique) + + fn_cls_other <- fn_cls_unique == "other" + + if (any(fn_cls_other)) { + fn_cls_other_loc <- which(fn_cls_other) + fn_other_names <- fn_bad_names[[fn_cls_other_loc]] + fns_other <- fns[fn_other_names] + + env_names_other <- vapply( + fns_other, + function(fn) rlang::env_name(rlang::fn_env(fn)), + character(1) + ) + + fn_bad_names[[fn_cls_other_loc]] <- paste0( + fn_other_names, + " <", + env_names_other, + ">" + ) + } + + fn_pastable <- mapply( + FUN = function(fn_type, fn_names) { + fn_names <- paste0(fn_names, collapse = ", ") + paste0("- ", fn_type, " (", fn_names, ")") + }, + fn_type = fn_cls_unique, + fn_names = fn_bad_names, + USE.NAMES = FALSE + ) + + cli::cli_abort( + c( + "x" = "The combination of measure functions must be valid measure types.", + "i" = "The following measure function types are being mixed:", + fn_pastable + ), + call = rlang::call2("measure_set") + ) +} + +make_measure_function <- function(fns) { + measure_function <- function(data, truth, na_rm = TRUE, ...) { + # Construct common argument set for each measure call + call_args <- rlang::quos( + data = data, + truth = !!rlang::enquo(truth), + na_rm = na_rm, + ... = ... + ) + + # Construct calls from the functions + arguments + calls <- lapply(fns, rlang::call2, !!!call_args) + + # For measures, we don't need call_remove_static_arguments since + # measures don't typically have tweaked/static arguments + + # Evaluate + measure_list <- mapply( + FUN = eval_safely, + calls, + names(calls), + SIMPLIFY = FALSE, + USE.NAMES = FALSE + ) + + dplyr::bind_rows(measure_list) + } + + class(measure_function) <- c("measure_set", class(measure_function)) + attr(measure_function, "measures") <- fns + measure_function +} + +eval_safely <- function(expr, expr_nm, data = NULL, env = rlang::caller_env()) { + tryCatch( + expr = { + rlang::eval_tidy(expr, data = data, env = env) + }, + error = function(cnd) { + cli::cli_abort( + "Failed to compute {.fn {expr_nm}}.", + parent = cnd, + call = rlang::call2("measure_set") + ) + } + ) +} + +get_quo_label <- function(quo) { + out <- rlang::as_label(quo) + + if (length(out) != 1L) { + cli::cli_abort( + "{.code as_label(quo)} resulted in a character vector of length >1.", + .internal = TRUE + ) + } + + is_namespaced <- grepl("::", out, fixed = TRUE) + + if (is_namespaced) { + split <- strsplit(out, "::", fixed = TRUE)[[1]] + out <- split[[2]] + } - cat(paste("A", measure_type)) + out } diff --git a/README.Rmd b/README.Rmd index 2323589..6638370 100644 --- a/README.Rmd +++ b/README.Rmd @@ -64,7 +64,7 @@ hydro_metrics(avacha, obs, sim, performance = TRUE) ## Descriptive statistics In addition to `metric`, inherited from `yardstick`, the `tidyhydro` introduces the `measure` objects. It aims to calculate descriptive statistics of a single dataset, such as `cv()` — coefficient of variation (a measure of variability) or `gm()` — geometric mean (a measure of central tendency): -```{r measureset} +```{r measures} # Coefficient of Variation cv(avacha, obs) @@ -72,6 +72,14 @@ cv(avacha, obs) gm_vec(avacha$obs) ``` +Similarly to `metric_set`, one can create a `measure_set` and estimate desired descriptive statistics at once: + +```{r measureset} +ms <- measure_set(cv, gm) + +ms(avacha, obs) +``` + ## Installation You can install the development version of `tidyhydro` from [GitHub](https://github.com/atsyplenkov/tidyhydro) with: diff --git a/README.md b/README.md index de6246e..c5ce7f1 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,20 @@ gm_vec(avacha$obs) #> [1] 128.9476 ``` +Similarly to `metric_set`, one can create a `measure_set` and estimate +desired descriptive statistics at once: + +``` r +ms <- measure_set(cv, gm) + +ms(avacha, obs) +#> # A tibble: 2 × 3 +#> .metric .estimator .estimate +#> +#> 1 cv standard 0.533 +#> 2 gm standard 129. +``` + ## Installation You can install the development version of `tidyhydro` from @@ -153,9 +167,9 @@ bench::mark( #> # A tibble: 3 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> -#> 1 tidyhydro 1 1 16.5 NaN NaN -#> 2 hydroGOF 9.74 11.5 1 Inf Inf -#> 3 baseR 6.40 7.92 2.09 Inf Inf +#> 1 tidyhydro 1 1 18.2 NaN NaN +#> 2 hydroGOF 11.7 11.6 1 Inf Inf +#> 3 baseR 7.19 8.63 1.98 Inf Inf ``` ## Code of Conduct diff --git a/codemeta.json b/codemeta.json index 8e4daaf..973c55c 100644 --- a/codemeta.json +++ b/codemeta.json @@ -2,13 +2,13 @@ "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "@type": "SoftwareSourceCode", "identifier": "tidyhydro", - "description": "Provides tidy tools for comparing simulated and observed hydrological time series. Includes compatibility with the 'yardstick' package for model performance evaluation using commonly used metrics such as the Nash–Sutcliffe Efficiency (NSE), Kling–Gupta Efficiency (KGE), percent bias (pBIAS) and etc.", + "description": "Provides tidy tools to measure the characteristics of hydrological time series and to assess the performance of hydrological models. Includes compatibility with the 'yardstick' package for model performance evaluation using commonly used metrics such as the Nash–Sutcliffe Efficiency (NSE), Kling–Gupta Efficiency (KGE), percent bias (pBIAS) and etc. Additionally provides a set of measures to calculate the descriptive statistics of a single dataset in accordance with Helsel et al. (2020). Helsel DR, Hirsch RM, Ryberg KR, Archfield SA, Gilroy EJ. Statistical methods in water resources. Reston, VA: 2020. .", "name": "tidyhydro: Tidy Metrics for Assessing Hydrological Models Performance", "relatedLink": "https://atsyplenkov.github.io/tidyhydro/", "codeRepository": "https://github.com/atsyplenkov/tidyhydro", "issueTracker": "https://github.com/atsyplenkov/tidyhydro/issues", "license": "https://spdx.org/licenses/MIT", - "version": "0.1.2", + "version": "0.1.2.9000", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -80,6 +80,18 @@ "url": "https://cran.r-project.org" }, "sameAs": "https://CRAN.R-project.org/package=quickcheck" + }, + { + "@type": "SoftwareApplication", + "identifier": "dplyr", + "name": "dplyr", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=dplyr" } ], "softwareRequirements": { @@ -90,6 +102,31 @@ "version": ">= 4.1.0" }, "2": { + "@type": "SoftwareApplication", + "identifier": "cli", + "name": "cli", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=cli" + }, + "3": { + "@type": "SoftwareApplication", + "identifier": "checkmate", + "name": "checkmate", + "version": ">= 2.3.1", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=checkmate" + }, + "4": { "@type": "SoftwareApplication", "identifier": "Rcpp", "name": "Rcpp", @@ -102,7 +139,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=Rcpp" }, - "3": { + "5": { "@type": "SoftwareApplication", "identifier": "rlang", "name": "rlang", @@ -115,7 +152,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=rlang" }, - "4": { + "6": { "@type": "SoftwareApplication", "identifier": "yardstick", "name": "yardstick", @@ -128,22 +165,9 @@ }, "sameAs": "https://CRAN.R-project.org/package=yardstick" }, - "5": { - "@type": "SoftwareApplication", - "identifier": "checkmate", - "name": "checkmate", - "version": ">= 2.3.1", - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" - }, - "sameAs": "https://CRAN.R-project.org/package=checkmate" - }, "SystemRequirements": null }, - "fileSize": "7300.242KB", + "fileSize": "7323.159KB", "releaseNotes": "https://github.com/atsyplenkov/tidyhydro/blob/master/NEWS.md", "readme": "https://github.com/atsyplenkov/tidyhydro/blob/master/README.md", "keywords": ["r", "r-package", "rstats"] diff --git a/man/cv.Rd b/man/cv.Rd index e657124..ae83125 100644 --- a/man/cv.Rd +++ b/man/cv.Rd @@ -52,7 +52,8 @@ cv_vec(avacha$obs) } \seealso{ Other descriptive statistics: -\code{\link{gm}()} +\code{\link{gm}()}, +\code{\link{measure_set}()} } \concept{descriptive statistics} \keyword{summary_stats} diff --git a/man/gm.Rd b/man/gm.Rd index cabfd2d..e90e518 100644 --- a/man/gm.Rd +++ b/man/gm.Rd @@ -52,7 +52,8 @@ gm_vec(avacha$obs) } \seealso{ Other descriptive statistics: -\code{\link{cv}()} +\code{\link{cv}()}, +\code{\link{measure_set}()} } \concept{descriptive statistics} \keyword{summary_stats} diff --git a/man/measure_set.Rd b/man/measure_set.Rd new file mode 100644 index 0000000..a2e218a --- /dev/null +++ b/man/measure_set.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aaa-new.R +\name{measure_set} +\alias{measure_set} +\title{Combine multiple measures into a single function} +\usage{ +measure_set(...) +} +\arguments{ +\item{...}{The bare names of the functions to be included in the measure set.} +} +\description{ +This function proposes a convenient wrapper to create a measure set, +mimicking a behaviour of \link[yardstick:metric_set]{metric_set}. +} +\details{ +All functions must be valid measure functions, i.e. they must be of +class \code{tendency_measure}, \code{var_measure} or \code{sym_measure}. Or created with +\link{new_tendency_measure}, \link{new_var_measure} or \link{new_sym_measure}. + +Alike with \link{metric_set}, where it is not allowed to mix different metric +classes, it is allowed to mix different measure classes in \link{measure_set}. +For example, \code{\link[=gm]{gm()}} can be used with \code{\link[=cv]{cv()}} because they +are valid measure functions even though first one is of class +\code{tendency_measure} and the second one is of class \code{var_measure}. +} +\examples{ +\dontrun{ +library(tidyhydro) + +# Multiple descriptive statistics +multi_measure <- measure_set(gm, cv) + +# The returned function has arguments: +# fn(data, truth, na_rm = TRUE, ...) +multi_measure(avacha, obs) + +avacha |> + group_by(month = format(date, "\%b")) |> + multi_measure(obs) +} + +} +\seealso{ +Other descriptive statistics: +\code{\link{cv}()}, +\code{\link{gm}()} +} +\concept{descriptive statistics} +\keyword{summary_stats} diff --git a/man/new-measure.Rd b/man/new-measure.Rd index 36acd76..1cb2cce 100644 --- a/man/new-measure.Rd +++ b/man/new-measure.Rd @@ -18,12 +18,15 @@ new_sym_measure(fn) } \description{ These functions provide convenient wrappers to create the three types of -measure functions in \code{tidyhydro}: measures of central tendency, variability -and symmetry. They add a measure-specific class to \code{fn} and -mimic a behaviour of \link[yardstick:metric_set]{metric_set}. These features -are used by measure_set. +descriptive statistics functions in \code{tidyhydro}: measures of central +tendency, variability and symmetry. They add a descriptive +statistics-specific class to \code{fn} and mimic a behaviour of +\link[yardstick:metrics]{metrics} from \code{yardstick}, while are not +directly compatible with \link[yardstick:metric_set]{metric_set}. -See \href{https://www.tidymodels.org/learn/develop/metrics/}{Custom performance metrics} for more -information about creating custom metrics. +In order to create a measure set, one can use \link{measure_set}. +} +\seealso{ +\link{measure_set} } \keyword{summary_stats} diff --git a/tests/testthat/test-measures.R b/tests/testthat/test-measures.R index 54ef69a..3798135 100644 --- a/tests/testthat/test-measures.R +++ b/tests/testthat/test-measures.R @@ -3,3 +3,69 @@ test_that("Negative values are not allowed in GM", { expect_error(gm_vec(x)) }) + +test_that("measure_set works as expected", { + ms <- measure_set(gm, cv) + + ms_new <- ms(avacha, obs) + ms_manual <- rbind(gm(avacha, obs), cv(avacha, obs)) + + expect_true(inherits(ms, "measure_set")) + checkmate::expect_data_frame(ms_new) + expect_equal(ms_new, ms_manual) +}) + +test_that("measure_set works with groups", { + skip_if_not_installed("dplyr") + + ms <- measure_set(gm, cv) + ms_grouped <- avacha |> + dplyr::group_by(month = format(date, "%b")) |> + ms(obs) + + checkmate::expect_data_frame(ms_grouped, nrows = 24, ncols = 4) + expect_equal(names(ms_grouped), c("month", ".metric", ".estimator", ".estimate")) + expect_equal(ms_grouped$.metric, rep(c("gm", "cv"), each = 12)) + expect_equal(ms_grouped$.estimator, rep("standard", 24)) +}) + +test_that("measure set functions are classed", { + expect_s3_class( + measure_set(gm, cv), + "measure_set" + ) + + expect_s3_class( + measure_set(gm, cv), + "function" + ) + + expect_s3_class( + measure_set(gm), + "measure_set" + ) + + expect_s3_class( + measure_set(cv), + "function" + ) +}) + +test_that("measure set functions retain measure functions", { + fns <- attr(measure_set(gm, cv), "measures") + + expect_equal( + names(fns), + c("gm", "cv") + ) + + expect_equal( + class(fns[[1]]), + c("tendency_measure", "measure", "function") + ) + + expect_equal( + class(fns[[2]]), + c("var_measure", "measure", "function") + ) +})