From 9003f1e0d4d9222b00755e33f1d37641a7645a1b Mon Sep 17 00:00:00 2001 From: Igor Kolodziej Date: Sat, 17 Jan 2026 13:57:06 +0100 Subject: [PATCH 1/6] bootstrap bug fixes --- src_dev/shared/bootstrap.R | 22 +++ tests/testthat/test-bootstrap-advanced.R | 49 ++---- tests/testthat/test-bootstrap-improvements.R | 90 ---------- tests/testthat/test-bootstrap.R | 170 +++++++++++++++++++ 4 files changed, 209 insertions(+), 122 deletions(-) delete mode 100644 tests/testthat/test-bootstrap-improvements.R diff --git a/src_dev/shared/bootstrap.R b/src_dev/shared/bootstrap.R index 96ce45a..765aae2 100644 --- a/src_dev/shared/bootstrap.R +++ b/src_dev/shared/bootstrap.R @@ -120,6 +120,11 @@ bootstrap_variance.data.frame <- function(data, estimator_func, point_estimate, dot_args <- list(...) est_fun <- estimator_func resample_guard <- NULL + if (!is.null(dot_args$bootstrap_settings)) dot_args$bootstrap_settings <- NULL + if (!is.null(dot_args$bootstrap_options)) dot_args$bootstrap_options <- NULL + if (!is.null(dot_args$bootstrap_type)) dot_args$bootstrap_type <- NULL + if (!is.null(dot_args$bootstrap_mse)) dot_args$bootstrap_mse <- NULL + if (!is.null(dot_args$survey_na_policy)) dot_args$survey_na_policy <- NULL if (!is.null(dot_args$resample_guard)) { # Some NMAR estimators require each resample to contain at least one respondent. # Allow callers to supply a guard that rejects unsuitable resamples. @@ -298,6 +303,9 @@ bootstrap_variance.survey.design <- function(data, estimator_func, point_estimat dot_args <- list(...) est_fun <- estimator_func + if (!is.null(dot_args$bootstrap_cores)) dot_args$bootstrap_cores <- NULL + if (!is.null(dot_args$bootstrap_workers)) dot_args$bootstrap_workers <- NULL + if (!is.null(dot_args$resample_guard)) dot_args$resample_guard <- NULL bootstrap_settings <- list() if (!is.null(dot_args$bootstrap_settings)) { if (!is.list(dot_args$bootstrap_settings)) { @@ -322,6 +330,14 @@ bootstrap_variance.survey.design <- function(data, estimator_func, point_estimat dot_args$bootstrap_mse <- NULL } + if (!is.null(bootstrap_settings$design) || !is.null(bootstrap_settings$replicates)) { + stop( + "`bootstrap_settings` must not include `design` or `replicates`.\n ", + "Use `data = ` and `bootstrap_reps = ` instead.", + call. = FALSE + ) + } + rep_design <- do.call(svrep::as_bootstrap_design, c(list(design = data, replicates = bootstrap_reps), bootstrap_settings)) estimator_args <- dot_args @@ -366,6 +382,12 @@ bootstrap_variance.survey.design <- function(data, estimator_func, point_estimat rep_scale <- rep_design$scale rep_rscales <- rep_design$rscales rep_mse <- rep_design$mse + if (isTRUE(rep_mse) && (!is.numeric(point_estimate) || length(point_estimate) != 1L || !is.finite(point_estimate))) { + stop( + "`point_estimate` must be a finite numeric scalar when bootstrap replicate design uses mse = TRUE.", + call. = FALSE + ) + } # Extract data frame only (not full survey design) to reduce serialization. data_vars <- data$variables diff --git a/tests/testthat/test-bootstrap-advanced.R b/tests/testthat/test-bootstrap-advanced.R index c3603aa..d82f5b0 100644 --- a/tests/testthat/test-bootstrap-advanced.R +++ b/tests/testthat/test-bootstrap-advanced.R @@ -21,8 +21,7 @@ test_that("svyrep.design is rejected with clear error", { bootstrap_variance( drep, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 10, - bootstrap_cores = 1 + bootstrap_reps = 10 ), "replicate design|svyrep" ) @@ -52,8 +51,7 @@ test_that("replicate count mismatch warns but proceeds", { bootstrap_variance( dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 200, - bootstrap_cores = 1 + bootstrap_reps = 200 ) ), error = function(e) list(error = TRUE, message = e$message) @@ -96,7 +94,6 @@ test_that("survey NA policy 'strict' shows detailed error", { dstrat, estimator, point_estimate = mean(apistrat$api00), bootstrap_reps = 10, - bootstrap_cores = 1, survey_na_policy = "strict" ), error = function(e) e$message @@ -138,7 +135,6 @@ test_that("survey NA policy 'omit' handles failures correctly", { dstrat, estimator, point_estimate = mean(apistrat$api00), bootstrap_reps = 15, - bootstrap_cores = 1, survey_na_policy = "omit" ), "3/15.*failed.*omitted" @@ -174,7 +170,6 @@ test_that("survey NA policy 'omit' requires at least 2 successes", { dstrat, estimator, point_estimate = mean(apistrat$api00), bootstrap_reps = 10, - bootstrap_cores = 1, survey_na_policy = "omit" ), "Too few successful" @@ -210,15 +205,14 @@ test_that("survey NA policy 'omit' shows failure pattern", { res <- tryCatch( withCallingHandlers( bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 20, - bootstrap_cores = 1, - survey_na_policy = "omit" - ), - warning = function(w) { - warn_msg <<- conditionMessage(w) - invokeRestart("muffleWarning") + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 20, + survey_na_policy = "omit" + ), + warning = function(w) { + warn_msg <<- conditionMessage(w) + invokeRestart("muffleWarning") } ), error = function(e) list(error = TRUE) @@ -256,8 +250,7 @@ test_that("mathematical correctness: variance has correct properties", { res <- bootstrap_variance( dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 30, - bootstrap_cores = 1 + bootstrap_reps = 30 ) # Property 1: Variance must be non-negative @@ -291,8 +284,7 @@ test_that("mathematical correctness: variance has correct properties", { res_iid <- bootstrap_variance( data_iid, estimator_mean, point_estimate = mean(x), - bootstrap_reps = 500, - bootstrap_cores = 1 + bootstrap_reps = 500 ) # Bootstrap estimate should be close to analytical value @@ -327,8 +319,7 @@ test_that("boundary cases: minimum replicates and edge conditions", { res_min <- bootstrap_variance( dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 2, - bootstrap_cores = 1 + bootstrap_reps = 2 ) expect_equal(length(res_min$replicates), 2) expect_true(is.finite(res_min$variance)) @@ -340,8 +331,7 @@ test_that("boundary cases: minimum replicates and edge conditions", { bootstrap_variance( dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 1, - bootstrap_cores = 1 + bootstrap_reps = 1 ), error = function(e) list(error = TRUE, message = e$message) ) @@ -358,8 +348,7 @@ test_that("boundary cases: minimum replicates and edge conditions", { res_small <- bootstrap_variance( small_data, estimator_simple, point_estimate = mean(small_data$y), - bootstrap_reps = 10, - bootstrap_cores = 1 + bootstrap_reps = 10 ) expect_equal(length(res_small$replicates), 10) expect_true(is.finite(res_small$variance)) @@ -370,8 +359,7 @@ test_that("boundary cases: minimum replicates and edge conditions", { res_single <- bootstrap_variance( single_data, estimator_simple, point_estimate = 5, - bootstrap_reps = 10, - bootstrap_cores = 1 + bootstrap_reps = 10 ) # With single observation, all bootstrap samples are identical expect_equal(length(res_single$replicates), 10) @@ -442,7 +430,6 @@ test_that("omit policy correctly subsets rscales for mathematical correctness", dstrat, estimator, point_estimate = mean(apistrat$api00), bootstrap_reps = 20, - bootstrap_cores = 1, survey_na_policy = "omit" ), "5/20.*failed.*omitted" @@ -487,7 +474,6 @@ test_that("omit policy correctly subsets rscales for mathematical correctness", dstrat, estimator_random, point_estimate = mean(apistrat$api00), bootstrap_reps = 30, - bootstrap_cores = 1, survey_na_policy = "omit" ), "12/30.*failed" @@ -530,8 +516,7 @@ test_that("survey NA policy default is 'strict'", { bootstrap_variance( dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 10, - bootstrap_cores = 1 + bootstrap_reps = 10 ), error = function(e) e$message ) diff --git a/tests/testthat/test-bootstrap-improvements.R b/tests/testthat/test-bootstrap-improvements.R deleted file mode 100644 index 955fa93..0000000 --- a/tests/testthat/test-bootstrap-improvements.R +++ /dev/null @@ -1,90 +0,0 @@ -test_that("IID bootstrap is reproducible across future backends", { - skip_if_not_installed("future") - skip_if_not_installed("future.apply") - - data <- data.frame(x = rnorm(100), y = rnorm(100)) - estimator <- function(data, ...) { - list(y_hat = mean(data$y), converged = TRUE) - } - -# Sequential backend - future::plan(future::sequential) - set.seed(424242) - res_seq <- bootstrap_variance( - data, estimator, point_estimate = 0, bootstrap_reps = 50, bootstrap_cores = 1 - ) - -# Multisession backend (2 workers) - skip_on_cran() # Multisession can be unreliable on CRAN - future::plan(future::multisession, workers = 2) - set.seed(424242) - res_par2 <- bootstrap_variance( - data, estimator, point_estimate = 0, bootstrap_reps = 50 - ) - -# Multisession backend (4 workers) - future::plan(future::multisession, workers = 4) - set.seed(424242) - res_par4 <- bootstrap_variance( - data, estimator, point_estimate = 0, bootstrap_reps = 50 - ) - -# Reset to sequential - future::plan(future::sequential) - -# Verify exact reproducibility - expect_identical(res_seq$replicates, res_par2$replicates, - label = "Sequential vs 2-worker replicates") - expect_identical(res_seq$replicates, res_par4$replicates, - label = "Sequential vs 4-worker replicates") - expect_equal(res_seq$variance, res_par2$variance, - label = "Sequential vs 2-worker variance") - expect_equal(res_seq$variance, res_par4$variance, - label = "Sequential vs 4-worker variance") -}) - -test_that("Survey bootstrap is reproducible across backends", { - skip_if_not_installed("survey") - skip_if_not_installed("svrep") - skip_if_not_installed("future") - skip_if_not_installed("future.apply") - skip_on_cran() - - data(api, package = "survey") - dstrat <- survey::svydesign( - id = ~1, strata = ~stype, weights = ~pw, - data = apistrat, fpc = ~fpc - ) - - estimator <- function(data, ...) { - est <- survey::svymean(~api00, data) - list(y_hat = as.numeric(est), converged = TRUE) - } - -# Sequential baseline - future::plan(future::sequential) - set.seed(999) - res_seq1 <- bootstrap_variance( - dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 50, bootstrap_cores = 1 - ) - -# Sequential with same seed should be identical - future::plan(future::sequential) - set.seed(999) - res_seq2 <- bootstrap_variance( - dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 50, bootstrap_cores = 1 - ) - - future::plan(future::sequential) - -# Test reproducibility within sequential mode (this is the critical property) - expect_identical(res_seq1$replicates, res_seq2$replicates, - label = "Survey sequential reproducibility") - expect_equal(res_seq1$variance, res_seq2$variance, - label = "Survey sequential variance reproducibility") - -# Note: Cross-backend reproducibility (sequential vs parallel) requires -# complex template_call serialization and is tested separately when needed -}) diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R index 8391e31..9ee1ae7 100644 --- a/tests/testthat/test-bootstrap.R +++ b/tests/testthat/test-bootstrap.R @@ -126,3 +126,173 @@ test_that("bootstrap_variance errors on calibrated survey designs", { fixed = FALSE ) }) + +test_that("bootstrap_settings cannot override design/replicates", { + skip_if_not_installed("survey") + skip_if_not_installed("svrep") + + df <- data.frame(y = c(1, 2, 3, 4), w = c(1, 2, 1, 3)) + design <- survey::svydesign(ids = ~1, data = df, weights = ~w) + + expect_error( + bootstrap_variance(design, + estimator_func = bootstrap_dummy_estimator, + point_estimate = 1, + bootstrap_reps = 3, + bootstrap_settings = list(replicates = 999) + ), + "must not include `design` or `replicates`", + fixed = TRUE + ) +}) + +test_that("bootstrap_mse requires a finite point_estimate", { + skip_if_not_installed("survey") + skip_if_not_installed("svrep") + + df <- data.frame(y = c(1, 2, 3, 4), w = c(1, 2, 1, 3)) + design <- survey::svydesign(ids = ~1, data = df, weights = ~w) + + expect_error( + bootstrap_variance(design, + estimator_func = bootstrap_dummy_estimator, + point_estimate = NA_real_, + bootstrap_reps = 3, + bootstrap_mse = TRUE + ), + "point_estimate", + fixed = FALSE + ) +}) + +test_that("survey-only bootstrap options are ignored for iid data", { + skip_if_not_installed("future.apply") + + set.seed(123) + df <- data.frame(y = c(1, 2, 3, 4)) + res <- bootstrap_variance(df, + estimator_func = bootstrap_dummy_estimator, + point_estimate = mean(df$y), + bootstrap_reps = 5, + bootstrap_type = "Preston", + bootstrap_mse = TRUE, + bootstrap_settings = list(type = "Rao-Wu-Yue-Beaumont"), + bootstrap_options = list(mse = FALSE), + survey_na_policy = "omit" + ) + expect_length(res$replicates, 5) + expect_true(is.finite(res$variance)) +}) + +test_that("iid-only resample_guard is ignored for survey designs", { + skip_if_not_installed("survey") + skip_if_not_installed("svrep") + + df <- data.frame(y = c(1, 2, 3, 4), w = c(1, 2, 1, 3)) + design <- survey::svydesign(ids = ~1, data = df, weights = ~w) + point_est <- sum(df$y * df$w) / sum(df$w) + + set.seed(123) + res <- bootstrap_variance(design, + estimator_func = bootstrap_dummy_estimator, + point_estimate = point_est, + bootstrap_reps = 5, + resample_guard = function(indices, data) FALSE + ) + expect_length(res$replicates, 5) + expect_true(is.finite(res$variance)) +}) + +test_that("IID bootstrap is reproducible across future backends", { + skip_if_not_installed("future") + skip_if_not_installed("future.apply") + + data <- data.frame(x = rnorm(100), y = rnorm(100)) + estimator <- function(data, ...) { + list(y_hat = mean(data$y), converged = TRUE) + } + +# Sequential backend + future::plan(future::sequential) + set.seed(424242) + res_seq <- bootstrap_variance( + data, estimator, point_estimate = 0, bootstrap_reps = 50 + ) + +# Multisession backend (2 workers) + skip_on_cran() # Multisession can be unreliable on CRAN + tryCatch( + future::plan(future::multisession, workers = 2), + error = function(e) skip(paste("future::multisession not available:", conditionMessage(e))) + ) + set.seed(424242) + res_par2 <- bootstrap_variance( + data, estimator, point_estimate = 0, bootstrap_reps = 50 + ) + +# Multisession backend (4 workers) + tryCatch( + future::plan(future::multisession, workers = 4), + error = function(e) skip(paste("future::multisession not available:", conditionMessage(e))) + ) + set.seed(424242) + res_par4 <- bootstrap_variance( + data, estimator, point_estimate = 0, bootstrap_reps = 50 + ) + +# Reset to sequential + future::plan(future::sequential) + +# Verify exact reproducibility + expect_identical(res_seq$replicates, res_par2$replicates, + label = "Sequential vs 2-worker replicates") + expect_identical(res_seq$replicates, res_par4$replicates, + label = "Sequential vs 4-worker replicates") + expect_equal(res_seq$variance, res_par2$variance, + label = "Sequential vs 2-worker variance") + expect_equal(res_seq$variance, res_par4$variance, + label = "Sequential vs 4-worker variance") +}) + +test_that("Survey bootstrap is reproducible across backends", { + skip_if_not_installed("survey") + skip_if_not_installed("svrep") + skip_if_not_installed("future") + skip_if_not_installed("future.apply") + skip_on_cran() + + data(api, package = "survey") + dstrat <- survey::svydesign( + id = ~1, strata = ~stype, weights = ~pw, + data = apistrat, fpc = ~fpc + ) + + estimator <- function(data, ...) { + est <- survey::svymean(~api00, data) + list(y_hat = as.numeric(est), converged = TRUE) + } + +# Sequential baseline + future::plan(future::sequential) + set.seed(999) + res_seq1 <- bootstrap_variance( + dstrat, estimator, point_estimate = mean(apistrat$api00), + bootstrap_reps = 50 + ) + +# Sequential with same seed should be identical + future::plan(future::sequential) + set.seed(999) + res_seq2 <- bootstrap_variance( + dstrat, estimator, point_estimate = mean(apistrat$api00), + bootstrap_reps = 50 + ) + + future::plan(future::sequential) + +# Test reproducibility within sequential mode (this is the critical property) + expect_identical(res_seq1$replicates, res_seq2$replicates, + label = "Survey sequential reproducibility") + expect_equal(res_seq1$variance, res_seq2$variance, + label = "Survey sequential variance reproducibility") +}) From c7ff0dd2bea4436e3fe30b191b792e635e873aa9 Mon Sep 17 00:00:00 2001 From: Igor Kolodziej Date: Sat, 17 Jan 2026 14:25:40 +0100 Subject: [PATCH 2/6] add sequential fallback --- src_dev/shared/bootstrap.R | 169 ++++++++++++++++++-------------- tests/testthat/test-bootstrap.R | 66 +++++++++++++ 2 files changed, 160 insertions(+), 75 deletions(-) diff --git a/src_dev/shared/bootstrap.R b/src_dev/shared/bootstrap.R index 765aae2..0900086 100644 --- a/src_dev/shared/bootstrap.R +++ b/src_dev/shared/bootstrap.R @@ -55,11 +55,13 @@ #' bootstrap_reps = 500)) #' } #' -#' The \code{future} framework (via \code{future.seed = TRUE} in -#' \code{future.apply::future_lapply()}) ensures that each bootstrap replicate -#' uses an independent L'Ecuyer-CMRG random number stream derived from this -#' seed. This gives reproducible results across supported future backends -#' (sequential, multisession, cluster, and so on). +#' If the optional \code{future.apply} package is installed, bootstrap uses +#' \code{future.apply::future_lapply(future.seed = TRUE)} which provides +#' backend-independent, parallel-safe random number streams under the +#' \code{future} framework. If \code{future.apply} is not installed, bootstrap +#' falls back to sequential evaluation via \code{base::lapply()}, which is +#' still reproducible under \code{set.seed()} but may not match the +#' \code{future.seed} stream. #' #' @param data A \code{data.frame} or a \code{survey.design}. #' @param estimator_func Function returning an object with a numeric scalar @@ -91,6 +93,71 @@ bootstrap_variance <- function(data, estimator_func, point_estimate, ...) { stop("Unsupported data type for bootstrap_variance().", call. = FALSE) } +# Internal helper: detect whether future.apply is available. +nmar_has_future_apply <- function() { + requireNamespace("future.apply", quietly = TRUE) +} + +# Internal helper: warn once per R session that we are falling back to sequential +# evaluation because future.apply is not installed. +nmar_warn_no_future_apply_once <- function() { + opt <- "NMAR.bootstrap.warned_no_future_apply" + if (isTRUE(getOption(opt, FALSE))) return(invisible(FALSE)) + warning( + "Package 'future.apply' is not installed. Running bootstrap sequentially via base::lapply().\n ", + "Install 'future.apply' to enable future-based parallel execution and future-seeded RNG (future.seed = TRUE).", + call. = FALSE, + immediate. = TRUE + ) + options(setNames(list(TRUE), opt)) + invisible(TRUE) +} + +# Internal helper: apply over X using future.apply (if installed) or base::lapply +# (sequential fallback). Progress is reported via progressr if installed. +nmar_bootstrap_apply <- function(X, FUN, use_progress, future_globals = NULL, future_packages = NULL) { + use_future <- nmar_has_future_apply() + if (!use_future) nmar_warn_no_future_apply_once() + + if (isTRUE(use_progress) && requireNamespace("progressr", quietly = TRUE)) { + progressr::with_progress({ + p <- progressr::progressor(steps = length(X)) + wrapper <- function(x) { + res <- FUN(x) + p() + res + } + if (use_future) { + fg <- future_globals + if (is.null(fg) || identical(fg, TRUE)) fg <- list() + fg <- c(fg, list(FUN = FUN, p = p)) + future.apply::future_lapply( + X, + wrapper, + future.seed = TRUE, + future.globals = fg, + future.packages = future_packages + ) + } else { + lapply(X, wrapper) + } + }) + } else { + if (use_future) { + if (is.null(future_globals)) future_globals <- TRUE + future.apply::future_lapply( + X, + FUN, + future.seed = TRUE, + future.globals = future_globals, + future.packages = future_packages + ) + } else { + lapply(X, FUN) + } + } +} + #' Default method dispatch (internal safety net) #' @keywords internal bootstrap_variance.default <- function(data, estimator_func, point_estimate, ...) { @@ -120,6 +187,8 @@ bootstrap_variance.data.frame <- function(data, estimator_func, point_estimate, dot_args <- list(...) est_fun <- estimator_func resample_guard <- NULL + if (!is.null(dot_args$bootstrap_cores)) dot_args$bootstrap_cores <- NULL + if (!is.null(dot_args$bootstrap_workers)) dot_args$bootstrap_workers <- NULL if (!is.null(dot_args$bootstrap_settings)) dot_args$bootstrap_settings <- NULL if (!is.null(dot_args$bootstrap_options)) dot_args$bootstrap_options <- NULL if (!is.null(dot_args$bootstrap_type)) dot_args$bootstrap_type <- NULL @@ -168,32 +237,12 @@ bootstrap_variance.data.frame <- function(data, estimator_func, point_estimate, val } - if (!requireNamespace("future.apply", quietly = TRUE)) { - stop("Package 'future.apply' is required for bootstrap variance.", call. = FALSE) - } - use_progress <- requireNamespace("progressr", quietly = TRUE) - - if (use_progress) { - lst <- progressr::with_progress({ - p <- progressr::progressor(steps = bootstrap_reps) - future.apply::future_lapply( - seq_len(bootstrap_reps), - function(i) { - res <- replicate_fn(i) - p() # Signal progress - res - }, - future.seed = TRUE - ) - }) - } else { - lst <- future.apply::future_lapply( - seq_len(bootstrap_reps), - replicate_fn, - future.seed = TRUE - ) - } + lst <- nmar_bootstrap_apply( + X = seq_len(bootstrap_reps), + FUN = replicate_fn, + use_progress = use_progress + ) estimates <- vapply(lst, identity, numeric(1)) @@ -424,55 +473,25 @@ bootstrap_variance.survey.design <- function(data, estimator_func, point_estimat val } - if (!requireNamespace("future.apply", quietly = TRUE)) { - stop("Package 'future.apply' is required for bootstrap variance.", call. = FALSE) - } - use_progress <- requireNamespace("progressr", quietly = TRUE) J <- seq_len(ncol(repw)) - if (use_progress) { - lst <- progressr::with_progress({ - p <- progressr::progressor(steps = length(J)) - future.apply::future_lapply( - J, - function(j) { - res <- replicate_eval(j) - p() # Signal progress - res - }, - future.seed = TRUE, - future.globals = list( - replicate_eval = replicate_eval, - repw = repw, - data_vars = data_vars, - nmar_inject_design_weights = nmar_inject_design_weights, - design_template = design_template, - estimator_args = estimator_args, - est_fun = est_fun, - p = p - ), - future.packages = "survey" - ) - }) - } else { - lst <- future.apply::future_lapply( - J, - replicate_eval, - future.seed = TRUE, - future.globals = list( - replicate_eval = replicate_eval, - repw = repw, - data_vars = data_vars, - nmar_inject_design_weights = nmar_inject_design_weights, - design_template = design_template, - estimator_args = estimator_args, - est_fun = est_fun - ), - future.packages = "survey" - ) - } + lst <- nmar_bootstrap_apply( + X = J, + FUN = replicate_eval, + use_progress = use_progress, + future_globals = list( + replicate_eval = replicate_eval, + repw = repw, + data_vars = data_vars, + nmar_inject_design_weights = nmar_inject_design_weights, + design_template = design_template, + estimator_args = estimator_args, + est_fun = est_fun + ), + future_packages = "survey" + ) replicate_estimates <- vapply(lst, identity, numeric(1)) diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R index 9ee1ae7..24f9d2b 100644 --- a/tests/testthat/test-bootstrap.R +++ b/tests/testthat/test-bootstrap.R @@ -203,6 +203,72 @@ test_that("iid-only resample_guard is ignored for survey designs", { expect_true(is.finite(res$variance)) }) +test_that("bootstrap falls back to sequential lapply when future.apply is unavailable", { + testthat::local_mocked_bindings( + nmar_has_future_apply = function() FALSE, + .package = "NMAR" + ) + opt <- "NMAR.bootstrap.warned_no_future_apply" + old_opt <- getOption(opt) + options(setNames(list(FALSE), opt)) + on.exit(options(setNames(list(old_opt), opt)), add = TRUE) + + set.seed(123) + df <- data.frame(y = c(1, 2, 3, 4)) + point_est <- mean(df$y) + + expect_warning( + res1 <- bootstrap_variance(df, + estimator_func = bootstrap_dummy_estimator, + point_estimate = point_est, + bootstrap_reps = 5 + ), + "future\\.apply.*not installed|future\\.apply.*Install", + fixed = FALSE + ) + expect_length(res1$replicates, 5) + + expect_silent( + res2 <- bootstrap_variance(df, + estimator_func = bootstrap_dummy_estimator, + point_estimate = point_est, + bootstrap_reps = 5 + ) + ) + expect_length(res2$replicates, 5) +}) + +test_that("survey bootstrap falls back to sequential lapply when future.apply is unavailable", { + skip_if_not_installed("survey") + skip_if_not_installed("svrep") + + testthat::local_mocked_bindings( + nmar_has_future_apply = function() FALSE, + .package = "NMAR" + ) + opt <- "NMAR.bootstrap.warned_no_future_apply" + old_opt <- getOption(opt) + options(setNames(list(FALSE), opt)) + on.exit(options(setNames(list(old_opt), opt)), add = TRUE) + + set.seed(123) + df <- data.frame(y = c(1, 2, 3, 4), w = c(1, 2, 1, 3)) + design <- survey::svydesign(ids = ~1, data = df, weights = ~w) + point_est <- sum(df$y * df$w) / sum(df$w) + + expect_warning( + res <- bootstrap_variance(design, + estimator_func = bootstrap_dummy_estimator, + point_estimate = point_est, + bootstrap_reps = 5 + ), + "future\\.apply.*not installed|future\\.apply.*Install", + fixed = FALSE + ) + expect_length(res$replicates, 5) + expect_true(is.finite(res$se)) +}) + test_that("IID bootstrap is reproducible across future backends", { skip_if_not_installed("future") skip_if_not_installed("future.apply") From 9fd886b7a2556291d1cdecb0929c5d508ec71ba3 Mon Sep 17 00:00:00 2001 From: Igor Kolodziej Date: Sat, 17 Jan 2026 14:36:01 +0100 Subject: [PATCH 3/6] update version and news entry --- DESCRIPTION | 2 +- NEWS.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d283435..9f07bd6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: NMAR Type: Package Title: Estimation under not Missing at Random Nonresponse -Version: 0.1.1 +Version: 0.1.2 Authors@R: c(person(given = "Maciej", family = "Beresewicz", diff --git a/NEWS.md b/NEWS.md index 448359a..3383825 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# NMAR 0.1.2 +- Bootstrap variance no longer hard-requires 'future.apply'; it now falls back to sequential execution via base::lapply() when 'future.apply' is not installed (emits a one-time warning). +- When 'future.apply' is installed, bootstrap uses future-seeded RNG streams (future.seed = TRUE) for reproducibility across future backends. + # NMAR 0.1.1 * CRAN release-related fixes - Fix `return` roxygen keyword in S3 Functions @@ -25,4 +29,3 @@ * **Refactored Architecture:** The `exptilt` and `el` engines share a unified structural design, ensuring consistent behavior for controls, standardization, and error handling. * **Standardization:** Added `standardize = TRUE` argument to engines to improve numerical stability during optimization. - From 4fec69629db00a148401d54af5be70a7ae22a1aa Mon Sep 17 00:00:00 2001 From: Igor Kolodziej Date: Sat, 17 Jan 2026 14:43:18 +0100 Subject: [PATCH 4/6] clean unused args --- src_dev/shared/bootstrap.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/src_dev/shared/bootstrap.R b/src_dev/shared/bootstrap.R index 0900086..5477f5f 100644 --- a/src_dev/shared/bootstrap.R +++ b/src_dev/shared/bootstrap.R @@ -187,8 +187,6 @@ bootstrap_variance.data.frame <- function(data, estimator_func, point_estimate, dot_args <- list(...) est_fun <- estimator_func resample_guard <- NULL - if (!is.null(dot_args$bootstrap_cores)) dot_args$bootstrap_cores <- NULL - if (!is.null(dot_args$bootstrap_workers)) dot_args$bootstrap_workers <- NULL if (!is.null(dot_args$bootstrap_settings)) dot_args$bootstrap_settings <- NULL if (!is.null(dot_args$bootstrap_options)) dot_args$bootstrap_options <- NULL if (!is.null(dot_args$bootstrap_type)) dot_args$bootstrap_type <- NULL From 0de24e0f3481325975b8285e9b1731c5c2341f2d Mon Sep 17 00:00:00 2001 From: Igor Kolodziej Date: Sat, 17 Jan 2026 15:12:34 +0100 Subject: [PATCH 5/6] fix future globals --- src_dev/shared/bootstrap.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src_dev/shared/bootstrap.R b/src_dev/shared/bootstrap.R index 5477f5f..d29ddb9 100644 --- a/src_dev/shared/bootstrap.R +++ b/src_dev/shared/bootstrap.R @@ -129,8 +129,13 @@ nmar_bootstrap_apply <- function(X, FUN, use_progress, future_globals = NULL, fu } if (use_future) { fg <- future_globals - if (is.null(fg) || identical(fg, TRUE)) fg <- list() - fg <- c(fg, list(FUN = FUN, p = p)) + if (is.null(fg)) { + fg <- TRUE + } else if (isTRUE(fg)) { + fg <- TRUE + } else if (is.list(fg)) { + fg <- c(fg, list(FUN = FUN, p = p)) + } future.apply::future_lapply( X, wrapper, From 43b1409493b0470dbbf8017241acec3154430477 Mon Sep 17 00:00:00 2001 From: Igor Kolodziej Date: Sat, 17 Jan 2026 15:48:21 +0100 Subject: [PATCH 6/6] add warnings --- src_dev/shared/bootstrap.R | 54 ++++++- tests/testthat/test-bootstrap-advanced.R | 192 +++++++++++++++++------ tests/testthat/test-bootstrap.R | 24 ++- 3 files changed, 215 insertions(+), 55 deletions(-) diff --git a/src_dev/shared/bootstrap.R b/src_dev/shared/bootstrap.R index d29ddb9..bbcbd59 100644 --- a/src_dev/shared/bootstrap.R +++ b/src_dev/shared/bootstrap.R @@ -113,6 +113,43 @@ nmar_warn_no_future_apply_once <- function() { invisible(TRUE) } +# Internal helper: warn when survey bootstrap assumptions may be violated. +nmar_warn_survey_bootstrap_assumptions <- function(design) { + if (!inherits(design, "survey.design")) return(invisible(FALSE)) + + allprob <- design$allprob + has_multistage_probs <- is.data.frame(allprob) && ncol(allprob) > 1L + has_pps <- isTRUE(design$pps) + has_fpc_arg <- FALSE + has_fpc_popsize <- FALSE + has_probs_arg <- FALSE + has_pps_arg <- FALSE + + fpc <- design$fpc + has_fpc_popsize <- is.list(fpc) && !is.null(fpc$popsize) + + dc <- try(getCall(design), silent = TRUE) + if (!inherits(dc, "try-error") && !is.null(dc)) { + args <- as.list(dc)[-1] + has_fpc_arg <- !is.null(args$fpc) || !is.null(args$fpctype) + has_probs_arg <- !is.null(args$probs) + has_pps_arg <- !is.null(args$pps) + } + + if (has_multistage_probs || has_pps || has_fpc_arg || has_fpc_popsize || has_probs_arg || has_pps_arg) { + warning( + "Survey bootstrap injects replicate analysis weights into the design.\n ", + "This is valid when the estimator depends on weights (and optionally strata/cluster)\n ", + "but does not recompute stage-specific probabilities or FPC. This design appears\n ", + "to include PPS/multistage probabilities or FPC; if the estimator uses those\n ", + "fields directly, bootstrap variance may be incorrect.", + call. = FALSE + ) + return(invisible(TRUE)) + } + invisible(FALSE) +} + # Internal helper: apply over X using future.apply (if installed) or base::lapply # (sequential fallback). Progress is reported via progressr if installed. nmar_bootstrap_apply <- function(X, FUN, use_progress, future_globals = NULL, future_packages = NULL) { @@ -411,12 +448,25 @@ bootstrap_variance.survey.design <- function(data, estimator_func, point_estimat ) } + nmar_warn_survey_bootstrap_assumptions(data) + # Extract replicate analysis weights matrix (one column per replicate). repw <- weights(rep_design, type = "analysis") # Check replicate count (may differ from the requested number in stratified # designs). The variance formula is valid for the actual count produced. - J_actual <- ncol(repw) + J_actual <- if (is.null(dim(repw))) 1L else ncol(repw) + if (!is.finite(J_actual) || J_actual < 2L) { + stop( + sprintf( + "Bootstrap replicate design produced %d replicate(s). Variance estimation requires at least 2 replicates.\n ", + J_actual + ), + "This can happen with very small strata or restrictive design settings. ", + "Consider increasing per-stratum sample size or adjusting bootstrap settings.", + call. = FALSE + ) + } if (J_actual != bootstrap_reps) { warning(sprintf( paste0( @@ -478,7 +528,7 @@ bootstrap_variance.survey.design <- function(data, estimator_func, point_estimat use_progress <- requireNamespace("progressr", quietly = TRUE) - J <- seq_len(ncol(repw)) + J <- seq_len(J_actual) lst <- nmar_bootstrap_apply( X = J, diff --git a/tests/testthat/test-bootstrap-advanced.R b/tests/testthat/test-bootstrap-advanced.R index d82f5b0..5ae083d 100644 --- a/tests/testthat/test-bootstrap-advanced.R +++ b/tests/testthat/test-bootstrap-advanced.R @@ -1,3 +1,11 @@ +suppress_bootstrap_assumption_warning <- function(expr) { + withCallingHandlers(expr, warning = function(w) { + if (grepl("injects replicate analysis weights", conditionMessage(w), fixed = TRUE)) { + invokeRestart("muffleWarning") + } + }) +} + test_that("svyrep.design is rejected with clear error", { skip_if_not_installed("survey") skip_if_not_installed("svrep") @@ -64,6 +72,46 @@ test_that("replicate count mismatch warns but proceeds", { expect_true(length(result$replicates) >= 2) }) +test_that("survey bootstrap errors when replicate design has too few replicates", { + skip_if_not_installed("survey") + skip_if_not_installed("svrep") + + df <- data.frame(y = 1:4, w = 1) + design <- survey::svydesign(ids = ~1, data = df, weights = ~w) + + estimator <- function(data, ...) { + est <- survey::svymean(~y, data) + list(y_hat = as.numeric(est), converged = TRUE) + } + + testthat::local_mocked_bindings( + as_bootstrap_design = function(design, replicates, ...) { + repw <- matrix(stats::weights(design), ncol = 1) + survey::svrepdesign( + variables = design$variables, + repweights = repw, + weights = stats::weights(design), + type = "bootstrap", + scale = 1, + rscales = 1, + combined.weights = FALSE, + mse = FALSE + ) + }, + .package = "svrep" + ) + + expect_error( + bootstrap_variance( + design, estimator, + point_estimate = mean(df$y), + bootstrap_reps = 10 + ), + "at least 2 replicates", + fixed = TRUE + ) +}) + test_that("survey NA policy 'strict' shows detailed error", { skip_if_not_installed("survey") skip_if_not_installed("svrep") @@ -90,11 +138,13 @@ test_that("survey NA policy 'strict' shows detailed error", { future::plan(future::sequential) err <- tryCatch( - bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 10, - survey_na_policy = "strict" + suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 10, + survey_na_policy = "strict" + ) ), error = function(e) e$message ) @@ -131,11 +181,13 @@ test_that("survey NA policy 'omit' handles failures correctly", { future::plan(future::sequential) expect_warning( - res <- bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 15, - survey_na_policy = "omit" + res <- suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 15, + survey_na_policy = "omit" + ) ), "3/15.*failed.*omitted" ) @@ -166,11 +218,13 @@ test_that("survey NA policy 'omit' requires at least 2 successes", { future::plan(future::sequential) expect_error( - bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 10, - survey_na_policy = "omit" + suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 10, + survey_na_policy = "omit" + ) ), "Too few successful" ) @@ -204,12 +258,14 @@ test_that("survey NA policy 'omit' shows failure pattern", { warn_msg <- NULL res <- tryCatch( withCallingHandlers( - bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 20, - survey_na_policy = "omit" - ), + suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 20, + survey_na_policy = "omit" + ) + ), warning = function(w) { warn_msg <<- conditionMessage(w) invokeRestart("muffleWarning") @@ -247,10 +303,12 @@ test_that("mathematical correctness: variance has correct properties", { future::plan(future::sequential) set.seed(424242) - res <- bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 30 + res <- suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 30 + ) ) # Property 1: Variance must be non-negative @@ -316,10 +374,12 @@ test_that("boundary cases: minimum replicates and edge conditions", { future::plan(future::sequential) # Test 1: bootstrap_reps = 2 (minimum for variance calculation) - res_min <- bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 2 + res_min <- suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 2 + ) ) expect_equal(length(res_min$replicates), 2) expect_true(is.finite(res_min$variance)) @@ -328,10 +388,12 @@ test_that("boundary cases: minimum replicates and edge conditions", { # Test 2: bootstrap_reps = 1 should still work but variance may be problematic # (svrVar should handle this, but it's an edge case) res_one <- tryCatch( - bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 1 + suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 1 + ) ), error = function(e) list(error = TRUE, message = e$message) ) @@ -426,11 +488,13 @@ test_that("omit policy correctly subsets rscales for mathematical correctness", set.seed(999) expect_warning( - res <- bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 20, - survey_na_policy = "omit" + res <- suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 20, + survey_na_policy = "omit" + ) ), "5/20.*failed.*omitted" ) @@ -470,11 +534,13 @@ test_that("omit policy correctly subsets rscales for mathematical correctness", set.seed(888) expect_warning( - res_random <- bootstrap_variance( - dstrat, estimator_random, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 30, - survey_na_policy = "omit" + res_random <- suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator_random, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 30, + survey_na_policy = "omit" + ) ), "12/30.*failed" ) @@ -513,10 +579,12 @@ test_that("survey NA policy default is 'strict'", { # Without specifying survey_na_policy, should default to strict and error err <- tryCatch( - bootstrap_variance( - dstrat, estimator, - point_estimate = mean(apistrat$api00), - bootstrap_reps = 10 + suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, + point_estimate = mean(apistrat$api00), + bootstrap_reps = 10 + ) ), error = function(e) e$message ) @@ -524,3 +592,33 @@ test_that("survey NA policy default is 'strict'", { expect_match(err, "strict") expect_match(err, "1/10.*failed") }) + +test_that("survey bootstrap warns for multistage/FPC designs", { + skip_if_not_installed("survey") + skip_if_not_installed("svrep") + + data(mu284, package = "survey") + multistage_design <- survey::svydesign( + data = mu284, + ids = ~ id1 + id2, + fpc = ~ n1 + n2 + ) + + estimator <- function(data, ...) { + est <- survey::svymean(~y1, data) + list(y_hat = as.numeric(est), converged = TRUE) + } + + future::plan(future::sequential) + + expect_warning( + bootstrap_variance( + multistage_design, + estimator, + point_estimate = mean(mu284$y1), + bootstrap_reps = 10 + ), + "injects replicate analysis weights", + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-bootstrap.R b/tests/testthat/test-bootstrap.R index 24f9d2b..db14aec 100644 --- a/tests/testthat/test-bootstrap.R +++ b/tests/testthat/test-bootstrap.R @@ -20,6 +20,14 @@ bootstrap_dummy_estimator <- function(data, on_failure = "return") { class = c("bootstrap_dummy_result", "nmar_result") ) } + +suppress_bootstrap_assumption_warning <- function(expr) { + withCallingHandlers(expr, warning = function(w) { + if (grepl("injects replicate analysis weights", conditionMessage(w), fixed = TRUE)) { + invokeRestart("muffleWarning") + } + }) +} test_that("bootstrap_variance handles iid data", { set.seed(123) df <- data.frame(y = c(1, 2, 3, 4)) @@ -341,17 +349,21 @@ test_that("Survey bootstrap is reproducible across backends", { # Sequential baseline future::plan(future::sequential) set.seed(999) - res_seq1 <- bootstrap_variance( - dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 50 + res_seq1 <- suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, point_estimate = mean(apistrat$api00), + bootstrap_reps = 50 + ) ) # Sequential with same seed should be identical future::plan(future::sequential) set.seed(999) - res_seq2 <- bootstrap_variance( - dstrat, estimator, point_estimate = mean(apistrat$api00), - bootstrap_reps = 50 + res_seq2 <- suppress_bootstrap_assumption_warning( + bootstrap_variance( + dstrat, estimator, point_estimate = mean(apistrat$api00), + bootstrap_reps = 50 + ) ) future::plan(future::sequential)