From 7eec8b4dcc46de9ce6dd961b1061bbaa8baac814 Mon Sep 17 00:00:00 2001 From: jrwishart Date: Mon, 17 Nov 2025 12:16:53 +1100 Subject: [PATCH 1/2] RS-20388: Handle missing data properly in survey logistic regression models --- R/variables.R | 6 ++++-- tests/testthat/test-dataproblems.R | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/R/variables.R b/R/variables.R index c154cc9..06ea8c9 100644 --- a/R/variables.R +++ b/R/variables.R @@ -216,7 +216,9 @@ Probabilities.Regression <- function(object, newdata = NULL, ...) StopForUserError(sQuote("Probabilities"), " is not applicable to linear regression models.") if (isTRUE(object$stacked) && IsRServer()) StopForUserError("Saving probabilitiles is currently not supported for stacked data.") - newdata <- ValidateNewData(object, newdata) + na.action <- if ("na.action" %in% ...names()) list(...)[["na.action"]] else na.pass + newdata <- ValidateNewData(object, newdata) |> + structure(na.action = na.action) # Ensure NA rows are preserved, survey models may drop them otherwise if (object$type %in% c("Ordered Logit", "Multinomial Logit")) { probs <- suppressWarnings(predict(object$original, newdata = newdata, @@ -230,7 +232,7 @@ Probabilities.Regression <- function(object, newdata = NULL, ...) if (object$type == "Binary Logit") { - probs <- suppressWarnings(predict(object$original, newdata = newdata, na.action = na.pass, type = "response")) + probs <- suppressWarnings(predict(object$original, newdata = newdata, na.action = na.action, type = "response")) outcome.levels <- levels(Observed(object)) if (length(outcome.levels) == 1L) { diff --git a/tests/testthat/test-dataproblems.R b/tests/testthat/test-dataproblems.R index fa70be6..aae7716 100644 --- a/tests/testthat/test-dataproblems.R +++ b/tests/testthat/test-dataproblems.R @@ -816,3 +816,26 @@ test_that("Removing missing entirely missing variables", { ) ) }) + +test_that("RS-20388: Survey weighted models preserve number of respondents in predictions", { + some.weighted.data.for.logistic <- data.frame( + y = rbinom(100, size = 1, prob = 0.5), + x1 = rnorm(100), + weights = runif(100, min = 0.5, max = 2) + ) + # Set some rows to missing + is.na(some.weighted.data.for.logistic$x1) <- sample(1:100, size = 10) + model <- Regression( + y ~ x1, data = some.weighted.data.for.logistic, + type = "Binary Logit", + weights = some.weighted.data.for.logistic$weights, + missing = "Exclude cases with missing data" + ) + probabilities <- Probabilities(model) + probabilities |> expect_type("double") + probabilities |> nrow() |> expect_equal(100L) + + probabilities.with.missing <- Probabilities(model, na.action = na.omit) + probabilities.with.missing |> expect_type("double") + probabilities.with.missing |> nrow() |> expect_equal(90L) +}) From acdbae319e3f6c9d09d179fb96a5553658c02940 Mon Sep 17 00:00:00 2001 From: jrwishart Date: Mon, 17 Nov 2025 13:28:13 +1100 Subject: [PATCH 2/2] Version bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4c971dc..ada824b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: flipRegression Type: Package Title: Estimates standard regression models -Version: 1.3.67 +Version: 1.3.68 Author: Displayr Maintainer: Displayr Description: Regression models according to the flip Project