diff --git a/R/Normalization.R b/R/Normalization.R
index a86ab7b..45cace8 100644
--- a/R/Normalization.R
+++ b/R/Normalization.R
@@ -159,7 +159,7 @@ tidyCovariateData <- function(covariateData,
           inner_join(covariateData$valueCounts, by = "covariateId") %>%
           select(.data$analysisId, .data$covariateId, n) %>%
           collect()
-        valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n), ]
+        valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n, valueCounts$covariateId), ]
         deleteCovariateIds <- c(deleteCovariateIds, valueCounts$covariateId[!duplicated(valueCounts$analysisId)])
         ignoreCovariateIds <- valueCounts$covariateId
         ParallelLogger::logInfo("Removing ", length(deleteCovariateIds), " redundant covariates")
diff --git a/tests/testthat/test-tidyCovariates.R b/tests/testthat/test-tidyCovariates.R
index 59637f6..1fe363b 100644
--- a/tests/testthat/test-tidyCovariates.R
+++ b/tests/testthat/test-tidyCovariates.R
@@ -93,3 +93,44 @@ test_that("tidyCovariateData on Temporal Data", {
   tidy <- tidyCovariateData(covariateData)
   expect_equal(length(pull(tidy$analysisRef, analysisId)), length(pull(covariateData$analysisRef, analysisId)))
 })
+
+test_that("Removal of redundant covariates is reproducible", {
+  # Two covariates from the same analysis with equal prevalence (5 of the 10
+  # rows each), so ordering them by count alone is a tie.
+  covariates <- tibble(
+    covariateId = rep(c(1, 2), 5),
+    rowId = seq_len(10),
+    covariateValue = 1
+  )
+  covariateRef <- tibble(
+    covariateId = c(1, 2),
+    analysisId = c(1, 1)
+  )
+  metaData <- list(populationSize = 10)
+  covariateData <- Andromeda::andromeda(
+    covariates = covariates,
+    covariateRef = covariateRef
+  )
+  attr(covariateData, "metaData") <- metaData
+  class(covariateData) <- "CovariateData"
+
+  # Repeat the removal several times. Results are kept in a list so that a run
+  # leaving zero or several covariates is reported by expect_length() instead
+  # of erroring or being silently truncated by a scalar assignment.
+  nRepeats <- 10
+  remainingIds <- vector("list", nRepeats)
+  for (i in seq_len(nRepeats)) {
+    tidy <- tidyCovariateData(
+      covariateData,
+      minFraction = 0,
+      normalize = FALSE,
+      removeRedundancy = TRUE
+    )
+    remainingIds[[i]] <- tidy$covariates %>%
+      distinct(covariateId) %>%
+      pull(covariateId)
+    expect_length(remainingIds[[i]], 1)
+  }
+  # Every repetition must keep the same covariate.
+  expect_equal(length(unique(unlist(remainingIds))), 1)
+})