From 0485e12a50bad945ed8e1839dac548feb531649b Mon Sep 17 00:00:00 2001 From: James Eapen Date: Wed, 22 Oct 2025 09:54:55 -0400 Subject: [PATCH 01/29] refactor(getArrayABsignal): extract preprocessArrays to own file --- R/getArrayABsignal.R | 56 -------------------------------------------- R/preprocessArrays.R | 54 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 56 deletions(-) create mode 100644 R/preprocessArrays.R diff --git a/R/getArrayABsignal.R b/R/getArrayABsignal.R index a3eb8670..bdcc7b72 100644 --- a/R/getArrayABsignal.R +++ b/R/getArrayABsignal.R @@ -153,62 +153,6 @@ getArrayABsignal <- function( RaggedExperiment(array.compartments, colData = colData(obj)) } -#' Preprocess arrays for compartment inference -#' -#' @name preprocessArrays -#' -#' @param obj Input SummarizedExperiment -#' @param genome What genome are we working on ("hg19", "hg38", "mm9", "mm10") -#' @param other Another arbitrary genome to compute compartments on -#' @param array.type What type of array is this ("hm450", "EPIC") -#' -#' @return A preprocessed SummarizedExperiment to compute compartments -#' @import SummarizedExperiment -#' -#' @examples -#' if (requireNamespace("minfiData", quietly = TRUE)) { -#' grSet <- minfi::preprocessNoob(minfiData::RGsetEx.sub) |> -#' minfi::ratioConvert() |> -#' minfi::mapToGenome() -#' preprocessArrays(grSet) -#' } -#' -#' @export -preprocessArrays <- function( - obj, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - array.type = c("hm450", "EPIC") -) { - if (!requireNamespace("minfi", quietly = TRUE)) { - stop("The minfi package must be installed for this functionality") - } - - # what genome do we have - genome <- match.arg(genome) - - # subset the array to open sea CpGs - obj.opensea <- filterOpenSea(obj, genome = genome, other = other) - verifyAssayNames(obj.opensea, assay = "array") - - # convert to M-values if beta values given - # this should be default but allows handling if given M-values in Beta slot - is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 - if (is.beta) { - message("Converting to squeezed M-values.") - assays(obj.opensea)$Beta <- flogit(assays(obj.opensea)$Beta) - } - - # impute missing values if possible - if (any(is.na(minfi::getBeta(obj.opensea)))) { - message("Imputing missing values.") - obj.opensea <- imputeKNN(obj.opensea, assay = "array") - } - - obj.opensea -} - - # this is the main analysis function for computing compartments from arrays .arrayCompartments <- function( obj, diff --git a/R/preprocessArrays.R b/R/preprocessArrays.R new file mode 100644 index 00000000..4e24d220 --- /dev/null +++ b/R/preprocessArrays.R @@ -0,0 +1,54 @@ +#' Preprocess arrays for compartment inference +#' +#' @name preprocessArrays +#' +#' @param obj Input SummarizedExperiment +#' @param genome What genome are we working on ("hg19", "hg38", "mm9", "mm10") +#' @param other Another arbitrary genome to compute compartments on +#' @param array.type What type of array is this ("hm450", "EPIC") +#' +#' @return A preprocessed SummarizedExperiment to compute compartments +#' @import SummarizedExperiment +#' +#' @examples +#' if (requireNamespace("minfiData", quietly = TRUE)) { +#' grSet <- minfi::preprocessNoob(minfiData::RGsetEx.sub) |> +#' minfi::ratioConvert() |> +#' minfi::mapToGenome() +#' preprocessArrays(grSet) +#' } +#' +#' @export +preprocessArrays <- function( + obj, + genome = c("hg19", "hg38", "mm9", "mm10"), + other = NULL, + array.type = c("hm450", "EPIC") +) { + if (!requireNamespace("minfi", quietly = TRUE)) { + stop("The minfi package must be installed for this functionality") + } + + # what genome do we have + genome <- match.arg(genome) + + # subset the array to open sea CpGs + obj.opensea <- filterOpenSea(obj, genome = genome, other = other) + verifyAssayNames(obj.opensea, assay = "array") + + # convert to M-values if beta values given + # this should be default but allows handling if given M-values in Beta slot + is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 + if (is.beta) { + message("Converting to squeezed M-values.") + assays(obj.opensea)$Beta <- flogit(assays(obj.opensea)$Beta) + } + + # impute missing values if possible + if (any(is.na(minfi::getBeta(obj.opensea)))) { + message("Imputing missing values.") + obj.opensea <- imputeKNN(obj.opensea, assay = "array") + } + + obj.opensea +} From e6d4189c82144cb152d6a60a07d3190a8469b8cb Mon Sep 17 00:00:00 2001 From: James Eapen Date: Wed, 22 Oct 2025 09:55:20 -0400 Subject: [PATCH 02/29] refactor(getCompartments): extract common compartment worker --- R/getATACABsignal.R | 169 +++------------------------------------- R/getArrayABsignal.R | 165 +++------------------------------------ R/getCompartments.R | 180 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+), 314 deletions(-) create mode 100644 R/getCompartments.R diff --git a/R/getATACABsignal.R b/R/getATACABsignal.R index ec6c19e1..954b2835 100644 --- a/R/getATACABsignal.R +++ b/R/getATACABsignal.R @@ -53,169 +53,20 @@ getATACABsignal <- function( boot.parallel = FALSE, boot.cores = 2 ) { - chr <- chr %||% - { - message("Processing all chromosomes") - getChrs(obj) - } - - if (is.null(colnames(obj))) stop("colnames needs to be sample names.") - columns <- colnames(obj) - names(columns) <- columns - - prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = "atac") - - if (bootstrap) { - message("Pre-computing the bootstrap global means.") - bmeans <- precomputeBootstrapMeans( - obj = obj, - targets = targets, - num.bootstraps = num.bootstraps, - assay = "atac", - parallel = parallel, - num.cores = cores - ) - } - - if (group) { - atac.compartments.list <- mclapply(chr, function(c) { - atacCompartments( - obj, - obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - num.bootstraps = num.bootstraps, - prior.means = prior.means, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }, mc.cores = ifelse(parallel, cores, 1)) - - atac.compartments <- sort(unlist(as(atac.compartments.list, "GRangesList"))) - return(atac.compartments) - } - - atac.compartments <- mclapply(columns, function(s) { - obj.sub <- obj[, s] - - message("Working on ", s) - atac.compartments.list <- lapply(chr, function(c) { - atacCompartments( - obj.sub, - obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - prior.means = prior.means, - num.bootstraps = num.bootstraps, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }) - - sort(unlist(as(atac.compartments.list, "GRangesList"))) - }, mc.cores = ifelse(parallel, cores, 1)) - - atac.compartments <- as(atac.compartments, "CompressedGRangesList") - RaggedExperiment(atac.compartments, colData = colData(obj)) -} - -# worker function -# this is the main analysis function for computing compartments from atacs -atacCompartments <- function( - obj, - original.obj, - res = 1e6, - chr = NULL, - targets = NULL, - genome = c("hg19", "hg38", "mm9", "mm10"), - prior.means = NULL, - bootstrap = TRUE, - num.bootstraps = 1000, - parallel = FALSE, - cores = 2, - group = FALSE, - bootstrap.means = NULL -) { - genome <- match.arg(genome) - - if (parallel) options(mc.cores = cores) - - # update - message("Computing compartments for ", chr) - obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") - original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") - - # take care of the global means - if (!is.null(prior.means)) { - # this assumes that we've alread computed the global means - pmeans <- as(prior.means, "GRanges") - pmeans <- keepSeqlevels(pmeans, chr, pruning.mode = "coarse") - # go back to a matrix - prior.means <- as(pmeans, "matrix") - colnames(prior.means) <- "globalMean" - } - - obj.bins <- shrinkBins( - obj, - original.obj, - prior.means = prior.means, - chr = chr, - res = res, - targets = targets, - assay = "atac", - genome = genome, - jse = TRUE - ) - - obj.cor <- getCorMatrix(obj.bins, squeeze = !group) - - if (any(is.na(obj.cor$binmat.cor))) { - obj.cor$gr$pc <- matrix(rep(NA, nrow(obj.cor$binmat.cor))) - obj.svd <- obj.cor$gr - } else { - # compute SVD of correlation matrix - obj.svd <- getABSignal(obj.cor, assay = "atac") - } - - if (!bootstrap) { - return(obj.svd) - } - - # bootstrap the estimates - # always compute confidence intervals too - # take care of the global means - # this assumes that we've alread computed the global means - bmeans <- as(bootstrap.means, "GRanges") - bmeans <- keepSeqlevels(bmeans, chr, pruning.mode = "coarse") - # go back to a matrix - bmeans <- as(bmeans, "matrix") - colnames(bmeans) <- rep("globalMean", ncol(bmeans)) - - bootstrapCompartments( - obj, - original.obj, - bootstrap.samples = num.bootstraps, - chr = chr, + getCompartments( + obj = obj, assay = "atac", + res = res, parallel = parallel, - cores = cores, + chr = chr, targets = targets, - res = res, + cores = cores, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + boot.parallel = boot.parallel, + boot.cores = boot.cores, genome = genome, - q = 0.95, - svd = obj.svd, - group = group, - bootstrap.means = bmeans + group = group ) } diff --git a/R/getArrayABsignal.R b/R/getArrayABsignal.R index bdcc7b72..24a286c1 100644 --- a/R/getArrayABsignal.R +++ b/R/getArrayABsignal.R @@ -79,164 +79,19 @@ getArrayABsignal <- function( assays(obj)$Beta <- fexpit(assays(obj)$Beta) } - # gather the chromosomes we are working on - if (is.null(chr)) { - message("Assuming we want to process all chromosomes.") - # get what chromosomes we want - chr <- getChrs(obj) - } - - # get the column names - if (is.null(colnames(obj))) stop("colnames needs to be sample names.") - columns <- colnames(obj) - names(columns) <- columns - - # precompute global means - prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = "array") - - if (bootstrap) { - message("Pre-computing the bootstrap global means.") - bmeans <- precomputeBootstrapMeans( - obj = obj, - targets = targets, - num.bootstraps = num.bootstraps, - assay = "array", - parallel = parallel, - num.cores = cores - ) - } - - if (group) { - array.compartments.list <- mclapply(chr, function(c) { - .arrayCompartments( - obj, obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - num.bootstraps = num.bootstraps, - prior.means = prior.means, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }, mc.cores = cores) - array.compartments <- sort(unlist(as(array.compartments.list, "GRangesList"))) - return(array.compartments) - } - - array.compartments <- mclapply(columns, function(s) { - obj.sub <- obj[, s] - message("Working on ", s) - array.compartments.list <- lapply(chr, function(c) { - .arrayCompartments( - obj.sub, obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - prior.means = prior.means, - num.bootstraps = num.bootstraps, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }) - sort(unlist(as(array.compartments.list, "GRangesList"))) - }, mc.cores = ifelse(parallel, cores, 1), mc.preschedule = F) - - array.compartments <- as(array.compartments, "CompressedGRangesList") - RaggedExperiment(array.compartments, colData = colData(obj)) -} - -# this is the main analysis function for computing compartments from arrays -.arrayCompartments <- function( - obj, - original.obj, - res = 1e6, - chr = NULL, - targets = NULL, - genome = c("hg19", "hg38", "mm9", "mm10"), - prior.means = NULL, - bootstrap = TRUE, - num.bootstraps = 1000, - parallel = FALSE, - cores = 2, - group = FALSE, - bootstrap.means = NULL -) { - genome <- match.arg(genome) - if (parallel) options(mc.cores = cores) - - # update - message("Computing compartments for ", chr) - obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") - original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") - - # take care of the global means - if (!is.null(prior.means)) { - # this assumes that we've alread computed the global means - pmeans <- as(prior.means, "GRanges") - pmeans <- keepSeqlevels(pmeans, chr, pruning.mode = "coarse") - # go back to a matrix - prior.means <- as(pmeans, "matrix") - colnames(prior.means) <- "globalMean" - } - - obj.bins <- shrinkBins( - obj, - original.obj, - prior.means = prior.means, - chr = chr, - res = res, - targets = targets, - assay = "array", - genome = genome, - jse = TRUE - ) - - obj.cor <- getCorMatrix(obj.bins, squeeze = !group) - - if (any(is.na(obj.cor$binmat.cor))) { - obj.cor$gr$pc <- matrix(rep(NA, nrow(obj.cor$binmat.cor))) - obj.svd <- obj.cor$gr - } else { - # compute SVD of correlation matrix - obj.svd <- getABSignal(obj.cor, assay = "array") - } - - if (!bootstrap) { - return(obj.svd) - } - - # bootstrap the estimates - # always compute confidence intervals too - # take care of the global means - # this assumes that we've alread computed the global means - bmeans <- as(bootstrap.means, "GRanges") - bmeans <- keepSeqlevels(bmeans, chr, pruning.mode = "coarse") - # go back to a matrix - bmeans <- as(bmeans, "matrix") - colnames(bmeans) <- rep("globalMean", ncol(bmeans)) - - bootstrapCompartments( - obj, - original.obj, - bootstrap.samples = num.bootstraps, - chr = chr, + getCompartments( + obj = obj, assay = "array", + res = res, parallel = parallel, - cores = cores, + chr = chr, targets = targets, - res = res, + cores = cores, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + boot.parallel = boot.parallel, + boot.cores = boot.cores, genome = genome, - q = 0.95, - svd = obj.svd, - group = group, - bootstrap.means = bmeans + group = group ) } diff --git a/R/getCompartments.R b/R/getCompartments.R new file mode 100644 index 00000000..ad8178d6 --- /dev/null +++ b/R/getCompartments.R @@ -0,0 +1,180 @@ +getCompartments <- function( + obj, + assay, + res, + parallel, + chr, + targets, + cores, + bootstrap, + num.bootstraps, + boot.parallel, + boot.cores, + genome, + group +) { + if (is.null(chr)) { + message("Assuming we want to process all chromosomes.") + chr <- getChrs(obj) + } + + if (is.null(colnames(obj))) stop("colnames needs to be sample names.") + columns <- colnames(obj) + names(columns) <- columns + + prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = assay) + + if (bootstrap) { + message("Pre-computing the bootstrap global means.") + bmeans <- precomputeBootstrapMeans( + obj = obj, + targets = targets, + num.bootstraps = num.bootstraps, + assay = assay, + parallel = parallel, + num.cores = cores + ) + } + + if (group) { + compartments.list <- mclapply(chr, function(c) { + .getCompartments( + obj, + obj, + assay = assay, + res = res, + chr = c, + targets = targets, + genome = genome, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + prior.means = prior.means, + parallel = boot.parallel, + cores = boot.cores, + group = group, + bootstrap.means = bmeans + ) + }, mc.cores = ifelse(parallel, cores, 1)) + + compartments <- sort(unlist(as(compartments.list, "GRangesList"))) + return(compartments) + } + + compartments <- mclapply(columns, function(s) { + obj.sub <- obj[, s] + + message("Working on ", s) + compartments.list <- lapply(chr, function(c) { + .getCompartments( + obj.sub, + obj, + assay = assay, + res = res, + chr = c, + targets = targets, + genome = genome, + bootstrap = bootstrap, + prior.means = prior.means, + num.bootstraps = num.bootstraps, + parallel = boot.parallel, + cores = boot.cores, + group = group, + bootstrap.means = bmeans + ) + }) + sort(unlist(as(compartments.list, "GRangesList"))) + }, mc.cores = ifelse(parallel, cores, 1), mc.preschedule = F) + + compartments <- as(compartments, "CompressedGRangesList") + RaggedExperiment(compartments, colData = colData(obj)) +} + +# this is the main analysis function for computing compartments +.getCompartments <- function( + obj, + original.obj, + assay, + res = 1e6, + chr = NULL, + targets = NULL, + genome = c("hg19", "hg38", "mm9", "mm10"), + prior.means = NULL, + bootstrap = TRUE, + num.bootstraps = 1000, + parallel = FALSE, + cores = 2, + group = FALSE, + bootstrap.means = NULL +) { + genome <- match.arg(genome) + + if (parallel) options(mc.cores = cores) + + # update + message("Computing compartments for ", chr) + obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") + original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") + + # take care of the global means + if (!is.null(prior.means)) { + # this assumes that we've alread computed the global means + pmeans <- as(prior.means, "GRanges") + pmeans <- keepSeqlevels(pmeans, chr, pruning.mode = "coarse") + # go back to a matrix + prior.means <- as(pmeans, "matrix") + colnames(prior.means) <- "globalMean" + } + + obj.bins <- shrinkBins( + obj, + original.obj, + prior.means = prior.means, + chr = chr, + res = res, + targets = targets, + assay = assay, + genome = genome, + jse = TRUE + ) + + obj.cor <- getCorMatrix(obj.bins, squeeze = !group) + + if (any(is.na(obj.cor$binmat.cor))) { + obj.cor$gr$pc <- matrix(rep(NA, nrow(obj.cor$binmat.cor))) + obj.svd <- obj.cor$gr + } else { + # compute SVD of correlation matrix + obj.svd <- getABSignal(obj.cor, assay = assay) + } + + if (!bootstrap) { + return(obj.svd) + } + + # bootstrap the estimates + # always compute confidence intervals too + # take care of the global means + # this assumes that we've alread computed the global means + bmeans <- as(bootstrap.means, "GRanges") + bmeans <- keepSeqlevels(bmeans, chr, pruning.mode = "coarse") + # go back to a matrix + bmeans <- as(bmeans, "matrix") + colnames(bmeans) <- rep("globalMean", ncol(bmeans)) + + bootstrapCompartments( + obj, + original.obj, + bootstrap.samples = num.bootstraps, + chr = chr, + assay = assay, + parallel = parallel, + cores = cores, + targets = targets, + res = res, + genome = genome, + q = 0.95, + svd = obj.svd, + group = group, + bootstrap.means = bmeans + ) +} From bef8044f247f8421ef25ddb2c2f886f7ad044ba6 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 23 Oct 2025 12:27:14 -0400 Subject: [PATCH 03/29] refactor(scCompartments): directly call getCompartments without getATACABsignal --- NAMESPACE | 1 - R/getATACABsignal.R | 75 ---------------------------------- R/scCompartments.R | 7 ++-- man/getATACABsignal.Rd | 90 ----------------------------------------- man/preprocessArrays.Rd | 2 +- pkgdown/_pkgdown.yml | 1 - 6 files changed, 5 insertions(+), 171 deletions(-) delete mode 100644 R/getATACABsignal.R delete mode 100644 man/getATACABsignal.Rd diff --git a/NAMESPACE b/NAMESPACE index 777fa855..87c6c4d7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,7 +13,6 @@ export(fisherZ) export(fixCompartments) export(flogit) export(getABSignal) -export(getATACABsignal) export(getArrayABsignal) export(getAssayNames) export(getBinMatrix) diff --git a/R/getATACABsignal.R b/R/getATACABsignal.R deleted file mode 100644 index 954b2835..00000000 --- a/R/getATACABsignal.R +++ /dev/null @@ -1,75 +0,0 @@ -#' @title Estimate A/B compartments from ATAC-seq data -#' -#' @description -#' \code{getATACABsignal} returns estimated A/B compartments from ATAC-seq data. -#' -#' @param obj Input SummarizedExperiment object -#' @param res Compartment resolution in bp -#' @param parallel Whether to run samples in parallel -#' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) -#' @param targets Samples/cells to shrink towards -#' @param cores How many cores to use when running samples in parallel -#' @param bootstrap Whether we should perform bootstrapping of inferred compartments -#' @param num.bootstraps How many bootstraps to run -#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param other Another arbitrary genome to compute compartments on -#' @param group Whether to treat this as a group set of samples -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping -#' -#' @return A RaggedExperiment of inferred compartments -#' @import SummarizedExperiment -#' @import RaggedExperiment -#' @importFrom parallel mclapply -#' @importFrom methods as -#' @export -#' -#' @aliases getRNAABsignal -#' -#' @examples -#' if (requireNamespace("csaw", quietly = TRUE)) { -#' data("k562_scatac_chr14", package = "compartmap") -#' atac_compartments <- getATACABsignal( -#' k562_scatac_chr14, -#' parallel = FALSE, -#' chr = "chr14", -#' bootstrap = FALSE, -#' genome = "hg19", -#' group = TRUE -#' ) -#' } -getATACABsignal <- function( - obj, - res = 1e6, - parallel = FALSE, - chr = NULL, - targets = NULL, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 100, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - group = FALSE, - boot.parallel = FALSE, - boot.cores = 2 -) { - getCompartments( - obj = obj, - assay = "atac", - res = res, - parallel = parallel, - chr = chr, - targets = targets, - cores = cores, - bootstrap = bootstrap, - num.bootstraps = num.bootstraps, - boot.parallel = boot.parallel, - boot.cores = boot.cores, - genome = genome, - group = group - ) -} - -#' @describeIn getATACABsignal Alias for getATACABsignal -#' -getRNAABsignal <- getATACABsignal diff --git a/R/scCompartments.R b/R/scCompartments.R index 050b24bb..9d9d196c 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -49,9 +49,9 @@ scCompartments <- function( if (!all(assay %in% c("atac", "rna"))) stop("Supported assays are 'atac', and 'rna'.") assay <- tolower(match.arg(assay)) verifyAssayNames(obj, assay = assay) - - sc_compartments <- getATACABsignal( + getCompartments( obj = obj, + assay = "atac", res = res, parallel = parallel, chr = chr, @@ -59,8 +59,9 @@ scCompartments <- function( cores = cores, bootstrap = bootstrap, num.bootstraps = num.bootstraps, + boot.parallel = boot.parallel, + boot.cores = boot.cores, genome = genome, group = group ) - return(sc_compartments) } diff --git a/man/getATACABsignal.Rd b/man/getATACABsignal.Rd deleted file mode 100644 index 3a9c419c..00000000 --- a/man/getATACABsignal.Rd +++ /dev/null @@ -1,90 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getATACABsignal.R -\name{getATACABsignal} -\alias{getATACABsignal} -\alias{getRNAABsignal} -\title{Estimate A/B compartments from ATAC-seq data} -\usage{ -getATACABsignal( - obj, - res = 1000000, - parallel = FALSE, - chr = NULL, - targets = NULL, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 100, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - group = FALSE, - boot.parallel = FALSE, - boot.cores = 2 -) - -getRNAABsignal( - obj, - res = 1000000, - parallel = FALSE, - chr = NULL, - targets = NULL, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 100, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - group = FALSE, - boot.parallel = FALSE, - boot.cores = 2 -) -} -\arguments{ -\item{obj}{Input SummarizedExperiment object} - -\item{res}{Compartment resolution in bp} - -\item{parallel}{Whether to run samples in parallel} - -\item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} - -\item{targets}{Samples/cells to shrink towards} - -\item{cores}{How many cores to use when running samples in parallel} - -\item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} - -\item{num.bootstraps}{How many bootstraps to run} - -\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} - -\item{other}{Another arbitrary genome to compute compartments on} - -\item{group}{Whether to treat this as a group set of samples} - -\item{boot.parallel}{Whether to run the bootstrapping in parallel} - -\item{boot.cores}{How many cores to use for the bootstrapping} -} -\value{ -A RaggedExperiment of inferred compartments -} -\description{ -\code{getATACABsignal} returns estimated A/B compartments from ATAC-seq data. -} -\section{Functions}{ -\itemize{ -\item \code{getRNAABsignal()}: Alias for getATACABsignal - -}} -\examples{ -if (requireNamespace("csaw", quietly = TRUE)) { - data("k562_scatac_chr14", package = "compartmap") - atac_compartments <- getATACABsignal( - k562_scatac_chr14, - parallel = FALSE, - chr = "chr14", - bootstrap = FALSE, - genome = "hg19", - group = TRUE - ) -} -} diff --git a/man/preprocessArrays.Rd b/man/preprocessArrays.Rd index 972260f5..a040ad43 100644 --- a/man/preprocessArrays.Rd +++ b/man/preprocessArrays.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getArrayABsignal.R +% Please edit documentation in R/preprocessArrays.R \name{preprocessArrays} \alias{preprocessArrays} \title{Preprocess arrays for compartment inference} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 62946a52..43db4c94 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -30,7 +30,6 @@ reference: - fixCompartments - extractOpenClosed - getABSignal - - getATACABsignal - getRNAABsignal - title: Correlation matrix contents: From 62e6e41bd532fdde099305799e5613eb8917ca24 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 23 Oct 2025 12:48:37 -0400 Subject: [PATCH 04/29] !fix(sc|arrayCompartments): add missing args and reorder for consistency --- R/getArrayABsignal.R | 20 ++++++++++---------- R/scCompartments.R | 14 +++++++++----- man/getArrayABsignal.Rd | 26 +++++++++++++------------- man/scCompartments.Rd | 14 ++++++++++---- 4 files changed, 42 insertions(+), 32 deletions(-) diff --git a/R/getArrayABsignal.R b/R/getArrayABsignal.R index 24a286c1..5353795f 100644 --- a/R/getArrayABsignal.R +++ b/R/getArrayABsignal.R @@ -5,19 +5,19 @@ #' #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp -#' @param parallel Whether to run samples in parallel #' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) #' @param targets Samples/cells to shrink towards #' @param preprocess Whether to preprocess the arrays prior to compartment inference +#' @param parallel Whether to run samples in parallel #' @param cores How many cores to use when running samples in parallel #' @param bootstrap Whether we should perform bootstrapping of inferred compartments #' @param num.bootstraps How many bootstraps to run +#' @param boot.parallel Whether to run the bootstrapping in parallel +#' @param boot.cores How many cores to use for the bootstrapping #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") #' @param other Another arbitrary genome to compute compartments on -#' @param array.type What type of array is this ("hm450", "EPIC") #' @param group Whether to treat this as a group set of samples -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping +#' @param array.type What type of array is this ("hm450", "EPIC") #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment @@ -33,8 +33,8 @@ #' data("array_data_chr14", package = "compartmap") #' array_compartments <- getArrayABsignal( #' array.data.chr14, -#' parallel=FALSE, #' chr="chr14", +#' parallel=FALSE, #' bootstrap=FALSE, #' genome="hg19", #' array.type="hm450" @@ -43,19 +43,19 @@ getArrayABsignal <- function( obj, res = 1e6, - parallel = TRUE, chr = NULL, targets = NULL, preprocess = TRUE, + parallel = TRUE, cores = 2, bootstrap = TRUE, num.bootstraps = 1000, + boot.parallel = TRUE, + boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - array.type = c("hm450", "EPIC"), group = FALSE, - boot.parallel = TRUE, - boot.cores = 2 + other = NULL, + array.type = c("hm450", "EPIC") ) { verifySE(obj) verifyCoords(obj) diff --git a/R/scCompartments.R b/R/scCompartments.R index 9d9d196c..606db7f6 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -5,12 +5,14 @@ #' #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp -#' @param parallel Whether to run samples in parallel #' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) #' @param targets Samples/cells to shrink towards +#' @param parallel Whether to run samples in parallel #' @param cores How many cores to use when running samples in parallel #' @param bootstrap Whether we should perform bootstrapping of inferred compartments #' @param num.bootstraps How many bootstraps to run +#' @param boot.parallel Whether to run the bootstrapping in parallel +#' @param boot.cores How many cores to use for the bootstrapping #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") #' @param group Whether to treat this as a group set of samples #' @param assay What type of single-cell assay is the input data ("atac" or "rna") @@ -24,20 +26,22 @@ #' data("k562_scrna_chr14", package = "compartmap") #' sc_compartments <- scCompartments( #' k562_scrna_chr14, -#' parallel = FALSE, #' chr = "chr14", +#' parallel = FALSE, #' bootstrap = FALSE, #' genome = "hg19" #' ) scCompartments <- function( obj, res = 1e6, - parallel = FALSE, chr = NULL, targets = NULL, + parallel = FALSE, cores = 2, bootstrap = TRUE, num.bootstraps = 100, + boot.parallel = FALSE, + boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), group = FALSE, assay = c("atac", "rna") @@ -51,11 +55,11 @@ scCompartments <- function( verifyAssayNames(obj, assay = assay) getCompartments( obj = obj, - assay = "atac", + assay = assay, res = res, - parallel = parallel, chr = chr, targets = targets, + parallel = parallel, cores = cores, bootstrap = bootstrap, num.bootstraps = num.bootstraps, diff --git a/man/getArrayABsignal.Rd b/man/getArrayABsignal.Rd index 330db367..2b6e76c9 100644 --- a/man/getArrayABsignal.Rd +++ b/man/getArrayABsignal.Rd @@ -7,19 +7,19 @@ getArrayABsignal( obj, res = 1000000, - parallel = TRUE, chr = NULL, targets = NULL, preprocess = TRUE, + parallel = TRUE, cores = 2, bootstrap = TRUE, num.bootstraps = 1000, + boot.parallel = TRUE, + boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - array.type = c("hm450", "EPIC"), group = FALSE, - boot.parallel = TRUE, - boot.cores = 2 + other = NULL, + array.type = c("hm450", "EPIC") ) } \arguments{ @@ -27,31 +27,31 @@ getArrayABsignal( \item{res}{Compartment resolution in bp} -\item{parallel}{Whether to run samples in parallel} - \item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} \item{targets}{Samples/cells to shrink towards} \item{preprocess}{Whether to preprocess the arrays prior to compartment inference} +\item{parallel}{Whether to run samples in parallel} + \item{cores}{How many cores to use when running samples in parallel} \item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} \item{num.bootstraps}{How many bootstraps to run} -\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} +\item{boot.parallel}{Whether to run the bootstrapping in parallel} -\item{other}{Another arbitrary genome to compute compartments on} +\item{boot.cores}{How many cores to use for the bootstrapping} -\item{array.type}{What type of array is this ("hm450", "EPIC")} +\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} \item{group}{Whether to treat this as a group set of samples} -\item{boot.parallel}{Whether to run the bootstrapping in parallel} +\item{other}{Another arbitrary genome to compute compartments on} -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{array.type}{What type of array is this ("hm450", "EPIC")} } \value{ A RaggedExperiment of inferred compartments @@ -65,8 +65,8 @@ if (requireNamespace("minfi", quietly = TRUE)) { data("array_data_chr14", package = "compartmap") array_compartments <- getArrayABsignal( array.data.chr14, - parallel=FALSE, chr="chr14", + parallel=FALSE, bootstrap=FALSE, genome="hg19", array.type="hm450" diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index acaff07e..15275e8e 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -7,12 +7,14 @@ scCompartments( obj, res = 1000000, - parallel = FALSE, chr = NULL, targets = NULL, + parallel = FALSE, cores = 2, bootstrap = TRUE, num.bootstraps = 100, + boot.parallel = FALSE, + boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), group = FALSE, assay = c("atac", "rna") @@ -23,18 +25,22 @@ scCompartments( \item{res}{Compartment resolution in bp} -\item{parallel}{Whether to run samples in parallel} - \item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} \item{targets}{Samples/cells to shrink towards} +\item{parallel}{Whether to run samples in parallel} + \item{cores}{How many cores to use when running samples in parallel} \item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} \item{num.bootstraps}{How many bootstraps to run} +\item{boot.parallel}{Whether to run the bootstrapping in parallel} + +\item{boot.cores}{How many cores to use for the bootstrapping} + \item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} \item{group}{Whether to treat this as a group set of samples} @@ -51,8 +57,8 @@ A RaggedExperiment of inferred compartments data("k562_scrna_chr14", package = "compartmap") sc_compartments <- scCompartments( k562_scrna_chr14, - parallel = FALSE, chr = "chr14", + parallel = FALSE, bootstrap = FALSE, genome = "hg19" ) From 815ae54c4fdedd17cfe35314fc4b1fbf5a86317a Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 23 Oct 2025 12:49:45 -0400 Subject: [PATCH 05/29] !fix(arrayCompartments): rename getArrayABsignal to arrayCompartments --- NAMESPACE | 2 +- R/{getArrayABsignal.R => arrayCompartments.R} | 6 +++--- R/plotAB.R | 2 +- man/{getArrayABsignal.Rd => arrayCompartments.Rd} | 12 ++++++------ man/plotAB.Rd | 2 +- pkgdown/_pkgdown.yml | 2 +- tests/testthat/test-getArrayABsignal.R | 8 ++++---- 7 files changed, 17 insertions(+), 17 deletions(-) rename R/{getArrayABsignal.R => arrayCompartments.R} (94%) rename man/{getArrayABsignal.Rd => arrayCompartments.Rd} (87%) diff --git a/NAMESPACE b/NAMESPACE index 87c6c4d7..1a99c305 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(agrestiCoullCI) +export(arrayCompartments) export(bootstrapCompartments) export(condenseRE) export(condenseSE) @@ -13,7 +14,6 @@ export(fisherZ) export(fixCompartments) export(flogit) export(getABSignal) -export(getArrayABsignal) export(getAssayNames) export(getBinMatrix) export(getChrs) diff --git a/R/getArrayABsignal.R b/R/arrayCompartments.R similarity index 94% rename from R/getArrayABsignal.R rename to R/arrayCompartments.R index 5353795f..4bff477c 100644 --- a/R/getArrayABsignal.R +++ b/R/arrayCompartments.R @@ -1,7 +1,7 @@ #' @title Estimate A/B compartments from methylation array data #' #' @description -#' \code{getArrayABsignal} returns estimated A/B compartments from methylation array data. +#' \code{arrayCompartments} returns estimated A/B compartments from methylation array data. #' #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp @@ -31,7 +31,7 @@ #' #' if (requireNamespace("minfi", quietly = TRUE)) { #' data("array_data_chr14", package = "compartmap") -#' array_compartments <- getArrayABsignal( +#' array_compartments <- arrayCompartments( #' array.data.chr14, #' chr="chr14", #' parallel=FALSE, @@ -40,7 +40,7 @@ #' array.type="hm450" #' ) #' } -getArrayABsignal <- function( +arrayCompartments <- function( obj, res = 1e6, chr = NULL, diff --git a/R/plotAB.R b/R/plotAB.R index b8d1566d..8b4db5b6 100644 --- a/R/plotAB.R +++ b/R/plotAB.R @@ -2,7 +2,7 @@ #' #' Plot A/B compartments bins #' -#' @param grAB The GRanges object returned from scCompartments and getArrayABsignal +#' @param grAB The GRanges object returned from scCompartments and arrayCompartments #' @param chr Chromosome to subset to for plotting #' @param main Title for the plot #' @param ylim Y-axis limits (default is -1 to 1) diff --git a/man/getArrayABsignal.Rd b/man/arrayCompartments.Rd similarity index 87% rename from man/getArrayABsignal.Rd rename to man/arrayCompartments.Rd index 2b6e76c9..29fcb711 100644 --- a/man/getArrayABsignal.Rd +++ b/man/arrayCompartments.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getArrayABsignal.R -\name{getArrayABsignal} -\alias{getArrayABsignal} +% Please edit documentation in R/arrayCompartments.R +\name{arrayCompartments} +\alias{arrayCompartments} \title{Estimate A/B compartments from methylation array data} \usage{ -getArrayABsignal( +arrayCompartments( obj, res = 1000000, chr = NULL, @@ -57,13 +57,13 @@ getArrayABsignal( A RaggedExperiment of inferred compartments } \description{ -\code{getArrayABsignal} returns estimated A/B compartments from methylation array data. +\code{arrayCompartments} returns estimated A/B compartments from methylation array data. } \examples{ if (requireNamespace("minfi", quietly = TRUE)) { data("array_data_chr14", package = "compartmap") - array_compartments <- getArrayABsignal( + array_compartments <- arrayCompartments( array.data.chr14, chr="chr14", parallel=FALSE, diff --git a/man/plotAB.Rd b/man/plotAB.Rd index ad420e8a..10dae644 100644 --- a/man/plotAB.Rd +++ b/man/plotAB.Rd @@ -20,7 +20,7 @@ plotAB( ) } \arguments{ -\item{grAB}{The GRanges object returned from scCompartments and getArrayABsignal} +\item{grAB}{The GRanges object returned from scCompartments and arrayCompartments} \item{chr}{Chromosome to subset to for plotting} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 43db4c94..c1e753db 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -23,7 +23,7 @@ reference: - title: Compartment mapping contents: - scCompartments - - getArrayABsignal + - arrayCompartments - title: Compartment helper contents: - filterCompartments diff --git a/tests/testthat/test-getArrayABsignal.R b/tests/testthat/test-getArrayABsignal.R index f4ded02d..07696f14 100644 --- a/tests/testthat/test-getArrayABsignal.R +++ b/tests/testthat/test-getArrayABsignal.R @@ -1,10 +1,10 @@ se.noranges <- SummarizedExperiment(rowRanges = GRanges()) se.bad <- SummarizedExperiment(rowRanges = GRanges("chr1:1-10"), assays = SimpleList(a = matrix())) -test_that("getArrayABsignal bad inputs", { - expect_error(getArrayABsignal(iris), err.verifySE) - expect_error(getArrayABsignal(se.noranges), err.verifyCoords) - expect_error(getArrayABsignal(se.bad), err.verifyAssayNames.beta) +test_that("arrayCompartments bad inputs", { + expect_error(arrayCompartments(iris), err.verifySE) + expect_error(arrayCompartments(se.noranges), err.verifyCoords) + expect_error(arrayCompartments(se.bad), err.verifyAssayNames.beta) }) test_that(".preprocessArrays", { From 9382e7b2d0449bbbab9e9145b98b70d69bb370da Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 23 Oct 2025 13:06:11 -0400 Subject: [PATCH 06/29] docs(scCompartments): specify that inputs are RNA or ATAC --- R/scCompartments.R | 2 +- man/scCompartments.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/scCompartments.R b/R/scCompartments.R index 606db7f6..251bb099 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -1,4 +1,4 @@ -#' @title Estimate A/B compartments from single-cell sequencing data +#' @title Estimate A/B compartments from single-cell RNA or ATAC sequencing data #' #' @description #' \code{scCompartments} returns estimated A/B compartments from sc-seq data. diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index 15275e8e..92193fcb 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/scCompartments.R \name{scCompartments} \alias{scCompartments} -\title{Estimate A/B compartments from single-cell sequencing data} +\title{Estimate A/B compartments from single-cell RNA or ATAC sequencing data} \usage{ scCompartments( obj, From f839a8229e08e751a3be5808d1981186d6c8baef Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 6 Nov 2025 13:52:41 -0500 Subject: [PATCH 07/29] refactor: reorganize the arg order in top level functions to consolidate parallelizations args before using biocParallel --- R/arrayCompartments.R | 26 +++++++++++++------------- R/getCompartments.R | 12 ++++++------ R/scCompartments.R | 22 +++++++++++----------- flake.nix | 1 + man/arrayCompartments.Rd | 36 ++++++++++++++++++------------------ man/scCompartments.Rd | 30 +++++++++++++++--------------- 6 files changed, 64 insertions(+), 63 deletions(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 4bff477c..3658909b 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -6,18 +6,18 @@ #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp #' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) +#' @param group Whether to treat this as a group set of samples #' @param targets Samples/cells to shrink towards +#' @param bootstrap Whether we should perform bootstrapping of inferred compartments +#' @param num.bootstraps How many bootstraps to run #' @param preprocess Whether to preprocess the arrays prior to compartment inference +#' @param array.type What type of array is this ("hm450", "EPIC") +#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") +#' @param other Another arbitrary genome to compute compartments on #' @param parallel Whether to run samples in parallel #' @param cores How many cores to use when running samples in parallel -#' @param bootstrap Whether we should perform bootstrapping of inferred compartments -#' @param num.bootstraps How many bootstraps to run #' @param boot.parallel Whether to run the bootstrapping in parallel #' @param boot.cores How many cores to use for the bootstrapping -#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param other Another arbitrary genome to compute compartments on -#' @param group Whether to treat this as a group set of samples -#' @param array.type What type of array is this ("hm450", "EPIC") #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment @@ -44,18 +44,18 @@ arrayCompartments <- function( obj, res = 1e6, chr = NULL, + group = FALSE, targets = NULL, - preprocess = TRUE, - parallel = TRUE, - cores = 2, bootstrap = TRUE, num.bootstraps = 1000, - boot.parallel = TRUE, - boot.cores = 2, + preprocess = TRUE, + array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, other = NULL, - array.type = c("hm450", "EPIC") + parallel = TRUE, + cores = 2, + boot.parallel = TRUE, + boot.cores = 2 ) { verifySE(obj) verifyCoords(obj) diff --git a/R/getCompartments.R b/R/getCompartments.R index ad8178d6..6c98e55d 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -1,17 +1,17 @@ getCompartments <- function( obj, - assay, res, - parallel, chr, + group, targets, - cores, bootstrap, num.bootstraps, - boot.parallel, - boot.cores, genome, - group + assay, + parallel, + cores, + boot.parallel, + boot.cores ) { if (is.null(chr)) { message("Assuming we want to process all chromosomes.") diff --git a/R/scCompartments.R b/R/scCompartments.R index 251bb099..18a142cc 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -6,16 +6,16 @@ #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp #' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) +#' @param group Whether to treat this as a group set of samples #' @param targets Samples/cells to shrink towards -#' @param parallel Whether to run samples in parallel -#' @param cores How many cores to use when running samples in parallel #' @param bootstrap Whether we should perform bootstrapping of inferred compartments #' @param num.bootstraps How many bootstraps to run -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param group Whether to treat this as a group set of samples #' @param assay What type of single-cell assay is the input data ("atac" or "rna") +#' @param parallel Whether to run samples in parallel +#' @param cores How many cores to use when running samples in parallel +#' @param boot.parallel Whether to run the bootstrapping in parallel +#' @param boot.cores How many cores to use for the bootstrapping #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment @@ -35,16 +35,16 @@ scCompartments <- function( obj, res = 1e6, chr = NULL, + group = FALSE, targets = NULL, - parallel = FALSE, - cores = 2, bootstrap = TRUE, num.bootstraps = 100, - boot.parallel = FALSE, - boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, - assay = c("atac", "rna") + assay = c("atac", "rna"), + parallel = FALSE, + cores = 2, + boot.parallel = FALSE, + boot.cores = 2 ) { verifySE(obj) verifyCoords(obj) diff --git a/flake.nix b/flake.nix index 9134fbb0..5099c42d 100644 --- a/flake.nix +++ b/flake.nix @@ -17,6 +17,7 @@ ]; Imports = with pkgs.rPackages; [ + BiocParallel DelayedArray DelayedMatrixStats GenomicRanges diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 29fcb711..5239ed9f 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -8,18 +8,18 @@ arrayCompartments( obj, res = 1000000, chr = NULL, + group = FALSE, targets = NULL, - preprocess = TRUE, - parallel = TRUE, - cores = 2, bootstrap = TRUE, num.bootstraps = 1000, - boot.parallel = TRUE, - boot.cores = 2, + preprocess = TRUE, + array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, other = NULL, - array.type = c("hm450", "EPIC") + parallel = TRUE, + cores = 2, + boot.parallel = TRUE, + boot.cores = 2 ) } \arguments{ @@ -29,29 +29,29 @@ arrayCompartments( \item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} -\item{targets}{Samples/cells to shrink towards} - -\item{preprocess}{Whether to preprocess the arrays prior to compartment inference} - -\item{parallel}{Whether to run samples in parallel} +\item{group}{Whether to treat this as a group set of samples} -\item{cores}{How many cores to use when running samples in parallel} +\item{targets}{Samples/cells to shrink towards} \item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} \item{num.bootstraps}{How many bootstraps to run} -\item{boot.parallel}{Whether to run the bootstrapping in parallel} +\item{preprocess}{Whether to preprocess the arrays prior to compartment inference} -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{array.type}{What type of array is this ("hm450", "EPIC")} \item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} -\item{group}{Whether to treat this as a group set of samples} - \item{other}{Another arbitrary genome to compute compartments on} -\item{array.type}{What type of array is this ("hm450", "EPIC")} +\item{parallel}{Whether to run samples in parallel} + +\item{cores}{How many cores to use when running samples in parallel} + +\item{boot.parallel}{Whether to run the bootstrapping in parallel} + +\item{boot.cores}{How many cores to use for the bootstrapping} } \value{ A RaggedExperiment of inferred compartments diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index 92193fcb..a2e03278 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -8,16 +8,16 @@ scCompartments( obj, res = 1000000, chr = NULL, + group = FALSE, targets = NULL, - parallel = FALSE, - cores = 2, bootstrap = TRUE, num.bootstraps = 100, - boot.parallel = FALSE, - boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, - assay = c("atac", "rna") + assay = c("atac", "rna"), + parallel = FALSE, + cores = 2, + boot.parallel = FALSE, + boot.cores = 2 ) } \arguments{ @@ -27,25 +27,25 @@ scCompartments( \item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} -\item{targets}{Samples/cells to shrink towards} - -\item{parallel}{Whether to run samples in parallel} +\item{group}{Whether to treat this as a group set of samples} -\item{cores}{How many cores to use when running samples in parallel} +\item{targets}{Samples/cells to shrink towards} \item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} \item{num.bootstraps}{How many bootstraps to run} -\item{boot.parallel}{Whether to run the bootstrapping in parallel} +\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{assay}{What type of single-cell assay is the input data ("atac" or "rna")} -\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} +\item{parallel}{Whether to run samples in parallel} -\item{group}{Whether to treat this as a group set of samples} +\item{cores}{How many cores to use when running samples in parallel} -\item{assay}{What type of single-cell assay is the input data ("atac" or "rna")} +\item{boot.parallel}{Whether to run the bootstrapping in parallel} + +\item{boot.cores}{How many cores to use for the bootstrapping} } \value{ A RaggedExperiment of inferred compartments From a6055078ae7926778f2d77bdc78ef613ed10048f Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 10:59:00 -0500 Subject: [PATCH 08/29] feat(threads): add BiocParallel worker checking functions --- DESCRIPTION | 1 + NAMESPACE | 1 + R/parallel.R | 80 +++++++++++++++++++++++++++++++++++++++ man/bpnworkers.list.Rd | 18 +++++++++ man/check_worker_count.Rd | 16 ++++++++ man/get_bpnworkers.Rd | 14 +++++++ man/get_nested_params.Rd | 18 +++++++++ man/verify_bp.Rd | 19 ++++++++++ man/verify_workers.Rd | 19 ++++++++++ 9 files changed, 186 insertions(+) create mode 100644 R/parallel.R create mode 100644 man/bpnworkers.list.Rd create mode 100644 man/check_worker_count.Rd create mode 100644 man/get_bpnworkers.Rd create mode 100644 man/get_nested_params.Rd create mode 100644 man/verify_bp.Rd create mode 100644 man/verify_workers.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ec535185..19e01d04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Depends: HDF5Array Imports: BiocSingular, + BiocParallel, rlang, S4Vectors, IRanges, diff --git a/NAMESPACE b/NAMESPACE index 1a99c305..40bc8400 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ import(HDF5Array) import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) +importFrom(BiocParallel,bpnworkers) importFrom(BiocSingular,IrlbaParam) importFrom(BiocSingular,runSVD) importFrom(GenomeInfoDb,"seqlevelsStyle<-") diff --git a/R/parallel.R b/R/parallel.R new file mode 100644 index 00000000..908afa46 --- /dev/null +++ b/R/parallel.R @@ -0,0 +1,80 @@ +#' Check that the number of requested workers is valid +#' @keywords internal +check_worker_count <- function(bpparam, boot.parallel, avail_workers = parallelly::availableCores()) { + workers <- get_bpnworkers(bpparam) + total <- sum(Reduce(`*`, workers), workers[1]) + if (verify_workers(total)) { + return(TRUE) + } + + msg <- sprintf( + "Using %1$d outer and %2$d inner workers would require %3$d workers (%1$d + (%1$d x %2$d)) but your system has only %4$d cores. + See parallelly::availableCores() for more information on available resources", + workers[1], + workers[2], + total, + avail_workers + ) + stop(msg) +} + +#' Get the total number of BiocParallelParam workers that will get used +#' From a BiocParallelParam or list of 2 BiocParallelParam objects +#' @keywords internal +get_bpnworkers <- function(bp) { + workers <- bpnworkers.list(bp) +} + +#' Return the number of workers in a list of BiocParallelParam objects +#' @param List of BiocParallelParam objects +#' @importFrom BiocParallel bpnworkers +#' @return A vector of the `bpnworkers` count in each list element +#' @keywords internal +bpnworkers.list <- function(bplist) { + unlist(Map(bpnworkers, bplist)) +} + +#' Verify that the input BiocParallelParam is valid +#' @param A BiocParallelParam or list of 2 BiocParallelParam objects +#' @importFrom BiocParallel bpnworkers +#' @return TRUE if the total `bpnworkers` in the input does not exceed +#' available resources as defined by `parallelly::availableCores()` +#' @keywords internal +verify_bp <- function(bp) { + verify_workers(get_bpnworkers(bp)) +} + +#' Verify that requested thread count is not higher than available +#' @param thread_count The number of workers to check availability +#' @return TRUE if the requested `thread_count` does not exceed available +#' resources as defined by `parallelly::availableCores()` +#' @keywords internal +verify_workers <- function(n_workers) { + avail_workers <- parallelly::availableCores() + n_workers <= avail_workers +} + +#' Set outer and inner params for nester parallelization +#' The outer param is across the input samples/columns and the second is for +#' bootstrapping. If `boot.parallel` is FALSE, the inner param is set to +#' `SerialParam`. +#' @keywords internal +get_nested_params <- function(BPPARAM, boot.parallel) { + stopifnot("Only two BiocParallelParam objects can be used" = length(BPPARAM) <= 2) + single_param <- length(BPPARAM) == 1 + BPPARAM <- if (single_param & is.list(BPPARAM)) BPPARAM[[1]] else BPPARAM + + if (boot.parallel) { + if (single_param) { + return(list(outer = BPPARAM, inner = BPPARAM)) + } else { + return(list(outer = BPPARAM[[1]], inner = BPPARAM[[2]])) + } + } + + if (single_param) { + list(outer = BPPARAM, inner = SerialParam()) + } else { + list(outer = BPPARAM[[1]], inner = SerialParam()) + } +} diff --git a/man/bpnworkers.list.Rd b/man/bpnworkers.list.Rd new file mode 100644 index 00000000..762f6eb4 --- /dev/null +++ b/man/bpnworkers.list.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{bpnworkers.list} +\alias{bpnworkers.list} +\title{Return the number of workers in a list of BiocParallelParam objects} +\usage{ +bpnworkers.list(bplist) +} +\arguments{ +\item{List}{of BiocParallelParam objects} +} +\value{ +A vector of the \code{bpnworkers} count in each list element +} +\description{ +Return the number of workers in a list of BiocParallelParam objects +} +\keyword{internal} diff --git a/man/check_worker_count.Rd b/man/check_worker_count.Rd new file mode 100644 index 00000000..62d20728 --- /dev/null +++ b/man/check_worker_count.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{check_worker_count} +\alias{check_worker_count} +\title{Check that the number of requested workers is valid} +\usage{ +check_worker_count( + bpparam, + boot.parallel, + avail_workers = parallelly::availableCores() +) +} +\description{ +Check that the number of requested workers is valid +} +\keyword{internal} diff --git a/man/get_bpnworkers.Rd b/man/get_bpnworkers.Rd new file mode 100644 index 00000000..8bad4025 --- /dev/null +++ b/man/get_bpnworkers.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{get_bpnworkers} +\alias{get_bpnworkers} +\title{Get the total number of BiocParallelParam workers that will get used +From a BiocParallelParam or list of 2 BiocParallelParam objects} +\usage{ +get_bpnworkers(bp) +} +\description{ +Get the total number of BiocParallelParam workers that will get used +From a BiocParallelParam or list of 2 BiocParallelParam objects +} +\keyword{internal} diff --git a/man/get_nested_params.Rd b/man/get_nested_params.Rd new file mode 100644 index 00000000..2e7ed1bc --- /dev/null +++ b/man/get_nested_params.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{get_nested_params} +\alias{get_nested_params} +\title{Set outer and inner params for nester parallelization +The outer param is across the input samples/columns and the second is for +bootstrapping. If \code{boot.parallel} is FALSE, the inner param is set to +\code{SerialParam}.} +\usage{ +get_nested_params(BPPARAM, boot.parallel) +} +\description{ +Set outer and inner params for nester parallelization +The outer param is across the input samples/columns and the second is for +bootstrapping. If \code{boot.parallel} is FALSE, the inner param is set to +\code{SerialParam}. +} +\keyword{internal} diff --git a/man/verify_bp.Rd b/man/verify_bp.Rd new file mode 100644 index 00000000..0869d5d3 --- /dev/null +++ b/man/verify_bp.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{verify_bp} +\alias{verify_bp} +\title{Verify that the input BiocParallelParam is valid} +\usage{ +verify_bp(bp) +} +\arguments{ +\item{A}{BiocParallelParam or list of 2 BiocParallelParam objects} +} +\value{ +TRUE if the total \code{bpnworkers} in the input does not exceed +available resources as defined by \code{parallelly::availableCores()} +} +\description{ +Verify that the input BiocParallelParam is valid +} +\keyword{internal} diff --git a/man/verify_workers.Rd b/man/verify_workers.Rd new file mode 100644 index 00000000..d3117a1d --- /dev/null +++ b/man/verify_workers.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{verify_workers} +\alias{verify_workers} +\title{Verify that requested thread count is not higher than available} +\usage{ +verify_workers(n_workers) +} +\arguments{ +\item{thread_count}{The number of workers to check availability} +} +\value{ +TRUE if the requested \code{thread_count} does not exceed available +resources as defined by \code{parallelly::availableCores()} +} +\description{ +Verify that requested thread count is not higher than available +} +\keyword{internal} From 5366ba7d3ad651d108a92a83eecde7d892c572bc Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:02:09 -0500 Subject: [PATCH 09/29] wip: add args for parallelization for scCompartments/arrayCompartments --- NAMESPACE | 1 + R/arrayCompartments.R | 21 +++++++++------------ R/scCompartments.R | 27 ++++++++++++--------------- man/arrayCompartments.Rd | 12 +++--------- man/scCompartments.Rd | 12 +++--------- 5 files changed, 28 insertions(+), 45 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 40bc8400..d373e809 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) importFrom(BiocParallel,bpnworkers) +importFrom(BiocParallel,bpparam) importFrom(BiocSingular,IrlbaParam) importFrom(BiocSingular,runSVD) importFrom(GenomeInfoDb,"seqlevelsStyle<-") diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 3658909b..43978d79 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -14,10 +14,9 @@ #' @param array.type What type of array is this ("hm450", "EPIC") #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") #' @param other Another arbitrary genome to compute compartments on -#' @param parallel Whether to run samples in parallel -#' @param cores How many cores to use when running samples in parallel -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping +#' @param boot.parallel Whether to run the bootstrapping in parallel. See details. +#' @param BPPARAM BiocParallelParam object to use for parallelization. See details. +#' #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment @@ -52,14 +51,14 @@ arrayCompartments <- function( array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), other = NULL, - parallel = TRUE, - cores = 2, boot.parallel = TRUE, - boot.cores = 2 + BPPARAM = bpparam() ) { verifySE(obj) verifyCoords(obj) verifyAssayNames(obj, assay = "array") + bpparams <- get_nested_params(BPPARAM) + check_worker_count(bpparams) # preprocess the arrays if (preprocess) { @@ -83,15 +82,13 @@ arrayCompartments <- function( obj = obj, assay = "array", res = res, - parallel = parallel, chr = chr, targets = targets, - cores = cores, bootstrap = bootstrap, num.bootstraps = num.bootstraps, - boot.parallel = boot.parallel, - boot.cores = boot.cores, genome = genome, - group = group + group = group, + boot.parallel = boot.parallel, + bpparams = bpparams ) } diff --git a/R/scCompartments.R b/R/scCompartments.R index 18a142cc..42a0ba4e 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -12,22 +12,20 @@ #' @param num.bootstraps How many bootstraps to run #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") #' @param assay What type of single-cell assay is the input data ("atac" or "rna") -#' @param parallel Whether to run samples in parallel -#' @param cores How many cores to use when running samples in parallel -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping +#' @param boot.parallel Whether to run the bootstrapping in parallel. See details. +#' @param BPPARAM BiocParallelParam object to use for parallelization. See details. +#' #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment -#' @importFrom parallel mclapply #' @import RaggedExperiment +#' @importFrom BiocParallel bpparam #' @export #' @examples #' data("k562_scrna_chr14", package = "compartmap") #' sc_compartments <- scCompartments( #' k562_scrna_chr14, #' chr = "chr14", -#' parallel = FALSE, #' bootstrap = FALSE, #' genome = "hg19" #' ) @@ -41,31 +39,30 @@ scCompartments <- function( num.bootstraps = 100, genome = c("hg19", "hg38", "mm9", "mm10"), assay = c("atac", "rna"), - parallel = FALSE, - cores = 2, boot.parallel = FALSE, - boot.cores = 2 + BPPARAM = bpparam() ) { verifySE(obj) verifyCoords(obj) + bpparams <- get_nested_params(BPPARAM, boot.parallel) + check_worker_count(bpparams) + # which assay are we working on if (!all(assay %in% c("atac", "rna"))) stop("Supported assays are 'atac', and 'rna'.") assay <- tolower(match.arg(assay)) verifyAssayNames(obj, assay = assay) getCompartments( obj = obj, - assay = assay, res = res, chr = chr, + group = group, targets = targets, - parallel = parallel, - cores = cores, bootstrap = bootstrap, num.bootstraps = num.bootstraps, - boot.parallel = boot.parallel, - boot.cores = boot.cores, genome = genome, - group = group + assay = assay, + boot.parallel = boot.parallel, + bpparams = bpparams ) } diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 5239ed9f..a9bbc0fe 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -16,10 +16,8 @@ arrayCompartments( array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), other = NULL, - parallel = TRUE, - cores = 2, boot.parallel = TRUE, - boot.cores = 2 + BPPARAM = bpparam() ) } \arguments{ @@ -45,13 +43,9 @@ arrayCompartments( \item{other}{Another arbitrary genome to compute compartments on} -\item{parallel}{Whether to run samples in parallel} +\item{boot.parallel}{Whether to run the bootstrapping in parallel. See details.} -\item{cores}{How many cores to use when running samples in parallel} - -\item{boot.parallel}{Whether to run the bootstrapping in parallel} - -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{BPPARAM}{BiocParallelParam object to use for parallelization. See details.} } \value{ A RaggedExperiment of inferred compartments diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index a2e03278..97c8182e 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -14,10 +14,8 @@ scCompartments( num.bootstraps = 100, genome = c("hg19", "hg38", "mm9", "mm10"), assay = c("atac", "rna"), - parallel = FALSE, - cores = 2, boot.parallel = FALSE, - boot.cores = 2 + BPPARAM = bpparam() ) } \arguments{ @@ -39,13 +37,9 @@ scCompartments( \item{assay}{What type of single-cell assay is the input data ("atac" or "rna")} -\item{parallel}{Whether to run samples in parallel} - -\item{cores}{How many cores to use when running samples in parallel} - -\item{boot.parallel}{Whether to run the bootstrapping in parallel} +\item{boot.parallel}{Whether to run the bootstrapping in parallel. See details.} -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{BPPARAM}{BiocParallelParam object to use for parallelization. See details.} } \value{ A RaggedExperiment of inferred compartments From 3c32725e1f3395ea86b2b65a7ff547a867925b4f Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:10:20 -0500 Subject: [PATCH 10/29] wip: parallelize precomputeBootstrapMeans --- NAMESPACE | 1 + R/getCompartments.R | 11 +++++------ R/getGlobalMeans.R | 22 ++++++++++++---------- man/getCompartments.Rd | 23 +++++++++++++++++++++++ man/precomputeBootstrapMeans.Rd | 11 ++++------- 5 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 man/getCompartments.Rd diff --git a/NAMESPACE b/NAMESPACE index d373e809..38f41365 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ import(HDF5Array) import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) +importFrom(BiocParallel,bplapply) importFrom(BiocParallel,bpnworkers) importFrom(BiocParallel,bpparam) importFrom(BiocSingular,IrlbaParam) diff --git a/R/getCompartments.R b/R/getCompartments.R index 6c98e55d..f6fce380 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -1,3 +1,5 @@ +#' Run compartment inference +#' @importFrom BiocParallel bplapply getCompartments <- function( obj, res, @@ -8,10 +10,8 @@ getCompartments <- function( num.bootstraps, genome, assay, - parallel, - cores, boot.parallel, - boot.cores + bpparams ) { if (is.null(chr)) { message("Assuming we want to process all chromosomes.") @@ -28,11 +28,10 @@ getCompartments <- function( message("Pre-computing the bootstrap global means.") bmeans <- precomputeBootstrapMeans( obj = obj, + BPPARAM = bpparams[[1]], targets = targets, num.bootstraps = num.bootstraps, - assay = assay, - parallel = parallel, - num.cores = cores + assay = assay ) } diff --git a/R/getGlobalMeans.R b/R/getGlobalMeans.R index 09f1ae87..6a71cf92 100644 --- a/R/getGlobalMeans.R +++ b/R/getGlobalMeans.R @@ -49,11 +49,10 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" #' @name precomputeBootstrapMeans #' #' @param obj Input SummarizedExperiment object +#' @param BPPARAM BiocParallelParam for parallelizing computation #' @param targets Optional targets to shrink towards #' @param num.bootstraps The number of bootstraps to compute #' @param assay What type of assay the data are from -#' @param parallel Whether to run in parallel -#' @param num.cores How many cores to use for parallel processing #' #' @return A matrix of bootstrapped global means #' @@ -69,11 +68,10 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" #' ) precomputeBootstrapMeans <- function( obj, + BPPARAM, targets = NULL, num.bootstraps = 100, - assay = c("atac", "rna", "array"), - parallel = FALSE, - num.cores = 1 + assay = c("atac", "rna", "array") ) { # this function precomputes the bootstrapped global means # as a default we will make 100 bootstraps @@ -85,11 +83,15 @@ precomputeBootstrapMeans <- function( obj <- getShrinkageTargets(obj, targets) } assay.data <- .getAssay(obj, is.array) - bootMean <- mclapply(1:num.bootstraps, function(b) { - message("Working on bootstrap ", b) - resamp.mat <- .resampleMatrix(assay.data) - computeGlobalMean(resamp.mat) - }, mc.cores = ifelse(parallel, num.cores, 1)) + bootMean <- bplapply( + 1:num.bootstraps, + function(b) { + # message("Working on bootstrap ", b) + resamp.mat <- .resampleMatrix(assay.data) + computeGlobalMean(resamp.mat) + }, + BPPARAM = BPPARAM + ) bootResult <- do.call("cbind", bootMean) rownames(bootResult) <- as.character(granges(obj)) diff --git a/man/getCompartments.Rd b/man/getCompartments.Rd new file mode 100644 index 00000000..8dd97d7e --- /dev/null +++ b/man/getCompartments.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getCompartments.R +\name{getCompartments} +\alias{getCompartments} +\title{Run compartment inference} +\usage{ +getCompartments( + obj, + res, + chr, + group, + targets, + bootstrap, + num.bootstraps, + genome, + assay, + boot.parallel, + bpparams +) +} +\description{ +Run compartment inference +} diff --git a/man/precomputeBootstrapMeans.Rd b/man/precomputeBootstrapMeans.Rd index 2f46f8ac..6f8536a4 100644 --- a/man/precomputeBootstrapMeans.Rd +++ b/man/precomputeBootstrapMeans.Rd @@ -6,25 +6,22 @@ \usage{ precomputeBootstrapMeans( obj, + BPPARAM, targets = NULL, num.bootstraps = 100, - assay = c("atac", "rna", "array"), - parallel = FALSE, - num.cores = 1 + assay = c("atac", "rna", "array") ) } \arguments{ \item{obj}{Input SummarizedExperiment object} +\item{BPPARAM}{BiocParallelParam for parallelizing computation} + \item{targets}{Optional targets to shrink towards} \item{num.bootstraps}{The number of bootstraps to compute} \item{assay}{What type of assay the data are from} - -\item{parallel}{Whether to run in parallel} - -\item{num.cores}{How many cores to use for parallel processing} } \value{ A matrix of bootstrapped global means From c19d02ff148da711abadd15137098504402e9ef3 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:10:58 -0500 Subject: [PATCH 11/29] wip: parallelize inference and bootstrapping, --- R/bootstrapCompartments.R | 70 ++++++++++++++++--------------- R/getCompartments.R | 81 ++++++++++++++++++++---------------- man/bootstrapCompartments.Rd | 21 +++++----- 3 files changed, 91 insertions(+), 81 deletions(-) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index 35e9e171..a129faad 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -1,14 +1,14 @@ -#' Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals +#' Non-parametric bootstrapping of compartments and summarization of +#' bootstraps/compute confidence intervals #' #' @name bootstrapCompartments #' #' @param obj List object of computed compartments for a sample with 'pc' and 'gr' as elements #' @param original.obj The original, full input SummarizedExperiment of all samples/cells +#' @param BPPARAM BiocParallelParam for parallelizing bootstrapping #' @param bootstrap.samples How many bootstraps to run #' @param chr Which chromosome to operate on #' @param assay What sort of assay are we working on -#' @param parallel Whether to run the bootstrapping in parallel -#' @param cores How many cores to use for parallel processing #' @param targets Targets to shrink towards #' @param res The compartment resolution #' @param genome What genome are we working on @@ -18,7 +18,6 @@ #' @param bootstrap.means Pre-computed bootstrap means matrix #' #' @return Compartment estimates with summarized bootstraps and confidence intervals -#' @importFrom parallel mclapply #' @import SummarizedExperiment #' #' @examples @@ -29,17 +28,16 @@ bootstrapCompartments <- function( obj, original.obj, + BPPARAM, bootstrap.samples = 1000, chr = "chr14", + group = FALSE, assay = c("rna", "atac", "array"), - parallel = TRUE, - cores = 2, targets = NULL, res = 1e6, genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95, svd = NULL, - group = FALSE, bootstrap.means = NULL ) { # function for nonparametric bootstrap of compartments and compute 95% CIs @@ -61,36 +59,40 @@ bootstrapCompartments <- function( } # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") - if (parallel) { - message("Bootstrapping in parallel with ", cores, " cores.") - } else { - message("Not bootstrapping in parallel will take a long time...") - } + # if (parallel) { + # message("Bootstrapping in parallel with ", cores, " cores.") + # } else { + # message("Not bootstrapping in parallel will take a long time...") + # } # bootstrap and recompute compartments - resamp.compartments <- mclapply(1:ncol(bmeans), function(b) { - # get the shrunken bins with new global mean - boot.mean <- as.matrix(bmeans[, b]) - colnames(boot.mean) <- "globalMean" - s.bins <- shrinkBins( - obj, - original.obj, - prior.means = boot.mean, - chr = chr, - res = res, - assay = assay, - genome = genome - ) - cor.bins <- getCorMatrix(s.bins, squeeze = !group) + resamp.compartments <- bplapply( + 1:ncol(bmeans), + function(b) { + # get the shrunken bins with new global mean + boot.mean <- as.matrix(bmeans[, b]) + colnames(boot.mean) <- "globalMean" + s.bins <- shrinkBins( + obj, + original.obj, + prior.means = boot.mean, + chr = chr, + res = res, + assay = assay, + genome = genome + ) + cor.bins <- getCorMatrix(s.bins, squeeze = !group) - # Stupid check for perfect correlation with global mean - if (any(is.na(cor.bins$binmat.cor))) { - absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) - } else { - absig <- getABSignal(cor.bins, assay = assay) - } - return(absig) - }, mc.cores = ifelse(parallel, cores, 1)) + # Stupid check for perfect correlation with global mean + if (any(is.na(cor.bins$binmat.cor))) { + absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) + } else { + absig <- getABSignal(cor.bins, assay = assay) + } + return(absig) + }, + BPPARAM = BPPARAM + ) # summarize the bootstraps and compute confidence intervals resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, q = q, assay = assay) diff --git a/R/getCompartments.R b/R/getCompartments.R index f6fce380..414941a6 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -35,54 +35,67 @@ getCompartments <- function( ) } + if (boot.parallel) { + innerBPPARAM <- bpparams[[2]] + } else { + innerBPPARAM <- BiocParallel::SerialParam() + } + if (group) { - compartments.list <- mclapply(chr, function(c) { - .getCompartments( - obj, - obj, - assay = assay, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - num.bootstraps = num.bootstraps, - prior.means = prior.means, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }, mc.cores = ifelse(parallel, cores, 1)) + message("Computing group level compartments") + compartments.list <- bplapply( + chr, + function(c) { + .getCompartments( + obj, + obj, + assay = assay, + BPPARAM = innerBPPARAM, + res = res, + chr = c, + group = group, + targets = targets, + genome = genome, + prior.means = prior.means, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + bootstrap.means = bmeans + ) + }, + BPPARAM = bpparams[[1]] + ) compartments <- sort(unlist(as(compartments.list, "GRangesList"))) return(compartments) } - compartments <- mclapply(columns, function(s) { - obj.sub <- obj[, s] + message("Computing single-cell level compartments") + compartments <- bplapply( + columns, + function(s) { + obj.sub <- obj[, s] - message("Working on ", s) - compartments.list <- lapply(chr, function(c) { + compartments.list <- lapply(chr, function(c) { .getCompartments( obj.sub, obj, assay = assay, + BPPARAM = innerBPPARAM, res = res, chr = c, + group = group, targets = targets, genome = genome, - bootstrap = bootstrap, prior.means = prior.means, + bootstrap = bootstrap, num.bootstraps = num.bootstraps, - parallel = boot.parallel, - cores = boot.cores, - group = group, bootstrap.means = bmeans ) }) - sort(unlist(as(compartments.list, "GRangesList"))) - }, mc.cores = ifelse(parallel, cores, 1), mc.preschedule = F) + sort(unlist(as(compartments.list, "GRangesList"))) + }, + BPPARAM = bpparams[[1]] + ) compartments <- as(compartments, "CompressedGRangesList") RaggedExperiment(compartments, colData = colData(obj)) @@ -93,22 +106,19 @@ getCompartments <- function( obj, original.obj, assay, + BPPARAM, res = 1e6, chr = NULL, + group = FALSE, targets = NULL, genome = c("hg19", "hg38", "mm9", "mm10"), prior.means = NULL, bootstrap = TRUE, num.bootstraps = 1000, - parallel = FALSE, - cores = 2, - group = FALSE, bootstrap.means = NULL ) { genome <- match.arg(genome) - if (parallel) options(mc.cores = cores) - # update message("Computing compartments for ", chr) obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") @@ -163,17 +173,16 @@ getCompartments <- function( bootstrapCompartments( obj, original.obj, + BPPARAM = BPPARAM, bootstrap.samples = num.bootstraps, chr = chr, + group = group, assay = assay, - parallel = parallel, - cores = cores, targets = targets, res = res, genome = genome, q = 0.95, svd = obj.svd, - group = group, bootstrap.means = bmeans ) } diff --git a/man/bootstrapCompartments.Rd b/man/bootstrapCompartments.Rd index c9ab1c18..440b511f 100644 --- a/man/bootstrapCompartments.Rd +++ b/man/bootstrapCompartments.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/bootstrapCompartments.R \name{bootstrapCompartments} \alias{bootstrapCompartments} -\title{Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals} +\title{Non-parametric bootstrapping of compartments and summarization of +bootstraps/compute confidence intervals} \usage{ bootstrapCompartments( obj, original.obj, + BPPARAM, bootstrap.samples = 1000, chr = "chr14", + group = FALSE, assay = c("rna", "atac", "array"), - parallel = TRUE, - cores = 2, targets = NULL, res = 1000000, genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95, svd = NULL, - group = FALSE, bootstrap.means = NULL ) } @@ -26,15 +26,15 @@ bootstrapCompartments( \item{original.obj}{The original, full input SummarizedExperiment of all samples/cells} +\item{BPPARAM}{BiocParallelParam for parallelizing bootstrapping} + \item{bootstrap.samples}{How many bootstraps to run} \item{chr}{Which chromosome to operate on} -\item{assay}{What sort of assay are we working on} - -\item{parallel}{Whether to run the bootstrapping in parallel} +\item{group}{Whether this is for group-level inference} -\item{cores}{How many cores to use for parallel processing} +\item{assay}{What sort of assay are we working on} \item{targets}{Targets to shrink towards} @@ -46,15 +46,14 @@ bootstrapCompartments( \item{svd}{The original compartment calls as a GRanges object} -\item{group}{Whether this is for group-level inference} - \item{bootstrap.means}{Pre-computed bootstrap means matrix} } \value{ Compartment estimates with summarized bootstraps and confidence intervals } \description{ -Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals +Non-parametric bootstrapping of compartments and summarization of +bootstraps/compute confidence intervals } \examples{ From 145bc95940e8dfd4c8f35acf0603b9d7ddc51e16 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:11:27 -0500 Subject: [PATCH 12/29] chore(flake): update with parallelly --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index 5099c42d..7b09c100 100644 --- a/flake.nix +++ b/flake.nix @@ -24,6 +24,7 @@ ggplot2 impute Matrix + parallelly reshape2 RMTstat rtracklayer From d2bb05cc0be003feffbb39238e061127cda36d1f Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:11:45 -0500 Subject: [PATCH 13/29] docs: add parallelization details --- R/arrayCompartments.R | 1 + R/scCompartments.R | 46 +++++++++++++++++++++++++++++++++++++++ man/arrayCompartments.Rd | 46 +++++++++++++++++++++++++++++++++++++++ man/scCompartments.Rd | 47 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 139 insertions(+), 1 deletion(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 43978d79..aab2867d 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -17,6 +17,7 @@ #' @param boot.parallel Whether to run the bootstrapping in parallel. See details. #' @param BPPARAM BiocParallelParam object to use for parallelization. See details. #' +#' @inherit scCompartments details #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment diff --git a/R/scCompartments.R b/R/scCompartments.R index 42a0ba4e..c2cca96f 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -15,6 +15,52 @@ #' @param boot.parallel Whether to run the bootstrapping in parallel. See details. #' @param BPPARAM BiocParallelParam object to use for parallelization. See details. #' +#' @details +#' +#' compartmap uses `BiocParallel` to parallelize operations in four +#' configurations. The default setting is to parallelize across columns but not +#' bootstraps using the thread count as reported by `BiocParallel::bpparam()`, +#' which is usually two cores fewer than the number of available cores. +#' Parallel bootstrapping is disabled by default to avoid nested parallelism +#' issues but can be done independent of column-wise parallelization. +#' + +#' ## Available configurations +#' +#' ### Serial bootstrapping +#' +#' - Serially with just one core: +#' `BPPARAM = BiocParallel::SerialParam()` +#' +#' - Parallel across columns and serially across bootstraps: +#' `BPPARAM = BiocParallel::MulticoreParam(n)` where `n` is the number of +#' threads to use +#' +#' See `?BiocParallel::BiocParallelParam` for other parallel backends. Parallel +#' backends may also be passed to `BiocParallel::register()` to make them +#' available to `bpparam()`. + +#' ### Parallel bootstrapping +#' +#' Set `boot.parallel = TRUE` for one the these configurations: +#' +#' - Serially across columns and parallel across bootstraps: Set `BPPARAM = +#' list(SerialParam(), MulticoreParam(n))' +#' +#' - Parallel across both columns and bootstraps: Set `BPPARAM = +#' list(MulticoreParam(outer), MulticoreParam(inner))` where `outer` is the +#' thread count for column-wise operations and `inner` the thread count for +#' bootstrapping. The required number of threads is given by +#' +#' `( outer * inner ) + outer` +#' +#' We recommend using an explicit list of two BiocParallelParam backends over +#' relying on `register()` and `bpparam()` for parallelizing across bootstraps. +#' With nested `bplapply` calls, the registered backend is used for both the +#' outer and inner parallel loops. On a system with 8 available threads if the +#' registered backend asks for 4 workers, it will try to use 20 threads in the +#' nested loops. Instead to use all 8 cores, set +#' `BPPARAM = list(MulticoreParam(2), MulticoreParam(3))`. #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index a9bbc0fe..8ac6ea61 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -53,6 +53,52 @@ A RaggedExperiment of inferred compartments \description{ \code{arrayCompartments} returns estimated A/B compartments from methylation array data. } +\details{ +compartmap uses \code{BiocParallel} to parallelize operations in four +configurations. The default setting is to parallelize across columns but not +bootstraps using the thread count as reported by \code{BiocParallel::bpparam()}, +which is usually two cores fewer than the number of available cores. +Parallel bootstrapping is disabled by default to avoid nested parallelism +issues but can be done independent of column-wise parallelization. +\subsection{Available configurations}{ +\subsection{Serial bootstrapping}{ +\itemize{ +\item Serially with just one core: +\code{BPPARAM = BiocParallel::SerialParam()} +\item Parallel across columns and serially across bootstraps: +\code{BPPARAM = BiocParallel::MulticoreParam(n)} where \code{n} is the number of +threads to use +} + +See \code{?BiocParallel::BiocParallelParam} for other parallel backends. Parallel +backends may also be passed to \code{BiocParallel::register()} to make them +available to \code{bpparam()}. +} + +\subsection{Parallel bootstrapping}{ + +Set \code{boot.parallel = TRUE} for one the these configurations: +\itemize{ +\item Serially across columns and parallel across bootstraps: Set `BPPARAM = +list(SerialParam(), MulticoreParam(n))' +\item Parallel across both columns and bootstraps: Set \code{BPPARAM = list(MulticoreParam(outer), MulticoreParam(inner))} where \code{outer} is the +thread count for column-wise operations and \code{inner} the thread count for +bootstrapping. The required number of threads is given by +} + +\code{( outer * inner ) + outer} + +We recommend using an explicit list of two BiocParallelParam backends over +relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. +With nested \code{bplapply} calls, the registered backend is used for both the +outer and inner parallel loops. On a system with 8 available threads if the +registered backend asks for 4 workers, it will try to use 20 threads in the +nested loops. Instead to use all 8 cores, set +\code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. +} + +} +} \examples{ if (requireNamespace("minfi", quietly = TRUE)) { diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index 97c8182e..45c9b5e8 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -47,12 +47,57 @@ A RaggedExperiment of inferred compartments \description{ \code{scCompartments} returns estimated A/B compartments from sc-seq data. } +\details{ +compartmap uses \code{BiocParallel} to parallelize operations in four +configurations. The default setting is to parallelize across columns but not +bootstraps using the thread count as reported by \code{BiocParallel::bpparam()}, +which is usually two cores fewer than the number of available cores. +Parallel bootstrapping is disabled by default to avoid nested parallelism +issues but can be done independent of column-wise parallelization. +\subsection{Available configurations}{ +\subsection{Serial bootstrapping}{ +\itemize{ +\item Serially with just one core: +\code{BPPARAM = BiocParallel::SerialParam()} +\item Parallel across columns and serially across bootstraps: +\code{BPPARAM = BiocParallel::MulticoreParam(n)} where \code{n} is the number of +threads to use +} + +See \code{?BiocParallel::BiocParallelParam} for other parallel backends. Parallel +backends may also be passed to \code{BiocParallel::register()} to make them +available to \code{bpparam()}. +} + +\subsection{Parallel bootstrapping}{ + +Set \code{boot.parallel = TRUE} for one the these configurations: +\itemize{ +\item Serially across columns and parallel across bootstraps: Set `BPPARAM = +list(SerialParam(), MulticoreParam(n))' +\item Parallel across both columns and bootstraps: Set \code{BPPARAM = list(MulticoreParam(outer), MulticoreParam(inner))} where \code{outer} is the +thread count for column-wise operations and \code{inner} the thread count for +bootstrapping. The required number of threads is given by +} + +\code{( outer * inner ) + outer} + +We recommend using an explicit list of two BiocParallelParam backends over +relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. +With nested \code{bplapply} calls, the registered backend is used for both the +outer and inner parallel loops. On a system with 8 available threads if the +registered backend asks for 4 workers, it will try to use 20 threads in the +nested loops. Instead to use all 8 cores, set +\code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. +} + +} +} \examples{ data("k562_scrna_chr14", package = "compartmap") sc_compartments <- scCompartments( k562_scrna_chr14, chr = "chr14", - parallel = FALSE, bootstrap = FALSE, genome = "hg19" ) From f0e3019451c9438de74f1b82ceede865ee5422b7 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:12:03 -0500 Subject: [PATCH 14/29] docs(vignettes): update call calls with BPPARAM --- vignettes/compartmap.Rmd | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vignettes/compartmap.Rmd b/vignettes/compartmap.Rmd index 86324010..5aa79522 100644 --- a/vignettes/compartmap.Rmd +++ b/vignettes/compartmap.Rmd @@ -54,7 +54,8 @@ k562_compartments <- scCompartments( group = TRUE, bootstrap = FALSE, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) ``` @@ -77,7 +78,8 @@ k562_compartments.boot <- scCompartments( bootstrap = TRUE, num.bootstraps = 10, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) # Flip the domain sign if the sign coherence is discordant in 80% of the bootstraps @@ -280,7 +282,8 @@ k562_scrna_chr14_raw_domains <- scCompartments(k562_scrna_se_chr14, bootstrap = TRUE, num.bootstraps = 10, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) ``` From edd022784e6b1d1f6ec92fdb8dfd5ab1a0ce770a Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:18:37 -0500 Subject: [PATCH 15/29] add logging --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 19e01d04..dc5a5f79 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Depends: Imports: BiocSingular, BiocParallel, + futile.logger, rlang, S4Vectors, IRanges, From b4326ddc88a9ae18f76804de98e58423c1936370 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:26:49 -0500 Subject: [PATCH 16/29] fix(getCompartments): remove bpparams setting done by get_nested_params --- R/getCompartments.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index 414941a6..48831fc1 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -35,12 +35,6 @@ getCompartments <- function( ) } - if (boot.parallel) { - innerBPPARAM <- bpparams[[2]] - } else { - innerBPPARAM <- BiocParallel::SerialParam() - } - if (group) { message("Computing group level compartments") compartments.list <- bplapply( From 34488deba02c5a6cddd9359efcf2edb120153227 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 12:30:45 -0500 Subject: [PATCH 17/29] fix(logging): use futil.logger and move most to debug level reduce verbosity but configurable --- NAMESPACE | 2 ++ R/bootstrapCompartments.R | 5 ----- R/fixCompartments.R | 6 +++--- R/getABSignal.R | 6 +++--- R/getBinMatrix.R | 2 +- R/getCompartments.R | 21 ++++++++++++++------- R/getCorMatrix.R | 4 ++-- R/getGlobalMeans.R | 4 ++-- R/meanSmoother.R | 2 +- R/plotAB.R | 2 +- R/preprocessArrays.R | 4 ++-- R/shrinkBins.R | 2 +- R/summarizeBootstraps.R | 4 ++-- 13 files changed, 34 insertions(+), 30 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 38f41365..e498f7ac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,6 +62,8 @@ importFrom(GenomeInfoDb,seqlengths) importFrom(GenomeInfoDb,seqlevels) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,subjectHits) +importFrom(futile.logger,flog.debug) +importFrom(futile.logger,flog.info) importFrom(ggplot2,aes) importFrom(ggplot2,element_blank) importFrom(ggplot2,geom_raster) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index a129faad..7a835302 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -59,11 +59,6 @@ bootstrapCompartments <- function( } # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") - # if (parallel) { - # message("Bootstrapping in parallel with ", cores, " cores.") - # } else { - # message("Not bootstrapping in parallel will take a long time...") - # } # bootstrap and recompute compartments resamp.compartments <- bplapply( diff --git a/R/fixCompartments.R b/R/fixCompartments.R index 8485a952..af502336 100644 --- a/R/fixCompartments.R +++ b/R/fixCompartments.R @@ -22,7 +22,7 @@ fixCompartments <- function(obj, min.conf = 0.8, parallel = FALSE, cores = 1) { return(flipper(obj, min.conf)) } - message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.info("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") # go through and invert compartments based on the min.conf flip_compartments_lst <- mclapply(obj, flipper, min.conf, mc.cores = ifelse(parallel, cores, 1)) names(flip_compartments_lst) <- names(obj) @@ -41,8 +41,8 @@ flipper <- function(input_obj, min.conf) { stop("Bootstrapping was not performed. Cannot fix compartments.") } - message("Assuming we only have a single sample to process.") - message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.debug("Assuming we only have a single sample to process.") + flog.debug("Fixing compartments using a minimum confidence score of %f %%", min.conf * 100) invert_compartments <- apply(mcols(input_obj), 1, .inverter, min.conf) mcols(input_obj)$flip.compartment <- invert_compartments diff --git a/R/getABSignal.R b/R/getABSignal.R index e36a6901..a68ca690 100644 --- a/R/getABSignal.R +++ b/R/getABSignal.R @@ -52,13 +52,13 @@ getABSignal <- function(x, squeeze = FALSE, assay = c("rna", "atac", "array")) { assay <- match.arg(assay) gr <- x$gr - message("Calculating eigenvectors.") + flog.debug("Calculating eigenvectors.") pc <- getSVD(x$binmat.cor, sing.vec = "right") if (squeeze) pc <- ifisherZ(pc) - message("Smoothing eigenvector.") + flog.debug("Smoothing eigenvector.") gr$pc <- meanSmoother(pc) - message("Done smoothing.") + flog.debug("Done smoothing.") gr$compartments <- extractOpenClosed(gr, assay = assay) return(gr) diff --git a/R/getBinMatrix.R b/R/getBinMatrix.R index 5899c6c0..e16be4e0 100644 --- a/R/getBinMatrix.R +++ b/R/getBinMatrix.R @@ -68,7 +68,7 @@ getBinMatrix <- function( ids <- findOverlaps(genloc, gr.bin, select = "first") binCount <- length(gr.bin) - message(binCount, " bins created...") + flog.debug("%d bins created...", binCount) mat.bin <- apply(mat, 2, function(x) { .summarizeBins(x, binCount, ids, FUN) diff --git a/R/getCompartments.R b/R/getCompartments.R index 48831fc1..b89a4ec2 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -1,5 +1,6 @@ #' Run compartment inference #' @importFrom BiocParallel bplapply +#' @importFrom futile.logger flog.info flog.debug getCompartments <- function( obj, res, @@ -14,7 +15,7 @@ getCompartments <- function( bpparams ) { if (is.null(chr)) { - message("Assuming we want to process all chromosomes.") + flog.info("Assuming we want to process all chromosomes.") chr <- getChrs(obj) } @@ -25,7 +26,7 @@ getCompartments <- function( prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = assay) if (bootstrap) { - message("Pre-computing the bootstrap global means.") + flog.info("Pre-computing the bootstrap global means.") bmeans <- precomputeBootstrapMeans( obj = obj, BPPARAM = bpparams[[1]], @@ -35,8 +36,14 @@ getCompartments <- function( ) } + if (boot.parallel) { + flog.info("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) + } else { + flog.info("Not bootstrapping in parallel will take a long time...") + } + if (group) { - message("Computing group level compartments") + flog.info("Computing group level compartments") compartments.list <- bplapply( chr, function(c) { @@ -44,7 +51,7 @@ getCompartments <- function( obj, obj, assay = assay, - BPPARAM = innerBPPARAM, + BPPARAM = bpparams[[2]], res = res, chr = c, group = group, @@ -63,7 +70,7 @@ getCompartments <- function( return(compartments) } - message("Computing single-cell level compartments") + flog.info("Computing single-cell level compartments") compartments <- bplapply( columns, function(s) { @@ -74,7 +81,7 @@ getCompartments <- function( obj.sub, obj, assay = assay, - BPPARAM = innerBPPARAM, + BPPARAM = bpparams[[2]], res = res, chr = c, group = group, @@ -114,7 +121,7 @@ getCompartments <- function( genome <- match.arg(genome) # update - message("Computing compartments for ", chr) + flog.debug("Computing compartments for %s", chr) obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") diff --git a/R/getCorMatrix.R b/R/getCorMatrix.R index 914b6495..da2e68fb 100644 --- a/R/getCorMatrix.R +++ b/R/getCorMatrix.R @@ -43,7 +43,7 @@ #' # Calculate correlations #' getCorMatrix(bin.counts) getCorMatrix <- function(binmat, squeeze = FALSE) { - message("Calculating correlations...") + flog.debug("Calculating correlations...") # bind back up the global means and shrunken bins binmat$x <- cbind(binmat$x, binmat$gmeans) binmat.cor <- suppressWarnings(cor(t(binmat$x))) @@ -51,6 +51,6 @@ getCorMatrix <- function(binmat, squeeze = FALSE) { if (squeeze) { binmat.cor <- fisherZ(binmat.cor) } - message("Done...") + flog.debug("Done...") list(gr.cor = binmat$gr, binmat.cor = binmat.cor) } diff --git a/R/getGlobalMeans.R b/R/getGlobalMeans.R index 6a71cf92..cf4ab83b 100644 --- a/R/getGlobalMeans.R +++ b/R/getGlobalMeans.R @@ -30,7 +30,7 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" # check if shrinkage targets are being used if (!is.null(targets)) { stargets <- getShrinkageTargets(obj, targets) - message("Using ", paste(shQuote(targets), collapse = ", "), " as shrinkage targets...") + flog.debug("Using %s as shrinkage targets", paste(shQuote(targets), collapse = ", ")) globalMean.input <- stargets } else { globalMean.input <- obj @@ -86,7 +86,7 @@ precomputeBootstrapMeans <- function( bootMean <- bplapply( 1:num.bootstraps, function(b) { - # message("Working on bootstrap ", b) + flog.debug("Working on bootstrap ", b) resamp.mat <- .resampleMatrix(assay.data) computeGlobalMean(resamp.mat) }, diff --git a/R/meanSmoother.R b/R/meanSmoother.R index 09be0d7d..f645d2c5 100644 --- a/R/meanSmoother.R +++ b/R/meanSmoother.R @@ -22,7 +22,7 @@ #' @export meanSmoother <- function(mat, k = 1, iters = 2, delta = 0, weights = NULL) { if (k == 0) { - message("Returning unsmoothed mat as 'k' = 0") + flog.debug("Returning unsmoothed mat as 'k' = 0") return(mat) } diff --git a/R/plotAB.R b/R/plotAB.R index 8b4db5b6..d1040a6e 100644 --- a/R/plotAB.R +++ b/R/plotAB.R @@ -113,7 +113,7 @@ plotAB <- function( NAs <- is.na(x) x[!NAs] <- x[!NAs] / sqrt(sum(x[!NAs]^2)) na.count <- sum(NAs) - if (na.count > 0) message(sprintf("[.unitarize] %i missing values were ignored.\n", na.count)) + if (na.count > 0) flog.debug("[.unitarize] %i missing values were ignored.\n", na.count) x } diff --git a/R/preprocessArrays.R b/R/preprocessArrays.R index 4e24d220..1d3694c9 100644 --- a/R/preprocessArrays.R +++ b/R/preprocessArrays.R @@ -40,13 +40,13 @@ preprocessArrays <- function( # this should be default but allows handling if given M-values in Beta slot is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 if (is.beta) { - message("Converting to squeezed M-values.") + flog.debug("Converting to squeezed M-values.") assays(obj.opensea)$Beta <- flogit(assays(obj.opensea)$Beta) } # impute missing values if possible if (any(is.na(minfi::getBeta(obj.opensea)))) { - message("Imputing missing values.") + flog.debug("Imputing missing values.") obj.opensea <- imputeKNN(obj.opensea, assay = "array") } diff --git a/R/shrinkBins.R b/R/shrinkBins.R index 136f4a67..baace78c 100644 --- a/R/shrinkBins.R +++ b/R/shrinkBins.R @@ -55,7 +55,7 @@ shrinkBins <- function( if (target.count == 1) { stop("Cannot perform targeted bin-level shrinkage with one target sample.") } else if (target.count < 4) { - message("Number of means fewer than 4. Using Bayes instead of JSE.") + flog.debug("Number of means fewer than 4. Using Bayes instead of JSE.") jse <- FALSE } } diff --git a/R/summarizeBootstraps.R b/R/summarizeBootstraps.R index b540f354..df8c4d19 100644 --- a/R/summarizeBootstraps.R +++ b/R/summarizeBootstraps.R @@ -21,7 +21,7 @@ summarizeBootstraps <- function(boot.list, est.ab, q = 0.95, assay = c("rna", "a is.atac_or_rna <- assay %in% c("atac", "rna") - message("Summarizing bootstraps.") + flog.debug("Summarizing bootstraps") # filter out failed compartment estimates boot.list <- removeEmptyBoots(boot.list) @@ -37,7 +37,7 @@ summarizeBootstraps <- function(boot.list, est.ab, q = 0.95, assay = c("rna", "a est.ab$boot.open <- .getBootRowSums(1) est.ab$boot.closed <- .getBootRowSums(2) - message("Computing Agresti-Coull 95% confidence intervals.") + flog.debug("Computing Agresti-Coull 95% confidence intervals") .getCI(est.ab, q) } From af5477ac6f835448d32b6c01c25dc3f4e24c6eac Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 12:31:57 -0500 Subject: [PATCH 18/29] fix(bootstrap): don't show progress bar in the bootstrap step pollutes progress info with too many bars --- R/bootstrapCompartments.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index 7a835302..07f14a30 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -61,6 +61,7 @@ bootstrapCompartments <- function( # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") # bootstrap and recompute compartments + BiocParallel::bpprogressbar(BPPARAM) <- FALSE resamp.compartments <- bplapply( 1:ncol(bmeans), function(b) { From fffcd1bdd537e0f5250dbdb5af06087a53640242 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 13:07:59 -0500 Subject: [PATCH 19/29] fix(parallel): correctly calculate total when outer/inner is 1 --- R/parallel.R | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/R/parallel.R b/R/parallel.R index 908afa46..aa7dabd4 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -2,19 +2,31 @@ #' @keywords internal check_worker_count <- function(bpparam, boot.parallel, avail_workers = parallelly::availableCores()) { workers <- get_bpnworkers(bpparam) - total <- sum(Reduce(`*`, workers), workers[1]) + total <- required_workers(workers) if (verify_workers(total)) { return(TRUE) } - msg <- sprintf( - "Using %1$d outer and %2$d inner workers would require %3$d workers (%1$d + (%1$d x %2$d)) but your system has only %4$d cores. - See parallelly::availableCores() for more information on available resources", - workers[1], - workers[2], - total, - avail_workers - ) + avail_msg <- sprintf("but your system has only %d cores", avail_workers) + info_msg <- "See parallelly::availableCores(which = 'all') for more information on available resources" + if (workers[1] == 1 | workers[2] == 1) { + msg <- sprintf( + "Requested %d %s workers %s\n%s", + max(workers), + ifelse(workers[1] == 1, "inner", "outer"), + avail_msg, + info_msg + ) + } else { + msg <- sprintf( + "Requested %1$d outer and %2$d inner workers that require %3$d total workers (%1$d + (%1$d x %2$d)) %4$s\n%5$s", + workers[1], + workers[2], + total, + avail_msg, + info_msg + ) + } stop(msg) } @@ -34,6 +46,16 @@ bpnworkers.list <- function(bplist) { unlist(Map(bpnworkers, bplist)) } +required_workers <- function(workers) { + if (workers[1] == 1) { + return(workers[2]) + } + if (workers[2] == 1) { + return(workers[1]) + } + sum(Reduce(`*`, workers), workers[1]) +} + #' Verify that the input BiocParallelParam is valid #' @param A BiocParallelParam or list of 2 BiocParallelParam objects #' @importFrom BiocParallel bpnworkers From 5f9cc9b075f6e561568b0b7e0839c0a2b54a45e5 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 13:15:45 -0500 Subject: [PATCH 20/29] fix(bootstrap): only message about slow bootstrap if bootstrapping --- R/getCompartments.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index b89a4ec2..6bab517c 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -36,9 +36,9 @@ getCompartments <- function( ) } - if (boot.parallel) { + if (bootstrap & boot.parallel) { flog.info("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) - } else { + } else if (bootstrap & !boot.parallel) { flog.info("Not bootstrapping in parallel will take a long time...") } From c29e089640ff4d5e43430b57936dba6387726d4e Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 13:53:03 -0500 Subject: [PATCH 21/29] fix(logging): move bootstrap message after computing message --- R/getCompartments.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index 6bab517c..a602e736 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -36,14 +36,16 @@ getCompartments <- function( ) } + boot_msg <- "" if (bootstrap & boot.parallel) { - flog.info("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) + boot_msg <- sprintf("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) } else if (bootstrap & !boot.parallel) { - flog.info("Not bootstrapping in parallel will take a long time...") + boot_msg <- "Not bootstrapping in parallel will take a long time..." } if (group) { flog.info("Computing group level compartments") + flog.info(boot_msg) compartments.list <- bplapply( chr, function(c) { @@ -71,6 +73,7 @@ getCompartments <- function( } flog.info("Computing single-cell level compartments") + flog.info(boot_msg) compartments <- bplapply( columns, function(s) { From eeb7361b9f504e89207c61d7e55161a94d382d9c Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 14 Nov 2025 14:01:26 -0500 Subject: [PATCH 22/29] docs(parallel): add info on load balancing different configs --- R/scCompartments.R | 23 +++++++++++++++++++++++ man/arrayCompartments.Rd | 23 +++++++++++++++++++++++ man/scCompartments.Rd | 23 +++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/R/scCompartments.R b/R/scCompartments.R index c2cca96f..a29dfa25 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -54,6 +54,8 @@ #' #' `( outer * inner ) + outer` #' +#' which is more easily calculated as `outer * (inner + 1)`. +#' #' We recommend using an explicit list of two BiocParallelParam backends over #' relying on `register()` and `bpparam()` for parallelizing across bootstraps. #' With nested `bplapply` calls, the registered backend is used for both the @@ -62,6 +64,27 @@ #' nested loops. Instead to use all 8 cores, set #' `BPPARAM = list(MulticoreParam(2), MulticoreParam(3))`. #' +#' ### Load balancing +#' +#' Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +#' in parallel, you can use nested parallelism. If you are working on just 1 +#' chromosome, put all cores into the inner bootstrapping backend. Conversely +#' with multiple chromosmes without bootstrapping, put all available workers in +#' the outer loop. +#' +#' In general, use more 'outer' workers, which loop over chromosmes when `group +#' = TRUE` and cells when `group = FALSE`, than 'inner' workers that loop over +#' bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +#' inner. +#' +#' When `group = FALSE`, use `MulticoreParam()` only on the outer workers. We +#' find that parallelizing at both column and bootstrap levels with the +#' single-cell inference is slower than only parallelizing at the column-level. +#' +#' With `group = TRUE`, minimize the difference between the two worker counts: +#' with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +#' inner. +#' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment #' @import RaggedExperiment diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 8ac6ea61..1162e663 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -88,6 +88,8 @@ bootstrapping. The required number of threads is given by \code{( outer * inner ) + outer} +which is more easily calculated as \code{outer * (inner + 1)}. + We recommend using an explicit list of two BiocParallelParam backends over relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. With nested \code{bplapply} calls, the registered backend is used for both the @@ -97,6 +99,27 @@ nested loops. Instead to use all 8 cores, set \code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. } +\subsection{Load balancing}{ + +Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +in parallel, you can use nested parallelism. If you are working on just 1 +chromosome, put all cores into the inner bootstrapping backend. Conversely +with multiple chromosmes without bootstrapping, put all available workers in +the outer loop. + +In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +inner. + +When \code{group = FALSE}, use \code{MulticoreParam()} only on the outer workers. We +find that parallelizing at both column and bootstrap levels with the +single-cell inference is slower than only parallelizing at the column-level. + +With \code{group = TRUE}, minimize the difference between the two worker counts: +with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +inner. +} + } } \examples{ diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index 45c9b5e8..a32103ae 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -82,6 +82,8 @@ bootstrapping. The required number of threads is given by \code{( outer * inner ) + outer} +which is more easily calculated as \code{outer * (inner + 1)}. + We recommend using an explicit list of two BiocParallelParam backends over relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. With nested \code{bplapply} calls, the registered backend is used for both the @@ -91,6 +93,27 @@ nested loops. Instead to use all 8 cores, set \code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. } +\subsection{Load balancing}{ + +Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +in parallel, you can use nested parallelism. If you are working on just 1 +chromosome, put all cores into the inner bootstrapping backend. Conversely +with multiple chromosmes without bootstrapping, put all available workers in +the outer loop. + +In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +inner. + +When \code{group = FALSE}, use \code{MulticoreParam()} only on the outer workers. We +find that parallelizing at both column and bootstrap levels with the +single-cell inference is slower than only parallelizing at the column-level. + +With \code{group = TRUE}, minimize the difference between the two worker counts: +with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +inner. +} + } } \examples{ From cb2bdc88330cf8e680389df4ea5375ba794d9095 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 14 Nov 2025 14:03:24 -0500 Subject: [PATCH 23/29] feat(parallel): check if user BPPARAM settings are optimal --- R/arrayCompartments.R | 2 +- R/parallel.R | 22 +++++++++++++++++++++- R/scCompartments.R | 2 +- man/check_worker_count.Rd | 4 +++- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index aab2867d..511d1432 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -59,7 +59,7 @@ arrayCompartments <- function( verifyCoords(obj) verifyAssayNames(obj, assay = "array") bpparams <- get_nested_params(BPPARAM) - check_worker_count(bpparams) + check_worker_count(bpparams, group, length(chr), bootstrap) # preprocess the arrays if (preprocess) { diff --git a/R/parallel.R b/R/parallel.R index aa7dabd4..af82a62e 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -1,9 +1,29 @@ +#' Check and tell users whether input BPPARAM are optimal given group/sc +#' inference and bootstrapping +#' @keywords internal +check_optim <- function(workers, group, chr_count, bootstrap) { + if (group & chr_count < workers[1]) { + flog.info( + "Grouped inference with more outer workers than chromosmes leaves %d of %d workers unused", + workers[1] - chr_count, + workers[1] + ) + if (bootstrap) { + flog.info("Consider using a single core for the outer worker and more cores for the inner bootstrap worker") + } + } + if (!group & bootstrap & workers[1] < workers[2]) { + flog.info("More outer (column-wise) than inner (bootstrap) workers is faster for single-cell inference") + } +} + #' Check that the number of requested workers is valid #' @keywords internal -check_worker_count <- function(bpparam, boot.parallel, avail_workers = parallelly::availableCores()) { +check_worker_count <- function(bpparam, group, chr_count, bootstrap, avail_workers = parallelly::availableCores()) { workers <- get_bpnworkers(bpparam) total <- required_workers(workers) if (verify_workers(total)) { + check_optim(workers, group, chr_count, bootstrap) return(TRUE) } diff --git a/R/scCompartments.R b/R/scCompartments.R index a29dfa25..a1986a8f 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -115,7 +115,7 @@ scCompartments <- function( verifyCoords(obj) bpparams <- get_nested_params(BPPARAM, boot.parallel) - check_worker_count(bpparams) + check_worker_count(bpparams, group, length(chr), bootstrap) # which assay are we working on if (!all(assay %in% c("atac", "rna"))) stop("Supported assays are 'atac', and 'rna'.") diff --git a/man/check_worker_count.Rd b/man/check_worker_count.Rd index 62d20728..5dd80962 100644 --- a/man/check_worker_count.Rd +++ b/man/check_worker_count.Rd @@ -6,7 +6,9 @@ \usage{ check_worker_count( bpparam, - boot.parallel, + group, + chr_count, + bootstrap, avail_workers = parallelly::availableCores() ) } From dbd880083f629b1b1815cf1f27f435df7faa51c4 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 14 Nov 2025 14:03:41 -0500 Subject: [PATCH 24/29] fix(bootstrapping): don't warn about slow serial bootstrapping Its only slower than parallel when group = TRUE --- R/getCompartments.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index a602e736..25a4e53f 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -39,8 +39,8 @@ getCompartments <- function( boot_msg <- "" if (bootstrap & boot.parallel) { boot_msg <- sprintf("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) - } else if (bootstrap & !boot.parallel) { - boot_msg <- "Not bootstrapping in parallel will take a long time..." + } else if (bootstrap & !boot.parallel & group) { + boot_msg <- "Not bootstrapping in parallel could take a long time..." } if (group) { From 8dbaa4c7c56a562a7f933defeb2db69bea27d579 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 11:09:22 -0500 Subject: [PATCH 25/29] fix(parallel): add missing SerialParam import --- NAMESPACE | 1 + R/parallel.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index e498f7ac..15f612b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ import(HDF5Array) import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) +importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) importFrom(BiocParallel,bpnworkers) importFrom(BiocParallel,bpparam) diff --git a/R/parallel.R b/R/parallel.R index af82a62e..b8a8b006 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -100,6 +100,7 @@ verify_workers <- function(n_workers) { #' The outer param is across the input samples/columns and the second is for #' bootstrapping. If `boot.parallel` is FALSE, the inner param is set to #' `SerialParam`. +#' @importFrom BiocParallel SerialParam #' @keywords internal get_nested_params <- function(BPPARAM, boot.parallel) { stopifnot("Only two BiocParallelParam objects can be used" = length(BPPARAM) <= 2) From b9c6162ed9fdf2bfdd1c1ae72834bade17fccc50 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 11:24:26 -0500 Subject: [PATCH 26/29] chore(parallel): fix 'chromosme' typo --- R/parallel.R | 2 +- R/scCompartments.R | 4 ++-- man/arrayCompartments.Rd | 4 ++-- man/scCompartments.Rd | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/parallel.R b/R/parallel.R index b8a8b006..93e5faa7 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -4,7 +4,7 @@ check_optim <- function(workers, group, chr_count, bootstrap) { if (group & chr_count < workers[1]) { flog.info( - "Grouped inference with more outer workers than chromosmes leaves %d of %d workers unused", + "Grouped inference with more outer workers than chromosomes leaves %d of %d workers unused", workers[1] - chr_count, workers[1] ) diff --git a/R/scCompartments.R b/R/scCompartments.R index a1986a8f..7a839d6f 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -69,10 +69,10 @@ #' Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping #' in parallel, you can use nested parallelism. If you are working on just 1 #' chromosome, put all cores into the inner bootstrapping backend. Conversely -#' with multiple chromosmes without bootstrapping, put all available workers in +#' with multiple chromosomes without bootstrapping, put all available workers in #' the outer loop. #' -#' In general, use more 'outer' workers, which loop over chromosmes when `group +#' In general, use more 'outer' workers, which loop over chromosomes when `group #' = TRUE` and cells when `group = FALSE`, than 'inner' workers that loop over #' bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 #' inner. diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 1162e663..57301f69 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -104,10 +104,10 @@ nested loops. Instead to use all 8 cores, set Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping in parallel, you can use nested parallelism. If you are working on just 1 chromosome, put all cores into the inner bootstrapping backend. Conversely -with multiple chromosmes without bootstrapping, put all available workers in +with multiple chromosomes without bootstrapping, put all available workers in the outer loop. -In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +In general, use more 'outer' workers, which loop over chromosomes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 inner. diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index a32103ae..eae9ca74 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -98,10 +98,10 @@ nested loops. Instead to use all 8 cores, set Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping in parallel, you can use nested parallelism. If you are working on just 1 chromosome, put all cores into the inner bootstrapping backend. Conversely -with multiple chromosmes without bootstrapping, put all available workers in +with multiple chromosomes without bootstrapping, put all available workers in the outer loop. -In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +In general, use more 'outer' workers, which loop over chromosomes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 inner. From 2feb10bcf7a81adcdc47bfc5eb5acf933734de63 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 11:24:59 -0500 Subject: [PATCH 27/29] docs(parallel): add check_optim --- man/check_optim.Rd | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 man/check_optim.Rd diff --git a/man/check_optim.Rd b/man/check_optim.Rd new file mode 100644 index 00000000..582e2772 --- /dev/null +++ b/man/check_optim.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{check_optim} +\alias{check_optim} +\title{Check and tell users whether input BPPARAM are optimal given group/sc +inference and bootstrapping} +\usage{ +check_optim(workers, group, chr_count, bootstrap) +} +\description{ +Check and tell users whether input BPPARAM are optimal given group/sc +inference and bootstrapping +} +\keyword{internal} From 29af8823d1b8fdc6f106f92e9d420afd0ae64de8 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 12:01:20 -0500 Subject: [PATCH 28/29] chore(DESCRIPTION): add missing parallelly --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index dc5a5f79..8a33e12d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Imports: GenomicRanges, GenomeInfoDb, parallel, + parallelly, methods, grid, ggplot2, From bccf57e77124ea9aebc982b994d181432a044e01 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 12:34:47 -0500 Subject: [PATCH 29/29] test: remove message tests since they're now handled by futile.logger --- R/fixCompartments.R | 6 ++++-- tests/testthat/test-fixCompartments.R | 1 + tests/testthat/test-getCorMatrix.R | 4 ---- tests/testthat/test-getGlobalMeans.R | 8 ++++---- tests/testthat/test-meanSmoother.R | 5 ----- tests/testthat/test-plotAB.R | 6 +----- tests/testthat/test-summarizeBootstraps.R | 2 -- 7 files changed, 10 insertions(+), 22 deletions(-) diff --git a/R/fixCompartments.R b/R/fixCompartments.R index af502336..70c6b1dd 100644 --- a/R/fixCompartments.R +++ b/R/fixCompartments.R @@ -15,14 +15,16 @@ #' @examples fixCompartments <- function(obj, min.conf = 0.8, parallel = FALSE, cores = 1) { # this function will invert or "fix" compartments based on bootstrapping results - if (is(obj, "RaggedExperiment")) obj <- condenseSE(obj, sample.name = colnames(assay(obj))) + if (is(obj, "RaggedExperiment")) { + obj <- condenseSE(obj, sample.name = colnames(assay(obj))) + } # if we somehow only have 1 sample if (is(obj, "GRanges")) { return(flipper(obj, min.conf)) } - flog.info("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.info("Fixing compartments using a minimum confidence score of %d%%", min.conf * 100) # go through and invert compartments based on the min.conf flip_compartments_lst <- mclapply(obj, flipper, min.conf, mc.cores = ifelse(parallel, cores, 1)) names(flip_compartments_lst) <- names(obj) diff --git a/tests/testthat/test-fixCompartments.R b/tests/testthat/test-fixCompartments.R index e4b7cc28..afb87891 100644 --- a/tests/testthat/test-fixCompartments.R +++ b/tests/testthat/test-fixCompartments.R @@ -80,6 +80,7 @@ test_that("flipper", { }) test_that("fixCompartments", { + futile.logger::flog.threshold(1) lapply(1:length(gr.expected), function(i) { expect_equal( fixCompartments(gr.input[[i]], min.conf), diff --git a/tests/testthat/test-getCorMatrix.R b/tests/testthat/test-getCorMatrix.R index 50e1d101..acc855af 100644 --- a/tests/testthat/test-getCorMatrix.R +++ b/tests/testthat/test-getCorMatrix.R @@ -10,10 +10,6 @@ test_that("getCorMatrix", { expected.result <- list(gr.cor = gr, binmat.cor = expected.cormat) expected.result.squeezed <- list(gr.cor = gr, binmat.cor = expected.cormat.squeezed) - expect_message(getCorMatrix(binmat), "Calculating correlations") - expect_message(getCorMatrix(binmat), "Done") - expect_no_warning(expect_message(getCorMatrix(binmat))) - expect_equal(getCorMatrix(binmat), expected.result) expect_equal(getCorMatrix(binmat, squeeze = TRUE), expected.result.squeezed) }) diff --git a/tests/testthat/test-getGlobalMeans.R b/tests/testthat/test-getGlobalMeans.R index 28c84c99..48733685 100644 --- a/tests/testthat/test-getGlobalMeans.R +++ b/tests/testthat/test-getGlobalMeans.R @@ -65,18 +65,18 @@ test_that("getGlobalMeans", { test_that("precomputeBootstrapMeans", { expected.rownames <- as.character(gr) - boot.mean <- precomputeBootstrapMeans(se.rna, num.bootstraps = 2) + boot.mean <- precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2) expect_equal(rownames(boot.mean), expected.rownames) lapply(1:50, function(boot_count) { - boot.mean <- precomputeBootstrapMeans(se.rna, num.bootstraps = boot_count) + boot.mean <- precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = boot_count) expect_equal(ncol(boot.mean), boot_count) }) expect_error( - precomputeBootstrapMeans(se.rna, num.bootstraps = 2, targets = c(1:4)), + precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2, targets = c(1:4)), "Need 5 or more samples for targeted bootstrapping to work." ) expect_no_error( - precomputeBootstrapMeans(se.rna, num.bootstraps = 2, targets = c(1:5)) + precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2, targets = c(1:5)) ) }) diff --git a/tests/testthat/test-meanSmoother.R b/tests/testthat/test-meanSmoother.R index dc6b9fe9..2c00347b 100644 --- a/tests/testthat/test-meanSmoother.R +++ b/tests/testthat/test-meanSmoother.R @@ -1,10 +1,5 @@ test_that("meanSmoother", { mat <- matrix(1:5, nrow = 5, ncol = 5) - expect_message( - meanSmoother(mat, k = 0), - "Returning unsmoothed mat as 'k' = 0", - fixed = TRUE - ) expect_equal( meanSmoother(mat, k = 0), mat diff --git a/tests/testthat/test-plotAB.R b/tests/testthat/test-plotAB.R index d04b0867..3634abb3 100644 --- a/tests/testthat/test-plotAB.R +++ b/tests/testthat/test-plotAB.R @@ -34,11 +34,7 @@ test_that(".unitarize", { scaling <- sqrt(sum(centered[-4]^2)) centered / scaling } - expect_message( - compartmap:::.unitarize(vec.withNA, medianCenter = FALSE), - "[.unitarize] 1 missing values were ignored.", - fixed = TRUE - ) + expect_equal(compartmap:::.unitarize(vec.withNA), expected.withNA) expected.withNA.nomedian <- { diff --git a/tests/testthat/test-summarizeBootstraps.R b/tests/testthat/test-summarizeBootstraps.R index 4b6ec355..a330d3a2 100644 --- a/tests/testthat/test-summarizeBootstraps.R +++ b/tests/testthat/test-summarizeBootstraps.R @@ -57,8 +57,6 @@ test_that("summarizeBootstraps", { removeEmptyBoots(list(gr1, gr2)) - expect_message(summarizeBootstraps(list(gr1, gr1), gr1), "Summarizing bootstraps") - tester <- function(one, two, expected) { mcols(gr1) <- data.frame(pc = one) gr1$compartments <- ifelse(mcols(gr1)$pc > 0, "open", "closed")