diff --git a/DESCRIPTION b/DESCRIPTION index ec535185..8a33e12d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,12 +26,15 @@ Depends: HDF5Array Imports: BiocSingular, + BiocParallel, + futile.logger, rlang, S4Vectors, IRanges, GenomicRanges, GenomeInfoDb, parallel, + parallelly, methods, grid, ggplot2, diff --git a/NAMESPACE b/NAMESPACE index 777fa855..15f612b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(agrestiCoullCI) +export(arrayCompartments) export(bootstrapCompartments) export(condenseRE) export(condenseSE) @@ -13,8 +14,6 @@ export(fisherZ) export(fixCompartments) export(flogit) export(getABSignal) -export(getATACABsignal) -export(getArrayABsignal) export(getAssayNames) export(getBinMatrix) export(getChrs) @@ -50,6 +49,10 @@ import(HDF5Array) import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) +importFrom(BiocParallel,SerialParam) +importFrom(BiocParallel,bplapply) +importFrom(BiocParallel,bpnworkers) +importFrom(BiocParallel,bpparam) importFrom(BiocSingular,IrlbaParam) importFrom(BiocSingular,runSVD) importFrom(GenomeInfoDb,"seqlevelsStyle<-") @@ -60,6 +63,8 @@ importFrom(GenomeInfoDb,seqlengths) importFrom(GenomeInfoDb,seqlevels) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,subjectHits) +importFrom(futile.logger,flog.debug) +importFrom(futile.logger,flog.info) importFrom(ggplot2,aes) importFrom(ggplot2,element_blank) importFrom(ggplot2,geom_raster) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R new file mode 100644 index 00000000..511d1432 --- /dev/null +++ b/R/arrayCompartments.R @@ -0,0 +1,95 @@ +#' @title Estimate A/B compartments from methylation array data +#' +#' @description +#' \code{arrayCompartments} returns estimated A/B compartments from methylation array data. +#' +#' @param obj Input SummarizedExperiment object +#' @param res Compartment resolution in bp +#' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) +#' @param group Whether to treat this as a group set of samples +#' @param targets Samples/cells to shrink towards +#' @param bootstrap Whether we should perform bootstrapping of inferred compartments +#' @param num.bootstraps How many bootstraps to run +#' @param preprocess Whether to preprocess the arrays prior to compartment inference +#' @param array.type What type of array is this ("hm450", "EPIC") +#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") +#' @param other Another arbitrary genome to compute compartments on +#' @param boot.parallel Whether to run the bootstrapping in parallel. See details. +#' @param BPPARAM BiocParallelParam object to use for parallelization. See details. +#' +#' @inherit scCompartments details +#' +#' @return A RaggedExperiment of inferred compartments +#' @import SummarizedExperiment +#' @import RaggedExperiment +#' @importFrom parallel mclapply +#' @importFrom GenomeInfoDb keepSeqlevels +#' @importFrom methods as +#' @export +#' +#' @examples +#' +#' if (requireNamespace("minfi", quietly = TRUE)) { +#' data("array_data_chr14", package = "compartmap") +#' array_compartments <- arrayCompartments( +#' array.data.chr14, +#' chr="chr14", +#' parallel=FALSE, +#' bootstrap=FALSE, +#' genome="hg19", +#' array.type="hm450" +#' ) +#' } +arrayCompartments <- function( + obj, + res = 1e6, + chr = NULL, + group = FALSE, + targets = NULL, + bootstrap = TRUE, + num.bootstraps = 1000, + preprocess = TRUE, + array.type = c("hm450", "EPIC"), + genome = c("hg19", "hg38", "mm9", "mm10"), + other = NULL, + boot.parallel = TRUE, + BPPARAM = bpparam() +) { + verifySE(obj) + verifyCoords(obj) + verifyAssayNames(obj, assay = "array") + bpparams <- get_nested_params(BPPARAM) + check_worker_count(bpparams, group, length(chr), bootstrap) + + # preprocess the arrays + if (preprocess) { + obj <- preprocessArrays( + obj = obj, + genome = genome, + other = other, + array.type = array.type + ) + } + + # critical check if imputation was _not_ done + # to send things back to beta land + is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 + if (!is.beta) { + # send things back to beta land + assays(obj)$Beta <- fexpit(assays(obj)$Beta) + } + + getCompartments( + obj = obj, + assay = "array", + res = res, + chr = chr, + targets = targets, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + genome = genome, + group = group, + boot.parallel = boot.parallel, + bpparams = bpparams + ) +} diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index 35e9e171..07f14a30 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -1,14 +1,14 @@ -#' Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals +#' Non-parametric bootstrapping of compartments and summarization of +#' bootstraps/compute confidence intervals #' #' @name bootstrapCompartments #' #' @param obj List object of computed compartments for a sample with 'pc' and 'gr' as elements #' @param original.obj The original, full input SummarizedExperiment of all samples/cells +#' @param BPPARAM BiocParallelParam for parallelizing bootstrapping #' @param bootstrap.samples How many bootstraps to run #' @param chr Which chromosome to operate on #' @param assay What sort of assay are we working on -#' @param parallel Whether to run the bootstrapping in parallel -#' @param cores How many cores to use for parallel processing #' @param targets Targets to shrink towards #' @param res The compartment resolution #' @param genome What genome are we working on @@ -18,7 +18,6 @@ #' @param bootstrap.means Pre-computed bootstrap means matrix #' #' @return Compartment estimates with summarized bootstraps and confidence intervals -#' @importFrom parallel mclapply #' @import SummarizedExperiment #' #' @examples @@ -29,17 +28,16 @@ bootstrapCompartments <- function( obj, original.obj, + BPPARAM, bootstrap.samples = 1000, chr = "chr14", + group = FALSE, assay = c("rna", "atac", "array"), - parallel = TRUE, - cores = 2, targets = NULL, res = 1e6, genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95, svd = NULL, - group = FALSE, bootstrap.means = NULL ) { # function for nonparametric bootstrap of compartments and compute 95% CIs @@ -61,36 +59,36 @@ bootstrapCompartments <- function( } # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") - if (parallel) { - message("Bootstrapping in parallel with ", cores, " cores.") - } else { - message("Not bootstrapping in parallel will take a long time...") - } # bootstrap and recompute compartments - resamp.compartments <- mclapply(1:ncol(bmeans), function(b) { - # get the shrunken bins with new global mean - boot.mean <- as.matrix(bmeans[, b]) - colnames(boot.mean) <- "globalMean" - s.bins <- shrinkBins( - obj, - original.obj, - prior.means = boot.mean, - chr = chr, - res = res, - assay = assay, - genome = genome - ) - cor.bins <- getCorMatrix(s.bins, squeeze = !group) + BiocParallel::bpprogressbar(BPPARAM) <- FALSE + resamp.compartments <- bplapply( + 1:ncol(bmeans), + function(b) { + # get the shrunken bins with new global mean + boot.mean <- as.matrix(bmeans[, b]) + colnames(boot.mean) <- "globalMean" + s.bins <- shrinkBins( + obj, + original.obj, + prior.means = boot.mean, + chr = chr, + res = res, + assay = assay, + genome = genome + ) + cor.bins <- getCorMatrix(s.bins, squeeze = !group) - # Stupid check for perfect correlation with global mean - if (any(is.na(cor.bins$binmat.cor))) { - absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) - } else { - absig <- getABSignal(cor.bins, assay = assay) - } - return(absig) - }, mc.cores = ifelse(parallel, cores, 1)) + # Stupid check for perfect correlation with global mean + if (any(is.na(cor.bins$binmat.cor))) { + absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) + } else { + absig <- getABSignal(cor.bins, assay = assay) + } + return(absig) + }, + BPPARAM = BPPARAM + ) # summarize the bootstraps and compute confidence intervals resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, q = q, assay = assay) diff --git a/R/fixCompartments.R b/R/fixCompartments.R index 8485a952..70c6b1dd 100644 --- a/R/fixCompartments.R +++ b/R/fixCompartments.R @@ -15,14 +15,16 @@ #' @examples fixCompartments <- function(obj, min.conf = 0.8, parallel = FALSE, cores = 1) { # this function will invert or "fix" compartments based on bootstrapping results - if (is(obj, "RaggedExperiment")) obj <- condenseSE(obj, sample.name = colnames(assay(obj))) + if (is(obj, "RaggedExperiment")) { + obj <- condenseSE(obj, sample.name = colnames(assay(obj))) + } # if we somehow only have 1 sample if (is(obj, "GRanges")) { return(flipper(obj, min.conf)) } - message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.info("Fixing compartments using a minimum confidence score of %d%%", min.conf * 100) # go through and invert compartments based on the min.conf flip_compartments_lst <- mclapply(obj, flipper, min.conf, mc.cores = ifelse(parallel, cores, 1)) names(flip_compartments_lst) <- names(obj) @@ -41,8 +43,8 @@ flipper <- function(input_obj, min.conf) { stop("Bootstrapping was not performed. Cannot fix compartments.") } - message("Assuming we only have a single sample to process.") - message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.debug("Assuming we only have a single sample to process.") + flog.debug("Fixing compartments using a minimum confidence score of %f %%", min.conf * 100) invert_compartments <- apply(mcols(input_obj), 1, .inverter, min.conf) mcols(input_obj)$flip.compartment <- invert_compartments diff --git a/R/getABSignal.R b/R/getABSignal.R index e36a6901..a68ca690 100644 --- a/R/getABSignal.R +++ b/R/getABSignal.R @@ -52,13 +52,13 @@ getABSignal <- function(x, squeeze = FALSE, assay = c("rna", "atac", "array")) { assay <- match.arg(assay) gr <- x$gr - message("Calculating eigenvectors.") + flog.debug("Calculating eigenvectors.") pc <- getSVD(x$binmat.cor, sing.vec = "right") if (squeeze) pc <- ifisherZ(pc) - message("Smoothing eigenvector.") + flog.debug("Smoothing eigenvector.") gr$pc <- meanSmoother(pc) - message("Done smoothing.") + flog.debug("Done smoothing.") gr$compartments <- extractOpenClosed(gr, assay = assay) return(gr) diff --git a/R/getATACABsignal.R b/R/getATACABsignal.R deleted file mode 100644 index ec6c19e1..00000000 --- a/R/getATACABsignal.R +++ /dev/null @@ -1,224 +0,0 @@ -#' @title Estimate A/B compartments from ATAC-seq data -#' -#' @description -#' \code{getATACABsignal} returns estimated A/B compartments from ATAC-seq data. -#' -#' @param obj Input SummarizedExperiment object -#' @param res Compartment resolution in bp -#' @param parallel Whether to run samples in parallel -#' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) -#' @param targets Samples/cells to shrink towards -#' @param cores How many cores to use when running samples in parallel -#' @param bootstrap Whether we should perform bootstrapping of inferred compartments -#' @param num.bootstraps How many bootstraps to run -#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param other Another arbitrary genome to compute compartments on -#' @param group Whether to treat this as a group set of samples -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping -#' -#' @return A RaggedExperiment of inferred compartments -#' @import SummarizedExperiment -#' @import RaggedExperiment -#' @importFrom parallel mclapply -#' @importFrom methods as -#' @export -#' -#' @aliases getRNAABsignal -#' -#' @examples -#' if (requireNamespace("csaw", quietly = TRUE)) { -#' data("k562_scatac_chr14", package = "compartmap") -#' atac_compartments <- getATACABsignal( -#' k562_scatac_chr14, -#' parallel = FALSE, -#' chr = "chr14", -#' bootstrap = FALSE, -#' genome = "hg19", -#' group = TRUE -#' ) -#' } -getATACABsignal <- function( - obj, - res = 1e6, - parallel = FALSE, - chr = NULL, - targets = NULL, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 100, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - group = FALSE, - boot.parallel = FALSE, - boot.cores = 2 -) { - chr <- chr %||% - { - message("Processing all chromosomes") - getChrs(obj) - } - - if (is.null(colnames(obj))) stop("colnames needs to be sample names.") - columns <- colnames(obj) - names(columns) <- columns - - prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = "atac") - - if (bootstrap) { - message("Pre-computing the bootstrap global means.") - bmeans <- precomputeBootstrapMeans( - obj = obj, - targets = targets, - num.bootstraps = num.bootstraps, - assay = "atac", - parallel = parallel, - num.cores = cores - ) - } - - if (group) { - atac.compartments.list <- mclapply(chr, function(c) { - atacCompartments( - obj, - obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - num.bootstraps = num.bootstraps, - prior.means = prior.means, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }, mc.cores = ifelse(parallel, cores, 1)) - - atac.compartments <- sort(unlist(as(atac.compartments.list, "GRangesList"))) - return(atac.compartments) - } - - atac.compartments <- mclapply(columns, function(s) { - obj.sub <- obj[, s] - - message("Working on ", s) - atac.compartments.list <- lapply(chr, function(c) { - atacCompartments( - obj.sub, - obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - prior.means = prior.means, - num.bootstraps = num.bootstraps, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }) - - sort(unlist(as(atac.compartments.list, "GRangesList"))) - }, mc.cores = ifelse(parallel, cores, 1)) - - atac.compartments <- as(atac.compartments, "CompressedGRangesList") - RaggedExperiment(atac.compartments, colData = colData(obj)) -} - -# worker function -# this is the main analysis function for computing compartments from atacs -atacCompartments <- function( - obj, - original.obj, - res = 1e6, - chr = NULL, - targets = NULL, - genome = c("hg19", "hg38", "mm9", "mm10"), - prior.means = NULL, - bootstrap = TRUE, - num.bootstraps = 1000, - parallel = FALSE, - cores = 2, - group = FALSE, - bootstrap.means = NULL -) { - genome <- match.arg(genome) - - if (parallel) options(mc.cores = cores) - - # update - message("Computing compartments for ", chr) - obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") - original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") - - # take care of the global means - if (!is.null(prior.means)) { - # this assumes that we've alread computed the global means - pmeans <- as(prior.means, "GRanges") - pmeans <- keepSeqlevels(pmeans, chr, pruning.mode = "coarse") - # go back to a matrix - prior.means <- as(pmeans, "matrix") - colnames(prior.means) <- "globalMean" - } - - obj.bins <- shrinkBins( - obj, - original.obj, - prior.means = prior.means, - chr = chr, - res = res, - targets = targets, - assay = "atac", - genome = genome, - jse = TRUE - ) - - obj.cor <- getCorMatrix(obj.bins, squeeze = !group) - - if (any(is.na(obj.cor$binmat.cor))) { - obj.cor$gr$pc <- matrix(rep(NA, nrow(obj.cor$binmat.cor))) - obj.svd <- obj.cor$gr - } else { - # compute SVD of correlation matrix - obj.svd <- getABSignal(obj.cor, assay = "atac") - } - - if (!bootstrap) { - return(obj.svd) - } - - # bootstrap the estimates - # always compute confidence intervals too - # take care of the global means - # this assumes that we've alread computed the global means - bmeans <- as(bootstrap.means, "GRanges") - bmeans <- keepSeqlevels(bmeans, chr, pruning.mode = "coarse") - # go back to a matrix - bmeans <- as(bmeans, "matrix") - colnames(bmeans) <- rep("globalMean", ncol(bmeans)) - - bootstrapCompartments( - obj, - original.obj, - bootstrap.samples = num.bootstraps, - chr = chr, - assay = "atac", - parallel = parallel, - cores = cores, - targets = targets, - res = res, - genome = genome, - q = 0.95, - svd = obj.svd, - group = group, - bootstrap.means = bmeans - ) -} - -#' @describeIn getATACABsignal Alias for getATACABsignal -#' -getRNAABsignal <- getATACABsignal diff --git a/R/getArrayABsignal.R b/R/getArrayABsignal.R deleted file mode 100644 index a3eb8670..00000000 --- a/R/getArrayABsignal.R +++ /dev/null @@ -1,298 +0,0 @@ -#' @title Estimate A/B compartments from methylation array data -#' -#' @description -#' \code{getArrayABsignal} returns estimated A/B compartments from methylation array data. -#' -#' @param obj Input SummarizedExperiment object -#' @param res Compartment resolution in bp -#' @param parallel Whether to run samples in parallel -#' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) -#' @param targets Samples/cells to shrink towards -#' @param preprocess Whether to preprocess the arrays prior to compartment inference -#' @param cores How many cores to use when running samples in parallel -#' @param bootstrap Whether we should perform bootstrapping of inferred compartments -#' @param num.bootstraps How many bootstraps to run -#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param other Another arbitrary genome to compute compartments on -#' @param array.type What type of array is this ("hm450", "EPIC") -#' @param group Whether to treat this as a group set of samples -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping -#' -#' @return A RaggedExperiment of inferred compartments -#' @import SummarizedExperiment -#' @import RaggedExperiment -#' @importFrom parallel mclapply -#' @importFrom GenomeInfoDb keepSeqlevels -#' @importFrom methods as -#' @export -#' -#' @examples -#' -#' if (requireNamespace("minfi", quietly = TRUE)) { -#' data("array_data_chr14", package = "compartmap") -#' array_compartments <- getArrayABsignal( -#' array.data.chr14, -#' parallel=FALSE, -#' chr="chr14", -#' bootstrap=FALSE, -#' genome="hg19", -#' array.type="hm450" -#' ) -#' } -getArrayABsignal <- function( - obj, - res = 1e6, - parallel = TRUE, - chr = NULL, - targets = NULL, - preprocess = TRUE, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 1000, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - array.type = c("hm450", "EPIC"), - group = FALSE, - boot.parallel = TRUE, - boot.cores = 2 -) { - verifySE(obj) - verifyCoords(obj) - verifyAssayNames(obj, assay = "array") - - # preprocess the arrays - if (preprocess) { - obj <- preprocessArrays( - obj = obj, - genome = genome, - other = other, - array.type = array.type - ) - } - - # critical check if imputation was _not_ done - # to send things back to beta land - is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 - if (!is.beta) { - # send things back to beta land - assays(obj)$Beta <- fexpit(assays(obj)$Beta) - } - - # gather the chromosomes we are working on - if (is.null(chr)) { - message("Assuming we want to process all chromosomes.") - # get what chromosomes we want - chr <- getChrs(obj) - } - - # get the column names - if (is.null(colnames(obj))) stop("colnames needs to be sample names.") - columns <- colnames(obj) - names(columns) <- columns - - # precompute global means - prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = "array") - - if (bootstrap) { - message("Pre-computing the bootstrap global means.") - bmeans <- precomputeBootstrapMeans( - obj = obj, - targets = targets, - num.bootstraps = num.bootstraps, - assay = "array", - parallel = parallel, - num.cores = cores - ) - } - - if (group) { - array.compartments.list <- mclapply(chr, function(c) { - .arrayCompartments( - obj, obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - num.bootstraps = num.bootstraps, - prior.means = prior.means, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }, mc.cores = cores) - array.compartments <- sort(unlist(as(array.compartments.list, "GRangesList"))) - return(array.compartments) - } - - array.compartments <- mclapply(columns, function(s) { - obj.sub <- obj[, s] - message("Working on ", s) - array.compartments.list <- lapply(chr, function(c) { - .arrayCompartments( - obj.sub, obj, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - prior.means = prior.means, - num.bootstraps = num.bootstraps, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }) - sort(unlist(as(array.compartments.list, "GRangesList"))) - }, mc.cores = ifelse(parallel, cores, 1), mc.preschedule = F) - - array.compartments <- as(array.compartments, "CompressedGRangesList") - RaggedExperiment(array.compartments, colData = colData(obj)) -} - -#' Preprocess arrays for compartment inference -#' -#' @name preprocessArrays -#' -#' @param obj Input SummarizedExperiment -#' @param genome What genome are we working on ("hg19", "hg38", "mm9", "mm10") -#' @param other Another arbitrary genome to compute compartments on -#' @param array.type What type of array is this ("hm450", "EPIC") -#' -#' @return A preprocessed SummarizedExperiment to compute compartments -#' @import SummarizedExperiment -#' -#' @examples -#' if (requireNamespace("minfiData", quietly = TRUE)) { -#' grSet <- minfi::preprocessNoob(minfiData::RGsetEx.sub) |> -#' minfi::ratioConvert() |> -#' minfi::mapToGenome() -#' preprocessArrays(grSet) -#' } -#' -#' @export -preprocessArrays <- function( - obj, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - array.type = c("hm450", "EPIC") -) { - if (!requireNamespace("minfi", quietly = TRUE)) { - stop("The minfi package must be installed for this functionality") - } - - # what genome do we have - genome <- match.arg(genome) - - # subset the array to open sea CpGs - obj.opensea <- filterOpenSea(obj, genome = genome, other = other) - verifyAssayNames(obj.opensea, assay = "array") - - # convert to M-values if beta values given - # this should be default but allows handling if given M-values in Beta slot - is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 - if (is.beta) { - message("Converting to squeezed M-values.") - assays(obj.opensea)$Beta <- flogit(assays(obj.opensea)$Beta) - } - - # impute missing values if possible - if (any(is.na(minfi::getBeta(obj.opensea)))) { - message("Imputing missing values.") - obj.opensea <- imputeKNN(obj.opensea, assay = "array") - } - - obj.opensea -} - - -# this is the main analysis function for computing compartments from arrays -.arrayCompartments <- function( - obj, - original.obj, - res = 1e6, - chr = NULL, - targets = NULL, - genome = c("hg19", "hg38", "mm9", "mm10"), - prior.means = NULL, - bootstrap = TRUE, - num.bootstraps = 1000, - parallel = FALSE, - cores = 2, - group = FALSE, - bootstrap.means = NULL -) { - genome <- match.arg(genome) - if (parallel) options(mc.cores = cores) - - # update - message("Computing compartments for ", chr) - obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") - original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") - - # take care of the global means - if (!is.null(prior.means)) { - # this assumes that we've alread computed the global means - pmeans <- as(prior.means, "GRanges") - pmeans <- keepSeqlevels(pmeans, chr, pruning.mode = "coarse") - # go back to a matrix - prior.means <- as(pmeans, "matrix") - colnames(prior.means) <- "globalMean" - } - - obj.bins <- shrinkBins( - obj, - original.obj, - prior.means = prior.means, - chr = chr, - res = res, - targets = targets, - assay = "array", - genome = genome, - jse = TRUE - ) - - obj.cor <- getCorMatrix(obj.bins, squeeze = !group) - - if (any(is.na(obj.cor$binmat.cor))) { - obj.cor$gr$pc <- matrix(rep(NA, nrow(obj.cor$binmat.cor))) - obj.svd <- obj.cor$gr - } else { - # compute SVD of correlation matrix - obj.svd <- getABSignal(obj.cor, assay = "array") - } - - if (!bootstrap) { - return(obj.svd) - } - - # bootstrap the estimates - # always compute confidence intervals too - # take care of the global means - # this assumes that we've alread computed the global means - bmeans <- as(bootstrap.means, "GRanges") - bmeans <- keepSeqlevels(bmeans, chr, pruning.mode = "coarse") - # go back to a matrix - bmeans <- as(bmeans, "matrix") - colnames(bmeans) <- rep("globalMean", ncol(bmeans)) - - bootstrapCompartments( - obj, - original.obj, - bootstrap.samples = num.bootstraps, - chr = chr, - assay = "array", - parallel = parallel, - cores = cores, - targets = targets, - res = res, - genome = genome, - q = 0.95, - svd = obj.svd, - group = group, - bootstrap.means = bmeans - ) -} diff --git a/R/getBinMatrix.R b/R/getBinMatrix.R index 5899c6c0..e16be4e0 100644 --- a/R/getBinMatrix.R +++ b/R/getBinMatrix.R @@ -68,7 +68,7 @@ getBinMatrix <- function( ids <- findOverlaps(genloc, gr.bin, select = "first") binCount <- length(gr.bin) - message(binCount, " bins created...") + flog.debug("%d bins created...", binCount) mat.bin <- apply(mat, 2, function(x) { .summarizeBins(x, binCount, ids, FUN) diff --git a/R/getCompartments.R b/R/getCompartments.R new file mode 100644 index 00000000..25a4e53f --- /dev/null +++ b/R/getCompartments.R @@ -0,0 +1,192 @@ +#' Run compartment inference +#' @importFrom BiocParallel bplapply +#' @importFrom futile.logger flog.info flog.debug +getCompartments <- function( + obj, + res, + chr, + group, + targets, + bootstrap, + num.bootstraps, + genome, + assay, + boot.parallel, + bpparams +) { + if (is.null(chr)) { + flog.info("Assuming we want to process all chromosomes.") + chr <- getChrs(obj) + } + + if (is.null(colnames(obj))) stop("colnames needs to be sample names.") + columns <- colnames(obj) + names(columns) <- columns + + prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = assay) + + if (bootstrap) { + flog.info("Pre-computing the bootstrap global means.") + bmeans <- precomputeBootstrapMeans( + obj = obj, + BPPARAM = bpparams[[1]], + targets = targets, + num.bootstraps = num.bootstraps, + assay = assay + ) + } + + boot_msg <- "" + if (bootstrap & boot.parallel) { + boot_msg <- sprintf("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) + } else if (bootstrap & !boot.parallel & group) { + boot_msg <- "Not bootstrapping in parallel could take a long time..." + } + + if (group) { + flog.info("Computing group level compartments") + flog.info(boot_msg) + compartments.list <- bplapply( + chr, + function(c) { + .getCompartments( + obj, + obj, + assay = assay, + BPPARAM = bpparams[[2]], + res = res, + chr = c, + group = group, + targets = targets, + genome = genome, + prior.means = prior.means, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + bootstrap.means = bmeans + ) + }, + BPPARAM = bpparams[[1]] + ) + + compartments <- sort(unlist(as(compartments.list, "GRangesList"))) + return(compartments) + } + + flog.info("Computing single-cell level compartments") + flog.info(boot_msg) + compartments <- bplapply( + columns, + function(s) { + obj.sub <- obj[, s] + + compartments.list <- lapply(chr, function(c) { + .getCompartments( + obj.sub, + obj, + assay = assay, + BPPARAM = bpparams[[2]], + res = res, + chr = c, + group = group, + targets = targets, + genome = genome, + prior.means = prior.means, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + bootstrap.means = bmeans + ) + }) + sort(unlist(as(compartments.list, "GRangesList"))) + }, + BPPARAM = bpparams[[1]] + ) + + compartments <- as(compartments, "CompressedGRangesList") + RaggedExperiment(compartments, colData = colData(obj)) +} + +# this is the main analysis function for computing compartments +.getCompartments <- function( + obj, + original.obj, + assay, + BPPARAM, + res = 1e6, + chr = NULL, + group = FALSE, + targets = NULL, + genome = c("hg19", "hg38", "mm9", "mm10"), + prior.means = NULL, + bootstrap = TRUE, + num.bootstraps = 1000, + bootstrap.means = NULL +) { + genome <- match.arg(genome) + + # update + flog.debug("Computing compartments for %s", chr) + obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") + original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") + + # take care of the global means + if (!is.null(prior.means)) { + # this assumes that we've alread computed the global means + pmeans <- as(prior.means, "GRanges") + pmeans <- keepSeqlevels(pmeans, chr, pruning.mode = "coarse") + # go back to a matrix + prior.means <- as(pmeans, "matrix") + colnames(prior.means) <- "globalMean" + } + + obj.bins <- shrinkBins( + obj, + original.obj, + prior.means = prior.means, + chr = chr, + res = res, + targets = targets, + assay = assay, + genome = genome, + jse = TRUE + ) + + obj.cor <- getCorMatrix(obj.bins, squeeze = !group) + + if (any(is.na(obj.cor$binmat.cor))) { + obj.cor$gr$pc <- matrix(rep(NA, nrow(obj.cor$binmat.cor))) + obj.svd <- obj.cor$gr + } else { + # compute SVD of correlation matrix + obj.svd <- getABSignal(obj.cor, assay = assay) + } + + if (!bootstrap) { + return(obj.svd) + } + + # bootstrap the estimates + # always compute confidence intervals too + # take care of the global means + # this assumes that we've alread computed the global means + bmeans <- as(bootstrap.means, "GRanges") + bmeans <- keepSeqlevels(bmeans, chr, pruning.mode = "coarse") + # go back to a matrix + bmeans <- as(bmeans, "matrix") + colnames(bmeans) <- rep("globalMean", ncol(bmeans)) + + bootstrapCompartments( + obj, + original.obj, + BPPARAM = BPPARAM, + bootstrap.samples = num.bootstraps, + chr = chr, + group = group, + assay = assay, + targets = targets, + res = res, + genome = genome, + q = 0.95, + svd = obj.svd, + bootstrap.means = bmeans + ) +} diff --git a/R/getCorMatrix.R b/R/getCorMatrix.R index 914b6495..da2e68fb 100644 --- a/R/getCorMatrix.R +++ b/R/getCorMatrix.R @@ -43,7 +43,7 @@ #' # Calculate correlations #' getCorMatrix(bin.counts) getCorMatrix <- function(binmat, squeeze = FALSE) { - message("Calculating correlations...") + flog.debug("Calculating correlations...") # bind back up the global means and shrunken bins binmat$x <- cbind(binmat$x, binmat$gmeans) binmat.cor <- suppressWarnings(cor(t(binmat$x))) @@ -51,6 +51,6 @@ getCorMatrix <- function(binmat, squeeze = FALSE) { if (squeeze) { binmat.cor <- fisherZ(binmat.cor) } - message("Done...") + flog.debug("Done...") list(gr.cor = binmat$gr, binmat.cor = binmat.cor) } diff --git a/R/getGlobalMeans.R b/R/getGlobalMeans.R index 09f1ae87..cf4ab83b 100644 --- a/R/getGlobalMeans.R +++ b/R/getGlobalMeans.R @@ -30,7 +30,7 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" # check if shrinkage targets are being used if (!is.null(targets)) { stargets <- getShrinkageTargets(obj, targets) - message("Using ", paste(shQuote(targets), collapse = ", "), " as shrinkage targets...") + flog.debug("Using %s as shrinkage targets", paste(shQuote(targets), collapse = ", ")) globalMean.input <- stargets } else { globalMean.input <- obj @@ -49,11 +49,10 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" #' @name precomputeBootstrapMeans #' #' @param obj Input SummarizedExperiment object +#' @param BPPARAM BiocParallelParam for parallelizing computation #' @param targets Optional targets to shrink towards #' @param num.bootstraps The number of bootstraps to compute #' @param assay What type of assay the data are from -#' @param parallel Whether to run in parallel -#' @param num.cores How many cores to use for parallel processing #' #' @return A matrix of bootstrapped global means #' @@ -69,11 +68,10 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" #' ) precomputeBootstrapMeans <- function( obj, + BPPARAM, targets = NULL, num.bootstraps = 100, - assay = c("atac", "rna", "array"), - parallel = FALSE, - num.cores = 1 + assay = c("atac", "rna", "array") ) { # this function precomputes the bootstrapped global means # as a default we will make 100 bootstraps @@ -85,11 +83,15 @@ precomputeBootstrapMeans <- function( obj <- getShrinkageTargets(obj, targets) } assay.data <- .getAssay(obj, is.array) - bootMean <- mclapply(1:num.bootstraps, function(b) { - message("Working on bootstrap ", b) - resamp.mat <- .resampleMatrix(assay.data) - computeGlobalMean(resamp.mat) - }, mc.cores = ifelse(parallel, num.cores, 1)) + bootMean <- bplapply( + 1:num.bootstraps, + function(b) { + flog.debug("Working on bootstrap ", b) + resamp.mat <- .resampleMatrix(assay.data) + computeGlobalMean(resamp.mat) + }, + BPPARAM = BPPARAM + ) bootResult <- do.call("cbind", bootMean) rownames(bootResult) <- as.character(granges(obj)) diff --git a/R/meanSmoother.R b/R/meanSmoother.R index 09be0d7d..f645d2c5 100644 --- a/R/meanSmoother.R +++ b/R/meanSmoother.R @@ -22,7 +22,7 @@ #' @export meanSmoother <- function(mat, k = 1, iters = 2, delta = 0, weights = NULL) { if (k == 0) { - message("Returning unsmoothed mat as 'k' = 0") + flog.debug("Returning unsmoothed mat as 'k' = 0") return(mat) } diff --git a/R/parallel.R b/R/parallel.R new file mode 100644 index 00000000..93e5faa7 --- /dev/null +++ b/R/parallel.R @@ -0,0 +1,123 @@ +#' Check and tell users whether input BPPARAM are optimal given group/sc +#' inference and bootstrapping +#' @keywords internal +check_optim <- function(workers, group, chr_count, bootstrap) { + if (group & chr_count < workers[1]) { + flog.info( + "Grouped inference with more outer workers than chromosomes leaves %d of %d workers unused", + workers[1] - chr_count, + workers[1] + ) + if (bootstrap) { + flog.info("Consider using a single core for the outer worker and more cores for the inner bootstrap worker") + } + } + if (!group & bootstrap & workers[1] < workers[2]) { + flog.info("More outer (column-wise) than inner (bootstrap) workers is faster for single-cell inference") + } +} + +#' Check that the number of requested workers is valid +#' @keywords internal +check_worker_count <- function(bpparam, group, chr_count, bootstrap, avail_workers = parallelly::availableCores()) { + workers <- get_bpnworkers(bpparam) + total <- required_workers(workers) + if (verify_workers(total)) { + check_optim(workers, group, chr_count, bootstrap) + return(TRUE) + } + + avail_msg <- sprintf("but your system has only %d cores", avail_workers) + info_msg <- "See parallelly::availableCores(which = 'all') for more information on available resources" + if (workers[1] == 1 | workers[2] == 1) { + msg <- sprintf( + "Requested %d %s workers %s\n%s", + max(workers), + ifelse(workers[1] == 1, "inner", "outer"), + avail_msg, + info_msg + ) + } else { + msg <- sprintf( + "Requested %1$d outer and %2$d inner workers that require %3$d total workers (%1$d + (%1$d x %2$d)) %4$s\n%5$s", + workers[1], + workers[2], + total, + avail_msg, + info_msg + ) + } + stop(msg) +} + +#' Get the total number of BiocParallelParam workers that will get used +#' From a BiocParallelParam or list of 2 BiocParallelParam objects +#' @keywords internal +get_bpnworkers <- function(bp) { + workers <- bpnworkers.list(bp) +} + +#' Return the number of workers in a list of BiocParallelParam objects +#' @param List of BiocParallelParam objects +#' @importFrom BiocParallel bpnworkers +#' @return A vector of the `bpnworkers` count in each list element +#' @keywords internal +bpnworkers.list <- function(bplist) { + unlist(Map(bpnworkers, bplist)) +} + +required_workers <- function(workers) { + if (workers[1] == 1) { + return(workers[2]) + } + if (workers[2] == 1) { + return(workers[1]) + } + sum(Reduce(`*`, workers), workers[1]) +} + +#' Verify that the input BiocParallelParam is valid +#' @param A BiocParallelParam or list of 2 BiocParallelParam objects +#' @importFrom BiocParallel bpnworkers +#' @return TRUE if the total `bpnworkers` in the input does not exceed +#' available resources as defined by `parallelly::availableCores()` +#' @keywords internal +verify_bp <- function(bp) { + verify_workers(get_bpnworkers(bp)) +} + +#' Verify that requested thread count is not higher than available +#' @param thread_count The number of workers to check availability +#' @return TRUE if the requested `thread_count` does not exceed available +#' resources as defined by `parallelly::availableCores()` +#' @keywords internal +verify_workers <- function(n_workers) { + avail_workers <- parallelly::availableCores() + n_workers <= avail_workers +} + +#' Set outer and inner params for nester parallelization +#' The outer param is across the input samples/columns and the second is for +#' bootstrapping. If `boot.parallel` is FALSE, the inner param is set to +#' `SerialParam`. +#' @importFrom BiocParallel SerialParam +#' @keywords internal +get_nested_params <- function(BPPARAM, boot.parallel) { + stopifnot("Only two BiocParallelParam objects can be used" = length(BPPARAM) <= 2) + single_param <- length(BPPARAM) == 1 + BPPARAM <- if (single_param & is.list(BPPARAM)) BPPARAM[[1]] else BPPARAM + + if (boot.parallel) { + if (single_param) { + return(list(outer = BPPARAM, inner = BPPARAM)) + } else { + return(list(outer = BPPARAM[[1]], inner = BPPARAM[[2]])) + } + } + + if (single_param) { + list(outer = BPPARAM, inner = SerialParam()) + } else { + list(outer = BPPARAM[[1]], inner = SerialParam()) + } +} diff --git a/R/plotAB.R b/R/plotAB.R index b8d1566d..d1040a6e 100644 --- a/R/plotAB.R +++ b/R/plotAB.R @@ -2,7 +2,7 @@ #' #' Plot A/B compartments bins #' -#' @param grAB The GRanges object returned from scCompartments and getArrayABsignal +#' @param grAB The GRanges object returned from scCompartments and arrayCompartments #' @param chr Chromosome to subset to for plotting #' @param main Title for the plot #' @param ylim Y-axis limits (default is -1 to 1) @@ -113,7 +113,7 @@ plotAB <- function( NAs <- is.na(x) x[!NAs] <- x[!NAs] / sqrt(sum(x[!NAs]^2)) na.count <- sum(NAs) - if (na.count > 0) message(sprintf("[.unitarize] %i missing values were ignored.\n", na.count)) + if (na.count > 0) flog.debug("[.unitarize] %i missing values were ignored.\n", na.count) x } diff --git a/R/preprocessArrays.R b/R/preprocessArrays.R new file mode 100644 index 00000000..1d3694c9 --- /dev/null +++ b/R/preprocessArrays.R @@ -0,0 +1,54 @@ +#' Preprocess arrays for compartment inference +#' +#' @name preprocessArrays +#' +#' @param obj Input SummarizedExperiment +#' @param genome What genome are we working on ("hg19", "hg38", "mm9", "mm10") +#' @param other Another arbitrary genome to compute compartments on +#' @param array.type What type of array is this ("hm450", "EPIC") +#' +#' @return A preprocessed SummarizedExperiment to compute compartments +#' @import SummarizedExperiment +#' +#' @examples +#' if (requireNamespace("minfiData", quietly = TRUE)) { +#' grSet <- minfi::preprocessNoob(minfiData::RGsetEx.sub) |> +#' minfi::ratioConvert() |> +#' minfi::mapToGenome() +#' preprocessArrays(grSet) +#' } +#' +#' @export +preprocessArrays <- function( + obj, + genome = c("hg19", "hg38", "mm9", "mm10"), + other = NULL, + array.type = c("hm450", "EPIC") +) { + if (!requireNamespace("minfi", quietly = TRUE)) { + stop("The minfi package must be installed for this functionality") + } + + # what genome do we have + genome <- match.arg(genome) + + # subset the array to open sea CpGs + obj.opensea <- filterOpenSea(obj, genome = genome, other = other) + verifyAssayNames(obj.opensea, assay = "array") + + # convert to M-values if beta values given + # this should be default but allows handling if given M-values in Beta slot + is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 + if (is.beta) { + flog.debug("Converting to squeezed M-values.") + assays(obj.opensea)$Beta <- flogit(assays(obj.opensea)$Beta) + } + + # impute missing values if possible + if (any(is.na(minfi::getBeta(obj.opensea)))) { + flog.debug("Imputing missing values.") + obj.opensea <- imputeKNN(obj.opensea, assay = "array") + } + + obj.opensea +} diff --git a/R/scCompartments.R b/R/scCompartments.R index 050b24bb..7a839d6f 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -1,30 +1,99 @@ -#' @title Estimate A/B compartments from single-cell sequencing data +#' @title Estimate A/B compartments from single-cell RNA or ATAC sequencing data #' #' @description #' \code{scCompartments} returns estimated A/B compartments from sc-seq data. #' #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp -#' @param parallel Whether to run samples in parallel #' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) +#' @param group Whether to treat this as a group set of samples #' @param targets Samples/cells to shrink towards -#' @param cores How many cores to use when running samples in parallel #' @param bootstrap Whether we should perform bootstrapping of inferred compartments #' @param num.bootstraps How many bootstraps to run #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param group Whether to treat this as a group set of samples #' @param assay What type of single-cell assay is the input data ("atac" or "rna") +#' @param boot.parallel Whether to run the bootstrapping in parallel. See details. +#' @param BPPARAM BiocParallelParam object to use for parallelization. See details. +#' +#' @details +#' +#' compartmap uses `BiocParallel` to parallelize operations in four +#' configurations. The default setting is to parallelize across columns but not +#' bootstraps using the thread count as reported by `BiocParallel::bpparam()`, +#' which is usually two cores fewer than the number of available cores. +#' Parallel bootstrapping is disabled by default to avoid nested parallelism +#' issues but can be done independent of column-wise parallelization. +#' + +#' ## Available configurations +#' +#' ### Serial bootstrapping +#' +#' - Serially with just one core: +#' `BPPARAM = BiocParallel::SerialParam()` +#' +#' - Parallel across columns and serially across bootstraps: +#' `BPPARAM = BiocParallel::MulticoreParam(n)` where `n` is the number of +#' threads to use +#' +#' See `?BiocParallel::BiocParallelParam` for other parallel backends. Parallel +#' backends may also be passed to `BiocParallel::register()` to make them +#' available to `bpparam()`. + +#' ### Parallel bootstrapping +#' +#' Set `boot.parallel = TRUE` for one the these configurations: +#' +#' - Serially across columns and parallel across bootstraps: Set `BPPARAM = +#' list(SerialParam(), MulticoreParam(n))' +#' +#' - Parallel across both columns and bootstraps: Set `BPPARAM = +#' list(MulticoreParam(outer), MulticoreParam(inner))` where `outer` is the +#' thread count for column-wise operations and `inner` the thread count for +#' bootstrapping. The required number of threads is given by +#' +#' `( outer * inner ) + outer` +#' +#' which is more easily calculated as `outer * (inner + 1)`. +#' +#' We recommend using an explicit list of two BiocParallelParam backends over +#' relying on `register()` and `bpparam()` for parallelizing across bootstraps. +#' With nested `bplapply` calls, the registered backend is used for both the +#' outer and inner parallel loops. On a system with 8 available threads if the +#' registered backend asks for 4 workers, it will try to use 20 threads in the +#' nested loops. Instead to use all 8 cores, set +#' `BPPARAM = list(MulticoreParam(2), MulticoreParam(3))`. +#' +#' ### Load balancing +#' +#' Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +#' in parallel, you can use nested parallelism. If you are working on just 1 +#' chromosome, put all cores into the inner bootstrapping backend. Conversely +#' with multiple chromosomes without bootstrapping, put all available workers in +#' the outer loop. +#' +#' In general, use more 'outer' workers, which loop over chromosomes when `group +#' = TRUE` and cells when `group = FALSE`, than 'inner' workers that loop over +#' bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +#' inner. +#' +#' When `group = FALSE`, use `MulticoreParam()` only on the outer workers. We +#' find that parallelizing at both column and bootstrap levels with the +#' single-cell inference is slower than only parallelizing at the column-level. +#' +#' With `group = TRUE`, minimize the difference between the two worker counts: +#' with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +#' inner. #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment -#' @importFrom parallel mclapply #' @import RaggedExperiment +#' @importFrom BiocParallel bpparam #' @export #' @examples #' data("k562_scrna_chr14", package = "compartmap") #' sc_compartments <- scCompartments( #' k562_scrna_chr14, -#' parallel = FALSE, #' chr = "chr14", #' bootstrap = FALSE, #' genome = "hg19" @@ -32,35 +101,37 @@ scCompartments <- function( obj, res = 1e6, - parallel = FALSE, chr = NULL, + group = FALSE, targets = NULL, - cores = 2, bootstrap = TRUE, num.bootstraps = 100, genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, - assay = c("atac", "rna") + assay = c("atac", "rna"), + boot.parallel = FALSE, + BPPARAM = bpparam() ) { verifySE(obj) verifyCoords(obj) + bpparams <- get_nested_params(BPPARAM, boot.parallel) + check_worker_count(bpparams, group, length(chr), bootstrap) + # which assay are we working on if (!all(assay %in% c("atac", "rna"))) stop("Supported assays are 'atac', and 'rna'.") assay <- tolower(match.arg(assay)) verifyAssayNames(obj, assay = assay) - - sc_compartments <- getATACABsignal( + getCompartments( obj = obj, res = res, - parallel = parallel, chr = chr, + group = group, targets = targets, - cores = cores, bootstrap = bootstrap, num.bootstraps = num.bootstraps, genome = genome, - group = group + assay = assay, + boot.parallel = boot.parallel, + bpparams = bpparams ) - return(sc_compartments) } diff --git a/R/shrinkBins.R b/R/shrinkBins.R index 136f4a67..baace78c 100644 --- a/R/shrinkBins.R +++ b/R/shrinkBins.R @@ -55,7 +55,7 @@ shrinkBins <- function( if (target.count == 1) { stop("Cannot perform targeted bin-level shrinkage with one target sample.") } else if (target.count < 4) { - message("Number of means fewer than 4. Using Bayes instead of JSE.") + flog.debug("Number of means fewer than 4. Using Bayes instead of JSE.") jse <- FALSE } } diff --git a/R/summarizeBootstraps.R b/R/summarizeBootstraps.R index b540f354..df8c4d19 100644 --- a/R/summarizeBootstraps.R +++ b/R/summarizeBootstraps.R @@ -21,7 +21,7 @@ summarizeBootstraps <- function(boot.list, est.ab, q = 0.95, assay = c("rna", "a is.atac_or_rna <- assay %in% c("atac", "rna") - message("Summarizing bootstraps.") + flog.debug("Summarizing bootstraps") # filter out failed compartment estimates boot.list <- removeEmptyBoots(boot.list) @@ -37,7 +37,7 @@ summarizeBootstraps <- function(boot.list, est.ab, q = 0.95, assay = c("rna", "a est.ab$boot.open <- .getBootRowSums(1) est.ab$boot.closed <- .getBootRowSums(2) - message("Computing Agresti-Coull 95% confidence intervals.") + flog.debug("Computing Agresti-Coull 95% confidence intervals") .getCI(est.ab, q) } diff --git a/flake.nix b/flake.nix index 9134fbb0..7b09c100 100644 --- a/flake.nix +++ b/flake.nix @@ -17,12 +17,14 @@ ]; Imports = with pkgs.rPackages; [ + BiocParallel DelayedArray DelayedMatrixStats GenomicRanges ggplot2 impute Matrix + parallelly reshape2 RMTstat rtracklayer diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd new file mode 100644 index 00000000..57301f69 --- /dev/null +++ b/man/arrayCompartments.Rd @@ -0,0 +1,138 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arrayCompartments.R +\name{arrayCompartments} +\alias{arrayCompartments} +\title{Estimate A/B compartments from methylation array data} +\usage{ +arrayCompartments( + obj, + res = 1000000, + chr = NULL, + group = FALSE, + targets = NULL, + bootstrap = TRUE, + num.bootstraps = 1000, + preprocess = TRUE, + array.type = c("hm450", "EPIC"), + genome = c("hg19", "hg38", "mm9", "mm10"), + other = NULL, + boot.parallel = TRUE, + BPPARAM = bpparam() +) +} +\arguments{ +\item{obj}{Input SummarizedExperiment object} + +\item{res}{Compartment resolution in bp} + +\item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} + +\item{group}{Whether to treat this as a group set of samples} + +\item{targets}{Samples/cells to shrink towards} + +\item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} + +\item{num.bootstraps}{How many bootstraps to run} + +\item{preprocess}{Whether to preprocess the arrays prior to compartment inference} + +\item{array.type}{What type of array is this ("hm450", "EPIC")} + +\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} + +\item{other}{Another arbitrary genome to compute compartments on} + +\item{boot.parallel}{Whether to run the bootstrapping in parallel. See details.} + +\item{BPPARAM}{BiocParallelParam object to use for parallelization. See details.} +} +\value{ +A RaggedExperiment of inferred compartments +} +\description{ +\code{arrayCompartments} returns estimated A/B compartments from methylation array data. +} +\details{ +compartmap uses \code{BiocParallel} to parallelize operations in four +configurations. The default setting is to parallelize across columns but not +bootstraps using the thread count as reported by \code{BiocParallel::bpparam()}, +which is usually two cores fewer than the number of available cores. +Parallel bootstrapping is disabled by default to avoid nested parallelism +issues but can be done independent of column-wise parallelization. +\subsection{Available configurations}{ +\subsection{Serial bootstrapping}{ +\itemize{ +\item Serially with just one core: +\code{BPPARAM = BiocParallel::SerialParam()} +\item Parallel across columns and serially across bootstraps: +\code{BPPARAM = BiocParallel::MulticoreParam(n)} where \code{n} is the number of +threads to use +} + +See \code{?BiocParallel::BiocParallelParam} for other parallel backends. Parallel +backends may also be passed to \code{BiocParallel::register()} to make them +available to \code{bpparam()}. +} + +\subsection{Parallel bootstrapping}{ + +Set \code{boot.parallel = TRUE} for one the these configurations: +\itemize{ +\item Serially across columns and parallel across bootstraps: Set `BPPARAM = +list(SerialParam(), MulticoreParam(n))' +\item Parallel across both columns and bootstraps: Set \code{BPPARAM = list(MulticoreParam(outer), MulticoreParam(inner))} where \code{outer} is the +thread count for column-wise operations and \code{inner} the thread count for +bootstrapping. The required number of threads is given by +} + +\code{( outer * inner ) + outer} + +which is more easily calculated as \code{outer * (inner + 1)}. + +We recommend using an explicit list of two BiocParallelParam backends over +relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. +With nested \code{bplapply} calls, the registered backend is used for both the +outer and inner parallel loops. On a system with 8 available threads if the +registered backend asks for 4 workers, it will try to use 20 threads in the +nested loops. Instead to use all 8 cores, set +\code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. +} + +\subsection{Load balancing}{ + +Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +in parallel, you can use nested parallelism. If you are working on just 1 +chromosome, put all cores into the inner bootstrapping backend. Conversely +with multiple chromosomes without bootstrapping, put all available workers in +the outer loop. + +In general, use more 'outer' workers, which loop over chromosomes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +inner. + +When \code{group = FALSE}, use \code{MulticoreParam()} only on the outer workers. We +find that parallelizing at both column and bootstrap levels with the +single-cell inference is slower than only parallelizing at the column-level. + +With \code{group = TRUE}, minimize the difference between the two worker counts: +with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +inner. +} + +} +} +\examples{ + +if (requireNamespace("minfi", quietly = TRUE)) { + data("array_data_chr14", package = "compartmap") + array_compartments <- arrayCompartments( + array.data.chr14, + chr="chr14", + parallel=FALSE, + bootstrap=FALSE, + genome="hg19", + array.type="hm450" + ) +} +} diff --git a/man/bootstrapCompartments.Rd b/man/bootstrapCompartments.Rd index c9ab1c18..440b511f 100644 --- a/man/bootstrapCompartments.Rd +++ b/man/bootstrapCompartments.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/bootstrapCompartments.R \name{bootstrapCompartments} \alias{bootstrapCompartments} -\title{Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals} +\title{Non-parametric bootstrapping of compartments and summarization of +bootstraps/compute confidence intervals} \usage{ bootstrapCompartments( obj, original.obj, + BPPARAM, bootstrap.samples = 1000, chr = "chr14", + group = FALSE, assay = c("rna", "atac", "array"), - parallel = TRUE, - cores = 2, targets = NULL, res = 1000000, genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95, svd = NULL, - group = FALSE, bootstrap.means = NULL ) } @@ -26,15 +26,15 @@ bootstrapCompartments( \item{original.obj}{The original, full input SummarizedExperiment of all samples/cells} +\item{BPPARAM}{BiocParallelParam for parallelizing bootstrapping} + \item{bootstrap.samples}{How many bootstraps to run} \item{chr}{Which chromosome to operate on} -\item{assay}{What sort of assay are we working on} - -\item{parallel}{Whether to run the bootstrapping in parallel} +\item{group}{Whether this is for group-level inference} -\item{cores}{How many cores to use for parallel processing} +\item{assay}{What sort of assay are we working on} \item{targets}{Targets to shrink towards} @@ -46,15 +46,14 @@ bootstrapCompartments( \item{svd}{The original compartment calls as a GRanges object} -\item{group}{Whether this is for group-level inference} - \item{bootstrap.means}{Pre-computed bootstrap means matrix} } \value{ Compartment estimates with summarized bootstraps and confidence intervals } \description{ -Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals +Non-parametric bootstrapping of compartments and summarization of +bootstraps/compute confidence intervals } \examples{ diff --git a/man/bpnworkers.list.Rd b/man/bpnworkers.list.Rd new file mode 100644 index 00000000..762f6eb4 --- /dev/null +++ b/man/bpnworkers.list.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{bpnworkers.list} +\alias{bpnworkers.list} +\title{Return the number of workers in a list of BiocParallelParam objects} +\usage{ +bpnworkers.list(bplist) +} +\arguments{ +\item{List}{of BiocParallelParam objects} +} +\value{ +A vector of the \code{bpnworkers} count in each list element +} +\description{ +Return the number of workers in a list of BiocParallelParam objects +} +\keyword{internal} diff --git a/man/check_optim.Rd b/man/check_optim.Rd new file mode 100644 index 00000000..582e2772 --- /dev/null +++ b/man/check_optim.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{check_optim} +\alias{check_optim} +\title{Check and tell users whether input BPPARAM are optimal given group/sc +inference and bootstrapping} +\usage{ +check_optim(workers, group, chr_count, bootstrap) +} +\description{ +Check and tell users whether input BPPARAM are optimal given group/sc +inference and bootstrapping +} +\keyword{internal} diff --git a/man/check_worker_count.Rd b/man/check_worker_count.Rd new file mode 100644 index 00000000..5dd80962 --- /dev/null +++ b/man/check_worker_count.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{check_worker_count} +\alias{check_worker_count} +\title{Check that the number of requested workers is valid} +\usage{ +check_worker_count( + bpparam, + group, + chr_count, + bootstrap, + avail_workers = parallelly::availableCores() +) +} +\description{ +Check that the number of requested workers is valid +} +\keyword{internal} diff --git a/man/getATACABsignal.Rd b/man/getATACABsignal.Rd deleted file mode 100644 index 3a9c419c..00000000 --- a/man/getATACABsignal.Rd +++ /dev/null @@ -1,90 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getATACABsignal.R -\name{getATACABsignal} -\alias{getATACABsignal} -\alias{getRNAABsignal} -\title{Estimate A/B compartments from ATAC-seq data} -\usage{ -getATACABsignal( - obj, - res = 1000000, - parallel = FALSE, - chr = NULL, - targets = NULL, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 100, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - group = FALSE, - boot.parallel = FALSE, - boot.cores = 2 -) - -getRNAABsignal( - obj, - res = 1000000, - parallel = FALSE, - chr = NULL, - targets = NULL, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 100, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - group = FALSE, - boot.parallel = FALSE, - boot.cores = 2 -) -} -\arguments{ -\item{obj}{Input SummarizedExperiment object} - -\item{res}{Compartment resolution in bp} - -\item{parallel}{Whether to run samples in parallel} - -\item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} - -\item{targets}{Samples/cells to shrink towards} - -\item{cores}{How many cores to use when running samples in parallel} - -\item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} - -\item{num.bootstraps}{How many bootstraps to run} - -\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} - -\item{other}{Another arbitrary genome to compute compartments on} - -\item{group}{Whether to treat this as a group set of samples} - -\item{boot.parallel}{Whether to run the bootstrapping in parallel} - -\item{boot.cores}{How many cores to use for the bootstrapping} -} -\value{ -A RaggedExperiment of inferred compartments -} -\description{ -\code{getATACABsignal} returns estimated A/B compartments from ATAC-seq data. -} -\section{Functions}{ -\itemize{ -\item \code{getRNAABsignal()}: Alias for getATACABsignal - -}} -\examples{ -if (requireNamespace("csaw", quietly = TRUE)) { - data("k562_scatac_chr14", package = "compartmap") - atac_compartments <- getATACABsignal( - k562_scatac_chr14, - parallel = FALSE, - chr = "chr14", - bootstrap = FALSE, - genome = "hg19", - group = TRUE - ) -} -} diff --git a/man/getArrayABsignal.Rd b/man/getArrayABsignal.Rd deleted file mode 100644 index 330db367..00000000 --- a/man/getArrayABsignal.Rd +++ /dev/null @@ -1,75 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getArrayABsignal.R -\name{getArrayABsignal} -\alias{getArrayABsignal} -\title{Estimate A/B compartments from methylation array data} -\usage{ -getArrayABsignal( - obj, - res = 1000000, - parallel = TRUE, - chr = NULL, - targets = NULL, - preprocess = TRUE, - cores = 2, - bootstrap = TRUE, - num.bootstraps = 1000, - genome = c("hg19", "hg38", "mm9", "mm10"), - other = NULL, - array.type = c("hm450", "EPIC"), - group = FALSE, - boot.parallel = TRUE, - boot.cores = 2 -) -} -\arguments{ -\item{obj}{Input SummarizedExperiment object} - -\item{res}{Compartment resolution in bp} - -\item{parallel}{Whether to run samples in parallel} - -\item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} - -\item{targets}{Samples/cells to shrink towards} - -\item{preprocess}{Whether to preprocess the arrays prior to compartment inference} - -\item{cores}{How many cores to use when running samples in parallel} - -\item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} - -\item{num.bootstraps}{How many bootstraps to run} - -\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} - -\item{other}{Another arbitrary genome to compute compartments on} - -\item{array.type}{What type of array is this ("hm450", "EPIC")} - -\item{group}{Whether to treat this as a group set of samples} - -\item{boot.parallel}{Whether to run the bootstrapping in parallel} - -\item{boot.cores}{How many cores to use for the bootstrapping} -} -\value{ -A RaggedExperiment of inferred compartments -} -\description{ -\code{getArrayABsignal} returns estimated A/B compartments from methylation array data. -} -\examples{ - -if (requireNamespace("minfi", quietly = TRUE)) { - data("array_data_chr14", package = "compartmap") - array_compartments <- getArrayABsignal( - array.data.chr14, - parallel=FALSE, - chr="chr14", - bootstrap=FALSE, - genome="hg19", - array.type="hm450" - ) -} -} diff --git a/man/getCompartments.Rd b/man/getCompartments.Rd new file mode 100644 index 00000000..8dd97d7e --- /dev/null +++ b/man/getCompartments.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getCompartments.R +\name{getCompartments} +\alias{getCompartments} +\title{Run compartment inference} +\usage{ +getCompartments( + obj, + res, + chr, + group, + targets, + bootstrap, + num.bootstraps, + genome, + assay, + boot.parallel, + bpparams +) +} +\description{ +Run compartment inference +} diff --git a/man/get_bpnworkers.Rd b/man/get_bpnworkers.Rd new file mode 100644 index 00000000..8bad4025 --- /dev/null +++ b/man/get_bpnworkers.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{get_bpnworkers} +\alias{get_bpnworkers} +\title{Get the total number of BiocParallelParam workers that will get used +From a BiocParallelParam or list of 2 BiocParallelParam objects} +\usage{ +get_bpnworkers(bp) +} +\description{ +Get the total number of BiocParallelParam workers that will get used +From a BiocParallelParam or list of 2 BiocParallelParam objects +} +\keyword{internal} diff --git a/man/get_nested_params.Rd b/man/get_nested_params.Rd new file mode 100644 index 00000000..2e7ed1bc --- /dev/null +++ b/man/get_nested_params.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{get_nested_params} +\alias{get_nested_params} +\title{Set outer and inner params for nester parallelization +The outer param is across the input samples/columns and the second is for +bootstrapping. If \code{boot.parallel} is FALSE, the inner param is set to +\code{SerialParam}.} +\usage{ +get_nested_params(BPPARAM, boot.parallel) +} +\description{ +Set outer and inner params for nester parallelization +The outer param is across the input samples/columns and the second is for +bootstrapping. If \code{boot.parallel} is FALSE, the inner param is set to +\code{SerialParam}. +} +\keyword{internal} diff --git a/man/plotAB.Rd b/man/plotAB.Rd index ad420e8a..10dae644 100644 --- a/man/plotAB.Rd +++ b/man/plotAB.Rd @@ -20,7 +20,7 @@ plotAB( ) } \arguments{ -\item{grAB}{The GRanges object returned from scCompartments and getArrayABsignal} +\item{grAB}{The GRanges object returned from scCompartments and arrayCompartments} \item{chr}{Chromosome to subset to for plotting} diff --git a/man/precomputeBootstrapMeans.Rd b/man/precomputeBootstrapMeans.Rd index 2f46f8ac..6f8536a4 100644 --- a/man/precomputeBootstrapMeans.Rd +++ b/man/precomputeBootstrapMeans.Rd @@ -6,25 +6,22 @@ \usage{ precomputeBootstrapMeans( obj, + BPPARAM, targets = NULL, num.bootstraps = 100, - assay = c("atac", "rna", "array"), - parallel = FALSE, - num.cores = 1 + assay = c("atac", "rna", "array") ) } \arguments{ \item{obj}{Input SummarizedExperiment object} +\item{BPPARAM}{BiocParallelParam for parallelizing computation} + \item{targets}{Optional targets to shrink towards} \item{num.bootstraps}{The number of bootstraps to compute} \item{assay}{What type of assay the data are from} - -\item{parallel}{Whether to run in parallel} - -\item{num.cores}{How many cores to use for parallel processing} } \value{ A matrix of bootstrapped global means diff --git a/man/preprocessArrays.Rd b/man/preprocessArrays.Rd index 972260f5..a040ad43 100644 --- a/man/preprocessArrays.Rd +++ b/man/preprocessArrays.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getArrayABsignal.R +% Please edit documentation in R/preprocessArrays.R \name{preprocessArrays} \alias{preprocessArrays} \title{Preprocess arrays for compartment inference} diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index acaff07e..eae9ca74 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -2,20 +2,20 @@ % Please edit documentation in R/scCompartments.R \name{scCompartments} \alias{scCompartments} -\title{Estimate A/B compartments from single-cell sequencing data} +\title{Estimate A/B compartments from single-cell RNA or ATAC sequencing data} \usage{ scCompartments( obj, res = 1000000, - parallel = FALSE, chr = NULL, + group = FALSE, targets = NULL, - cores = 2, bootstrap = TRUE, num.bootstraps = 100, genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, - assay = c("atac", "rna") + assay = c("atac", "rna"), + boot.parallel = FALSE, + BPPARAM = bpparam() ) } \arguments{ @@ -23,13 +23,11 @@ scCompartments( \item{res}{Compartment resolution in bp} -\item{parallel}{Whether to run samples in parallel} - \item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} -\item{targets}{Samples/cells to shrink towards} +\item{group}{Whether to treat this as a group set of samples} -\item{cores}{How many cores to use when running samples in parallel} +\item{targets}{Samples/cells to shrink towards} \item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} @@ -37,9 +35,11 @@ scCompartments( \item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} -\item{group}{Whether to treat this as a group set of samples} - \item{assay}{What type of single-cell assay is the input data ("atac" or "rna")} + +\item{boot.parallel}{Whether to run the bootstrapping in parallel. See details.} + +\item{BPPARAM}{BiocParallelParam object to use for parallelization. See details.} } \value{ A RaggedExperiment of inferred compartments @@ -47,11 +47,79 @@ A RaggedExperiment of inferred compartments \description{ \code{scCompartments} returns estimated A/B compartments from sc-seq data. } +\details{ +compartmap uses \code{BiocParallel} to parallelize operations in four +configurations. The default setting is to parallelize across columns but not +bootstraps using the thread count as reported by \code{BiocParallel::bpparam()}, +which is usually two cores fewer than the number of available cores. +Parallel bootstrapping is disabled by default to avoid nested parallelism +issues but can be done independent of column-wise parallelization. +\subsection{Available configurations}{ +\subsection{Serial bootstrapping}{ +\itemize{ +\item Serially with just one core: +\code{BPPARAM = BiocParallel::SerialParam()} +\item Parallel across columns and serially across bootstraps: +\code{BPPARAM = BiocParallel::MulticoreParam(n)} where \code{n} is the number of +threads to use +} + +See \code{?BiocParallel::BiocParallelParam} for other parallel backends. Parallel +backends may also be passed to \code{BiocParallel::register()} to make them +available to \code{bpparam()}. +} + +\subsection{Parallel bootstrapping}{ + +Set \code{boot.parallel = TRUE} for one the these configurations: +\itemize{ +\item Serially across columns and parallel across bootstraps: Set `BPPARAM = +list(SerialParam(), MulticoreParam(n))' +\item Parallel across both columns and bootstraps: Set \code{BPPARAM = list(MulticoreParam(outer), MulticoreParam(inner))} where \code{outer} is the +thread count for column-wise operations and \code{inner} the thread count for +bootstrapping. The required number of threads is given by +} + +\code{( outer * inner ) + outer} + +which is more easily calculated as \code{outer * (inner + 1)}. + +We recommend using an explicit list of two BiocParallelParam backends over +relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. +With nested \code{bplapply} calls, the registered backend is used for both the +outer and inner parallel loops. On a system with 8 available threads if the +registered backend asks for 4 workers, it will try to use 20 threads in the +nested loops. Instead to use all 8 cores, set +\code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. +} + +\subsection{Load balancing}{ + +Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +in parallel, you can use nested parallelism. If you are working on just 1 +chromosome, put all cores into the inner bootstrapping backend. Conversely +with multiple chromosomes without bootstrapping, put all available workers in +the outer loop. + +In general, use more 'outer' workers, which loop over chromosomes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +inner. + +When \code{group = FALSE}, use \code{MulticoreParam()} only on the outer workers. We +find that parallelizing at both column and bootstrap levels with the +single-cell inference is slower than only parallelizing at the column-level. + +With \code{group = TRUE}, minimize the difference between the two worker counts: +with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +inner. +} + +} +} \examples{ data("k562_scrna_chr14", package = "compartmap") sc_compartments <- scCompartments( k562_scrna_chr14, - parallel = FALSE, chr = "chr14", bootstrap = FALSE, genome = "hg19" diff --git a/man/verify_bp.Rd b/man/verify_bp.Rd new file mode 100644 index 00000000..0869d5d3 --- /dev/null +++ b/man/verify_bp.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{verify_bp} +\alias{verify_bp} +\title{Verify that the input BiocParallelParam is valid} +\usage{ +verify_bp(bp) +} +\arguments{ +\item{A}{BiocParallelParam or list of 2 BiocParallelParam objects} +} +\value{ +TRUE if the total \code{bpnworkers} in the input does not exceed +available resources as defined by \code{parallelly::availableCores()} +} +\description{ +Verify that the input BiocParallelParam is valid +} +\keyword{internal} diff --git a/man/verify_workers.Rd b/man/verify_workers.Rd new file mode 100644 index 00000000..d3117a1d --- /dev/null +++ b/man/verify_workers.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{verify_workers} +\alias{verify_workers} +\title{Verify that requested thread count is not higher than available} +\usage{ +verify_workers(n_workers) +} +\arguments{ +\item{thread_count}{The number of workers to check availability} +} +\value{ +TRUE if the requested \code{thread_count} does not exceed available +resources as defined by \code{parallelly::availableCores()} +} +\description{ +Verify that requested thread count is not higher than available +} +\keyword{internal} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 62946a52..c1e753db 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -23,14 +23,13 @@ reference: - title: Compartment mapping contents: - scCompartments - - getArrayABsignal + - arrayCompartments - title: Compartment helper contents: - filterCompartments - fixCompartments - extractOpenClosed - getABSignal - - getATACABsignal - getRNAABsignal - title: Correlation matrix contents: diff --git a/tests/testthat/test-fixCompartments.R b/tests/testthat/test-fixCompartments.R index e4b7cc28..afb87891 100644 --- a/tests/testthat/test-fixCompartments.R +++ b/tests/testthat/test-fixCompartments.R @@ -80,6 +80,7 @@ test_that("flipper", { }) test_that("fixCompartments", { + futile.logger::flog.threshold(1) lapply(1:length(gr.expected), function(i) { expect_equal( fixCompartments(gr.input[[i]], min.conf), diff --git a/tests/testthat/test-getArrayABsignal.R b/tests/testthat/test-getArrayABsignal.R index f4ded02d..07696f14 100644 --- a/tests/testthat/test-getArrayABsignal.R +++ b/tests/testthat/test-getArrayABsignal.R @@ -1,10 +1,10 @@ se.noranges <- SummarizedExperiment(rowRanges = GRanges()) se.bad <- SummarizedExperiment(rowRanges = GRanges("chr1:1-10"), assays = SimpleList(a = matrix())) -test_that("getArrayABsignal bad inputs", { - expect_error(getArrayABsignal(iris), err.verifySE) - expect_error(getArrayABsignal(se.noranges), err.verifyCoords) - expect_error(getArrayABsignal(se.bad), err.verifyAssayNames.beta) +test_that("arrayCompartments bad inputs", { + expect_error(arrayCompartments(iris), err.verifySE) + expect_error(arrayCompartments(se.noranges), err.verifyCoords) + expect_error(arrayCompartments(se.bad), err.verifyAssayNames.beta) }) test_that(".preprocessArrays", { diff --git a/tests/testthat/test-getCorMatrix.R b/tests/testthat/test-getCorMatrix.R index 50e1d101..acc855af 100644 --- a/tests/testthat/test-getCorMatrix.R +++ b/tests/testthat/test-getCorMatrix.R @@ -10,10 +10,6 @@ test_that("getCorMatrix", { expected.result <- list(gr.cor = gr, binmat.cor = expected.cormat) expected.result.squeezed <- list(gr.cor = gr, binmat.cor = expected.cormat.squeezed) - expect_message(getCorMatrix(binmat), "Calculating correlations") - expect_message(getCorMatrix(binmat), "Done") - expect_no_warning(expect_message(getCorMatrix(binmat))) - expect_equal(getCorMatrix(binmat), expected.result) expect_equal(getCorMatrix(binmat, squeeze = TRUE), expected.result.squeezed) }) diff --git a/tests/testthat/test-getGlobalMeans.R b/tests/testthat/test-getGlobalMeans.R index 28c84c99..48733685 100644 --- a/tests/testthat/test-getGlobalMeans.R +++ b/tests/testthat/test-getGlobalMeans.R @@ -65,18 +65,18 @@ test_that("getGlobalMeans", { test_that("precomputeBootstrapMeans", { expected.rownames <- as.character(gr) - boot.mean <- precomputeBootstrapMeans(se.rna, num.bootstraps = 2) + boot.mean <- precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2) expect_equal(rownames(boot.mean), expected.rownames) lapply(1:50, function(boot_count) { - boot.mean <- precomputeBootstrapMeans(se.rna, num.bootstraps = boot_count) + boot.mean <- precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = boot_count) expect_equal(ncol(boot.mean), boot_count) }) expect_error( - precomputeBootstrapMeans(se.rna, num.bootstraps = 2, targets = c(1:4)), + precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2, targets = c(1:4)), "Need 5 or more samples for targeted bootstrapping to work." ) expect_no_error( - precomputeBootstrapMeans(se.rna, num.bootstraps = 2, targets = c(1:5)) + precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2, targets = c(1:5)) ) }) diff --git a/tests/testthat/test-meanSmoother.R b/tests/testthat/test-meanSmoother.R index dc6b9fe9..2c00347b 100644 --- a/tests/testthat/test-meanSmoother.R +++ b/tests/testthat/test-meanSmoother.R @@ -1,10 +1,5 @@ test_that("meanSmoother", { mat <- matrix(1:5, nrow = 5, ncol = 5) - expect_message( - meanSmoother(mat, k = 0), - "Returning unsmoothed mat as 'k' = 0", - fixed = TRUE - ) expect_equal( meanSmoother(mat, k = 0), mat diff --git a/tests/testthat/test-plotAB.R b/tests/testthat/test-plotAB.R index d04b0867..3634abb3 100644 --- a/tests/testthat/test-plotAB.R +++ b/tests/testthat/test-plotAB.R @@ -34,11 +34,7 @@ test_that(".unitarize", { scaling <- sqrt(sum(centered[-4]^2)) centered / scaling } - expect_message( - compartmap:::.unitarize(vec.withNA, medianCenter = FALSE), - "[.unitarize] 1 missing values were ignored.", - fixed = TRUE - ) + expect_equal(compartmap:::.unitarize(vec.withNA), expected.withNA) expected.withNA.nomedian <- { diff --git a/tests/testthat/test-summarizeBootstraps.R b/tests/testthat/test-summarizeBootstraps.R index 4b6ec355..a330d3a2 100644 --- a/tests/testthat/test-summarizeBootstraps.R +++ b/tests/testthat/test-summarizeBootstraps.R @@ -57,8 +57,6 @@ test_that("summarizeBootstraps", { removeEmptyBoots(list(gr1, gr2)) - expect_message(summarizeBootstraps(list(gr1, gr1), gr1), "Summarizing bootstraps") - tester <- function(one, two, expected) { mcols(gr1) <- data.frame(pc = one) gr1$compartments <- ifelse(mcols(gr1)$pc > 0, "open", "closed") diff --git a/vignettes/compartmap.Rmd b/vignettes/compartmap.Rmd index 86324010..5aa79522 100644 --- a/vignettes/compartmap.Rmd +++ b/vignettes/compartmap.Rmd @@ -54,7 +54,8 @@ k562_compartments <- scCompartments( group = TRUE, bootstrap = FALSE, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) ``` @@ -77,7 +78,8 @@ k562_compartments.boot <- scCompartments( bootstrap = TRUE, num.bootstraps = 10, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) # Flip the domain sign if the sign coherence is discordant in 80% of the bootstraps @@ -280,7 +282,8 @@ k562_scrna_chr14_raw_domains <- scCompartments(k562_scrna_se_chr14, bootstrap = TRUE, num.bootstraps = 10, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) ```