Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0485e12
refactor(getArrayABsignal): extract preprocessArrays to own file
jamespeapen Oct 22, 2025
e6d4189
refactor(getCompartments): extract common compartment worker
jamespeapen Oct 22, 2025
bef8044
refactor(scCompartments): directly call getCompartments without getAT…
jamespeapen Oct 23, 2025
62e6e41
!fix(sc|arrayCompartments): add missing args and reorder for consistency
jamespeapen Oct 23, 2025
815ae54
!fix(arrayCompartments): rename getArrayABsignal to arrayCompartments
jamespeapen Oct 23, 2025
9382e7b
docs(scCompartments): specify that inputs are RNA or ATAC
jamespeapen Oct 23, 2025
f839a82
refactor: reorganize the arg order in top level functions
jamespeapen Nov 6, 2025
a605507
feat(threads): add BiocParallel worker checking functions
jamespeapen Nov 13, 2025
5366ba7
wip: add args for parallelization for scCompartments/arrayCompartments
jamespeapen Nov 13, 2025
3c32725
wip: parallelize precomputeBootstrapMeans
jamespeapen Nov 13, 2025
c19d02f
wip: parallelize inference and bootstrapping,
jamespeapen Nov 13, 2025
145bc95
chore(flake): update with parallelly
jamespeapen Nov 13, 2025
d2bb05c
docs: add parallelization details
jamespeapen Nov 13, 2025
f0e3019
docs(vignettes): update call calls with BPPARAM
jamespeapen Nov 13, 2025
edd0227
add logging
jamespeapen Nov 13, 2025
b4326dd
fix(getCompartments): remove bpparams setting
jamespeapen Nov 13, 2025
34488de
fix(logging): use futil.logger and move most to debug level
jamespeapen Nov 13, 2025
af5477a
fix(bootstrap): don't show progress bar in the bootstrap step
jamespeapen Nov 13, 2025
fffcd1b
fix(parallel): correctly calculate total when outer/inner is 1
jamespeapen Nov 13, 2025
5f9cc9b
fix(bootstrap): only message about slow bootstrap if bootstrapping
jamespeapen Nov 13, 2025
c29e089
fix(logging): move bootstrap message after computing message
jamespeapen Nov 13, 2025
eeb7361
docs(parallel): add info on load balancing different configs
jamespeapen Nov 14, 2025
cb2bdc8
feat(parallel): check if user BPPARAM settings are optimal
jamespeapen Nov 14, 2025
dbd8800
fix(bootstrapping): don't warn about slow serial bootstrapping
jamespeapen Nov 14, 2025
8dbaa4c
fix(parallel): add missing SerialParam import
jamespeapen Dec 19, 2025
b9c6162
chore(parallel): fix 'chromosme' typo
jamespeapen Dec 19, 2025
2feb10b
docs(parallel): add check_optim
jamespeapen Dec 19, 2025
29af882
chore(DESCRIPTION): add missing parallelly
jamespeapen Dec 19, 2025
bccf57e
test: remove message tests since they're now handled by futile.logger
jamespeapen Dec 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,15 @@ Depends:
HDF5Array
Imports:
BiocSingular,
BiocParallel,
futile.logger,
rlang,
S4Vectors,
IRanges,
GenomicRanges,
GenomeInfoDb,
parallel,
parallelly,
methods,
grid,
ggplot2,
Expand Down
9 changes: 7 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(agrestiCoullCI)
export(arrayCompartments)
export(bootstrapCompartments)
export(condenseRE)
export(condenseSE)
Expand All @@ -13,8 +14,6 @@ export(fisherZ)
export(fixCompartments)
export(flogit)
export(getABSignal)
export(getATACABsignal)
export(getArrayABsignal)
export(getAssayNames)
export(getBinMatrix)
export(getChrs)
Expand Down Expand Up @@ -50,6 +49,10 @@ import(HDF5Array)
import(Matrix)
import(RaggedExperiment)
import(SummarizedExperiment)
importFrom(BiocParallel,SerialParam)
importFrom(BiocParallel,bplapply)
importFrom(BiocParallel,bpnworkers)
importFrom(BiocParallel,bpparam)
importFrom(BiocSingular,IrlbaParam)
importFrom(BiocSingular,runSVD)
importFrom(GenomeInfoDb,"seqlevelsStyle<-")
Expand All @@ -60,6 +63,8 @@ importFrom(GenomeInfoDb,seqlengths)
importFrom(GenomeInfoDb,seqlevels)
importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,subjectHits)
importFrom(futile.logger,flog.debug)
importFrom(futile.logger,flog.info)
importFrom(ggplot2,aes)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,geom_raster)
Expand Down
95 changes: 95 additions & 0 deletions R/arrayCompartments.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#' @title Estimate A/B compartments from methylation array data
#'
#' @description
#' \code{arrayCompartments} returns estimated A/B compartments from methylation array data.
#'
#' @param obj Input SummarizedExperiment object
#' @param res Compartment resolution in bp
#' @param chr What chromosome to work on (leave as NULL to run on all chromosomes)
#' @param group Whether to treat this as a group set of samples
#' @param targets Samples/cells to shrink towards
#' @param bootstrap Whether we should perform bootstrapping of inferred compartments
#' @param num.bootstraps How many bootstraps to run
#' @param preprocess Whether to preprocess the arrays prior to compartment inference
#' @param array.type What type of array is this ("hm450", "EPIC")
#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10")
#' @param other Another arbitrary genome to compute compartments on
#' @param boot.parallel Whether to run the bootstrapping in parallel. See details.
#' @param BPPARAM BiocParallelParam object to use for parallelization. See details.
#'
#' @inherit scCompartments details
#'
#' @return A RaggedExperiment of inferred compartments
#' @import SummarizedExperiment
#' @import RaggedExperiment
#' @importFrom parallel mclapply
#' @importFrom GenomeInfoDb keepSeqlevels
#' @importFrom methods as
#' @export
#'
#' @examples
#'
#' if (requireNamespace("minfi", quietly = TRUE)) {
#' data("array_data_chr14", package = "compartmap")
#' array_compartments <- arrayCompartments(
#' array.data.chr14,
#' chr="chr14",
#' parallel=FALSE,
#' bootstrap=FALSE,
#' genome="hg19",
#' array.type="hm450"
#' )
#' }
arrayCompartments <- function(
obj,
res = 1e6,
chr = NULL,
group = FALSE,
targets = NULL,
bootstrap = TRUE,
num.bootstraps = 1000,
preprocess = TRUE,
array.type = c("hm450", "EPIC"),
genome = c("hg19", "hg38", "mm9", "mm10"),
other = NULL,
boot.parallel = TRUE,
BPPARAM = bpparam()
) {
verifySE(obj)
verifyCoords(obj)
verifyAssayNames(obj, assay = "array")
bpparams <- get_nested_params(BPPARAM)
check_worker_count(bpparams, group, length(chr), bootstrap)

# preprocess the arrays
if (preprocess) {
obj <- preprocessArrays(
obj = obj,
genome = genome,
other = other,
array.type = array.type
)
}

# critical check if imputation was _not_ done
# to send things back to beta land
is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0
if (!is.beta) {
# send things back to beta land
assays(obj)$Beta <- fexpit(assays(obj)$Beta)
}

getCompartments(
obj = obj,
assay = "array",
res = res,
chr = chr,
targets = targets,
bootstrap = bootstrap,
num.bootstraps = num.bootstraps,
genome = genome,
group = group,
boot.parallel = boot.parallel,
bpparams = bpparams
)
}
66 changes: 32 additions & 34 deletions R/bootstrapCompartments.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#' Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals
#' Non-parametric bootstrapping of compartments and summarization of
#' bootstraps/compute confidence intervals
#'
#' @name bootstrapCompartments
#'
#' @param obj List object of computed compartments for a sample with 'pc' and 'gr' as elements
#' @param original.obj The original, full input SummarizedExperiment of all samples/cells
#' @param BPPARAM BiocParallelParam for parallelizing bootstrapping
#' @param bootstrap.samples How many bootstraps to run
#' @param chr Which chromosome to operate on
#' @param assay What sort of assay are we working on
#' @param parallel Whether to run the bootstrapping in parallel
#' @param cores How many cores to use for parallel processing
#' @param targets Targets to shrink towards
#' @param res The compartment resolution
#' @param genome What genome are we working on
Expand All @@ -18,7 +18,6 @@
#' @param bootstrap.means Pre-computed bootstrap means matrix
#'
#' @return Compartment estimates with summarized bootstraps and confidence intervals
#' @importFrom parallel mclapply
#' @import SummarizedExperiment
#'
#' @examples
Expand All @@ -29,17 +28,16 @@
bootstrapCompartments <- function(
obj,
original.obj,
BPPARAM,
bootstrap.samples = 1000,
chr = "chr14",
group = FALSE,
assay = c("rna", "atac", "array"),
parallel = TRUE,
cores = 2,
targets = NULL,
res = 1e6,
genome = c("hg19", "hg38", "mm9", "mm10"),
q = 0.95,
svd = NULL,
group = FALSE,
bootstrap.means = NULL
) {
# function for nonparametric bootstrap of compartments and compute 95% CIs
Expand All @@ -61,36 +59,36 @@ bootstrapCompartments <- function(
}

# if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.")
if (parallel) {
message("Bootstrapping in parallel with ", cores, " cores.")
} else {
message("Not bootstrapping in parallel will take a long time...")
}

# bootstrap and recompute compartments
resamp.compartments <- mclapply(1:ncol(bmeans), function(b) {
# get the shrunken bins with new global mean
boot.mean <- as.matrix(bmeans[, b])
colnames(boot.mean) <- "globalMean"
s.bins <- shrinkBins(
obj,
original.obj,
prior.means = boot.mean,
chr = chr,
res = res,
assay = assay,
genome = genome
)
cor.bins <- getCorMatrix(s.bins, squeeze = !group)
BiocParallel::bpprogressbar(BPPARAM) <- FALSE
resamp.compartments <- bplapply(
1:ncol(bmeans),
function(b) {
# get the shrunken bins with new global mean
boot.mean <- as.matrix(bmeans[, b])
colnames(boot.mean) <- "globalMean"
s.bins <- shrinkBins(
obj,
original.obj,
prior.means = boot.mean,
chr = chr,
res = res,
assay = assay,
genome = genome
)
cor.bins <- getCorMatrix(s.bins, squeeze = !group)

# Stupid check for perfect correlation with global mean
if (any(is.na(cor.bins$binmat.cor))) {
absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor)))
} else {
absig <- getABSignal(cor.bins, assay = assay)
}
return(absig)
}, mc.cores = ifelse(parallel, cores, 1))
# Stupid check for perfect correlation with global mean
if (any(is.na(cor.bins$binmat.cor))) {
absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor)))
} else {
absig <- getABSignal(cor.bins, assay = assay)
}
return(absig)
},
BPPARAM = BPPARAM
)

# summarize the bootstraps and compute confidence intervals
resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, q = q, assay = assay)
Expand Down
10 changes: 6 additions & 4 deletions R/fixCompartments.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@
#' @examples
fixCompartments <- function(obj, min.conf = 0.8, parallel = FALSE, cores = 1) {
# this function will invert or "fix" compartments based on bootstrapping results
if (is(obj, "RaggedExperiment")) obj <- condenseSE(obj, sample.name = colnames(assay(obj)))
if (is(obj, "RaggedExperiment")) {
obj <- condenseSE(obj, sample.name = colnames(assay(obj)))
}

# if we somehow only have 1 sample
if (is(obj, "GRanges")) {
return(flipper(obj, min.conf))
}

message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%")
flog.info("Fixing compartments using a minimum confidence score of %d%%", min.conf * 100)
# go through and invert compartments based on the min.conf
flip_compartments_lst <- mclapply(obj, flipper, min.conf, mc.cores = ifelse(parallel, cores, 1))
names(flip_compartments_lst) <- names(obj)
Expand All @@ -41,8 +43,8 @@ flipper <- function(input_obj, min.conf) {
stop("Bootstrapping was not performed. Cannot fix compartments.")
}

message("Assuming we only have a single sample to process.")
message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%")
flog.debug("Assuming we only have a single sample to process.")
flog.debug("Fixing compartments using a minimum confidence score of %f %%", min.conf * 100)
invert_compartments <- apply(mcols(input_obj), 1, .inverter, min.conf)
mcols(input_obj)$flip.compartment <- invert_compartments

Expand Down
6 changes: 3 additions & 3 deletions R/getABSignal.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ getABSignal <- function(x, squeeze = FALSE, assay = c("rna", "atac", "array")) {
assay <- match.arg(assay)
gr <- x$gr

message("Calculating eigenvectors.")
flog.debug("Calculating eigenvectors.")
pc <- getSVD(x$binmat.cor, sing.vec = "right")
if (squeeze) pc <- ifisherZ(pc)

message("Smoothing eigenvector.")
flog.debug("Smoothing eigenvector.")
gr$pc <- meanSmoother(pc)
message("Done smoothing.")
flog.debug("Done smoothing.")

gr$compartments <- extractOpenClosed(gr, assay = assay)
return(gr)
Expand Down
Loading
Loading