Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 79 additions & 3 deletions R/transformCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
#' to fill reference sample's column in returned assay when calculating alr.
#' (Default: \code{NA})
#' \item \code{ref_vals} Deprecated. Use \code{reference} instead.
#' \item \code{nbins}: \code{Numeric scalar}. For \code{"binning"}, specifies
#' the number of bins to use. (Default: \code{4})
#' \item \code{percentile}: \code{Numeric scalar} or \code{NULL} (css). Used
#' to set the percentile value that calculates the scaling factors in the css
#' normalization. If \code{NULL}, percentile is estimated from the data by
Expand Down Expand Up @@ -126,6 +128,13 @@
#' are taken into account. This method keeps only values greater than
#' \code{threshold} and replaces all other values with \code{value}.
#'
#' \item 'binning': Binning of the abundance values into a specified number of
#' bins. The non-zero values are sorted and divided into \code{nbins} groups of
#' equal size (quantiles). The group with the highest abundances is assigned to
#' bin \code{nbins}, while the group with the lowest non-zero abundances is
#' assigned to bin 1. Zero values are assigned to bin 0. This binning approach
#' is based on the binning strategy described by Medearis et al. (2026).
#'
#' }
#'
#' @return
Expand All @@ -140,6 +149,11 @@
#' _Nature Methods_ 10, 1200–1202.
#' doi:10.1038/nmeth.2658
#'
#' Medearis, N. A., Zhu, S., & Zomorrodi, A. R. (2026).
#' BiomeGPT: A foundation model for the human gut microbiome
#' _bioRxiv_
#' doi:10.64898/2026.01.05.697599
#'
#' @seealso
#' \itemize{
#' \item \code{\link[vegan:decostand]{vegan::decostand}}
Expand Down Expand Up @@ -269,7 +283,7 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
.transform_assay <- function(
x, assay.type = "counts", assay_name = NULL,
method = c(
"alr", "chi.square", "clr", "css", "cutoff", "difference", "-",
"alr", "binning", "chi.square", "clr", "css", "cutoff", "difference", "-",
"division", "/", "frequency", "hellinger", "invnorm", "log",
"log10", "log2", "max", "normalize", "pa", "philr", "pseudocount",
"range", "rank", "rclr", "relabundance", "rrank", "standardize",
Expand Down Expand Up @@ -325,8 +339,8 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
attr(assay, "pseudocount") <- NULL
# Calls help function that does the transformation
# Help function is different for mia and vegan transformations
if( method %in% c(
"log10", "log2", "css", "difference", "division", "invnorm") ){
if( method %in% c("binning", "log10", "log2", "css", "difference",
"division", "invnorm") ){
transformed_table <- .apply_transformation(
assay, method, MARGIN, ...)
} else if( method %in% c("philr") ){
Expand Down Expand Up @@ -360,6 +374,7 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
# Function is selected based on the "method" variable
FUN <- switch(
method,
binning = .apply_binning,
log10 = .calc_log,
log2 = .calc_log,
css = .calc_css,
Expand Down Expand Up @@ -907,6 +922,67 @@ NULL
return(res)
}

################################ .apply_binning ################################
# This function divides the data into a specified number of bins.
.apply_binning <- function(mat, nbins = 4, ...){
# Check that nbins is a single positive numeric value
if( !.is_a_numeric(nbins) || nbins <= 0 ){
stop("'nbins' must be a single positive numeric value.", call. = FALSE)
}
nbins <- as.integer(nbins)

# Check does not contain negative numeric values
if( !all(mat >= 0) ) {
stop("The assay contains negative numeric values. Resulting bins will not make sense.", call. = FALSE)
}

# Apply binning
res <- apply(mat, MARGIN = 2, function(x) {
# Initialize result with 0 (for zero values)
res <- rep(0, length(x))

# Identify non-zero values
is_nonzero <- x != 0
n_nonzero <- sum(is_nonzero)

if (n_nonzero > 0) {
# Get indices of non-zero values
nonzero_indices <- which(is_nonzero)
nonzero_values <- x[nonzero_indices]

# Sort indices based on values (descending)
ord <- order(nonzero_values, decreasing = TRUE)

# Calculate bin assignments
if( n_nonzero < nbins ){
# For samples with fewer than B non-zero abundance species,
# species are distributed proportionally across nbins 1 through B
bin_values <- round(seq(from = nbins, to = 1, length.out = n_nonzero))
} else {
# Get cut points
bin_idx <- cut(
seq_len(n_nonzero),
breaks = seq(0, n_nonzero, length.out = nbins + 1),
labels = FALSE
)
bin_values <- nbins - bin_idx + 1
}

# Assign back
res[nonzero_indices[ord]] <- bin_values
}
return(res)
})

# Ensure dimensions are preserved (apply simplifies to vector if dim is 1)
if( is.null(dim(res)) && !is.null(dim(mat)) ){
dim(res) <- dim(mat)
}
dimnames(res) <- dimnames(mat)

return(res)
}

# This function is used to add transformed table back to TreeSE. With most of
# the methods it is simple: it is added to assay. However, with transformations
# that change the dimensionality (e.g. philr, difference, division), the
Expand Down
2 changes: 2 additions & 0 deletions man/transformAssay.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 48 additions & 0 deletions tests/testthat/test-5transformCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,54 @@ test_that("transformAssay", {
"'value' must be a single numeric value or NA"
)

############################## BINNING ###############################
# Test that binning transformation works
tse_bin <- transformAssay(tse, method = "binning", nbins = 3)

# Expect warning trying to bin negative values
tse <- transformAssay(tse, method = "rclr")
expect_error(transformAssay(tse, method = "binning", assay.type = "rclr"))

# Check that the assay was created
expect_true("binning" %in% assayNames(tse_bin))

# Check that values are between 0 and 3
binned_assay <- assay(tse_bin, "binning")
expect_true(all(binned_assay >= 0 & binned_assay <= 3, na.rm = TRUE))

# Check that 0s are 0
counts <- assay(tse, "counts")
expect_true(all(binned_assay[counts == 0] == 0))

# Check non-zeros are > 0
expect_true(all(binned_assay[counts != 0] > 0))

# Manual check for N < B case
# 2 non-zero values, 4 nbins. Should map to 4 and 1.
test_mat <- matrix(c(10, 5, 0, 0), ncol=1)
tse_test <- SummarizedExperiment(assays = list(counts = test_mat))
tse_test <- transformAssay(tse_test, method = "binning", nbins = 4)
expect_equal(as.vector(assay(tse_test, "binning")), c(4, 1, 0, 0))

# Test error
expect_error(transformAssay(tse, method = "binning", nbins = "a"))
expect_error(transformAssay(tse, method = "binning", nbins = 0))

# Test feature-wise binning
tse_bin_feat <- transformAssay(tse, method = "binning", nbins = 3, MARGIN = "features")
expect_equal(dim(assay(tse_bin_feat, "binning")), dim(assay(tse, "counts")))

# Manual check for feature-wise
# 2 features, 3 samples
mat_feat <- matrix(c(10, 5, 0, 20, 0, 10), nrow=2, byrow=TRUE)
tse_feat <- SummarizedExperiment(assays = list(counts = mat_feat))
tse_feat <- transformAssay(tse_feat, method = "binning", nbins = 3, MARGIN = "features")
res_feat <- assay(tse_feat, "binning")

# Check rows (features)
expect_equal(as.vector(res_feat[1,]), c(3, 1, 0))
expect_equal(as.vector(res_feat[2,]), c(3, 0, 1))

############################## DIFFERENCE #############################
# Test that difference transformation works on GlobalPatterns subset
# Load data
Expand Down
Loading