microbiome · raivo-otus · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 21, 2026
diff --git a/R/transformCounts.R b/R/transformCounts.R
@@ -36,6 +36,8 @@
 #'   to fill reference sample's column in returned assay when calculating alr.
 #'   (Default: \code{NA})
 #'   \item \code{ref_vals} Deprecated. Use \code{reference} instead.
+#'   \item \code{nbins}: \code{Numeric scalar}. For \code{"binning"}, specifies
+#'   the number of bins to use. (Default: \code{4})
 #'   \item \code{percentile}: \code{Numeric scalar} or \code{NULL} (css). Used
 #'   to set the  percentile value that calculates the scaling factors in the css
 #'   normalization. If \code{NULL}, percentile is estimated from the data by
@@ -126,6 +128,13 @@
 #' are taken into account. This method keeps only values greater than
 #' \code{threshold} and replaces all other values with \code{value}.
 #'
+#' \item 'binning': Binning of the abundance values into a specified number of
+#' bins. The non-zero values are sorted and divided into \code{nbins} groups of
+#' equal size (quantiles). The group with the highest abundances is assigned to
+#' bin \code{nbins}, while the group with the lowest non-zero abundances is
+#' assigned to bin 1. Zero values are assigned to bin 0. This binning approach
+#' is based on the binning strategy described by Medearis et al. (2026).
+#'
 #' }
 #'
 #' @return
@@ -140,6 +149,11 @@
 #' _Nature Methods_ 10, 1200–1202.
 #' doi:10.1038/nmeth.2658
 #'
+#' Medearis, N. A., Zhu, S., & Zomorrodi, A. R. (2026).
+#' BiomeGPT: A foundation model for the human gut microbiome
+#' _bioRxiv_
+#' doi:10.64898/2026.01.05.697599
+#'
 #' @seealso
 #' \itemize{
 #'   \item \code{\link[vegan:decostand]{vegan::decostand}}
@@ -269,7 +283,7 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
 .transform_assay <- function(
         x, assay.type = "counts", assay_name = NULL,
         method = c(
-            "alr", "chi.square", "clr", "css", "cutoff", "difference", "-",
+            "alr", "binning", "chi.square", "clr", "css", "cutoff", "difference", "-",
             "division", "/", "frequency", "hellinger", "invnorm", "log",
             "log10", "log2", "max", "normalize", "pa", "philr", "pseudocount",
             "range", "rank", "rclr", "relabundance", "rrank", "standardize",
@@ -325,8 +339,8 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
     attr(assay, "pseudocount") <- NULL
     # Calls help function that does the transformation
     # Help function is different for mia and vegan transformations
-    if( method %in% c(
-            "log10", "log2", "css", "difference", "division", "invnorm") ){
+    if( method %in% c("binning", "log10", "log2", "css", "difference",
+            "division", "invnorm") ){
         transformed_table <- .apply_transformation(
             assay, method, MARGIN, ...)
     } else if( method %in% c("philr") ){
@@ -360,6 +374,7 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
     # Function is selected based on the "method" variable
     FUN <- switch(
         method,
+        binning = .apply_binning,
         log10 = .calc_log,
         log2 = .calc_log,
         css = .calc_css,
@@ -907,6 +922,67 @@ NULL
     return(res)
 }
 
+################################ .apply_binning ################################
+# This function divides the data into a specified number of bins.
+.apply_binning <- function(mat, nbins = 4, ...){
+    # Check that nbins is a single positive numeric value
+    if( !.is_a_numeric(nbins) || nbins <= 0 ){
+        stop("'nbins' must be a single positive numeric value.", call. = FALSE)
+    }
+    nbins <- as.integer(nbins)
+
+    # Check does not contain negative numeric values
+    if( !all(mat >= 0) ) {
+        stop("The assay contains negative numeric values. Resulting bins will not make sense.", call. = FALSE)
+    }
+
+    # Apply binning
+    res <- apply(mat, MARGIN = 2, function(x) {
+        # Initialize result with 0 (for zero values)
+        res <- rep(0, length(x))
+
+        # Identify non-zero values
+        is_nonzero <- x != 0
+        n_nonzero <- sum(is_nonzero)
+
+        if (n_nonzero > 0) {
+            # Get indices of non-zero values
+            nonzero_indices <- which(is_nonzero)
+            nonzero_values <- x[nonzero_indices]
+
+            # Sort indices based on values (descending)
+            ord <- order(nonzero_values, decreasing = TRUE)
+
+            # Calculate bin assignments
+            if( n_nonzero < nbins ){
+                # For samples with fewer than B non-zero abundance species,
+                # species are distributed proportionally across nbins 1 through B
+                bin_values <- round(seq(from = nbins, to = 1, length.out = n_nonzero))
+            } else {
+                # Get cut points
+                bin_idx <- cut(
+                    seq_len(n_nonzero),
+                    breaks = seq(0, n_nonzero, length.out = nbins + 1),
+                    labels = FALSE
+                )
+                bin_values <- nbins - bin_idx + 1
+            }
+
+            # Assign back
+            res[nonzero_indices[ord]] <- bin_values
+        }
+        return(res)
+    })
+
+    # Ensure dimensions are preserved (apply simplifies to vector if dim is 1)
+    if( is.null(dim(res)) && !is.null(dim(mat)) ){
+        dim(res) <- dim(mat)
+    }
+    dimnames(res) <- dimnames(mat)
+
+    return(res)
+}
+
 # This function is used to add transformed table back to TreeSE. With most of
 # the methods it is simple: it is added to assay. However, with transformations
 # that change the dimensionality (e.g. philr, difference, division), the

diff --git a/man/transformAssay.Rd b/man/transformAssay.Rd
diff --git a/tests/testthat/test-5transformCounts.R b/tests/testthat/test-5transformCounts.R
@@ -423,6 +423,54 @@ test_that("transformAssay", {
             "'value' must be a single numeric value or NA"
         )
 
+        ############################## BINNING ###############################
+        # Test that binning transformation works
+        tse_bin <- transformAssay(tse, method = "binning", nbins = 3)
+
+        # Expect warning trying to bin negative values
+        tse <- transformAssay(tse, method = "rclr")
+        expect_error(transformAssay(tse, method = "binning", assay.type = "rclr"))
+
+        # Check that the assay was created
+        expect_true("binning" %in% assayNames(tse_bin))
+
+        # Check that values are between 0 and 3
+        binned_assay <- assay(tse_bin, "binning")
+        expect_true(all(binned_assay >= 0 & binned_assay <= 3, na.rm = TRUE))
+
+        # Check that 0s are 0
+        counts <- assay(tse, "counts")
+        expect_true(all(binned_assay[counts == 0] == 0))
+
+        # Check non-zeros are > 0
+        expect_true(all(binned_assay[counts != 0] > 0))
+
+        # Manual check for N < B case
+        # 2 non-zero values, 4 nbins. Should map to 4 and 1.
+        test_mat <- matrix(c(10, 5, 0, 0), ncol=1)
+        tse_test <- SummarizedExperiment(assays = list(counts = test_mat))
+        tse_test <- transformAssay(tse_test, method = "binning", nbins = 4)
+        expect_equal(as.vector(assay(tse_test, "binning")), c(4, 1, 0, 0))
+
+        # Test error
+        expect_error(transformAssay(tse, method = "binning", nbins = "a"))
+        expect_error(transformAssay(tse, method = "binning", nbins = 0))
+
+        # Test feature-wise binning
+        tse_bin_feat <- transformAssay(tse, method = "binning", nbins = 3, MARGIN = "features")
+        expect_equal(dim(assay(tse_bin_feat, "binning")), dim(assay(tse, "counts")))
+
+        # Manual check for feature-wise
+        # 2 features, 3 samples
+        mat_feat <- matrix(c(10, 5, 0, 20, 0, 10), nrow=2, byrow=TRUE)
+        tse_feat <- SummarizedExperiment(assays = list(counts = mat_feat))
+        tse_feat <- transformAssay(tse_feat, method = "binning", nbins = 3, MARGIN = "features")
+        res_feat <- assay(tse_feat, "binning")
+
+        # Check rows (features)
+        expect_equal(as.vector(res_feat[1,]), c(3, 1, 0))
+        expect_equal(as.vector(res_feat[2,]), c(3, 0, 1))
+
 	      ############################## DIFFERENCE #############################
         # Test that difference transformation works on GlobalPatterns subset
         # Load data