From 6a3d3ac972f96ccae8b391bd692718cc563e9faf Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Thu, 15 Jan 2026 15:13:32 +0200
Subject: [PATCH 01/11] Add implementation of binning transformation

---
 R/transformCounts.R | 62 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index 40bdbbef3..857ce1a53 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -36,6 +36,8 @@
 #'   to fill reference sample's column in returned assay when calculating alr.
 #'   (Default: \code{NA})
 #'   \item \code{ref_vals} Deprecated. Use \code{reference} instead.
+#'   \item \code{bins}: \code{Numeric scalar}. For \code{"binning"}, specifies
+#'   the number of bins to use. (Default: \code{4})
 #'   \item \code{percentile}: \code{Numeric scalar} or \code{NULL} (css). Used
 #'   to set the  percentile value that calculates the scaling factors in the css
 #'   normalization. If \code{NULL}, percentile is estimated from the data by
@@ -269,7 +271,7 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
 .transform_assay <- function(
         x, assay.type = "counts", assay_name = NULL,
         method = c(
-            "alr", "chi.square", "clr", "css", "cutoff", "difference", "-",
+            "alr", "binning", "chi.square", "clr", "css", "cutoff", "difference", "-",
             "division", "/", "frequency", "hellinger", "invnorm", "log",
             "log10", "log2", "max", "normalize", "pa", "philr", "pseudocount",
             "range", "rank", "rclr", "relabundance", "rrank", "standardize",
@@ -325,8 +327,8 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
     attr(assay, "pseudocount") <- NULL
     # Calls help function that does the transformation
     # Help function is different for mia and vegan transformations
-    if( method %in% c(
-            "log10", "log2", "css", "difference", "division", "invnorm") ){
+    if( method %in% c("binning", "log10", "log2", "css", "difference",
+            "division", "invnorm") ){
         transformed_table <- .apply_transformation(
             assay, method, MARGIN, ...)
     } else if( method %in% c("philr") ){
@@ -360,6 +362,7 @@ setMethod("transformAssay", signature = c(x = "SingleCellExperiment"),
     # Function is selected based on the "method" variable
     FUN <- switch(
         method,
+        binning = .apply_binning,
         log10 = .calc_log,
         log2 = .calc_log,
         css = .calc_css,
@@ -907,6 +910,59 @@ NULL
     return(res)
 }
 
+################################ .apply_binning ################################
+# This function divides the data into a specified number of bins.
+.apply_binning <- function(mat, bins = 4, ...){
+    # Check that bins is a single positive numeric value
+    if( !.is_a_numeric(bins) || bins <= 0 ){
+        stop("'bins' must be a single positive numeric value.", call. = FALSE)
+    }
+    bins <- as.integer(bins)
+
+    # Apply binning
+    res <- apply(mat, MARGIN = 2, function(x) {
+        # Initialize result with 0 (for zero values)
+        res <- rep(0, length(x))
+
+        # Identify non-zero values
+        is_nonzero <- x != 0
+        n_nonzero <- sum(is_nonzero)
+
+        if (n_nonzero > 0) {
+            # Get indices of non-zero values
+            nonzero_indices <- which(is_nonzero)
+            nonzero_values <- x[nonzero_indices]
+
+            # Sort indices based on values (descending)
+            ord <- order(nonzero_values, decreasing = TRUE)
+
+            # Calculate bin assignments
+            if( n_nonzero < bins ){
+                # For samples with fewer than B non-zero abundance species,
+                # species are distributed proportionally across bins 1 through B
+                bin_values <- round(seq(from = bins, to = 1, length.out = n_nonzero))
+            } else {
+                # Ranks 1 to n_nonzero
+                # Formula: bin = B - ceiling(rank * B / N) + 1
+                ranks <- seq_len(n_nonzero)
+                bin_values <- bins - ceiling(ranks * bins / n_nonzero) + 1
+            }
+
+            # Assign back
+            res[nonzero_indices[ord]] <- bin_values
+        }
+        return(res)
+    })
+
+    # Ensure dimensions are preserved (apply simplifies to vector if dim is 1)
+    if( is.null(dim(res)) && !is.null(dim(mat)) ){
+        dim(res) <- dim(mat)
+    }
+    dimnames(res) <- dimnames(mat)
+
+    return(res)
+}
+
 # This function is used to add transformed table back to TreeSE. With most of
 # the methods it is simple: it is added to assay. However, with transformations
 # that change the dimensionality (e.g. philr, difference, division), the

From b7b03ddb406a8855ba82ca25b7c193b694c327e4 Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Thu, 15 Jan 2026 15:14:19 +0200
Subject: [PATCH 02/11] Add unit tests for binning transformation

---
 tests/testthat/test-5transformCounts.R | 43 ++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/testthat/test-5transformCounts.R b/tests/testthat/test-5transformCounts.R
index bb9587179..be536104c 100644
--- a/tests/testthat/test-5transformCounts.R
+++ b/tests/testthat/test-5transformCounts.R
@@ -423,6 +423,49 @@ test_that("transformAssay", {
             "'value' must be a single numeric value or NA"
         )
 
+        ############################## BINNING ###############################
+        # Test that binning transformation works
+        tse_bin <- transformAssay(tse, method = "binning", bins = 3)
+        # Check that the assay was created
+        expect_true("binning" %in% assayNames(tse_bin))
+
+        # Check that values are between 0 and 3
+        binned_assay <- assay(tse_bin, "binning")
+        expect_true(all(binned_assay >= 0 & binned_assay <= 3, na.rm = TRUE))
+
+        # Check that 0s are 0
+        counts <- assay(tse, "counts")
+        expect_true(all(binned_assay[counts == 0] == 0))
+
+        # Check non-zeros are > 0
+        expect_true(all(binned_assay[counts != 0] > 0))
+
+        # Manual check for N < B case
+        # 2 non-zero values, 4 bins. Should map to 4 and 1.
+        test_mat <- matrix(c(10, 5, 0, 0), ncol=1)
+        tse_test <- SummarizedExperiment(assays = list(counts = test_mat))
+        tse_test <- transformAssay(tse_test, method = "binning", bins = 4)
+        expect_equal(as.vector(assay(tse_test, "binning")), c(4, 1, 0, 0))
+
+        # Test error
+        expect_error(transformAssay(tse, method = "binning", bins = "a"))
+        expect_error(transformAssay(tse, method = "binning", bins = 0))
+
+        # Test feature-wise binning
+        tse_bin_feat <- transformAssay(tse, method = "binning", bins = 3, MARGIN = "features")
+        expect_equal(dim(assay(tse_bin_feat, "binning")), dim(assay(tse, "counts")))
+
+        # Manual check for feature-wise
+        # 2 features, 3 samples
+        mat_feat <- matrix(c(10, 5, 0, 20, 0, 10), nrow=2, byrow=TRUE)
+        tse_feat <- SummarizedExperiment(assays = list(counts = mat_feat))
+        tse_feat <- transformAssay(tse_feat, method = "binning", bins = 3, MARGIN = "features")
+        res_feat <- assay(tse_feat, "binning")
+
+        # Check rows (features)
+        expect_equal(as.vector(res_feat[1,]), c(3, 1, 0))
+        expect_equal(as.vector(res_feat[2,]), c(3, 0, 1))
+
 	      ############################## DIFFERENCE #############################
         # Test that difference transformation works on GlobalPatterns subset
         # Load data

From a586a75fe0510d547a430b15d9347556064e98d0 Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Wed, 21 Jan 2026 13:59:32 +0200
Subject: [PATCH 03/11] rename argument bins to nbins

This renaming aligns with other functions in the package.
---
 R/transformCounts.R | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index 857ce1a53..56d7d4e53 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -912,12 +912,12 @@ NULL
 
 ################################ .apply_binning ################################
 # This function divides the data into a specified number of bins.
-.apply_binning <- function(mat, bins = 4, ...){
-    # Check that bins is a single positive numeric value
-    if( !.is_a_numeric(bins) || bins <= 0 ){
-        stop("'bins' must be a single positive numeric value.", call. = FALSE)
+.apply_binning <- function(mat, nbins = 4, ...){
+    # Check that nbins is a single positive numeric value
+    if( !.is_a_numeric(nbins) || nbins <= 0 ){
+        stop("'nbins' must be a single positive numeric value.", call. = FALSE)
     }
-    bins <- as.integer(bins)
+    nbins <- as.integer(nbins)
 
     # Apply binning
     res <- apply(mat, MARGIN = 2, function(x) {
@@ -937,15 +937,15 @@ NULL
             ord <- order(nonzero_values, decreasing = TRUE)
 
             # Calculate bin assignments
-            if( n_nonzero < bins ){
+            if( n_nonzero < nbins ){
                 # For samples with fewer than B non-zero abundance species,
-                # species are distributed proportionally across bins 1 through B
-                bin_values <- round(seq(from = bins, to = 1, length.out = n_nonzero))
+                # species are distributed proportionally across nbins 1 through B
+                bin_values <- round(seq(from = nbins, to = 1, length.out = n_nonzero))
             } else {
                 # Ranks 1 to n_nonzero
                 # Formula: bin = B - ceiling(rank * B / N) + 1
                 ranks <- seq_len(n_nonzero)
-                bin_values <- bins - ceiling(ranks * bins / n_nonzero) + 1
+                bin_values <- nbins - ceiling(ranks * nbins / n_nonzero) + 1
             }
 
             # Assign back

From 0e0a48d700cd6f3d101d7a67afad12a6492496bc Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Wed, 21 Jan 2026 14:01:07 +0200
Subject: [PATCH 04/11] missed rename of bins -> nbins

---
 R/transformCounts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index 56d7d4e53..7cdf96761 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -36,7 +36,7 @@
 #'   to fill reference sample's column in returned assay when calculating alr.
 #'   (Default: \code{NA})
 #'   \item \code{ref_vals} Deprecated. Use \code{reference} instead.
-#'   \item \code{bins}: \code{Numeric scalar}. For \code{"binning"}, specifies
+#'   \item \code{nbins}: \code{Numeric scalar}. For \code{"binning"}, specifies
 #'   the number of bins to use. (Default: \code{4})
 #'   \item \code{percentile}: \code{Numeric scalar} or \code{NULL} (css). Used
 #'   to set the  percentile value that calculates the scaling factors in the css

From 4691df1b74a756de100607a8dd47e4fb4d9d4486 Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Wed, 21 Jan 2026 14:42:18 +0200
Subject: [PATCH 05/11] rename bins to nbins in unit test as well

---
 man/transformAssay.Rd                  |  2 ++
 tests/testthat/test-5transformCounts.R | 14 +++++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/man/transformAssay.Rd b/man/transformAssay.Rd
index e13f18532..a9b543ebf 100644
--- a/man/transformAssay.Rd
+++ b/man/transformAssay.Rd
@@ -31,6 +31,8 @@ or \code{philr::philr}.
 to fill reference sample's column in returned assay when calculating alr.
 (Default: \code{NA})
 \item \code{ref_vals} Deprecated. Use \code{reference} instead.
+\item \code{nbins}: \code{Numeric scalar}. For \code{"binning"}, specifies
+the number of bins to use. (Default: \code{4})
 \item \code{percentile}: \code{Numeric scalar} or \code{NULL} (css). Used
 to set the  percentile value that calculates the scaling factors in the css
 normalization. If \code{NULL}, percentile is estimated from the data by
diff --git a/tests/testthat/test-5transformCounts.R b/tests/testthat/test-5transformCounts.R
index be536104c..487ec4036 100644
--- a/tests/testthat/test-5transformCounts.R
+++ b/tests/testthat/test-5transformCounts.R
@@ -425,7 +425,7 @@ test_that("transformAssay", {
 
         ############################## BINNING ###############################
         # Test that binning transformation works
-        tse_bin <- transformAssay(tse, method = "binning", bins = 3)
+        tse_bin <- transformAssay(tse, method = "binning", nbins = 3)
         # Check that the assay was created
         expect_true("binning" %in% assayNames(tse_bin))
 
@@ -441,25 +441,25 @@ test_that("transformAssay", {
         expect_true(all(binned_assay[counts != 0] > 0))
 
         # Manual check for N < B case
-        # 2 non-zero values, 4 bins. Should map to 4 and 1.
+        # 2 non-zero values, nbins = 4. Should map to 4 and 1.
         test_mat <- matrix(c(10, 5, 0, 0), ncol=1)
         tse_test <- SummarizedExperiment(assays = list(counts = test_mat))
-        tse_test <- transformAssay(tse_test, method = "binning", bins = 4)
+        tse_test <- transformAssay(tse_test, method = "binning", nbins = 4)
         expect_equal(as.vector(assay(tse_test, "binning")), c(4, 1, 0, 0))
 
         # Test error
-        expect_error(transformAssay(tse, method = "binning", bins = "a"))
-        expect_error(transformAssay(tse, method = "binning", bins = 0))
+        expect_error(transformAssay(tse, method = "binning", nbins = "a"))
+        expect_error(transformAssay(tse, method = "binning", nbins = 0))
 
         # Test feature-wise binning
-        tse_bin_feat <- transformAssay(tse, method = "binning", bins = 3, MARGIN = "features")
+        tse_bin_feat <- transformAssay(tse, method = "binning", nbins = 3, MARGIN = "features")
         expect_equal(dim(assay(tse_bin_feat, "binning")), dim(assay(tse, "counts")))
 
         # Manual check for feature-wise
         # 2 features, 3 samples
         mat_feat <- matrix(c(10, 5, 0, 20, 0, 10), nrow=2, byrow=TRUE)
         tse_feat <- SummarizedExperiment(assays = list(counts = mat_feat))
-        tse_feat <- transformAssay(tse_feat, method = "binning", bins = 3, MARGIN = "features")
+        tse_feat <- transformAssay(tse_feat, method = "binning", nbins = 3, MARGIN = "features")
         res_feat <- assay(tse_feat, "binning")
 
         # Check rows (features)

From 6ed957cb33c9c350a9e42e55f5665ee3b91e116a Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Wed, 21 Jan 2026 14:52:58 +0200
Subject: [PATCH 06/11] Add unittest for binning negative values

---
 R/transformCounts.R                    | 5 +++++
 tests/testthat/test-5transformCounts.R | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index 7cdf96761..f30e5cc50 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -919,6 +919,11 @@ NULL
     }
     nbins <- as.integer(nbins)
 
+    # Check does not contain negative numeric values
+    if( !all(mat >= 0) ) {
+        warning("The assay contains negative numeric values. Resulting bins will not make sense.")
+    }
+
     # Apply binning
     res <- apply(mat, MARGIN = 2, function(x) {
         # Initialize result with 0 (for zero values)
diff --git a/tests/testthat/test-5transformCounts.R b/tests/testthat/test-5transformCounts.R
index 487ec4036..5f5494010 100644
--- a/tests/testthat/test-5transformCounts.R
+++ b/tests/testthat/test-5transformCounts.R
@@ -426,6 +426,11 @@ test_that("transformAssay", {
         ############################## BINNING ###############################
         # Test that binning transformation works
         tse_bin <- transformAssay(tse, method = "binning", nbins = 3)
+
+        # Expect warning trying to bin negative values
+        tse <- transformAssay(tse, method = "rclr")
+        expect_warning(transformAssay(tse, method = "binning", assay.type = "rclr"))
+
         # Check that the assay was created
         expect_true("binning" %in% assayNames(tse_bin))
 

From 9a4f8646496cc91fe4881e88e6488b55c61c8a46 Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Thu, 22 Jan 2026 11:49:51 +0200
Subject: [PATCH 07/11] add mention of option to function documentation

---
 R/transformCounts.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index f30e5cc50..fa3e80c4b 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -128,6 +128,12 @@
 #' are taken into account. This method keeps only values greater than
 #' \code{threshold} and replaces all other values with \code{value}.
 #'
+#' \item 'binning': Binning of the abundance values into a specified number of
+#' bins. The non-zero values are sorted and divided into \code{nbins} groups of
+#' equal size (quantiles). The group with the highest abundances is assigned to
+#' bin \code{nbins}, while the group with the lowest non-zero abundances is
+#' assigned to bin 1. Zero values are assigned to bin 0.
+#'
 #' }
 #'
 #' @return

From 3d7cef6fa32f824d76e91fd622c4ffa5fa1d3509 Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Thu, 22 Jan 2026 12:04:56 +0200
Subject: [PATCH 08/11] warning to error on attempt to bin negative values

---
 R/transformCounts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index fa3e80c4b..9bdbf0a86 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -927,7 +927,7 @@ NULL
 
     # Check does not contain negative numeric values
     if( !all(mat >= 0) ) {
-        warning("The assay contains negative numeric values. Resulting bins will not make sense.")
+        stop("The assay contains negative numeric values. Resulting bins will not make sense.", call. = FALSE)
     }
 
     # Apply binning

From 3cd1f4034b3c8e55ece3653aec913545f587fe51 Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Thu, 22 Jan 2026 13:21:16 +0200
Subject: [PATCH 09/11] fix test

---
 tests/testthat/test-5transformCounts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-5transformCounts.R b/tests/testthat/test-5transformCounts.R
index 5f5494010..e61f0068a 100644
--- a/tests/testthat/test-5transformCounts.R
+++ b/tests/testthat/test-5transformCounts.R
@@ -429,7 +429,7 @@ test_that("transformAssay", {
 
         # Expect warning trying to bin negative values
         tse <- transformAssay(tse, method = "rclr")
-        expect_warning(transformAssay(tse, method = "binning", assay.type = "rclr"))
+        expect_error(transformAssay(tse, method = "binning", assay.type = "rclr"))
 
         # Check that the assay was created
         expect_true("binning" %in% assayNames(tse_bin))

From 1d4c092a769067ed31473f6fbe7deae72f7c115c Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Tue, 10 Feb 2026 15:56:23 +0200
Subject: [PATCH 10/11] add lit ref

---
 R/transformCounts.R | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index 9bdbf0a86..6e1b37128 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -132,7 +132,8 @@
 #' bins. The non-zero values are sorted and divided into \code{nbins} groups of
 #' equal size (quantiles). The group with the highest abundances is assigned to
 #' bin \code{nbins}, while the group with the lowest non-zero abundances is
-#' assigned to bin 1. Zero values are assigned to bin 0.
+#' assigned to bin 1. Zero values are assigned to bin 0. This binning approach
+#' is based on the binning strategy described by Medearis et al. (2026).
 #'
 #' }
 #'
@@ -148,6 +149,11 @@
 #' _Nature Methods_ 10, 1200–1202.
 #' doi:10.1038/nmeth.2658
 #'
+#' Medearis, N. A., Zhu, S., & Zomorrodi, A. R. (2026).
+#' BiomeGPT: A foundation model for the human gut microbiome.
+#' _bioRxiv_.
+#' doi:10.64898/2026.01.05.697599
+#'
 #' @seealso
 #' \itemize{
 #'   \item \code{\link[vegan:decostand]{vegan::decostand}}

From 7597721ceeb41133d116adb5f876a4a3c8da2878 Mon Sep 17 00:00:00 2001
From: raivo-otus <hindstrom.rasmus@gmail.com>
Date: Tue, 10 Feb 2026 16:18:35 +0200
Subject: [PATCH 11/11] implementation of logic with cut

---
 R/transformCounts.R | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/transformCounts.R b/R/transformCounts.R
index 6e1b37128..9cb7d49cc 100644
--- a/R/transformCounts.R
+++ b/R/transformCounts.R
@@ -959,10 +959,13 @@ NULL
                 # species are distributed proportionally across nbins 1 through B
                 bin_values <- round(seq(from = nbins, to = 1, length.out = n_nonzero))
             } else {
-                # Ranks 1 to n_nonzero
-                # Formula: bin = B - ceiling(rank * B / N) + 1
-                ranks <- seq_len(n_nonzero)
-                bin_values <- nbins - ceiling(ranks * nbins / n_nonzero) + 1
+                # Get cut points
+                bin_idx <- cut(
+                    seq_len(n_nonzero),
+                    breaks = seq(0, n_nonzero, length.out = nbins + 1),
+                    labels = FALSE
+                )
+                bin_values <- nbins - bin_idx + 1
             }
 
             # Assign back