Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions R/shrinkBins.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ shrinkBins <- function(
}

# get the prior means
prior.means <- prior.means %||% getGlobalMeans(
obj = original.x,
targets = targets,
assay = assay
)
prior.means <- prior.means %||%
getGlobalMeans(
obj = original.x,
targets = targets,
assay = assay
)

is.atac_or_rna <- assay %in% c("atac", "rna")
input.fun <- if (jse) {
Expand All @@ -84,7 +85,7 @@ shrinkBins <- function(

# bin the input
bin.mat <- getBinMatrix(
mat = as.matrix(cbind(input.assay, prior.means)),
mat = cbind(input.assay, prior.means),
genloc = rowRanges(x),
chr = chr,
res = res,
Expand Down
32 changes: 9 additions & 23 deletions R/transformTFIDF.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@
#'
#' @param mat n x p input matrix (n = samples/cells; p = rna counts)
#' @param scale.factor Scaling factor for the term-frequency (TF)
#' @param count.min The minimum expression count used for TF-IDF. Binarizes
#' when `count.min` = 0 and `count.max` = 1.
#' @param count.max The maximum expression count used for TF-IDF. Binarizes
#' when `count.min` = 0 and `count.max` = 1.
#' binarizes the matrix. A `cap` value greater than 1 will cap counts at that
#' value.
#' @param binarize Whether to binarize the input matrix: any value > 0 is set
#' to 1
#'
#' @return A TF-IDF transformed matrix of the same dimensions as the input
#'
Expand All @@ -26,9 +22,7 @@
#' tfidf <- transformTFIDF(mat)
#'
#' @export
transformTFIDF <- function(mat, scale.factor = 1e5, count.min = 0, count.max = 1) {
stopifnot("'count.min' must be less than 'count.max'" = count.min < count.max)

transformTFIDF <- function(mat, scale.factor = 1e5, binarize = FALSE) {
if (!is(mat, "matrix") & !is(mat, "Matrix")) {
stop("Input needs to be a matrix.")
}
Expand All @@ -41,25 +35,17 @@ transformTFIDF <- function(mat, scale.factor = 1e5, count.min = 0, count.max = 1
mat.capped <- t(Matrix(mat, sparse = TRUE))
}

# constrain the matrix
mat.capped@x <- .constrain(mat.capped@x, count.min, count.max)
if (binarize) {
mat.capped@x <- .binarize(mat.capped@x)
}
tf <- t(t(mat.capped) / Matrix::colSums(mat.capped)) # compute term-frequency
tf@x <- log1p(tf@x * scale.factor) # scale
idf <- log(1 + ncol(mat.capped) / Matrix::rowSums(mat.capped)) # inverse-document frequency smooth
tfidf <- .tfidf(tf, idf) # transform

# cast back to a matrix since things like UMAP don't like sparse matrices
as.matrix(t(tfidf))
.tfidf(tf, idf) # transform
}

# binarize when lower is 0 and upper is 1, constrain otherwise
.constrain <- function(v, lower, upper) {
if (lower == 0 & upper == 1) {
v[v > 0] <- 1
} else {
v[v < lower] <- 0
v[v > upper] <- upper
}
.binarize <- function(v) {
v[v > 0] <- 1
v
}

Expand Down
11 changes: 3 additions & 8 deletions man/transformTFIDF.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 3 additions & 11 deletions tests/testthat/test-transformTFIDF.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,14 @@ test_that("transformTFIDF", {
"Input needs to be a matrix",
fixed = TRUE
)
expect_error(
transformTFIDF(0:10, count.min = 1, count.max = 0),
"'count.min' must be less than 'count.max'"
)
})
# }}}

# .constrain {{{
test_that(".constrain", {
expect_equal(
compartmap:::.constrain(0:10, lower = 0, upper = 1),
c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
)
test_that(".binarize", {
expect_equal(
compartmap:::.constrain(0:10, lower = 2, upper = 5),
c(0, 0, 2, 3, 4, 5, 5, 5, 5, 5, 5)
compartmap:::.binarize(seq(0, 10, by = 0.5)),
c(0, rep(1, 20))
)
})
# }}}
Expand Down
2 changes: 1 addition & 1 deletion vignettes/compartmap.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ data("k562_scrna_raw", package = "compartmap")
k562_scrna_chr14_tfidf <- transformTFIDF(assay(k562_scrna_se_chr14))

# Add back the TF-IDF counts to the object in the counts slot
assay(k562_scrna_se_chr14, "counts") <- t(k562_scrna_chr14_tfidf)
assay(k562_scrna_se_chr14, "counts") <- k562_scrna_chr14_tfidf

# Compute chromatin domains at the group level
k562_scrna_chr14_raw_domains <- scCompartments(k562_scrna_se_chr14,
Expand Down