From 02180bedeb44cf7e260f38cc7ed4bae6cd656ea0 Mon Sep 17 00:00:00 2001 From: hertafeldsr Date: Fri, 8 May 2020 16:48:11 -0400 Subject: [PATCH 1/7] Added helper function to remove batch effect --- workflows/rnaseq/downstream/helpers.Rmd | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/workflows/rnaseq/downstream/helpers.Rmd b/workflows/rnaseq/downstream/helpers.Rmd index 87740063f..2d0bf6645 100644 --- a/workflows/rnaseq/downstream/helpers.Rmd +++ b/workflows/rnaseq/downstream/helpers.Rmd @@ -9,6 +9,7 @@ library(heatmaply) library(readr) library(stringr) library(tibble) +library(limma) #' Get the OrgDb for the specified organism, using the cached AnnotationHub. #' @@ -725,4 +726,37 @@ write.clusterprofiler.results <- function(res, cprof.folder, label){ write.table(res.split, file=filename.split, sep='\t', quote=FALSE, row.names=FALSE) return(list(orig=filename.orig, split=filename.split)) } + + +#' Remove batch effect +#' +#' @param x Numeric matrix containing log-expression values for a series of samples +#' @param batches Vector of batch factors/vectors +#' @param covariates Matrix or vector of numeric covariates to be adjusted for +#' @param design Design matrix relating to treatment conditions to be preserved, usually the design matrix with all experimental factors other than the batch effects +#' @param ... Other arguments are passed to lmFit +removebatchEffect <- +function(x, batches=c(), covariantes=NULL, design=matrix(1,ncol(x),1),...) +{ + if(length(batches) == 0 and is.null(covariates)){ + return(as.matrix(x)) + } + batches_processed <- c() + if(length(batches) != 0){ + for(batch in batches){ + batch <- as.factor(batch) + contrasts(batch) <- contr.sum(levels(batch)) + batch <- model.matrix(~batch)[,-1,drop=FALSE] + batches_processed <- append(batches_processed, batch) + } + } + if(!is.null(covariates)) { + covariates <- as.matrix(covariates) + } + X.batch <- cbind(batches_processed, covariates) + fit <- lmFit(x, cbind(design, X.batch),...) + beta <- fit$coefficients[,-(1:ncol(design)),drop=FALSE] + beta[is.na(beta)] <- 0 + as.matrix(x) - beta %*% t(X.batch) +} ``` From 72b02500204c755716cda044bd7a5e60301a9e41 Mon Sep 17 00:00:00 2001 From: hertafeldsr Date: Tue, 12 May 2020 16:04:45 -0400 Subject: [PATCH 2/7] minor debugging --- workflows/rnaseq/downstream/helpers.Rmd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/workflows/rnaseq/downstream/helpers.Rmd b/workflows/rnaseq/downstream/helpers.Rmd index 2d0bf6645..393c2c7be 100644 --- a/workflows/rnaseq/downstream/helpers.Rmd +++ b/workflows/rnaseq/downstream/helpers.Rmd @@ -729,16 +729,17 @@ write.clusterprofiler.results <- function(res, cprof.folder, label){ #' Remove batch effect -#' +#' +#' based on the [removeBatchEffect](rdrr.io/bio/limma/src/R/removeBatchEffect.R) function from limma (Gordan Smyth et al.) #' @param x Numeric matrix containing log-expression values for a series of samples #' @param batches Vector of batch factors/vectors #' @param covariates Matrix or vector of numeric covariates to be adjusted for #' @param design Design matrix relating to treatment conditions to be preserved, usually the design matrix with all experimental factors other than the batch effects #' @param ... Other arguments are passed to lmFit removebatchEffect <- -function(x, batches=c(), covariantes=NULL, design=matrix(1,ncol(x),1),...) +function(x, batches=c(), covariates=NULL, design=matrix(1,ncol(x),1),...) { - if(length(batches) == 0 and is.null(covariates)){ + if(length(batches) == 0 & (is.null(covariates)){ return(as.matrix(x)) } batches_processed <- c() From df781e7084305f1f22904972387d9862e3f5ae85 Mon Sep 17 00:00:00 2001 From: hertafeldsr Date: Tue, 12 May 2020 17:33:28 -0400 Subject: [PATCH 3/7] syntax fix --- workflows/rnaseq/downstream/helpers.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/helpers.Rmd b/workflows/rnaseq/downstream/helpers.Rmd index 393c2c7be..b5b404ba9 100644 --- a/workflows/rnaseq/downstream/helpers.Rmd +++ b/workflows/rnaseq/downstream/helpers.Rmd @@ -739,7 +739,7 @@ write.clusterprofiler.results <- function(res, cprof.folder, label){ removebatchEffect <- function(x, batches=c(), covariates=NULL, design=matrix(1,ncol(x),1),...) { - if(length(batches) == 0 & (is.null(covariates)){ + if(length(batches) == 0 & is.null(covariates)){ return(as.matrix(x)) } batches_processed <- c() From 609568897ff20bf37b92973ae54cd043db714150 Mon Sep 17 00:00:00 2001 From: Ryan Dale Date: Sat, 23 May 2020 17:09:26 -0400 Subject: [PATCH 4/7] add test_batch_effects.Rmd --- .../rnaseq/downstream/test_batch_effects.Rmd | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 workflows/rnaseq/downstream/test_batch_effects.Rmd diff --git a/workflows/rnaseq/downstream/test_batch_effects.Rmd b/workflows/rnaseq/downstream/test_batch_effects.Rmd new file mode 100644 index 000000000..f9ce619ef --- /dev/null +++ b/workflows/rnaseq/downstream/test_batch_effects.Rmd @@ -0,0 +1,44 @@ +# Test removebatchEffects function + +1. Install `airway` package, which has counts data for an experiment described + at + http://bioconductor.org/packages/release/data/experiment/html/airway.html: + +```{r} +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +BiocManager::install("airway") +``` + +2. Load data, show colData + +```{r} +library(airway) +library(DESeq2) +data(airway) +knitr::kable(colData(airway)) +``` + + +3. dds object; run vst; plot PCA: + +```{r} +dds <- DESeqDataSet(airway, design = ~ cell + dex) +rld <- vst(dds, blind=TRUE) +plotPCA(rld, 'cell') +``` + +4. Load helpers + +```{r child='helpers.Rmd'} +rmarkdown::render('helpers.Rmd', run_pandoc=FALSE) +``` + + +5. Attempted usage here...imagine we want to remove the effect of cell type: + +```{r} +coldata <- colData(rld) +no.cell <- removebatchEffect(rld, batches=coldata$cell, design=model.matrix(~dex, data=coldata)) +plotPCA(no.cell, 'cell') +``` From 7c77c1b0c65e881e51bb6c7e7fe105f684e33a12 Mon Sep 17 00:00:00 2001 From: Caroline Esnault Date: Fri, 21 Aug 2020 09:04:41 -0400 Subject: [PATCH 5/7] add removebatchEffect to lib/lcdbwf/helpers.Rmd --- lib/lcdbwf/R/helpers.R | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index a6bd46a85..25a46f771 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -9,6 +9,7 @@ library(heatmaply) library(readr) library(stringr) library(tibble) +library(limma) library(purrr) library(tidyr) @@ -893,3 +894,36 @@ write.clusterprofiler.results <- function(res, cprof.folder, label){ return(list(orig=filename.orig, split=filename.split)) } + +#' Remove batch effect +#' +#' based on the [removeBatchEffect](rdrr.io/bio/limma/src/R/removeBatchEffect.R) function from limma (Gordan Smyth et al.) +#' @param x Numeric matrix containing log-expression values for a series of samples +#' @param batches Vector of batch factors/vectors +#' @param covariates Matrix or vector of numeric covariates to be adjusted for +#' @param design Design matrix relating to treatment conditions to be preserved, usually the design matrix with all experimental factors other than the batch effects +#' @param ... Other arguments are passed to lmFit +removebatchEffect <- +function(x, batches=c(), covariates=NULL, design=matrix(1,ncol(x),1),...) +{ + if(length(batches) == 0 & is.null(covariates)){ + return(as.matrix(x)) + } + batches_processed <- c() + if(length(batches) != 0){ + for(batch in batches){ + batch <- as.factor(batch) + contrasts(batch) <- contr.sum(levels(batch)) + batch <- model.matrix(~batch)[,-1,drop=FALSE] + batches_processed <- rbind(batches_processed, batch) + } + } + if(!is.null(covariates)) { + covariates <- as.matrix(covariates) + } + X.batch <- cbind(batches_processed, covariates) + fit <- lmFit(x, cbind(design, X.batch),...) + beta <- fit$coefficients[,-(1:ncol(design)),drop=FALSE] + beta[is.na(beta)] <- 0 + as.matrix(x) - beta %*% t(X.batch) +} From 693cda8febc3e04e68e2f1c40b504e4b40e089a9 Mon Sep 17 00:00:00 2001 From: Caroline Esnault Date: Fri, 21 Aug 2020 09:06:01 -0400 Subject: [PATCH 6/7] fix batches_processed --- workflows/rnaseq/downstream/helpers.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/rnaseq/downstream/helpers.Rmd b/workflows/rnaseq/downstream/helpers.Rmd index 65d29e845..e997ca3df 100644 --- a/workflows/rnaseq/downstream/helpers.Rmd +++ b/workflows/rnaseq/downstream/helpers.Rmd @@ -891,7 +891,7 @@ function(x, batches=c(), covariates=NULL, design=matrix(1,ncol(x),1),...) batch <- as.factor(batch) contrasts(batch) <- contr.sum(levels(batch)) batch <- model.matrix(~batch)[,-1,drop=FALSE] - batches_processed <- append(batches_processed, batch) + batches_processed <- rbind(batches_processed, batch) } } if(!is.null(covariates)) { From 245d92d2b6d6c4dc37733a8a3e9ae0565adf9f19 Mon Sep 17 00:00:00 2001 From: Caroline Esnault Date: Fri, 21 Aug 2020 09:07:46 -0400 Subject: [PATCH 7/7] update removebatchEffect call --- workflows/rnaseq/downstream/test_batch_effects.Rmd | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/workflows/rnaseq/downstream/test_batch_effects.Rmd b/workflows/rnaseq/downstream/test_batch_effects.Rmd index f9ce619ef..820e5676b 100644 --- a/workflows/rnaseq/downstream/test_batch_effects.Rmd +++ b/workflows/rnaseq/downstream/test_batch_effects.Rmd @@ -30,7 +30,7 @@ plotPCA(rld, 'cell') 4. Load helpers -```{r child='helpers.Rmd'} +```{r child='helpers.Rmd', include=FALSE} rmarkdown::render('helpers.Rmd', run_pandoc=FALSE) ``` @@ -39,6 +39,8 @@ rmarkdown::render('helpers.Rmd', run_pandoc=FALSE) ```{r} coldata <- colData(rld) -no.cell <- removebatchEffect(rld, batches=coldata$cell, design=model.matrix(~dex, data=coldata)) -plotPCA(no.cell, 'cell') +no.cell <- removebatchEffect(assay(rld), batches=list(coldata$cell), design=model.matrix(~dex, data=coldata)) +rld.nocell <- rld +assay(rld.nocell) <- no.cell +plotPCA(rld.nocell, 'cell') ```