diff --git a/R/dryclean.R b/R/dryclean.R index eb1cbaf..b77f040 100644 --- a/R/dryclean.R +++ b/R/dryclean.R @@ -13,33 +13,42 @@ #' @name dryclean #' @title dryclean #' @description dryclean R6 class storing all methods and values necessary for "drycleaning" -#' @details Add more details +#' @detals Add more details #' #' @param pon PON object #' @param history data.table to store history of dryclean object #' @param dt_mismatch data.table to store mismatching chromosomes between coverage and PON +#' @param log_action function to log actions performed on the dryclean object #' #' @export #' -#' @author Aditya Deshpande , Sebastian Brylka +#' @author Aditya Deshpande , Johnathan Rafailov , Sebastian Brylka dryclean <- R6::R6Class("dryclean", private = list( history = NULL, pon = NULL, - dt_mismatch = NULL + dt_mismatch = NULL, + log_action = function(action) { + private$history <- rbindlist( + list(private$history, data.table(action = action, date = as.character(Sys.time()))), + use.names = TRUE, fill = TRUE + ) + } ), public = list( #' @method initialize() initialize() - #' @description Initialize dryclean object. Authors: Aditya Deshpande, Sebastian Brylka + #' @description Initialize dryclean object. #' + #' @author Aditya Deshpande, Sebastian Brylka #' @param pon PON object #' initialize = function(pon) { private$history <- data.table(action = character(), date = character()) - private$history <- rbindlist(list(private$history, data.table(action = "Created dryclean object", date = as.character(Sys.time())))) + + private$log_action("Created dryclean object") private$pon <- pon }, @@ -54,116 +63,86 @@ dryclean <- R6::R6Class("dryclean", #' @param cnsignif integer (default = 1e-5) the significance levels for the tests in cbs to accept change-points #' @param mc.cores interger (default == 1) number of cores to use for parallelization #' @param use.blacklist boolean (default = FALSE) whether to exclude off-target markers in case of Exomes or targeted sequencing. If set to TRUE, needs a GRange marking if each marker is set to be excluded or not or will use a default mask - #' @param blacklist_path character (default = NA) if use.blacklist == TRUE, path a GRanges object marking if each marker is set to be excluded or not + #' @param blacklist_path character (default = attached mask in extdata for hg19 aligned tumor samples) if use.blacklist == TRUE, path a GRanges object marking if each marker is set to be excluded or not #' @param germline.filter boolean (default == FALSE) if germline markers need to be removed from decomposition #' @param verbose boolean (default == TRUE) outputs progress #' @param field character (default == "reads.corrected") field to use for processing #' @param testing boolean (default = FALSE) DO NOT CHANGE #' @return Drycleaned coverage in GRanges format - clean = function(cov, center = TRUE, centering = "mean", cbs = FALSE, cnsignif = 1e-5, mc.cores = 1, verbose = TRUE, use.blacklist = FALSE, blacklist_path = NA, germline.filter = FALSE, field = "reads.corrected", testing = FALSE) { - message("Loading coverage") - private$history <- rbindlist(list(private$history, data.table(action = paste("Loaded coverage from", cov), date = as.character(Sys.time())))) - cov <- readRDS(cov) + clean = function(cov, center = TRUE, centering = "mean", cbs = FALSE, cnsignif = 1e-5, mc.cores = 1, verbose = TRUE, use.blacklist = FALSE, blacklist_path = system.file("extdata", "blacklist_A.rds", package = "dryclean"), germline.filter = FALSE, field = "reads.corrected", testing = FALSE) { + + private$log_action(paste("Loaded coverage from", cov)) - is.chr <- FALSE - if (any(grepl("chr", as.character(seqnames(cov))))) { - cov <- gr.sub(cov) - is.chr <- TRUE + if(verbose == TRUE) { + message("Loading coverage") } - # cov <- cov %>% - # gr2dt() %>% - # ## filter(seqnames != "Y") %>% - # dt2gr() - # cov <- cov %>% gr2dt() %>% filter(seqnames != 'X') %>% dt2gr() + cov <- cov %>% readRDS %>% gr.nochr() if (verbose == TRUE) { message("Loading PON a.k.a detergent") } - - - private$history <- rbindlist(list(private$history, data.table(action = "Loaded PON", date = as.character(Sys.time())))) + private$log_action("Loaded PON") tumor.binsize <- median(gr2dt(cov)$width) pon.binsize <- median(gr2dt(private$pon$get_template())$width) if (tumor.binsize != pon.binsize & testing == FALSE) { - message(paste0("WARNING: Input tumor bin size = ", tumor.binsize, "bp. PON bin size = ", pon.binsize, "bp. Rebinning tumor to bin size of PON...")) - private$history <- rbindlist(list(private$history, data.table(action = paste("Rebinning tumor to", pon.binsize, "bp bin size"), date = as.character(Sys.time())))) + warning(sprintf("Input tumor bin size = %s bp. PON bin size = %s bp. Rebinning tumor to bin size of PON...", tumor.binsize, pon.binsize)) + private$log_action(paste("Rebinning tumor to", pon.binsize, "bp bin size")) suppressWarnings({ cov <- gr.val(query = private$pon$get_template(), cov, val = field) }) } - pon.length <- private$pon$get_template() %>% length() - # gr2dt() %>% - # ## dplyr::filter(seqnames != "Y") %>% - # dt2gr() %>% + pon.length <- private$pon$get_template() %>% length() + + dt_cov <- gr2dt(cov)[, .(cov.bins = .N), by = seqnames] + dt_pon <- gr2dt(private$pon$get_template())[, .(pon.bins = .N), by = seqnames] + private$dt_mismatch <- merge(dt_cov, dt_pon, by = "seqnames", all = TRUE)[ + cov.bins != pon.bins, + .(seqnames, cov.bins, pon.bins) + ] + if (length(cov) != pon.length) { + warning("Number of bins of coverage and PON do not match. Use the get_mismatch() function to see mismatched chromosomes. Re aligning coverage per chromosome to the PON-defined bins.") + private$log_action("Number of bins of coverage and PON do not match. Re aligning coverage per chromosome to the PON-defined bins.") - if (length(cov) != pon.length & testing == FALSE) { - dt_mismatch <- data.table(chr = c(), coverage = c(), pon = c()) - for (chr in c(1:22, "X", "Y")) { - dt_mismatch <- rbind( - dt_mismatch, - data.table( - chr = chr, - coverage = cov %>% gr2dt() %>% filter(seqnames == chr) %>% nrow(), - pon = private$pon$get_template() %>% gr2dt() %>% filter(seqnames == chr) %>% nrow() - ) - ) - } - dt_mismatch <- dt_mismatch %>% filter(coverage != pon) - private$dt_mismatch <- dt_mismatch - if (testing == FALSE) { - message("WARNING: Number of bins of coverage and PON does not match. Use get_mismatch() function to see mismatched chromosomes\nAligning coverage to the PON") - suppressWarnings({ - cov <- gr.val(query = private$pon$get_template(), cov, val = field) - }) - } + suppressWarnings({ + cov <- gr.val(query = private$pon$get_template(), cov, val = field) + }) } if (verbose == TRUE) { message(paste0("Let's begin, this is whole exome/genome")) } - private$history <- rbindlist(list(private$history, data.table(action = paste("Started drycleaning the coverage file"), date = as.character(Sys.time())))) + private$log_action("Started drycleaning the coverage file") if (germline.filter & is.null(private$pon$get_inf_germ())) { stop("If germline.filter is set to TRUE, pon must have a inf_germ element, see prepare_detergent for details") } - all.chr <- c(as.character(1:22), "X", "Y") - - local.all.chr <- all.chr - cov <- cov %Q% (seqnames %in% local.all.chr) - cov <- cov[, field] %>% - gr2dt() %>% - setnames(., field, "signal") - cov <- cov %>% dt2gr() + #cov <- cov %Q% (seqnames %in% all.chr) + mcols(cov)$signal <- mcols(cov)[[field]] + mcols(cov)[[field]] <- NULL cov <- sortSeqlevels(cov) cov <- sort(cov) if (use.blacklist == TRUE) { - if ((is.na(blacklist_path) | blacklist_path == "NA")) { - blacklist_path <- system.file("extdata", "blacklist_A.rds", package = "dryclean") - message(paste0("Applying the default mask to the coverage")) - private$history <- rbindlist(list(private$history, data.table(action = paste("Applying the defualt mask to coverage"), date = as.character(Sys.time())))) - } else { - blacklist_path <- blacklist_path - message(paste0("Applying the provided mask to the coverage")) - private$history <- rbindlist(list(private$history, data.table(action = paste("Applying the provided mask to coverage"), date = as.character(Sys.time())))) - } + private$log_action("Applying the provided mask to coverage") + suppressWarnings({ + blacklist_pon <- gUtils::gr.val( query = private$pon$get_template(), target = readRDS(blacklist_path), val = "blacklisted", na.rm = TRUE )$blacklisted - blacklist_pon <- ifelse(blacklist_pon == 1, TRUE, FALSE) + blacklist_pon <- ifelse(blacklist_pon == 1, TRUE, FALSE) blacklist_cov <- gUtils::gr.val( query = cov, @@ -171,12 +150,13 @@ dryclean <- R6::R6Class("dryclean", val = "blacklisted", na.rm = TRUE )$blacklisted + blacklist_cov <- ifelse(blacklist_cov == 1, TRUE, FALSE) }) } message(paste0(centering, "-centering the sample")) - private$history <- rbindlist(list(private$history, data.table(action = paste0(centering, "normalization of coverage"), date = as.character(Sys.time())))) + private$log_action(paste0(centering, " normalization of coverage")) cov <- prep_cov(cov, use.blacklist = use.blacklist, @@ -185,7 +165,6 @@ dryclean <- R6::R6Class("dryclean", centering = centering ) - m.vec <- as.matrix(cov$signal) L.burnin <- private$pon$get_L() S.burnin <- private$pon$get_S() @@ -262,7 +241,6 @@ dryclean <- R6::R6Class("dryclean", cov <- na.omit(cov) } - # browser() cov <- dt2gr(cov) if (use.blacklist) { @@ -285,11 +263,7 @@ dryclean <- R6::R6Class("dryclean", }) } - if (is.chr) { - cov <- gr.chr(cov) - } - - private$history <- rbindlist(list(private$history, data.table(action = paste("Finished drycleaning the coverage file"), date = as.character(Sys.time())))) + private$log_action("Finished drycleaning the coverage file") if (cbs == TRUE) { message("Starting CBS on the drycleaned sample") @@ -334,12 +308,12 @@ dryclean <- R6::R6Class("dryclean", gc() - private$history <- rbindlist(list(private$history, data.table(action = paste("Applied CBS correction to the drycleaned coverage file"), date = as.character(Sys.time())))) + private$log_action("Applied CBS correction to the drycleaned coverage file") # return(out) saveRDS(out, "cbs_output.rds") - private$history <- rbindlist(list(private$history, data.table(action = paste("Saved CBS output in current directory as cbs_output.rds"), date = as.character(Sys.time())))) + private$log_action("Saved CBS output in current directory as cbs_output.rds") } return(cov) @@ -389,14 +363,12 @@ pon <- R6::R6Class("pon", sigma.hat = NULL, inf.germ = NULL, template = NULL, - prepare_pon = function(save_pon, use.all, choose.randomly, choose.by.clustering, number.of.samples, verbose, num.cores, tolerance, is.human, build, field, PAR.file, balance, infer.germline, signal.thresh, pct.thresh, wgs, target_resolution, nochr, all.chr) { + prepare_pon = function(save_pon, use.all, choose.randomly, choose.by.clustering, number.of.samples, verbose, num.cores, tolerance, is.human, build, field, PAR.file, balance, infer.germline, signal.thresh, pct.thresh, wgs, target_resolution, nochr, all.chr = c(as.character(1:22), "X", "Y")) { normal.table <- private$normal.table - if (save_pon == TRUE) { - message(paste0("WARNING: New PON will be generated and saved at ", private$pon.path)) - Sys.sleep(3) - message("\nGiving you some time to think...\n") - Sys.sleep(5) + message(paste0("New PON will be generated and saved at ", private$pon.path)) + message("Giving you some time to think...") + Sys.sleep(8) path.to.save <- private$pon.path } @@ -412,27 +384,36 @@ pon <- R6::R6Class("pon", num.samp <- nrow(normal.table) if (verbose) { - message(paste0(num.samp, " samples available")) + message(paste0(num.samp, " samples provided.")) } - if (use.all & choose.randomly | use.all & choose.by.clustering | choose.randomly & choose.by.clustering | use.all & choose.randomly & choose.by.clustering) { + if ((use.all & choose.randomly) | (use.all & choose.by.clustering) | (choose.randomly & choose.by.clustering) | (use.all & choose.randomly & choose.by.clustering)) { stop("only one of use.all, choose.randomly, choose.by.clustering can be set to TRUE. Rectify and restart") } - if (nochr) { - template <- generate_template(cov = gUtils::gr.nochr(readRDS(normal.table[1]$normal_cov)), wgs = wgs, target_resolution = target_resolution, this.field = field, nochr = nochr, all.chr = all.chr) - } else { - template <- generate_template(cov = readRDS(normal.table[1]$normal_cov), wgs = wgs, target_resolution = target_resolution, this.field = field, nochr = nochr, all.chr = all.chr) - } + samp.final <- validate_pon_paths(normal.table$normal_cov, + field = field, + target_resolution = target_resolution, + all.chr = all.chr, + cores = num.cores, + verbose = verbose) + + template <- generate_template( + cov = samp.final[[1]], # use the first sample for the template + wgs = wgs, + target_resolution = target_resolution, + this.field = field, + nochr = nochr, + all.chr = all.chr + ) if (choose.randomly) { if (verbose) { message(paste0("Selecting ", number.of.samples, " normal samples randomly")) } set.seed(12) - samp.final <- sample(1:num.samp, number.of.samples) - samp.final <- normal.table[samp.final] - setkey(samp.final, "sample") + sample.indices <- sample(1:num.samp, number.of.samples) + samp.final <- samp.final[sample.indices] } if (choose.by.clustering) { @@ -440,8 +421,12 @@ pon <- R6::R6Class("pon", message("Starting the clustering") } - mat.small <- mclapply(normal.table[, sample], function(nm) { - this.cov <- tryCatch(readRDS(normal.table[nm, normal_cov]), error = function(e) NULL) + mat.small <- mclapply(samp.final, function(cov) { + if(!class(cov) == "GRanges") { + return(NULL) + } else { + this.cov <- cov + } if (!is.null(this.cov)) { if (nochr) { this.cov <- gUtils::gr.nochr(this.cov) @@ -458,8 +443,8 @@ pon <- R6::R6Class("pon", reads[signal == 0, signal := min.cov] reads[signal < 0, signal := min.cov] reads <- log(reads[, .(signal)]) - reads <- transpose(reads) - reads <- cbind(reads, nm) + reads <- data.table::transpose(reads) +# reads <- cbind(reads, nm) } ## else {reads = data.table(NA)} return(reads) }, mc.cores = num.cores) @@ -467,8 +452,8 @@ pon <- R6::R6Class("pon", gc() mat.sub <- rbindlist(mat.small, fill = T) - mat.sub <- na.omit(mat.sub) ix <- ncol(mat.sub) + mat.sub <- na.omit(mat.sub) samp.names <- mat.sub[, ..ix] mat.sub.t <- transpose(mat.sub[, 1:(ncol(mat.sub) - 1)]) rm(mat.sub) @@ -504,18 +489,17 @@ pon <- R6::R6Class("pon", set.seed(12) samp.selected <- memb[, .SD[sample(1:.N, 1)], by = V1]$rn - samp.final <- normal.table[samp.selected] - setkey(samp.final, "sample") + samp.final <- samp.final[as.numeric(samp.selected)] } - if (use.all) { - if (verbose) { - message("Using all samples") - } + # if (use.all) { + # if (verbose) { + # message("Using all samples") + # } - samp.final <- normal.table - setkey(samp.final, "sample") - } + # samp.final <- normal.table + # setkey(samp.final, "sample") + # } if (!is.null(PAR.file)) { @@ -543,20 +527,12 @@ pon <- R6::R6Class("pon", message("PAR read") - # samp.final[, file.available := file.exists(normal_cov)] - samp.final <- samp.final %>% - mutate(file.available = file.exists(normal_cov)) - - - message("Checking for existence of files") - - samp.final <- samp.final[file.available == TRUE] - - message(paste0(nrow(samp.final), " files present")) - - - mat.n <- pbmcapply::pbmclapply(samp.final[, sample], function(nm, all.chr) { - this.cov <- tryCatch(readRDS(samp.final[sample == nm, normal_cov]), error = function(e) NULL) + mat.n <- pbmcapply::pbmclapply(samp.final, function(cov, all.chr) { + if(!class(cov) == "GRanges") { + return(NULL) + } else { + this.cov <- cov + } if (!is.null(this.cov)) { ## this.cov = standardize_coverage(cov = gr.nochr(this.cov), template = template, wgs = wgs, target_resolution = target_resolution, this.field = field) if (nochr) { @@ -585,8 +561,8 @@ pon <- R6::R6Class("pon", reads[is.na(signal), signal := median.chr] min.cov <- min(reads[signal > 0]$signal, na.rm = T) reads[is.infinite(signal), signal := min.cov] - reads[signal == 0, signal := min.cov] reads[signal < 0, signal := min.cov] + reads[signal == 0, signal := min.cov] reads[, signal := log(signal)] reads <- reads[, .(signal)] if (!any(is.infinite(reads$signal))) { @@ -600,7 +576,6 @@ pon <- R6::R6Class("pon", message("Starting decomposition") } - mat.bind.t <- matrix(unlist(mat.n), ncol = length(mat.n)) # print(nrow(mat.bind.t)) # print(ncol(mat.bind.t)) @@ -616,11 +591,6 @@ pon <- R6::R6Class("pon", detergent$U.hat <- rsvd.L.burnin$u detergent$V.hat <- t(rsvd.L.burnin$v) detergent$sigma.hat <- rsvd.L.burnin$d - ## browser() - ## this.template = readRDS(samp.final[1]$normal_cov) - ## this.template = sortSeqlevels(this.template) - ## this.template = sort(this.template) - ## detergent$template <- template if (infer.germline) { @@ -740,6 +710,7 @@ pon <- R6::R6Class("pon", private$V.hat <- pon$V.hat private$sigma.hat <- pon$sigma.hat private$template <- pon$template + private$pon.path <- pon_path rm(pon) } @@ -858,6 +829,25 @@ pon <- R6::R6Class("pon", #' @return seqlengths of template of the pon object get_seqlengths = function() { return(seqlengths(private$template)) + }, + + #' @method save_pon() save_pon() + #' @description Function to save_pon if not saved + #' @param path_to_save character (default == "./") path to save the pon object + + save_pon = function(path_to_save = "./detergent.rds") { + list( + L = private$L, + S = private$S, + err = private$err, + k = private$k, + U.hat = private$U.hat, + V.hat = private$V.hat, + sigma.hat = private$sigma.hat, + inf.germ = private$inf.germ, + template = private$template + ) %>% + saveRDS(file = path_to_save, compress = "xz") } ) ) diff --git a/R/helper_functions.R b/R/helper_functions.R index 11d1d51..aedff28 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -214,26 +214,16 @@ wash_cycle <- function(m.vec, L.burnin, S.burnin, r, N, U.hat, V.hat, sigma.hat, ## prep_cov ############################## #' @name prep_cov -#' #' @title function to prepare covearge data for decomposition and perform normalization #' @description prepares the GC corrected coverage data for decomposition -#' #' @keywords internal -#' #' @param m.vec GRanges object. GRanges containing GC corrected reads as a column in metadata -#' #' @param blacklist, boolean (default == FALSE). Whether to exclude off-target markers in case of Exomes or targeted sequqnecing. If set to TRUE, needs a GRange marking if each marker is set to be excluded or not. -#' #' @param burnin.samples.path, character (default = NA). Path to balcklist markers file -#' #' @param center, character (default == FALSE). Transform the data to centered? -#' #' @param centering, enum of "mean" or "median" ONLY (default = "mean"). Paradigm of centering. -#' #' @return vector of length m with processed coverage data -#' #' @author Aditya Deshpande / Johnathan Rafailov - prep_cov <- function(m.vec = m.vec, use.blacklist = FALSE, blacklist = NA, center = FALSE, centering = "mean") { mcols(m.vec)$signal[which(is.infinite(mcols(m.vec)$signal))] <- NA mcols(m.vec)$og.signal <- mcols(m.vec)$signal @@ -258,8 +248,6 @@ prep_cov <- function(m.vec = m.vec, use.blacklist = FALSE, blacklist = NA, cente m.vec[signal == 0, signal := min.cov] m.vec[signal < 0, signal := min.cov] - - if (use.blacklist) { m.vec$blacklisted <- blacklist m.vec[blacklisted == TRUE, signal := NA] @@ -302,3 +290,58 @@ generate_template <- function(cov, wgs = wgs, target_resolution = target_resolut template <- cov[, c()] return(template) } + +#' @title validate_pon_paths +#' @details Checks that the provided Panel of Normals (PON) file paths are valid and accessible. This function validates that each file path supplied in the input is a well-formed path and that the file exists. It raises an informative error if any path does not meet the required criteria for subsequent processing. Returns as list of rebinned coverages. The function performs necessary checks for file existence and proper format. Ensure that the provided paths correspond to files that are accessible in the current environment. +#' @name validate_pon_paths +#' @param pon_paths A character vector of file paths to PON files. +#' @param field +#' @param target_resolution +#' @param cores +#' @param all.chr +#' @return A character vector of validated PON file paths. +#' @examples +#' \dontrun{ +#' # Validate PON file paths +#' valid_paths <- validate_pon_paths(c("/path/to/pon1.txt", "/path/to/pon2.txt")) +#' } +validate_pon_paths <- function(pon_paths, field, target_resolution, cores, all.chr = c(1:22, "X", "Y"), verbose = TRUE) { + + if (verbose) { + message("Validating PON paths...") + } + + skip.files <- !file.exists(pon_paths) + + + if(length(which(skip.files)) > 0) { + message(sprintf("Number of files that do not exist: %d", sum(skip.files))) + message(paste0("Skipping files (do not exist): ", paste(pon_paths[skip.files], collapse = ", "))) + } + + result <- mclapply(pon_paths[!skip.files], function(path) { + cov.dt <- readRDS(path) %>% gr.nochr() %>% gr2dt() + bin.size <- median(cov.dt$width, na.rm = TRUE) + n.bins.chr <- cov.dt[seqnames %in% all.chr, .N, by = seqnames] + setnames(n.bins.chr, "N", paste0("normal_", match(path, pon_paths))) + list(bin.size = bin.size, n.bins.chr = n.bins.chr) + }, mc.cores = cores) + + bin.sizes <- sapply(result, function(x) x$bin.size) + names(bin.sizes) <- paste0("normal_", seq_along(bin.sizes)) + + rebin <- bin.sizes != target_resolution + + file.reads <- pbmcapply::pbmcmapply(function(nm, path, rebin){ + cov.gr <- readRDS(path) %>% gr.nochr() + og.resolution <- median(width(cov.gr), na.rm = TRUE) + if (rebin) { + message(sprintf("Rebinning %s (%d bp) to %d bp ...", nm, og.resolution, target_resolution)) + cov.gr <- collapse_cov(cov.gr, this.field = field, bin.size = target_resolution) + } + return(cov.gr) + }, names(rebin), pon_paths[!skip.files], rebin, mc.cores = cores) + + return(file.reads) + +} diff --git a/man/dryclean.Rd b/man/dryclean.Rd index a4e4645..885bbe8 100644 --- a/man/dryclean.Rd +++ b/man/dryclean.Rd @@ -6,11 +6,10 @@ \description{ dryclean R6 class storing all methods and values necessary for "drycleaning" } -\details{ -Add more details -} \author{ -Aditya Deshpande , Sebastian Brylka +Aditya Deshpande , Johnathan Rafailov , Sebastian Brylka + +Aditya Deshpande, Sebastian Brylka } \section{Methods}{ \subsection{Public methods}{ @@ -26,7 +25,7 @@ Aditya Deshpande , Sebastian Brylka }} \if{latex}{\out{\hypertarget{method-dryclean-new}{}}} \subsection{Method \code{new()}}{ -Initialize dryclean object. Authors: Aditya Deshpande, Sebastian Brylka +Initialize dryclean object. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{dryclean$new(pon)}\if{html}{\out{
}} } @@ -54,7 +53,7 @@ Function begins the online rPCA process. Use this function if you performed batc mc.cores = 1, verbose = TRUE, use.blacklist = FALSE, - blacklist_path = NA, + blacklist_path = system.file("extdata", "blacklist_A.rds", package = "dryclean"), germline.filter = FALSE, field = "reads.corrected", testing = FALSE @@ -78,7 +77,7 @@ Function begins the online rPCA process. Use this function if you performed batc \item{\code{use.blacklist}}{boolean (default = FALSE) whether to exclude off-target markers in case of Exomes or targeted sequencing. If set to TRUE, needs a GRange marking if each marker is set to be excluded or not or will use a default mask} -\item{\code{blacklist_path}}{character (default = NA) if use.blacklist == TRUE, path a GRanges object marking if each marker is set to be excluded or not} +\item{\code{blacklist_path}}{character (default = attached mask in extdata for hg19 aligned tumor samples) if use.blacklist == TRUE, path a GRanges object marking if each marker is set to be excluded or not} \item{\code{germline.filter}}{boolean (default == FALSE) if germline markers need to be removed from decomposition} diff --git a/man/pon.Rd b/man/pon.Rd index 21706b9..24b0cb9 100644 --- a/man/pon.Rd +++ b/man/pon.Rd @@ -27,6 +27,7 @@ Aditya Deshpande , Sebastian Brylka }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-pon-save_pon}{}}} +\subsection{Method \code{save_pon()}}{ +Function to save_pon if not saved +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{pon$save_pon(path_to_save = "./detergent.rds")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path_to_save}}{character (default == "./") path to save the pon object} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-pon-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/validate_pon_paths.Rd b/man/validate_pon_paths.Rd new file mode 100644 index 0000000..576d4c2 --- /dev/null +++ b/man/validate_pon_paths.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_functions.R +\name{validate_pon_paths} +\alias{validate_pon_paths} +\title{validate_pon_paths} +\usage{ +validate_pon_paths( + pon_paths, + field, + target_resolution, + cores, + all.chr = c(1:22, "X", "Y"), + verbose = TRUE +) +} +\arguments{ +\item{pon_paths}{A character vector of file paths to PON files.} +} +\value{ +A character vector of validated PON file paths. +} +\description{ +validate_pon_paths +} +\details{ +Checks that the provided Panel of Normals (PON) file paths are valid and accessible. This function validates that each file path supplied in the input is a well-formed path and that the file exists. It raises an informative error if any path does not meet the required criteria for subsequent processing. Returns as list of rebinned coverages. The function performs necessary checks for file existence and proper format. Ensure that the provided paths correspond to files that are accessible in the current environment. +} +\examples{ +\dontrun{ + # Validate PON file paths + valid_paths <- validate_pon_paths(c("/path/to/pon1.txt", "/path/to/pon2.txt")) +} +} diff --git a/tests/testthat.R b/tests/testthat.R index f7d2331..4ef9b28 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,5 +1,5 @@ library(testthat) -library(dryclean) +devtools::load_all() test_check("dryclean") diff --git a/tests/testthat/test_dryclean.R b/tests/testthat/test_dryclean.R index 6f58337..a71fd26 100644 --- a/tests/testthat/test_dryclean.R +++ b/tests/testthat/test_dryclean.R @@ -114,15 +114,13 @@ test_that("initialize", { test_that("clean", { - - pon_object = pon$new(pon_path = detergent.path) - + dryclean_object <- dryclean$new(pon = pon_object) a <- dryclean_object$clean(cov = sample.1.path, testing = TRUE) expect_true(identical(colnames(values(a)), c("background.log", "foreground.log", "signal", "input.read.counts", "center.all", "median.chr", "foreground", "background", "log.reads"))) - expect_equal(a$background.log[1], 0.0508, tolerance = 0.001) + expect_equal(a$background.log[1] %>% round(4), 0.0266, tolerance = 0.001) }) @@ -154,7 +152,7 @@ test_that("get_history", { test_that("cbs", { - pon_object = pon$new(pon_path = detergent.path) + #pon_object = pon$new(pon_path = detergent.path) dryclean_object <- dryclean$new(pon = pon_object) @@ -191,7 +189,7 @@ test_that("clustering", { number.of.samples = 2 ) - expect_equal(pon_object$get_L()[5], 0.004731082, tolerance = 0.00001) + expect_equal(pon_object$get_L()[5], 0.01749593, tolerance = 0.00001) })