From b4f71f7c376396f5d1bc025019d3930ab472f1f7 Mon Sep 17 00:00:00 2001 From: Teng Gao Date: Mon, 21 Oct 2019 14:16:32 -0400 Subject: [PATCH 1/3] adding post.sample.col --- R/clonevol.r | 5 ++++- R/generate.boot.r | 15 +++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/R/clonevol.r b/R/clonevol.r index cae099c..4ae29ab 100644 --- a/R/clonevol.r +++ b/R/clonevol.r @@ -2149,6 +2149,7 @@ infer.clonal.models <- function(c=NULL, variants=NULL, ignore.clusters=NULL, vaf.col.names=NULL, ccf.col.names=NULL, + post.sample.col=NULL, vaf.in.percent=TRUE, depth.col.names=NULL, weighted=FALSE, @@ -2303,6 +2304,7 @@ infer.clonal.models <- function(c=NULL, variants=NULL, boot = generate.boot(variants, vaf.col.names=vaf.col.names, depth.col.names=depth.col.names, vaf.in.percent=vaf.in.percent, + post.sample.col=post.sample.col, num.boots=num.boots, bootstrap.model=subclonal.test.model, cluster.center.method=cluster.center, @@ -3526,7 +3528,7 @@ sum.polyclonal <- function(x){ #' @export merge.samples #' @examples #' x2 = merge.samples(x, 1, c('C', 'M1197'), 'new', 'P', c('C_ref', 'M1197_ref'), c('C_var', 'M1197_var')) -merge.samples <- function(x, samples, new.sample, new.sample.group, +merge.samples <- function(x, samples, new.sample, new.sample.group, post.sample.col, ref.cols=NULL, var.cols=NULL){ if (!all(samples %in% names(x$models))){ stop('ERROR: Sample not found when merging!') @@ -3604,6 +3606,7 @@ merge.samples <- function(x, samples, new.sample, new.sample.group, # update bootstrap samples for the merged sample boot = generate.boot(x$variants, vaf.col.names=new.sample, + post.sample.col=post.sample.col, vaf.in.percent=x$params$vaf.in.percent, num.boots=x$params$num.boots, bootstrap.model=x$params$bootstrap.model, diff --git a/R/generate.boot.r b/R/generate.boot.r index 46630a4..e34204d 100644 --- a/R/generate.boot.r +++ b/R/generate.boot.r @@ -37,6 +37,7 @@ generate.boot <- function(variants, cluster.col.name='cluster', cluster.center.method='mean', + post.sample.col=NULL, vaf.col.names=NULL, depth.col.names=NULL, vaf.in.percent=TRUE, @@ -209,8 +210,13 @@ generate.boot <- function(variants, # generate bootstrap samples for each cluster, each sample num.variants.per.cluster = table(variants[[cluster.col.name]]) - #print(num.variants.per.cluster) - + + if (is.null(post.sample.col)) { + num.sample.per.cluster = num.variants.per.cluster + } else { + num.sample.per.cluster = table(variants[variants[[post.sample.col]] == FALSE,][[cluster.col.name]]) + } + if(!is.null(random.seed)){ set.seed(random.seed) } @@ -226,6 +232,7 @@ generate.boot <- function(variants, for (cl in clusters){ boot.size = num.variants.per.cluster[cl] + boot.sample.size = num.sample.per.cluster[cl] vafs = v[[cl]][[vaf.col.name]] # learn zero samples from data, @@ -352,10 +359,10 @@ generate.boot <- function(variants, # hdng: allow mean or median for non-parametric # TODO: allow this for all other models if (cluster.center.method == 'mean'){ - s.mean = mean(sample(v[[cl]][[vaf.col.name]], boot.size, + s.mean = mean(sample(v[[cl]][[vaf.col.name]], boot.sample.size, replace=TRUE, prob=depth)) }else if (cluster.center.method == 'median'){ - s.mean = median(sample(v[[cl]][[vaf.col.name]], boot.size, + s.mean = median(sample(v[[cl]][[vaf.col.name]], boot.sample.size, replace=TRUE, prob=depth)) } From d01c9a5636dad6841a40069e88f0fe93dbb19f22 Mon Sep 17 00:00:00 2001 From: teng-gao Date: Wed, 6 Nov 2019 16:38:27 -0500 Subject: [PATCH 2/3] fix combined p --- R/clonevol.r | 4 ++++ simulations/sim.r | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R/clonevol.r b/R/clonevol.r index 4ae29ab..df88ea3 100644 --- a/R/clonevol.r +++ b/R/clonevol.r @@ -1465,6 +1465,10 @@ cross.rule.score <- function(x, meta.p.method='fisher', exhaustive.mode=FALSE, p$cmb.p = apply(p[,-1], 1, combine.p, method=meta.p.method) } # model score = max (combined p of each clone) + + # Teng's fix to a bug that the nrow of p doesn't agree with merged.tree + p = na.omit(p) # drop any row containing NA + x$matched$scores$max.clone.ccf.combined.p[i] = max(p$cmb.p) x$matched$merged.trees[[i]]$clone.ccf.combined.p = p$cmb.p # save the whole pvalue matrix diff --git a/simulations/sim.r b/simulations/sim.r index 5bfae51..785c7e6 100755 --- a/simulations/sim.r +++ b/simulations/sim.r @@ -316,7 +316,6 @@ if (!is.na(rand.seed)){ set.seed(rand.seed) } - # read ground truth CCF x = read.table(subpop.file, header=TRUE, sep='\t', stringsAsFactors=FALSE, na.strings='') From 091abf47d7eb6656af9e9cba992a1e32317eb06e Mon Sep 17 00:00:00 2001 From: teng-gao Date: Sat, 9 Nov 2019 17:09:16 -0500 Subject: [PATCH 3/3] unfix --- R/clonevol.r | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/clonevol.r b/R/clonevol.r index df88ea3..e69ffb4 100644 --- a/R/clonevol.r +++ b/R/clonevol.r @@ -1451,6 +1451,7 @@ cross.rule.score <- function(x, meta.p.method='fisher', exhaustive.mode=FALSE, for (s in samples){ mi = x$models[[s]][[x$match$index[i,s]]] mi = mi[!mi$excluded & !is.na(mi$parent), c('lab', 'p.value')] + colnames(mi) = c('lab', s) if (is.null(p)){ p = mi @@ -1465,10 +1466,6 @@ cross.rule.score <- function(x, meta.p.method='fisher', exhaustive.mode=FALSE, p$cmb.p = apply(p[,-1], 1, combine.p, method=meta.p.method) } # model score = max (combined p of each clone) - - # Teng's fix to a bug that the nrow of p doesn't agree with merged.tree - p = na.omit(p) # drop any row containing NA - x$matched$scores$max.clone.ccf.combined.p[i] = max(p$cmb.p) x$matched$merged.trees[[i]]$clone.ccf.combined.p = p$cmb.p # save the whole pvalue matrix