diff --git a/R/clonevol.r b/R/clonevol.r index cae099c..e69ffb4 100644 --- a/R/clonevol.r +++ b/R/clonevol.r @@ -1451,6 +1451,7 @@ cross.rule.score <- function(x, meta.p.method='fisher', exhaustive.mode=FALSE, for (s in samples){ mi = x$models[[s]][[x$match$index[i,s]]] mi = mi[!mi$excluded & !is.na(mi$parent), c('lab', 'p.value')] + colnames(mi) = c('lab', s) if (is.null(p)){ p = mi @@ -2149,6 +2150,7 @@ infer.clonal.models <- function(c=NULL, variants=NULL, ignore.clusters=NULL, vaf.col.names=NULL, ccf.col.names=NULL, + post.sample.col=NULL, vaf.in.percent=TRUE, depth.col.names=NULL, weighted=FALSE, @@ -2303,6 +2305,7 @@ infer.clonal.models <- function(c=NULL, variants=NULL, boot = generate.boot(variants, vaf.col.names=vaf.col.names, depth.col.names=depth.col.names, vaf.in.percent=vaf.in.percent, + post.sample.col=post.sample.col, num.boots=num.boots, bootstrap.model=subclonal.test.model, cluster.center.method=cluster.center, @@ -3526,7 +3529,7 @@ sum.polyclonal <- function(x){ #' @export merge.samples #' @examples #' x2 = merge.samples(x, 1, c('C', 'M1197'), 'new', 'P', c('C_ref', 'M1197_ref'), c('C_var', 'M1197_var')) -merge.samples <- function(x, samples, new.sample, new.sample.group, +merge.samples <- function(x, samples, new.sample, new.sample.group, post.sample.col, ref.cols=NULL, var.cols=NULL){ if (!all(samples %in% names(x$models))){ stop('ERROR: Sample not found when merging!') @@ -3604,6 +3607,7 @@ merge.samples <- function(x, samples, new.sample, new.sample.group, # update bootstrap samples for the merged sample boot = generate.boot(x$variants, vaf.col.names=new.sample, + post.sample.col=post.sample.col, vaf.in.percent=x$params$vaf.in.percent, num.boots=x$params$num.boots, bootstrap.model=x$params$bootstrap.model, diff --git a/R/generate.boot.r b/R/generate.boot.r index 46630a4..e34204d 100644 --- a/R/generate.boot.r +++ b/R/generate.boot.r @@ -37,6 +37,7 @@ generate.boot <- function(variants, cluster.col.name='cluster', cluster.center.method='mean', + post.sample.col=NULL, vaf.col.names=NULL, depth.col.names=NULL, vaf.in.percent=TRUE, @@ -209,8 +210,13 @@ generate.boot <- function(variants, # generate bootstrap samples for each cluster, each sample num.variants.per.cluster = table(variants[[cluster.col.name]]) - #print(num.variants.per.cluster) - + + if (is.null(post.sample.col)) { + num.sample.per.cluster = num.variants.per.cluster + } else { + num.sample.per.cluster = table(variants[variants[[post.sample.col]] == FALSE,][[cluster.col.name]]) + } + if(!is.null(random.seed)){ set.seed(random.seed) } @@ -226,6 +232,7 @@ generate.boot <- function(variants, for (cl in clusters){ boot.size = num.variants.per.cluster[cl] + boot.sample.size = num.sample.per.cluster[cl] vafs = v[[cl]][[vaf.col.name]] # learn zero samples from data, @@ -352,10 +359,10 @@ generate.boot <- function(variants, # hdng: allow mean or median for non-parametric # TODO: allow this for all other models if (cluster.center.method == 'mean'){ - s.mean = mean(sample(v[[cl]][[vaf.col.name]], boot.size, + s.mean = mean(sample(v[[cl]][[vaf.col.name]], boot.sample.size, replace=TRUE, prob=depth)) }else if (cluster.center.method == 'median'){ - s.mean = median(sample(v[[cl]][[vaf.col.name]], boot.size, + s.mean = median(sample(v[[cl]][[vaf.col.name]], boot.sample.size, replace=TRUE, prob=depth)) } diff --git a/simulations/sim.r b/simulations/sim.r index 5bfae51..785c7e6 100755 --- a/simulations/sim.r +++ b/simulations/sim.r @@ -316,7 +316,6 @@ if (!is.na(rand.seed)){ set.seed(rand.seed) } - # read ground truth CCF x = read.table(subpop.file, header=TRUE, sep='\t', stringsAsFactors=FALSE, na.strings='')