From b4f71f7c376396f5d1bc025019d3930ab472f1f7 Mon Sep 17 00:00:00 2001
From: Teng Gao <gaot@juno.cbio.private>
Date: Mon, 21 Oct 2019 14:16:32 -0400
Subject: [PATCH 1/3] add post.sample.col option to generate.boot and its callers

---
 R/clonevol.r      |  5 ++++-
 R/generate.boot.r | 15 +++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/R/clonevol.r b/R/clonevol.r
index cae099c..4ae29ab 100644
--- a/R/clonevol.r
+++ b/R/clonevol.r
@@ -2149,6 +2149,7 @@ infer.clonal.models <- function(c=NULL, variants=NULL,
                                 ignore.clusters=NULL,
                                 vaf.col.names=NULL,
                                 ccf.col.names=NULL,
+                                post.sample.col=NULL,
                                 vaf.in.percent=TRUE,
                                 depth.col.names=NULL,
                                 weighted=FALSE,
@@ -2303,6 +2304,7 @@ infer.clonal.models <- function(c=NULL, variants=NULL,
                 boot = generate.boot(variants, vaf.col.names=vaf.col.names,
                                      depth.col.names=depth.col.names,
                                      vaf.in.percent=vaf.in.percent,
+                                     post.sample.col=post.sample.col,
                                      num.boots=num.boots,
                                      bootstrap.model=subclonal.test.model,
                                      cluster.center.method=cluster.center,
@@ -3526,7 +3528,7 @@ sum.polyclonal <- function(x){
 #' @export merge.samples
 #' @examples
 #' x2 = merge.samples(x, 1, c('C', 'M1197'), 'new', 'P', c('C_ref', 'M1197_ref'), c('C_var', 'M1197_var'))
-merge.samples <- function(x, samples, new.sample, new.sample.group,
+merge.samples <- function(x, samples, new.sample, new.sample.group, post.sample.col,
                             ref.cols=NULL, var.cols=NULL){
     if (!all(samples %in% names(x$models))){
         stop('ERROR: Sample not found when merging!')
@@ -3604,6 +3606,7 @@ merge.samples <- function(x, samples, new.sample, new.sample.group,
 
     # update bootstrap samples for the merged sample
     boot = generate.boot(x$variants, vaf.col.names=new.sample,
+                         post.sample.col=post.sample.col,
                         vaf.in.percent=x$params$vaf.in.percent,
                         num.boots=x$params$num.boots,
                         bootstrap.model=x$params$bootstrap.model,
diff --git a/R/generate.boot.r b/R/generate.boot.r
index 46630a4..e34204d 100644
--- a/R/generate.boot.r
+++ b/R/generate.boot.r
@@ -37,6 +37,7 @@
 generate.boot <- function(variants,
                           cluster.col.name='cluster',
                           cluster.center.method='mean',
+                          post.sample.col=NULL,
                           vaf.col.names=NULL,
                           depth.col.names=NULL,
                           vaf.in.percent=TRUE,
@@ -209,8 +210,13 @@ generate.boot <- function(variants,
 
     # generate bootstrap samples for each cluster, each sample
     num.variants.per.cluster = table(variants[[cluster.col.name]])
-    #print(num.variants.per.cluster)
-
+    
+    if (is.null(post.sample.col)) {
+        num.sample.per.cluster = num.variants.per.cluster
+    } else {
+        num.sample.per.cluster = table(variants[variants[[post.sample.col]] == FALSE,][[cluster.col.name]])
+    }
+    
     if(!is.null(random.seed)){
         set.seed(random.seed)
     }
@@ -226,6 +232,7 @@ generate.boot <- function(variants,
 
         for (cl in clusters){
             boot.size = num.variants.per.cluster[cl]
+            boot.sample.size = num.sample.per.cluster[cl]
             vafs = v[[cl]][[vaf.col.name]]
 
             # learn zero samples from data,
@@ -352,10 +359,10 @@ generate.boot <- function(variants,
                     # hdng: allow mean or median for non-parametric
                     # TODO: allow this for all other models
                     if (cluster.center.method == 'mean'){
-                        s.mean = mean(sample(v[[cl]][[vaf.col.name]], boot.size,
+                        s.mean = mean(sample(v[[cl]][[vaf.col.name]], boot.sample.size,
                           replace=TRUE, prob=depth))
                     }else if (cluster.center.method == 'median'){
-                        s.mean = median(sample(v[[cl]][[vaf.col.name]], boot.size,
+                        s.mean = median(sample(v[[cl]][[vaf.col.name]], boot.sample.size,
                           replace=TRUE, prob=depth))
 
                     }

From d01c9a5636dad6841a40069e88f0fe93dbb19f22 Mon Sep 17 00:00:00 2001
From: teng-gao <gaoteng@wustl.edu>
Date: Wed, 6 Nov 2019 16:38:27 -0500
Subject: [PATCH 2/3] cross.rule.score: drop NA rows before scoring combined p

---
 R/clonevol.r      | 4 ++++
 simulations/sim.r | 1 -
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/clonevol.r b/R/clonevol.r
index 4ae29ab..df88ea3 100644
--- a/R/clonevol.r
+++ b/R/clonevol.r
@@ -1465,6 +1465,10 @@ cross.rule.score <- function(x, meta.p.method='fisher', exhaustive.mode=FALSE,
                 p$cmb.p = apply(p[,-1], 1, combine.p, method=meta.p.method)
             }
             # model score = max (combined p of each clone)
+            
+            # Teng's fix to a bug that the nrow of p doesn't agree with merged.tree
+            p = na.omit(p) # drop any row containing NA
+            
             x$matched$scores$max.clone.ccf.combined.p[i] = max(p$cmb.p)
             x$matched$merged.trees[[i]]$clone.ccf.combined.p = p$cmb.p
             # save the whole pvalue matrix
diff --git a/simulations/sim.r b/simulations/sim.r
index 5bfae51..785c7e6 100755
--- a/simulations/sim.r
+++ b/simulations/sim.r
@@ -316,7 +316,6 @@ if (!is.na(rand.seed)){
     set.seed(rand.seed)
 }
 
-
 # read ground truth CCF
 x = read.table(subpop.file, header=TRUE, sep='\t', stringsAsFactors=FALSE,
                na.strings='')

From 091abf47d7eb6656af9e9cba992a1e32317eb06e Mon Sep 17 00:00:00 2001
From: teng-gao <gaoteng@wustl.edu>
Date: Sat, 9 Nov 2019 17:09:16 -0500
Subject: [PATCH 3/3] revert na.omit fix for combined p in cross.rule.score

---
 R/clonevol.r | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/R/clonevol.r b/R/clonevol.r
index df88ea3..e69ffb4 100644
--- a/R/clonevol.r
+++ b/R/clonevol.r
@@ -1451,6 +1451,7 @@ cross.rule.score <- function(x, meta.p.method='fisher', exhaustive.mode=FALSE,
             for (s in samples){
                 mi = x$models[[s]][[x$match$index[i,s]]]
                 mi = mi[!mi$excluded & !is.na(mi$parent), c('lab', 'p.value')]
+                
                 colnames(mi) = c('lab', s)
                 if (is.null(p)){
                     p = mi
@@ -1465,10 +1466,6 @@ cross.rule.score <- function(x, meta.p.method='fisher', exhaustive.mode=FALSE,
                 p$cmb.p = apply(p[,-1], 1, combine.p, method=meta.p.method)
             }
             # model score = max (combined p of each clone)
-            
-            # Teng's fix to a bug that the nrow of p doesn't agree with merged.tree
-            p = na.omit(p) # drop any row containing NA
-            
             x$matched$scores$max.clone.ccf.combined.p[i] = max(p$cmb.p)
             x$matched$merged.trees[[i]]$clone.ccf.combined.p = p$cmb.p
             # save the whole pvalue matrix