Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 56 additions & 46 deletions ase/fdr.R
Original file line number Diff line number Diff line change
@@ -1,73 +1,73 @@


sys = 'kalk'
sys = 'local'

if(sys == 'kalk'){
syntdir = '/proj/b2012046/edsgard/ase/sim/data/synt/ase'
ase.method.resdir = 'FILLMEIN'
}
if(sys == 'local'){
simdir = '~/Dropbox/postdoc/projects/ase/data/sim/synt'
snpdir = '~/Documents/postdoc/ase/sim/snps'
exprdir = file.path(simdir, 'expr')
asedir = file.path(simdir, 'ase')
number2sample.map.file = file.path(simdir, 'customref2enstcounts.filemapping.tab')
snp2count.file = file.path(simdir, 'snps', 'snp2count.RData')
snp2count.uniq.file = file.path(simdir, 'snps', 'snp2count.uniq.RData')
ase.res.file = file.path(asedir, 'ase.res.RData')
annovardir = '~/Dropbox/postdoc/projects/ase/data/sim/synt/annovar'
#annovar.tab.file = file.path(annovardir, 'synt.snps.tab.variant_function')
#annovar.file = file.path(annovardir, 'vars.refgene.annot.RData')
annovar.file = file.path(annovardir, 'synt.snps.geneannot.RData')
vars.annot.list.file = file.path(asedir, 'ase.res.annot.RData')
genes.pass.file = file.path(asedir, 'multisnp.genes.pass.RData')
nonuniq.genes.file = file.path(asedir, 'nonuniq.genes.RData')
induced.ase.res.file = file.path(asedir, 'induced.ase.res.RData')
induced.ase.sig.file = file.path(asedir, 'induced.ase.sig.RData')
syntdir = '~/Dropbox/postdoc/projects/ase/data/sim/synt/ase'
ase.method.resdir = '~/Documents/postdoc/ase/asebenchmark/novercontrol/data/gsnap/ase'
}

#Variant-based analysis
synt.sig.ase.res.file = file.path(syntdir, 'ase.noninduced.vars.RData')
sig.ase.res.file = file.path(ase.method.resdir, 'sig.ase.res.RData')

#Gene-based analysis
synt.genes.pass.file = file.path(syntdir, 'multisnp.genes.pass.RData')
method.genes.pass.file = file.path(ase.method.resdir, 'multisnp.genes.pass.RData')

#Cond-dep ASE
induced.ase.sig.file = file.path(syntdir, 'induced.ase.sig.RData')


main <- function(){


######
#1. Condition-INDEPENDENT ASE
######


###
#1.1 Variants
###
#Without test for sig diff number of treat vs untreat
ase.noninduced.vars.file = file.path(asedir, 'ase.noninduced.vars.RData')
load(ase.noninduced.vars.file) #alt.sig.vars

#Load "TRUTH"
load(synt.sig.ase.res.file) #sig.vars, alt.sig.vars
true.sig.vars = unique(unlist(lapply(sig.vars, '[[', 'chrpos')))
true.alt.sig.vars = alt.sig.vars
resdir = '~/Dropbox/postdoc/projects/ase/res/sim'
ase.cond.diff.file = file.path(resdir, 'ase.cond.diff.tab')
load(ase.cond.diff.file) #alt.sig.vars

#Load significant variants after application of ASE method
load(sig.ase.res.file) #sig.vars, alt.sig.vars
sig.vars = unique(unlist(lapply(sig.vars, rownames)))
sig.vars = gsub('^chr', '', sig.vars)
alt.sig.vars = gsub('^chr', '', alt.sig.vars)

#get fdr
#get fdr, alt allele direction filtered: NO
fdr = get.fdr(sig.vars, true.sig.vars)
print(fdr) #7.4%

#get fdr, alt allele direction filtered: YES
fdr = get.fdr(alt.sig.vars, true.alt.sig.vars)
print(fdr) #29%, 6049, 21092

#With test for sig diff treat vs untreat
ase.noninduced.vars.file = file.path(asedir, 'ase.noninduced.vars.RData')
load(ase.noninduced.vars.file)
true.sig.alt.ase = sig.alt.ase
resdir = '~/Dropbox/postdoc/projects/ase/res/sim'
ase.cond.diff.file = file.path(resdir, 'ase.cond.diff.tab')
load(ase.cond.diff.file) #sig.alt.ase
#get fdr
fdr = get.fdr(rownames(sig.alt.ase), rownames(true.sig.alt.ase))
print(fdr) #96%, 25, 26
print(fdr) #6.3%

#get sensitivity
sens = get.sense(alt.sig.vars, true.alt.sig.vars)
print(sens)

###
#1.2 Genes
###
#load data
load(genes.pass.file)

#Load "TRUTH"
load(synt.genes.pass.file) #genes.pass, gene2nsamples
true.genes.pass = genes.pass
true.gene2nsamples = gene2nsamples
run.ase.datadir = '~/Dropbox/postdoc/projects/ase/data/sim/ase'
genes.pass.file = file.path(run.ase.datadir, 'multisnp.genes.pass.RData')
load(genes.pass.file) #genes.pass

#Load significant variants after application of ASE method
load(method.genes.pass.file) #genes.pass, gene2nsamples

#get fdr, n.samples = 2
fdr = get.fdr(genes.pass, true.genes.pass)
print(fdr) #39%, 146, 373
Expand All @@ -87,8 +87,7 @@ main <- function(){
true.genes.pass = genes.pass
true.sig.vars.nsamples1 = sig.vars.nsamples1
true.genes.pass.nsamples1 = genes.pass.nsamples1
ase.datadir = '~/Dropbox/postdoc/projects/ase/data/sim/ase'
induced.ase.sig.file = file.path(ase.datadir, 'induced.ase.sig.RData')
induced.ase.sig.file = file.path(ase.method.resdir, 'induced.ase.sig.RData')
load(induced.ase.sig.file) #sig.vars, genes.pass

##
Expand Down Expand Up @@ -181,6 +180,17 @@ get.fdr <- function(feat.pass, true.feat.pass){
return(fdr)
}

get.sens <- function(feat.pass, true.feat.pass){
tp = intersect(feat.pass, true.feat.pass)
fn = setdiff(true.feat.pass, feat.pass)
n.tp = length(tp)
n.fn = length(fn)
sens = n.tp / (n.tp + n.fn)

sens = c(sens, n.tp, n.fn)
return(sens)
}

filter.altallele <- function(vars, frac.thr = 0.5, gene.col = 'gene annot', frac.col = 'frac', pval.col = 'mod.padj', min.alt = 1, alpha = 0.05){

sig.alt.vars = vars[which(vars[, pval.col] < alpha & vars[, frac.col] > frac.thr), ]
Expand Down
57 changes: 37 additions & 20 deletions ase/snv.ase.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ sys = 'local'

if(sys == 'kalkyl'){
}
if(sys == 'local'){
varcalls.datadir = '~/Dropbox/postdoc/projects/ase/data/sim/varcalls'
basecount.datadir='~/Dropbox/postdoc/projects/ase/data/sim/basecount'
ase.datadir = '~/Dropbox/postdoc/projects/ase/data/sim/ase'
resdir = '~/Dropbox/postdoc/projects/ase/res/sim'
annovar.datadir = '~/Dropbox/postdoc/projects/ase/data/sim/annovar'
if(sys == 'local'){
basedir = '~/Documents/postdoc/ase/asebenchmark/novercontrol/data/gsnap'
varcalls.datadir = file.path(basedir, 'varcalls')
basecount.datadir = file.path(basedir, 'basecount')
annovar.datadir = file.path(basedir, 'annovar')
ase.datadir = file.path(basedir, 'ase')
resdir = '~/Dropbox/postdoc/projects/ase_msc_benchmarking/res/gsnap/ase'
dbsnpdir = '~/Documents/postdoc/ase/asebenchmark/novercontrol/data/annot/dbsnp'
}

#mpileup calls
Expand All @@ -21,14 +23,16 @@ vars.regex = '.*hetvars$'
#basecounts
bc.regex = '.*basecount$'

#annovar
#annot
annovar.file = file.path(annovar.datadir, 'annovar_all_variants.RData')
dbsnp.file = file.path(dbsnpdir, 'dbsnp.v135.common.pos.txt')


#Res
bc.file = file.path(basecount.datadir, 'bc.RData')
bc.dbsnp.file = file.path(basecount.datadir, 'bc.dbsnp.RData')
ase.res.file = file.path(ase.datadir, 'ase.res.RData')
ase.sample.cond.diff.file = file.path(ase.datadir, 'induced.ase.res.RData')
induced.ase.sig.file = file.path(ase.datadir, 'induced.ase.sig.RData')
sig.ase.res.file = file.path(ase.datadir, 'sig.ase.res.RData')

main <- function(){

Expand All @@ -53,7 +57,7 @@ main <- function(){

#Dump
save(basecount.list, file = bc.file)


###
#Filter
Expand All @@ -66,7 +70,20 @@ main <- function(){
bc.filt = lapply(bc.filt, function(bc){bc[which(bc[, 'site.dp'] >= min.dp), ]})

#total number of vars
length(unique(unlist(lapply(bc.filt, '[[', 'site')))) #112756
length(unique(unlist(lapply(bc.filt, '[[', 'site')))) #97628

#Filter on dbSNP presence
dbsnp.vars = read.table(dbsnp.file, stringsAsFactors = FALSE)
colnames(dbsnp.vars) = 'site'
bc.filt = lapply(bc.filt, function(jsample.vars, dbsnp.vars){merge(jsample.vars, dbsnp.vars, by = 'site')}, dbsnp.vars = dbsnp.vars)

#total number of vars
length(unique(unlist(lapply(bc.filt, '[[', 'site')))) #77969

#Dump
save(bc.filt, file = bc.dbsnp.file)



###
#DP dist
Expand Down Expand Up @@ -104,7 +121,7 @@ main <- function(){

#Dump
save(ase.res, file = ase.res.file)


###
#Sig hits stats
Expand All @@ -115,7 +132,14 @@ main <- function(){

#Get sig hits
sig.vars = lapply(ase.res, function(sample.vars, alpha, pval.col){sample.vars = sample.vars[which(sample.vars[, pval.col] <= alpha), ]; return(sample.vars);}, alpha = alpha, pval.col = pval.col)
sig.vars.mat = get.sig.mat(ase.res, alpha, pval.col = pval.col)

#Filter on alternative allele direction
alt.vars = lapply(sig.vars, alt.filter)
alt.sig.vars = unique(unlist(alt.vars))
length(alt.sig.vars) #17832

#Dump
save(sig.vars, alt.sig.vars, file = sig.ase.res.file)


###
Expand All @@ -129,14 +153,7 @@ main <- function(){

#number of uniq sig.vars
length(unique(unlist(lapply(sig.vars, '[[', 'site')))) #45952




#((see varstats.R for inspiration, or its enough with the stuff in multiplesnsps2gene.R))
#gene annot
#DE


###
#Gene based analysis.
Expand Down
10 changes: 5 additions & 5 deletions bam/bedtoolshist2pdf.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ main <- function(histfile2id, tab.outfile, hist.pdf.file){
hist.tabs = read.files(histfile2id)
dp2frac = get.dp2frac(hist.tabs)
dp2bases = get.dp2bases(hist.tabs)
plot.dp2frac(dp2frac, ltypes = histfile2id[names(dp2frac), 'ltypes'], colors = histfile2id[names(dp2frac), 'colors'])
plot.dp2bases(dp2bases)
plot.dp2y(dp2frac, ltypes = histfile2id[names(dp2frac), 'ltypes'], colors = histfile2id[names(dp2frac), 'colors'])
plot.dp2y(dp2bases)


#Plot
Expand All @@ -66,11 +66,11 @@ main <- function(histfile2id, tab.outfile, hist.pdf.file){
par(mfrow = c(1, 2))
unstim.ind = grep('unstim', names(dp2frac))
dp2frac.unstim = dp2frac[unstim.ind]
plot.dp2frac(dp2frac.unstim, ltypes = histfile2id[names(dp2frac.unstim), 'ltypes'], colors = histfile2id[names(dp2frac.unstim), 'colors'])
plot.dp2y(dp2frac.unstim, ltypes = histfile2id[names(dp2frac.unstim), 'ltypes'], colors = histfile2id[names(dp2frac.unstim), 'colors'])

lps.ind = grep('LPS', names(dp2frac))
dp2frac.lps = dp2frac[lps.ind]
plot.dp2frac(dp2frac.lps, ltypes = histfile2id[names(dp2frac.lps), 'ltypes'], colors = histfile2id[names(dp2frac.lps), 'colors'])
plot.dp2y(dp2frac.lps, ltypes = histfile2id[names(dp2frac.lps), 'ltypes'], colors = histfile2id[names(dp2frac.lps), 'colors'])
dev.off()
}

Expand Down Expand Up @@ -145,7 +145,7 @@ plot.dp2y <- function(dp2frac, max.plot.dp = 100, ylab = 'fraction covered', lty
n.hist = length(dp2frac)

ylim = range(unlist(dp2frac))
ylim = c(0, 5e8)
#ylim = c(0, 5e8)
jhist = 1
plot(dp2frac[[jhist]][min.plot.dp:max.plot.dp], xlab = 'depth', ylab = ylab, col=colors[jhist], type = 'l', ylim = ylim, lty = ltypes[jhist])
for(jhist in 2:n.hist){
Expand Down