-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMigrationEffectOnAdmixture.Rmd
More file actions
638 lines (466 loc) · 35.1 KB
/
MigrationEffectOnAdmixture.Rmd
File metadata and controls
638 lines (466 loc) · 35.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
---
title: "Untitled"
author: "Aaron Wolf"
date: "9/27/2018"
output: html_document
---
```{r}
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyr)
library(grid)
library(gridExtra)
```
```{r Coalescent Calls}
mdls <- list.files('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Tenn/', pattern = 'n1_0.05')
dt.Coalescent <- data.table(NULL)
for (mdl in mdls){
print(mdl)
infile <- paste0('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Tenn/',mdl)
chr_list <- fread(paste0(infile,'/Tenn.chr_list'), col.names = c('chr'), colClasses = c('character'))
###########
bedfile.EAS <- fread(input = paste0("gzcat ",infile,"/TreeCalls/*_ASN.gz"),
col.names = c('msp_ID_chr','start','end'))
bedfile.EAS[,len_bp:=end-start]
# bedfile.EAS <- bedfile.EAS %>% filter(len_bp>50000)
bedfile.EAS %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[3]]) %>% as.data.table()
bedfile.EAS.chr <- bedfile.EAS %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/1008/1000000) %>% as.data.table()
bedfile.EAS.chr <- right_join(x=bedfile.EAS.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EAS = mean(bedfile.EAS.chr$admix)
ci.EAS = paste0(t.test(x = bedfile.EAS.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EAS.chr$admix)$conf.int[2])
sd.EAS = sd(bedfile.EAS.chr$admix)
###########
bedfile.EUR <- fread(paste0('gzcat ',infile,'/TreeCalls/*_EUR.gz'),
col.names = c('msp_ID_chr','start','end'))
bedfile.EUR[,len_bp:=end-start]
# bedfile.EUR <- bedfile.EUR %>% filter(len_bp>50000)
bedfile.EUR %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[3]]) %>% as.data.table()
bedfile.EUR.chr <- bedfile.EUR %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/1006/1000000) %>% as.data.table()
bedfile.EUR.chr <- right_join(x=bedfile.EUR.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EUR = mean(bedfile.EUR.chr$admix)
ci.EUR = paste0(t.test(x = bedfile.EUR.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EUR.chr$admix)$conf.int[2])
sd.EUR = sd(bedfile.EUR.chr$admix)
###########
bedfile.ALL <- rbind(bedfile.EAS, bedfile.EUR)
bedfile.ALL.chr <- bedfile.ALL %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/2014/1000000) %>% as.data.table()
bedfile.ALL.chr <- right_join(x=bedfile.ALL.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.ALL = mean(bedfile.ALL.chr$admix)
ci.ALL = paste0(t.test(x = bedfile.ALL.chr$admix)$conf.int[1],' : ',t.test(x = bedfile.ALL.chr$admix)$conf.int[2])
sd.ALL = sd(bedfile.ALL.chr$admix)
###########
row <- data.table(mdl,admix.ALL,ci.ALL,sd.ALL,admix.EAS,ci.EAS,sd.EAS,admix.EUR,ci.EUR,sd.EUR,(admix.EAS/admix.EUR),t.test(bedfile.EAS.chr$admix, bedfile.EUR.chr$admix)$p.value)
dt.Coalescent <- rbind(dt.Coalescent, row)
}
dt.Coalescent %>% setnames(c('mdl','admix.ALL','ci.ALL','sd.ALL','admix.EAS','ci.EAS','sd.EAS','admix.EUR','ci.EUR','sd.EUR','EAS:EUR','t.test.pval'))
dt.Coalescent[,c('ci.EUR.lwr', 'ci.EUR.upr') := tstrsplit(ci.EUR, ' : ',fixed=TRUE)]
dt.Coalescent[,c('ci.EAS.lwr', 'ci.EAS.upr') := tstrsplit(ci.EAS, ' : ',fixed=TRUE)]
dt.Coalescent[,c('ci.ALL.lwr', 'ci.ALL.upr') := tstrsplit(ci.ALL, ' : ',fixed=TRUE)]
```
```{r Coalescent data graph}
ggplot() + theme_bw() +
geom_point(data = dt.Coalescent, aes(x=as.numeric(admix.ALL), y=factor(mdl, levels = rev(mdls))), color='blue', size=3 ) +
geom_errorbarh(data = dt.Coalescent, aes(x=as.numeric(admix.ALL), xmin=as.numeric(ci.ALL.lwr),
xmax=as.numeric(ci.ALL.upr),
y=factor(mdl, levels = rev(mdls))), color='blue', size=1 ) +
geom_point(data = dt.Coalescent, aes(x=as.numeric(admix.EAS), y=factor(mdl, levels = rev(mdls))), color='orange', size=3 ) +
geom_errorbarh(data = dt.Coalescent, aes(x=as.numeric(admix.EAS),
xmin=as.numeric(ci.EAS.lwr), xmax=as.numeric(ci.EAS.upr),
y=factor(mdl, levels = rev(mdls))), color='orange', size=1 ) +
geom_point(data = dt.Coalescent, aes(x=as.numeric(admix.EUR), y=factor(mdl, levels = rev(mdls))), color='darkgreen', size=3 ) +
geom_errorbarh(data = dt.Coalescent, aes(x=as.numeric(admix.EUR),
xmin=as.numeric(ci.EUR.lwr), xmax=as.numeric(ci.EUR.upr),
y=factor(mdl, levels = rev(mdls))), color='darkgreen', size=1 ) +
geom_hline(yintercept = 8.5, linetype='dashed', color='darkgrey', size=2) +
geom_hline(yintercept = 16.5, linetype='dashed', color='darkgrey', size=2) +
xlab(label = 'admixture fraction estimate') +
ylab(label = 'admixture_migration model') +
ggtitle(label = 'Admixture Estimate from Coalescent Tree Calls') +
theme()
```
```{r S* calls}
admix <- 'n1_0.05'
mdls <- list.files(paste0('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Sriram/', admix), pattern = 'n1_0.05')
dt.Sstar <- data.table(NULL)
for (mdl in mdls){
print(mdl)
infile <- paste0('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Sriram/',admix,'/', mdl)
chr_list <- fread(paste0(infile,'/Sriram.chr_list'), col.names = c('chr'), colClasses = c('character'))
###########
bedfile.EAS <- fread(input = paste0("gzcat ",infile,"/bedfiles/ALL.merged_ASN.gz"),
col.names = c('msp_ID_chr','start','end'))
bedfile.EAS[,len_bp:=end-start]
bedfile.EAS %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[3]]) %>% as.data.table()
bedfile.EAS.chr <- bedfile.EAS %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/1008/1000000) %>% as.data.table()
bedfile.EAS.chr <- right_join(x=bedfile.EAS.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EAS = mean(bedfile.EAS.chr$admix)
ci.EAS = paste0(t.test(x = bedfile.EAS.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EAS.chr$admix)$conf.int[2])
sd.EAS = sd(bedfile.EAS.chr$admix)
###########
bedfile.EUR <- fread(paste0('gzcat ',infile,'/bedfiles/ALL.merged_EUR.gz'),
col.names = c('msp_ID_chr','start','end'))
bedfile.EUR[,len_bp:=end-start]
bedfile.EUR %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[3]]) %>% as.data.table()
bedfile.EUR.chr <- bedfile.EUR %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/1006/1000000) %>% as.data.table()
bedfile.EUR.chr <- right_join(x=bedfile.EUR.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EUR = mean(bedfile.EUR.chr$admix)
ci.EUR = paste0(t.test(x = bedfile.EUR.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EUR.chr$admix)$conf.int[2])
sd.EUR = sd(bedfile.EUR.chr$admix)
###########
bedfile.ALL <- rbind(bedfile.EAS, bedfile.EUR)
bedfile.ALL.chr <- bedfile.ALL %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/2014/1000000) %>% as.data.table()
bedfile.ALL.chr <- right_join(x=bedfile.ALL.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.ALL = mean(bedfile.ALL.chr$admix)
ci.ALL = paste0(t.test(x = bedfile.ALL.chr$admix)$conf.int[1],' : ',t.test(x = bedfile.ALL.chr$admix)$conf.int[2])
sd.ALL = sd(bedfile.ALL.chr$admix)
###########
row <- data.table(mdl,admix.ALL,ci.ALL,sd.ALL,admix.EAS,ci.EAS,sd.EAS,admix.EUR,ci.EUR,sd.EUR,(admix.EAS/admix.EUR),t.test(bedfile.EAS.chr$admix, bedfile.EUR.chr$admix)$p.value)
dt.Sstar <- rbind(dt.Sstar, row)
}
dt.Sstar %>% setnames(c('mdl','admix.ALL','ci.ALL','sd.ALL','admix.EAS','ci.EAS','sd.EAS','admix.EUR','ci.EUR','sd.EUR','EAS:EUR','t.test.pval'))
dt.Sstar[,c('ci.EUR.lwr', 'ci.EUR.upr') := tstrsplit(ci.EUR, ' : ',fixed=TRUE)]
dt.Sstar[,c('ci.EAS.lwr', 'ci.EAS.upr') := tstrsplit(ci.EAS, ' : ',fixed=TRUE)]
dt.Sstar[,c('ci.ALL.lwr', 'ci.ALL.upr') := tstrsplit(ci.ALL, ' : ',fixed=TRUE)]
```
```{r S* diploid calls}
admix <- 'n1_0.05'
mdls <- list.files(paste0('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Sriram/', admix), pattern = 'n1_0.05')
dt.Sstar.diploid <- data.table(NULL)
for (mdl in mdls){
print(mdl)
infile <- paste0('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Sriram/',admix,'/', mdl)
chr_list <- fread(paste0(infile,'/Sriram.chr_list'), col.names = c('chr'), colClasses = c('character'))
###########
bedfile.EAS <- fread(input = paste0("cat ",infile,"/bedfiles/ALL.merged_ASN.diploid"),
col.names = c('msp_ID_chr','start','end'))
bedfile.EAS[,len_bp:=end-start]
bedfile.EAS %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[2]]) %>% as.data.table()
bedfile.EAS.chr <- bedfile.EAS %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/504/1000000) %>% as.data.table()
bedfile.EAS.chr <- right_join(x=bedfile.EAS.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EAS = mean(bedfile.EAS.chr$admix)
ci.EAS = paste0(t.test(x = bedfile.EAS.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EAS.chr$admix)$conf.int[2])
sd.EAS = sd(bedfile.EAS.chr$admix)
###########
bedfile.EUR <- fread(paste0('cat ',infile,'/bedfiles/ALL.merged_EUR.diploid'),
col.names = c('msp_ID_chr','start','end'))
bedfile.EUR[,len_bp:=end-start]
bedfile.EUR %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[2]]) %>% as.data.table()
bedfile.EUR.chr <- bedfile.EUR %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/503/1000000) %>% as.data.table()
bedfile.EUR.chr <- right_join(x=bedfile.EUR.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EUR = mean(bedfile.EUR.chr$admix)
ci.EUR = paste0(t.test(x = bedfile.EUR.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EUR.chr$admix)$conf.int[2])
sd.EUR = sd(bedfile.EUR.chr$admix)
###########
bedfile.ALL <- rbind(bedfile.EAS, bedfile.EUR)
bedfile.ALL.chr <- bedfile.ALL %>% rename(chr=msp_ID_chr) %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/1007/1000000) %>% as.data.table()
bedfile.ALL.chr <- right_join(x=bedfile.ALL.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.ALL = mean(bedfile.ALL.chr$admix)
ci.ALL = paste0(t.test(x = bedfile.ALL.chr$admix)$conf.int[1],' : ',t.test(x = bedfile.ALL.chr$admix)$conf.int[2])
sd.ALL = sd(bedfile.ALL.chr$admix)
###########
row <- data.table(mdl,admix.ALL,ci.ALL,sd.ALL,admix.EAS,ci.EAS,sd.EAS,admix.EUR,ci.EUR,sd.EUR,(admix.EAS/admix.EUR),t.test(bedfile.EAS.chr$admix, bedfile.EUR.chr$admix)$p.value)
dt.Sstar.diploid <- rbind(dt.Sstar.diploid, row)
}
dt.Sstar.diploid %>% setnames(c('mdl','admix.ALL','ci.ALL','sd.ALL','admix.EAS','ci.EAS','sd.EAS','admix.EUR','ci.EUR','sd.EUR','EAS:EUR','t.test.pval'))
dt.Sstar.diploid[,c('ci.EUR.lwr', 'ci.EUR.upr') := tstrsplit(ci.EUR, ' : ',fixed=TRUE)]
dt.Sstar.diploid[,c('ci.EAS.lwr', 'ci.EAS.upr') := tstrsplit(ci.EAS, ' : ',fixed=TRUE)]
dt.Sstar.diploid[,c('ci.ALL.lwr', 'ci.ALL.upr') := tstrsplit(ci.ALL, ' : ',fixed=TRUE)]
```
```{r S* calls graph}
ggplot() + theme_bw() +
geom_point(data = dt.Sstar, aes(x=as.numeric(admix.ALL), y=factor(mdl, levels = rev(mdls))), color='blue', size=3 ) +
geom_errorbarh(data = dt.Sstar, aes(x=as.numeric(admix.ALL), xmin=as.numeric(ci.ALL.lwr),
xmax=as.numeric(ci.ALL.upr),
y=factor(mdl, levels = rev(mdls))), color='blue', size=1 ) +
geom_point(data = dt.Sstar, aes(x=as.numeric(admix.EAS), y=factor(mdl, levels = rev(mdls))), color='orange', size=3 ) +
geom_errorbarh(data = dt.Sstar, aes(x=as.numeric(admix.EAS),
xmin=as.numeric(ci.EAS.lwr), xmax=as.numeric(ci.EAS.upr),
y=factor(mdl, levels = rev(mdls))), color='orange', size=1 ) +
geom_point(data = dt.Sstar, aes(x=as.numeric(admix.EUR), y=factor(mdl, levels = rev(mdls))), color='darkgreen', size=3 ) +
geom_errorbarh(data = dt.Sstar, aes(x=as.numeric(admix.EUR),
xmin=as.numeric(ci.EUR.lwr), xmax=as.numeric(ci.EUR.upr),
y=factor(mdl, levels = rev(mdls))), color='darkgreen', size=1 ) +
geom_hline(yintercept = 8.5, linetype='dashed', color='darkgrey', size=2) +
geom_hline(yintercept = 16.5, linetype='dashed', color='darkgrey', size=2) +
xlab(label = 'admixture fraction estimate') +
ylab(label = 'admixture_migration model') +
ggtitle(label = 'Admixture Estimate from Sstar Calls') +
theme()
```
```{r IBDmix Calls}
admix <- 'n1_0.05'
mdls <- list.files(paste0('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Sriram/',admix), pattern = 'n1_0.05')
dt.IBDmix <- data.table(NULL)
for (mdl in mdls){
print(mdl)
infile <- paste0('~/DATALab/SimulatedDemographic/Sstar/chr1_variable_ref/simulations/Sriram/',admix,'/',mdl)
chr_list <- fread(paste0(infile,'/Sriram.chr_list'), col.names = c('chr'), colClasses = c('integer')) %>% separate(col = 'chr', into = c('chr','pool')) %>% rowwise() %>% mutate(chr=as.integer(chr)) %>% as.data.table()
###########
bedfile.EAS <- fread(input = paste0("cat ",infile,"/IBDmixCalls/IntroSeg.ASN.D4E0.002.ALL.txt"),
col.names = c('Ind','chr','start','end','LOD1','LOD2'))
bedfile.EAS[,len_bp:=end-start]
bedfile.EAS <- bedfile.EAS %>% filter(len_bp>=30000)
#bedfile.EAS %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[3]]) %>% as.data.table()
bedfile.EAS.chr <- bedfile.EAS %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/504/1000000) %>% as.data.table()
bedfile.EAS.chr <- right_join(x=bedfile.EAS.chr, y = chr_list, by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EAS = mean(bedfile.EAS.chr$admix)
ci.EAS = paste0(t.test(x = bedfile.EAS.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EAS.chr$admix)$conf.int[2])
sd.EAS = sd(bedfile.EAS.chr$admix)
###########
bedfile.EUR <- fread(paste0('cat ',infile,'/IBDmixCalls/IntroSeg.EUR.D4E0.002.ALL.txt'),
col.names = c('Ind','chr','start','end','LOD1','LOD2'))
bedfile.EUR[,len_bp:=end-start]
bedfile.EUR <- bedfile.EUR %>% filter(len_bp>=30000)
#bedfile.EUR %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[3]]) %>% as.data.table()
bedfile.EUR.chr <- bedfile.EUR %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/503/1000000) %>% as.data.table()
bedfile.EUR.chr <- right_join(x=bedfile.EUR.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.EUR = mean(bedfile.EUR.chr$admix)
ci.EUR = paste0(t.test(x = bedfile.EUR.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.EUR.chr$admix)$conf.int[2])
sd.EUR = sd(bedfile.EUR.chr$admix)
###########
bedfile.AFR <- fread(paste0('cat ',infile,'/IBDmixCalls/IntroSeg.AFR.D4E0.002.ALL.txt'),
col.names = c('Ind','chr','start','end','LOD1','LOD2'))
bedfile.AFR[,len_bp:=end-start]
bedfile.AFR <- bedfile.AFR %>% filter(len_bp>=30000)
#bedfile.EUR %<>% rowwise() %>% mutate(chr=strsplit(msp_ID_chr,split = '_')[[1]][[3]]) %>% as.data.table()
bedfile.AFR.chr <- bedfile.AFR %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/108/1000000) %>% as.data.table()
bedfile.AFR.chr <- right_join(x=bedfile.AFR.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.AFR = mean(bedfile.AFR.chr$admix)
ci.AFR = paste0(t.test(x = bedfile.AFR.chr$admix)$conf.int[1], ' : ',t.test(x = bedfile.AFR.chr$admix)$conf.int[2])
sd.AFR = sd(bedfile.AFR.chr$admix)
###########
bedfile.ALL <- rbind(bedfile.EAS, bedfile.EUR, bedfile.AFR)
bedfile.ALL.chr <- bedfile.ALL %>% group_by(chr) %>% summarise(admix = sum(as.numeric(len_bp))/1115/1000000) %>% as.data.table()
bedfile.ALL.chr <- right_join(x=bedfile.ALL.chr, y = chr_list,by = c('chr')) %>% replace_na(list(admix=0)) %>% as.data.table()
admix.ALL = mean(bedfile.ALL.chr$admix)
ci.ALL = paste0(t.test(x = bedfile.ALL.chr$admix)$conf.int[1],' : ',t.test(x = bedfile.ALL.chr$admix)$conf.int[2])
sd.ALL = sd(bedfile.ALL.chr$admix)
###########
row <- data.table(mdl,admix.ALL,ci.ALL,sd.ALL,admix.EAS,ci.EAS,sd.EAS,admix.EUR,ci.EUR,sd.EUR,admix.AFR,ci.AFR,sd.AFR, (admix.EAS/admix.EUR),t.test(bedfile.EAS.chr$admix, bedfile.EUR.chr$admix)$p.value)
dt.IBDmix <- rbind(dt.IBDmix, row)
}
dt.IBDmix %>% setnames(c('mdl','admix.ALL','ci.ALL','sd.ALL','admix.EAS','ci.EAS','sd.EAS','admix.EUR','ci.EUR','sd.EUR','admix.AFR','ci.AFR','sd.AFR','EAS:EUR','t.test.pval'))
dt.IBDmix[,c('ci.EUR.lwr', 'ci.EUR.upr') := tstrsplit(ci.EUR, ' : ',fixed=TRUE)]
dt.IBDmix[,c('ci.EAS.lwr', 'ci.EAS.upr') := tstrsplit(ci.EAS, ' : ',fixed=TRUE)]
dt.IBDmix[,c('ci.AFR.lwr', 'ci.AFR.upr') := tstrsplit(ci.AFR, ' : ',fixed=TRUE)]
dt.IBDmix[,c('ci.ALL.lwr', 'ci.ALL.upr') := tstrsplit(ci.ALL, ' : ',fixed=TRUE)]
```
```{r Combined graph}
ggplot() + theme_bw() +
geom_point(data = dt.Sstar, aes(x=as.numeric(admix.ALL), y=factor(mdl, levels = rev(mdls)), color='ALL'), size=3 ) +
geom_errorbarh(data = dt.Sstar, aes(x=as.numeric(admix.ALL), xmin=as.numeric(ci.ALL.lwr),
xmax=as.numeric(ci.ALL.upr),
y=factor(mdl, levels = rev(mdls)), color='ALL'), size=1 ) +
geom_point(data = dt.Sstar, aes(x=as.numeric(admix.EAS), y=factor(mdl, levels = rev(mdls)), color='EAS'), size=3 ) +
geom_errorbarh(data = dt.Sstar, aes(x=as.numeric(admix.EAS),
xmin=as.numeric(ci.EAS.lwr), xmax=as.numeric(ci.EAS.upr),
y=factor(mdl, levels = rev(mdls)), color='EAS'), size=1 ) +
geom_point(data = dt.Sstar, aes(x=as.numeric(admix.EUR), y=factor(mdl, levels = rev(mdls)), color='EUR'), size=3 ) +
geom_errorbarh(data = dt.Sstar, aes(x=as.numeric(admix.EUR),
xmin=as.numeric(ci.EUR.lwr), xmax=as.numeric(ci.EUR.upr),
y=factor(mdl, levels = rev(mdls)), color='EUR'), size=1 ) +
###############################
geom_point(data = dt.Coalescent, aes(x=as.numeric(admix.ALL), y=factor(mdl, levels = rev(mdls)), color='ALL'), size=3 ) +
geom_errorbarh(data = dt.Coalescent, aes(x=as.numeric(admix.ALL), xmin=as.numeric(ci.ALL.lwr),
xmax=as.numeric(ci.ALL.upr),
y=factor(mdl, levels = rev(mdls)), color='ALL'), size=1 ) +
geom_point(data = dt.Coalescent, aes(x=as.numeric(admix.EAS), y=factor(mdl, levels = rev(mdls)), color='EAS'), size=3 ) +
geom_errorbarh(data = dt.Coalescent, aes(x=as.numeric(admix.EAS),
xmin=as.numeric(ci.EAS.lwr), xmax=as.numeric(ci.EAS.upr),
y=factor(mdl, levels = rev(mdls)), color='EAS'), size=1 ) +
geom_point(data = dt.Coalescent, aes(x=as.numeric(admix.EUR), y=factor(mdl, levels = rev(mdls)), color='EUR'), size=3 ) +
geom_errorbarh(data = dt.Coalescent, aes(x=as.numeric(admix.EUR),
xmin=as.numeric(ci.EUR.lwr), xmax=as.numeric(ci.EUR.upr),
y=factor(mdl, levels = rev(mdls)), color='EUR'), size=1 ) +
################################
geom_hline(yintercept = 8.5, linetype='dashed', color='darkgrey', size=2) +
geom_hline(yintercept = 16.5, linetype='dashed', color='darkgrey', size=2) +
xlab(label = 'admixture fraction estimate') +
ylab(label = 'admixture_migration model') +
ggtitle(label = 'Admixture Estimate from Sstar and Coalescent Tree Calls') +
scale_color_manual("",values = c('blue','darkgreen','orange'), breaks=c('ALL','EAS','EUR')) +
theme()
```
```{r Recent migration graph}
modern.mdls <- mdls[c(1:9)]
ancient.mdls <- mdls[c(1,10:17)]
modern.mig <- data.table(mig=c(0.0, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.000025, 0.00005, 0.0001, 0.0005))
ancient.mig <- data.table(mig=c(0.0, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.00005, 0.0001, 0.00015, 0.0005))
# modern.mdls <- mdls[c(1:8)]
# ancient.mdls <- mdls[c(1,9:15)]
# modern.mig <- data.table(mig=c(0.0, 0.0000001, 0.000001, 0.00001, 0.000025, 0.00005, 0.0001, 0.0005))
# ancient.mig <- data.table(mig=c(0.0, 0.0000001, 0.000001, 0.00001, 0.00005, 0.0001, 0.00015, 0.0005))
dt.Sstar.modern <- cbind(dt.Sstar[mdl %in% modern.mdls], modern.mig)
#dt.Coalescent.modern <- cbind(dt.Coalescent[mdl %in% modern.mdls], modern.mig)
dt.IBDmix.modern <- cbind(dt.IBDmix[mdl %in% modern.mdls], modern.mig)
dt.Sstar.diploid.modern <- cbind(dt.Sstar.diploid[mdl %in% modern.mdls], modern.mig)
dt.Sstar.ancient <- cbind(dt.Sstar[mdl %in% ancient.mdls], ancient.mig)
#dt.Coalescent.ancient <- cbind(dt.Coalescent[mdl %in% ancient.mdls], ancient.mig)
dt.IBDmix.ancient <- cbind(dt.IBDmix[mdl %in% ancient.mdls], modern.mig)
dt.Sstar.diploid.ancient <- cbind(dt.Sstar.diploid[mdl %in% ancient.mdls], ancient.mig)
################################
################################
plot.modern_migration <-
ggplot() + theme_bw() +
# geom_point(data = dt.Sstar.modern, aes(y=as.numeric(admix.ALL), x=mdl, color='ALL'), size=3 ) +
# geom_errorbar(data = dt.Sstar.modern, aes(y=as.numeric(admix.ALL),
# ymin=as.numeric(ci.ALL.lwr), ymax=as.numeric(ci.ALL.upr),
# x=mdl, color='ALL'), size=1 ) +
##############################
geom_point(data = dt.IBDmix.modern, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.IBDmix'), size=6 ) +
geom_errorbar(data = dt.IBDmix.modern, aes(y=as.numeric(admix.EAS),
ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
x=mdl, color='EAS.IBDmix'), size=1.5 ) +
geom_line(data = dt.IBDmix.modern, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.IBDmix', group=1), size=1 ) +
geom_point(data = dt.IBDmix.modern, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.IBDmix'), size=6 ) +
geom_errorbar(data = dt.IBDmix.modern, aes(y=as.numeric(admix.EUR),
ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
x=mdl, color='EUR.IBDmix'), size=1.5 ) +
geom_line(data = dt.IBDmix.modern, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.IBDmix', group=1), size=1 ) +
geom_point(data = dt.IBDmix.modern, aes(y=as.numeric(admix.AFR), x=mdl, color='AFR.IBDmix'), size=6 ) +
geom_errorbar(data = dt.IBDmix.modern, aes(y=as.numeric(admix.AFR),
ymin=as.numeric(ci.AFR.lwr), ymax=as.numeric(ci.AFR.upr),
x=mdl, color='AFR.IBDmix'), size=1.5 ) +
geom_line(data = dt.IBDmix.modern, aes(y=as.numeric(admix.AFR), x=mdl, color='AFR.IBDmix', group=1), size=1 ) +
###############################
# geom_point(data = dt.Coalescent.modern, aes(y=as.numeric(admix.ALL), x=mdl, color='ALL'), size=3 ) +
# geom_errorbar(data = dt.Coalescent.modern, aes(y=as.numeric(admix.ALL),
# ymin=as.numeric(ci.ALL.lwr), ymax=as.numeric(ci.ALL.upr),
# x=mdl, color='ALL'), size=1 ) +
# geom_point(data = dt.Coalescent.modern, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS'), size=5 ) +
# geom_errorbar(data = dt.Coalescent.modern, aes(y=as.numeric(admix.EAS),
# ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
# x=mdl, color='EAS'), size=3 ) +
#
# geom_point(data = dt.Coalescent.modern, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR'), size=5 ) +
# geom_errorbar(data = dt.Coalescent.modern, aes(y=as.numeric(admix.EUR),
# ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
# x=mdl, color='EUR'), size=3 ) +
geom_point(data = dt.Sstar.diploid.modern, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar.diploid'), size=6 ) +
geom_errorbar(data = dt.Sstar.diploid.modern, aes(y=as.numeric(admix.EAS),
ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
x=mdl, color='EAS.Sstar.diploid'), size=1.5 ) +
geom_line(data = dt.Sstar.diploid.modern, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar.diploid', group=1), size=1 ) +
geom_point(data = dt.Sstar.diploid.modern, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar.diploid'), size=6 ) +
geom_errorbar(data = dt.Sstar.diploid.modern, aes(y=as.numeric(admix.EUR),
ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
x=mdl, color='EUR.Sstar.diploid'), size=1.5 ) +
geom_line(data = dt.Sstar.diploid.modern, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar.diploid', group=1), size=1 ) +
##############################
##############################
geom_point(data = dt.Sstar.modern, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar'), size=6 ) +
geom_errorbar(data = dt.Sstar.modern, aes(y=as.numeric(admix.EAS),
ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
x=mdl, color='EAS.Sstar'), size=1.5 ) +
geom_line(data = dt.Sstar.modern, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar', group=1), size=1 ) +
geom_point(data = dt.Sstar.modern, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar'), size=6 ) +
geom_errorbar(data = dt.Sstar.modern, aes(y=as.numeric(admix.EUR),
ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
x=mdl, color='EUR.Sstar'), size=1.5 ) +
geom_line(data = dt.Sstar.modern, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar', group=1), size=1 ) +
geom_hline(yintercept = 0.05, linetype='dashed',color='black') +
################################
ylab(label = 'admixture fraction estimate') +
xlab(label = 'migration rate per generation') +
ggtitle(label = 'Admixture Estimate: Sstar and IBDmix Calls; Modern Migration') +
# scale_color_manual(values = c('EUR.IBDmix' = 'turquoise4','EUR.Sstar'='steelblue','EAS.IBDmix'='orangered4','EAS.Sstar'='red1', 'AFR.IBDmix'='orange3')) +
#scale_color_manual(values = c('EUR.IBDmix' = 'turquoise3','EUR.Sstar'='turquoise4','EAS.IBDmix'='red3','EAS.Sstar'='red1', 'AFR.IBDmix'='orange3')) +
#scale_color_hue(l=50, c=100) +
#scale_x_discrete(breaks=dt.Sstar.modern$mdl, labels=c(0.0, 0.0000001, 0.000001, 0.00001, 0.000025, 0.00005, 0.0001, 0.0005)) +
scale_x_discrete(breaks=dt.Sstar.modern$mdl, labels=c(0.0, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.000025, 0.00005, 0.0001, 0.0005)) +
scale_y_continuous(breaks = seq(0,0.16,by = 0.01), labels=seq(0,0.16,by=0.01)) +
# scale_y_continuous(trans = "log10", limits = c(NA,-2)) +
# coord_cartesian(ylim = c(0,0.16), expand = c(0)) +
coord_cartesian(ylim = c(0,0.08), expand = c(0)) +
theme(
plot.title=element_text(face="bold", size = 14),
axis.text=element_text(face="bold", size = 14),
axis.title=element_text(face="bold", size = 14),
legend.title=element_text(face="bold", size = 14),
legend.text=element_text(face="bold", size = 14)
)
##################################
##################################
##################################
##################################
plot.ancient_migration <-
ggplot() + theme_bw() +
# geom_point(data = dt.Sstar.ancient, aes(y=as.numeric(admix.ALL), x=mdl, color='ALL'), size=3 ) +
# geom_errorbar(data = dt.Sstar.ancient, aes(y=as.numeric(admix.ALL),
# ymin=as.numeric(ci.ALL.lwr), ymax=as.numeric(ci.ALL.upr),
# x=mdl, color='ALL'), size=1 ) +
geom_point(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.IBDmix'), size=6 ) +
geom_errorbar(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.EAS),
ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
x=mdl, color='EAS.IBDmix'), size=1.5 ) +
geom_line(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.IBDmix', group=1), size=1 ) +
geom_point(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.IBDmix'), size=6 ) +
geom_errorbar(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.EUR),
ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
x=mdl, color='EUR.IBDmix'), size=1.5 ) +
geom_line(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.IBDmix', group=1), size=1 ) +
geom_point(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.AFR), x=mdl, color='AFR.IBDmix'), size=6 ) +
geom_errorbar(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.AFR),
ymin=as.numeric(ci.AFR.lwr), ymax=as.numeric(ci.AFR.upr),
x=mdl, color='AFR.IBDmix'), size=1.5 ) +
geom_line(data = dt.IBDmix.ancient, aes(y=as.numeric(admix.AFR), x=mdl, color='AFR.IBDmix', group=1), size=1 ) +
geom_hline(yintercept = 0.05, linetype='dashed',color='black') +
###############################
# geom_point(data = dt.Coalescent.ancient, aes(y=as.numeric(admix.ALL), x=mdl, color='ALL'), size=3 ) +
# geom_errorbar(data = dt.Coalescent.ancient, aes(y=as.numeric(admix.ALL),
# ymin=as.numeric(ci.ALL.lwr), ymax=as.numeric(ci.ALL.upr),
# x=mdl, color='ALL'), size=1 ) +
# geom_point(data = dt.Coalescent.ancient, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS'), size=5 ) +
# geom_errorbar(data = dt.Coalescent.ancient, aes(y=as.numeric(admix.EAS),
# ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
# x=mdl, color='EAS'), size=3 ) +
#
# geom_point(data = dt.Coalescent.ancient, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR'), size=5 ) +
# geom_errorbar(data = dt.Coalescent.ancient, aes(y=as.numeric(admix.EUR),
# ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
# x=mdl, color='EUR'), size=3 ) +
geom_point(data = dt.Sstar.diploid.ancient, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar.diploid'), size=6 ) +
geom_errorbar(data = dt.Sstar.diploid.ancient, aes(y=as.numeric(admix.EAS),
ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
x=mdl, color='EAS.Sstar.diploid'), size=1.5 ) +
geom_line(data = dt.Sstar.diploid.ancient, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar.diploid', group=1), size=1 ) +
geom_point(data = dt.Sstar.diploid.ancient, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar.diploid'), size=6 ) +
geom_errorbar(data = dt.Sstar.diploid.ancient, aes(y=as.numeric(admix.EUR),
ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
x=mdl, color='EUR.Sstar.diploid'), size=1.5 ) +
geom_line(data = dt.Sstar.diploid.ancient, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar.diploid', group=1), size=1 ) +
##############################
geom_point(data = dt.Sstar.ancient, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar'), size=6 ) +
geom_errorbar(data = dt.Sstar.ancient, aes(y=as.numeric(admix.EAS),
ymin=as.numeric(ci.EAS.lwr), ymax=as.numeric(ci.EAS.upr),
x=mdl, color='EAS.Sstar'), size=1.5 ) +
geom_line(data = dt.Sstar.ancient, aes(y=as.numeric(admix.EAS), x=mdl, color='EAS.Sstar', group=1), size=1 ) +
geom_point(data = dt.Sstar.ancient, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar'), size=6 ) +
geom_errorbar(data = dt.Sstar.ancient, aes(y=as.numeric(admix.EUR),
ymin=as.numeric(ci.EUR.lwr), ymax=as.numeric(ci.EUR.upr),
x=mdl, color='EUR.Sstar'), size=1.5 ) +
geom_line(data = dt.Sstar.ancient, aes(y=as.numeric(admix.EUR), x=mdl, color='EUR.Sstar', group=1), size=1 ) +
################################
ylab(label = 'admixture fraction estimate') +
xlab(label = 'migration rate per generation') +
ggtitle(label = 'Admixture Estimate: Sstar and IBDmix Calls; Ancient Migration') +
# scale_color_manual(values = c('EUR.IBDmix' = 'turquoise4','EUR.Sstar'='steelblue','EAS.IBDmix'='orangered4','EAS.Sstar'='red1', 'AFR.IBDmix'='orange3')) +
#scale_color_manual(values = c('EUR.IBDmix' = 'turquoise3','EUR.Sstar'='turquoise4','EAS.IBDmix'='red3','EAS.Sstar'='red1', 'AFR.IBDmix'='orange3')) +
#scale_color_hue(l=50, c=100) +
#scale_x_discrete(breaks=dt.Sstar.ancient$mdl, labels=c(0.0, 0.0000001, 0.000001, 0.00001, 0.00005, 0.0001, 0.00015, 0.0005)) +
scale_x_discrete(breaks=dt.Sstar.ancient$mdl, labels=c(0.0, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.00005, 0.0001, 0.00015, 0.0005)) +
scale_y_continuous(breaks = seq(0,0.16,by = 0.01), labels=seq(0,0.16,by=0.01)) +
# scale_y_continuous(trans = "log10", limits = c(NA,-2), breaks = seq(0,0.12,by = 0.01), labels=seq(0,0.12,by=0.01)) +
# coord_cartesian(ylim = c(0,0.16), expand = c(0)) +
coord_cartesian(ylim = c(0,0.08), expand = c(0)) +
theme(
plot.title=element_text(face="bold", size = 14),
axis.text=element_text(face="bold", size = 14),
axis.title=element_text(face="bold", size = 14),
legend.title=element_text(face="bold", size = 14),
legend.text=element_text(face="bold", size = 14)
)
##################################
##################################
grid.arrange(plot.ancient_migration, plot.modern_migration, ncol=2)
```