-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtip2Root.R
More file actions
70 lines (52 loc) · 1.65 KB
/
tip2Root.R
File metadata and controls
70 lines (52 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
library(dplyr,verbose = FALSE)
library(ape)
iFile <- "~/Data/Seattle/analysis_PRO/FTStsubB.nwk"
#Creating an ape tree object from the newick file
t <- read.tree(iFile)
#Obtain lists of sequence ID and Time
temp <- sapply(t$tip.label, function(x) strsplit(x, '_')[[1]])
t$ID <- temp[1,]
t$Time <- as.numeric(temp[2,])
inds <- 1:length(t$ID)
res <- lapply(inds, function(a) {
ta <- t$Time[a]
ida <- t$ID[a]
bs <- lapply(inds[inds>a], function(b){
tb <- t$Time[b]
idb <- t$ID[b]
p <- nodepath(t, a,b)
p <- tail(p, -1)
p <- head(p, -2)
dist <- sum(t$edge.length[which(t$edge[,2]%in%p)])
return(list(idb, tb, dist))
})
return(list(ta, ida, bs))
})
saveRDS(res, "~/distances.rds")
#If the newest timepoint contains a small number of sequences, we remove that timepoint from consideration
while(length(t$Time[t$Time==max(t$Time)]) <= minNS) {
t <- tFilt(t, max(t$Time)-1)
}
t$tip.label
el <- data.frame(ID1=as.character(NULL), t1=as.numeric(NULL),
ID2=as.character(NULL), t2=as.numeric(NULL),
Distance = as.numeric(NULL), stringsAsFactors= F)
ps <- 1:length(t$ID) #Children
ps <- t$edge[which(t$edge[,2]%in%ps),1] #To parent nodes
ds <- rep(0, length(subTs)) #Distance travelled (init at 0)
tab <- table(ps)
cN <- as.numeric(names(which(tab>1)))
opairs <- matrix(nrow=2, ncol=1)
npairs <- lapply(cN, function(x) {
inds <- combn(which(ps==x),2)
return(inds)
})
opairs <- cbind(opairs, npairs)
while(length(ps>1)){
ind <- sapply(ps, function(x){which(t$edge[,2]==x)})
ps <- t$edge[ind,1]
ds <- ds+t$edge.length[ind]
length(levels(as.factor(ps)))
length(ps)
}
l <- c(1,2,3,4,5,6,7,8,9,10)