-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreateMortData.R
More file actions
73 lines (61 loc) · 3.05 KB
/
createMortData.R
File metadata and controls
73 lines (61 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#' Build a table of (log) mortality rates from death and exposure text files
#'
#' Reads every file in `./data` (relative to the current working directory)
#' whose name matches `death...txt` or `exposure...txt`, tags each row with
#' the country code taken from the file name (the text between the
#' `death-` / `exposure-` prefix and the next hyphen), drops the open age
#' group `"110+"`, restricts the data to the requested year and age window,
#' and returns `rate = deaths / exposure` together with `y = log(rate)`.
#'
#' Death and exposure rows are matched on Year, Age and Country (plus sex
#' when `sex_cat = "yes"`), so the two sets of files do not need to list
#' their rows in the same order. Rows present in only one of the two sources
#' are dropped.
#'
#' The input files are expected to have a header row with at least the
#' columns `Year`, `Age`, `Female`, `Male` and `Total`.
#'
#' @param year_start,year_end First and last calendar year to keep.
#' @param age_start,age_end First and last age to keep.
#' @param sex Only used when `sex_cat = "no"`. One of `"female"`, `"Female"`,
#'   `"F"`, `"f"` for the female column, `"male"`, `"Male"`, `"M"`, `"m"` for
#'   the male column; any other value selects the `Total` column.
#' @param sex_cat `"yes"` to return both sexes with a numeric `sex` column
#'   (0 = Female, 1 = Male); `"no"` to return a single series chosen by `sex`.
#' @return A data.table with lower-case column names. For `sex_cat = "yes"`:
#'   `year, age, country, sex, rate, y`, ordered by country, year, age, sex.
#'   For `sex_cat = "no"`: `age, year, country, rate, y`, ordered by country,
#'   age, year.
createMortData <- function(year_start, year_end, age_start, age_end, sex, sex_cat) {
  # Fail before touching the file system when the mode is invalid.
  if (length(sex_cat) != 1L || is.na(sex_cat) || !(sex_cat %in% c("yes", "no"))) {
    stop("sex_cat must be either yes or no")
  }

  # Import all files of one kind and stack them.
  # DEL is the file-name prefix: "death" / "exposure" / "lifetable".
  read_multiple_files <- function(path, pattern, DEL) {
    file_names <- list.files(path, pattern)
    if (length(file_names) == 0L) {
      stop("no ", DEL, " files matching '", pattern, "' found in ", path,
           call. = FALSE)
    }
    tables <- lapply(file_names, function(file_name) {
      # "death-XXX-....txt" -> "XXX"
      country <- sub(paste0(DEL, "-"), "", file_name)
      country <- sub("-.*", "", country)
      current <- read.table(file.path(path, file_name), header = TRUE)
      current$Country <- country
      current
    })
    do.call(rbind, tables)
  }

  # Drop the open-ended "110+" group, make Age numeric, and keep only the
  # requested year/age window.
  prepare <- function(dt) {
    age <- as.character(dt$Age)
    keep <- !is.na(age) & age != "110+"
    dt <- dt[keep]
    dt$Age <- as.numeric(age[keep])
    in_window <- dt$Year %in% year_start:year_end &
      dt$Age %in% age_start:age_end
    dt[in_window]
  }

  # Match deaths to exposures on the key columns and derive rate and log-rate.
  # `d` must carry a column `n_deaths`, `e` a column `n_exposed`.
  join_rates <- function(d, e, by) {
    joined <- merge(d, e, by = by, sort = FALSE)
    joined$rate <- joined$n_deaths / joined$n_exposed
    joined$y <- log(joined$rate)
    joined
  }

  deaths <- prepare(data.table::as.data.table(
    read_multiple_files("./data", pattern = "death[[:graph:]]+.txt",
                        DEL = "death")
  ))
  exposures <- prepare(data.table::as.data.table(
    read_multiple_files("./data", pattern = "exposure[[:graph:]]+.txt",
                        DEL = "exposure")
  ))

  keys <- c("Year", "Age", "Country")

  if (sex_cat == "yes") {
    # Sex becomes a categorical variable: one row per Year/Age/Country/sex.
    # Naming the measure columns explicitly fixes the level order, so Female
    # is always coded 0 and Male 1 regardless of the column order in the files.
    sexes <- c("Female", "Male")
    deaths_long <- data.table::melt(
      deaths, id.vars = keys, measure.vars = sexes,
      variable.name = "sex", value.name = "n_deaths"
    )
    exposures_long <- data.table::melt(
      exposures, id.vars = keys, measure.vars = sexes,
      variable.name = "sex", value.name = "n_exposed"
    )
    dat <- join_rates(deaths_long, exposures_long, by = c(keys, "sex"))
    # Female is coded as 0 and Male is coded as 1.
    dat$sex <- as.numeric(dat$sex) - 1
    data.table::setorderv(dat, c("Country", "Year", "Age", "sex"))
    dat <- dat[, c("Year", "Age", "Country", "sex", "rate", "y"), with = FALSE]
  } else {
    # Single series: pick the column named by `sex`, defaulting to Total.
    value_col <- if (sex %in% c("female", "Female", "F", "f")) {
      "Female"
    } else if (sex %in% c("male", "Male", "M", "m")) {
      "Male"
    } else {
      "Total"
    }
    d <- deaths[, c(keys, value_col), with = FALSE]
    e <- exposures[, c(keys, value_col), with = FALSE]
    data.table::setnames(d, value_col, "n_deaths")
    data.table::setnames(e, value_col, "n_exposed")
    dat <- join_rates(d, e, by = keys)
    data.table::setorderv(dat, c("Country", "Age", "Year"))
    dat <- dat[, c("Age", "Year", "Country", "rate", "y"), with = FALSE]
  }

  data.table::setnames(dat, tolower(names(dat)))
  dat
}