-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbootstrap_helpers.R
More file actions
118 lines (97 loc) · 3.58 KB
/
bootstrap_helpers.R
File metadata and controls
118 lines (97 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
getUniquePSUs <- function(DHSDataPSUs, bootStrapIndices) {
  # Returns a one-column table holding the uniquePSU key of the rows of
  # DHSDataPSUs at the positions given by bootStrapIndices.
  #
  # Args:
  #   DHSDataPSUs:      table containing a uniquePSU column.
  #   bootStrapIndices: row positions handed to dplyr::slice().
  #
  # Every call is namespace-qualified so nothing has to be attached when this
  # runs inside the parallel bootstrap cluster.
  psuKeys <- dplyr::select(DHSDataPSUs, uniquePSU)
  dplyr::slice(psuKeys, bootStrapIndices)
}
getUniqueHh <- function(DHSDataHh, bootStrapIndices) {
  # Returns the household-level bootstrap keys (uniqueHh, uniqueUAU and
  # uniquePSUBootIndex) of the rows of DHSDataHh at the positions given by
  # bootStrapIndices.
  #
  # Args:
  #   DHSDataHh:        table containing the three key columns named above.
  #   bootStrapIndices: row positions handed to dplyr::slice().
  #
  # Every call is namespace-qualified so nothing has to be attached when this
  # runs inside the parallel bootstrap cluster.
  hhKeys <- dplyr::select(DHSDataHh, uniqueHh, uniqueUAU, uniquePSUBootIndex)
  dplyr::slice(hhKeys, bootStrapIndices)
}
collapseToHh <- function(DHSData, replaceNAsWithinHhs = TRUE) {
  # Collapses the DHS data down to one row per distinct combination of every
  # column other than sex and age (i.e. one row per household, provided the
  # household-level columns agree within each household).
  #
  # Args:
  #   DHSData:             table with a HouseholdID column, the individual-level
  #                        columns sex and age, and the survey_info.* columns
  #                        listed below.
  #   replaceNAsWithinHhs: if TRUE, NAs in the survey_info.* columns are first
  #                        filled with a non-NA value found in another row of
  #                        the same household.
  #
  # Returns:
  #   An ungrouped table without the sex and age columns.

  # Bind the pipe locally and namespace-qualify the dplyr calls so the function
  # does not rely on magrittr/dplyr being attached.
  `%>%` <- magrittr::`%>%`

  if (replaceNAsWithinHhs) {
    householdLevelVars <- c("survey_info.ultimate_area_unit",
                            "survey_info.psu",
                            "survey_info.stratum",
                            "survey_info.stratified",
                            "survey_info.region",
                            "survey_info.HH_weight")

    # Picks the value used to represent a household: NA when every row is NA,
    # otherwise the first distinct non-NA value.
    selectNonNAValue <- function(x) {
      v <- unique(x[!is.na(x)])
      if (length(v) == 0) {
        return(NA)
      }
      # Original author's note: the [1] must be removed once the DHS data are
      # correct; until then conflicting values within a household are
      # resolved by silently keeping the first one.
      v[1]
    }

    # Some of the rows have NAs when other rows from the same household
    # have non-NAs. Replace these NAs with the non-NA value:
    DHSData <- DHSData %>%
      dplyr::group_by(HouseholdID) %>%
      dplyr::mutate(dplyr::across(dplyr::all_of(householdLevelVars),
                                  selectNonNAValue)) %>%
      dplyr::ungroup()
  }

  # sex and age are the only individual-level variables; all other DHSData
  # variables are household level, so grouping by them and summarising with no
  # summary columns leaves one row per distinct household record.
  # .groups = "drop" returns an ungrouped result directly.
  collapsedDHSData <- DHSData %>%
    dplyr::group_by(dplyr::across(-c(sex, age))) %>%
    dplyr::summarise(.groups = "drop")

  # Open question from the original author: what to do with households that
  # still occupy more than one row after collapsing? To inspect them:
  #   collapsedDHSData %>%
  #     dplyr::group_by(HouseholdID) %>%
  #     dplyr::filter(dplyr::n() > 1)

  collapsedDHSData
}
makeBootstrapKeys <- function(DHSData, RPSU, RHh) {
  # Returns DHSData with the factor keys used by the bootstrap added:
  #   uniqueSurvey:  "C<country.code.ISO.3166.alpha.3>.V<version>", uniquely
  #                  identifies the survey
  #   uniqueStratum: uniqueSurvey followed by ".S<stratum>"; uniquely identifies
  #                  the strata across different surveys. The stratum number is
  #                  replaced by 0 where survey_info.stratified is 0.
  #   uniquePSU:     uniqueStratum followed by ".P<survey_info.psu>"
  #   uniqueUAU:     uniqueStratum followed by
  #                  ".U<survey_info.ultimate_area_unit>"
  #   uniqueHh:      uniqueUAU followed by ".H<household number>"
  # uniqueUAU and uniqueHh are only created when there are household
  # replications, i.e. when RHh is non-zero.
  #
  # Args:
  #   DHSData: data frame with the columns referenced above plus HouseholdID.
  #   RPSU:    not used by this function; kept so existing calls keep working.
  #   RHh:     number of household replications; any non-zero value switches
  #            on the household-level keys.
  #
  # Only base R is used, so no package needs to be attached for this to run.
  DHSData <- within(DHSData, {
    uniqueSurvey <- as.factor(
      paste0("C", country.code.ISO.3166.alpha.3, ".V", version)
    )
    # Multiplying by the logical zeroes the stratum of unstratified surveys.
    uniqueStratum <- as.factor(
      paste0(uniqueSurvey, ".S",
             (survey_info.stratified != 0) * survey_info.stratum)
    )
    uniquePSU <- as.factor(
      paste0(uniqueStratum, ".P", survey_info.psu)
    )
  })

  if (RHh) { # If we are bootstrapping over households, make UAU and HH keys
    DHSData <- within(DHSData, {
      uniqueUAU <- as.factor(
        paste0(uniqueStratum, ".U", survey_info.ultimate_area_unit)
      )
    })

    # Extract the household number from HouseholdID.
    # Assumption: the household number is the characters following the final
    # '.' in HouseholdID. An ID without any '.' is used whole, so distinct
    # households are never merged into one empty household number.
    # as.factor() lets character IDs be handled the same way as factor IDs,
    # and relabelling the levels does the extraction once per distinct ID.
    householdNum <- as.factor(DHSData$HouseholdID)
    levels(householdNum) <- sub("^.*[.]", "", levels(householdNum))

    # Prepend uniqueUAU to the household numbers:
    DHSData$uniqueHh <- as.factor(
      paste0(DHSData$uniqueUAU, ".H", as.character(householdNum))
    )
  }

  DHSData
}