diff --git a/archive/bimodal_v_dist.Rmd b/archive/bimodal_v_dist.Rmd new file mode 100644 index 00000000..dfb39330 --- /dev/null +++ b/archive/bimodal_v_dist.Rmd @@ -0,0 +1,148 @@ +--- +title: "Bimodal Prosocial Distribution" +output: + pdf_document: + number_sections: yes + fig_caption: yes + keep_tex: no + includes: + in_header: takeup_workingpaper_header.sty +--- + +```{r setup, include=FALSE} +library(magrittr) +library(tidyverse) +library(EnvStats) +library(nleqslv) + +ggplot2::theme_set(theme_minimal()) + +knitr::opts_chunk$set(echo = FALSE) +``` + +\begin{align*} +\Delta[v^*] = \int_{v^*}^\infty v \frac{f(v)}{1-F(v^*)}\,\textrm{d}v - \int_{-\infty}^{v^*} v \frac{f(v)}{F(v^*)}\,\textrm{d}v +\end{align*} + +```{r functions, echo=TRUE} +calculate_delta <- function(v_cutoff, mean1, mean2, sd1, sd2, p.mix = 0.5) { + F_v <- pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + delta_part <- function(v) v * dnormMix(v, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + e_honor <- integrate(delta_part, lower = v_cutoff, upper = Inf)$value / (1 - F_v) + e_stigma <- integrate(delta_part, lower = -Inf, upper = v_cutoff)$value / F_v + + e_honor - e_stigma +} + +generate_v_cutoff_fixedpoint <- function(b, mu, ...) { + function(v_cutoff) { + v_cutoff + b + mu * calculate_delta(v_cutoff, ...) + } +} + +generate_equil_sim <- function(mean1, mean2, sd1, sd2, p.mix, nb_min = -2, nb_max = 2) { + expand.grid( + net_benefit = seq(nb_min, nb_max, 0.1), + mu = seq(0.0, 1.0, 0.05) + ) %>% + rowwise() %>% + mutate( + equilibrium_info = list( + nleqslv( + net_benefit, + generate_v_cutoff_fixedpoint( + net_benefit, mu, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix + ) + ) + ), + v_cutoff = equilibrium_info$x, + prob = 1 - pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix), + term_code = equilibrium_info$termcd, + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} + +generate_delta_sim <- function(mean1, mean2, sd1, sd2, p.mix) { + expand.grid( + v_cutoff = seq(-1.5, 1.5, 0.1) + ) %>% + rowwise() %>% + mutate( + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} +``` + +```{r compare-pdfs} +tibble( + x = seq(-1.75, 1.75, 0.1), + pdf_bi = dnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + cdf_bi = pnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + pdf_uni = dnorm(x), + cdf_uni = pnorm(x), +) %>% + pivot_longer(-x, names_pattern = r"{([pc]df)_(uni|bi)}", names_to = c(".value", "v_type")) %>% + { + cowplot::plot_grid( + ggplot(., aes(x, pdf, color = v_type)) + + geom_line(show.legend = TRUE) + + scale_color_discrete("", labels = c("bi" = "bimodal", "uni" = "unimodal")) + + labs( + title = "Density", + x = "", y = "" + ) + + # ggplot(., aes(x, cdf, color = v_type)) + + # geom_line() + ) +} +``` + +```{r} +delta_sim <- bind_rows( + bi = generate_delta_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5), + uni = generate_delta_sim(0, 0, 1, 1, 0.0), + + .id = "v_type" +) + +delta_sim %>% + ggplot(aes(v_cutoff, delta)) + + geom_line(aes(color = v_type)) + + labs(title = "Delta") + + scale_color_discrete("", labels = c("bi" = "bimodal", "uni" = "unimodal")) + + NULL +``` + +```{r, cache=TRUE} +equil_sim <- bind_rows( + bi = generate_equil_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5, nb_min = -3), + uni = 
generate_equil_sim(0, 0, 1, 1, 0.0, nb_min = -2.5), + + .id = "v_type" +) + +equil_sim %>% + ggplot(aes(net_benefit, v_cutoff)) + + geom_point(data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . %>% filter(term_code == 1)) + + labs(title = "Equilibrium cutoff") + + coord_cartesian(ylim = c(-3, 3)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +equil_sim %>% + ggplot(aes(net_benefit, prob)) + + # geom_point(data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . %>% filter(term_code == 1)) + + labs(title = "Equilibrium probability of take-up") + + coord_cartesian(ylim = c(0, 1)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +``` + diff --git a/archive/equil_sim_test.R b/archive/equil_sim_test.R new file mode 100644 index 00000000..807835da --- /dev/null +++ b/archive/equil_sim_test.R @@ -0,0 +1,101 @@ +library(magrittr) +library(tidyverse) +library(EnvStats) +library(nleqslv) + +ggplot2::theme_set(theme_minimal()) + +calculate_delta <- function(v_cutoff, mean1, mean2, sd1, sd2, p.mix = 0.5) { + F_v <- pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + delta_part <- function(v) v * dnormMix(v, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + e_honor <- integrate(delta_part, lower = v_cutoff, upper = Inf)$value / (1 - F_v) + e_stigma <- integrate(delta_part, lower = -Inf, upper = v_cutoff)$value / F_v + + e_honor - e_stigma +} + +generate_v_cutoff_fixedpoint <- function(b, mu, ...) { + function(v_cutoff) { + v_cutoff + b + mu * calculate_delta(v_cutoff, ...) + } +} + +generate_equil_sim <- function(mean1, mean2, sd1, sd2, p.mix, nb_min = -2, nb_max = 2) { + expand.grid( + net_benefit = seq(nb_min, nb_max, 0.1), + mu = seq(0.0, 1.0, 0.05) + ) %>% + rowwise() %>% + mutate( + equilibrium_info = list(nleqslv(net_benefit, generate_v_cutoff_fixedpoint(net_benefit, mu, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix))), + v_cutoff = equilibrium_info$x, + prob = 1 - pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix), + term_code = equilibrium_info$termcd, + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} + +generate_delta_sim <- function(mean1, mean2, sd1, sd2, p.mix) { + expand.grid( + v_cutoff = seq(-1.5, 1.5, 0.1) + ) %>% + rowwise() %>% + mutate( + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} + +tibble( + x = seq(-1.75, 1.75, 0.1), + pdf_bi = dnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + cdf_bi = pnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + pdf_uni = dnorm(x), + cdf_uni = pnorm(x), +) %>% + pivot_longer(-x, names_pattern = r"{([pc]df)_(uni|bi)}", names_to = c(".value", "v_type")) %>% + { + cowplot::plot_grid( + ggplot(., aes(x, pdf, color = v_type)) + + geom_line(show.legend = TRUE) + + # ggplot(., aes(x, cdf, color = v_type)) + + # geom_line() + ) +} + +equil_sim <- bind_rows( + bi = generate_equil_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5, nb_min = -3), + uni = generate_equil_sim(0, 0, 1, 1, 0.0, nb_min = -2.5), + + .id = "v_type" +) + +equil_sim %>% + ggplot(aes(net_benefit, v_cutoff)) + + geom_point(data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . 
%>% filter(term_code == 1)) + + coord_cartesian(ylim = c(-3, 3)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +equil_sim %>% + ggplot(aes(net_benefit, prob)) + + # geom_point(, data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . %>% filter(term_code == 1)) + + coord_cartesian(ylim = c(0, 1)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +delta_sim <- bind_rows( + bi = generate_delta_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5), + uni = generate_delta_sim(0, 0, 1, 1, 0.0), + + .id = "v_type" +) + +delta_sim %>% + ggplot(aes(v_cutoff, delta)) + + geom_line(aes(color = v_type)) diff --git a/run_stan_dist_sim.R b/archive/run_stan_dist_sim.R similarity index 100% rename from run_stan_dist_sim.R rename to archive/run_stan_dist_sim.R diff --git a/run_takeup.R b/archive/run_takeup.R similarity index 100% rename from run_takeup.R rename to archive/run_takeup.R diff --git a/test_simul_dist_struct.R b/archive/test_simul_dist_struct.R similarity index 100% rename from test_simul_dist_struct.R rename to archive/test_simul_dist_struct.R diff --git a/rct-design-fieldwork/takeup_assigned_distance.qmd b/rct-design-fieldwork/takeup_assigned_distance.qmd new file mode 100644 index 00000000..c2bc9bbb --- /dev/null +++ b/rct-design-fieldwork/takeup_assigned_distance.qmd @@ -0,0 +1,211 @@ +--- +title: "TakeUp Distance Assignment" +format: html +editor: source +--- + +```{r} +#| label: setup + +library(magrittr) +library(tidyverse) +library(sf) +library(here) +library(broom) + + +source(here("rct-design-fieldwork", "takeup_rct_assign_clusters.R")) +source(here("analysis_util.R")) +source(here("dist_structural_util.R")) + +theme_set(theme_minimal() + theme(legend.position = "bottom")) +``` + +```{r} +#| label: load-geo-data + +wgs.84 <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +kenya.proj4 <- "+proj=utm +zone=36 +south +ellps=clrk80 +units=m +no_defs" + +rct.schools.data <- read_rds(here("data", "takeup_rct_schools.rds")) +load(here("data", "takeup_village_pot_dist.RData")) +load(here("data", "analysis.RData")) + +schools <- st_as_sf(rct.schools.data, crs = wgs.84) +villages <- village.centers %>% + st_as_sf(coords = c("lon", "lat"), crs = wgs.84) +``` + +```{r} +#| fig-width: 8 + +ggplot(schools) + + geom_sf(aes(color = "Schools"), size = 0.5) + + geom_sf(aes(color = "Villages"), data = villages) + + scale_color_manual("", values = c("Schools" = "black", "Villages" = "red")) +``` + +```{r} +villages %<>% + mutate( + dist_to_closest_school = st_transform(., kenya.proj4) %>% + st_distance(st_transform(schools, kenya.proj4)) %>% + plyr::alply(1, min) %>% + unlist() + ) +``` + +```{r} +villages %>% + ggplot(aes(dist_to_closest_school)) + + geom_histogram(aes(y = stat(density)), binwidth = 100, alpha = 0.5) + + labs(x = "Distance to Closest School", y = "Density") + +villages %>% + ggplot(aes(dist_to_closest_school)) + + geom_histogram(aes(y = stat(density), fill = dist.pot.group), position = "identity", binwidth = 100, alpha = 0.5) + + scale_fill_discrete("Distance Assignment", labels = str_to_title) + + labs(x = "Distance to Closest School", y = "Density") +``` + + + +## ed quick additions + + +tldr: I check whether school observables change as distance to schools change, not in a particularly smart way. +It doesn't look like there's any relationship between a village's distance to a POT and average school characteristics. Then I try +Borusyak and Hull's (2022) thing. 
+ +https://www.dropbox.com/s/ajjld1b8gw470yr/borusyak_hull_wp_dec21.pdf?raw=1 + +```{r} + +# Find all schools within 1500 m of a village (both layers projected to metres so the distance threshold is unambiguous) +vill_school_pairs = villages %>% + st_join( + x = st_transform(., kenya.proj4), + y = st_transform(schools, kenya.proj4), + join = st_is_within_distance, + dist = 1500, + left = TRUE + ) +# Calculate all pairwise distances between every school and +# village (even those > 1500 m), dropping the units class so the +# threshold comparison below works on plain numbers +all_vill_school_distances = st_distance( + villages, + schools +) %>% units::drop_units() +# Calculate average distance to schools, conditional on school < 1500 m +expected_distance = apply(all_vill_school_distances, FUN = function(x){mean(x[x < 1500])}, MARGIN = 1) +villages$expected_distance = expected_distance + +# Create set of numeric school variables to check for balance +balance_vars = schools %>% + select_if(is.numeric) %>% + select(-contains("gps")) %>% + st_drop_geometry() %>% + colnames() + +check_observables = function(data, variable, balance_vars) { + + balance_formulae = map( + balance_vars, + ~as.formula(paste0("`", .x, "` ~ ", variable)) + ) + + balance_fits = map2_dfr( + balance_formulae, + balance_vars, + ~lm( + .x, + data = data, + na.action = na.omit + ) %>% tidy() %>% mutate(variable = .y) + ) %>% + filter(!str_detect(term, "Intercept")) + return(balance_fits) +} + + +dist_pot_check = check_observables( + vill_school_pairs, + variable = "log(dist.to.pot)", + balance_vars = balance_vars +) + +dist_pot_group_check = check_observables( + vill_school_pairs, + variable = "dist.pot.group", + balance_vars = balance_vars +) + +dist_closest_school_check = check_observables( + vill_school_pairs, + variable = "log(dist_to_closest_school)", + balance_vars = balance_vars +) + +dist_pot_weight_check = check_observables( + vill_school_pairs, + variable = "log(dist.to.pot) + log(expected_distance)", + balance_vars = balance_vars +) + +dist_checks = bind_rows( + dist_closest_school_check, + dist_pot_group_check, + dist_pot_check, + dist_pot_weight_check %>% + filter(term == "log(dist.to.pot)") %>% + mutate(term = str_c(term, " reweighted")) +) %>% + filter(!is.nan(p.value)) + + +n_signif_diff_after_adjust = dist_checks %>% + group_by(term) %>% + mutate(p_adj = p.adjust(p.value, method = "BH")) %>% + filter(p_adj < 0.05) + + + +dist_checks %>% + ggplot(aes( + sample = p.value, + colour = term + )) + + stat_qq(distribution = stats::qunif, + alpha = 1, geom = "line") + + stat_qq_line(distribution = stats::qunif, + colour = "black", linetype = "longdash") + + theme_bw() + + labs( + x = "Theoretical Quantile", + y = "Realised Quantile", + title = "Under the Null, P Values ~ U(0,1)" + ) + +``` + + + +Borusyak and Hull (2022) wouldn't use variation in continuous distance to the POT, but rather variation in the difference between expected and realised distance to the POT. Whilst +the close/far assignment is generated randomly, distance to the POT has a non-random component: + + +$$ +dist = \underbrace{I[\text{close} = 1]}_{\text{exog}} \cdot \underbrace{\text{km to close school}}_{\text{endog}} + (1 - I[\text{close} = 1]) \cdot \text{km to far school} +$$ + + +By permuting over alternative, hypothetical RCT assignment draws, they estimate $\mu$: the expected distance to a close school and +the expected distance to a far school. This can either be controlled for directly or used as a generated instrument: + +$$ +Z = dist - \mu +$$ + +This essentially uses the difference between the realised and expected distance as an instrument: since the actual treatment assignment was chosen randomly from a set of hypothetical draws, this variation is as good as random.
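As a rough illustration of that recentering (a minimal sketch, not Borusyak and Hull's implementation): assume a hypothetical `draw_assignment()` stand-in for the actual close/far randomisation protocol and a hypothetical `dist_to_far_school` column on `villages` (neither exists in this repo). The realised distance is recomputed under many permuted draws, `mu` is its per-village average, and the recentered instrument is the realised distance minus `mu`, mirroring $Z = dist - \mu$ above.

```{r}
#| label: recentered-iv-sketch
#| eval: false

# Hypothetical stand-in for the real randomisation protocol: an independent
# 50/50 close/far draw per village (the actual design is stratified, so this
# is purely illustrative).
draw_assignment <- function(n) rbinom(n, 1, 0.5)

# Distance realised under one draw: distance to the closest school if the
# village is drawn "close", otherwise distance to a (hypothetical) far school.
realised_dist <- function(close) {
  ifelse(close == 1,
         villages$dist_to_closest_school,
         villages$dist_to_far_school)
}

# mu: per-village expected distance, averaged over many permuted draws.
n_draws <- 1000
mu <- replicate(n_draws, realised_dist(draw_assignment(nrow(villages)))) %>%
  rowMeans()

# Recentered instrument: realised distance minus its design expectation.
villages <- villages %>%
  mutate(recentered_z = dist.to.pot - mu)
```

In practice the re-draws would come from the study's actual assignment mechanism rather than a coin flip; as noted above, Borusyak and Hull would either control for $\mu$ directly or use $Z$ as the instrument.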
They argue this has improved +power properties too, so even though it looks like distance is orthog to school characteristics might still be useful here. + diff --git a/renv.lock b/renv.lock index 0d9f5b86..8ffd9218 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.2.1", + "Version": "4.2.0", "Repositories": [ { "Name": "CRAN", @@ -29,6 +29,18 @@ "Hash": "dcd1743af4336156873e3ce3c950b8b9", "Requirements": [] }, + "EnvStats": { + "Package": "EnvStats", + "Version": "2.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3c75601e880b3cc9c47b14a7caf8f76f", + "Requirements": [ + "MASS", + "ggplot2", + "nortest" + ] + }, "Formula": { "Package": "Formula", "Version": "1.2-4", @@ -223,14 +235,6 @@ "sp" ] }, - "Rmpi": { - "Package": "Rmpi", - "Version": "0.6-9.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "57b2b6cfd02d6b5b69f6167e1cac38cd", - "Requirements": [] - }, "Rttf2pt1": { "Package": "Rttf2pt1", "Version": "1.3.10", @@ -1269,6 +1273,16 @@ "vctrs" ] }, + "here": { + "Package": "here", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "24b224366f9c2e7534d2344d10d59211", + "Requirements": [ + "rprojroot" + ] + }, "highr": { "Package": "highr", "Version": "0.9", @@ -1679,14 +1693,6 @@ "colorspace" ] }, - "mvtnorm": { - "Package": "mvtnorm", - "Version": "1.1-3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7a7541cc284cb2ba3ba7eae645892af5", - "Requirements": [] - }, "nleqslv": { "Package": "nleqslv", "Version": "3.3.2", @@ -1723,6 +1729,14 @@ "Hash": "cb1d8d9f300a7e536b89c8a88c53f610", "Requirements": [] }, + "nortest": { + "Package": "nortest", + "Version": "1.0-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e587e7a30c737ad415590976481332e4", + "Requirements": [] + }, "numDeriv": { "Package": "numDeriv", "Version": "2016.8-1.1", @@ -2146,27 +2160,6 @@ "stringr" ] }, - "rethinking": { - "Package": "rethinking", - "Version": "2.21", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteRepo": "rethinking", - "RemoteUsername": "rmcelreath", - "RemoteRef": "HEAD", - "RemoteSha": "783d111a2c70c534ee9e59ae024800290cc5ac48", - "Hash": "6d8dc80f036cb8252eb79cac0e21197a", - "Requirements": [ - "MASS", - "cmdstanr", - "coda", - "loo", - "mvtnorm", - "rstan", - "shape" - ] - }, "rgdal": { "Package": "rgdal", "Version": "1.5-32", @@ -2360,14 +2353,6 @@ "units" ] }, - "shape": { - "Package": "shape", - "Version": "1.4.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "9067f962730f58b14d8ae54ca885509f", - "Requirements": [] - }, "sp": { "Package": "sp", "Version": "1.4-7", diff --git a/run_stan.R b/run_stan.R deleted file mode 100644 index fd9ef2a7..00000000 --- a/run_stan.R +++ /dev/null @@ -1,373 +0,0 @@ -#!/usr/bin/Rscript - -# Setup ------------------------------------------------------------------- - -script_options <- docopt::docopt(sprintf( -"Usage: - run_stan wtp [--analysis-data-only |[--num-chains= --num-iterations= --adapt-delta= --max-treedepth=]] [--output-name= --output-dir=] - run_stan beliefs [--analysis-data-only |[--num-chains= --num-iterations= --adapt-delta= --max-treedepth=]] [--output-name= --output-dir=] - run_stan dynamic [--analysis-data-only |[--gumbel --num-chains= --num-iterations= --adapt-delta= --max-treedepth= --include-latent-var-data --save-sp-estimates]] [--separate-private-value --sms-control-only --include-name-matched --no-private-value-interact --model-levels= --use-cluster-re --output-name= 
--output-dir=] - run_stan static [--analysis-data-only |[--num-chains= --num-iterations= --adapt-delta= --max-treedepth=]] [--separate-private-value --sms-control-only --include-name-matched --no-private-value-interact --model-levels= --use-cluster-identity-corr --use-cluster-re --use-census-covar --output-name= --output-dir=] - - Options: - --num-chains=, -c Number of Stan chains [default: 4] - --num-iterations=, -i Number of sampling iterations [default: 300] - --adapt-delta=, -d Stan control adapt_delta [default: 0.8] - --max-treedepth=, -t Stan control max_treedepth [default: 15] - --output-name=, -o Name to use in stanfit .csv files and analysis data .RData file [default: param] - --output-dir=, -p Directory analysis data and stanfit output will be stored [default: %s/data] - --analysis-data-only Don't run sampling, just produce analysis data - --sms-control-only Exclude SMS treatment data - --separate-private-value Use separate private values for calendars and bracelets - --include-name-matched Include unmonitored sample (name matched against census) - --no-private-value-interact Allow private value to interact with distance - --model-levels= How deep is the multilevel model [default: 3] - --use-cluster-re Use cluster level random effects (if model levels <= 2) - --use-cluster-identity-corr No correlation estimation for cluster level parameters - --use-census-covar Control for census covariates - --save-sp-estimates Calcuate superpopulation ATE - --no-replicate Do not generate outcome replication - --gumbel, -g Gumbel link - --include-latent-var-data, -l Save latent variable while sampling", getwd()), - - args = if (interactive()) "beliefs -c 8 -i 4000" else commandArgs(trailingOnly = TRUE) -) - -library(magrittr) -library(plyr) -library(forcats) -library(broom) -library(tidyverse) -library(modelr) -library(rstan) - -source("analysis_util.R") - -fit_version <- script_options$output_name - -# Load Data --------------------------------------------------------------- - -load(file.path("data", "analysis.RData")) - -# Setup for Analysis Data ----------------------------------------------------------- - -subgroups <- "phone_owner" - -stan_analysis_data <- analysis.data %>% - mutate(private_value = fct_collapse(assigned.treatment, - control = if (!is_null(script_options) && script_options$separate_private_value) c("control", "ink", "bracelet") else c("control", "ink"), - calendar = if (!is_null(script_options) && script_options$separate_private_value) "calendar" else c("calendar", "bracelet")), - social_value = fct_collapse(assigned.treatment, control = c("control", "calendar")), - sms.treatment.2 = fct_recode(sms.treatment.2, control = "sms.control")) %>% - add_count(cluster.id, name = "cluster_pop_size") %>% - add_count(cluster.id, village, name = "village_pop_size") - -all_ate <- get_dyn_ate() - # bind_rows(Busia = ., Kakamega = ., Siaya = ., .id = "stratum") - -if (script_options$no_private_value_interact %||% FALSE) { - treatment_formula <- ~ (private_value + social_value * dist.pot.group) * phone_owner -} else { - treatment_formula <- ~ (private_value + social_value) * dist.pot.group * phone_owner -} - -if (script_options$separate_private_value %||% FALSE) { - all_ate %<>% - mutate( - private_value_left = if_else(social_value_left == "bracelet", "control", private_value_left), - private_value_right = if_else(social_value_right == "bracelet", "control", private_value_right), - incentive_shift_left = if_else(signal_observed_left == "bracelet", "control", incentive_shift_left), - 
incentive_shift_right = if_else(signal_observed_right == "bracelet", "control", incentive_shift_right)) -} - -treatment_map_var <- c("private_value", "social_value", "dist.pot.group") -base_treatment_map_filter <- . %>% - filter(private_value == "control" | social_value != "ink") -treatment_map_filter <- base_treatment_map_filter - -exclude_pars <- NA - -if (script_options$beliefs) { - exclude_pars <- c("obs_recognized_intercept_raw", "obs_1ord_beta_raw", "obs_2ord_beta_raw", "obs_recognized_intercept", "obs_1ord_beta", "obs_2ord_beta", "recognized_latent_var", - "rep_know_table_A_prop_recognized", "degree", "beliefs_1ord_latent_var", "beliefs_2ord_latent_var", - "rep_know_table_A_1ord_prop_know", "beliefs_1ord_prop_know", - "rep_know_table_A_2ord_prop_know", "beliefs_2ord_prop_know") -} - -if (script_options$dynamic %||% FALSE) { - if (script_options$sms_control_only %||% FALSE) { - stan_analysis_data %<>% - filter(!name_matched | script_options$include_name_matched, - sms.treatment.2 == "control") - - if (!is_null(script_options) && script_options$include_name_matched) { - subgroups %<>% c("name_matched") - - # static_treatment_map <- stan_analysis_data %>% - # data_grid(private_value, social_value, dist.pot.group, phone_owner, name_matched) %>% - # filter(private_value == "control" | social_value != "ink") %>% - # prepare_treatment_map() - - treatment_formula %<>% - update.formula(~ . * name_matched) - } else { - # static_treatment_map <- stan_analysis_data %>% - # data_grid(private_value, social_value, dist.pot.group, phone_owner) %>% - # filter(private_value == "control" | social_value != "ink") %>% - # prepare_treatment_map() - - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) %>% - filter(sms.treatment.2_left == "control", sms.treatment.2_right == "control") %>% - select(-starts_with("reminder_info_stock"), -starts_with("sms.treatment")) %>% - distinct() - } - } else { - stan_analysis_data %<>% - filter(!name_matched | ((script_options$include_name_matched %||% TRUE) & sms.treatment.2 == "control")) - - if (script_options$include_name_matched %||% TRUE) { - subgroups %<>% c("name_matched") - - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - !name_matched | sms.treatment.2 == "control", - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - treatment_formula %<>% - update.formula(~ . * name_matched) - } else { - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) - } - - treatment_map_var %<>% c("sms.treatment.2") - - treatment_formula %<>% - update.formula(~ . + (social_value * dist.pot.group) : sms.treatment.2 + sms.treatment.2) - } -} else { # STATIC - all_ate %<>% - select(-starts_with("reminder_info_stock"), -starts_with("signal_observed"), -starts_with("incentive_shift"), -starts_with("dyn_dist_pot")) - - if (script_options$sms_control_only %||% FALSE) { - stan_analysis_data %<>% - filter(!name_matched | script_options$include_name_matched, - sms.treatment.2 == "control") - - if (script_options$include_name_matched) { - subgroups %<>% c("name_matched") - - treatment_formula %<>% - update.formula(~ . 
* name_matched) - } else { - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) - } - - all_ate %<>% - filter(sms.treatment.2_left == "control", sms.treatment.2_right == "control") %>% - select(-starts_with("sms.treatment")) - } else { - stan_analysis_data %<>% - filter(!name_matched | ((script_options$include_name_matched %||% TRUE) & sms.treatment.2 == "control")) - - if (script_options$include_name_matched %||% TRUE) { - subgroups %<>% c("name_matched") - - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - !name_matched | sms.treatment.2 == "control", - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - treatment_formula %<>% - update.formula(~ . * name_matched) - } else { - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) - } - - if (!script_options$beliefs) { - treatment_map_var %<>% c("sms.treatment.2") - - treatment_formula %<>% - update.formula(~ . + (social_value * dist.pot.group) : sms.treatment.2 + sms.treatment.2) - } else { - all_ate %<>% - filter(sms.treatment.2_left == "control", sms.treatment.2_right == "control") %>% - select(-starts_with("sms.treatment")) - - treatment_map_filter <- base_treatment_map_filter - } - } - - all_ate %<>% - distinct() -} - -# if (script_options$`use-census-covar` %||% TRUE) { -# treatment_formula %<>% -# update.formula(~ . + age.census_group * gender) -# -# subgroups %<>% c("age.census_group", "gender") -# -# all_ate %<>% -# merge(distinct(stan_analysis_data, age.census_group, gender), all = TRUE) -# } - -treatment_map_var %<>% c(subgroups) - -cat(str_interp("Design matrix variables: ${str_c(treatment_map_var, collapse = ', ')}\n")) - -static_treatment_map <- stan_analysis_data %>% - expand(!!!syms(treatment_map_var)) %>% - treatment_map_filter() %>% - prepare_treatment_map() - -# Generate Analysis Data -------------------------------------------------- - -param_stan_data <- prepare_bayesian_analysis_data( - stan_analysis_data, - wtp.data, - endline.know.table.data, - - prepared_treatment_maps = TRUE, - treatment_map = static_treatment_map, - - treatment_formula = treatment_formula, - subgroup_col = subgroups, - drop_intercept_from_dm = FALSE, - param_dynamics = script_options$dynamic %||% FALSE, - param_poly_order = 2, - - all_ate = all_ate, - - scale_sigma = 1, - cluster_scale_sigma = 0.25, - hyper_coef_sigma = 1, - hyper_intercept_sigma = 5, - private_value_dist_sigma = 0.01, - - lkj_df = 2, - - # dynamic_model = script_options$dynamic %||% FALSE, - - model_link_type = !((script_options$dynamic %||% FALSE) && script_options$gumbel), - - estimate_ate = 1, - replicate_outcomes = !(script_options$no_replicate %||% FALSE), - - model_levels = as.integer(script_options$model_levels %||% 3), - use_cluster_re = as.integer(script_options$use_cluster_re %||% 0), - use_cluster_identity_corr = as.integer(script_options$use_cluster_identity_corr %||% 0), - save_sp_estimates = as.integer(script_options$save_sp_estimates %||% 0), - - use_census_covar = as.integer(script_options$use_census_covar %||% TRUE) -) - -save(param_stan_data, script_options, - file = file.path(script_options$output_dir, "stan_analysis_data", str_interp("model_${fit_version}.RData"))) - -if (script_options$analysis_data_only) 
quit() - -# Initializer Factory ------------------------------------------------------------- - -gen_initializer <- function(stan_data_list, script_options = script_options) { - if (script_options$beliefs) { - function() { - lst(recognized_prop_alpha = 0.1, - recognized_prop_beta = 0.1) - } - - } else if (script_options$dynamic) { # DYNAMIC - function() { - init_lst <- lst( - # strata_beta_day1_corr_mat_non_phone = with(stan_data_list, rethinking::rlkjcorr(1, subgroup_treatment_col_sizes[1], lkj_df)), - # strata_beta_day1_corr_mat_phone = with(stan_data_list, rethinking::rlkjcorr(1, subgroup_treatment_col_sizes[2], lkj_df)), - # strata_beta_day1_L_corr_mat_non_phone = t(chol(strata_beta_day1_corr_mat_non_phone)), - # strata_beta_day1_L_corr_mat_phone = t(chol(strata_beta_day1_corr_mat_phone)), - - # hyper_beta_day1 = rep(0, stan_data_list$num_all_treatment_coef), - # hyper_beta_day1 = rnorm(stan_data_list$num_all_treatment_coef), - # hyper_baseline_dyn_effect = rep(0, stan_data_list$num_deworming_days - 1), - # hyper_baseline_dyn_effect = rnorm(stan_data_list$num_deworming_days - 1), - # hyper_treat_beta_dyn_effect = rep(0, stan_data_list$num_param_dyn_coef), - strata_beta_tau = with(stan_data_list, pmax(0.001, rnorm(num_all_treatment_coef, sd = 0.5))), - strata_baseline_dyn_effect_raw = with(stan_data_list, matrix(rnorm((num_deworming_days - 1) * num_strata, sd = 0.5), nrow = num_deworming_days - 1, ncol = num_strata)), - # QR_strata_beta_day1 = with(stan_data_list, matrix(rnorm(num_all_treatment_coef * num_strata, sd = 0.005), nrow = num_all_treatment_coef, ncol = num_strata)), - QR_strata_beta_dyn_effect = with(stan_data_list, matrix(rnorm(num_param_dyn_coef * num_strata, sd = 0.005), nrow = num_param_dyn_coef, ncol = num_strata)), - strata_beta_corr_mat = with(stan_data_list, rethinking::rlkjcorr(1, num_all_treatment_coef, lkj_df)), - strata_beta_L_corr_mat = t(chol(strata_beta_corr_mat)) - ) - - if (script_options$model_levels > 2) { - init_lst %<>% - update_list(cluster_beta = with(stan_data_list, matrix(rnorm(num_all_treatment_coef * num_clusters, sd = 0.01), - nrow = num_all_treatment_coef, ncol = num_clusters))) - } - } - } else { # STATIC - if (script_options$model_levels > 2) { - function() { - lst(cluster_beta = with(stan_data_list, matrix(rnorm(num_all_treatment_coef * num_clusters, sd = 0.01), - nrow = num_all_treatment_coef, ncol = num_clusters)), - strata_beta_corr_mat = with(stan_data_list, diag(rep(1, num_all_treatment_coef))), - strata_beta_L_corr_mat = t(chol(strata_beta_corr_mat)), - strata_beta_tau = with(stan_data_list, pmax(0.001, rnorm(num_all_treatment_coef, sd = 0.5)))) - } - } else return("random") - } -} - -# Run --------------------------------------------------------------------- - -num_chains <- as.integer(script_options$num_chains) - -if (num_chains > parallel::detectCores()) stop("Not enough cores.") - -options(mc.cores = num_chains) -rstan_options(auto_write = TRUE) - -cat(str_interp("Output name: ${fit_version}\n")) - -if (script_options$wtp) { - model_param <- stan_model(file = file.path("stan_models", "wtp_model.stan"), model_name = "wtp_model") -} else if (script_options$beliefs) { - model_param <- stan_model(file = file.path("stan_models", "secobeliefs.stan"), model_name = "beliefs") -} else { - model_param <- stan_model(file = file.path("stan_models", "takeup_model_5_param.stan"), model_name = "model_5_param") - - cat(str_interp("Running model with ${param_stan_data$model_levels} levels.\n")) -} - -model_fit <- param_stan_data %>% - 
sampling(model_param, - data = ., - chains = num_chains, - iter = as.integer(script_options$num_iterations), - control = lst(max_treedepth = as.integer(script_options$max_treedepth), - adapt_delta = as.numeric(script_options$adapt_delta)), - init = gen_initializer(., script_options), - include = FALSE, - pars = exclude_pars, - sample_file = file.path(script_options$output_dir, "stanfit", str_interp("model_${fit_version}.csv"))) - -write_rds(model_fit, file.path(script_options$output_dir, "stanfit", str_interp("model_${fit_version}.rds"))) - -cat(str_interp("Sampling complete for ${fit_version}.\n"))