diff --git a/archive/bimodal_v_dist.Rmd b/archive/bimodal_v_dist.Rmd new file mode 100644 index 00000000..dfb39330 --- /dev/null +++ b/archive/bimodal_v_dist.Rmd @@ -0,0 +1,148 @@ +--- +title: "Bimodal Prosocial Distribution" +output: + pdf_document: + number_sections: yes + fig_caption: yes + keep_tex: no + includes: + in_header: takeup_workingpaper_header.sty +--- + +```{r setup, include=FALSE} +library(magrittr) +library(tidyverse) +library(EnvStats) +library(nleqslv) + +ggplot2::theme_set(theme_minimal()) + +knitr::opts_chunk$set(echo = FALSE) +``` + +\begin{align*} +\Delta[v^*] = \int_{v^*}^\infty v \frac{f(v)}{1-F(v^*)}\,\textrm{d}v - \int_{-\infty}^{v^*} v \frac{f(v)}{F(v^*)}\,\textrm{d}v +\end{align*} + +```{r functions, echo=TRUE} +calculate_delta <- function(v_cutoff, mean1, mean2, sd1, sd2, p.mix = 0.5) { + F_v <- pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + delta_part <- function(v) v * dnormMix(v, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + e_honor <- integrate(delta_part, lower = v_cutoff, upper = Inf)$value / (1 - F_v) + e_stigma <- integrate(delta_part, lower = -Inf, upper = v_cutoff)$value / F_v + + e_honor - e_stigma +} + +generate_v_cutoff_fixedpoint <- function(b, mu, ...) { + function(v_cutoff) { + v_cutoff + b + mu * calculate_delta(v_cutoff, ...) + } +} + +generate_equil_sim <- function(mean1, mean2, sd1, sd2, p.mix, nb_min = -2, nb_max = 2) { + expand.grid( + net_benefit = seq(nb_min, nb_max, 0.1), + mu = seq(0.0, 1.0, 0.05) + ) %>% + rowwise() %>% + mutate( + equilibrium_info = list( + nleqslv( + net_benefit, + generate_v_cutoff_fixedpoint( + net_benefit, mu, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix + ) + ) + ), + v_cutoff = equilibrium_info$x, + prob = 1 - pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix), + term_code = equilibrium_info$termcd, + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} + +generate_delta_sim <- function(mean1, mean2, sd1, sd2, p.mix) { + expand.grid( + v_cutoff = seq(-1.5, 1.5, 0.1) + ) %>% + rowwise() %>% + mutate( + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} +``` + +```{r compare-pdfs} +tibble( + x = seq(-1.75, 1.75, 0.1), + pdf_bi = dnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + cdf_bi = pnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + pdf_uni = dnorm(x), + cdf_uni = pnorm(x), +) %>% + pivot_longer(-x, names_pattern = r"{([pc]df)_(uni|bi)}", names_to = c(".value", "v_type")) %>% + { + cowplot::plot_grid( + ggplot(., aes(x, pdf, color = v_type)) + + geom_line(show.legend = TRUE) + + scale_color_discrete("", labels = c("bi" = "bimodal", "uni" = "unimodal")) + + labs( + title = "Density", + x = "", y = "" + ) + + # ggplot(., aes(x, cdf, color = v_type)) + + # geom_line() + ) +} +``` + +```{r} +delta_sim <- bind_rows( + bi = generate_delta_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5), + uni = generate_delta_sim(0, 0, 1, 1, 0.0), + + .id = "v_type" +) + +delta_sim %>% + ggplot(aes(v_cutoff, delta)) + + geom_line(aes(color = v_type)) + + labs(title = "Delta") + + scale_color_discrete("", labels = c("bi" = "bimodal", "uni" = "unimodal")) + + NULL +``` + +```{r, cache=TRUE} +equil_sim <- bind_rows( + bi = generate_equil_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5, nb_min = -3), + uni = 
generate_equil_sim(0, 0, 1, 1, 0.0, nb_min = -2.5), + + .id = "v_type" +) + +equil_sim %>% + ggplot(aes(net_benefit, v_cutoff)) + + geom_point(data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . %>% filter(term_code == 1)) + + labs(title = "Equilibrium cutoff") + + coord_cartesian(ylim = c(-3, 3)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +equil_sim %>% + ggplot(aes(net_benefit, prob)) + + # geom_point(data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . %>% filter(term_code == 1)) + + labs(title = "Equilibrium probability of take-up") + + coord_cartesian(ylim = c(0, 1)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +``` + diff --git a/archive/equil_sim_test.R b/archive/equil_sim_test.R new file mode 100644 index 00000000..807835da --- /dev/null +++ b/archive/equil_sim_test.R @@ -0,0 +1,101 @@ +library(magrittr) +library(tidyverse) +library(EnvStats) +library(nleqslv) + +ggplot2::theme_set(theme_minimal()) + +calculate_delta <- function(v_cutoff, mean1, mean2, sd1, sd2, p.mix = 0.5) { + F_v <- pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + delta_part <- function(v) v * dnormMix(v, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + + e_honor <- integrate(delta_part, lower = v_cutoff, upper = Inf)$value / (1 - F_v) + e_stigma <- integrate(delta_part, lower = -Inf, upper = v_cutoff)$value / F_v + + e_honor - e_stigma +} + +generate_v_cutoff_fixedpoint <- function(b, mu, ...) { + function(v_cutoff) { + v_cutoff + b + mu * calculate_delta(v_cutoff, ...) + } +} + +generate_equil_sim <- function(mean1, mean2, sd1, sd2, p.mix, nb_min = -2, nb_max = 2) { + expand.grid( + net_benefit = seq(nb_min, nb_max, 0.1), + mu = seq(0.0, 1.0, 0.05) + ) %>% + rowwise() %>% + mutate( + equilibrium_info = list(nleqslv(net_benefit, generate_v_cutoff_fixedpoint(net_benefit, mu, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix))), + v_cutoff = equilibrium_info$x, + prob = 1 - pnormMix(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix), + term_code = equilibrium_info$termcd, + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} + +generate_delta_sim <- function(mean1, mean2, sd1, sd2, p.mix) { + expand.grid( + v_cutoff = seq(-1.5, 1.5, 0.1) + ) %>% + rowwise() %>% + mutate( + delta = calculate_delta(v_cutoff, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, p.mix = p.mix) + ) %>% + ungroup() +} + +tibble( + x = seq(-1.75, 1.75, 0.1), + pdf_bi = dnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + cdf_bi = pnormMix(x, mean1 = -0.75, sd1 = 0.4375, mean2 = 0.75, sd2 = 0.4375, p.mix = 0.5), + pdf_uni = dnorm(x), + cdf_uni = pnorm(x), +) %>% + pivot_longer(-x, names_pattern = r"{([pc]df)_(uni|bi)}", names_to = c(".value", "v_type")) %>% + { + cowplot::plot_grid( + ggplot(., aes(x, pdf, color = v_type)) + + geom_line(show.legend = TRUE) + + # ggplot(., aes(x, cdf, color = v_type)) + + # geom_line() + ) +} + +equil_sim <- bind_rows( + bi = generate_equil_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5, nb_min = -3), + uni = generate_equil_sim(0, 0, 1, 1, 0.0, nb_min = -2.5), + + .id = "v_type" +) + +equil_sim %>% + ggplot(aes(net_benefit, v_cutoff)) + + geom_point(data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . 
%>% filter(term_code == 1)) + + coord_cartesian(ylim = c(-3, 3)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +equil_sim %>% + ggplot(aes(net_benefit, prob)) + + # geom_point(, data = . %>% filter(term_code != 1)) + + geom_line(aes(group = mu, color = mu), data = . %>% filter(term_code == 1)) + + coord_cartesian(ylim = c(0, 1)) + + facet_wrap(vars(v_type)) + + theme(legend.position = "bottom") + +delta_sim <- bind_rows( + bi = generate_delta_sim(-0.75, 0.75, 0.4375, 0.4375, 0.5), + uni = generate_delta_sim(0, 0, 1, 1, 0.0), + + .id = "v_type" +) + +delta_sim %>% + ggplot(aes(v_cutoff, delta)) + + geom_line(aes(color = v_type)) diff --git a/run_stan_dist_sim.R b/archive/run_stan_dist_sim.R similarity index 100% rename from run_stan_dist_sim.R rename to archive/run_stan_dist_sim.R diff --git a/run_takeup.R b/archive/run_takeup.R similarity index 100% rename from run_takeup.R rename to archive/run_takeup.R diff --git a/test_simul_dist_struct.R b/archive/test_simul_dist_struct.R similarity index 100% rename from test_simul_dist_struct.R rename to archive/test_simul_dist_struct.R diff --git a/rct-design-fieldwork/takeup_assigned_distance.qmd b/rct-design-fieldwork/takeup_assigned_distance.qmd new file mode 100644 index 00000000..c2bc9bbb --- /dev/null +++ b/rct-design-fieldwork/takeup_assigned_distance.qmd @@ -0,0 +1,211 @@ +--- +title: "TakeUp Distance Assignment" +format: html +editor: source +--- + +```{r} +#| label: setup + +library(magrittr) +library(tidyverse) +library(sf) +library(here) +library(broom) + + +source(here("rct-design-fieldwork", "takeup_rct_assign_clusters.R")) +source(here("analysis_util.R")) +source(here("dist_structural_util.R")) + +theme_set(theme_minimal() + theme(legend.position = "bottom")) +``` + +```{r} +#| label: load-geo-data + +wgs.84 <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" +kenya.proj4 <- "+proj=utm +zone=36 +south +ellps=clrk80 +units=m +no_defs" + +rct.schools.data <- read_rds(here("data", "takeup_rct_schools.rds")) +load(here("data", "takeup_village_pot_dist.RData")) +load(here("data", "analysis.RData")) + +schools <- st_as_sf(rct.schools.data, crs = wgs.84) +villages <- village.centers %>% + st_as_sf(coords = c("lon", "lat"), crs = wgs.84) +``` + +```{r} +#| fig-width: 8 + +ggplot(schools) + + geom_sf(aes(color = "Schools"), size = 0.5) + + geom_sf(aes(color = "Villages"), data = villages) + + scale_color_manual("", values = c("Schools" = "black", "Villages" = "red")) +``` + +```{r} +villages %<>% + mutate( + dist_to_closest_school = st_transform(., kenya.proj4) %>% + st_distance(st_transform(schools, kenya.proj4)) %>% + plyr::alply(1, min) %>% + unlist() + ) +``` + +```{r} +villages %>% + ggplot(aes(dist_to_closest_school)) + + geom_histogram(aes(y = stat(density)), binwidth = 100, alpha = 0.5) + + labs(x = "Distance to Closest School", y = "Density") + +villages %>% + ggplot(aes(dist_to_closest_school)) + + geom_histogram(aes(y = stat(density), fill = dist.pot.group), position = "identity", binwidth = 100, alpha = 0.5) + + scale_fill_discrete("Distance Assignment", labels = str_to_title) + + labs(x = "Distance to Closest School", y = "Density") +``` + + + +## ed quick additions + + +tldr: I check whether school observables change as distance to schools change, not in a particularly smart way. +It doesn't look like there's any relationship between a village's distance to a POT and average school characteristics. Then I try +Borusyak and Hull's (2022) thing. 
+ +https://www.dropbox.com/s/ajjld1b8gw470yr/borusyak_hull_wp_dec21.pdf?raw=1 + +```{r} + +# Find all schools within 1500 m of a village (both layers projected to metres so the distance threshold is unambiguous) +vill_school_pairs = villages %>% + st_join( + x = st_transform(., kenya.proj4), + y = st_transform(schools, kenya.proj4), + join = st_is_within_distance, + dist = 1500, + left = TRUE + ) +# Calculate all pairwise distances between every school and +# village (even those > 1500 m), dropping the units class so the +# threshold comparison below works on plain numbers +all_vill_school_distances = st_distance( + villages, + schools +) %>% units::drop_units() +# Calculate average distance to schools, conditional on school < 1500 m +expected_distance = apply(all_vill_school_distances, FUN = function(x){mean(x[x < 1500])}, MARGIN = 1) +villages$expected_distance = expected_distance + +# Create set of numeric school variables to check for balance +balance_vars = schools %>% + select_if(is.numeric) %>% + select(-contains("gps")) %>% + st_drop_geometry() %>% + colnames() + +check_observables = function(data, variable, balance_vars) { + + balance_formulae = map( + balance_vars, + ~as.formula(paste0("`", .x, "` ~ ", variable)) + ) + + balance_fits = map2_dfr( + balance_formulae, + balance_vars, + ~lm( + .x, + data = data, + na.action = na.omit + ) %>% tidy() %>% mutate(variable = .y) + ) %>% + filter(!str_detect(term, "Intercept")) + return(balance_fits) +} + + +dist_pot_check = check_observables( + vill_school_pairs, + variable = "log(dist.to.pot)", + balance_vars = balance_vars +) + +dist_pot_group_check = check_observables( + vill_school_pairs, + variable = "dist.pot.group", + balance_vars = balance_vars +) + +dist_closest_school_check = check_observables( + vill_school_pairs, + variable = "log(dist_to_closest_school)", + balance_vars = balance_vars +) + +dist_pot_weight_check = check_observables( + vill_school_pairs, + variable = "log(dist.to.pot) + log(expected_distance)", + balance_vars = balance_vars +) + +dist_checks = bind_rows( + dist_closest_school_check, + dist_pot_group_check, + dist_pot_check, + dist_pot_weight_check %>% + filter(term == "log(dist.to.pot)") %>% + mutate(term = str_c(term, " reweighted")) +) %>% + filter(!is.nan(p.value)) + + +n_signif_diff_after_adjust = dist_checks %>% + group_by(term) %>% + mutate(p_adj = p.adjust(p.value, method = "BH")) %>% + filter(p_adj < 0.05) + + + +dist_checks %>% + ggplot(aes( + sample = p.value, + colour = term + )) + + stat_qq(distribution = stats::qunif, + alpha = 1, geom = "line") + + stat_qq_line(distribution = stats::qunif, + colour = "black", linetype = "longdash") + + theme_bw() + + labs( + x = "Theoretical Quantile", + y = "Realised Quantile", + title = "Under the Null, P Values ~ U(0,1)" + ) + +``` + + + +Borusyak and Hull (2022) wouldn't use variation in continuous distance to the POT, but rather variation in the difference between expected and realised distance to the POT. Whilst +the close/far assignment is generated randomly, distance to the POT has a non-random component: + + +$$ +dist = \underbrace{I[\text{close} = 1]}_{\text{exog}} \cdot \underbrace{\text{km to close school}}_{\text{endog}} + (1 - I[\text{close} = 1]) \cdot \text{km to far school} +$$ + + +By permuting over alternative, hypothetical RCT assignment draws, they estimate $\mu$: the expected distance to a close school and +the expected distance to a far school. This can either be controlled for directly or used as a generated instrument: + +$$ +Z = dist - \mu +$$ + +This essentially uses the difference between the realised and expected distance as an instrument: since the actual treatment assignment was chosen randomly from a set of hypothetical draws, this variation is as good as random.
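As a rough illustration of that recentering (a minimal sketch, not Borusyak and Hull's implementation): assume a hypothetical `draw_assignment()` stand-in for the actual close/far randomisation protocol and a hypothetical `dist_to_far_school` column on `villages` (neither exists in this repo). The realised distance is recomputed under many permuted draws, `mu` is its per-village average, and the recentered instrument is the realised distance minus `mu`, mirroring $Z = dist - \mu$ above.

```{r}
#| label: recentered-iv-sketch
#| eval: false

# Hypothetical stand-in for the real randomisation protocol: an independent
# 50/50 close/far draw per village (the actual design is stratified, so this
# is purely illustrative).
draw_assignment <- function(n) rbinom(n, 1, 0.5)

# Distance realised under one draw: distance to the closest school if the
# village is drawn "close", otherwise distance to a (hypothetical) far school.
realised_dist <- function(close) {
  ifelse(close == 1,
         villages$dist_to_closest_school,
         villages$dist_to_far_school)
}

# mu: per-village expected distance, averaged over many permuted draws.
n_draws <- 1000
mu <- replicate(n_draws, realised_dist(draw_assignment(nrow(villages)))) %>%
  rowMeans()

# Recentered instrument: realised distance minus its design expectation.
villages <- villages %>%
  mutate(recentered_z = dist.to.pot - mu)
```

In practice the re-draws would come from the study's actual assignment mechanism rather than a coin flip; as noted above, Borusyak and Hull would either control for $\mu$ directly or use $Z$ as the instrument.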
They argue this has improved +power properties too, so even though it looks like distance is orthog to school characteristics might still be useful here. + diff --git a/renv.lock b/renv.lock index 0d9f5b86..8ffd9218 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.2.1", + "Version": "4.2.0", "Repositories": [ { "Name": "CRAN", @@ -29,6 +29,18 @@ "Hash": "dcd1743af4336156873e3ce3c950b8b9", "Requirements": [] }, + "EnvStats": { + "Package": "EnvStats", + "Version": "2.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "3c75601e880b3cc9c47b14a7caf8f76f", + "Requirements": [ + "MASS", + "ggplot2", + "nortest" + ] + }, "Formula": { "Package": "Formula", "Version": "1.2-4", @@ -223,14 +235,6 @@ "sp" ] }, - "Rmpi": { - "Package": "Rmpi", - "Version": "0.6-9.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "57b2b6cfd02d6b5b69f6167e1cac38cd", - "Requirements": [] - }, "Rttf2pt1": { "Package": "Rttf2pt1", "Version": "1.3.10", @@ -1269,6 +1273,16 @@ "vctrs" ] }, + "here": { + "Package": "here", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "24b224366f9c2e7534d2344d10d59211", + "Requirements": [ + "rprojroot" + ] + }, "highr": { "Package": "highr", "Version": "0.9", @@ -1679,14 +1693,6 @@ "colorspace" ] }, - "mvtnorm": { - "Package": "mvtnorm", - "Version": "1.1-3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7a7541cc284cb2ba3ba7eae645892af5", - "Requirements": [] - }, "nleqslv": { "Package": "nleqslv", "Version": "3.3.2", @@ -1723,6 +1729,14 @@ "Hash": "cb1d8d9f300a7e536b89c8a88c53f610", "Requirements": [] }, + "nortest": { + "Package": "nortest", + "Version": "1.0-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e587e7a30c737ad415590976481332e4", + "Requirements": [] + }, "numDeriv": { "Package": "numDeriv", "Version": "2016.8-1.1", @@ -2146,27 +2160,6 @@ "stringr" ] }, - "rethinking": { - "Package": "rethinking", - "Version": "2.21", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteRepo": "rethinking", - "RemoteUsername": "rmcelreath", - "RemoteRef": "HEAD", - "RemoteSha": "783d111a2c70c534ee9e59ae024800290cc5ac48", - "Hash": "6d8dc80f036cb8252eb79cac0e21197a", - "Requirements": [ - "MASS", - "cmdstanr", - "coda", - "loo", - "mvtnorm", - "rstan", - "shape" - ] - }, "rgdal": { "Package": "rgdal", "Version": "1.5-32", @@ -2360,14 +2353,6 @@ "units" ] }, - "shape": { - "Package": "shape", - "Version": "1.4.6", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "9067f962730f58b14d8ae54ca885509f", - "Requirements": [] - }, "sp": { "Package": "sp", "Version": "1.4-7", diff --git a/run_stan.R b/run_stan.R deleted file mode 100644 index fd9ef2a7..00000000 --- a/run_stan.R +++ /dev/null @@ -1,373 +0,0 @@ -#!/usr/bin/Rscript - -# Setup ------------------------------------------------------------------- - -script_options <- docopt::docopt(sprintf( -"Usage: - run_stan wtp [--analysis-data-only |[--num-chains= --num-iterations= --adapt-delta= --max-treedepth=]] [--output-name= --output-dir=] - run_stan beliefs [--analysis-data-only |[--num-chains= --num-iterations= --adapt-delta= --max-treedepth=]] [--output-name= --output-dir=] - run_stan dynamic [--analysis-data-only |[--gumbel --num-chains= --num-iterations= --adapt-delta= --max-treedepth= --include-latent-var-data --save-sp-estimates]] [--separate-private-value --sms-control-only --include-name-matched --no-private-value-interact --model-levels= --use-cluster-re --output-name= 
--output-dir=] - run_stan static [--analysis-data-only |[--num-chains= --num-iterations= --adapt-delta= --max-treedepth=]] [--separate-private-value --sms-control-only --include-name-matched --no-private-value-interact --model-levels= --use-cluster-identity-corr --use-cluster-re --use-census-covar --output-name= --output-dir=] - - Options: - --num-chains=, -c Number of Stan chains [default: 4] - --num-iterations=, -i Number of sampling iterations [default: 300] - --adapt-delta=, -d Stan control adapt_delta [default: 0.8] - --max-treedepth=, -t Stan control max_treedepth [default: 15] - --output-name=, -o Name to use in stanfit .csv files and analysis data .RData file [default: param] - --output-dir=, -p Directory analysis data and stanfit output will be stored [default: %s/data] - --analysis-data-only Don't run sampling, just produce analysis data - --sms-control-only Exclude SMS treatment data - --separate-private-value Use separate private values for calendars and bracelets - --include-name-matched Include unmonitored sample (name matched against census) - --no-private-value-interact Allow private value to interact with distance - --model-levels= How deep is the multilevel model [default: 3] - --use-cluster-re Use cluster level random effects (if model levels <= 2) - --use-cluster-identity-corr No correlation estimation for cluster level parameters - --use-census-covar Control for census covariates - --save-sp-estimates Calcuate superpopulation ATE - --no-replicate Do not generate outcome replication - --gumbel, -g Gumbel link - --include-latent-var-data, -l Save latent variable while sampling", getwd()), - - args = if (interactive()) "beliefs -c 8 -i 4000" else commandArgs(trailingOnly = TRUE) -) - -library(magrittr) -library(plyr) -library(forcats) -library(broom) -library(tidyverse) -library(modelr) -library(rstan) - -source("analysis_util.R") - -fit_version <- script_options$output_name - -# Load Data --------------------------------------------------------------- - -load(file.path("data", "analysis.RData")) - -# Setup for Analysis Data ----------------------------------------------------------- - -subgroups <- "phone_owner" - -stan_analysis_data <- analysis.data %>% - mutate(private_value = fct_collapse(assigned.treatment, - control = if (!is_null(script_options) && script_options$separate_private_value) c("control", "ink", "bracelet") else c("control", "ink"), - calendar = if (!is_null(script_options) && script_options$separate_private_value) "calendar" else c("calendar", "bracelet")), - social_value = fct_collapse(assigned.treatment, control = c("control", "calendar")), - sms.treatment.2 = fct_recode(sms.treatment.2, control = "sms.control")) %>% - add_count(cluster.id, name = "cluster_pop_size") %>% - add_count(cluster.id, village, name = "village_pop_size") - -all_ate <- get_dyn_ate() - # bind_rows(Busia = ., Kakamega = ., Siaya = ., .id = "stratum") - -if (script_options$no_private_value_interact %||% FALSE) { - treatment_formula <- ~ (private_value + social_value * dist.pot.group) * phone_owner -} else { - treatment_formula <- ~ (private_value + social_value) * dist.pot.group * phone_owner -} - -if (script_options$separate_private_value %||% FALSE) { - all_ate %<>% - mutate( - private_value_left = if_else(social_value_left == "bracelet", "control", private_value_left), - private_value_right = if_else(social_value_right == "bracelet", "control", private_value_right), - incentive_shift_left = if_else(signal_observed_left == "bracelet", "control", incentive_shift_left), - 
incentive_shift_right = if_else(signal_observed_right == "bracelet", "control", incentive_shift_right)) -} - -treatment_map_var <- c("private_value", "social_value", "dist.pot.group") -base_treatment_map_filter <- . %>% - filter(private_value == "control" | social_value != "ink") -treatment_map_filter <- base_treatment_map_filter - -exclude_pars <- NA - -if (script_options$beliefs) { - exclude_pars <- c("obs_recognized_intercept_raw", "obs_1ord_beta_raw", "obs_2ord_beta_raw", "obs_recognized_intercept", "obs_1ord_beta", "obs_2ord_beta", "recognized_latent_var", - "rep_know_table_A_prop_recognized", "degree", "beliefs_1ord_latent_var", "beliefs_2ord_latent_var", - "rep_know_table_A_1ord_prop_know", "beliefs_1ord_prop_know", - "rep_know_table_A_2ord_prop_know", "beliefs_2ord_prop_know") -} - -if (script_options$dynamic %||% FALSE) { - if (script_options$sms_control_only %||% FALSE) { - stan_analysis_data %<>% - filter(!name_matched | script_options$include_name_matched, - sms.treatment.2 == "control") - - if (!is_null(script_options) && script_options$include_name_matched) { - subgroups %<>% c("name_matched") - - # static_treatment_map <- stan_analysis_data %>% - # data_grid(private_value, social_value, dist.pot.group, phone_owner, name_matched) %>% - # filter(private_value == "control" | social_value != "ink") %>% - # prepare_treatment_map() - - treatment_formula %<>% - update.formula(~ . * name_matched) - } else { - # static_treatment_map <- stan_analysis_data %>% - # data_grid(private_value, social_value, dist.pot.group, phone_owner) %>% - # filter(private_value == "control" | social_value != "ink") %>% - # prepare_treatment_map() - - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) %>% - filter(sms.treatment.2_left == "control", sms.treatment.2_right == "control") %>% - select(-starts_with("reminder_info_stock"), -starts_with("sms.treatment")) %>% - distinct() - } - } else { - stan_analysis_data %<>% - filter(!name_matched | ((script_options$include_name_matched %||% TRUE) & sms.treatment.2 == "control")) - - if (script_options$include_name_matched %||% TRUE) { - subgroups %<>% c("name_matched") - - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - !name_matched | sms.treatment.2 == "control", - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - treatment_formula %<>% - update.formula(~ . * name_matched) - } else { - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) - } - - treatment_map_var %<>% c("sms.treatment.2") - - treatment_formula %<>% - update.formula(~ . + (social_value * dist.pot.group) : sms.treatment.2 + sms.treatment.2) - } -} else { # STATIC - all_ate %<>% - select(-starts_with("reminder_info_stock"), -starts_with("signal_observed"), -starts_with("incentive_shift"), -starts_with("dyn_dist_pot")) - - if (script_options$sms_control_only %||% FALSE) { - stan_analysis_data %<>% - filter(!name_matched | script_options$include_name_matched, - sms.treatment.2 == "control") - - if (script_options$include_name_matched) { - subgroups %<>% c("name_matched") - - treatment_formula %<>% - update.formula(~ . 
* name_matched) - } else { - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) - } - - all_ate %<>% - filter(sms.treatment.2_left == "control", sms.treatment.2_right == "control") %>% - select(-starts_with("sms.treatment")) - } else { - stan_analysis_data %<>% - filter(!name_matched | ((script_options$include_name_matched %||% TRUE) & sms.treatment.2 == "control")) - - if (script_options$include_name_matched %||% TRUE) { - subgroups %<>% c("name_matched") - - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - !name_matched | sms.treatment.2 == "control", - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - treatment_formula %<>% - update.formula(~ . * name_matched) - } else { - treatment_map_filter <- . %>% - filter(sms.treatment.2 == "control" | phone_owner, - sms.treatment.2 != "reminder.only" | (private_value == "control" & social_value == "control")) %>% - base_treatment_map_filter() - - all_ate %<>% - filter(!name_matched) %>% - select(-name_matched) - } - - if (!script_options$beliefs) { - treatment_map_var %<>% c("sms.treatment.2") - - treatment_formula %<>% - update.formula(~ . + (social_value * dist.pot.group) : sms.treatment.2 + sms.treatment.2) - } else { - all_ate %<>% - filter(sms.treatment.2_left == "control", sms.treatment.2_right == "control") %>% - select(-starts_with("sms.treatment")) - - treatment_map_filter <- base_treatment_map_filter - } - } - - all_ate %<>% - distinct() -} - -# if (script_options$`use-census-covar` %||% TRUE) { -# treatment_formula %<>% -# update.formula(~ . + age.census_group * gender) -# -# subgroups %<>% c("age.census_group", "gender") -# -# all_ate %<>% -# merge(distinct(stan_analysis_data, age.census_group, gender), all = TRUE) -# } - -treatment_map_var %<>% c(subgroups) - -cat(str_interp("Design matrix variables: ${str_c(treatment_map_var, collapse = ', ')}\n")) - -static_treatment_map <- stan_analysis_data %>% - expand(!!!syms(treatment_map_var)) %>% - treatment_map_filter() %>% - prepare_treatment_map() - -# Generate Analysis Data -------------------------------------------------- - -param_stan_data <- prepare_bayesian_analysis_data( - stan_analysis_data, - wtp.data, - endline.know.table.data, - - prepared_treatment_maps = TRUE, - treatment_map = static_treatment_map, - - treatment_formula = treatment_formula, - subgroup_col = subgroups, - drop_intercept_from_dm = FALSE, - param_dynamics = script_options$dynamic %||% FALSE, - param_poly_order = 2, - - all_ate = all_ate, - - scale_sigma = 1, - cluster_scale_sigma = 0.25, - hyper_coef_sigma = 1, - hyper_intercept_sigma = 5, - private_value_dist_sigma = 0.01, - - lkj_df = 2, - - # dynamic_model = script_options$dynamic %||% FALSE, - - model_link_type = !((script_options$dynamic %||% FALSE) && script_options$gumbel), - - estimate_ate = 1, - replicate_outcomes = !(script_options$no_replicate %||% FALSE), - - model_levels = as.integer(script_options$model_levels %||% 3), - use_cluster_re = as.integer(script_options$use_cluster_re %||% 0), - use_cluster_identity_corr = as.integer(script_options$use_cluster_identity_corr %||% 0), - save_sp_estimates = as.integer(script_options$save_sp_estimates %||% 0), - - use_census_covar = as.integer(script_options$use_census_covar %||% TRUE) -) - -save(param_stan_data, script_options, - file = file.path(script_options$output_dir, "stan_analysis_data", str_interp("model_${fit_version}.RData"))) - -if (script_options$analysis_data_only) 
quit() - -# Initializer Factory ------------------------------------------------------------- - -gen_initializer <- function(stan_data_list, script_options = script_options) { - if (script_options$beliefs) { - function() { - lst(recognized_prop_alpha = 0.1, - recognized_prop_beta = 0.1) - } - - } else if (script_options$dynamic) { # DYNAMIC - function() { - init_lst <- lst( - # strata_beta_day1_corr_mat_non_phone = with(stan_data_list, rethinking::rlkjcorr(1, subgroup_treatment_col_sizes[1], lkj_df)), - # strata_beta_day1_corr_mat_phone = with(stan_data_list, rethinking::rlkjcorr(1, subgroup_treatment_col_sizes[2], lkj_df)), - # strata_beta_day1_L_corr_mat_non_phone = t(chol(strata_beta_day1_corr_mat_non_phone)), - # strata_beta_day1_L_corr_mat_phone = t(chol(strata_beta_day1_corr_mat_phone)), - - # hyper_beta_day1 = rep(0, stan_data_list$num_all_treatment_coef), - # hyper_beta_day1 = rnorm(stan_data_list$num_all_treatment_coef), - # hyper_baseline_dyn_effect = rep(0, stan_data_list$num_deworming_days - 1), - # hyper_baseline_dyn_effect = rnorm(stan_data_list$num_deworming_days - 1), - # hyper_treat_beta_dyn_effect = rep(0, stan_data_list$num_param_dyn_coef), - strata_beta_tau = with(stan_data_list, pmax(0.001, rnorm(num_all_treatment_coef, sd = 0.5))), - strata_baseline_dyn_effect_raw = with(stan_data_list, matrix(rnorm((num_deworming_days - 1) * num_strata, sd = 0.5), nrow = num_deworming_days - 1, ncol = num_strata)), - # QR_strata_beta_day1 = with(stan_data_list, matrix(rnorm(num_all_treatment_coef * num_strata, sd = 0.005), nrow = num_all_treatment_coef, ncol = num_strata)), - QR_strata_beta_dyn_effect = with(stan_data_list, matrix(rnorm(num_param_dyn_coef * num_strata, sd = 0.005), nrow = num_param_dyn_coef, ncol = num_strata)), - strata_beta_corr_mat = with(stan_data_list, rethinking::rlkjcorr(1, num_all_treatment_coef, lkj_df)), - strata_beta_L_corr_mat = t(chol(strata_beta_corr_mat)) - ) - - if (script_options$model_levels > 2) { - init_lst %<>% - update_list(cluster_beta = with(stan_data_list, matrix(rnorm(num_all_treatment_coef * num_clusters, sd = 0.01), - nrow = num_all_treatment_coef, ncol = num_clusters))) - } - } - } else { # STATIC - if (script_options$model_levels > 2) { - function() { - lst(cluster_beta = with(stan_data_list, matrix(rnorm(num_all_treatment_coef * num_clusters, sd = 0.01), - nrow = num_all_treatment_coef, ncol = num_clusters)), - strata_beta_corr_mat = with(stan_data_list, diag(rep(1, num_all_treatment_coef))), - strata_beta_L_corr_mat = t(chol(strata_beta_corr_mat)), - strata_beta_tau = with(stan_data_list, pmax(0.001, rnorm(num_all_treatment_coef, sd = 0.5)))) - } - } else return("random") - } -} - -# Run --------------------------------------------------------------------- - -num_chains <- as.integer(script_options$num_chains) - -if (num_chains > parallel::detectCores()) stop("Not enough cores.") - -options(mc.cores = num_chains) -rstan_options(auto_write = TRUE) - -cat(str_interp("Output name: ${fit_version}\n")) - -if (script_options$wtp) { - model_param <- stan_model(file = file.path("stan_models", "wtp_model.stan"), model_name = "wtp_model") -} else if (script_options$beliefs) { - model_param <- stan_model(file = file.path("stan_models", "secobeliefs.stan"), model_name = "beliefs") -} else { - model_param <- stan_model(file = file.path("stan_models", "takeup_model_5_param.stan"), model_name = "model_5_param") - - cat(str_interp("Running model with ${param_stan_data$model_levels} levels.\n")) -} - -model_fit <- param_stan_data %>% - 
sampling(model_param, - data = ., - chains = num_chains, - iter = as.integer(script_options$num_iterations), - control = lst(max_treedepth = as.integer(script_options$max_treedepth), - adapt_delta = as.numeric(script_options$adapt_delta)), - init = gen_initializer(., script_options), - include = FALSE, - pars = exclude_pars, - sample_file = file.path(script_options$output_dir, "stanfit", str_interp("model_${fit_version}.csv"))) - -write_rds(model_fit, file.path(script_options$output_dir, "stanfit", str_interp("model_${fit_version}.rds"))) - -cat(str_interp("Sampling complete for ${fit_version}.\n"))