Skip to content

Latest commit

 

History

History
1170 lines (1119 loc) · 43 KB

File metadata and controls

1170 lines (1119 loc) · 43 KB
title LRD paper Appendix D, Tables from simulations presented in lrd paper
author Adam C Sales & Ben B Hansen
date 29 April, 2022
output html_document

General dependencies.

library('knitr')
library('kableExtra')
source('R/simCIhet.r')
source('R/functions.r')
source('R/ddsandwich.R')
source('R/displaySim.r')

Initialization. Note that nreps=0 corresponds to no simulations, just print results from previously saved simulations. In order to re-run the simulations, the nreps variable should have been set to a positive integer before initiating this script.

##Level/Power Simulation The following code (optionally, if nreps>0) runs the simulation reported in Table 3 of "Limitless Regression Discontinuity"

To run the simulations in parallel, using the parallel package in R, register a cluster, called cl with the desired number of nodes, with code similar to the following:

library(parallel)
cl <- makeCluster(5)
if (!exists('nreps') ) nreps <- 0
nreps
## [1] 5000
if (nreps) {
library('robustbase')
library('rdd')
library('RItools')
library('sandwich')
library('nnet')

clust <- FALSE
if(require('parallel')) if(exists('cl')) if(inherits(cl,"cluster")) clust <- TRUE

if(clust){
  clusterEvalQ(cl,{
             library('robustbase')
             library('rdd')
             library('RItools')
             library('sandwich')
             library('nnet')
             source('R/functions.r')
             source('R/simCIhet.r')
	     source('R/ddsandwich.R')
             })
} else cl <- NULL

set.seed(201609)
st <- system.time(outcomeSim <- totalOutcomeSim(nreps,cl))
save(outcomeSim, file=paste0('./outcomeSim',Sys.Date(),'.RData'))
cat(paste0(date(), ', nreps=', nreps, '\n'),
    paste(c(names(st),'\n', collapse=T)),
    st,
    file='fullOutcomeSim-runtime.txt', append=TRUE)
} else{ ### load the most recent simulation
  sims <- sort(grep('outcomeSim',list.files('.'),value=TRUE),decreasing=TRUE)

  load(paste0('./',sims[1]))
}

This code creates Table 3:

capture.output({
  displayCIsimHet(outcomeSim,tau=0,
    caption=paste('Empirical bias and 95\\% confidence interval coverage (\\%) and width for the analyses of',prettyNum(ncol(outcomeSim[[1]]),big.mark=','),'simulated datasets using either permutation tests, limitless or local OLS methods. The average treatment effect was zero in all conditions; in six conditions the effect was uniquely zero, and in three it was distributed as $t_3$.'),
    label='tab:level')
  },file="./tab-levelSimulation.tex")

Here are the results, for all cases run:

allRes <- dispAllSimp(outcomeSim)
rownames(allRes) <- NULL
save(allRes,file=paste0('./levelSimResults',Sys.Date(),'.RData'))

###Full Results: Bias

kable(subset(allRes,meas=='Bias',select=-meas),caption=paste('Empirical bias for the analyses of ',ncol(outcomeSim[[1]]),'simulated datasets using either permutation tests, limitless or local OLS methods. Results for all conditions'),digits=2,format='html')%>%
  kable_styling(full_width=FALSE)%>%
#  group_rows("n=50",1,24)%>%group_rows("n=250",25,48)%>%group_rows("n=2500",49,72)%>%
  collapse_rows(columns = 1:4)#, valign = "center")
Empirical bias for the analyses of 5000 simulated datasets using either permutation tests, limitless or local OLS methods. Results for all conditions
n error ATE TE randomness Limitless Local-OLS Permutation
1 50 t 0 none 0.00 0.00 0.37
4 50 t 0.2 none 0.00 0.01 0.37
7 50 t 0 t 0.01 0.01 0.37
10 50 t 0.2 t 0.00 0.00 0.36
13 50 norm 0 none 0.01 0.01 0.37
16 50 norm 0.2 none -0.01 0.00 0.37
19 50 norm 0 norm 0.01 0.01 0.37
22 50 norm 0.2 norm 0.00 0.00 0.37
25 250 t 0 none 0.00 0.00 0.37
28 250 t 0.2 none 0.00 0.00 0.37
31 250 t 0 t 0.00 0.00 0.37
34 250 t 0.2 t 0.00 0.00 0.37
37 250 norm 0 none 0.00 0.00 0.37
40 250 norm 0.2 none 0.00 0.00 0.37
43 250 norm 0 norm 0.00 0.00 0.38
46 250 norm 0.2 norm 0.00 0.00 0.37
49 2500 t 0 none 0.00 0.00 0.37
52 2500 t 0.2 none 0.00 0.00 0.37
55 2500 t 0 t 0.00 0.00 0.38
58 2500 t 0.2 t 0.00 0.00 0.38
61 2500 norm 0 none 0.00 0.00 0.38
64 2500 norm 0.2 none 0.00 0.00 0.37
67 2500 norm 0 norm 0.00 0.00 0.38
70 2500 norm 0.2 norm 0.00 0.00 0.38

###95% CI Coverage

kable(subset(allRes,meas=='Coverage',select=-meas),caption=paste('Empirical 95% confidence interval coverage for the analyses of ',ncol(outcomeSim[[1]]),'simulated datasets using either permutation tests, limitless or local OLS methods. Results for all conditions'),digits=2,format='html')%>%
  kable_styling(full_width=FALSE)%>%
#  group_rows("n=50",1,24)%>%group_rows("n=250",25,48)%>%group_rows("n=2500",49,72)%>%
  collapse_rows(columns = 1:4)#, valign = "center")
Empirical 95% confidence interval coverage for the analyses of 5000 simulated datasets using either permutation tests, limitless or local OLS methods. Results for all conditions
n error ATE TE randomness Limitless Local-OLS Permutation
2 50 t 0 none 0.93 0.94 0.50
5 50 t 0.2 none 0.94 0.94 0.49
8 50 t 0 t 0.92 0.93 0.65
11 50 t 0.2 t 0.94 0.94 0.66
14 50 norm 0 none 0.93 0.93 0.63
17 50 norm 0.2 none 0.93 0.93 0.63
20 50 norm 0 norm 0.93 0.93 0.73
23 50 norm 0.2 norm 0.93 0.93 0.75
26 250 t 0 none 0.95 0.95 0.00
29 250 t 0.2 none 0.95 0.95 0.00
32 250 t 0 t 0.95 0.95 0.03
35 250 t 0.2 t 0.95 0.95 0.04
38 250 norm 0 none 0.94 0.94 0.03
41 250 norm 0.2 none 0.94 0.94 0.03
44 250 norm 0 norm 0.95 0.95 0.12
47 250 norm 0.2 norm 0.95 0.95 0.13
50 2500 t 0 none 0.95 0.95 0.00
53 2500 t 0.2 none 0.95 0.95 0.00
56 2500 t 0 t 0.95 0.95 0.00
59 2500 t 0.2 t 0.95 0.95 0.00
62 2500 norm 0 none 0.95 0.95 0.00
65 2500 norm 0.2 none 0.95 0.95 0.00
68 2500 norm 0 norm 0.95 0.95 0.00
71 2500 norm 0.2 norm 0.94 0.95 0.00

###95% CI Width

kable(subset(allRes,meas=='Width',select=-meas),caption=paste('Empirical 95% confidence interval width for the analyses of ',ncol(outcomeSim[[1]]),'simulated datasets using either permutation tests, limitless or local OLS methods. Results for all conditions'),digits=2,format='html')%>%
  kable_styling(full_width=FALSE)%>%
#  group_rows("n=50",1,24)%>%group_rows("n=250",25,48)%>%group_rows("n=2500",49,72)%>%
  collapse_rows(columns = 1:4)#, valign = "center")
Empirical 95% confidence interval width for the analyses of 5000 simulated datasets using either permutation tests, limitless or local OLS methods. Results for all conditions
n error ATE TE randomness Limitless Local-OLS Permutation
3 50 t 0 none 1.42 1.67 0.74
6 50 t 0.2 none 1.41 1.67 0.74
9 50 t 0 t 1.82 2.04 0.95
12 50 t 0.2 t 1.80 2.04 0.95
15 50 norm 0 none 1.74 1.68 0.90
18 50 norm 0.2 none 1.75 1.69 0.91
21 50 norm 0 norm 2.12 2.06 1.10
24 50 norm 0.2 norm 2.11 2.05 1.10
27 250 t 0 none 0.57 0.74 0.29
30 250 t 0.2 none 0.57 0.74 0.29
33 250 t 0 t 0.73 0.91 0.38
36 250 t 0.2 t 0.73 0.91 0.38
39 250 norm 0 none 0.76 0.74 0.39
42 250 norm 0.2 none 0.77 0.75 0.39
45 250 norm 0 norm 0.93 0.91 0.47
48 250 norm 0.2 norm 0.93 0.91 0.47
51 2500 t 0 none 0.17 0.23 0.09
54 2500 t 0.2 none 0.17 0.23 0.09
57 2500 t 0 t 0.22 0.29 0.11
60 2500 t 0.2 t 0.22 0.29 0.11
63 2500 norm 0 none 0.24 0.24 0.12
66 2500 norm 0.2 none 0.24 0.24 0.12
69 2500 norm 0 norm 0.29 0.29 0.15
72 2500 norm 0.2 norm 0.29 0.29 0.15

##Polynomial Simulation

if (!exists('nreps') ) nreps <- 0
nreps
## [1] 5000
if (nreps) {


set.seed(201609)
st2 <- system.time(totalPoly <- totalPolySim(nreps,cl))
save(totalPoly,file=paste0("./totalPolySim",Sys.Date(),".RData"))
cat(paste0(date(), ', nreps=', nreps, '\n'),
    paste(c(names(st),'\n', collapse=T)),
    st,
    file='totalPolySim-runtime.txt', append=TRUE)
} else{
  psims <- sort(grep('totalPolySim',list.files('.'),value=TRUE),decreasing=TRUE)

  load(paste0('./',psims[1]))
}

The following gives the results in Table 4 of the paper, in addition to the break-down of RMSE into bias and variance, and analogous results for normally-distributed errors.

tab.paper <- prntTab(totalPoly,5,full=FALSE,md=FALSE)
capture.output(
polyLatex5(tab.paper,full=FALSE,caption=paste0('Results from ',prettyNum(ncol(totalPoly[[1]]),big.mark=','),' simulations of polynomial specifications for RDD analysis, using limitless, OLS, or local linear regression. Data-generating models (DGM) were as depicted in Figure~\\ref{fig:dgms}, with $t_{3}$ errors; sample size for all runs was 500; there was no treatment effect.'),label='tab:poly'),
    file="./tab-polynomialSimulation.tex")

tab <- prntTab(totalPoly,5,full=TRUE,md=FALSE)
#rownames(tab) <- rep(c('level','RMSE','bias','sd'),6)
colnames(tab) <- gsub('(sh|ik)\\.est.','deg=',colnames(tab))#c(rep(paste0('deg=',1:4),2),'')
colnames(tab)[ncol(tab)] <- 'n/a'
kable(tab,format='html',caption='Full results for polynomial simulation',digits=2)%>%
    kable_styling()%>% column_spec( 6,border_right=TRUE)%>%column_spec(11,border_right=TRUE)%>%
        add_header_above(c(" " = 1, "Limitless" = 5, "OLS" = 5, "Loc. Lin." = 1))%>%
            #group_rows("$t_3$ Error",1,12)%>%group_rows("N(0,1) Error",13,24)%>%
            group_rows("linear",1,3)%>%group_rows('antiSym',4,6)%>%group_rows('sine',7,9)#%>%
Full results for polynomial simulation
Limitless
OLS
Loc. Lin.
deg=1 deg=2 deg=3 deg=4 deg=5 deg=1 deg=2 deg=3 deg=4 deg=5 n/a
linear
bias 0.01 0.01 0.01 0.01 0.01 0.00 0.01 0.00 0.01 0.01 0.01
RMSE 0.23 0.23 0.30 0.31 0.37 0.31 0.47 0.62 0.79 0.97 0.48
level 0.05 0.05 0.05 0.06 0.06 0.05 0.05 0.05 0.05 0.05 0.07
antiSym
bias -0.63 -0.63 -0.02 -0.02 0.13 -0.63 0.16 0.15 -0.09 -0.09 0.00
RMSE 0.67 0.67 0.31 0.31 0.39 0.70 0.51 0.65 0.81 0.98 0.51
level 0.79 0.79 0.05 0.06 0.06 0.54 0.07 0.06 0.05 0.05 0.07
sine
bias 1.16 1.16 0.17 0.17 0.01 1.16 -0.11 -0.09 -0.01 -0.01 0.06
RMSE 1.19 1.19 0.35 0.35 0.36 1.20 0.47 0.63 0.78 0.95 0.53
level 1.00 1.00 0.10 0.10 0.05 0.96 0.06 0.05 0.05 0.05 0.08
                #group_rows("linear",13,16)%>%group_rows('antiSym',17,20)%>%group_rows('oneSide',21,24)

Session information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] nnet_7.3-16       RItools_0.1-18    SparseM_1.81      rdd_0.57         
##  [5] Formula_1.2-4     AER_1.2-9         survival_3.2-13   car_3.0-12       
##  [9] carData_3.0-5     lmtest_0.9-40     zoo_1.8-9         sandwich_3.0-1   
## [13] robustbase_0.95-0 kableExtra_1.3.4  knitr_1.33       
## 
## loaded via a namespace (and not attached):
##  [1] xfun_0.24         splines_4.1.2     lattice_0.20-45   colorspace_2.0-2 
##  [5] vctrs_0.3.8       htmltools_0.5.1.1 viridisLite_0.4.0 utf8_1.2.1       
##  [9] rlang_0.4.11      pillar_1.6.1      glue_1.4.2        lifecycle_1.0.0  
## [13] stringr_1.4.0     munsell_0.5.0     rvest_1.0.0       svd_0.5.1        
## [17] evaluate_0.14     fansi_0.5.0       DEoptimR_1.0-11   highr_0.9        
## [21] xtable_1.8-4      scales_1.1.1      webshot_0.5.3     abind_1.4-5      
## [25] systemfonts_1.0.4 digest_0.6.27     stringi_1.6.2     grid_4.1.2       
## [29] tools_4.1.2       magrittr_2.0.1    tibble_3.1.2      pkgconfig_2.0.3  
## [33] crayon_1.4.1      ellipsis_0.3.2    Matrix_1.3-4      xml2_1.3.2       
## [37] rmarkdown_2.9     svglite_2.1.0     httr_1.4.2        rstudioapi_0.13  
## [41] R6_2.5.0          compiler_4.1.2