From 729939c737aa59c1b112de6cedf054bb2f330798 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 8 Aug 2025 19:38:34 -0400 Subject: [PATCH 01/67] #42 bd infrastructure to support both --- DESCRIPTION | 2 ++ NAMESPACE | 2 ++ R/AddSuptBand.R | 12 ++++++--- R/EventStudy.R | 14 +++++++---- R/EventStudyPlot.R | 3 +-- tests/testthat/test-AddCIs.R | 8 +++--- tests/testthat/test-EventStudy.R | 30 +++++++++++++++-------- tests/testthat/test-EventStudyOLS.R | 24 ++++++++++++------ tests/testthat/test-EventStudyPlot.R | 24 ++++++++++++------ tests/testthat/test-PreparePlottingData.R | 16 ++++++------ tests/testthat/test-TestLinear.R | 22 ++++++++++------- 11 files changed, 99 insertions(+), 58 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 32b7fea2..6b7a65f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,8 +43,10 @@ Imports: data.table, dplyr, estimatr, + fixest, ggplot2, MASS, + broom, rlang, pracma, stats, diff --git a/NAMESPACE b/NAMESPACE index 7867b6ac..95b1a163 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,10 +5,12 @@ export(EventStudyPlot) export(TestLinear) import(dplyr) import(estimatr) +import(fixest) import(ggplot2) import(stringr) importFrom(MASS,mvrnorm) importFrom(car,linearHypothesis) +importFrom(stats,vcov) importFrom(data.table,":=") importFrom(data.table,.SD) importFrom(data.table,CJ) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index ccca421b..490acbd1 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -16,6 +16,7 @@ #' @return A data.frame that contains the upper and lower sup-t band values #' for each event-study coefficient. #' @import estimatr +#' @importFrom stats vcov #' @importFrom MASS mvrnorm #' @keywords internal #' @noRd @@ -49,14 +50,17 @@ AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { - if (! 
class(estimates) %in% c("lm_robust", "iv_robust")) { - stop("estimates is not a data frame with coefficient estimates and standard errors") + if (! class(estimates) %in% c("lm_robust", "iv_robust", "fixest")) { + stop("`estimates` is not a supported model object.") } if (! is.numeric(num_sim) | num_sim %% 1 != 0 | num_sim <= 0) {stop("num_sim should be a natural number.")} if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} if (! is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character.")} - vcov_matrix_all <- estimates$vcov + vcov_matrix_all <- stats::vcov(estimates) + if (is.null(vcov_matrix_all)) { + stop("Model object does not provide a variance-covariance matrix.") + } v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] @@ -75,7 +79,7 @@ AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_ critical_value = t[floor(conf_level_num_sim) + 1] } - df_estimates_tidy <- estimatr::tidy(estimates) + df_estimates_tidy <- broom::tidy(estimates) df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) diff --git a/R/EventStudy.R b/R/EventStudy.R index 9d26f5b4..06d3df57 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -35,9 +35,10 @@ #' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE. #' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE. 
#' -#' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. +#' @return A list that contains, under "output", the estimation output as a model object (from either `fixest` or `estimatr` depending on `kernel`), and under "arguments", the arguments passed to the function. #' @import dplyr #' @import estimatr +#' @import fixest #' @importFrom stats reformulate #' @importFrom data.table setorderv as.data.table is.data.table .SD copy #' @export @@ -145,10 +146,12 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL, proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre, normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE, - allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { + allow_duplicate_id = FALSE, avoid_internal_copy = FALSE, + kernel = "fixest") { # Check for errors in arguments if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} + if (! kernel %in% c("fixest", "estimatr")) {stop("kernel should be either 'fixest' or 'estimatr'.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} for (var in c(idvar, timevar, outcomevar, policyvar)) { if ((! 
is.character(var))) { @@ -339,7 +342,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static, controls, proxy, proxyIV) - output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) + output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster, kernel) coefficients <- str_policy_vars } if (estimator == "FHS") { @@ -363,7 +366,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static, controls, proxy, proxyIV) - output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) + output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster, kernel) coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) } @@ -385,7 +388,8 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c "normalize" = normalize, "normalization_column" = normalization_column, "cluster" = cluster, - "eventstudy_coefficients" = coefficients) + "eventstudy_coefficients" = coefficients, + "kernel" = kernel) return(list("output" = output, "arguments" = event_study_args)) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index cb6f57e6..607b6e2a 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -122,7 +122,7 @@ EventStudyPlot <- function(estimates, # Estimation Elements ----------------------------------------------------- df_estimates <- estimates$output - df_estimates_tidy <- estimatr::tidy(estimates$output) + df_estimates_tidy <- broom::tidy(estimates$output) static_model <- nrow(df_estimates_tidy) == 1 if (static_model) { @@ -153,7 +153,6 @@ EventStudyPlot <- function(estimates, plot_CI <- if(!is.null(conf_level)) TRUE else FALSE if (plot_CI) { - df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level) } 
diff --git a/tests/testthat/test-AddCIs.R b/tests/testthat/test-AddCIs.R index 52ddcbdd..dca1f971 100644 --- a/tests/testthat/test-AddCIs.R +++ b/tests/testthat/test-AddCIs.R @@ -33,7 +33,7 @@ test_that("correctly recognizes missing columns in estimates argument", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_test <- estimatr::tidy(estimates$output) + df_test <- broom::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients @@ -52,7 +52,7 @@ test_that("correctly recognizes wrong inputs for conf_level argument", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_test <- estimatr::tidy(estimates$output) + df_test <- broom::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients @@ -80,7 +80,7 @@ test_that("correctly recognizes missing columns in estimates argument", { timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) - df_test <- estimatr::tidy(estimates$output) + df_test <- broom::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients @@ -99,7 +99,7 @@ test_that("correctly recognizes wrong inputs for conf_level argument", { timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) - df_test <- estimatr::tidy(estimates$output) + df_test <- broom::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 
5c094dd3..7f82f4df 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -13,7 +13,8 @@ test_that("does not modify input data (even if input is data.table) when avoid_i controls = "x_r", FE = TRUE, TFE = TRUE, post = 2, pre = 3, overidpre = 4, overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) + cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") ) expect_true(isTRUE(all.equal(example_dt, example_dt_copy, check.attributes = FALSE))) @@ -33,7 +34,8 @@ test_that("input dt IS modified in-place when avoid_internal_copy = TRUE", { post = 2, pre = 3, overidpre = 4, overidpost = 11, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE, - avoid_internal_copy = TRUE) + avoid_internal_copy = TRUE, + kernel = "estimatr") ) address_after <- rlang::obj_address(example_dt) @@ -54,7 +56,8 @@ test_that("correctly creates highest order shiftvalues", { EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, - post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE) + post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") ) shiftvalues <- outputs$output$term @@ -111,7 +114,8 @@ test_that("removes the correct column when normalize < 0", { policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") shiftvalues <- outputs$output$term @@ -135,7 +139,8 @@ 
test_that("removes the correct column when normalize = 0", { policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") shiftvalues <- outputs$output$term @@ -157,7 +162,8 @@ test_that("does not create a first differenced variable when post, overidpost, p policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") shiftvalues <- outputs$output$term @@ -176,7 +182,8 @@ test_that("tests that package and STATA output agree when post, overidpost, pre, policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") coef_package <- outputs$output$coefficients[[1]] std_package <- outputs$output$std.error[[1]] @@ -202,7 +209,8 @@ test_that("does not create shiftvalues of differenced variable when post + overi policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") shiftvalues <- outputs$output$term @@ -223,7 +231,8 @@ test_that("does not create leads 
of differenced variable when pre + overidpre < policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") shiftvalues <- outputs$output$term @@ -244,7 +253,8 @@ test_that("removes the correct column when normalize > 0", { policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") shiftvalues <- outputs$output$term diff --git a/tests/testthat/test-EventStudyOLS.R b/tests/testthat/test-EventStudyOLS.R index 265496fd..0cb41544 100644 --- a/tests/testthat/test-EventStudyOLS.R +++ b/tests/testthat/test-EventStudyOLS.R @@ -29,7 +29,8 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) } @@ -69,7 +70,8 @@ test_that("FE = FALSE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) } @@ -109,7 +111,8 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) } @@ -150,7 +153,8 @@ test_that("FE = FALSE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) } @@ -191,7 +195,8 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) } @@ -231,7 +236,8 @@ test_that("FE = FALSE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) } @@ -271,7 
+277,8 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) } @@ -350,7 +357,8 @@ test_that("Coefficients and Standard Errors agree with base STATA", { timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster + cluster = cluster, + kernel = "estimatr" ) df_test_STATA <- read.csv("./input/df_test_base_STATA.csv", col.names = c("term", "coef", "std_error")) diff --git a/tests/testthat/test-EventStudyPlot.R b/tests/testthat/test-EventStudyPlot.R index 0850032b..0bdaf3d8 100644 --- a/tests/testthat/test-EventStudyPlot.R +++ b/tests/testthat/test-EventStudyPlot.R @@ -8,7 +8,8 @@ test_that("Dimension of OLS and FHS estimation output is the same", { estimates_ols <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, + kernel = "estimatr") estimates_fhs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", proxy = "eta_r", controls = "x_r", @@ -35,7 +36,8 @@ test_that("correctly changes x-axis and y-axis labels", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, + kernel = "estimatr") p_labels <- EventStudyPlot(estimates = estimates, conf_level = .95, @@ -52,7 +54,8 @@ test_that("x- and y-axis breaks and limits are correct", { estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) + post = 3, pre = 2, 
overidpre = 4, overidpost = 5, normalize = - 3, + kernel = "estimatr") p_Addmean <- EventStudyPlot(estimates = estimates, ybreaks = c(-1.5, -.5, 0, .5, 1.5), @@ -103,7 +106,8 @@ test_that("sup-t bands are appropriately present or absent", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) + controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, + kernel = "estimatr") p_supt <- get_labs(EventStudyPlot(estimates = estimates, supt = .95)) @@ -122,7 +126,8 @@ test_that("confidence intervals are appropriately present or absent", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) + controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, + kernel = "estimatr") p_ci <- get_labs(EventStudyPlot(estimates = estimates, conf_level = .95, supt = NULL)) @@ -140,7 +145,8 @@ test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or abs estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, + kernel = "estimatr") p_pre_post_caption <- get_labs(EventStudyPlot(estimates = estimates, ybreaks = c(-1.5, -.5, 0, .5, 1.5), @@ -199,7 +205,8 @@ test_that("Sup-t bands are wider than confidence intervals", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) + post = 3, pre = 2, 
overidpre = 4, overidpost = 5, normalize = - 3, + kernel = "estimatr") p <- EventStudyPlot(estimates = estimates, conf_level = .95, @@ -231,7 +238,8 @@ test_that("computed smoothest path for examples is within expectations", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3) + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3, + kernel = "estimatr") p <- EventStudyPlot(estimates = estimates, smpath = T) diff --git a/tests/testthat/test-PreparePlottingData.R b/tests/testthat/test-PreparePlottingData.R index 4cc0fec0..99168d3c 100644 --- a/tests/testthat/test-PreparePlottingData.R +++ b/tests/testthat/test-PreparePlottingData.R @@ -9,7 +9,7 @@ test_that("labels are unique", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -37,7 +37,7 @@ test_that("the correct labels are created", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -64,7 +64,7 @@ test_that("the labels are ordered correctly", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- 
broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -91,7 +91,7 @@ test_that("the control variable is removed", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -118,7 +118,7 @@ test_that("the largest lag label is correctly created", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -147,7 +147,7 @@ test_that("the largest lead label is correctly created", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -176,7 +176,7 @@ test_that("all columns besides 'term' and 'label' are 0 for the normalization co controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -211,7 +211,7 @@ test_that("all columns besides 'term' and 'label' are 0 for the proxyIV 
column", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) + df_tidy_estimates <- broom::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post diff --git a/tests/testthat/test-TestLinear.R b/tests/testthat/test-TestLinear.R index d78b5981..0e7afb48 100644 --- a/tests/testthat/test-TestLinear.R +++ b/tests/testthat/test-TestLinear.R @@ -2,9 +2,10 @@ test_that("correctly recognizes wrong variable type for estimate argument", { estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") - df_estimate <- estimatr::tidy(estimate[[1]]) + df_estimate <- broom::tidy(estimate[[1]]) test = "z_fd_lag1 = z_fd" @@ -16,8 +17,9 @@ test_that("correctly recognizes wrong variable type for estimate argument", { test_that("correctly recognizes wrong variable type for pretrends", { estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") expect_error(TestLinear(df_estimate, pretrends = "pretrends")) 
expect_error(TestLinear(df_estimate, pretrends = 1)) @@ -26,8 +28,9 @@ test_that("correctly recognizes wrong variable type for pretrends", { test_that("correctly recognizes wrong variable type for leveling_off", { estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") expect_error(TestLinear(df_estimate, leveling_off = "leveling_off")) expect_error(TestLinear(df_estimate, leveling_off = 1)) @@ -82,10 +85,11 @@ test_that("checks equality with STATA", { estimate <- suppressWarnings( EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - FE = TRUE, TFE = TRUE, + FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, - overidpost = 2, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) + overidpost = 2, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") ) codes <- read.csv('input/pvalues.txt', header = F) From ffb1b9567a0ac0a66cd594449ce424aaf5070e27 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 14 Aug 2025 21:45:27 -0400 Subject: [PATCH 02/67] Revert "#42 bd infrastructure to support both" This reverts commit 729939c737aa59c1b112de6cedf054bb2f330798. 
--- DESCRIPTION | 2 -- NAMESPACE | 2 -- R/AddSuptBand.R | 12 +++------ R/EventStudy.R | 14 ++++------- R/EventStudyPlot.R | 3 ++- tests/testthat/test-AddCIs.R | 8 +++--- tests/testthat/test-EventStudy.R | 30 ++++++++--------------- tests/testthat/test-EventStudyOLS.R | 24 ++++++------------ tests/testthat/test-EventStudyPlot.R | 24 ++++++------------ tests/testthat/test-PreparePlottingData.R | 16 ++++++------ tests/testthat/test-TestLinear.R | 22 +++++++---------- 11 files changed, 58 insertions(+), 99 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6b7a65f4..32b7fea2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,10 +43,8 @@ Imports: data.table, dplyr, estimatr, - fixest, ggplot2, MASS, - broom, rlang, pracma, stats, diff --git a/NAMESPACE b/NAMESPACE index 95b1a163..7867b6ac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,12 +5,10 @@ export(EventStudyPlot) export(TestLinear) import(dplyr) import(estimatr) -import(fixest) import(ggplot2) import(stringr) importFrom(MASS,mvrnorm) importFrom(car,linearHypothesis) -importFrom(stats,vcov) importFrom(data.table,":=") importFrom(data.table,.SD) importFrom(data.table,CJ) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index 490acbd1..ccca421b 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -16,7 +16,6 @@ #' @return A data.frame that contains the upper and lower sup-t band values #' for each event-study coefficient. #' @import estimatr -#' @importFrom stats vcov #' @importFrom MASS mvrnorm #' @keywords internal #' @noRd @@ -50,17 +49,14 @@ AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { - if (! class(estimates) %in% c("lm_robust", "iv_robust", "fixest")) { - stop("`estimates` is not a supported model object.") + if (! class(estimates) %in% c("lm_robust", "iv_robust")) { + stop("estimates is not a data frame with coefficient estimates and standard errors") } if (! 
is.numeric(num_sim) | num_sim %% 1 != 0 | num_sim <= 0) {stop("num_sim should be a natural number.")} if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} if (! is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character.")} - vcov_matrix_all <- stats::vcov(estimates) - if (is.null(vcov_matrix_all)) { - stop("Model object does not provide a variance-covariance matrix.") - } + vcov_matrix_all <- estimates$vcov v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] @@ -79,7 +75,7 @@ AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_ critical_value = t[floor(conf_level_num_sim) + 1] } - df_estimates_tidy <- broom::tidy(estimates) + df_estimates_tidy <- estimatr::tidy(estimates) df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) diff --git a/R/EventStudy.R b/R/EventStudy.R index 06d3df57..9d26f5b4 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -35,10 +35,9 @@ #' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE. #' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE. #' -#' @return A list that contains, under "output", the estimation output as a model object (from either `fixest` or `estimatr` depending on `kernel`), and under "arguments", the arguments passed to the function. 
+#' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. #' @import dplyr #' @import estimatr -#' @import fixest #' @importFrom stats reformulate #' @importFrom data.table setorderv as.data.table is.data.table .SD copy #' @export @@ -146,12 +145,10 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL, proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre, normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE, - allow_duplicate_id = FALSE, avoid_internal_copy = FALSE, - kernel = "fixest") { + allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { # Check for errors in arguments if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} - if (! kernel %in% c("fixest", "estimatr")) {stop("kernel should be either 'fixest' or 'estimatr'.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} for (var in c(idvar, timevar, outcomevar, policyvar)) { if ((! 
is.character(var))) { @@ -342,7 +339,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static, controls, proxy, proxyIV) - output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster, kernel) + output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- str_policy_vars } if (estimator == "FHS") { @@ -366,7 +363,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static, controls, proxy, proxyIV) - output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster, kernel) + output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) } @@ -388,8 +385,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c "normalize" = normalize, "normalization_column" = normalization_column, "cluster" = cluster, - "eventstudy_coefficients" = coefficients, - "kernel" = kernel) + "eventstudy_coefficients" = coefficients) return(list("output" = output, "arguments" = event_study_args)) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 607b6e2a..cb6f57e6 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -122,7 +122,7 @@ EventStudyPlot <- function(estimates, # Estimation Elements ----------------------------------------------------- df_estimates <- estimates$output - df_estimates_tidy <- broom::tidy(estimates$output) + df_estimates_tidy <- estimatr::tidy(estimates$output) static_model <- nrow(df_estimates_tidy) == 1 if (static_model) { @@ -153,6 +153,7 @@ EventStudyPlot <- function(estimates, plot_CI <- if(!is.null(conf_level)) TRUE else FALSE if (plot_CI) { + df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level) } 
diff --git a/tests/testthat/test-AddCIs.R b/tests/testthat/test-AddCIs.R index dca1f971..52ddcbdd 100644 --- a/tests/testthat/test-AddCIs.R +++ b/tests/testthat/test-AddCIs.R @@ -33,7 +33,7 @@ test_that("correctly recognizes missing columns in estimates argument", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_test <- broom::tidy(estimates$output) + df_test <- estimatr::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients @@ -52,7 +52,7 @@ test_that("correctly recognizes wrong inputs for conf_level argument", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_test <- broom::tidy(estimates$output) + df_test <- estimatr::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients @@ -80,7 +80,7 @@ test_that("correctly recognizes missing columns in estimates argument", { timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) - df_test <- broom::tidy(estimates$output) + df_test <- estimatr::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients @@ -99,7 +99,7 @@ test_that("correctly recognizes wrong inputs for conf_level argument", { timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) - df_test <- broom::tidy(estimates$output) + df_test <- estimatr::tidy(estimates$output) eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 
7f82f4df..5c094dd3 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -13,8 +13,7 @@ test_that("does not modify input data (even if input is data.table) when avoid_i controls = "x_r", FE = TRUE, TFE = TRUE, post = 2, pre = 3, overidpre = 4, overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + cluster = TRUE, anticipation_effects_normalization = TRUE) ) expect_true(isTRUE(all.equal(example_dt, example_dt_copy, check.attributes = FALSE))) @@ -34,8 +33,7 @@ test_that("input dt IS modified in-place when avoid_internal_copy = TRUE", { post = 2, pre = 3, overidpre = 4, overidpost = 11, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE, - avoid_internal_copy = TRUE, - kernel = "estimatr") + avoid_internal_copy = TRUE) ) address_after <- rlang::obj_address(example_dt) @@ -56,8 +54,7 @@ test_that("correctly creates highest order shiftvalues", { EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, - post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE) ) shiftvalues <- outputs$output$term @@ -114,8 +111,7 @@ test_that("removes the correct column when normalize < 0", { policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) shiftvalues <- outputs$output$term @@ -139,8 +135,7 @@ 
test_that("removes the correct column when normalize = 0", { policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) shiftvalues <- outputs$output$term @@ -162,8 +157,7 @@ test_that("does not create a first differenced variable when post, overidpost, p policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) shiftvalues <- outputs$output$term @@ -182,8 +176,7 @@ test_that("tests that package and STATA output agree when post, overidpost, pre, policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) coef_package <- outputs$output$coefficients[[1]] std_package <- outputs$output$std.error[[1]] @@ -209,8 +202,7 @@ test_that("does not create shiftvalues of differenced variable when post + overi policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) shiftvalues <- outputs$output$term @@ -231,8 +223,7 @@ test_that("does not create leads 
of differenced variable when pre + overidpre < policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) shiftvalues <- outputs$output$term @@ -253,8 +244,7 @@ test_that("removes the correct column when normalize > 0", { policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) shiftvalues <- outputs$output$term diff --git a/tests/testthat/test-EventStudyOLS.R b/tests/testthat/test-EventStudyOLS.R index 0cb41544..265496fd 100644 --- a/tests/testthat/test-EventStudyOLS.R +++ b/tests/testthat/test-EventStudyOLS.R @@ -29,8 +29,7 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) } @@ -70,8 +69,7 @@ test_that("FE = FALSE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) } @@ -111,8 +109,7 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) } @@ -153,8 +150,7 @@ test_that("FE = FALSE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) } @@ -195,8 +191,7 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) } @@ -236,8 +231,7 @@ test_that("FE = FALSE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) } @@ -277,8 
+271,7 @@ test_that("FE = TRUE, timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) } @@ -357,8 +350,7 @@ test_that("Coefficients and Standard Errors agree with base STATA", { timevar = timevar, FE = FE, TFE = TFE, - cluster = cluster, - kernel = "estimatr" + cluster = cluster ) df_test_STATA <- read.csv("./input/df_test_base_STATA.csv", col.names = c("term", "coef", "std_error")) diff --git a/tests/testthat/test-EventStudyPlot.R b/tests/testthat/test-EventStudyPlot.R index 0bdaf3d8..0850032b 100644 --- a/tests/testthat/test-EventStudyPlot.R +++ b/tests/testthat/test-EventStudyPlot.R @@ -8,8 +8,7 @@ test_that("Dimension of OLS and FHS estimation output is the same", { estimates_ols <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, - kernel = "estimatr") + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) estimates_fhs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", proxy = "eta_r", controls = "x_r", @@ -36,8 +35,7 @@ test_that("correctly changes x-axis and y-axis labels", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, - kernel = "estimatr") + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) p_labels <- EventStudyPlot(estimates = estimates, conf_level = .95, @@ -54,8 +52,7 @@ test_that("x- and y-axis breaks and limits are correct", { estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, - kernel = "estimatr") 
+ post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) p_Addmean <- EventStudyPlot(estimates = estimates, ybreaks = c(-1.5, -.5, 0, .5, 1.5), @@ -106,8 +103,7 @@ test_that("sup-t bands are appropriately present or absent", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, - kernel = "estimatr") + controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) p_supt <- get_labs(EventStudyPlot(estimates = estimates, supt = .95)) @@ -126,8 +122,7 @@ test_that("confidence intervals are appropriately present or absent", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, - kernel = "estimatr") + controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) p_ci <- get_labs(EventStudyPlot(estimates = estimates, conf_level = .95, supt = NULL)) @@ -145,8 +140,7 @@ test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or abs estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, - kernel = "estimatr") + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) p_pre_post_caption <- get_labs(EventStudyPlot(estimates = estimates, ybreaks = c(-1.5, -.5, 0, .5, 1.5), @@ -205,8 +199,7 @@ test_that("Sup-t bands are wider than confidence intervals", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, - kernel = "estimatr") + post 
= 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) p <- EventStudyPlot(estimates = estimates, conf_level = .95, @@ -238,8 +231,7 @@ test_that("computed smoothest path for examples is within expectations", { estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3, - kernel = "estimatr") + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3) p <- EventStudyPlot(estimates = estimates, smpath = T) diff --git a/tests/testthat/test-PreparePlottingData.R b/tests/testthat/test-PreparePlottingData.R index 99168d3c..4cc0fec0 100644 --- a/tests/testthat/test-PreparePlottingData.R +++ b/tests/testthat/test-PreparePlottingData.R @@ -9,7 +9,7 @@ test_that("labels are unique", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -37,7 +37,7 @@ test_that("the correct labels are created", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -64,7 +64,7 @@ test_that("the labels are ordered correctly", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- 
estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -91,7 +91,7 @@ test_that("the control variable is removed", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -118,7 +118,7 @@ test_that("the largest lag label is correctly created", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -147,7 +147,7 @@ test_that("the largest lead label is correctly created", { controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -176,7 +176,7 @@ test_that("all columns besides 'term' and 'label' are 0 for the normalization co controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post @@ -211,7 +211,7 @@ test_that("all columns besides 'term' and 'label' are 0 for the proxyIV 
column", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_tidy_estimates <- broom::tidy(list_EventStudy$output) + df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) policyvar <- list_EventStudy[[2]]$policyvar post <- list_EventStudy[[2]]$post diff --git a/tests/testthat/test-TestLinear.R b/tests/testthat/test-TestLinear.R index 0e7afb48..d78b5981 100644 --- a/tests/testthat/test-TestLinear.R +++ b/tests/testthat/test-TestLinear.R @@ -2,10 +2,9 @@ test_that("correctly recognizes wrong variable type for estimate argument", { estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) - df_estimate <- broom::tidy(estimate[[1]]) + df_estimate <- estimatr::tidy(estimate[[1]]) test = "z_fd_lag1 = z_fd" @@ -17,9 +16,8 @@ test_that("correctly recognizes wrong variable type for estimate argument", { test_that("correctly recognizes wrong variable type for pretrends", { estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) expect_error(TestLinear(df_estimate, pretrends = "pretrends")) 
expect_error(TestLinear(df_estimate, pretrends = 1)) @@ -28,9 +26,8 @@ test_that("correctly recognizes wrong variable type for pretrends", { test_that("correctly recognizes wrong variable type for leveling_off", { estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, - post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) expect_error(TestLinear(df_estimate, leveling_off = "leveling_off")) expect_error(TestLinear(df_estimate, leveling_off = 1)) @@ -85,11 +82,10 @@ test_that("checks equality with STATA", { estimate <- suppressWarnings( EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - FE = TRUE, TFE = TRUE, + FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, - overidpost = 2, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE, - kernel = "estimatr") + overidpost = 2, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE) ) codes <- read.csv('input/pvalues.txt', header = F) From 8430109653ecbbfe1cfdaf9e9e27686e6c2056aa Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 15 Aug 2025 09:13:29 -0400 Subject: [PATCH 03/67] #42 bd start development --- R/EventStudy.R | 8 ++++-- issue/testing.r | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 issue/testing.r diff --git a/R/EventStudy.R b/R/EventStudy.R index 9d26f5b4..c38432a0 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -341,8 +341,12 @@ EventStudy <- function(estimator, data, outcomevar, 
policyvar, idvar, timevar, c
 
         output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster)
         coefficients <- str_policy_vars
-    }
-    if (estimator == "FHS") {
+    } else if (estimator == "feols") {
+        # TODO: implement feols estimator
+        # Then adapt EventStudyPlot to use feols
+        # As well as ensure summary($output) works
+        # Building / modifying tests should be the LAST step
+    } else if (estimator == "FHS") {
 
         if (is.null(proxyIV)) {
             Fstart <- 0
diff --git a/issue/testing.r b/issue/testing.r
new file mode 100644
index 00000000..eb81d666
--- /dev/null
+++ b/issue/testing.r
@@ -0,0 +1,65 @@
+library(haven)
+library(data.table)
+
+indir <- 'examples/source/raw/eventstudy_illustration_data/orig'
+data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) |> as.data.table()
+estimator <- "OLS"
+outcomevar <- "y_base"
+policyvar <- "z"
+idvar <- "id"
+timevar <- "t"
+controls <- NULL
+proxy <- NULL
+proxyIV <- NULL
+FE <- TRUE
+TFE <- TRUE
+post <- 2
+pre <- 2
+overidpost <- 1
+overidpre <- post + pre
+normalize <- -1 * (pre + 1)
+cluster <- TRUE
+anticipation_effects_normalization <- TRUE
+allow_duplicate_id <- FALSE
+avoid_internal_copy <- FALSE
+kernel <- "estimatr"
+# detect_holes(): TRUE if, for any id, consecutive non-NA values of timevar (in row order) differ by anything other than 1.
+detect_holes <- function(dt, idvar, timevar) {
+    holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar)
+                       ][, list(holes = any(base::diff(base::get(timevar)) != 1)),
+                         by = c(idvar)]
+    # NOTE(review): diff() follows row order, so this presumably expects dt sorted by timevar within id -- confirm.
+    return(any(holes_per_id$holes))
+}
+
+if (detect_holes(data, idvar, timevar)) {
+    warning(paste0("Note: gaps of more than one unit in the time variable '", timevar, "' were detected. ",
+                   "Treating these as gaps in the panel dimension."))
+    timevar_holes <- TRUE
+} else {
+    timevar_holes <- FALSE
+}
+
+
+# EventStudy <- function(
+#     estimator,
+#     data,
+#     outcomevar,
+#     policyvar,
+#     idvar,
+#     timevar,
+#     controls = NULL,
+#     proxy = NULL,
+#     proxyIV = NULL,
+#     FE = TRUE,
+#     TFE = TRUE,
+#     post,
+#     overidpost = 1,
+#     pre, overidpre = post + pre,
+#     normalize = -1 * (pre + 1),
+#     cluster = TRUE,
+#     anticipation_effects_normalization = TRUE,
+#     allow_duplicate_id = FALSE,
+#     avoid_internal_copy = FALSE,
+#     kernel = "fixest"
+# )
From 3ff46cf6ffb35e23825f26db673722692367c05e Mon Sep 17 00:00:00 2001
From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com>
Date: Thu, 21 Aug 2025 14:23:18 -0400
Subject: [PATCH 04/67] #42 bd feols for ols

---
 R/EventStudy.R  |  5 +++++
 issue/testing.r | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/R/EventStudy.R b/R/EventStudy.R
index c38432a0..a360231a 100644
--- a/R/EventStudy.R
+++ b/R/EventStudy.R
@@ -346,6 +346,11 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c
         # Then adapt EventStudyPlot to use feols
         # As well as ensure summary($output) works
         # Building / modifying tests should be the LAST step
+        formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars,
+                                            controls, proxy, proxyIV,
+                                            idvar, timevar, FE, TFE)
+        output <- EventStudyFEOLS(formula, data, idvar, timevar, FE, TFE, cluster)
+        coefficients <- str_policy_vars
     } else if (estimator == "FHS") {
 
         if (is.null(proxyIV)) {
diff --git a/issue/testing.r b/issue/testing.r
index eb81d666..f6611663 100644
--- a/issue/testing.r
+++ b/issue/testing.r
@@ -40,6 +40,54 @@ if (detect_holes(data, idvar, timevar)) {
     timevar_holes <- FALSE
 }
 
+PrepareModelFormulaFEOLS <- function(outcomevar, str_policy_vars,
+                                     controls = NULL, proxy = NULL, proxyIV = NULL,
+                                     idvar = NULL, timevar = NULL, FE = FALSE, TFE = FALSE) {
+    stopifnot(!is.null(idvar))
+    stopifnot(!is.null(timevar))
+    # Builds "outcome ~ regressors | fixed effects" for fixest::feols; with FE and TFE both FALSE it is a plain "outcome ~ regressors".
+    regressors <- c(str_policy_vars, controls)
+    # NOTE: proxy and proxyIV are accepted but not used when building the formula.
+    if (FE || TFE) {
+        fes <- c()
+        if (FE) {
+            fes <- c(fes, idvar)
+        }
+        if (TFE) {
+            fes <- c(fes, timevar)
+        }
+        # Everything after "|" is the fixed-effects part: idvar when FE, timevar when TFE.
+        formula_str <- paste(
+            outcomevar,
+            "~",
+            paste(regressors, collapse = " + "),
+            "|",
+            paste(fes, collapse = " + ")
+        )
+        formula <- stats::as.formula(formula_str)
+    } else {
+        formula <- stats::reformulate(
+            termlabels = regressors,
+            response = outcomevar,
+            intercept = TRUE
+        )
+    }
+    return(formula)
+}
+# EventStudyFEOLS(): fits `formula` on `prepared_data` with fixest::feols; cluster = TRUE clusters on idvar.
+EventStudyFEOLS <- function(formula, prepared_data,
+                            idvar, timevar, FE, TFE, cluster) {
+    # ifelse() cannot return NULL (it errors when cluster is FALSE), so branch with a plain if/else.
+    cluster_by <- if (isTRUE(cluster)) idvar else NULL
+    # NOTE(review): NULL leaves feols' default vcov in place, which may still cluster -- confirm. timevar, FE, TFE are unused here.
+    ols_output <- fixest::feols(
+        fml = formula,
+        data = prepared_data,
+        cluster = cluster_by
+    )
+    return(ols_output)
+}
+
 
 # EventStudy <- function(
 #     estimator,
From 681fe787a5698c9667bc66eb61b90af37c32d851 Mon Sep 17 00:00:00 2001
From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com>
Date: Thu, 21 Aug 2025 15:56:19 -0400
Subject: [PATCH 05/67] #42 bd initialize new plotting script

---
 R/EventStudyPlotFixest.R | 327 +++++++++++++++++++++++++++++++++++++++
 issue/testing.r          |  84 +++++++---
 2 files changed, 389 insertions(+), 22 deletions(-)
 create mode 100644 R/EventStudyPlotFixest.R

diff --git a/R/EventStudyPlotFixest.R b/R/EventStudyPlotFixest.R
new file mode 100644
index 00000000..7b0b508b
--- /dev/null
+++ b/R/EventStudyPlotFixest.R
@@ -0,0 +1,327 @@
+#' Creates an Event-Study Plot Following the Suggestions in Freyaldenhoven et al. (2021)
+#'
+#' @description `EventStudyPlot` takes the output from [EventStudy()] and combines it with additional optional arguments to facilitate constructing an Event-Study Plot.
+#'
+#' @param estimates The output from calling [EventStudy()]. Should be a list of length 2.
+#' @param xtitle The title for the x-axis. Should be a string. Defaults to "Event time".
+#' @param ytitle The title for the y-axis. Should be a string. Defaults to "Coefficient".
+#' @param ybreaks A vector containing the desired breaks for the y-axis.
+#' Defaults to NULL, which means the breaks are computed automatically.
+#' If custom breaks are selected with the `add_mean` argument set to TRUE, then the breaks must include zero.
+#' @param conf_level Confidence level used for confidence interval
+#' expressed as a real number between 0 and 1, inclusive. Defaults to 0.95.
+#' @param supt The confidence level used for obtaining the sup-t bands critical value.
+#' Should be a real number between 0 and 1, inclusive. Defaults to .95. Sup-t bands are simulation-based,
+#' so you must set a seed if you would like your sup-t band results to be reproducible (see examples).
+#' @param num_sim The number of simulations used in generating the sup-t bands.
+#' Should be a natural number. Defaults to 1000.
+#' @param add_mean Adds the mean of the dependent variable in the period used for normalization.
+#' Should be TRUE or FALSE. Defaults to FALSE.
+#' @param pre_event_coeffs If TRUE, uses pre and overidpre from estimates to test for pre-trends.
+#' Should be TRUE or FALSE. Defaults to TRUE.
+#' @param post_event_coeffs If TRUE, uses post and overidpost from estimates to test for leveling-off.
+#' Should be TRUE or FALSE. Defaults to TRUE.
+#' @param add_zero_line Whether or not to plot a dashed horizontal line at y = 0.
+#' Should be TRUE or FALSE. Defaults to TRUE, meaning the line is plotted.
+#' @param smpath Plot smoothest path of confounder that rationalizes event study coefficients.
+#' Should be TRUE or FALSE. Defaults to FALSE.
+#'
+#' @return The Event-Study plot as a ggplot2 object.
+#' @import ggplot2 dplyr
+#' @import estimatr
+#' @importFrom rlang .data
+#' @importFrom data.table setorder
+#' @export
+#'
+#' @examples
+#'
+#' #
+#'
+#' # Minimal examples
+#' ### OLS
+#'
+#' estimates_ols <- EventStudy(
+#'   estimator = "OLS",
+#'   data = example_data,
+#'   outcomevar = "y_smooth_m",
+#'   policyvar = "z",
+#'   idvar = "id",
+#'   timevar = "t",
+#'   controls = "x_r",
+#'   FE = TRUE, TFE = TRUE,
+#'   post = 3, overidpost = 5,
+#'   pre = 2, overidpre = 4,
+#'   normalize = - 3
+#' )
+#'
+#' plt_ols <- EventStudyPlot(estimates = estimates_ols)
+#' plt_ols
+#'
+#' ### IV
+#'
+#' estimates_fhs <- EventStudy(
+#'   estimator = "FHS",
+#'   data = example_data,
+#'   outcomevar = "y_smooth_m",
+#'   policyvar = "z",
+#'   idvar = "id",
+#'   timevar = "t",
+#'   proxy = "x_r",
+#'   post = 2, overidpost = 1,
+#'   pre = 0, overidpre = 3,
+#'   normalize = -1
+#' )
+#'
+#' plt_fhs <- EventStudyPlot(estimates = estimates_fhs)
+#' plt_fhs
+#'
+#' # Optional arguments
+#'
+#' ### Change x- and y-axis titles and set ybreaks
+#' EventStudyPlot(estimates = estimates_ols,
+#'                xtitle = "Relative time", ytitle = "",
+#'                ybreaks = seq(-2, 1, 0.5))
+#'
+#' ### Add smoothest path
+#' EventStudyPlot(estimates = estimates_ols, smpath = TRUE)
+#'
+#' ### Add y-mean to y-axis and line y = 0
+#' EventStudyPlot(estimates = estimates_ols, add_mean = TRUE,
+#'                add_zero_line = TRUE)
+#'
+#' ### Do not plot supt bands
+#' EventStudyPlot(estimates = estimates_ols, supt = NULL)
+#'
+#' ### Setting seed prior to plotting sup-t bands
+#' set.seed(1234)
+#' EventStudyPlot(estimates = estimates_ols)
+#'
+#' # Modify plots using ggplot2 functions
+#' library(ggplot2)
+#'
+#' ### Change color of dots, horizontal line, and theme
+#' plt_ols +
+#'   geom_point(color = "red") +
+#'   geom_hline(color = "gray", yintercept = 0) +
+#'   theme_light() +
+#'   theme(panel.grid.minor.x = element_blank())
+#'
+
+EventStudyPlotFixest <- function(estimates,
+                                 xtitle = "Event time", ytitle = "Coefficient", ybreaks = NULL,
+                                 conf_level = .95, supt = .95, num_sim = 1000, add_mean = FALSE,
+                                 pre_event_coeffs = TRUE, post_event_coeffs = TRUE,
+                                 add_zero_line = TRUE, smpath = FALSE) {
+
+    if (!is.character(xtitle)) {stop("Argument 'xtitle' should be a character.")}
+    if (!is.character(ytitle)) {stop("Argument 'ytitle' should be a character.")}
+    if (!is.logical(add_zero_line)) {stop("Argument 'add_zero_line' should be either TRUE or FALSE.")}
+    if (!is.null(ybreaks) &&
+        !is.numeric(ybreaks)) {stop("Argument 'ybreaks' should be NULL or a numeric vector.")}
+
+# Estimation Elements -----------------------------------------------------
+
+    df_estimates_fixest <- estimates$output # fitted model object, not a data frame despite the name - TODO: rename
+    df_estimates_tidy <- estimatr::tidy(estimates$output) # this is class data.frame
+
+    static_model <- nrow(df_estimates_tidy) == 1
+    if (static_model) {
+        stop("EventStudyPlot() does not support static models.")
+    }
+
+    df_data <- estimates$arguments$data
+    outcomevar <- estimates$arguments$outcomevar
+    policyvar <- estimates$arguments$policyvar
+    post <- estimates$arguments$post
+    overidpost <- estimates$arguments$overidpost
+    pre <- estimates$arguments$pre
+    overidpre <- estimates$arguments$overidpre
+    normalize <- estimates$arguments$normalize
+    normalization_column <- estimates$arguments$normalization_column
+    eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients
+    proxyIV <- estimates$arguments$proxyIV
+
+# Optionally Add Suptbands/Confidence Intervals ---------------------------
+
+    plot_supt <- !is.null(supt)
+
+    if (plot_supt) {
+        df_estimates_tidy <- AddSuptBand(df_estimates_fixest, num_sim = num_sim, conf_level = supt,
+                                         eventstudy_coefficients = eventstudy_coefficients)
+    }
+
+    plot_CI <- !is.null(conf_level)
+
+    if (plot_CI) {
+
+        df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level)
+    }
+
+# Optionally Test For Pretrends/Levelling-Off -----------------------------
+
+    df_test_linear <- TestLinear(estimates = estimates, pretrends = pre_event_coeffs, leveling_off = post_event_coeffs)
+
+    if (pre_event_coeffs || post_event_coeffs) {
+        pretrends_p_value <- df_test_linear[df_test_linear["Test"] == "Pre-Trends", "p.value"]
+        levelingoff_p_value <- df_test_linear[df_test_linear["Test"] == "Leveling-Off", "p.value"]
+
+        text_pretrends <- paste0("Pretrends p-value = ", round(pretrends_p_value, 2))
+        text_levelingoff <- paste0("Leveling off p-value = ", round(levelingoff_p_value, 2))
+
+
+        if (pre_event_coeffs && post_event_coeffs) {
+            text_caption <- paste0(text_pretrends, " -- ", text_levelingoff)
+
+        } else if (pre_event_coeffs && !post_event_coeffs) {
+            text_caption <- text_pretrends
+
+        } else if (!pre_event_coeffs && post_event_coeffs) {
+            text_caption <- text_levelingoff
+
+        }
+    } else {
+        text_caption <- NULL
+    }
+
+
+    df_plt <- PreparePlottingData(df_estimates_tidy, policyvar,
+                                  post, overidpost, pre, overidpre, normalization_column, proxyIV)
+
+# Construct y breaks ------------------------------------------------------
+
+    if (!is.null(ybreaks)) {
+        if (!(0 %in% ybreaks) && add_mean) {
+            stop("If you want to add the mean of y in the y-axis then 'ybreaks' must include 0.")
+        }
+
+        ylabels <- ybreaks
+        ylims <- c(min(ybreaks), max(ybreaks))
+    } else {
+        min_value <- min(c(df_plt$estimate, df_plt$ci_lower, df_plt$suptband_lower), na.rm = TRUE)
+        max_value <- max(c(df_plt$estimate, df_plt$ci_upper, df_plt$suptband_upper), na.rm = TRUE)
+        max_abs <- max(abs(min_value), abs(max_value))
+
+        magnitude <- 10^floor(log10(max_abs))
+
+        # Determine step depending on how far the endpoints are from the magnitude
+        mean_ratio <- mean(c(abs(min_value)/magnitude, max_value/magnitude))
+        if (mean_ratio > 6.67) {
+            step <- 3*magnitude
+        } else if (mean_ratio > 3.33) {
+            step <- 2*magnitude
+        } else {
+            step <- magnitude
+        }
+
+        # Pick multiples of step to ensure zero is included
+        close_to_min <- floor(min_value/step)*step
+        close_to_max <- ceiling(max_value/step)*step
+
+        ybreaks <- seq(close_to_min, close_to_max, step)
+        ylims <- c(min(ybreaks), max(ybreaks))
+
+        if (length(ybreaks) >= 9) {
+            # Too many breaks, double step size
+            step <- step*2
+            close_to_min <- floor(min_value/step)*step
+            close_to_max <- ceiling(max_value/step)*step
+
+            ybreaks <- seq(close_to_min, close_to_max, step)
+        } else if (length(ybreaks) <= 3) {
+            # Too few breaks, halve step size
+            step <- step/2
+            close_to_min <- floor(min_value/step)*step
+            close_to_max <- ceiling(max_value/step)*step
+
+            ybreaks <- seq(close_to_min, close_to_max, step)
+        }
+        ylabels <- ybreaks
+    }
+
+# Optionally Adds Mean ----------------------------------------------------
+
+    if (add_mean) {
+
+        y_mean <- AddMeans(df_data, normalization_column, policyvar, outcomevar)
+
+        index_zero <- which(ybreaks == 0)
+        ylabels[index_zero] <- paste0(ylabels[index_zero], " (", round(y_mean, 2), ")")
+    }
+
+# Optionally Add smooth path ----------------------------------------------
+
+    # Order coefficients
+    label_var <- "label"
+    data.table::setorderv(df_plt, c(label_var))
+    ordered_labels <- df_plt$label
+
+    if (smpath) {
+
+        unselect_message <- "Please change the 'smpath' argument in 'EventStudyPlot' to FALSE."
+
+        if (!is.null(proxyIV)) {
+            if (sum(df_plt$estimate == 0) > 2) {
+                stop(paste0("The smoothest path is not supported for the FHS estimator with more than one instrument. ",
+                            unselect_message))
+            }
+        }
+
+        coefficients <- df_plt$estimate
+
+        # Add column and row in matrix of coefficients in index of norm columns
+        covar <- AddZerosCovar(stats::vcov(estimates$output),
+                               eventstudy_coefficients,
+                               df_plt[df_plt$estimate==0, ]$term,
+                               df_plt$term)
+
+        inv_covar <- pracma::pinv(covar)
+
+        df_plt <- AddSmPath(df_plt, coefficients, inv_covar)
+    }
+
+# Construct Plot ----------------------------------------------------------
+
+    df_plt$label_num <- as.numeric(gsub("+", "", df_plt$label, fixed = TRUE))
+
+    plt <- ggplot(df_plt,
+                  aes(x = .data$label_num, y = .data$estimate))
+
+    if (add_zero_line) {
+        plt <- plt +
+            geom_hline(yintercept = 0,
+                       color = "green", linetype = "dashed")
+    }
+    if (plot_supt) {
+        plt <- plt +
+            geom_linerange(aes(ymin = .data$suptband_lower,
+                               ymax = .data$suptband_upper),
+                           data = df_plt[df_plt$estimate != 0,])
+    }
+    if (plot_CI) {
+        plt <- plt +
+            geom_errorbar(aes(ymin = .data$ci_lower,
+                              ymax = .data$ci_upper),
+                          data = df_plt[df_plt$estimate != 0,],
+                          width = .2)
+    }
+    if (smpath) {
+        plt <- plt +
+            geom_line(aes(y = .data$smoothest_path, group = 1),
+                      color = "black")
+    }
+
+    plt <- plt +
+        geom_point(color = "#006600") +
+        scale_x_continuous(breaks = min(df_plt$label_num):max(df_plt$label_num),
+                           labels = ordered_labels) +
+        scale_y_continuous(breaks = ybreaks,
+                           labels = ylabels,
+                           limits = ylims) +
+        labs(x = xtitle, y = ytitle,
+             caption = text_caption) +
+        theme_bw() +
+        theme(panel.grid = element_blank(),
+              plot.caption = element_text(hjust = 0))
+
+    return(plt)
+}
diff --git a/issue/testing.r b/issue/testing.r
index f6611663..93c0b60a 100644
--- a/issue/testing.r
+++ b/issue/testing.r
@@ -1,5 +1,6 @@
 library(haven)
 library(data.table)
+library(tidyverse)
 
 indir <- 'examples/source/raw/eventstudy_illustration_data/orig'
 data <- 
read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) |> as.data.table() @@ -89,25 +90,64 @@ EventStudyFEOLS <- function(formula, prepared_data, } -# EventStudy <- function( -# estimator, -# data, -# outcomevar, -# policyvar, -# idvar, -# timevar, -# controls = NULL, -# proxy = NULL, -# proxyIV = NULL, -# FE = TRUE, -# TFE = TRUE, -# post, -# overidpost = 1, -# pre, overidpre = post + pre, -# normalize = -1 * (pre + 1), -# cluster = TRUE, -# anticipation_effects_normalization = TRUE, -# allow_duplicate_id = FALSE, -# avoid_internal_copy = FALSE, -# kernel = "fixest" -# ) +output_feols <- EventStudy( + estimator = 'feols', + data, + outcomevar, + policyvar, + idvar, + timevar, + controls = NULL, + proxy = NULL, + proxyIV = NULL, + FE = TRUE, + TFE = TRUE, + post, + overidpost = 1, + pre, overidpre = post + pre, + normalize = -1 * (pre + 1), + cluster = TRUE, + anticipation_effects_normalization = TRUE, + allow_duplicate_id = FALSE, + avoid_internal_copy = FALSE +) + +output <- EventStudy( + estimator = 'OLS', + data, + outcomevar, + policyvar, + idvar, + timevar, + controls = NULL, + proxy = NULL, + proxyIV = NULL, + FE = TRUE, + TFE = TRUE, + post, + overidpost = 1, + pre, overidpre = post + pre, + normalize = -1 * (pre + 1), + cluster = TRUE, + anticipation_effects_normalization = TRUE, + allow_duplicate_id = FALSE, + avoid_internal_copy = FALSE +) + + +EventStudyPlot(output) +EventStudyPlotFixest(output_feols) + +estimates = output +xtitle = "Event time" +ytitle = "Coefficient" +ybreaks = NULL +conf_level = .95 +supt = .95 +num_sim = 1000 +add_mean = FALSE +pre_event_coeffs = TRUE +post_event_coeffs = TRUE +add_zero_line = TRUE +smpath = FALSE + From d00d853455aa6ad45b13c77578d426c229f5fbd5 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 21 Aug 2025 16:36:00 -0400 Subject: [PATCH 06/67] #42 cl rename function arguments to align with input --- R/AddSuptBand.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index ccca421b..e1444ece 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -47,16 +47,16 @@ #' eventstudy_coefficients = eventstudy_estimates$arguments$eventstudy_coefficients #') -AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { +AddSuptBand <- function(df_estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { - if (! class(estimates) %in% c("lm_robust", "iv_robust")) { + if (! class(df_estimates) %in% c("lm_robust", "iv_robust")) { stop("estimates is not a data frame with coefficient estimates and standard errors") } if (! is.numeric(num_sim) | num_sim %% 1 != 0 | num_sim <= 0) {stop("num_sim should be a natural number.")} if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} if (! is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character.")} - vcov_matrix_all <- estimates$vcov + vcov_matrix_all <- df_estimates$vcov v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] @@ -75,7 +75,7 @@ AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_ critical_value = t[floor(conf_level_num_sim) + 1] } - df_estimates_tidy <- estimatr::tidy(estimates) + df_estimates_tidy <- estimatr::tidy(df_estimates) df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) From 33caaefb50a29d477fcf18d1d80527305d3d2b2c Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 21 Aug 2025 16:47:57 -0400 Subject: [PATCH 07/67] #42 cl simplify useless code --- R/EventStudyPlot.R | 5 ++--- 1 file changed, 2 insertions(+), 3 
deletions(-) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index cb6f57e6..5f0c0fda 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -121,10 +121,9 @@ EventStudyPlot <- function(estimates, # Estimation Elements ----------------------------------------------------- - df_estimates <- estimates$output - df_estimates_tidy <- estimatr::tidy(estimates$output) + df_estimates <- estimates$output - static_model <- nrow(df_estimates_tidy) == 1 + static_model <- length(coef(df_estimates)) == 1 if (static_model) { stop("EventStudyPlot() does not support static models.") } From 92d7d6f53181068b942048d83b7de9612c7c94e6 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 21 Aug 2025 16:48:57 -0400 Subject: [PATCH 08/67] #42 bd adapt AddSuptBand() to fixest --- R/AddSuptBand.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index e1444ece..1f9eb646 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -49,14 +49,16 @@ AddSuptBand <- function(df_estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { - if (! class(df_estimates) %in% c("lm_robust", "iv_robust")) { + if (! class(df_estimates) %in% c("lm_robust", "iv_robust", "fixest")) { stop("estimates is not a data frame with coefficient estimates and standard errors") } if (! is.numeric(num_sim) | num_sim %% 1 != 0 | num_sim <= 0) {stop("num_sim should be a natural number.")} if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} if (! 
is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character.")} - vcov_matrix_all <- df_estimates$vcov + fixest = class(df_estimates) == "fixest" + + vcov_matrix_all <- if(fixest){vcov(df_estimates)} else {df_estimates$vcov} v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] @@ -75,12 +77,10 @@ AddSuptBand <- function(df_estimates, num_sim = 1000, conf_level = .95, eventstu critical_value = t[floor(conf_level_num_sim) + 1] } - df_estimates_tidy <- estimatr::tidy(df_estimates) + df_estimates_tidy <- if(fixest){estimatr::tidy(df_estimates)} else {broom::tidy(df_estimates)} df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) - return(df_estimates_tidy) - } From d72bd18882d02d176ff0ef75866b1a1ae911c3f4 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:06:24 -0400 Subject: [PATCH 09/67] #42 bd adapt TestLinear to fixest --- R/TestLinear.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/TestLinear.R b/R/TestLinear.R index a5b6e293..70e9a279 100644 --- a/R/TestLinear.R +++ b/R/TestLinear.R @@ -36,7 +36,7 @@ TestLinear <- function(estimates, test = NA, test_name = "User Test", pretrends = TRUE, leveling_off = TRUE){ if (! is.list(estimates) | length(estimates) != 2){ stop("estimates should be a list of length two, an output of EventStudy()")} - if ((! class(estimates$output) %in% c("lm_robust", "iv_robust")) | ! is.list(estimates$output)) { + if ((! class(estimates$output) %in% c("lm_robust", "iv_robust", "fixest")) | ! is.list(estimates$output)) { stop("The first element of estimates should be a list of class 'lm_robust' with coefficient estimates and standard errors") } if (! 
is.list(estimates$arguments) | ! is.list(estimates$arguments)) { @@ -46,10 +46,14 @@ TestLinear <- function(estimates, test = NA, test_name = "User Test", pretrends if (! is.logical(pretrends)) {stop("pretrends should be a logical. Default value is TRUE")} if (! is.logical(leveling_off)) {stop("leveling_off should be a logical. Default value is TRUE")} - if(estimates$arguments$cluster == TRUE){ + fixest = class(estimates$output) == "fixest" - estimates$output$df.residual <- estimates$output$nclusters - 1 + if(estimates$arguments$cluster == TRUE){ + estimates$output$df.residual <- ifelse( + fixest, + as.integer(fixest::fitstat(estimates$output, "g")), + estimates$output$nclusters) - 1 } coefficients <- estimates$arguments$eventstudy_coefficients From de4d7bf3d4b1d6787f9505d49efafa261c3691b2 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:11:04 -0400 Subject: [PATCH 10/67] #42 cl --- R/EventStudyPlot.R | 9 ++---- issue/testing.r | 68 +++++++++++++++++++++++----------------------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 5f0c0fda..a83c68b6 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -170,21 +170,18 @@ EventStudyPlot <- function(estimates, if (pre_event_coeffs & post_event_coeffs) { text_caption <- paste0(text_pretrends, " -- ", text_levelingoff) - } else if (pre_event_coeffs & !post_event_coeffs) { text_caption <- text_pretrends - } else if (!pre_event_coeffs & post_event_coeffs) { text_caption <- text_levelingoff - } } else { text_caption <- NULL } - - df_plt <- PreparePlottingData(df_estimates_tidy, policyvar, - post, overidpost, pre, overidpre, normalization_column, proxyIV) + df_plt <- PreparePlottingData( + df_estimates_tidy, policyvar, + post, overidpost, pre, overidpre, normalization_column, proxyIV) # Construct y breaks ------------------------------------------------------ diff --git a/issue/testing.r 
b/issue/testing.r index 93c0b60a..105f566a 100644 --- a/issue/testing.r +++ b/issue/testing.r @@ -78,9 +78,9 @@ PrepareModelFormulaFEOLS <- function(outcomevar, str_policy_vars, EventStudyFEOLS <- function(formula, prepared_data, idvar, timevar, FE, TFE, cluster) { - + cluster = ifelse(cluster, idvar, NULL) - + ols_output <- fixest::feols( fml = formula, data = prepared_data, @@ -91,52 +91,52 @@ EventStudyFEOLS <- function(formula, prepared_data, output_feols <- EventStudy( - estimator = 'feols', - data, - outcomevar, - policyvar, - idvar, - timevar, + estimator = 'feols', + data, + outcomevar, + policyvar, + idvar, + timevar, controls = NULL, - proxy = NULL, - proxyIV = NULL, - FE = TRUE, - TFE = TRUE, - post, - overidpost = 1, + proxy = NULL, + proxyIV = NULL, + FE = TRUE, + TFE = TRUE, + post, + overidpost = 1, pre, overidpre = post + pre, - normalize = -1 * (pre + 1), - cluster = TRUE, + normalize = -1 * (pre + 1), + cluster = TRUE, anticipation_effects_normalization = TRUE, - allow_duplicate_id = FALSE, + allow_duplicate_id = FALSE, avoid_internal_copy = FALSE ) output <- EventStudy( - estimator = 'OLS', - data, - outcomevar, - policyvar, - idvar, - timevar, + estimator = 'OLS', + data, + outcomevar, + policyvar, + idvar, + timevar, controls = NULL, - proxy = NULL, - proxyIV = NULL, - FE = TRUE, - TFE = TRUE, - post, - overidpost = 1, + proxy = NULL, + proxyIV = NULL, + FE = TRUE, + TFE = TRUE, + post, + overidpost = 1, pre, overidpre = post + pre, - normalize = -1 * (pre + 1), - cluster = TRUE, + normalize = -1 * (pre + 1), + cluster = TRUE, anticipation_effects_normalization = TRUE, - allow_duplicate_id = FALSE, + allow_duplicate_id = FALSE, avoid_internal_copy = FALSE ) EventStudyPlot(output) -EventStudyPlotFixest(output_feols) +EventStudyPlot(output_feols) estimates = output xtitle = "Event time" @@ -146,7 +146,7 @@ conf_level = .95 supt = .95 num_sim = 1000 add_mean = FALSE -pre_event_coeffs = TRUE +pre_event_coeffs = TRUE post_event_coeffs = TRUE 
add_zero_line = TRUE smpath = FALSE From 360cdcd52abccc1b8b3f3227447f7b90fa20bdfe Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:14:37 -0400 Subject: [PATCH 11/67] #42 cl rename objects --- R/AddSuptBand.R | 10 +++++----- R/EventStudyPlot.R | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index 1f9eb646..95375415 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -47,18 +47,18 @@ #' eventstudy_coefficients = eventstudy_estimates$arguments$eventstudy_coefficients #') -AddSuptBand <- function(df_estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { +AddSuptBand <- function(model_estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { - if (! class(df_estimates) %in% c("lm_robust", "iv_robust", "fixest")) { + if (! class(model_estimates) %in% c("lm_robust", "iv_robust", "fixest")) { stop("estimates is not a data frame with coefficient estimates and standard errors") } if (! is.numeric(num_sim) | num_sim %% 1 != 0 | num_sim <= 0) {stop("num_sim should be a natural number.")} if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} if (! 
is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character.")} - fixest = class(df_estimates) == "fixest" + fixest = class(model_estimates) == "fixest" - vcov_matrix_all <- if(fixest){vcov(df_estimates)} else {df_estimates$vcov} + vcov_matrix_all <- if(fixest){vcov(model_estimates)} else {model_estimates$vcov} v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] @@ -77,7 +77,7 @@ AddSuptBand <- function(df_estimates, num_sim = 1000, conf_level = .95, eventstu critical_value = t[floor(conf_level_num_sim) + 1] } - df_estimates_tidy <- if(fixest){estimatr::tidy(df_estimates)} else {broom::tidy(df_estimates)} + df_estimates_tidy <- if(fixest){estimatr::tidy(model_estimates)} else {broom::tidy(model_estimates)} df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index a83c68b6..500459f4 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -121,9 +121,9 @@ EventStudyPlot <- function(estimates, # Estimation Elements ----------------------------------------------------- - df_estimates <- estimates$output + model_estimates <- estimates$output - static_model <- length(coef(df_estimates)) == 1 + static_model <- length(coef(model_estimates)) == 1 if (static_model) { stop("EventStudyPlot() does not support static models.") } @@ -145,7 +145,7 @@ EventStudyPlot <- function(estimates, plot_supt <- if(!is.null(supt)) TRUE else FALSE if (plot_supt) { - df_estimates_tidy <- AddSuptBand(df_estimates, num_sim = 1000, conf_level = supt, + df_estimates_tidy <- AddSuptBand(model_estimates, num_sim = 1000, conf_level = supt, eventstudy_coefficients = eventstudy_coefficients) } From ca4255b00b15606942a83992a9b3fd13c68ae45c Mon Sep 17 
00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:19:00 -0400 Subject: [PATCH 12/67] #42 adapt AddZerosCovar to fixest --- R/EventStudyPlot.R | 12 ++++++++---- R/SmPathHelpers.R | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 500459f4..0ad6ebbb 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -265,10 +265,14 @@ EventStudyPlot <- function(estimates, coefficients <- df_plt$estimate # Add column and row in matrix of coefficients in index of norm columns - covar <- AddZerosCovar(estimates$output$vcov, - eventstudy_coefficients, - df_plt[df_plt$estimate==0, ]$term, - df_plt$term) + is_fixest <- class(model_estimates) == "fixest" + vcov <- if(is_fixest) {fixest::vcov(estimates$output)} else {estimates$output$vcov} + covar <- AddZerosCovar( + vcov, + eventstudy_coefficients, + df_plt[df_plt$estimate == 0, ]$term, + df_plt$term + ) inv_covar <- pracma::pinv(covar) diff --git a/R/SmPathHelpers.R b/R/SmPathHelpers.R index 70a08b33..b9674ccc 100644 --- a/R/SmPathHelpers.R +++ b/R/SmPathHelpers.R @@ -1,9 +1,9 @@ # Add zero where normalized coefficient(s) should be in covar matrix -AddZerosCovar <- function(vcov_matrix_all, eventstudy_coeffs, norm_column, +AddZerosCovar <- function(vcov, eventstudy_coeffs, norm_column, coeffs_order) { - v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coeffs - covar <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] + v_terms_to_keep <- colnames(vcov) %in% eventstudy_coeffs + covar <- vcov[v_terms_to_keep, v_terms_to_keep] n_coefs = length(coeffs_order) needed_zeros = length(norm_column) From 8d9afa0cdbba87ba131c98b2ef5f679d8b313697 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 10:57:38 -0400 Subject: [PATCH 13/67] #42 fx bug --- R/EventStudyPlot.R | 7 ++++--- issue/testing.r | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) 
diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 0ad6ebbb..ecfc6373 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -122,6 +122,7 @@ EventStudyPlot <- function(estimates, # Estimation Elements ----------------------------------------------------- model_estimates <- estimates$output + model_estimates_tidy <- estimatr::tidy(estimates$output) static_model <- length(coef(model_estimates)) == 1 if (static_model) { @@ -145,7 +146,7 @@ EventStudyPlot <- function(estimates, plot_supt <- if(!is.null(supt)) TRUE else FALSE if (plot_supt) { - df_estimates_tidy <- AddSuptBand(model_estimates, num_sim = 1000, conf_level = supt, + model_estimates_tidy <- AddSuptBand(model_estimates, num_sim = 1000, conf_level = supt, eventstudy_coefficients = eventstudy_coefficients) } @@ -153,7 +154,7 @@ EventStudyPlot <- function(estimates, if (plot_CI) { - df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level) + model_estimates_tidy <- AddCIs(model_estimates_tidy, eventstudy_coefficients, conf_level) } # Optionally Test For Pretrends/Levelling-Off ----------------------------- @@ -180,7 +181,7 @@ EventStudyPlot <- function(estimates, } df_plt <- PreparePlottingData( - df_estimates_tidy, policyvar, + model_estimates_tidy, policyvar, post, overidpost, pre, overidpre, normalization_column, proxyIV) # Construct y breaks ------------------------------------------------------ diff --git a/issue/testing.r b/issue/testing.r index 105f566a..eadb93a4 100644 --- a/issue/testing.r +++ b/issue/testing.r @@ -1,6 +1,7 @@ library(haven) library(data.table) library(tidyverse) +devtools::load_all() indir <- 'examples/source/raw/eventstudy_illustration_data/orig' data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) |> as.data.table() From 709e9f420aeefd46dab812d2271c9f2686a72c7c Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 11:00:18 -0400 Subject: [PATCH 14/67] #42 bd --- 
R/EventStudy.R | 2 +- R/EventStudyPlotFixest.R | 20 +++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index a360231a..af42c17c 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -148,7 +148,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { # Check for errors in arguments - if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} + if (! estimator %in% c("OLS", 'feols', "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} for (var in c(idvar, timevar, outcomevar, policyvar)) { if ((! is.character(var))) { diff --git a/R/EventStudyPlotFixest.R b/R/EventStudyPlotFixest.R index 7b0b508b..c386c660 100644 --- a/R/EventStudyPlotFixest.R +++ b/R/EventStudyPlotFixest.R @@ -121,10 +121,7 @@ EventStudyPlotFixest <- function(estimates, # Estimation Elements ----------------------------------------------------- - df_estimates_fixest <- estimates$output # this is class lm_robust - TODO # also need to rename - this is not df - df_estimates_tidy <- estimatr::tidy(estimates$output) # this is class data.frame - - static_model <- nrow(df_estimates_tidy) == 1 + static_model <- length(coef(df_estimates)) == 1 if (static_model) { stop("EventStudyPlot() does not support static models.") } @@ -153,13 +150,15 @@ EventStudyPlotFixest <- function(estimates, plot_CI <- if(!is.null(conf_level)) TRUE else FALSE if (plot_CI) { - df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level) } # Optionally Test For Pretrends/Levelling-Off ----------------------------- - df_test_linear <- TestLinear(estimates = estimates, pretrends = pre_event_coeffs, leveling_off = post_event_coeffs) + df_test_linear <- TestLinear( + estimates = estimates, + pretrends = pre_event_coeffs, + leveling_off = post_event_coeffs) if 
((pre_event_coeffs | post_event_coeffs)) { pretrends_p_value <- df_test_linear[df_test_linear["Test"] == "Pre-Trends", "p.value"] @@ -168,22 +167,17 @@ EventStudyPlotFixest <- function(estimates, text_pretrends <- paste0("Pretrends p-value = ", round(pretrends_p_value, 2)) text_levelingoff <- paste0("Leveling off p-value = ", round(levelingoff_p_value, 2)) - if (pre_event_coeffs & post_event_coeffs) { text_caption <- paste0(text_pretrends, " -- ", text_levelingoff) - } else if (pre_event_coeffs & !post_event_coeffs) { text_caption <- text_pretrends - } else if (!pre_event_coeffs & post_event_coeffs) { text_caption <- text_levelingoff - - } + } # TODO: switch to `switch` for conciseness } else { text_caption <- NULL } - df_plt <- PreparePlottingData(df_estimates_tidy, policyvar, post, overidpost, pre, overidpre, normalization_column, proxyIV) @@ -269,7 +263,7 @@ EventStudyPlotFixest <- function(estimates, coefficients <- df_plt$estimate # Add column and row in matrix of coefficients in index of norm columns - covar <- AddZerosCovar(estimates$output$vcov, + covar <- AddZerosCovar(estimates$output$vcov, #TODO: make sure this is of class matrix / array eventstudy_coefficients, df_plt[df_plt$estimate==0, ]$term, df_plt$term) From ac6aa6d9191650bb7db475c2284e2bc5f118462e Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 11:06:38 -0400 Subject: [PATCH 15/67] #42 cl drop legacy EventStudyPlotFixest.R --- R/EventStudyPlotFixest.R | 321 --------------------------------------- 1 file changed, 321 deletions(-) delete mode 100644 R/EventStudyPlotFixest.R diff --git a/R/EventStudyPlotFixest.R b/R/EventStudyPlotFixest.R deleted file mode 100644 index c386c660..00000000 --- a/R/EventStudyPlotFixest.R +++ /dev/null @@ -1,321 +0,0 @@ -#' Creates an Event-Study Plot Following the Suggestions in Freyaldenhoven et al. 
(2021) -#' -#' @description `EventStudyPlot` takes the output from [EventStudy()] and combines it with additional optional arguments to facilitate constructing an Event-Study Plot. -#' -#' @param estimates The output from calling [EventStudy()]. Should be a list of length 2. -#' @param xtitle The title for the x-axis. Should be a string. Defaults to "Event time". -#' @param ytitle The title for the y-axis. Should be a string. Defaults to "Coefficient". -#' @param ybreaks A vector containing the desired breaks for the y-axis. -#' Defaults to NULL, which means the breaks are computed automatically. -#' If custom breaks are selected with the `add_mean` argument set to TRUE, then the breaks must include zero. -#' @param conf_level Confidence level used for confidence interval -#' expressed as a real number between 0 and 1, inclusive. Defaults to 0.95. -#' @param supt The confidence level used for obtaining the sup-t bands critical value. -#' Should be a real number between 0 and 1, inclusive. Defaults to .95. Sup-t bands are simulation-based, -#' so you must set a seed if you would like your sup-t band results to be reproducible (see examples). -#' @param num_sim The number of simulations used in generating the sup-t bands. -#' Should be a natural number. Defaults to 1000. -#' @param add_mean Adds the mean of the dependent variable in the period used for normalization. -#' Should be TRUE or FALSE. Defaults to FALSE. -#' @param pre_event_coeffs If TRUE, uses pre and overidpre from estimates to test for pre-trends. -#' Should be TRUE or FALSE. Defaults to TRUE. -#' @param post_event_coeffs If TRUE, uses post and overidpost from estimates to test for leveling-off. -#' Should be TRUE or FALSE. Defaults to TRUE. -#' @param add_zero_line Whether or not to plot a dashed horizontal line at y = 0. -#' Should be TRUE or FALSE. Defaults to TRUE, meaning the line is plotted. -#' @param smpath Plot smoothest path of confounder that rationalizes event study coefficients. 
-#' Should be TRUE or FALSE. Defaults to FALSE. -#' -#' @return The Event-Study plot as a ggplot2 object. -#' @import ggplot2 dplyr -#' @import estimatr -#' @importFrom rlang .data -#' @importFrom data.table setorder -#' @export -#' -#' @examples -#' -#' # -#' -#' # Minimal examples -#' ### OLS -#' -#' estimates_ols <- EventStudy( -#' estimator = "OLS", -#' data = example_data, -#' outcomevar = "y_smooth_m", -#' policyvar = "z", -#' idvar = "id", -#' timevar = "t", -#' controls = "x_r", -#' FE = TRUE, TFE = TRUE, -#' post = 3, overidpost = 5, -#' pre = 2, overidpre = 4, -#' normalize = - 3 -#' ) -#' -#' plt_ols <- EventStudyPlot(estimates = estimates_ols) -#' plt_ols -#' -#' ### IV -#' -#' estimates_fhs <- EventStudy( -#' estimator = "FHS", -#' data = example_data, -#' outcomevar = "y_smooth_m", -#' policyvar = "z", -#' idvar = "id", -#' timevar = "t", -#' proxy = "x_r", -#' post = 2, overidpost = 1, -#' pre = 0, overidpre = 3, -#' normalize = -1 -#' ) -#' -#' plt_fhs <- EventStudyPlot(estimates = estimates_fhs) -#' plt_fhs -#' -#' # Optional arguments -#' -#' ### Change x- and y-axis titles and set ybreaks -#' EventStudyPlot(estimates = estimates_ols, -#' xtitle = "Relative time", ytitle = "", -#' ybreaks = seq(-2, 1, 0.5)) -#' -#' ### Add smoothest path -#' EventStudyPlot(estimates = estimates_ols, smpath = TRUE) -#' -#' ### Add y-mean to y-axis and line y = 0 -#' EventStudyPlot(estimates = estimates_ols, add_mean = TRUE, -#' add_zero_line = TRUE) -#' -#' ### Do not plot supt bands -#' EventStudyPlot(estimates = estimates_ols, supt = NULL) -#' -#' ### Setting seed prior to plotting sup-t bands -#' set.seed(1234) -#' EventStudyPlot(estimates = estimates_ols) -#' -#' # Modify plots using ggplot2 functions -#' library(ggplot2) -#' -#' ### Change color of dots, horizontal line, and theme -#' plt_ols + -#' geom_point(color = "red") + -#' geom_hline(color = "gray", yintercept = 0) + -#' theme_light() + -#' theme(panel.grid.minor.x = element_blank()) -#' - 
-EventStudyPlotFixest <- function(estimates, - xtitle = "Event time", ytitle = "Coefficient", ybreaks = NULL, - conf_level = .95, supt = .95, num_sim = 1000, add_mean = FALSE, - pre_event_coeffs = TRUE, post_event_coeffs = TRUE, - add_zero_line = TRUE, smpath = FALSE) { - - if (!is.character(xtitle)) {stop("Argument 'xtitle' should be a character.")} - if (!is.character(ytitle)) {stop("Argument 'ytitle' should be a character.")} - if (!is.logical(add_zero_line)) {stop("Argument 'add_zero_line' should be either TRUE or FALSE.")} - if (!is.null(ybreaks) & - !is.numeric(ybreaks)) {stop("Argument 'ybreaks' should be NULL or a numeric vector.")} - -# Estimation Elements ----------------------------------------------------- - - static_model <- length(coef(df_estimates)) == 1 - if (static_model) { - stop("EventStudyPlot() does not support static models.") - } - - df_data <- estimates$arguments$data - outcomevar <- estimates$arguments$outcomevar - policyvar <- estimates$arguments$policyvar - post <- estimates$arguments$post - overidpost <- estimates$arguments$overidpost - pre <- estimates$arguments$pre - overidpre <- estimates$arguments$overidpre - normalize <- estimates$arguments$normalize - normalization_column <- estimates$arguments$normalization_column - eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients - proxyIV <- estimates$arguments$proxyIV - -# Optionally Add Suptbands/Confidence Intervals --------------------------- - - plot_supt <- if(!is.null(supt)) TRUE else FALSE - - if (plot_supt) { - df_estimates_tidy <- AddSuptBand(df_estimates, num_sim = 1000, conf_level = supt, - eventstudy_coefficients = eventstudy_coefficients) - } - - plot_CI <- if(!is.null(conf_level)) TRUE else FALSE - - if (plot_CI) { - df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level) - } - -# Optionally Test For Pretrends/Levelling-Off ----------------------------- - - df_test_linear <- TestLinear( - estimates = estimates, - pretrends = 
pre_event_coeffs, - leveling_off = post_event_coeffs) - - if ((pre_event_coeffs | post_event_coeffs)) { - pretrends_p_value <- df_test_linear[df_test_linear["Test"] == "Pre-Trends", "p.value"] - levelingoff_p_value <- df_test_linear[df_test_linear["Test"] == "Leveling-Off", "p.value"] - - text_pretrends <- paste0("Pretrends p-value = ", round(pretrends_p_value, 2)) - text_levelingoff <- paste0("Leveling off p-value = ", round(levelingoff_p_value, 2)) - - if (pre_event_coeffs & post_event_coeffs) { - text_caption <- paste0(text_pretrends, " -- ", text_levelingoff) - } else if (pre_event_coeffs & !post_event_coeffs) { - text_caption <- text_pretrends - } else if (!pre_event_coeffs & post_event_coeffs) { - text_caption <- text_levelingoff - } # TODO: switch to `switch` for conciseness - } else { - text_caption <- NULL - } - - df_plt <- PreparePlottingData(df_estimates_tidy, policyvar, - post, overidpost, pre, overidpre, normalization_column, proxyIV) - -# Construct y breaks ------------------------------------------------------ - - if (!is.null(ybreaks)) { - if (!(0 %in% ybreaks) & add_mean) { - stop("If you want to add the mean of y in the y-axis then 'ybreaks' must include 0.") - } - - ylabels <- ybreaks - ylims <- c(min(ybreaks), max(ybreaks)) - } else { - min_value <- min(c(df_plt$estimate, df_plt$ci_lower, df_plt$suptband_lower), na.rm = T) - max_value <- max(c(df_plt$estimate, df_plt$ci_upper, df_plt$suptband_upper), na.rm = T) - max_abs <- max(abs(min_value), abs(max_value)) - - magnitude <- 10^floor(log10(max_abs)) - - # Determine step depending on how far the endpoints are from the magnitude - mean_ratio <- mean(c(abs(min_value)/magnitude, max_value/magnitude)) - if (mean_ratio > 6.67) { - step = 3*magnitude - } else if (mean_ratio > 3.33) { - step = 2*magnitude - } else { - step = magnitude - } - - # Pick multiples of step to ensure zero is included - close_to_min <- floor(min_value/step)*step - close_to_max <- ceiling(max_value/step)*step - - ybreaks <- 
seq(close_to_min, close_to_max, step) - ylims <- c(min(ybreaks), max(ybreaks)) - - if (length(ybreaks) >= 9) { - # Too many breaks, double step size - step <- step*2 - close_to_min <- floor(min_value/step)*step - close_to_max <- ceiling(max_value/step)*step - - ybreaks <- seq(close_to_min, close_to_max, step) - } else if (length(ybreaks) <= 3) { - # Too few breaks, halve step size - step <- step/2 - close_to_min <- floor(min_value/step)*step - close_to_max <- ceiling(max_value/step)*step - - ybreaks <- seq(close_to_min, close_to_max, step) - } - ylabels <- ybreaks - } - -# Optionally Adds Mean ---------------------------------------------------- - - if (add_mean) { - - y_mean <- AddMeans(df_data, normalization_column, policyvar, outcomevar) - - index_zero <- which(ybreaks == 0) - ylabels[index_zero] <- paste0(ylabels[index_zero], " (", round(y_mean, 2), ")") - } - -# Optionally Add smooth path ---------------------------------------------- - - # Order coefficients - label_var = "label" - data.table::setorderv(df_plt, c(label_var)) - ordered_labels <- df_plt$label - - if (smpath) { - - unselect_message <- "Please change the 'Smpath' argument in 'EventStudyPlot' to FALSE." 
- - if (!is.null(proxyIV)) { - if (sum(df_plt$estimate == 0) > 2) { - stop(paste0("The smoothest path is not supported for the FHS estimator with more than one instrument.", - unselect_message)) - } - } - - coefficients <- df_plt$estimate - - # Add column and row in matrix of coefficients in index of norm columns - covar <- AddZerosCovar(estimates$output$vcov, #TODO: make sure this is of class matrix / array - eventstudy_coefficients, - df_plt[df_plt$estimate==0, ]$term, - df_plt$term) - - inv_covar <- pracma::pinv(covar) - - df_plt <- AddSmPath(df_plt, coefficients, inv_covar) - } - -# Construct Plot ---------------------------------------------------------- - - df_plt$label_num <- as.numeric(gsub("+", "", df_plt$label, fixed = T)) - - plt <- ggplot(df_plt, - aes(x = .data$label_num, y = .data$estimate)) - - if (add_zero_line) { - plt <- plt + - geom_hline(yintercept = 0, - color = "green", linetype = "dashed") - } - if (plot_supt) { - plt <- plt + - geom_linerange(aes(ymin = .data$suptband_lower, - ymax = .data$suptband_upper), - data = df_plt[df_plt$estimate != 0,]) - } - if (plot_CI) { - plt <- plt + - geom_errorbar(aes(ymin = .data$ci_lower, - ymax = .data$ci_upper), - data = df_plt[df_plt$estimate != 0,], - width = .2) - } - if (smpath) { - plt <- plt + - geom_line(aes(y = .data$smoothest_path, group = 1), - color = "black") - } - - plt <- plt + - geom_point(color = "#006600") + - scale_x_continuous(breaks = min(df_plt$label_num):max(df_plt$label_num), - labels = ordered_labels) + - scale_y_continuous(breaks = ybreaks, - labels = ylabels, - limits = ylims) + - labs(x = xtitle, y = ytitle, - caption = text_caption) + - theme_bw() + - theme(panel.grid = element_blank(), - plot.caption = element_text(hjust = 0)) - - return(plt) -} From 3a1d31860168cc7bde5958cc838b93fdb50a0c7b Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 11:14:43 -0400 Subject: [PATCH 16/67] #42 bd migrate feols functions to 
package from testing.r --- R/EventStudy.R | 6 +--- R/EventStudyOLS.R | 14 ++++++++ R/PrepareModelFormula.R | 36 +++++++++++++++++++ issue/testing.r | 78 ----------------------------------------- 4 files changed, 51 insertions(+), 83 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index af42c17c..a3d6e638 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -226,7 +226,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c detect_holes <- function(dt, idvar, timevar) { holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar) - ][, list(holes = any(base::diff(base::get(timevar)) != 1)), + ][, list(holes = any(base::diff(base::get(timevar)) != 1)), by = c(idvar)] return(any(holes_per_id$holes)) @@ -342,10 +342,6 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- str_policy_vars } else if (estimator == "feols") { - # TODO: implement feols estimator - # Then adapt EventStudyPlot to use feols - # As well as ensure summary($output) works - # Building / modifying tests should be the LAST step formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, controls, proxy, proxyIV, idvar, timevar, FE, TFE) diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index a0258bf5..ee8bd89d 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -123,3 +123,17 @@ EventStudyOLS <- function(prepared_model_formula, prepared_data, return(ols_output) } + +EventStudyFEOLS <- function(formula, prepared_data, + idvar, timevar, FE, TFE, cluster) { + + cluster = ifelse(cluster, idvar, NULL) + + ols_output <- fixest::feols( + fml = formula, + data = prepared_data, + cluster = cluster + ) + return(ols_output) +} + diff --git a/R/PrepareModelFormula.R b/R/PrepareModelFormula.R index c4962042..c730f3e9 100644 --- a/R/PrepareModelFormula.R +++ b/R/PrepareModelFormula.R @@ -76,3 +76,39 @@ PrepareModelFormula <- 
function(estimator, outcomevar, return(reg_formula) } + + +PrepareModelFormulaFEOLS <- function(outcomevar, str_policy_vars, + controls = NULL, proxy = NULL, proxyIV = NULL, + idvar = NULL, timevar = NULL, FE = FALSE, TFE = FALSE) { + stopifnot(!is.null(idvar)) + stopifnot(!is.null(timevar)) + + regressors <- c(str_policy_vars, controls) + + if (FE | TFE) { + fes <- c() + if (FE) { + fes <- c(fes, idvar) + } + if (TFE) { + fes <- c(fes, timevar) + } + + formula_str <- paste( + outcomevar, + "~", + paste(regressors, collapse = " + "), + "|", + paste(fes, collapse = " + ") + ) + formula <- stats::as.formula(formula_str) + } else { + formula <- stats::reformulate( + termlabels = regressors, + response = outcomevar, + intercept = TRUE + ) + } + return(formula) +} diff --git a/issue/testing.r b/issue/testing.r index eadb93a4..d2ad86f6 100644 --- a/issue/testing.r +++ b/issue/testing.r @@ -26,70 +26,6 @@ allow_duplicate_id <- FALSE avoid_internal_copy <- FALSE kernel <- "estimatr" -detect_holes <- function(dt, idvar, timevar) { - holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar) - ][, list(holes = any(base::diff(base::get(timevar)) != 1)), - by = c(idvar)] - - return(any(holes_per_id$holes)) -} - -if (detect_holes(data, idvar, timevar)) { - warning(paste0("Note: gaps of more than one unit in the time variable '", timevar, "' were detected. 
", - "Treating these as gaps in the panel dimension.")) - timevar_holes <- TRUE -} else { - timevar_holes <- FALSE -} - -PrepareModelFormulaFEOLS <- function(outcomevar, str_policy_vars, - controls = NULL, proxy = NULL, proxyIV = NULL, - idvar = NULL, timevar = NULL, FE = FALSE, TFE = FALSE) { - stopifnot(!is.null(idvar)) - stopifnot(!is.null(timevar)) - - regressors <- c(str_policy_vars, controls) - - if (FE | TFE) { - fes <- c() - if (FE) { - fes <- c(fes, idvar) - } - if (TFE) { - fes <- c(fes, timevar) - } - - formula_str <- paste( - outcomevar, - "~", - paste(regressors, collapse = " + "), - "|", - paste(fes, collapse = " + ") - ) - formula <- stats::as.formula(formula_str) - } else { - formula <- stats::reformulate( - termlabels = regressors, - response = outcomevar, - intercept = TRUE - ) - } - return(formula) -} - -EventStudyFEOLS <- function(formula, prepared_data, - idvar, timevar, FE, TFE, cluster) { - - cluster = ifelse(cluster, idvar, NULL) - - ols_output <- fixest::feols( - fml = formula, - data = prepared_data, - cluster = cluster - ) - return(ols_output) -} - output_feols <- EventStudy( estimator = 'feols', @@ -138,17 +74,3 @@ output <- EventStudy( EventStudyPlot(output) EventStudyPlot(output_feols) - -estimates = output -xtitle = "Event time" -ytitle = "Coefficient" -ybreaks = NULL -conf_level = .95 -supt = .95 -num_sim = 1000 -add_mean = FALSE -pre_event_coeffs = TRUE -post_event_coeffs = TRUE -add_zero_line = TRUE -smpath = FALSE - From 909175cd83bcf9a2c0aacaaeba04f9a04f68cab3 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 11:37:13 -0400 Subject: [PATCH 17/67] #42 fx bug --- R/EventStudyOLS.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index ee8bd89d..b6b2d1bf 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -127,12 +127,12 @@ EventStudyOLS <- function(prepared_model_formula, prepared_data, EventStudyFEOLS <- 
function(formula, prepared_data, idvar, timevar, FE, TFE, cluster) { - cluster = ifelse(cluster, idvar, NULL) + cluster_var = if(cluster) idvar else NULL ols_output <- fixest::feols( fml = formula, data = prepared_data, - cluster = cluster + cluster = cluster_var ) return(ols_output) } From 3553ef1f3aac8abbec54aaad4e53b3e4a67eb2da Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:00:16 -0400 Subject: [PATCH 18/67] #42 bd feols tests --- tests/testthat/test-EventStudy.R | 85 ++++++ tests/testthat/test-EventStudyFEOLS.R | 374 ++++++++++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 tests/testthat/test-EventStudyFEOLS.R diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 5c094dd3..bd6fba6b 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -296,6 +296,91 @@ test_that("removes the correct column when normalize = post + overidpost", { expect_true(!normalization_column %in% shiftvalues) }) +# feols --------------------------------------------------------------------- + +test_that("feols estimator FE works", { + + outputs <- suppressWarnings( + EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE) + ) + + expect_true(class(outputs$output) == "fixest") + expect_true(all(outputs$output$fixef_vars == c("id", "t"))) +}) + +test_that("feols estimator with no fixed effects works", { + + outputs <- EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = FALSE, TFE = FALSE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, 
anticipation_effects_normalization = TRUE) + + expect_true(class(outputs$output) == "fixest") + expect_true(is.null(outputs$output$fixef_vars)) +}) + +test_that("feols estimator with only time fixed effects works", { + + outputs <- EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = FALSE, TFE = TRUE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE) + + expect_true(class(outputs$output) == "fixest") + expect_true(outputs$output$fixef_vars == "t") +}) + +test_that("feols estimator with only unit FE works", { + + outputs <- EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = FALSE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE) + + expect_true(class(outputs$output) == "fixest") + expect_true(outputs$output$fixef_vars == "id") +}) + +test_that("feols estimator coefficients match OLS coefficients", { + + outputs_ols <- suppressWarnings( + EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE) + ) + + outputs_feols <- suppressWarnings( + EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = TRUE, + post = 2, pre = 3, overidpre = 4, + overidpost = 11, normalize = - 1, + cluster = TRUE, anticipation_effects_normalization = TRUE) + ) + + coef_ols <- coef(outputs_ols$output) + se_ols <- outputs_ols$output$std.error + + coef_feols <- coef(outputs_feols$output) + 
se_feols <- fixest::se(outputs_feols$output) + + expect_true(all(abs(coef_feols - coef_ols) <= 1e-6 + 1e-6 * abs(coef_ols))) +}) + # FHS --------------------------------------------------------------------- test_that("correctly creates highest order leads and shiftvalues", { diff --git a/tests/testthat/test-EventStudyFEOLS.R b/tests/testthat/test-EventStudyFEOLS.R new file mode 100644 index 00000000..7a3c2602 --- /dev/null +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -0,0 +1,374 @@ +test_that("FE = TRUE, + TFE = TRUE, + cluster = TRUE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- TRUE + TFE <- TRUE + cluster <- TRUE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if (FE & TFE & cluster) { + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + expect_true(all(reg$fixef_vars == c(idvar, timevar))) + expect_true(length(reg$fixef_sizes) >= 1) + +}) + +test_that("FE = FALSE, + TFE = TRUE, + cluster = TRUE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- FALSE + TFE <- TRUE + cluster <- TRUE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if ((!FE) & TFE & cluster) { + + reg <- EventStudyFEOLS( + formula 
= event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + expect_true(reg$fixef_vars == timevar) + expect_true(length(reg$fixef_sizes) >= 1) + +}) + +test_that("FE = TRUE, + TFE = FALSE, + cluster = TRUE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- TRUE + TFE <- FALSE + cluster <- TRUE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if (FE & (!TFE) & cluster) { + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + expect_true(reg$fixef_vars == idvar) + expect_true(length(reg$fixef_sizes) >= 1) + +}) + +test_that("FE = FALSE, + TFE = FALSE, + cluster = TRUE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- FALSE + TFE <- FALSE + cluster <- TRUE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if ((!FE) & (!TFE) & cluster) { + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + 
expect_true(is.null(reg$fixef_vars)) + +}) + +test_that("FE = TRUE, + TFE = TRUE, + cluster = FALSE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- TRUE + TFE <- TRUE + cluster <- FALSE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if (FE & TFE & (!cluster)) { + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + expect_true(all(reg$fixef_vars == c(idvar, timevar))) + expect_true(length(reg$fixef_sizes) >= 1) + +}) + +test_that("FE = FALSE, + TFE = TRUE, + cluster = FALSE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- FALSE + TFE <- TRUE + cluster <- FALSE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if ((!FE) & TFE & (!cluster)) { + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + expect_true(reg$fixef_vars == timevar) + expect_true(length(reg$fixef_sizes) >= 1) + +}) + +test_that("FE = TRUE, + TFE = FALSE, + cluster = FALSE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + 
idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- TRUE + TFE <- FALSE + cluster <- FALSE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if (FE & (!TFE) & (!cluster)) { + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + expect_true(reg$fixef_vars == idvar) + expect_true(length(reg$fixef_sizes) >= 1) + +}) + +test_that("FE = FALSE, + TFE = FALSE, + cluster = FALSE works", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- FALSE + TFE <- FALSE + cluster <- FALSE + + event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + if ((!FE) & (!TFE) & (!cluster)) { + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + } + + expect_true(class(reg) == "fixest") + expect_true(is.null(reg$fixef_vars)) + +}) + +test_that("Coefficients and Standard Errors agree with STATA", { + + df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") + + idvar <- "id" + timevar <- "t" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + + FE <- TRUE + TFE <- TRUE + cluster <- TRUE + + 
event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls = controls, + idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + reg <- EventStudyFEOLS( + formula = event_study_formula, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + + df_test_STATA <- read.csv("./input/df_test_base_STATA.csv", col.names = c("term", "coef", "std_error")) + + # Get coefficients and standard errors + coef_feols <- coef(reg) + se_feols <- fixest::se(reg) + + epsilon <- 1e-6 + + expect_equal(unname(coef_feols["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[2]], tolerance = epsilon) + expect_equal(unname(coef_feols["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[2]], tolerance = epsilon) + expect_equal(unname(coef_feols["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["coef"]][1], tolerance = epsilon) + expect_equal(unname(coef_feols["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[2]], tolerance = epsilon) + expect_equal(unname(coef_feols["z_fd_lag2"]), df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[2]], tolerance = epsilon) + expect_equal(unname(coef_feols["z_lead3"]), -1 * df_test_STATA[df_test_STATA["term"] == "F3.z",][["coef"]], tolerance = epsilon) + expect_equal(unname(coef_feols["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[2]], tolerance = epsilon) + expect_equal(unname(coef_feols["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[2]], tolerance = epsilon) + + expect_equal(unname(se_feols["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[3]], tolerance = 2e-2) + expect_equal(unname(se_feols["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[3]], tolerance = 2e-2) + expect_equal(unname(se_feols["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["std_error"]][1], tolerance = 2e-2) + expect_equal(unname(se_feols["z_fd_lag1"]), 
df_test_STATA[df_test_STATA["term"] == "L.zfd",][[3]], tolerance = 2e-2) + expect_equal(unname(se_feols["z_fd_lag2"]), df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[3]], tolerance = 2e-2) + expect_equal(unname(se_feols["z_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.z",][["std_error"]], tolerance = 2e-2) + expect_equal(unname(se_feols["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[3]], tolerance = 2e-2) + expect_equal(unname(se_feols["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[3]], tolerance = 2e-2) + +}) From baa5e853f16c0336ef8882400a75fd73bcfd927a Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 13:14:07 -0400 Subject: [PATCH 19/67] #42 bd fhs first pass --- R/EventStudy.R | 27 +++++++++++++++-- R/EventStudyOLS.R | 40 +++++++++++++++++++++++++ R/PrepareModelFormula.R | 50 +++++++++++++++++++++++++++++++- tests/testthat/test-EventStudy.R | 46 +++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 3 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index a3d6e638..9ae41949 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -148,7 +148,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { # Check for errors in arguments - if (! estimator %in% c("OLS", 'feols', "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} + if (! estimator %in% c("OLS", 'feols', "FHS", "feols_FHS")) {stop("estimator should be either 'OLS', 'feols', 'FHS', or 'feols_FHS'.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} for (var in c(idvar, timevar, outcomevar, policyvar)) { if ((! 
is.character(var))) { @@ -343,7 +343,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c coefficients <- str_policy_vars } else if (estimator == "feols") { formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls, proxy, proxyIV, + controls, idvar, timevar, FE, TFE) output <- EventStudyFEOLS(formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- str_policy_vars @@ -370,6 +370,29 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) + } else if (estimator == "feols_FHS") { + + if (is.null(proxyIV)) { + Fstart <- 0 + str_fd_leads <- str_policy_vars[grepl("^z_fd_lead", str_policy_vars)] + + for (var in str_fd_leads) { + lm <- lm(data = data, formula = stats::reformulate(termlabels = var, response = proxy)) + Floop <- summary(lm)$fstatistic["value"] + if (Floop > Fstart) { + Fstart <- Floop + proxyIV <- var + } + } + message(paste0("Defaulting to strongest lead of differenced policy variable: proxyIV = ", proxyIV, + ". 
To specify a different proxyIV use the proxyIV argument.")) + } + + formula <- PrepareModelFormulaFEOLS_FHS(outcomevar, str_policy_vars, + controls, proxy, proxyIV, + idvar, timevar, FE, TFE) + output <- EventStudyFEOLS_FHS(formula, data, idvar, timevar, FE, TFE, cluster) + coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) } event_study_args <- list("estimator" = estimator, diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index b6b2d1bf..b0392243 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -137,3 +137,43 @@ EventStudyFEOLS <- function(formula, prepared_data, return(ols_output) } +EventStudyFEOLS_FHS <- function(formula, prepared_data, + idvar, timevar, FE, TFE, cluster) { + + cluster_var = if(cluster) idvar else NULL + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + cluster = cluster_var + ) + + # Apply the same standard error adjustments as EventStudyFHS + if (FE & TFE & cluster) { + N <- fhs_output$nobs + n <- fhs_output$fixef_sizes[1] # number of clusters (unique id values) + # For FE & TFE, K = number of time FEs + number of structural parameters + # But we need to be careful - fixest nparams includes the fitted endogenous variable + # Let's count the actual event study coefficients (excluding fit_xxx) + n_event_coefs <- length(grep("^z_", names(coef(fhs_output)))) - 1 # -1 for fit_eta_m + K <- fhs_output$fixef_sizes[2] + n_event_coefs # time FEs + structural params + + adjustment <- sqrt((N - K) / (N - n - K + 1)) + fhs_output$se <- fhs_output$se / adjustment + fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) + + } else if (FE & (!TFE) & cluster) { + N <- fhs_output$nobs + n <- fhs_output$fixef_sizes[1] # number of clusters + # For FE only, K = 1 (for the FE intercept?) 
+ structural parameters + n_event_coefs <- length(grep("^z_", names(coef(fhs_output)))) - 1 # -1 for fit_eta_m + K <- 1 + n_event_coefs + + adjustment <- sqrt((N - K) / (N - n - K + 1)) + fhs_output$se <- fhs_output$se / adjustment + fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) + } + + return(fhs_output) +} + diff --git a/R/PrepareModelFormula.R b/R/PrepareModelFormula.R index c730f3e9..9336abe6 100644 --- a/R/PrepareModelFormula.R +++ b/R/PrepareModelFormula.R @@ -79,7 +79,7 @@ PrepareModelFormula <- function(estimator, outcomevar, PrepareModelFormulaFEOLS <- function(outcomevar, str_policy_vars, - controls = NULL, proxy = NULL, proxyIV = NULL, + controls = NULL, idvar = NULL, timevar = NULL, FE = FALSE, TFE = FALSE) { stopifnot(!is.null(idvar)) stopifnot(!is.null(timevar)) @@ -112,3 +112,51 @@ PrepareModelFormulaFEOLS <- function(outcomevar, str_policy_vars, } return(formula) } + +PrepareModelFormulaFEOLS_FHS <- function(outcomevar, str_policy_vars, + controls = NULL, proxy = NULL, proxyIV = NULL, + idvar = NULL, timevar = NULL, FE = FALSE, TFE = FALSE) { + stopifnot(!is.null(idvar)) + stopifnot(!is.null(timevar)) + stopifnot(!is.null(proxy)) + stopifnot(!is.null(proxyIV)) + + exogenous <- c(str_policy_vars, controls) + exogenous <- exogenous[exogenous != proxy] + exogenous <- exogenous[exogenous != proxyIV] + + if (FE | TFE) { + fes <- c() + if (FE) { + fes <- c(fes, idvar) + } + if (TFE) { + fes <- c(fes, timevar) + } + + formula_str <- paste( + outcomevar, + "~", + paste(exogenous, collapse = " + "), + "|", + paste(fes, collapse = " + "), + "|", + proxy, + "~", + paste(c(exogenous, proxyIV), collapse = " + ") + ) + formula <- stats::as.formula(formula_str) + } else { + formula_str <- paste( + outcomevar, + "~", + paste(exogenous, collapse = " + "), + "|", + proxy, + "~", + paste(c(exogenous, proxyIV), collapse = " + ") + ) + formula <- stats::as.formula(formula_str) + } + return(formula) +} diff --git a/tests/testthat/test-EventStudy.R 
b/tests/testthat/test-EventStudy.R index bd6fba6b..c195fca6 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -597,6 +597,52 @@ test_that("proxyIV selection works", { ) }) +test_that("feols_FHS yields the same results as FHS", { + + # Run FHS estimator + fhs_result <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", + post = 2, pre = 1, overidpre = 2, overidpost = 3, + normalize = -2, cluster = TRUE) + + # Run feols_FHS estimator with same parameters + feols_fhs_result <- EventStudy(estimator = "feols_FHS", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", + post = 2, pre = 1, overidpre = 2, overidpost = 3, + normalize = -2, cluster = TRUE) + + # Extract coefficients + fhs_coefs <- coef(fhs_result$output) + feols_fhs_coefs <- coef(feols_fhs_result$output) + + # Extract standard errors + fhs_se <- fhs_result$output$std.error + feols_fhs_se <- fixest::se(feols_fhs_result$output) + + # For FHS, coefficients include the endogenous variable "eta_m" + # For feols_FHS, coefficients include "fit_eta_m" (fitted endogenous) and the event study vars + # The event study coefficients should match + fhs_event_coefs <- fhs_coefs[names(fhs_coefs) != "eta_m"] + feols_fhs_event_coefs <- feols_fhs_coefs[names(feols_fhs_coefs) != "fit_eta_m"] + + # Check that event study coefficients are the same (within tolerance) + expect_equal(fhs_event_coefs, feols_fhs_event_coefs, tolerance = 1e-10) + + # Check that the endogenous variable coefficients are the same + # (FHS reports coef of eta_m, feols_FHS reports coef of fit_eta_m) + expect_equal(unname(fhs_coefs["eta_m"]), unname(feols_fhs_coefs["fit_eta_m"]), tolerance = 1e-10) + + # Check that standard errors for event study vars are the same (within reasonable tolerance) + 
fhs_event_se <- fhs_se[names(fhs_se) != "eta_m"] + feols_fhs_event_se <- feols_fhs_se[names(feols_fhs_se) != "fit_eta_m"] + expect_equal(fhs_event_se, feols_fhs_event_se, tolerance = 1e-6) + + # Check that SE for endogenous variable is the same + expect_equal(unname(fhs_se["eta_m"]), unname(feols_fhs_se["fit_eta_m"]), tolerance = 1e-6) +}) + test_that("warning with correct normalize and pre is thrown when anticpation effects are allowed and anticipation_effects_normalization is TRUE", { expect_warning( From f059cae2e4fe49f999463a5630a11e07b3da6bad Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 14:23:30 -0400 Subject: [PATCH 20/67] #42 bd test fhs --- issue/eyeball_fhs.r | 76 +++++++++++++++++++++++++++++ tests/testthat/test-EventStudyFHS.R | 54 ++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 issue/eyeball_fhs.r diff --git a/issue/eyeball_fhs.r b/issue/eyeball_fhs.r new file mode 100644 index 00000000..da038972 --- /dev/null +++ b/issue/eyeball_fhs.r @@ -0,0 +1,76 @@ +library(haven) +library(data.table) +library(tidyverse) +devtools::load_all() + +indir <- 'examples/source/raw/eventstudy_illustration_data/orig' +data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) |> as.data.table() +outcomevar <- "y_base" +policyvar <- "z" +idvar <- "id" +timevar <- "t" +controls <- "x_r" +proxy <- "eta_m" +proxyIV <- NULL +FE <- TRUE +TFE <- TRUE +post <- 2 +pre <- 2 +overidpost <- 1 +overidpre <- post + pre +normalize <- -1 * (pre + 1) +cluster <- TRUE +anticipation_effects_normalization <- TRUE +allow_duplicate_id <- FALSE +avoid_internal_copy <- FALSE + + +output_feols_fhs <- EventStudy( + estimator = 'feols_FHS', + data, + outcomevar, + policyvar, + idvar, + timevar, + controls = controls, + proxy = proxy, + proxyIV = proxyIV, + FE = FE, + TFE = TFE, + post, + overidpost = overidpost, + pre = pre, + overidpre = post + pre, + normalize = -1 * (pre + 1), + cluster = cluster, 
+ anticipation_effects_normalization = anticipation_effects_normalization, + allow_duplicate_id = allow_duplicate_id, + avoid_internal_copy = avoid_internal_copy +) + +output_fhs <- EventStudy( + estimator = 'FHS', + data, + outcomevar, + policyvar, + idvar, + timevar, + controls = controls, + proxy = proxy, + proxyIV = proxyIV, + FE = FE, + TFE = TFE, + post, + overidpost = overidpost, + pre = pre, + overidpre = post + pre, + normalize = -1 * (pre + 1), + cluster = cluster, + anticipation_effects_normalization = anticipation_effects_normalization, + allow_duplicate_id = allow_duplicate_id, + avoid_internal_copy = avoid_internal_copy +) + + +EventStudyPlot(output_fhs) +EventStudyPlot(output_feols_fhs) diff --git a/tests/testthat/test-EventStudyFHS.R b/tests/testthat/test-EventStudyFHS.R index 3aac45ef..07d707a0 100644 --- a/tests/testthat/test-EventStudyFHS.R +++ b/tests/testthat/test-EventStudyFHS.R @@ -311,6 +311,60 @@ test_that("FE = FALSE, }) +test_that("feols_FHS Coefficients and Standard Errors agree with base STATA", { + + bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") + + for (i in 1:length(bools)) { + bool <- bools[i] + estimator <- "feols_FHS" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + proxy <- "eta_m" + proxyIV <- "z_fd_lead3" + + idvar <- "id" + timevar <- "t" + + FE <- as.logical(substring(bool, 1, 1)) + TFE <- as.logical(substring(bool, 2, 2)) + cluster <- as.logical(substring(bool, 3, 3)) + + # Prepare the model formula for feols_FHS + formula <- PrepareModelFormulaFEOLS_FHS(outcomevar, str_policy_vars, + controls, proxy, proxyIV, + idvar, timevar, FE, TFE) + + reg <- EventStudyFEOLS_FHS(formula, df_EventStudyFHS_example, idvar, timevar, FE, TFE, cluster) + + df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") + epsilon <- 10e-6 + epsilon_se <- 10e-2 # More lenient tolerance for standard errors + + # For feols_FHS, the endogenous variable 
coefficient is "fit_eta_m" instead of "eta_m" + # But STATA reports "eta_m", and from the comparison test we know fit_eta_m should equal eta_m + + expect_equal(unname(coef(reg)["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i]], tolerance = epsilon) + expect_equal(unname(coef(reg)["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i]], tolerance = epsilon) + expect_equal(unname(coef(reg)["fit_eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i]], tolerance = epsilon) + expect_equal(unname(coef(reg)["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i]], tolerance = epsilon) + expect_equal(unname(coef(reg)["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i]], tolerance = epsilon) + expect_equal(unname(coef(reg)["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i]]*(-1), tolerance = epsilon) + expect_equal(unname(coef(reg)["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i]], tolerance = epsilon) + expect_equal(unname(coef(reg)["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i]], tolerance = epsilon) + + expect_equal(unname(fixest::se(reg)["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i+1]], tolerance = epsilon_se) + expect_equal(unname(fixest::se(reg)["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i+1]], tolerance = epsilon_se) + expect_equal(unname(fixest::se(reg)["fit_eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i+1]], tolerance = epsilon_se) + expect_equal(unname(fixest::se(reg)["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i+1]], tolerance = epsilon_se) + expect_equal(unname(fixest::se(reg)["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i+1]], tolerance = epsilon_se) + expect_equal(unname(fixest::se(reg)["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i+1]], tolerance = epsilon_se) + expect_equal(unname(fixest::se(reg)["z_lag3"]), df_test_STATA[df_test_STATA[1] == 
"z_lag3",][[2*i+1]], tolerance = epsilon_se) + expect_equal(unname(fixest::se(reg)["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i+1]], tolerance = epsilon_se) + } +}) + test_that("Coefficients and Standard Errors agree with base STATA", { bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") From d0d237d2f2eaee8993334a2ebc67dd216d639988 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 14:23:59 -0400 Subject: [PATCH 21/67] #42 fx bug in existing code --- tests/testthat/test-EventStudyFHS.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-EventStudyFHS.R b/tests/testthat/test-EventStudyFHS.R index 07d707a0..d8faf55b 100644 --- a/tests/testthat/test-EventStudyFHS.R +++ b/tests/testthat/test-EventStudyFHS.R @@ -369,7 +369,7 @@ test_that("Coefficients and Standard Errors agree with base STATA", { bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") - for (i in length(bools)) { + for (i in 1:length(bools)) { bool <- bools[i] estimator <- "FHS" outcomevar <- "y_base" From e430297a32aa62335719ffa3d3399733f60f22f6 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 14:37:36 -0400 Subject: [PATCH 22/67] #42 bd loop for tests --- tests/testthat/test-EventStudyFEOLS.R | 44 +++++++++++++++---------- tests/testthat/test-EventStudyFHS.R | 46 +++++++++++++++------------ 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/tests/testthat/test-EventStudyFEOLS.R b/tests/testthat/test-EventStudyFEOLS.R index 7a3c2602..d3b82390 100644 --- a/tests/testthat/test-EventStudyFEOLS.R +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -352,23 +352,33 @@ test_that("Coefficients and Standard Errors agree with STATA", { se_feols <- fixest::se(reg) epsilon <- 1e-6 + epsilon_se <- 2e-2 + + # Define coefficient mappings: R_name -> STATA_term + coef_mappings <- list( + "z_fd" = "zfd", + "z_fd_lead2" = "F2.zfd", + "z_fd_lead3" = 
"F3.zfd", + "z_fd_lag1" = "L.zfd", + "z_fd_lag2" = "L2.zfd", + "z_lead3" = "F3.z", + "z_lag3" = "L3.z", + "x_r" = "x_r" + ) + + # Test coefficients + for (r_name in names(coef_mappings)) { + stata_term <- coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "coef"] + if (r_name == "z_lead3") expected <- -1 * expected # STATA sign convention + expect_equal(unname(coef_feols[r_name]), expected, tolerance = epsilon) + } - expect_equal(unname(coef_feols["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[2]], tolerance = epsilon) - expect_equal(unname(coef_feols["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[2]], tolerance = epsilon) - expect_equal(unname(coef_feols["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["coef"]][1], tolerance = epsilon) - expect_equal(unname(coef_feols["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[2]], tolerance = epsilon) - expect_equal(unname(coef_feols["z_fd_lag2"]), df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[2]], tolerance = epsilon) - expect_equal(unname(coef_feols["z_lead3"]), -1 * df_test_STATA[df_test_STATA["term"] == "F3.z",][["coef"]], tolerance = epsilon) - expect_equal(unname(coef_feols["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[2]], tolerance = epsilon) - expect_equal(unname(coef_feols["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[2]], tolerance = epsilon) - - expect_equal(unname(se_feols["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[3]], tolerance = 2e-2) - expect_equal(unname(se_feols["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[3]], tolerance = 2e-2) - expect_equal(unname(se_feols["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["std_error"]][1], tolerance = 2e-2) - expect_equal(unname(se_feols["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[3]], tolerance = 2e-2) - expect_equal(unname(se_feols["z_fd_lag2"]), 
df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[3]], tolerance = 2e-2) - expect_equal(unname(se_feols["z_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.z",][["std_error"]], tolerance = 2e-2) - expect_equal(unname(se_feols["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[3]], tolerance = 2e-2) - expect_equal(unname(se_feols["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[3]], tolerance = 2e-2) + # Test standard errors + for (r_name in names(coef_mappings)) { + stata_term <- coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "std_error"] + expect_equal(unname(se_feols[r_name]), expected, tolerance = epsilon_se) + } }) diff --git a/tests/testthat/test-EventStudyFHS.R b/tests/testthat/test-EventStudyFHS.R index d8faf55b..c9ae6255 100644 --- a/tests/testthat/test-EventStudyFHS.R +++ b/tests/testthat/test-EventStudyFHS.R @@ -342,26 +342,32 @@ test_that("feols_FHS Coefficients and Standard Errors agree with base STATA", { epsilon <- 10e-6 epsilon_se <- 10e-2 # More lenient tolerance for standard errors - # For feols_FHS, the endogenous variable coefficient is "fit_eta_m" instead of "eta_m" - # But STATA reports "eta_m", and from the comparison test we know fit_eta_m should equal eta_m - - expect_equal(unname(coef(reg)["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i]], tolerance = epsilon) - expect_equal(unname(coef(reg)["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i]], tolerance = epsilon) - expect_equal(unname(coef(reg)["fit_eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i]], tolerance = epsilon) - expect_equal(unname(coef(reg)["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i]], tolerance = epsilon) - expect_equal(unname(coef(reg)["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i]], tolerance = epsilon) - expect_equal(unname(coef(reg)["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i]]*(-1), tolerance = epsilon) 
- expect_equal(unname(coef(reg)["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i]], tolerance = epsilon) - expect_equal(unname(coef(reg)["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i]], tolerance = epsilon) - - expect_equal(unname(fixest::se(reg)["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i+1]], tolerance = epsilon_se) - expect_equal(unname(fixest::se(reg)["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i+1]], tolerance = epsilon_se) - expect_equal(unname(fixest::se(reg)["fit_eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i+1]], tolerance = epsilon_se) - expect_equal(unname(fixest::se(reg)["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i+1]], tolerance = epsilon_se) - expect_equal(unname(fixest::se(reg)["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i+1]], tolerance = epsilon_se) - expect_equal(unname(fixest::se(reg)["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i+1]], tolerance = epsilon_se) - expect_equal(unname(fixest::se(reg)["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i+1]], tolerance = epsilon_se) - expect_equal(unname(fixest::se(reg)["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i+1]], tolerance = epsilon_se) + # Define coefficient mappings: R_name -> STATA_name + coef_mappings <- list( + "z_fd" = "z_fd", + "z_fd_lead2" = "z_fd_lead2", + "fit_eta_m" = "eta_m", + "z_fd_lag1" = "z_fd_lag1", + "z_fd_lag2" = "z_fd_lag2", + "z_lead3" = "z_lead3", + "z_lag3" = "z_lag3", + "x_r" = "x_r" + ) + + # Test coefficients + for (r_name in names(coef_mappings)) { + stata_name <- coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i]] + if (r_name == "z_lead3") expected <- expected * (-1) # STATA sign convention + expect_equal(unname(coef(reg)[r_name]), expected, tolerance = epsilon) + } + + # Test standard errors + for (r_name in names(coef_mappings)) { + stata_name <- coef_mappings[[r_name]] + 
expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i+1]] + expect_equal(unname(fixest::se(reg)[r_name]), expected, tolerance = epsilon_se) + } } }) From 3fae492335d381d291133f38a957eb6baf5dff4e Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 14:43:50 -0400 Subject: [PATCH 23/67] #42 bd rename --- R/EventStudy.R | 2 +- issue/{testing.r => eyeball_ols.r} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename issue/{testing.r => eyeball_ols.r} (100%) diff --git a/R/EventStudy.R b/R/EventStudy.R index 9ae41949..96887e11 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -226,7 +226,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c detect_holes <- function(dt, idvar, timevar) { holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar) - ][, list(holes = any(base::diff(base::get(timevar)) != 1)), + ][, list(holes = any(base::diff(base::get(timevar)) != 1)), by = c(idvar)] return(any(holes_per_id$holes)) diff --git a/issue/testing.r b/issue/eyeball_ols.r similarity index 100% rename from issue/testing.r rename to issue/eyeball_ols.r From a0166003eb7a92c17e9338f30f57e80a336f51bb Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 3 Oct 2025 14:53:08 -0400 Subject: [PATCH 24/67] #42 revise test --- tests/testthat/test-EventStudy.R | 63 +++++++++----------------------- 1 file changed, 18 insertions(+), 45 deletions(-) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index c195fca6..9a8b9597 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -298,58 +298,31 @@ test_that("removes the correct column when normalize = post + overidpost", { # feols --------------------------------------------------------------------- -test_that("feols estimator FE works", { - - outputs <- suppressWarnings( - EventStudy(estimator = "feols", data = example_data, 
outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, - post = 2, pre = 3, overidpre = 4, - overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) - ) - - expect_true(class(outputs$output) == "fixest") - expect_true(all(outputs$output$fixef_vars == c("id", "t"))) -}) - -test_that("feols estimator with no fixed effects works", { - - outputs <- EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = FALSE, TFE = FALSE, - post = 2, pre = 3, overidpre = 4, - overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) - - expect_true(class(outputs$output) == "fixest") - expect_true(is.null(outputs$output$fixef_vars)) -}) +test_that("tests that package and STATA output agree when post, overidpost, pre, overidpre are zero", { -test_that("feols estimator with only time fixed effects works", { + post <- 0 + pre <- 0 + overidpre <- 0 + overidpost <- 0 + normalize <- -1 outputs <- EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = FALSE, TFE = TRUE, - post = 2, pre = 3, overidpre = 4, - overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) - - expect_true(class(outputs$output) == "fixest") - expect_true(outputs$output$fixef_vars == "t") -}) + FE = TRUE, TFE = TRUE, + post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) -test_that("feols estimator with only unit FE works", { + coef_package <- coef(outputs$output)[[1]] + std_package <- fixest::se(outputs$output)[[1]] - outputs <- EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - controls = 
"x_r", FE = TRUE, TFE = FALSE, - post = 2, pre = 3, overidpre = 4, - overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) + STATA_output <- read.csv('./input/df_test_base_STATA_allzero.csv') + coef_STATA <- STATA_output$coef[[1]] + std_STATA <- STATA_output$std_error[[1]] - expect_true(class(outputs$output) == "fixest") - expect_true(outputs$output$fixef_vars == "id") + epsilon <- 10e-7 + epsilon_se <- 2e-2 + expect_equal(coef_package, coef_STATA, tolerance = epsilon) + expect_equal(std_package, std_STATA, tolerance = epsilon_se) }) test_that("feols estimator coefficients match OLS coefficients", { From bb15d792cbfd7c3b6d1aa552dba2c760f2637d03 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Wed, 15 Oct 2025 15:03:56 -0400 Subject: [PATCH 25/67] #57 fx bug in AddSuptBand.R Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- R/AddSuptBand.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index 95375415..66eb8bd9 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -77,7 +77,7 @@ AddSuptBand <- function(model_estimates, num_sim = 1000, conf_level = .95, event critical_value = t[floor(conf_level_num_sim) + 1] } - df_estimates_tidy <- if(fixest){estimatr::tidy(model_estimates)} else {broom::tidy(model_estimates)} + df_estimates_tidy <- if(fixest){broom::tidy(model_estimates)} else {estimatr::tidy(model_estimates)} df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) From f8c442118fea3576db09a8ab787112c70766e332 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:15:48 -0400 Subject: [PATCH 26/67] #42 bd explicitly specify clustered vcov --- R/EventStudyOLS.R | 16 
++++++++++++---- tests/testthat/test-EventStudy.R | 3 +-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index b0392243..62119e40 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -127,12 +127,16 @@ EventStudyOLS <- function(prepared_model_formula, prepared_data, EventStudyFEOLS <- function(formula, prepared_data, idvar, timevar, FE, TFE, cluster) { - cluster_var = if(cluster) idvar else NULL + if (cluster) { + vcov_spec <- as.formula(paste0("~", idvar)) + } else { + vcov_spec <- "iid" + } ols_output <- fixest::feols( fml = formula, data = prepared_data, - cluster = cluster_var + vcov = vcov_spec ) return(ols_output) } @@ -140,12 +144,16 @@ EventStudyFEOLS <- function(formula, prepared_data, EventStudyFEOLS_FHS <- function(formula, prepared_data, idvar, timevar, FE, TFE, cluster) { - cluster_var = if(cluster) idvar else NULL + if (cluster) { + vcov_spec <- as.formula(paste0("~", idvar)) + } else { + vcov_spec <- "iid" + } fhs_output <- fixest::feols( fml = formula, data = prepared_data, - cluster = cluster_var + vcov = vcov_spec ) # Apply the same standard error adjustments as EventStudyFHS diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 9a8b9597..9795d39d 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -320,9 +320,8 @@ test_that("tests that package and STATA output agree when post, overidpost, pre, std_STATA <- STATA_output$std_error[[1]] epsilon <- 10e-7 - epsilon_se <- 2e-2 expect_equal(coef_package, coef_STATA, tolerance = epsilon) - expect_equal(std_package, std_STATA, tolerance = epsilon_se) + expect_equal(std_package, std_STATA, tolerance = epsilon) }) test_that("feols estimator coefficients match OLS coefficients", { From 2355c6d016133cdd04a597d6046ca673b8399f42 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:25:25 -0400 Subject: [PATCH 27/67] #42 fx 
small sample correction to have SE line up w/ STATA --- R/EventStudyOLS.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index 62119e40..6f587089 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -128,15 +128,18 @@ EventStudyFEOLS <- function(formula, prepared_data, idvar, timevar, FE, TFE, cluster) { if (cluster) { - vcov_spec <- as.formula(paste0("~", idvar)) + vcov_fixest <- as.formula(paste0("~", idvar)) + small_sample_correction <- fixest::ssc(K.fixef = "full") } else { vcov_spec <- "iid" + ssc_spec <- fixest::ssc() } ols_output <- fixest::feols( fml = formula, data = prepared_data, - vcov = vcov_spec + vcov = vcov_spec, + ssc = ssc_spec ) return(ols_output) } From 482c1a47375aed31de82e46d9172f3dff00245f9 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:29:44 -0400 Subject: [PATCH 28/67] #42 cl style Co-authored-by: Santiago Hermo <45404755+santiagohermo@users.noreply.github.com> --- R/EventStudy.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index 96887e11..3968572b 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -148,7 +148,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { # Check for errors in arguments - if (! estimator %in% c("OLS", 'feols', "FHS", "feols_FHS")) {stop("estimator should be either 'OLS', 'feols', 'FHS', or 'feols_FHS'.")} + if (! estimator %in% c("OLS", "feols", "FHS", "feols_FHS")) {stop("estimator should be either 'OLS', 'feols', 'FHS', or 'feols_FHS'.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} for (var in c(idvar, timevar, outcomevar, policyvar)) { if ((! 
is.character(var))) { From 7f02fca8112959467c10f8692484ec23e90d332c Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Tue, 21 Oct 2025 23:23:49 -0400 Subject: [PATCH 29/67] #42 fx bug --- R/EventStudyOLS.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index 6f587089..b40ccadd 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -131,15 +131,15 @@ EventStudyFEOLS <- function(formula, prepared_data, vcov_fixest <- as.formula(paste0("~", idvar)) small_sample_correction <- fixest::ssc(K.fixef = "full") } else { - vcov_spec <- "iid" - ssc_spec <- fixest::ssc() + vcov_fixest <- "iid" + small_sample_correction <- fixest::ssc() } ols_output <- fixest::feols( fml = formula, data = prepared_data, - vcov = vcov_spec, - ssc = ssc_spec + vcov = vcov_fixest, + ssc = small_sample_correction ) return(ols_output) } @@ -148,15 +148,15 @@ EventStudyFEOLS_FHS <- function(formula, prepared_data, idvar, timevar, FE, TFE, cluster) { if (cluster) { - vcov_spec <- as.formula(paste0("~", idvar)) + vcov_fixest <- as.formula(paste0("~", idvar)) } else { - vcov_spec <- "iid" + vcov_fixest <- "iid" } fhs_output <- fixest::feols( fml = formula, data = prepared_data, - vcov = vcov_spec + vcov = vcov_fixest ) # Apply the same standard error adjustments as EventStudyFHS From a0f32c50f41f779edfd98da7c9cd82ea4be2374d Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Tue, 21 Oct 2025 23:45:14 -0400 Subject: [PATCH 30/67] #42 bd add kernel argument --- R/EventStudy.R | 15 +++++++++------ issue/eyeball_fhs.r | 3 ++- issue/eyeball_ols.r | 11 ++++++----- tests/testthat/test-EventStudy.R | 6 +++--- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index 3968572b..f4fd0bb9 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -3,6 +3,7 @@ #' @description `EventStudy` uses regression methods to 
estimate the effect of a policy on a given outcome. #' #' @param estimator Accepts one of "OLS" or "FHS". If "OLS" is specified, implements Ordinary Least Squares. If "FHS" is specified, implements Instrumental Variables (IV) estimator proposed in [Freyaldenhoven Hansen Shapiro (FHS, 2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). +#' @param kernel Accepts one of "estimatr" or "fixest". If "estimatr" is specified, uses the estimatr package for estimation. If "fixest" is specified, uses the fixest package for estimation. Defaults to "estimatr". #' @param data Data frame containing the variables of interest. #' @param outcomevar Character indicating column of outcome variable y. #' @param policyvar Character indicating column of policy variable z. @@ -145,10 +146,11 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL, proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre, normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE, - allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { + allow_duplicate_id = FALSE, avoid_internal_copy = FALSE, kernel = "estimatr") { # Check for errors in arguments - if (! estimator %in% c("OLS", "feols", "FHS", "feols_FHS")) {stop("estimator should be either 'OLS', 'feols', 'FHS', or 'feols_FHS'.")} + if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} + if (! kernel %in% c("estimatr", "fixest")) {stop("kernel should be either 'estimatr' or 'fixest'.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} for (var in c(idvar, timevar, outcomevar, policyvar)) { if ((! 
is.character(var))) { @@ -335,19 +337,19 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c str_policy_vars <- str_policy_vars[!(str_policy_vars %in% normalization_column)] } - if (estimator == "OLS") { + if (estimator == "OLS" && kernel == "estimatr") { event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static, controls, proxy, proxyIV) output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- str_policy_vars - } else if (estimator == "feols") { + } else if (estimator == "OLS" && kernel == "fixest") { formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, controls, idvar, timevar, FE, TFE) output <- EventStudyFEOLS(formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- str_policy_vars - } else if (estimator == "FHS") { + } else if (estimator == "FHS" && kernel == "estimatr") { if (is.null(proxyIV)) { Fstart <- 0 @@ -370,7 +372,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) - } else if (estimator == "feols_FHS") { + } else if (estimator == "FHS" && kernel == "fixest") { if (is.null(proxyIV)) { Fstart <- 0 @@ -413,6 +415,7 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c "normalize" = normalize, "normalization_column" = normalization_column, "cluster" = cluster, + "kernel" = kernel, "eventstudy_coefficients" = coefficients) return(list("output" = output, diff --git a/issue/eyeball_fhs.r b/issue/eyeball_fhs.r index da038972..8b0eb82a 100644 --- a/issue/eyeball_fhs.r +++ b/issue/eyeball_fhs.r @@ -26,7 +26,8 @@ avoid_internal_copy <- FALSE output_feols_fhs <- EventStudy( - estimator = 'feols_FHS', + estimator = 'FHS', + kernel = 'fixest', data, outcomevar, policyvar, diff --git a/issue/eyeball_ols.r b/issue/eyeball_ols.r index 
d2ad86f6..5288ea91 100644 --- a/issue/eyeball_ols.r +++ b/issue/eyeball_ols.r @@ -27,8 +27,9 @@ avoid_internal_copy <- FALSE kernel <- "estimatr" -output_feols <- EventStudy( - estimator = 'feols', +estimates_feols <- EventStudy( + estimator = 'OLS', + kernel = 'fixest', data, outcomevar, policyvar, @@ -49,7 +50,7 @@ output_feols <- EventStudy( avoid_internal_copy = FALSE ) -output <- EventStudy( +estimates <- EventStudy( estimator = 'OLS', data, outcomevar, @@ -72,5 +73,5 @@ output <- EventStudy( ) -EventStudyPlot(output) -EventStudyPlot(output_feols) +EventStudyPlot(estimates) +EventStudyPlot(estimates_feols) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 9795d39d..8a38a181 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -306,7 +306,7 @@ test_that("tests that package and STATA output agree when post, overidpost, pre, overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "fixest", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -336,7 +336,7 @@ test_that("feols estimator coefficients match OLS coefficients", { ) outputs_feols <- suppressWarnings( - EventStudy(estimator = "feols", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", kernel = "fixest", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 2, pre = 3, overidpre = 4, @@ -579,7 +579,7 @@ test_that("feols_FHS yields the same results as FHS", { normalize = -2, cluster = TRUE) # Run feols_FHS estimator with same parameters - feols_fhs_result <- EventStudy(estimator = "feols_FHS", data = example_data, outcomevar = "y_base", + feols_fhs_result <- EventStudy(estimator = 
"FHS", kernel = "fixest", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = 2, pre = 1, overidpre = 2, overidpost = 3, From d48d480ead507fd0807f7651969a6fb57756e1df Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Tue, 21 Oct 2025 23:51:18 -0400 Subject: [PATCH 31/67] #42 bd add explicit argument kernel="estimatr" --- README.md | 1 + issue/eyeball_fhs.r | 1 + issue/eyeball_ols.r | 1 + tests/testthat/test-AddCIs.R | 12 ++--- tests/testthat/test-AddMeans.R | 4 +- tests/testthat/test-AddSuptBand.R | 4 +- tests/testthat/test-EventStudy.R | 54 +++++++++++------------ tests/testthat/test-EventStudyPlot.R | 24 +++++----- tests/testthat/test-PreparePlottingData.R | 16 +++---- tests/testthat/test-TestLinear.R | 10 ++--- vignettes/documentation.Rmd | 6 +-- 11 files changed, 68 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 5d46caa9..fe928ee8 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ set.seed(10) # for reproducibility of sup-t bands estimates_ols <- EventStudy( estimator = "OLS", + kernel = "fixest", data = example_data, # Use package sample data outcomevar = "y_smooth_m", policyvar = "z", diff --git a/issue/eyeball_fhs.r b/issue/eyeball_fhs.r index 8b0eb82a..3f4f14b5 100644 --- a/issue/eyeball_fhs.r +++ b/issue/eyeball_fhs.r @@ -51,6 +51,7 @@ output_feols_fhs <- EventStudy( output_fhs <- EventStudy( estimator = 'FHS', + kernel = 'estimatr', data, outcomevar, policyvar, diff --git a/issue/eyeball_ols.r b/issue/eyeball_ols.r index 5288ea91..662e923c 100644 --- a/issue/eyeball_ols.r +++ b/issue/eyeball_ols.r @@ -52,6 +52,7 @@ estimates_feols <- EventStudy( estimates <- EventStudy( estimator = 'OLS', + kernel = 'estimatr', data, outcomevar, policyvar, diff --git a/tests/testthat/test-AddCIs.R b/tests/testthat/test-AddCIs.R index 52ddcbdd..855292cb 100644 --- a/tests/testthat/test-AddCIs.R 
+++ b/tests/testthat/test-AddCIs.R @@ -16,7 +16,7 @@ test_that("correctly calculates conf_level at 0.95", { # OLS ------------------------------------------ test_that("correctly recognizes wrong class for estimate argument", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -28,7 +28,7 @@ test_that("correctly recognizes wrong class for estimate argument", { }) test_that("correctly recognizes missing columns in estimates argument", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -47,7 +47,7 @@ test_that("correctly recognizes missing columns in estimates argument", { }) test_that("correctly recognizes wrong inputs for conf_level argument", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -65,7 +65,7 @@ test_that("correctly recognizes wrong inputs for conf_level argument", { test_that("correctly recognizes wrong class for estimate argument", { data <- example_data[, c("y_base", "z", 
"id", "t", "x_r", "eta_m")] - estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) @@ -76,7 +76,7 @@ test_that("correctly recognizes wrong class for estimate argument", { test_that("correctly recognizes missing columns in estimates argument", { data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] - estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) @@ -95,7 +95,7 @@ test_that("correctly recognizes missing columns in estimates argument", { test_that("correctly recognizes wrong inputs for conf_level argument", { data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] - estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) diff --git a/tests/testthat/test-AddMeans.R b/tests/testthat/test-AddMeans.R index b19d6e56..5a27cebf 100644 --- a/tests/testthat/test-AddMeans.R +++ 
b/tests/testthat/test-AddMeans.R @@ -1,6 +1,6 @@ test_that("means are computed correctly when a first differenced variable is normalized", { suppressWarnings( - sample_estimation <- EventStudy(estimator = "OLS", data = example_data, + sample_estimation <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, @@ -20,7 +20,7 @@ test_that("means are computed correctly when a first differenced variable is nor }) test_that("means are computed correctly when the furthest lead is normalized", { - sample_estimation <- EventStudy(estimator = "OLS", data = example_data, + sample_estimation <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, diff --git a/tests/testthat/test-AddSuptBand.R b/tests/testthat/test-AddSuptBand.R index fae6e338..36fa11d9 100644 --- a/tests/testthat/test-AddSuptBand.R +++ b/tests/testthat/test-AddSuptBand.R @@ -2,7 +2,7 @@ test_that("check that the correct part of vcov matrix is used", { suppressWarnings( - sample_estimation <- EventStudy(estimator = "OLS", data = example_data, + sample_estimation <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, control = "x_r", TFE = TRUE, post = 2, pre = 2, overidpre = 2, @@ -24,7 +24,7 @@ test_that("check that the correct part of vcov matrix is used", { test_that("check that the correct part of vcov matrix is used", { data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] suppressWarnings( - sample_estimation <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + sample_estimation <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = 
"y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 8a38a181..f7ce6d3c 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -51,7 +51,7 @@ test_that("correctly creates highest order shiftvalues", { overidpost <- 11 outputs <- suppressWarnings( - EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -77,7 +77,7 @@ test_that("correctly throws an error when normalized coefficient is outside even overidpost <- 7 normalize <- 15 - expect_error(EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -92,7 +92,7 @@ test_that("throws an error when post + pre + overidpre + overidpost exceeds the overidpost <- 25 normalize <- 2 - expect_error(EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -107,7 +107,7 @@ test_that("removes the correct column 
when normalize < 0", { overidpost <- 7 normalize <- -2 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -131,7 +131,7 @@ test_that("removes the correct column when normalize = 0", { overidpost <- 7 normalize <- 0 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -153,7 +153,7 @@ test_that("does not create a first differenced variable when post, overidpost, p overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -172,7 +172,7 @@ test_that("tests that package and STATA output agree when post, overidpost, pre, overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -198,7 +198,7 @@ test_that("does not create shiftvalues of differenced variable when post + overi overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = 
"OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -219,7 +219,7 @@ test_that("does not create leads of differenced variable when pre + overidpre < overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -240,7 +240,7 @@ test_that("removes the correct column when normalize > 0", { overidpost <- 7 normalize <- 2 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -262,7 +262,7 @@ test_that("removes the correct column when normalize = - (pre + overidpre + 1)", overidpost <- 4 normalize <- -4 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -283,7 +283,7 @@ test_that("removes the correct column when normalize = post + overidpost", { overidpost <- 4 normalize <- 5 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs <- 
EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -327,7 +327,7 @@ test_that("tests that package and STATA output agree when post, overidpost, pre, test_that("feols estimator coefficients match OLS coefficients", { outputs_ols <- suppressWarnings( - EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 2, pre = 3, overidpre = 4, @@ -363,7 +363,7 @@ test_that("correctly creates highest order leads and shiftvalues", { overidpost <- 11 outputs <- suppressWarnings( - EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -389,7 +389,7 @@ test_that("correctly throws an error when normalized coefficient is outside even overidpost <- 7 normalize <- 15 - expect_error(EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -404,7 +404,7 @@ test_that("throws an error when post + pre + overidpre + overidpost exceeds the overidpost <- 25 normalize <- 2 - expect_error(EventStudy(estimator = "FHS", data 
= example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -419,7 +419,7 @@ test_that("removes the correct column when normalize < 0", { overidpost <- 7 normalize <- -2 - outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -443,7 +443,7 @@ test_that("removes the correct column when normalize = 0", { overidpost <- 7 normalize <- 0 - outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -467,7 +467,7 @@ test_that("FHS does not run when post, pre, overidpre, and overidpost are all 0" expect_error( outputs <- - EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -483,7 +483,7 @@ test_that("removes the correct column when normalize > 0", { overidpost <- 7 normalize <- 2 - outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator 
= "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -505,7 +505,7 @@ test_that("removes the correct column when normalize = - (pre + overidpre + 1)", overidpost <- 4 normalize <- -4 - outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -526,7 +526,7 @@ test_that("removes the correct column when normalize = post + overidpost", { overidpost <- 4 normalize <- 5 - outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -543,7 +543,7 @@ test_that("proxyIV selection works", { expect_message( suppressWarnings( - EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", + EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 2, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ), @@ -552,7 +552,7 @@ test_that("proxyIV selection works", { expect_message( suppressWarnings( - EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", + EventStudy(estimator = 
"FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 2, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ), @@ -561,7 +561,7 @@ test_that("proxyIV selection works", { expect_message( suppressWarnings( - EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", + EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 6, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ), @@ -572,7 +572,7 @@ test_that("proxyIV selection works", { test_that("feols_FHS yields the same results as FHS", { # Run FHS estimator - fhs_result <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + fhs_result <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = 2, pre = 1, overidpre = 2, overidpost = 3, @@ -618,7 +618,7 @@ test_that("feols_FHS yields the same results as FHS", { test_that("warning with correct normalize and pre is thrown when anticpation effects are allowed and anticipation_effects_normalization is TRUE", { expect_warning( - EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 1, pre = 1, overidpre = 4, overidpost = 5, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE), diff --git a/tests/testthat/test-EventStudyPlot.R 
b/tests/testthat/test-EventStudyPlot.R index 0850032b..1f0a8735 100644 --- a/tests/testthat/test-EventStudyPlot.R +++ b/tests/testthat/test-EventStudyPlot.R @@ -6,11 +6,11 @@ if ("get_labs" %in% getNamespaceExports("ggplot2")) { test_that("Dimension of OLS and FHS estimation output is the same", { - estimates_ols <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", + estimates_ols <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) - estimates_fhs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_smooth_m", + estimates_fhs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", proxy = "eta_r", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -33,7 +33,7 @@ test_that("Dimension of OLS and FHS estimation output is the same", { test_that("correctly changes x-axis and y-axis labels", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -50,7 +50,7 @@ test_that("correctly changes x-axis and y-axis labels", { test_that("x- and y-axis breaks and limits are correct", { - estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates = EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -76,7 +76,7 @@ test_that("x- and y-axis breaks 
and limits are correct", { test_that("correctly adds mean of outcome var", { - estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates = EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -101,7 +101,7 @@ test_that("correctly adds mean of outcome var", { test_that("sup-t bands are appropriately present or absent", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -120,7 +120,7 @@ test_that("sup-t bands are appropriately present or absent", { test_that("confidence intervals are appropriately present or absent", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -138,7 +138,7 @@ test_that("confidence intervals are appropriately present or absent", { test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or absent", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -197,7 +197,7 @@ test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or abs test_that("Sup-t bands are wider 
than confidence intervals", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -220,7 +220,7 @@ test_that("Sup-t bands are wider than confidence intervals", { test_that("computed smoothest path for examples is within expectations", { - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -229,7 +229,7 @@ test_that("computed smoothest path for examples is within expectations", { expect_equal(p$data$smoothest_path, matrix(rep(0, nrow(p$data)))) - estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", + estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3) @@ -255,7 +255,7 @@ test_that("computed smoothest path for examples is within expectations", { test_that("computed smoothest path for FHS has at least two coefficients almost equal to zero", { - estimates <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_jump_m", + estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_jump_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_r", post = 3, pre = 0, overidpre = 3, overidpost = 1, normalize = -1, proxyIV = "z_fd_lead3") diff --git a/tests/testthat/test-PreparePlottingData.R 
b/tests/testthat/test-PreparePlottingData.R index 4cc0fec0..defb29d1 100644 --- a/tests/testthat/test-PreparePlottingData.R +++ b/tests/testthat/test-PreparePlottingData.R @@ -4,7 +4,7 @@ test_that("labels are unique", { - list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -32,7 +32,7 @@ test_that("labels are unique", { test_that("the correct labels are created", { - list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -59,7 +59,7 @@ test_that("the correct labels are created", { test_that("the labels are ordered correctly", { - list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -86,7 +86,7 @@ test_that("the labels are ordered correctly", { test_that("the control variable is removed", { - list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = 
example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -113,7 +113,7 @@ test_that("the control variable is removed", { test_that("the largest lag label is correctly created", { - list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -142,7 +142,7 @@ test_that("the largest lag label is correctly created", { test_that("the largest lead label is correctly created", { - list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -171,7 +171,7 @@ test_that("the largest lead label is correctly created", { test_that("all columns besides 'term' and 'label' are 0 for the normalization column", { - list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -206,7 +206,7 @@ test_that("all columns 
besides 'term' and 'label' are 0 for the normalization co test_that("all columns besides 'term' and 'label' are 0 for the proxyIV column", { - list_EventStudy <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) diff --git a/tests/testthat/test-TestLinear.R b/tests/testthat/test-TestLinear.R index d78b5981..971cadab 100644 --- a/tests/testthat/test-TestLinear.R +++ b/tests/testthat/test-TestLinear.R @@ -1,5 +1,5 @@ test_that("correctly recognizes wrong variable type for estimate argument", { - estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -14,7 +14,7 @@ test_that("correctly recognizes wrong variable type for estimate argument", { }) test_that("correctly recognizes wrong variable type for pretrends", { - estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -24,7 +24,7 @@ test_that("correctly recognizes wrong variable type for pretrends", { }) test_that("correctly recognizes wrong variable type 
for leveling_off", { - estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -34,7 +34,7 @@ test_that("correctly recognizes wrong variable type for leveling_off", { }) test_that("produces only functions that are specified", { - estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -80,7 +80,7 @@ test_that("produces only functions that are specified", { test_that("checks equality with STATA", { estimate <- suppressWarnings( - EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, diff --git a/vignettes/documentation.Rmd b/vignettes/documentation.Rmd index 8a06fe54..2e6b4447 100644 --- a/vignettes/documentation.Rmd +++ b/vignettes/documentation.Rmd @@ -49,7 +49,7 @@ head(example_data) Here is an example using the sample data: ```{r Basic Eventstudy Example - Show Code, eval = FALSE} -results <- EventStudy(estimator = "OLS", +results <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_jump_m", policyvar = "z", @@ -59,7 +59,7 @@ results <- EventStudy(estimator = "OLS", pre = 0) ``` ```{r Basic Eventstudy Example - 
Run Code, echo = FALSE} -results <- EventStudy(estimator = "OLS", +results <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_jump_m", policyvar = "z", @@ -120,7 +120,7 @@ summary(results$output) This function is designed to use the output of the `EventStudy()` and returns a ggplot object. Here is an example of using the function with some default settings: ```{r EventStudyPlot example 1, fig.dim = c(7, 5)} -eventstudy_estimates_ols <- EventStudy(estimator = "OLS", +eventstudy_estimates_ols <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_jump_m", policyvar = "z", From b49722612c2fa632a94969dc9bb4e0edf9cd93fb Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Tue, 21 Oct 2025 23:59:41 -0400 Subject: [PATCH 32/67] #42 bd iterated if statements for kernel --- R/EventStudy.R | 60 ++++++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index f4fd0bb9..81369363 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -337,20 +337,20 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c str_policy_vars <- str_policy_vars[!(str_policy_vars %in% normalization_column)] } - if (estimator == "OLS" && kernel == "estimatr") { - event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, - static, controls, proxy, proxyIV) - - output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) - coefficients <- str_policy_vars - } else if (estimator == "OLS" && kernel == "fixest") { - formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls, - idvar, timevar, FE, TFE) - output <- EventStudyFEOLS(formula, data, idvar, timevar, FE, TFE, cluster) + if (estimator == "OLS") { + if (kernel == "estimatr") { + formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, + static, 
controls, proxy, proxyIV) + + output <- EventStudyOLS(formula, data, idvar, timevar, FE, TFE, cluster) + } else if (kernel == "fixest") { + formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, + controls, + idvar, timevar, FE, TFE) + output <- EventStudyFEOLS(formula, data, idvar, timevar, FE, TFE, cluster) + } coefficients <- str_policy_vars - } else if (estimator == "FHS" && kernel == "estimatr") { - + } else if (estimator == "FHS") { if (is.null(proxyIV)) { Fstart <- 0 str_fd_leads <- str_policy_vars[grepl("^z_fd_lead", str_policy_vars)] @@ -367,33 +367,17 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c ". To specify a different proxyIV use the proxyIV argument.")) } - event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, - static, controls, proxy, proxyIV) - - output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) - coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) - } else if (estimator == "FHS" && kernel == "fixest") { - - if (is.null(proxyIV)) { - Fstart <- 0 - str_fd_leads <- str_policy_vars[grepl("^z_fd_lead", str_policy_vars)] + if (kernel == "estimatr") { + formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, + static, controls, proxy, proxyIV) - for (var in str_fd_leads) { - lm <- lm(data = data, formula = stats::reformulate(termlabels = var, response = proxy)) - Floop <- summary(lm)$fstatistic["value"] - if (Floop > Fstart) { - Fstart <- Floop - proxyIV <- var - } - } - message(paste0("Defaulting to strongest lead of differenced policy variable: proxyIV = ", proxyIV, - ". 
To specify a different proxyIV use the proxyIV argument.")) - } - - formula <- PrepareModelFormulaFEOLS_FHS(outcomevar, str_policy_vars, - controls, proxy, proxyIV, - idvar, timevar, FE, TFE) + output <- EventStudyFHS(formula, data, idvar, timevar, FE, TFE, cluster) + } else if (kernel == "fixest") { + formula <- PrepareModelFormulaFEOLS_FHS(outcomevar, str_policy_vars, + controls, proxy, proxyIV, + idvar, timevar, FE, TFE) output <- EventStudyFEOLS_FHS(formula, data, idvar, timevar, FE, TFE, cluster) + } coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) } From 13a448c4b2058b79e7f5934dfb207ca6e42291c6 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Wed, 22 Oct 2025 00:08:37 -0400 Subject: [PATCH 33/67] #42 cl relocate fhs --- R/EventStudyFHS.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ R/EventStudyOLS.R | 45 --------------------------------------------- 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index bccf9d85..2294f0c8 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -127,3 +127,47 @@ EventStudyFHS <- function(prepared_model_formula, prepared_data, return(fhs_output) } + +EventStudyFEOLS_FHS <- function(formula, prepared_data, + idvar, timevar, FE, TFE, cluster) { + + if (cluster) { + vcov_fixest <- as.formula(paste0("~", idvar)) + } else { + vcov_fixest <- "iid" + } + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = vcov_fixest + ) + + # Apply the same standard error adjustments as EventStudyFHS + if (FE & TFE & cluster) { + N <- fhs_output$nobs + n <- fhs_output$fixef_sizes[1] # number of clusters (unique id values) + # For FE & TFE, K = number of time FEs + number of structural parameters + # But we need to be careful - fixest nparams includes the fitted endogenous variable + # Let's count the actual event study coefficients (excluding fit_xxx) + n_event_coefs <- length(grep("^z_", 
names(coef(fhs_output)))) - 1 # -1 for fit_eta_m + K <- fhs_output$fixef_sizes[2] + n_event_coefs # time FEs + structural params + + adjustment <- sqrt((N - K) / (N - n - K + 1)) + fhs_output$se <- fhs_output$se / adjustment + fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) + + } else if (FE & (!TFE) & cluster) { + N <- fhs_output$nobs + n <- fhs_output$fixef_sizes[1] # number of clusters + # For FE only, K = 1 (for the FE intercept?) + structural parameters + n_event_coefs <- length(grep("^z_", names(coef(fhs_output)))) - 1 # -1 for fit_eta_m + K <- 1 + n_event_coefs + + adjustment <- sqrt((N - K) / (N - n - K + 1)) + fhs_output$se <- fhs_output$se / adjustment + fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) + } + + return(fhs_output) +} diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index b40ccadd..100c510a 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -143,48 +143,3 @@ EventStudyFEOLS <- function(formula, prepared_data, ) return(ols_output) } - -EventStudyFEOLS_FHS <- function(formula, prepared_data, - idvar, timevar, FE, TFE, cluster) { - - if (cluster) { - vcov_fixest <- as.formula(paste0("~", idvar)) - } else { - vcov_fixest <- "iid" - } - - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = vcov_fixest - ) - - # Apply the same standard error adjustments as EventStudyFHS - if (FE & TFE & cluster) { - N <- fhs_output$nobs - n <- fhs_output$fixef_sizes[1] # number of clusters (unique id values) - # For FE & TFE, K = number of time FEs + number of structural parameters - # But we need to be careful - fixest nparams includes the fitted endogenous variable - # Let's count the actual event study coefficients (excluding fit_xxx) - n_event_coefs <- length(grep("^z_", names(coef(fhs_output)))) - 1 # -1 for fit_eta_m - K <- fhs_output$fixef_sizes[2] + n_event_coefs # time FEs + structural params - - adjustment <- sqrt((N - K) / (N - n - K + 1)) - fhs_output$se <- fhs_output$se / adjustment - 
fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) - - } else if (FE & (!TFE) & cluster) { - N <- fhs_output$nobs - n <- fhs_output$fixef_sizes[1] # number of clusters - # For FE only, K = 1 (for the FE intercept?) + structural parameters - n_event_coefs <- length(grep("^z_", names(coef(fhs_output)))) - 1 # -1 for fit_eta_m - K <- 1 + n_event_coefs - - adjustment <- sqrt((N - K) / (N - n - K + 1)) - fhs_output$se <- fhs_output$se / adjustment - fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) - } - - return(fhs_output) -} - From 2f992192f92de07d0680e564cb3bfc0859dff17d Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Wed, 22 Oct 2025 00:22:49 -0400 Subject: [PATCH 34/67] #42 bd futurewarning --- R/EventStudy.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/EventStudy.R b/R/EventStudy.R index 81369363..122e3d04 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -151,6 +151,8 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c # Check for errors in arguments if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} if (! kernel %in% c("estimatr", "fixest")) {stop("kernel should be either 'estimatr' or 'fixest'.")} + if (missing(kernel)) {warning("Argument 'kernel' was not specified; using 'estimatr' as default; we strongly recommend explicitly specifying a kernel because the default is scheduled to change.")} + if (kernel == "estimatr") {warning("'estimatr' selected as kernel. We no longer maintain it and will depreciate it in a future release. We recommend using 'fixest' instead.")} if (! is.data.frame(data)) {stop("data should be a data frame.")} for (var in c(idvar, timevar, outcomevar, policyvar)) { if ((! 
is.character(var))) { From d57c4695296954457c857a6a679974193b4c8bdb Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 12:10:51 -0500 Subject: [PATCH 35/67] #42 bd unify PrepareModelFormula --- R/EventStudy.R | 24 ++-- R/PrepareModelFormula.R | 200 +++++++++++++------------- tests/testthat/test-EventStudyFEOLS.R | 72 +++++----- tests/testthat/test-EventStudyFHS.R | 7 +- 4 files changed, 151 insertions(+), 152 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index 122e3d04..fed81adf 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -340,15 +340,13 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c } if (estimator == "OLS") { + formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, + static, controls, proxy, proxyIV, + kernel, idvar, timevar, FE, TFE) + if (kernel == "estimatr") { - formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, - static, controls, proxy, proxyIV) - output <- EventStudyOLS(formula, data, idvar, timevar, FE, TFE, cluster) } else if (kernel == "fixest") { - formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls, - idvar, timevar, FE, TFE) output <- EventStudyFEOLS(formula, data, idvar, timevar, FE, TFE, cluster) } coefficients <- str_policy_vars @@ -369,16 +367,14 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c ". 
To specify a different proxyIV use the proxyIV argument.")) } + formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, + static, controls, proxy, proxyIV, + kernel, idvar, timevar, FE, TFE) + if (kernel == "estimatr") { - formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, - static, controls, proxy, proxyIV) - - output <- EventStudyFHS(formula, data, idvar, timevar, FE, TFE, cluster) + output <- EventStudyFHS(formula, data, idvar, timevar, FE, TFE, cluster) } else if (kernel == "fixest") { - formula <- PrepareModelFormulaFEOLS_FHS(outcomevar, str_policy_vars, - controls, proxy, proxyIV, - idvar, timevar, FE, TFE) - output <- EventStudyFEOLS_FHS(formula, data, idvar, timevar, FE, TFE, cluster) + output <- EventStudyFEOLS_FHS(formula, data, idvar, timevar, FE, TFE, cluster) } coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) } diff --git a/R/PrepareModelFormula.R b/R/PrepareModelFormula.R index 9336abe6..aff72d37 100644 --- a/R/PrepareModelFormula.R +++ b/R/PrepareModelFormula.R @@ -10,6 +10,11 @@ #' @param proxyIV Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS". #' If NULL, defaults to the strongest lead of the policy variable based on the first stage. #' @param static Indicates whether the model to be estimated is static. Defaults to FALSE. +#' @param kernel Character indicating the estimation kernel. Accepts "estimatr" or "fixest". Defaults to "estimatr". +#' @param idvar Character indicating the identifier variable for fixed effects. Required when kernel = "fixest". +#' @param timevar Character indicating the time variable for fixed effects. Required when kernel = "fixest". +#' @param FE Logical indicating whether to include unit fixed effects. Defaults to FALSE. +#' @param TFE Logical indicating whether to include time fixed effects. Defaults to FALSE. 
#' @return A formula object to be passed to EventStudy #' #' @importFrom stats reformulate as.formula @@ -39,7 +44,9 @@ PrepareModelFormula <- function(estimator, outcomevar, str_policy_vars, static = FALSE, - controls = NULL, proxy = NULL, proxyIV = NULL) { + controls = NULL, proxy = NULL, proxyIV = NULL, + kernel = "estimatr", idvar = NULL, timevar = NULL, + FE = FALSE, TFE = FALSE) { if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} if (! is.character(outcomevar)) {stop("outcomevar should be a character.")} @@ -53,110 +60,105 @@ PrepareModelFormula <- function(estimator, outcomevar, if (! static & length(str_policy_vars) <= 1) {stop("str_policy_vars must have more than one variable with static = FALSE.")} if ( static & !is.null(proxyIV)) {stop("static model is not compatible with FHS estimator.")} - if (estimator == "OLS") { - reg_formula <- stats::reformulate( - termlabels = c(str_policy_vars, controls), - response = outcomevar, - intercept = FALSE - ) + if (! kernel %in% c("estimatr", "fixest")) {stop("kernel should be either 'estimatr' or 'fixest'.")} + if (kernel == "fixest") { + if (is.null(idvar) | !is.character(idvar)) {stop("idvar must be specified as a character when kernel is 'fixest'.")} + if (is.null(timevar) | !is.character(timevar)) {stop("timevar must be specified as a character when kernel is 'fixest'.")} + if (! is.logical(FE)) {stop("FE should be a logical.")} + if (! 
is.logical(TFE)) {stop("TFE should be a logical.")} } - if (estimator == "FHS") { - exogenous <- c(str_policy_vars, controls) - exogenous <- exogenous[exogenous != proxy] - exogenous <- exogenous[exogenous != proxyIV] - - reg_formula <- stats::as.formula( - paste(outcomevar, "~", - paste(c(exogenous, proxy), collapse="+"), - "|", - paste(c(exogenous, proxyIV), collapse="+")) - ) - } - - return(reg_formula) -} - - -PrepareModelFormulaFEOLS <- function(outcomevar, str_policy_vars, - controls = NULL, - idvar = NULL, timevar = NULL, FE = FALSE, TFE = FALSE) { - stopifnot(!is.null(idvar)) - stopifnot(!is.null(timevar)) - - regressors <- c(str_policy_vars, controls) + if (kernel == "estimatr") { + if (estimator == "OLS") { + reg_formula <- stats::reformulate( + termlabels = c(str_policy_vars, controls), + response = outcomevar, + intercept = FALSE + ) + } - if (FE | TFE) { - fes <- c() - if (FE) { - fes <- c(fes, idvar) + if (estimator == "FHS") { + exogenous <- c(str_policy_vars, controls) + exogenous <- exogenous[exogenous != proxy] + exogenous <- exogenous[exogenous != proxyIV] + + reg_formula <- stats::as.formula( + paste(outcomevar, "~", + paste(c(exogenous, proxy), collapse="+"), + "|", + paste(c(exogenous, proxyIV), collapse="+")) + ) } - if (TFE) { - fes <- c(fes, timevar) + } else if (kernel == "fixest") { + regressors <- c(str_policy_vars, controls) + + if (estimator == "OLS") { + if (FE | TFE) { + fes <- c() + if (FE) { + fes <- c(fes, idvar) + } + if (TFE) { + fes <- c(fes, timevar) + } + + formula_str <- paste( + outcomevar, + "~", + paste(regressors, collapse = " + "), + "|", + paste(fes, collapse = " + ") + ) + reg_formula <- stats::as.formula(formula_str) + } else { + reg_formula <- stats::reformulate( + termlabels = regressors, + response = outcomevar, + intercept = TRUE + ) + } + } else if (estimator == "FHS") { + exogenous <- c(str_policy_vars, controls) + exogenous <- exogenous[exogenous != proxy] + exogenous <- exogenous[exogenous != proxyIV] + + if 
(FE | TFE) { + fes <- c() + if (FE) { + fes <- c(fes, idvar) + } + if (TFE) { + fes <- c(fes, timevar) + } + + formula_str <- paste( + outcomevar, + "~", + paste(exogenous, collapse = " + "), + "|", + paste(fes, collapse = " + "), + "|", + proxy, + "~", + paste(c(exogenous, proxyIV), collapse = " + ") + ) + reg_formula <- stats::as.formula(formula_str) + } else { + formula_str <- paste( + outcomevar, + "~", + paste(exogenous, collapse = " + "), + "|", + proxy, + "~", + paste(c(exogenous, proxyIV), collapse = " + ") + ) + reg_formula <- stats::as.formula(formula_str) + } } - - formula_str <- paste( - outcomevar, - "~", - paste(regressors, collapse = " + "), - "|", - paste(fes, collapse = " + ") - ) - formula <- stats::as.formula(formula_str) - } else { - formula <- stats::reformulate( - termlabels = regressors, - response = outcomevar, - intercept = TRUE - ) } - return(formula) + + return(reg_formula) } -PrepareModelFormulaFEOLS_FHS <- function(outcomevar, str_policy_vars, - controls = NULL, proxy = NULL, proxyIV = NULL, - idvar = NULL, timevar = NULL, FE = FALSE, TFE = FALSE) { - stopifnot(!is.null(idvar)) - stopifnot(!is.null(timevar)) - stopifnot(!is.null(proxy)) - stopifnot(!is.null(proxyIV)) - - exogenous <- c(str_policy_vars, controls) - exogenous <- exogenous[exogenous != proxy] - exogenous <- exogenous[exogenous != proxyIV] - - if (FE | TFE) { - fes <- c() - if (FE) { - fes <- c(fes, idvar) - } - if (TFE) { - fes <- c(fes, timevar) - } - formula_str <- paste( - outcomevar, - "~", - paste(exogenous, collapse = " + "), - "|", - paste(fes, collapse = " + "), - "|", - proxy, - "~", - paste(c(exogenous, proxyIV), collapse = " + ") - ) - formula <- stats::as.formula(formula_str) - } else { - formula_str <- paste( - outcomevar, - "~", - paste(exogenous, collapse = " + "), - "|", - proxy, - "~", - paste(c(exogenous, proxyIV), collapse = " + ") - ) - formula <- stats::as.formula(formula_str) - } - return(formula) -} diff --git a/tests/testthat/test-EventStudyFEOLS.R 
b/tests/testthat/test-EventStudyFEOLS.R index d3b82390..6330d71f 100644 --- a/tests/testthat/test-EventStudyFEOLS.R +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -14,10 +14,10 @@ test_that("FE = TRUE, TFE <- TRUE cluster <- TRUE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) if (FE & TFE & cluster) { @@ -54,10 +54,10 @@ test_that("FE = FALSE, TFE <- TRUE cluster <- TRUE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) if ((!FE) & TFE & cluster) { @@ -94,10 +94,10 @@ test_that("FE = TRUE, TFE <- FALSE cluster <- TRUE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) if (FE & (!TFE) & cluster) { @@ -134,10 +134,10 @@ test_that("FE = FALSE, TFE <- FALSE cluster <- TRUE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE 
= TFE) if ((!FE) & (!TFE) & cluster) { @@ -173,10 +173,10 @@ test_that("FE = TRUE, TFE <- TRUE cluster <- FALSE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) if (FE & TFE & (!cluster)) { @@ -213,10 +213,10 @@ test_that("FE = FALSE, TFE <- TRUE cluster <- FALSE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) if ((!FE) & TFE & (!cluster)) { @@ -253,10 +253,10 @@ test_that("FE = TRUE, TFE <- FALSE cluster <- FALSE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) if (FE & (!TFE) & (!cluster)) { @@ -293,10 +293,10 @@ test_that("FE = FALSE, TFE <- FALSE cluster <- FALSE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) if ((!FE) & (!TFE) & (!cluster)) { @@ -330,10 +330,10 @@ test_that("Coefficients and Standard 
Errors agree with STATA", { TFE <- TRUE cluster <- TRUE - event_study_formula <- PrepareModelFormulaFEOLS(outcomevar, str_policy_vars, - controls = controls, - idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) + event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) reg <- EventStudyFEOLS( formula = event_study_formula, diff --git a/tests/testthat/test-EventStudyFHS.R b/tests/testthat/test-EventStudyFHS.R index c9ae6255..46ba7c96 100644 --- a/tests/testthat/test-EventStudyFHS.R +++ b/tests/testthat/test-EventStudyFHS.R @@ -332,9 +332,10 @@ test_that("feols_FHS Coefficients and Standard Errors agree with base STATA", { cluster <- as.logical(substring(bool, 3, 3)) # Prepare the model formula for feols_FHS - formula <- PrepareModelFormulaFEOLS_FHS(outcomevar, str_policy_vars, - controls, proxy, proxyIV, - idvar, timevar, FE, TFE) + formula <- PrepareModelFormula(estimator = "FHS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, proxy = proxy, proxyIV = proxyIV, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) reg <- EventStudyFEOLS_FHS(formula, df_EventStudyFHS_example, idvar, timevar, FE, TFE, cluster) From 044cc2f2d3a79b632086a3e79b8ad94bd6aa4f23 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 12:37:45 -0500 Subject: [PATCH 36/67] #42 fx tidy for EventStudyPlot.R --- R/EventStudy.R | 1 - R/EventStudyPlot.R | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index fed81adf..413ca4f2 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -397,7 +397,6 @@ EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, c "normalize" = normalize, "normalization_column" = normalization_column, "cluster" = cluster, - "kernel" = kernel, 
"eventstudy_coefficients" = coefficients) return(list("output" = output, diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index ecfc6373..9f15901b 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -30,6 +30,7 @@ #' @import ggplot2 dplyr #' @import estimatr #' @importFrom rlang .data +#' @importFrom broom tidy #' @importFrom data.table setorder #' @export #' @@ -122,7 +123,12 @@ EventStudyPlot <- function(estimates, # Estimation Elements ----------------------------------------------------- model_estimates <- estimates$output - model_estimates_tidy <- estimatr::tidy(estimates$output) + model_type <- class(model_estimates) + if (model_type == "fixest") { + model_estimates_tidy <- broom::tidy(model_estimates) + } else { + model_estimates_tidy <- estimatr::tidy(model_estimates) + } static_model <- length(coef(model_estimates)) == 1 if (static_model) { From f46c3bb0ea574e88f02d21559fee9b176df8f66a Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 13:13:53 -0500 Subject: [PATCH 37/67] #42 fx EventStudyFEOLS_FHS wrong se adjustment --- R/EventStudyFHS.R | 106 +++++++++++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 29 deletions(-) diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index 2294f0c8..596feeb4 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -131,42 +131,90 @@ EventStudyFHS <- function(prepared_model_formula, prepared_data, EventStudyFEOLS_FHS <- function(formula, prepared_data, idvar, timevar, FE, TFE, cluster) { - if (cluster) { - vcov_fixest <- as.formula(paste0("~", idvar)) - } else { - vcov_fixest <- "iid" - } - - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = vcov_fixest - ) + if (! inherits(formula, "formula")) {stop("formula should be a formula")} + if (! is.data.frame(prepared_data)) {stop("data should be a data frame.")} + if (! is.character(idvar)) {stop("idvar should be a character.")} + if (! 
is.character(timevar)) {stop("timevar should be a character.")} + if (! is.logical(FE)) {stop("FE should be either TRUE or FALSE.")} + if (! is.logical(TFE)) {stop("TFE should be either TRUE or FALSE.")} + if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")} + if (FE & !cluster) {stop("cluster=TRUE required when FE=TRUE.")} - # Apply the same standard error adjustments as EventStudyFHS if (FE & TFE & cluster) { + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = as.formula(paste0("~", idvar)) + ) + N <- fhs_output$nobs - n <- fhs_output$fixef_sizes[1] # number of clusters (unique id values) - # For FE & TFE, K = number of time FEs + number of structural parameters - # But we need to be careful - fixest nparams includes the fitted endogenous variable - # Let's count the actual event study coefficients (excluding fit_xxx) - n_event_coefs <- length(grep("^z_", names(coef(fhs_output)))) - 1 # -1 for fit_eta_m - K <- fhs_output$fixef_sizes[2] + n_event_coefs # time FEs + structural params + n <- length(unique(prepared_data[[idvar]])) + K <- fhs_output$fixef_sizes[[timevar]] + length(coef(fhs_output)) - adjustment <- sqrt((N - K) / (N - n - K + 1)) - fhs_output$se <- fhs_output$se / adjustment - fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) + adjustment_factor <- (N - K) / (N - n - K + 1) + fhs_output$se <- fhs_output$se / sqrt(adjustment_factor) + fhs_output$cov.scaled <- fhs_output$cov.scaled / adjustment_factor + + # Recalculate statistical inference + fhs_output$tstat <- coef(fhs_output) / fhs_output$se + fhs_output$pvalue <- 2 * stats::pnorm(abs(fhs_output$tstat), lower.tail = FALSE) + fhs_output$conf.low <- coef(fhs_output) - stats::qnorm(0.975) * fhs_output$se + fhs_output$conf.high <- coef(fhs_output) + stats::qnorm(0.975) * fhs_output$se } else if (FE & (!TFE) & cluster) { + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = as.formula(paste0("~", idvar)) + ) + 
N <- fhs_output$nobs - n <- fhs_output$fixef_sizes[1] # number of clusters - # For FE only, K = 1 (for the FE intercept?) + structural parameters - n_event_coefs <- length(grep("^z_", names(coef(fhs_output)))) - 1 # -1 for fit_eta_m - K <- 1 + n_event_coefs - - adjustment <- sqrt((N - K) / (N - n - K + 1)) - fhs_output$se <- fhs_output$se / adjustment - fhs_output$cov.scaled <- fhs_output$cov.scaled / (adjustment^2) + n <- length(unique(prepared_data[[idvar]])) + K <- 1 + length(coef(fhs_output)) + + adjustment_factor <- (N - K) / (N - n - K + 1) + fhs_output$se <- fhs_output$se / sqrt(adjustment_factor) + fhs_output$cov.scaled <- fhs_output$cov.scaled / adjustment_factor + + # Recalculate statistical inference + fhs_output$tstat <- coef(fhs_output) / fhs_output$se + fhs_output$pvalue <- 2 * stats::pnorm(abs(fhs_output$tstat), lower.tail = FALSE) + fhs_output$conf.low <- coef(fhs_output) - stats::qnorm(0.975) * fhs_output$se + fhs_output$conf.high <- coef(fhs_output) + stats::qnorm(0.975) * fhs_output$se + + } else if ((!FE) & TFE & (!cluster)) { + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = "iid" + ) + + } else if ((!FE) & TFE & cluster) { + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = as.formula(paste0("~", idvar)) + ) + + } else if ((!FE) & (!TFE) & (!cluster)) { + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = "iid" + ) + + } else if ((!FE) & (!TFE) & cluster) { + + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = as.formula(paste0("~", idvar)) + ) } return(fhs_output) From a6ab57f9746a56f0ba981c891da6b91a764187af Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 18:58:34 -0500 Subject: [PATCH 38/67] #42 switch to relative tolerance --- tests/testthat/test-EventStudyFEOLS.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/tests/testthat/test-EventStudyFEOLS.R b/tests/testthat/test-EventStudyFEOLS.R index 6330d71f..97ffd2a4 100644 --- a/tests/testthat/test-EventStudyFEOLS.R +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -347,14 +347,11 @@ test_that("Coefficients and Standard Errors agree with STATA", { df_test_STATA <- read.csv("./input/df_test_base_STATA.csv", col.names = c("term", "coef", "std_error")) - # Get coefficients and standard errors coef_feols <- coef(reg) se_feols <- fixest::se(reg) - epsilon <- 1e-6 - epsilon_se <- 2e-2 + epsilon <- 1e-4 - # Define coefficient mappings: R_name -> STATA_term coef_mappings <- list( "z_fd" = "zfd", "z_fd_lead2" = "F2.zfd", @@ -371,14 +368,18 @@ test_that("Coefficients and Standard Errors agree with STATA", { stata_term <- coef_mappings[[r_name]] expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "coef"] if (r_name == "z_lead3") expected <- -1 * expected # STATA sign convention - expect_equal(unname(coef_feols[r_name]), expected, tolerance = epsilon) + actual <- unname(coef_feols[r_name]) + tolerance <- abs(expected) * epsilon + expect_equal(actual, expected, tolerance = tolerance) } # Test standard errors for (r_name in names(coef_mappings)) { stata_term <- coef_mappings[[r_name]] expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "std_error"] - expect_equal(unname(se_feols[r_name]), expected, tolerance = epsilon_se) + actual <- unname(se_feols[r_name]) + tolerance <- abs(expected) * epsilon + expect_equal(actual, expected, tolerance = tolerance) } }) From fb716862a994d30c17ee92d97ed74e7f7f31e290 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 19:05:11 -0500 Subject: [PATCH 39/67] #42 reorg relocate test --- tests/testthat/test-EventStudyFEOLS.R | 64 ++++++++++++++++++++++++++- tests/testthat/test-EventStudyFHS.R | 61 ------------------------- 2 files changed, 63 insertions(+), 62 deletions(-) diff --git a/tests/testthat/test-EventStudyFEOLS.R 
b/tests/testthat/test-EventStudyFEOLS.R index 97ffd2a4..996c8275 100644 --- a/tests/testthat/test-EventStudyFEOLS.R +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -316,7 +316,7 @@ test_that("FE = FALSE, }) -test_that("Coefficients and Standard Errors agree with STATA", { +test_that("OLS coefficients and Standard Errors agree with STATA", { df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") @@ -383,3 +383,65 @@ test_that("Coefficients and Standard Errors agree with STATA", { } }) + + +test_that("FHS coefficients and Standard Errors agree with base STATA", { + + bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") + + for (i in 1:length(bools)) { + bool <- bools[i] + estimator <- "FHS" + outcomevar <- "y_base" + str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") + controls <- "x_r" + proxy <- "eta_m" + proxyIV <- "z_fd_lead3" + + idvar <- "id" + timevar <- "t" + + FE <- as.logical(substring(bool, 1, 1)) + TFE <- as.logical(substring(bool, 2, 2)) + cluster <- as.logical(substring(bool, 3, 3)) + + # Prepare the model formula for feols_FHS + formula <- PrepareModelFormula(estimator = "FHS", outcomevar, str_policy_vars, + static = FALSE, controls = controls, proxy = proxy, proxyIV = proxyIV, + kernel = "fixest", idvar = idvar, timevar = timevar, + FE = FE, TFE = TFE) + + reg <- EventStudyFEOLS_FHS(formula, df_EventStudyFHS_example, idvar, timevar, FE, TFE, cluster) + + df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") + epsilon <- 10e-6 + epsilon_se <- 10e-2 # More lenient tolerance for standard errors + + # Define coefficient mappings: R_name -> STATA_name + coef_mappings <- list( + "z_fd" = "z_fd", + "z_fd_lead2" = "z_fd_lead2", + "fit_eta_m" = "eta_m", + "z_fd_lag1" = "z_fd_lag1", + "z_fd_lag2" = "z_fd_lag2", + "z_lead3" = "z_lead3", + "z_lag3" = "z_lag3", + "x_r" = "x_r" + ) + + # Test coefficients + for (r_name in names(coef_mappings)) { + stata_name <- coef_mappings[[r_name]] + expected <- 
df_test_STATA[df_test_STATA[1] == stata_name,][[2*i]] + if (r_name == "z_lead3") expected <- expected * (-1) # STATA sign convention + expect_equal(unname(coef(reg)[r_name]), expected, tolerance = epsilon) + } + + # Test standard errors + for (r_name in names(coef_mappings)) { + stata_name <- coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i+1]] + expect_equal(unname(fixest::se(reg)[r_name]), expected, tolerance = epsilon_se) + } + } +}) diff --git a/tests/testthat/test-EventStudyFHS.R b/tests/testthat/test-EventStudyFHS.R index 46ba7c96..e3e2fe4d 100644 --- a/tests/testthat/test-EventStudyFHS.R +++ b/tests/testthat/test-EventStudyFHS.R @@ -311,67 +311,6 @@ test_that("FE = FALSE, }) -test_that("feols_FHS Coefficients and Standard Errors agree with base STATA", { - - bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") - - for (i in 1:length(bools)) { - bool <- bools[i] - estimator <- "feols_FHS" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - proxy <- "eta_m" - proxyIV <- "z_fd_lead3" - - idvar <- "id" - timevar <- "t" - - FE <- as.logical(substring(bool, 1, 1)) - TFE <- as.logical(substring(bool, 2, 2)) - cluster <- as.logical(substring(bool, 3, 3)) - - # Prepare the model formula for feols_FHS - formula <- PrepareModelFormula(estimator = "FHS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, proxy = proxy, proxyIV = proxyIV, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - reg <- EventStudyFEOLS_FHS(formula, df_EventStudyFHS_example, idvar, timevar, FE, TFE, cluster) - - df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") - epsilon <- 10e-6 - epsilon_se <- 10e-2 # More lenient tolerance for standard errors - - # Define coefficient mappings: R_name -> STATA_name - coef_mappings <- list( - "z_fd" = "z_fd", - "z_fd_lead2" = "z_fd_lead2", - "fit_eta_m" = "eta_m", - 
"z_fd_lag1" = "z_fd_lag1", - "z_fd_lag2" = "z_fd_lag2", - "z_lead3" = "z_lead3", - "z_lag3" = "z_lag3", - "x_r" = "x_r" - ) - - # Test coefficients - for (r_name in names(coef_mappings)) { - stata_name <- coef_mappings[[r_name]] - expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i]] - if (r_name == "z_lead3") expected <- expected * (-1) # STATA sign convention - expect_equal(unname(coef(reg)[r_name]), expected, tolerance = epsilon) - } - - # Test standard errors - for (r_name in names(coef_mappings)) { - stata_name <- coef_mappings[[r_name]] - expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i+1]] - expect_equal(unname(fixest::se(reg)[r_name]), expected, tolerance = epsilon_se) - } - } -}) - test_that("Coefficients and Standard Errors agree with base STATA", { bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") From ccfd73cd42f3a4b42991183747d7c703ddb6e7f2 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 19:19:18 -0500 Subject: [PATCH 40/67] #42 switch to relative tolerance for FHS --- tests/testthat/test-EventStudyFEOLS.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-EventStudyFEOLS.R b/tests/testthat/test-EventStudyFEOLS.R index 996c8275..8a838f5a 100644 --- a/tests/testthat/test-EventStudyFEOLS.R +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -405,7 +405,6 @@ test_that("FHS coefficients and Standard Errors agree with base STATA", { TFE <- as.logical(substring(bool, 2, 2)) cluster <- as.logical(substring(bool, 3, 3)) - # Prepare the model formula for feols_FHS formula <- PrepareModelFormula(estimator = "FHS", outcomevar, str_policy_vars, static = FALSE, controls = controls, proxy = proxy, proxyIV = proxyIV, kernel = "fixest", idvar = idvar, timevar = timevar, @@ -414,10 +413,8 @@ test_that("FHS coefficients and Standard Errors agree with base STATA", { reg <- EventStudyFEOLS_FHS(formula, df_EventStudyFHS_example, idvar, 
timevar, FE, TFE, cluster) df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") - epsilon <- 10e-6 - epsilon_se <- 10e-2 # More lenient tolerance for standard errors + epsilon <- 10e-4 - # Define coefficient mappings: R_name -> STATA_name coef_mappings <- list( "z_fd" = "z_fd", "z_fd_lead2" = "z_fd_lead2", @@ -434,14 +431,16 @@ test_that("FHS coefficients and Standard Errors agree with base STATA", { stata_name <- coef_mappings[[r_name]] expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i]] if (r_name == "z_lead3") expected <- expected * (-1) # STATA sign convention - expect_equal(unname(coef(reg)[r_name]), expected, tolerance = epsilon) + tolerance <- abs(expected) * epsilon + expect_equal(unname(coef(reg)[r_name]), expected, tolerance = tolerance) } # Test standard errors for (r_name in names(coef_mappings)) { stata_name <- coef_mappings[[r_name]] expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i+1]] - expect_equal(unname(fixest::se(reg)[r_name]), expected, tolerance = epsilon_se) + tolerance <- abs(expected) * epsilon + expect_equal(unname(fixest::se(reg)[r_name]), expected, tolerance = tolerance) } } }) From b7f23d388cb704aee6ae03692c2d7c809eea5944 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 21:11:25 -0500 Subject: [PATCH 41/67] #42 fx iid -> HC1 for unclustered case --- R/EventStudyFHS.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index 596feeb4..25e5b706 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -189,7 +189,7 @@ EventStudyFEOLS_FHS <- function(formula, prepared_data, fhs_output <- fixest::feols( fml = formula, data = prepared_data, - vcov = "iid" + vcov = "HC1" ) } else if ((!FE) & TFE & cluster) { @@ -205,7 +205,7 @@ EventStudyFEOLS_FHS <- function(formula, prepared_data, fhs_output <- fixest::feols( fml = formula, data = prepared_data, - vcov = "iid" + vcov = "HC1" ) } else if 
((!FE) & (!TFE) & cluster) { From c3352cbe341da279eb6e3c77fce46da3e6f55aa4 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 8 Jan 2026 21:29:33 -0500 Subject: [PATCH 42/67] #42 cl anes --- R/EventStudy.R | 2 +- tests/testthat/test-TestLinear.R | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index 413ca4f2..0c921385 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -3,7 +3,6 @@ #' @description `EventStudy` uses regression methods to estimate the effect of a policy on a given outcome. #' #' @param estimator Accepts one of "OLS" or "FHS". If "OLS" is specified, implements Ordinary Least Squares. If "FHS" is specified, implements Instrumental Variables (IV) estimator proposed in [Freyaldenhoven Hansen Shapiro (FHS, 2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). -#' @param kernel Accepts one of "estimatr" or "fixest". If "estimatr" is specified, uses the estimatr package for estimation. If "fixest" is specified, uses the fixest package for estimation. Defaults to "estimatr". #' @param data Data frame containing the variables of interest. #' @param outcomevar Character indicating column of outcome variable y. #' @param policyvar Character indicating column of policy variable z. @@ -35,6 +34,7 @@ #' when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE. #' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE. #' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE. +#' @param kernel Accepts one of "estimatr" or "fixest". 
If "estimatr" is specified, uses the estimatr package for estimation. If "fixest" is specified, uses the fixest package for estimation. Defaults to "estimatr". #' #' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. #' @import dplyr diff --git a/tests/testthat/test-TestLinear.R b/tests/testthat/test-TestLinear.R index 971cadab..d78b5981 100644 --- a/tests/testthat/test-TestLinear.R +++ b/tests/testthat/test-TestLinear.R @@ -1,5 +1,5 @@ test_that("correctly recognizes wrong variable type for estimate argument", { - estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -14,7 +14,7 @@ test_that("correctly recognizes wrong variable type for estimate argument", { }) test_that("correctly recognizes wrong variable type for pretrends", { - estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -24,7 +24,7 @@ test_that("correctly recognizes wrong variable type for pretrends", { }) test_that("correctly recognizes wrong variable type for leveling_off", { - estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = 
"id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -34,7 +34,7 @@ test_that("correctly recognizes wrong variable type for leveling_off", { }) test_that("produces only functions that are specified", { - estimate <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -80,7 +80,7 @@ test_that("produces only functions that are specified", { test_that("checks equality with STATA", { estimate <- suppressWarnings( - EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, From 6961e567f1ac7d1f5cd25059c3c70db91fe8f74e Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 15:56:17 -0500 Subject: [PATCH 43/67] #42 cl --- R/EventStudyPlot.R | 9 ++--- R/PrepareModelFormula.R | 4 +-- README.md | 4 +-- tests/testthat/test-AddCIs.R | 12 +++---- tests/testthat/test-AddMeans.R | 4 +-- tests/testthat/test-AddSuptBand.R | 4 +-- tests/testthat/test-EventStudy.R | 40 +++++++++++------------ tests/testthat/test-EventStudyPlot.R | 24 +++++++------- tests/testthat/test-PreparePlottingData.R | 16 ++++----- vignettes/documentation.Rmd | 18 +++++----- 10 files changed, 65 insertions(+), 70 deletions(-) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 9f15901b..76dbeca1 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -123,12 
+123,8 @@ EventStudyPlot <- function(estimates, # Estimation Elements ----------------------------------------------------- model_estimates <- estimates$output - model_type <- class(model_estimates) - if (model_type == "fixest") { - model_estimates_tidy <- broom::tidy(model_estimates) - } else { - model_estimates_tidy <- estimatr::tidy(model_estimates) - } + is_fixest <- class(model_estimates) == "fixest" + model_estimates_tidy <- if(is_fixest) {broom::tidy(model_estimates)} else {estimatr::tidy(model_estimates)} static_model <- length(coef(model_estimates)) == 1 if (static_model) { @@ -272,7 +268,6 @@ EventStudyPlot <- function(estimates, coefficients <- df_plt$estimate # Add column and row in matrix of coefficients in index of norm columns - is_fixest <- class(model_estimates) == "fixest" vcov <- if(is_fixest) {fixest::vcov(estimates$output)} else {estimates$output$vcov} covar <- AddZerosCovar( vcov, diff --git a/R/PrepareModelFormula.R b/R/PrepareModelFormula.R index aff72d37..37c6016c 100644 --- a/R/PrepareModelFormula.R +++ b/R/PrepareModelFormula.R @@ -13,8 +13,8 @@ #' @param kernel Character indicating the estimation kernel. Accepts "estimatr" or "fixest". Defaults to "estimatr". #' @param idvar Character indicating the identifier variable for fixed effects. Required when kernel = "fixest". #' @param timevar Character indicating the time variable for fixed effects. Required when kernel = "fixest". -#' @param FE Logical indicating whether to include unit fixed effects. Defaults to FALSE. -#' @param TFE Logical indicating whether to include time fixed effects. Defaults to FALSE. +#' @param FE Logical indicating whether to include unit fixed effects. Required when kernel = "fixest". Defaults to FALSE. +#' @param TFE Logical indicating whether to include time fixed effects. Required when kernel = "fixest". Defaults to FALSE. 
#' @return A formula object to be passed to EventStudy #' #' @importFrom stats reformulate as.formula diff --git a/README.md b/README.md index fe928ee8..1b4e48a1 100644 --- a/README.md +++ b/README.md @@ -44,14 +44,14 @@ set.seed(10) # for reproducibility of sup-t bands estimates_ols <- EventStudy( estimator = "OLS", - kernel = "fixest", data = example_data, # Use package sample data outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", - pre = 0, post = 4 + pre = 0, post = 4, + kernel = "fixest" ) plt <- EventStudyPlot(estimates = estimates_ols) diff --git a/tests/testthat/test-AddCIs.R b/tests/testthat/test-AddCIs.R index 855292cb..52ddcbdd 100644 --- a/tests/testthat/test-AddCIs.R +++ b/tests/testthat/test-AddCIs.R @@ -16,7 +16,7 @@ test_that("correctly calculates conf_level at 0.95", { # OLS ------------------------------------------ test_that("correctly recognizes wrong class for estimate argument", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -28,7 +28,7 @@ test_that("correctly recognizes wrong class for estimate argument", { }) test_that("correctly recognizes missing columns in estimates argument", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -47,7 +47,7 @@ test_that("correctly recognizes 
missing columns in estimates argument", { }) test_that("correctly recognizes wrong inputs for conf_level argument", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -65,7 +65,7 @@ test_that("correctly recognizes wrong inputs for conf_level argument", { test_that("correctly recognizes wrong class for estimate argument", { data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] - estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) @@ -76,7 +76,7 @@ test_that("correctly recognizes wrong class for estimate argument", { test_that("correctly recognizes missing columns in estimates argument", { data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] - estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) @@ -95,7 +95,7 @@ test_that("correctly recognizes missing columns in estimates argument", { test_that("correctly 
recognizes wrong inputs for conf_level argument", { data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] - estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) diff --git a/tests/testthat/test-AddMeans.R b/tests/testthat/test-AddMeans.R index 5a27cebf..b19d6e56 100644 --- a/tests/testthat/test-AddMeans.R +++ b/tests/testthat/test-AddMeans.R @@ -1,6 +1,6 @@ test_that("means are computed correctly when a first differenced variable is normalized", { suppressWarnings( - sample_estimation <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, + sample_estimation <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, @@ -20,7 +20,7 @@ test_that("means are computed correctly when a first differenced variable is nor }) test_that("means are computed correctly when the furthest lead is normalized", { - sample_estimation <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, + sample_estimation <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = 2, pre = 2, overidpre = 2, diff --git a/tests/testthat/test-AddSuptBand.R b/tests/testthat/test-AddSuptBand.R index 36fa11d9..fae6e338 100644 --- a/tests/testthat/test-AddSuptBand.R +++ b/tests/testthat/test-AddSuptBand.R @@ -2,7 +2,7 @@ test_that("check that the correct part of vcov matrix is used", { suppressWarnings( - sample_estimation <- EventStudy(estimator = "OLS", 
kernel = "estimatr", data = example_data, + sample_estimation <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, control = "x_r", TFE = TRUE, post = 2, pre = 2, overidpre = 2, @@ -24,7 +24,7 @@ test_that("check that the correct part of vcov matrix is used", { test_that("check that the correct part of vcov matrix is used", { data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] suppressWarnings( - sample_estimation <- EventStudy(estimator = "FHS", kernel = "estimatr", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", + sample_estimation <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index f7ce6d3c..1fa46b32 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -51,7 +51,7 @@ test_that("correctly creates highest order shiftvalues", { overidpost <- 11 outputs <- suppressWarnings( - EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -77,7 +77,7 @@ test_that("correctly throws an error when normalized coefficient is outside even overidpost <- 7 normalize <- 15 - expect_error(EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "OLS", data = example_data, outcomevar = 
"y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -92,7 +92,7 @@ test_that("throws an error when post + pre + overidpre + overidpost exceeds the overidpost <- 25 normalize <- 2 - expect_error(EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -107,7 +107,7 @@ test_that("removes the correct column when normalize < 0", { overidpost <- 7 normalize <- -2 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -131,7 +131,7 @@ test_that("removes the correct column when normalize = 0", { overidpost <- 7 normalize <- 0 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -153,7 +153,7 @@ test_that("does not create a first differenced variable when post, overidpost, p overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = 
TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -172,7 +172,7 @@ test_that("tests that package and STATA output agree when post, overidpost, pre, overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -198,7 +198,7 @@ test_that("does not create shiftvalues of differenced variable when post + overi overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -262,7 +262,7 @@ test_that("removes the correct column when normalize = - (pre + overidpre + 1)", overidpost <- 4 normalize <- -4 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -283,7 +283,7 @@ test_that("removes the correct column when normalize = post + overidpost", { overidpost <- 4 normalize <- 5 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = 
overidpost, @@ -363,7 +363,7 @@ test_that("correctly creates highest order leads and shiftvalues", { overidpost <- 11 outputs <- suppressWarnings( - EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -389,7 +389,7 @@ test_that("correctly throws an error when normalized coefficient is outside even overidpost <- 7 normalize <- 15 - expect_error(EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -404,7 +404,7 @@ test_that("throws an error when post + pre + overidpre + overidpost exceeds the overidpost <- 25 normalize <- 2 - expect_error(EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + expect_error(EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -419,7 +419,7 @@ test_that("removes the correct column when normalize < 0", { overidpost <- 7 normalize <- -2 - outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", 
post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -443,7 +443,7 @@ test_that("removes the correct column when normalize = 0", { overidpost <- 7 normalize <- 0 - outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -505,7 +505,7 @@ test_that("removes the correct column when normalize = - (pre + overidpre + 1)", overidpost <- 4 normalize <- -4 - outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -526,7 +526,7 @@ test_that("removes the correct column when normalize = post + overidpost", { overidpost <- 4 normalize <- 5 - outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -543,7 +543,7 @@ test_that("proxyIV selection works", { expect_message( suppressWarnings( - EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", + EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 2, overidpost = 2, pre = 1, overidpre = 
2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ), @@ -552,7 +552,7 @@ test_that("proxyIV selection works", { expect_message( suppressWarnings( - EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", + EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 2, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ), @@ -561,7 +561,7 @@ test_that("proxyIV selection works", { expect_message( suppressWarnings( - EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", + EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, overidpost = 2, pre = 6, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) ), diff --git a/tests/testthat/test-EventStudyPlot.R b/tests/testthat/test-EventStudyPlot.R index 1f0a8735..0850032b 100644 --- a/tests/testthat/test-EventStudyPlot.R +++ b/tests/testthat/test-EventStudyPlot.R @@ -6,11 +6,11 @@ if ("get_labs" %in% getNamespaceExports("ggplot2")) { test_that("Dimension of OLS and FHS estimation output is the same", { - estimates_ols <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_smooth_m", + estimates_ols <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) - estimates_fhs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_smooth_m", + estimates_fhs <- EventStudy(estimator = 
"FHS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", proxy = "eta_r", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -33,7 +33,7 @@ test_that("Dimension of OLS and FHS estimation output is the same", { test_that("correctly changes x-axis and y-axis labels", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -50,7 +50,7 @@ test_that("correctly changes x-axis and y-axis labels", { test_that("x- and y-axis breaks and limits are correct", { - estimates = EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -76,7 +76,7 @@ test_that("x- and y-axis breaks and limits are correct", { test_that("correctly adds mean of outcome var", { - estimates = EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -101,7 +101,7 @@ test_that("correctly adds mean of outcome var", { test_that("sup-t bands are appropriately present or absent", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 
3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -120,7 +120,7 @@ test_that("sup-t bands are appropriately present or absent", { test_that("confidence intervals are appropriately present or absent", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -138,7 +138,7 @@ test_that("confidence intervals are appropriately present or absent", { test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or absent", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -197,7 +197,7 @@ test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or abs test_that("Sup-t bands are wider than confidence intervals", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) @@ -220,7 +220,7 @@ test_that("Sup-t bands are wider than confidence intervals", { test_that("computed smoothest path for examples is within expectations", { - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre 
= 4, overidpost = 5, normalize = - 3) @@ -229,7 +229,7 @@ test_that("computed smoothest path for examples is within expectations", { expect_equal(p$data$smoothest_path, matrix(rep(0, nrow(p$data)))) - estimates <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_smooth_m", + estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3) @@ -255,7 +255,7 @@ test_that("computed smoothest path for examples is within expectations", { test_that("computed smoothest path for FHS has at least two coefficients almost equal to zero", { - estimates <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_jump_m", + estimates <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_jump_m", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_r", post = 3, pre = 0, overidpre = 3, overidpost = 1, normalize = -1, proxyIV = "z_fd_lead3") diff --git a/tests/testthat/test-PreparePlottingData.R b/tests/testthat/test-PreparePlottingData.R index defb29d1..4cc0fec0 100644 --- a/tests/testthat/test-PreparePlottingData.R +++ b/tests/testthat/test-PreparePlottingData.R @@ -4,7 +4,7 @@ test_that("labels are unique", { - list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -32,7 +32,7 @@ test_that("labels are unique", { test_that("the correct labels are created", { - list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, 
outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -59,7 +59,7 @@ test_that("the correct labels are created", { test_that("the labels are ordered correctly", { - list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -86,7 +86,7 @@ test_that("the labels are ordered correctly", { test_that("the control variable is removed", { - list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -113,7 +113,7 @@ test_that("the control variable is removed", { test_that("the largest lag label is correctly created", { - list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -142,7 +142,7 @@ test_that("the largest lag 
label is correctly created", { test_that("the largest lead label is correctly created", { - list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -171,7 +171,7 @@ test_that("the largest lead label is correctly created", { test_that("all columns besides 'term' and 'label' are 0 for the normalization column", { - list_EventStudy <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) @@ -206,7 +206,7 @@ test_that("all columns besides 'term' and 'label' are 0 for the normalization co test_that("all columns besides 'term' and 'label' are 0 for the proxyIV column", { - list_EventStudy <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + list_EventStudy <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) diff --git a/vignettes/documentation.Rmd b/vignettes/documentation.Rmd index 2e6b4447..0e906c36 100644 --- a/vignettes/documentation.Rmd +++ b/vignettes/documentation.Rmd @@ -49,24 +49,24 @@ head(example_data) Here is an example using the sample data: ```{r Basic 
Eventstudy Example - Show Code, eval = FALSE} -results <- EventStudy(estimator = "OLS", kernel = "estimatr", +results <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_jump_m", policyvar = "z", idvar = "id", timevar = "t", - post = 3, - pre = 0) + post = 3, pre = 0, + kernel = "estimatr") ``` ```{r Basic Eventstudy Example - Run Code, echo = FALSE} -results <- EventStudy(estimator = "OLS", kernel = "estimatr", +results <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_jump_m", policyvar = "z", idvar = "id", timevar = "t", - post = 3, - pre = 0) + post = 3, pre = 0, + kernel = "estimatr") ``` ```{r Basic Eventstudy Example - Show Results 1, echo=TRUE, eval=TRUE} summary(results$output) @@ -120,14 +120,14 @@ summary(results$output) This function is designed to use the output of the `EventStudy()` and returns a ggplot object. Here is an example of using the function with some default settings: ```{r EventStudyPlot example 1, fig.dim = c(7, 5)} -eventstudy_estimates_ols <- EventStudy(estimator = "OLS", kernel = "estimatr", +eventstudy_estimates_ols <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_jump_m", policyvar = "z", idvar = "id", timevar = "t", - post = 3, - pre = 0) + post = 3, pre = 0, + kernel = "estimatr") EventStudyPlot(estimates = eventstudy_estimates_ols, xtitle = "Event time", From 703861941b50a9483e49490052eea069e1c353f2 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 16:29:35 -0500 Subject: [PATCH 44/67] #42 cl --- tests/testthat/test-EventStudy.R | 10 +++++----- vignettes/documentation.Rmd | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 1fa46b32..90f3a5a1 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -219,7 +219,7 @@ test_that("does not create leads of differenced variable when pre + 
overidpre < overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -240,7 +240,7 @@ test_that("removes the correct column when normalize > 0", { overidpost <- 7 normalize <- 2 - outputs <- EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -467,7 +467,7 @@ test_that("FHS does not run when post, pre, overidpre, and overidpost are all 0" expect_error( outputs <- - EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -483,7 +483,7 @@ test_that("removes the correct column when normalize > 0", { overidpost <- 7 normalize <- 2 - outputs <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + outputs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, @@ -618,7 +618,7 @@ test_that("feols_FHS yields the same results as FHS", { test_that("warning with correct normalize and pre is thrown when anticpation effects are allowed and 
anticipation_effects_normalization is TRUE", { expect_warning( - EventStudy(estimator = "OLS", kernel = "estimatr", data = example_data, outcomevar = "y_base", + EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", FE = TRUE, TFE = TRUE, post = 1, pre = 1, overidpre = 4, overidpost = 5, normalize = - 1, cluster = TRUE, anticipation_effects_normalization = TRUE), diff --git a/vignettes/documentation.Rmd b/vignettes/documentation.Rmd index 0e906c36..71002d40 100644 --- a/vignettes/documentation.Rmd +++ b/vignettes/documentation.Rmd @@ -56,7 +56,7 @@ results <- EventStudy(estimator = "OLS", idvar = "id", timevar = "t", post = 3, pre = 0, - kernel = "estimatr") + kernel = "fixest") ``` ```{r Basic Eventstudy Example - Run Code, echo = FALSE} results <- EventStudy(estimator = "OLS", @@ -66,7 +66,7 @@ results <- EventStudy(estimator = "OLS", idvar = "id", timevar = "t", post = 3, pre = 0, - kernel = "estimatr") + kernel = "fixest") ``` ```{r Basic Eventstudy Example - Show Results 1, echo=TRUE, eval=TRUE} summary(results$output) @@ -127,7 +127,7 @@ eventstudy_estimates_ols <- EventStudy(estimator = "OLS", idvar = "id", timevar = "t", post = 3, pre = 0, - kernel = "estimatr") + kernel = "fixest") EventStudyPlot(estimates = eventstudy_estimates_ols, xtitle = "Event time", From ddec6eb59c5af6dbc2f15a6a60d5179dff7f14ca Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 16:41:17 -0500 Subject: [PATCH 45/67] #42 cl combine tests --- tests/testthat/test-EventStudy.R | 50 ++++++++++++-------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 90f3a5a1..5235b23c 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -172,24 +172,36 @@ test_that("tests that package and STATA output agree when post, overidpost, 
pre, overidpost <- 0 normalize <- -1 - outputs <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + outputs_fixest <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", policyvar = "z", idvar = "id", timevar = "t", FE = TRUE, TFE = TRUE, post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "fixest") + outputs_estimatr <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", + policyvar = "z", idvar = "id", timevar = "t", + FE = TRUE, TFE = TRUE, + post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, + normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE, + kernel = "estimatr") - coef_package <- outputs$output$coefficients[[1]] - std_package <- outputs$output$std.error[[1]] + coef_fixest <- coef(outputs_fixest$output)[[1]] + std_fixest <- fixest::se(outputs_fixest$output)[[1]] + coef_estimatr <- outputs_estimatr$output$coefficients[[1]] + std_estimatr <- outputs_estimatr$output$std.error[[1]] STATA_output <- read.csv('./input/df_test_base_STATA_allzero.csv') coef_STATA <- STATA_output$coef[[1]] std_STATA <- STATA_output$std_error[[1]] epsilon <- 10e-7 - expect_equal(coef_package, coef_STATA, tolerance = epsilon) - expect_equal(std_package, std_STATA, tolerance = epsilon) + expect_equal(coef_fixest, coef_STATA, tolerance = epsilon) + expect_equal(std_fixest, std_STATA, tolerance = epsilon) + expect_equal(coef_estimatr, coef_STATA, tolerance = epsilon) + expect_equal(std_estimatr, std_STATA, tolerance = epsilon) }) + test_that("does not create shiftvalues of differenced variable when post + overidpost - 1 < 1", { post <- 1 @@ -298,32 +310,6 @@ test_that("removes the correct column when normalize = post + overidpost", { # feols 
--------------------------------------------------------------------- -test_that("tests that package and STATA output agree when post, overidpost, pre, overidpre are zero", { - - post <- 0 - pre <- 0 - overidpre <- 0 - overidpost <- 0 - normalize <- -1 - - outputs <- EventStudy(estimator = "OLS", kernel = "fixest", data = example_data, outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - FE = TRUE, TFE = TRUE, - post = post, pre = pre, overidpre = overidpre, overidpost = overidpost, - normalize = normalize, cluster = TRUE, anticipation_effects_normalization = TRUE) - - coef_package <- coef(outputs$output)[[1]] - std_package <- fixest::se(outputs$output)[[1]] - - STATA_output <- read.csv('./input/df_test_base_STATA_allzero.csv') - coef_STATA <- STATA_output$coef[[1]] - std_STATA <- STATA_output$std_error[[1]] - - epsilon <- 10e-7 - expect_equal(coef_package, coef_STATA, tolerance = epsilon) - expect_equal(std_package, std_STATA, tolerance = epsilon) -}) - test_that("feols estimator coefficients match OLS coefficients", { outputs_ols <- suppressWarnings( From 7db8a911915ba1706de93b415a35526e2d592aad Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 16:42:31 -0500 Subject: [PATCH 46/67] #42 cl drop redundant test --- tests/testthat/test-EventStudy.R | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 5235b23c..4ed3ace7 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -308,37 +308,6 @@ test_that("removes the correct column when normalize = post + overidpost", { expect_true(!normalization_column %in% shiftvalues) }) -# feols --------------------------------------------------------------------- - -test_that("feols estimator coefficients match OLS coefficients", { - - outputs_ols <- suppressWarnings( - EventStudy(estimator = "OLS", kernel = "estimatr", 
data = example_data, outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, - post = 2, pre = 3, overidpre = 4, - overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) - ) - - outputs_feols <- suppressWarnings( - EventStudy(estimator = "OLS", kernel = "fixest", data = example_data, outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, - post = 2, pre = 3, overidpre = 4, - overidpost = 11, normalize = - 1, - cluster = TRUE, anticipation_effects_normalization = TRUE) - ) - - coef_ols <- coef(outputs_ols$output) - se_ols <- outputs_ols$output$std.error - - coef_feols <- coef(outputs_feols$output) - se_feols <- fixest::se(outputs_feols$output) - - expect_true(all(abs(coef_feols - coef_ols) <= 1e-6 + 1e-6 * abs(coef_ols))) -}) - # FHS --------------------------------------------------------------------- test_that("correctly creates highest order leads and shiftvalues", { From 209e7e18cd7b2d87557ec519719f713ee156b9f6 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:13:44 -0500 Subject: [PATCH 47/67] #42 cl combine tests --- tests/testthat/test-EventStudyFEOLS.R | 69 ------------------- tests/testthat/test-EventStudyOLS.R | 98 +++++++++++++++++++++------ 2 files changed, 78 insertions(+), 89 deletions(-) diff --git a/tests/testthat/test-EventStudyFEOLS.R b/tests/testthat/test-EventStudyFEOLS.R index 8a838f5a..6a1b3a76 100644 --- a/tests/testthat/test-EventStudyFEOLS.R +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -316,75 +316,6 @@ test_that("FE = FALSE, }) -test_that("OLS coefficients and Standard Errors agree with STATA", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", 
"z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- TRUE - TFE <- TRUE - cluster <- TRUE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - - df_test_STATA <- read.csv("./input/df_test_base_STATA.csv", col.names = c("term", "coef", "std_error")) - - coef_feols <- coef(reg) - se_feols <- fixest::se(reg) - - epsilon <- 1e-4 - - coef_mappings <- list( - "z_fd" = "zfd", - "z_fd_lead2" = "F2.zfd", - "z_fd_lead3" = "F3.zfd", - "z_fd_lag1" = "L.zfd", - "z_fd_lag2" = "L2.zfd", - "z_lead3" = "F3.z", - "z_lag3" = "L3.z", - "x_r" = "x_r" - ) - - # Test coefficients - for (r_name in names(coef_mappings)) { - stata_term <- coef_mappings[[r_name]] - expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "coef"] - if (r_name == "z_lead3") expected <- -1 * expected # STATA sign convention - actual <- unname(coef_feols[r_name]) - tolerance <- abs(expected) * epsilon - expect_equal(actual, expected, tolerance = tolerance) - } - - # Test standard errors - for (r_name in names(coef_mappings)) { - stata_term <- coef_mappings[[r_name]] - expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "std_error"] - actual <- unname(se_feols[r_name]) - tolerance <- abs(expected) * epsilon - expect_equal(actual, expected, tolerance = tolerance) - } - -}) - - test_that("FHS coefficients and Standard Errors agree with base STATA", { bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") diff --git a/tests/testthat/test-EventStudyOLS.R b/tests/testthat/test-EventStudyOLS.R index 265496fd..0aa893bc 100644 --- a/tests/testthat/test-EventStudyOLS.R +++ b/tests/testthat/test-EventStudyOLS.R @@ -333,7 +333,7 @@ test_that("Coefficients and Standard 
Errors agree with base STATA", { str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") controls <- "x_r" - event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, + event_study_formula_estimatr <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static = F, controls = controls) idvar <- "id" @@ -343,8 +343,8 @@ test_that("Coefficients and Standard Errors agree with base STATA", { TFE <- TRUE cluster <- TRUE - reg <- EventStudyOLS( - prepared_model_formula = event_study_formula, + reg_estimatr <- EventStudyOLS( + prepared_model_formula = event_study_formula_estimatr, prepared_data = df_test_EventStudyOLS, idvar = idvar, timevar = timevar, @@ -357,21 +357,79 @@ test_that("Coefficients and Standard Errors agree with base STATA", { epsilon <- 10e-7 - expect_equal(unname(reg$coefficients["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[2]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[2]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["coef"]][1], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[2]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[2]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_lead3"]), -1 * df_test_STATA[df_test_STATA["term"] == "F3.z",][["coef"]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[2]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[2]], tolerance = epsilon) - - expect_equal(unname(reg$std.error["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[3]], tolerance = 
epsilon) - expect_equal(unname(reg$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[3]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["std_error"]][1], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[3]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_fd_lag2"]),df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[3]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.z",][["std_error"]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_lag3"]),df_test_STATA[df_test_STATA["term"] == "L3.z",][[3]], tolerance = epsilon) - expect_equal(unname(reg$std.error["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[3]], tolerance = epsilon * 10) + expect_equal(unname(reg_estimatr$coefficients["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[2]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[2]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["coef"]][1], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[2]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[2]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_lead3"]), -1 * df_test_STATA[df_test_STATA["term"] == "F3.z",][["coef"]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[2]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["x_r"]), df_test_STATA[df_test_STATA["term"] == 
"x_r",][[2]], tolerance = epsilon) + + expect_equal(unname(reg_estimatr$std.error["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[3]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[3]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["std_error"]][1], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[3]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_fd_lag2"]),df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[3]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.z",][["std_error"]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_lag3"]),df_test_STATA[df_test_STATA["term"] == "L3.z",][[3]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[3]], tolerance = epsilon * 10) + + event_study_formula_fixest <- PrepareModelFormula( + estimator = "OLS", + outcomevar = outcomevar, + str_policy_vars = str_policy_vars, + static = FALSE, + controls = controls, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + kernel = "fixest" + ) + + reg_fixest <- EventStudyFEOLS( + formula = event_study_formula_fixest, + prepared_data = df_test_EventStudyOLS, + idvar = idvar, + timevar = timevar, + FE = FE, + TFE = TFE, + cluster = cluster + ) + + coef_feols <- coef(reg_fixest) + se_feols <- fixest::se(reg_fixest) + + epsilon <- 1e-4 + + coef_mappings <- list( + "z_fd" = "zfd", + "z_fd_lead2" = "F2.zfd", + "z_fd_lead3" = "F3.zfd", + "z_fd_lag1" = "L.zfd", + "z_fd_lag2" = "L2.zfd", + "z_lead3" = "F3.z", + "z_lag3" = "L3.z", + "x_r" = "x_r" + ) + + # Test coefficients + for (r_name in names(coef_mappings)) { + stata_term <- 
coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "coef"] + if (r_name == "z_lead3") expected <- -1 * expected # STATA sign convention + actual <- unname(coef_feols[r_name]) + tolerance <- abs(expected) * epsilon + expect_equal(actual, expected, tolerance = tolerance) + } + + # Test standard errors + for (r_name in names(coef_mappings)) { + stata_term <- coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA["term"] == stata_term, "std_error"] + actual <- unname(se_feols[r_name]) + tolerance <- abs(expected) * epsilon + expect_equal(actual, expected, tolerance = tolerance) + } }) From 1c4a5c928d91085a02fd1b58bb9d32155073ebe1 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:14:38 -0500 Subject: [PATCH 48/67] #42 cl drop useless tests --- tests/testthat/test-EventStudyFEOLS.R | 318 -------------------------- 1 file changed, 318 deletions(-) diff --git a/tests/testthat/test-EventStudyFEOLS.R b/tests/testthat/test-EventStudyFEOLS.R index 6a1b3a76..2ef82881 100644 --- a/tests/testthat/test-EventStudyFEOLS.R +++ b/tests/testthat/test-EventStudyFEOLS.R @@ -1,321 +1,3 @@ -test_that("FE = TRUE, - TFE = TRUE, - cluster = TRUE works", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- TRUE - TFE <- TRUE - cluster <- TRUE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if (FE & TFE & cluster) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } 
- - expect_true(class(reg) == "fixest") - expect_true(all(reg$fixef_vars == c(idvar, timevar))) - expect_true(length(reg$fixef_sizes) >= 1) - -}) - -test_that("FE = FALSE, - TFE = TRUE, - cluster = TRUE works", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- FALSE - TFE <- TRUE - cluster <- TRUE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if ((!FE) & TFE & cluster) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } - - expect_true(class(reg) == "fixest") - expect_true(reg$fixef_vars == timevar) - expect_true(length(reg$fixef_sizes) >= 1) - -}) - -test_that("FE = TRUE, - TFE = FALSE, - cluster = TRUE works", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- TRUE - TFE <- FALSE - cluster <- TRUE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if (FE & (!TFE) & cluster) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } - - expect_true(class(reg) == "fixest") - expect_true(reg$fixef_vars == idvar) - 
expect_true(length(reg$fixef_sizes) >= 1) - -}) - -test_that("FE = FALSE, - TFE = FALSE, - cluster = TRUE works", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- FALSE - TFE <- FALSE - cluster <- TRUE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if ((!FE) & (!TFE) & cluster) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } - - expect_true(class(reg) == "fixest") - expect_true(is.null(reg$fixef_vars)) - -}) - -test_that("FE = TRUE, - TFE = TRUE, - cluster = FALSE works", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- TRUE - TFE <- TRUE - cluster <- FALSE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if (FE & TFE & (!cluster)) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } - - expect_true(class(reg) == "fixest") - expect_true(all(reg$fixef_vars == c(idvar, timevar))) - expect_true(length(reg$fixef_sizes) >= 1) - -}) - -test_that("FE = FALSE, - TFE = TRUE, - cluster = FALSE works", { - - 
df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- FALSE - TFE <- TRUE - cluster <- FALSE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if ((!FE) & TFE & (!cluster)) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } - - expect_true(class(reg) == "fixest") - expect_true(reg$fixef_vars == timevar) - expect_true(length(reg$fixef_sizes) >= 1) - -}) - -test_that("FE = TRUE, - TFE = FALSE, - cluster = FALSE works", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- "t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- TRUE - TFE <- FALSE - cluster <- FALSE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if (FE & (!TFE) & (!cluster)) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } - - expect_true(class(reg) == "fixest") - expect_true(reg$fixef_vars == idvar) - expect_true(length(reg$fixef_sizes) >= 1) - -}) - -test_that("FE = FALSE, - TFE = FALSE, - cluster = FALSE works", { - - df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") - - idvar <- "id" - timevar <- 
"t" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - - FE <- FALSE - TFE <- FALSE - cluster <- FALSE - - event_study_formula <- PrepareModelFormula(estimator = "OLS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - if ((!FE) & (!TFE) & (!cluster)) { - - reg <- EventStudyFEOLS( - formula = event_study_formula, - prepared_data = df_test_EventStudyOLS, - idvar = idvar, - timevar = timevar, - FE = FE, - TFE = TFE, - cluster = cluster - ) - } - - expect_true(class(reg) == "fixest") - expect_true(is.null(reg$fixef_vars)) - -}) - test_that("FHS coefficients and Standard Errors agree with base STATA", { bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") From 551bb3e690b03efc1649c1f9f16c398ac920b013 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:23:07 -0500 Subject: [PATCH 49/67] #42 cl consolidate tests fhs --- tests/testthat/test-EventStudyFEOLS.R | 59 ------------------- tests/testthat/test-EventStudyFHS.R | 81 +++++++++++++++++++-------- 2 files changed, 59 insertions(+), 81 deletions(-) delete mode 100644 tests/testthat/test-EventStudyFEOLS.R diff --git a/tests/testthat/test-EventStudyFEOLS.R b/tests/testthat/test-EventStudyFEOLS.R deleted file mode 100644 index 2ef82881..00000000 --- a/tests/testthat/test-EventStudyFEOLS.R +++ /dev/null @@ -1,59 +0,0 @@ -test_that("FHS coefficients and Standard Errors agree with base STATA", { - - bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") - - for (i in 1:length(bools)) { - bool <- bools[i] - estimator <- "FHS" - outcomevar <- "y_base" - str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") - controls <- "x_r" - proxy <- "eta_m" - proxyIV <- "z_fd_lead3" - - idvar <- "id" - timevar <- "t" - - FE <- 
as.logical(substring(bool, 1, 1)) - TFE <- as.logical(substring(bool, 2, 2)) - cluster <- as.logical(substring(bool, 3, 3)) - - formula <- PrepareModelFormula(estimator = "FHS", outcomevar, str_policy_vars, - static = FALSE, controls = controls, proxy = proxy, proxyIV = proxyIV, - kernel = "fixest", idvar = idvar, timevar = timevar, - FE = FE, TFE = TFE) - - reg <- EventStudyFEOLS_FHS(formula, df_EventStudyFHS_example, idvar, timevar, FE, TFE, cluster) - - df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") - epsilon <- 10e-4 - - coef_mappings <- list( - "z_fd" = "z_fd", - "z_fd_lead2" = "z_fd_lead2", - "fit_eta_m" = "eta_m", - "z_fd_lag1" = "z_fd_lag1", - "z_fd_lag2" = "z_fd_lag2", - "z_lead3" = "z_lead3", - "z_lag3" = "z_lag3", - "x_r" = "x_r" - ) - - # Test coefficients - for (r_name in names(coef_mappings)) { - stata_name <- coef_mappings[[r_name]] - expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i]] - if (r_name == "z_lead3") expected <- expected * (-1) # STATA sign convention - tolerance <- abs(expected) * epsilon - expect_equal(unname(coef(reg)[r_name]), expected, tolerance = tolerance) - } - - # Test standard errors - for (r_name in names(coef_mappings)) { - stata_name <- coef_mappings[[r_name]] - expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i+1]] - tolerance <- abs(expected) * epsilon - expect_equal(unname(fixest::se(reg)[r_name]), expected, tolerance = tolerance) - } - } -}) diff --git a/tests/testthat/test-EventStudyFHS.R b/tests/testthat/test-EventStudyFHS.R index e3e2fe4d..1bf2cc19 100644 --- a/tests/testthat/test-EventStudyFHS.R +++ b/tests/testthat/test-EventStudyFHS.R @@ -323,9 +323,6 @@ test_that("Coefficients and Standard Errors agree with base STATA", { controls <- "x_r" proxy <- "eta_m" proxyIV <- "z_fd_lead3" - event_study_formula <- PrepareModelFormula(estimator, outcomevar, - str_policy_vars, FALSE, controls, proxy, proxyIV) - idvar <- "id" timevar <- "t" @@ -333,8 +330,18 @@ test_that("Coefficients 
and Standard Errors agree with base STATA", { TFE <- as.logical(substring(bool, 2, 2)) cluster <- as.logical(substring(bool, 3, 3)) - reg <- EventStudyFHS( - prepared_model_formula = event_study_formula, + formula_fixest <- PrepareModelFormula( + estimator, outcomevar, str_policy_vars, static = FALSE, controls, + proxy, proxyIV, idvar, timevar, FE, TFE, kernel = "fixest") + formula_estimatr <- PrepareModelFormula( + estimator, outcomevar, str_policy_vars, static = FALSE, controls, + proxy, proxyIV + ) + reg_fixest <- EventStudyFEOLS_FHS( + formula_fixest, df_EventStudyFHS_example, idvar, timevar, FE, TFE, cluster + ) + reg_estimatr <- EventStudyFHS( + prepared_model_formula = formula_estimatr, prepared_data = df_EventStudyFHS_example, idvar = idvar, timevar = timevar, @@ -346,23 +353,53 @@ test_that("Coefficients and Standard Errors agree with base STATA", { df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") epsilon <- 10e-6 - expect_equal(unname(reg$coefficients["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i]]*(-1), tolerance = epsilon) - expect_equal(unname(reg$coefficients["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i]], tolerance = epsilon) - expect_equal(unname(reg$coefficients["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i]], tolerance = epsilon) - - 
expect_equal(unname(reg$std.error["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i+1]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i+1]], tolerance = epsilon) - expect_equal(unname(reg$std.error["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i+1]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i+1]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i+1]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i+1]], tolerance = epsilon) - expect_equal(unname(reg$std.error["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i+1]], tolerance = epsilon) - expect_equal(unname(reg$std.error["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i]]*(-1), tolerance = epsilon) + expect_equal(unname(reg_estimatr$coefficients["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i]], tolerance = epsilon) + 
expect_equal(unname(reg_estimatr$coefficients["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i]], tolerance = epsilon) + + expect_equal(unname(reg_estimatr$std.error["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i+1]], tolerance = epsilon) + expect_equal(unname(reg_estimatr$std.error["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i+1]], tolerance = epsilon) + + epsilon <- 10e-4 + + coef_mappings <- list( + "z_fd" = "z_fd", + "z_fd_lead2" = "z_fd_lead2", + "fit_eta_m" = "eta_m", + "z_fd_lag1" = "z_fd_lag1", + "z_fd_lag2" = "z_fd_lag2", + "z_lead3" = "z_lead3", + "z_lag3" = "z_lag3", + "x_r" = "x_r" + ) + + # Test coefficients + for (r_name in names(coef_mappings)) { + stata_name <- coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA[1] == stata_name,][[2*i]] + if (r_name == "z_lead3") expected <- expected * (-1) # STATA sign convention + tolerance <- abs(expected) * epsilon + expect_equal(unname(coef(reg_fixest)[r_name]), expected, tolerance = tolerance) + } + + # Test standard errors + for (r_name in names(coef_mappings)) { + stata_name <- coef_mappings[[r_name]] + expected <- df_test_STATA[df_test_STATA[1] == 
stata_name,][[2*i+1]] + tolerance <- abs(expected) * epsilon + expect_equal(unname(fixest::se(reg_fixest)[r_name]), expected, tolerance = tolerance) + } } }) From 990166c9e8ed39df728b9d570999d07f5b23718d Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:24:45 -0500 Subject: [PATCH 50/67] #42 cl drop redundant test --- tests/testthat/test-EventStudy.R | 46 -------------------------------- 1 file changed, 46 deletions(-) diff --git a/tests/testthat/test-EventStudy.R b/tests/testthat/test-EventStudy.R index 4ed3ace7..57006857 100644 --- a/tests/testthat/test-EventStudy.R +++ b/tests/testthat/test-EventStudy.R @@ -524,52 +524,6 @@ test_that("proxyIV selection works", { ) }) -test_that("feols_FHS yields the same results as FHS", { - - # Run FHS estimator - fhs_result <- EventStudy(estimator = "FHS", kernel = "estimatr", data = example_data, outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", - post = 2, pre = 1, overidpre = 2, overidpost = 3, - normalize = -2, cluster = TRUE) - - # Run feols_FHS estimator with same parameters - feols_fhs_result <- EventStudy(estimator = "FHS", kernel = "fixest", data = example_data, outcomevar = "y_base", - policyvar = "z", idvar = "id", timevar = "t", - controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", - post = 2, pre = 1, overidpre = 2, overidpost = 3, - normalize = -2, cluster = TRUE) - - # Extract coefficients - fhs_coefs <- coef(fhs_result$output) - feols_fhs_coefs <- coef(feols_fhs_result$output) - - # Extract standard errors - fhs_se <- fhs_result$output$std.error - feols_fhs_se <- fixest::se(feols_fhs_result$output) - - # For FHS, coefficients include the endogenous variable "eta_m" - # For feols_FHS, coefficients include "fit_eta_m" (fitted endogenous) and the event study vars - # The event study coefficients should match - fhs_event_coefs <- fhs_coefs[names(fhs_coefs) != 
"eta_m"] - feols_fhs_event_coefs <- feols_fhs_coefs[names(feols_fhs_coefs) != "fit_eta_m"] - - # Check that event study coefficients are the same (within tolerance) - expect_equal(fhs_event_coefs, feols_fhs_event_coefs, tolerance = 1e-10) - - # Check that the endogenous variable coefficients are the same - # (FHS reports coef of eta_m, feols_FHS reports coef of fit_eta_m) - expect_equal(unname(fhs_coefs["eta_m"]), unname(feols_fhs_coefs["fit_eta_m"]), tolerance = 1e-10) - - # Check that standard errors for event study vars are the same (within reasonable tolerance) - fhs_event_se <- fhs_se[names(fhs_se) != "eta_m"] - feols_fhs_event_se <- feols_fhs_se[names(feols_fhs_se) != "fit_eta_m"] - expect_equal(fhs_event_se, feols_fhs_event_se, tolerance = 1e-6) - - # Check that SE for endogenous variable is the same - expect_equal(unname(fhs_se["eta_m"]), unname(feols_fhs_se["fit_eta_m"]), tolerance = 1e-6) -}) - test_that("warning with correct normalize and pre is thrown when anticpation effects are allowed and anticipation_effects_normalization is TRUE", { expect_warning( From b823f7619cad6d606657e769d250c7a04181ac5e Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:27:41 -0500 Subject: [PATCH 51/67] #42 bd add roxygen parameters --- DESCRIPTION | 1 + R/EventStudyFHS.R | 1 + R/EventStudyOLS.R | 1 + R/EventStudyPlot.R | 1 + R/TestLinear.R | 1 + 5 files changed, 5 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 32b7fea2..09e61333 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,6 +43,7 @@ Imports: data.table, dplyr, estimatr, + fixest, ggplot2, MASS, rlang, diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index 25e5b706..3eb7ae5d 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -12,6 +12,7 @@ #' #' @return A data.frame that contains the estimates for the event study coefficients. 
#' @import estimatr +#' @import fixest #' @importFrom stats qnorm pnorm #' @keywords internal #' @noRd diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index 100c510a..e0fbb8ac 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -12,6 +12,7 @@ #' #' @return A data.frame that contains the estimates for the event study coefficients. #' @import estimatr +#' @import fixest #' @keywords internal #' @noRd #' diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 76dbeca1..085c869f 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -29,6 +29,7 @@ #' @return The Event-Study plot as a ggplot2 object. #' @import ggplot2 dplyr #' @import estimatr +#' @importFrom fixest vcov #' @importFrom rlang .data #' @importFrom broom tidy #' @importFrom data.table setorder diff --git a/R/TestLinear.R b/R/TestLinear.R index 70e9a279..72c24102 100644 --- a/R/TestLinear.R +++ b/R/TestLinear.R @@ -14,6 +14,7 @@ #' #' @return A data frame containing the F-statistic and p-value for the specified test(s). 
#' @importFrom car linearHypothesis +#' @importFrom fixest fitstat #' @export #' #' @examples From 9e4b01af3358771955a3717d100232a740a5e956 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:41:12 -0500 Subject: [PATCH 52/67] #42 bd roxygen --- DESCRIPTION | 1 + NAMESPACE | 3 +++ R/EventStudy.R | 5 +++-- cran-comments.md | 2 +- man/EventStudy.Rd | 8 ++++++-- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 09e61333..333cc479 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,6 +44,7 @@ Imports: dplyr, estimatr, fixest, + broom, ggplot2, MASS, rlang, diff --git a/NAMESPACE b/NAMESPACE index 7867b6ac..018f3e07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,9 +5,11 @@ export(EventStudyPlot) export(TestLinear) import(dplyr) import(estimatr) +import(fixest) import(ggplot2) import(stringr) importFrom(MASS,mvrnorm) +importFrom(broom,tidy) importFrom(car,linearHypothesis) importFrom(data.table,":=") importFrom(data.table,.SD) @@ -19,6 +21,7 @@ importFrom(data.table,setnames) importFrom(data.table,setorder) importFrom(data.table,setorderv) importFrom(data.table,shift) +importFrom(fixest,fitstat) importFrom(pracma,inv) importFrom(pracma,pinv) importFrom(rlang,.data) diff --git a/R/EventStudy.R b/R/EventStudy.R index 0c921385..b99d88d0 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -34,7 +34,7 @@ #' when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE. #' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE. #' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE. 
-#' @param kernel Accepts one of "estimatr" or "fixest". If "estimatr" is specified, uses the estimatr package for estimation. If "fixest" is specified, uses the fixest package for estimation. Defaults to "estimatr". +#' @param kernel Accepts one of "estimatr" or "fixest". If "estimatr" is specified, uses the estimatr package for estimation. If "fixest" is specified, uses the fixest package for estimation. Defaults to "estimatr" (deprecated - will change to "fixest" in a future release). #' #' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. #' @import dplyr @@ -64,7 +64,8 @@ #' summary(eventstudy_model$output) #' #' ### data.frame of estimates -#' estimatr::tidy(eventstudy_model$output) +#' estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' +#' broom::tidy(eventstudy_model$output) # for kernel='fixest' #' #' ### Access arguments #' eventstudy_model$arguments diff --git a/cran-comments.md b/cran-comments.md index c9b60b88..ee2ac6bb 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -15,4 +15,4 @@ We checked 0 reverse dependencies, comparing R CMD check results across CRAN and ## Package changes - * Implemented patch updates for compatibility with `ggplot2` 3.6.0 + * Added support for `fixest` as the regression kernel. `fixest` will replace `estimatr` as the default kernel in a future release. diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd index 6cb118ca..93ccee97 100644 --- a/man/EventStudy.Rd +++ b/man/EventStudy.Rd @@ -24,7 +24,8 @@ EventStudy( cluster = TRUE, anticipation_effects_normalization = TRUE, allow_duplicate_id = FALSE, - avoid_internal_copy = FALSE + avoid_internal_copy = FALSE, + kernel = "estimatr" ) } \arguments{ @@ -79,6 +80,8 @@ when there are anticipation effects. If set to FALSE, does not make the switch. 
\item{allow_duplicate_id}{If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE.} \item{avoid_internal_copy}{If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE.} + +\item{kernel}{Accepts one of "estimatr" or "fixest". If "estimatr" is specified, uses the estimatr package for estimation. If "fixest" is specified, uses the fixest package for estimation. Defaults to "estimatr" (deprecated - will change to "fixest" in a future release).} } \value{ A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. @@ -107,7 +110,8 @@ eventstudy_model$output summary(eventstudy_model$output) ### data.frame of estimates -estimatr::tidy(eventstudy_model$output) +estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' +broom::tidy(eventstudy_model$output) # for kernel='fixest' ### Access arguments eventstudy_model$arguments From 2ac8106312c475f1b5e6a927f169619e1d82fe65 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 18:14:15 -0500 Subject: [PATCH 53/67] #42 fx correct vcov / coef citation --- R/EventStudyFHS.R | 19 ++++++++++--------- R/EventStudyPlot.R | 4 ++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index 3eb7ae5d..0c86415b 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -13,7 +13,7 @@ #' @return A data.frame that contains the estimates for the event study coefficients. 
#' @import estimatr #' @import fixest -#' @importFrom stats qnorm pnorm +#' @importFrom stats qnorm pnorm coef #' @keywords internal #' @noRd #' @@ -148,20 +148,21 @@ EventStudyFEOLS_FHS <- function(formula, prepared_data, data = prepared_data, vcov = as.formula(paste0("~", idvar)) ) + coefs <- coef(fhs_output) N <- fhs_output$nobs n <- length(unique(prepared_data[[idvar]])) - K <- fhs_output$fixef_sizes[[timevar]] + length(coef(fhs_output)) + K <- fhs_output$fixef_sizes[[timevar]] + length(coefs) adjustment_factor <- (N - K) / (N - n - K + 1) fhs_output$se <- fhs_output$se / sqrt(adjustment_factor) fhs_output$cov.scaled <- fhs_output$cov.scaled / adjustment_factor # Recalculate statistical inference - fhs_output$tstat <- coef(fhs_output) / fhs_output$se + fhs_output$tstat <- coefs / fhs_output$se fhs_output$pvalue <- 2 * stats::pnorm(abs(fhs_output$tstat), lower.tail = FALSE) - fhs_output$conf.low <- coef(fhs_output) - stats::qnorm(0.975) * fhs_output$se - fhs_output$conf.high <- coef(fhs_output) + stats::qnorm(0.975) * fhs_output$se + fhs_output$conf.low <- coefs - stats::qnorm(0.975) * fhs_output$se + fhs_output$conf.high <- coefs + stats::qnorm(0.975) * fhs_output$se } else if (FE & (!TFE) & cluster) { @@ -173,17 +174,17 @@ EventStudyFEOLS_FHS <- function(formula, prepared_data, N <- fhs_output$nobs n <- length(unique(prepared_data[[idvar]])) - K <- 1 + length(coef(fhs_output)) + K <- 1 + length(coefs) adjustment_factor <- (N - K) / (N - n - K + 1) fhs_output$se <- fhs_output$se / sqrt(adjustment_factor) fhs_output$cov.scaled <- fhs_output$cov.scaled / adjustment_factor # Recalculate statistical inference - fhs_output$tstat <- coef(fhs_output) / fhs_output$se + fhs_output$tstat <- coefs / fhs_output$se fhs_output$pvalue <- 2 * stats::pnorm(abs(fhs_output$tstat), lower.tail = FALSE) - fhs_output$conf.low <- coef(fhs_output) - stats::qnorm(0.975) * fhs_output$se - fhs_output$conf.high <- coef(fhs_output) + stats::qnorm(0.975) * fhs_output$se + fhs_output$conf.low 
<- coefs - stats::qnorm(0.975) * fhs_output$se + fhs_output$conf.high <- coefs + stats::qnorm(0.975) * fhs_output$se } else if ((!FE) & TFE & (!cluster)) { diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 085c869f..f88f7418 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -29,7 +29,7 @@ #' @return The Event-Study plot as a ggplot2 object. #' @import ggplot2 dplyr #' @import estimatr -#' @importFrom fixest vcov +#' @importFrom stats vcov #' @importFrom rlang .data #' @importFrom broom tidy #' @importFrom data.table setorder @@ -269,7 +269,7 @@ EventStudyPlot <- function(estimates, coefficients <- df_plt$estimate # Add column and row in matrix of coefficients in index of norm columns - vcov <- if(is_fixest) {fixest::vcov(estimates$output)} else {estimates$output$vcov} + vcov <- if(is_fixest) {vcov(estimates$output)} else {estimates$output$vcov} covar <- AddZerosCovar( vcov, eventstudy_coefficients, From 7553dac3856c3036959d2ba5f234f954e65f8779 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 18:16:29 -0500 Subject: [PATCH 54/67] #42 fx namespace --- DESCRIPTION | 2 +- NAMESPACE | 5 ++++- R/EventStudyFHS.R | 2 +- R/EventStudyOLS.R | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 333cc479..208a5674 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,7 +55,7 @@ VignetteBuilder: knitr LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Suggests: rmarkdown, knitr, diff --git a/NAMESPACE b/NAMESPACE index 018f3e07..225d447e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,6 @@ export(EventStudyPlot) export(TestLinear) import(dplyr) import(estimatr) -import(fixest) import(ggplot2) import(stringr) importFrom(MASS,mvrnorm) @@ -21,13 +20,17 @@ importFrom(data.table,setnames) importFrom(data.table,setorder) importFrom(data.table,setorderv) importFrom(data.table,shift) +importFrom(fixest,feols) 
importFrom(fixest,fitstat) +importFrom(fixest,ssc) importFrom(pracma,inv) importFrom(pracma,pinv) importFrom(rlang,.data) importFrom(stats,as.formula) +importFrom(stats,coef) importFrom(stats,pnorm) importFrom(stats,qchisq) importFrom(stats,qnorm) importFrom(stats,reformulate) importFrom(stats,setNames) +importFrom(stats,vcov) diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index 0c86415b..06c4c251 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -12,7 +12,7 @@ #' #' @return A data.frame that contains the estimates for the event study coefficients. #' @import estimatr -#' @import fixest +#' @importFrom fixest feols #' @importFrom stats qnorm pnorm coef #' @keywords internal #' @noRd diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index e0fbb8ac..d19eeac5 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -12,7 +12,7 @@ #' #' @return A data.frame that contains the estimates for the event study coefficients. #' @import estimatr -#' @import fixest +#' @importFrom fixest feols ssc #' @keywords internal #' @noRd #' From cd55cdf9de654951f28277b1931bb4047b84917b Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Fri, 9 Jan 2026 18:22:01 -0500 Subject: [PATCH 55/67] #42 fx undefined coefs object --- R/EventStudyFHS.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index 06c4c251..dc77aefa 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -171,6 +171,7 @@ EventStudyFEOLS_FHS <- function(formula, prepared_data, data = prepared_data, vcov = as.formula(paste0("~", idvar)) ) + coefs <- coef(fhs_output) N <- fhs_output$nobs n <- length(unique(prepared_data[[idvar]])) From 31945532dc1988066d5f7cb694e84641e59034cc Mon Sep 17 00:00:00 2001 From: zhizhongpu Date: Thu, 19 Mar 2026 11:15:32 -0600 Subject: [PATCH 56/67] #42 bd add `fixest` to mwe --- R/EventStudy.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/EventStudy.R 
b/R/EventStudy.R index b99d88d0..3eeed987 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -55,7 +55,8 @@ #' idvar = "id", #' timevar = "t", #' pre = 0, post = 3, -#' normalize = -1 +#' normalize = -1, +#' kernel = "fixest" #' ) #' #' ### Access estimated model @@ -85,6 +86,7 @@ #' pre = 2, overidpre = 4, #' normalize = - 3, #' cluster = TRUE, +#' kernel = "fixest", #' anticipation_effects_normalization = TRUE #' ) #' @@ -102,7 +104,8 @@ #' FE = TRUE, TFE = TRUE, #' post = 0, overidpost = 0, #' pre = 0, overidpre = 0, -#' cluster = TRUE +#' cluster = TRUE, +#' kernel = "fixest" #' ) #' #' summary(eventstudy_model_static$output) @@ -119,7 +122,8 @@ #' idvar = "id", #' timevar = "t", #' pre = 0, post = 3, -#' normalize = -1 +#' normalize = -1, +#' kernel = "fixest" #' ) #' #' summary(eventstudy_model_unbal$output) @@ -138,7 +142,8 @@ #' post = 2, overidpost = 1, #' pre = 0, overidpre = 3, #' normalize = -1, -#' cluster = TRUE +#' cluster = TRUE, +#' kernel = "fixest" #' ) #' #' summary(eventstudy_model_iv$output) From b54da49ed7923bea9a4c9fb433db9ac4aa4b2db2 Mon Sep 17 00:00:00 2001 From: zhizhongpu Date: Thu, 19 Mar 2026 11:42:30 -0600 Subject: [PATCH 57/67] #42 simplify feols-FHS implementation --- R/EventStudyFHS.R | 80 ++++++++++------------------------------------- 1 file changed, 17 insertions(+), 63 deletions(-) diff --git a/R/EventStudyFHS.R b/R/EventStudyFHS.R index dc77aefa..36204e62 100644 --- a/R/EventStudyFHS.R +++ b/R/EventStudyFHS.R @@ -141,83 +141,37 @@ EventStudyFEOLS_FHS <- function(formula, prepared_data, if (! 
is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")} if (FE & !cluster) {stop("cluster=TRUE required when FE=TRUE.")} - if (FE & TFE & cluster) { - - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = as.formula(paste0("~", idvar)) - ) - coefs <- coef(fhs_output) - - N <- fhs_output$nobs - n <- length(unique(prepared_data[[idvar]])) - K <- fhs_output$fixef_sizes[[timevar]] + length(coefs) - - adjustment_factor <- (N - K) / (N - n - K + 1) - fhs_output$se <- fhs_output$se / sqrt(adjustment_factor) - fhs_output$cov.scaled <- fhs_output$cov.scaled / adjustment_factor - - # Recalculate statistical inference - fhs_output$tstat <- coefs / fhs_output$se - fhs_output$pvalue <- 2 * stats::pnorm(abs(fhs_output$tstat), lower.tail = FALSE) - fhs_output$conf.low <- coefs - stats::qnorm(0.975) * fhs_output$se - fhs_output$conf.high <- coefs + stats::qnorm(0.975) * fhs_output$se + if (cluster) { + vcov_arg <- as.formula(paste0("~", idvar)) + } else { + vcov_arg <- "HC1" + } - } else if (FE & (!TFE) & cluster) { + fhs_output <- fixest::feols( + fml = formula, + data = prepared_data, + vcov = vcov_arg + ) - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = as.formula(paste0("~", idvar)) - ) + if (FE & cluster) { coefs <- coef(fhs_output) - N <- fhs_output$nobs n <- length(unique(prepared_data[[idvar]])) - K <- 1 + length(coefs) + + if (TFE) { + K <- fhs_output$fixef_sizes[[timevar]] + length(coefs) + } else { + K <- 1 + length(coefs) + } adjustment_factor <- (N - K) / (N - n - K + 1) fhs_output$se <- fhs_output$se / sqrt(adjustment_factor) fhs_output$cov.scaled <- fhs_output$cov.scaled / adjustment_factor - # Recalculate statistical inference fhs_output$tstat <- coefs / fhs_output$se fhs_output$pvalue <- 2 * stats::pnorm(abs(fhs_output$tstat), lower.tail = FALSE) fhs_output$conf.low <- coefs - stats::qnorm(0.975) * fhs_output$se fhs_output$conf.high <- coefs + stats::qnorm(0.975) * fhs_output$se - - } else 
if ((!FE) & TFE & (!cluster)) { - - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = "HC1" - ) - - } else if ((!FE) & TFE & cluster) { - - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = as.formula(paste0("~", idvar)) - ) - - } else if ((!FE) & (!TFE) & (!cluster)) { - - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = "HC1" - ) - - } else if ((!FE) & (!TFE) & cluster) { - - fhs_output <- fixest::feols( - fml = formula, - data = prepared_data, - vcov = as.formula(paste0("~", idvar)) - ) } return(fhs_output) From fa8cd251554867d4435c7bc64783eeacafb1d98b Mon Sep 17 00:00:00 2001 From: zhizhongpu Date: Thu, 19 Mar 2026 12:07:56 -0600 Subject: [PATCH 58/67] #42 cl replace broom:tidy --- DESCRIPTION | 1 - NAMESPACE | 1 - R/AddSuptBand.R | 9 ++++++++- R/EventStudy.R | 2 +- R/EventStudyPlot.R | 10 ++++++++-- man/EventStudy.Rd | 2 +- 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 208a5674..c6a339ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,7 +44,6 @@ Imports: dplyr, estimatr, fixest, - broom, ggplot2, MASS, rlang, diff --git a/NAMESPACE b/NAMESPACE index 225d447e..d4eea06e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,6 @@ import(estimatr) import(ggplot2) import(stringr) importFrom(MASS,mvrnorm) -importFrom(broom,tidy) importFrom(car,linearHypothesis) importFrom(data.table,":=") importFrom(data.table,.SD) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index 66eb8bd9..9040359e 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -17,6 +17,8 @@ #' for each event-study coefficient. 
#' @import estimatr #' @importFrom MASS mvrnorm +#' @importFrom fixest coeftable +#' @importFrom dplyr rename #' @keywords internal #' @noRd #' @@ -77,7 +79,12 @@ AddSuptBand <- function(model_estimates, num_sim = 1000, conf_level = .95, event critical_value = t[floor(conf_level_num_sim) + 1] } - df_estimates_tidy <- if(fixest){broom::tidy(model_estimates)} else {estimatr::tidy(model_estimates)} + df_estimates_tidy <- if(fixest){ + model_estimates |> + fixest::coeftable() |> + as.data.frame() |> + dplyr::rename(estimate = Estimate, std.error = `Std. Error`) + } else {estimatr::tidy(model_estimates)} df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) diff --git a/R/EventStudy.R b/R/EventStudy.R index 3eeed987..91bd47f9 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -66,7 +66,7 @@ #' #' ### data.frame of estimates #' estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' -#' broom::tidy(eventstudy_model$output) # for kernel='fixest' +#' as.data.frame(fixest::coeftable(eventstudy_model$output)) # for kernel='fixest' #' #' ### Access arguments #' eventstudy_model$arguments diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index f88f7418..c33f97a3 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -31,7 +31,8 @@ #' @import estimatr #' @importFrom stats vcov #' @importFrom rlang .data -#' @importFrom broom tidy +#' @importFrom fixest coeftable +#' @importFrom dplyr rename #' @importFrom data.table setorder #' @export #' @@ -125,7 +126,12 @@ EventStudyPlot <- function(estimates, model_estimates <- estimates$output is_fixest <- class(model_estimates) == "fixest" - model_estimates_tidy <- if(is_fixest) {broom::tidy(model_estimates)} else {estimatr::tidy(model_estimates)} + model_estimates_tidy <- if(is_fixest) { + model_estimates |> + fixest::coeftable() |> + 
as.data.frame() |> + dplyr::rename(estimate = Estimate, std.error = `Std. Error`) + } else {estimatr::tidy(model_estimates)} static_model <- length(coef(model_estimates)) == 1 if (static_model) { diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd index 93ccee97..0393c986 100644 --- a/man/EventStudy.Rd +++ b/man/EventStudy.Rd @@ -111,7 +111,7 @@ summary(eventstudy_model$output) ### data.frame of estimates estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' -broom::tidy(eventstudy_model$output) # for kernel='fixest' +as.data.frame(fixest::coeftable(eventstudy_model$output)) # for kernel='fixest' ### Access arguments eventstudy_model$arguments From d36ff173cd7fcf6a8d23cc328623de76ee867a40 Mon Sep 17 00:00:00 2001 From: zhizhongpu Date: Thu, 19 Mar 2026 12:46:10 -0600 Subject: [PATCH 59/67] #42 fx coef_table rownames --- R/AddSuptBand.R | 9 ++++++--- R/EventStudyPlot.R | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index 9040359e..8461bd70 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -80,10 +80,13 @@ AddSuptBand <- function(model_estimates, num_sim = 1000, conf_level = .95, event } df_estimates_tidy <- if(fixest){ - model_estimates |> + coef_table <- model_estimates |> fixest::coeftable() |> - as.data.frame() |> - dplyr::rename(estimate = Estimate, std.error = `Std. Error`) + as.data.frame() + coef_table$term <- rownames(coef_table) + coef_table |> + dplyr::rename(estimate = Estimate, std.error = `Std. 
Error`) |> + dplyr::select(term, estimate, std.error) } else {estimatr::tidy(model_estimates)} df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index c33f97a3..7b05e07d 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -127,10 +127,13 @@ EventStudyPlot <- function(estimates, model_estimates <- estimates$output is_fixest <- class(model_estimates) == "fixest" model_estimates_tidy <- if(is_fixest) { - model_estimates |> + coef_table <- model_estimates |> fixest::coeftable() |> - as.data.frame() |> - dplyr::rename(estimate = Estimate, std.error = `Std. Error`) + as.data.frame() + coef_table$term <- rownames(coef_table) + coef_table |> + dplyr::rename(estimate = Estimate, std.error = `Std. Error`) |> + dplyr::select(term, estimate, std.error) } else {estimatr::tidy(model_estimates)} static_model <- length(coef(model_estimates)) == 1 From fba4b3b3ec87ca0f768da60561e5434729fdd512 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 19 Mar 2026 13:55:12 -0600 Subject: [PATCH 60/67] #42 cl simplify vignette --- R/EventStudy.R | 2 +- man/EventStudy.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index 91bd47f9..b9178edc 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -66,7 +66,7 @@ #' #' ### data.frame of estimates #' estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' -#' as.data.frame(fixest::coeftable(eventstudy_model$output)) # for kernel='fixest' +#' fixest::coeftable(eventstudy_model$output) # for kernel='fixest' #' #' ### Access arguments #' eventstudy_model$arguments diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd index 0393c986..1ad5b762 100644 --- a/man/EventStudy.Rd +++ b/man/EventStudy.Rd @@ -111,7 +111,7 @@ summary(eventstudy_model$output) ### data.frame of estimates estimatr::tidy(eventstudy_model$output) # for 
kernel='estimatr' -as.data.frame(fixest::coeftable(eventstudy_model$output)) # for kernel='fixest' +fixest::coeftable(eventstudy_model$output) # for kernel='fixest' ### Access arguments eventstudy_model$arguments From c489fa277dc9363a1e762039d75892b39f5aedb8 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:02:04 -0600 Subject: [PATCH 61/67] #42 cl housekeeping --- NAMESPACE | 2 ++ R/AddSuptBand.R | 3 +-- R/EventStudyPlot.R | 3 +-- man/EventStudy.Rd | 13 +++++++++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d4eea06e..593b03af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,8 @@ importFrom(data.table,setnames) importFrom(data.table,setorder) importFrom(data.table,setorderv) importFrom(data.table,shift) +importFrom(dplyr,rename) +importFrom(fixest,coeftable) importFrom(fixest,feols) importFrom(fixest,fitstat) importFrom(fixest,ssc) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index 8461bd70..b19a3089 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -85,8 +85,7 @@ AddSuptBand <- function(model_estimates, num_sim = 1000, conf_level = .95, event as.data.frame() coef_table$term <- rownames(coef_table) coef_table |> - dplyr::rename(estimate = Estimate, std.error = `Std. Error`) |> - dplyr::select(term, estimate, std.error) + dplyr::rename(estimate = Estimate, std.error = `Std. Error`) } else {estimatr::tidy(model_estimates)} df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index 7b05e07d..b2c57f8d 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -132,8 +132,7 @@ EventStudyPlot <- function(estimates, as.data.frame() coef_table$term <- rownames(coef_table) coef_table |> - dplyr::rename(estimate = Estimate, std.error = `Std. 
Error`) |> - dplyr::select(term, estimate, std.error) + dplyr::rename(estimate = Estimate, std.error = `Std. Error`) } else {estimatr::tidy(model_estimates)} static_model <- length(coef(model_estimates)) == 1 diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd index 1ad5b762..779102c6 100644 --- a/man/EventStudy.Rd +++ b/man/EventStudy.Rd @@ -101,7 +101,8 @@ eventstudy_model <- idvar = "id", timevar = "t", pre = 0, post = 3, - normalize = -1 + normalize = -1, + kernel = "fixest" ) ### Access estimated model @@ -131,6 +132,7 @@ eventstudy_model_dyn <- pre = 2, overidpre = 4, normalize = - 3, cluster = TRUE, + kernel = "fixest", anticipation_effects_normalization = TRUE ) @@ -148,7 +150,8 @@ eventstudy_model_static <- FE = TRUE, TFE = TRUE, post = 0, overidpost = 0, pre = 0, overidpre = 0, - cluster = TRUE + cluster = TRUE, + kernel = "fixest" ) summary(eventstudy_model_static$output) @@ -165,7 +168,8 @@ eventstudy_model_unbal <- idvar = "id", timevar = "t", pre = 0, post = 3, - normalize = -1 + normalize = -1, + kernel = "fixest" ) summary(eventstudy_model_unbal$output) @@ -184,7 +188,8 @@ eventstudy_model_iv <- post = 2, overidpost = 1, pre = 0, overidpre = 3, normalize = -1, - cluster = TRUE + cluster = TRUE, + kernel = "fixest" ) summary(eventstudy_model_iv$output) From fda9a788cd37b7e12a4b1dfefb6910c25292e563 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:06:13 -0600 Subject: [PATCH 62/67] #42 fx restore fix for Actions to pass --- R/AddSuptBand.R | 3 ++- R/EventStudyPlot.R | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/AddSuptBand.R b/R/AddSuptBand.R index b19a3089..31f8c7e4 100644 --- a/R/AddSuptBand.R +++ b/R/AddSuptBand.R @@ -85,7 +85,8 @@ AddSuptBand <- function(model_estimates, num_sim = 1000, conf_level = .95, event as.data.frame() coef_table$term <- rownames(coef_table) coef_table |> - dplyr::rename(estimate = Estimate, std.error = `Std. 
Error`) + dplyr::rename(estimate = Estimate, std.error = `Std. Error`) |> + dplyr::select(term, estimate, std.error) } else {estimatr::tidy(model_estimates)} df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) diff --git a/R/EventStudyPlot.R b/R/EventStudyPlot.R index b2c57f8d..c2b5c9f2 100644 --- a/R/EventStudyPlot.R +++ b/R/EventStudyPlot.R @@ -32,7 +32,7 @@ #' @importFrom stats vcov #' @importFrom rlang .data #' @importFrom fixest coeftable -#' @importFrom dplyr rename +#' @importFrom dplyr rename select #' @importFrom data.table setorder #' @export #' @@ -132,7 +132,8 @@ EventStudyPlot <- function(estimates, as.data.frame() coef_table$term <- rownames(coef_table) coef_table |> - dplyr::rename(estimate = Estimate, std.error = `Std. Error`) + dplyr::rename(estimate = Estimate, std.error = `Std. Error`) |> + dplyr::select(term, estimate, std.error) } else {estimatr::tidy(model_estimates)} static_model <- length(coef(model_estimates)) == 1 From 82a5f067fdf2771fbf5627f0ba202e839883fd34 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:10:28 -0600 Subject: [PATCH 63/67] #42 bd update metadata --- NAMESPACE | 1 + R/EventStudy.R | 2 +- man/EventStudy.Rd | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 593b03af..d3bbc0ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ importFrom(data.table,setorder) importFrom(data.table,setorderv) importFrom(data.table,shift) importFrom(dplyr,rename) +importFrom(dplyr,select) importFrom(fixest,coeftable) importFrom(fixest,feols) importFrom(fixest,fitstat) diff --git a/R/EventStudy.R b/R/EventStudy.R index b9178edc..40db5919 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -65,7 +65,7 @@ #' summary(eventstudy_model$output) #' #' ### data.frame of estimates -#' estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' +#' # 
estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' #' fixest::coeftable(eventstudy_model$output) # for kernel='fixest' #' #' ### Access arguments diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd index 779102c6..d0fc80cf 100644 --- a/man/EventStudy.Rd +++ b/man/EventStudy.Rd @@ -111,7 +111,7 @@ eventstudy_model$output summary(eventstudy_model$output) ### data.frame of estimates -estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' +# estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' fixest::coeftable(eventstudy_model$output) # for kernel='fixest' ### Access arguments From 57d80b5f2b59f5041ca997872fdcceaca4302b64 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Thu, 19 Mar 2026 14:23:57 -0600 Subject: [PATCH 64/67] #42 refresh --- R/EventStudy.R | 2 +- man/EventStudy.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/EventStudy.R b/R/EventStudy.R index 40db5919..c8811b9b 100644 --- a/R/EventStudy.R +++ b/R/EventStudy.R @@ -65,8 +65,8 @@ #' summary(eventstudy_model$output) #' #' ### data.frame of estimates -#' # estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' #' fixest::coeftable(eventstudy_model$output) # for kernel='fixest' +#' # estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' #' #' ### Access arguments #' eventstudy_model$arguments diff --git a/man/EventStudy.Rd b/man/EventStudy.Rd index d0fc80cf..70a9da5e 100644 --- a/man/EventStudy.Rd +++ b/man/EventStudy.Rd @@ -111,8 +111,8 @@ eventstudy_model$output summary(eventstudy_model$output) ### data.frame of estimates -# estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' fixest::coeftable(eventstudy_model$output) # for kernel='fixest' +# estimatr::tidy(eventstudy_model$output) # for kernel='estimatr' ### Access arguments eventstudy_model$arguments From ff1a216bc33d2ad56a23656c591d80eb1b16f846 Mon Sep 17 00:00:00 2001 From: Santiago Hermo Date: Fri, 20 Mar 2026 08:43:11 
+1100 Subject: [PATCH 65/67] increase version number #42 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c6a339ce..62a6900d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: eventstudyr Title: Estimation and Visualization of Linear Panel Event Studies -Version: 1.1.5 +Version: 1.2.0 Authors@R: c(person(given = "Simon", family = "Freyaldenhoven", From 7f05a31ae006d14b4cec1f52892132e889b0954a Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:46:05 -0400 Subject: [PATCH 66/67] #42 doc remove crossreferences to unexported functions --- R/EventStudyOLS.R | 2 +- R/PrepareModelFormula.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/EventStudyOLS.R b/R/EventStudyOLS.R index d19eeac5..c583f2d8 100644 --- a/R/EventStudyOLS.R +++ b/R/EventStudyOLS.R @@ -1,6 +1,6 @@ #' Runs Ordinary Least Squares (OLS) with optional fixed effects and clustering #' -#' @param prepared_model_formula A formula object created in [PrepareModelFormula()] that is passed to [EventStudy()]. +#' @param prepared_model_formula A formula object created in `PrepareModelFormula()` that is passed to [EventStudy()]. #' @param prepared_data Data frame containing all of the parameters required for [EventStudy()] plus leads and #' lags of the first differenced policy variable and leads and lags of the policy variable. #' @param idvar Character indicating column of units. diff --git a/R/PrepareModelFormula.R b/R/PrepareModelFormula.R index 37c6016c..65f69d29 100644 --- a/R/PrepareModelFormula.R +++ b/R/PrepareModelFormula.R @@ -1,5 +1,5 @@ -#' Prepares a formula object for use in [EventStudyOLS()] or [EventStudyFHS()] +#' Prepares a formula object for use in `EventStudyOLS()` or `EventStudyFHS()` #' @param estimator Accepts one of "OLS" or "FHS". If "FHS" is specified, implements IV estimator in Freyaldenhoven et al. 2019. 
#' @param outcomevar Character indicating column of outcome variable. From a8e0e79cffc47e604e33fa6b0710368544496680 Mon Sep 17 00:00:00 2001 From: zhizhongpu <84325421+zhizhongpu@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:46:42 -0400 Subject: [PATCH 67/67] #42 drop issue/ --- issue/eyeball_fhs.r | 78 --------------------------------------------- issue/eyeball_ols.r | 78 --------------------------------------------- 2 files changed, 156 deletions(-) delete mode 100644 issue/eyeball_fhs.r delete mode 100644 issue/eyeball_ols.r diff --git a/issue/eyeball_fhs.r b/issue/eyeball_fhs.r deleted file mode 100644 index 3f4f14b5..00000000 --- a/issue/eyeball_fhs.r +++ /dev/null @@ -1,78 +0,0 @@ -library(haven) -library(data.table) -library(tidyverse) -devtools::load_all() - -indir <- 'examples/source/raw/eventstudy_illustration_data/orig' -data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) |> as.data.table() -outcomevar <- "y_base" -policyvar <- "z" -idvar <- "id" -timevar <- "t" -controls <- "x_r" -proxy <- "eta_m" -proxyIV <- NULL -FE <- TRUE -TFE <- TRUE -post <- 2 -pre <- 2 -overidpost <- 1 -overidpre <- post + pre -normalize <- -1 * (pre + 1) -cluster <- TRUE -anticipation_effects_normalization <- TRUE -allow_duplicate_id <- FALSE -avoid_internal_copy <- FALSE - - -output_feols_fhs <- EventStudy( - estimator = 'FHS', - kernel = 'fixest', - data, - outcomevar, - policyvar, - idvar, - timevar, - controls = controls, - proxy = proxy, - proxyIV = proxyIV, - FE = FE, - TFE = TFE, - post, - overidpost = overidpost, - pre = pre, - overidpre = post + pre, - normalize = -1 * (pre + 1), - cluster = cluster, - anticipation_effects_normalization = anticipation_effects_normalization, - allow_duplicate_id = allow_duplicate_id, - avoid_internal_copy = avoid_internal_copy -) - -output_fhs <- EventStudy( - estimator = 'FHS', - kernel = 'estimatr', - data, - outcomevar, - policyvar, - idvar, - timevar, - controls = controls, - proxy = proxy, - proxyIV = proxyIV, - 
FE = FE, - TFE = TFE, - post, - overidpost = overidpost, - pre = pre, - overidpre = post + pre, - normalize = -1 * (pre + 1), - cluster = cluster, - anticipation_effects_normalization = anticipation_effects_normalization, - allow_duplicate_id = allow_duplicate_id, - avoid_internal_copy = avoid_internal_copy -) - - -EventStudyPlot(output_fhs) -EventStudyPlot(output_feols_fhs) diff --git a/issue/eyeball_ols.r b/issue/eyeball_ols.r deleted file mode 100644 index 662e923c..00000000 --- a/issue/eyeball_ols.r +++ /dev/null @@ -1,78 +0,0 @@ -library(haven) -library(data.table) -library(tidyverse) -devtools::load_all() - -indir <- 'examples/source/raw/eventstudy_illustration_data/orig' -data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) |> as.data.table() -estimator <- "OLS" -outcomevar <- "y_base" -policyvar <- "z" -idvar <- "id" -timevar <- "t" -controls <- NULL -proxy <- NULL -proxyIV <- NULL -FE <- TRUE -TFE <- TRUE -post <- 2 -pre <- 2 -overidpost <- 1 -overidpre <- post + pre -normalize <- -1 * (pre + 1) -cluster <- TRUE -anticipation_effects_normalization <- TRUE -allow_duplicate_id <- FALSE -avoid_internal_copy <- FALSE -kernel <- "estimatr" - - -estimates_feols <- EventStudy( - estimator = 'OLS', - kernel = 'fixest', - data, - outcomevar, - policyvar, - idvar, - timevar, - controls = NULL, - proxy = NULL, - proxyIV = NULL, - FE = TRUE, - TFE = TRUE, - post, - overidpost = 1, - pre, overidpre = post + pre, - normalize = -1 * (pre + 1), - cluster = TRUE, - anticipation_effects_normalization = TRUE, - allow_duplicate_id = FALSE, - avoid_internal_copy = FALSE -) - -estimates <- EventStudy( - estimator = 'OLS', - kernel = 'estimatr', - data, - outcomevar, - policyvar, - idvar, - timevar, - controls = NULL, - proxy = NULL, - proxyIV = NULL, - FE = TRUE, - TFE = TRUE, - post, - overidpost = 1, - pre, overidpre = post + pre, - normalize = -1 * (pre + 1), - cluster = TRUE, - anticipation_effects_normalization = TRUE, - allow_duplicate_id = FALSE, - 
avoid_internal_copy = FALSE -) - - -EventStudyPlot(estimates) -EventStudyPlot(estimates_feols)