-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathEvent_study_example_simulated.R
More file actions
138 lines (110 loc) · 4.88 KB
/
Event_study_example_simulated.R
File metadata and controls
138 lines (110 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Intro -------------------------------------------------------------------
rm(list=ls()) # Clears workspace
# install.packages("renv") # Install/call libraries
# renv::init()
PKG<-c("fixest","tidyverse","did")
for (p in PKG) {
if(!require(p,character.only = TRUE)) {
install.packages(p)
require(p,character.only = TRUE)}
}
renv::snapshot()
rm(p,PKG)
set.seed(123) # Seed for reproducibility
# Simulate Panel Data -----------------------------------------------------
n<-50000 # Number of units
t<-20 # Time periods
beta<-2 # True treatment effect
panel<-expand.grid(id = 1:n, time = 1:t) # Create panel structure
panel<-panel %>% # Assign treatment time randomly for each unit (some never treated)
group_by(id) %>%
mutate(treat_time = sample(c(sample(5:15, 1), NA), 1)) %>%
ungroup()
panel <- panel %>%
mutate(
treated = ifelse(!is.na(treat_time) & time >= treat_time, 1, 0),
event_time = ifelse(!is.na(treat_time), time - treat_time, NA),
unit_fe = rep(rnorm(n), times = t),
time_fe = rep(rnorm(t), each = n)
)
panel <- panel %>%
mutate(
post_treat_time = ifelse(event_time >= 0, event_time, NA),
treated_effect = ifelse(event_time >= 0, beta - 0.1 * event_time, 0) # Linear decay
)
# Generate outcome based on treatment and fixed effects
panel<-panel %>%
mutate(
epsilon = rnorm(n * t), # Random error
y = 3 + treated_effect + unit_fe + time_fe + epsilon # Outcome equation
)
# Event Study Model -------------------------------------------------------
# model_clustered<-feols(y ~ i(event_time, ref = -1) | id + time, data = panel, cluster = ~id) - note, fixed effects models of have issues with small and large sample sizes (seemingly only with unit fixed effects?)
# # Issue in large samples seems to be associated with differences in unit fixed effects at different event times
# panel %>%
# group_by(event_time) %>%
# summarize(mean_unit_fe = mean(unit_fe, na.rm = TRUE)) %>%
# ggplot(aes(x = event_time, y = mean_unit_fe)) +
# geom_line()
model1<-feols(y ~ i(event_time, ref = -1) | time, data = panel) # Standard event study
#model2 <- feols(y ~ i(event_time, ref = -1) + post_treat_time | time, data = panel) # Event study with linear decay
model3 <- feols(y ~ i(event_time, ref = -1) + i(event_time, ref = -1):treated | time, data = panel) # Distributed Lag Model
plot_results <- function(model, title) {
coef_df <- data.frame(
event_time = as.numeric(sub("event_time::", "", names(coef(model)))),
estimate = coef(model),
conf.low = confint(model)[,1],
conf.high = confint(model)[,2]
)
post_treatment <- coef_df %>% filter(event_time >= 0)
# Compute true expected effect based on the DGP
post_treatment <- post_treatment %>%
mutate(true_effect = 2 - 0.1 * event_time) # This is our known data-generating function
ggplot(post_treatment, aes(x = event_time, y = estimate)) +
geom_point() +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
geom_line(aes(y = true_effect), color = "red", linetype = "dashed", size = 1) + # True effect line
geom_hline(yintercept = 0, linetype = "dotted") +
labs(title = title, x = "Event Time", y = "Estimated Effect") +
theme_minimal()
}
# Show plots with corrected true effect
plot_results(model1, "Standard Event Study (Time FE)")
#plot_results(model2, "Event Study with Linear Decay")
plot_results(model3, "Distributed Lag Model (DLM)")
# Impact calculation ------------------------------------------------------
coef_df <- data.frame( # Extract estimated event study coefficients
event_time = as.numeric(sub("event_time::", "", names(coef(model1)))), # Adjust for correct model
estimate = coef(model3)
)
panel <- panel %>% # Merge the estimated effects into the panel dataset
left_join(coef_df, by = "event_time") %>%
mutate(
estimate = ifelse(event_time >= 0, estimate, 0) # Only apply to post-treatment periods
)
# panel$estimate<-ifelse(panel$estimate<0,0,panel$estimate) # If a linear model, will predict negative values. Use DLM to avoid this.
panel %>% # Cumulative impact by id
group_by(id) %>%
summarise(treat = mode(treated), impact = sum(estimate))
panel %>% # Cumulative impact
summarise(impact = sum(estimate, na.rm = TRUE))
# DID (Callaway and Sant'Anna) --------------------------------------------
# The did package expects a panel dataset with:
# - id column
# - time column
# - treatment variable (0/1)
# - outcome variable
# - a variable specifying when treatment occurs (never-treated units get NA)
# panel <- panel %>%
# mutate(
# D = ifelse(!is.na(treat_time) & time >= treat_time, 1, 0) # Treatment status at each period
# )
#
# att_est <- att_gt(
# yname = "y", # Outcome variable
# tname = "time", # Time variable
# idname = "id", # Panel identifier
# gname = "treat_time", # First treatment time
# data = panel,
# est_method = "dr" # Doubly robust estimation
# )