diff --git a/.Rbuildignore b/.Rbuildignore index 6b79329..614ea69 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,5 @@ ^pkgdown$ ^\.github$ ^temporary-scripts$ +\.Rmd\.orig$ +^vignettes/precompile\.R$ diff --git a/DESCRIPTION b/DESCRIPTION index 86ee1a5..af6be4a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -62,7 +62,6 @@ Remotes: UrbanInstitute/urbnthemes URL: https://ui-research.github.io/climateapi/ Suggests: - crosswalk, knitr, qualtRics, rmarkdown, diff --git a/vignettes/economic_recovery_factsheet.Rmd b/vignettes/economic_recovery_factsheet.Rmd index 390172f..a038ccd 100644 --- a/vignettes/economic_recovery_factsheet.Rmd +++ b/vignettes/economic_recovery_factsheet.Rmd @@ -1,699 +1,741 @@ ---- -title: "Economic Recovery Post Disaster" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Economic Recovery Post Disaster} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - warning = FALSE, - message = FALSE, - fig.width = 8, - fig.height = 6, - dpi = 1000, - eval = TRUE, - echo = TRUE) - -options(scipen = 999) -``` - - -## Overview - -Because post-event data for the affected area will not be available for months or years, this approach identifies historical comparison—counties that experienced similar disasters and had similar pre-disaster characteristics—in the past—to highlight various recovery trajectories. - -Contents: - -1. The affected county's pre-disaster economic baseline -2. Employment, business, and local government finance trajectories using an event-study framework -4. Typical PA/IHP/SBA flows from federal programs (post-disaster period only) - -## Notes - -Critical to this approach is an observable pre-period, especially for the disaster-impacted county (which has no post-period data). But because of data lags, the pre-period will never run up through 2026 (current year at time of writing)--at best, it will go through ~2024 (ACS) or in most cases only 2023 (County Business Patterns, government finances). - -Accordingly, we want our pre-period to be long--say, 8 years preceding the disaster, if possible--so that we have multiple years of observation, even for the disaster-affected county. But for our comparison counties, we are hemmed in on the other side--we want to have post-period observations for these, so the comparison disaster ideally occurs between 2018-2020, giving us 3-5 post-period years. However, this requires a pre-period dating back to as early as 2010... which is not covered in many datasets (e.g., ACS). Thus, we have a tension in identifying overlapping pre-period timelines for our comparison and affected counties due to historical data coverage. - -## Setup - -```{r setup} -library(climateapi) -library(tidyverse) -library(urbnthemes) -library(crosswalk) - -set_urbn_defaults(style = "print") -``` - -## Parameters - -Define the affected county and disaster characteristics. These would be updated for each new event. -```{r parameters} -# Affected county -affected_county_fips <- "12087" -affected_county_name <- "Monroe County, FL" -affected_state <- "FL" - -# Disaster characteristics -disaster_type <- "Hurricane" -disaster_year <- 2026 - -# Event-study window -years_before <- 6 -years_after <- 4 -``` - -### Preceding Disasters -```{r} -disasters = get_fema_disaster_declarations(api = FALSE) -disasters_affected = disasters %>% - filter(GEOID %in% affected_county_fips, incidents_natural_hazard > 0) -``` - ---- - -# Section 1: The Affected County Baseline Profile - -Establish the pre-disaster economic conditions of the affected county. - -## Sociodemographic Characteristics - -```{r} -acs_df_2023 = arrow::read_parquet(file.path(get_box_path(), "sociodemographics", "acs", "acs_county_2023.parquet")) - -acs_matching_variables = c( - "median_household_income_universe_allraces", - "race_personofcolor_percent", - "population_density_land_sq_kilometer", - "total_population_universe", - "educational_attainment_degree_bachelors_percent") -``` - - -## Hazard Damages over Time - -```{r} -sheldus_df = get_sheldus() %>% - summarize(.by = c(GEOID, year), damage_property_millions = sum(damage_property, na.rm = TRUE) / 1000000) - -sheldus_df %>% - filter(GEOID %in% affected_county_fips) %>% - ggplot(aes(x = year, y = damage_property_millions)) + - geom_line() + - geom_point() + - scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + - labs(x = "Year", y = "Property damage (millions)") -``` - - -## Industry Composition over Time - -Identify sectors that may be particularly vulnerable to this disaster type. - -```{r baseline-industry} -cbp_df = cache_it( - cbp_df, - file_name = "cbp_2016_2023", - path = file.path(get_box_path(), "employment"), read = TRUE) - -cbp_affected = cbp_df %>% - mutate( - county_fips = str_c(state, county), - industry_label = industry %>% str_replace_all("_", " ") %>% str_to_sentence()) %>% - filter(county_fips %in% affected_county_fips) - -top_8_industries = cbp_affected %>% - filter(year == 2023, industry != "total") %>% - arrange(desc(employees)) %>% - slice(1:8) %>% - pull(industry) - -cbp_prior_disaster_years = disasters_affected %>% - filter(year_declared %in% (cbp_df$year %>% unique())) %>% - distinct(year_declared) - -cbp_affected %>% - filter(employees > 0, industry %in% top_8_industries) %>% - arrange(desc(employees)) %>% - ggplot(aes(x = year, y = employees, color = industry_label, group = industry_label)) + - geom_line() + - geom_vline(data = cbp_prior_disaster_years, color = "grey", linetype = "dashed", aes(xintercept = year_declared)) + - scale_y_continuous(labels = scales::comma) + - scale_x_continuous(n.breaks = length((cbp_df$year %>% unique()))) + - labs(y = "Employees (n)", x = "") -``` - -## Small Businesses over Time - -Identify smaller businesses, which may be more vulnerable to disaster-related economic shocks. - -```{r} -total_employers = cbp_affected %>% - filter(industry == "total", employee_size_range_label == "All establishments") %>% - slice_max(year) %>% - pull(employers) - -cbp_affected %>% - tibble::as_tibble() %>% - filter(employee_size_range_label != "All establishments") %>% - mutate( - employee_size_range_label = case_when( - employee_size_range_label %in% c("<100-249 employees", "250-499 employees") ~ "100-499 employees", - employee_size_range_label %in% c("500-999 employees", "1000+") ~ "500+ employees", - TRUE ~ employee_size_range_label), - employee_size_range = factor( - employee_size_range_label, - levels = c( - "1-4 employees", "<5 employees", "5-9 employees", "10-19 employees", "20-49 employees", - "50-99 employees", "100-499 employees", "500+ employees"), - ordered = TRUE), - industry_label = factor( - industry_label, - levels = count(., industry_label, sort = TRUE) %>% pull(industry_label), - ordered = TRUE)) %>% - mutate(.by = industry, employer_total = sum(employers)) %>% - filter(employer_total > (.05 * total_employers)) %>% - ggplot(aes(x = year)) + - geom_area(aes(y = employers, fill = employee_size_range)) + - facet_wrap(~ reorder(industry_label, -employer_total), ncol = 4) + - labs(x = "Year", y = "Employers", title = "Employers by Number of Employees, by Industry") + - scale_y_continuous(labels = scales::comma) -``` - -## County Fiscal Capacity Over Time - -```{r baseline-fiscal} -county_expenses = get_government_finances() -finances_years = county_expenses %>% pull(year) %>% unique() - -finances_prior_disaster_years = disasters_affected %>% - filter(year_declared %in% finances_years) %>% - distinct(year_declared) - -county_expenses %>% - filter(GEOID %in% affected_county_fips) %>% - mutate(across(matches("revenue|expenditure"), ~ .x / 1000)) %>% - pivot_longer(-c(year, GEOID, county_name)) %>% - filter(name %in% c("revenue_total", "expenditure_total")) %>% - mutate(name = if_else(str_detect(name, "expenditure"), "Expenditures", "Revenues")) %>% - inflation_adjust(year_variable = "year", dollar_variables = "value", base_year = 2024, names_suffix = "_adjusted") %>% - ggplot(aes(x = year, y = value, color = name, group = name)) + - geom_line() + - geom_point() + - geom_vline(data = finances_prior_disaster_years, color = "grey", linetype = "dashed", aes(xintercept = year_declared)) + - scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + - labs(x = "Year", y = "USD (Millions)") -``` - ---- - -# Section 2: Selecting Comparison Counties - -Identify historical analogs: counties that experienced similar disasters 5+ years ago. - -## Assemble Matching Dataset - -Build a county-level dataset with all variables needed for matching. - -```{r matching-disasters} -# Disaster history: count and binary indicators by hazard type in past 5 years -disaster_lookback_years <- 5 -reference_year <- disaster_year - 1 # Use year before disaster for matching - -disaster_history <- disasters %>% - filter( - year_declared >= (reference_year - disaster_lookback_years), - year_declared <= reference_year) %>% - tidytable::summarise( - .by = GEOID, - n_disasters_prior_5yr = sum(incidents_natural_hazard, na.rm = TRUE), - fire_prior_5yr = as.integer(sum(incidents_fire, na.rm = TRUE) > 0), - flood_prior_5yr = as.integer(sum(incidents_flood, na.rm = TRUE) > 0), - hurricane_prior_5yr = as.integer(sum(incidents_hurricane, na.rm = TRUE) > 0), - severe_storm_prior_5yr = as.integer(sum(incidents_severe_storm, na.rm = TRUE) > 0), - tornado_prior_5yr = as.integer(sum(incidents_tornado, na.rm = TRUE) > 0), - winter_storm_prior_5yr = as.integer(sum(incidents_winter_storm, na.rm = TRUE) > 0), - drought_prior_5yr = as.integer(sum(incidents_drought, na.rm = TRUE) > 0)) %>% - as_tibble() -``` - -```{r matching-sheldus} -sheldus_reference_year = if_else(reference_year < 2023, reference_year, 2023) - -sheldus_matching = sheldus_df %>% - filter(year == sheldus_reference_year) -``` - -```{r matching-industry} -# Industry employment shares from County Business Patterns -# Use 2-digit NAICS codes for broad sector shares -cbp_reference_year = if_else(reference_year < 2023, reference_year, 2023) - -cbp_matching <- cbp_df %>% - ## filter out the employers by employee size records - filter(year == cbp_reference_year, employees > 0) %>% - mutate(GEOID = str_c(state, county)) %>% - select(GEOID, industry, employees) %>% - as_tibble() - -# Calculate total employment per county -cbp_totals <- cbp_matching %>% - filter(industry == "total") %>% - select(GEOID, total_employees = employees) - -# Calculate employment share by industry -industry_shares <- cbp_matching %>% - filter(industry != "total") %>% - tidylog::left_join(cbp_totals, by = "GEOID") %>% - arrange(GEOID) %>% - mutate( - share_employees_ = employees / total_employees, - industry_var = str_c("share_employees_", industry)) %>% - select(GEOID, industry_var, share_employees_) %>% - pivot_wider(names_from = industry_var, values_from = share_employees_, values_fill = 0) - -# Add total employment -## NOTE: the total employee count is always greater than or equal to -## the sum of individual industries' employment countes because CBP omits -## some industry categories -industry_matching <- industry_shares %>% - left_join(cbp_totals, by = "GEOID") -``` - -```{r matching-nfip} -# NFIP residential coverage rate by county -nfip_matching <- get_nfip_residential_penetration() %>% - mutate(share_residential_structures_sfha = residential_structures_sfha / residential_structures) %>% - select(GEOID, share_residential_structures_sfha, penetration_rate_sfha) -``` - -```{r matching-fiscal} -finance_reference_year = if_else(reference_year < 2023, reference_year, 2022) - -# Total county government expenses -county_finances_matching = county_expenses %>% - filter(year == finance_reference_year) -``` - -```{r matching-acs} -# Sociodemographic characteristics from ACS -acs_matching <- acs_df_2023 %>% - select(GEOID, all_of(acs_matching_variables)) -``` - -```{r matching-combine} -# Combine all matching variables into single dataset -matching_data <- acs_matching %>% - left_join(disaster_history, by = "GEOID") %>% - left_join(sheldus_matching, by = "GEOID") %>% - left_join(industry_matching, by = "GEOID") %>% - left_join(nfip_matching, by = "GEOID") %>% - left_join(county_finances_matching, by = "GEOID") %>% - # Replace NAs with 0 for disaster counts (counties with no disasters) - mutate(across(ends_with("_prior_5yr"), ~ replace_na(.x, 0))) %>% - # Drop counties with missing core variables - filter(!is.na(total_population_universe), !is.na(total_employees)) -``` - -## Select Comparison Counties - -```{r comparison-selection} -# Hard filter: same disaster type, disaster occurred 5+ years ago -comparison_pool <- disasters %>% - filter( - year_declared <= 2021, # Enough post-period - year_declared >= 2018) %>% - slice(.by = GEOID, 1) %>% - distinct(GEOID, year_declared) %>% - rename(comparison_disaster_year = year_declared) - -# Get affected county's characteristics -affected_county_chars <- matching_data %>% - filter(GEOID == affected_county_fips) - -# Join matching data -comparison_candidates <- comparison_pool %>% - inner_join(matching_data, by = "GEOID") %>% - filter( - total_population_universe > affected_county_chars$total_population_universe * .75, - total_population_universe < affected_county_chars$total_population_universe * 1.25, - median_household_income_universe_allraces > affected_county_chars$median_household_income_universe_allraces * .75, - median_household_income_universe_allraces < affected_county_chars$median_household_income_universe_allraces * 1.25, - GEOID != affected_county_fips) # Exclude affected county -``` - -```{r comparison-distance} -# Calculate Mahalanobis distance to affected county - -# Select numeric variables for distance calculation -matching_vars <- c( - acs_matching_variables, - "n_disasters_prior_5yr", - "total_employees", - "penetration_rate_sfha", - "revenue_total", - "expenditure_total", - "damage_property_millions") - -# Prepare matrices -matrix_candidates <- comparison_candidates %>% - select(all_of(matching_vars)) %>% - as.matrix() - -matrix_affected <- affected_county_chars %>% - select(all_of(matching_vars)) %>% - as.matrix() - -# Calculate covariance matrix and Mahalanobis distance -covariance_matrix <- cov(matrix_candidates, use = "pairwise.complete.obs") -distances <- mahalanobis(matrix_candidates, center = matrix_affected, cov = covariance_matrix, tol=1e-30) - -# Select top k nearest neighbors -k_neighbors <- 10 - -comparison_counties <- comparison_candidates %>% - mutate(mahalanobis_distance = distances) %>% - slice_min(mahalanobis_distance, n = k_neighbors) %>% - select(GEOID, comparison_disaster_year, mahalanobis_distance) %>% - left_join(tidycensus::fips_codes %>% transmute(county, GEOID = str_c(state_code, county_code))) %>% - slice(1:5) -``` - ---- - -# Section 3: Event-Study Data Preparation - -Align all outcome datasets to event time (t=0 is disaster year). - -```{r event-study-reference} -# Create reference table: affected county + comparison counties with disaster years -county_reference <- bind_rows( - # Affected county - tibble( - GEOID = affected_county_fips, - disaster_year_event = disaster_year, - county_type = "affected"), - # Comparison counties - comparison_counties %>% - transmute( - GEOID, - disaster_year_event = comparison_disaster_year, - county_type = "comparison")) - -# Define event window -event_window <- c(-5, 4) -``` - -```{r event-study-employment} -# Align to event time -cbp_event_aligned <- cbp_df %>% - mutate(GEOID = str_c(state, county)) %>% - rename(calendar_year = year) %>% - inner_join(county_reference, by = "GEOID") %>% - mutate(event_time = calendar_year - disaster_year_event) %>% - filter(event_time >= event_window[1], event_time <= event_window[2]) %>% - select(GEOID, county_type, disaster_year_event, calendar_year, event_time, - industry, employees, employers, annual_payroll) -``` - -```{r event-study-fiscal} -# Align to event time -fiscal_event_aligned <- county_expenses %>% - rename(calendar_year = year) %>% - inner_join(county_reference, by = "GEOID") %>% - mutate(event_time = calendar_year - disaster_year_event) %>% - filter(event_time >= event_window[1], event_time <= event_window[2]) %>% - select(GEOID, county_type, disaster_year_event, calendar_year, event_time, - expenditure_total, revenue_total) -``` - -```{r event-study-sba} -# SBA disaster loans -sba_raw <- get_sba_loans() - -sba_crosswalk = get_crosswalk( - source_geography = "zcta", - target_geography = "county") - -sba_simple = sba_raw %>% - select( - source_geoid = damaged_property_zip_code, - sba_loan_amount = approved_amount_total, - loan_type, - fiscal_year) - -warning("The crosswalking is imperfect and fiscal and calendar years are not aligned.") -sba_county = sba_simple %>% - tidylog::left_join(sba_crosswalk$crosswalks$step_1, by = "source_geoid") %>% - mutate(target_geoid = str_c(state_fips, target_geoid)) %>% - tidytable::summarize( - .by = c(target_geoid, loan_type, fiscal_year), - sba_loan_amount = sum(sba_loan_amount * allocation_factor_source_to_target)) %>% - filter(!is.na(target_geoid)) %>% - mutate( - GEOID = target_geoid, - calendar_year = fiscal_year %>% as.numeric) %>% - pivot_wider(names_from = loan_type, values_from = sba_loan_amount) %>% - rename(business_loan = business, residential_loan = residential) %>% - mutate(across(matches("loan"), ~ if_else(is.na(.x), 0, .x))) - -# Align to event time -sba_event_aligned <- sba_county %>% - inner_join(county_reference, by = "GEOID") %>% - mutate(event_time = calendar_year - disaster_year_event) %>% - filter(event_time >= event_window[1], event_time <= event_window[2]) %>% - select(GEOID, county_type, disaster_year_event, calendar_year, event_time, - matches("loan")) -``` - -```{r event-study-pa} -# FEMA Public Assistance -pa_raw <- get_public_assistance() - -# Aggregate to county-year level -pa_county_year <- pa_raw %>% - mutate( - GEOID = county_fips, - calendar_year = declaration_year) %>% - summarise( - .by = c(GEOID, calendar_year), - across(.cols = matches("split"), ~ sum(.x, na.rm = TRUE))) - -# Align to event time -pa_event_aligned <- pa_county_year %>% - inner_join(county_reference, by = "GEOID") %>% - mutate(event_time = calendar_year - disaster_year_event) %>% - filter(event_time >= event_window[1], event_time <= event_window[2]) %>% - select(GEOID, county_type, disaster_year_event, calendar_year, event_time, - pa_federal_funding_obligated_split) -``` - -```{r event-study-ihp} -# FEMA Individual and Households Program -# ihp_raw <- get_ihp_registrations() - -# # Aggregate to county-year level -# ihp_county_year <- ihp_raw %>% -# mutate(calendar_year = lubridate::year(declaration_date)) %>% -# summarise( -# .by = c(GEOID, calendar_year), -# ihp_registrations_n = n(), -# ihp_approved_n = sum(ihp_eligible, na.rm = TRUE), -# ihp_amount_total = sum(ihp_amount, na.rm = TRUE), -# ihp_ha_amount = sum(ha_amount, na.rm = TRUE), -# ihp_ona_amount = sum(ona_amount, na.rm = TRUE)) - -# # Align to event time -# ihp_event_aligned <- ihp_county_year %>% -# inner_join(county_reference, by = "GEOID") %>% -# mutate(event_time = calendar_year - disaster_year_event) %>% -# filter(event_time >= event_window[1], event_time <= event_window[2]) %>% -# select(GEOID, county_type, disaster_year_event, calendar_year, event_time, -# ihp_registrations_n, ihp_approved_n, ihp_amount_total, ihp_ha_amount, ihp_ona_amount) -``` - ---- - -# Section 4: Employment Trajectories (Event-Study Style) - -Plot employment trends with time relative to disaster (t=0). - -- **Affected county**: t-6 through t=0 (present) -- **Comparison counties**: full t-6 through t+6 window, aligned to their disaster year - -```{r employment-total} -# Total employment over event time -cbp_event_aligned %>% - filter(industry == "total", employees > 0) %>% - tibble::as_tibble() %>% - ggplot(aes(x = event_time, y = employees, group = GEOID, color = county_type, linetype = county_type)) + - geom_line() + - geom_point() + - ylim(c(0, NA)) + - geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + - scale_y_continuous(labels = scales::comma) + - scale_color_manual(values = c("affected" = "#1696d2", "comparison" = "#d2d2d2")) + - labs( - x = "Years Relative to Disaster (t=0)", - y = "Total Employees", - title = "Employment Trajectory: Affected vs. Comparison Counties") + - theme(legend.position = "none") -``` - ---- - -# Section 5: Local Government Fiscal Trajectories (Event-Study Style) - -Examine how local government finances evolved in comparison counties post-disaster. - -```{r fiscal-expenses} -# Total county expenses over event time -fiscal_event_aligned %>% - mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% - ggplot(aes(x = event_time, y = expenditure_total / 1000, group = GEOID, color = county_type)) + - geom_line() + - geom_point() + - geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + - scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + - ylim(c(0, NA)) + - scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + - labs( - x = "Years Relative to Disaster (t=0)", - y = "Total County Expenses (Millions)", - title = "County Government Expenses: Affected vs. Comparison Counties") + - theme(legend.position = "none") -``` - -```{r fiscal-revenue} -# Total county expenses over event time -fiscal_event_aligned %>% - mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% - ggplot(aes(x = event_time, y = revenue_total / 1000, group = GEOID, color = county_type)) + - geom_line() + - geom_point() + - geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + - scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + - scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + - labs( - x = "Years Relative to Disaster (t=0)", - y = "Total County Revenues (Millions)", - title = "County Government Revenues: Affected vs. Comparison Counties") + - theme(legend.position = "none") -``` - ---- - -# Section 6: Recovery Resources in Comparison Cases - -Describe typical federal assistance flows based on historical analogs. Since the affected county's disaster just occurred, we show comparison counties' post-disaster resource flows as projections. - -## SBA Disaster Loans - -```{r recovery-sba} -# SBA loans over event time (comparison counties only for post-period) - -sba_event_aligned %>% - arrange(GEOID, event_time) %>% - mutate( - .by = GEOID, - across(matches("loan"), ~ cumsum(.x))) %>% - mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% - ggplot(aes(x = event_time, y = residential_loan / 1000000, group = GEOID, color = county_type)) + - geom_line() + - ylim(c(0, NA)) + - geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + - scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + - scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + - labs( - x = "Years relative to disaster (t=0)", - y = "SBA loans (cumulative)", - title = "Cumulative SBA Loans - Residential") + - theme(legend.position = "none") - -sba_event_aligned %>% - arrange(GEOID, event_time) %>% - mutate( - .by = GEOID, - across(matches("loan"), ~ cumsum(.x))) %>% - ggplot(aes(x = event_time, y = business_loan / 1000000, group = GEOID, color = county_type)) + - geom_line() + - geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + - scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + - scale_color_manual(values = c("affected" = "#1696d2", "comparison" = "#d2d2d2")) + - labs( - x = "Years relative to disaster (t=0)", - y = "SBA loans (cumulative)", - title = "Cumulative SBA Loans - Business") + - theme(legend.position = "none") -``` - -## FEMA Public Assistance - -```{r recovery-pa} -# PA funding over event time (comparison counties only for post-period) -pa_event_aligned %>% - mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% - ggplot(aes(x = event_time, y = pa_federal_funding_obligated_split / 1e6, group = GEOID, color = county_type)) + - geom_line() + - geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + - scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + - scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + - labs( - x = "Years Relative to Disaster (t=0)", - y = "FEMA PA Federal Share Obligated (Millions)", - title = "FEMA Public Assistance in Comparison Counties") -``` - -## Individual and Households Program - -```{r recovery-ihp} -# # IHP funding over event time (comparison counties only for post-period) -# ihp_event_aligned %>% -# ggplot(aes(x = event_time, y = ihp_amount_total / 1e6, group = GEOID)) + -# geom_line(alpha = 0.4, color = "#d2d2d2") + -# stat_summary(aes(group = 1), fun = median, geom = "line", color = "#1696d2", linewidth = 1.2) + -# geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + -# scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + -# labs( -# x = "Years Relative to Disaster (t=0)", -# y = "IHP Total Amount (Millions)", -# title = "FEMA Individual & Households Program in Comparison Counties", -# subtitle = "Blue line = median across comparison counties" -# ) -``` - ---- - -# Section 7: Key Takeaways - -```{r takeaways} -# Programmatically generate summary statistics for the factsheet: -# - Pre-disaster vulnerability indicators for affected county -# - Median/range of recovery trajectories from comparison counties -# - Typical federal resource flows -``` - -## Summary Table - -```{r summary-table} -# Create summary table of key indicators -``` - ---- - -## Technical Notes - -### Comparison County Selection Criteria -- Similar population size and income (within 25% of affected county in either direction) - -### Data Availability Notes -- LODES data typically lag 2 years -- County Business Patterns lag 2 years -- Government finances lag 2 -- FEMA assistance data are near real-time - +--- +title: "Economic Recovery Post Disaster" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Economic Recovery Post Disaster} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + + +## Overview + +Because post-event data for the affected area will not be available for months or years, this approach identifies historical comparison—counties that experienced similar disasters and had similar pre-disaster characteristics—in the past—to highlight various recovery trajectories. + +Contents: + +1. The affected county's pre-disaster economic baseline +2. Employment, business, and local government finance trajectories using an event-study framework +4. Typical PA/IHP/SBA flows from federal programs (post-disaster period only) + +## Notes + +Critical to this approach is an observable pre-period, especially for the disaster-impacted county (which has no post-period data). But because of data lags, the pre-period will never run up through 2026 (current year at time of writing)--at best, it will go through ~2024 (ACS) or in most cases only 2023 (County Business Patterns, government finances). + +Accordingly, we want our pre-period to be long--say, 8 years preceding the disaster, if possible--so that we have multiple years of observation, even for the disaster-affected county. But for our comparison counties, we are hemmed in on the other side--we want to have post-period observations for these, so the comparison disaster ideally occurs between 2018-2020, giving us 3-5 post-period years. However, this requires a pre-period dating back to as early as 2010... which is not covered in many datasets (e.g., ACS). Thus, we have a tension in identifying overlapping pre-period timelines for our comparison and affected counties due to historical data coverage. + +## Setup + + +``` r +library(climateapi) +library(tidyverse) +library(urbnthemes) +library(crosswalk) + +set_urbn_defaults(style = "print") +``` + +## Parameters + +Define the affected county and disaster characteristics. These would be updated for each new event. + +``` r +# Affected county +affected_county_fips <- "12087" +affected_county_name <- "Monroe County, FL" +affected_state <- "FL" + +# Disaster characteristics +disaster_type <- "Hurricane" +disaster_year <- 2026 + +# Event-study window +years_before <- 6 +years_after <- 4 +``` + +### Preceding Disasters + +``` r +disasters = get_fema_disaster_declarations(api = FALSE) +disasters_affected = disasters %>% + filter(GEOID %in% affected_county_fips, incidents_natural_hazard > 0) +``` + +--- + +# Section 1: The Affected County Baseline Profile + +Establish the pre-disaster economic conditions of the affected county. + +## Sociodemographic Characteristics + + +``` r +acs_df_2023 = arrow::read_parquet(file.path(get_box_path(), "sociodemographics", "acs", "acs_county_2023.parquet")) + +acs_matching_variables = c( + "median_household_income_universe_allraces", + "race_personofcolor_percent", + "population_density_land_sq_kilometer", + "total_population_universe", + "educational_attainment_degree_bachelors_percent") +``` + + +## Hazard Damages over Time + + +``` r +sheldus_df = get_sheldus() %>% + summarize(.by = c(GEOID, year), damage_property_millions = sum(damage_property, na.rm = TRUE) / 1000000) + +sheldus_df %>% + filter(GEOID %in% affected_county_fips) %>% + ggplot(aes(x = year, y = damage_property_millions)) + + geom_line() + + geom_point() + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + labs(x = "Year", y = "Property damage (millions)") +``` + +![plot of chunk unnamed-chunk-4](figure/economic_recovery_factsheet-unnamed-chunk-4-1.png) + + +## Industry Composition over Time + +Identify sectors that may be particularly vulnerable to this disaster type. + + +``` r +cbp_df = cache_it( + cbp_df, + file_name = "cbp_2016_2023", + path = file.path(get_box_path(), "employment"), read = TRUE) + +cbp_affected = cbp_df %>% + mutate( + county_fips = str_c(state, county), + industry_label = industry %>% str_replace_all("_", " ") %>% str_to_sentence()) %>% + filter(county_fips %in% affected_county_fips) + +top_8_industries = cbp_affected %>% + filter(year == 2023, industry != "total") %>% + arrange(desc(employees)) %>% + slice(1:8) %>% + pull(industry) + +cbp_prior_disaster_years = disasters_affected %>% + filter(year_declared %in% (cbp_df$year %>% unique())) %>% + distinct(year_declared) + +cbp_affected %>% + filter(employees > 0, industry %in% top_8_industries) %>% + arrange(desc(employees)) %>% + ggplot(aes(x = year, y = employees, color = industry_label, group = industry_label)) + + geom_line() + + geom_vline(data = cbp_prior_disaster_years, color = "grey", linetype = "dashed", aes(xintercept = year_declared)) + + scale_y_continuous(labels = scales::comma) + + scale_x_continuous(n.breaks = length((cbp_df$year %>% unique()))) + + labs(y = "Employees (n)", x = "") +``` + +![plot of chunk baseline-industry](figure/economic_recovery_factsheet-baseline-industry-1.png) + +## Small Businesses over Time + +Identify smaller businesses, which may be more vulnerable to disaster-related economic shocks. + + +``` r +total_employers = cbp_affected %>% + filter(industry == "total", employee_size_range_label == "All establishments") %>% + slice_max(year) %>% + pull(employers) + +cbp_affected %>% + tibble::as_tibble() %>% + filter(employee_size_range_label != "All establishments") %>% + mutate( + employee_size_range_label = case_when( + employee_size_range_label %in% c("<100-249 employees", "250-499 employees") ~ "100-499 employees", + employee_size_range_label %in% c("500-999 employees", "1000+") ~ "500+ employees", + TRUE ~ employee_size_range_label), + employee_size_range = factor( + employee_size_range_label, + levels = c( + "1-4 employees", "<5 employees", "5-9 employees", "10-19 employees", "20-49 employees", + "50-99 employees", "100-499 employees", "500+ employees"), + ordered = TRUE), + industry_label = factor( + industry_label, + levels = count(., industry_label, sort = TRUE) %>% pull(industry_label), + ordered = TRUE)) %>% + mutate(.by = industry, employer_total = sum(employers)) %>% + filter(employer_total > (.05 * total_employers)) %>% + ggplot(aes(x = year)) + + geom_area(aes(y = employers, fill = employee_size_range)) + + facet_wrap(~ reorder(industry_label, -employer_total), ncol = 4) + + labs(x = "Year", y = "Employers", title = "Employers by Number of Employees, by Industry") + + scale_y_continuous(labels = scales::comma) +``` + +![plot of chunk unnamed-chunk-5](figure/economic_recovery_factsheet-unnamed-chunk-5-1.png) + +## County Fiscal Capacity Over Time + + +``` r +county_expenses = get_government_finances() +finances_years = county_expenses %>% pull(year) %>% unique() + +finances_prior_disaster_years = disasters_affected %>% + filter(year_declared %in% finances_years) %>% + distinct(year_declared) + +county_expenses %>% + filter(GEOID %in% affected_county_fips) %>% + mutate(across(matches("revenue|expenditure"), ~ .x / 1000)) %>% + pivot_longer(-c(year, GEOID, county_name)) %>% + filter(name %in% c("revenue_total", "expenditure_total")) %>% + mutate(name = if_else(str_detect(name, "expenditure"), "Expenditures", "Revenues")) %>% + inflation_adjust(year_variable = "year", dollar_variables = "value", base_year = 2024, names_suffix = "_adjusted") %>% + ggplot(aes(x = year, y = value, color = name, group = name)) + + geom_line() + + geom_point() + + geom_vline(data = finances_prior_disaster_years, color = "grey", linetype = "dashed", aes(xintercept = year_declared)) + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + labs(x = "Year", y = "USD (Millions)") +``` + +![plot of chunk baseline-fiscal](figure/economic_recovery_factsheet-baseline-fiscal-1.png) + +--- + +# Section 2: Selecting Comparison Counties + +Identify historical analogs: counties that experienced similar disasters 5+ years ago. + +## Assemble Matching Dataset + +Build a county-level dataset with all variables needed for matching. + + +``` r +# Disaster history: count and binary indicators by hazard type in past 5 years +disaster_lookback_years <- 5 +reference_year <- disaster_year - 1 # Use year before disaster for matching + +disaster_history <- disasters %>% + filter( + year_declared >= (reference_year - disaster_lookback_years), + year_declared <= reference_year) %>% + tidytable::summarise( + .by = GEOID, + n_disasters_prior_5yr = sum(incidents_natural_hazard, na.rm = TRUE), + fire_prior_5yr = as.integer(sum(incidents_fire, na.rm = TRUE) > 0), + flood_prior_5yr = as.integer(sum(incidents_flood, na.rm = TRUE) > 0), + hurricane_prior_5yr = as.integer(sum(incidents_hurricane, na.rm = TRUE) > 0), + severe_storm_prior_5yr = as.integer(sum(incidents_severe_storm, na.rm = TRUE) > 0), + tornado_prior_5yr = as.integer(sum(incidents_tornado, na.rm = TRUE) > 0), + winter_storm_prior_5yr = as.integer(sum(incidents_winter_storm, na.rm = TRUE) > 0), + drought_prior_5yr = as.integer(sum(incidents_drought, na.rm = TRUE) > 0)) %>% + as_tibble() +``` + + +``` r +sheldus_reference_year = if_else(reference_year < 2023, reference_year, 2023) + +sheldus_matching = sheldus_df %>% + filter(year == sheldus_reference_year) +``` + + +``` r +# Industry employment shares from County Business Patterns +# Use 2-digit NAICS codes for broad sector shares +cbp_reference_year = if_else(reference_year < 2023, reference_year, 2023) + +cbp_matching <- cbp_df %>% + ## filter out the employers by employee size records + filter(year == cbp_reference_year, employees > 0) %>% + mutate(GEOID = str_c(state, county)) %>% + select(GEOID, industry, employees) %>% + as_tibble() + +# Calculate total employment per county +cbp_totals <- cbp_matching %>% + filter(industry == "total") %>% + select(GEOID, total_employees = employees) + +# Calculate employment share by industry +industry_shares <- cbp_matching %>% + filter(industry != "total") %>% + tidylog::left_join(cbp_totals, by = "GEOID") %>% + arrange(GEOID) %>% + mutate( + share_employees_ = employees / total_employees, + industry_var = str_c("share_employees_", industry)) %>% + select(GEOID, industry_var, share_employees_) %>% + pivot_wider(names_from = industry_var, values_from = share_employees_, values_fill = 0) + +# Add total employment +## NOTE: the total employee count is always greater than or equal to +## the sum of individual industries' employment countes because CBP omits +## some industry categories +industry_matching <- industry_shares %>% + left_join(cbp_totals, by = "GEOID") +``` + + +``` r +# NFIP residential coverage rate by county +nfip_matching <- get_nfip_residential_penetration() %>% + mutate(share_residential_structures_sfha = residential_structures_sfha / residential_structures) %>% + select(GEOID, share_residential_structures_sfha, penetration_rate_sfha) +``` + + +``` r +finance_reference_year = if_else(reference_year < 2023, reference_year, 2022) + +# Total county government expenses +county_finances_matching = county_expenses %>% + filter(year == finance_reference_year) +``` + + +``` r +# Sociodemographic characteristics from ACS +acs_matching <- acs_df_2023 %>% + select(GEOID, all_of(acs_matching_variables)) +``` + + +``` r +# Combine all matching variables into single dataset +matching_data <- acs_matching %>% + left_join(disaster_history, by = "GEOID") %>% + left_join(sheldus_matching, by = "GEOID") %>% + left_join(industry_matching, by = "GEOID") %>% + left_join(nfip_matching, by = "GEOID") %>% + left_join(county_finances_matching, by = "GEOID") %>% + # Replace NAs with 0 for disaster counts (counties with no disasters) + mutate(across(ends_with("_prior_5yr"), ~ replace_na(.x, 0))) %>% + # Drop counties with missing core variables + filter(!is.na(total_population_universe), !is.na(total_employees)) +``` + +## Select Comparison Counties + + +``` r +# Hard filter: same disaster type, disaster occurred 5+ years ago +comparison_pool <- disasters %>% + filter( + year_declared <= 2021, # Enough post-period + year_declared >= 2018) %>% + slice(.by = GEOID, 1) %>% + distinct(GEOID, year_declared) %>% + rename(comparison_disaster_year = year_declared) + +# Get affected county's characteristics +affected_county_chars <- matching_data %>% + filter(GEOID == affected_county_fips) + +# Join matching data +comparison_candidates <- comparison_pool %>% + inner_join(matching_data, by = "GEOID") %>% + filter( + total_population_universe > affected_county_chars$total_population_universe * .75, + total_population_universe < affected_county_chars$total_population_universe * 1.25, + median_household_income_universe_allraces > affected_county_chars$median_household_income_universe_allraces * .75, + median_household_income_universe_allraces < affected_county_chars$median_household_income_universe_allraces * 1.25, + GEOID != affected_county_fips) # Exclude affected county +``` + + +``` r +# Calculate Mahalanobis distance to affected county + +# Select numeric variables for distance calculation +matching_vars <- c( + acs_matching_variables, + "n_disasters_prior_5yr", + "total_employees", + "penetration_rate_sfha", + "revenue_total", + "expenditure_total", + "damage_property_millions") + +# Prepare matrices +matrix_candidates <- comparison_candidates %>% + select(all_of(matching_vars)) %>% + as.matrix() + +matrix_affected <- affected_county_chars %>% + select(all_of(matching_vars)) %>% + as.matrix() + +# Calculate covariance matrix and Mahalanobis distance +covariance_matrix <- cov(matrix_candidates, use = "pairwise.complete.obs") +distances <- mahalanobis(matrix_candidates, center = matrix_affected, cov = covariance_matrix, tol=1e-30) + +# Select top k nearest neighbors +k_neighbors <- 10 + +comparison_counties <- comparison_candidates %>% + mutate(mahalanobis_distance = distances) %>% + slice_min(mahalanobis_distance, n = k_neighbors) %>% + select(GEOID, comparison_disaster_year, mahalanobis_distance) %>% + left_join(tidycensus::fips_codes %>% transmute(county, GEOID = str_c(state_code, county_code))) %>% + slice(1:5) +``` + +--- + +# Section 3: Event-Study Data Preparation + +Align all outcome datasets to event time (t=0 is disaster year). + + +``` r +# Create reference table: affected county + comparison counties with disaster years +county_reference <- bind_rows( + # Affected county + tibble( + GEOID = affected_county_fips, + disaster_year_event = disaster_year, + county_type = "affected"), + # Comparison counties + comparison_counties %>% + transmute( + GEOID, + disaster_year_event = comparison_disaster_year, + county_type = "comparison")) + +# Define event window +event_window <- c(-5, 4) +``` + + +``` r +# Align to event time +cbp_event_aligned <- cbp_df %>% + mutate(GEOID = str_c(state, county)) %>% + rename(calendar_year = year) %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + industry, employees, employers, annual_payroll) +``` + + +``` r +# Align to event time +fiscal_event_aligned <- county_expenses %>% + rename(calendar_year = year) %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + expenditure_total, revenue_total) +``` + + +``` r +# SBA disaster loans +sba_raw <- get_sba_loans() + +sba_crosswalk = get_crosswalk( + source_geography = "zcta", + target_geography = "county") + +sba_simple = sba_raw %>% + select( + source_geoid = damaged_property_zip_code, + sba_loan_amount = approved_amount_total, + loan_type, + fiscal_year) + +warning("The crosswalking is imperfect and fiscal and calendar years are not aligned.") +sba_county = sba_simple %>% + tidylog::left_join(sba_crosswalk$crosswalks$step_1, by = "source_geoid") %>% + mutate(target_geoid = str_c(state_fips, target_geoid)) %>% + tidytable::summarize( + .by = c(target_geoid, loan_type, fiscal_year), + sba_loan_amount = sum(sba_loan_amount * allocation_factor_source_to_target)) %>% + filter(!is.na(target_geoid)) %>% + mutate( + GEOID = target_geoid, + calendar_year = fiscal_year %>% as.numeric) %>% + pivot_wider(names_from = loan_type, values_from = sba_loan_amount) %>% + rename(business_loan = business, residential_loan = residential) %>% + mutate(across(matches("loan"), ~ if_else(is.na(.x), 0, .x))) + +# Align to event time +sba_event_aligned <- sba_county %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + matches("loan")) +``` + + +``` r +# FEMA Public Assistance +pa_raw <- get_public_assistance() +#> Processed 360244 groups out of 656351. 55% done. Time elapsed: 3s. ETA: 2s. Processed 550614 groups out of 656351. 84% done. Time elapsed: 4s. ETA: 0s. Processed 656351 groups out of 656351. 100% done. Time elapsed: 4s. ETA: 0s. + +# Aggregate to county-year level +pa_county_year <- pa_raw %>% + mutate( + GEOID = county_fips, + calendar_year = declaration_year) %>% + summarise( + .by = c(GEOID, calendar_year), + across(.cols = matches("split"), ~ sum(.x, na.rm = TRUE))) + +# Align to event time +pa_event_aligned <- pa_county_year %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + pa_federal_funding_obligated_split) +``` + + +``` r +# FEMA Individual and Households Program +# ihp_raw <- get_ihp_registrations() + +# # Aggregate to county-year level +# ihp_county_year <- ihp_raw %>% +# mutate(calendar_year = lubridate::year(declaration_date)) %>% +# summarise( +# .by = c(GEOID, calendar_year), +# ihp_registrations_n = n(), +# ihp_approved_n = sum(ihp_eligible, na.rm = TRUE), +# ihp_amount_total = sum(ihp_amount, na.rm = TRUE), +# ihp_ha_amount = sum(ha_amount, na.rm = TRUE), +# ihp_ona_amount = sum(ona_amount, na.rm = TRUE)) + +# # Align to event time +# ihp_event_aligned <- ihp_county_year %>% +# inner_join(county_reference, by = "GEOID") %>% +# mutate(event_time = calendar_year - disaster_year_event) %>% +# filter(event_time >= event_window[1], event_time <= event_window[2]) %>% +# select(GEOID, county_type, disaster_year_event, calendar_year, event_time, +# ihp_registrations_n, ihp_approved_n, ihp_amount_total, ihp_ha_amount, ihp_ona_amount) +``` + +--- + +# Section 4: Employment Trajectories (Event-Study Style) + +Plot employment trends with time relative to disaster (t=0). + +- **Affected county**: t-6 through t=0 (present) +- **Comparison counties**: full t-6 through t+6 window, aligned to their disaster year + + +``` r +# Total employment over event time +cbp_event_aligned %>% + filter(industry == "total", employees > 0) %>% + tibble::as_tibble() %>% + ggplot(aes(x = event_time, y = employees, group = GEOID, color = county_type, linetype = county_type)) + + geom_line() + + geom_point() + + ylim(c(0, NA)) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::comma) + + scale_color_manual(values = c("affected" = "#1696d2", "comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "Total Employees", + title = "Employment Trajectory: Affected vs. Comparison Counties") + + theme(legend.position = "none") +``` + +![plot of chunk employment-total](figure/economic_recovery_factsheet-employment-total-1.png) + +--- + +# Section 5: Local Government Fiscal Trajectories (Event-Study Style) + +Examine how local government finances evolved in comparison counties post-disaster. + + +``` r +# Total county expenses over event time +fiscal_event_aligned %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = expenditure_total / 1000, group = GEOID, color = county_type)) + + geom_line() + + geom_point() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + ylim(c(0, NA)) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "Total County Expenses (Millions)", + title = "County Government Expenses: Affected vs. Comparison Counties") + + theme(legend.position = "none") +``` + +![plot of chunk fiscal-expenses](figure/economic_recovery_factsheet-fiscal-expenses-1.png) + + +``` r +# Total county expenses over event time +fiscal_event_aligned %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = revenue_total / 1000, group = GEOID, color = county_type)) + + geom_line() + + geom_point() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "Total County Revenues (Millions)", + title = "County Government Revenues: Affected vs. Comparison Counties") + + theme(legend.position = "none") +``` + +![plot of chunk fiscal-revenue](figure/economic_recovery_factsheet-fiscal-revenue-1.png) + +--- + +# Section 6: Recovery Resources in Comparison Cases + +Describe typical federal assistance flows based on historical analogs. Since the affected county's disaster just occurred, we show comparison counties' post-disaster resource flows as projections. + +## SBA Disaster Loans + + +``` r +# SBA loans over event time (comparison counties only for post-period) + +sba_event_aligned %>% + arrange(GEOID, event_time) %>% + mutate( + .by = GEOID, + across(matches("loan"), ~ cumsum(.x))) %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = residential_loan / 1000000, group = GEOID, color = county_type)) + + geom_line() + + ylim(c(0, NA)) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years relative to disaster (t=0)", + y = "SBA loans (cumulative)", + title = "Cumulative SBA Loans - Residential") + + theme(legend.position = "none") +``` + +![plot of chunk recovery-sba](figure/economic_recovery_factsheet-recovery-sba-1.png) + +``` r + +sba_event_aligned %>% + arrange(GEOID, event_time) %>% + mutate( + .by = GEOID, + across(matches("loan"), ~ cumsum(.x))) %>% + ggplot(aes(x = event_time, y = business_loan / 1000000, group = GEOID, color = county_type)) + + geom_line() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("affected" = "#1696d2", "comparison" = "#d2d2d2")) + + labs( + x = "Years relative to disaster (t=0)", + y = "SBA loans (cumulative)", + title = "Cumulative SBA Loans - Business") + + theme(legend.position = "none") +``` + +![plot of chunk recovery-sba](figure/economic_recovery_factsheet-recovery-sba-2.png) + +## FEMA Public Assistance + + +``` r +# PA funding over event time (comparison counties only for post-period) +pa_event_aligned %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = pa_federal_funding_obligated_split / 1e6, group = GEOID, color = county_type)) + + geom_line() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "FEMA PA Federal Share Obligated (Millions)", + title = "FEMA Public Assistance in Comparison Counties") +``` + +![plot of chunk recovery-pa](figure/economic_recovery_factsheet-recovery-pa-1.png) + +## Individual and Households Program + + +``` r +# # IHP funding over event time (comparison counties only for post-period) +# ihp_event_aligned %>% +# ggplot(aes(x = event_time, y = ihp_amount_total / 1e6, group = GEOID)) + +# geom_line(alpha = 0.4, color = "#d2d2d2") + +# stat_summary(aes(group = 1), fun = median, geom = "line", color = "#1696d2", linewidth = 1.2) + +# geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + +# scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + +# labs( +# x = "Years Relative to Disaster (t=0)", +# y = "IHP Total Amount (Millions)", +# title = "FEMA Individual & Households Program in Comparison Counties", +# subtitle = "Blue line = median across comparison counties" +# ) +``` + +--- + +# Section 7: Key Takeaways + + +``` r +# Programmatically generate summary statistics for the factsheet: +# - Pre-disaster vulnerability indicators for affected county +# - Median/range of recovery trajectories from comparison counties +# - Typical federal resource flows +``` + +## Summary Table + + +``` r +# Create summary table of key indicators +``` + +--- + +## Technical Notes + +### Comparison County Selection Criteria +- Similar population size and income (within 25% of affected county in either direction) + +### Data Availability Notes +- LODES data typically lag 2 years +- County Business Patterns lag 2 years +- Government finances lag 2 +- FEMA assistance data are near real-time + diff --git a/vignettes/economic_recovery_factsheet.Rmd.orig b/vignettes/economic_recovery_factsheet.Rmd.orig new file mode 100644 index 0000000..390172f --- /dev/null +++ b/vignettes/economic_recovery_factsheet.Rmd.orig @@ -0,0 +1,699 @@ +--- +title: "Economic Recovery Post Disaster" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Economic Recovery Post Disaster} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + warning = FALSE, + message = FALSE, + fig.width = 8, + fig.height = 6, + dpi = 1000, + eval = TRUE, + echo = TRUE) + +options(scipen = 999) +``` + + +## Overview + +Because post-event data for the affected area will not be available for months or years, this approach identifies historical comparison—counties that experienced similar disasters and had similar pre-disaster characteristics—in the past—to highlight various recovery trajectories. + +Contents: + +1. The affected county's pre-disaster economic baseline +2. Employment, business, and local government finance trajectories using an event-study framework +4. Typical PA/IHP/SBA flows from federal programs (post-disaster period only) + +## Notes + +Critical to this approach is an observable pre-period, especially for the disaster-impacted county (which has no post-period data). But because of data lags, the pre-period will never run up through 2026 (current year at time of writing)--at best, it will go through ~2024 (ACS) or in most cases only 2023 (County Business Patterns, government finances). + +Accordingly, we want our pre-period to be long--say, 8 years preceding the disaster, if possible--so that we have multiple years of observation, even for the disaster-affected county. But for our comparison counties, we are hemmed in on the other side--we want to have post-period observations for these, so the comparison disaster ideally occurs between 2018-2020, giving us 3-5 post-period years. However, this requires a pre-period dating back to as early as 2010... which is not covered in many datasets (e.g., ACS). Thus, we have a tension in identifying overlapping pre-period timelines for our comparison and affected counties due to historical data coverage. + +## Setup + +```{r setup} +library(climateapi) +library(tidyverse) +library(urbnthemes) +library(crosswalk) + +set_urbn_defaults(style = "print") +``` + +## Parameters + +Define the affected county and disaster characteristics. These would be updated for each new event. +```{r parameters} +# Affected county +affected_county_fips <- "12087" +affected_county_name <- "Monroe County, FL" +affected_state <- "FL" + +# Disaster characteristics +disaster_type <- "Hurricane" +disaster_year <- 2026 + +# Event-study window +years_before <- 6 +years_after <- 4 +``` + +### Preceding Disasters +```{r} +disasters = get_fema_disaster_declarations(api = FALSE) +disasters_affected = disasters %>% + filter(GEOID %in% affected_county_fips, incidents_natural_hazard > 0) +``` + +--- + +# Section 1: The Affected County Baseline Profile + +Establish the pre-disaster economic conditions of the affected county. + +## Sociodemographic Characteristics + +```{r} +acs_df_2023 = arrow::read_parquet(file.path(get_box_path(), "sociodemographics", "acs", "acs_county_2023.parquet")) + +acs_matching_variables = c( + "median_household_income_universe_allraces", + "race_personofcolor_percent", + "population_density_land_sq_kilometer", + "total_population_universe", + "educational_attainment_degree_bachelors_percent") +``` + + +## Hazard Damages over Time + +```{r} +sheldus_df = get_sheldus() %>% + summarize(.by = c(GEOID, year), damage_property_millions = sum(damage_property, na.rm = TRUE) / 1000000) + +sheldus_df %>% + filter(GEOID %in% affected_county_fips) %>% + ggplot(aes(x = year, y = damage_property_millions)) + + geom_line() + + geom_point() + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + labs(x = "Year", y = "Property damage (millions)") +``` + + +## Industry Composition over Time + +Identify sectors that may be particularly vulnerable to this disaster type. + +```{r baseline-industry} +cbp_df = cache_it( + cbp_df, + file_name = "cbp_2016_2023", + path = file.path(get_box_path(), "employment"), read = TRUE) + +cbp_affected = cbp_df %>% + mutate( + county_fips = str_c(state, county), + industry_label = industry %>% str_replace_all("_", " ") %>% str_to_sentence()) %>% + filter(county_fips %in% affected_county_fips) + +top_8_industries = cbp_affected %>% + filter(year == 2023, industry != "total") %>% + arrange(desc(employees)) %>% + slice(1:8) %>% + pull(industry) + +cbp_prior_disaster_years = disasters_affected %>% + filter(year_declared %in% (cbp_df$year %>% unique())) %>% + distinct(year_declared) + +cbp_affected %>% + filter(employees > 0, industry %in% top_8_industries) %>% + arrange(desc(employees)) %>% + ggplot(aes(x = year, y = employees, color = industry_label, group = industry_label)) + + geom_line() + + geom_vline(data = cbp_prior_disaster_years, color = "grey", linetype = "dashed", aes(xintercept = year_declared)) + + scale_y_continuous(labels = scales::comma) + + scale_x_continuous(n.breaks = length((cbp_df$year %>% unique()))) + + labs(y = "Employees (n)", x = "") +``` + +## Small Businesses over Time + +Identify smaller businesses, which may be more vulnerable to disaster-related economic shocks. + +```{r} +total_employers = cbp_affected %>% + filter(industry == "total", employee_size_range_label == "All establishments") %>% + slice_max(year) %>% + pull(employers) + +cbp_affected %>% + tibble::as_tibble() %>% + filter(employee_size_range_label != "All establishments") %>% + mutate( + employee_size_range_label = case_when( + employee_size_range_label %in% c("<100-249 employees", "250-499 employees") ~ "100-499 employees", + employee_size_range_label %in% c("500-999 employees", "1000+") ~ "500+ employees", + TRUE ~ employee_size_range_label), + employee_size_range = factor( + employee_size_range_label, + levels = c( + "1-4 employees", "<5 employees", "5-9 employees", "10-19 employees", "20-49 employees", + "50-99 employees", "100-499 employees", "500+ employees"), + ordered = TRUE), + industry_label = factor( + industry_label, + levels = count(., industry_label, sort = TRUE) %>% pull(industry_label), + ordered = TRUE)) %>% + mutate(.by = industry, employer_total = sum(employers)) %>% + filter(employer_total > (.05 * total_employers)) %>% + ggplot(aes(x = year)) + + geom_area(aes(y = employers, fill = employee_size_range)) + + facet_wrap(~ reorder(industry_label, -employer_total), ncol = 4) + + labs(x = "Year", y = "Employers", title = "Employers by Number of Employees, by Industry") + + scale_y_continuous(labels = scales::comma) +``` + +## County Fiscal Capacity Over Time + +```{r baseline-fiscal} +county_expenses = get_government_finances() +finances_years = county_expenses %>% pull(year) %>% unique() + +finances_prior_disaster_years = disasters_affected %>% + filter(year_declared %in% finances_years) %>% + distinct(year_declared) + +county_expenses %>% + filter(GEOID %in% affected_county_fips) %>% + mutate(across(matches("revenue|expenditure"), ~ .x / 1000)) %>% + pivot_longer(-c(year, GEOID, county_name)) %>% + filter(name %in% c("revenue_total", "expenditure_total")) %>% + mutate(name = if_else(str_detect(name, "expenditure"), "Expenditures", "Revenues")) %>% + inflation_adjust(year_variable = "year", dollar_variables = "value", base_year = 2024, names_suffix = "_adjusted") %>% + ggplot(aes(x = year, y = value, color = name, group = name)) + + geom_line() + + geom_point() + + geom_vline(data = finances_prior_disaster_years, color = "grey", linetype = "dashed", aes(xintercept = year_declared)) + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + labs(x = "Year", y = "USD (Millions)") +``` + +--- + +# Section 2: Selecting Comparison Counties + +Identify historical analogs: counties that experienced similar disasters 5+ years ago. + +## Assemble Matching Dataset + +Build a county-level dataset with all variables needed for matching. + +```{r matching-disasters} +# Disaster history: count and binary indicators by hazard type in past 5 years +disaster_lookback_years <- 5 +reference_year <- disaster_year - 1 # Use year before disaster for matching + +disaster_history <- disasters %>% + filter( + year_declared >= (reference_year - disaster_lookback_years), + year_declared <= reference_year) %>% + tidytable::summarise( + .by = GEOID, + n_disasters_prior_5yr = sum(incidents_natural_hazard, na.rm = TRUE), + fire_prior_5yr = as.integer(sum(incidents_fire, na.rm = TRUE) > 0), + flood_prior_5yr = as.integer(sum(incidents_flood, na.rm = TRUE) > 0), + hurricane_prior_5yr = as.integer(sum(incidents_hurricane, na.rm = TRUE) > 0), + severe_storm_prior_5yr = as.integer(sum(incidents_severe_storm, na.rm = TRUE) > 0), + tornado_prior_5yr = as.integer(sum(incidents_tornado, na.rm = TRUE) > 0), + winter_storm_prior_5yr = as.integer(sum(incidents_winter_storm, na.rm = TRUE) > 0), + drought_prior_5yr = as.integer(sum(incidents_drought, na.rm = TRUE) > 0)) %>% + as_tibble() +``` + +```{r matching-sheldus} +sheldus_reference_year = if_else(reference_year < 2023, reference_year, 2023) + +sheldus_matching = sheldus_df %>% + filter(year == sheldus_reference_year) +``` + +```{r matching-industry} +# Industry employment shares from County Business Patterns +# Use 2-digit NAICS codes for broad sector shares +cbp_reference_year = if_else(reference_year < 2023, reference_year, 2023) + +cbp_matching <- cbp_df %>% + ## filter out the employers by employee size records + filter(year == cbp_reference_year, employees > 0) %>% + mutate(GEOID = str_c(state, county)) %>% + select(GEOID, industry, employees) %>% + as_tibble() + +# Calculate total employment per county +cbp_totals <- cbp_matching %>% + filter(industry == "total") %>% + select(GEOID, total_employees = employees) + +# Calculate employment share by industry +industry_shares <- cbp_matching %>% + filter(industry != "total") %>% + tidylog::left_join(cbp_totals, by = "GEOID") %>% + arrange(GEOID) %>% + mutate( + share_employees_ = employees / total_employees, + industry_var = str_c("share_employees_", industry)) %>% + select(GEOID, industry_var, share_employees_) %>% + pivot_wider(names_from = industry_var, values_from = share_employees_, values_fill = 0) + +# Add total employment +## NOTE: the total employee count is always greater than or equal to +## the sum of individual industries' employment countes because CBP omits +## some industry categories +industry_matching <- industry_shares %>% + left_join(cbp_totals, by = "GEOID") +``` + +```{r matching-nfip} +# NFIP residential coverage rate by county +nfip_matching <- get_nfip_residential_penetration() %>% + mutate(share_residential_structures_sfha = residential_structures_sfha / residential_structures) %>% + select(GEOID, share_residential_structures_sfha, penetration_rate_sfha) +``` + +```{r matching-fiscal} +finance_reference_year = if_else(reference_year < 2023, reference_year, 2022) + +# Total county government expenses +county_finances_matching = county_expenses %>% + filter(year == finance_reference_year) +``` + +```{r matching-acs} +# Sociodemographic characteristics from ACS +acs_matching <- acs_df_2023 %>% + select(GEOID, all_of(acs_matching_variables)) +``` + +```{r matching-combine} +# Combine all matching variables into single dataset +matching_data <- acs_matching %>% + left_join(disaster_history, by = "GEOID") %>% + left_join(sheldus_matching, by = "GEOID") %>% + left_join(industry_matching, by = "GEOID") %>% + left_join(nfip_matching, by = "GEOID") %>% + left_join(county_finances_matching, by = "GEOID") %>% + # Replace NAs with 0 for disaster counts (counties with no disasters) + mutate(across(ends_with("_prior_5yr"), ~ replace_na(.x, 0))) %>% + # Drop counties with missing core variables + filter(!is.na(total_population_universe), !is.na(total_employees)) +``` + +## Select Comparison Counties + +```{r comparison-selection} +# Hard filter: same disaster type, disaster occurred 5+ years ago +comparison_pool <- disasters %>% + filter( + year_declared <= 2021, # Enough post-period + year_declared >= 2018) %>% + slice(.by = GEOID, 1) %>% + distinct(GEOID, year_declared) %>% + rename(comparison_disaster_year = year_declared) + +# Get affected county's characteristics +affected_county_chars <- matching_data %>% + filter(GEOID == affected_county_fips) + +# Join matching data +comparison_candidates <- comparison_pool %>% + inner_join(matching_data, by = "GEOID") %>% + filter( + total_population_universe > affected_county_chars$total_population_universe * .75, + total_population_universe < affected_county_chars$total_population_universe * 1.25, + median_household_income_universe_allraces > affected_county_chars$median_household_income_universe_allraces * .75, + median_household_income_universe_allraces < affected_county_chars$median_household_income_universe_allraces * 1.25, + GEOID != affected_county_fips) # Exclude affected county +``` + +```{r comparison-distance} +# Calculate Mahalanobis distance to affected county + +# Select numeric variables for distance calculation +matching_vars <- c( + acs_matching_variables, + "n_disasters_prior_5yr", + "total_employees", + "penetration_rate_sfha", + "revenue_total", + "expenditure_total", + "damage_property_millions") + +# Prepare matrices +matrix_candidates <- comparison_candidates %>% + select(all_of(matching_vars)) %>% + as.matrix() + +matrix_affected <- affected_county_chars %>% + select(all_of(matching_vars)) %>% + as.matrix() + +# Calculate covariance matrix and Mahalanobis distance +covariance_matrix <- cov(matrix_candidates, use = "pairwise.complete.obs") +distances <- mahalanobis(matrix_candidates, center = matrix_affected, cov = covariance_matrix, tol=1e-30) + +# Select top k nearest neighbors +k_neighbors <- 10 + +comparison_counties <- comparison_candidates %>% + mutate(mahalanobis_distance = distances) %>% + slice_min(mahalanobis_distance, n = k_neighbors) %>% + select(GEOID, comparison_disaster_year, mahalanobis_distance) %>% + left_join(tidycensus::fips_codes %>% transmute(county, GEOID = str_c(state_code, county_code))) %>% + slice(1:5) +``` + +--- + +# Section 3: Event-Study Data Preparation + +Align all outcome datasets to event time (t=0 is disaster year). + +```{r event-study-reference} +# Create reference table: affected county + comparison counties with disaster years +county_reference <- bind_rows( + # Affected county + tibble( + GEOID = affected_county_fips, + disaster_year_event = disaster_year, + county_type = "affected"), + # Comparison counties + comparison_counties %>% + transmute( + GEOID, + disaster_year_event = comparison_disaster_year, + county_type = "comparison")) + +# Define event window +event_window <- c(-5, 4) +``` + +```{r event-study-employment} +# Align to event time +cbp_event_aligned <- cbp_df %>% + mutate(GEOID = str_c(state, county)) %>% + rename(calendar_year = year) %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + industry, employees, employers, annual_payroll) +``` + +```{r event-study-fiscal} +# Align to event time +fiscal_event_aligned <- county_expenses %>% + rename(calendar_year = year) %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + expenditure_total, revenue_total) +``` + +```{r event-study-sba} +# SBA disaster loans +sba_raw <- get_sba_loans() + +sba_crosswalk = get_crosswalk( + source_geography = "zcta", + target_geography = "county") + +sba_simple = sba_raw %>% + select( + source_geoid = damaged_property_zip_code, + sba_loan_amount = approved_amount_total, + loan_type, + fiscal_year) + +warning("The crosswalking is imperfect and fiscal and calendar years are not aligned.") +sba_county = sba_simple %>% + tidylog::left_join(sba_crosswalk$crosswalks$step_1, by = "source_geoid") %>% + mutate(target_geoid = str_c(state_fips, target_geoid)) %>% + tidytable::summarize( + .by = c(target_geoid, loan_type, fiscal_year), + sba_loan_amount = sum(sba_loan_amount * allocation_factor_source_to_target)) %>% + filter(!is.na(target_geoid)) %>% + mutate( + GEOID = target_geoid, + calendar_year = fiscal_year %>% as.numeric) %>% + pivot_wider(names_from = loan_type, values_from = sba_loan_amount) %>% + rename(business_loan = business, residential_loan = residential) %>% + mutate(across(matches("loan"), ~ if_else(is.na(.x), 0, .x))) + +# Align to event time +sba_event_aligned <- sba_county %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + matches("loan")) +``` + +```{r event-study-pa} +# FEMA Public Assistance +pa_raw <- get_public_assistance() + +# Aggregate to county-year level +pa_county_year <- pa_raw %>% + mutate( + GEOID = county_fips, + calendar_year = declaration_year) %>% + summarise( + .by = c(GEOID, calendar_year), + across(.cols = matches("split"), ~ sum(.x, na.rm = TRUE))) + +# Align to event time +pa_event_aligned <- pa_county_year %>% + inner_join(county_reference, by = "GEOID") %>% + mutate(event_time = calendar_year - disaster_year_event) %>% + filter(event_time >= event_window[1], event_time <= event_window[2]) %>% + select(GEOID, county_type, disaster_year_event, calendar_year, event_time, + pa_federal_funding_obligated_split) +``` + +```{r event-study-ihp} +# FEMA Individual and Households Program +# ihp_raw <- get_ihp_registrations() + +# # Aggregate to county-year level +# ihp_county_year <- ihp_raw %>% +# mutate(calendar_year = lubridate::year(declaration_date)) %>% +# summarise( +# .by = c(GEOID, calendar_year), +# ihp_registrations_n = n(), +# ihp_approved_n = sum(ihp_eligible, na.rm = TRUE), +# ihp_amount_total = sum(ihp_amount, na.rm = TRUE), +# ihp_ha_amount = sum(ha_amount, na.rm = TRUE), +# ihp_ona_amount = sum(ona_amount, na.rm = TRUE)) + +# # Align to event time +# ihp_event_aligned <- ihp_county_year %>% +# inner_join(county_reference, by = "GEOID") %>% +# mutate(event_time = calendar_year - disaster_year_event) %>% +# filter(event_time >= event_window[1], event_time <= event_window[2]) %>% +# select(GEOID, county_type, disaster_year_event, calendar_year, event_time, +# ihp_registrations_n, ihp_approved_n, ihp_amount_total, ihp_ha_amount, ihp_ona_amount) +``` + +--- + +# Section 4: Employment Trajectories (Event-Study Style) + +Plot employment trends with time relative to disaster (t=0). + +- **Affected county**: t-6 through t=0 (present) +- **Comparison counties**: full t-6 through t+6 window, aligned to their disaster year + +```{r employment-total} +# Total employment over event time +cbp_event_aligned %>% + filter(industry == "total", employees > 0) %>% + tibble::as_tibble() %>% + ggplot(aes(x = event_time, y = employees, group = GEOID, color = county_type, linetype = county_type)) + + geom_line() + + geom_point() + + ylim(c(0, NA)) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::comma) + + scale_color_manual(values = c("affected" = "#1696d2", "comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "Total Employees", + title = "Employment Trajectory: Affected vs. Comparison Counties") + + theme(legend.position = "none") +``` + +--- + +# Section 5: Local Government Fiscal Trajectories (Event-Study Style) + +Examine how local government finances evolved in comparison counties post-disaster. + +```{r fiscal-expenses} +# Total county expenses over event time +fiscal_event_aligned %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = expenditure_total / 1000, group = GEOID, color = county_type)) + + geom_line() + + geom_point() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + ylim(c(0, NA)) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "Total County Expenses (Millions)", + title = "County Government Expenses: Affected vs. Comparison Counties") + + theme(legend.position = "none") +``` + +```{r fiscal-revenue} +# Total county expenses over event time +fiscal_event_aligned %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = revenue_total / 1000, group = GEOID, color = county_type)) + + geom_line() + + geom_point() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "Total County Revenues (Millions)", + title = "County Government Revenues: Affected vs. Comparison Counties") + + theme(legend.position = "none") +``` + +--- + +# Section 6: Recovery Resources in Comparison Cases + +Describe typical federal assistance flows based on historical analogs. Since the affected county's disaster just occurred, we show comparison counties' post-disaster resource flows as projections. + +## SBA Disaster Loans + +```{r recovery-sba} +# SBA loans over event time (comparison counties only for post-period) + +sba_event_aligned %>% + arrange(GEOID, event_time) %>% + mutate( + .by = GEOID, + across(matches("loan"), ~ cumsum(.x))) %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = residential_loan / 1000000, group = GEOID, color = county_type)) + + geom_line() + + ylim(c(0, NA)) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years relative to disaster (t=0)", + y = "SBA loans (cumulative)", + title = "Cumulative SBA Loans - Residential") + + theme(legend.position = "none") + +sba_event_aligned %>% + arrange(GEOID, event_time) %>% + mutate( + .by = GEOID, + across(matches("loan"), ~ cumsum(.x))) %>% + ggplot(aes(x = event_time, y = business_loan / 1000000, group = GEOID, color = county_type)) + + geom_line() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("affected" = "#1696d2", "comparison" = "#d2d2d2")) + + labs( + x = "Years relative to disaster (t=0)", + y = "SBA loans (cumulative)", + title = "Cumulative SBA Loans - Business") + + theme(legend.position = "none") +``` + +## FEMA Public Assistance + +```{r recovery-pa} +# PA funding over event time (comparison counties only for post-period) +pa_event_aligned %>% + mutate(county_type = if_else(str_detect(county_type, "affected"), "Impacted", "Comparison")) %>% + ggplot(aes(x = event_time, y = pa_federal_funding_obligated_split / 1e6, group = GEOID, color = county_type)) + + geom_line() + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + + scale_color_manual(values = c("Impacted" = "#1696d2", "Comparison" = "#d2d2d2")) + + labs( + x = "Years Relative to Disaster (t=0)", + y = "FEMA PA Federal Share Obligated (Millions)", + title = "FEMA Public Assistance in Comparison Counties") +``` + +## Individual and Households Program + +```{r recovery-ihp} +# # IHP funding over event time (comparison counties only for post-period) +# ihp_event_aligned %>% +# ggplot(aes(x = event_time, y = ihp_amount_total / 1e6, group = GEOID)) + +# geom_line(alpha = 0.4, color = "#d2d2d2") + +# stat_summary(aes(group = 1), fun = median, geom = "line", color = "#1696d2", linewidth = 1.2) + +# geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + +# scale_y_continuous(labels = scales::dollar_format(suffix = "M")) + +# labs( +# x = "Years Relative to Disaster (t=0)", +# y = "IHP Total Amount (Millions)", +# title = "FEMA Individual & Households Program in Comparison Counties", +# subtitle = "Blue line = median across comparison counties" +# ) +``` + +--- + +# Section 7: Key Takeaways + +```{r takeaways} +# Programmatically generate summary statistics for the factsheet: +# - Pre-disaster vulnerability indicators for affected county +# - Median/range of recovery trajectories from comparison counties +# - Typical federal resource flows +``` + +## Summary Table + +```{r summary-table} +# Create summary table of key indicators +``` + +--- + +## Technical Notes + +### Comparison County Selection Criteria +- Similar population size and income (within 25% of affected county in either direction) + +### Data Availability Notes +- LODES data typically lag 2 years +- County Business Patterns lag 2 years +- Government finances lag 2 +- FEMA assistance data are near real-time + diff --git a/vignettes/figure/economic_recovery_factsheet-baseline-fiscal-1.png b/vignettes/figure/economic_recovery_factsheet-baseline-fiscal-1.png new file mode 100644 index 0000000..b6ca7e0 Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-baseline-fiscal-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-baseline-industry-1.png b/vignettes/figure/economic_recovery_factsheet-baseline-industry-1.png new file mode 100644 index 0000000..d1dfcb7 Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-baseline-industry-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-employment-total-1.png b/vignettes/figure/economic_recovery_factsheet-employment-total-1.png new file mode 100644 index 0000000..473a66e Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-employment-total-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-fiscal-expenses-1.png b/vignettes/figure/economic_recovery_factsheet-fiscal-expenses-1.png new file mode 100644 index 0000000..061a931 Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-fiscal-expenses-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-fiscal-revenue-1.png b/vignettes/figure/economic_recovery_factsheet-fiscal-revenue-1.png new file mode 100644 index 0000000..e4426a0 Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-fiscal-revenue-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-recovery-pa-1.png b/vignettes/figure/economic_recovery_factsheet-recovery-pa-1.png new file mode 100644 index 0000000..997a15f Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-recovery-pa-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-recovery-sba-1.png b/vignettes/figure/economic_recovery_factsheet-recovery-sba-1.png new file mode 100644 index 0000000..f16455f Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-recovery-sba-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-recovery-sba-2.png b/vignettes/figure/economic_recovery_factsheet-recovery-sba-2.png new file mode 100644 index 0000000..ea7601b Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-recovery-sba-2.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-unnamed-chunk-4-1.png b/vignettes/figure/economic_recovery_factsheet-unnamed-chunk-4-1.png new file mode 100644 index 0000000..3c36660 Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-unnamed-chunk-4-1.png differ diff --git a/vignettes/figure/economic_recovery_factsheet-unnamed-chunk-5-1.png b/vignettes/figure/economic_recovery_factsheet-unnamed-chunk-5-1.png new file mode 100644 index 0000000..a346a66 Binary files /dev/null and b/vignettes/figure/economic_recovery_factsheet-unnamed-chunk-5-1.png differ diff --git a/vignettes/figure/get_sheldus-annual-damage-1.png b/vignettes/figure/get_sheldus-annual-damage-1.png new file mode 100644 index 0000000..8abab78 Binary files /dev/null and b/vignettes/figure/get_sheldus-annual-damage-1.png differ diff --git a/vignettes/figure/get_sheldus-fatalities-1.png b/vignettes/figure/get_sheldus-fatalities-1.png new file mode 100644 index 0000000..868f0ab Binary files /dev/null and b/vignettes/figure/get_sheldus-fatalities-1.png differ diff --git a/vignettes/figure/get_sheldus-flood-damage-map-1.png b/vignettes/figure/get_sheldus-flood-damage-map-1.png new file mode 100644 index 0000000..ac64d9d Binary files /dev/null and b/vignettes/figure/get_sheldus-flood-damage-map-1.png differ diff --git a/vignettes/figure/get_sheldus-seasonal-patterns-1.png b/vignettes/figure/get_sheldus-seasonal-patterns-1.png new file mode 100644 index 0000000..902217f Binary files /dev/null and b/vignettes/figure/get_sheldus-seasonal-patterns-1.png differ diff --git a/vignettes/figure/get_structures-map-structures-1.png b/vignettes/figure/get_structures-map-structures-1.png new file mode 100644 index 0000000..7d5c8b7 Binary files /dev/null and b/vignettes/figure/get_structures-map-structures-1.png differ diff --git a/vignettes/figure/get_structures-tract-composition-1.png b/vignettes/figure/get_structures-tract-composition-1.png new file mode 100644 index 0000000..729a429 Binary files /dev/null and b/vignettes/figure/get_structures-tract-composition-1.png differ diff --git a/vignettes/figure/get_wildfire_burn_zones-annual-trends-1.png b/vignettes/figure/get_wildfire_burn_zones-annual-trends-1.png new file mode 100644 index 0000000..8c7e9e6 Binary files /dev/null and b/vignettes/figure/get_wildfire_burn_zones-annual-trends-1.png differ diff --git a/vignettes/figure/get_wildfire_burn_zones-map-example-1.png b/vignettes/figure/get_wildfire_burn_zones-map-example-1.png new file mode 100644 index 0000000..bb183b0 Binary files /dev/null and b/vignettes/figure/get_wildfire_burn_zones-map-example-1.png differ diff --git a/vignettes/figure/get_wildfire_burn_zones-severity-analysis-1.png b/vignettes/figure/get_wildfire_burn_zones-severity-analysis-1.png new file mode 100644 index 0000000..0af15b0 Binary files /dev/null and b/vignettes/figure/get_wildfire_burn_zones-severity-analysis-1.png differ diff --git a/vignettes/figure/get_wildfire_burn_zones-state-impacts-1.png b/vignettes/figure/get_wildfire_burn_zones-state-impacts-1.png new file mode 100644 index 0000000..61ddb1c Binary files /dev/null and b/vignettes/figure/get_wildfire_burn_zones-state-impacts-1.png differ diff --git a/vignettes/get_sheldus.Rmd b/vignettes/get_sheldus.Rmd index d3a6340..fd284c5 100644 --- a/vignettes/get_sheldus.Rmd +++ b/vignettes/get_sheldus.Rmd @@ -1,225 +1,246 @@ ---- -title: "SHELDUS Hazard Data" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{SHELDUS Hazard Data} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - warning = FALSE, - message = FALSE, - fig.width = 8, - fig.height = 6, - dpi = 1000, - eval = TRUE, - echo = TRUE) - -options(scipen = 999) -``` - -## Overview - -The `get_sheldus()` function provides access to county-level hazard event data from the Spatial Hazard Events and Losses Database for the United States (SHELDUS). This database tracks property damage, crop damage, fatalities, and injuries from natural hazards across all US counties. - -## Data source - -SHELDUS is maintained by Arizona State University's Center for Emergency Management and Homeland Security. The database compiles hazard event data from multiple sources including NOAA Storm Events, the National Climatic Data Center, and other federal agencies. - -Access to SHELDUS requires a subscription. See [https://cemhs.asu.edu/sheldus](https://cemhs.asu.edu/sheldus) for more information. - -## Loading the data - -```{r setup} -library(climateapi) -library(tidyverse) -library(sf) -library(urbnthemes) - -set_urbn_defaults(style = "print") -``` - -```{r load-data, eval = FALSE} -sheldus <- get_sheldus() -``` - -```{r load-data-hidden, echo = FALSE} -sheldus <- get_sheldus() -``` - -## Data structure - -Each row represents a unique combination of county, year, month, and hazard type. Only county-month-hazard combinations with recorded events are included. - -```{r structure} -glimpse(sheldus) -``` - -Key variables include: - -- `unique_id`: Unique identifier for each observation -- `GEOID`: Five-digit county FIPS code -- `year` and `month`: Temporal identifiers -- `hazard`: Type of hazard event (e.g., "Flooding", "Hurricane/Tropical Storm") -- `damage_property`: Property damage in 2023 inflation-adjusted dollars -- `damage_crop`: Crop damage in 2023 inflation-adjusted dollars -- `fatalities` and `injuries`: Human impacts -- `records`: Number of individual events aggregated into the observation - -## Example analyses - -### Hazard types in the database - -```{r hazard-types} -sheldus |> - distinct(hazard) |> - arrange(hazard) |> - pull(hazard) -``` - -### Annual property damage by hazard type - -```{r annual-damage} -df1 <- sheldus |> - summarize( - .by = c(year, hazard), - total_damage = sum(damage_property, na.rm = TRUE) / 1e9) - -top_hazards <- df1 |> - summarize( - .by = hazard, - total = sum(total_damage, na.rm = TRUE)) |> - slice_max(total, n = 5) |> - pull(hazard) - -df1 |> - filter(hazard %in% top_hazards) |> - ggplot(aes(x = year, y = total_damage, fill = hazard)) + - geom_col() + - scale_fill_manual(values = c( - palette_urbn_main[1:4], palette_urbn_cyan[5])) + - labs( - title = "Annual Property Damage by Hazard Type", - subtitle = "Top 5 hazard types by total damage, 2023 dollars", - x = "", - y = "Property damage (billions)", - fill = "Hazard type") -``` - -### Geographic distribution of flood damage - -```{r flood-damage-map, fig.height = 7} -flood_damage <- sheldus |> - filter(hazard == "Flooding") |> - summarize( - .by = GEOID, - total_damage = sum(damage_property, na.rm = TRUE)) - -counties_sf <- tigris::counties(cb = TRUE, year = 2022, progress_bar = FALSE) |> - st_transform(5070) |> - filter(!STATEFP %in% c("02", "15", "72", "78", "66", "60", "69")) - -flood_map <- counties_sf |> - left_join(flood_damage, by = "GEOID") |> - mutate( - damage_category = case_when( - is.na(total_damage) | total_damage == 0 ~ "No recorded damage", - total_damage < 1e6 ~ "< $1M", - total_damage < 10e6 ~ "$1M - $10M", - total_damage < 100e6 ~ "$10M - $100M", - TRUE ~ "> $100M"), - damage_category = factor( - damage_category, - levels = c("No recorded damage", "< $1M", "$1M - $10M", "$10M - $100M", "> $100M"))) - -ggplot(flood_map) + - geom_sf(aes(fill = damage_category), color = NA) + - scale_fill_manual(values = c( - "No recorded damage" = "grey90", - "< $1M" = palette_urbn_cyan[1], - "$1M - $10M" = palette_urbn_cyan[3], - "$10M - $100M" = palette_urbn_cyan[5], - "> $100M" = palette_urbn_cyan[7])) + - labs( - title = "Cumulative Flood Damage by County", - subtitle = "Total property damage from flooding events, 2023 dollars", - fill = "Total damage") + - theme_urbn_map() -``` - -### Seasonal patterns in hazard events - -```{r seasonal-patterns} -seasonal <- sheldus |> - filter(hazard %in% top_hazards) |> - summarize( - .by = c(month, hazard), - total_events = sum(records, na.rm = TRUE)) - -ggplot(seasonal, aes(x = month, y = total_events, color = hazard)) + - geom_line(linewidth = 1) + - geom_point(size = 2) + - scale_x_continuous(breaks = 1:12, labels = month.abb) + - scale_color_manual(values = c( - palette_urbn_main[1:4], palette_urbn_cyan[5])) + - labs( - title = "Seasonal Distribution of Hazard Events", - subtitle = "Total event count by month across all years", - x = "", - y = "Number of events", - color = "Hazard type") -``` - -### Counties with highest fatalities - -```{r fatalities} -high_fatality_counties <- sheldus |> - summarize( - .by = c(GEOID, state_name, county_name), - total_fatalities = sum(fatalities, na.rm = TRUE)) |> - slice_max(total_fatalities, n = 15) - -high_fatality_counties |> - mutate( - county_label = str_c(county_name, ", ", state_name), - county_label = fct_reorder(county_label, total_fatalities)) |> - ggplot(aes(y = county_label, x = total_fatalities)) + - geom_col() + - labs( - title = "Counties with Highest Hazard-Related Fatalities", - x = "Total fatalities", - y = "") -``` - -### Linking with census data - -The county-level structure makes it straightforward to join with demographic data. - -```{r demographics, eval = FALSE} -county_demographics <- tidycensus::get_acs( - geography = "county", - variables = c( - median_income = "B19013_001", - total_pop = "B01003_001"), - year = 2022, - output = "wide") - -county_hazard_summary <- sheldus |> - filter(year >= 2018) |> - summarize( - .by = GEOID, - total_damage = sum(damage_property, na.rm = TRUE), - total_events = sum(records, na.rm = TRUE)) |> - left_join(county_demographics, by = "GEOID") |> - mutate(damage_per_capita = total_damage / total_popE) -``` - -## See also - -- `get_fema_disaster_declarations()`: FEMA disaster declarations -- `get_nfip_claims()`: National Flood Insurance Program claims data -- `get_wildfire_burn_zones()`: Wildfire burn zone disasters +--- +title: "SHELDUS Hazard Data" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SHELDUS Hazard Data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +## Overview + +The `get_sheldus()` function provides access to county-level hazard event data from the Spatial Hazard Events and Losses Database for the United States (SHELDUS). This database tracks property damage, crop damage, fatalities, and injuries from natural hazards across all US counties. + +## Data source + +SHELDUS is maintained by Arizona State University's Center for Emergency Management and Homeland Security. The database compiles hazard event data from multiple sources including NOAA Storm Events, the National Climatic Data Center, and other federal agencies. + +Access to SHELDUS requires a subscription. See [https://cemhs.asu.edu/sheldus](https://cemhs.asu.edu/sheldus) for more information. + +## Loading the data + + +``` r +library(climateapi) +library(tidyverse) +library(sf) +library(urbnthemes) + +set_urbn_defaults(style = "print") +``` + + +``` r +sheldus <- get_sheldus() +``` + + + +## Data structure + +Each row represents a unique combination of county, year, month, and hazard type. Only county-month-hazard combinations with recorded events are included. + + +``` r +glimpse(sheldus) +#> Rows: 1,000,189 +#> Columns: 13 +#> $ unique_id "199da95a-821f-4f2f-974a-85bb0c4e8a0f", "a9af66c4-1ea9-4c43-9a7e-85211bf280db", "b6e481a5-998b-46a3-a393-0064de9e87b4", "e40fd03d-cabf-4e41-9… +#> $ GEOID "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001", "01001… +#> $ state_name "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama"… +#> $ county_name "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga", "Autauga"… +#> $ year 1961, 1961, 1961, 1962, 1962, 1962, 1962, 1962, 1963, 1963, 1963, 1964, 1964, 1965, 1966, 1967, 1968, 1968, 1968, 1969, 1969, 1969, 1969, 196… +#> $ month 1, 2, 12, 1, 4, 4, 4, 12, 1, 4, 12, 4, 4, 7, 1, 3, 3, 8, 8, 3, 3, 6, 6, 6, 4, 4, 4, 5, 1, 3, 3, 4, 4, 4, 5, 5, 5, 6, 3, 3, 2, 4, 5, 11, 1, 2,… +#> $ hazard "Winter Weather", "Severe Storm/Thunder Storm", "Severe Storm/Thunder Storm", "Winter Weather", "Hail", "Severe Storm/Thunder Storm", "Wind",… +#> $ damage_property 21498.713, 75141.081, 83907.468, 74394.646, 2479.825, 2479.825, 2479.825, 74394.646, 73422.069, 491928.296, 73422.167, 292024.963, 6389.224, … +#> $ damage_crop 7514.11815, 75141.08081, 8390.71660, 74394.64644, 2479.82487, 2479.82487, 2479.82487, 74394.64644, 73422.06910, 0.00000, 7342.22659, 34952.83… +#> $ fatalities 0.00, 0.00, 0.00, 0.06, 0.00, 0.00, 0.00, 0.04, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00, 0.15, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0… +#> $ injuries 0.00000, 0.00000, 0.00000, 0.00000, 0.00333, 0.00333, 0.00333, 0.00000, 0.00000, 0.00000, 0.00000, 0.04000, 0.04000, 0.00000, 0.07000, 0.0000… +#> $ records 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, … +#> $ allocation_factor NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… +``` + +Key variables include: + +- `unique_id`: Unique identifier for each observation +- `GEOID`: Five-digit county FIPS code +- `year` and `month`: Temporal identifiers +- `hazard`: Type of hazard event (e.g., "Flooding", "Hurricane/Tropical Storm") +- `damage_property`: Property damage in 2023 inflation-adjusted dollars +- `damage_crop`: Crop damage in 2023 inflation-adjusted dollars +- `fatalities` and `injuries`: Human impacts +- `records`: Number of individual events aggregated into the observation + +## Example analyses + +### Hazard types in the database + + +``` r +sheldus |> + distinct(hazard) |> + arrange(hazard) |> + pull(hazard) +#> [1] "Avalanche" "Coastal" "Drought" "Earthquake" "Flooding" +#> [6] "Fog" "Hail" "Heat" "Hurricane/Tropical Storm" "Landslide" +#> [11] "Lightning" "Severe Storm/Thunder Storm" "Tornado" "Tsunami/Seiche" "Volcano" +#> [16] "Wildfire" "Wind" "Winter Weather" +``` + +### Annual property damage by hazard type + + +``` r +df1 <- sheldus |> + summarize( + .by = c(year, hazard), + total_damage = sum(damage_property, na.rm = TRUE) / 1e9) + +top_hazards <- df1 |> + summarize( + .by = hazard, + total = sum(total_damage, na.rm = TRUE)) |> + slice_max(total, n = 5) |> + pull(hazard) + +df1 |> + filter(hazard %in% top_hazards) |> + ggplot(aes(x = year, y = total_damage, fill = hazard)) + + geom_col() + + scale_fill_manual(values = c( + palette_urbn_main[1:4], palette_urbn_cyan[5])) + + labs( + title = "Annual Property Damage by Hazard Type", + subtitle = "Top 5 hazard types by total damage, 2023 dollars", + x = "", + y = "Property damage (billions)", + fill = "Hazard type") +``` + +![plot of chunk annual-damage](figure/get_sheldus-annual-damage-1.png) + +### Geographic distribution of flood damage + + +``` r +flood_damage <- sheldus |> + filter(hazard == "Flooding") |> + summarize( + .by = GEOID, + total_damage = sum(damage_property, na.rm = TRUE)) + +counties_sf <- tigris::counties(cb = TRUE, year = 2022, progress_bar = FALSE) |> + st_transform(5070) |> + filter(!STATEFP %in% c("02", "15", "72", "78", "66", "60", "69")) + +flood_map <- counties_sf |> + left_join(flood_damage, by = "GEOID") |> + mutate( + damage_category = case_when( + is.na(total_damage) | total_damage == 0 ~ "No recorded damage", + total_damage < 1e6 ~ "< $1M", + total_damage < 10e6 ~ "$1M - $10M", + total_damage < 100e6 ~ "$10M - $100M", + TRUE ~ "> $100M"), + damage_category = factor( + damage_category, + levels = c("No recorded damage", "< $1M", "$1M - $10M", "$10M - $100M", "> $100M"))) + +ggplot(flood_map) + + geom_sf(aes(fill = damage_category), color = NA) + + scale_fill_manual(values = c( + "No recorded damage" = "grey90", + "< $1M" = palette_urbn_cyan[1], + "$1M - $10M" = palette_urbn_cyan[3], + "$10M - $100M" = palette_urbn_cyan[5], + "> $100M" = palette_urbn_cyan[7])) + + labs( + title = "Cumulative Flood Damage by County", + subtitle = "Total property damage from flooding events, 2023 dollars", + fill = "Total damage") + + theme_urbn_map() +``` + +![plot of chunk flood-damage-map](figure/get_sheldus-flood-damage-map-1.png) + +### Seasonal patterns in hazard events + + +``` r +seasonal <- sheldus |> + filter(hazard %in% top_hazards) |> + summarize( + .by = c(month, hazard), + total_events = sum(records, na.rm = TRUE)) + +ggplot(seasonal, aes(x = month, y = total_events, color = hazard)) + + geom_line(linewidth = 1) + + geom_point(size = 2) + + scale_x_continuous(breaks = 1:12, labels = month.abb) + + scale_color_manual(values = c( + palette_urbn_main[1:4], palette_urbn_cyan[5])) + + labs( + title = "Seasonal Distribution of Hazard Events", + subtitle = "Total event count by month across all years", + x = "", + y = "Number of events", + color = "Hazard type") +``` + +![plot of chunk seasonal-patterns](figure/get_sheldus-seasonal-patterns-1.png) + +### Counties with highest fatalities + + +``` r +high_fatality_counties <- sheldus |> + summarize( + .by = c(GEOID, state_name, county_name), + total_fatalities = sum(fatalities, na.rm = TRUE)) |> + slice_max(total_fatalities, n = 15) + +high_fatality_counties |> + mutate( + county_label = str_c(county_name, ", ", state_name), + county_label = fct_reorder(county_label, total_fatalities)) |> + ggplot(aes(y = county_label, x = total_fatalities)) + + geom_col() + + labs( + title = "Counties with Highest Hazard-Related Fatalities", + x = "Total fatalities", + y = "") +``` + +![plot of chunk fatalities](figure/get_sheldus-fatalities-1.png) + +### Linking with census data + +The county-level structure makes it straightforward to join with demographic data. + + +``` r +county_demographics <- tidycensus::get_acs( + geography = "county", + variables = c( + median_income = "B19013_001", + total_pop = "B01003_001"), + year = 2022, + output = "wide") + +county_hazard_summary <- sheldus |> + filter(year >= 2018) |> + summarize( + .by = GEOID, + total_damage = sum(damage_property, na.rm = TRUE), + total_events = sum(records, na.rm = TRUE)) |> + left_join(county_demographics, by = "GEOID") |> + mutate(damage_per_capita = total_damage / total_popE) +``` + +## See also + +- `get_fema_disaster_declarations()`: FEMA disaster declarations +- `get_nfip_claims()`: National Flood Insurance Program claims data +- `get_wildfire_burn_zones()`: Wildfire burn zone disasters diff --git a/vignettes/get_sheldus.Rmd.orig b/vignettes/get_sheldus.Rmd.orig new file mode 100644 index 0000000..d3a6340 --- /dev/null +++ b/vignettes/get_sheldus.Rmd.orig @@ -0,0 +1,225 @@ +--- +title: "SHELDUS Hazard Data" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SHELDUS Hazard Data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + warning = FALSE, + message = FALSE, + fig.width = 8, + fig.height = 6, + dpi = 1000, + eval = TRUE, + echo = TRUE) + +options(scipen = 999) +``` + +## Overview + +The `get_sheldus()` function provides access to county-level hazard event data from the Spatial Hazard Events and Losses Database for the United States (SHELDUS). This database tracks property damage, crop damage, fatalities, and injuries from natural hazards across all US counties. + +## Data source + +SHELDUS is maintained by Arizona State University's Center for Emergency Management and Homeland Security. The database compiles hazard event data from multiple sources including NOAA Storm Events, the National Climatic Data Center, and other federal agencies. + +Access to SHELDUS requires a subscription. See [https://cemhs.asu.edu/sheldus](https://cemhs.asu.edu/sheldus) for more information. + +## Loading the data + +```{r setup} +library(climateapi) +library(tidyverse) +library(sf) +library(urbnthemes) + +set_urbn_defaults(style = "print") +``` + +```{r load-data, eval = FALSE} +sheldus <- get_sheldus() +``` + +```{r load-data-hidden, echo = FALSE} +sheldus <- get_sheldus() +``` + +## Data structure + +Each row represents a unique combination of county, year, month, and hazard type. Only county-month-hazard combinations with recorded events are included. + +```{r structure} +glimpse(sheldus) +``` + +Key variables include: + +- `unique_id`: Unique identifier for each observation +- `GEOID`: Five-digit county FIPS code +- `year` and `month`: Temporal identifiers +- `hazard`: Type of hazard event (e.g., "Flooding", "Hurricane/Tropical Storm") +- `damage_property`: Property damage in 2023 inflation-adjusted dollars +- `damage_crop`: Crop damage in 2023 inflation-adjusted dollars +- `fatalities` and `injuries`: Human impacts +- `records`: Number of individual events aggregated into the observation + +## Example analyses + +### Hazard types in the database + +```{r hazard-types} +sheldus |> + distinct(hazard) |> + arrange(hazard) |> + pull(hazard) +``` + +### Annual property damage by hazard type + +```{r annual-damage} +df1 <- sheldus |> + summarize( + .by = c(year, hazard), + total_damage = sum(damage_property, na.rm = TRUE) / 1e9) + +top_hazards <- df1 |> + summarize( + .by = hazard, + total = sum(total_damage, na.rm = TRUE)) |> + slice_max(total, n = 5) |> + pull(hazard) + +df1 |> + filter(hazard %in% top_hazards) |> + ggplot(aes(x = year, y = total_damage, fill = hazard)) + + geom_col() + + scale_fill_manual(values = c( + palette_urbn_main[1:4], palette_urbn_cyan[5])) + + labs( + title = "Annual Property Damage by Hazard Type", + subtitle = "Top 5 hazard types by total damage, 2023 dollars", + x = "", + y = "Property damage (billions)", + fill = "Hazard type") +``` + +### Geographic distribution of flood damage + +```{r flood-damage-map, fig.height = 7} +flood_damage <- sheldus |> + filter(hazard == "Flooding") |> + summarize( + .by = GEOID, + total_damage = sum(damage_property, na.rm = TRUE)) + +counties_sf <- tigris::counties(cb = TRUE, year = 2022, progress_bar = FALSE) |> + st_transform(5070) |> + filter(!STATEFP %in% c("02", "15", "72", "78", "66", "60", "69")) + +flood_map <- counties_sf |> + left_join(flood_damage, by = "GEOID") |> + mutate( + damage_category = case_when( + is.na(total_damage) | total_damage == 0 ~ "No recorded damage", + total_damage < 1e6 ~ "< $1M", + total_damage < 10e6 ~ "$1M - $10M", + total_damage < 100e6 ~ "$10M - $100M", + TRUE ~ "> $100M"), + damage_category = factor( + damage_category, + levels = c("No recorded damage", "< $1M", "$1M - $10M", "$10M - $100M", "> $100M"))) + +ggplot(flood_map) + + geom_sf(aes(fill = damage_category), color = NA) + + scale_fill_manual(values = c( + "No recorded damage" = "grey90", + "< $1M" = palette_urbn_cyan[1], + "$1M - $10M" = palette_urbn_cyan[3], + "$10M - $100M" = palette_urbn_cyan[5], + "> $100M" = palette_urbn_cyan[7])) + + labs( + title = "Cumulative Flood Damage by County", + subtitle = "Total property damage from flooding events, 2023 dollars", + fill = "Total damage") + + theme_urbn_map() +``` + +### Seasonal patterns in hazard events + +```{r seasonal-patterns} +seasonal <- sheldus |> + filter(hazard %in% top_hazards) |> + summarize( + .by = c(month, hazard), + total_events = sum(records, na.rm = TRUE)) + +ggplot(seasonal, aes(x = month, y = total_events, color = hazard)) + + geom_line(linewidth = 1) + + geom_point(size = 2) + + scale_x_continuous(breaks = 1:12, labels = month.abb) + + scale_color_manual(values = c( + palette_urbn_main[1:4], palette_urbn_cyan[5])) + + labs( + title = "Seasonal Distribution of Hazard Events", + subtitle = "Total event count by month across all years", + x = "", + y = "Number of events", + color = "Hazard type") +``` + +### Counties with highest fatalities + +```{r fatalities} +high_fatality_counties <- sheldus |> + summarize( + .by = c(GEOID, state_name, county_name), + total_fatalities = sum(fatalities, na.rm = TRUE)) |> + slice_max(total_fatalities, n = 15) + +high_fatality_counties |> + mutate( + county_label = str_c(county_name, ", ", state_name), + county_label = fct_reorder(county_label, total_fatalities)) |> + ggplot(aes(y = county_label, x = total_fatalities)) + + geom_col() + + labs( + title = "Counties with Highest Hazard-Related Fatalities", + x = "Total fatalities", + y = "") +``` + +### Linking with census data + +The county-level structure makes it straightforward to join with demographic data. + +```{r demographics, eval = FALSE} +county_demographics <- tidycensus::get_acs( + geography = "county", + variables = c( + median_income = "B19013_001", + total_pop = "B01003_001"), + year = 2022, + output = "wide") + +county_hazard_summary <- sheldus |> + filter(year >= 2018) |> + summarize( + .by = GEOID, + total_damage = sum(damage_property, na.rm = TRUE), + total_events = sum(records, na.rm = TRUE)) |> + left_join(county_demographics, by = "GEOID") |> + mutate(damage_per_capita = total_damage / total_popE) +``` + +## See also + +- `get_fema_disaster_declarations()`: FEMA disaster declarations +- `get_nfip_claims()`: National Flood Insurance Program claims data +- `get_wildfire_burn_zones()`: Wildfire burn zone disasters diff --git a/vignettes/get_structures.Rmd b/vignettes/get_structures.Rmd index 1577b18..ead14ed 100644 --- a/vignettes/get_structures.Rmd +++ b/vignettes/get_structures.Rmd @@ -1,229 +1,255 @@ ---- -title: "USA Structures Data" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{USA Structures Data} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - warning = FALSE, - message = FALSE, - fig.width = 8, - fig.height = 6, - dpi = 150 -) -``` - -## Overview - -The `get_structures()` function retrieves building footprint data from the USA Structures dataset and summarizes structure counts by type at the tract or county level. This is useful for estimating the number and types of buildings within areas affected by natural hazards. - -## Data source - -Data are sourced from the USA Structures dataset maintained by the Department of Homeland Security. The dataset contains building footprints derived from high-resolution imagery for structures across the United States. - -See [https://geoplatform.gov/metadata/9d4a3ae3-8637-4707-92a7-b7d67b769a6b](https://geoplatform.gov/metadata/9d4a3ae3-8637-4707-92a7-b7d67b769a6b) for more information. - -## Loading the data - -```{r setup} -library(climateapi) -library(tidyverse) -library(sf) -library(urbnthemes) - -set_urbn_defaults(style = "print") -``` - -The function requires a `boundaries` argument specifying the geographic area of interest. This must be a spatial polygon object with a defined coordinate reference system. - -```{r load-data, eval = FALSE} -# Example: Get structures in Washington, DC -dc_boundary <- tigris::states(cb = TRUE) |> - filter(STUSPS == "DC") - -dc_structures <- get_structures( - boundaries = dc_boundary, - geography = "tract") -``` - -```{r load-data-hidden, echo = FALSE} -dc_boundary <- tigris::states(cb = TRUE) |> - filter(STUSPS == "DC") - -dc_structures <- get_structures( - boundaries = dc_boundary, - geography = "tract") -``` - -## Function parameters - -- `boundaries`: A POLYGON or MULTIPOLYGON sf object, or an `sf::st_bbox()`-style bounding box. Must have a defined CRS. -- `geography`: The desired output geography. One of `"tract"` or `"county"`. -- `keep_structures`: If `TRUE`, returns both the summarized counts and the raw point-level structure data. - -## Data structure - -Each row represents a unique combination of geographic unit (tract or county) and structure type. - -```{r structure} -glimpse(dc_structures) -``` - -Key variables include: - -- `GEOID`: Census tract (11-digit) or county (5-digit) FIPS code -- `primary_occupancy`: The primary use of the structure (e.g., "Residential", "Commercial") -- `occupancy_class`: Broader classification of occupancy type -- `count`: Number of structures of this type in the geographic unit - -### Occupancy types - -```{r occupancy-types} -dc_structures |> - distinct(occupancy_class, primary_occupancy) |> - arrange(occupancy_class, primary_occupancy) -``` - -## Example analyses - -### Structure composition by tract - -```{r tract-composition} -dc_summary <- dc_structures |> - summarize( - .by = GEOID, - total_structures = sum(count, na.rm = TRUE), - residential = sum(count[occupancy_class == "Residential"], na.rm = TRUE), - commercial = sum(count[occupancy_class == "Commercial"], na.rm = TRUE)) |> - mutate( - residential_share = residential / total_structures) - -dc_summary |> - ggplot(aes(x = total_structures, y = residential_share)) + - geom_point(alpha = 0.7) + - scale_y_continuous(labels = scales::percent) + - labs( - title = "Residential Share vs. Total Structures by Tract", - subtitle = "Washington, DC census tracts", - x = "Total structures", - y = "Share residential") -``` - -### Mapping structure density - -```{r map-structures, fig.height = 8} -dc_tracts <- tigris::tracts(state = "DC", cb = TRUE, year = 2023, progress_bar = FALSE) |> - st_transform(5070) - -dc_tract_totals <- dc_structures |> - summarize( - .by = GEOID, - total_structures = sum(count, na.rm = TRUE)) - -dc_map <- dc_tracts |> - left_join(dc_tract_totals, by = "GEOID") - -ggplot(dc_map) + - geom_sf(aes(fill = total_structures), color = "white", linewidth = 0.2) + - scale_fill_gradientn( - colors = c(palette_urbn_cyan[1], palette_urbn_cyan[4], palette_urbn_cyan[7]), - labels = scales::comma) + - labs( - title = "Structure Count by Census Tract", - subtitle = "Washington, DC", - fill = "Structures") + - theme_urbn_map() -``` - -### Analyzing structures in a disaster area - -A common use case is estimating structures within a hazard-affected area. This example shows how to combine structure data with wildfire burn zones. - -```{r disaster-example, eval = FALSE} -# Get a specific wildfire's burn zone -burn_zones <- get_wildfire_burn_zones() - -camp_fire <- burn_zones |> - filter(str_detect(wildfire_name, "CAMP")) - -# Get structures within the burn zone -camp_fire_structures <- get_structures( - - boundaries = camp_fire, - geography = "tract", - keep_structures = TRUE) - -# Summarized counts -camp_fire_structures$structures_summarized |> - summarize( - .by = occupancy_class, - total = sum(count, na.rm = TRUE)) |> - arrange(desc(total)) -``` - -### Working with raw structure data - -Setting `keep_structures = TRUE` returns both the summarized data and the raw point-level structure data. - -```{r raw-structures, eval = FALSE} -dc_full <- get_structures( - boundaries = dc_boundary, - geography = "county", - keep_structures = TRUE) - -# Access the raw point data -raw_structures <- dc_full$structures_raw - -# Access the summarized data -summary_structures <- dc_full$structures_summarized - -# Map individual structures -ggplot() + - geom_sf(data = dc_boundary, fill = "grey95") + - geom_sf( - data = raw_structures |> filter(primary_occupancy == "Commercial"), - size = 0.1, - alpha = 0.5) + - labs(title = "Commercial Structures in Washington, DC") + - theme_urbn_map() -``` - -### County-level analysis - -For larger areas, county-level aggregation provides a useful summary. - -```{r county-example, eval = FALSE} -# Get structures for multiple states -southeast_boundary <- tigris::states(cb = TRUE) |> - filter(STUSPS %in% c("FL", "GA", "AL", "SC")) - -southeast_structures <- get_structures( - boundaries = southeast_boundary, - geography = "county") - -# Summarize by county -county_totals <- southeast_structures |> - summarize( - .by = GEOID, - total_structures = sum(count, na.rm = TRUE)) -``` - -## Performance considerations - -The raw building footprint data files are large. Processing can be slow, especially for multi-state analyses. Consider: - -- Starting with a small geographic area to test your workflow -- Using county-level aggregation when tract-level detail is not required -- Caching results for repeated analyses - -## See also - -- `get_wildfire_burn_zones()`: Wildfire burn zone disasters for use as boundaries -- `get_current_fire_perimeters()`: Active wildfire perimeters for use as boundaries -- `get_fema_disaster_declarations()`: FEMA disaster declarations +--- +title: "USA Structures Data" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{USA Structures Data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +## Overview + +The `get_structures()` function retrieves building footprint data from the USA Structures dataset and summarizes structure counts by type at the tract or county level. This is useful for estimating the number and types of buildings within areas affected by natural hazards. + +## Data source + +Data are sourced from the USA Structures dataset maintained by the Department of Homeland Security. The dataset contains building footprints derived from high-resolution imagery for structures across the United States. + +See [https://geoplatform.gov/metadata/9d4a3ae3-8637-4707-92a7-b7d67b769a6b](https://geoplatform.gov/metadata/9d4a3ae3-8637-4707-92a7-b7d67b769a6b) for more information. + +## Loading the data + + +``` r +library(climateapi) +library(tidyverse) +library(sf) +library(urbnthemes) + +set_urbn_defaults(style = "print") +``` + +The function requires a `boundaries` argument specifying the geographic area of interest. This must be a spatial polygon object with a defined coordinate reference system. + + +``` r +# Example: Get structures in Washington, DC +dc_boundary <- tigris::states(cb = TRUE) |> + filter(STUSPS == "DC") + +dc_structures <- get_structures( + boundaries = dc_boundary, + geography = "tract") +``` + + +``` +#> Reading layer `DC_Structures' from data source +#> `C:\Users\WCurranGroome\Box\METRO Climate and Communities Practice Area\github-repository\built-environment\housing-units\usa-structures\raw\DC\DC_Structures.gdb' +#> using driver `OpenFileGDB' +#> Simple feature collection with 64701 features and 33 fields +#> Geometry type: MULTIPOLYGON +#> Dimension: XY +#> Bounding box: xmin: -77.11509 ymin: 38.79307 xmax: -76.90971 ymax: 38.99554 +#> Geodetic CRS: WGS 84 +``` + +## Function parameters + +- `boundaries`: A POLYGON or MULTIPOLYGON sf object, or an `sf::st_bbox()`-style bounding box. Must have a defined CRS. +- `geography`: The desired output geography. One of `"tract"` or `"county"`. +- `keep_structures`: If `TRUE`, returns both the summarized counts and the raw point-level structure data. + +## Data structure + +Each row represents a unique combination of geographic unit (tract or county) and structure type. + + +``` r +glimpse(dc_structures) +#> Rows: 1,951 +#> Columns: 4 +#> $ GEOID "11001000101", "11001000101", "11001000101", "11001000101", "11001000101", "11001000101", "11001000102", "11001000102", "11001000102", "11001… +#> $ primary_occupancy "Single Family Dwelling", "Multi - Family Dwelling", "Religious", "Retail Trade", "General Services", "Temporary Lodging", "Single Family Dwe… +#> $ occupancy_class "Residential", "Residential", "Assembly", "Commercial", "Government", "Residential", "Residential", "Residential", "Commercial", "Commercial"… +#> $ count 63, 24, 5, 4, 1, 1, 384, 48, 42, 21, 19, 15, 13, 10, 8, 5, 5, 4, 2, 1, 1, 53, 8, 8, 1, 1, 1, 310, 60, 42, 23, 12, 9, 8, 7, 3, 3, 3, 3, 2, 2, … +``` + +Key variables include: + +- `GEOID`: Census tract (11-digit) or county (5-digit) FIPS code +- `primary_occupancy`: The primary use of the structure (e.g., "Residential", "Commercial") +- `occupancy_class`: Broader classification of occupancy type +- `count`: Number of structures of this type in the geographic unit + +### Occupancy types + + +``` r +dc_structures |> + distinct(occupancy_class, primary_occupancy) |> + arrange(occupancy_class, primary_occupancy) +#> # A tibble: 27 × 2 +#> occupancy_class primary_occupancy +#> +#> 1 Assembly Indoor Arena +#> 2 Assembly Religious +#> 3 Commercial Entertainment and Recreation +#> 4 Commercial Hospital +#> 5 Commercial Medical Office/Clinic +#> 6 Commercial Parking +#> 7 Commercial Personal and Repair Services +#> 8 Commercial Professional/Technical Services +#> 9 Commercial Retail Trade +#> 10 Commercial Theaters +#> # ℹ 17 more rows +``` + +## Example analyses + +### Structure composition by tract + + +``` r +dc_summary <- dc_structures |> + summarize( + .by = GEOID, + total_structures = sum(count, na.rm = TRUE), + residential = sum(count[occupancy_class == "Residential"], na.rm = TRUE), + commercial = sum(count[occupancy_class == "Commercial"], na.rm = TRUE)) |> + mutate( + residential_share = residential / total_structures) + +dc_summary |> + ggplot(aes(x = total_structures, y = residential_share)) + + geom_point(alpha = 0.7) + + scale_y_continuous(labels = scales::percent) + + labs( + title = "Residential Share vs. Total Structures by Tract", + subtitle = "Washington, DC census tracts", + x = "Total structures", + y = "Share residential") +``` + +![plot of chunk tract-composition](figure/get_structures-tract-composition-1.png) + +### Mapping structure density + + +``` r +dc_tracts <- tigris::tracts(state = "DC", cb = TRUE, year = 2023, progress_bar = FALSE) |> + st_transform(5070) + +dc_tract_totals <- dc_structures |> + summarize( + .by = GEOID, + total_structures = sum(count, na.rm = TRUE)) + +dc_map <- dc_tracts |> + left_join(dc_tract_totals, by = "GEOID") + +ggplot(dc_map) + + geom_sf(aes(fill = total_structures), color = "white", linewidth = 0.2) + + scale_fill_gradientn( + colors = c(palette_urbn_cyan[1], palette_urbn_cyan[4], palette_urbn_cyan[7]), + labels = scales::comma) + + labs( + title = "Structure Count by Census Tract", + subtitle = "Washington, DC", + fill = "Structures") + + theme_urbn_map() +``` + +![plot of chunk map-structures](figure/get_structures-map-structures-1.png) + +### Analyzing structures in a disaster area + +A common use case is estimating structures within a hazard-affected area. This example shows how to combine structure data with wildfire burn zones. + + +``` r +# Get a specific wildfire's burn zone +burn_zones <- get_wildfire_burn_zones() + +camp_fire <- burn_zones |> + filter(str_detect(wildfire_name, "CAMP")) + +# Get structures within the burn zone +camp_fire_structures <- get_structures( + + boundaries = camp_fire, + geography = "tract", + keep_structures = TRUE) + +# Summarized counts +camp_fire_structures$structures_summarized |> + summarize( + .by = occupancy_class, + total = sum(count, na.rm = TRUE)) |> + arrange(desc(total)) +``` + +### Working with raw structure data + +Setting `keep_structures = TRUE` returns both the summarized data and the raw point-level structure data. + + +``` r +dc_full <- get_structures( + boundaries = dc_boundary, + geography = "county", + keep_structures = TRUE) + +# Access the raw point data +raw_structures <- dc_full$structures_raw + +# Access the summarized data +summary_structures <- dc_full$structures_summarized + +# Map individual structures +ggplot() + + geom_sf(data = dc_boundary, fill = "grey95") + + geom_sf( + data = raw_structures |> filter(primary_occupancy == "Commercial"), + size = 0.1, + alpha = 0.5) + + labs(title = "Commercial Structures in Washington, DC") + + theme_urbn_map() +``` + +### County-level analysis + +For larger areas, county-level aggregation provides a useful summary. + + +``` r +# Get structures for multiple states +southeast_boundary <- tigris::states(cb = TRUE) |> + filter(STUSPS %in% c("FL", "GA", "AL", "SC")) + +southeast_structures <- get_structures( + boundaries = southeast_boundary, + geography = "county") + +# Summarize by county +county_totals <- southeast_structures |> + summarize( + .by = GEOID, + total_structures = sum(count, na.rm = TRUE)) +``` + +## Performance considerations + +The raw building footprint data files are large. Processing can be slow, especially for multi-state analyses. Consider: + +- Starting with a small geographic area to test your workflow +- Using county-level aggregation when tract-level detail is not required +- Caching results for repeated analyses + +## See also + +- `get_wildfire_burn_zones()`: Wildfire burn zone disasters for use as boundaries +- `get_current_fire_perimeters()`: Active wildfire perimeters for use as boundaries +- `get_fema_disaster_declarations()`: FEMA disaster declarations diff --git a/vignettes/get_structures.Rmd.orig b/vignettes/get_structures.Rmd.orig new file mode 100644 index 0000000..1577b18 --- /dev/null +++ b/vignettes/get_structures.Rmd.orig @@ -0,0 +1,229 @@ +--- +title: "USA Structures Data" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{USA Structures Data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + warning = FALSE, + message = FALSE, + fig.width = 8, + fig.height = 6, + dpi = 150 +) +``` + +## Overview + +The `get_structures()` function retrieves building footprint data from the USA Structures dataset and summarizes structure counts by type at the tract or county level. This is useful for estimating the number and types of buildings within areas affected by natural hazards. + +## Data source + +Data are sourced from the USA Structures dataset maintained by the Department of Homeland Security. The dataset contains building footprints derived from high-resolution imagery for structures across the United States. + +See [https://geoplatform.gov/metadata/9d4a3ae3-8637-4707-92a7-b7d67b769a6b](https://geoplatform.gov/metadata/9d4a3ae3-8637-4707-92a7-b7d67b769a6b) for more information. + +## Loading the data + +```{r setup} +library(climateapi) +library(tidyverse) +library(sf) +library(urbnthemes) + +set_urbn_defaults(style = "print") +``` + +The function requires a `boundaries` argument specifying the geographic area of interest. This must be a spatial polygon object with a defined coordinate reference system. + +```{r load-data, eval = FALSE} +# Example: Get structures in Washington, DC +dc_boundary <- tigris::states(cb = TRUE) |> + filter(STUSPS == "DC") + +dc_structures <- get_structures( + boundaries = dc_boundary, + geography = "tract") +``` + +```{r load-data-hidden, echo = FALSE} +dc_boundary <- tigris::states(cb = TRUE) |> + filter(STUSPS == "DC") + +dc_structures <- get_structures( + boundaries = dc_boundary, + geography = "tract") +``` + +## Function parameters + +- `boundaries`: A POLYGON or MULTIPOLYGON sf object, or an `sf::st_bbox()`-style bounding box. Must have a defined CRS. +- `geography`: The desired output geography. One of `"tract"` or `"county"`. +- `keep_structures`: If `TRUE`, returns both the summarized counts and the raw point-level structure data. + +## Data structure + +Each row represents a unique combination of geographic unit (tract or county) and structure type. + +```{r structure} +glimpse(dc_structures) +``` + +Key variables include: + +- `GEOID`: Census tract (11-digit) or county (5-digit) FIPS code +- `primary_occupancy`: The primary use of the structure (e.g., "Residential", "Commercial") +- `occupancy_class`: Broader classification of occupancy type +- `count`: Number of structures of this type in the geographic unit + +### Occupancy types + +```{r occupancy-types} +dc_structures |> + distinct(occupancy_class, primary_occupancy) |> + arrange(occupancy_class, primary_occupancy) +``` + +## Example analyses + +### Structure composition by tract + +```{r tract-composition} +dc_summary <- dc_structures |> + summarize( + .by = GEOID, + total_structures = sum(count, na.rm = TRUE), + residential = sum(count[occupancy_class == "Residential"], na.rm = TRUE), + commercial = sum(count[occupancy_class == "Commercial"], na.rm = TRUE)) |> + mutate( + residential_share = residential / total_structures) + +dc_summary |> + ggplot(aes(x = total_structures, y = residential_share)) + + geom_point(alpha = 0.7) + + scale_y_continuous(labels = scales::percent) + + labs( + title = "Residential Share vs. Total Structures by Tract", + subtitle = "Washington, DC census tracts", + x = "Total structures", + y = "Share residential") +``` + +### Mapping structure density + +```{r map-structures, fig.height = 8} +dc_tracts <- tigris::tracts(state = "DC", cb = TRUE, year = 2023, progress_bar = FALSE) |> + st_transform(5070) + +dc_tract_totals <- dc_structures |> + summarize( + .by = GEOID, + total_structures = sum(count, na.rm = TRUE)) + +dc_map <- dc_tracts |> + left_join(dc_tract_totals, by = "GEOID") + +ggplot(dc_map) + + geom_sf(aes(fill = total_structures), color = "white", linewidth = 0.2) + + scale_fill_gradientn( + colors = c(palette_urbn_cyan[1], palette_urbn_cyan[4], palette_urbn_cyan[7]), + labels = scales::comma) + + labs( + title = "Structure Count by Census Tract", + subtitle = "Washington, DC", + fill = "Structures") + + theme_urbn_map() +``` + +### Analyzing structures in a disaster area + +A common use case is estimating structures within a hazard-affected area. This example shows how to combine structure data with wildfire burn zones. + +```{r disaster-example, eval = FALSE} +# Get a specific wildfire's burn zone +burn_zones <- get_wildfire_burn_zones() + +camp_fire <- burn_zones |> + filter(str_detect(wildfire_name, "CAMP")) + +# Get structures within the burn zone +camp_fire_structures <- get_structures( + + boundaries = camp_fire, + geography = "tract", + keep_structures = TRUE) + +# Summarized counts +camp_fire_structures$structures_summarized |> + summarize( + .by = occupancy_class, + total = sum(count, na.rm = TRUE)) |> + arrange(desc(total)) +``` + +### Working with raw structure data + +Setting `keep_structures = TRUE` returns both the summarized data and the raw point-level structure data. + +```{r raw-structures, eval = FALSE} +dc_full <- get_structures( + boundaries = dc_boundary, + geography = "county", + keep_structures = TRUE) + +# Access the raw point data +raw_structures <- dc_full$structures_raw + +# Access the summarized data +summary_structures <- dc_full$structures_summarized + +# Map individual structures +ggplot() + + geom_sf(data = dc_boundary, fill = "grey95") + + geom_sf( + data = raw_structures |> filter(primary_occupancy == "Commercial"), + size = 0.1, + alpha = 0.5) + + labs(title = "Commercial Structures in Washington, DC") + + theme_urbn_map() +``` + +### County-level analysis + +For larger areas, county-level aggregation provides a useful summary. + +```{r county-example, eval = FALSE} +# Get structures for multiple states +southeast_boundary <- tigris::states(cb = TRUE) |> + filter(STUSPS %in% c("FL", "GA", "AL", "SC")) + +southeast_structures <- get_structures( + boundaries = southeast_boundary, + geography = "county") + +# Summarize by county +county_totals <- southeast_structures |> + summarize( + .by = GEOID, + total_structures = sum(count, na.rm = TRUE)) +``` + +## Performance considerations + +The raw building footprint data files are large. Processing can be slow, especially for multi-state analyses. Consider: + +- Starting with a small geographic area to test your workflow +- Using county-level aggregation when tract-level detail is not required +- Caching results for repeated analyses + +## See also + +- `get_wildfire_burn_zones()`: Wildfire burn zone disasters for use as boundaries +- `get_current_fire_perimeters()`: Active wildfire perimeters for use as boundaries +- `get_fema_disaster_declarations()`: FEMA disaster declarations diff --git a/vignettes/get_wildfire_burn_zones.Rmd b/vignettes/get_wildfire_burn_zones.Rmd index 0d5bdd4..42a8133 100644 --- a/vignettes/get_wildfire_burn_zones.Rmd +++ b/vignettes/get_wildfire_burn_zones.Rmd @@ -1,215 +1,236 @@ ---- -title: "Wildfire Burn Zones" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Wildfire Burn Zones} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - warning = FALSE, - message = FALSE, - fig.width = 8, - fig.height = 6, - dpi = 150 -) -``` - -## Overview - -The `get_wildfire_burn_zones()` function provides access to a harmonized dataset of wildfire burn zone disasters in the United States from 2000-2025. - -## Data sources and methodology - -This dataset combines six authoritative wildfire data sources to identify wildfires that burned near communities and resulted in civilian fatalities, destroyed structures, or received federal disaster relief. These sources are: - -- **FIRED (Fire Event Delineation)**: Satellite-derived fire perimeters -- **MTBS (Monitoring Trends in Burn Severity)**: Burn severity and perimeters for large fires -- **NIFC (National Interagency Fire Center)**: Official fire incident data -- **ICS-209 (Incident Status Summary)**: Incident management records -- **RedBook**: Historical wildfire statistics -- **FEMA**: Federal disaster declarations - -Wildfires are classified as "disasters" if they: - -1. Burned near a community AND -2. Resulted in at least one of: - - Civilian fatality - - Destroyed structure - - Federal disaster relief - -These data are described and provided in association with the journal article: Wilner, L.B., Piepmeier, L., Gordon, M. et al. Two and a half decades of United States wildfire burn zone disaster data, 2000-2025. Sci Data 12, 1948 (2025). https://doi.org/10.1038/s41597-025-06226-8. - -## Loading the data - -```{r setup} -library(climateapi) -library(tidyverse) -library(sf) -library(urbnthemes) - -set_urbn_defaults(style = "print") -``` - -```{r load-data, eval = FALSE} -burn_zones <- get_wildfire_burn_zones() -``` - -```{r load-data-hidden, echo = FALSE} -# For vignette building, we'll create example output structure -# In practice, users would run the above code -burn_zones <- get_wildfire_burn_zones() -``` - -## Data structure - -Each row in the dataset represents a single wildfire burn zone disaster. - -```{r structure} -glimpse(burn_zones) -``` - -Key variables include: - -- `wildfire_id`: Unique identifier for each wildfire event -- `year`: Year the wildfire occurred (2000-2025) -- `wildfire_name`: Name of the wildfire or fire complex -- `county_fips`: Pipe-delimited string of county FIPS codes for all affected counties -- `county_name`: Pipe-delimited string of county names for all affected counties -- `area_sq_km`: Burned area in square kilometers -- `fatalities_total` and `injuries_total`: Human impacts -- `structures_destroyed` and `structures_threatened`: Built environment impacts -- `geometry`: Burn zone polygon boundaries - -## Example analyses - -### Annual trends in wildfire disasters - -```{r annual-trends} -# Extract state FIPS from the first county in the pipe-delimited list -df1 = burn_zones |> - st_drop_geometry() |> - mutate(state_fips = str_sub(county_fips, 1, 2)) |> - summarize( - .by = c(year, state_fips), - n_wildfires = n(), - total_area_sq_km = sum(area_sq_km, na.rm = TRUE), - total_structures_destroyed = sum(structures_destroyed, na.rm = TRUE)) |> - left_join( - tigris::fips_codes %>% distinct(state, state_code), - by = c("state_fips" = "state_code")) - -top_five_states = df1 %>% - arrange(desc(n_wildfires)) %>% - distinct(state) %>% - slice(1:5) - -df1 %>% - filter(state %in% top_five_states$state) %>% - mutate(state = factor(state, levels = top_five_states %>% pull(state), ordered = TRUE)) %>% - ggplot(aes(x = year, y = n_wildfires)) + - geom_col() + - labs( - title = "Many states frequently experience more than 50 wildfires per year", - subtitle = "Disasters defined as wildfires causing fatalities, structure loss, or federal relief", - x = "", - y = "Number of wildfires") + - facet_wrap(~state) -``` - -### Geographic distribution of impacts - -```{r state-impacts} -state_impacts <- burn_zones |> - st_drop_geometry() |> - mutate(state_fips = str_sub(county_fips, 1, 2)) |> - summarize( - .by = state_fips, - n_wildfires = n_distinct(wildfire_id), - total_structures_destroyed = sum(structures_destroyed, na.rm = TRUE), - total_fatalities = sum(fatalities_total, na.rm = TRUE) - ) |> - left_join( - tidycensus::fips_codes |> - distinct(state_code, state_name), - by = c("state_fips" = "state_code") - ) |> - filter(!is.na(state_name)) - -state_impacts |> - slice_max(n_wildfires, n = 10) |> - mutate(state_name = fct_reorder(state_name, n_wildfires)) |> - ggplot(aes(y = state_name, x = n_wildfires)) + - geom_col() + - labs( - title = "States with Most Wildfire Burn Zone Disasters (2000-2025)", - x = "Number of distinct wildfires", - y = "" - ) -``` - -### Mapping wildfire burn zones - -```{r map-example, fig.height = 8} -# Get wildfires from a recent year in California -ca_2020_fires <- burn_zones |> - filter(str_detect(county_fips, "^06"), year == 2020) - -# Get California counties for context -ca_counties <- tigris::counties(state = "CA", cb = TRUE, year = 2022, progress_bar = FALSE) |> - st_transform(5070) - -ggplot() + - geom_sf(data = ca_counties, fill = "grey95", color = "grey70") + - geom_sf(data = ca_2020_fires, aes(fill = structures_destroyed), alpha = 0.8) + - scale_fill_continuous(trans = "reverse") + - labs( - title = "California Wildfire Burn Zone Disasters (2020)", - subtitle = "Burn zone perimeters colored by structures destroyed", - fill = "Structures\ndestroyed") + - theme_urbn_map() -``` - -### Analyzing structure loss severity - -```{r severity-analysis} -burn_zones |> - st_drop_geometry() |> - distinct(wildfire_id, year, wildfire_name, structures_destroyed) |> - filter(!is.na(structures_destroyed), structures_destroyed > 0) |> - mutate( - severity = case_when( - structures_destroyed >= 1000 ~ "1,000+ structures", - structures_destroyed >= 100 ~ "100-999", - structures_destroyed >= 10 ~ "10-99", - TRUE ~ "1-9 structures"), - severity = factor( - severity, - levels = c("1-9 structures", "10-99", "100-999", "1,000+ structures"))) |> - count(year, severity) |> - mutate( - .by = year, - percent = n / sum(n, na.rm = TRUE)) |> - ggplot(aes(x = year, y = percent, fill = severity)) + - geom_col() + - scale_fill_manual(values = c( - "1-9 structures" = palette_urbn_cyan[1], - "10-99" = palette_urbn_cyan[3], - "100-999" = palette_urbn_cyan[5], - "1,000+ structures" = palette_urbn_cyan[7])) + - scale_y_continuous(labels = scales::percent) + - labs( - title = "Most wildifres destroy under 10 structures", - x = "", - y = "") -``` - -## See also - -- `get_current_fire_perimeters()`: Access current/active wildfire perimeters -- `get_fema_disaster_declarations()`: FEMA disaster declarations including fire-related declarations -- `get_structures()`: Estimate structures within geographic boundaries +--- +title: "Wildfire Burn Zones" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Wildfire Burn Zones} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +## Overview + +The `get_wildfire_burn_zones()` function provides access to a harmonized dataset of wildfire burn zone disasters in the United States from 2000-2025. + +## Data sources and methodology + +This dataset combines six authoritative wildfire data sources to identify wildfires that burned near communities and resulted in civilian fatalities, destroyed structures, or received federal disaster relief. These sources are: + +- **FIRED (Fire Event Delineation)**: Satellite-derived fire perimeters +- **MTBS (Monitoring Trends in Burn Severity)**: Burn severity and perimeters for large fires +- **NIFC (National Interagency Fire Center)**: Official fire incident data +- **ICS-209 (Incident Status Summary)**: Incident management records +- **RedBook**: Historical wildfire statistics +- **FEMA**: Federal disaster declarations + +Wildfires are classified as "disasters" if they: + +1. Burned near a community AND +2. Resulted in at least one of: + - Civilian fatality + - Destroyed structure + - Federal disaster relief + +These data are described and provided in association with the journal article: Wilner, L.B., Piepmeier, L., Gordon, M. et al. Two and a half decades of United States wildfire burn zone disaster data, 2000-2025. Sci Data 12, 1948 (2025). https://doi.org/10.1038/s41597-025-06226-8. + +## Loading the data + + +``` r +library(climateapi) +library(tidyverse) +library(sf) +library(urbnthemes) + +set_urbn_defaults(style = "print") +``` + + +``` r +burn_zones <- get_wildfire_burn_zones() +``` + + + +## Data structure + +Each row in the dataset represents a single wildfire burn zone disaster. + + +``` r +glimpse(burn_zones) +#> Rows: 6,911 +#> Columns: 18 +#> $ wildfire_id 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,… +#> $ id_fema NA, "FM-5491-OK", NA, NA, "FM-5178-FL", NA, NA, "FM-5086-AZ", NA, "FM-5319-NV", NA, NA, NA, NA, NA, "FM-5445-CA", NA, NA, … +#> $ year 2018, 2024, 2017, 2014, 2017, 2019, 2016, 2015, 2017, 2020, 2020, 2014, 2017, 2016, 2022, 2022, 2022, 2020, 2022, 2020, 20… +#> $ wildfire_name "DONNELL", "57", "GARFIELD RD", "TYONEK", "30TH AVE", "G18", "WILLARD", "KEARNY RIV", "TURTLE", "NUMBERS", "LOYALTON", "FU… +#> $ county_fips "06003|06109", "40153", "12089", "02122", "12021", "08021", "06035", "04021", "30087", "32005", "06035|06063|06091", "0212… +#> $ county_name "ALPINE|TUOLUMNE", "WOODWARD", "NASSAU", "KENAI PENINSULA", "COLLIER", "CONEJOS", "LASSEN", "PINAL", "ROSEBUD", "DOUGLAS",… +#> $ area_sq_km 146.200893, 19.105698, 2.921101, 6.714350, 26.204585, 9.151442, 11.433032, 6.250412, 6.634805, 75.963891, 183.863534, 779.… +#> $ wildfire_complex_binary FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL… +#> $ date_start 2018-08-01, 2024-04-06, 2017-03-22, 2014-05-19, 2017-04-20, 2019-10-27, 2016-09-11, 2015-06-17, 2017-07-16, 2020-07-06, 2… +#> $ date_containment 2018-10-31, 2024-04-12, NA, NA, 2017-06-05, NA, 2016-10-12, 2015-06-27, NA, 2020-07-11, 2020-08-30, NA, 2017-07-18, NA, N… +#> $ fatalities_total NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,… +#> $ injuries_total 6, 2, NA, NA, 1, NA, NA, NA, NA, 2, NA, 4, NA, NA, NA, NA, NA, NA, 1, 2, 8, 20, NA, 5, 1, 2, 3, 2, NA, 6, NA, 1, NA, NA, N… +#> $ structures_destroyed 135, 1, 19, 5, 4, 4, 7, 3, 2, 40, 35, 4, 14, 1, 3, 194, 20, 9, 10, 1, 4, 247, 4, 24, 2, 20, 185, 30, 30, 4, NA, 1, 1, 5, 6… +#> $ structures_threatened NA, 1720, NA, 0, 0, NA, NA, 50, 0, NA, NA, 0, 0, NA, NA, NA, 127, NA, 3, 0, 0, 0, 200, 0, 10, NA, NA, NA, 200, NA, NA, 16,… +#> $ evacuation_total NA, NA, NA, NA, 7000, 50, NA, NA, NA, 50, 0, NA, NA, NA, NA, NA, NA, 50, NA, NA, 30, 236, 411, 5122, 1200, 200, NA, 621, N… +#> $ wui_type NA, NA, "intermix", NA, "intermix", "intermix", NA, "interface|intermix", "intermix", "intermix", "intermix", NA, "intermi… +#> $ density_people_sq_km_wildfire_buffer 0.0743354479, 4.1285729324, 5.4731167321, 0.0562439821, 116.3346681161, 2.7371345095, 8.8706970226, 1.6293691415, 0.536265… +#> $ geometry POLYGON ((-2033577 1961695,..., POLYGON ((-302378.6 1470132..., POLYGON ((1335180 915158.5,..., POLYGON ((-316073… +``` + +Key variables include: + +- `wildfire_id`: Unique identifier for each wildfire event +- `year`: Year the wildfire occurred (2000-2025) +- `wildfire_name`: Name of the wildfire or fire complex +- `county_fips`: Pipe-delimited string of county FIPS codes for all affected counties +- `county_name`: Pipe-delimited string of county names for all affected counties +- `area_sq_km`: Burned area in square kilometers +- `fatalities_total` and `injuries_total`: Human impacts +- `structures_destroyed` and `structures_threatened`: Built environment impacts +- `geometry`: Burn zone polygon boundaries + +## Example analyses + +### Annual trends in wildfire disasters + + +``` r +# Extract state FIPS from the first county in the pipe-delimited list +df1 = burn_zones |> + st_drop_geometry() |> + mutate(state_fips = str_sub(county_fips, 1, 2)) |> + summarize( + .by = c(year, state_fips), + n_wildfires = n(), + total_area_sq_km = sum(area_sq_km, na.rm = TRUE), + total_structures_destroyed = sum(structures_destroyed, na.rm = TRUE)) |> + left_join( + tigris::fips_codes %>% distinct(state, state_code), + by = c("state_fips" = "state_code")) + +top_five_states = df1 %>% + arrange(desc(n_wildfires)) %>% + distinct(state) %>% + slice(1:5) + +df1 %>% + filter(state %in% top_five_states$state) %>% + mutate(state = factor(state, levels = top_five_states %>% pull(state), ordered = TRUE)) %>% + ggplot(aes(x = year, y = n_wildfires)) + + geom_col() + + labs( + title = "Many states frequently experience more than 50 wildfires per year", + subtitle = "Disasters defined as wildfires causing fatalities, structure loss, or federal relief", + x = "", + y = "Number of wildfires") + + facet_wrap(~state) +``` + +![plot of chunk annual-trends](figure/get_wildfire_burn_zones-annual-trends-1.png) + +### Geographic distribution of impacts + + +``` r +state_impacts <- burn_zones |> + st_drop_geometry() |> + mutate(state_fips = str_sub(county_fips, 1, 2)) |> + summarize( + .by = state_fips, + n_wildfires = n_distinct(wildfire_id), + total_structures_destroyed = sum(structures_destroyed, na.rm = TRUE), + total_fatalities = sum(fatalities_total, na.rm = TRUE) + ) |> + left_join( + tidycensus::fips_codes |> + distinct(state_code, state_name), + by = c("state_fips" = "state_code") + ) |> + filter(!is.na(state_name)) + +state_impacts |> + slice_max(n_wildfires, n = 10) |> + mutate(state_name = fct_reorder(state_name, n_wildfires)) |> + ggplot(aes(y = state_name, x = n_wildfires)) + + geom_col() + + labs( + title = "States with Most Wildfire Burn Zone Disasters (2000-2025)", + x = "Number of distinct wildfires", + y = "" + ) +``` + +![plot of chunk state-impacts](figure/get_wildfire_burn_zones-state-impacts-1.png) + +### Mapping wildfire burn zones + + +``` r +# Get wildfires from a recent year in California +ca_2020_fires <- burn_zones |> + filter(str_detect(county_fips, "^06"), year == 2020) + +# Get California counties for context +ca_counties <- tigris::counties(state = "CA", cb = TRUE, year = 2022, progress_bar = FALSE) |> + st_transform(5070) + +ggplot() + + geom_sf(data = ca_counties, fill = "grey95", color = "grey70") + + geom_sf(data = ca_2020_fires, aes(fill = structures_destroyed), alpha = 0.8) + + scale_fill_continuous(trans = "reverse") + + labs( + title = "California Wildfire Burn Zone Disasters (2020)", + subtitle = "Burn zone perimeters colored by structures destroyed", + fill = "Structures\ndestroyed") + + theme_urbn_map() +``` + +![plot of chunk map-example](figure/get_wildfire_burn_zones-map-example-1.png) + +### Analyzing structure loss severity + + +``` r +burn_zones |> + st_drop_geometry() |> + distinct(wildfire_id, year, wildfire_name, structures_destroyed) |> + filter(!is.na(structures_destroyed), structures_destroyed > 0) |> + mutate( + severity = case_when( + structures_destroyed >= 1000 ~ "1,000+ structures", + structures_destroyed >= 100 ~ "100-999", + structures_destroyed >= 10 ~ "10-99", + TRUE ~ "1-9 structures"), + severity = factor( + severity, + levels = c("1-9 structures", "10-99", "100-999", "1,000+ structures"))) |> + count(year, severity) |> + mutate( + .by = year, + percent = n / sum(n, na.rm = TRUE)) |> + ggplot(aes(x = year, y = percent, fill = severity)) + + geom_col() + + scale_fill_manual(values = c( + "1-9 structures" = palette_urbn_cyan[1], + "10-99" = palette_urbn_cyan[3], + "100-999" = palette_urbn_cyan[5], + "1,000+ structures" = palette_urbn_cyan[7])) + + scale_y_continuous(labels = scales::percent) + + labs( + title = "Most wildifres destroy under 10 structures", + x = "", + y = "") +``` + +![plot of chunk severity-analysis](figure/get_wildfire_burn_zones-severity-analysis-1.png) + +## See also + +- `get_current_fire_perimeters()`: Access current/active wildfire perimeters +- `get_fema_disaster_declarations()`: FEMA disaster declarations including fire-related declarations +- `get_structures()`: Estimate structures within geographic boundaries diff --git a/vignettes/get_wildfire_burn_zones.Rmd.orig b/vignettes/get_wildfire_burn_zones.Rmd.orig new file mode 100644 index 0000000..0d5bdd4 --- /dev/null +++ b/vignettes/get_wildfire_burn_zones.Rmd.orig @@ -0,0 +1,215 @@ +--- +title: "Wildfire Burn Zones" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Wildfire Burn Zones} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + warning = FALSE, + message = FALSE, + fig.width = 8, + fig.height = 6, + dpi = 150 +) +``` + +## Overview + +The `get_wildfire_burn_zones()` function provides access to a harmonized dataset of wildfire burn zone disasters in the United States from 2000-2025. + +## Data sources and methodology + +This dataset combines six authoritative wildfire data sources to identify wildfires that burned near communities and resulted in civilian fatalities, destroyed structures, or received federal disaster relief. These sources are: + +- **FIRED (Fire Event Delineation)**: Satellite-derived fire perimeters +- **MTBS (Monitoring Trends in Burn Severity)**: Burn severity and perimeters for large fires +- **NIFC (National Interagency Fire Center)**: Official fire incident data +- **ICS-209 (Incident Status Summary)**: Incident management records +- **RedBook**: Historical wildfire statistics +- **FEMA**: Federal disaster declarations + +Wildfires are classified as "disasters" if they: + +1. Burned near a community AND +2. Resulted in at least one of: + - Civilian fatality + - Destroyed structure + - Federal disaster relief + +These data are described and provided in association with the journal article: Wilner, L.B., Piepmeier, L., Gordon, M. et al. Two and a half decades of United States wildfire burn zone disaster data, 2000-2025. Sci Data 12, 1948 (2025). https://doi.org/10.1038/s41597-025-06226-8. + +## Loading the data + +```{r setup} +library(climateapi) +library(tidyverse) +library(sf) +library(urbnthemes) + +set_urbn_defaults(style = "print") +``` + +```{r load-data, eval = FALSE} +burn_zones <- get_wildfire_burn_zones() +``` + +```{r load-data-hidden, echo = FALSE} +# For vignette building, we'll create example output structure +# In practice, users would run the above code +burn_zones <- get_wildfire_burn_zones() +``` + +## Data structure + +Each row in the dataset represents a single wildfire burn zone disaster. + +```{r structure} +glimpse(burn_zones) +``` + +Key variables include: + +- `wildfire_id`: Unique identifier for each wildfire event +- `year`: Year the wildfire occurred (2000-2025) +- `wildfire_name`: Name of the wildfire or fire complex +- `county_fips`: Pipe-delimited string of county FIPS codes for all affected counties +- `county_name`: Pipe-delimited string of county names for all affected counties +- `area_sq_km`: Burned area in square kilometers +- `fatalities_total` and `injuries_total`: Human impacts +- `structures_destroyed` and `structures_threatened`: Built environment impacts +- `geometry`: Burn zone polygon boundaries + +## Example analyses + +### Annual trends in wildfire disasters + +```{r annual-trends} +# Extract state FIPS from the first county in the pipe-delimited list +df1 = burn_zones |> + st_drop_geometry() |> + mutate(state_fips = str_sub(county_fips, 1, 2)) |> + summarize( + .by = c(year, state_fips), + n_wildfires = n(), + total_area_sq_km = sum(area_sq_km, na.rm = TRUE), + total_structures_destroyed = sum(structures_destroyed, na.rm = TRUE)) |> + left_join( + tigris::fips_codes %>% distinct(state, state_code), + by = c("state_fips" = "state_code")) + +top_five_states = df1 %>% + arrange(desc(n_wildfires)) %>% + distinct(state) %>% + slice(1:5) + +df1 %>% + filter(state %in% top_five_states$state) %>% + mutate(state = factor(state, levels = top_five_states %>% pull(state), ordered = TRUE)) %>% + ggplot(aes(x = year, y = n_wildfires)) + + geom_col() + + labs( + title = "Many states frequently experience more than 50 wildfires per year", + subtitle = "Disasters defined as wildfires causing fatalities, structure loss, or federal relief", + x = "", + y = "Number of wildfires") + + facet_wrap(~state) +``` + +### Geographic distribution of impacts + +```{r state-impacts} +state_impacts <- burn_zones |> + st_drop_geometry() |> + mutate(state_fips = str_sub(county_fips, 1, 2)) |> + summarize( + .by = state_fips, + n_wildfires = n_distinct(wildfire_id), + total_structures_destroyed = sum(structures_destroyed, na.rm = TRUE), + total_fatalities = sum(fatalities_total, na.rm = TRUE) + ) |> + left_join( + tidycensus::fips_codes |> + distinct(state_code, state_name), + by = c("state_fips" = "state_code") + ) |> + filter(!is.na(state_name)) + +state_impacts |> + slice_max(n_wildfires, n = 10) |> + mutate(state_name = fct_reorder(state_name, n_wildfires)) |> + ggplot(aes(y = state_name, x = n_wildfires)) + + geom_col() + + labs( + title = "States with Most Wildfire Burn Zone Disasters (2000-2025)", + x = "Number of distinct wildfires", + y = "" + ) +``` + +### Mapping wildfire burn zones + +```{r map-example, fig.height = 8} +# Get wildfires from a recent year in California +ca_2020_fires <- burn_zones |> + filter(str_detect(county_fips, "^06"), year == 2020) + +# Get California counties for context +ca_counties <- tigris::counties(state = "CA", cb = TRUE, year = 2022, progress_bar = FALSE) |> + st_transform(5070) + +ggplot() + + geom_sf(data = ca_counties, fill = "grey95", color = "grey70") + + geom_sf(data = ca_2020_fires, aes(fill = structures_destroyed), alpha = 0.8) + + scale_fill_continuous(trans = "reverse") + + labs( + title = "California Wildfire Burn Zone Disasters (2020)", + subtitle = "Burn zone perimeters colored by structures destroyed", + fill = "Structures\ndestroyed") + + theme_urbn_map() +``` + +### Analyzing structure loss severity + +```{r severity-analysis} +burn_zones |> + st_drop_geometry() |> + distinct(wildfire_id, year, wildfire_name, structures_destroyed) |> + filter(!is.na(structures_destroyed), structures_destroyed > 0) |> + mutate( + severity = case_when( + structures_destroyed >= 1000 ~ "1,000+ structures", + structures_destroyed >= 100 ~ "100-999", + structures_destroyed >= 10 ~ "10-99", + TRUE ~ "1-9 structures"), + severity = factor( + severity, + levels = c("1-9 structures", "10-99", "100-999", "1,000+ structures"))) |> + count(year, severity) |> + mutate( + .by = year, + percent = n / sum(n, na.rm = TRUE)) |> + ggplot(aes(x = year, y = percent, fill = severity)) + + geom_col() + + scale_fill_manual(values = c( + "1-9 structures" = palette_urbn_cyan[1], + "10-99" = palette_urbn_cyan[3], + "100-999" = palette_urbn_cyan[5], + "1,000+ structures" = palette_urbn_cyan[7])) + + scale_y_continuous(labels = scales::percent) + + labs( + title = "Most wildifres destroy under 10 structures", + x = "", + y = "") +``` + +## See also + +- `get_current_fire_perimeters()`: Access current/active wildfire perimeters +- `get_fema_disaster_declarations()`: FEMA disaster declarations including fire-related declarations +- `get_structures()`: Estimate structures within geographic boundaries