-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathrapid_data_processing.Rmd
More file actions
222 lines (175 loc) · 7.14 KB
/
rapid_data_processing.Rmd
File metadata and controls
222 lines (175 loc) · 7.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
---
title: "rapid_data_processing"
output: html_document
---
# Pre-processing datasets for Rapid workflow
This workflow imports and organizes the datasets used by the rapid workflow, allowing quick re-runs of the rapid reports and keeping the main workflow cleaner.
```{r}
source("setup.R")
# Park polygons for the whole park system; centroids are used later as
# fallback coordinates for water sources with no recorded location.
parks <- getParkSystem()
# One lon/lat point per park unit (EPSG:4326), with coordinates pulled
# out into plain x/y columns for joining.
park_centroids <- parks %>%
st_centroid() %>%
st_transform(4326) %>%
dplyr::mutate(y = st_coordinates(.)[,2], x = st_coordinates(.)[,1])
# water supplies
table_path <- "data/Water_Supply_Systems/NPS_Water_Systems_Database_Joined.xlsx"
# Read sheet 1, skipping one header row; "NA" and "U" cells become NA.
water_supplies <- read_excel(table_path,
sheet = 1,
na = c("NA", "U"),
skip = 1) %>%
# Coordinates arrive as text in the spreadsheet; coerce to numeric
# (unparseable values become NA and fall back to the park centroid below).
dplyr::mutate(
source_longitude = as.numeric(source_longitude),
source_latitude = as.numeric(source_latitude)
) %>%
# Keep conterminous-US records only (drop AK, HI, and territories).
filter(!state %in% c("AK", "AS", "HI", "MP", "PR", "VI")) %>%
# Attach the park centroid x/y for each record's park unit.
left_join(
park_centroids %>%
st_drop_geometry() %>%
dplyr::select(UNIT_CODE, x, y),
by = c(park_unit = "UNIT_CODE")
) %>% dplyr::mutate(
# Record whether the point below is a true source location or a
# park-centroid fallback, BEFORE the coordinates are overwritten.
coords_flag = ifelse(
is.na(source_longitude) |
is.na(source_latitude),
"Centroid",
"Source Coordinates"
),
# Fill missing source coordinates with the park centroid.
source_longitude = ifelse(is.na(source_longitude), x, source_longitude),
source_latitude = ifelse(is.na(source_latitude), y, source_latitude)
) %>%
# remove municipal and hauled source types, accounting for database entry errors
filter(!str_detect(source_type, regex("municipal|hauled", ignore_case = TRUE))) %>%
# Drop rows still lacking coordinates (no source location AND no centroid),
# then promote to an sf point layer; remove = FALSE keeps the lon/lat columns.
drop_na(source_longitude, source_latitude) %>%
st_as_sf(coords = (c("source_longitude", "source_latitude")),
crs = 4326,
remove = FALSE)
```
# Create File Structure
Create folders if they don't already exist
```{r}
# Build the rapid-workflow folder tree. Idempotent: each folder is only
# created if it does not already exist. Order matters — parents are listed
# before their children so plain dir.create() succeeds.
rapid_dirs <- c(
  "data/rapid",                   # root for all rapid-workflow data
  "data/rapid/park",              # park-level data
  # one subfolder per park code, under data/rapid/park.
  # BUG FIX: the original used paste0("data/rapid/park", x), which omits
  # the "/" separator and created folders like "data/rapid/parkACAD"
  # instead of "data/rapid/park/ACAD". file.path() inserts the separator.
  file.path("data/rapid/park", sort(parks$UNIT_CODE)),
  "data/rapid/all",               # CONUS or US-wide data
  "data/rapid/inputs",            # indicator input datasets
  "data/rapid/outputs",           # indicator output datasets
  "data/rapid/inputs/coast_comp"  # coastal component input datasets
)
walk(rapid_dirs, function(d) {
  if (!dir.exists(d)) {
    dir.create(d)
  }
})
```
# Fire Probability Data
Combine all Gao et al. files and save final file to feed into `calc_fire_exposure()`
```{r}
# read in input datasets
# Pyrome polygons (USFS RDS-2020-0020); make geometries valid and
# reproject to EPSG:4326 to match the water-supply points.
pyromes <- st_read("data/all/usfs_fire/Pyromes-RDS-2020-0020/Data/Pyromes_CONUS_20200206.shp") %>%
st_make_valid() %>%
sf::st_transform(4326)
# GAO 2021 FIRE PROBABILITY
# Maps Gao et al. column names ("gao") to GCM names — TODO confirm columns.
crosswalk <- read_csv("data/all/usfs_fire/fire_projections_gao_2021/gcm_crosswalk.csv")
# Back-casted GCMs, compared against GridMet. Really just need GridMet.
fireprob_historic <- read_csv("data/all/usfs_fire/fire_projections_gao_2021/tables3-fireprob-hist.csv")
# Midcentury is defined as the avg. across 2040-2070:
# Build one long table of (PYROME, model, scenario, projected value),
# stacking RCP 4.5 and RCP 8.5 projections with a ".rcp45"/".rcp85"
# suffix on the GCM name.
delta_fireprob <- read_csv("data/all/usfs_fire/fire_projections_gao_2021/tables12-45-midcentury.csv") %>%
pivot_longer(-PYROME, names_to = "gao") %>%
left_join(crosswalk, by = "gao") %>%
mutate(GCM = paste0(GCM, ".rcp45")) %>%
# do same thing on the 85 CFs, then bind together:
bind_rows(
read_csv("data/all/usfs_fire/fire_projections_gao_2021/tables6-85-midcentury.csv") %>%
tidyr::pivot_longer(-PYROME, names_to = "gao", values_to = "value") %>%
left_join(crosswalk, by = "gao") %>%
mutate(GCM = paste0(GCM, ".rcp85"))
) %>%
# join historic data by pyrome:
left_join(fireprob_historic, by = "PYROME") %>%
# Description: "The probability of a fire increases/decreases by `delta_fp` percent."
#mutate(delta_fp = 100 * ((value - GridMet)/GridMet))
# new method, pulling each model's specific hind cast value instead of Gridmet
# rowwise() is required because get() looks up a column name per row
# (each row's own "gao" column holds its hindcast).
rowwise() %>%
# No historic value for NorESM1 so using ensemble mean for that one
mutate(delta_fp = 100 * ((value - get(ifelse(gao == "NorESM1", "ensemblemean", gao)))/get(ifelse(gao == "NorESM1", "ensemblemean", gao)))) %>%
ungroup()
# Pull 10th, 50th and 90th percentile values for each pyrome
# For each pyrome, keep the three rows whose delta_fp is closest to the
# 10th/50th/90th percentile across all model/scenario combinations.
pyromes_selected <- delta_fireprob %>%
group_by(PYROME) %>%
slice(c(which.min(abs(delta_fp - quantile(delta_fp, 0.1))),
which.min(abs(delta_fp - quantile(delta_fp, 0.5))),
which.min(abs(delta_fp - quantile(delta_fp, 0.9))))) %>%
# NOTE(review): these labels recompute quantiles on the 3 sliced rows,
# not the full group used for selection above; with ties or skewed
# values a row could be mislabeled — verify against a known pyrome.
mutate(percentile = case_when(
delta_fp == delta_fp[which.min(abs(delta_fp - quantile(delta_fp, 0.1)))] ~ "p10",
delta_fp == delta_fp[which.min(abs(delta_fp - quantile(delta_fp, 0.5)))] ~ "p50",
delta_fp == delta_fp[which.min(abs(delta_fp - quantile(delta_fp, 0.9)))] ~ "p90"
)) %>%
ungroup() %>%
# One row per pyrome; gao/value/GCM/GridMet/delta_fp become
# *_p10 / *_p50 / *_p90 columns.
pivot_wider(
names_from = percentile,
values_from = c(gao, value, GCM, GridMet, delta_fp),
names_sep = "_"
)
```
Save files
```{r}
# Persist the fire-probability inputs for the rapid reports:
# the selected percentile table as CSV, and the pyrome polygons as a
# GeoPackage (append = FALSE overwrites any existing layer).
pyromes_selected %>%
  write_csv("data/rapid/inputs/pyromes_selected.csv")
pyromes %>%
  st_write("data/rapid/inputs/pyromes.gpkg", append = FALSE)
```
# Process LOCA2 WBM Files
### Runoff
```{r}
# NetCDF monthly water-balance files containing runoff.
# FIX: list.files(pattern =) expects a regular expression, not a glob.
# The original "*runoff" only matched because R's TRE engine tolerates a
# leading "*"; the correct regex is simply "runoff".
runoff_data <- list.files("data/all/loca2_mwbm/",
                          full.names = TRUE,
                          pattern = "runoff")
# Extract historical (1980-2024) and midcentury (2035-2065) runoff at each
# water-supply source using the project helper; walk() is used for its
# side effects (files written to output_dir).
walk(runoff_data, ~process_loca2_mwbm(.x,
output_dir = "data/rapid/inputs/loca2_runoff_water_supplies/",
water_supplies = water_supplies %>% select(park_unit, wsd_source_id),
var = "runoff",
historical = c("1980-01-01", "2024-12-31"),
future = c("2035-01-01", "2065-12-31")))
# took 24 minutes
```
### Precipitation
```{r}
# NetCDF monthly water-balance files containing precipitation.
# FIX: in the original regex "pr_|precip|pr." the unescaped "." matched
# "pr" followed by ANY character (e.g. "april", "spring"), risking
# unrelated files being picked up. "pr\\." matches a literal "pr." only.
precip_data <- list.files("data/all/loca2_mwbm/",
                          full.names = TRUE,
                          pattern = "pr_|precip|pr\\.")
# Extract historical (1980-2024) and midcentury (2035-2065) precipitation
# at each water-supply source using the project helper; walk() is used for
# its side effects (files written to output_dir).
walk(precip_data, ~process_loca2_mwbm(.x,
output_dir = "data/rapid/inputs/loca2_precipitation_water_supplies/",
water_supplies = water_supplies %>% select(park_unit, wsd_source_id),
var = "pr",
historical = c("1980-01-01", "2024-12-31"),
future = c("2035-01-01", "2065-12-31")))
# took 57 minutes
```
# Process Inundation Data Files
```{r}
# Run the coastal-component inundation pre-processing for every water
# supply; reads from data/all/coast_comp_data and writes processed files
# to data/rapid/inputs/coast_comp. All arguments are named, so order is
# immaterial.
inundation_data <- process_inundation_data(
  water_supplies = water_supplies,
  coast_dir_path = here("data", "all", "coast_comp_data"),
  output_dir = here("data", "rapid", "inputs", "coast_comp")
)
# took under 5 minutes
```
# CUSP Data
Combine all NOAA CUSP shoreline tiles into a single GeoPackage for use by `calc_inundation_sensitivity()`.
```{r}
# Merge the NOAA CUSP shoreline tiles into a single GeoPackage in the
# coastal-component inputs folder (consumed later by
# calc_inundation_sensitivity()). Arguments are named, so order is
# immaterial.
process_CUSP_data(
  output_dir = here("data", "rapid", "inputs", "coast_comp"),
  cusp_dir_path = here("data", "all", "coast_comp_data", "NOAA_CUSP")
)
# took 2 minutes
```