---
title: "4_trendAnalysis"
output: html_document
date: "2023-03-13"
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Libraries
```{r}
library(tidyverse)
library(sf)
library(lubridate)
library(grDevices)
library(mapview)
library(extrafont)
library(ggpubr)
library(ggmap)
library(RgoogleMaps)
library(broom)
library(feather)
library(tidyhydat)
library(sp)
library(data.table)
library(ggalluvial)
library(patchwork)
library(magick)
library(units)
library(Kendall)
library(ggspatial)
library(dtplyr)
# Import libraries for the random forest classification
library(caret)
library(e1071)
library(Boruta)
library(tidymodels)
library(skimr)
library(vip)
```
# Import files / set constants
```{r}
# date used for file version control
todayDate = "20230324" # date tag from the first data join phase
# Names of files and folders for reflectance data
import.filePath = "C:/Users/whyana/OneDrive/DocumentsLaptop/001_GraduateSchool/Research/Connectivity/Mackenzie/Data/GEE Downloads"
# intermediate working directory
int.wd="C:/Users/whyana/OneDrive/DocumentsLaptop/001_GraduateSchool/Research/Connectivity/Mackenzie/Data/intermediaryDownloads"
refl.import = 'srCorrected_mackLakes_202303138.feather'
# Names of files and folder for lake shapefiles & island polygon shapefiles
shapeFiles.filePath = "C:/Users/whyana/OneDrive/DocumentsLaptop/001_GraduateSchool/Research/Connectivity/Mackenzie/Data/shapeFiles"
lakes.shapeFile = "mackenzieGoodLakes.shp"
setwd(shapeFiles.filePath)
lakes.sf = st_read(lakes.shapeFile)
import.sword = "na_sword_reaches_hb82_v14.shp"
images.wd = "C:/Users/whyana/OneDrive/DocumentsLaptop/001_GraduateSchool/Research/Connectivity/Mackenzie/images"
```
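Before running the rest of the workflow, it can help to confirm that the hard-coded paths resolve on the current machine. A minimal sanity-check sketch, assuming the same folder layout as above (the classification file name is taken from the import in the next chunk):
```{r}
# Optional check: confirm the expected inputs exist before proceeding
file.exists(file.path(shapeFiles.filePath, lakes.shapeFile))
file.exists(file.path(shapeFiles.filePath, import.sword))
file.exists(file.path(int.wd, paste0("final.class_", todayDate, ".feather")))
```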
# Import river centerlines and set the projection for all future plots, import classifications
```{r}
crs.plot = "+proj=tcea +lon_0=-134.3847656 +datum=WGS84 +units=m +no_defs"
setwd(shapeFiles.filePath)
study.area.large=cbind.data.frame(lon=c(-136.80, -136.80, -133.47, -133.47),
lat=c(67.25, 69.55, 69.55, 67.46)) %>%
st_as_sf(coords=c("lon", "lat")) %>% st_set_crs(4326) %>%
st_bbox() %>% st_as_sfc() %>%
st_transform(crs = crs.plot)
mack.basin.large = st_read(import.sword) %>%
st_transform(crs = crs.plot) %>%
st_intersection(study.area.large) %>% dplyr::filter(width>90)
# import classifications
setwd(int.wd)
all.classified.filter = read_feather(paste0("final.class_", todayDate, ".feather"))
```
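A quick interactive look (not a manuscript figure) can confirm the clipped SWORD reaches fall inside the study-area box after reprojection; this is a sketch assuming the objects above loaded correctly, using the already-attached `mapview` package:
```{r}
# Quick look at the study-area bounds and clipped river centerlines
mapview(study.area.large, alpha.regions = 0, color = "red") +
  mapview(mack.basin.large, zcol = "width")
```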
# Trend analysis for calibrated reflectance
## Prep lake connectivity classifications for trend analysis
```{r}
# group observations into two temporal periods and compute each lake's mean monthly connectivity class per year
results.summary.subgroups = all.classified.filter %>% lazy_dt() %>%
dplyr::select(.pred_class, OBJECTID, year, month) %>%
mutate(yeargroup = case_when(
year>=1984 & year<=2002 ~ "1984-2002",
year>=2003 & year<=2022 ~ "2003-2022"
)) %>% filter(!is.na(yeargroup)) %>%
group_by(OBJECTID, month, year, yeargroup)%>%
summarise(class.mean = mean(as.numeric(as.character(.pred_class)), na.rm=T),
count=n()) %>% ungroup()
# compute the same per-lake monthly means across all years combined (yeargroup = "all")
results.summary.all = all.classified.filter %>% lazy_dt() %>%
dplyr::select(.pred_class, OBJECTID, year, month) %>%
mutate(yeargroup = "all") %>%
group_by(OBJECTID, month, year, yeargroup)%>%
summarise(class.mean = mean(as.numeric(as.character(.pred_class)), na.rm=T),
count=n()) %>% ungroup()
# Combine the two dataframes together
results.summary = rbind.data.frame(results.summary.subgroups %>% as_tibble(),
results.summary.all %>% as_tibble())
# group by time period, count number of years of data each lake has in each month in each period
good.ids = results.summary %>% group_by(OBJECTID, month, yeargroup) %>%count() %>% ungroup() %>%
filter(n>=10)
# keep only lakes that meet the 10-year threshold in all three period groupings (1984-2002, 2003-2022, all)
best.ids = good.ids %>% group_by(OBJECTID, month) %>% count() %>% ungroup() %>% filter(n==3)
# Apply the best.ids filter, and group observations by lake, month, and yeargroup
nested.data = results.summary %>%
left_join(best.ids, by=c("OBJECTID", "month")) %>%
dplyr::filter(!is.na(n)) %>%
group_by(OBJECTID, month, yeargroup) %>% nest() %>% ungroup() %>% as_tibble()
```
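A small sketch to confirm the nesting behaved as intended, i.e. that every retained lake/month/period combination carries at least the ten years of observations required above:
```{r}
# Each nested tibble should hold at least 10 yearly means after the best.ids filter
nested.data %>%
  mutate(n.years = map_int(data, nrow)) %>%
  group_by(yeargroup) %>%
  summarise(n.lakes = n(), min.years = min(n.years))
```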
## Apply trend analysis calculations to each lake
```{r}
## For each lake/month/period, label lakes that stay in one connectivity class, or run a Mann-Kendall test for the trend (tau) and p-value
row.combo = NULL
for (i in 1:nrow(nested.data)){
  dat = nested.data$data[[i]] %>% arrange(year)
  OBJECTID = nested.data$OBJECTID[[i]]
  month = nested.data$month[[i]]
  yeargroup = nested.data$yeargroup[[i]]
  n.obs = nrow(dat)
  obs.count = dat %>% group_by(class.mean) %>% count() %>% ungroup() %>%
    mutate(all.obs = n.obs,
           pct = n/n.obs)
  # if >= 95% of a lake's observations fall within a single connectivity range,
  # assign that label instead of running a trend test
  if(isTRUE(sum(obs.count$pct[obs.count$class.mean <= 0.66]) >= 0.95)){
    class = "always less than 0.66"
    col.combo = cbind.data.frame(OBJECTID, month, yeargroup, class, pval=NA, S=NA, tau=NA)
    row.combo = rbind.data.frame(row.combo, col.combo)
  } else if(isTRUE(sum(obs.count$pct[obs.count$class.mean > 0.66 & obs.count$class.mean <= 1.33]) >= 0.95)){
    class = "always 0.66-1.33"
    col.combo = cbind.data.frame(OBJECTID, month, yeargroup, class, pval=NA, S=NA, tau=NA)
    row.combo = rbind.data.frame(row.combo, col.combo)
  } else if(isTRUE(sum(obs.count$pct[obs.count$class.mean > 1.33]) >= 0.95)){
    class = "always >1.33"
    col.combo = cbind.data.frame(OBJECTID, month, yeargroup, class, pval=NA, S=NA, tau=NA)
    row.combo = rbind.data.frame(row.combo, col.combo)
  } else {
    # otherwise run a Mann-Kendall trend test on the yearly series
    class = "trendtest"
    test.obj = MannKendall(dat$class.mean)
    S = test.obj$S[[1]]
    tau = test.obj$tau[[1]]
    pval = test.obj$sl[[1]]
    col.combo = cbind.data.frame(OBJECTID, month, yeargroup, class, pval, S, tau)
    row.combo = rbind.data.frame(row.combo, col.combo)
  }
}
setwd(int.wd)
# cache the raw trend results so the loop does not need to be re-run
write_feather(row.combo, "raw_trend.feather")
row.combo = read_feather("raw_trend.feather")
## Format trend data results
trend.data = row.combo %>% as_tibble() %>%
  mutate(trend = case_when(
    tau > 0 & pval < 0.05 ~ "increasing connectivity trend",
    tau < 0 & pval < 0.05 ~ "decreasing connectivity trend",
    pval >= 0.05 ~ "no monotonic trend")) %>%
left_join(lakes.sf, by="OBJECTID") %>% st_as_sf() %>%
st_transform(crs = crs.plot)
write_feather(trend.data %>% as_tibble() %>% select(-geometry),
paste0(todayDate, "Treds_cal.feather"))
```
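The per-lake loop above could equivalently be written with `purrr`, which avoids growing `row.combo` through repeated `rbind` calls. The sketch below covers the trend-test branch only (it skips the "always in one class" shortcut), and `mk_row` is a hypothetical helper, not part of the original workflow:
```{r}
# Hypothetical vectorised alternative: Mann-Kendall test for every nested series
mk_row = function(dat){
  test.obj = MannKendall(arrange(dat, year)$class.mean)
  tibble(pval = test.obj$sl[[1]], S = test.obj$S[[1]], tau = test.obj$tau[[1]])
}
trend.purrr = nested.data %>%
  mutate(mk = map(data, mk_row)) %>%
  select(OBJECTID, month, yeargroup, mk) %>%
  unnest(mk)
```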
## Analyze/plot trends in connectivity
### Plots not included in manuscript
```{r}
# Print a summary of trend results
setwd(int.wd)
row.combo = read_feather("raw_trend.feather") %>% as.data.table()
trend.data = read_feather(paste0(todayDate, "Treds_cal.feather")) %>%
left_join(lakes.sf, by="OBJECTID") %>% st_as_sf() %>%
st_transform(crs.plot)
# spread June stability/trend classes into one column per time period
june.combo = row.combo[month==6,]
june.spread = dcast(june.combo, OBJECTID ~ yeargroup, value.var = c("class"))
june.spread %>% na.omit()
# cross-tabulate June lakes by trend/stability category and time period
row.combo %>% as_tibble() %>%
  mutate(trend = case_when(
    tau > 0 & pval < 0.05 ~ "increasing sig. connectivity trend",
    tau < 0 & pval < 0.05 ~ "decreasing sig. connectivity trend",
    pval >= 0.05 ~ "no monotonic trend",
    is.na(tau) & class == "always less than 0.66" ~ "always less than 0.66",
    is.na(tau) & class == "always 0.66-1.33" ~ "always 0.66-1.33",
    is.na(tau) & class == "always >1.33" ~ "always >1.33")) %>%
filter(month==6) %>%
group_by(month, yeargroup, trend) %>% count() %>%
spread(yeargroup, n)
# count June lakes with positive, negative, or zero tau in each time period
row.combo %>% filter(!is.na(tau) & month==6) %>%
  mutate(group = case_when(tau < 0 ~ "lt 0", tau > 0 ~ "gt 0", tau == 0 ~ "0")) %>%
  group_by(yeargroup, group) %>% count()
```