-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path01_height_munge.Rmd
More file actions
127 lines (91 loc) · 3.37 KB
/
01_height_munge.Rmd
File metadata and controls
127 lines (91 loc) · 3.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
title: "01_height_munge"
output: html_document
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
library(tidyverse)
library(lubridate)
knitr::opts_chunk$set(echo = TRUE)
```
## First take the raw heights and standardize lakes with multiple gauges based on the GPS readings
```{r cars}
## Pull in the raw heights
heights.raw <- read_csv('data/in/LOCSS_raw_heights.csv')

## Standardize lakes with multiple gauges based on preliminary GPS data.
## Each offset is (GPS value of the gauge the lake is aligned to) minus
## (GPS value of the gauge being shifted). Gauges that the others are
## aligned to (ZZN2/NC1009, FDN2, NWN2, LAW2) need no entry.
gauge.offsets <- c(
  # Waccamaw -- aligned to ZZN2/NC1009 : GPS 2.24
  DMN2 = 2.24 - 2.18,   # DMN2 : 2.18
  HCN2 = 2.24 - 2.18,   # HCN2 : GPS 2.18
  WCN2 = 2.24 - 1.77,   # WCN2 : GPS 1.77
  # Phelps -- aligned to FDN2 : GPS 2.59
  PHN2 = 2.59 - 11.79,  # PHN2 : GPS 11.79
  # White -- aligned to NWN2 : GPS 1.88
  WHN2 = 1.88 - 1.18,   # WHN2 : GPS 1.18
  # Lawrence -- aligned to LAW2 : GPS 0.4
  XAW2 = 0.4 - 0.38     # XAW2 : GPS 0.38
)

## Shift only the rows whose gauge has an offset. `%in%` treats a missing
## gauge_id as "no match"; an `==` mask would carry the NA into the subscript
## and make the assignment fail.
needs.offset <- heights.raw$gauge_id %in% names(gauge.offsets)
heights.raw$height[needs.offset] <- heights.raw$height[needs.offset] +
  unname(gauge.offsets[as.character(heights.raw$gauge_id[needs.offset])])
```
## Pull in the data from Wisconsin and standardize its format to match LOCSS data
```{r}
## Pull in the Wisconsin data and reformat it.
## Every file in the directory is read and the results are row-bound.
paths <- list.files('data/in/Wisc_raw_heights', full.names = TRUE)

## Fail early and clearly if the directory is empty or missing (e.g. wrong
## working directory) instead of erroring later on a missing column.
if (length(paths) == 0) {
  stop('No files found in data/in/Wisc_raw_heights', call. = FALSE)
}

wisc.heights.raw <- map_dfr(paths, read_csv)

## Two duplicate names between Wisconsin and Washington, make explicit
wisc.heights.raw$Lake_Name[wisc.heights.raw$Lake_Name == 'Deep Lake'] <-
  'Deep Lake Wisc.'
wisc.heights.raw$Lake_Name[wisc.heights.raw$Lake_Name == 'Phantom Lake'] <-
  'Phantom Lake Wisc.'

## Reshape to the LOCSS layout (name.std, name, date, region, height) and
## collapse to one mean height per lake per day.
wisc.heights.raw <- wisc.heights.raw %>%
  mutate(date = mdy(Date)) %>%              # Date is parsed as month-day-year
  select(name = Lake_Name, date, height = Level) %>%
  mutate(region = 'WI',
         name.std = name) %>%               # no separate standardized name here
  group_by(name.std, name, date, region) %>%
  summarise(height = mean(height)) %>%
  ungroup()
```
## Make sure all lakes/gauges have standardized names, then average first by gauge and day and then by lake and day, to get one height measurement per lake per day
```{r}
## Join with standardized names
lakes <- read_csv('data/in/lake_properties.csv')

## Check for mismatches: LOCSS gauges with no matching gauge_id in `lakes`
## (one row kept per gauge name).
check <- heights.raw %>%
  filter(!gauge_id %in% lakes$gauge_id) %>%
  distinct(name, .keep_all = TRUE)
## There are 14, but I think these are international gauges

## Wisconsin rows that pick up no lake properties (nSat is NA after the join).
## No `by` is given, so the join uses every column name the two tables share.
check <- wisc.heights.raw %>%
  left_join(lakes) %>%
  filter(is.na(nSat))

## Average by lake then date.
## NOTE: heights.raw is overwritten with the per-lake daily means (gauge_id is
## dropped), so this chunk cannot be re-run without re-running the chunks above.
heights.raw <- heights.raw %>%
  ## Attach the standardized lake name and region; joins on all shared
  ## column names because no `by` is given.
  left_join(
    lakes %>%
      mutate(name.std = name) %>%
      select(name.std, gauge_id, region)
  ) %>%
  ## Range filter applies to the LOCSS readings only and runs before the
  ## unit conversion below; rows with NA height are dropped here as well.
  filter(height >= 0,
         height <= 3.3) %>%
  ## First one mean per gauge per day ...
  group_by(name.std, gauge_id, region, date) %>%
  summarise(height = mean(height)) %>%
  ## ... then one mean per lake per day across its gauges.
  group_by(name.std, date, region) %>%
  summarise(height = mean(height)) %>%
  ungroup() %>%
  ## Gauges that found no match in `lakes` have NA region; drop them.
  filter(!is.na(region))

## Combine LOCSS and Wisconsin records and rescale every height by 0.3048
## (presumably feet -> metres; confirm the raw units of both sources).
heights.munged <- heights.raw %>%
  bind_rows(wisc.heights.raw %>% select(-name)) %>%
  mutate(height = height * 0.3048)

## Check counts: number of daily records per lake, fewest first
check <- heights.munged %>%
  group_by(name.std, region) %>%
  summarise(count = n()) %>%
  arrange(count)

## Make sure the output directory exists (it may be absent on a fresh
## checkout); write_csv does not create it.
dir.create('data/out', recursive = TRUE, showWarnings = FALSE)
write_csv(heights.munged, 'data/out/heights_munged.csv')
```