diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/01_TempMunge.Rmd b/01_TempMunge.Rmd
new file mode 100755
index 0000000..173012e
--- /dev/null
+++ b/01_TempMunge.Rmd
@@ -0,0 +1,359 @@
+---
+title: "01_TempMunge"
+author: "Simon Topp"
+date: "07/08/2021"
+output: html_document
+editor_options: 
+  chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+library(tidyverse)
+library(lubridate)
+library(purrr)
+library(furrr)
+library(data.table)
+library(feather)
+library(sf)
+library(ggpmisc)
+library(ggpubr)
+library(Hmisc)
+library(Metrics)
+
+knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
+```
+
+```{r eval = F, include = F}
+## Read in whatever path you saved the raw pull data to
+files <- list.files('data/EE_TempPull', full.names = T)
+files <- files[file.size(files) > 1] ## 6 WRS tile returned no values
+
+## Munge it a little, basically zero clouds/shadow/ice and
+## at least 5 pixels
+munger <- function(file){
+  df <- read_csv(file) %>%
+    filter(!is.na(temp),
+           cScore_clouds == 0,
+           pCount_water > 5) %>%
+    mutate(LandsatID = map_chr(`system:index`, ~str_split(.,'_0000')[[1]][1])) %>%
+  select(-`system:index`)
+  return(df)
+}
+
+munged <- files %>% map_dfr(munger)
+
+munged <- munged %>% mutate(temp = 0.00341802*temp + 149 - 273.15,
+                                  temp_qa = temp_qa*0.01,
+                                  lake_mix_layer_temperature = lake_mix_layer_temperature - 273.15) %>%
+  rename(temp_e5_mix = lake_mix_layer_temperature, temp_e5_full = lake_total_layer_temperature, 
+         temp_ls = temp, temp_ls_qa = temp_qa, date = DATE_ACQUIRED, sat = SPACECRAFT_ID, 
+         scene_cloud_cover = CLOUD_COVER)
+
+write_feather(munged,'data/out/landsat_temps.feather')
+```
+
+```{r}
+munged <- read_feather('data/out/landsat_temps.feather')
+```
+
+## Between Landsat 7 and 8 we have `r nrow(munged)` temperature observations.
+
+```{r}
+obs <- read_csv('data/in/lake_surface_temp_obs.csv') %>%
+  rename(date = Date)
+
+same_day <- obs %>% inner_join(munged) %>% mutate(join = 'same_day')
+
+day_plus_one <- obs %>% inner_join(munged %>% mutate(date = date + 1)) %>% mutate(join = 'day_plus_1')
+
+day_minus_one <- obs %>% inner_join(munged %>% mutate(date = date -1)) %>% mutate(join = 'day_minus_1')
+
+matchups <- bind_rows(same_day, day_plus_one, day_minus_one) %>%
+  mutate(resid = wtemp_obs - temp_ls,
+         abs_error = abs(resid),
+         e5_flag = abs(wtemp_obs - temp_e5_mix))
+
+matchups_filt <- matchups %>%
+  filter(temp_ls_qa < 3,
+         scene_cloud_cover < 20,
+         distance > 90,
+         temp_ls >=0)
+
+rm(same_day, day_plus_one, day_minus_one)
+```
+
+## Of those, we have `r nrow(matchups)` coincident observations of RS and *in situ* temp (matchups).
+
+Let's look at some *inclusive* metrics. General filters here are:
+
+-   +/- 1 day matchups
+
+-   0 clouds/ice/cloud shadow over the 'deepest point'
+
+```{r}
+ggplot(matchups, aes(x = wtemp_obs, y = temp_ls)) + geom_hex() +
+  scale_fill_viridis_c(trans = 'log10') +
+  geom_smooth(method = 'lm') +
+  stat_poly_eq(formula = y~x, 
+                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
+                parse = TRUE) +
+  labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed')
+
+matchups %>% summarise(rmse = rmse(wtemp_obs, temp_ls),
+                       mae = mae(wtemp_obs,temp_ls),
+                       smape = smape(wtemp_obs,temp_ls),
+                       bias = bias(wtemp_obs, temp_ls)) %>%
+  mutate(across(everything(), ~round(.,4))) %>%
+  kableExtra::kable() %>%
+  kableExtra::kable_styling()
+  
+## One quick look at Era5, it's not great
+ggplot(matchups, aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() +
+  scale_fill_viridis_c(trans = 'log10') +
+  geom_smooth(method = 'lm', aes(color = 'Best Fit')) +
+  geom_abline(aes(slope=1, intercept=0,color = '1:1')) +
+  stat_poly_eq(formula = y~x, 
+                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
+                parse = TRUE) +
+  labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed', titel='blue=Best Fit', 'Red=1:1')
+
+
+matchups %>% filter(!is.na(temp_e5_mix)) %>%
+  summarise(rmse = rmse(wtemp_obs, temp_e5_mix),
+                       mae = mae(wtemp_obs,temp_e5_mix),
+                       smape = smape(wtemp_obs,temp_e5_mix),
+                       bias = bias(wtemp_obs, temp_e5_mix)) %>%
+  mutate(across(everything(), ~round(.,4))) %>%
+  kableExtra::kable() %>%
+  kableExtra::kable_styling()
+```
+
+## Now the same but with *strict* filters. After qc filters we have `r nrow(matchups_filt)` matchups.
+
+General filters here are:
+
+-   +/- 1 day matchups
+
+-   0 clouds/ice/cloud shadow over the 'deepest point'
+
+-   Landsat uncertainty \< 3 degrees
+
+-   Scene cloud cover \< 20% - Distance to shore \> 90m
+
+```{r}
+matchups_filt <- matchups %>%
+  filter(temp_ls_qa < 3,
+         scene_cloud_cover < 10,
+         distance > 200,
+         temp_ls >=0,
+         wtemp_obs >=0,
+         e5_flag < 5,
+         join != 'day_minus_1'
+         )
+
+plotter <- function(var){
+  matchups %>%
+    mutate(bins= cut_number(get(var), 5)) %>%
+    ggplot(aes(x = bins, y = abs_error)) +
+    geom_violin(draw_quantiles = c(.25,.5,.75)) +
+    geom_hline(aes(yintercept = 0), color = 'red')+
+    labs(x = var)
+}
+
+plotter('areakm')
+plotter('scene_cloud_cover')
+plotter('e5_flag')
+plotter('distance')
+plotter('temp_ls_qa')
+
+matchups_filt <- matchups %>%
+  filter(areakm > .2,
+         temp_ls_qa < 3,
+         scene_cloud_cover < 20,
+         distance > 250,
+         temp_ls >= 0,
+         wtemp_obs >=0,
+         e5_flag < 5,
+         join != 'day_minus_1'
+         )
+
+ggplot(matchups_filt, aes(x = wtemp_obs, y = temp_ls)) + geom_hex() +
+  scale_fill_viridis_c(trans = 'log10') +
+  geom_smooth(method = 'lm') +
+  stat_poly_eq(formula = y~x, 
+                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
+                parse = TRUE) +
+  labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed')
+
+matchups_filt %>% summarise(rmse = rmse(wtemp_obs, temp_ls),
+                       mae = mae(wtemp_obs,temp_ls),
+                       smape = smape(wtemp_obs,temp_ls),
+                       bias = bias(wtemp_obs, temp_ls)) %>%
+  mutate(across(everything(), ~round(.,4))) %>%
+  kableExtra::kable() 
+
+check <- matchups_filt %>% mutate(se = (wtemp_obs - temp_ls)^2)
+```
+
+## Finally, just big lakes where we would expect to be most confident (distance to shore \> 1000m)
+
+```{r}
+ggplot(matchups_filt %>% filter(distance > 1000), aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() +
+  scale_fill_viridis_c(trans = 'log10') +
+  geom_smooth(method = 'lm') +
+  stat_poly_eq(formula = y~x, 
+                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
+                parse = TRUE) +
+  labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed')
+
+matchups_filt %>% filter(distance > 1000) %>%
+  summarise(rmse = rmse(wtemp_obs, temp_ls),
+                   mae = mae(wtemp_obs,temp_ls),
+                   smape = smape(wtemp_obs,temp_ls),
+                   bias = bias(wtemp_obs, temp_ls)) %>%
+  mutate(across(everything(), ~round(.,4))) %>%
+  kableExtra::kable() %>%
+  kableExtra::kable_styling()
+```
+
+## Sliced and diced a couple ways
+
+```{r}
+lake_data <- read_csv('data/in/lake_metadata.csv')
+
+matchups <- matchups %>% left_join(lake_data) %>% 
+  mutate(residual = wtemp_obs - temp_ls,
+         abs_error = abs(residual))
+
+ggplot(matchups, aes(x = residual, fill = sat)) +
+  geom_density(alpha = .4) +
+  geom_vline(aes(xintercept = 0), color = 'red') +
+  labs(title = 'Residuals across sensors')
+
+ggplot(matchups, aes(x = residual, fill = join)) +
+  geom_density(alpha = .4) +
+  geom_vline(aes(xintercept = 0), color = 'red') +
+  labs(title = 'Residuals across sensors')
+
+matchups %>%
+  mutate(dist_group = cut_number(distance, 6)) %>%
+  ggplot(aes(x = dist_group, y = residual)) +
+  geom_violin(draw_quantiles = .5) +
+  labs(title = 'Residuals binned by distance to shore')
+
+
+## Spatially
+matchups.sf <- matchups %>% st_as_sf(coords = c('lake_lon_deg','lake_lat_deg'), crs = 4326) %>% st_transform(5070)
+
+usa <- maps::map('usa', plot = F) %>% st_as_sf() %>% st_transform(5070) 
+
+grid <- st_make_grid(usa, cellsize = c(200000,200000), square = F) %>% st_as_sf() %>% mutate(ID = row_number())
+
+rmse.sf <- matchups.sf %>% st_join(grid) %>% st_set_geometry(NULL) %>%
+  group_by(ID) %>%
+  summarise(rmse = rmse(wtemp_obs, temp_ls),
+            matchup.count = n()) %>%
+  inner_join(grid) %>%
+  st_as_sf() 
+
+ggplot(rmse.sf) + geom_sf(aes(fill = rmse)) +
+  scale_fill_viridis_c() +
+  geom_sf(data = usa, fill = 'transparent', color = 'black') +
+  theme_minimal() +
+  labs(title = 'Spatial Error Distribution')
+
+ggplot(rmse.sf) + geom_sf(aes(fill = matchup.count)) +
+  scale_fill_viridis_c(trans='log10') +
+  geom_sf(data = usa, fill = 'transparent', color = 'black') +
+  theme_minimal() +
+  labs(title = 'Matchup Count')
+
+```
+
+## Take a look at within scene variance.
+```{r}
+summary_in <- matchups_filt
+
+scene_sd <- summary_in %>% group_by(LandsatID) %>%
+  summarise(count = n(),
+            resid_sd = sd(resid)) %>%
+  filter(count >5)
+
+overall_sd <- summary_in %>% filter(LandsatID %in% scene_sd$LandsatID) %>%
+  summarise(overall = sd(resid)) %>% .[[1]]
+
+ggplot(scene_sd, aes(x = resid_sd)) + geom_histogram() + geom_vline(aes(xintercept = overall_sd, color = 'Overall residual SD'))
+
+scene_counts <- dplyr::count(summary_in, LandsatID) %>% filter(n > 5)
+
+scene_bias <- summary_in %>% 
+  filter(LandsatID %in% scene_counts$LandsatID) %>%
+  group_by(LandsatID) %>%
+  summarise(bias = bias(wtemp_obs, temp_ls))
+
+scene_stats <- summary_in %>% 
+  filter(LandsatID %in% scene_counts$LandsatID) %>%
+  left_join(scene_bias) %>%
+  mutate(temp_no_bias = temp_ls+bias) %>%
+  group_by(LandsatID) %>%
+  summarise(rmse_bias_removed = rmse(wtemp_obs, temp_no_bias),
+            scene_bias = mean(bias))
+
+overall_bias = summary_in %>% filter(LandsatID %in% scene_counts$LandsatID) %>%
+  summarise(bias = bias(wtemp_obs, temp_ls)) %>%
+  .[[1]]
+
+overall_rmse = summary_in %>% filter(LandsatID %in% scene_counts$LandsatID) %>%
+  mutate(temp_ls = temp_ls + overall_bias) %>%
+  summarise(rmse = rmse(wtemp_obs,temp_ls))
+rmses <- tibble(overall_rmse_bias_removed = overall_rmse[[1]], median_scene_rmse_bias_removed = median(scene_stats$rmse_bias_removed)) %>%
+  pivot_longer(everything(), names_to = "RMSE_Summaries")
+
+ggplot(scene_stats, aes(x = rmse_bias_removed)) + geom_histogram() + 
+  geom_vline(data = rmses, aes(xintercept = value, color = RMSE_Summaries)) +
+  labs(x = "Distribution of image specific rmse after removing image bias")
+```
+
+
+```{r}
+mendota <- read_csv('data/in/Mendota_Era5Land_1984_2021.csv') 
+mendotaObs <- obs %>% filter(site_id == "nhdhr_143249470")
+
+mendota <- mendota %>% mutate(lake_mix_layer_temperature = lake_mix_layer_temperature - 273.15,
+                              date = as.Date(date)) %>%
+  inner_join(mendotaObs)
+
+library(plotly)
+
+ggplotly(
+  ggplot(mendota, aes(x=date)) +
+    geom_point(aes(y = lake_mix_layer_temperature, color='Era5')) +
+    geom_point(aes(y=wtemp_obs, color = 'Obs')) +
+    geom_line(aes(y = lake_mix_layer_temperature, color='Era5')) +
+    geom_line(aes(y=wtemp_obs, color = 'Obs')))
+
+ggplot(mendota, aes(x=lake_mix_layer_temperature)) + geom_histogram()
+
+ggplot
+
+
+mendota %>% rename(temp_e5_mix = lake_mix_layer_temperature) %>%
+  summarise(rmse = rmse(wtemp_obs, temp_e5_mix),
+                       mae = mae(wtemp_obs,temp_e5_mix),
+                       smape = smape(wtemp_obs,temp_e5_mix),
+                       bias = bias(wtemp_obs, temp_e5_mix)) %>%
+  mutate(across(everything(), ~round(.,4))) %>%
+  kableExtra::kable() %>%
+  kableExtra::kable_styling()
+
+mendota %>% rename(temp_e5_mix = lake_mix_layer_temperature) %>%
+ggplot(.,aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() +
+  scale_fill_viridis_c(trans = 'log10') +
+  geom_smooth(method = 'lm', aes(color = 'Best Fit')) +
+  geom_abline(aes(slope=1, intercept=0,color = '1:1')) +
+  stat_poly_eq(formula = y~x, 
+                aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
+                parse = TRUE) +
+  labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed', titel='blue=Best Fit', 'Red=1:1')
+```
+
diff --git a/02_raw_trends.R b/02_raw_trends.R
new file mode 100755
index 0000000..cea5a1d
--- /dev/null
+++ b/02_raw_trends.R
@@ -0,0 +1,135 @@
+#### This script is an initial analysis of lake surface temperature trends
+#### using temperatures from the Landsat.  ITS VERY PRELIMINARY and hacked together using
+#### two different EE pulls.
+library(tidyverse)
+library(feather)
+library(lubridate)
+library(trend)
+library(sf)
+library(scales)
+
+l7l8 <- read_feather('data/out/landsat_temps.feather')
+l7l8 <- l7l8 %>%
+  select(areakm, sat, date, distance, site_id, temp_ls, temp_ls_qa) %>%
+  filter(sat=='LANDSAT_7')
+
+ggplot(l7l8, aes(x=temp_ls)) + geom_histogram()
+
+l8l9 <- read_csv('/Users/stopp/Documents/repos/landsat-lake-temp-pull/data/out/L8_L9_cb_temps_munged.csv')
+l8l9 <- l8l9 %>%
+  select(areakm, sat = SPACECRAFT_ID, date, distance, site_id, temp_ls = ls_temp, temp_ls_qa = ls_temp_qa) %>%
+  mutate(sat=as.character(sat))
+
+ggplot(l8l9, aes(x=temp_ls)) + geom_histogram()
+
+ls_temps <- bind_rows(l7l8,l8l9) %>%
+  mutate(site_id = factor(site_id))
+
+rm(l7l8,l8l9)
+
+seasonally_aggregated <- ls_temps %>%
+  mutate(month = month(date),
+         year = year(date),
+         season = case_when((month %in% c(6,7,8)) ~ 'JJA',
+                            (month %in% c(9,10,11)) ~ "SON",
+                            (month %in% c(12,1,2)) ~ 'DJF',
+                            (month %in% c(3,4,5)) ~'MAM'),
+         season=factor(season)) %>%
+  group_by(site_id, year, season, distance, areakm) %>%
+  summarize(temp_ls = mean(temp_ls),
+            count = n())
+
+seasonally_aggregated <- seasonally_aggregated %>% filter(count > 1)
+
+counts <- seasonally_aggregated %>%
+  group_by(site_id, season) %>%
+  mutate(count = n())
+
+seasonally_aggregated <- seasonally_aggregated[counts$count > 20,]
+
+trends <- seasonally_aggregated %>% 
+  group_by(site_id, season) %>%
+  arrange(year) %>%
+  nest() %>%
+  mutate(mk = purrr::map(data, ~sens.slope(.$temp_ls)),
+         sen.slope = purrr::map_dbl(mk, 'estimates'),
+         p.value = purrr::map_dbl(mk, 'p.value')) %>%
+  select(site_id, season, sen.slope, p.value)
+
+ggplot(trends, aes(x=sen.slope, fill = season)) + 
+  geom_density(alpha=.4) +
+  xlim(-.3,.4) +
+  geom_vline(aes(xintercept=0), color = 'red') +
+  scale_fill_viridis_d()
+
+### Look at spatial trends
+lakes_sf <- read_csv('data/in/lake_metadata_20211217.csv') %>%
+  st_as_sf(coords = c('lake_lon_deg','lake_lat_deg'), crs = 4326) %>%
+  select(site_id, elevation_m, area_m2) %>%
+  st_transform(5070)
+
+trends_sf <- lakes_sf %>% inner_join(trends)
+
+usa <- maps::map('usa', plot = F) %>% st_as_sf() %>% st_transform(5070) 
+
+grid <- st_make_grid(usa, cellsize = c(75000,75000), square = F) %>% st_as_sf() %>% mutate(ID = row_number())
+
+grid_means <- grid %>% st_join(trends_sf %>% filter(abs(sen.slope) < .4) %>%st_transform(st_crs(grid)), left = F) %>%
+  st_set_geometry(NULL) %>% group_by(ID, season) %>%
+  summarize(mean_trend = mean(sen.slope))
+
+means_sf <- grid %>% inner_join(grid_means)
+
+ggplot() + 
+  geom_sf(data = usa) +
+  geom_sf(data = means_sf, aes(fill = mean_trend)) +
+  scale_fill_gradient2(low = muted('blue'), high=muted('red'), 'Mean Trend (°C/yr)') +
+  ggthemes::theme_map(base_size = 11) +
+  theme(legend.position = 'bottom') +
+  facet_wrap(~season) +
+  ggtitle("Mean Trend in Lake Temperature Since 1999")
+
+### Binned by Elevation
+trends_sf %>%
+  st_set_geometry(NULL) %>%
+  mutate(elevation_bin = cut_interval(elevation_m, 5)) %>%
+  ggplot(aes(x=elevation_bin,y = sen.slope,fill=elevation_bin)) +
+  coord_cartesian(ylim=c(-.15,.25)) +
+  geom_violin() +
+  geom_boxplot(width=.2) +
+  scale_fill_viridis_d(option='plasma', alpha=.3) +
+  theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1),
+        legend.position = 'none') +
+  labs(x = 'Elevation (m)', y = 'Trend (°C/yr)') 
+
+### Binnned by lake size
+trends_sf %>%
+  st_set_geometry(NULL) %>%
+  mutate(size_bin = cut_number(area_m2/1e6, 10)) %>%
+  ggplot(aes(x=size_bin,y = sen.slope,fill=size_bin)) +
+  coord_cartesian(ylim=c(-.15,.25)) +
+  geom_violin() +
+  geom_boxplot(width=.2) +
+  scale_fill_viridis_d(option='plasma', alpha=.3) +
+  theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1),
+        legend.position = 'none') +
+  labs(x = 'Lake Size Bin (km2)', y = 'Trend (°C/yr)') 
+
+### Look at actual timeseries to see if they look really goofy
+seasonal_summary <- seasonally_aggregated %>%
+  ungroup() %>%
+  mutate(area_bin = cut_number(areakm, 10)) %>%
+  group_by(season, site_id, area_bin) %>%
+  mutate(temp_scaled = scale(temp_ls)) %>%
+  ungroup() %>%
+  group_by(season, year, area_bin) %>%
+  summarise(mean_temp= mean(temp_scaled),
+            sd=sd(temp_scaled))
+
+### 
+ggplot(seasonal_summary, aes(x=year, y = mean_temp, color = area_bin, fill = area_bin)) +
+  geom_ribbon(aes(ymin = mean_temp-sd, ymax=mean_temp+sd), alpha = .3) +
+  geom_line() +
+  facet_wrap(~season)
+
+
diff --git a/GEE_pull_functions.py b/GEE_pull_functions.py
new file mode 100755
index 0000000..0dc2ad9
--- /dev/null
+++ b/GEE_pull_functions.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Google Earth Engine Reflectance Pull Functions
+
+@author: simontopp
+"""
+import ee
+import time
+import os
+
+## Recreate the fmask based on Collection 2 Pixel QA band
+def AddFmask(image):
+    qa = image.select('pixel_qa')
+    water = qa.bitwiseAnd(1 << 7)
+    cloud = qa.bitwiseAnd(1 << 1).Or(qa.bitwiseAnd(1 << 2)).Or(qa.bitwiseAnd(1 << 3))
+    snow = qa.bitwiseAnd(1 << 5)
+    cloudshadow = qa.bitwiseAnd(1 << 4)
+
+    fmask = (water.gt(0).rename('fmask')
+             .where(snow.gt(0), ee.Image(3))
+             .where(cloudshadow.gt(0), ee.Image(2))
+             .where(cloud.gt(0), ee.Image(4))
+             .updateMask(qa.gte(0)))
+    ## mask the fmask so that it has the same footprint as the quality (BQA) band
+    return image.addBands(fmask)
+
+
+## Buffer the lake sites either 120 minutes or the distance to shore, whichever
+## is smaller
+def dpBuff(i):
+    dist = i.get('distance')
+    buffdist = ee.Number(dist).min(120)
+    return i.buffer(buffdist)
+
+
+
+## Set up the reflectance pull
+def RefPull(image):
+    f = AddFmask(image).select('fmask')
+    water = f.eq(1).rename('water')
+    clouds = f.gte(2).rename('clouds')
+
+    era5match = (era5.filterDate(ee.Date(image.get('system:time_start')).update(None, None, None, None, 0,0)))
+
+    era5out = (ee.Algorithms.If(era5match.size(),
+                     ee.Image(era5match.first()).select('lake_mix_layer_temperature', 'lake_total_layer_temperature'),
+                     ee.Image(0).rename('lake_mix_layer_temperature').addBands(ee.Image(0).rename('lake_total_layer_temperature')),
+                     )
+               )
+
+    pixOut = (image.select('temp', 'temp_qa')
+              .updateMask(water)
+              .addBands(era5out)
+              .addBands(clouds)
+              .addBands(water))
+
+    combinedReducer = (ee.Reducer.median().unweighted()
+        .forEachBand(pixOut.select('temp', 'temp_qa', 'lake_mix_layer_temperature', 'lake_total_layer_temperature'))
+        .combine(ee.Reducer.mean().unweighted().forEachBand(pixOut.select('clouds')), 'cScore_', False)
+        .combine(ee.Reducer.count().unweighted().forEachBand(pixOut.select('water')), 'pCount_', False))
+
+    def copyMeta(i):
+        return i.copyProperties(image, ["CLOUD_COVER", 'SPACECRAFT_ID', 'system:index', 'DATE_ACQUIRED'])
+    
+    ## Remove geometries
+    def removeGeo(i):
+        return i.setGeometry(None)
+
+    # Collect reflectance values, cloud score, and pixel counts
+    lsout = pixOut.reduceRegions(lakes, combinedReducer, 30)
+
+    out = lsout.map(removeGeo)
+    out = out.map(copyMeta)
+
+    return out
+
+
+## Function for limiting the max number of tasks sent to
+## earth engine at one time to avoid time out errors
+
+def maximum_no_of_tasks(MaxNActive, waitingPeriod):
+    ## maintain a maximum number of active tasks
+    time.sleep(10)
+    ## initialize submitting jobs
+    ts = list(ee.batch.Task.list())
+
+    NActive = 0
+    for task in ts:
+        if ('RUNNING' in str(task) or 'READY' in str(task)):
+            NActive += 1
+    ## wait if the number of current active tasks reach the maximum number
+    ## defined in MaxNActive
+    while (NActive >= MaxNActive):
+        time.sleep(waitingPeriod) 
+        ts = list(ee.batch.Task.list())
+        NActive = 0
+        for task in ts:
+            if ('RUNNING' in str(task) or 'READY' in str(task)):
+                NActive += 1
+    return ()
+
diff --git a/LakeExport.py b/LakeExport.py
new file mode 100755
index 0000000..2fed694
--- /dev/null
+++ b/LakeExport.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+@author: simontopp
+"""
+
+import time
+import ee
+import os
+
+## Initialize Earth Engine
+ee.Initialize()
+
+## Source necessary functions. We do this instead of 'import' because of EE quirk.
+exec(open('GEE_pull_functions.py').read())
+
+## Bring in EE Assets
+## Deepest point (Chebyshev center) for CONUS PGDL Lakes
+## DP Code available at https://zenodo.org/record/4136755#.X5d54pNKgUE and
+## https://code.earthengine.google.com/8dac409b220bdfb051bb469bc5b3c708
+dp = (ee.FeatureCollection('users/sntopp/USGS/PGDL_lakes_deepest_point')
+  .filterMetadata('distance', "greater_than", 60))
+
+## CONUS Boundary
+us = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")\
+    .filterMetadata('country_na', 'equals', 'United States')
+
+## WRS Tiles in descending (daytime) mode for CONUS
+wrs = ee.FeatureCollection('users/sntopp/wrs2_asc_desc')\
+    .filterBounds(us)\
+    .filterMetadata('MODE', 'equals', 'D')
+
+## Run everything by path/row to speed up computation in EE    
+pr = wrs.aggregate_array('PR').getInfo()
+
+## Bring in temp data
+l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")
+l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2")
+era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
+
+## Standardize band names between the various collections 
+bn8 = ['ST_B10', 'ST_QA', 'QA_PIXEL']
+bn57 = ['ST_B6', 'ST_QA', 'QA_PIXEL']
+bns = ['temp','temp_qa','pixel_qa']
+  
+ls7 = l7.select(bn57, bns)
+ls8 = l8.select(bn8, bns)
+
+## Do coarse cloud filtering and aggregate 
+## them into one image collection
+ls = ee.ImageCollection(ls7.merge(ls8))\
+    .filter(ee.Filter.lt('CLOUD_COVER', 50))\
+    .filterBounds(us)  
+
+
+## Set up a counter and a list to keep track of what's been done already
+if not os.path.isfile('log.txt'):
+    open('log.txt', 'x')
+
+done = open('log.txt', 'r')
+done = [x.strip() for x in done.readlines()]
+done = list(map(int, done))
+counter = len(done)
+
+## In case something goofed, you should be able to just 
+## re-run this chunk with the following line filtering out 
+## what's already run. 
+pr = [i for i in pr if i not in done]
+
+for tiles in pr:
+    tile = wrs.filterMetadata('PR', 'equals', tiles)
+    
+    lakes = dp.filterBounds(tile.geometry())\
+        .map(dpBuff)
+        
+    stack = ls.filterBounds(tile.geometry().centroid())
+    out = stack.map(RefPull).flatten().filterMetadata('cScore_clouds','less_than',.5)
+    dataOut = ee.batch.Export.table.toDrive(collection = out,\
+                                            description = str(tiles),\
+                                            folder = 'EE_TempPull',\
+                                            fileFormat = 'csv',\
+                                            selectors = ["system:index", "areakm", "cScore_clouds", "CLOUD_COVER", 'SPACECRAFT_ID', 'DATE_ACQUIRED', "distance", "lake_mix_layer_temperature", "lake_total_layer_temperature", "pCount_water", "site_id", "temp", "temp_qa"])
+    
+    ## Check how many existing tasks are running and take a break if it's >15
+    maximum_no_of_tasks(25, 120)
+    ## Send next task.
+    dataOut.start()
+    counter = counter + 1
+    done.append(tiles)
+    f = open('log.txt', 'a')
+    f.write(str(tiles) + '\n')
+    f.close()
+
+    print('done_' + str(counter) + '_' + str(tiles))
+        
+
+
diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py
new file mode 100755
index 0000000..c8fa1cf
--- /dev/null
+++ b/NHD_Deepest_Point_Pull.py
@@ -0,0 +1,134 @@
+import ee
+import math
+import time
+ee.Initialize()
+
+## Deepest point calculation adapted from Xiao Yang
+### https: // doi.org / 10.5281 / zenodo.4136754
+### Functions
+def get_scale(polygon):
+    radius = polygon.get('areasqkm')
+    radius = ee.Number(radius).divide(math.pi).sqrt().multiply(1000)
+    scale = radius.divide(20)
+    #scale = ee.Algorithms.If(ee.Number(scale).lte(10),10,scale)
+    scale = ee.Algorithms.If(ee.Number(scale).gte(500),500,scale)
+    return ee.Number(scale)
+
+
+def getUTMProj(lon, lat):
+    # see
+    # https: // apollomapping.com / blog / gtm - finding - a - utm - zone - number - easily and
+    # https: // sis.apache.org / faq.html
+    utmCode = ee.Number(lon).add(180).divide(6).ceil().int()
+    output = ee.Algorithms.If(ee.Number(lat).gte(0),
+                              ee.String('EPSG:326').cat(utmCode.format('%02d')),
+                              ee.String('EPSG:327').cat(utmCode.format('%02d')))
+    return output
+
+def GetLakeCenters(polygon):
+
+    ## Calculate both the deepest point an centroid
+    ## for the inpout polygon ( or multipolygon)
+    ## for each input, export geometries for both centroid and deepest point and their distance to shore.
+    scale = get_scale(polygon)
+    geo = polygon.geometry()
+    ct = geo.centroid(scale)
+    utmCode = getUTMProj(ct.coordinates().getNumber(0), ct.coordinates().getNumber(1))
+
+    polygonImg = ee.Image.constant(1).toByte().paint(ee.FeatureCollection(ee.Feature(geo, None)), 0)
+
+    dist = polygonImg.fastDistanceTransform(2056).updateMask(polygonImg.Not()).sqrt().reproject(utmCode, None, scale).multiply(scale) # convert unit from pixel to meter
+
+    # dist = (polygonImg.fastDistanceTransform(2056).updateMask(polygonImg.not ())
+    # .sqrt().reproject('EPSG:4326', None, scale).multiply(scale)  # convert unit from pixel to meter
+
+    maxDistance = (dist.reduceRegion(
+        reducer=ee.Reducer.max(),
+        geometry=geo,
+        scale=scale,
+        bestEffort=True,
+        tileScale=1
+    ).getNumber('distance').int16())
+
+    outputDp = (ee.Feature(dist.addBands(ee.Image.pixelLonLat()).updateMask(dist.gte(maxDistance))
+                          .sample(geo, scale).first()))
+
+    dp = ee.Geometry.Point([outputDp.get('longitude'), outputDp.get('latitude')])
+
+    regions = ee.FeatureCollection([ee.Feature(dp, {'type': 'dp'})])
+
+    output = dist.sampleRegions(
+        collection=regions,
+        properties=['type'],
+        scale=scale,
+        tileScale=1,
+        geometries=True)
+
+    return (ee.Feature(output.first()).copyProperties(polygon,['permanent','areasqkm']))
+
+def buff_dp(dp):
+    return dp.buffer(dp.getNumber('distance'))
+
+
+def maximum_no_of_tasks(MaxNActive, waitingPeriod):
+    ## maintain a maximum number of active tasks
+    time.sleep(10)
+    ## initialize submitting jobs
+    ts = list(ee.batch.Task.list())
+
+    NActive = 0
+    for task in ts:
+        if ('RUNNING' in str(task) or 'READY' in str(task)):
+            NActive += 1
+    ## wait if the number of current active tasks reach the maximum number
+    ## defined in MaxNActive
+    while (NActive >= MaxNActive):
+        time.sleep(waitingPeriod)
+        ts = list(ee.batch.Task.list())
+        NActive = 0
+        for task in ts:
+            if ('RUNNING' in str(task) or 'READY' in str(task)):
+                NActive += 1
+    return ()
+
+def calc_area(feature):
+    return(feature.set('area_calc',feature.area().divide(1e6)))
+
+assets_done = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint'})['assets']
+ids_done = [i['id'].split('/')[-1] for i in assets_done]
+assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets']
+assets_parent = [i for i in assets_parent if i['id'].split('/')[-1] not in ids_done]
+#assets_parent = [i for i in assets_parent if i['id'].split('/')[-1] not in ['NHD_MO','NHD_TX','NHD_AK']]
+
+
+for i in range(len(assets_parent)):
+    state_asset = assets_parent[i]['id']
+    assets_state = (ee.FeatureCollection(f"{state_asset}/NHDWaterbody")
+    .filter(ee.Filter.gte('areasqkm',0.001))
+    .filter(ee.Filter.lte('areasqkm',5000))  #Remove Great Lakes
+    .filter(ee.Filter.inList('ftype',[361,436,390])))
+
+    ## Added to trouble shoot NM where some of the NHD areas are don't match the actual polygon areas
+    assets_state=ee.FeatureCollection(assets_state.map(calc_area)).filter(ee.Filter.gte('area_calc',0.001))
+
+    dp = assets_state.map(GetLakeCenters)
+
+    dataOut = ee.batch.Export.table.toAsset(collection=dp,description=state_asset.split('/')[-1],assetId=f"projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint/{state_asset.split('/')[-1]}")
+
+    ## Check how many existing tasks are running and take a break if it's >15
+    maximum_no_of_tasks(15, 240)
+    ## Send next task.
+    dataOut.start()
+    print(state_asset.split('/')[-1])
+
+#f"projects/earthengine-legacy/assets/users/sntopp/NHD/{state_asset.split('/')[-1]}/NHDDeepestPoint"
+
+
+state_dps = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint'})['assets']
+
+for i in state_dps:
+    dps = ee.FeatureCollection(i['id'])
+    id =  i['id'].split('/')[-1]
+    dpsOut = ee.batch.Export.table.toDrive(dps,id,'EE_DP_Exports',id)
+    maximum_no_of_tasks(15, 60)
+    dpsOut.start()
\ No newline at end of file
diff --git a/NHD_HR_Lake_Pull.R b/NHD_HR_Lake_Pull.R
new file mode 100755
index 0000000..e364dc7
--- /dev/null
+++ b/NHD_HR_Lake_Pull.R
@@ -0,0 +1,105 @@
+## Mess around with tyler kings stuff
+library(tidyverse)
+library(sf)
+library(mapview)
+library(feather)
+mapviewOptions(fgb=FALSE)
+
+tk_resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv')
+
+
+### Read in the exports from EE and do a little munging
+files <- list.files('../../../My Drive/EE_DP_Exports', full.names = T)
+
+options(digits=15)
+dp_national <- read_csv(files[1]) %>%
+  mutate(coords = map(.geo, ~as.numeric(unlist(regmatches(.x,gregexpr("-?[[:digit:]]+\\.*-?[[:digit:]]*",.x))))),
+         long = map_dbl(coords, 1),
+         lat = map_dbl(coords,2),
+         distance = round(distance,3),
+         areasqkm = round(areasqkm, 4)) %>%
+  select(-c(coords, .geo,`system:index`))
+
+for(i in files[2:length(files)]){
+  state <- read_csv(i,show_col_types = F) %>%
+    mutate(coords = map(.geo, ~as.numeric(unlist(regmatches(.x,gregexpr("-?[[:digit:]]+\\.*-?[[:digit:]]*",.x))))),
+           long = map_dbl(coords, 1),
+           lat = map_dbl(coords,2),
+           distance = round(distance,3),
+           areasqkm = round(areasqkm, 4)) %>%
+    select(-c(coords, .geo,`system:index`))
+  
+  dp_national <- dp_national %>%
+    bind_rows(state) %>%
+    distinct(permanent,.keep_all=T)
+}
+
+dp_national <- dp_national %>%
+  select(areasqkm:lat)
+
+#write_feather(dp_national, 'data/out/nhd_hr_1ha_deepest_point_noTX.feather')
+dp_national <- read_feather('data/out/nhd_hr_1ha_deepest_point_noTX.feather')
+tk_resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv')
+
+
+match_ups <- dp_national %>%
+  inner_join(.,tk_resolvable, by = c('permanent'='permanent_identifier'))
+
+rm(dp_national, tk_resolvable)
+
+match_ups <- match_ups %>% mutate(resolvable = ifelse(resolvable == 1, 'Konrad resolvable', 'Konrad not resolvable'))
+
+thresholds <- tibble(distance = c(20,30,40,50,60,70,80,90))
+
+tk_resolvable <- match_ups %>% filter(resolvable == 'Konrad resolvable')
+
+thresholds$counts <- colSums(outer(tk_resolvable$distance, setNames(thresholds$distance, thresholds$distance), "<"))
+thresholds$labels <- paste0(thresholds$distance,' meters (n=',thresholds$counts,')')
+
+tk_resolvable %>%
+  ggplot(.,aes(x=distance)) + stat_bin(aes(y=cumsum(..count..)),geom="step") +
+  geom_vline(data= thresholds, aes(xintercept=distance, color = labels)) +
+  scale_color_viridis_d() +
+  scale_x_log10() +
+  theme_bw() +
+  facet_wrap(~resolvable, scales = 'free') +
+  labs(x = 'Maximum Distance from Shore (Meters)',
+       y= 'Cumulative Lake Count',
+       color='Res. Threshold (n disagreement)')
+
+
+thresholds <- tibble(distance = c(20,30,40,50,60,70,80,90))
+
+tk_non_resolvable <- match_ups %>% filter(resolvable == 'Konrad not resolvable')
+
+thresholds$counts <- colSums(outer(tk_non_resolvable$distance, setNames(thresholds$distance, thresholds$distance), ">"))
+thresholds$labels <- paste0(thresholds$distance,' meters (n=',thresholds$counts,')')
+
+tk_non_resolvable %>%
+  ggplot(.,aes(x=distance)) + stat_bin(aes(y=cumsum(..count..)),geom="step") +
+  geom_vline(data= thresholds, aes(xintercept=distance, color = labels)) +
+  scale_color_viridis_d() +
+  scale_x_log10() +
+  theme_bw() +
+  facet_wrap(~resolvable, scales = 'free') +
+  labs(x = 'Maximum Distance from Shore (Meters)',
+       y= 'Cumulative Lake Count',
+       color='Res. Threshold (n disagreement)')
+
+
+
+tk30_res <- tk_resolvable %>% filter(distance < 30)
+tk30_not_res <- tk_non_resolvable %>% filter(distance >30)
+
+ggplot(tk30_res, aes(x = areasqkm)) +
+  geom_histogram()
+
+ggplot(tk30_not_res, aes(x= areasqkm)) + 
+  geom_histogram() +
+  scale_x_log10()
+
+
+tk30_res %>% slice_sample(prop=.2) %>% 
+  st_as_sf(., coords=c('long','lat'), crs=4326) %>%
+  mapview(zcol = 'distance')
+  
diff --git a/README.md b/README.md
old mode 100644
new mode 100755