diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/01_TempMunge.Rmd b/01_TempMunge.Rmd new file mode 100755 index 0000000..173012e --- /dev/null +++ b/01_TempMunge.Rmd @@ -0,0 +1,359 @@ +--- +title: "01_TempMunge" +author: "Simon Topp" +date: "07/08/2021" +output: html_document +editor_options: + chunk_output_type: console +--- + +```{r setup, include=FALSE} +library(tidyverse) +library(lubridate) +library(purrr) +library(furrr) +library(data.table) +library(feather) +library(sf) +library(ggpmisc) +library(ggpubr) +library(Hmisc) +library(Metrics) + +knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) +``` + +```{r eval = F, include = F} +## Read in whatever path you saved the raw pull data to +files <- list.files('data/EE_TempPull', full.names = T) +files <- files[file.size(files) > 1] ## 6 WRS tile returned no values + +## Munge it a little, basically zero clouds/shadow/ice and +## at least 5 pixels +munger <- function(file){ + df <- read_csv(file) %>% + filter(!is.na(temp), + cScore_clouds == 0, + pCount_water > 5) %>% + mutate(LandsatID = map_chr(`system:index`, ~str_split(.,'_0000')[[1]][1])) %>% + select(-`system:index`) + return(df) +} + +munged <- files %>% map_dfr(munger) + +munged <- munged %>% mutate(temp = 0.00341802*temp + 149 - 273.15, + temp_qa = temp_qa*0.01, + lake_mix_layer_temperature = lake_mix_layer_temperature - 273.15) %>% + rename(temp_e5_mix = lake_mix_layer_temperature, temp_e5_full = lake_total_layer_temperature, + temp_ls = temp, temp_ls_qa = temp_qa, date = DATE_ACQUIRED, sat = SPACECRAFT_ID, + scene_cloud_cover = CLOUD_COVER) + +write_feather(munged,'data/out/landsat_temps.feather') +``` + +```{r} +munged <- read_feather('data/out/landsat_temps.feather') +``` + +## Between Landsat 7 and 8 we have `r nrow(munged)` temperature observations. + +```{r} +obs <- read_csv('data/in/lake_surface_temp_obs.csv') %>% + rename(date = Date) + +same_day <- obs %>% inner_join(munged) %>% mutate(join = 'same_day') + +day_plus_one <- obs %>% inner_join(munged %>% mutate(date = date + 1)) %>% mutate(join = 'day_plus_1') + +day_minus_one <- obs %>% inner_join(munged %>% mutate(date = date -1)) %>% mutate(join = 'day_minus_1') + +matchups <- bind_rows(same_day, day_plus_one, day_minus_one) %>% + mutate(resid = wtemp_obs - temp_ls, + abs_error = abs(resid), + e5_flag = abs(wtemp_obs - temp_e5_mix)) + +matchups_filt <- matchups %>% + filter(temp_ls_qa < 3, + scene_cloud_cover < 20, + distance > 90, + temp_ls >=0) + +rm(same_day, day_plus_one, day_minus_one) +``` + +## Of those, we have `r nrow(matchups)` coincident observations of RS and *in situ* temp (matchups). + +Let's look at some *inclusive* metrics. General filters here are: + +- +/- 1 day matchups + +- 0 clouds/ice/cloud shadow over the 'deepest point' + +```{r} +ggplot(matchups, aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed') + +matchups %>% summarise(rmse = rmse(wtemp_obs, temp_ls), + mae = mae(wtemp_obs,temp_ls), + smape = smape(wtemp_obs,temp_ls), + bias = bias(wtemp_obs, temp_ls)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() + +## One quick look at Era5, it's not great +ggplot(matchups, aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm', aes(color = 'Best Fit')) + + geom_abline(aes(slope=1, intercept=0,color = '1:1')) + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed', titel='blue=Best Fit', 'Red=1:1') + + +matchups %>% filter(!is.na(temp_e5_mix)) %>% + summarise(rmse = rmse(wtemp_obs, temp_e5_mix), + mae = mae(wtemp_obs,temp_e5_mix), + smape = smape(wtemp_obs,temp_e5_mix), + bias = bias(wtemp_obs, temp_e5_mix)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() +``` + +## Now the same but with *strict* filters. After qc filters we have `r nrow(matchups_filt)` matchups. + +General filters here are: + +- +/- 1 day matchups + +- 0 clouds/ice/cloud shadow over the 'deepest point' + +- Landsat uncertainty \< 3 degrees + +- Scene cloud cover \< 20% - Distance to shore \> 90m + +```{r} +matchups_filt <- matchups %>% + filter(temp_ls_qa < 3, + scene_cloud_cover < 10, + distance > 200, + temp_ls >=0, + wtemp_obs >=0, + e5_flag < 5, + join != 'day_minus_1' + ) + +plotter <- function(var){ + matchups %>% + mutate(bins= cut_number(get(var), 5)) %>% + ggplot(aes(x = bins, y = abs_error)) + + geom_violin(draw_quantiles = c(.25,.5,.75)) + + geom_hline(aes(yintercept = 0), color = 'red')+ + labs(x = var) +} + +plotter('areakm') +plotter('scene_cloud_cover') +plotter('e5_flag') +plotter('distance') +plotter('temp_ls_qa') + +matchups_filt <- matchups %>% + filter(areakm > .2, + temp_ls_qa < 3, + scene_cloud_cover < 20, + distance > 250, + temp_ls >= 0, + wtemp_obs >=0, + e5_flag < 5, + join != 'day_minus_1' + ) + +ggplot(matchups_filt, aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed') + +matchups_filt %>% summarise(rmse = rmse(wtemp_obs, temp_ls), + mae = mae(wtemp_obs,temp_ls), + smape = smape(wtemp_obs,temp_ls), + bias = bias(wtemp_obs, temp_ls)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() + +check <- matchups_filt %>% mutate(se = (wtemp_obs - temp_ls)^2) +``` + +## Finally, just big lakes where we would expect to be most confident (distance to shore \> 1000m) + +```{r} +ggplot(matchups_filt %>% filter(distance > 1000), aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed') + +matchups_filt %>% filter(distance > 1000) %>% + summarise(rmse = rmse(wtemp_obs, temp_ls), + mae = mae(wtemp_obs,temp_ls), + smape = smape(wtemp_obs,temp_ls), + bias = bias(wtemp_obs, temp_ls)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() +``` + +## Sliced and diced a couple ways + +```{r} +lake_data <- read_csv('data/in/lake_metadata.csv') + +matchups <- matchups %>% left_join(lake_data) %>% + mutate(residual = wtemp_obs - temp_ls, + abs_error = abs(residual)) + +ggplot(matchups, aes(x = residual, fill = sat)) + + geom_density(alpha = .4) + + geom_vline(aes(xintercept = 0), color = 'red') + + labs(title = 'Residuals across sensors') + +ggplot(matchups, aes(x = residual, fill = join)) + + geom_density(alpha = .4) + + geom_vline(aes(xintercept = 0), color = 'red') + + labs(title = 'Residuals across sensors') + +matchups %>% + mutate(dist_group = cut_number(distance, 6)) %>% + ggplot(aes(x = dist_group, y = residual)) + + geom_violin(draw_quantiles = .5) + + labs(title = 'Residuals binned by distance to shore') + + +## Spatially +matchups.sf <- matchups %>% st_as_sf(coords = c('lake_lon_deg','lake_lat_deg'), crs = 4326) %>% st_transform(5070) + +usa <- maps::map('usa', plot = F) %>% st_as_sf() %>% st_transform(5070) + +grid <- st_make_grid(usa, cellsize = c(200000,200000), square = F) %>% st_as_sf() %>% mutate(ID = row_number()) + +rmse.sf <- matchups.sf %>% st_join(grid) %>% st_set_geometry(NULL) %>% + group_by(ID) %>% + summarise(rmse = rmse(wtemp_obs, temp_ls), + matchup.count = n()) %>% + inner_join(grid) %>% + st_as_sf() + +ggplot(rmse.sf) + geom_sf(aes(fill = rmse)) + + scale_fill_viridis_c() + + geom_sf(data = usa, fill = 'transparent', color = 'black') + + theme_minimal() + + labs(title = 'Spatial Error Distribution') + +ggplot(rmse.sf) + geom_sf(aes(fill = matchup.count)) + + scale_fill_viridis_c(trans='log10') + + geom_sf(data = usa, fill = 'transparent', color = 'black') + + theme_minimal() + + labs(title = 'Matchup Count') + +``` + +## Take a look at within scene variance. +```{r} +summary_in <- matchups_filt + +scene_sd <- summary_in %>% group_by(LandsatID) %>% + summarise(count = n(), + resid_sd = sd(resid)) %>% + filter(count >5) + +overall_sd <- summary_in %>% filter(LandsatID %in% scene_sd$LandsatID) %>% + summarise(overall = sd(resid)) %>% .[[1]] + +ggplot(scene_sd, aes(x = resid_sd)) + geom_histogram() + geom_vline(aes(xintercept = overall_sd, color = 'Overall residual SD')) + +scene_counts <- dplyr::count(summary_in, LandsatID) %>% filter(n > 5) + +scene_bias <- summary_in %>% + filter(LandsatID %in% scene_counts$LandsatID) %>% + group_by(LandsatID) %>% + summarise(bias = bias(wtemp_obs, temp_ls)) + +scene_stats <- summary_in %>% + filter(LandsatID %in% scene_counts$LandsatID) %>% + left_join(scene_bias) %>% + mutate(temp_no_bias = temp_ls+bias) %>% + group_by(LandsatID) %>% + summarise(rmse_bias_removed = rmse(wtemp_obs, temp_no_bias), + scene_bias = mean(bias)) + +overall_bias = summary_in %>% filter(LandsatID %in% scene_counts$LandsatID) %>% + summarise(bias = bias(wtemp_obs, temp_ls)) %>% + .[[1]] + +overall_rmse = summary_in %>% filter(LandsatID %in% scene_counts$LandsatID) %>% + mutate(temp_ls = temp_ls + overall_bias) %>% + summarise(rmse = rmse(wtemp_obs,temp_ls)) +rmses <- tibble(overall_rmse_bias_removed = overall_rmse[[1]], median_scene_rmse_bias_removed = median(scene_stats$rmse_bias_removed)) %>% + pivot_longer(everything(), names_to = "RMSE_Summaries") + +ggplot(scene_stats, aes(x = rmse_bias_removed)) + geom_histogram() + + geom_vline(data = rmses, aes(xintercept = value, color = RMSE_Summaries)) + + labs(x = "Distribution of image specific rmse after removing image bias") +``` + + +```{r} +mendota <- read_csv('data/in/Mendota_Era5Land_1984_2021.csv') +mendotaObs <- obs %>% filter(site_id == "nhdhr_143249470") + +mendota <- mendota %>% mutate(lake_mix_layer_temperature = lake_mix_layer_temperature - 273.15, + date = as.Date(date)) %>% + inner_join(mendotaObs) + +library(plotly) + +ggplotly( + ggplot(mendota, aes(x=date)) + + geom_point(aes(y = lake_mix_layer_temperature, color='Era5')) + + geom_point(aes(y=wtemp_obs, color = 'Obs')) + + geom_line(aes(y = lake_mix_layer_temperature, color='Era5')) + + geom_line(aes(y=wtemp_obs, color = 'Obs'))) + +ggplot(mendota, aes(x=lake_mix_layer_temperature)) + geom_histogram() + +ggplot + + +mendota %>% rename(temp_e5_mix = lake_mix_layer_temperature) %>% + summarise(rmse = rmse(wtemp_obs, temp_e5_mix), + mae = mae(wtemp_obs,temp_e5_mix), + smape = smape(wtemp_obs,temp_e5_mix), + bias = bias(wtemp_obs, temp_e5_mix)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() + +mendota %>% rename(temp_e5_mix = lake_mix_layer_temperature) %>% +ggplot(.,aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm', aes(color = 'Best Fit')) + + geom_abline(aes(slope=1, intercept=0,color = '1:1')) + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed', titel='blue=Best Fit', 'Red=1:1') +``` + diff --git a/02_raw_trends.R b/02_raw_trends.R new file mode 100755 index 0000000..cea5a1d --- /dev/null +++ b/02_raw_trends.R @@ -0,0 +1,135 @@ +#### This script is an initial analysis of lake surface temperature trends +#### using temperatures from the Landsat. ITS VERY PRELIMINARY and hacked together using +#### two different EE pulls. +library(tidyverse) +library(feather) +library(lubridate) +library(trend) +library(sf) +library(scales) + +l7l8 <- read_feather('data/out/landsat_temps.feather') +l7l8 <- l7l8 %>% + select(areakm, sat, date, distance, site_id, temp_ls, temp_ls_qa) %>% + filter(sat=='LANDSAT_7') + +ggplot(l7l8, aes(x=temp_ls)) + geom_histogram() + +l8l9 <- read_csv('/Users/stopp/Documents/repos/landsat-lake-temp-pull/data/out/L8_L9_cb_temps_munged.csv') +l8l9 <- l8l9 %>% + select(areakm, sat = SPACECRAFT_ID, date, distance, site_id, temp_ls = ls_temp, temp_ls_qa = ls_temp_qa) %>% + mutate(sat=as.character(sat)) + +ggplot(l8l9, aes(x=temp_ls)) + geom_histogram() + +ls_temps <- bind_rows(l7l8,l8l9) %>% + mutate(site_id = factor(site_id)) + +rm(l7l8,l8l9) + +seasonally_aggregated <- ls_temps %>% + mutate(month = month(date), + year = year(date), + season = case_when((month %in% c(6,7,8)) ~ 'JJA', + (month %in% c(9,10,11)) ~ "SON", + (month %in% c(12,1,2)) ~ 'DJF', + (month %in% c(3,4,5)) ~'MAM'), + season=factor(season)) %>% + group_by(site_id, year, season, distance, areakm) %>% + summarize(temp_ls = mean(temp_ls), + count = n()) + +seasonally_aggregated <- seasonally_aggregated %>% filter(count > 1) + +counts <- seasonally_aggregated %>% + group_by(site_id, season) %>% + mutate(count = n()) + +seasonally_aggregated <- seasonally_aggregated[counts$count > 20,] + +trends <- seasonally_aggregated %>% + group_by(site_id, season) %>% + arrange(year) %>% + nest() %>% + mutate(mk = purrr::map(data, ~sens.slope(.$temp_ls)), + sen.slope = purrr::map_dbl(mk, 'estimates'), + p.value = purrr::map_dbl(mk, 'p.value')) %>% + select(site_id, season, sen.slope, p.value) + +ggplot(trends, aes(x=sen.slope, fill = season)) + + geom_density(alpha=.4) + + xlim(-.3,.4) + + geom_vline(aes(xintercept=0), color = 'red') + + scale_fill_viridis_d() + +### Look at spatial trends +lakes_sf <- read_csv('data/in/lake_metadata_20211217.csv') %>% + st_as_sf(coords = c('lake_lon_deg','lake_lat_deg'), crs = 4326) %>% + select(site_id, elevation_m, area_m2) %>% + st_transform(5070) + +trends_sf <- lakes_sf %>% inner_join(trends) + +usa <- maps::map('usa', plot = F) %>% st_as_sf() %>% st_transform(5070) + +grid <- st_make_grid(usa, cellsize = c(75000,75000), square = F) %>% st_as_sf() %>% mutate(ID = row_number()) + +grid_means <- grid %>% st_join(trends_sf %>% filter(abs(sen.slope) < .4) %>%st_transform(st_crs(grid)), left = F) %>% + st_set_geometry(NULL) %>% group_by(ID, season) %>% + summarize(mean_trend = mean(sen.slope)) + +means_sf <- grid %>% inner_join(grid_means) + +ggplot() + + geom_sf(data = usa) + + geom_sf(data = means_sf, aes(fill = mean_trend)) + + scale_fill_gradient2(low = muted('blue'), high=muted('red'), 'Mean Trend (°C/yr)') + + ggthemes::theme_map(base_size = 11) + + theme(legend.position = 'bottom') + + facet_wrap(~season) + + ggtitle("Mean Trend in Lake Temperature Since 1999") + +### Binned by Elevation +trends_sf %>% + st_set_geometry(NULL) %>% + mutate(elevation_bin = cut_interval(elevation_m, 5)) %>% + ggplot(aes(x=elevation_bin,y = sen.slope,fill=elevation_bin)) + + coord_cartesian(ylim=c(-.15,.25)) + + geom_violin() + + geom_boxplot(width=.2) + + scale_fill_viridis_d(option='plasma', alpha=.3) + + theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1), + legend.position = 'none') + + labs(x = 'Elevation (m)', y = 'Trend (°C/yr)') + +### Binnned by lake size +trends_sf %>% + st_set_geometry(NULL) %>% + mutate(size_bin = cut_number(area_m2/1e6, 10)) %>% + ggplot(aes(x=size_bin,y = sen.slope,fill=size_bin)) + + coord_cartesian(ylim=c(-.15,.25)) + + geom_violin() + + geom_boxplot(width=.2) + + scale_fill_viridis_d(option='plasma', alpha=.3) + + theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1), + legend.position = 'none') + + labs(x = 'Lake Size Bin (km2)', y = 'Trend (°C/yr)') + +### Look at actual timeseries to see if they look really goofy +seasonal_summary <- seasonally_aggregated %>% + ungroup() %>% + mutate(area_bin = cut_number(areakm, 10)) %>% + group_by(season, site_id, area_bin) %>% + mutate(temp_scaled = scale(temp_ls)) %>% + ungroup() %>% + group_by(season, year, area_bin) %>% + summarise(mean_temp= mean(temp_scaled), + sd=sd(temp_scaled)) + +### +ggplot(seasonal_summary, aes(x=year, y = mean_temp, color = area_bin, fill = area_bin)) + + geom_ribbon(aes(ymin = mean_temp-sd, ymax=mean_temp+sd), alpha = .3) + + geom_line() + + facet_wrap(~season) + + diff --git a/GEE_pull_functions.py b/GEE_pull_functions.py new file mode 100755 index 0000000..0dc2ad9 --- /dev/null +++ b/GEE_pull_functions.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Google Earth Engine Reflectance Pull Functions + +@author: simontopp +""" +import ee +import time +import os + +## Recreate the fmask based on Collection 2 Pixel QA band +def AddFmask(image): + qa = image.select('pixel_qa') + water = qa.bitwiseAnd(1 << 7) + cloud = qa.bitwiseAnd(1 << 1).Or(qa.bitwiseAnd(1 << 2)).Or(qa.bitwiseAnd(1 << 3)) + snow = qa.bitwiseAnd(1 << 5) + cloudshadow = qa.bitwiseAnd(1 << 4) + + fmask = (water.gt(0).rename('fmask') + .where(snow.gt(0), ee.Image(3)) + .where(cloudshadow.gt(0), ee.Image(2)) + .where(cloud.gt(0), ee.Image(4)) + .updateMask(qa.gte(0))) + ## mask the fmask so that it has the same footprint as the quality (BQA) band + return image.addBands(fmask) + + +## Buffer the lake sites either 120 minutes or the distance to shore, whichever +## is smaller +def dpBuff(i): + dist = i.get('distance') + buffdist = ee.Number(dist).min(120) + return i.buffer(buffdist) + + + +## Set up the reflectance pull +def RefPull(image): + f = AddFmask(image).select('fmask') + water = f.eq(1).rename('water') + clouds = f.gte(2).rename('clouds') + + era5match = (era5.filterDate(ee.Date(image.get('system:time_start')).update(None, None, None, None, 0,0))) + + era5out = (ee.Algorithms.If(era5match.size(), + ee.Image(era5match.first()).select('lake_mix_layer_temperature', 'lake_total_layer_temperature'), + ee.Image(0).rename('lake_mix_layer_temperature').addBands(ee.Image(0).rename('lake_total_layer_temperature')), + ) + ) + + pixOut = (image.select('temp', 'temp_qa') + .updateMask(water) + .addBands(era5out) + .addBands(clouds) + .addBands(water)) + + combinedReducer = (ee.Reducer.median().unweighted() + .forEachBand(pixOut.select('temp', 'temp_qa', 'lake_mix_layer_temperature', 'lake_total_layer_temperature')) + .combine(ee.Reducer.mean().unweighted().forEachBand(pixOut.select('clouds')), 'cScore_', False) + .combine(ee.Reducer.count().unweighted().forEachBand(pixOut.select('water')), 'pCount_', False)) + + def copyMeta(i): + return i.copyProperties(image, ["CLOUD_COVER", 'SPACECRAFT_ID', 'system:index', 'DATE_ACQUIRED']) + + ## Remove geometries + def removeGeo(i): + return i.setGeometry(None) + + # Collect reflectance values, cloud score, and pixel counts + lsout = pixOut.reduceRegions(lakes, combinedReducer, 30) + + out = lsout.map(removeGeo) + out = out.map(copyMeta) + + return out + + +## Function for limiting the max number of tasks sent to +## earth engine at one time to avoid time out errors + +def maximum_no_of_tasks(MaxNActive, waitingPeriod): + ## maintain a maximum number of active tasks + time.sleep(10) + ## initialize submitting jobs + ts = list(ee.batch.Task.list()) + + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + ## wait if the number of current active tasks reach the maximum number + ## defined in MaxNActive + while (NActive >= MaxNActive): + time.sleep(waitingPeriod) + ts = list(ee.batch.Task.list()) + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + return () + diff --git a/LakeExport.py b/LakeExport.py new file mode 100755 index 0000000..2fed694 --- /dev/null +++ b/LakeExport.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +@author: simontopp +""" + +import time +import ee +import os + +## Initialize Earth Engine +ee.Initialize() + +## Source necessary functions. We do this instead of 'import' because of EE quirk. +exec(open('GEE_pull_functions.py').read()) + +## Bring in EE Assets +## Deepest point (Chebyshev center) for CONUS PGDL Lakes +## DP Code available at https://zenodo.org/record/4136755#.X5d54pNKgUE and +## https://code.earthengine.google.com/8dac409b220bdfb051bb469bc5b3c708 +dp = (ee.FeatureCollection('users/sntopp/USGS/PGDL_lakes_deepest_point') + .filterMetadata('distance', "greater_than", 60)) + +## CONUS Boundary +us = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")\ + .filterMetadata('country_na', 'equals', 'United States') + +## WRS Tiles in descending (daytime) mode for CONUS +wrs = ee.FeatureCollection('users/sntopp/wrs2_asc_desc')\ + .filterBounds(us)\ + .filterMetadata('MODE', 'equals', 'D') + +## Run everything by path/row to speed up computation in EE +pr = wrs.aggregate_array('PR').getInfo() + +## Bring in temp data +l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2") +l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2") +era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY") + +## Standardize band names between the various collections +bn8 = ['ST_B10', 'ST_QA', 'QA_PIXEL'] +bn57 = ['ST_B6', 'ST_QA', 'QA_PIXEL'] +bns = ['temp','temp_qa','pixel_qa'] + +ls7 = l7.select(bn57, bns) +ls8 = l8.select(bn8, bns) + +## Do coarse cloud filtering and aggregate +## them into one image collection +ls = ee.ImageCollection(ls7.merge(ls8))\ + .filter(ee.Filter.lt('CLOUD_COVER', 50))\ + .filterBounds(us) + + +## Set up a counter and a list to keep track of what's been done already +if not os.path.isfile('log.txt'): + open('log.txt', 'x') + +done = open('log.txt', 'r') +done = [x.strip() for x in done.readlines()] +done = list(map(int, done)) +counter = len(done) + +## In case something goofed, you should be able to just +## re-run this chunk with the following line filtering out +## what's already run. +pr = [i for i in pr if i not in done] + +for tiles in pr: + tile = wrs.filterMetadata('PR', 'equals', tiles) + + lakes = dp.filterBounds(tile.geometry())\ + .map(dpBuff) + + stack = ls.filterBounds(tile.geometry().centroid()) + out = stack.map(RefPull).flatten().filterMetadata('cScore_clouds','less_than',.5) + dataOut = ee.batch.Export.table.toDrive(collection = out,\ + description = str(tiles),\ + folder = 'EE_TempPull',\ + fileFormat = 'csv',\ + selectors = ["system:index", "areakm", "cScore_clouds", "CLOUD_COVER", 'SPACECRAFT_ID', 'DATE_ACQUIRED', "distance", "lake_mix_layer_temperature", "lake_total_layer_temperature", "pCount_water", "site_id", "temp", "temp_qa"]) + + ## Check how many existing tasks are running and take a break if it's >15 + maximum_no_of_tasks(25, 120) + ## Send next task. + dataOut.start() + counter = counter + 1 + done.append(tiles) + f = open('log.txt', 'a') + f.write(str(tiles) + '\n') + f.close() + + print('done_' + str(counter) + '_' + str(tiles)) + + + diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py new file mode 100755 index 0000000..c8fa1cf --- /dev/null +++ b/NHD_Deepest_Point_Pull.py @@ -0,0 +1,134 @@ +import ee +import math +import time +ee.Initialize() + +## Deepest point calculation adapted from Xiao Yang +### https: // doi.org / 10.5281 / zenodo.4136754 +### Functions +def get_scale(polygon): + radius = polygon.get('areasqkm') + radius = ee.Number(radius).divide(math.pi).sqrt().multiply(1000) + scale = radius.divide(20) + #scale = ee.Algorithms.If(ee.Number(scale).lte(10),10,scale) + scale = ee.Algorithms.If(ee.Number(scale).gte(500),500,scale) + return ee.Number(scale) + + +def getUTMProj(lon, lat): + # see + # https: // apollomapping.com / blog / gtm - finding - a - utm - zone - number - easily and + # https: // sis.apache.org / faq.html + utmCode = ee.Number(lon).add(180).divide(6).ceil().int() + output = ee.Algorithms.If(ee.Number(lat).gte(0), + ee.String('EPSG:326').cat(utmCode.format('%02d')), + ee.String('EPSG:327').cat(utmCode.format('%02d'))) + return output + +def GetLakeCenters(polygon): + + ## Calculate both the deepest point an centroid + ## for the inpout polygon ( or multipolygon) + ## for each input, export geometries for both centroid and deepest point and their distance to shore. + scale = get_scale(polygon) + geo = polygon.geometry() + ct = geo.centroid(scale) + utmCode = getUTMProj(ct.coordinates().getNumber(0), ct.coordinates().getNumber(1)) + + polygonImg = ee.Image.constant(1).toByte().paint(ee.FeatureCollection(ee.Feature(geo, None)), 0) + + dist = polygonImg.fastDistanceTransform(2056).updateMask(polygonImg.Not()).sqrt().reproject(utmCode, None, scale).multiply(scale) # convert unit from pixel to meter + + # dist = (polygonImg.fastDistanceTransform(2056).updateMask(polygonImg.not ()) + # .sqrt().reproject('EPSG:4326', None, scale).multiply(scale) # convert unit from pixel to meter + + maxDistance = (dist.reduceRegion( + reducer=ee.Reducer.max(), + geometry=geo, + scale=scale, + bestEffort=True, + tileScale=1 + ).getNumber('distance').int16()) + + outputDp = (ee.Feature(dist.addBands(ee.Image.pixelLonLat()).updateMask(dist.gte(maxDistance)) + .sample(geo, scale).first())) + + dp = ee.Geometry.Point([outputDp.get('longitude'), outputDp.get('latitude')]) + + regions = ee.FeatureCollection([ee.Feature(dp, {'type': 'dp'})]) + + output = dist.sampleRegions( + collection=regions, + properties=['type'], + scale=scale, + tileScale=1, + geometries=True) + + return (ee.Feature(output.first()).copyProperties(polygon,['permanent','areasqkm'])) + +def buff_dp(dp): + return dp.buffer(dp.getNumber('distance')) + + +def maximum_no_of_tasks(MaxNActive, waitingPeriod): + ## maintain a maximum number of active tasks + time.sleep(10) + ## initialize submitting jobs + ts = list(ee.batch.Task.list()) + + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + ## wait if the number of current active tasks reach the maximum number + ## defined in MaxNActive + while (NActive >= MaxNActive): + time.sleep(waitingPeriod) + ts = list(ee.batch.Task.list()) + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + return () + +def calc_area(feature): + return(feature.set('area_calc',feature.area().divide(1e6))) + +assets_done = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint'})['assets'] +ids_done = [i['id'].split('/')[-1] for i in assets_done] +assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets'] +assets_parent = [i for i in assets_parent if i['id'].split('/')[-1] not in ids_done] +#assets_parent = [i for i in assets_parent if i['id'].split('/')[-1] not in ['NHD_MO','NHD_TX','NHD_AK']] + + +for i in range(len(assets_parent)): + state_asset = assets_parent[i]['id'] + assets_state = (ee.FeatureCollection(f"{state_asset}/NHDWaterbody") + .filter(ee.Filter.gte('areasqkm',0.001)) + .filter(ee.Filter.lte('areasqkm',5000)) #Remove Great Lakes + .filter(ee.Filter.inList('ftype',[361,436,390]))) + + ## Added to trouble shoot NM where some of the NHD areas are don't match the actual polygon areas + assets_state=ee.FeatureCollection(assets_state.map(calc_area)).filter(ee.Filter.gte('area_calc',0.001)) + + dp = assets_state.map(GetLakeCenters) + + dataOut = ee.batch.Export.table.toAsset(collection=dp,description=state_asset.split('/')[-1],assetId=f"projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint/{state_asset.split('/')[-1]}") + + ## Check how many existing tasks are running and take a break if it's >15 + maximum_no_of_tasks(15, 240) + ## Send next task. + dataOut.start() + print(state_asset.split('/')[-1]) + +#f"projects/earthengine-legacy/assets/users/sntopp/NHD/{state_asset.split('/')[-1]}/NHDDeepestPoint" + + +state_dps = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint'})['assets'] + +for i in state_dps: + dps = ee.FeatureCollection(i['id']) + id = i['id'].split('/')[-1] + dpsOut = ee.batch.Export.table.toDrive(dps,id,'EE_DP_Exports',id) + maximum_no_of_tasks(15, 60) + dpsOut.start() \ No newline at end of file diff --git a/NHD_HR_Lake_Pull.R b/NHD_HR_Lake_Pull.R new file mode 100755 index 0000000..e364dc7 --- /dev/null +++ b/NHD_HR_Lake_Pull.R @@ -0,0 +1,105 @@ +## Mess around with tyler kings stuff +library(tidyverse) +library(sf) +library(mapview) +library(feather) +mapviewOptions(fgb=FALSE) + +tk_resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv') + + +### Read in the exports from EE and do a little munging +files <- list.files('../../../My Drive/EE_DP_Exports', full.names = T) + +options(digits=15) +dp_national <- read_csv(files[1]) %>% + mutate(coords = map(.geo, ~as.numeric(unlist(regmatches(.x,gregexpr("-?[[:digit:]]+\\.*-?[[:digit:]]*",.x))))), + long = map_dbl(coords, 1), + lat = map_dbl(coords,2), + distance = round(distance,3), + areasqkm = round(areasqkm, 4)) %>% + select(-c(coords, .geo,`system:index`)) + +for(i in files[2:length(files)]){ + state <- read_csv(i,show_col_types = F) %>% + mutate(coords = map(.geo, ~as.numeric(unlist(regmatches(.x,gregexpr("-?[[:digit:]]+\\.*-?[[:digit:]]*",.x))))), + long = map_dbl(coords, 1), + lat = map_dbl(coords,2), + distance = round(distance,3), + areasqkm = round(areasqkm, 4)) %>% + select(-c(coords, .geo,`system:index`)) + + dp_national <- dp_national %>% + bind_rows(state) %>% + distinct(permanent,.keep_all=T) +} + +dp_national <- dp_national %>% + select(areasqkm:lat) + +#write_feather(dp_national, 'data/out/nhd_hr_1ha_deepest_point_noTX.feather') +dp_national <- read_feather('data/out/nhd_hr_1ha_deepest_point_noTX.feather') +tk_resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv') + + +match_ups <- dp_national %>% + inner_join(.,tk_resolvable, by = c('permanent'='permanent_identifier')) + +rm(dp_national, tk_resolvable) + +match_ups <- match_ups %>% mutate(resolvable = ifelse(resolvable == 1, 'Konrad resolvable', 'Konrad not resolvable')) + +thresholds <- tibble(distance = c(20,30,40,50,60,70,80,90)) + +tk_resolvable <- match_ups %>% filter(resolvable == 'Konrad resolvable') + +thresholds$counts <- colSums(outer(tk_resolvable$distance, setNames(thresholds$distance, thresholds$distance), "<")) +thresholds$labels <- paste0(thresholds$distance,' meters (n=',thresholds$counts,')') + +tk_resolvable %>% + ggplot(.,aes(x=distance)) + stat_bin(aes(y=cumsum(..count..)),geom="step") + + geom_vline(data= thresholds, aes(xintercept=distance, color = labels)) + + scale_color_viridis_d() + + scale_x_log10() + + theme_bw() + + facet_wrap(~resolvable, scales = 'free') + + labs(x = 'Maximum Distance from Shore (Meters)', + y= 'Cumulative Lake Count', + color='Res. Threshold (n disagreement)') + + +thresholds <- tibble(distance = c(20,30,40,50,60,70,80,90)) + +tk_non_resolvable <- match_ups %>% filter(resolvable == 'Konrad not resolvable') + +thresholds$counts <- colSums(outer(tk_non_resolvable$distance, setNames(thresholds$distance, thresholds$distance), ">")) +thresholds$labels <- paste0(thresholds$distance,' meters (n=',thresholds$counts,')') + +tk_non_resolvable %>% + ggplot(.,aes(x=distance)) + stat_bin(aes(y=cumsum(..count..)),geom="step") + + geom_vline(data= thresholds, aes(xintercept=distance, color = labels)) + + scale_color_viridis_d() + + scale_x_log10() + + theme_bw() + + facet_wrap(~resolvable, scales = 'free') + + labs(x = 'Maximum Distance from Shore (Meters)', + y= 'Cumulative Lake Count', + color='Res. Threshold (n disagreement)') + + + +tk30_res <- tk_resolvable %>% filter(distance < 30) +tk30_not_res <- tk_non_resolvable %>% filter(distance >30) + +ggplot(tk30_res, aes(x = areasqkm)) + + geom_histogram() + +ggplot(tk30_not_res, aes(x= areasqkm)) + + geom_histogram() + + scale_x_log10() + + +tk30_res %>% slice_sample(prop=.2) %>% + st_as_sf(., coords=c('long','lat'), crs=4326) %>% + mapview(zcol = 'distance') + diff --git a/README.md b/README.md old mode 100644 new mode 100755