From 5b573d2ffc0fdb7b637cd07168da818ed9f93d06 Mon Sep 17 00:00:00 2001 From: Simon Topp Date: Fri, 2 Jul 2021 16:08:05 -0400 Subject: [PATCH 01/15] Initial code for pulling lake temperatures from Landsat and Era5 --- GEE_pull_functions.py | 106 ++++++++++++++++++++++++++++++++++++++++++ LakeExport.py | 92 ++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+) create mode 100644 GEE_pull_functions.py create mode 100644 LakeExport.py diff --git a/GEE_pull_functions.py b/GEE_pull_functions.py new file mode 100644 index 0000000..120a8e7 --- /dev/null +++ b/GEE_pull_functions.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Google Earth Engine Reflectance Pull Functions +Created on Mon Apr 9 14:24:13 2018 +@author: simontopp +""" +import ee +import time +import os + +## Recreate the fmask based on Collection 2 Pixel QA band + +def AddFmask(image): + qa = image.select('pixel_qa') + water = qa.bitwiseAnd(1 << 7) + cloud = qa.bitwiseAnd(1 << 1).Or(qa.bitwiseAnd(1 << 2)).Or(qa.bitwiseAnd(1 << 3)) + snow = qa.bitwiseAnd(1 << 5) + cloudshadow = qa.bitwiseAnd(1 << 4) + + fmask = (water.gt(0).rename('fmask') + .where(snow.gt(0), ee.Image(3)) + .where(cloudshadow.gt(0), ee.Image(2)) + .where(cloud.gt(0), ee.Image(4)) + .updateMask(qa.gte(0))) + # mask the fmask so that it has the same footprint as the quality (BQA) band + return image.addBands(fmask) + + +## Calculuate hillshadow to correct DWSE +def CalcHillShadows(image, geo): + MergedDEM = ee.Image("users/eeProject/MERIT").clip(geo.buffer(3000)) + hillShadow = (ee.Terrain.hillShadow(MergedDEM, ee.Number(image.get('SOLAR_AZIMUTH_ANGLE')), + ee.Number(90).subtract(image.get('SOLAR_ZENITH_ANGLE')), 30).rename( + ['hillShadow'])) + return hillShadow + + +## Buffer the lake sites +def dpBuff(i): + dist = i.get('distance') + buffdist = ee.Number(dist).min(120) + return i.buffer(buffdist) + + +## Remove geometries +def removeGeo(i): + return i.setGeometry(None) + + +## Create water mask and extract lake medians + +## Set up the reflectance pull +def RefPull(image): + f = AddFmask(image).select('fmask') + water = f.eq(1).rename('water') + clouds = f.gte(2).rename('clouds') + #hs = CalcHillShadows(image, tile.geometry()).select('hillShadow') + #era5match = (ee.Image(era5.filterDate(ee.Date(image.get('system:time_start')).update(minute = 0, second = 0)) + # .first()).select('lake_mix_layer_temperature', 'lake_total_layer_temperature')) + era5match = ee.Image(0).rename('lake_mix_layer_temperature').addBands(ee.Image(1).rename('lake_total_layer_temperature')) + pixOut = (image + .addBands(era5match) + .updateMask(water) + .addBands(clouds) + .addBands(water)) + + combinedReducer = (ee.Reducer.median().unweighted() + .forEachBand(pixOut.select('temp', 'temp_qa', 'lake_mix_layer_temperature', 'lake_total_layer_temperature')) + .combine(ee.Reducer.mean().unweighted().forEachBand(pixOut.select('clouds')), 'cScore_', False) + .combine(ee.Reducer.count().unweighted().forEachBand(pixOut.select('water')), 'pCount_', False)) + + + # Collect median reflectance and occurance values + # Make a cloud score, and get the water pixel count + lsout = pixOut.reduceRegions(lakes, combinedReducer, 30) + + out = lsout.map(removeGeo) + + return out + + +##Function for limiting the max number of tasks sent to +# earth engine at one time to avoid time out errors + +def maximum_no_of_tasks(MaxNActive, waitingPeriod): + ##maintain a maximum number of active tasks + time.sleep(10) + ## initialize submitting jobs + ts = list(ee.batch.Task.list()) + + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + ## wait if the number of current active tasks reach the maximum number + ## defined in MaxNActive + while (NActive >= MaxNActive): + time.sleep(waitingPeriod) # if reach or over maximum no. of active tasks, wait for 2min and check again + ts = list(ee.batch.Task.list()) + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + return () + diff --git a/LakeExport.py b/LakeExport.py new file mode 100644 index 0000000..67e3434 --- /dev/null +++ b/LakeExport.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Created on Tue Feb 27 12:07:02 2018 + +@author: simontopp +""" +#%% +import time +import ee +import os +#import pandas as pd +#import feather +import GEE_pull_functions as f +ee.Initialize() + +#Source necessary functions. +exec(open('GEE_pull_functions.py').read()) + +#water = ee.Image("JRC/GSW1_1/GlobalSurfaceWater").select('occurrence').gt(80) + +## Bring in EE Assets +# Deepest point for CONUS Hydrolakes from Xiao Yang +# Code available https://zenodo.org/record/4136755#.X5d54pNKgUE +dp = (ee.FeatureCollection('users/sntopp/USGS/PGDL_lakes_deepest_point') + .filterMetadata('distance', "greater_than", 60)) + +#CONUS Boundary +us = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")\ + .filterMetadata('country_na', 'equals', 'United States') + +## WRS Tiles in descending (daytime) mode for CONUS +wrs = ee.FeatureCollection('users/sntopp/wrs2_asc_desc')\ + .filterBounds(us)\ + .filterMetadata('MODE', 'equals', 'D') + +pr = wrs.aggregate_array('PR').getInfo() + +l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2") +l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2") +era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY") + +#Standardize band names between the various collections and aggregate +#them into one image collection +bn8 = ['ST_B10', 'ST_QA', 'QA_PIXEL'] +bn57 = ['ST_B6', 'ST_QA', 'QA_PIXEL'] +bns = ['temp','temp_qa','pixel_qa'] + +ls7 = l7.select(bn57, bns) +ls8 = l8.select(bn8, bns) + +ls = ee.ImageCollection(ls7.merge(ls8))\ + .filter(ee.Filter.lt('CLOUD_COVER', 50))\ + .filterBounds(us) + + +## Set up a counter and a list to keep track of what's been done already +counter = 0 +done = [] + +#%% + +## In case something goofed, you should be able to just +## re-run this chunk with the following line filtering out +## what's already run. +pr = [i for i in pr if i not in done] + +for tiles in pr[0:3]: + tile = wrs.filterMetadata('PR', 'equals', tiles) + # For some reason we need to cast this to a list and back to a + # feature collection + lakes = dp.filterBounds(tile.geometry())\ + .map(dpBuff) + + #lakes = ee.FeatureCollection(lakes.toList(10000)) + stack = ls.filterBounds(tile.geometry().centroid()) + out = stack.map(RefPull).flatten().filterMetadata('cScore_clouds','less_than',.5) + dataOut = ee.batch.Export.table.toDrive(collection = out,\ + description = str(tiles),\ + folder = 'EE_TempPull',\ + fileFormat = 'csv')#,\ + #selectors = []) + #Check how many existing tasks are running and take a break if it's >15 + maximum_no_of_tasks(25, 120) + #Send next task. + dataOut.start() + counter = counter + 1 + done.append(tiles) + print('done_' + str(counter) + '_' + str(tiles)) + +#%% + From 1682292ac4fb45f47b52359bcab827b2c81321fb Mon Sep 17 00:00:00 2001 From: Simon Topp Date: Mon, 5 Jul 2021 14:10:52 -0400 Subject: [PATCH 02/15] Fixed Era5 pull --- GEE_pull_functions.py | 24 ++++++++++++++++-------- LakeExport.py | 15 +++++++++++---- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/GEE_pull_functions.py b/GEE_pull_functions.py index 120a8e7..3f1a3cf 100644 --- a/GEE_pull_functions.py +++ b/GEE_pull_functions.py @@ -47,8 +47,7 @@ def dpBuff(i): def removeGeo(i): return i.setGeometry(None) - -## Create water mask and extract lake medians + ## Create water mask and extract lake medians ## Set up the reflectance pull def RefPull(image): @@ -56,12 +55,17 @@ def RefPull(image): water = f.eq(1).rename('water') clouds = f.gte(2).rename('clouds') #hs = CalcHillShadows(image, tile.geometry()).select('hillShadow') - #era5match = (ee.Image(era5.filterDate(ee.Date(image.get('system:time_start')).update(minute = 0, second = 0)) - # .first()).select('lake_mix_layer_temperature', 'lake_total_layer_temperature')) - era5match = ee.Image(0).rename('lake_mix_layer_temperature').addBands(ee.Image(1).rename('lake_total_layer_temperature')) - pixOut = (image - .addBands(era5match) + era5match = (era5.filterDate(ee.Date(image.get('system:time_start')).update(None, None, None, None, 0,0))) + + era5out = (ee.Algorithms.If(era5match.size(), + ee.Image(era5match.first()).select('lake_mix_layer_temperature', 'lake_total_layer_temperature'), + ee.Image(0).rename('lake_mix_layer_temperature').addBands(ee.Image(0).rename('lake_total_layer_temperature')), + ) + ) + + pixOut = (image.select('temp', 'temp_qa') .updateMask(water) + .addBands(era5out) .addBands(clouds) .addBands(water)) @@ -70,12 +74,16 @@ def RefPull(image): .combine(ee.Reducer.mean().unweighted().forEachBand(pixOut.select('clouds')), 'cScore_', False) .combine(ee.Reducer.count().unweighted().forEachBand(pixOut.select('water')), 'pCount_', False)) + def copyMeta(i): + return i.copyProperties(image, ["CLOUD_COVER", 'SPACECRAFT_ID', 'system:index', 'DATE_ACQUIRED']) + - # Collect median reflectance and occurance values + # Collect median reflectance and occurance values # Make a cloud score, and get the water pixel count lsout = pixOut.reduceRegions(lakes, combinedReducer, 30) out = lsout.map(removeGeo) + out = out.map(copyMeta) return out diff --git a/LakeExport.py b/LakeExport.py index 67e3434..ccf60ff 100644 --- a/LakeExport.py +++ b/LakeExport.py @@ -65,7 +65,10 @@ ## what's already run. pr = [i for i in pr if i not in done] -for tiles in pr[0:3]: +if not os.path.isfile('log.txt'): + open('log.txt', 'x') + +for tiles in pr[0:1]: tile = wrs.filterMetadata('PR', 'equals', tiles) # For some reason we need to cast this to a list and back to a # feature collection @@ -78,14 +81,18 @@ dataOut = ee.batch.Export.table.toDrive(collection = out,\ description = str(tiles),\ folder = 'EE_TempPull',\ - fileFormat = 'csv')#,\ - #selectors = []) - #Check how many existing tasks are running and take a break if it's >15 + fileFormat = 'csv',\ + selectors = ["system:index", "areakm", "cScore_clouds", "CLOUD_COVER", 'SPACECRAFT_ID', 'DATE_ACQUIRED', "distance", "lake_mix_layer_temperature", "lake_total_layer_temperature", "pCount_water", "site_id", "temp", "temp_qa"]) + #Check how many existing tasks are running and take a break if it's >15 maximum_no_of_tasks(25, 120) #Send next task. dataOut.start() counter = counter + 1 done.append(tiles) + f = open('log.txt', 'a') + f.write(str(tiles) + '\n') + f.close() + print('done_' + str(counter) + '_' + str(tiles)) #%% From a1bb3aadcb0cf2c4aef7472d358f0b73cbeee2b6 Mon Sep 17 00:00:00 2001 From: Simon Topp Date: Thu, 8 Jul 2021 07:57:44 -0400 Subject: [PATCH 03/15] Commented code for review. --- GEE_pull_functions.py | 18 ++++-------------- LakeExport.py | 29 +++++++++++++++-------------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/GEE_pull_functions.py b/GEE_pull_functions.py index 3f1a3cf..a3e6cce 100644 --- a/GEE_pull_functions.py +++ b/GEE_pull_functions.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ Google Earth Engine Reflectance Pull Functions -Created on Mon Apr 9 14:24:13 2018 + @author: simontopp """ import ee @@ -10,7 +10,6 @@ import os ## Recreate the fmask based on Collection 2 Pixel QA band - def AddFmask(image): qa = image.select('pixel_qa') water = qa.bitwiseAnd(1 << 7) @@ -27,16 +26,8 @@ def AddFmask(image): return image.addBands(fmask) -## Calculuate hillshadow to correct DWSE -def CalcHillShadows(image, geo): - MergedDEM = ee.Image("users/eeProject/MERIT").clip(geo.buffer(3000)) - hillShadow = (ee.Terrain.hillShadow(MergedDEM, ee.Number(image.get('SOLAR_AZIMUTH_ANGLE')), - ee.Number(90).subtract(image.get('SOLAR_ZENITH_ANGLE')), 30).rename( - ['hillShadow'])) - return hillShadow - - -## Buffer the lake sites +## Buffer the lake sites either 120 minutes or the distance to shore, whichever +## is smaller def dpBuff(i): dist = i.get('distance') buffdist = ee.Number(dist).min(120) @@ -47,14 +38,13 @@ def dpBuff(i): def removeGeo(i): return i.setGeometry(None) - ## Create water mask and extract lake medians ## Set up the reflectance pull def RefPull(image): f = AddFmask(image).select('fmask') water = f.eq(1).rename('water') clouds = f.gte(2).rename('clouds') - #hs = CalcHillShadows(image, tile.geometry()).select('hillShadow') + era5match = (era5.filterDate(ee.Date(image.get('system:time_start')).update(None, None, None, None, 0,0))) era5out = (ee.Algorithms.If(era5match.size(), diff --git a/LakeExport.py b/LakeExport.py index ccf60ff..2bdea4f 100644 --- a/LakeExport.py +++ b/LakeExport.py @@ -1,8 +1,6 @@ #!/usr/bin/env python2 # -*- coding: utf-8 -*- """ -Created on Tue Feb 27 12:07:02 2018 - @author: simontopp """ #%% @@ -11,17 +9,18 @@ import os #import pandas as pd #import feather -import GEE_pull_functions as f + +## Initialize Earth Engine ee.Initialize() -#Source necessary functions. +#Source necessary functions. We do this instead of 'import' because of EE quirk. exec(open('GEE_pull_functions.py').read()) #water = ee.Image("JRC/GSW1_1/GlobalSurfaceWater").select('occurrence').gt(80) ## Bring in EE Assets -# Deepest point for CONUS Hydrolakes from Xiao Yang -# Code available https://zenodo.org/record/4136755#.X5d54pNKgUE +# Deepest point for CONUS PGDL Lakes +# DP Code available at https://zenodo.org/record/4136755#.X5d54pNKgUE dp = (ee.FeatureCollection('users/sntopp/USGS/PGDL_lakes_deepest_point') .filterMetadata('distance', "greater_than", 60)) @@ -36,6 +35,7 @@ pr = wrs.aggregate_array('PR').getInfo() +#Bring in temp data l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2") l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2") era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY") @@ -49,26 +49,27 @@ ls7 = l7.select(bn57, bns) ls8 = l8.select(bn8, bns) +##Do coarse cloud filtering ls = ee.ImageCollection(ls7.merge(ls8))\ .filter(ee.Filter.lt('CLOUD_COVER', 50))\ .filterBounds(us) ## Set up a counter and a list to keep track of what's been done already -counter = 0 -done = [] +if not os.path.isfile('log.txt'): + open('log.txt', 'x') -#%% +done = open('log.txt', 'r') +done = [x.strip() for x in done.readlines()] +done = list(map(int, done)) +counter = len(done) ## In case something goofed, you should be able to just ## re-run this chunk with the following line filtering out ## what's already run. pr = [i for i in pr if i not in done] -if not os.path.isfile('log.txt'): - open('log.txt', 'x') - -for tiles in pr[0:1]: +for tiles in pr: tile = wrs.filterMetadata('PR', 'equals', tiles) # For some reason we need to cast this to a list and back to a # feature collection @@ -95,5 +96,5 @@ print('done_' + str(counter) + '_' + str(tiles)) -#%% + From bf15af924c3e34b40e9855fa1ca3d96f3b60ea3e Mon Sep 17 00:00:00 2001 From: Simon Topp Date: Thu, 8 Jul 2021 11:32:21 -0400 Subject: [PATCH 04/15] Initial look at RS temps. --- 01_TempMunge.Rmd | 242 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 01_TempMunge.Rmd diff --git a/01_TempMunge.Rmd b/01_TempMunge.Rmd new file mode 100644 index 0000000..28e6e77 --- /dev/null +++ b/01_TempMunge.Rmd @@ -0,0 +1,242 @@ +--- +title: "01_TempMunge" +author: "Simon Topp" +date: "07/08/2021" +output: html_document +editor_options: + chunk_output_type: console +--- + +```{r setup, include=FALSE} +library(tidyverse) +library(lubridate) +library(purrr) +library(furrr) +library(data.table) +library(feather) +library(sf) +library(ggpmisc) +library(ggpubr) +library(Hmisc) +library(Metrics) + +knitr::opts_chunk$set(echo = TRUE, warning = FALSE) +``` + +```{r eval = F, include = F} +## Read in whatever path you saved the raw pull data to +files <- list.files('data/EE_TempPull', full.names = T) +files <- files[file.size(files) > 1] ## 6 WRS tile returned no values + +## Munge it a little, basically zero clouds/shadow/ice and +## at least 5 pixels +munger <- function(file){ + df <- read_csv(file) %>% + filter(!is.na(temp), + cScore_clouds == 0, + pCount_water > 5) %>% + select(-`system:index`) + return(df) +} + +munged <- files %>% map_dfr(munger) + +munged <- munged %>% mutate(temp = 0.00341802*temp + 149 - 273.15, + temp_qa = temp_qa*0.01, + lake_mix_layer_temperature = lake_mix_layer_temperature - 273.15) %>% + rename(temp_e5_mix = lake_mix_layer_temperature, temp_e5_full = lake_total_layer_temperature, + temp_ls = temp, temp_ls_qa = temp_qa, date = DATE_ACQUIRED, sat = SPACECRAFT_ID, + scene_cloud_cover = CLOUD_COVER) + +write_feather(munged,'data/out/landsat_temps.feather') +``` + +```{r} +munged <- read_feather('data/out/landsat_temps.feather') +``` + +## Between Landsat 7 and 8 we have `r nrow(munged)` temperature observations. + +```{r} + +obs <- read_csv('data/in/lake_surface_temp_obs.csv') %>% + rename(date = Date) + +same_day <- obs %>% inner_join(munged) %>% mutate(join = 'same_day') + +day_plus_one <- obs %>% inner_join(munged %>% mutate(date = date + 1)) %>% mutate(join = 'day_plus_1') + +day_minus_one <- obs %>% inner_join(munged %>% mutate(date = date -1)) %>% mutate(join = 'day_minus_1') + +matchups <- bind_rows(same_day, day_plus_one, day_minus_one) + +matchups_filt <- matchups %>% + filter(temp_ls_qa < 3, + scene_cloud_cover < 20, + distance > 90, + temp_ls >=0) + +rm(same_day, day_plus_one, day_minus_one) +``` + +## Of those, we have `r nrow(matchups)` coincident observations of RS and *in situ* temp (matchups). + +Let's look at some *inclusive* metrics. General filters here are: + +- +/- 1 day matchups + +- 0 clouds/ice/cloud shadow over the 'deepest point' + +```{r} +ggplot(matchups, aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed') + +matchups %>% summarise(rmse = rmse(wtemp_obs, temp_ls), + mae = mae(wtemp_obs,temp_ls), + smape = smape(wtemp_obs,temp_ls), + bias = bias(wtemp_obs, temp_ls)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() + + +ggplot(matchups, aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed') + + +matchups %>% filter(!is.na(temp_e5_mix)) %>% + summarise(rmse = rmse(wtemp_obs, temp_e5_mix), + mae = mae(wtemp_obs,temp_e5_mix), + smape = smape(wtemp_obs,temp_e5_mix), + bias = bias(wtemp_obs, temp_e5_mix)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() +``` + +## Now the same but with *strict* filters. After qc filters we have `r nrow(matchups_filt)` matchups. + +General filters here are: + +- +/- 1 day matchups + +- 0 clouds/ice/cloud shadow over the 'deepest point' + +- Landsat uncertainty \< 3 degrees + +- Scene cloud cover \< 20% - Distance to shore \> 90m + +```{r} +ggplot(matchups_filt, aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed') + +matchups_filt %>% summarise(rmse = rmse(wtemp_obs, temp_ls), + mae = mae(wtemp_obs,temp_ls), + smape = smape(wtemp_obs,temp_ls), + bias = bias(wtemp_obs, temp_ls)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() + +ggplot(matchups_filt, aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed') + + +matchups_filt %>% filter(!is.na(temp_e5_mix)) %>% + summarise(rmse = rmse(wtemp_obs, temp_e5_mix), + mae = mae(wtemp_obs,temp_e5_mix), + smape = smape(wtemp_obs,temp_e5_mix), + bias = bias(wtemp_obs, temp_e5_mix)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() +``` + +## Finally, just big lakes where we would expect to be most confident (distanct to shore \> 1000m) + +```{r} +ggplot(matchups_filt %>% filter(distance > 1000), aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm') + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed') + +matchups_filt %>% filter(distance > 1000) %>% summarise(rmse = rmse(wtemp_obs, temp_ls), + mae = mae(wtemp_obs,temp_ls), + smape = smape(wtemp_obs,temp_ls), + bias = bias(wtemp_obs, temp_ls)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() +``` + +## Sliced and diced a couple ways + +```{r} +lake_data <- read_csv('data/in/lake_metadata.csv') + +matchups <- matchups %>% left_join(lake_data) %>% + mutate(residual = wtemp_obs - temp_ls, + abs_error = abs(residual)) + +ggplot(matchups, aes(x = residual, fill = sat)) + + geom_density(alpha = .4) + + geom_vline(aes(xintercept = 0), color = 'red') + + labs(title = 'Residuals across sensors') + +matchups %>% + mutate(dist_group = cut_number(distance, 6)) %>% + ggplot(aes(x = dist_group, y = residual)) + + geom_violin(draw_quantiles = .5) + + labs(title = 'Residuals binned by distance to shore') + + +## Spatially +matchups.sf <- matchups %>% st_as_sf(coords = c('lake_lon_deg','lake_lat_deg'), crs = 4326) %>% st_transform(5070) + +usa <- maps::map('usa', plot = F) %>% st_as_sf() %>% st_transform(5070) + +grid <- st_make_grid(usa, cellsize = c(200000,200000), square = F) %>% st_as_sf() %>% mutate(ID = row_number()) + +rmse.sf <- matchups.sf %>% st_join(grid) %>% st_set_geometry(NULL) %>% + group_by(ID) %>% + summarise(rmse = rmse(wtemp_obs, temp_ls), + matchup.count = n()) %>% + inner_join(grid) %>% + st_as_sf() + +ggplot(rmse.sf) + geom_sf(aes(fill = rmse)) + + scale_fill_viridis_c() + + geom_sf(data = usa, fill = 'transparent', color = 'black') + + theme_minimal() + + labs(title = 'Spatial Error Distribution') + +ggplot(rmse.sf) + geom_sf(aes(fill = matchup.count)) + + scale_fill_viridis_c(trans='log10') + + geom_sf(data = usa, fill = 'transparent', color = 'black') + + theme_minimal() + + labs(title = 'Matchup Count') + +``` From 1aaf8aa782a20a6a6f237da074e8f5d8197533ea Mon Sep 17 00:00:00 2001 From: Simon Topp Date: Thu, 8 Jul 2021 13:44:07 -0400 Subject: [PATCH 05/15] Responded to reviews from PR request. --- 01_TempMunge.Rmd | 29 ++++++++--------------------- GEE_pull_functions.py | 22 ++++++++++------------ LakeExport.py | 35 ++++++++++++++++------------------- 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/01_TempMunge.Rmd b/01_TempMunge.Rmd index 28e6e77..cef58e9 100644 --- a/01_TempMunge.Rmd +++ b/01_TempMunge.Rmd @@ -20,7 +20,7 @@ library(ggpubr) library(Hmisc) library(Metrics) -knitr::opts_chunk$set(echo = TRUE, warning = FALSE) +knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) ``` ```{r eval = F, include = F} @@ -104,7 +104,7 @@ matchups %>% summarise(rmse = rmse(wtemp_obs, temp_ls), kableExtra::kable() %>% kableExtra::kable_styling() - +## One quick look at Era5, it's not great ggplot(matchups, aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + scale_fill_viridis_c(trans = 'log10') + geom_smooth(method = 'lm') + @@ -152,27 +152,9 @@ matchups_filt %>% summarise(rmse = rmse(wtemp_obs, temp_ls), mutate(across(everything(), ~round(.,4))) %>% kableExtra::kable() %>% kableExtra::kable_styling() - -ggplot(matchups_filt, aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + - scale_fill_viridis_c(trans = 'log10') + - geom_smooth(method = 'lm') + - stat_poly_eq(formula = y~x, - aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), - parse = TRUE) + - labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed') - - -matchups_filt %>% filter(!is.na(temp_e5_mix)) %>% - summarise(rmse = rmse(wtemp_obs, temp_e5_mix), - mae = mae(wtemp_obs,temp_e5_mix), - smape = smape(wtemp_obs,temp_e5_mix), - bias = bias(wtemp_obs, temp_e5_mix)) %>% - mutate(across(everything(), ~round(.,4))) %>% - kableExtra::kable() %>% - kableExtra::kable_styling() ``` -## Finally, just big lakes where we would expect to be most confident (distanct to shore \> 1000m) +## Finally, just big lakes where we would expect to be most confident (distance to shore \> 1000m) ```{r} ggplot(matchups_filt %>% filter(distance > 1000), aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + @@ -206,6 +188,11 @@ ggplot(matchups, aes(x = residual, fill = sat)) + geom_vline(aes(xintercept = 0), color = 'red') + labs(title = 'Residuals across sensors') +ggplot(matchups, aes(x = residual, fill = join)) + + geom_density(alpha = .4) + + geom_vline(aes(xintercept = 0), color = 'red') + + labs(title = 'Residuals across sensors') + matchups %>% mutate(dist_group = cut_number(distance, 6)) %>% ggplot(aes(x = dist_group, y = residual)) + diff --git a/GEE_pull_functions.py b/GEE_pull_functions.py index a3e6cce..0dc2ad9 100644 --- a/GEE_pull_functions.py +++ b/GEE_pull_functions.py @@ -22,7 +22,7 @@ def AddFmask(image): .where(cloudshadow.gt(0), ee.Image(2)) .where(cloud.gt(0), ee.Image(4)) .updateMask(qa.gte(0))) - # mask the fmask so that it has the same footprint as the quality (BQA) band + ## mask the fmask so that it has the same footprint as the quality (BQA) band return image.addBands(fmask) @@ -34,10 +34,6 @@ def dpBuff(i): return i.buffer(buffdist) -## Remove geometries -def removeGeo(i): - return i.setGeometry(None) - ## Set up the reflectance pull def RefPull(image): @@ -66,10 +62,12 @@ def RefPull(image): def copyMeta(i): return i.copyProperties(image, ["CLOUD_COVER", 'SPACECRAFT_ID', 'system:index', 'DATE_ACQUIRED']) + + ## Remove geometries + def removeGeo(i): + return i.setGeometry(None) - - # Collect median reflectance and occurance values - # Make a cloud score, and get the water pixel count + # Collect reflectance values, cloud score, and pixel counts lsout = pixOut.reduceRegions(lakes, combinedReducer, 30) out = lsout.map(removeGeo) @@ -78,11 +76,11 @@ def copyMeta(i): return out -##Function for limiting the max number of tasks sent to -# earth engine at one time to avoid time out errors +## Function for limiting the max number of tasks sent to +## earth engine at one time to avoid time out errors def maximum_no_of_tasks(MaxNActive, waitingPeriod): - ##maintain a maximum number of active tasks + ## maintain a maximum number of active tasks time.sleep(10) ## initialize submitting jobs ts = list(ee.batch.Task.list()) @@ -94,7 +92,7 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): ## wait if the number of current active tasks reach the maximum number ## defined in MaxNActive while (NActive >= MaxNActive): - time.sleep(waitingPeriod) # if reach or over maximum no. of active tasks, wait for 2min and check again + time.sleep(waitingPeriod) ts = list(ee.batch.Task.list()) NActive = 0 for task in ts: diff --git a/LakeExport.py b/LakeExport.py index 2bdea4f..b32aeaf 100644 --- a/LakeExport.py +++ b/LakeExport.py @@ -3,28 +3,25 @@ """ @author: simontopp """ -#%% + import time import ee import os -#import pandas as pd -#import feather ## Initialize Earth Engine ee.Initialize() -#Source necessary functions. We do this instead of 'import' because of EE quirk. +## Source necessary functions. We do this instead of 'import' because of EE quirk. exec(open('GEE_pull_functions.py').read()) -#water = ee.Image("JRC/GSW1_1/GlobalSurfaceWater").select('occurrence').gt(80) - ## Bring in EE Assets -# Deepest point for CONUS PGDL Lakes -# DP Code available at https://zenodo.org/record/4136755#.X5d54pNKgUE +## Deepest point (Chebyshev center) for CONUS PGDL Lakes +## DP Code available at https://zenodo.org/record/4136755#.X5d54pNKgUE and +## https://code.earthengine.google.com/8dac409b220bdfb051bb469bc5b3c708 dp = (ee.FeatureCollection('users/sntopp/USGS/PGDL_lakes_deepest_point') .filterMetadata('distance', "greater_than", 60)) -#CONUS Boundary +## CONUS Boundary us = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017")\ .filterMetadata('country_na', 'equals', 'United States') @@ -32,16 +29,17 @@ wrs = ee.FeatureCollection('users/sntopp/wrs2_asc_desc')\ .filterBounds(us)\ .filterMetadata('MODE', 'equals', 'D') - + +## Run everything by path/row to speed up computation in EE. pr = wrs.aggregate_array('PR').getInfo() -#Bring in temp data +## Bring in temp data l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2") l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2") era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY") -#Standardize band names between the various collections and aggregate -#them into one image collection +## Standardize band names between the various collections and aggregate +## them into one image collection bn8 = ['ST_B10', 'ST_QA', 'QA_PIXEL'] bn57 = ['ST_B6', 'ST_QA', 'QA_PIXEL'] bns = ['temp','temp_qa','pixel_qa'] @@ -49,7 +47,7 @@ ls7 = l7.select(bn57, bns) ls8 = l8.select(bn8, bns) -##Do coarse cloud filtering +## Do coarse cloud filtering ls = ee.ImageCollection(ls7.merge(ls8))\ .filter(ee.Filter.lt('CLOUD_COVER', 50))\ .filterBounds(us) @@ -71,12 +69,10 @@ for tiles in pr: tile = wrs.filterMetadata('PR', 'equals', tiles) - # For some reason we need to cast this to a list and back to a - # feature collection + lakes = dp.filterBounds(tile.geometry())\ .map(dpBuff) - #lakes = ee.FeatureCollection(lakes.toList(10000)) stack = ls.filterBounds(tile.geometry().centroid()) out = stack.map(RefPull).flatten().filterMetadata('cScore_clouds','less_than',.5) dataOut = ee.batch.Export.table.toDrive(collection = out,\ @@ -84,9 +80,10 @@ folder = 'EE_TempPull',\ fileFormat = 'csv',\ selectors = ["system:index", "areakm", "cScore_clouds", "CLOUD_COVER", 'SPACECRAFT_ID', 'DATE_ACQUIRED', "distance", "lake_mix_layer_temperature", "lake_total_layer_temperature", "pCount_water", "site_id", "temp", "temp_qa"]) - #Check how many existing tasks are running and take a break if it's >15 + + ## Check how many existing tasks are running and take a break if it's >15 maximum_no_of_tasks(25, 120) - #Send next task. + ## Send next task. dataOut.start() counter = counter + 1 done.append(tiles) From f382c32c5525d8ea5a731963832c3aed54a10ab9 Mon Sep 17 00:00:00 2001 From: Simon Topp Date: Thu, 8 Jul 2021 13:55:01 -0400 Subject: [PATCH 06/15] Missed a couple comments in the PR! --- LakeExport.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/LakeExport.py b/LakeExport.py index b32aeaf..2fed694 100644 --- a/LakeExport.py +++ b/LakeExport.py @@ -30,7 +30,7 @@ .filterBounds(us)\ .filterMetadata('MODE', 'equals', 'D') -## Run everything by path/row to speed up computation in EE. +## Run everything by path/row to speed up computation in EE pr = wrs.aggregate_array('PR').getInfo() ## Bring in temp data @@ -38,8 +38,7 @@ l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2") era5 = ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY") -## Standardize band names between the various collections and aggregate -## them into one image collection +## Standardize band names between the various collections bn8 = ['ST_B10', 'ST_QA', 'QA_PIXEL'] bn57 = ['ST_B6', 'ST_QA', 'QA_PIXEL'] bns = ['temp','temp_qa','pixel_qa'] @@ -47,7 +46,8 @@ ls7 = l7.select(bn57, bns) ls8 = l8.select(bn8, bns) -## Do coarse cloud filtering +## Do coarse cloud filtering and aggregate +## them into one image collection ls = ee.ImageCollection(ls7.merge(ls8))\ .filter(ee.Filter.lt('CLOUD_COVER', 50))\ .filterBounds(us) From e7b6c9ebab55bf26bffdc19b8afc14b1af1ce9e6 Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Mon, 9 May 2022 16:05:12 -0400 Subject: [PATCH 07/15] Added python implementation of DP pull --- NHD_Deepest_Point_Pull.py | 101 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100755 NHD_Deepest_Point_Pull.py diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py new file mode 100755 index 0000000..9083fd2 --- /dev/null +++ b/NHD_Deepest_Point_Pull.py @@ -0,0 +1,101 @@ +import ee +import math +ee.Initialize() + +## Deepest point calculation adapted from Xiao Yang +### https: // doi.org / 10.5281 / zenodo.4136754 +# +# table_filt = nhdhr.filter(ee.Filter.gte('AREASQK', .01)) +# print(table_filt.size()) +# print(table_filt) +# +# Map.addLayer(nhdhr) +# // Map.centerObject(fail_point, 12) +# // print(blah) + + +def get_scale(polygon): + radius = polygon.get('areasqkm').getInfo() + radius = ee.Number(radius).divide(math.pi).sqrt().multiply(1000) + return radius.divide(7) + + +def getUTMProj(lon, lat): + # see + # https: // apollomapping.com / blog / gtm - finding - a - utm - zone - number - easily and + # https: // sis.apache.org / faq.html + utmCode = ee.Number(lon).add(180).divide(6).ceil().int() + output = ee.Algorithms.If(ee.Number(lat).gte(0), + ee.String('EPSG:326').cat(utmCode.format('%02d')), + ee.String('EPSG:327').cat(utmCode.format('%02d'))) + return output + +def GetLakeCenters(polygon): + + ## Calculate both the deepest point an centroid + ## for the inpout polygon ( or multipolygon) + ## for each input, export geometries for both centroid and deepest point and their distance to shore. + scale = get_scale(polygon) + geo = polygon.geometry() + ct = geo.centroid(scale) + utmCode = getUTMProj(ct.coordinates().getNumber(0), ct.coordinates().getNumber(1)) + + polygonImg = ee.Image.constant(1).toByte().paint(ee.FeatureCollection(ee.Feature(geo, None)), 0) + + dist = polygonImg.fastDistanceTransform(2056).updateMask(polygonImg.Not()).sqrt().reproject(utmCode, None, scale).multiply(scale) # convert unit from pixel to meter + + # dist = (polygonImg.fastDistanceTransform(2056).updateMask(polygonImg.not ()) + # .sqrt().reproject('EPSG:4326', None, scale).multiply(scale) # convert unit from pixel to meter + + maxDistance = (dist.reduceRegion( + reducer=ee.Reducer.max(), + geometry=geo, + scale=scale, + bestEffort=True, + tileScale=1 + ).getNumber('distance')) + + outputDp = (ee.Feature(dist.addBands(ee.Image.pixelLonLat()).updateMask(dist.gte(maxDistance)) + .sample(geo, scale).first())) + + dp = ee.Geometry.Point([outputDp.get('longitude'), outputDp.get('latitude')]) + + regions = ee.FeatureCollection([ee.Feature(dp, {'type': 'dp'})]) + + output = dist.sampleRegions( + collection=regions, + properties=['type'], + scale=scale, + tileScale=1, + geometries=True) + + return (ee.Feature(output.first()).copyProperties(polygon)) + +assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets'] + + +assets_state = (ee.FeatureCollection(f"{assets_parent[1]['id']}/NHDWaterbody") + .filter(ee.Filter.gte('areasqkm',0.0001)) + .filter(ee.Filter.inList('ftype',[361,436,390]))) + +dp = GetLakeCenters(ee.Feature(assets_state.first())) + +dp_buff = test.map(function(f) +{ +return (f.buffer(f.getNumber('distance')))}) + +var + + +colorDp = {color: 'cyan'} +Map.addLayer(nhdhr.filterBounds(geometry)) +Map.addLayer(dp_buff, colorDp, 'DpBuffer', true, 0.5) +Map.addLayer(test, colorDp, 'Deepest Point', true) +Map.centerObject(geometry) + +var +dp_out = nhdhr.filter(ee.Filter.gte('AREASQK', 0.001)) + .filter(ee.Filter.lt('AREASQK', 0.01)).map(GetLakeCenters) +Export.table.toAsset(dp_out, 'USGS/NHD_hr_dp_f361_436_390_gt1hectare') + + From f7e9b574873b38ac1696c278b84cb23df57eb8d4 Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Mon, 9 May 2022 16:11:35 -0400 Subject: [PATCH 08/15] within scene variance --- .gitignore | 0 01_TempMunge.Rmd | 152 +++++++++++++++++++++++++++++++++++++++--- GEE_pull_functions.py | 0 LakeExport.py | 0 README.md | 0 5 files changed, 141 insertions(+), 11 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 01_TempMunge.Rmd mode change 100644 => 100755 GEE_pull_functions.py mode change 100644 => 100755 LakeExport.py mode change 100644 => 100755 README.md diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/01_TempMunge.Rmd b/01_TempMunge.Rmd old mode 100644 new mode 100755 index cef58e9..173012e --- a/01_TempMunge.Rmd +++ b/01_TempMunge.Rmd @@ -35,6 +35,7 @@ munger <- function(file){ filter(!is.na(temp), cScore_clouds == 0, pCount_water > 5) %>% + mutate(LandsatID = map_chr(`system:index`, ~str_split(.,'_0000')[[1]][1])) %>% select(-`system:index`) return(df) } @@ -58,7 +59,6 @@ munged <- read_feather('data/out/landsat_temps.feather') ## Between Landsat 7 and 8 we have `r nrow(munged)` temperature observations. ```{r} - obs <- read_csv('data/in/lake_surface_temp_obs.csv') %>% rename(date = Date) @@ -68,7 +68,10 @@ day_plus_one <- obs %>% inner_join(munged %>% mutate(date = date + 1)) %>% mutat day_minus_one <- obs %>% inner_join(munged %>% mutate(date = date -1)) %>% mutate(join = 'day_minus_1') -matchups <- bind_rows(same_day, day_plus_one, day_minus_one) +matchups <- bind_rows(same_day, day_plus_one, day_minus_one) %>% + mutate(resid = wtemp_obs - temp_ls, + abs_error = abs(resid), + e5_flag = abs(wtemp_obs - temp_e5_mix)) matchups_filt <- matchups %>% filter(temp_ls_qa < 3, @@ -107,11 +110,12 @@ matchups %>% summarise(rmse = rmse(wtemp_obs, temp_ls), ## One quick look at Era5, it's not great ggplot(matchups, aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + scale_fill_viridis_c(trans = 'log10') + - geom_smooth(method = 'lm') + + geom_smooth(method = 'lm', aes(color = 'Best Fit')) + + geom_abline(aes(slope=1, intercept=0,color = '1:1')) + stat_poly_eq(formula = y~x, aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE) + - labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed') + labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed', titel='blue=Best Fit', 'Red=1:1') matchups %>% filter(!is.na(temp_e5_mix)) %>% @@ -137,6 +141,42 @@ General filters here are: - Scene cloud cover \< 20% - Distance to shore \> 90m ```{r} +matchups_filt <- matchups %>% + filter(temp_ls_qa < 3, + scene_cloud_cover < 10, + distance > 200, + temp_ls >=0, + wtemp_obs >=0, + e5_flag < 5, + join != 'day_minus_1' + ) + +plotter <- function(var){ + matchups %>% + mutate(bins= cut_number(get(var), 5)) %>% + ggplot(aes(x = bins, y = abs_error)) + + geom_violin(draw_quantiles = c(.25,.5,.75)) + + geom_hline(aes(yintercept = 0), color = 'red')+ + labs(x = var) +} + +plotter('areakm') +plotter('scene_cloud_cover') +plotter('e5_flag') +plotter('distance') +plotter('temp_ls_qa') + +matchups_filt <- matchups %>% + filter(areakm > .2, + temp_ls_qa < 3, + scene_cloud_cover < 20, + distance > 250, + temp_ls >= 0, + wtemp_obs >=0, + e5_flag < 5, + join != 'day_minus_1' + ) + ggplot(matchups_filt, aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + scale_fill_viridis_c(trans = 'log10') + geom_smooth(method = 'lm') + @@ -150,14 +190,15 @@ matchups_filt %>% summarise(rmse = rmse(wtemp_obs, temp_ls), smape = smape(wtemp_obs,temp_ls), bias = bias(wtemp_obs, temp_ls)) %>% mutate(across(everything(), ~round(.,4))) %>% - kableExtra::kable() %>% - kableExtra::kable_styling() + kableExtra::kable() + +check <- matchups_filt %>% mutate(se = (wtemp_obs - temp_ls)^2) ``` ## Finally, just big lakes where we would expect to be most confident (distance to shore \> 1000m) ```{r} -ggplot(matchups_filt %>% filter(distance > 1000), aes(x = wtemp_obs, y = temp_ls)) + geom_hex() + +ggplot(matchups_filt %>% filter(distance > 1000), aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + scale_fill_viridis_c(trans = 'log10') + geom_smooth(method = 'lm') + stat_poly_eq(formula = y~x, @@ -165,10 +206,11 @@ ggplot(matchups_filt %>% filter(distance > 1000), aes(x = wtemp_obs, y = temp_ls parse = TRUE) + labs(x = 'Observed Temps', y = 'Landsat Temps', title = 'Landsat and Observed') -matchups_filt %>% filter(distance > 1000) %>% summarise(rmse = rmse(wtemp_obs, temp_ls), - mae = mae(wtemp_obs,temp_ls), - smape = smape(wtemp_obs,temp_ls), - bias = bias(wtemp_obs, temp_ls)) %>% +matchups_filt %>% filter(distance > 1000) %>% + summarise(rmse = rmse(wtemp_obs, temp_ls), + mae = mae(wtemp_obs,temp_ls), + smape = smape(wtemp_obs,temp_ls), + bias = bias(wtemp_obs, temp_ls)) %>% mutate(across(everything(), ~round(.,4))) %>% kableExtra::kable() %>% kableExtra::kable_styling() @@ -227,3 +269,91 @@ ggplot(rmse.sf) + geom_sf(aes(fill = matchup.count)) + labs(title = 'Matchup Count') ``` + +## Take a look at within scene variance. +```{r} +summary_in <- matchups_filt + +scene_sd <- summary_in %>% group_by(LandsatID) %>% + summarise(count = n(), + resid_sd = sd(resid)) %>% + filter(count >5) + +overall_sd <- summary_in %>% filter(LandsatID %in% scene_sd$LandsatID) %>% + summarise(overall = sd(resid)) %>% .[[1]] + +ggplot(scene_sd, aes(x = resid_sd)) + geom_histogram() + geom_vline(aes(xintercept = overall_sd, color = 'Overall residual SD')) + +scene_counts <- dplyr::count(summary_in, LandsatID) %>% filter(n > 5) + +scene_bias <- summary_in %>% + filter(LandsatID %in% scene_counts$LandsatID) %>% + group_by(LandsatID) %>% + summarise(bias = bias(wtemp_obs, temp_ls)) + +scene_stats <- summary_in %>% + filter(LandsatID %in% scene_counts$LandsatID) %>% + left_join(scene_bias) %>% + mutate(temp_no_bias = temp_ls+bias) %>% + group_by(LandsatID) %>% + summarise(rmse_bias_removed = rmse(wtemp_obs, temp_no_bias), + scene_bias = mean(bias)) + +overall_bias = summary_in %>% filter(LandsatID %in% scene_counts$LandsatID) %>% + summarise(bias = bias(wtemp_obs, temp_ls)) %>% + .[[1]] + +overall_rmse = summary_in %>% filter(LandsatID %in% scene_counts$LandsatID) %>% + mutate(temp_ls = temp_ls + overall_bias) %>% + summarise(rmse = rmse(wtemp_obs,temp_ls)) +rmses <- tibble(overall_rmse_bias_removed = overall_rmse[[1]], median_scene_rmse_bias_removed = median(scene_stats$rmse_bias_removed)) %>% + pivot_longer(everything(), names_to = "RMSE_Summaries") + +ggplot(scene_stats, aes(x = rmse_bias_removed)) + geom_histogram() + + geom_vline(data = rmses, aes(xintercept = value, color = RMSE_Summaries)) + + labs(x = "Distribution of image specific rmse after removing image bias") +``` + + +```{r} +mendota <- read_csv('data/in/Mendota_Era5Land_1984_2021.csv') +mendotaObs <- obs %>% filter(site_id == "nhdhr_143249470") + +mendota <- mendota %>% mutate(lake_mix_layer_temperature = lake_mix_layer_temperature - 273.15, + date = as.Date(date)) %>% + inner_join(mendotaObs) + +library(plotly) + +ggplotly( + ggplot(mendota, aes(x=date)) + + geom_point(aes(y = lake_mix_layer_temperature, color='Era5')) + + geom_point(aes(y=wtemp_obs, color = 'Obs')) + + geom_line(aes(y = lake_mix_layer_temperature, color='Era5')) + + geom_line(aes(y=wtemp_obs, color = 'Obs'))) + +ggplot(mendota, aes(x=lake_mix_layer_temperature)) + geom_histogram() + +ggplot + + +mendota %>% rename(temp_e5_mix = lake_mix_layer_temperature) %>% + summarise(rmse = rmse(wtemp_obs, temp_e5_mix), + mae = mae(wtemp_obs,temp_e5_mix), + smape = smape(wtemp_obs,temp_e5_mix), + bias = bias(wtemp_obs, temp_e5_mix)) %>% + mutate(across(everything(), ~round(.,4))) %>% + kableExtra::kable() %>% + kableExtra::kable_styling() + +mendota %>% rename(temp_e5_mix = lake_mix_layer_temperature) %>% +ggplot(.,aes(x = wtemp_obs, y = temp_e5_mix)) + geom_hex() + + scale_fill_viridis_c(trans = 'log10') + + geom_smooth(method = 'lm', aes(color = 'Best Fit')) + + geom_abline(aes(slope=1, intercept=0,color = '1:1')) + + stat_poly_eq(formula = y~x, + aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), + parse = TRUE) + + labs(x = 'Observed Temps', y = 'Era5 Mixed Layer Temps', 'Era5 and Observed', titel='blue=Best Fit', 'Red=1:1') +``` + diff --git a/GEE_pull_functions.py b/GEE_pull_functions.py old mode 100644 new mode 100755 diff --git a/LakeExport.py b/LakeExport.py old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 From ffb4558befca15993e553d407248ce067a511a56 Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Tue, 10 May 2022 15:35:36 -0400 Subject: [PATCH 09/15] Batched calculations by state and swapped EE community NHD dataset --- NHD_Deepest_Point_Pull.py | 73 ++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py index 9083fd2..f7fe587 100755 --- a/NHD_Deepest_Point_Pull.py +++ b/NHD_Deepest_Point_Pull.py @@ -1,23 +1,15 @@ import ee import math +import time ee.Initialize() ## Deepest point calculation adapted from Xiao Yang ### https: // doi.org / 10.5281 / zenodo.4136754 -# -# table_filt = nhdhr.filter(ee.Filter.gte('AREASQK', .01)) -# print(table_filt.size()) -# print(table_filt) -# -# Map.addLayer(nhdhr) -# // Map.centerObject(fail_point, 12) -# // print(blah) - - +### Functions def get_scale(polygon): - radius = polygon.get('areasqkm').getInfo() + radius = polygon.get('areasqkm') radius = ee.Number(radius).divide(math.pi).sqrt().multiply(1000) - return radius.divide(7) + return radius.divide(20) def getUTMProj(lon, lat): @@ -69,33 +61,52 @@ def GetLakeCenters(polygon): tileScale=1, geometries=True) - return (ee.Feature(output.first()).copyProperties(polygon)) + return (ee.Feature(output.first()).copyProperties(polygon,['permanent','areasqkm'])) -assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets'] +def buff_dp(dp): + return dp.buffer(dp.getNumber('distance')) -assets_state = (ee.FeatureCollection(f"{assets_parent[1]['id']}/NHDWaterbody") - .filter(ee.Filter.gte('areasqkm',0.0001)) - .filter(ee.Filter.inList('ftype',[361,436,390]))) +def maximum_no_of_tasks(MaxNActive, waitingPeriod): + ## maintain a maximum number of active tasks + time.sleep(10) + ## initialize submitting jobs + ts = list(ee.batch.Task.list()) -dp = GetLakeCenters(ee.Feature(assets_state.first())) + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + ## wait if the number of current active tasks reach the maximum number + ## defined in MaxNActive + while (NActive >= MaxNActive): + time.sleep(waitingPeriod) + ts = list(ee.batch.Task.list()) + NActive = 0 + for task in ts: + if ('RUNNING' in str(task) or 'READY' in str(task)): + NActive += 1 + return () -dp_buff = test.map(function(f) -{ -return (f.buffer(f.getNumber('distance')))}) -var +assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets'] +assets_parent = assets_parent[0:2] +for i in range(len(assets_parent)): + state_asset = assets_parent[i]['id'] + assets_state = (ee.FeatureCollection(f"{state_asset}/NHDWaterbody") + .filter(ee.Filter.gte('areasqkm',0.0001)) + .filter(ee.Filter.inList('ftype',[361,436,390]))) + + dp = ee.FeatureCollection(assets_state).map(GetLakeCenters) + dataOut = ee.batch.Export.table.toAsset(collection=dp,description=state_asset.split('/')[-1],assetId=f"projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint/{state_asset.split('/')[-1]}") -colorDp = {color: 'cyan'} -Map.addLayer(nhdhr.filterBounds(geometry)) -Map.addLayer(dp_buff, colorDp, 'DpBuffer', true, 0.5) -Map.addLayer(test, colorDp, 'Deepest Point', true) -Map.centerObject(geometry) + ## Check how many existing tasks are running and take a break if it's >15 + maximum_no_of_tasks(10, 240) + ## Send next task. + dataOut.start() + print(state_asset.split('/')[-1]) -var -dp_out = nhdhr.filter(ee.Filter.gte('AREASQK', 0.001)) - .filter(ee.Filter.lt('AREASQK', 0.01)).map(GetLakeCenters) -Export.table.toAsset(dp_out, 'USGS/NHD_hr_dp_f361_436_390_gt1hectare') +#f"projects/earthengine-legacy/assets/users/sntopp/NHD/{state_asset.split('/')[-1]}/NHDDeepestPoint" From 8fa4fbb03fb60bf2b50070a44e79c791d641f5d3 Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Mon, 16 May 2022 09:59:10 -0400 Subject: [PATCH 10/15] small fixes to batched DP pull --- NHD_Deepest_Point_Pull.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py index f7fe587..4d817c1 100755 --- a/NHD_Deepest_Point_Pull.py +++ b/NHD_Deepest_Point_Pull.py @@ -90,11 +90,11 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets'] -assets_parent = assets_parent[0:2] +##assets_parent = assets_parent[5:15] for i in range(len(assets_parent)): state_asset = assets_parent[i]['id'] assets_state = (ee.FeatureCollection(f"{state_asset}/NHDWaterbody") - .filter(ee.Filter.gte('areasqkm',0.0001)) + .filter(ee.Filter.gte('areasqkm',0.001)) .filter(ee.Filter.inList('ftype',[361,436,390]))) dp = ee.FeatureCollection(assets_state).map(GetLakeCenters) @@ -102,7 +102,7 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): dataOut = ee.batch.Export.table.toAsset(collection=dp,description=state_asset.split('/')[-1],assetId=f"projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint/{state_asset.split('/')[-1]}") ## Check how many existing tasks are running and take a break if it's >15 - maximum_no_of_tasks(10, 240) + maximum_no_of_tasks(15, 240) ## Send next task. dataOut.start() print(state_asset.split('/')[-1]) From 508cdc7cf81ec66e3d9dce3d7fa3975612a4563b Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Fri, 3 Jun 2022 14:36:12 -0400 Subject: [PATCH 11/15] Added upper and lower bounds to scale, removed great lakes --- NHD_Deepest_Point_Pull.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py index 4d817c1..0800a66 100755 --- a/NHD_Deepest_Point_Pull.py +++ b/NHD_Deepest_Point_Pull.py @@ -9,7 +9,10 @@ def get_scale(polygon): radius = polygon.get('areasqkm') radius = ee.Number(radius).divide(math.pi).sqrt().multiply(1000) - return radius.divide(20) + scale = radius.divide(20) + scale = ee.Algorithms.If(ee.Number(scale).lte(10),10,scale) + scale = ee.Algorithms.If(ee.Number(scale).gte(500),500,scale) + return ee.Number(scale) def getUTMProj(lon, lat): @@ -88,13 +91,17 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): NActive += 1 return () - +assets_done = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint'})['assets'] +ids_done = [i['id'].split('/')[-1] for i in assets_done] assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets'] -##assets_parent = assets_parent[5:15] +assets_parent = [i for i in assets_parent if i['id'].split('/')[-1] not in ids_done] + + for i in range(len(assets_parent)): state_asset = assets_parent[i]['id'] assets_state = (ee.FeatureCollection(f"{state_asset}/NHDWaterbody") .filter(ee.Filter.gte('areasqkm',0.001)) + .filter(ee.Filter.lte('areasqkm',5000)) #Remove Great Lakes .filter(ee.Filter.inList('ftype',[361,436,390]))) dp = ee.FeatureCollection(assets_state).map(GetLakeCenters) From 2479b3e7748d36de8c68f35131289f43cff9ba4e Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Wed, 29 Jun 2022 15:44:07 -0400 Subject: [PATCH 12/15] Added EE calculation of area for additional threshold --- NHD_Deepest_Point_Pull.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py index 0800a66..2eb77d0 100755 --- a/NHD_Deepest_Point_Pull.py +++ b/NHD_Deepest_Point_Pull.py @@ -10,7 +10,7 @@ def get_scale(polygon): radius = polygon.get('areasqkm') radius = ee.Number(radius).divide(math.pi).sqrt().multiply(1000) scale = radius.divide(20) - scale = ee.Algorithms.If(ee.Number(scale).lte(10),10,scale) + #scale = ee.Algorithms.If(ee.Number(scale).lte(10),10,scale) scale = ee.Algorithms.If(ee.Number(scale).gte(500),500,scale) return ee.Number(scale) @@ -91,10 +91,14 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): NActive += 1 return () +def calc_area(feature): + return(feature.set('area_calc',feature.area().divide(1e6))) + assets_done = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint'})['assets'] ids_done = [i['id'].split('/')[-1] for i in assets_done] assets_parent = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/projects/sat-io/open-datasets/NHD'})['assets'] assets_parent = [i for i in assets_parent if i['id'].split('/')[-1] not in ids_done] +#assets_parent = [i for i in assets_parent if i['id'].split('/')[-1] not in ['NHD_MO','NHD_TX','NHD_AK']] for i in range(len(assets_parent)): @@ -104,7 +108,10 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): .filter(ee.Filter.lte('areasqkm',5000)) #Remove Great Lakes .filter(ee.Filter.inList('ftype',[361,436,390]))) - dp = ee.FeatureCollection(assets_state).map(GetLakeCenters) + ## Added to trouble shoot NM where some of the NHD areas are don't match the actual polygon areas + assets_state=ee.FeatureCollection(assets_state.map(calc_area)).filter(ee.Filter.gte('area_calc',0.001)) + + dp = assets_state.map(GetLakeCenters) dataOut = ee.batch.Export.table.toAsset(collection=dp,description=state_asset.split('/')[-1],assetId=f"projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint/{state_asset.split('/')[-1]}") From a4ab1d66cb26017271a4846532fe9aafcec2a497 Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Wed, 13 Jul 2022 14:55:48 -0600 Subject: [PATCH 13/15] Started to munge national DP data >1 ha. --- NHD_Deepest_Point_Pull.py | 10 ++- NHD_HR_Lake_Pull.R | 137 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) create mode 100755 NHD_HR_Lake_Pull.R diff --git a/NHD_Deepest_Point_Pull.py b/NHD_Deepest_Point_Pull.py index 2eb77d0..c8fa1cf 100755 --- a/NHD_Deepest_Point_Pull.py +++ b/NHD_Deepest_Point_Pull.py @@ -48,7 +48,7 @@ def GetLakeCenters(polygon): scale=scale, bestEffort=True, tileScale=1 - ).getNumber('distance')) + ).getNumber('distance').int16()) outputDp = (ee.Feature(dist.addBands(ee.Image.pixelLonLat()).updateMask(dist.gte(maxDistance)) .sample(geo, scale).first())) @@ -124,3 +124,11 @@ def calc_area(feature): #f"projects/earthengine-legacy/assets/users/sntopp/NHD/{state_asset.split('/')[-1]}/NHDDeepestPoint" +state_dps = ee.data.listAssets({'parent': 'projects/earthengine-legacy/assets/users/sntopp/NHD/DeepestPoint'})['assets'] + +for i in state_dps: + dps = ee.FeatureCollection(i['id']) + id = i['id'].split('/')[-1] + dpsOut = ee.batch.Export.table.toDrive(dps,id,'EE_DP_Exports',id) + maximum_no_of_tasks(15, 60) + dpsOut.start() \ No newline at end of file diff --git a/NHD_HR_Lake_Pull.R b/NHD_HR_Lake_Pull.R new file mode 100755 index 0000000..239e090 --- /dev/null +++ b/NHD_HR_Lake_Pull.R @@ -0,0 +1,137 @@ +## Mess around with tyler kings stuff +#resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv') +library(tidyverse) +library(dataRetrieval) +library(stringr) +library(sf) +library(httr) +library(sbtools) +sf::sf_use_s2(FALSE) +library(mapview) +mapviewOptions(fgb=FALSE) +#library(smoothr) +# +# ## Check the layers +# st_layers('/Users/stopp/Downloads/NHD_H_National_GDB/NHD_H_National_GDB.gdb') +# +# ## Pull the first few waterbodies to check the fields +# check_fields <- st_read('/Users/stopp/Downloads/NHD_H_National_GDB/NHD_H_National_GDB.gdb', +# query = 'SELECT * FROM "NHDWaterbody" WHERE FID < 5', layer = 'NHDWaterbody') +# +# ## Set up a query to just bring in the fields we want for lakes and reservoirs (get rid of swamps, etc) +# lakes <- st_read('/Users/stopp/Downloads/NHD_H_National_GDB/NHD_H_National_GDB.gdb', +# query = 'SELECT PERMANENT_IDENTIFIER, GNIS_NAME, AREASQKM, ELEVATION, FTYPE, FCODE FROM "NHDWaterbody" WHERE FTYPE = 361 OR FTYPE = 436 OR FTYPE = 390', layer = 'NHDWaterbody') #%>% +# +# #### Simplify everything in chunks +# step = 100000 +# for(i in seq(0,nrow(lakes),step)){ +# +# end <- min(i+step, nrow(lakes)) +# # For re-runs, skip what's been done +# files <- list.files('data/out/tmp_waterbody_chunks') +# if(T %in% grepl(sprintf('%i_%i.shp',i+1,end),files)) next +# +# ## Chunk and simplify +# lakes_out <- lakes[(i+1):end,] %>% +# st_cast(., "MULTIPOLYGON")%>% +# # bad Idaho site! +# filter(PERMANENT_IDENTIFIER != '{5BEDE13F-C94B-4501-B979-E00C29EA374B}') %>% +# sf::st_transform(crs = 4326) %>% +# sf::st_zm() %>% +# sf::st_buffer(dist = 0) %>% +# sf::st_simplify(preserveTopology = TRUE, dTolerance = 0.0001) +# +# st_write(lakes_out,sprintf('data/out/tmp_waterbody_chunks/%i_%i.shp',i+1,i+step)) +# } +# +# rm(lakes) +# +# chunks <- list.files('data/out/tmp_waterbody_chunks', pattern='*.shp',full.names = T) +# lakes_simplified <- purrr::map_dfr(chunks, st_read) +# +# object.size(lakes_simplified) +# +# length(unique(lakes_simplified$PERMANE)) +# +# mapview(sample_n(lakes_simplified,5000)) +# +# st_write(lakes_simplified, 'data/out/NHD_waterbodies/NHD_waterbodies_simplified.shp') + +tk_resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv') + +files <- list.files('../../../My Drive/EE_DP_Exports', full.names = T) + +options(digits=15) +dp_national <- read_csv(files[1]) %>% + mutate(coords = map(.geo, ~as.numeric(unlist(regmatches(.x,gregexpr("-?[[:digit:]]+\\.*-?[[:digit:]]*",.x))))), + long = map_dbl(coords, 1), + lat = map_dbl(coords,2), + distance = round(distance,3), + areasqkm = round(areasqkm, 4)) %>% + select(-c(coords, .geo,`system:index`)) + +for(i in files[2:length(files)]){ + state <- read_csv(i,show_col_types = F) %>% + mutate(coords = map(.geo, ~as.numeric(unlist(regmatches(.x,gregexpr("-?[[:digit:]]+\\.*-?[[:digit:]]*",.x))))), + long = map_dbl(coords, 1), + lat = map_dbl(coords,2), + distance = round(distance,3), + areasqkm = round(areasqkm, 4)) %>% + select(-c(coords, .geo,`system:index`)) + + dp_national <- dp_national %>% + bind_rows(state) %>% + distinct(permanent,.keep_all=T) +} + +dp_national <- dp_national %>% + select(areasqkm:lat) + +library(feather) +write_feather(dp_national, 'data/out/nhd_hr_1ha_deepest_point_noTX.feather') + + +match_ups <- dp_national %>% + inner_join(.,tk_resolvable, by = c('permanent'='permanent_identifier')) +rm(dp_national, tk_resolvable) + +match_ups <- match_ups %>% mutate(resolvable = ifelse(resolvable == 'resolvable', 'Konrad resolvable', 'Konrad not resolvable')) + +thresholds <- tibble(distance = c(20,30,40,50,60,70,80,90)) + +tk_resolvable <- match_ups %>% filter(resolvable == 'Konrad resolvable') + +thresholds$counts <- colSums(outer(tk_resolvable$distance, setNames(thresholds$distance, thresholds$distance), "<")) +thresholds$labels <- paste0(thresholds$distance,' meters (n=',thresholds$counts,')') + +tk_resolvable %>% + ggplot(.,aes(x=distance)) + stat_bin(aes(y=cumsum(..count..)),geom="step") + + geom_vline(data= thresholds, aes(xintercept=distance, color = labels)) + + scale_color_viridis_d() + + scale_x_log10() + + theme_bw() + + facet_wrap(~resolvable, scales = 'free') + + labs(x = 'Maximum Distance from Shore (Meters)', + y= 'Cumulative Lake Count', + color='Res. Threshold (n disagreement)') + + +thresholds <- tibble(distance = c(20,30,40,50,60,70,80,90)) + +tk_non_resolvable <- match_ups %>% filter(resolvable == 'Konrad not resolvable') + +thresholds$counts <- colSums(outer(tk_non_resolvable$distance, setNames(thresholds$distance, thresholds$distance), ">")) +thresholds$labels <- paste0(thresholds$distance,' meters (n=',thresholds$counts,')') + +tk_non_resolvable %>% + ggplot(.,aes(x=distance)) + stat_bin(aes(y=cumsum(..count..)),geom="step") + + geom_vline(data= thresholds, aes(xintercept=distance, color = labels)) + + scale_color_viridis_d() + + scale_x_log10() + + theme_bw() + + facet_wrap(~resolvable, scales = 'free') + + labs(x = 'Maximum Distance from Shore (Meters)', + y= 'Cumulative Lake Count', + color='Res. Threshold (n disagreement)') + + From 552501617ee0b503e3a44915edbb4063981a58db Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Thu, 9 Feb 2023 14:41:21 -0500 Subject: [PATCH 14/15] Raw trends analysis and legacy TK resolvable push. --- 02_raw_trends.R | 125 +++++++++++++++++++++++++++++++++++++++++++++ NHD_HR_Lake_Pull.R | 80 +++++++++-------------------- 2 files changed, 149 insertions(+), 56 deletions(-) create mode 100755 02_raw_trends.R diff --git a/02_raw_trends.R b/02_raw_trends.R new file mode 100755 index 0000000..c5c219a --- /dev/null +++ b/02_raw_trends.R @@ -0,0 +1,125 @@ +#### This script is an initial analysis of lake surface temperature trends +#### using temperatures from the Landsat. ITS VERY PRELIMINARY and hacked together using +#### two different EE pulls. +library(tidyverse) +library(feather) +library(lubridate) +library(trend) +library(sf) +library(scales) + +l7l8 <- read_feather('data/out/landsat_temps.feather') +l7l8 <- l7l8 %>% + select(areakm, sat, date, distance, site_id, temp_ls, temp_ls_qa) %>% + filter(sat=='LANDSAT_7') + +ggplot(l7l8, aes(x=temp_ls)) + geom_histogram() + +l8l9 <- read_csv('/Users/stopp/Documents/repos/landsat-lake-temp-pull/data/out/L8_L9_cb_temps_munged.csv') +l8l9 <- l8l9 %>% + select(areakm, sat = SPACECRAFT_ID, date, distance, site_id, temp_ls = ls_temp, temp_ls_qa = ls_temp_qa) %>% + mutate(sat=as.character(sat)) + +ggplot(l8l9, aes(x=temp_ls)) + geom_histogram() + +ls_temps <- bind_rows(l7l8,l8l9) %>% + mutate(site_id = factor(site_id)) + +rm(l7l8,l8l9) + +seasonally_aggregated <- ls_temps %>% + mutate(month = month(date), + year = year(date), + season = case_when((month %in% c(6,7,8)) ~ 'JJA', + (month %in% c(9,10,11)) ~ "SON", + (month %in% c(12,1,2)) ~ 'DJF', + (month %in% c(3,4,5)) ~'MAM'), + season=factor(season)) %>% + group_by(site_id, year, season, distance, areakm) %>% + summarize(temp_ls = mean(temp_ls), + count = n()) + +seasonally_aggregated <- seasonally_aggregated %>% filter(count > 1) + +counts <- seasonally_aggregated %>% + group_by(site_id, season) %>% + mutate(count = n()) + +seasonally_aggregated <- seasonally_aggregated[counts$count > 20,] + +trends <- seasonally_aggregated %>% + group_by(site_id, season) %>% + arrange(year) %>% + nest() %>% + mutate(mk = purrr::map(data, ~sens.slope(.$temp_ls)), + sen.slope = purrr::map_dbl(mk, 'estimates'), + p.value = purrr::map_dbl(mk, 'p.value')) %>% + select(site_id, season, sen.slope, p.value) + +ggplot(trends, aes(x=sen.slope, fill = season)) + + geom_density(alpha=.4) + + xlim(-.3,.4) + + geom_vline(aes(xintercept=0), color = 'red') + + scale_fill_viridis_d() + +### Look at spatial trends +lakes_sf <- read_csv('data/in/lake_metadata_20211217.csv') %>% + st_as_sf(coords = c('lake_lon_deg','lake_lat_deg'), crs = 4326) %>% + select(site_id, elevation_m, area_m2) %>% + st_transform(5070) + +trends_sf <- lakes_sf %>% inner_join(trends) + +usa <- maps::map('usa', plot = F) %>% st_as_sf() %>% st_transform(5070) + +grid <- st_make_grid(usa, cellsize = c(75000,75000), square = F) %>% st_as_sf() %>% mutate(ID = row_number()) + +grid_means <- grid %>% st_join(trends_sf %>% filter(abs(sen.slope) < .4) %>%st_transform(st_crs(grid)), left = F) %>% + st_set_geometry(NULL) %>% group_by(ID, season) %>% + summarize(mean_trend = mean(sen.slope)) + +means_sf <- grid %>% inner_join(grid_means) + +ggplot() + + geom_sf(data = usa) + + geom_sf(data = means_sf, aes(fill = mean_trend)) + + scale_fill_gradient2(low = muted('blue'), high=muted('red'), 'Mean Trend (°C/yr)') + + ggthemes::theme_map(base_size = 11) + + theme(legend.position = 'bottom') + + facet_wrap(~season) + + ggtitle("Mean Trend in Lake Temperature Since 1999") + +### Binned by Elevation +trends_sf %>% + st_set_geometry(NULL) %>% + mutate(elevation_bin = cut_interval(elevation_m, 5)) %>% + ggplot(aes(x=elevation_bin,y = sen.slope,fill=elevation_bin)) + + coord_cartesian(ylim=c(-.15,.25)) + + geom_violin() + + geom_boxplot(width=.2) + + scale_fill_viridis_d(option='plasma', alpha=.3) + + theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1), + legend.position = 'none') + + labs(x = 'Elevation (m)', y = 'Trend (°C/yr)') + +### Binnned by lake size +trends_sf %>% + st_set_geometry(NULL) %>% + mutate(size_bin = cut_number(area_m2/1e6, 10)) %>% + ggplot(aes(x=size_bin,y = sen.slope,fill=size_bin)) + + coord_cartesian(ylim=c(-.15,.25)) + + geom_violin() + + geom_boxplot(width=.2) + + scale_fill_viridis_d(option='plasma', alpha=.3) + + theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1), + legend.position = 'none') + + labs(x = 'Lake Size Bin (km2)', y = 'Trend (°C/yr)') + +seasonal_summary <- seasonally_aggregated %>% + group_by(season, site_id) %>% + mutate(temp_scaled = scale(temp_ls)) %>% + ungroup() %>% + group_by(season, year) %>% + summarise(mean_temp= mean(temp_scaled), + sd=sd(temp_scaled)) + diff --git a/NHD_HR_Lake_Pull.R b/NHD_HR_Lake_Pull.R index 239e090..e364dc7 100755 --- a/NHD_HR_Lake_Pull.R +++ b/NHD_HR_Lake_Pull.R @@ -1,64 +1,14 @@ ## Mess around with tyler kings stuff -#resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv') library(tidyverse) -library(dataRetrieval) -library(stringr) library(sf) -library(httr) -library(sbtools) -sf::sf_use_s2(FALSE) library(mapview) +library(feather) mapviewOptions(fgb=FALSE) -#library(smoothr) -# -# ## Check the layers -# st_layers('/Users/stopp/Downloads/NHD_H_National_GDB/NHD_H_National_GDB.gdb') -# -# ## Pull the first few waterbodies to check the fields -# check_fields <- st_read('/Users/stopp/Downloads/NHD_H_National_GDB/NHD_H_National_GDB.gdb', -# query = 'SELECT * FROM "NHDWaterbody" WHERE FID < 5', layer = 'NHDWaterbody') -# -# ## Set up a query to just bring in the fields we want for lakes and reservoirs (get rid of swamps, etc) -# lakes <- st_read('/Users/stopp/Downloads/NHD_H_National_GDB/NHD_H_National_GDB.gdb', -# query = 'SELECT PERMANENT_IDENTIFIER, GNIS_NAME, AREASQKM, ELEVATION, FTYPE, FCODE FROM "NHDWaterbody" WHERE FTYPE = 361 OR FTYPE = 436 OR FTYPE = 390', layer = 'NHDWaterbody') #%>% -# -# #### Simplify everything in chunks -# step = 100000 -# for(i in seq(0,nrow(lakes),step)){ -# -# end <- min(i+step, nrow(lakes)) -# # For re-runs, skip what's been done -# files <- list.files('data/out/tmp_waterbody_chunks') -# if(T %in% grepl(sprintf('%i_%i.shp',i+1,end),files)) next -# -# ## Chunk and simplify -# lakes_out <- lakes[(i+1):end,] %>% -# st_cast(., "MULTIPOLYGON")%>% -# # bad Idaho site! -# filter(PERMANENT_IDENTIFIER != '{5BEDE13F-C94B-4501-B979-E00C29EA374B}') %>% -# sf::st_transform(crs = 4326) %>% -# sf::st_zm() %>% -# sf::st_buffer(dist = 0) %>% -# sf::st_simplify(preserveTopology = TRUE, dTolerance = 0.0001) -# -# st_write(lakes_out,sprintf('data/out/tmp_waterbody_chunks/%i_%i.shp',i+1,i+step)) -# } -# -# rm(lakes) -# -# chunks <- list.files('data/out/tmp_waterbody_chunks', pattern='*.shp',full.names = T) -# lakes_simplified <- purrr::map_dfr(chunks, st_read) -# -# object.size(lakes_simplified) -# -# length(unique(lakes_simplified$PERMANE)) -# -# mapview(sample_n(lakes_simplified,5000)) -# -# st_write(lakes_simplified, 'data/out/NHD_waterbodies/NHD_waterbodies_simplified.shp') tk_resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv') + +### Read in the exports from EE and do a little munging files <- list.files('../../../My Drive/EE_DP_Exports', full.names = T) options(digits=15) @@ -87,15 +37,17 @@ for(i in files[2:length(files)]){ dp_national <- dp_national %>% select(areasqkm:lat) -library(feather) -write_feather(dp_national, 'data/out/nhd_hr_1ha_deepest_point_noTX.feather') +#write_feather(dp_national, 'data/out/nhd_hr_1ha_deepest_point_noTX.feather') +dp_national <- read_feather('data/out/nhd_hr_1ha_deepest_point_noTX.feather') +tk_resolvable <- vroom::vroom('/Users/stopp/Downloads/NHDWaterbody_resolvable.csv') match_ups <- dp_national %>% inner_join(.,tk_resolvable, by = c('permanent'='permanent_identifier')) + rm(dp_national, tk_resolvable) -match_ups <- match_ups %>% mutate(resolvable = ifelse(resolvable == 'resolvable', 'Konrad resolvable', 'Konrad not resolvable')) +match_ups <- match_ups %>% mutate(resolvable = ifelse(resolvable == 1, 'Konrad resolvable', 'Konrad not resolvable')) thresholds <- tibble(distance = c(20,30,40,50,60,70,80,90)) @@ -135,3 +87,19 @@ tk_non_resolvable %>% color='Res. Threshold (n disagreement)') + +tk30_res <- tk_resolvable %>% filter(distance < 30) +tk30_not_res <- tk_non_resolvable %>% filter(distance >30) + +ggplot(tk30_res, aes(x = areasqkm)) + + geom_histogram() + +ggplot(tk30_not_res, aes(x= areasqkm)) + + geom_histogram() + + scale_x_log10() + + +tk30_res %>% slice_sample(prop=.2) %>% + st_as_sf(., coords=c('long','lat'), crs=4326) %>% + mapview(zcol = 'distance') + From 2c497a4ac4446e9fde17be260b484e1893a7a422 Mon Sep 17 00:00:00 2001 From: Simon Topp <33098707+SimonTopp@users.noreply.github.com> Date: Thu, 9 Feb 2023 15:16:47 -0500 Subject: [PATCH 15/15] add trends --- 02_raw_trends.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/02_raw_trends.R b/02_raw_trends.R index c5c219a..cea5a1d 100755 --- a/02_raw_trends.R +++ b/02_raw_trends.R @@ -115,11 +115,21 @@ trends_sf %>% legend.position = 'none') + labs(x = 'Lake Size Bin (km2)', y = 'Trend (°C/yr)') +### Look at actual timeseries to see if they look really goofy seasonal_summary <- seasonally_aggregated %>% - group_by(season, site_id) %>% + ungroup() %>% + mutate(area_bin = cut_number(areakm, 10)) %>% + group_by(season, site_id, area_bin) %>% mutate(temp_scaled = scale(temp_ls)) %>% ungroup() %>% - group_by(season, year) %>% + group_by(season, year, area_bin) %>% summarise(mean_temp= mean(temp_scaled), sd=sd(temp_scaled)) +### +ggplot(seasonal_summary, aes(x=year, y = mean_temp, color = area_bin, fill = area_bin)) + + geom_ribbon(aes(ymin = mean_temp-sd, ymax=mean_temp+sd), alpha = .3) + + geom_line() + + facet_wrap(~season) + +