diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 73e0036..92b9010 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ data/ 2_batch_summary.R sites_with_comid.csv /obsolete +.aider* +nadp/ +*.csv diff --git a/1_batch_delineation.Rmd b/1_batch_delineation.Rmd old mode 100644 new mode 100755 diff --git a/1_doe_maps.Rmd b/1_doe_maps.Rmd old mode 100644 new mode 100755 diff --git a/2_batch_summary_nhd.R b/2_batch_summary_nhd.R old mode 100644 new mode 100755 index 7c875be..b479cc1 --- a/2_batch_summary_nhd.R +++ b/2_batch_summary_nhd.R @@ -1,5 +1,6 @@ #batch summarizing of watershed data for the continental USA #Mike Vlah (vlahm13@gmail.com) +#updated 2022-07-28 #use these tools to: #1. acquire NHDPlusV2 COMIDs based on lat/long @@ -16,23 +17,43 @@ #NOTE: these tools necessarily download a lot of large datasets and store them #in memory. keep an eye on your usage. -library(stringr) -library(dplyr) + library(plyr) -# devtools::install_github("USGS-R/nhdplusTools") +library(tidyverse) library(nhdplusTools) -# devtools::install_github("jsta/nhdR") library(nhdR) library(RCurl) library(sf) -# library(MODISTools) +library(mapview) + +mv = mapview::mapview + +options(timeout = 5000) + +#establish save location for NHD HR files (some place you don't mind accumulating a few gigs). +#you could make it a temporary directory. +nhd_hr_dir = '~/git/macrosheds/data_acquisition/data/general/nhd_hr' + +#this is where maps of site, NHDPlusV2 flowlines, NHD HR flowlines will be saved +mapview_save_dir <- '~/git/charlotte_observer/poultry_farms/out/monitoring_location_maps' # 1. 
setup and helper functions #### -setwd('~/Desktop/untracked/watershed_geohax/') -sites = read.csv('site_data.csv', stringsAsFactors=FALSE) WGS84 = 4326 #EPSG code for coordinate reference system +#create site data for example run +sites = tribble( + ~region, ~sitecode, ~sitename, ~latitude, ~longitude, + 'AZ', 'SC', 'Sycamore Creek', 33.7532, -111.5060, + 'CT', 'Unio', 'Farmington River at Unionville', 41.7555, -72.8870, + 'MD', 'POBR', 'Pond Branch', 39.4803, -76.6875, + 'NC', 'UEno', 'East Eno River', 36.1359, -79.1588, + 'VT', 'Pass', 'Passumpsic River', 44.3656, -72.0393, + 'WI', 'BRW', 'Brewery Creek', 43.1250, -89.6350 +) +# ) %>% st_as_sf(coords = c('longitude', 'latitude'), crs = WGS84) + + comid_from_point = function(lat, long, CRS) { pt = sf::st_point(c(long, lat)) ptc = sf::st_sfc(pt, crs=CRS) @@ -48,6 +69,36 @@ vpu_from_point = function(lat, long, CRS) { return(VPU) } +get_nearby_nhd <- function(lat, long, CRS, buf_dist = 1000){ + + site = sf::st_point(c(long, lat)) %>% sf::st_sfc(crs = CRS) + + site_buf <- sf::st_buffer(x = site, + dist = buf_dist) + site_box <- st_bbox(site_buf) + + subset_file <- tempfile(fileext = '.gpkg') + + subset <- try({ + nhdplusTools::subset_nhdplus(bbox = site_box, + output_file = subset_file, + nhdplus_data = 'download', + return_data = TRUE, + overwrite = FALSE, + out_prj = 4326) %>% + suppressMessages() + }, silent = TRUE) + + if(inherits(subset, 'try-error') || ! length(subset)){# || nrow(subset[[1]]) < 2){ + print('incrementing buffer distance by 500') + buf_dist <- buf_dist + 500 + if(buf_dist > 20000) stop('no streamlines within 20000 meter radius?') + get_nearby_nhd(lat = lat, long = long, CRS = CRS, buf_dist = buf_dist) + } else { + return(list(nhd_subset = subset, bbox = site_box)) + } +} + #this calculates how far along a reach any given point falls. 
That way when we pull in #watershed summary data for a reach, we can adjust it according to how much #of the total upstream area actually contributes to the point in question. @@ -60,6 +111,14 @@ calc_reach_prop = function(VPU, COMID, lat, long, CRS, quiet=FALSE){ ' Fortunately, each component need only be downloaded once.')) } + # url <- nhdR:::get_plus_remotepath(14, component = 'NHDSnapshot') + # cc = get_nhdplus(comid = COMID) + # nhdplusTools::download_nhdplusv2(outdir = '~/Desktop/untracked/nhdplusv2', + # url = 'https://edap-ow-data-commons.s3.amazonaws.com/NHDPlusV21/Data/NHDPlusSA/NHDPlus03N/NHDPlusV21_SA_03N_03a_CatSeed_01.7z') + # nhdplusTools::download_nhdplusv2(outdir = '~/Desktop/untracked/nhdplusv2') + # nhd_plus_get(vpu = VPU, "NHDSnapshot") + # nhd_plus_get(vpu = VPU, component = "NHDSnapshot", force_unzip = TRUE) + # nhd_plus_get(vpu = VPU, component = "NHDPlusAttributes", force_unzip = TRUE) fl = nhdR::nhd_plus_load(vpu=VPU, component='NHDSnapshot', dsn='NHDFlowline', approve_all_dl=TRUE) fl_etc = nhdR::nhd_plus_load(vpu=VPU, component='NHDPlusAttributes', @@ -218,7 +277,7 @@ streamcat_bulk = function(site_df, streamcat_sets){ # 2. get NHDPlusV2 data #### #COMID is the NHD identifier for any reach in the continental U.S. -#add COMIDs to your site table. +#add COMIDs to your site table. If this doesn't work, try updating nhdplusTools sites$COMID = unlist(mapply(comid_from_point, sites$latitude, sites$longitude, WGS84)) sites = sites[! is.na(sites$COMID),] @@ -228,8 +287,88 @@ sites = sites[! is.na(sites$COMID),] sites$VPU = unlist(mapply(vpu_from_point, sites$latitude, sites$longitude, WGS84)) -sites$reach_proportion = unlist(mapply(calc_reach_prop, sites$VPU, sites$COMID, - sites$latitude, sites$longitude, WGS84, quiet=TRUE)) +#loop through sites and verify that they're actually on NHDPlusV2 flowlines, +#rather than NHD HR flowlines, for which we don't yet have StreamCat summaries. 
+#this loop will also calculate reach proportions +sites$reach_proportion = NA +sites$nhd_network = NA + +prev_huc4 <- 'none' +for(i in seq_len(nrow(sites))){ + + print('---') + site <- sites[i, ] + print(paste0(i, ': ', site$sitecode)) + + nhdchunk = suppressWarnings(get_nearby_nhd( + lat = site$latitude, + long = site$longitude, + CRS = WGS84)) + + subset <- nhdchunk$nhd_subset + site_box <- nhdchunk$bbox + + site_sf <- sf::st_point(c(site$longitude, site$latitude)) %>% + sf::st_sfc(crs = WGS84) + + huc12 <- get_huc12(site_sf) + print(paste('HUC12:', huc12$huc12)) + huc4 <- substr(huc12$huc12, 1, 4)[1] + nhdplusTools::download_nhdplushr(nhd_hr_dir, hu_list = huc4) %>% invisible() + + if(huc4 != prev_huc4){ + + HRflowlines <- nhdplusTools::get_nhdplushr( + file.path(nhd_hr_dir, substr(huc4, 1, 2)), + file.path(nhd_hr_dir, paste0(huc4, '.gpkg')), + layers = 'NHDFlowline', + proj = 4326 + )$NHDFlowline + + } else { + print('using previous NHD HR HUC') + } + + prev_huc4 <- huc4 + + NHD_HR <- suppressWarnings(sf::st_crop(HRflowlines, site_box)) + + NHDPlus <- subset$NHDFlowline_Network + # catchments <- subset$CatchmentSP + # upstream <- nhdplusTools::get_UT(flowlines, comid) + + x = suppressWarnings(nhdplusTools::get_flowline_index(NHDPlus, points=site_sf)) + sites$reach_proportion[i] = 1 - x$REACH_meas / 100 #0=upstream end; 1=downstream end + + dist_to_nearest_NHDPlus_flowline <- min(st_distance(NHDPlus, site_sf)) + print(paste('Dist to NHDPlus:', round(dist_to_nearest_NHDPlus_flowline, 2), 'm')) + dist_to_nearest_NHDHR_flowline <- min(st_distance(NHD_HR, site_sf)) + print(paste('Dist to NHD_HR:', round(dist_to_nearest_NHDHR_flowline, 2), 'm')) + + xx = mv(NHD_HR, color = 'darkslategray3') + mv(NHDPlus, color='deepskyblue4') + mv(site_sf, color='red') + mapview_save_path <- file.path(mapview_save_dir, paste0(site$sitecode, '.html')) + mapview::mapshot(xx, url = mapview_save_path) + print(paste('map saved to', mapview_save_path)) + print(xx) + + x <- 
readline(cat('This point is on: [A] an NHDPlus flowline, [B] an NHD_HR flowline (but not NHDPlus), or [C] neither >\n')) + + if(x == 'A'){ + sites[i, 'nhd_network'] <- 'NHDPlusV2' + } else if(x == 'B'){ + sites[i, 'COMID'] <- NA + sites[i, 'nhd_network'] <- 'NHD HR' + } else if(x == 'C'){ + sites[i, 'COMID'] <- NA + sites[i, 'nhd_network'] <- 'too small even for HR' + } else { + stop(paste("'A', 'B', or 'C'")) + } +} + +#usef to calculate reach proportion naively like this +# sites$reach_proportion = unlist(mapply(calc_reach_prop, sites$VPU, sites$COMID, +# sites$latitude, sites$longitude, WGS84, quiet=TRUE)) #construct list of DSN=component pairs to acquire. see NHDPlus docs for more. setlist = list('NHDPlusAttributes'='PlusFlowlineVAA', @@ -283,53 +422,3 @@ sites = sites[! duplicated(sites$sitecode),] #save yer data sites = arrange(sites, region, sitecode) write.csv(sites, 'watershed_summary_data.csv', row.names=FALSE) - - -# 4. get MODIS data (this section incomplete) #### -# VNP13A1 -mt_bands("MOD13Q1") -subset1 = mt_subset(product = "MOD13Q1", - lat = 40, - lon = -110, - band = "250m_16_days_NDVI", - start = "2004-01-01", - end = "2004-02-01", - km_lr = 10, - km_ab = 10, - site_name = "testsite", - internal = TRUE, - progress = FALSE) - -dfx = data.frame("site_name" = paste("test",1:2)) -dfx$lat = 40 -dfx$lon = -110 - -# test batch download -subsets = mt_batch_subset(dfx = dfx, - product = "MOD11A2", - band = "LST_Day_1km", - internal = TRUE, - start = "2004-01-01", - end = "2004-02-01") - - -#### SCRAPS #### - -# storage_path='/home/mike/.local/share/StreamCat' - -# subset = subset %>% -# st_as_sf(coords=c('longitude', 'latitude'), crs=4326) %>% -# st_transform(PROJ4) -# -# #get DEM, 14 is highest res, smallest area; 1 is lowest res, broadest area -# dem = get_elev_raster(subset, z=8) -# mapview(dem) + mapview(subset) -# -# devtools::install_github("jsta/nhdR") -# -# #convert to spatial object and project from WGS 84 -# # subset = subset %>% -# # 
st_as_sf(coords=c('longitude','latitude'), crs=4326) %>% -# # st_transform(PROJ4) -# - diff --git a/3_batch_summary_manual_delin.R b/3_batch_summary_manual_delin.R old mode 100644 new mode 100755 diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R new file mode 100755 index 0000000..66600e9 --- /dev/null +++ b/4_interactive_watershed_delineation.R @@ -0,0 +1,1090 @@ +#2021-04-01 +#mike vlah +#vlahm13@gmail.com + +#if this can't delineate your watershed, email me! there are heaps of updates +#that i haven't yet incorporated into this version of the tool + +#documentation and helper functions are included in the function definition below. +#If you're in Rstudio, collapse the function with Alt+o (Windows/Linux) or Cmd+Opt+o (Mac). +#There's a demo call included at the bottom. +#Spaces in write_dir may cause errors on Windows. + +library(tidyverse) +library(glue) +library(sf) +library(elevatr) +library(raster) +library(whitebox) #this can't be installed from CRAN. google "install whitebox R" +library(terra) +library(mapview) +library(osmdata) #only needed if you want to burn streams into the DEM +#NOTE: you may also need to install the lwgeom, rgdal, and e1071 packages. +# (you'll be notified in error messages if so) + +delineate_watershed_from_point <- function(lat, + long, + crs, + write_dir, + write_name, + verbose = TRUE){ + + #lat: numeric representing latitude of the pour point in decimal degrees + # (negative indicates southern hemisphere) + #long: numeric representing longitude of the pour point in decimal degrees + # (negative indicates west of prime meridian) + #crs: numeric representing the coordinate reference system (e.g. 4326 for WSG84) + #write_dir: character. the directory in which to write output shapefile + #write_name: character. the basename of the shapefile components to be written. i.e. + # .shp, .shx, .prj, .dbf + #verbose: logical. 
determines the amount of informative messaging during run + + #details: Output will have CRS 4326 (WGS 84), though processing is done + # on projected data. the projection specifications are determined + # automatically, based on pour point location. Note that for mega-huge + # watersheds, this could result in an inaccurate watershed area calculation. + + #returns: a list containing the following components: + # watershed_area_ha: the area of the delineated watershed in hectares + # (meters squared divided by 10,000) + # buffer_radius_m: the width (meters) around the site location that was used when + # requesting a DEM (digital elevation model) + # snap_distance_m: the search radius (meters) around the pour point that was used + # to choose a stream to snap the pour point to. + # snap_method: either "standard", which snaps the pour point to the cell + # within snap_distance_m with highest flow accumulation, or "jenson", + # which snaps to the nearest flow line + # dem_resolution: passed to elevatr::get_elev_raster (z parameter). + # depends on supplied machine_status + # flat_increment: see whitebox::wbt_breach_depressions or + # whitebox::wbt_breach_depressions_least_cost + # breach_method: string. Either 'basic', indicating that + # whitebox::wbt_breach_depressions was used, or 'lc', indicating + # whitebox::wbt_breach_depressions_least_cost (which less readily + # alters the DEM) + # burn_streams: TRUE or FALSE, indicating whether + # whitebox::wbt_burn_streams_at_roads and whitebox::wbt_fill_burn were + # used on the DEM + + sm <- suppressMessages + sw <- suppressWarnings + + #the functions below are all helpers for subroutines of + # delineate_watershed_from_point. they should all stand alone pretty + # well, and some are generally useful. they are defined locally here, + # just for convenient distribution. 
+ + #moving shapefiles can be annoying, since they're actually represented by + # 3-4 files + move_shapefiles <- function(shp_files, + from_dir, + to_dir, + new_name_vec = NULL){ + + #shp_files is a character vector of filenames with .shp extension + # (.shx, .prj, .dbf are handled internally and don't need to be listed) + #from_dir and to_dir are strings representing the source and destination + # directories, respectively + #new_name_vec is an optional character vector of new names for each shape file. + # these can end in ".shp", but don't need to + + if(any(! grepl('\\.shp$', shp_files))){ + stop('All components of shp_files must end in ".shp"') + } + + if(length(shp_files) != length(new_name_vec)){ + stop('new_name_vec must have the same length as shp_files') + } + + dir.create(to_dir, + showWarnings = FALSE, + recursive = TRUE) + + for(i in 1:length(shp_files)){ + + shapefile_base <- strsplit(shp_files[i], '\\.shp')[[1]] + + files_to_move <- list.files(path = from_dir, + pattern = shapefile_base) + + extensions <- str_match(files_to_move, + paste0(shapefile_base, '(\\.[a-z]{3})'))[, 2] + + if(is.null(new_name_vec)){ + new_name_base <- rep(shapefile_base, length(files_to_move)) + } else { + new_name_base <- strsplit(new_name_vec[i], '\\.shp$')[[1]] + new_name_base <- rep(new_name_base, length(files_to_move)) + } + + tryCatch({ + + #try to move the files (may fail if they are on different partitions) + mapply(function(x, nm, ext) file.rename(from = paste(from_dir, + x, + sep = '/'), + to = glue('{td}/{n}{ex}', + td = to_dir, + n = nm, + ex = ext)), + x = files_to_move, + nm = new_name_base, + ext = extensions) + + }, warning = function(w){ + + #if that fails, copy them and then delete them + mapply(function(x, nm, ext) file.copy(from = paste(from_dir, + x, + sep = '/'), + to = glue('{td}/{n}{ex}', + td = to_dir, + n = nm, + ex = ext), + overwrite = TRUE), + x = files_to_move, + nm = new_name_base, + ext = extensions) + + lapply(paste(from_dir, + files_to_move, 
+ sep = '/'), + unlink) + }) + } + + #return() + } + + #prompt users for stuff, provide single-character responses, + # reprompt if they don't choose one of the expected responses + get_response_1char <- function(msg, + possible_chars, + subsequent_prompt = FALSE){ + + #msg: character. a message that will be used to prompt the user + #possible_chars: character vector of acceptable single-character responses + #subsequent prompt: not to be set directly. This is handled by + # get_response_mchar during recursion. + + if(subsequent_prompt){ + cat(paste('Please choose one of:', + paste(possible_chars, + collapse = ', '), + '\n> ')) + } else { + cat(msg) + } + + ch <- as.character(readLines(con = stdin(), 1)) + + if(length(ch) == 1 && ch %in% possible_chars){ + return(ch) + } else { + get_response_1char(msg = msg, + possible_chars = possible_chars, + subsequent_prompt = TRUE) + } + } + + #prompt users for stuff, provide multi-character responses, + # reprompt if they don't choose one of the expected responses + get_response_mchar <- function(msg, + possible_resps, + allow_alphanumeric_response = TRUE, + subsequent_prompt = FALSE){ + + #msg: character. a message that will be used to prompt the user + #possible_resps: character vector. If length 1, each character in the response + # will be required to match a character in possible_resps, and the return + # value will be a character vector of each single-character tokens in the + # response. If + # length > 1, the response will be required to match an element of + # possible_resps exactly, and the response will be returned as-is. + #allow_alphanumeric_response: logical. If FALSE, the response may not + # include both numerals and letters. Only applies when possible_resps + # has length 1. + #subsequent prompt: not to be set directly. This is handled by + # get_response_mchar during recursion. 
+ + split_by_character <- ifelse(length(possible_resps) == 1, TRUE, FALSE) + + if(subsequent_prompt){ + + if(split_by_character){ + pr <- strsplit(possible_resps, split = '')[[1]] + } else { + pr <- possible_resps + } + + cat(paste('Your options are:', + paste(pr, + collapse = ', '), + '\n> ')) + } else { + cat(msg) + } + + chs <- as.character(readLines(con = stdin(), 1)) + + if(! allow_alphanumeric_response && + split_by_character && + grepl('[0-9]', chs) && + grepl('[a-zA-Z]', chs)){ + + cat('Response may not include both letters and numbers.\n> ') + resp <- get_response_mchar( + msg = msg, + possible_resps = possible_resps, + allow_alphanumeric_response = allow_alphanumeric_response, + subsequent_prompt = FALSE) + + return(resp) + } + + if(length(chs)){ + if(split_by_character){ + + if(length(possible_resps) != 1){ + stop('possible_resps must be length 1 if split_by_character is TRUE') + } + + chs <- strsplit(chs, split = '')[[1]] + possible_resps_split <- strsplit(possible_resps, split = '')[[1]] + + if(all(chs %in% possible_resps_split)){ + return(chs) + } + + } else { + + if(length(possible_resps) < 2){ + stop('possible_resps must have length > 1 if split_by_character is FALSE') + } + + if(any(possible_resps == chs)){ + return(chs) + } + } + } + + resp <- get_response_mchar( + msg = msg, + possible_resps = possible_resps, + allow_alphanumeric_response = allow_alphanumeric_response, + subsequent_prompt = TRUE) + + return(resp) + } + + #chooose an appropriate projection, based on location + choose_projection <- function(lat = NULL, + long = NULL, + unprojected = FALSE){ + + if(unprojected){ + PROJ4 <- glue('+proj=longlat +datum=WGS84 +no_defs ', + '+ellps=WGS84 +towgs84=0,0,0') + return(PROJ4) + } + + if(is.null(lat) || is.null(long)){ + stop('If projecting, lat and long are required.') + } + + abslat <- abs(lat) + + #this is what the makers of https://projectionwizard.org/# use to choose + #a suitable projection: https://rdrr.io/cran/rCAT/man/simProjWiz.html + 
# THIS WORKS (PROJECTS STUFF), BUT CAN'T BE READ AUTOMATICALLY BY st_read + if(abslat < 70){ #tropical or temperate + PROJ4 <- glue('+proj=cea +lon_0={lng} +lat_ts=0 +x_0=0 +y_0=0 ', + '+ellps=WGS84 +datum=WGS84 +units=m +no_defs', + lng = long) + } else { #polar + PROJ4 <- glue('+proj=laea +lat_0={lt} +lon_0={lng} +x_0=0 +y_0=0 ', + '+ellps=WGS84 +datum=WGS84 +units=m +no_defs', + lt = lat, + lng = long) + } + + return(PROJ4) + } + + #choose appropriate granularity of the elevation model, + # based on the approximate size of the task (area potentially covered) + choose_dem_resolution <- function(dev_machine_status, + buffer_radius){ + + if(dev_machine_status == '1337'){ + dem_resolution <- case_when( + buffer_radius <= 1e4 ~ 12, + buffer_radius == 1e5 ~ 11, + buffer_radius == 1e6 ~ 10, + buffer_radius == 1e7 ~ 8, + buffer_radius == 1e8 ~ 6, + buffer_radius == 1e9 ~ 4, + buffer_radius >= 1e10 ~ 2) + } else if(dev_machine_status == 'n00b'){ + dem_resolution <- case_when( + buffer_radius <= 1e4 ~ 10, + buffer_radius == 1e5 ~ 8, + buffer_radius == 1e6 ~ 6, + buffer_radius == 1e7 ~ 4, + buffer_radius == 1e8 ~ 2, + buffer_radius >= 1e9 ~ 1) + } else { + stop('dev_machine_status must be either "1337" or "n00b"') + } + + return(dem_resolution) + } + + #for determining whether the DEM extent wasn't big enough to allow full + # delineation + raster_intersection_summary <- function(wb, + dem){ + + #wb is a delineated watershed boundary as a rasterLayer + #dem is a DEM rasterLayer + + summary_out <- list() + + #convert wb to sf object (there are several benign but seemingly uncatchable + # garbage collection errors here) + wb <- sf::st_as_sf(raster::rasterToPolygons(wb)) + + #get edge of DEM as sf object + dem_edge <- raster::focal(x = dem, #the terra version doesn't retain NA border + fun = function(x, ...) return(0), + w = matrix(1, nrow = 3, ncol = 3)) %>% + raster::reclassify(rcl = matrix(c(0, NA, #second, set inner cells to NA + NA, 1), #first, set outer cells to 1... 
yup. + ncol = 2)) %>% + raster::rasterToPolygons() %>% + sf::st_as_sf() + # dem_edge <- raster::boundaries(dem) %>% + # # classes = TRUE, + # # asNA = FALSE) %>% + # raster::reclassify(rcl = matrix(c(0, NA), #set inner cells to NA + # ncol = 2)) %>% + # raster::rasterToPolygons() %>% + # sf::st_as_sf() + + #tally raster cells + summary_out$n_wb_cells <- length(wb$geometry) + summary_out$n_dem_cells <- length(dem_edge$geometry) + + #tally intersections; calc percent of wb cells that overlap + intersections <- sf::st_intersects(wb, dem_edge) %>% + as.matrix() %>% + apply(MARGIN = 2, + FUN = sum) %>% + table() + + true_intersections <- sum(intersections[names(intersections) > 0]) + + summary_out$n_intersections <- true_intersections + summary_out$pct_wb_cells_intersect <- true_intersections / + summary_out$n_wb_cells * 100 + + return(summary_out) + } + + #handles ephemeral download errors + expo_backoff <- function(expr, + max_attempts = 5, + verbose = TRUE){ + + for(attempt_i in seq_len(max_attempts)){ + + results <- try(expr = expr, + silent = TRUE) + + if(inherits(results, 'try-error')){ + + if(attempt_i == max_attempts){ + stop(attr(results, 'condition')) + } + + backoff <- runif(n = 1, + min = 0, + max = 2^attempt_i - 1) + + if(verbose){ + print(glue("Backing off for ", round(backoff, 1), " seconds.")) + } + + Sys.sleep(backoff) + + } else { + + # if(verbose){ + # print(paste0("Request succeeded after ", attempt_i, " attempt(s).")) + # } + + break + } + } + + return(results) + } + + #for retrieving openStreetMap layers (roads) + get_osm_roads <- function(extent_raster, outfile = NULL){ + + #extent_raster: either a terra spatRaster or a rasterLayer. The output + # roads will have the same crs, and roughly the same extent, as this raster. + #outfile: string. If supplied, output shapefile will be written to this + # location. If not supplied, the output will be returned. 
+ + message('Downloading roads layer from OpenStreetMaps') + + extent_raster <- terra::rast(extent_raster) + # rast_crs <- as.character(extent_raster@crs) + rast_crs <- terra::crs(extent_raster, + proj4 = TRUE) + + extent_raster_wgs84 <- terra::project(extent_raster, + y = 'epsg:4326') + + dem_bounds <- terra::ext(extent_raster_wgs84)[c(1, 3, 2, 4)] + + highway_types <- c('motorway', 'trunk', 'primary', 'secondary', 'tertiary') + highway_types <- c(highway_types, + paste(highway_types, 'link', sep = '_')) + + roads_query <- osmdata::opq(dem_bounds) %>% + osmdata::add_osm_feature(key = 'highway', + value = highway_types) + + roads <- osmdata::osmdata_sf(roads_query) + roads <- roads$osm_lines$geometry + + # plot(roads$osm_lines, max.plot = 1) + + roads_proj <- roads %>% + sf::st_transform(crs = rast_crs) %>% + sf::st_union() %>% + # sf::st_transform(crs = WGS84) %>% + sf::st_as_sf() %>% + rename(geometry = x) %>% + mutate(FID = 0:(n() - 1)) %>% + dplyr::select(FID, geometry) + + if(! is.null(outfile)){ + + sf::st_write(roads_proj, + dsn = outfile, + layer = 'roads', + driver = 'ESRI Shapefile', + delete_layer = TRUE, + quiet = TRUE) + + message(paste('OSM roads layer written to', outfile)) + + } else { + return(roads_proj) + } + } + + #for retrieving openStreetMap layers (streams) + get_osm_streams <- function(extent_raster, outfile = NULL){ + + #extent_raster: either a terra spatRaster or a rasterLayer. The output + # streams will have the same crs, and roughly the same extent, as this raster. + #outfile: string. If supplied, output shapefile will be written to this + # location. If not supplied, the output will be returned. 
+ + message('Downloading streams layer from OpenStreetMaps') + + extent_raster <- terra::rast(extent_raster) + # rast_crs <- as.character(extent_raster@crs) + rast_crs <- terra::crs(extent_raster, + proj4 = TRUE) + + extent_raster_wgs84 <- terra::project(extent_raster, + y = 'epsg:4326') + + dem_bounds <- terra::ext(extent_raster_wgs84)[c(1, 3, 2, 4)] + + streams_query <- osmdata::opq(dem_bounds) %>% + osmdata::add_osm_feature(key = 'waterway', + value = c('river', 'stream')) + + streams <- osmdata::osmdata_sf(streams_query) + streams <- streams$osm_lines$geometry + + streams_proj <- streams %>% + sf::st_transform(crs = rast_crs) %>% + sf::st_union() %>% + # sf::st_transform(crs = WGS84) %>% + sf::st_as_sf() %>% + rename(geometry = x) %>% + mutate(FID = 0:(n() - 1)) %>% + dplyr::select(FID, geometry) + + if(! is.null(outfile)){ + + sf::st_write(streams_proj, + dsn = outfile, + layer = 'streams', + driver = 'ESRI Shapefile', + delete_layer = TRUE, + quiet = TRUE) + + message(paste('OSM streams layer written to', outfile)) + + } else { + return(streams_proj) + } + } + + #the function that runs the workhorse + delineate_watershed_apriori_recurse <- function(lat, + long, + crs, + site_name, + dem_resolution = NULL, + flat_increment = NULL, + breach_method = 'lc', + burn_streams = FALSE, + scratch_dir = tempdir(), + write_dir, + dev_machine_status = 'n00b', + verbose = FALSE){ + + #This function calls delineate_watershed_apriori recursively, taking + # user input after each call, until the user selects a delineation + # or aborts. For parameter documentation, see delineate_watershed_apriori. 
+ + # tmp <- tempdir() + scratch_dir <- stringr::str_replace_all(scratch_dir, '\\\\', '/') + + inspection_dir <- delineate_watershed_apriori( + lat = lat, + long = long, + crs = crs, + site_name = site_name, + dem_resolution = dem_resolution, + flat_increment = flat_increment, + breach_method = breach_method, + burn_streams = burn_streams, + scratch_dir = scratch_dir, + dev_machine_status = dev_machine_status, + verbose = verbose) + + files_to_inspect <- list.files(path = inspection_dir, + pattern = '.shp') + + temp_point <- glue(scratch_dir, '/', 'POINT') + + tibble(longitude = long, latitude = lat) %>% + sf::st_as_sf(coords = c('longitude', 'latitude'), + crs = crs) %>% + sf::st_write(dsn = temp_point, + driver = 'ESRI Shapefile', + delete_dsn = TRUE, + quiet = TRUE) + + nshapes <- length(files_to_inspect) + + wb_selections <- paste(paste0('[', + c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), + ']'), + c(files_to_inspect, + 'Burn streams into the DEM (may handle road-stream intersections)', + 'Use more aggressive breaching method', + 'Select DEM resolution', + 'Set flat_increment', + # 'Next (skip this one for now)', + 'Abort delineation'), + sep = ': ', + collapse = '\n') + + helper_code <- glue('{id}.\nmapview::mapviewOptions(fgb = FALSE);', + 'mapview::mapview(sf::st_read("{wd}/{f}")) + ', + 'mapview::mapview(sf::st_read("{pf}"))', + id = 1:length(files_to_inspect), + wd = inspection_dir, + f = files_to_inspect, + pf = temp_point) %>% + paste(collapse = '\n\n') + + msg <- glue('Visually inspect the watershed boundary candidate shapefiles ', + 'by pasting the mapview lines below into a separate instance of R.\n\n{hc}\n\n', + 'Enter the number corresponding to the ', + 'one that looks most legit, or select one or more tuning ', + 'options (e.g. "SBRI" without quotes). You usually won\'t ', + 'need to tune anything. 
If you aren\'t ', + 'sure which delineation is correct, get a site manager to verify:\n', + 'request_site_manager_verification(type=\'wb delin\', ', + 'network, domain) [function not yet built]\n\nChoices:\n{sel}\n\nEnter choice(s) here > ', + hc = helper_code, + sel = wb_selections) + # td = inspection_dir) + + resp <- get_response_mchar( + msg = msg, + possible_resps = paste(c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), + collapse = ''), + allow_alphanumeric_response = FALSE) + # + # if('n' %in% resp){ + # print(glue('Moving on. You haven\'t seen the last of {s}!', + # s = site_name)) + # return(1) + # } + + if('a' %in% resp){ + print(glue('Aborted. Any completed delineations have been saved.')) + return(2) + } + + if('S' %in% resp){ + burn_streams <- TRUE + } else { + burn_streams <- FALSE + } + + if('B' %in% resp){ + breach_method <- 'basic' + } else { + breach_method <- 'lc' + } + + if('R' %in% resp){ + dem_resolution <- get_response_mchar( + msg = paste0('Choose DEM resolution between 1 (low) and 14 (high)', + ' to pass to elevatr::get_elev_raster. For tiny ', + 'watersheds, use 12-13. For giant ones, use 8-9.\n\n', + 'Enter choice here > '), + possible_resps = paste(1:14)) + dem_resolution <- as.numeric(dem_resolution) + } + + if('I' %in% resp){ + + bm <- ifelse(breach_method == 'basic', + 'whitebox::wbt_breach_depressions', + 'whitebox::wbt_breach_depressions_least_cost') + + new_options <- paste(paste0('[', + c('S', 'M', 'L'), + ']'), + c('0.001', '0.01', '0.1'), + sep = ': ', + collapse = '\n') + + resp2 <- get_response_1char( + msg = glue('Pick the size of the elevation increment to pass to ', + bm, '.\n\n', new_options, '\n\nEnter choice here > '), + possible_chars = c('S', 'M', 'L')) + + flat_increment <- switch(resp2, + S = 0.001, + M = 0.01, + L = 0.1) + } + + if(! 
grepl('[0-9]', resp)){ + + selection <- delineate_watershed_apriori_recurse( + lat = lat, + long = long, + crs = crs, + site_name = site_name, + dem_resolution = dem_resolution, + flat_increment = flat_increment, + breach_method = breach_method, + burn_streams = burn_streams, + scratch_dir = scratch_dir, + write_dir = write_dir, + dev_machine_status = dev_machine_status, + verbose = verbose) + + return(selection) + } + + selection <- files_to_inspect[as.numeric(resp)] + + move_shapefiles(shp_files = selection, + from_dir = inspection_dir, + to_dir = write_dir, + new_name_vec = site_name) + + message(glue('Selection {s}:\n\t{sel}\nwas written to:\n\t{sdr}', + s = resp, + sel = selection, + sdr = write_dir)) + + return(selection) + } + + #the workhorse + delineate_watershed_apriori <- function(lat, + long, + crs, + site_name, + dem_resolution = NULL, + flat_increment = NULL, + breach_method = 'basic', + burn_streams = FALSE, + scratch_dir = tempdir(), + dev_machine_status = 'n00b', + verbose = FALSE){ + + #lat: numeric representing latitude in decimal degrees + # (negative indicates southern hemisphere) + #long: numeric representing longitude in decimal degrees + # (negative indicates west of prime meridian) + #crs: numeric representing the coordinate reference system (e.g. WSG84) + #dem_resolution: optional integer 1-14. the granularity of the DEM that is used for + # delineation. this argument is passed directly to the z parameter of + # elevatr::get_elev_raster. 1 is low resolution; 14 is high. If NULL, + # this is determined automatically. + #flat_increment: float or NULL. Passed to + # whitebox::wbt_breach_depressions_least_cost + # or whitebox::wbt_breach_depressions, depending on the value + # of breach_method (see next). + #breach_method: string. Either 'basic', which invokes whitebox::wbt_breach_depressions, + # or 'lc', which invokes whitebox::wbt_breach_depressions_least_cost + #burn_streams: logical. 
if TRUE, both whitebox::wbt_burn_streams_at_roads + # and whitebox::wbt_fill_burn are called on the DEM, using road and stream + # layers from OpenStreetMap. + #scratch_dir: the directory where intermediate files will be dumped. This + # is a randomly generated temporary directory if not specified. + #dev_machine_status: either '1337', indicating that your machine has >= 16 GB + # RAM, or 'n00b', indicating < 16 GB RAM. DEM resolution is chosen accordingly + #verbose: logical. determines the amount of informative messaging during run + + #returns the location of candidate watershed boundary files + + # tmp <- tempdir() + # tmp <- str_replace_all(tmp, '\\\\', '/') + + if(! is.null(dem_resolution) && ! is.numeric(dem_resolution)){ + stop('dem_resolution must be a numeric integer or NULL') + } + if(! is.null(flat_increment) && ! is.numeric(flat_increment)){ + stop('flat_increment must be numeric or NULL') + } + if(! breach_method %in% c('lc', 'basic')) stop('breach_method must be "basic" or "lc"') + if(! 
is.logical(burn_streams)) stop('burn_streams must be logical') + + inspection_dir <- glue(scratch_dir, '/INSPECT_THESE') + point_dir <- glue(scratch_dir, '/POINT') + dem_f <- glue(scratch_dir, '/dem.tif') + point_f <- glue(scratch_dir, '/point.shp') + streams_f <- glue(scratch_dir, '/streams.shp') + roads_f <- glue(scratch_dir, '/roads.shp') + d8_f <- glue(scratch_dir, '/d8_pntr.tif') + flow_f <- glue(scratch_dir, '/flow.tif') + + dir.create(path = inspection_dir, + showWarnings = FALSE) + + dir_clean <- list.files(inspection_dir) + + if(length(dir_clean) > 0) { + file.remove(paste(inspection_dir, dir_clean, sep = '/')) + } + + proj <- choose_projection(lat = lat, + long = long) + + site <- tibble(x = lat, + y = long) %>% + sf::st_as_sf(coords = c("y", "x"), + crs = crs) %>% + sf::st_transform(proj) + # sf::st_transform(4326) #WGS 84 (would be nice to do this unprojected) + + #prepare for delineation loops + buffer_radius <- 1000 + dem_coverage_insufficient <- FALSE + while_loop_begin <- TRUE + + #snap site to flowlines 3 different ways. delineate watershed boundaries (wb) + #for each unique snap. 
if the delineations get cut off, get more elevation data + #and try again + while(while_loop_begin || dem_coverage_insufficient){ + + while_loop_begin <- FALSE + + if(is.null(dem_resolution)){ + dem_resolution <- choose_dem_resolution( + dev_machine_status = dev_machine_status, + buffer_radius = buffer_radius) + } + + if(verbose){ + + if(is.null(flat_increment)){ + fi <- 'NULL (auto)' + } else { + fi <- as.character(flat_increment) + } + + print(glue('Delineation specs for this attempt:\n', + '\tsite_name: {st}; ', + 'dem_resolution: {dr}; flat_increment: {fi}\n', + '\tbreach_method: {bm}; burn_streams: {bs}', + st = site_name, + dr = dem_resolution, + fi = fi, + bm = breach_method, + bs = as.character(burn_streams), + .trim = FALSE)) + } + + site_buf <- sf::st_buffer(x = site, + dist = buffer_radius) + + dem <- expo_backoff( + expr = { + elevatr::get_elev_raster(locations = site_buf, + z = dem_resolution, + verbose = FALSE) + }, + max_attempts = 5 + ) + + # terra::writeRaster(x = dem, + raster::writeRaster(x = dem, + filename = dem_f, + overwrite = TRUE) + + #loses projection? + sf::st_write(obj = site, + dsn = point_f, + delete_layer = TRUE, + quiet = TRUE) + + if(burn_streams){ + get_osm_roads(extent_raster = dem, + outfile = roads_f) + get_osm_streams(extent_raster = dem, + outfile = streams_f) + } + + whitebox::wbt_fill_single_cell_pits(dem = dem_f, + output = dem_f) %>% invisible() + + if(breach_method == 'basic'){ + + whitebox::wbt_breach_depressions( + dem = dem_f, + output = dem_f, + flat_increment = flat_increment) %>% invisible() + + } else if(breach_method == 'lc'){ + + whitebox::wbt_breach_depressions_least_cost( + dem = dem_f, + output = dem_f, + dist = 10000, #maximum trench length + fill = TRUE, + flat_increment = flat_increment) %>% invisible() + } + #also see wbt_fill_depressions for when there are open pit mines + + if(burn_streams){ + + #the secret is that BOTH of these burns can work in tandem! 
+ whitebox::wbt_burn_streams_at_roads(dem = dem_f, + streams = streams_f, + roads = roads_f, + output = dem_f, + width = 50) %>% invisible() + whitebox::wbt_fill_burn(dem = dem_f, + streams = streams_f, + output = dem_f) %>% invisible() + } + + whitebox::wbt_d8_pointer(dem = dem_f, + output = d8_f) %>% invisible() + + whitebox::wbt_d8_flow_accumulation(input = dem_f, + output = flow_f, + out_type = 'catchment area') %>% invisible() + + snap1_f <- glue(scratch_dir, '/snap1_jenson_dist150.shp') + whitebox::wbt_jenson_snap_pour_points(pour_pts = point_f, + streams = flow_f, + output = snap1_f, + snap_dist = 150) %>% invisible() + snap2_f <- glue(scratch_dir, '/snap2_standard_dist50.shp') + whitebox::wbt_snap_pour_points(pour_pts = point_f, + flow_accum = flow_f, + output = snap2_f, + snap_dist = 50) %>% invisible() + snap3_f <- glue(scratch_dir, '/snap3_standard_dist150.shp') + whitebox::wbt_snap_pour_points(pour_pts = point_f, + flow_accum = flow_f, + output = snap3_f, + snap_dist = 150) %>% invisible() + + #the site has been snapped 3 different ways. identify unique snap locations. + snap1 <- sf::st_read(snap1_f, quiet = TRUE) + snap2 <- sf::st_read(snap2_f, quiet = TRUE) + snap3 <- sf::st_read(snap3_f, quiet = TRUE) + unique_snaps_f <- snap1_f + if(! identical(snap1, snap2)) unique_snaps_f <- c(unique_snaps_f, snap2_f) + if(! 
identical(snap1, snap3)) unique_snaps_f <- c(unique_snaps_f, snap3_f) + + #good for experimenting with snap specs: + # delineate_watershed_test2(scratch_dir, point_f, flow_f, + # d8_f, 'standard', 1000) + + #delineate each unique location + for(i in 1:length(unique_snaps_f)){ + + rgx <- str_match(unique_snaps_f[i], + '.*?_(standard|jenson)_dist([0-9]+)\\.shp$') + snap_method <- rgx[, 2] + snap_distance <- rgx[, 3] + + wb_f <- glue('{path}/wb{n}_buffer{b}_{typ}_dist{dst}.tif', + path = scratch_dir, + n = i, + b = buffer_radius, + typ = snap_method, + dst = snap_distance) + + whitebox::wbt_watershed(d8_pntr = d8_f, + pour_pts = unique_snaps_f[i], + output = wb_f) %>% invisible() + + wb <- raster::raster(wb_f) + + #check how many wb cells coincide with the edge of the DEM. + #If > 0.1% or > 5, broader DEM needed + smry <- raster_intersection_summary(wb = wb, + dem = dem) + + if(verbose){ + print(glue('site buffer radius: {br}; pour point snap: {sn}/{tot}; ', + 'n intersecting border cells: {ni}; pct intersect: {pct}', + br = buffer_radius, + sn = i, + tot = length(unique_snaps_f), + ni = round(smry$n_intersections, 2), + pct = round(smry$pct_wb_cells_intersect, 2))) + } + + if(smry$pct_wb_cells_intersect > 0.1 || smry$n_intersections > 5){ + buffer_radius_new <- buffer_radius * 10 + dem_coverage_insufficient <- TRUE + print(glue('Hit DEM edge. Incrementing buffer.')) + break + } else { + dem_coverage_insufficient <- FALSE + buffer_radius_new <- buffer_radius + + #write and record temp files for the technician to visually inspect + wb_sf <- wb %>% + raster::rasterToPolygons() %>% + sf::st_as_sf() %>% + sf::st_buffer(dist = 0.1) %>% + sf::st_union() %>% + sf::st_as_sf() #again? ugh. 
+ + wb_sf <- sf::st_transform(wb_sf, 4326) %>% #EPSG for WGS84 + st_make_valid(wb_sf) + + ws_area_ha <- as.numeric(sf::st_area(wb_sf)) / 10000 + + wb_sf <- wb_sf %>% + mutate(site_name = !!site_name) %>% + mutate(area = !!ws_area_ha) + + if(is.null(flat_increment)){ + flt_incrmt <- 'null' + } else { + flt_incrmt <- as.character(flat_increment) + } + + wb_sf_f <- glue('{path}/wb{n}_BUF{b}{typ}DIST{dst}RES{res}', + 'INC{inc}BREACH{brc}BURN{brn}.shp', + path = inspection_dir, + n = i, + b = sprintf('%d', buffer_radius), + typ = snap_method, + dst = snap_distance, + res = dem_resolution, + inc = flt_incrmt, + brc = breach_method, + brn = as.character(burn_streams)) + + sw(sf::st_write(obj = wb_sf, + dsn = wb_sf_f, + delete_dsn = TRUE, + quiet = TRUE)) + } + } + + buffer_radius <- buffer_radius_new + } #end while loop + + if(verbose){ + message(glue('Candidate delineations are in: ', inspection_dir)) + } + + return(inspection_dir) + } + + if(verbose){ + message('Beginning watershed delineation') + } + + tmp <- tempdir() + + selection <- sw(delineate_watershed_apriori_recurse( + lat = lat, + long = long, + crs = crs, + site_name = write_name, + dem_resolution = NULL, + flat_increment = NULL, + breach_method = 'lc', + burn_streams = FALSE, + scratch_dir = tmp, + write_dir = write_dir, + dev_machine_status = 'n00b', + verbose = verbose)) + + #calculate watershed area in hectares + wb <- sf::st_read(glue('{d}/{s}.shp', + d = write_dir, + s = write_name), + quiet = TRUE) + + ws_area_ha <- as.numeric(sf::st_area(wb)) / 10000 + + #return the specifications of the correctly delineated watershed, and some + # other goodies + rgx <- str_match(selection, + paste0('^wb[0-9]+_BUF([0-9]+)(standard|jenson)', + 'DIST([0-9]+)RES([0-9]+)INC([0-1\\.null]+)', + 'BREACH(basic|lc)BURN(TRUE|FALSE)\\.shp$')) + + deets <- list(name = write_name, + watershed_area_ha = ws_area_ha, + buffer_radius_m = as.numeric(rgx[, 2]), + snap_distance_m = as.numeric(rgx[, 4]), + snap_method = rgx[, 3], + 
dem_resolution = as.numeric(rgx[, 5]), + flat_increment = rgx[, 6], + breach_method = rgx[, 7], + burn_streams = rgx[, 8]) + + return(deets) +} + +#example +# deets <- delineate_watershed_from_point(lat = 44.21013, +# long = -122.2571, +# crs = 4326, +# write_dir = '/some/path', +# write_name = 'ultimate_watershed') diff --git a/5_summarize_nlcd.R b/5_summarize_nlcd.R new file mode 100644 index 0000000..528d0ec --- /dev/null +++ b/5_summarize_nlcd.R @@ -0,0 +1,138 @@ +library(raster) +library(tidyverse) +library(sf) +library(rgee) + +#setup #### + +# setup for rgee is super annoying, and must be done anew on each machine +rgee::ee_Initialize(email = 'vlahm13@gmail.com', + drive = TRUE) + +abp_wb = sf::st_read('~/Downloads/Shape Files/Al-Beuhler_Pond.shp') +cp_wb = sf::st_read('~/Downloads/Shape Files/Circuit_Pond.shp') +gpp_wb = sf::st_read('~/Downloads/Shape Files/GardenPond_Pond.shp') + +get_nlcd_summary = function(watershed_boundary, + epochs = c(1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016)){ + + #summarizes landcover classes from NLCD for a spatial extent. Returns + # a two-element list. element 1 is a tibble containing cell tallies for + # each class and year (epoch). + # Element 2 is a named vector of impervious surface percentages for each + # epoch. + + #watershed_boundary is an sf object representing the outline of a watershed. + # NLCD categories will be summarized for that areal extent. + #epochs is a numeric vector of years for which NLCD data will be collected. + # available epochs are 1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016 + + if(! 
all(epochs %in% c(1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016))){ + stop('epochs must be one or more of 1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016') + } + + color_key = tribble( + ~id, ~hexcode, ~category, + 11,"466b9f","Open water", + 12,"d1def8","Perennial ice/snow", + 21,"dec5c5","Developed, open space", + 22,"d99282","Developed, low intensity", + 23,"eb0000","Developed, medium intensity", + 24,"ab0000","Developed high intensity", + 31,"b3ac9f","Barren land (rock/sand/clay)", + 41,"68ab5f","Deciduous forest", + 42,"1c5f2c","Evergreen forest", + 43,"b5c58f","Mixed forest", + 51,"af963c","Dwarf scrub", + 52,"ccb879","Shrub/scrub", + 71,"dfdfc2","Grassland/herbaceous", + 72,"d1d182","Sedge/herbaceous", + 73,"a3cc51","Lichens", + 74,"82ba9e","Moss", + 81,"dcd939","Pasture/hay", + 82,"ab6c28","Cultivated crops", + 90,"b8d9eb","Woody wetlands", + 95,"6c9fb8","Emergent herbaceous wetlands", + ) + + watershed_boundary_ee = sf_as_ee(watershed_boundary) + + tmp = tempdir() + tmp = paste0(tmp, '/nlcd') + dir.create(tmp, showWarnings = FALSE) + + pct_impervious_surface = rep(NA, length(epochs)) + names(pct_impervious_surface) = epochs + for(i in seq_along(epochs)){ + + subset_id = paste0('NLCD', as.character(epochs[i])) + + #write landcover rasters + + img = ee$ImageCollection('USGS/NLCD')$ + select('landcover')$ + filter(ee$Filter$eq('system:index', subset_id))$ + first()$ + clip(watershed_boundary_ee) + + ee_as_raster(image = img, + region = watershed_boundary_ee$geometry(), + dsn = paste0(tmp, '/', subset_id, '.tif')) + + #get impervious surface % + + img = ee$ImageCollection('USGS/NLCD')$ + select('impervious')$ + filter(ee$Filter$eq('system:index', subset_id))$ + first() + + imp_surf = tryCatch({ + ee_extract(x = img, + y = watershed_boundary, + scale = 30, + fun = ee$Reducer$mean(), + sf = TRUE) %>% + pull(impervious) + }, error = function(e) NA) + + pct_impervious_surface[i] = imp_surf + } + + nlcd_summary = color_key %>% + select(id, category) %>% + mutate(id 
= as.character(id)) + + for(e in epochs){ + + subset_id = paste0('NLCD', as.character(e)) + + epoch_rst = raster::raster(paste0(tmp, '/', subset_id, '.tif')) + tabulated_values = raster::values(epoch_rst) %>% + table() %>% + as_tibble() %>% + dplyr::rename(id = '.', + !!sym(paste0('CellTally', as.character(e))) := 'n') + + nlcd_summary = full_join(nlcd_summary, + tabulated_values, + by = 'id') + } + + return(list(landcover = nlcd_summary, + pct_impervious_surface = pct_impervious_surface)) +} + +# summarize landcover and % impervious surfaces #### + +abp_summary = get_nlcd_summary(watershed_boundary = abp_wb, + epochs = 2016) + +cp_summary = get_nlcd_summary(watershed_boundary = cp_wb, + epochs = 2016) + +gpp_summary = get_nlcd_summary(watershed_boundary = gpp_wb, + epochs = 2016) + +saveRDS(abp_summary, '~/Downloads/al_beuhler_pond.rds') +saveRDS(cp_summary, '~/Downloads/circuit_pond.rds') +saveRDS(gpp_summary, '~/Downloads/gardenpond_pond.rds') diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/Watershed_Tools.Rproj b/Watershed_Tools.Rproj old mode 100644 new mode 100755 diff --git a/compare_point_to_nhd.R b/compare_point_to_nhd.R new file mode 100644 index 0000000..dae336c --- /dev/null +++ b/compare_point_to_nhd.R @@ -0,0 +1,146 @@ +library(tidyverse) +library(nhdplusTools) +library(sf) +library(mapview) +mv <- mapview::mapview + +options(timeout = 5000) + +#setup #### + +#establish this path to local nhd highres data +nhd_hr_dir <- '~/git/macrosheds/data_acquisition/data/general/nhd_hr' +#and this one if you want to save interactive map outputs (also activate commented chunk in loop) +# mapview_save_dir <- '~/git/macrosheds/data_acquisition/output/sites_vs_NHD' + +buf <- function(site, buf_dist){ + + site_buf <- sf::st_buffer(x = site, + dist = buf_dist) + site_box <- st_bbox(site_buf) + + subset_file <- tempfile(fileext = '.gpkg') + + subset <- try({ + nhdplusTools::subset_nhdplus(bbox = site_box, + output_file = subset_file, + 
nhdplus_data = 'download', + return_data = TRUE, + overwrite = FALSE, + out_prj = 4326) %>% + suppressMessages() + }, silent = TRUE) + + if(inherits(subset, 'try-error') || ! length(subset)){# || nrow(subset[[1]]) < 2){ + print('incrementing buffer distance by 500') + buf_dist <- buf_dist + 500 + if(buf_dist > 5000) stop() + buf(site = site, buf_dist) + } else { + return(list(subset = subset, box = site_box)) + } +} + +# site_csv <- read_csv('~/git/macrosheds/data_acquisition/data/general/site_data.csv') +# sites = tribble( +# ~site_code, ~domain, ~latitude, ~longitude, ~epsg, +# 'a', 'b', 41.98732, -72.60537, 4269, +# 'a', 'b', 38.59614, -77.05603, 4269, +# ) + +sites = read_csv('~/Downloads/duplicate_COMIDS.csv') %>% + select(-site_name) %>% + rename(latitude = lat, longitude = lon, site_code = nhdplus_id, site_name = long_name) %>% + mutate(epsg = 4269, NHD_COMID = NULL) +# sites=sites[4:nrow(sites),] + +sites$NHD_COMID <- '?' +total_len <- nrow(sites) + +# loop 1: NHDPlusV2 or NHD-HR #### + +#this loop is for identifying whether a point is on the NHDPlusV2 or the NHD-HR, +# or neither. 
for NHM seg_ids, see the next loop +prev_huc4 <- 'none' +for(i in 1:total_len){ + + print('---') + print(i) + site <- sites[i, ] + site_code <- site$site_code + epsg <- site$epsg + + print(paste('site:', site_code)) + print(paste('name:', sites$site_name[i])) + + site <- sf::st_point(c(site$longitude, site$latitude)) %>% + sf::st_sfc(crs = site$epsg) + + nextt <- FALSE + comid <- tryCatch({ + nhdplusTools::discover_nhdplus_id(site) + }, error = function(e) nextt <<- TRUE) + if(nextt) { + print('couldnt find comid') + next + } + + out <- suppressWarnings(buf(site = site, 1000)) + subset <- out$subset + site_box <- out$box + + huc12 <- get_huc12(site) + # print(huc12$huc12) + huc4 <- substr(huc12$huc12, 1, 4)[1] + nhdplusTools::download_nhdplushr(nhd_hr_dir, + hu_list = huc4) %>% + invisible() + + if(huc4 != prev_huc4){ + HRflowlines <- nhdplusTools::get_nhdplushr(file.path(nhd_hr_dir, + substr(huc4, 1, 2)), + file.path(nhd_hr_dir, + paste0(huc4, '.gpkg')), + layers = 'NHDFlowline', + proj = epsg)$NHDFlowline + } else { + print('using previous NHD HR HUC') + } + + prev_huc4 <- huc4 + + NHD_HR <- suppressWarnings(sf::st_crop(HRflowlines, site_box)) + + NHDPlus <- subset$NHDFlowline_Network %>% + st_transform(crs = epsg) + + dist_to_nearest_NHDPlus_flowline <- min(st_distance(NHDPlus, site)) + print(paste('Dist to NHDPlus:', round(dist_to_nearest_NHDPlus_flowline, 2), 'm')) + dist_to_nearest_NHDHR_flowline <- min(st_distance(NHD_HR, site)) + print(paste('Dist to NHD_HR:', round(dist_to_nearest_NHDHR_flowline, 2), 'm')) + + options(scipen = 100) + xx = mv(NHD_HR, color = 'darkslategray3') + mv(NHDPlus, color='deepskyblue4') + mv(site, color='red') + print(xx) + + # mapview_save_path <- file.path(mapview_save_dir, + # paste0(dmn, '_', site_code, '.html')) + # mapview::mapshot(xx, + # url = mapview_save_path) + # print(paste('map saved to', mapview_save_path)) + # print(xx) + + system('spd-say "chili chili chili"') + x <- readline(cat('This point is on: [A] an NHDPlus 
flowline, [B] an NHD_HR flowline, or [C] neither >\n')) + + if(x == 'A'){ + sites[i, 'NHD_COMID'] <- as.character(comid) + print(comid) + } else if(x == 'B'){ + sites[i, 'NHD_COMID'] <- 'HR only' + } else if(x == 'C'){ + sites[i, 'NHD_COMID'] <- 'too small' + } else { + stop(paste("'A', 'B', or 'C'")) + } +} diff --git a/site_data.csv b/site_data.csv old mode 100644 new mode 100755 diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R new file mode 100644 index 0000000..c8c9b08 --- /dev/null +++ b/summarize_nadp_for_watershed.R @@ -0,0 +1,493 @@ +library(macrosheds) +library(terra) +library(sf) +library(dplyr) +library(tidyr) +library(httr) + +sites <- c('ALBION', 'GREEN4', 'NAVAJO', 'w8', 'w9', 'W-9') + +sheds <- ms_load_spatial_product( + macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + spatial_product = 'ws_boundary', + site_codes = sites +) + +# ---- configuration ---- + +nadp_dir <- 'nadp' +dir.create(nadp_dir, showWarnings = FALSE) + +# NADP NTN annual raster base URL +# Variables: pH (lab pH), plus major ion concentrations (mg/L) +# Conductivity is not provided directly, but can be estimated from ions. +# Available from NADP's gridded data: https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/ + +# Ion grids are precipitation-weighted mean concentrations. +# IMPORTANT: NADP reports most ions in mg/L, but Na, K, and Mg grids are +# in ug/L. We convert these to mg/L after extraction. 
+# H+ is only available as deposition (_dep_), not concentration, so we +# estimate H+ concentration from pH: [H+] mg/L = 10^(-pH) * 1.008 * 1000 +nadp_vars <- c('pH', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') +years <- 1985:2024 # adjust range as needed + +# Equivalent weights (mg/meq = g/eq = molecular weight / ionic charge) +# Used to convert mg/L -> meq/L (divide), or mg/L -> ueq/L (divide, then * 1000) +equiv_weights <- c(Ca = 20.04, Mg = 12.15, K = 39.10, Na = 22.99, + Cl = 35.45, NO3 = 62.00, NH4 = 18.04, SO4 = 48.03) + +# Variables whose NADP grids are in ug/L rather than mg/L +vars_in_ugl <- c('Na', 'K', 'Mg') + +# Limiting equivalent conductances at 25C (uS/cm per ueq/L) +# Sources: CRC Handbook / Standard Methods +equiv_conductances <- c(Ca = 59.5, Mg = 53.1, K = 73.5, Na = 50.1, + Cl = 76.3, NO3 = 71.4, NH4 = 73.5, SO4 = 80.0) +# H+ equivalent weight and conductance (used in conductivity estimation; +# H+ concentration is estimated from pH, not downloaded separately) +H_equiv_weight <- 1.008 +H_equiv_conductance <- 349.8 + +# Base URL pattern for NADP NTN concentration grids (annual) +# URL pattern: https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/{var}_conc_{year}.zip +# Zips contain raster data (ArcGrid or GeoTIFF) +base_url <- 'https://nadp.slh.wisc.edu/filelib/maps/NTN/grids' + +# NADP filename prefixes for concentration grids (_conc_ suffix only) +nadp_var_prefixes <- c(pH = 'pH', + Ca = 'Ca', + Mg = 'Mg', + K = 'K', + Na = 'Na', + Cl = 'Cl', + NO3 = 'NO3', + NH4 = 'NH4', + SO4 = 'SO4') + +# ---- prepare watershed geometries ---- + +# Reproject watersheds to WGS84 for consistency with NADP rasters +sheds_wgs84 <- st_transform(sheds, 4326) + +# Get a bounding box with some buffer for cropping +sheds_bbox <- st_bbox(sheds_wgs84) + +# ---- download NADP rasters ---- + +download_nadp_raster <- function(variable, year, dest_dir) { + + prefix <- nadp_var_prefixes[variable] + suffixes <- c('_conc_', '_') + + # Check if we already have an 
extracted raster for this variable/year + for(sfx in suffixes) { + extracted_dir <- file.path(dest_dir, paste0(prefix, sfx, year)) + if(dir.exists(extracted_dir)) { + rast_file <- find_raster_in_dir(extracted_dir) + if(! is.null(rast_file)) { + message('Already extracted: ', variable, ' ', year) + return(rast_file) + } + } + # Also check using the variable name (e.g. H_conc_) + extracted_dir2 <- file.path(dest_dir, paste0(variable, sfx, year)) + if(extracted_dir2 != extracted_dir && dir.exists(extracted_dir2)) { + rast_file <- find_raster_in_dir(extracted_dir2) + if(! is.null(rast_file)) { + message('Already extracted: ', variable, ' ', year) + return(rast_file) + } + } + } + + # Also check for a .tif directly + for(sfx in suffixes) { + for(pfx in c(prefix, variable)) { + tif_file <- file.path(dest_dir, paste0(pfx, sfx, year, '.tif')) + if(file.exists(tif_file)) { + test <- try(rast(tif_file), silent = TRUE) + if(! inherits(test, 'try-error')) { + message('Already have: ', variable, ' ', year) + return(tif_file) + } + } + } + } + + zip_file <- file.path(dest_dir, paste0(prefix, '_', year, '.zip')) + + # Try URL patterns: _conc_ and bare _ suffixes, case variations + # e.g. pH_conc_2000.zip (older) vs pH_2023.zip (newer) + prefixes_to_try <- unique(c(prefix, tolower(prefix), toupper(prefix))) + urls_to_try <- unlist(lapply(prefixes_to_try, function(p) { + unlist(lapply(suffixes, function(sfx) { + zn <- paste0(p, sfx, year, '.zip') + c( + paste0(base_url, '/', year, '/', zn), + paste0(base_url, '/', year, '/', tolower(zn)) + ) + })) + })) + urls_to_try <- unique(urls_to_try) + + max_retries <- 4 + + for(url in urls_to_try) { + for(attempt in seq_len(max_retries)) { + resp <- try(GET(url, write_disk(zip_file, overwrite = TRUE), + timeout(120)), silent = TRUE) + + if(! 
inherits(resp, 'try-error') && status_code(resp) == 200) { + # Try to unzip; use zip basename for directory name + zip_base <- tools::file_path_sans_ext(basename(url)) + exdir <- file.path(dest_dir, zip_base) + unzipped <- try(unzip(zip_file, exdir = exdir), silent = TRUE) + + if(! inherits(unzipped, 'try-error') && length(unzipped) > 0) { + rast_file <- find_raster_in_dir(exdir) + if(! is.null(rast_file)) { + message('Downloaded and extracted: ', variable, ' ', year) + file.remove(zip_file) + Sys.sleep(2) # polite delay between successful downloads + return(rast_file) + } + } + + # Clean up failed extraction + if(dir.exists(exdir)) unlink(exdir, recursive = TRUE) + break # got 200 but extraction failed; try next URL + } + + if(file.exists(zip_file)) file.remove(zip_file) + + # If connection error, retry with backoff + if(inherits(resp, 'try-error') || + status_code(resp) %in% c(429, 500, 502, 503, 504)) { + wait <- 2^attempt + runif(1, 0, 1) + message(' Retry ', attempt, '/', max_retries, + ' for ', variable, ' ', year, + ' (waiting ', round(wait, 1), 's)') + Sys.sleep(wait) + } else { + break # non-retryable HTTP error (e.g. 
404); try next URL + } + } + + if(file.exists(zip_file)) file.remove(zip_file) + } + + message('Could not download: ', variable, ' ', year) + return(NA_character_) +} + +find_raster_in_dir <- function(dir_path) { + # Look for GeoTIFF first + tifs <- list.files(dir_path, pattern = '\\.tif$', full.names = TRUE, + recursive = TRUE) + if(length(tifs) > 0) return(tifs[1]) + + # Look for ArcGrid (hdr.adf) + adfs <- list.files(dir_path, pattern = 'hdr\\.adf$', full.names = TRUE, + recursive = TRUE, ignore.case = TRUE) + if(length(adfs) > 0) { + # Return the directory containing hdr.adf (that's the ArcGrid "file") + return(dirname(adfs[1])) + } + + # Look for .asc (Arc ASCII grid) + ascs <- list.files(dir_path, pattern = '\\.asc$', full.names = TRUE, + recursive = TRUE) + if(length(ascs) > 0) return(ascs[1]) + + # Look for .img + imgs <- list.files(dir_path, pattern = '\\.img$', full.names = TRUE, + recursive = TRUE) + if(length(imgs) > 0) return(imgs[1]) + + # Try to load anything terra can read + all_files <- list.files(dir_path, full.names = TRUE, recursive = TRUE) + for(f in all_files) { + test <- try(rast(f), silent = TRUE) + if(! inherits(test, 'try-error')) return(f) + } + + return(NULL) +} + +message('Downloading NADP NTN annual rasters...') +message('If automatic download fails, manually download rasters from:') +message(' https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/') +message('Place zip files or extracted folders in the nadp/ directory') + +download_log <- expand.grid(variable = nadp_vars, year = years, + stringsAsFactors = FALSE) %>% + as_tibble() + +download_log$filepath <- mapply( + download_nadp_raster, + variable = download_log$variable, + year = download_log$year, + dest_dir = nadp_dir +) + +download_log <- download_log %>% + filter(! is.na(filepath)) + +if(nrow(download_log) == 0) { + stop('No NADP rasters were downloaded. 
Please download manually from:\n', + ' https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/\n', + 'and place them in nadp/ (as zips or extracted folders)\n', + 'Then re-run this script.') +} + +message(nrow(download_log), ' rasters available for processing.') + +# ---- also check for any manually placed rasters ---- + +# Check for .tif files and extracted directories +manual_tifs <- list.files(nadp_dir, pattern = '\\.tif$', full.names = TRUE) +manual_dirs <- list.dirs(nadp_dir, recursive = FALSE, full.names = TRUE) + +manual_entries <- list() + +if(length(manual_tifs) > 0) { + manual_entries[['tifs']] <- tibble(filepath = manual_tifs) %>% + mutate( + basename = tools::file_path_sans_ext(basename(filepath)), + variable = sub('_(conc_)?[0-9]{4}$', '', basename), + year = as.integer(sub('.*_', '', basename)) + ) %>% + filter(variable %in% nadp_vars, ! is.na(year)) %>% + select(variable, year, filepath) +} + +if(length(manual_dirs) > 0) { + for(d in manual_dirs) { + dname <- basename(d) + # Parse variable_conc_year or variable_year pattern + yr <- as.integer(sub('.*_', '', dname)) + vr <- sub('_(conc_)?[0-9]{4}$', '', dname) + if(! is.na(yr) && vr %in% nadp_vars) { + rast_file <- find_raster_in_dir(d) + if(! is.null(rast_file)) { + manual_entries[[d]] <- tibble(variable = vr, year = yr, + filepath = rast_file) + } + } + } +} + +if(length(manual_entries) > 0) { + manual_info <- bind_rows(manual_entries) + download_log <- bind_rows(download_log, manual_info) %>% + distinct(variable, year, .keep_all = TRUE) +} + +# ---- extract raster values for each watershed ---- + +# Use exactextractr if available, otherwise terra::extract +use_exactextractr <- requireNamespace('exactextractr', quietly = TRUE) + +if(use_exactextractr) { + library(exactextractr) + message('Using exactextractr for weighted extraction.') +} else { + message('Using terra::extract. 
Install exactextractr for better accuracy.') +} + +results <- list() + +for(i in seq_len(nrow(download_log))) { + + row <- download_log[i, ] + message('Processing: ', row$variable, ' ', row$year) + + r <- try(rast(row$filepath), silent = TRUE) + if(inherits(r, 'try-error')) { + message(' Skipping (invalid raster)') + next + } + + # Ensure single layer (some ArcGrids may load multiple bands) + if(nlyr(r) > 1) { + message(' Multiple layers detected, using first layer only') + r <- r[[1]] + } + + # Reproject watersheds to match raster CRS + sheds_reproj <- st_transform(sheds_wgs84, crs(r)) + + # Crop to reduce memory + sheds_ext <- ext(vect(sheds_reproj)) + sheds_ext_buf <- sheds_ext + 0.5 + r_crop <- try(crop(r, sheds_ext_buf), silent = TRUE) + if(inherits(r_crop, 'try-error')) r_crop <- r + + if(use_exactextractr) { + vals <- exact_extract(r_crop, sheds_reproj, fun = 'mean') + } else { + ex <- terra::extract(r_crop, vect(sheds_reproj), fun = mean, + na.rm = TRUE, weights = TRUE) + vals <- ex[, 2] + } + + # NADP reports Na, K, Mg in ug/L; convert to mg/L + if(row$variable %in% vars_in_ugl) { + vals <- vals / 1000 + message(' Converted ', row$variable, ' from ug/L to mg/L') + } + + res <- tibble( + site_code = sheds_wgs84$site_code, + variable = row$variable, + year = row$year, + value = vals + ) + + results[[length(results) + 1]] <- res +} + +# ---- combine and format results ---- + +nadp_summary <- bind_rows(results) + +# Add descriptive labels and units +nadp_summary <- nadp_summary %>% + mutate( + variable_description = case_when( + variable == 'pH' ~ 'Precipitation-weighted mean pH', + variable == 'Ca' ~ 'Precipitation-weighted mean Ca concentration', + variable == 'Mg' ~ 'Precipitation-weighted mean Mg concentration', + variable == 'K' ~ 'Precipitation-weighted mean K concentration', + variable == 'Na' ~ 'Precipitation-weighted mean Na concentration', + variable == 'Cl' ~ 'Precipitation-weighted mean Cl concentration', + variable == 'NO3' ~ 
'Precipitation-weighted mean NO3 concentration', + variable == 'NH4' ~ 'Precipitation-weighted mean NH4 concentration', + variable == 'SO4' ~ 'Precipitation-weighted mean SO4 concentration', + TRUE ~ variable + ), + units = case_when( + variable == 'pH' ~ 'unitless', + variable %in% c('Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') ~ 'mg/L (converted from ug/L for Na, K, Mg)', + variable == 'Cond_est' ~ 'uS/cm', + TRUE ~ NA_character_ + ) + ) + +# Pivot to wide format (one column per variable) +nadp_wide <- nadp_summary %>% + select(site_code, year, variable, value) %>% + pivot_wider(names_from = variable, values_from = value) %>% + arrange(site_code, year) + +# ---- estimate conductivity from ions and pH ---- + +ion_cols <- intersect(names(equiv_weights), names(nadp_wide)) + +if(length(ion_cols) > 0) { + + # Compute estimated conductivity, tolerating some missing ions + # Each ion contributes independently; NA ions are skipped per row + n_rows <- nrow(nadp_wide) + cond <- rep(0, n_rows) + n_ions_available <- rep(0L, n_rows) + + for(ion in ion_cols) { + conc_mgl <- nadp_wide[[ion]] # mg/L (from NADP grid) + conc_ueql <- conc_mgl / equiv_weights[ion] * 1000 # mg/L -> ueq/L + contribution <- conc_ueql * equiv_conductances[ion] / 1000 # ueq/L * (uS/cm)/(ueq/L) / 1000 -> uS/cm + has_val <- !is.na(contribution) + cond[has_val] <- cond[has_val] + contribution[has_val] + n_ions_available[has_val] <- n_ions_available[has_val] + 1L + } + + # Add H+ contribution estimated from pH + # pH = -log10([H+] in mol/L), so [H+] = 10^(-pH) mol/L + # Convert mol/L -> ueq/L: for H+ (charge=1), 1 mol/L = 1e6 ueq/L + if('pH' %in% names(nadp_wide)) { + H_ueql <- 10^(-nadp_wide[['pH']]) * 1e6 # mol/L -> ueq/L + H_contribution <- H_ueql * H_equiv_conductance / 1000 # -> uS/cm + has_H <- !is.na(H_contribution) + cond[has_H] <- cond[has_H] + H_contribution[has_H] + n_ions_available[has_H] <- n_ions_available[has_H] + 1L + message('H+ concentration estimated from pH for conductivity 
calculation.') + } + + # Total possible ions = 8 measured + H from pH = 9 + max_possible <- length(ion_cols) + as.integer('pH' %in% names(nadp_wide)) + min_required <- max(max_possible - 2, 1) + cond[n_ions_available < min_required] <- NA_real_ + + nadp_wide$Cond_est <- round(cond, 2) + nadp_wide$Cond_est_n_ions <- n_ions_available + + message('Estimated conductivity (Cond_est, uS/cm) computed from ion concentrations.') + message(' Ions used: ', paste(ion_cols, collapse = ', ')) + message(' Min ions required per row: ', min_required, ' of ', max_possible) + message(' Rows with Cond_est: ', sum(!is.na(nadp_wide$Cond_est)), + ' of ', n_rows) + + # Also add to long format + cond_long <- nadp_wide %>% + select(site_code, year, Cond_est, Cond_est_n_ions) %>% + mutate(variable = 'Cond_est', + variable_description = 'Estimated conductivity from ions and pH', + units = 'uS/cm') %>% + rename(value = Cond_est) + + nadp_summary <- bind_rows(nadp_summary, select(cond_long, -Cond_est_n_ions)) + +} else { + message('Not enough ion variables to estimate conductivity.') +} + +# ---- save results ---- + +write.csv(nadp_summary, 'nadp_watershed_summary_long.csv', row.names = FALSE) +write.csv(nadp_wide, 'nadp_watershed_summary_wide.csv', row.names = FALSE) + +message('\n---- Summary ----') +message('Sites: ', paste(unique(nadp_summary$site_code), collapse = ', ')) +message('Variables: ', paste(sort(unique(nadp_summary$variable)), collapse = ', ')) +message('Years: ', min(nadp_summary$year), '-', max(nadp_summary$year)) +message('Results saved to:') +message(' nadp_watershed_summary_long.csv') +message(' nadp_watershed_summary_wide.csv') + +# Print a quick summary +nadp_summary %>% + group_by(site_code, variable) %>% + summarize( + n_years = n(), + mean_value = mean(value, na.rm = TRUE), + min_value = min(value, na.rm = TRUE), + max_value = max(value, na.rm = TRUE), + .groups = 'drop' + ) %>% + print(n = Inf) + +## verify fluxes match + +library(lubridate) + +clim <- ms_load_product( 
+ macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + prodname = 'ws_attr_timeseries:climate', + site_codes = 'ALBION', + warn = FALSE +) +p <- ms_load_product( + macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + prodname = 'precipitation', + site_codes = 'ALBION', + warn = FALSE +) +albion_na_flux_1985 <- filter(clim, var == 'Na_flux_mean') %>% slice(1) +albion_p_1985 <- filter(p, year(date) == 1985) %>% summarize(val = sum(val)) + +albion_na_conc_1985_claude <- filter(nadp_summary, variable == 'Na', site_code == 'ALBION', year == 1985)$value +#mg/L kg/ha mg/kg mm/m m^2/ha mm L/m^3 +albion_na_conc_1985_macrosheds <- (albion_na_flux_1985$val * 10e6 * 1000) / (10e4 * albion_p_1985 * 1000) + +#% difference (accounted for by precip source, raster processing, etc.) +(albion_na_conc_1985_macrosheds - albion_na_conc_1985_claude) / albion_na_conc_1985_macrosheds