From 1839d3197f6bffbdf94a1e22ff3c83d50885d179 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Thu, 15 Oct 2020 15:11:38 -0400 Subject: [PATCH 01/37] interactive watershed delineation! --- 4_interactive_watershed_delineation.R | 507 ++++++++++++++++++++++++++ 1 file changed, 507 insertions(+) create mode 100644 4_interactive_watershed_delineation.R diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R new file mode 100644 index 0000000..10fc8d5 --- /dev/null +++ b/4_interactive_watershed_delineation.R @@ -0,0 +1,507 @@ +#documentation and helper functions included in function definition + +delineate_watershed_from_point <- function(lat, + long, + crs, + dev_machine_status = 'n00b', + write_dir, + write_name, + verbose = TRUE){ + + #lat: numeric representing latitude of the pour point in decimal degrees + # (negative indicates southern hemisphere) + #long: numeric representing longitude of the pour point in decimal degrees + # (negative indicates west of prime meridian) + #crs: numeric representing the coordinate reference system (e.g. 4326 for WSG84) + #dev_machine_status: either '1337', indicating that your machine has >= 16 GB + # RAM, or 'n00b', indicating < 16 GB RAM. DEM resolution is chosen accordingly + #write_dir: character. the directory in which to write output shapefile + #write_name: character. the basename of the shapefile components to be written. i.e. + # .shp, .shx, .prj, .dbf + #verbose: logical. determines the amount of informative messaging during run + + #details: Output will have CRS 4326 (WGS 84), though processing is done + # on projected data. the projection specifications are determined + # automatically, based on pour point location. Note that for mega-huge + # watersheds, this could result in an inaccurate watershed area calculation. 
+ + #returns: a list containing the following components: + # watershed_area_ha: the area of the delineated watershed in hectares + # (meters squared divided by 10,000) + # buffer_radius_m: the width (meters) around the site location that was used when + # requesting a DEM (digital elevation model) + # snap_distance_m: the search radius (meters) around the pour point that was used + # to choose a stream to snap the pour point to. + # snap_method: either "standard", which snaps the pour point to the cell + # within snap_distance_m with highest flow accumulation, or "jenson", + # which snaps to the nearest flow line + # dem_resolution: passed to elevatr::get_elev_raster (z parameter). + # depends on supplied dev_machine_status + + #NOTE: in addition to the packages loaded below, you'll need mapview to + # visualize delineated watershed boundaries + + require(dplyr) + require(glue) + require(sf) + require(elevatr) + require(raster) + require(whitebox) + + sm <- suppressMessages + sw <- suppressWarnings + + #the functions below are all helpers for subroutines of + # delineate_watershed_from_point. they should all stand alone pretty + # well, and some are generally useful. they are defined locally here, + # just for convenient distribution. + + #moving shapefiles can be annoying, since they're actually represented by + # 3-4 files + move_shapefiles <- function(shp_files, from_dir, to_dir, + new_name_vec = NULL){ + + #shp_files is a character vector of filenames with .shp extension + # (.shx, .prj, .dbf are handled internally and don't need to be listed) + #from_dir and to_dir are strings representing the source and destination + # directories, respectively + #new_name_vec is an optional character vector of new names for each shape file. + # these can end in ".shp", but don't need to + + if(any(! 
grepl('\\.shp$', shp_files))){ + stop('All components of shp_files must end in ".shp"') + } + + if(length(shp_files) != length(new_name_vec)){ + stop('new_name_vec must have the same length as shp_files') + } + + for(i in 1:length(shp_files)){ + + shapefile_base <- strsplit(shp_files[i], '\\.shp')[[1]] + + files_to_move <- list.files(path = from_dir, + pattern = shapefile_base) + + extensions <- stringr::str_match(files_to_move, + paste0(shapefile_base, '(\\.[a-z]{3})'))[, 2] + + if(is.null(new_name_vec)){ + new_name_base <- rep(shapefile_base, length(files_to_move)) + } else { + new_name_base <- strsplit(new_name_vec[i], '\\.shp$')[[1]] + new_name_base <- rep(new_name_base, length(files_to_move)) + } + + mapply(function(x, nm, ext) file.rename(from = paste(from_dir, + x, + sep = '/'), + to = glue('{td}/{n}{ex}', + td = to_dir, + n = nm, + ex = ext)), + x = files_to_move, + nm = new_name_base, + ext = extensions) + } + + return() + } + + #prompt users for stuff, provide single-character responses, + # reprompt if they don't choose one of the expected responses + get_response_1char <- function(msg, possible_chars, + subsequent_prompt = FALSE){ + + #msg: character. 
a message that will be used to prompt the user + #possible_chars: character vector of acceptable single-character responses + + if(subsequent_prompt){ + cat(paste('Please choose one of:', + paste(possible_chars, + collapse = ', '), + '\n> ')) + } else { + cat(msg) + } + + ch <- as.character(readLines(con = stdin(), 1)) + + if(length(ch) == 1 && ch %in% possible_chars){ + return(ch) + } else { + get_response_1char(msg, possible_chars, subsequent_prompt = TRUE) + } + } + + #chooose an appropriate projection, based on location + choose_projection <- function(lat = NULL, long = NULL, + unprojected = FALSE){ + + if(unprojected){ + PROJ4 <- glue('+proj=longlat +datum=WGS84 +no_defs ', + '+ellps=WGS84 +towgs84=0,0,0') + return(PROJ4) + } + + if(is.null(lat) || is.null(long)){ + stop('If projecting, lat and long are required.') + } + + if(lat <= 15 && lat >= -15){ #equatorial + PROJ4 = glue('+proj=laea +lon_0=', long) + } else { #temperate or polar + PROJ4 = glue('+proj=laea +lat_0=', lat, ' +lon_0=', long) + } + + return(PROJ4) + } + + #for determining whether the DEM extent wasn't big enough to allow full + # delineation + raster_intersection_summary <- function(wb, dem){ + + #wb is a delineated watershed boundary as a rasterLayer + #dem is a DEM rasterLayer + + summary_out <- list() + + #convert wb to sf object + wb <- wb %>% + raster::rasterToPolygons() %>% + sf::st_as_sf() + + #get edge of DEM as sf object + dem_edge <- raster::boundaries(dem) %>% + raster::reclassify(matrix(c(0, NA), + ncol = 2)) %>% + raster::rasterToPolygons() %>% + sf::st_as_sf() + + #tally raster cells + summary_out$n_wb_cells <- length(wb$geometry) + summary_out$n_dem_cells <- length(dem_edge$geometry) + + #tally intersections; calc percent of wb cells that overlap + intersections <- sf::st_intersects(wb, dem_edge) %>% + as.matrix() %>% + apply(MARGIN = 2, + FUN = sum) %>% + table() + + true_intersections <- sum(intersections[names(intersections) > 0]) + + summary_out$n_intersections <- 
true_intersections + summary_out$pct_wb_cells_intersect <- true_intersections / + summary_out$n_wb_cells * 100 + + return(summary_out) + } + + #the workhorse + delineate_watershed_apriori <- function(lat, long, crs, + dev_machine_status = 'n00b', + verbose = FALSE){ + + #lat: numeric representing latitude in decimal degrees + # (negative indicates southern hemisphere) + #long: numeric representing longitude in decimal degrees + # (negative indicates west of prime meridian) + #crs: numeric representing the coordinate reference system (e.g. WSG84) + #dev_machine_status: either '1337', indicating that your machine has >= 16 GB + # RAM, or 'n00b', indicating < 16 GB RAM. DEM resolution is chosen accordingly + #verbose: logical. determines the amount of informative messaging during run + + #returns the location of candidate watershed boundary files + + tmp <- tempdir() + inspection_dir <- glue(tmp, '/INSPECT_THESE') + dem_f <- glue(tmp, '/dem.tif') + point_f <- glue(tmp, '/point.shp') + d8_f <- glue(tmp, '/d8_pntr.tif') + flow_f <- glue(tmp, '/flow.tif') + + dir.create(path = inspection_dir, + showWarnings = FALSE) + + proj <- choose_projection(lat = lat, + long = long) + + site <- tibble(x = lat, + y = long) %>% + sf::st_as_sf(coords = c("y", "x"), + crs = crs) %>% + sf::st_transform(proj) + # sf::st_transform(4326) #WGS 84 (would be nice to do this unprojected) + + #prepare for delineation loops + buffer_radius <- 100 + dem_coverage_insufficient <- FALSE + while_loop_begin <- TRUE + + #snap site to flowlines 3 different ways. delineate watershed boundaries (wb) + #for each unique snap. 
if the delineations get cut off, get more elevation data + #and try again + while(while_loop_begin || dem_coverage_insufficient){ + + while_loop_begin <- FALSE + + if(dev_machine_status == '1337'){ + dem_resolution <- case_when( + buffer_radius <= 1e4 ~ 12, + buffer_radius == 1e5 ~ 11, + buffer_radius == 1e6 ~ 10, + buffer_radius == 1e7 ~ 8, + buffer_radius == 1e8 ~ 6, + buffer_radius == 1e9 ~ 4, + buffer_radius >= 1e10 ~ 2) + } else if(dev_machine_status == 'n00b'){ + dem_resolution <- case_when( + buffer_radius <= 1e4 ~ 10, + buffer_radius == 1e5 ~ 8, + buffer_radius == 1e6 ~ 6, + buffer_radius == 1e7 ~ 4, + buffer_radius == 1e8 ~ 2, + buffer_radius >= 1e9 ~ 1) + } else { + stop('dev_machine_status must be either "1337" or "n00b"') + } + + site_buf <- sf::st_buffer(x = site, + dist = buffer_radius) + dem <- elevatr::get_elev_raster(locations = site_buf, + z = dem_resolution, + verbose = verbose) + + raster::writeRaster(x = dem, + filename = dem_f, + overwrite = TRUE) + + sf::st_write(obj = site, + dsn = point_f, + delete_layer = TRUE, + quiet = TRUE) + + whitebox::wbt_fill_single_cell_pits(dem = dem_f, + output = dem_f) + + whitebox::wbt_breach_depressions(dem = dem_f, + output = dem_f, + flat_increment = 0.01) + + whitebox::wbt_d8_pointer(dem = dem_f, + output = d8_f) + + whitebox::wbt_d8_flow_accumulation(input = dem_f, + output = flow_f, + out_type = 'catchment area') + + snap1_f <- glue(tmp, '/snap1_jenson_dist150.shp') + whitebox::wbt_jenson_snap_pour_points(pour_pts = point_f, + streams = flow_f, + output = snap1_f, + snap_dist = 150) + snap2_f <- glue(tmp, '/snap2_standard_dist50.shp') + whitebox::wbt_snap_pour_points(pour_pts = point_f, + flow_accum = flow_f, + output = snap2_f, + snap_dist = 50) + snap3_f <- glue(tmp, '/snap3_standard_dist150.shp') + whitebox::wbt_snap_pour_points(pour_pts = point_f, + flow_accum = flow_f, + output = snap3_f, + snap_dist = 150) + + #the site has been snapped 3 different ways. identify unique snap locations. 
+ snap1 <- sf::st_read(snap1_f, quiet = TRUE) + snap2 <- sf::st_read(snap2_f, quiet = TRUE) + snap3 <- sf::st_read(snap3_f, quiet = TRUE) + unique_snaps_f <- snap1_f + if(! identical(snap1, snap2)) unique_snaps_f <- c(unique_snaps_f, snap2_f) + if(! identical(snap1, snap3)) unique_snaps_f <- c(unique_snaps_f, snap3_f) + + #good for experimenting with snap specs: + # delineate_watershed_test2(tmp, point_f, flow_f, + # d8_f, 'standard', 1000) + + #delineate each unique location + for(i in 1:length(unique_snaps_f)){ + + rgx <- stringr::str_match(unique_snaps_f[i], + '.*?_(standard|jenson)_dist([0-9]+)\\.shp$') + snap_method <- rgx[, 2] + snap_distance <- rgx[, 3] + + wb_f <- glue('{path}/wb{n}_buffer{b}_{typ}_dist{dst}.tif', + path = tmp, + n = i, + b = buffer_radius, + typ = snap_method, + dst = snap_distance) + + whitebox::wbt_watershed(d8_pntr = d8_f, + pour_pts = unique_snaps_f[i], + output = wb_f) + + wb <- raster::raster(wb_f) + + #check how many wb cells coincide with the edge of the DEM. + #If > 0.1% or > 5, broader DEM needed + smry <- raster_intersection_summary(wb = wb, + dem = dem) + + if(verbose){ + print(glue('buffer radius: {br}; snap: {sn}/{tot}; ', + 'n intersecting cells: {ni}; pct intersect: {pct}', + br = buffer_radius, + sn = i, + tot = length(unique_snaps_f), + ni = round(smry$n_intersections, 2), + pct = round(smry$pct_wb_cells_intersect, 2))) + } + + if(smry$pct_wb_cells_intersect > 0.1 || smry$n_intersections > 5){ + buffer_radius_new <- buffer_radius * 10 + dem_coverage_insufficient <- TRUE + } else { + buffer_radius_new <- buffer_radius + + #write and record temp files for the technician to visually inspect + wb_sf <- wb %>% + raster::rasterToPolygons() %>% + sf::st_as_sf() %>% + sf::st_buffer(dist = 0.1) %>% + sf::st_union() %>% + sf::st_as_sf()#again? ugh. 
+ + wb_sf <- sf::st_transform(wb_sf, 4326) #EPSG for WGS84 + + wb_sf_f <- glue('{path}/wb{n}_BUF{b}{typ}DIST{dst}RES{res}.shp', + path = inspection_dir, + n = i, + b = buffer_radius, + typ = snap_method, + dst = snap_distance, + res = dem_resolution) + + sf::st_write(obj = wb_sf, + dsn = wb_sf_f, + delete_dsn = TRUE, + quiet = TRUE) + } + } + + buffer_radius <- buffer_radius_new + } #end while loop + + if(verbose){ + message(glue('Candidate delineations are in: ', inspection_dir)) + } + + return(inspection_dir) + } + + if(verbose){ + message('Beginning watershed delineation') + } + + inspection_dir <- sw(delineate_watershed_apriori( + lat = lat, + long = long, + crs = crs, + dev_machine_status = dev_machine_status, + verbose = verbose)) + + files_to_inspect <- list.files(path = inspection_dir, + pattern = '.shp') + + #if only one delineation, write it to write_dir + if(length(files_to_inspect) == 1){ + + selection <- files_to_inspect[1] + + move_shapefiles(shp_files = selection, + from_dir = inspection_dir, + to_dir = write_dir) + + if(verbose){ + message(glue('Delineation successful. Shapefile written to ', + write_dir)) + } + + #otherwise, inspect all delineations and choose one + } else { + + nshapes <- length(files_to_inspect) + + wb_selections <- paste(paste0('[', + c(1:nshapes, 'A'), + ']'), + c(files_to_inspect, 'Abort delineation'), + sep = ': ', + collapse = '\n') + + helper_code <- glue('mapview::mapview(sf::st_read("{wd}/{f}"))', + wd = inspection_dir, + f = files_to_inspect) %>% + paste(collapse = '\n\n') + + msg <- glue('Visually inspect the watershed boundary candidate shapefiles ', + 'in {td}, then enter the number corresponding to the ', + 'one that looks most legit. 
Here\'s some ', + 'helper code you can paste into a separate R instance ', + ':\n\n{hc}\n\nChoices:\n{sel}\n\nEnter choice here > ', + hc = helper_code, + sel = wb_selections, + td = inspection_dir) + + resp <- get_response_1char(msg = msg, + possible_chars = c(1:nshapes, 'A')) + + if(resp == 'A'){ + message('Delineation aborted') + return() + } + + selection <- files_to_inspect[as.numeric(resp)] + + move_shapefiles(shp_files = selection, + from_dir = inspection_dir, + to_dir = write_dir, + new_name_vec = write_name) + + message(glue('Selection {s}:\n\t{sel}\nwas written to:\n\t{sdr}', + s = resp, + sel = selection, + sdr = write_dir)) + } + + #calculate watershed area in hectares + wb <- sf::st_read(glue('{d}/{s}.shp', + d = write_dir, + s = write_name), + quiet = TRUE) + + ws_area_ha <- as.numeric(sf::st_area(wb)) / 10000 + + #return the specifications of the correctly delineated watershed, and some + # other goodies + rgx <- stringr::str_match(selection, + paste0('^wb[0-9]+_BUF([0-9]+)(standard|jenson)', + 'DIST([0-9]+)RES([0-9]+)\\.shp$')) + + deets <- list(name = write_name, + watershed_area_ha = ws_area_ha, + buffer_radius_m = as.numeric(rgx[, 2]), + snap_distance_m = as.numeric(rgx[, 4]), + snap_method = rgx[, 3], + dem_resolution = as.numeric(rgx[, 5])) + + return(deets) +} + +deets <- delineate_watershed_from_point(lat = 44.21013, + long = -122.2571, + crs = 4326, + write_dir = '~/some_path', + write_name = 'ultimate_watershed') From 7a53f1a170ccdf68842f0270adb35b8bc0108650 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Thu, 15 Oct 2020 15:17:10 -0400 Subject: [PATCH 02/37] clarified some notes --- 4_interactive_watershed_delineation.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index 10fc8d5..83cc36d 100644 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -1,4 +1,5 @@ -#documentation and helper functions included 
in function definition +#documentation and helper functions are included in the function definition below. +#if you're in Rstudio, collapse the function with Alt+o. there's an example included at the bottom. delineate_watershed_from_point <- function(lat, long, From 0b19c14920b3a62583e7fb6bd62ae046e8e91b9e Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Thu, 15 Oct 2020 15:19:57 -0400 Subject: [PATCH 03/37] changed one parameter name --- 4_interactive_watershed_delineation.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index 83cc36d..28e19e2 100644 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -4,7 +4,7 @@ delineate_watershed_from_point <- function(lat, long, crs, - dev_machine_status = 'n00b', + machine_status = 'n00b', write_dir, write_name, verbose = TRUE){ @@ -14,7 +14,7 @@ delineate_watershed_from_point <- function(lat, #long: numeric representing longitude of the pour point in decimal degrees # (negative indicates west of prime meridian) #crs: numeric representing the coordinate reference system (e.g. 4326 for WSG84) - #dev_machine_status: either '1337', indicating that your machine has >= 16 GB + #machine_status: either '1337', indicating that your machine has >= 16 GB # RAM, or 'n00b', indicating < 16 GB RAM. DEM resolution is chosen accordingly #write_dir: character. the directory in which to write output shapefile #write_name: character. the basename of the shapefile components to be written. i.e. @@ -37,7 +37,7 @@ delineate_watershed_from_point <- function(lat, # within snap_distance_m with highest flow accumulation, or "jenson", # which snaps to the nearest flow line # dem_resolution: passed to elevatr::get_elev_raster (z parameter). 
- # depends on supplied dev_machine_status + # depends on supplied machine_status #NOTE: in addition to the packages loaded below, you'll need mapview to # visualize delineated watershed boundaries @@ -201,7 +201,7 @@ delineate_watershed_from_point <- function(lat, #the workhorse delineate_watershed_apriori <- function(lat, long, crs, - dev_machine_status = 'n00b', + machine_status = 'n00b', verbose = FALSE){ #lat: numeric representing latitude in decimal degrees @@ -209,7 +209,7 @@ delineate_watershed_from_point <- function(lat, #long: numeric representing longitude in decimal degrees # (negative indicates west of prime meridian) #crs: numeric representing the coordinate reference system (e.g. WSG84) - #dev_machine_status: either '1337', indicating that your machine has >= 16 GB + #machine_status: either '1337', indicating that your machine has >= 16 GB # RAM, or 'n00b', indicating < 16 GB RAM. DEM resolution is chosen accordingly #verbose: logical. determines the amount of informative messaging during run @@ -247,7 +247,7 @@ delineate_watershed_from_point <- function(lat, while_loop_begin <- FALSE - if(dev_machine_status == '1337'){ + if(machine_status == '1337'){ dem_resolution <- case_when( buffer_radius <= 1e4 ~ 12, buffer_radius == 1e5 ~ 11, @@ -256,7 +256,7 @@ delineate_watershed_from_point <- function(lat, buffer_radius == 1e8 ~ 6, buffer_radius == 1e9 ~ 4, buffer_radius >= 1e10 ~ 2) - } else if(dev_machine_status == 'n00b'){ + } else if(machine_status == 'n00b'){ dem_resolution <- case_when( buffer_radius <= 1e4 ~ 10, buffer_radius == 1e5 ~ 8, @@ -265,7 +265,7 @@ delineate_watershed_from_point <- function(lat, buffer_radius == 1e8 ~ 2, buffer_radius >= 1e9 ~ 1) } else { - stop('dev_machine_status must be either "1337" or "n00b"') + stop('machine_status must be either "1337" or "n00b"') } site_buf <- sf::st_buffer(x = site, @@ -410,7 +410,7 @@ delineate_watershed_from_point <- function(lat, lat = lat, long = long, crs = crs, - dev_machine_status = 
dev_machine_status, + machine_status = machine_status, verbose = verbose)) files_to_inspect <- list.files(path = inspection_dir, From 46bc4c5ff3a997699fecfe3ba47da0d381bcd0df Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Thu, 15 Oct 2020 16:40:37 -0400 Subject: [PATCH 04/37] added comment about needed packages --- 4_interactive_watershed_delineation.R | 1 + 1 file changed, 1 insertion(+) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index 28e19e2..dab4da9 100644 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -1,5 +1,6 @@ #documentation and helper functions are included in the function definition below. #if you're in Rstudio, collapse the function with Alt+o. there's an example included at the bottom. +#you'll need these packages: dplyr, glue, sf, elevatr, raster, whitebox, mapview delineate_watershed_from_point <- function(lat, long, From e6ffa9cfd77ac82cdb0c8a2a637c1731970a5407 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 20 Oct 2020 10:27:00 -0400 Subject: [PATCH 05/37] devtools -> remotes update --- 2_batch_summary_nhd.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/2_batch_summary_nhd.R b/2_batch_summary_nhd.R index 7c875be..6c6ab8f 100644 --- a/2_batch_summary_nhd.R +++ b/2_batch_summary_nhd.R @@ -19,9 +19,9 @@ library(stringr) library(dplyr) library(plyr) -# devtools::install_github("USGS-R/nhdplusTools") +# remotes::install_github("USGS-R/nhdplusTools") library(nhdplusTools) -# devtools::install_github("jsta/nhdR") +# remotes::install_github("jsta/nhdR") library(nhdR) library(RCurl) library(sf) @@ -29,7 +29,9 @@ library(sf) # 1. setup and helper functions #### -setwd('~/Desktop/untracked/watershed_geohax/') +#set your working directory to the location of your site data file. 
+# site data must include latitude and longitude columns (decimal degrees) +# setwd('~/Desktop/untracked/watershed_geohax/') sites = read.csv('site_data.csv', stringsAsFactors=FALSE) WGS84 = 4326 #EPSG code for coordinate reference system From 91b49edd2ba43a1ec541351b05685fc214e4dc3a Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 20 Oct 2020 11:43:10 -0400 Subject: [PATCH 06/37] note about nhdplustools issue --- 2_batch_summary_nhd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/2_batch_summary_nhd.R b/2_batch_summary_nhd.R index 6c6ab8f..428e947 100644 --- a/2_batch_summary_nhd.R +++ b/2_batch_summary_nhd.R @@ -220,7 +220,7 @@ streamcat_bulk = function(site_df, streamcat_sets){ # 2. get NHDPlusV2 data #### #COMID is the NHD identifier for any reach in the continental U.S. -#add COMIDs to your site table. +#add COMIDs to your site table. If this doesn't work, update nhdplusTools sites$COMID = unlist(mapply(comid_from_point, sites$latitude, sites$longitude, WGS84)) sites = sites[! 
is.na(sites$COMID),] From ea064b957a0c24a0316a93209c907e49ef06f142 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Thu, 19 Nov 2020 23:49:21 -0500 Subject: [PATCH 07/37] fixed infinite loop potentialin 4_ --- 4_interactive_watershed_delineation.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index dab4da9..1f80b53 100644 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -78,6 +78,10 @@ delineate_watershed_from_point <- function(lat, stop('new_name_vec must have the same length as shp_files') } + dir.create(to_dir, + showWarnings = FALSE, + recursive = TRUE) + for(i in 1:length(shp_files)){ shapefile_base <- strsplit(shp_files[i], '\\.shp')[[1]] @@ -366,6 +370,7 @@ delineate_watershed_from_point <- function(lat, buffer_radius_new <- buffer_radius * 10 dem_coverage_insufficient <- TRUE } else { + dem_coverage_insufficient <- FALSE buffer_radius_new <- buffer_radius #write and record temp files for the technician to visually inspect From 91aaa2c8f405a1977ba3b9f6d298fdd32efbc03a Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sun, 17 Jan 2021 20:05:55 -0500 Subject: [PATCH 08/37] first commit on 20.04; temp file for pulling OSM streets and waterways --- .gitignore | 0 1_batch_delineation.Rmd | 0 1_doe_maps.Rmd | 0 2_batch_summary_nhd.R | 0 3_batch_summary_manual_delin.R | 0 4_interactive_watershed_delineation.R | 0 README.md | 0 Watershed_Tools.Rproj | 0 osm_temp.R | 42 +++++++++++++++++++++++++++ site_data.csv | 0 10 files changed, 42 insertions(+) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 1_batch_delineation.Rmd mode change 100644 => 100755 1_doe_maps.Rmd mode change 100644 => 100755 2_batch_summary_nhd.R mode change 100644 => 100755 3_batch_summary_manual_delin.R mode change 100644 => 100755 4_interactive_watershed_delineation.R mode change 100644 => 100755 README.md mode change 100644 => 100755 
Watershed_Tools.Rproj create mode 100644 osm_temp.R mode change 100644 => 100755 site_data.csv diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/1_batch_delineation.Rmd b/1_batch_delineation.Rmd old mode 100644 new mode 100755 diff --git a/1_doe_maps.Rmd b/1_doe_maps.Rmd old mode 100644 new mode 100755 diff --git a/2_batch_summary_nhd.R b/2_batch_summary_nhd.R old mode 100644 new mode 100755 diff --git a/3_batch_summary_manual_delin.R b/3_batch_summary_manual_delin.R old mode 100644 new mode 100755 diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/Watershed_Tools.Rproj b/Watershed_Tools.Rproj old mode 100644 new mode 100755 diff --git a/osm_temp.R b/osm_temp.R new file mode 100644 index 0000000..76f24f4 --- /dev/null +++ b/osm_temp.R @@ -0,0 +1,42 @@ +dem_unprojected <- raster::projectRaster(dem, crs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs') +dem_bounds <- sf::st_bbox(dem_unprojected) +# highway_types <- c('motorway', 'trunk', 'primary') +highway_types <- c('motorway', 'trunk', 'primary', 'secondary', 'tertiary') +highway_types <- c(highway_types, + paste(highway_types, 'link', sep = '_')) +roads_query <- opq(dem_bounds) %>% + add_osm_feature(key = 'highway', + value = highway_types) +roads <- osmdata_sf(roads_query) +plot(roads$osm_lines) + +streams_query <- opq(dem_bounds) %>% + add_osm_feature(key = 'waterway', + value = c('river', 'stream')) +streams <- osmdata_sf(streams_query) +streams$osm_lines$geometry +plot(streams$osm_lines) + +streams_f <- glue(tmp, '/streams.shp') +roads_f <- glue(tmp, '/roads.shp') +sf::st_transform(streams$osm_lines$geometry, + crs = proj) %>% + sf::st_write(dsn = tmp, + layer = 'streams.shp', + driver = 'ESRI Shapefile', + delete_layer = TRUE, + silent = TRUE) +sf::st_transform(roads$osm_lines$geometry, + crs = proj) %>% + sf::st_write(dsn 
= tmp, + layer = 'roads.shp', + driver = 'ESRI Shapefile', + delete_layer = TRUE, + silent = TRUE) + +whitebox::wbt_burn_streams_at_roads(dem = dem_f, + streams = streams_f, + roads = roads_f, + output = dem_f, + width = 50) + diff --git a/site_data.csv b/site_data.csv old mode 100644 new mode 100755 From d01ed959d10350a278df51705480c8019fe9ef83 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Mon, 15 Mar 2021 00:03:19 -0400 Subject: [PATCH 09/37] more options and bugfixes for autodelineator --- 4_interactive_watershed_delineation.R | 910 +++++++++++++++++++++----- osm_temp.R | 42 -- 2 files changed, 740 insertions(+), 212 deletions(-) delete mode 100644 osm_temp.R diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index 1f80b53..b896397 100755 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -1,11 +1,20 @@ #documentation and helper functions are included in the function definition below. -#if you're in Rstudio, collapse the function with Alt+o. there's an example included at the bottom. -#you'll need these packages: dplyr, glue, sf, elevatr, raster, whitebox, mapview +#if you're in Rstudio, collapse the function with Alt+o. +#there's a demo call included at the bottom. + +library(tidyverse) +library(glue) +library(sf) +library(elevatr) +library(raster) +library(whitebox) +library(terra) +library(mapview) +library(osmdata) #only needed if you want to burn streams into the DEM delineate_watershed_from_point <- function(lat, long, crs, - machine_status = 'n00b', write_dir, write_name, verbose = TRUE){ @@ -15,8 +24,6 @@ delineate_watershed_from_point <- function(lat, #long: numeric representing longitude of the pour point in decimal degrees # (negative indicates west of prime meridian) #crs: numeric representing the coordinate reference system (e.g. 4326 for WSG84) - #machine_status: either '1337', indicating that your machine has >= 16 GB - # RAM, or 'n00b', indicating < 16 GB RAM. 
DEM resolution is chosen accordingly #write_dir: character. the directory in which to write output shapefile #write_name: character. the basename of the shapefile components to be written. i.e. # .shp, .shx, .prj, .dbf @@ -40,16 +47,6 @@ delineate_watershed_from_point <- function(lat, # dem_resolution: passed to elevatr::get_elev_raster (z parameter). # depends on supplied machine_status - #NOTE: in addition to the packages loaded below, you'll need mapview to - # visualize delineated watershed boundaries - - require(dplyr) - require(glue) - require(sf) - require(elevatr) - require(raster) - require(whitebox) - sm <- suppressMessages sw <- suppressWarnings @@ -60,7 +57,9 @@ delineate_watershed_from_point <- function(lat, #moving shapefiles can be annoying, since they're actually represented by # 3-4 files - move_shapefiles <- function(shp_files, from_dir, to_dir, + move_shapefiles <- function(shp_files, + from_dir, + to_dir, new_name_vec = NULL){ #shp_files is a character vector of filenames with .shp extension @@ -89,7 +88,7 @@ delineate_watershed_from_point <- function(lat, files_to_move <- list.files(path = from_dir, pattern = shapefile_base) - extensions <- stringr::str_match(files_to_move, + extensions <- str_match(files_to_move, paste0(shapefile_base, '(\\.[a-z]{3})'))[, 2] if(is.null(new_name_vec)){ @@ -99,28 +98,55 @@ delineate_watershed_from_point <- function(lat, new_name_base <- rep(new_name_base, length(files_to_move)) } - mapply(function(x, nm, ext) file.rename(from = paste(from_dir, - x, - sep = '/'), - to = glue('{td}/{n}{ex}', - td = to_dir, - n = nm, - ex = ext)), - x = files_to_move, - nm = new_name_base, - ext = extensions) + tryCatch({ + + #try to move the files (may fail if they are on different partitions) + mapply(function(x, nm, ext) file.rename(from = paste(from_dir, + x, + sep = '/'), + to = glue('{td}/{n}{ex}', + td = to_dir, + n = nm, + ex = ext)), + x = files_to_move, + nm = new_name_base, + ext = extensions) + + }, warning = 
function(w){ + + #if that fails, copy them and then delete them + mapply(function(x, nm, ext) file.copy(from = paste(from_dir, + x, + sep = '/'), + to = glue('{td}/{n}{ex}', + td = to_dir, + n = nm, + ex = ext), + overwrite = TRUE), + x = files_to_move, + nm = new_name_base, + ext = extensions) + + lapply(paste(from_dir, + files_to_move, + sep = '/'), + unlink) + }) } - return() + #return() } #prompt users for stuff, provide single-character responses, # reprompt if they don't choose one of the expected responses - get_response_1char <- function(msg, possible_chars, + get_response_1char <- function(msg, + possible_chars, subsequent_prompt = FALSE){ #msg: character. a message that will be used to prompt the user #possible_chars: character vector of acceptable single-character responses + #subsequent prompt: not to be set directly. This is handled by + # get_response_mchar during recursion. if(subsequent_prompt){ cat(paste('Please choose one of:', @@ -136,12 +162,105 @@ delineate_watershed_from_point <- function(lat, if(length(ch) == 1 && ch %in% possible_chars){ return(ch) } else { - get_response_1char(msg, possible_chars, subsequent_prompt = TRUE) + get_response_1char(msg = msg, + possible_chars = possible_chars, + subsequent_prompt = TRUE) } } + #prompt users for stuff, provide multi-character responses, + # reprompt if they don't choose one of the expected responses + get_response_mchar <- function(msg, + possible_resps, + allow_alphanumeric_response = TRUE, + subsequent_prompt = FALSE){ + + #msg: character. a message that will be used to prompt the user + #possible_resps: character vector. If length 1, each character in the response + # will be required to match a character in possible_resps, and the return + # value will be a character vector of each single-character tokens in the + # response. If + # length > 1, the response will be required to match an element of + # possible_resps exactly, and the response will be returned as-is. 
+ #allow_alphanumeric_response: logical. If FALSE, the response may not + # include both numerals and letters. Only applies when possible_resps + # has length 1. + #subsequent prompt: not to be set directly. This is handled by + # get_response_mchar during recursion. + + split_by_character <- ifelse(length(possible_resps) == 1, TRUE, FALSE) + + if(subsequent_prompt){ + + if(split_by_character){ + pr <- strsplit(possible_resps, split = '')[[1]] + } else { + pr <- possible_resps + } + + cat(paste('Your options are:', + paste(pr, + collapse = ', '), + '\n> ')) + } else { + cat(msg) + } + + chs <- as.character(readLines(con = stdin(), 1)) + + if(! allow_alphanumeric_response && + split_by_character && + grepl('[0-9]', chs) && + grepl('[a-zA-Z]', chs)){ + + cat('Response may not include both letters and numbers.\n> ') + resp <- get_response_mchar( + msg = msg, + possible_resps = possible_resps, + allow_alphanumeric_response = allow_alphanumeric_response, + subsequent_prompt = FALSE) + + return(resp) + } + + if(length(chs)){ + if(split_by_character){ + + if(length(possible_resps) != 1){ + stop('possible_resps must be length 1 if split_by_character is TRUE') + } + + chs <- strsplit(chs, split = '')[[1]] + possible_resps_split <- strsplit(possible_resps, split = '')[[1]] + + if(all(chs %in% possible_resps_split)){ + return(chs) + } + + } else { + + if(length(possible_resps) < 2){ + stop('possible_resps must have length > 1 if split_by_character is FALSE') + } + + if(any(possible_resps == chs)){ + return(chs) + } + } + } + + resp <- get_response_mchar( + msg = msg, + possible_resps = possible_resps, + allow_alphanumeric_response = allow_alphanumeric_response, + subsequent_prompt = TRUE) + + return(resp) + } + #chooose an appropriate projection, based on location - choose_projection <- function(lat = NULL, long = NULL, + choose_projection <- function(lat = NULL, + long = NULL, unprojected = FALSE){ if(unprojected){ @@ -154,35 +273,84 @@ delineate_watershed_from_point <- 
function(lat, stop('If projecting, lat and long are required.') } - if(lat <= 15 && lat >= -15){ #equatorial - PROJ4 = glue('+proj=laea +lon_0=', long) - } else { #temperate or polar - PROJ4 = glue('+proj=laea +lat_0=', lat, ' +lon_0=', long) + abslat <- abs(lat) + + #this is what the makers of https://projectionwizard.org/# use to choose + #a suitable projection: https://rdrr.io/cran/rCAT/man/simProjWiz.html + # THIS WORKS (PROJECTS STUFF), BUT CAN'T BE READ AUTOMATICALLY BY st_read + if(abslat < 70){ #tropical or temperate + PROJ4 <- glue('+proj=cea +lon_0={lng} +lat_ts=0 +x_0=0 +y_0=0 ', + '+ellps=WGS84 +datum=WGS84 +units=m +no_defs', + lng = long) + } else { #polar + PROJ4 <- glue('+proj=laea +lat_0={lt} +lon_0={lng} +x_0=0 +y_0=0 ', + '+ellps=WGS84 +datum=WGS84 +units=m +no_defs', + lt = lat, + lng = long) } return(PROJ4) } + #choose appropriate granularity of the elevation model, + # based on the approximate size of the task (area potentially covered) + choose_dem_resolution <- function(dev_machine_status, + buffer_radius){ + + if(dev_machine_status == '1337'){ + dem_resolution <- case_when( + buffer_radius <= 1e4 ~ 12, + buffer_radius == 1e5 ~ 11, + buffer_radius == 1e6 ~ 10, + buffer_radius == 1e7 ~ 8, + buffer_radius == 1e8 ~ 6, + buffer_radius == 1e9 ~ 4, + buffer_radius >= 1e10 ~ 2) + } else if(dev_machine_status == 'n00b'){ + dem_resolution <- case_when( + buffer_radius <= 1e4 ~ 10, + buffer_radius == 1e5 ~ 8, + buffer_radius == 1e6 ~ 6, + buffer_radius == 1e7 ~ 4, + buffer_radius == 1e8 ~ 2, + buffer_radius >= 1e9 ~ 1) + } else { + stop('dev_machine_status must be either "1337" or "n00b"') + } + + return(dem_resolution) + } + #for determining whether the DEM extent wasn't big enough to allow full # delineation - raster_intersection_summary <- function(wb, dem){ + raster_intersection_summary <- function(wb, + dem){ #wb is a delineated watershed boundary as a rasterLayer #dem is a DEM rasterLayer summary_out <- list() - #convert wb to sf object - wb <- 
wb %>% - raster::rasterToPolygons() %>% - sf::st_as_sf() + #convert wb to sf object (there are several benign but seemingly uncatchable + # garbage collection errors here) + wb <- sf::st_as_sf(raster::rasterToPolygons(wb)) #get edge of DEM as sf object - dem_edge <- raster::boundaries(dem) %>% - raster::reclassify(matrix(c(0, NA), - ncol = 2)) %>% + dem_edge <- raster::focal(x = dem, #the terra version doesn't retain NA border + fun = function(x, ...) return(0), + w = matrix(1, nrow = 3, ncol = 3)) %>% + raster::reclassify(rcl = matrix(c(0, NA, #second, set inner cells to NA + NA, 1), #first, set outer cells to 1... yup. + ncol = 2)) %>% raster::rasterToPolygons() %>% sf::st_as_sf() + # dem_edge <- raster::boundaries(dem) %>% + # # classes = TRUE, + # # asNA = FALSE) %>% + # raster::reclassify(rcl = matrix(c(0, NA), #set inner cells to NA + # ncol = 2)) %>% + # raster::rasterToPolygons() %>% + # sf::st_as_sf() #tally raster cells summary_out$n_wb_cells <- length(wb$geometry) @@ -204,9 +372,362 @@ delineate_watershed_from_point <- function(lat, return(summary_out) } + #handles ephemeral download errors + expo_backoff <- function(expr, + max_attempts = 5, + verbose = TRUE){ + + for(attempt_i in seq_len(max_attempts)){ + + results <- try(expr = expr, + silent = TRUE) + + if(inherits(results, 'try-error')){ + + backoff <- runif(n = 1, + min = 0, + max = 2^attempt_i - 1) + + if(verbose){ + print(glue("Backing off for ", round(backoff, 1), " seconds.")) + } + + Sys.sleep(backoff) + + } else { + + # if(verbose){ + # print(paste0("Request succeeded after ", attempt_i, " attempt(s).")) + # } + + break + } + } + + return(results) + } + + #for retrieving openStreetMap layers (roads) + get_osm_roads <- function(extent_raster, outfile = NULL){ + + #extent_raster: either a terra spatRaster or a rasterLayer. The output + # roads will have the same crs, and roughly the same extent, as this raster. + #outfile: string. 
If supplied, output shapefile will be written to this + # location. If not supplied, the output will be returned. + + message('Downloading roads layer from OpenStreetMaps') + + extent_raster <- terra::rast(extent_raster) + # rast_crs <- as.character(extent_raster@crs) + rast_crs <- terra::crs(extent_raster, + proj4 = TRUE) + + extent_raster_wgs84 <- terra::project(extent_raster, + y = 'epsg:4326') + + dem_bounds <- terra::ext(extent_raster_wgs84)[c(1, 3, 2, 4)] + + highway_types <- c('motorway', 'trunk', 'primary', 'secondary', 'tertiary') + highway_types <- c(highway_types, + paste(highway_types, 'link', sep = '_')) + + roads_query <- osmdata::opq(dem_bounds) %>% + osmdata::add_osm_feature(key = 'highway', + value = highway_types) + + roads <- osmdata::osmdata_sf(roads_query) + roads <- roads$osm_lines$geometry + + # plot(roads$osm_lines, max.plot = 1) + + roads_proj <- roads %>% + sf::st_transform(crs = rast_crs) %>% + sf::st_union() %>% + # sf::st_transform(crs = WGS84) %>% + sf::st_as_sf() %>% + rename(geometry = x) %>% + mutate(FID = 0:(n() - 1)) %>% + dplyr::select(FID, geometry) + + if(! is.null(outfile)){ + + sf::st_write(roads_proj, + dsn = outfile, + layer = 'roads', + driver = 'ESRI Shapefile', + delete_layer = TRUE, + quiet = TRUE) + + message(paste('OSM roads layer written to', outfile)) + + } else { + return(roads_proj) + } + } + + #for retrieving openStreetMap layers (streams) + get_osm_streams <- function(extent_raster, outfile = NULL){ + + #extent_raster: either a terra spatRaster or a rasterLayer. The output + # streams will have the same crs, and roughly the same extent, as this raster. + #outfile: string. If supplied, output shapefile will be written to this + # location. If not supplied, the output will be returned. 
+ + message('Downloading streams layer from OpenStreetMaps') + + extent_raster <- terra::rast(extent_raster) + # rast_crs <- as.character(extent_raster@crs) + rast_crs <- terra::crs(extent_raster, + proj4 = TRUE) + + extent_raster_wgs84 <- terra::project(extent_raster, + y = 'epsg:4326') + + dem_bounds <- terra::ext(extent_raster_wgs84)[c(1, 3, 2, 4)] + + streams_query <- osmdata::opq(dem_bounds) %>% + osmdata::add_osm_feature(key = 'waterway', + value = c('river', 'stream')) + + streams <- osmdata::osmdata_sf(streams_query) + streams <- streams$osm_lines$geometry + + streams_proj <- streams %>% + sf::st_transform(crs = rast_crs) %>% + sf::st_union() %>% + # sf::st_transform(crs = WGS84) %>% + sf::st_as_sf() %>% + rename(geometry = x) %>% + mutate(FID = 0:(n() - 1)) %>% + dplyr::select(FID, geometry) + + if(! is.null(outfile)){ + + sf::st_write(streams_proj, + dsn = outfile, + layer = 'streams', + driver = 'ESRI Shapefile', + delete_layer = TRUE, + quiet = TRUE) + + message(paste('OSM streams layer written to', outfile)) + + } else { + return(streams_proj) + } + } + + #the function that runs the workhorse + delineate_watershed_apriori_recurse <- function(lat, + long, + crs, + site_name, + dem_resolution = NULL, + flat_increment = NULL, + breach_method = 'lc', + burn_streams = FALSE, + scratch_dir = tempdir(), + write_dir, + dev_machine_status = 'n00b', + verbose = FALSE){ + + #This function calls delineate_watershed_apriori recursively, taking + # user input after each call, until the user selects a delineation + # or aborts. For parameter documentation, see delineate_watershed_apriori. 
+ + # tmp <- tempdir() + scratch_dir <- stringr::str_replace_all(scratch_dir, '\\\\', '/') + + inspection_dir <- delineate_watershed_apriori( + lat = lat, + long = long, + crs = crs, + site_name = site_name, + dem_resolution = dem_resolution, + flat_increment = flat_increment, + breach_method = breach_method, + burn_streams = burn_streams, + scratch_dir = scratch_dir, + dev_machine_status = dev_machine_status, + verbose = verbose) + + files_to_inspect <- list.files(path = inspection_dir, + pattern = '.shp') + + temp_point <- glue(scratch_dir, '/', 'POINT') + + tibble(longitude = long, latitude = lat) %>% + sf::st_as_sf(coords = c('longitude', 'latitude'), + crs = crs) %>% + sf::st_write(dsn = temp_point, + driver = 'ESRI Shapefile', + delete_dsn = TRUE, + quiet = TRUE) + + #if only one delineation, write it into macrosheds storage + if(length(files_to_inspect) == 1){ + + selection <- files_to_inspect[1] + + move_shapefiles(shp_files = selection, + from_dir = inspection_dir, + to_dir = write_dir) + + message(glue('Delineation successful. 
Shapefile written to ', + write_dir)) + + #otherwise, technician must inspect all delineations and choose one + } else { + + nshapes <- length(files_to_inspect) + + wb_selections <- paste(paste0('[', + c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), + ']'), + c(files_to_inspect, + 'Burn streams into the DEM (may handle road-stream intersections)', + 'Use more aggressive breaching method', + 'Select DEM resolution', + 'Set flat_increment', + # 'Next (skip this one for now)', + 'Abort delineation'), + sep = ': ', + collapse = '\n') + + helper_code <- glue('{id}.\nmapview::mapview(sf::st_read("{wd}/{f}")) + ', + 'mapview::mapview(sf::st_read("{pf}"))', + id = 1:length(files_to_inspect), + wd = inspection_dir, + f = files_to_inspect, + pf = temp_point) %>% + paste(collapse = '\n\n') + + msg <- glue('Visually inspect the watershed boundary candidate shapefiles ', + 'by pasting the mapview lines below into R.\n\n{hc}\n\n', + 'Enter the number corresponding to the ', + 'one that looks most legit, or select one or more tuning ', + 'options (e.g. "SBRI" without quotes). You usually won\'t ', + 'need to tune anything. If you aren\'t ', + 'sure which delineation is correct, get a site manager to verify:\n', + 'request_site_manager_verification(type=\'wb delin\', ', + 'network, domain) [function not yet built]\n\nChoices:\n{sel}\n\nEnter choice(s) here > ', + hc = helper_code, + sel = wb_selections) + # td = inspection_dir) + + resp <- get_response_mchar( + msg = msg, + possible_resps = paste(c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), + collapse = ''), + allow_alphanumeric_response = FALSE) + # + # if('n' %in% resp){ + # unlink(write_dir, + # recursive = TRUE) + # print(glue('Moving on. You haven\'t seen the last of {s}!', + # s = site_name)) + # return(1) + # } + + if('a' %in% resp){ + unlink(write_dir, + recursive = TRUE) + print(glue('Aborted. 
Any completed delineations have been saved.')) + return(2) + } + + if('S' %in% resp){ + burn_streams <- TRUE + } else { + burn_streams <- FALSE + } + + if('B' %in% resp){ + breach_method <- 'basic' + } else { + breach_method <- 'lc' + } + + if('R' %in% resp){ + dem_resolution <- get_response_mchar( + msg = paste0('Choose DEM resolution between 1 (low) and 14 (high)', + ' to pass to elevatr::get_elev_raster. For tiny ', + 'watersheds, use 12-13. For giant ones, use 8-9.\n\n', + 'Enter choice here > '), + possible_resps = paste(1:14)) + dem_resolution <- as.numeric(dem_resolution) + } + + if('I' %in% resp){ + + bm <- ifelse(breach_method == 'basic', + 'whitebox::wbt_breach_depressions', + 'whitebox::wbt_breach_depressions_least_cost') + + new_options <- paste(paste0('[', + c('S', 'M', 'L'), + ']'), + c('0.001', '0.01', '0.1'), + sep = ': ', + collapse = '\n') + + resp2 <- get_response_1char( + msg = glue('Pick the size of the elevation increment to pass to ', + bm, '.\n\n', new_options, '\n\nEnter choice here > '), + possible_chars = c('S', 'M', 'L')) + + flat_increment <- switch(resp2, + S = 0.001, + M = 0.01, + L = 0.1) + } + + if(! 
grepl('[0-9]', resp)){ + + selection <- delineate_watershed_apriori_recurse( + lat = lat, + long = long, + crs = crs, + site_name = site_name, + dem_resolution = dem_resolution, + flat_increment = flat_increment, + breach_method = breach_method, + burn_streams = burn_streams, + scratch_dir = scratch_dir, + write_dir = write_dir, + dev_machine_status = dev_machine_status, + verbose = verbose) + + return(selection) + } + + selection <- files_to_inspect[as.numeric(resp)] + + move_shapefiles(shp_files = selection, + from_dir = inspection_dir, + to_dir = write_dir, + new_name_vec = site_name) + + message(glue('Selection {s}:\n\t{sel}\nwas written to:\n\t{sdr}', + s = resp, + sel = selection, + sdr = write_dir)) + } + + return(selection) + } + #the workhorse - delineate_watershed_apriori <- function(lat, long, crs, - machine_status = 'n00b', + delineate_watershed_apriori <- function(lat, + long, + crs, + site_name, + dem_resolution = NULL, + flat_increment = NULL, + breach_method = 'basic', + burn_streams = FALSE, + scratch_dir = tempdir(), + dev_machine_status = 'n00b', verbose = FALSE){ #lat: numeric representing latitude in decimal degrees @@ -214,22 +735,57 @@ delineate_watershed_from_point <- function(lat, #long: numeric representing longitude in decimal degrees # (negative indicates west of prime meridian) #crs: numeric representing the coordinate reference system (e.g. WSG84) - #machine_status: either '1337', indicating that your machine has >= 16 GB + #dem_resolution: optional integer 1-14. the granularity of the DEM that is used for + # delineation. this argument is passed directly to the z parameter of + # elevatr::get_elev_raster. 1 is low resolution; 14 is high. If NULL, + # this is determined automatically. + #flat_increment: float or NULL. Passed to + # whitebox::wbt_breach_depressions_least_cost + # or whitebox::wbt_breach_depressions, depending on the value + # of breach_method (see next). + #breach_method: string. 
Either 'basic', which invokes whitebox::wbt_breach_depressions, + # or 'lc', which invokes whitebox::wbt_breach_depressions_least_cost + #burn_streams: logical. if TRUE, both whitebox::wbt_burn_streams_at_roads + # and whitebox::wbt_fill_burn are called on the DEM, using road and stream + # layers from OpenStreetMap. + #scratch_dir: the directory where intermediate files will be dumped. This + # is a randomly generated temporary directory if not specified. + #dev_machine_status: either '1337', indicating that your machine has >= 16 GB # RAM, or 'n00b', indicating < 16 GB RAM. DEM resolution is chosen accordingly #verbose: logical. determines the amount of informative messaging during run #returns the location of candidate watershed boundary files - tmp <- tempdir() - inspection_dir <- glue(tmp, '/INSPECT_THESE') - dem_f <- glue(tmp, '/dem.tif') - point_f <- glue(tmp, '/point.shp') - d8_f <- glue(tmp, '/d8_pntr.tif') - flow_f <- glue(tmp, '/flow.tif') + # tmp <- tempdir() + # tmp <- str_replace_all(tmp, '\\\\', '/') + + if(! is.null(dem_resolution) && ! is.numeric(dem_resolution)){ + stop('dem_resolution must be a numeric integer or NULL') + } + if(! is.null(flat_increment) && ! is.numeric(flat_increment)){ + stop('flat_increment must be numeric or NULL') + } + if(! breach_method %in% c('lc', 'basic')) stop('breach_method must be "basic" or "lc"') + if(! 
is.logical(burn_streams)) stop('burn_streams must be logical') + + inspection_dir <- glue(scratch_dir, '/INSPECT_THESE') + point_dir <- glue(scratch_dir, '/POINT') + dem_f <- glue(scratch_dir, '/dem.tif') + point_f <- glue(scratch_dir, '/point.shp') + streams_f <- glue(scratch_dir, '/streams.shp') + roads_f <- glue(scratch_dir, '/roads.shp') + d8_f <- glue(scratch_dir, '/d8_pntr.tif') + flow_f <- glue(scratch_dir, '/flow.tif') dir.create(path = inspection_dir, showWarnings = FALSE) + dir_clean <- list.files(inspection_dir) + + if(length(dir_clean) > 0) { + file.remove(paste(inspection_dir, dir_clean, sep = '/')) + } + proj <- choose_projection(lat = lat, long = long) @@ -241,7 +797,7 @@ delineate_watershed_from_point <- function(lat, # sf::st_transform(4326) #WGS 84 (would be nice to do this unprojected) #prepare for delineation loops - buffer_radius <- 100 + buffer_radius <- 1000 dem_coverage_insufficient <- FALSE while_loop_begin <- TRUE @@ -252,71 +808,118 @@ delineate_watershed_from_point <- function(lat, while_loop_begin <- FALSE - if(machine_status == '1337'){ - dem_resolution <- case_when( - buffer_radius <= 1e4 ~ 12, - buffer_radius == 1e5 ~ 11, - buffer_radius == 1e6 ~ 10, - buffer_radius == 1e7 ~ 8, - buffer_radius == 1e8 ~ 6, - buffer_radius == 1e9 ~ 4, - buffer_radius >= 1e10 ~ 2) - } else if(machine_status == 'n00b'){ - dem_resolution <- case_when( - buffer_radius <= 1e4 ~ 10, - buffer_radius == 1e5 ~ 8, - buffer_radius == 1e6 ~ 6, - buffer_radius == 1e7 ~ 4, - buffer_radius == 1e8 ~ 2, - buffer_radius >= 1e9 ~ 1) - } else { - stop('machine_status must be either "1337" or "n00b"') + if(is.null(dem_resolution)){ + dem_resolution <- choose_dem_resolution( + dev_machine_status = dev_machine_status, + buffer_radius = buffer_radius) + } + + if(verbose){ + + if(is.null(flat_increment)){ + fi <- 'NULL (auto)' + } else { + fi <- as.character(flat_increment) + } + + print(glue('Delineation specs for this attempt:\n', + '\tsite_name: {st}; ', + 'dem_resolution: 
{dr}; flat_increment: {fi}\n', + '\tbreach_method: {bm}; burn_streams: {bs}', + st = site_name, + dr = dem_resolution, + fi = fi, + bm = breach_method, + bs = as.character(burn_streams), + .trim = FALSE)) } site_buf <- sf::st_buffer(x = site, dist = buffer_radius) - dem <- elevatr::get_elev_raster(locations = site_buf, - z = dem_resolution, - verbose = verbose) + dem <- expo_backoff( + expr = { + elevatr::get_elev_raster(locations = site_buf, + z = dem_resolution, + verbose = FALSE) + }, + max_attempts = 5 + ) + + # terra::writeRaster(x = dem, raster::writeRaster(x = dem, filename = dem_f, overwrite = TRUE) + #loses projection? sf::st_write(obj = site, dsn = point_f, delete_layer = TRUE, quiet = TRUE) + if(burn_streams){ + get_osm_roads(extent_raster = dem, + outfile = roads_f) + get_osm_streams(extent_raster = dem, + outfile = streams_f) + } + whitebox::wbt_fill_single_cell_pits(dem = dem_f, - output = dem_f) + output = dem_f) %>% invisible() + + if(breach_method == 'basic'){ + + whitebox::wbt_breach_depressions( + dem = dem_f, + output = dem_f, + flat_increment = flat_increment) %>% invisible() + + } else if(breach_method == 'lc'){ - whitebox::wbt_breach_depressions(dem = dem_f, - output = dem_f, - flat_increment = 0.01) + whitebox::wbt_breach_depressions_least_cost( + dem = dem_f, + output = dem_f, + dist = 10000, #maximum trench length + fill = TRUE, + flat_increment = flat_increment) %>% invisible() + } + #also see wbt_fill_depressions for when there are open pit mines + + if(burn_streams){ + + #the secret is that BOTH of these burns can work in tandem! 
+ whitebox::wbt_burn_streams_at_roads(dem = dem_f, + streams = streams_f, + roads = roads_f, + output = dem_f, + width = 50) %>% invisible() + whitebox::wbt_fill_burn(dem = dem_f, + streams = streams_f, + output = dem_f) %>% invisible() + } whitebox::wbt_d8_pointer(dem = dem_f, - output = d8_f) + output = d8_f) %>% invisible() whitebox::wbt_d8_flow_accumulation(input = dem_f, output = flow_f, - out_type = 'catchment area') + out_type = 'catchment area') %>% invisible() - snap1_f <- glue(tmp, '/snap1_jenson_dist150.shp') + snap1_f <- glue(scratch_dir, '/snap1_jenson_dist150.shp') whitebox::wbt_jenson_snap_pour_points(pour_pts = point_f, streams = flow_f, output = snap1_f, - snap_dist = 150) - snap2_f <- glue(tmp, '/snap2_standard_dist50.shp') + snap_dist = 150) %>% invisible() + snap2_f <- glue(scratch_dir, '/snap2_standard_dist50.shp') whitebox::wbt_snap_pour_points(pour_pts = point_f, flow_accum = flow_f, output = snap2_f, - snap_dist = 50) - snap3_f <- glue(tmp, '/snap3_standard_dist150.shp') + snap_dist = 50) %>% invisible() + snap3_f <- glue(scratch_dir, '/snap3_standard_dist150.shp') whitebox::wbt_snap_pour_points(pour_pts = point_f, flow_accum = flow_f, output = snap3_f, - snap_dist = 150) + snap_dist = 150) %>% invisible() #the site has been snapped 3 different ways. identify unique snap locations. snap1 <- sf::st_read(snap1_f, quiet = TRUE) @@ -327,19 +930,19 @@ delineate_watershed_from_point <- function(lat, if(! 
identical(snap1, snap3)) unique_snaps_f <- c(unique_snaps_f, snap3_f) #good for experimenting with snap specs: - # delineate_watershed_test2(tmp, point_f, flow_f, + # delineate_watershed_test2(scratch_dir, point_f, flow_f, # d8_f, 'standard', 1000) #delineate each unique location for(i in 1:length(unique_snaps_f)){ - rgx <- stringr::str_match(unique_snaps_f[i], + rgx <- str_match(unique_snaps_f[i], '.*?_(standard|jenson)_dist([0-9]+)\\.shp$') snap_method <- rgx[, 2] snap_distance <- rgx[, 3] wb_f <- glue('{path}/wb{n}_buffer{b}_{typ}_dist{dst}.tif', - path = tmp, + path = scratch_dir, n = i, b = buffer_radius, typ = snap_method, @@ -347,7 +950,7 @@ delineate_watershed_from_point <- function(lat, whitebox::wbt_watershed(d8_pntr = d8_f, pour_pts = unique_snaps_f[i], - output = wb_f) + output = wb_f) %>% invisible() wb <- raster::raster(wb_f) @@ -357,8 +960,8 @@ delineate_watershed_from_point <- function(lat, dem = dem) if(verbose){ - print(glue('buffer radius: {br}; snap: {sn}/{tot}; ', - 'n intersecting cells: {ni}; pct intersect: {pct}', + print(glue('site buffer radius: {br}; pour point snap: {sn}/{tot}; ', + 'n intersecting border cells: {ni}; pct intersect: {pct}', br = buffer_radius, sn = i, tot = length(unique_snaps_f), @@ -369,6 +972,8 @@ delineate_watershed_from_point <- function(lat, if(smry$pct_wb_cells_intersect > 0.1 || smry$n_intersections > 5){ buffer_radius_new <- buffer_radius * 10 dem_coverage_insufficient <- TRUE + print(glue('Hit DEM edge. 
Incrementing buffer.')) + break } else { dem_coverage_insufficient <- FALSE buffer_radius_new <- buffer_radius @@ -383,18 +988,34 @@ delineate_watershed_from_point <- function(lat, wb_sf <- sf::st_transform(wb_sf, 4326) #EPSG for WGS84 - wb_sf_f <- glue('{path}/wb{n}_BUF{b}{typ}DIST{dst}RES{res}.shp', + ws_area_ha <- as.numeric(sf::st_area(wb_sf)) / 10000 + + wb_sf <- wb_sf %>% + mutate(site_name = !!site_name) %>% + mutate(area = !!ws_area_ha) + + if(is.null(flat_increment)){ + flt_incrmt <- 'null' + } else { + flt_incrmt <- as.character(flat_increment) + } + + wb_sf_f <- glue('{path}/wb{n}_BUF{b}{typ}DIST{dst}RES{res}', + 'INC{inc}BREACH{brc}BURN{brn}.shp', path = inspection_dir, n = i, - b = buffer_radius, + b = sprintf('%d', buffer_radius), typ = snap_method, dst = snap_distance, - res = dem_resolution) - - sf::st_write(obj = wb_sf, - dsn = wb_sf_f, - delete_dsn = TRUE, - quiet = TRUE) + res = dem_resolution, + inc = flt_incrmt, + brc = breach_method, + brn = as.character(burn_streams)) + + sw(sf::st_write(obj = wb_sf, + dsn = wb_sf_f, + delete_dsn = TRUE, + quiet = TRUE)) } } @@ -412,77 +1033,22 @@ delineate_watershed_from_point <- function(lat, message('Beginning watershed delineation') } - inspection_dir <- sw(delineate_watershed_apriori( + tmp <- tempdir() + + selection <- sw(delineate_watershed_apriori_recurse( lat = lat, long = long, crs = crs, - machine_status = machine_status, + site_name = write_name, + dem_resolution = NULL, + flat_increment = NULL, + breach_method = 'lc', + burn_streams = FALSE, + scratch_dir = tmp, + write_dir = write_dir, + dev_machine_status = 'n00b', verbose = verbose)) - files_to_inspect <- list.files(path = inspection_dir, - pattern = '.shp') - - #if only one delineation, write it to write_dir - if(length(files_to_inspect) == 1){ - - selection <- files_to_inspect[1] - - move_shapefiles(shp_files = selection, - from_dir = inspection_dir, - to_dir = write_dir) - - if(verbose){ - message(glue('Delineation successful. 
Shapefile written to ', - write_dir)) - } - - #otherwise, inspect all delineations and choose one - } else { - - nshapes <- length(files_to_inspect) - - wb_selections <- paste(paste0('[', - c(1:nshapes, 'A'), - ']'), - c(files_to_inspect, 'Abort delineation'), - sep = ': ', - collapse = '\n') - - helper_code <- glue('mapview::mapview(sf::st_read("{wd}/{f}"))', - wd = inspection_dir, - f = files_to_inspect) %>% - paste(collapse = '\n\n') - - msg <- glue('Visually inspect the watershed boundary candidate shapefiles ', - 'in {td}, then enter the number corresponding to the ', - 'one that looks most legit. Here\'s some ', - 'helper code you can paste into a separate R instance ', - ':\n\n{hc}\n\nChoices:\n{sel}\n\nEnter choice here > ', - hc = helper_code, - sel = wb_selections, - td = inspection_dir) - - resp <- get_response_1char(msg = msg, - possible_chars = c(1:nshapes, 'A')) - - if(resp == 'A'){ - message('Delineation aborted') - return() - } - - selection <- files_to_inspect[as.numeric(resp)] - - move_shapefiles(shp_files = selection, - from_dir = inspection_dir, - to_dir = write_dir, - new_name_vec = write_name) - - message(glue('Selection {s}:\n\t{sel}\nwas written to:\n\t{sdr}', - s = resp, - sel = selection, - sdr = write_dir)) - } - #calculate watershed area in hectares wb <- sf::st_read(glue('{d}/{s}.shp', d = write_dir, @@ -493,16 +1059,20 @@ delineate_watershed_from_point <- function(lat, #return the specifications of the correctly delineated watershed, and some # other goodies - rgx <- stringr::str_match(selection, + rgx <- str_match(selection, paste0('^wb[0-9]+_BUF([0-9]+)(standard|jenson)', - 'DIST([0-9]+)RES([0-9]+)\\.shp$')) + 'DIST([0-9]+)RES([0-9]+)INC([0-1\\.null]+)', + 'BREACH(basic|lc)BURN(TRUE|FALSE)\\.shp$')) deets <- list(name = write_name, watershed_area_ha = ws_area_ha, buffer_radius_m = as.numeric(rgx[, 2]), snap_distance_m = as.numeric(rgx[, 4]), snap_method = rgx[, 3], - dem_resolution = as.numeric(rgx[, 5])) + dem_resolution = 
as.numeric(rgx[, 5]), + flat_increment = rgx[, 6], + breach_method = rgx[, 7], + burn_streams = rgx[, 8]) return(deets) } diff --git a/osm_temp.R b/osm_temp.R deleted file mode 100644 index 76f24f4..0000000 --- a/osm_temp.R +++ /dev/null @@ -1,42 +0,0 @@ -dem_unprojected <- raster::projectRaster(dem, crs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs') -dem_bounds <- sf::st_bbox(dem_unprojected) -# highway_types <- c('motorway', 'trunk', 'primary') -highway_types <- c('motorway', 'trunk', 'primary', 'secondary', 'tertiary') -highway_types <- c(highway_types, - paste(highway_types, 'link', sep = '_')) -roads_query <- opq(dem_bounds) %>% - add_osm_feature(key = 'highway', - value = highway_types) -roads <- osmdata_sf(roads_query) -plot(roads$osm_lines) - -streams_query <- opq(dem_bounds) %>% - add_osm_feature(key = 'waterway', - value = c('river', 'stream')) -streams <- osmdata_sf(streams_query) -streams$osm_lines$geometry -plot(streams$osm_lines) - -streams_f <- glue(tmp, '/streams.shp') -roads_f <- glue(tmp, '/roads.shp') -sf::st_transform(streams$osm_lines$geometry, - crs = proj) %>% - sf::st_write(dsn = tmp, - layer = 'streams.shp', - driver = 'ESRI Shapefile', - delete_layer = TRUE, - silent = TRUE) -sf::st_transform(roads$osm_lines$geometry, - crs = proj) %>% - sf::st_write(dsn = tmp, - layer = 'roads.shp', - driver = 'ESRI Shapefile', - delete_layer = TRUE, - silent = TRUE) - -whitebox::wbt_burn_streams_at_roads(dem = dem_f, - streams = streams_f, - roads = roads_f, - output = dem_f, - width = 50) - From 993dc79e188189df570b539e434ddd0917ee0b56 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Mon, 15 Mar 2021 08:58:53 -0400 Subject: [PATCH 10/37] updated docs --- 4_interactive_watershed_delineation.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index b896397..5619abe 100755 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ 
-46,6 +46,15 @@ delineate_watershed_from_point <- function(lat, # which snaps to the nearest flow line # dem_resolution: passed to elevatr::get_elev_raster (z parameter). # depends on supplied machine_status + # flat_increment: see whitebox::wbt_breach_depressions or + # whitebox::wbt_breach_depressions_least_cost + # breach_method: string. Either 'basic', indicating that + # whitebox::wbt_breach_depressions was used, or 'lc', indicating + # whitebox::wbt_breach_depressions_least_cost (which less readily + # alters the DEM) + # burn_streams: TRUE or FALSE, indicating whether + # whitebox::wbt_burn_streams_at_roads and whitebox::wbt_fill_burn were + # used on the DEM sm <- suppressMessages sw <- suppressWarnings From 93aed2abe7869a27fe585ba75d2dc49c4f68686a Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 16 Mar 2021 17:14:46 -0400 Subject: [PATCH 11/37] notes and clarifications --- 4_interactive_watershed_delineation.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index 5619abe..5d33e5b 100755 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -1,16 +1,19 @@ #documentation and helper functions are included in the function definition below. -#if you're in Rstudio, collapse the function with Alt+o. -#there's a demo call included at the bottom. +#If you're in Rstudio, collapse the function with Alt+o (Windows/Linux) or Cmd+Opt+o (Mac). +#There's a demo call included at the bottom. +#Spaces in write_dir may cause errors on Windows. library(tidyverse) library(glue) library(sf) library(elevatr) library(raster) -library(whitebox) +library(whitebox) #this can't be installed from CRAN. google "install whitebox R" library(terra) library(mapview) library(osmdata) #only needed if you want to burn streams into the DEM +#NOTE: you may also need to install the lwgeom, rgdal, and e1071 packages. 
+# (you'll be notified in error messages if so) delineate_watershed_from_point <- function(lat, long, @@ -393,6 +396,10 @@ delineate_watershed_from_point <- function(lat, if(inherits(results, 'try-error')){ + if(attempt_i == max_attempts){ + stop(attr(results, 'condition')) + } + backoff <- runif(n = 1, min = 0, max = 2^attempt_i - 1) @@ -603,7 +610,8 @@ delineate_watershed_from_point <- function(lat, sep = ': ', collapse = '\n') - helper_code <- glue('{id}.\nmapview::mapview(sf::st_read("{wd}/{f}")) + ', + helper_code <- glue('{id}.\nmapview::mapviewOptions(fgb = FALSE);', + 'mapview::mapview(sf::st_read("{wd}/{f}")) + ', 'mapview::mapview(sf::st_read("{pf}"))', id = 1:length(files_to_inspect), wd = inspection_dir, @@ -612,7 +620,7 @@ delineate_watershed_from_point <- function(lat, paste(collapse = '\n\n') msg <- glue('Visually inspect the watershed boundary candidate shapefiles ', - 'by pasting the mapview lines below into R.\n\n{hc}\n\n', + 'by pasting the mapview lines below into a separate instance of R.\n\n{hc}\n\n', 'Enter the number corresponding to the ', 'one that looks most legit, or select one or more tuning ', 'options (e.g. "SBRI" without quotes). You usually won\'t ', From 61ec58e3012c9d2619116d5e62836ebfbc1d9711 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 9 Nov 2021 09:03:12 -0700 Subject: [PATCH 12/37] added sf::st_make_valid to delineator; many updates yet to be incorporated --- 4_interactive_watershed_delineation.R | 270 +++++++++++++------------- 5_summarize_nlcd.R | 138 +++++++++++++ 2 files changed, 269 insertions(+), 139 deletions(-) create mode 100644 5_summarize_nlcd.R diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index 5d33e5b..e120e01 100755 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -1,3 +1,10 @@ +#2021-04-01 +#mike vlah +#vlahm13@gmail.com + +#if this can't delineate your watershed, email me! 
there are heaps of updates +#that i haven't yet incorporated into this version of the tool + #documentation and helper functions are included in the function definition below. #If you're in Rstudio, collapse the function with Alt+o (Windows/Linux) or Cmd+Opt+o (Mac). #There's a demo call included at the bottom. @@ -580,156 +587,140 @@ delineate_watershed_from_point <- function(lat, delete_dsn = TRUE, quiet = TRUE) - #if only one delineation, write it into macrosheds storage - if(length(files_to_inspect) == 1){ - - selection <- files_to_inspect[1] - - move_shapefiles(shp_files = selection, - from_dir = inspection_dir, - to_dir = write_dir) + nshapes <- length(files_to_inspect) + + wb_selections <- paste(paste0('[', + c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), + ']'), + c(files_to_inspect, + 'Burn streams into the DEM (may handle road-stream intersections)', + 'Use more aggressive breaching method', + 'Select DEM resolution', + 'Set flat_increment', + # 'Next (skip this one for now)', + 'Abort delineation'), + sep = ': ', + collapse = '\n') + + helper_code <- glue('{id}.\nmapview::mapviewOptions(fgb = FALSE);', + 'mapview::mapview(sf::st_read("{wd}/{f}")) + ', + 'mapview::mapview(sf::st_read("{pf}"))', + id = 1:length(files_to_inspect), + wd = inspection_dir, + f = files_to_inspect, + pf = temp_point) %>% + paste(collapse = '\n\n') + + msg <- glue('Visually inspect the watershed boundary candidate shapefiles ', + 'by pasting the mapview lines below into a separate instance of R.\n\n{hc}\n\n', + 'Enter the number corresponding to the ', + 'one that looks most legit, or select one or more tuning ', + 'options (e.g. "SBRI" without quotes). You usually won\'t ', + 'need to tune anything. 
If you aren\'t ', + 'sure which delineation is correct, get a site manager to verify:\n', + 'request_site_manager_verification(type=\'wb delin\', ', + 'network, domain) [function not yet built]\n\nChoices:\n{sel}\n\nEnter choice(s) here > ', + hc = helper_code, + sel = wb_selections) + # td = inspection_dir) - message(glue('Delineation successful. Shapefile written to ', - write_dir)) + resp <- get_response_mchar( + msg = msg, + possible_resps = paste(c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), + collapse = ''), + allow_alphanumeric_response = FALSE) + # + # if('n' %in% resp){ + # unlink(write_dir, + # recursive = TRUE) + # print(glue('Moving on. You haven\'t seen the last of {s}!', + # s = site_name)) + # return(1) + # } + + if('a' %in% resp){ + unlink(write_dir, + recursive = TRUE) + print(glue('Aborted. Any completed delineations have been saved.')) + return(2) + } - #otherwise, technician must inspect all delineations and choose one + if('S' %in% resp){ + burn_streams <- TRUE } else { + burn_streams <- FALSE + } - nshapes <- length(files_to_inspect) - - wb_selections <- paste(paste0('[', - c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), - ']'), - c(files_to_inspect, - 'Burn streams into the DEM (may handle road-stream intersections)', - 'Use more aggressive breaching method', - 'Select DEM resolution', - 'Set flat_increment', - # 'Next (skip this one for now)', - 'Abort delineation'), - sep = ': ', - collapse = '\n') - - helper_code <- glue('{id}.\nmapview::mapviewOptions(fgb = FALSE);', - 'mapview::mapview(sf::st_read("{wd}/{f}")) + ', - 'mapview::mapview(sf::st_read("{pf}"))', - id = 1:length(files_to_inspect), - wd = inspection_dir, - f = files_to_inspect, - pf = temp_point) %>% - paste(collapse = '\n\n') - - msg <- glue('Visually inspect the watershed boundary candidate shapefiles ', - 'by pasting the mapview lines below into a separate instance of R.\n\n{hc}\n\n', - 'Enter the number corresponding to the ', - 'one that looks most legit, or select one or more tuning ', - 
'options (e.g. "SBRI" without quotes). You usually won\'t ', - 'need to tune anything. If you aren\'t ', - 'sure which delineation is correct, get a site manager to verify:\n', - 'request_site_manager_verification(type=\'wb delin\', ', - 'network, domain) [function not yet built]\n\nChoices:\n{sel}\n\nEnter choice(s) here > ', - hc = helper_code, - sel = wb_selections) - # td = inspection_dir) - - resp <- get_response_mchar( - msg = msg, - possible_resps = paste(c(1:nshapes, 'S', 'B', 'R', 'I', 'a'), - collapse = ''), - allow_alphanumeric_response = FALSE) - # - # if('n' %in% resp){ - # unlink(write_dir, - # recursive = TRUE) - # print(glue('Moving on. You haven\'t seen the last of {s}!', - # s = site_name)) - # return(1) - # } - - if('a' %in% resp){ - unlink(write_dir, - recursive = TRUE) - print(glue('Aborted. Any completed delineations have been saved.')) - return(2) - } - - if('S' %in% resp){ - burn_streams <- TRUE - } else { - burn_streams <- FALSE - } - - if('B' %in% resp){ - breach_method <- 'basic' - } else { - breach_method <- 'lc' - } - - if('R' %in% resp){ - dem_resolution <- get_response_mchar( - msg = paste0('Choose DEM resolution between 1 (low) and 14 (high)', - ' to pass to elevatr::get_elev_raster. For tiny ', - 'watersheds, use 12-13. For giant ones, use 8-9.\n\n', - 'Enter choice here > '), - possible_resps = paste(1:14)) - dem_resolution <- as.numeric(dem_resolution) - } + if('B' %in% resp){ + breach_method <- 'basic' + } else { + breach_method <- 'lc' + } - if('I' %in% resp){ + if('R' %in% resp){ + dem_resolution <- get_response_mchar( + msg = paste0('Choose DEM resolution between 1 (low) and 14 (high)', + ' to pass to elevatr::get_elev_raster. For tiny ', + 'watersheds, use 12-13. 
For giant ones, use 8-9.\n\n', + 'Enter choice here > '), + possible_resps = paste(1:14)) + dem_resolution <- as.numeric(dem_resolution) + } - bm <- ifelse(breach_method == 'basic', - 'whitebox::wbt_breach_depressions', - 'whitebox::wbt_breach_depressions_least_cost') + if('I' %in% resp){ - new_options <- paste(paste0('[', - c('S', 'M', 'L'), - ']'), - c('0.001', '0.01', '0.1'), - sep = ': ', - collapse = '\n') + bm <- ifelse(breach_method == 'basic', + 'whitebox::wbt_breach_depressions', + 'whitebox::wbt_breach_depressions_least_cost') - resp2 <- get_response_1char( - msg = glue('Pick the size of the elevation increment to pass to ', - bm, '.\n\n', new_options, '\n\nEnter choice here > '), - possible_chars = c('S', 'M', 'L')) + new_options <- paste(paste0('[', + c('S', 'M', 'L'), + ']'), + c('0.001', '0.01', '0.1'), + sep = ': ', + collapse = '\n') - flat_increment <- switch(resp2, - S = 0.001, - M = 0.01, - L = 0.1) - } + resp2 <- get_response_1char( + msg = glue('Pick the size of the elevation increment to pass to ', + bm, '.\n\n', new_options, '\n\nEnter choice here > '), + possible_chars = c('S', 'M', 'L')) - if(! grepl('[0-9]', resp)){ - - selection <- delineate_watershed_apriori_recurse( - lat = lat, - long = long, - crs = crs, - site_name = site_name, - dem_resolution = dem_resolution, - flat_increment = flat_increment, - breach_method = breach_method, - burn_streams = burn_streams, - scratch_dir = scratch_dir, - write_dir = write_dir, - dev_machine_status = dev_machine_status, - verbose = verbose) + flat_increment <- switch(resp2, + S = 0.001, + M = 0.01, + L = 0.1) + } - return(selection) - } + if(! 
grepl('[0-9]', resp)){ + + selection <- delineate_watershed_apriori_recurse( + lat = lat, + long = long, + crs = crs, + site_name = site_name, + dem_resolution = dem_resolution, + flat_increment = flat_increment, + breach_method = breach_method, + burn_streams = burn_streams, + scratch_dir = scratch_dir, + write_dir = write_dir, + dev_machine_status = dev_machine_status, + verbose = verbose) + + return(selection) + } - selection <- files_to_inspect[as.numeric(resp)] + selection <- files_to_inspect[as.numeric(resp)] - move_shapefiles(shp_files = selection, - from_dir = inspection_dir, - to_dir = write_dir, - new_name_vec = site_name) + move_shapefiles(shp_files = selection, + from_dir = inspection_dir, + to_dir = write_dir, + new_name_vec = site_name) - message(glue('Selection {s}:\n\t{sel}\nwas written to:\n\t{sdr}', - s = resp, - sel = selection, - sdr = write_dir)) - } + message(glue('Selection {s}:\n\t{sel}\nwas written to:\n\t{sdr}', + s = resp, + sel = selection, + sdr = write_dir)) return(selection) } @@ -1001,9 +992,10 @@ delineate_watershed_from_point <- function(lat, sf::st_as_sf() %>% sf::st_buffer(dist = 0.1) %>% sf::st_union() %>% - sf::st_as_sf()#again? ugh. + sf::st_as_sf() #again? ugh. 
- wb_sf <- sf::st_transform(wb_sf, 4326) #EPSG for WGS84 + wb_sf <- sf::st_transform(wb_sf, 4326) %>% #EPSG for WGS84 + st_make_valid(wb_sf) ws_area_ha <- as.numeric(sf::st_area(wb_sf)) / 10000 @@ -1097,5 +1089,5 @@ delineate_watershed_from_point <- function(lat, deets <- delineate_watershed_from_point(lat = 44.21013, long = -122.2571, crs = 4326, - write_dir = '~/some_path', + write_dir = '/some/path', write_name = 'ultimate_watershed') diff --git a/5_summarize_nlcd.R b/5_summarize_nlcd.R new file mode 100644 index 0000000..528d0ec --- /dev/null +++ b/5_summarize_nlcd.R @@ -0,0 +1,138 @@ +library(raster) +library(tidyverse) +library(sf) +library(rgee) + +#setup #### + +# setup for rgee is super annoying, and must be done anew on each machine +rgee::ee_Initialize(email = 'vlahm13@gmail.com', + drive = TRUE) + +abp_wb = sf::st_read('~/Downloads/Shape Files/Al-Beuhler_Pond.shp') +cp_wb = sf::st_read('~/Downloads/Shape Files/Circuit_Pond.shp') +gpp_wb = sf::st_read('~/Downloads/Shape Files/GardenPond_Pond.shp') + +get_nlcd_summary = function(watershed_boundary, + epochs = c(1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016)){ + + #summarizes landcover classes from NLCD for a spatial extent. Returns + # a two-element list. element 1 is a tibble containing cell tallies for + # each class and year (epoch). + # Element 2 is a named vector of impervious surface percentages for each + # epoch. + + #watershed_boundary is an sf object representing the outline of a watershed. + # NLCD categories will be summarized for that areal extent. + #epochs is a numeric vector of years for which NLCD data will be collected. + # available epochs are 1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016 + + if(! 
all(epochs %in% c(1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016))){ + stop('epochs must be one or more of 1992, 2001, 2004, 2006, 2008, 2011, 2013, 2016') + } + + color_key = tribble( + ~id, ~hexcode, ~category, + 11,"466b9f","Open water", + 12,"d1def8","Perennial ice/snow", + 21,"dec5c5","Developed, open space", + 22,"d99282","Developed, low intensity", + 23,"eb0000","Developed, medium intensity", + 24,"ab0000","Developed high intensity", + 31,"b3ac9f","Barren land (rock/sand/clay)", + 41,"68ab5f","Deciduous forest", + 42,"1c5f2c","Evergreen forest", + 43,"b5c58f","Mixed forest", + 51,"af963c","Dwarf scrub", + 52,"ccb879","Shrub/scrub", + 71,"dfdfc2","Grassland/herbaceous", + 72,"d1d182","Sedge/herbaceous", + 73,"a3cc51","Lichens", + 74,"82ba9e","Moss", + 81,"dcd939","Pasture/hay", + 82,"ab6c28","Cultivated crops", + 90,"b8d9eb","Woody wetlands", + 95,"6c9fb8","Emergent herbaceous wetlands", + ) + + watershed_boundary_ee = sf_as_ee(watershed_boundary) + + tmp = tempdir() + tmp = paste0(tmp, '/nlcd') + dir.create(tmp, showWarnings = FALSE) + + pct_impervious_surface = rep(NA, length(epochs)) + names(pct_impervious_surface) = epochs + for(i in seq_along(epochs)){ + + subset_id = paste0('NLCD', as.character(epochs[i])) + + #write landcover rasters + + img = ee$ImageCollection('USGS/NLCD')$ + select('landcover')$ + filter(ee$Filter$eq('system:index', subset_id))$ + first()$ + clip(watershed_boundary_ee) + + ee_as_raster(image = img, + region = watershed_boundary_ee$geometry(), + dsn = paste0(tmp, '/', subset_id, '.tif')) + + #get impervious surface % + + img = ee$ImageCollection('USGS/NLCD')$ + select('impervious')$ + filter(ee$Filter$eq('system:index', subset_id))$ + first() + + imp_surf = tryCatch({ + ee_extract(x = img, + y = watershed_boundary, + scale = 30, + fun = ee$Reducer$mean(), + sf = TRUE) %>% + pull(impervious) + }, error = function(e) NA) + + pct_impervious_surface[i] = imp_surf + } + + nlcd_summary = color_key %>% + select(id, category) %>% + mutate(id 
= as.character(id)) + + for(e in epochs){ + + subset_id = paste0('NLCD', as.character(e)) + + epoch_rst = raster::raster(paste0(tmp, '/', subset_id, '.tif')) + tabulated_values = raster::values(epoch_rst) %>% + table() %>% + as_tibble() %>% + dplyr::rename(id = '.', + !!sym(paste0('CellTally', as.character(e))) := 'n') + + nlcd_summary = full_join(nlcd_summary, + tabulated_values, + by = 'id') + } + + return(list(landcover = nlcd_summary, + pct_impervious_surface = pct_impervious_surface)) +} + +# summarize landcover and % impervious surfaces #### + +abp_summary = get_nlcd_summary(watershed_boundary = abp_wb, + epochs = 2016) + +cp_summary = get_nlcd_summary(watershed_boundary = cp_wb, + epochs = 2016) + +gpp_summary = get_nlcd_summary(watershed_boundary = gpp_wb, + epochs = 2016) + +saveRDS(abp_summary, '~/Downloads/al_beuhler_pond.rds') +saveRDS(cp_summary, '~/Downloads/circuit_pond.rds') +saveRDS(gpp_summary, '~/Downloads/gardenpond_pond.rds') From 117810c9cdce5879a8acd5a04e3b93097d5ec5d8 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Fri, 14 Jan 2022 10:47:56 -0700 Subject: [PATCH 13/37] commented example chunk --- 4_interactive_watershed_delineation.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index e120e01..6cd9279 100755 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -1086,8 +1086,9 @@ delineate_watershed_from_point <- function(lat, return(deets) } -deets <- delineate_watershed_from_point(lat = 44.21013, - long = -122.2571, - crs = 4326, - write_dir = '/some/path', - write_name = 'ultimate_watershed') +#example +# deets <- delineate_watershed_from_point(lat = 44.21013, +# long = -122.2571, +# crs = 4326, +# write_dir = '/some/path', +# write_name = 'ultimate_watershed') From df60a1418d801f3b4c828b49b65ca05bbf5cf8f9 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Thu, 28 Jul 2022 09:21:38 -0600 Subject: 
[PATCH 14/37] compare_point_to_nhd.R --- compare_point_to_nhd.R | 146 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 compare_point_to_nhd.R diff --git a/compare_point_to_nhd.R b/compare_point_to_nhd.R new file mode 100644 index 0000000..dae336c --- /dev/null +++ b/compare_point_to_nhd.R @@ -0,0 +1,146 @@ +library(tidyverse) +library(nhdplusTools) +library(sf) +library(mapview) +mv <- mapview::mapview + +options(timeout = 5000) + +#setup #### + +#establish this path to local nhd highres data +nhd_hr_dir <- '~/git/macrosheds/data_acquisition/data/general/nhd_hr' +#and this one if you want to save interactive map outputs (also activate commented chunk in loop) +# mapview_save_dir <- '~/git/macrosheds/data_acquisition/output/sites_vs_NHD' + +buf <- function(site, buf_dist){ + + site_buf <- sf::st_buffer(x = site, + dist = buf_dist) + site_box <- st_bbox(site_buf) + + subset_file <- tempfile(fileext = '.gpkg') + + subset <- try({ + nhdplusTools::subset_nhdplus(bbox = site_box, + output_file = subset_file, + nhdplus_data = 'download', + return_data = TRUE, + overwrite = FALSE, + out_prj = 4326) %>% + suppressMessages() + }, silent = TRUE) + + if(inherits(subset, 'try-error') || ! 
length(subset)){# || nrow(subset[[1]]) < 2){ + print('incrementing buffer distance by 500') + buf_dist <- buf_dist + 500 + if(buf_dist > 5000) stop() + buf(site = site, buf_dist) + } else { + return(list(subset = subset, box = site_box)) + } +} + +# site_csv <- read_csv('~/git/macrosheds/data_acquisition/data/general/site_data.csv') +# sites = tribble( +# ~site_code, ~domain, ~latitude, ~longitude, ~epsg, +# 'a', 'b', 41.98732, -72.60537, 4269, +# 'a', 'b', 38.59614, -77.05603, 4269, +# ) + +sites = read_csv('~/Downloads/duplicate_COMIDS.csv') %>% + select(-site_name) %>% + rename(latitude = lat, longitude = lon, site_code = nhdplus_id, site_name = long_name) %>% + mutate(epsg = 4269, NHD_COMID = NULL) +# sites=sites[4:nrow(sites),] + +sites$NHD_COMID <- '?' +total_len <- nrow(sites) + +# loop 1: NHDPlusV2 or NHD-HR #### + +#this loop is for identifying whether a point is on the NHDPlusV2 or the NHD-HR, +# or neither. for NHM seg_ids, see the next loop +prev_huc4 <- 'none' +for(i in 1:total_len){ + + print('---') + print(i) + site <- sites[i, ] + site_code <- site$site_code + epsg <- site$epsg + + print(paste('site:', site_code)) + print(paste('name:', sites$site_name[i])) + + site <- sf::st_point(c(site$longitude, site$latitude)) %>% + sf::st_sfc(crs = site$epsg) + + nextt <- FALSE + comid <- tryCatch({ + nhdplusTools::discover_nhdplus_id(site) + }, error = function(e) nextt <<- TRUE) + if(nextt) { + print('couldnt find comid') + next + } + + out <- suppressWarnings(buf(site = site, 1000)) + subset <- out$subset + site_box <- out$box + + huc12 <- get_huc12(site) + # print(huc12$huc12) + huc4 <- substr(huc12$huc12, 1, 4)[1] + nhdplusTools::download_nhdplushr(nhd_hr_dir, + hu_list = huc4) %>% + invisible() + + if(huc4 != prev_huc4){ + HRflowlines <- nhdplusTools::get_nhdplushr(file.path(nhd_hr_dir, + substr(huc4, 1, 2)), + file.path(nhd_hr_dir, + paste0(huc4, '.gpkg')), + layers = 'NHDFlowline', + proj = epsg)$NHDFlowline + } else { + print('using previous NHD HR 
HUC') + } + + prev_huc4 <- huc4 + + NHD_HR <- suppressWarnings(sf::st_crop(HRflowlines, site_box)) + + NHDPlus <- subset$NHDFlowline_Network %>% + st_transform(crs = epsg) + + dist_to_nearest_NHDPlus_flowline <- min(st_distance(NHDPlus, site)) + print(paste('Dist to NHDPlus:', round(dist_to_nearest_NHDPlus_flowline, 2), 'm')) + dist_to_nearest_NHDHR_flowline <- min(st_distance(NHD_HR, site)) + print(paste('Dist to NHD_HR:', round(dist_to_nearest_NHDHR_flowline, 2), 'm')) + + options(scipen = 100) + xx = mv(NHD_HR, color = 'darkslategray3') + mv(NHDPlus, color='deepskyblue4') + mv(site, color='red') + print(xx) + + # mapview_save_path <- file.path(mapview_save_dir, + # paste0(dmn, '_', site_code, '.html')) + # mapview::mapshot(xx, + # url = mapview_save_path) + # print(paste('map saved to', mapview_save_path)) + # print(xx) + + system('spd-say "chili chili chili"') + x <- readline(cat('This point is on: [A] an NHDPlus flowline, [B] an NHD_HR flowline, or [C] neither >\n')) + + if(x == 'A'){ + sites[i, 'NHD_COMID'] <- as.character(comid) + print(comid) + } else if(x == 'B'){ + sites[i, 'NHD_COMID'] <- 'HR only' + } else if(x == 'C'){ + sites[i, 'NHD_COMID'] <- 'too small' + } else { + stop(paste("'A', 'B', or 'C'")) + } +} From 79ce2b672106485f903b0047e1441c81f8ab5877 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Thu, 28 Jul 2022 12:05:17 -0600 Subject: [PATCH 15/37] updated 2_batch_summary_nhd.R --- 2_batch_summary_nhd.R | 211 +++++++++++++++++++++++++++++------------- 1 file changed, 149 insertions(+), 62 deletions(-) diff --git a/2_batch_summary_nhd.R b/2_batch_summary_nhd.R index 428e947..b479cc1 100755 --- a/2_batch_summary_nhd.R +++ b/2_batch_summary_nhd.R @@ -1,5 +1,6 @@ #batch summarizing of watershed data for the continental USA #Mike Vlah (vlahm13@gmail.com) +#updated 2022-07-28 #use these tools to: #1. acquire NHDPlusV2 COMIDs based on lat/long @@ -16,25 +17,43 @@ #NOTE: these tools necessarily download a lot of large datasets and store them #in memory. 
keep an eye on your usage. -library(stringr) -library(dplyr) + library(plyr) -# remotes::install_github("USGS-R/nhdplusTools") +library(tidyverse) library(nhdplusTools) -# remotes::install_github("jsta/nhdR") library(nhdR) library(RCurl) library(sf) -# library(MODISTools) +library(mapview) + +mv = mapview::mapview + +options(timeout = 5000) + +#establish save location for NHD HR files (some place you don't mind accumulating a few gigs). +#you could make it a temporary directory. +nhd_hr_dir = '~/git/macrosheds/data_acquisition/data/general/nhd_hr' + +#this is where maps of site, NHDPlusV2 flowlines, NHD HR flowlines will be saved +mapview_save_dir <- '~/git/charlotte_observer/poultry_farms/out/monitoring_location_maps' # 1. setup and helper functions #### -#set your working directory to the location of your site data file. -# site data must include latitude and longitude columns (decimal degrees) -# setwd('~/Desktop/untracked/watershed_geohax/') -sites = read.csv('site_data.csv', stringsAsFactors=FALSE) WGS84 = 4326 #EPSG code for coordinate reference system +#create site data for example run +sites = tribble( + ~region, ~sitecode, ~sitename, ~latitude, ~longitude, + 'AZ', 'SC', 'Sycamore Creek', 33.7532, -111.5060, + 'CT', 'Unio', 'Farmington River at Unionville', 41.7555, -72.8870, + 'MD', 'POBR', 'Pond Branch', 39.4803, -76.6875, + 'NC', 'UEno', 'East Eno River', 36.1359, -79.1588, + 'VT', 'Pass', 'Passumpsic River', 44.3656, -72.0393, + 'WI', 'BRW', 'Brewery Creek', 43.1250, -89.6350 +) +# ) %>% st_as_sf(coords = c('longitude', 'latitude'), crs = WGS84) + + comid_from_point = function(lat, long, CRS) { pt = sf::st_point(c(long, lat)) ptc = sf::st_sfc(pt, crs=CRS) @@ -50,6 +69,36 @@ vpu_from_point = function(lat, long, CRS) { return(VPU) } +get_nearby_nhd <- function(lat, long, CRS, buf_dist = 1000){ + + site = sf::st_point(c(long, lat)) %>% sf::st_sfc(crs = CRS) + + site_buf <- sf::st_buffer(x = site, + dist = buf_dist) + site_box <- st_bbox(site_buf) + + 
subset_file <- tempfile(fileext = '.gpkg') + + subset <- try({ + nhdplusTools::subset_nhdplus(bbox = site_box, + output_file = subset_file, + nhdplus_data = 'download', + return_data = TRUE, + overwrite = FALSE, + out_prj = 4326) %>% + suppressMessages() + }, silent = TRUE) + + if(inherits(subset, 'try-error') || ! length(subset)){# || nrow(subset[[1]]) < 2){ + print('incrementing buffer distance by 500') + buf_dist <- buf_dist + 500 + if(buf_dist > 20000) stop('no streamlines within 20000 meter radius?') + get_nearby_nhd(lat = lat, long = long, CRS = CRS, buf_dist = buf_dist) + } else { + return(list(nhd_subset = subset, bbox = site_box)) + } +} + #this calculates how far along a reach any given point falls. That way when we pull in #watershed summary data for a reach, we can adjust it according to how much #of the total upstream area actually contributes to the point in question. @@ -62,6 +111,14 @@ calc_reach_prop = function(VPU, COMID, lat, long, CRS, quiet=FALSE){ ' Fortunately, each component need only be downloaded once.')) } + # url <- nhdR:::get_plus_remotepath(14, component = 'NHDSnapshot') + # cc = get_nhdplus(comid = COMID) + # nhdplusTools::download_nhdplusv2(outdir = '~/Desktop/untracked/nhdplusv2', + # url = 'https://edap-ow-data-commons.s3.amazonaws.com/NHDPlusV21/Data/NHDPlusSA/NHDPlus03N/NHDPlusV21_SA_03N_03a_CatSeed_01.7z') + # nhdplusTools::download_nhdplusv2(outdir = '~/Desktop/untracked/nhdplusv2') + # nhd_plus_get(vpu = VPU, "NHDSnapshot") + # nhd_plus_get(vpu = VPU, component = "NHDSnapshot", force_unzip = TRUE) + # nhd_plus_get(vpu = VPU, component = "NHDPlusAttributes", force_unzip = TRUE) fl = nhdR::nhd_plus_load(vpu=VPU, component='NHDSnapshot', dsn='NHDFlowline', approve_all_dl=TRUE) fl_etc = nhdR::nhd_plus_load(vpu=VPU, component='NHDPlusAttributes', @@ -220,7 +277,7 @@ streamcat_bulk = function(site_df, streamcat_sets){ # 2. get NHDPlusV2 data #### #COMID is the NHD identifier for any reach in the continental U.S. 
-#add COMIDs to your site table. If this doesn't work, update nhdplusTools +#add COMIDs to your site table. If this doesn't work, try updating nhdplusTools sites$COMID = unlist(mapply(comid_from_point, sites$latitude, sites$longitude, WGS84)) sites = sites[! is.na(sites$COMID),] @@ -230,8 +287,88 @@ sites = sites[! is.na(sites$COMID),] sites$VPU = unlist(mapply(vpu_from_point, sites$latitude, sites$longitude, WGS84)) -sites$reach_proportion = unlist(mapply(calc_reach_prop, sites$VPU, sites$COMID, - sites$latitude, sites$longitude, WGS84, quiet=TRUE)) +#loop through sites and verify that they're actually on NHDPlusV2 flowlines, +#rather than NHD HR flowlines, for which we don't yet have StreamCat summaries. +#this loop will also calculate reach proportions +sites$reach_proportion = NA +sites$nhd_network = NA + +prev_huc4 <- 'none' +for(i in seq_len(nrow(sites))){ + + print('---') + site <- sites[i, ] + print(paste0(i, ': ', site$sitecode)) + + nhdchunk = suppressWarnings(get_nearby_nhd( + lat = site$latitude, + long = site$longitude, + CRS = WGS84)) + + subset <- nhdchunk$nhd_subset + site_box <- nhdchunk$bbox + + site_sf <- sf::st_point(c(site$longitude, site$latitude)) %>% + sf::st_sfc(crs = WGS84) + + huc12 <- get_huc12(site_sf) + print(paste('HUC12:', huc12$huc12)) + huc4 <- substr(huc12$huc12, 1, 4)[1] + nhdplusTools::download_nhdplushr(nhd_hr_dir, hu_list = huc4) %>% invisible() + + if(huc4 != prev_huc4){ + + HRflowlines <- nhdplusTools::get_nhdplushr( + file.path(nhd_hr_dir, substr(huc4, 1, 2)), + file.path(nhd_hr_dir, paste0(huc4, '.gpkg')), + layers = 'NHDFlowline', + proj = 4326 + )$NHDFlowline + + } else { + print('using previous NHD HR HUC') + } + + prev_huc4 <- huc4 + + NHD_HR <- suppressWarnings(sf::st_crop(HRflowlines, site_box)) + + NHDPlus <- subset$NHDFlowline_Network + # catchments <- subset$CatchmentSP + # upstream <- nhdplusTools::get_UT(flowlines, comid) + + x = suppressWarnings(nhdplusTools::get_flowline_index(NHDPlus, points=site_sf)) + 
sites$reach_proportion[i] = 1 - x$REACH_meas / 100 #0=upstream end; 1=downstream end + + dist_to_nearest_NHDPlus_flowline <- min(st_distance(NHDPlus, site_sf)) + print(paste('Dist to NHDPlus:', round(dist_to_nearest_NHDPlus_flowline, 2), 'm')) + dist_to_nearest_NHDHR_flowline <- min(st_distance(NHD_HR, site_sf)) + print(paste('Dist to NHD_HR:', round(dist_to_nearest_NHDHR_flowline, 2), 'm')) + + xx = mv(NHD_HR, color = 'darkslategray3') + mv(NHDPlus, color='deepskyblue4') + mv(site_sf, color='red') + mapview_save_path <- file.path(mapview_save_dir, paste0(site$sitecode, '.html')) + mapview::mapshot(xx, url = mapview_save_path) + print(paste('map saved to', mapview_save_path)) + print(xx) + + x <- readline(cat('This point is on: [A] an NHDPlus flowline, [B] an NHD_HR flowline (but not NHDPlus), or [C] neither >\n')) + + if(x == 'A'){ + sites[i, 'nhd_network'] <- 'NHDPlusV2' + } else if(x == 'B'){ + sites[i, 'COMID'] <- NA + sites[i, 'nhd_network'] <- 'NHD HR' + } else if(x == 'C'){ + sites[i, 'COMID'] <- NA + sites[i, 'nhd_network'] <- 'too small even for HR' + } else { + stop(paste("'A', 'B', or 'C'")) + } +} + +#usef to calculate reach proportion naively like this +# sites$reach_proportion = unlist(mapply(calc_reach_prop, sites$VPU, sites$COMID, +# sites$latitude, sites$longitude, WGS84, quiet=TRUE)) #construct list of DSN=component pairs to acquire. see NHDPlus docs for more. setlist = list('NHDPlusAttributes'='PlusFlowlineVAA', @@ -285,53 +422,3 @@ sites = sites[! duplicated(sites$sitecode),] #save yer data sites = arrange(sites, region, sitecode) write.csv(sites, 'watershed_summary_data.csv', row.names=FALSE) - - -# 4. 
get MODIS data (this section incomplete) #### -# VNP13A1 -mt_bands("MOD13Q1") -subset1 = mt_subset(product = "MOD13Q1", - lat = 40, - lon = -110, - band = "250m_16_days_NDVI", - start = "2004-01-01", - end = "2004-02-01", - km_lr = 10, - km_ab = 10, - site_name = "testsite", - internal = TRUE, - progress = FALSE) - -dfx = data.frame("site_name" = paste("test",1:2)) -dfx$lat = 40 -dfx$lon = -110 - -# test batch download -subsets = mt_batch_subset(dfx = dfx, - product = "MOD11A2", - band = "LST_Day_1km", - internal = TRUE, - start = "2004-01-01", - end = "2004-02-01") - - -#### SCRAPS #### - -# storage_path='/home/mike/.local/share/StreamCat' - -# subset = subset %>% -# st_as_sf(coords=c('longitude', 'latitude'), crs=4326) %>% -# st_transform(PROJ4) -# -# #get DEM, 14 is highest res, smallest area; 1 is lowest res, broadest area -# dem = get_elev_raster(subset, z=8) -# mapview(dem) + mapview(subset) -# -# devtools::install_github("jsta/nhdR") -# -# #convert to spatial object and project from WGS 84 -# # subset = subset %>% -# # st_as_sf(coords=c('longitude','latitude'), crs=4326) %>% -# # st_transform(PROJ4) -# - From ed792da3fbbe00144f121cbf89dc94f6af7c8e38 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Mon, 26 Sep 2022 12:51:20 -0600 Subject: [PATCH 16/37] precaution against data loss --- 4_interactive_watershed_delineation.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/4_interactive_watershed_delineation.R b/4_interactive_watershed_delineation.R index 6cd9279..66600e9 100755 --- a/4_interactive_watershed_delineation.R +++ b/4_interactive_watershed_delineation.R @@ -631,16 +631,12 @@ delineate_watershed_from_point <- function(lat, allow_alphanumeric_response = FALSE) # # if('n' %in% resp){ - # unlink(write_dir, - # recursive = TRUE) # print(glue('Moving on. You haven\'t seen the last of {s}!', # s = site_name)) # return(1) # } if('a' %in% resp){ - unlink(write_dir, - recursive = TRUE) print(glue('Aborted. 
Any completed delineations have been saved.')) return(2) } From eb0c2aa398aa0f0b39d46eb4cc4af0a431b4b00b Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 13:44:39 -0700 Subject: [PATCH 17/37] working on summarize nadp script --- .gitignore | 1 + summarize_nadp_for_watershed.R | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 summarize_nadp_for_watershed.R diff --git a/.gitignore b/.gitignore index 73e0036..8c81e17 100755 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ data/ 2_batch_summary.R sites_with_comid.csv /obsolete +.aider* diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R new file mode 100644 index 0000000..6a1130c --- /dev/null +++ b/summarize_nadp_for_watershed.R @@ -0,0 +1,10 @@ +library(macrosheds) + +sites <- c('ALBION', 'GREEN4', 'NAVAJO', 'w8', 'w9', 'W-9') + +sheds <- ms_load_spatial_product( + macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + spatial_product = 'ws_boundary', + site_codes = sites +) + From 5b0bfc33617268e4f94260f09da7d35ea92bdc4f Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 13:48:11 -0700 Subject: [PATCH 18/37] feat: add NADP NTN raster download and watershed extraction for pH and conductivity Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 242 +++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 6a1130c..e65f7e0 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -1,4 +1,9 @@ library(macrosheds) +library(terra) +library(sf) +library(dplyr) +library(tidyr) +library(httr) sites <- c('ALBION', 'GREEN4', 'NAVAJO', 'w8', 'w9', 'W-9') @@ -8,3 +13,240 @@ sheds <- ms_load_spatial_product( site_codes = sites ) +# ---- configuration ---- + +nadp_dir <- 'nadp' +dir.create(nadp_dir, showWarnings = FALSE) + +# NADP NTN annual raster base URL +# Variables: pH (lab pH), Cond (conductivity, 
uS/cm) +# Files are named like: pH_dep_2022.tif, Cond_dep_2022.tif +# Available from NADP's gridded data: https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/ + +nadp_vars <- c('pH', 'Cond') +years <- 1985:2023 # adjust range as needed + +# Base URL pattern for NADP NTN concentration grids (annual) +# NADP provides these as GeoTIFFs via their data portal +base_url <- 'https://nadp.slh.wisc.edu/datalib/ntn/grids/annual' + +# ---- prepare watershed geometries ---- + +# Reproject watersheds to WGS84 for consistency with NADP rasters +sheds_wgs84 <- st_transform(sheds, 4326) + +# Get a bounding box with some buffer for cropping +sheds_bbox <- st_bbox(sheds_wgs84) + +# ---- download NADP rasters ---- + +download_nadp_raster <- function(variable, year, dest_dir) { + + # NADP NTN annual concentration grids URL pattern + # pH: precipitation-weighted mean pH + # Cond: precipitation-weighted mean conductivity (uS/cm) + fname <- paste0(variable, '_', year, '.tif') + dest_file <- file.path(dest_dir, fname) + + if(file.exists(dest_file)) { + message('Already downloaded: ', fname) + return(dest_file) + } + + # Try common NADP URL patterns + urls_to_try <- c( + paste0(base_url, '/', variable, '/', year, '/', fname), + paste0(base_url, '/', variable, '/', fname), + paste0(base_url, '/', year, '/', fname) + ) + + for(url in urls_to_try) { + resp <- try(GET(url, write_disk(dest_file, overwrite = TRUE), + timeout(60)), silent = TRUE) + + if(! inherits(resp, 'try-error') && status_code(resp) == 200) { + # Verify it's a valid raster + test <- try(rast(dest_file), silent = TRUE) + if(! 
inherits(test, 'try-error')) { + message('Downloaded: ', fname) + return(dest_file) + } + } + + # Clean up failed download + if(file.exists(dest_file)) file.remove(dest_file) + } + + message('Could not download: ', variable, ' ', year) + return(NA_character_) +} + +message('Downloading NADP NTN annual rasters...') +message('If automatic download fails, manually download rasters from:') +message(' https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/') +message('Place .tif files in the nadp/ directory as {Variable}_{Year}.tif') + +download_log <- expand.grid(variable = nadp_vars, year = years, + stringsAsFactors = FALSE) %>% + as_tibble() + +download_log$filepath <- mapply( + download_nadp_raster, + variable = download_log$variable, + year = download_log$year, + dest_dir = nadp_dir +) + +download_log <- download_log %>% + filter(! is.na(filepath)) + +if(nrow(download_log) == 0) { + stop('No NADP rasters were downloaded. Please download manually from:\n', + ' https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/\n', + 'and place them in nadp/ as {Variable}_{Year}.tif\n', + 'Then re-run this script.') +} + +message(nrow(download_log), ' rasters available for processing.') + +# ---- also check for any manually placed rasters ---- + +manual_tifs <- list.files(nadp_dir, pattern = '\\.tif$', full.names = TRUE) +if(length(manual_tifs) > 0) { + # Parse variable and year from filenames + manual_info <- tibble(filepath = manual_tifs) %>% + mutate( + basename = tools::file_path_sans_ext(basename(filepath)), + variable = sub('_[0-9]{4}$', '', basename), + year = as.integer(sub('.*_', '', basename)) + ) %>% + filter(variable %in% nadp_vars, ! 
is.na(year)) %>% + select(variable, year, filepath) + + download_log <- bind_rows(download_log, manual_info) %>% + distinct(variable, year, .keep_all = TRUE) +} + +# ---- extract raster values for each watershed ---- + +extract_nadp_for_sheds <- function(raster_path, sheds_sf) { + + r <- try(rast(raster_path), silent = TRUE) + if(inherits(r, 'try-error')) return(NULL) + + # Reproject watersheds to match raster CRS + sheds_reproj <- st_transform(sheds_sf, crs(r)) + + # Crop raster to watershed extent (with buffer) to speed extraction + sheds_extent <- ext(vect(sheds_reproj)) + 0.5 # ~0.5 degree buffer + r_crop <- try(crop(r, sheds_extent), silent = TRUE) + if(inherits(r_crop, 'try-error')) r_crop <- r + + # Extract area-weighted mean for each polygon + vals <- exact_extract(r_crop, sheds_reproj, fun = 'mean') + + if(is.null(vals)) { + # Fallback to terra::extract + vals <- terra::extract(r_crop, vect(sheds_reproj), fun = mean, + na.rm = TRUE, weights = TRUE) + vals <- vals[, 2] + } + + return(vals) +} + +# Use exactextractr if available, otherwise terra::extract +use_exactextractr <- requireNamespace('exactextractr', quietly = TRUE) + +if(use_exactextractr) { + library(exactextractr) + message('Using exactextractr for weighted extraction.') +} else { + message('Using terra::extract. 
Install exactextractr for better accuracy.') +} + +results <- list() + +for(i in seq_len(nrow(download_log))) { + + row <- download_log[i, ] + message('Processing: ', row$variable, ' ', row$year) + + r <- try(rast(row$filepath), silent = TRUE) + if(inherits(r, 'try-error')) { + message(' Skipping (invalid raster)') + next + } + + # Reproject watersheds to match raster CRS + sheds_reproj <- st_transform(sheds_wgs84, crs(r)) + + # Crop to reduce memory + sheds_ext <- ext(vect(sheds_reproj)) + sheds_ext_buf <- sheds_ext + 0.5 + r_crop <- try(crop(r, sheds_ext_buf), silent = TRUE) + if(inherits(r_crop, 'try-error')) r_crop <- r + + if(use_exactextractr) { + vals <- exact_extract(r_crop, sheds_reproj, fun = 'mean') + } else { + ex <- terra::extract(r_crop, vect(sheds_reproj), fun = mean, + na.rm = TRUE, weights = TRUE) + vals <- ex[, 2] + } + + res <- tibble( + site_code = sheds_wgs84$site_code, + variable = row$variable, + year = row$year, + value = vals + ) + + results[[length(results) + 1]] <- res +} + +# ---- combine and format results ---- + +nadp_summary <- bind_rows(results) + +# Add descriptive labels +nadp_summary <- nadp_summary %>% + mutate( + variable_description = case_when( + variable == 'pH' ~ 'Precipitation-weighted mean pH', + variable == 'Cond' ~ 'Precipitation-weighted mean conductivity (uS/cm)', + TRUE ~ variable + ) + ) + +# Pivot to wide format (one column per variable) +nadp_wide <- nadp_summary %>% + select(site_code, year, variable, value) %>% + pivot_wider(names_from = variable, values_from = value) %>% + arrange(site_code, year) + +# ---- save results ---- + +write.csv(nadp_summary, 'nadp_watershed_summary_long.csv', row.names = FALSE) +write.csv(nadp_wide, 'nadp_watershed_summary_wide.csv', row.names = FALSE) + +message('\n---- Summary ----') +message('Sites: ', paste(unique(nadp_summary$site_code), collapse = ', ')) +message('Variables: ', paste(unique(nadp_summary$variable), collapse = ', ')) +message('Years: ', min(nadp_summary$year), '-', 
max(nadp_summary$year)) +message('Results saved to:') +message(' nadp_watershed_summary_long.csv') +message(' nadp_watershed_summary_wide.csv') + +# Print a quick summary +nadp_summary %>% + group_by(site_code, variable) %>% + summarize( + n_years = n(), + mean_value = mean(value, na.rm = TRUE), + min_value = min(value, na.rm = TRUE), + max_value = max(value, na.rm = TRUE), + .groups = 'drop' + ) %>% + print(n = Inf) + From c53728bf08605c4c04d500c4f73c465877949f1e Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 13:51:31 -0700 Subject: [PATCH 19/37] chore: extend NADP data year range from 2023 to 2024 --- summarize_nadp_for_watershed.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index e65f7e0..29904d6 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -24,7 +24,7 @@ dir.create(nadp_dir, showWarnings = FALSE) # Available from NADP's gridded data: https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/ nadp_vars <- c('pH', 'Cond') -years <- 1985:2023 # adjust range as needed +years <- 1985:2024 # adjust range as needed # Base URL pattern for NADP NTN concentration grids (annual) # NADP provides these as GeoTIFFs via their data portal From c683ac9b567e9aa68edb7b927394491b1284e4ab Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 13:51:34 -0700 Subject: [PATCH 20/37] feat: estimate conductivity from individual ion grids and pH instead of unavailable Cond variable Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 69 +++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 29904d6..f460c64 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -19,13 +19,25 @@ nadp_dir <- 'nadp' dir.create(nadp_dir, showWarnings = FALSE) # NADP NTN annual raster base URL 
-# Variables: pH (lab pH), Cond (conductivity, uS/cm) -# Files are named like: pH_dep_2022.tif, Cond_dep_2022.tif +# Variables: pH (lab pH), plus major ion concentrations (mg/L) +# Conductivity is not provided directly, but can be estimated from ions. # Available from NADP's gridded data: https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/ -nadp_vars <- c('pH', 'Cond') +# Ion grids are precipitation-weighted mean concentrations in mg/L +nadp_vars <- c('pH', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') years <- 1985:2024 # adjust range as needed +# Equivalent weights (mg/meq) for converting mg/L to ueq/L +equiv_weights <- c(Ca = 20.04, Mg = 12.15, K = 39.10, Na = 22.99, + Cl = 35.45, NO3 = 62.00, NH4 = 18.04, SO4 = 48.03) + +# Limiting equivalent conductances at 25C (uS/cm per ueq/L) +# Sources: CRC Handbook / Standard Methods +equiv_conductances <- c(Ca = 59.5, Mg = 53.1, K = 73.5, Na = 50.1, + Cl = 76.3, NO3 = 71.4, NH4 = 73.5, SO4 = 80.0) +# Note: H+ contributes too; will estimate from pH +H_conductance <- 349.8 + # Base URL pattern for NADP NTN concentration grids (annual) # NADP provides these as GeoTIFFs via their data portal base_url <- 'https://nadp.slh.wisc.edu/datalib/ntn/grids/annual' @@ -120,7 +132,7 @@ if(length(manual_tifs) > 0) { variable = sub('_[0-9]{4}$', '', basename), year = as.integer(sub('.*_', '', basename)) ) %>% - filter(variable %in% nadp_vars, ! is.na(year)) %>% + filter(variable %in% c(nadp_vars, 'Cond_est'), ! 
is.na(year)) %>% select(variable, year, filepath) download_log <- bind_rows(download_log, manual_info) %>% @@ -214,7 +226,14 @@ nadp_summary <- nadp_summary %>% mutate( variable_description = case_when( variable == 'pH' ~ 'Precipitation-weighted mean pH', - variable == 'Cond' ~ 'Precipitation-weighted mean conductivity (uS/cm)', + variable == 'Ca' ~ 'Precipitation-weighted mean Ca (mg/L)', + variable == 'Mg' ~ 'Precipitation-weighted mean Mg (mg/L)', + variable == 'K' ~ 'Precipitation-weighted mean K (mg/L)', + variable == 'Na' ~ 'Precipitation-weighted mean Na (mg/L)', + variable == 'Cl' ~ 'Precipitation-weighted mean Cl (mg/L)', + variable == 'NO3' ~ 'Precipitation-weighted mean NO3 (mg/L)', + variable == 'NH4' ~ 'Precipitation-weighted mean NH4 (mg/L)', + variable == 'SO4' ~ 'Precipitation-weighted mean SO4 (mg/L)', TRUE ~ variable ) ) @@ -225,6 +244,44 @@ nadp_wide <- nadp_summary %>% pivot_wider(names_from = variable, values_from = value) %>% arrange(site_code, year) +# ---- estimate conductivity from ions and pH ---- + +ion_cols <- intersect(names(equiv_weights), names(nadp_wide)) + +if(length(ion_cols) > 0 && 'pH' %in% names(nadp_wide)) { + + nadp_wide <- nadp_wide %>% + mutate( + # Convert each ion from mg/L to ueq/L, then multiply by equiv conductance + Cond_est = { + cond <- rep(0, n()) + for(ion in ion_cols) { + conc_mgl <- .data[[ion]] + conc_ueql <- conc_mgl / equiv_weights[ion] * 1000 + cond <- cond + conc_ueql * equiv_conductances[ion] / 1000 + } + # Add H+ contribution from pH + H_ueql <- 10^(-pH) * 1e6 # mol/L -> ueq/L + cond <- cond + H_ueql * H_conductance / 1000 + round(cond, 2) + } + ) + + message('Estimated conductivity (Cond_est, uS/cm) computed from ion concentrations and pH.') + + # Also add to long format + cond_long <- nadp_wide %>% + select(site_code, year, Cond_est) %>% + mutate(variable = 'Cond_est', + variable_description = 'Estimated conductivity from ions + pH (uS/cm)') %>% + rename(value = Cond_est) + + nadp_summary <- 
bind_rows(nadp_summary, cond_long) + +} else { + message('Not enough ion variables to estimate conductivity.') +} + # ---- save results ---- write.csv(nadp_summary, 'nadp_watershed_summary_long.csv', row.names = FALSE) @@ -232,7 +289,7 @@ write.csv(nadp_wide, 'nadp_watershed_summary_wide.csv', row.names = FALSE) message('\n---- Summary ----') message('Sites: ', paste(unique(nadp_summary$site_code), collapse = ', ')) -message('Variables: ', paste(unique(nadp_summary$variable), collapse = ', ')) +message('Variables: ', paste(sort(unique(nadp_summary$variable)), collapse = ', ')) message('Years: ', min(nadp_summary$year), '-', max(nadp_summary$year)) message('Results saved to:') message(' nadp_watershed_summary_long.csv') From c30c1362a387389dcbf948be7af167560ae46b10 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 13:55:43 -0700 Subject: [PATCH 21/37] fix: update NADP download to use correct URL pattern and handle zip/ArcGrid formats Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 185 ++++++++++++++++++++++----------- 1 file changed, 126 insertions(+), 59 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index f460c64..3c974be 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -39,8 +39,20 @@ equiv_conductances <- c(Ca = 59.5, Mg = 53.1, K = 73.5, Na = 50.1, H_conductance <- 349.8 # Base URL pattern for NADP NTN concentration grids (annual) -# NADP provides these as GeoTIFFs via their data portal -base_url <- 'https://nadp.slh.wisc.edu/datalib/ntn/grids/annual' +# URL pattern: https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/{var}_conc_{year}.zip +# Zips contain raster data (ArcGrid or GeoTIFF) +base_url <- 'https://nadp.slh.wisc.edu/filelib/maps/NTN/grids' + +# NADP uses these filename prefixes for concentration grids +nadp_var_prefixes <- c(pH = 'pH', + Ca = 'Ca', + Mg = 'Mg', + K = 'K', + Na = 'Na', + Cl = 'Cl', + NO3 = 'NO3', + NH4 
= 'NH4', + SO4 = 'SO4') # ---- prepare watershed geometries ---- @@ -54,49 +66,107 @@ sheds_bbox <- st_bbox(sheds_wgs84) download_nadp_raster <- function(variable, year, dest_dir) { - # NADP NTN annual concentration grids URL pattern - # pH: precipitation-weighted mean pH - # Cond: precipitation-weighted mean conductivity (uS/cm) - fname <- paste0(variable, '_', year, '.tif') - dest_file <- file.path(dest_dir, fname) + # Check if we already have an extracted raster for this variable/year + extracted_dir <- file.path(dest_dir, paste0(variable, '_conc_', year)) + if(dir.exists(extracted_dir)) { + rast_file <- find_raster_in_dir(extracted_dir) + if(! is.null(rast_file)) { + message('Already extracted: ', variable, ' ', year) + return(rast_file) + } + } - if(file.exists(dest_file)) { - message('Already downloaded: ', fname) - return(dest_file) + # Also check for a .tif directly + tif_file <- file.path(dest_dir, paste0(variable, '_conc_', year, '.tif')) + if(file.exists(tif_file)) { + test <- try(rast(tif_file), silent = TRUE) + if(! 
inherits(test, 'try-error')) { + message('Already have: ', variable, ' ', year) + return(tif_file) + } } - # Try common NADP URL patterns - urls_to_try <- c( - paste0(base_url, '/', variable, '/', year, '/', fname), - paste0(base_url, '/', variable, '/', fname), - paste0(base_url, '/', year, '/', fname) - ) + prefix <- nadp_var_prefixes[variable] + zip_name <- paste0(prefix, '_conc_', year, '.zip') + zip_file <- file.path(dest_dir, zip_name) + + # Try URL patterns (case variations in prefix) + prefixes_to_try <- unique(c(prefix, tolower(prefix), toupper(prefix))) + urls_to_try <- unlist(lapply(prefixes_to_try, function(p) { + zn <- paste0(p, '_conc_', year, '.zip') + c( + paste0(base_url, '/', year, '/', zn), + paste0(base_url, '/', year, '/', tolower(zn)) + ) + })) for(url in urls_to_try) { - resp <- try(GET(url, write_disk(dest_file, overwrite = TRUE), - timeout(60)), silent = TRUE) + resp <- try(GET(url, write_disk(zip_file, overwrite = TRUE), + timeout(120)), silent = TRUE) if(! inherits(resp, 'try-error') && status_code(resp) == 200) { - # Verify it's a valid raster - test <- try(rast(dest_file), silent = TRUE) - if(! inherits(test, 'try-error')) { - message('Downloaded: ', fname) - return(dest_file) + # Try to unzip + exdir <- file.path(dest_dir, paste0(variable, '_conc_', year)) + unzipped <- try(unzip(zip_file, exdir = exdir), silent = TRUE) + + if(! inherits(unzipped, 'try-error') && length(unzipped) > 0) { + rast_file <- find_raster_in_dir(exdir) + if(! 
is.null(rast_file)) { + message('Downloaded and extracted: ', variable, ' ', year) + file.remove(zip_file) + return(rast_file) + } } + + # Clean up failed extraction + if(dir.exists(exdir)) unlink(exdir, recursive = TRUE) } - # Clean up failed download - if(file.exists(dest_file)) file.remove(dest_file) + if(file.exists(zip_file)) file.remove(zip_file) } message('Could not download: ', variable, ' ', year) return(NA_character_) } +find_raster_in_dir <- function(dir_path) { + # Look for GeoTIFF first + tifs <- list.files(dir_path, pattern = '\\.tif$', full.names = TRUE, + recursive = TRUE) + if(length(tifs) > 0) return(tifs[1]) + + # Look for ArcGrid (hdr.adf) + adfs <- list.files(dir_path, pattern = 'hdr\\.adf$', full.names = TRUE, + recursive = TRUE, ignore.case = TRUE) + if(length(adfs) > 0) { + # Return the directory containing hdr.adf (that's the ArcGrid "file") + return(dirname(adfs[1])) + } + + # Look for .asc (Arc ASCII grid) + ascs <- list.files(dir_path, pattern = '\\.asc$', full.names = TRUE, + recursive = TRUE) + if(length(ascs) > 0) return(ascs[1]) + + # Look for .img + imgs <- list.files(dir_path, pattern = '\\.img$', full.names = TRUE, + recursive = TRUE) + if(length(imgs) > 0) return(imgs[1]) + + # Try to load anything terra can read + all_files <- list.files(dir_path, full.names = TRUE, recursive = TRUE) + for(f in all_files) { + test <- try(rast(f), silent = TRUE) + if(! 
inherits(test, 'try-error')) return(f) + } + + return(NULL) +} + message('Downloading NADP NTN annual rasters...') message('If automatic download fails, manually download rasters from:') -message(' https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/') -message('Place .tif files in the nadp/ directory as {Variable}_{Year}.tif') +message(' https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/') +message('Place zip files or extracted folders in the nadp/ directory') download_log <- expand.grid(variable = nadp_vars, year = years, stringsAsFactors = FALSE) %>% @@ -114,8 +184,8 @@ download_log <- download_log %>% if(nrow(download_log) == 0) { stop('No NADP rasters were downloaded. Please download manually from:\n', - ' https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/\n', - 'and place them in nadp/ as {Variable}_{Year}.tif\n', + ' https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/\n', + 'and place them in nadp/ (as zips or extracted folders)\n', 'Then re-run this script.') } @@ -123,50 +193,47 @@ message(nrow(download_log), ' rasters available for processing.') # ---- also check for any manually placed rasters ---- +# Check for .tif files and extracted directories manual_tifs <- list.files(nadp_dir, pattern = '\\.tif$', full.names = TRUE) +manual_dirs <- list.dirs(nadp_dir, recursive = FALSE, full.names = TRUE) + +manual_entries <- list() + if(length(manual_tifs) > 0) { - # Parse variable and year from filenames - manual_info <- tibble(filepath = manual_tifs) %>% + manual_entries[['tifs']] <- tibble(filepath = manual_tifs) %>% mutate( basename = tools::file_path_sans_ext(basename(filepath)), - variable = sub('_[0-9]{4}$', '', basename), + variable = sub('_(conc_)?[0-9]{4}$', '', basename), year = as.integer(sub('.*_', '', basename)) ) %>% - filter(variable %in% c(nadp_vars, 'Cond_est'), ! is.na(year)) %>% + filter(variable %in% nadp_vars, ! 
is.na(year)) %>% select(variable, year, filepath) +} +if(length(manual_dirs) > 0) { + for(d in manual_dirs) { + dname <- basename(d) + # Parse variable_conc_year pattern + yr <- as.integer(sub('.*_', '', dname)) + vr <- sub('_conc_[0-9]{4}$', '', dname) + if(! is.na(yr) && vr %in% nadp_vars) { + rast_file <- find_raster_in_dir(d) + if(! is.null(rast_file)) { + manual_entries[[d]] <- tibble(variable = vr, year = yr, + filepath = rast_file) + } + } + } +} + +if(length(manual_entries) > 0) { + manual_info <- bind_rows(manual_entries) download_log <- bind_rows(download_log, manual_info) %>% distinct(variable, year, .keep_all = TRUE) } # ---- extract raster values for each watershed ---- -extract_nadp_for_sheds <- function(raster_path, sheds_sf) { - - r <- try(rast(raster_path), silent = TRUE) - if(inherits(r, 'try-error')) return(NULL) - - # Reproject watersheds to match raster CRS - sheds_reproj <- st_transform(sheds_sf, crs(r)) - - # Crop raster to watershed extent (with buffer) to speed extraction - sheds_extent <- ext(vect(sheds_reproj)) + 0.5 # ~0.5 degree buffer - r_crop <- try(crop(r, sheds_extent), silent = TRUE) - if(inherits(r_crop, 'try-error')) r_crop <- r - - # Extract area-weighted mean for each polygon - vals <- exact_extract(r_crop, sheds_reproj, fun = 'mean') - - if(is.null(vals)) { - # Fallback to terra::extract - vals <- terra::extract(r_crop, vect(sheds_reproj), fun = mean, - na.rm = TRUE, weights = TRUE) - vals <- vals[, 2] - } - - return(vals) -} - # Use exactextractr if available, otherwise terra::extract use_exactextractr <- requireNamespace('exactextractr', quietly = TRUE) From b507a628080ea133214231d24a15a8922e97915e Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 14:01:46 -0700 Subject: [PATCH 22/37] fix: add retry logic with exponential backoff and polite delay for NADP downloads Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 52 +++++++++++++++++++++++----------- 1 file changed, 36 
insertions(+), 16 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 3c974be..9fbe100 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -100,26 +100,46 @@ download_nadp_raster <- function(variable, year, dest_dir) { ) })) + max_retries <- 4 + for(url in urls_to_try) { - resp <- try(GET(url, write_disk(zip_file, overwrite = TRUE), - timeout(120)), silent = TRUE) - - if(! inherits(resp, 'try-error') && status_code(resp) == 200) { - # Try to unzip - exdir <- file.path(dest_dir, paste0(variable, '_conc_', year)) - unzipped <- try(unzip(zip_file, exdir = exdir), silent = TRUE) - - if(! inherits(unzipped, 'try-error') && length(unzipped) > 0) { - rast_file <- find_raster_in_dir(exdir) - if(! is.null(rast_file)) { - message('Downloaded and extracted: ', variable, ' ', year) - file.remove(zip_file) - return(rast_file) + for(attempt in seq_len(max_retries)) { + resp <- try(GET(url, write_disk(zip_file, overwrite = TRUE), + timeout(120)), silent = TRUE) + + if(! inherits(resp, 'try-error') && status_code(resp) == 200) { + # Try to unzip + exdir <- file.path(dest_dir, paste0(variable, '_conc_', year)) + unzipped <- try(unzip(zip_file, exdir = exdir), silent = TRUE) + + if(! inherits(unzipped, 'try-error') && length(unzipped) > 0) { + rast_file <- find_raster_in_dir(exdir) + if(! 
is.null(rast_file)) { + message('Downloaded and extracted: ', variable, ' ', year) + file.remove(zip_file) + Sys.sleep(2) # polite delay between successful downloads + return(rast_file) + } } + + # Clean up failed extraction + if(dir.exists(exdir)) unlink(exdir, recursive = TRUE) + break # got 200 but extraction failed; try next URL } - # Clean up failed extraction - if(dir.exists(exdir)) unlink(exdir, recursive = TRUE) + if(file.exists(zip_file)) file.remove(zip_file) + + # If connection error, retry with backoff + if(inherits(resp, 'try-error') || + status_code(resp) %in% c(429, 500, 502, 503, 504)) { + wait <- 2^attempt + runif(1, 0, 1) + message(' Retry ', attempt, '/', max_retries, + ' for ', variable, ' ', year, + ' (waiting ', round(wait, 1), 's)') + Sys.sleep(wait) + } else { + break # non-retryable HTTP error (e.g. 404); try next URL + } } if(file.exists(zip_file)) file.remove(zip_file) From 0d115310fe426ccaab56e46a63f66e03cf9335d5 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 16:00:17 -0700 Subject: [PATCH 23/37] fix: compute estimated conductivity outside dplyr mutate to avoid variable scoping issues Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 9fbe100..d848367 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -337,22 +337,17 @@ ion_cols <- intersect(names(equiv_weights), names(nadp_wide)) if(length(ion_cols) > 0 && 'pH' %in% names(nadp_wide)) { - nadp_wide <- nadp_wide %>% - mutate( - # Convert each ion from mg/L to ueq/L, then multiply by equiv conductance - Cond_est = { - cond <- rep(0, n()) - for(ion in ion_cols) { - conc_mgl <- .data[[ion]] - conc_ueql <- conc_mgl / equiv_weights[ion] * 1000 - cond <- cond + conc_ueql * equiv_conductances[ion] / 1000 - } - # Add H+ contribution from pH - H_ueql 
<- 10^(-pH) * 1e6 # mol/L -> ueq/L - cond <- cond + H_ueql * H_conductance / 1000 - round(cond, 2) - } - ) + # Compute estimated conductivity outside dplyr to avoid scoping issues + cond <- rep(0, nrow(nadp_wide)) + for(ion in ion_cols) { + conc_mgl <- nadp_wide[[ion]] + conc_ueql <- conc_mgl / equiv_weights[ion] * 1000 + cond <- cond + conc_ueql * equiv_conductances[ion] / 1000 + } + # Add H+ contribution from pH + H_ueql <- 10^(-nadp_wide[['pH']]) * 1e6 # mol/L -> ueq/L + cond <- cond + H_ueql * H_conductance / 1000 + nadp_wide$Cond_est <- round(cond, 2) message('Estimated conductivity (Cond_est, uS/cm) computed from ion concentrations and pH.') From 1b7e79c6823b7ec94a5ed2e9df618bf7a820db56 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 16:49:21 -0700 Subject: [PATCH 24/37] feat: add direct H+ grid download and make conductivity estimation tolerant of missing ions Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 57 +++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index d848367..5a91b2c 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -24,7 +24,8 @@ dir.create(nadp_dir, showWarnings = FALSE) # Available from NADP's gridded data: https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/ # Ion grids are precipitation-weighted mean concentrations in mg/L -nadp_vars <- c('pH', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') +# H is hydrogen ion concentration (mg/L), provided directly by NADP +nadp_vars <- c('pH', 'H', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') years <- 1985:2024 # adjust range as needed # Equivalent weights (mg/meq) for converting mg/L to ueq/L @@ -35,8 +36,9 @@ equiv_weights <- c(Ca = 20.04, Mg = 12.15, K = 39.10, Na = 22.99, # Sources: CRC Handbook / Standard Methods equiv_conductances <- c(Ca = 59.5, Mg = 53.1, K = 73.5, Na = 50.1, Cl = 76.3, 
NO3 = 71.4, NH4 = 73.5, SO4 = 80.0) -# Note: H+ contributes too; will estimate from pH -H_conductance <- 349.8 +# H+ equivalent weight and conductance +equiv_weights <- c(equiv_weights, H = 1.008) +equiv_conductances <- c(equiv_conductances, H = 349.8) # Base URL pattern for NADP NTN concentration grids (annual) # URL pattern: https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/{var}_conc_{year}.zip @@ -45,6 +47,7 @@ base_url <- 'https://nadp.slh.wisc.edu/filelib/maps/NTN/grids' # NADP uses these filename prefixes for concentration grids nadp_var_prefixes <- c(pH = 'pH', + H = 'H', Ca = 'Ca', Mg = 'Mg', K = 'K', @@ -321,6 +324,7 @@ nadp_summary <- nadp_summary %>% variable == 'NO3' ~ 'Precipitation-weighted mean NO3 (mg/L)', variable == 'NH4' ~ 'Precipitation-weighted mean NH4 (mg/L)', variable == 'SO4' ~ 'Precipitation-weighted mean SO4 (mg/L)', + variable == 'H' ~ 'Precipitation-weighted mean H+ (mg/L)', TRUE ~ variable ) ) @@ -333,32 +337,55 @@ nadp_wide <- nadp_summary %>% # ---- estimate conductivity from ions and pH ---- -ion_cols <- intersect(names(equiv_weights), names(nadp_wide)) +all_ions <- names(equiv_weights) # includes H +ion_cols <- intersect(all_ions, names(nadp_wide)) -if(length(ion_cols) > 0 && 'pH' %in% names(nadp_wide)) { +if(length(ion_cols) > 0) { + + # If H not available directly, estimate from pH + if(! 
'H' %in% names(nadp_wide) && 'pH' %in% names(nadp_wide)) { + nadp_wide$H <- 10^(-nadp_wide[['pH']]) * 1.008 # mol/L -> mg/L + ion_cols <- intersect(all_ions, names(nadp_wide)) + message('H+ estimated from pH (direct H grid not available).') + } + + # Compute estimated conductivity, tolerating some missing ions + # Each ion contributes independently; NA ions are skipped per row + n_rows <- nrow(nadp_wide) + cond <- rep(0, n_rows) + n_ions_available <- rep(0L, n_rows) - # Compute estimated conductivity outside dplyr to avoid scoping issues - cond <- rep(0, nrow(nadp_wide)) for(ion in ion_cols) { conc_mgl <- nadp_wide[[ion]] conc_ueql <- conc_mgl / equiv_weights[ion] * 1000 - cond <- cond + conc_ueql * equiv_conductances[ion] / 1000 + contribution <- conc_ueql * equiv_conductances[ion] / 1000 + has_val <- !is.na(contribution) + cond[has_val] <- cond[has_val] + contribution[has_val] + n_ions_available[has_val] <- n_ions_available[has_val] + 1L } - # Add H+ contribution from pH - H_ueql <- 10^(-nadp_wide[['pH']]) * 1e6 # mol/L -> ueq/L - cond <- cond + H_ueql * H_conductance / 1000 + + # Set to NA if fewer than 7 of 9 ions are available (too incomplete) + max_possible <- length(ion_cols) + min_required <- max(max_possible - 2, 1) + cond[n_ions_available < min_required] <- NA_real_ + nadp_wide$Cond_est <- round(cond, 2) + nadp_wide$Cond_est_n_ions <- n_ions_available - message('Estimated conductivity (Cond_est, uS/cm) computed from ion concentrations and pH.') + message('Estimated conductivity (Cond_est, uS/cm) computed from ion concentrations.') + message(' Ions used: ', paste(ion_cols, collapse = ', ')) + message(' Min ions required per row: ', min_required, ' of ', max_possible) + message(' Rows with Cond_est: ', sum(!is.na(nadp_wide$Cond_est)), + ' of ', n_rows) # Also add to long format cond_long <- nadp_wide %>% - select(site_code, year, Cond_est) %>% + select(site_code, year, Cond_est, Cond_est_n_ions) %>% mutate(variable = 'Cond_est', - variable_description = 
'Estimated conductivity from ions + pH (uS/cm)') %>% + variable_description = 'Estimated conductivity from ions (uS/cm)') %>% rename(value = Cond_est) - nadp_summary <- bind_rows(nadp_summary, cond_long) + nadp_summary <- bind_rows(nadp_summary, select(cond_long, -Cond_est_n_ions)) } else { message('Not enough ion variables to estimate conductivity.') From 85751d9618a19c00ec911b1db83a545420bd9412 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 16:52:21 -0700 Subject: [PATCH 25/37] fix: use correct 'hplus' prefix and '_dep_' suffix for H+ NADP downloads Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 71 ++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 5a91b2c..5eccc1e 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -45,9 +45,10 @@ equiv_conductances <- c(equiv_conductances, H = 349.8) # Zips contain raster data (ArcGrid or GeoTIFF) base_url <- 'https://nadp.slh.wisc.edu/filelib/maps/NTN/grids' -# NADP uses these filename prefixes for concentration grids +# NADP uses these filename prefixes for concentration/deposition grids +# Note: H+ uses 'hplus' prefix and '_dep_' suffix; others use '_conc_' nadp_var_prefixes <- c(pH = 'pH', - H = 'H', + H = 'hplus', Ca = 'Ca', Mg = 'Mg', K = 'K', @@ -69,39 +70,58 @@ sheds_bbox <- st_bbox(sheds_wgs84) download_nadp_raster <- function(variable, year, dest_dir) { + prefix <- nadp_var_prefixes[variable] + suffixes <- c('_conc_', '_dep_') + # Check if we already have an extracted raster for this variable/year - extracted_dir <- file.path(dest_dir, paste0(variable, '_conc_', year)) - if(dir.exists(extracted_dir)) { - rast_file <- find_raster_in_dir(extracted_dir) - if(! 
is.null(rast_file)) { - message('Already extracted: ', variable, ' ', year) - return(rast_file) + for(sfx in suffixes) { + extracted_dir <- file.path(dest_dir, paste0(prefix, sfx, year)) + if(dir.exists(extracted_dir)) { + rast_file <- find_raster_in_dir(extracted_dir) + if(! is.null(rast_file)) { + message('Already extracted: ', variable, ' ', year) + return(rast_file) + } + } + # Also check using the variable name (e.g. H_conc_) + extracted_dir2 <- file.path(dest_dir, paste0(variable, sfx, year)) + if(extracted_dir2 != extracted_dir && dir.exists(extracted_dir2)) { + rast_file <- find_raster_in_dir(extracted_dir2) + if(! is.null(rast_file)) { + message('Already extracted: ', variable, ' ', year) + return(rast_file) + } } } # Also check for a .tif directly - tif_file <- file.path(dest_dir, paste0(variable, '_conc_', year, '.tif')) - if(file.exists(tif_file)) { - test <- try(rast(tif_file), silent = TRUE) - if(! inherits(test, 'try-error')) { - message('Already have: ', variable, ' ', year) - return(tif_file) + for(sfx in suffixes) { + for(pfx in c(prefix, variable)) { + tif_file <- file.path(dest_dir, paste0(pfx, sfx, year, '.tif')) + if(file.exists(tif_file)) { + test <- try(rast(tif_file), silent = TRUE) + if(! 
inherits(test, 'try-error')) { + message('Already have: ', variable, ' ', year) + return(tif_file) + } + } } } - prefix <- nadp_var_prefixes[variable] - zip_name <- paste0(prefix, '_conc_', year, '.zip') - zip_file <- file.path(dest_dir, zip_name) + zip_file <- file.path(dest_dir, paste0(prefix, '_', year, '.zip')) - # Try URL patterns (case variations in prefix) + # Try URL patterns: both _conc_ and _dep_ suffixes, case variations prefixes_to_try <- unique(c(prefix, tolower(prefix), toupper(prefix))) urls_to_try <- unlist(lapply(prefixes_to_try, function(p) { - zn <- paste0(p, '_conc_', year, '.zip') - c( - paste0(base_url, '/', year, '/', zn), - paste0(base_url, '/', year, '/', tolower(zn)) - ) + unlist(lapply(suffixes, function(sfx) { + zn <- paste0(p, sfx, year, '.zip') + c( + paste0(base_url, '/', year, '/', zn), + paste0(base_url, '/', year, '/', tolower(zn)) + ) + })) })) + urls_to_try <- unique(urls_to_try) max_retries <- 4 @@ -111,8 +131,9 @@ download_nadp_raster <- function(variable, year, dest_dir) { timeout(120)), silent = TRUE) if(! inherits(resp, 'try-error') && status_code(resp) == 200) { - # Try to unzip - exdir <- file.path(dest_dir, paste0(variable, '_conc_', year)) + # Try to unzip; use zip basename for directory name + zip_base <- tools::file_path_sans_ext(basename(url)) + exdir <- file.path(dest_dir, zip_base) unzipped <- try(unzip(zip_file, exdir = exdir), silent = TRUE) if(! 
inherits(unzipped, 'try-error') && length(unzipped) > 0) { From 05c3c7cbdccdd2f924f6d77a290a30008281e937 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 16:55:26 -0700 Subject: [PATCH 26/37] refactor: drop H+ deposition grid, estimate H+ concentration from pH instead Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 5eccc1e..25af2d9 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -24,8 +24,9 @@ dir.create(nadp_dir, showWarnings = FALSE) # Available from NADP's gridded data: https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/ # Ion grids are precipitation-weighted mean concentrations in mg/L -# H is hydrogen ion concentration (mg/L), provided directly by NADP -nadp_vars <- c('pH', 'H', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') +# H+ is only available as deposition (_dep_), not concentration, so we +# estimate H+ concentration from pH: [H+] mg/L = 10^(-pH) * 1.008 * 1000 +nadp_vars <- c('pH', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') years <- 1985:2024 # adjust range as needed # Equivalent weights (mg/meq) for converting mg/L to ueq/L @@ -45,10 +46,8 @@ equiv_conductances <- c(equiv_conductances, H = 349.8) # Zips contain raster data (ArcGrid or GeoTIFF) base_url <- 'https://nadp.slh.wisc.edu/filelib/maps/NTN/grids' -# NADP uses these filename prefixes for concentration/deposition grids -# Note: H+ uses 'hplus' prefix and '_dep_' suffix; others use '_conc_' +# NADP filename prefixes for concentration grids (_conc_ suffix only) nadp_var_prefixes <- c(pH = 'pH', - H = 'hplus', Ca = 'Ca', Mg = 'Mg', K = 'K', @@ -71,7 +70,7 @@ sheds_bbox <- st_bbox(sheds_wgs84) download_nadp_raster <- function(variable, year, dest_dir) { prefix <- nadp_var_prefixes[variable] - suffixes <- c('_conc_', '_dep_') + suffixes <- 
c('_conc_') # Check if we already have an extracted raster for this variable/year for(sfx in suffixes) { @@ -345,7 +344,6 @@ nadp_summary <- nadp_summary %>% variable == 'NO3' ~ 'Precipitation-weighted mean NO3 (mg/L)', variable == 'NH4' ~ 'Precipitation-weighted mean NH4 (mg/L)', variable == 'SO4' ~ 'Precipitation-weighted mean SO4 (mg/L)', - variable == 'H' ~ 'Precipitation-weighted mean H+ (mg/L)', TRUE ~ variable ) ) @@ -363,11 +361,12 @@ ion_cols <- intersect(all_ions, names(nadp_wide)) if(length(ion_cols) > 0) { - # If H not available directly, estimate from pH - if(! 'H' %in% names(nadp_wide) && 'pH' %in% names(nadp_wide)) { - nadp_wide$H <- 10^(-nadp_wide[['pH']]) * 1.008 # mol/L -> mg/L + # Estimate H+ concentration from pH: 10^(-pH) gives mol/L, + # multiply by molecular weight (1.008) and 1000 to get mg/L + if('pH' %in% names(nadp_wide)) { + nadp_wide$H <- 10^(-nadp_wide[['pH']]) * 1.008 * 1000 ion_cols <- intersect(all_ions, names(nadp_wide)) - message('H+ estimated from pH (direct H grid not available).') + message('H+ concentration estimated from pH grid.') } # Compute estimated conductivity, tolerating some missing ions From 51443662f00f4d23794568f530d33f33f482ff18 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Tue, 24 Feb 2026 16:57:06 -0700 Subject: [PATCH 27/37] refactor: separate H+ constants from ion vectors and estimate from pH only in conductivity calc Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 25af2d9..314fa85 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -37,9 +37,10 @@ equiv_weights <- c(Ca = 20.04, Mg = 12.15, K = 39.10, Na = 22.99, # Sources: CRC Handbook / Standard Methods equiv_conductances <- c(Ca = 59.5, Mg = 53.1, K = 73.5, Na = 50.1, Cl = 76.3, NO3 = 71.4, NH4 = 73.5, SO4 = 80.0) -# H+ 
equivalent weight and conductance -equiv_weights <- c(equiv_weights, H = 1.008) -equiv_conductances <- c(equiv_conductances, H = 349.8) +# H+ equivalent weight and conductance (used in conductivity estimation; +# H+ concentration is estimated from pH, not downloaded separately) +H_equiv_weight <- 1.008 +H_equiv_conductance <- 349.8 # Base URL pattern for NADP NTN concentration grids (annual) # URL pattern: https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/{year}/{var}_conc_{year}.zip @@ -356,19 +357,10 @@ nadp_wide <- nadp_summary %>% # ---- estimate conductivity from ions and pH ---- -all_ions <- names(equiv_weights) # includes H -ion_cols <- intersect(all_ions, names(nadp_wide)) +ion_cols <- intersect(names(equiv_weights), names(nadp_wide)) if(length(ion_cols) > 0) { - # Estimate H+ concentration from pH: 10^(-pH) gives mol/L, - # multiply by molecular weight (1.008) and 1000 to get mg/L - if('pH' %in% names(nadp_wide)) { - nadp_wide$H <- 10^(-nadp_wide[['pH']]) * 1.008 * 1000 - ion_cols <- intersect(all_ions, names(nadp_wide)) - message('H+ concentration estimated from pH grid.') - } - # Compute estimated conductivity, tolerating some missing ions # Each ion contributes independently; NA ions are skipped per row n_rows <- nrow(nadp_wide) @@ -384,8 +376,19 @@ if(length(ion_cols) > 0) { n_ions_available[has_val] <- n_ions_available[has_val] + 1L } - # Set to NA if fewer than 7 of 9 ions are available (too incomplete) - max_possible <- length(ion_cols) + # Add H+ contribution estimated from pH + if('pH' %in% names(nadp_wide)) { + H_mgl <- 10^(-nadp_wide[['pH']]) * H_equiv_weight * 1000 + H_ueql <- H_mgl / H_equiv_weight * 1000 + H_contribution <- H_ueql * H_equiv_conductance / 1000 + has_H <- !is.na(H_contribution) + cond[has_H] <- cond[has_H] + H_contribution[has_H] + n_ions_available[has_H] <- n_ions_available[has_H] + 1L + message('H+ concentration estimated from pH for conductivity calculation.') + } + + # Total possible ions = 8 measured + H from pH = 9 + 
max_possible <- length(ion_cols) + as.integer('pH' %in% names(nadp_wide)) min_required <- max(max_possible - 2, 1) cond[n_ions_available < min_required] <- NA_real_ From b375e809ebb2df8459948ea648ada543a8ac02ff Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 12:02:00 -0700 Subject: [PATCH 28/37] docs: clarify equivalent weight comments, add units column, and simplify H+ conductivity calculation Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 43 ++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 314fa85..e18d195 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -29,7 +29,8 @@ dir.create(nadp_dir, showWarnings = FALSE) nadp_vars <- c('pH', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') years <- 1985:2024 # adjust range as needed -# Equivalent weights (mg/meq) for converting mg/L to ueq/L +# Equivalent weights (mg/meq = g/eq = molecular weight / ionic charge) +# Used to convert mg/L -> meq/L (divide), or mg/L -> ueq/L (divide, then * 1000) equiv_weights <- c(Ca = 20.04, Mg = 12.15, K = 39.10, Na = 22.99, Cl = 35.45, NO3 = 62.00, NH4 = 18.04, SO4 = 48.03) @@ -332,20 +333,26 @@ for(i in seq_len(nrow(download_log))) { nadp_summary <- bind_rows(results) -# Add descriptive labels +# Add descriptive labels and units nadp_summary <- nadp_summary %>% mutate( variable_description = case_when( variable == 'pH' ~ 'Precipitation-weighted mean pH', - variable == 'Ca' ~ 'Precipitation-weighted mean Ca (mg/L)', - variable == 'Mg' ~ 'Precipitation-weighted mean Mg (mg/L)', - variable == 'K' ~ 'Precipitation-weighted mean K (mg/L)', - variable == 'Na' ~ 'Precipitation-weighted mean Na (mg/L)', - variable == 'Cl' ~ 'Precipitation-weighted mean Cl (mg/L)', - variable == 'NO3' ~ 'Precipitation-weighted mean NO3 (mg/L)', - variable == 'NH4' ~ 'Precipitation-weighted mean NH4 (mg/L)', 
- variable == 'SO4' ~ 'Precipitation-weighted mean SO4 (mg/L)', + variable == 'Ca' ~ 'Precipitation-weighted mean Ca concentration', + variable == 'Mg' ~ 'Precipitation-weighted mean Mg concentration', + variable == 'K' ~ 'Precipitation-weighted mean K concentration', + variable == 'Na' ~ 'Precipitation-weighted mean Na concentration', + variable == 'Cl' ~ 'Precipitation-weighted mean Cl concentration', + variable == 'NO3' ~ 'Precipitation-weighted mean NO3 concentration', + variable == 'NH4' ~ 'Precipitation-weighted mean NH4 concentration', + variable == 'SO4' ~ 'Precipitation-weighted mean SO4 concentration', TRUE ~ variable + ), + units = case_when( + variable == 'pH' ~ 'unitless', + variable %in% c('Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') ~ 'mg/L', + variable == 'Cond_est' ~ 'uS/cm', + TRUE ~ NA_character_ ) ) @@ -368,19 +375,20 @@ if(length(ion_cols) > 0) { n_ions_available <- rep(0L, n_rows) for(ion in ion_cols) { - conc_mgl <- nadp_wide[[ion]] - conc_ueql <- conc_mgl / equiv_weights[ion] * 1000 - contribution <- conc_ueql * equiv_conductances[ion] / 1000 + conc_mgl <- nadp_wide[[ion]] # mg/L (from NADP grid) + conc_ueql <- conc_mgl / equiv_weights[ion] * 1000 # mg/L -> ueq/L + contribution <- conc_ueql * equiv_conductances[ion] / 1000 # ueq/L * (uS/cm)/(ueq/L) / 1000 -> uS/cm has_val <- !is.na(contribution) cond[has_val] <- cond[has_val] + contribution[has_val] n_ions_available[has_val] <- n_ions_available[has_val] + 1L } # Add H+ contribution estimated from pH + # pH = -log10([H+] in mol/L), so [H+] = 10^(-pH) mol/L + # Convert mol/L -> ueq/L: for H+ (charge=1), 1 mol/L = 1e6 ueq/L if('pH' %in% names(nadp_wide)) { - H_mgl <- 10^(-nadp_wide[['pH']]) * H_equiv_weight * 1000 - H_ueql <- H_mgl / H_equiv_weight * 1000 - H_contribution <- H_ueql * H_equiv_conductance / 1000 + H_ueql <- 10^(-nadp_wide[['pH']]) * 1e6 # mol/L -> ueq/L + H_contribution <- H_ueql * H_equiv_conductance / 1000 # -> uS/cm has_H <- !is.na(H_contribution) cond[has_H] <- 
cond[has_H] + H_contribution[has_H] n_ions_available[has_H] <- n_ions_available[has_H] + 1L @@ -405,7 +413,8 @@ if(length(ion_cols) > 0) { cond_long <- nadp_wide %>% select(site_code, year, Cond_est, Cond_est_n_ions) %>% mutate(variable = 'Cond_est', - variable_description = 'Estimated conductivity from ions (uS/cm)') %>% + variable_description = 'Estimated conductivity from ions and pH', + units = 'uS/cm') %>% rename(value = Cond_est) nadp_summary <- bind_rows(nadp_summary, select(cond_long, -Cond_est_n_ions)) From d48646c960b1e0ddb299b7c38c93c97d758f90fb Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 12:10:40 -0700 Subject: [PATCH 29/37] chore: add commented-out debug code for macrosheds product loading --- summarize_nadp_for_watershed.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index e18d195..a058dbe 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -448,3 +448,11 @@ nadp_summary %>% ) %>% print(n = Inf) +# aa <- ms_load_product( +# macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', +# prodname = 'ws_attr_timeseries:climate', +# site_codes = sites +# ) +# unique(aa$var) +# filter(aa, var == 'Na_flux_mean', site_code == 'ALBION') +# macrosheds::ms_vars_ws \ No newline at end of file From b04d3a36349172bab2ffdf8f3ef63890282aa419 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 12:10:42 -0700 Subject: [PATCH 30/37] feat: add fallback URL pattern for NADP files using {var}_{year} naming convention Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index a058dbe..72544c1 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -72,7 +72,7 @@ sheds_bbox <- st_bbox(sheds_wgs84) download_nadp_raster <- function(variable, 
year, dest_dir) { prefix <- nadp_var_prefixes[variable] - suffixes <- c('_conc_') + suffixes <- c('_conc_', '_') # Check if we already have an extracted raster for this variable/year for(sfx in suffixes) { @@ -111,7 +111,8 @@ download_nadp_raster <- function(variable, year, dest_dir) { zip_file <- file.path(dest_dir, paste0(prefix, '_', year, '.zip')) - # Try URL patterns: both _conc_ and _dep_ suffixes, case variations + # Try URL patterns: _conc_ and bare _ suffixes, case variations + # e.g. pH_conc_2000.zip (older) vs pH_2023.zip (newer) prefixes_to_try <- unique(c(prefix, tolower(prefix), toupper(prefix))) urls_to_try <- unlist(lapply(prefixes_to_try, function(p) { unlist(lapply(suffixes, function(sfx) { @@ -258,9 +259,9 @@ if(length(manual_tifs) > 0) { if(length(manual_dirs) > 0) { for(d in manual_dirs) { dname <- basename(d) - # Parse variable_conc_year pattern + # Parse variable_conc_year or variable_year pattern yr <- as.integer(sub('.*_', '', dname)) - vr <- sub('_conc_[0-9]{4}$', '', dname) + vr <- sub('_(conc_)?[0-9]{4}$', '', dname) if(! is.na(yr) && vr %in% nadp_vars) { rast_file <- find_raster_in_dir(d) if(! 
is.null(rast_file)) { @@ -455,4 +456,4 @@ nadp_summary %>% # ) # unique(aa$var) # filter(aa, var == 'Na_flux_mean', site_code == 'ALBION') -# macrosheds::ms_vars_ws \ No newline at end of file +# macrosheds::ms_vars_ws From 040f71fa5d0e040e64a6f9b94e3de93ca8d9f76a Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 12:34:39 -0700 Subject: [PATCH 31/37] fix: ensure single raster layer is used during NADP extraction Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 72544c1..86645aa 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -303,6 +303,12 @@ for(i in seq_len(nrow(download_log))) { next } + # Ensure single layer (some ArcGrids may load multiple bands) + if(nlyr(r) > 1) { + message(' Multiple layers detected, using first layer only') + r <- r[[1]] + } + # Reproject watersheds to match raster CRS sheds_reproj <- st_transform(sheds_wgs84, crs(r)) From 6b5e1647fb7ba41848aaa7f1ec2003caa5480b43 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 13:27:10 -0700 Subject: [PATCH 32/37] feat: add verification of NADP flux calculations against macrosheds data for ALBION site --- summarize_nadp_for_watershed.R | 36 ++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 86645aa..fa7835d 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -455,11 +455,31 @@ nadp_summary %>% ) %>% print(n = Inf) -# aa <- ms_load_product( -# macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', -# prodname = 'ws_attr_timeseries:climate', -# site_codes = sites -# ) -# unique(aa$var) -# filter(aa, var == 'Na_flux_mean', site_code == 'ALBION') -# macrosheds::ms_vars_ws +## verify fluxes match +library(lubridate) + +clim <- ms_load_product( 
+ macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + prodname = 'ws_attr_timeseries:climate', + site_codes = 'ALBION', + warn = FALSE +) +p <- ms_load_product( + macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + prodname = 'precipitation', + site_codes = 'ALBION', + warn = FALSE +) +albion_na_flux_1985 <- filter(clim, var == 'Na_flux_mean') %>% slice(1) +# filter(clim, var == 'precip_median', year == 1985) %>% summarize(val = sum(val)) +albion_p_1985 <- filter(p, year(date) == 1985) %>% summarize(val = sum(val)) +# albion_ha <- ms_load_sites() %>% filter(site_code == 'ALBION') %>% pull(ws_area_ha) + +albion_na_conc_1985_claude = filter(nadp_summary, variable == 'Na', site_code == 'ALBION', year == 1985) +#mg/L kg/ha mg mm m^2 mm L +albion_na_conc_1985_macrosheds = (albion_na_flux_1985$val * 10e6 * 10e3) / (10e4 * albion_p_1985 * 1000) + +# library(terra) +# r <- rast("/home/mike/git/macrosheds/data_acquisition/data/spatial/ndap/1985/dep_na_1985.tif") +# dd = feather::read_feather('/home/mike/git/macrosheds/data_acquisition/data/lter/niwot/ws_traits/nadp/sum_ALBION.feather') +# dd %>% filter(year == 1985, var == 'ch_annual_Na_flux_mean') From 4b3a511f2d6d6fb6e876ff03ce461d2b2a510003 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 13:27:13 -0700 Subject: [PATCH 33/37] feat: add NADP Na flux grid verification and fix unit conversion (10eN -> 1eN) Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 60 ++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index fa7835d..b687fb4 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -455,6 +455,51 @@ nadp_summary %>% ) %>% print(n = Inf) +## ---- verify: compare Na flux grid to concentration grid × precip ---- + +# Download 1985 Na deposition grid for comparison +na_dep_url <- 
'https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/1985/Na_dep_1985.zip' +na_dep_zip <- file.path(nadp_dir, 'Na_dep_1985.zip') +na_dep_dir <- file.path(nadp_dir, 'Na_dep_1985') + +if(! dir.exists(na_dep_dir)) { + resp <- GET(na_dep_url, write_disk(na_dep_zip, overwrite = TRUE), timeout(120)) + if(status_code(resp) == 200) { + unzip(na_dep_zip, exdir = na_dep_dir) + file.remove(na_dep_zip) + message('Downloaded Na deposition grid for 1985') + } else { + stop('Failed to download Na deposition grid') + } +} + +na_dep_rast_path <- find_raster_in_dir(na_dep_dir) +na_dep_r <- rast(na_dep_rast_path) +if(nlyr(na_dep_r) > 1) na_dep_r <- na_dep_r[[1]] + +albion_shed <- sheds_wgs84 %>% filter(site_code == 'ALBION') +albion_reproj <- st_transform(albion_shed, crs(na_dep_r)) + +albion_ext <- ext(vect(albion_reproj)) + 0.5 +na_dep_crop <- crop(na_dep_r, albion_ext) + +if(use_exactextractr) { + na_dep_val <- exact_extract(na_dep_crop, albion_reproj, fun = 'mean') +} else { + na_dep_ex <- terra::extract(na_dep_crop, vect(albion_reproj), fun = mean, + na.rm = TRUE, weights = TRUE) + na_dep_val <- na_dep_ex[, 2] +} + +message('\n---- Na flux verification (ALBION, 1985) ----') +message('Na deposition from NADP _dep_ grid (kg/ha): ', round(na_dep_val, 4)) + +# Get Na concentration from our extraction +albion_na_conc_1985 <- nadp_wide %>% + filter(site_code == 'ALBION', year == 1985) %>% + pull(Na) +message('Na concentration from NADP _conc_ grid (mg/L): ', round(albion_na_conc_1985, 4)) + ## verify fluxes match library(lubridate) @@ -476,8 +521,19 @@ albion_p_1985 <- filter(p, year(date) == 1985) %>% summarize(val = sum(val)) # albion_ha <- ms_load_sites() %>% filter(site_code == 'ALBION') %>% pull(ws_area_ha) albion_na_conc_1985_claude = filter(nadp_summary, variable == 'Na', site_code == 'ALBION', year == 1985) -#mg/L kg/ha mg mm m^2 mm L -albion_na_conc_1985_macrosheds = (albion_na_flux_1985$val * 10e6 * 10e3) / (10e4 * albion_p_1985 * 1000) + +# Convert flux (kg/ha) to 
concentration (mg/L) using precip depth (mm) +# kg/ha -> mg/ha: multiply by 1e6 +# mm precip -> L/ha: multiply by 1e4 (1 mm on 1 ha = 10,000 L) +# concentration = (flux_kg_ha * 1e6) / (precip_mm * 1e4) +albion_na_conc_1985_macrosheds = (albion_na_flux_1985$val * 1e6) / (albion_p_1985$val * 1e4) + +# Also compute from the NADP dep grid directly +albion_na_conc_1985_from_dep = (na_dep_val * 1e6) / (albion_p_1985$val * 1e4) + +message('Na conc from macrosheds flux / precip (mg/L): ', round(albion_na_conc_1985_macrosheds, 4)) +message('Na conc from NADP dep grid / precip (mg/L): ', round(albion_na_conc_1985_from_dep, 4)) +message('Na conc from NADP conc grid directly (mg/L): ', round(albion_na_conc_1985_claude$value, 4)) # library(terra) # r <- rast("/home/mike/git/macrosheds/data_acquisition/data/spatial/ndap/1985/dep_na_1985.tif") From 496a90c473e3447f7a09e86ee1d156d6639c7b2d Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 13:35:43 -0700 Subject: [PATCH 34/37] feat: add PRISM annual precip check to cross-validate macrosheds precipitation totals Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index b687fb4..95c5560 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -534,6 +534,49 @@ albion_na_conc_1985_from_dep = (na_dep_val * 1e6) / (albion_p_1985$val * 1e4) message('Na conc from macrosheds flux / precip (mg/L): ', round(albion_na_conc_1985_macrosheds, 4)) message('Na conc from NADP dep grid / precip (mg/L): ', round(albion_na_conc_1985_from_dep, 4)) message('Na conc from NADP conc grid directly (mg/L): ', round(albion_na_conc_1985_claude$value, 4)) +message('Precip from macrosheds (mm): ', round(albion_p_1985$val, 1)) + +# ---- independent precip check: PRISM annual total ---- + +if(requireNamespace('prism', quietly = TRUE)) { + 
library(prism) + + prism_dir <- file.path(nadp_dir, 'prism') + dir.create(prism_dir, showWarnings = FALSE) + prism_set_dl_dir(prism_dir) + + # Download 1985 annual precip (4km resolution) + get_prism_annual(type = 'ppt', years = 1985, keepZip = FALSE) + + # Find the downloaded raster + prism_files <- prism_archive_ls() + ppt_file <- prism_files[grep('ppt.*1985', prism_files)] + ppt_path <- pd_to_file(ppt_file) + + ppt_r <- rast(ppt_path) + albion_ppt_reproj <- st_transform(albion_shed, crs(ppt_r)) + + if(use_exactextractr) { + prism_ppt_val <- exact_extract(ppt_r, albion_ppt_reproj, fun = 'mean') + } else { + prism_ppt_ex <- terra::extract(ppt_r, vect(albion_ppt_reproj), fun = mean, + na.rm = TRUE, weights = TRUE) + prism_ppt_val <- prism_ppt_ex[, 2] + } + + message('Precip from PRISM annual grid (mm): ', round(prism_ppt_val, 1)) + + # Recompute Na concentration using PRISM precip + albion_na_conc_1985_prism_ms <- (albion_na_flux_1985$val * 1e6) / (prism_ppt_val * 1e4) + albion_na_conc_1985_prism_dep <- (na_dep_val * 1e6) / (prism_ppt_val * 1e4) + + message('Na conc from macrosheds flux / PRISM precip (mg/L): ', round(albion_na_conc_1985_prism_ms, 4)) + message('Na conc from NADP dep grid / PRISM precip (mg/L): ', round(albion_na_conc_1985_prism_dep, 4)) + +} else { + message('Install the prism package for independent precip verification:') + message(' install.packages("prism")') +} # library(terra) # r <- rast("/home/mike/git/macrosheds/data_acquisition/data/spatial/ndap/1985/dep_na_1985.tif") From 2c9bbf5450cc6301aa01b0af9d962daf5ed47303 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 13:42:38 -0700 Subject: [PATCH 35/37] feat: add detailed diagnostics for NADP Na concentration grid unit mismatch investigation Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 
95c5560..b663b24 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -500,6 +500,71 @@ albion_na_conc_1985 <- nadp_wide %>% pull(Na) message('Na concentration from NADP _conc_ grid (mg/L): ', round(albion_na_conc_1985, 4)) +# ---- detailed diagnostics for the concentration grid ---- +na_conc_1985_path <- download_log %>% + filter(variable == 'Na', year == 1985) %>% + pull(filepath) + +if(length(na_conc_1985_path) > 0 && !is.na(na_conc_1985_path)) { + na_conc_r <- rast(na_conc_1985_path) + if(nlyr(na_conc_r) > 1) na_conc_r <- na_conc_r[[1]] + + message('\n---- Na conc grid diagnostics ----') + message('Raster file: ', na_conc_1985_path) + message('CRS: ', crs(na_conc_r, describe = TRUE)$name) + message('Resolution: ', paste(res(na_conc_r), collapse = ' x ')) + message('Extent: ', paste(round(as.vector(ext(na_conc_r)), 4), collapse = ', ')) + message('Global value range: min=', round(global(na_conc_r, 'min', na.rm=TRUE)[[1]], 4), + ' max=', round(global(na_conc_r, 'max', na.rm=TRUE)[[1]], 4), + ' mean=', round(global(na_conc_r, 'mean', na.rm=TRUE)[[1]], 4)) + + # Extract at ALBION centroid (point extraction, no area averaging) + albion_centroid <- st_centroid(albion_shed) + albion_pt_reproj <- st_transform(albion_centroid, crs(na_conc_r)) + pt_val <- terra::extract(na_conc_r, vect(albion_pt_reproj)) + message('Point extraction at ALBION centroid: ', pt_val[1, 2]) + + # Also extract from dep grid at same point for comparison + albion_pt_dep <- st_transform(albion_centroid, crs(na_dep_r)) + pt_dep_val <- terra::extract(na_dep_r, vect(albion_pt_dep)) + message('Na dep point extraction at ALBION centroid (kg/ha): ', pt_dep_val[1, 2]) + + # Check if conc grid might be in different units + # If truly mg/L, Na in precip should be ~0.05-0.5 mg/L for most of CONUS + # If the grid mean is >> 1, units are likely NOT mg/L + grid_mean <- global(na_conc_r, 'mean', na.rm = TRUE)[[1]] + if(grid_mean > 5) { + message('\nWARNING: Grid mean (', 
round(grid_mean, 2), + ') is very high for Na concentration in mg/L.') + message('The grid may use different units. Check NADP metadata.') + message('Possible interpretations:') + message(' If ug/L: divide by 1000 -> ', round(albion_na_conc_1985 / 1000, 4), ' mg/L') + message(' If ueq/L: multiply by equiv_weight/1000 -> ', + round(albion_na_conc_1985 * 22.99 / 1000, 4), ' mg/L') + message(' If 100*mg/L (scaled int): divide by 100 -> ', + round(albion_na_conc_1985 / 100, 4), ' mg/L') + + # Back-calculate expected conc from dep grid and precip + message('\nExpected Na conc from dep/precip: ~0.1 mg/L') + message('Ratio of grid value to expected: ', + round(albion_na_conc_1985 / 0.108, 1), 'x') + + # Check if ratio is close to a known conversion factor + ratio <- albion_na_conc_1985 / 0.108 + message('If ratio ~ 1000: grid is in ug/L') + message('If ratio ~ ', round(1000/22.99, 1), ': grid is in ueq/L') + message('Actual ratio: ', round(ratio, 1)) + } + + # Compare CRS of conc vs dep grids + message('\nNa conc grid CRS: ', crs(na_conc_r, proj = TRUE)) + message('Na dep grid CRS: ', crs(na_dep_r, proj = TRUE)) + + # Check if the raster might have a scale/offset factor + message('Raster scale factor: ', na_conc_r@ptr$source.get_scale()) + message('Raster offset: ', na_conc_r@ptr$source.get_offset()) +} + ## verify fluxes match library(lubridate) From 4b56e89d03156a4f7d1b82ff7be641c6b9698044 Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 13:58:45 -0700 Subject: [PATCH 36/37] fix: convert Na, K, Mg from ug/L to mg/L and remove verification code Co-authored-by: aider (anthropic/claude-opus-4-6) --- summarize_nadp_for_watershed.R | 207 +++------------------------------ 1 file changed, 13 insertions(+), 194 deletions(-) diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index b663b24..846e381 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -23,7 +23,9 @@ dir.create(nadp_dir, showWarnings = FALSE) # 
Conductivity is not provided directly, but can be estimated from ions. # Available from NADP's gridded data: https://nadp.slh.wisc.edu/maps-data/ntn-gradient-maps/ -# Ion grids are precipitation-weighted mean concentrations in mg/L +# Ion grids are precipitation-weighted mean concentrations. +# IMPORTANT: NADP reports most ions in mg/L, but Na, K, and Mg grids are +# in ug/L. We convert these to mg/L after extraction. # H+ is only available as deposition (_dep_), not concentration, so we # estimate H+ concentration from pH: [H+] mg/L = 10^(-pH) * 1.008 * 1000 nadp_vars <- c('pH', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') @@ -34,6 +36,9 @@ years <- 1985:2024 # adjust range as needed equiv_weights <- c(Ca = 20.04, Mg = 12.15, K = 39.10, Na = 22.99, Cl = 35.45, NO3 = 62.00, NH4 = 18.04, SO4 = 48.03) +# Variables whose NADP grids are in ug/L rather than mg/L +vars_in_ugl <- c('Na', 'K', 'Mg') + # Limiting equivalent conductances at 25C (uS/cm per ueq/L) # Sources: CRC Handbook / Standard Methods equiv_conductances <- c(Ca = 59.5, Mg = 53.1, K = 73.5, Na = 50.1, @@ -326,6 +331,12 @@ for(i in seq_len(nrow(download_log))) { vals <- ex[, 2] } + # NADP reports Na, K, Mg in ug/L; convert to mg/L + if(row$variable %in% vars_in_ugl) { + vals <- vals / 1000 + message(' Converted ', row$variable, ' from ug/L to mg/L') + } + res <- tibble( site_code = sheds_wgs84$site_code, variable = row$variable, @@ -357,7 +368,7 @@ nadp_summary <- nadp_summary %>% ), units = case_when( variable == 'pH' ~ 'unitless', - variable %in% c('Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') ~ 'mg/L', + variable %in% c('Ca', 'Mg', 'K', 'Na', 'Cl', 'NO3', 'NH4', 'SO4') ~ 'mg/L (converted from ug/L for Na, K, Mg)', variable == 'Cond_est' ~ 'uS/cm', TRUE ~ NA_character_ ) @@ -455,195 +466,3 @@ nadp_summary %>% ) %>% print(n = Inf) -## ---- verify: compare Na flux grid to concentration grid × precip ---- - -# Download 1985 Na deposition grid for comparison -na_dep_url <- 
'https://nadp.slh.wisc.edu/filelib/maps/NTN/grids/1985/Na_dep_1985.zip' -na_dep_zip <- file.path(nadp_dir, 'Na_dep_1985.zip') -na_dep_dir <- file.path(nadp_dir, 'Na_dep_1985') - -if(! dir.exists(na_dep_dir)) { - resp <- GET(na_dep_url, write_disk(na_dep_zip, overwrite = TRUE), timeout(120)) - if(status_code(resp) == 200) { - unzip(na_dep_zip, exdir = na_dep_dir) - file.remove(na_dep_zip) - message('Downloaded Na deposition grid for 1985') - } else { - stop('Failed to download Na deposition grid') - } -} - -na_dep_rast_path <- find_raster_in_dir(na_dep_dir) -na_dep_r <- rast(na_dep_rast_path) -if(nlyr(na_dep_r) > 1) na_dep_r <- na_dep_r[[1]] - -albion_shed <- sheds_wgs84 %>% filter(site_code == 'ALBION') -albion_reproj <- st_transform(albion_shed, crs(na_dep_r)) - -albion_ext <- ext(vect(albion_reproj)) + 0.5 -na_dep_crop <- crop(na_dep_r, albion_ext) - -if(use_exactextractr) { - na_dep_val <- exact_extract(na_dep_crop, albion_reproj, fun = 'mean') -} else { - na_dep_ex <- terra::extract(na_dep_crop, vect(albion_reproj), fun = mean, - na.rm = TRUE, weights = TRUE) - na_dep_val <- na_dep_ex[, 2] -} - -message('\n---- Na flux verification (ALBION, 1985) ----') -message('Na deposition from NADP _dep_ grid (kg/ha): ', round(na_dep_val, 4)) - -# Get Na concentration from our extraction -albion_na_conc_1985 <- nadp_wide %>% - filter(site_code == 'ALBION', year == 1985) %>% - pull(Na) -message('Na concentration from NADP _conc_ grid (mg/L): ', round(albion_na_conc_1985, 4)) - -# ---- detailed diagnostics for the concentration grid ---- -na_conc_1985_path <- download_log %>% - filter(variable == 'Na', year == 1985) %>% - pull(filepath) - -if(length(na_conc_1985_path) > 0 && !is.na(na_conc_1985_path)) { - na_conc_r <- rast(na_conc_1985_path) - if(nlyr(na_conc_r) > 1) na_conc_r <- na_conc_r[[1]] - - message('\n---- Na conc grid diagnostics ----') - message('Raster file: ', na_conc_1985_path) - message('CRS: ', crs(na_conc_r, describe = TRUE)$name) - message('Resolution: ', 
paste(res(na_conc_r), collapse = ' x ')) - message('Extent: ', paste(round(as.vector(ext(na_conc_r)), 4), collapse = ', ')) - message('Global value range: min=', round(global(na_conc_r, 'min', na.rm=TRUE)[[1]], 4), - ' max=', round(global(na_conc_r, 'max', na.rm=TRUE)[[1]], 4), - ' mean=', round(global(na_conc_r, 'mean', na.rm=TRUE)[[1]], 4)) - - # Extract at ALBION centroid (point extraction, no area averaging) - albion_centroid <- st_centroid(albion_shed) - albion_pt_reproj <- st_transform(albion_centroid, crs(na_conc_r)) - pt_val <- terra::extract(na_conc_r, vect(albion_pt_reproj)) - message('Point extraction at ALBION centroid: ', pt_val[1, 2]) - - # Also extract from dep grid at same point for comparison - albion_pt_dep <- st_transform(albion_centroid, crs(na_dep_r)) - pt_dep_val <- terra::extract(na_dep_r, vect(albion_pt_dep)) - message('Na dep point extraction at ALBION centroid (kg/ha): ', pt_dep_val[1, 2]) - - # Check if conc grid might be in different units - # If truly mg/L, Na in precip should be ~0.05-0.5 mg/L for most of CONUS - # If the grid mean is >> 1, units are likely NOT mg/L - grid_mean <- global(na_conc_r, 'mean', na.rm = TRUE)[[1]] - if(grid_mean > 5) { - message('\nWARNING: Grid mean (', round(grid_mean, 2), - ') is very high for Na concentration in mg/L.') - message('The grid may use different units. 
Check NADP metadata.') - message('Possible interpretations:') - message(' If ug/L: divide by 1000 -> ', round(albion_na_conc_1985 / 1000, 4), ' mg/L') - message(' If ueq/L: multiply by equiv_weight/1000 -> ', - round(albion_na_conc_1985 * 22.99 / 1000, 4), ' mg/L') - message(' If 100*mg/L (scaled int): divide by 100 -> ', - round(albion_na_conc_1985 / 100, 4), ' mg/L') - - # Back-calculate expected conc from dep grid and precip - message('\nExpected Na conc from dep/precip: ~0.1 mg/L') - message('Ratio of grid value to expected: ', - round(albion_na_conc_1985 / 0.108, 1), 'x') - - # Check if ratio is close to a known conversion factor - ratio <- albion_na_conc_1985 / 0.108 - message('If ratio ~ 1000: grid is in ug/L') - message('If ratio ~ ', round(1000/22.99, 1), ': grid is in ueq/L') - message('Actual ratio: ', round(ratio, 1)) - } - - # Compare CRS of conc vs dep grids - message('\nNa conc grid CRS: ', crs(na_conc_r, proj = TRUE)) - message('Na dep grid CRS: ', crs(na_dep_r, proj = TRUE)) - - # Check if the raster might have a scale/offset factor - message('Raster scale factor: ', na_conc_r@ptr$source.get_scale()) - message('Raster offset: ', na_conc_r@ptr$source.get_offset()) -} - -## verify fluxes match -library(lubridate) - -clim <- ms_load_product( - macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', - prodname = 'ws_attr_timeseries:climate', - site_codes = 'ALBION', - warn = FALSE -) -p <- ms_load_product( - macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', - prodname = 'precipitation', - site_codes = 'ALBION', - warn = FALSE -) -albion_na_flux_1985 <- filter(clim, var == 'Na_flux_mean') %>% slice(1) -# filter(clim, var == 'precip_median', year == 1985) %>% summarize(val = sum(val)) -albion_p_1985 <- filter(p, year(date) == 1985) %>% summarize(val = sum(val)) -# albion_ha <- ms_load_sites() %>% filter(site_code == 'ALBION') %>% pull(ws_area_ha) - -albion_na_conc_1985_claude = filter(nadp_summary, variable == 'Na', site_code == 'ALBION', year == 1985) 
- -# Convert flux (kg/ha) to concentration (mg/L) using precip depth (mm) -# kg/ha -> mg/ha: multiply by 1e6 -# mm precip -> L/ha: multiply by 1e4 (1 mm on 1 ha = 10,000 L) -# concentration = (flux_kg_ha * 1e6) / (precip_mm * 1e4) -albion_na_conc_1985_macrosheds = (albion_na_flux_1985$val * 1e6) / (albion_p_1985$val * 1e4) - -# Also compute from the NADP dep grid directly -albion_na_conc_1985_from_dep = (na_dep_val * 1e6) / (albion_p_1985$val * 1e4) - -message('Na conc from macrosheds flux / precip (mg/L): ', round(albion_na_conc_1985_macrosheds, 4)) -message('Na conc from NADP dep grid / precip (mg/L): ', round(albion_na_conc_1985_from_dep, 4)) -message('Na conc from NADP conc grid directly (mg/L): ', round(albion_na_conc_1985_claude$value, 4)) -message('Precip from macrosheds (mm): ', round(albion_p_1985$val, 1)) - -# ---- independent precip check: PRISM annual total ---- - -if(requireNamespace('prism', quietly = TRUE)) { - library(prism) - - prism_dir <- file.path(nadp_dir, 'prism') - dir.create(prism_dir, showWarnings = FALSE) - prism_set_dl_dir(prism_dir) - - # Download 1985 annual precip (4km resolution) - get_prism_annual(type = 'ppt', years = 1985, keepZip = FALSE) - - # Find the downloaded raster - prism_files <- prism_archive_ls() - ppt_file <- prism_files[grep('ppt.*1985', prism_files)] - ppt_path <- pd_to_file(ppt_file) - - ppt_r <- rast(ppt_path) - albion_ppt_reproj <- st_transform(albion_shed, crs(ppt_r)) - - if(use_exactextractr) { - prism_ppt_val <- exact_extract(ppt_r, albion_ppt_reproj, fun = 'mean') - } else { - prism_ppt_ex <- terra::extract(ppt_r, vect(albion_ppt_reproj), fun = mean, - na.rm = TRUE, weights = TRUE) - prism_ppt_val <- prism_ppt_ex[, 2] - } - - message('Precip from PRISM annual grid (mm): ', round(prism_ppt_val, 1)) - - # Recompute Na concentration using PRISM precip - albion_na_conc_1985_prism_ms <- (albion_na_flux_1985$val * 1e6) / (prism_ppt_val * 1e4) - albion_na_conc_1985_prism_dep <- (na_dep_val * 1e6) / (prism_ppt_val * 
1e4) - - message('Na conc from macrosheds flux / PRISM precip (mg/L): ', round(albion_na_conc_1985_prism_ms, 4)) - message('Na conc from NADP dep grid / PRISM precip (mg/L): ', round(albion_na_conc_1985_prism_dep, 4)) - -} else { - message('Install the prism package for independent precip verification:') - message(' install.packages("prism")') -} - -# library(terra) -# r <- rast("/home/mike/git/macrosheds/data_acquisition/data/spatial/ndap/1985/dep_na_1985.tif") -# dd = feather::read_feather('/home/mike/git/macrosheds/data_acquisition/data/lter/niwot/ws_traits/nadp/sum_ALBION.feather') -# dd %>% filter(year == 1985, var == 'ch_annual_Na_flux_mean') From e30aa1ad5466e8375d2d766ca87e83f4ccc959ec Mon Sep 17 00:00:00 2001 From: Mike Vlah Date: Sat, 28 Feb 2026 14:13:36 -0700 Subject: [PATCH 37/37] resolved nadp discrepancy. Na, K, Mg were in ug/L --- .gitignore | 2 ++ summarize_nadp_for_watershed.R | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/.gitignore b/.gitignore index 8c81e17..92b9010 100755 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ data/ sites_with_comid.csv /obsolete .aider* +nadp/ +*.csv diff --git a/summarize_nadp_for_watershed.R b/summarize_nadp_for_watershed.R index 846e381..c8c9b08 100644 --- a/summarize_nadp_for_watershed.R +++ b/summarize_nadp_for_watershed.R @@ -466,3 +466,28 @@ nadp_summary %>% ) %>% print(n = Inf) +## verify fluxes match + +library(lubridate) + +clim <- ms_load_product( + macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + prodname = 'ws_attr_timeseries:climate', + site_codes = 'ALBION', + warn = FALSE +) +p <- ms_load_product( + macrosheds_root = '~/ssd2/macrosheds_stuff/ms_test/', + prodname = 'precipitation', + site_codes = 'ALBION', + warn = FALSE +) +albion_na_flux_1985 <- filter(clim, var == 'Na_flux_mean') %>% slice(1) +albion_p_1985 <- filter(p, year(date) == 1985) %>% summarize(val = sum(val)) + +albion_na_conc_1985_claude <- filter(nadp_summary, variable == 'Na', site_code == 
'ALBION', year == 1985)$value
+#mg/L kg/ha mg/kg mm/m m^2/ha mm L/m^3
+albion_na_conc_1985_macrosheds <- (albion_na_flux_1985$val * 10e6 * 1000) / (10e4 * albion_p_1985 * 1000)
+
+#fractional difference (accounted for by precip source, raster processing, etc.)
+(albion_na_conc_1985_macrosheds - albion_na_conc_1985_claude) / albion_na_conc_1985_macrosheds