From 43b69fa7ed0b90dda7ccb644f22c74d7e0cbbb3f Mon Sep 17 00:00:00 2001
From: afilazzola <alex.filazzola@apexrms.com>
Date: Mon, 15 Sep 2025 14:52:53 -0400
Subject: [PATCH 1/6] Revise how cell IDs are extracted to be memory optimized
 and reduce crashing

---
 src/4-background-data-generation.R | 298 ++++++++++++++++++-----------
 1 file changed, 189 insertions(+), 109 deletions(-)

diff --git a/src/4-background-data-generation.R b/src/4-background-data-generation.R
index 95ade79..c219b39 100644
--- a/src/4-background-data-generation.R
+++ b/src/4-background-data-generation.R
@@ -9,7 +9,7 @@
 
 # source dependencies ----------------------------------------------------------
 
-library(rsyncrosim) # install.packages("C:/GitHub/rsyncrosim", type="source", repos=NULL) 
+library(rsyncrosim) # install.packages("C:/GitHub/rsyncrosim", type="source", repos=NULL)
 library(terra)
 library(tidyr)
 library(dplyr)
@@ -27,14 +27,28 @@ myProject <- rsyncrosim::project()
 myScenario <- scenario()
 
 # temp directory
-ssimTempDir <- ssimEnvironment()$TransferDirectory 
+ssimTempDir <- ssimEnvironment()$TransferDirectory
 
 # Read in datasheets
 templateSheet <- datasheet(myScenario, "wisdm_TemplateRaster")
-covariateDataSheet <- datasheet(myScenario, "wisdm_CovariateData", optional = T, lookupsAsFactors = F)
+covariateDataSheet <- datasheet(
+  myScenario,
+  "wisdm_CovariateData",
+  optional = T,
+  lookupsAsFactors = F
+)
 fieldDataSheet <- datasheet(myScenario, "wisdm_FieldData", optional = T)
-backgroundDataOptionsSheet <- datasheet(myScenario, "wisdm_BackgroundDataOptions", optional = T)
-siteDataSheet <- datasheet(myScenario, "wisdm_SiteData", optional = T, lookupsAsFactors = F)
+backgroundDataOptionsSheet <- datasheet(
+  myScenario,
+  "wisdm_BackgroundDataOptions",
+  optional = T
+)
+siteDataSheet <- datasheet(
+  myScenario,
+  "wisdm_SiteData",
+  optional = T,
+  lookupsAsFactors = F
+)
 
 # Set progress bar -------------------------------------------------------------
 
@@ -43,180 +57,246 @@ progressBar(type = "begin", totalSteps = steps)
 
 # Prep inputs ------------------------------------------------------------------
 
-# drop no data (-9999) sites that resulted from spatial aggregation 
-fieldDataSheet <- fieldDataSheet[!fieldDataSheet$Response == -9999,] 
+# drop no data (-9999) sites that resulted from spatial aggregation
+fieldDataSheet <- fieldDataSheet[!fieldDataSheet$Response == -9999, ]
 
-#  Set defaults ----------------------------------------------------------------  
+#  Set defaults ----------------------------------------------------------------
 
 ## Background data options sheet
-if(nrow(backgroundDataOptionsSheet)<1){
-  backgroundDataOptionsSheet <- addRow(backgroundDataOptionsSheet, list(GenerateBackgroundSites = FALSE))
+if (nrow(backgroundDataOptionsSheet) < 1) {
+  backgroundDataOptionsSheet <- addRow(
+    backgroundDataOptionsSheet,
+    list(GenerateBackgroundSites = FALSE)
+  )
 }
-if(is.na(backgroundDataOptionsSheet$GenerateBackgroundSites)){ backgroundDataOptionsSheet$GenerateBackgroundSites <- FALSE }
-if(backgroundDataOptionsSheet$GenerateBackgroundSites){
-  if(is.na(backgroundDataOptionsSheet$BackgroundSiteCount)){backgroundDataOptionsSheet$BackgroundSiteCount <- sum(fieldDataSheet$Response) }  
-  if(is.na(backgroundDataOptionsSheet$BackgroundGenerationMethod)){backgroundDataOptionsSheet$BackgroundGenerationMethod <- "Kernel Density Estimate (KDE)"}
-  if(is.na(backgroundDataOptionsSheet$KDESurface)){
-    if(backgroundDataOptionsSheet$BackgroundGenerationMethod == "Kernel Density Estimate (KDE)"){
+if (is.na(backgroundDataOptionsSheet$GenerateBackgroundSites)) {
+  backgroundDataOptionsSheet$GenerateBackgroundSites <- FALSE
+}
+if (backgroundDataOptionsSheet$GenerateBackgroundSites) {
+  if (is.na(backgroundDataOptionsSheet$BackgroundSiteCount)) {
+    backgroundDataOptionsSheet$BackgroundSiteCount <- sum(
+      fieldDataSheet$Response
+    )
+  }
+  if (is.na(backgroundDataOptionsSheet$BackgroundGenerationMethod)) {
+    backgroundDataOptionsSheet$BackgroundGenerationMethod <- "Kernel Density Estimate (KDE)"
+  }
+  if (is.na(backgroundDataOptionsSheet$KDESurface)) {
+    if (
+      backgroundDataOptionsSheet$BackgroundGenerationMethod ==
+        "Kernel Density Estimate (KDE)"
+    ) {
       backgroundDataOptionsSheet$KDESurface <- "Continuous"
-    }}
-  if(is.na(backgroundDataOptionsSheet$Isopleth)){
-    if(backgroundDataOptionsSheet$KDESurface == "Binary" | backgroundDataOptionsSheet$BackgroundGenerationMethod == "Minimum Convex Polygon (MCP)"){
+    }
+  }
+  if (is.na(backgroundDataOptionsSheet$Isopleth)) {
+    if (
+      backgroundDataOptionsSheet$KDESurface == "Binary" |
+        backgroundDataOptionsSheet$BackgroundGenerationMethod ==
+          "Minimum Convex Polygon (MCP)"
+    ) {
       backgroundDataOptionsSheet$Isopleth <- 95
     }
   }
 }
 
-saveDatasheet(myScenario, backgroundDataOptionsSheet, "wisdm_BackgroundDataOptions")
+saveDatasheet(
+  myScenario,
+  backgroundDataOptionsSheet,
+  "wisdm_BackgroundDataOptions"
+)
 
 # Generate pseudo-absences (if applicable) -------------------------------------
 
-if(backgroundDataOptionsSheet$GenerateBackgroundSites){
-  
-  if(backgroundDataOptionsSheet$BackgroundGenerationMethod == "Kernel Density Estimate (KDE)"){ 
-    methodInputs <- list("method"="kde", 
-                   "surface"=backgroundDataOptionsSheet$KDESurface, 
-                   "isopleth"=backgroundDataOptionsSheet$Isopleth)
-    }
-  if(backgroundDataOptionsSheet$BackgroundGenerationMethod == "Minimum Convex Polygon (MCP)"){ 
-    methodInputs <- list("method"="mcp",
-                         "surface" = NA,
-                   "isopleth"=backgroundDataOptionsSheet$Isopleth)
-    }
+if (backgroundDataOptionsSheet$GenerateBackgroundSites) {
+  if (
+    backgroundDataOptionsSheet$BackgroundGenerationMethod ==
+      "Kernel Density Estimate (KDE)"
+  ) {
+    methodInputs <- list(
+      "method" = "kde",
+      "surface" = backgroundDataOptionsSheet$KDESurface,
+      "isopleth" = backgroundDataOptionsSheet$Isopleth
+    )
+  }
+  if (
+    backgroundDataOptionsSheet$BackgroundGenerationMethod ==
+      "Minimum Convex Polygon (MCP)"
+  ) {
+    methodInputs <- list(
+      "method" = "mcp",
+      "surface" = NA,
+      "isopleth" = backgroundDataOptionsSheet$Isopleth
+    )
+  }
 
   templateRaster <- rast(templateSheet$RasterFilePath)
-  
-  # create mask polygon from extent of valid data in template 
+
+  # create mask polygon from extent of valid data in template
   templateVector <- as.polygons(templateRaster)
-  
+
   # generate background surface
-  backgroundSurfaceGeneration(sp = "species",
-                              template = templateRaster,
-                              method = methodInputs, 
-                              mask = templateVector,
-                              outputDir = ssimTempDir,
-                              dat = fieldDataSheet)
-  
+  backgroundSurfaceGeneration(
+    sp = "species",
+    template = templateRaster,
+    method = methodInputs,
+    mask = templateVector,
+    outputDir = ssimTempDir,
+    dat = fieldDataSheet
+  )
+
   progressBar()
-  rm(templateVector); gc()
-  
+  rm(templateVector)
+  gc()
+
   # generate background (psuedo-absence) points
-  backgroundPointGeneration(sp = "species",
-                            outputDir = ssimTempDir,
-                            n = backgroundDataOptionsSheet$BackgroundSiteCount+100,
-                            method = methodInputs,
-                            # target_file = backgroundDataOptionsSheet, # this is an external input... 
-                            overwrite = T)
-  
+  backgroundPointGeneration(
+    sp = "species",
+    outputDir = ssimTempDir,
+    n = backgroundDataOptionsSheet$BackgroundSiteCount + 100,
+    method = methodInputs,
+    # target_file = backgroundDataOptionsSheet, # this is an external input...
+    overwrite = T
+  )
+
   progressBar()
 
   # add background point to field data
-  bgData <- read.csv(file.path(ssimTempDir, paste0("species_", methodInputs$method, "_bg_pts.csv")))
-  
-  startId <- max(fieldDataSheet$SiteID, siteDataSheet$SiteID)+1
-  bgData$SiteID <- startId:(startId+nrow(bgData)-1)
+  bgData <- read.csv(file.path(
+    ssimTempDir,
+    paste0("species_", methodInputs$method, "_bg_pts.csv")
+  ))
+
+  startId <- max(fieldDataSheet$SiteID, siteDataSheet$SiteID) + 1
+  bgData$SiteID <- startId:(startId + nrow(bgData) - 1)
   bgData$UseInModelEvaluation <- NA
   bgData$ModelSelectionSplit <- NA
   bgData$Weight <- NA
-  
+
   fieldData <- rbind(fieldDataSheet, bgData)
-  
+
   ## Remove background points that occur in pixels with presence points -----
-  
+
   # rasterize points data
-  r <- rast(ext(templateRaster), resolution = res(templateRaster), crs = crs(templateRaster))
+  r <- rast(
+    ext(templateRaster),
+    resolution = res(templateRaster),
+    crs = crs(templateRaster)
+  )
   pts <- vect(fieldData, geom = c("X", "Y"), crs = crs(templateRaster))
-  rm(templateRaster, fieldData); gc()
-  
-  rIDs <- rast(r, vals = 1:(dim(r)[1]*dim(r)[2]))
+  rIDs <- init(r, "cell")
+
+  # Extract cell IDs for each point
   cellPerPt <- terra::extract(rIDs, pts)
   cellPerPt$SiteID <- pts$SiteID
   names(cellPerPt)[2] <- "PixelID"
-  pts <- merge(pts, cellPerPt[,c("PixelID", "SiteID")])
-  rm(rIDs, cellPerPt); gc()
-  
+  pts <- merge(pts, cellPerPt[, c("PixelID", "SiteID")])
+  gc()
+
   # check for duplicate pixel ids
-  if(any(duplicated(pts$PixelID))){
-    dups <- pts[which(duplicated(pts$PixelID)),]
+  if (any(duplicated(pts$PixelID))) {
+    dups <- pts[which(duplicated(pts$PixelID)), ]
     dropSites <- dups$SiteID[which(dups$Response == -9998)]
-    pts <- pts[!pts$SiteID %in% dropSites,]
+    pts <- pts[!pts$SiteID %in% dropSites, ]
   }
-  
+
   bgPts <- pts[pts$Response == -9998]
   bgPts$PixelID <- NULL
-  rm(pts, dups, dropSites); gc()
-  
+  rm(pts, dups, dropSites)
+  gc()
+
   # remove extra bg sites
-  if(nrow(bgPts)>backgroundDataOptionsSheet$BackgroundSiteCount){
-    bgPts <- sample(x = bgPts, size = backgroundDataOptionsSheet$BackgroundSiteCount, replace = F)
+  if (nrow(bgPts) > backgroundDataOptionsSheet$BackgroundSiteCount) {
+    bgPts <- sample(
+      x = bgPts,
+      size = backgroundDataOptionsSheet$BackgroundSiteCount,
+      replace = F
+    )
   }
-  if(nrow(bgPts)<backgroundDataOptionsSheet$BackgroundSiteCount){
-    updateRunLog(paste0(nrow(bgPts), " psuedoabsence sites added to Field Data when ", backgroundDataOptionsSheet$BackgroundSiteCount, 
-                        " were requested."))
+  if (nrow(bgPts) < backgroundDataOptionsSheet$BackgroundSiteCount) {
+    updateRunLog(paste0(
+      nrow(bgPts),
+      " psuedoabsence sites added to Field Data when ",
+      backgroundDataOptionsSheet$BackgroundSiteCount,
+      " were requested."
+    ))
     # backgroundDataOptionsSheet$BackgroundSiteCount <- nrow(bgPts)
   }
-  
+
   progressBar()
 
   ## Extract covariate data for background sites  -----
-  
+
   # rasterize bg data
   rastPts <- rasterize(bgPts, r)
-  matPts <- as.matrix(rastPts, wide=T)
+  matPts <- as.matrix(rastPts, wide = T)
   keep <- which(!is.na(matPts))
-  rm(rastPts); gc()
-  
-  rIDs <- rast(r, vals = 1:(dim(r)[1]*dim(r)[2]))
+  rm(rastPts)
+  gc()
+
+  rIDs <- rast(r, vals = 1:(dim(r)[1] * dim(r)[2]))
   cellPerPt <- terra::extract(rIDs, bgPts)
   cellPerPt$SiteID <- bgPts$SiteID
   names(cellPerPt)[2] <- "PixelID"
-  bgPts <- merge(bgPts, cellPerPt[,c("PixelID", "SiteID")])
-  rm(rIDs); gc()
-  
+  bgPts <- merge(bgPts, cellPerPt[, c("PixelID", "SiteID")])
+  rm(rIDs)
+  gc()
+
   rPixels <- rasterize(bgPts, r, field = "PixelID")
-  matPixs <- as.matrix(rPixels, wide=T)
+  matPixs <- as.matrix(rPixels, wide = T)
   PixelIDs <- matPixs[keep]
   PixelData <- data.frame(PixelID = PixelIDs)
-  rm(rPixels, matPixs, PixelIDs); gc()
-  
-  for(i in 1:nrow(covariateDataSheet)){
+  rm(rPixels, matPixs, PixelIDs)
+  gc()
+
+  for (i in 1:nrow(covariateDataSheet)) {
     ri <- rast(covariateDataSheet$RasterFilePath[i])
-    mi <- as.matrix(ri, wide=TRUE)
-    
-    outMat <- mi*matPts
+    mi <- as.matrix(ri, wide = TRUE)
+
+    outMat <- mi * matPts
     vals <- outMat[keep]
-    rm(ri, mi, outMat); gc()
-    
+    rm(ri, mi, outMat)
+    gc()
+
     PixelData[covariateDataSheet$CovariatesID[i]] <- vals
     progressBar()
-    
   }
-  
+
   # convert background site data to long format and add to existing site datasheet
-  bgSiteData <- merge(cellPerPt[,c("PixelID", "SiteID")], PixelData)
+  bgSiteData <- merge(cellPerPt[, c("PixelID", "SiteID")], PixelData)
   bgSiteData$PixelID <- NULL
-  bgSiteData <- gather(data = bgSiteData, key = CovariatesID, value = Value, -SiteID)
-  
+  bgSiteData <- gather(
+    data = bgSiteData,
+    key = CovariatesID,
+    value = Value,
+    -SiteID
+  )
+
   siteDataSheet <- rbind(siteDataSheet, bgSiteData)
   siteDataSheet$SiteID <- format(siteDataSheet$SiteID, scientific = F)
-  siteDataSheet <- siteDataSheet %>% distinct(SiteID, CovariatesID, .keep_all = T)
-  rm(bgSiteData); gc()
-  
+  siteDataSheet <- siteDataSheet %>%
+    distinct(SiteID, CovariatesID, .keep_all = T)
+  rm(bgSiteData)
+  gc()
+
   # save site data to scenario
   saveDatasheet(myScenario, siteDataSheet, "wisdm_SiteData")
-  rm(siteDataSheet); gc()
-  
+  rm(siteDataSheet)
+  gc()
+
   # update field data
-  bgData <- bgData[which(bgData$SiteID %in% bgPts$SiteID),]
-  if(any(!is.na(fieldDataSheet$Weight))){ bgData$Weight <- 1 }
+  bgData <- bgData[which(bgData$SiteID %in% bgPts$SiteID), ]
+  if (any(!is.na(fieldDataSheet$Weight))) {
+    bgData$Weight <- 1
+  }
 
   fieldDataSheet <- rbind(fieldDataSheet, bgData)
   fieldDataSheet$SiteID <- format(fieldDataSheet$SiteID, scientific = F)
-  rm(bgData, bgPts); gc()
-  
+  rm(bgData, bgPts)
+  gc()
+
   # save updated field data to scenario
   saveDatasheet(myScenario, fieldDataSheet, "wisdm_FieldData", append = F)
 }
 
 progressBar(type = "end")
-

From 4d8e55fcf528d9431daa672037a76fe4c4116a56 Mon Sep 17 00:00:00 2001
From: afilazzola <alex.filazzola@apexrms.com>
Date: Mon, 15 Sep 2025 14:55:22 -0400
Subject: [PATCH 2/6] added same optimization to the background point
 extraction

---
 src/4-background-data-generation.R | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/4-background-data-generation.R b/src/4-background-data-generation.R
index c219b39..1d483b5 100644
--- a/src/4-background-data-generation.R
+++ b/src/4-background-data-generation.R
@@ -178,19 +178,22 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) {
   ## Remove background points that occur in pixels with presence points -----
 
   # rasterize points data
-  r <- rast(
-    ext(templateRaster),
-    resolution = res(templateRaster),
-    crs = crs(templateRaster)
-  )
-  pts <- vect(fieldData, geom = c("X", "Y"), crs = crs(templateRaster))
-  rIDs <- init(r, "cell")
+  rastPts <- rasterize(bgPts, r)
+  # Replace matrix conversion with direct value extraction
+  vals <- values(rastPts)
+  keep <- which(!is.na(vals))
 
-  # Extract cell IDs for each point
-  cellPerPt <- terra::extract(rIDs, pts)
-  cellPerPt$SiteID <- pts$SiteID
+  rIDs <- init(r, "cell")
+  cellPerPt <- terra::extract(rIDs, bgPts)
+  cellPerPt$SiteID <- bgPts$SiteID
   names(cellPerPt)[2] <- "PixelID"
-  pts <- merge(pts, cellPerPt[, c("PixelID", "SiteID")])
+  bgPts <- merge(bgPts, cellPerPt[, c("PixelID", "SiteID")])
+
+  rPixels <- rasterize(bgPts, r, field = "PixelID")
+  pixelVals <- values(rPixels)
+  PixelIDs <- pixelVals[keep]
+
+  PixelData <- data.frame(PixelID = PixelIDs)
   gc()
 
   # check for duplicate pixel ids

From bbfd5e0afa5fa937bdabdf59456600698ef7e08b Mon Sep 17 00:00:00 2001
From: afilazzola <alex.filazzola@apexrms.com>
Date: Mon, 15 Sep 2025 15:03:03 -0400
Subject: [PATCH 3/6] Removed rasterization that was crashing with high res and
 high background occs. Now pulls cell ids directly

---
 src/4-background-data-generation.R | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/4-background-data-generation.R b/src/4-background-data-generation.R
index 1d483b5..5969eb3 100644
--- a/src/4-background-data-generation.R
+++ b/src/4-background-data-generation.R
@@ -231,10 +231,8 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) {
   ## Extract covariate data for background sites  -----
 
   # rasterize bg data
-  rastPts <- rasterize(bgPts, r)
-  matPts <- as.matrix(rastPts, wide = T)
-  keep <- which(!is.na(matPts))
-  rm(rastPts)
+  cellIds <- cellFromXY(r, geom(bgPts)[, c("x", "y")])
+  keep <- unique(cellIds)
   gc()
 
   rIDs <- rast(r, vals = 1:(dim(r)[1] * dim(r)[2]))

From 8cea14e02d5f6e97062382de6c8d6d0fe251ad64 Mon Sep 17 00:00:00 2001
From: afilazzola <alex.filazzola@apexrms.com>
Date: Mon, 15 Sep 2025 15:08:25 -0400
Subject: [PATCH 4/6] removed all matrix and used only the raster Id indexing
 for highest performance

---
 src/4-background-data-generation.R | 37 +++++++++++++++---------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/4-background-data-generation.R b/src/4-background-data-generation.R
index 5969eb3..4451e97 100644
--- a/src/4-background-data-generation.R
+++ b/src/4-background-data-generation.R
@@ -230,34 +230,33 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) {
 
   ## Extract covariate data for background sites  -----
 
-  # rasterize bg data
+  # extract bg points data
   cellIds <- cellFromXY(r, geom(bgPts)[, c("x", "y")])
   keep <- unique(cellIds)
-  gc()
 
-  rIDs <- rast(r, vals = 1:(dim(r)[1] * dim(r)[2]))
-  cellPerPt <- terra::extract(rIDs, bgPts)
-  cellPerPt$SiteID <- bgPts$SiteID
-  names(cellPerPt)[2] <- "PixelID"
-  bgPts <- merge(bgPts, cellPerPt[, c("PixelID", "SiteID")])
-  rm(rIDs)
-  gc()
+  # Get cell IDs for each point (simpler approach)
+  cellPerPt <- data.frame(
+    ID = 1:length(bgPts), # temporary ID for merging
+    PixelID = cellIds,
+    SiteID = bgPts$SiteID
+  )
+
+  bgPts$ID <- 1:length(bgPts)
+  bgPts <- merge(bgPts, cellPerPt[, c("ID", "PixelID", "SiteID")], by = "ID")
+  bgPts$ID <- NULL # Remove temporary ID
+
+  PixelIDs <- keep
+
+  cellToPixel <- data.frame(cell = cellIds, PixelID = cellIds)
+  cellToPixel <- cellToPixel[!duplicated(cellToPixel$cell), ]
+  PixelIDs <- cellToPixel$PixelID[match(keep, cellToPixel$cell)]
 
-  rPixels <- rasterize(bgPts, r, field = "PixelID")
-  matPixs <- as.matrix(rPixels, wide = T)
-  PixelIDs <- matPixs[keep]
   PixelData <- data.frame(PixelID = PixelIDs)
-  rm(rPixels, matPixs, PixelIDs)
-  gc()
 
   for (i in 1:nrow(covariateDataSheet)) {
     ri <- rast(covariateDataSheet$RasterFilePath[i])
-    mi <- as.matrix(ri, wide = TRUE)
 
-    outMat <- mi * matPts
-    vals <- outMat[keep]
-    rm(ri, mi, outMat)
-    gc()
+    vals <- ri[keep]
 
     PixelData[covariateDataSheet$CovariatesID[i]] <- vals
     progressBar()

From ffe838af72edfeb11a8363310620f60b25300195 Mon Sep 17 00:00:00 2001
From: afilazzola <alex.filazzola@apexrms.com>
Date: Mon, 15 Sep 2025 15:28:41 -0400
Subject: [PATCH 5/6] Pretty sure the logic here should be fail
 any(fieldDataSheet$Response == -9998)

---
 src/8-fit-maxent.R | 430 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 308 insertions(+), 122 deletions(-)

diff --git a/src/8-fit-maxent.R b/src/8-fit-maxent.R
index 1d41ecd..b95b846 100644
--- a/src/8-fit-maxent.R
+++ b/src/8-fit-maxent.R
@@ -4,20 +4,20 @@
 ## --------------------
 
 # built under R version 4.1.3, SyncroSim 3.1.10 & rsyncrosim 2.1.3
-# script pulls in pre-processed field, site and covariate data; 
-# fits maxent model; builds model diagnostic and validation plots 
+# script pulls in pre-processed field, site and covariate data;
+# fits maxent model; builds model diagnostic and validation plots
 
 # source dependencies ----------------------------------------------------------
 
-library(rsyncrosim) 
+library(rsyncrosim)
 library(tidyverse)
-library(zip) 
+library(zip)
 
 packageDir <- Sys.getenv("ssim_package_directory")
 source(file.path(packageDir, "00-helper-functions.R"))
 source(file.path(packageDir, "08-fit-model-functions.R"))
 
-# disable scientific notation 
+# disable scientific notation
 options(scipen = 999)
 
 # Set progress bar -------------------------------------------------------------
@@ -39,100 +39,174 @@ covariatesSheet <- datasheet(myScenario, "wisdm_Covariates", optional = T)
 modelsSheet <- datasheet(myScenario, "wisdm_Models")
 fieldDataSheet <- datasheet(myScenario, "wisdm_FieldData", optional = T)
 validationDataSheet <- datasheet(myScenario, "wisdm_ValidationOptions")
-retainedCovariatesSheet <- datasheet(myScenario, "wisdm_RetainedCovariates", lookupsAsFactors = F)
+retainedCovariatesSheet <- datasheet(
+  myScenario,
+  "wisdm_RetainedCovariates",
+  lookupsAsFactors = F
+)
 siteDataSheet <- datasheet(myScenario, "wisdm_SiteData", lookupsAsFactors = F)
 maxentSheet <- datasheet(myScenario, "wisdm_Maxent")
 mulitprocessingSheet <- datasheet(myScenario, "core_Multiprocessing")
-modelOutputsSheet <- datasheet(myScenario, "wisdm_OutputModel", optional = T, returnInvisible = T, empty = T, lookupsAsFactors = F) %>% drop_na()
+modelOutputsSheet <- datasheet(
+  myScenario,
+  "wisdm_OutputModel",
+  optional = T,
+  returnInvisible = T,
+  empty = T,
+  lookupsAsFactors = F
+) %>%
+  drop_na()
 
 progressBar()
 
 # Error handling ---------------------------------------------------------------
 
 # check for for background data
-if(!any(fieldDataSheet$Response == -9998)){
-  if(any(siteDataSheet$Response == 0)){
-    stop("Maxent is a presence-background method; please select a method suitable for presence-absence data before continuing.")
+if (any(fieldDataSheet$Response == -9998)) {
+  if (any(siteDataSheet$Response == 0)) {
+    stop(
+      "Maxent is a presence-background method; please select a method suitable for presence-absence data before continuing."
+    )
   }
-  stop("Maxent is a presence-background method; please generate pseudo-absences before continuing. Note: Pseuso-absences can be generated in the '4 - Background Data Generation' Stage of the Pipeline. See 'Background Data Options' for more details.")
+  stop(
+    "Maxent is a presence-background method; please generate pseudo-absences before continuing. Note: Pseuso-absences can be generated in the '4 - Background Data Generation' Stage of the Pipeline. See 'Background Data Options' for more details."
+  )
 }
 
-#  Set defaults ----------------------------------------------------------------  
+#  Set defaults ----------------------------------------------------------------
 
 ## Validation Sheet
-if(nrow(validationDataSheet)<1){
-  validationDataSheet <- addRow(validationDataSheet, list(SplitData = FALSE,
-                                                          CrossValidate = FALSE))
+if (nrow(validationDataSheet) < 1) {
+  validationDataSheet <- addRow(
+    validationDataSheet,
+    list(SplitData = FALSE, CrossValidate = FALSE)
+  )
+}
+if (is.na(validationDataSheet$CrossValidate)) {
+  validationDataSheet$CrossValidate <- FALSE
+}
+if (is.na(validationDataSheet$SplitData)) {
+  validationDataSheet$SplitData <- FALSE
 }
-if(is.na(validationDataSheet$CrossValidate)){validationDataSheet$CrossValidate <- FALSE}
-if(is.na(validationDataSheet$SplitData)){validationDataSheet$SplitData <- FALSE}
 
-## Maxent Sheet 
-if(nrow(maxentSheet)<1 | all(is.na(maxentSheet))){
+## Maxent Sheet
+if (nrow(maxentSheet) < 1 | all(is.na(maxentSheet))) {
   maxentSheet <- maxentSheet %>% drop_na()
-  maxentSheet <- addRow(maxentSheet, list(MemoryLimit = 512,
-                                          VisibleInterface = FALSE,
-                                          MaximumBackgroundPoints = 10000,
-                                          SaveMaxentFiles = FALSE))
+  maxentSheet <- addRow(
+    maxentSheet,
+    list(
+      MemoryLimit = 512,
+      VisibleInterface = FALSE,
+      MaximumBackgroundPoints = 10000,
+      SaveMaxentFiles = FALSE
+    )
+  )
 }
 
-if(is.na(maxentSheet$MemoryLimit)){maxentSheet$MemoryLimit <- 512}
-if(is.na(maxentSheet$MaximumBackgroundPoints)){maxentSheet$MaximumBackgroundPoints <- 10000}
-if(is.na(maxentSheet$VisibleInterface)){maxentSheet$VisibleInterface <- FALSE}
-if(is.na(maxentSheet$SaveMaxentFiles)){maxentSheet$SaveMaxentFiles <- FALSE}
+if (is.na(maxentSheet$MemoryLimit)) {
+  maxentSheet$MemoryLimit <- 512
+}
+if (is.na(maxentSheet$MaximumBackgroundPoints)) {
+  maxentSheet$MaximumBackgroundPoints <- 10000
+}
+if (is.na(maxentSheet$VisibleInterface)) {
+  maxentSheet$VisibleInterface <- FALSE
+}
+if (is.na(maxentSheet$SaveMaxentFiles)) {
+  maxentSheet$SaveMaxentFiles <- FALSE
+}
 
-if(is.na(mulitprocessingSheet$EnableMultiprocessing) | mulitprocessingSheet$EnableMultiprocessing == FALSE){
+if (
+  is.na(mulitprocessingSheet$EnableMultiprocessing) |
+    mulitprocessingSheet$EnableMultiprocessing == FALSE
+) {
   maxentSheet$MultiprocessingThreads <- 1
-  } else {
-    if(is.na(maxentSheet$MultiprocessingThreads)){
-      maxentSheet$MultiprocessingThreads <- mulitprocessingSheet$MaximumJobs
-    }
+} else {
+  if (is.na(maxentSheet$MultiprocessingThreads)) {
+    maxentSheet$MultiprocessingThreads <- mulitprocessingSheet$MaximumJobs
   }
- 
-saveDatasheet(myScenario, maxentSheet, "wisdm_Maxent")   
+}
+
+saveDatasheet(myScenario, maxentSheet, "wisdm_Maxent")
 progressBar()
 
 # Prep data for model fitting --------------------------------------------------
 
 siteDataWide <- spread(siteDataSheet, key = CovariatesID, value = "Value")
-rm(siteDataSheet); gc()
+rm(siteDataSheet)
+gc()
 
 # remove variables dropped due to correlation
-siteDataWide <- siteDataWide[,c('SiteID', retainedCovariatesSheet$CovariatesID)]
+siteDataWide <- siteDataWide[, c(
+  'SiteID',
+  retainedCovariatesSheet$CovariatesID
+)]
 
 # merge field and site data
 siteDataWide <- merge(fieldDataSheet, siteDataWide, by = "SiteID")
-rm(fieldDataSheet); gc()
+rm(fieldDataSheet)
+gc()
 
-# remove sites with incomplete data 
+# remove sites with incomplete data
 allCases <- nrow(siteDataWide)
-siteDataWide <- siteDataWide[complete.cases(subset(siteDataWide, select = c(-UseInModelEvaluation, -ModelSelectionSplit, -Weight))),]
+siteDataWide <- siteDataWide[
+  complete.cases(subset(
+    siteDataWide,
+    select = c(-UseInModelEvaluation, -ModelSelectionSplit, -Weight)
+  )),
+]
 compCases <- nrow(siteDataWide)
-if(compCases/allCases < 0.9){updateRunLog(paste("\nWarning: ", round((1-compCases/allCases)*100,digits=2),"% of cases were removed because of missing values.\n",sep=""))}
+if (compCases / allCases < 0.9) {
+  updateRunLog(paste(
+    "\nWarning: ",
+    round((1 - compCases / allCases) * 100, digits = 2),
+    "% of cases were removed because of missing values.\n",
+    sep = ""
+  ))
+}
 
 # set site weights to default of 1 if not already supplied
-if(all(is.na(siteDataWide$Weight))){siteDataWide$Weight <- 1}
+if (all(is.na(siteDataWide$Weight))) {
+  siteDataWide$Weight <- 1
+}
 
-# set pseudo absences to zero 
-if(any(siteDataWide$Response == -9998)){pseudoAbs <- TRUE} else {pseudoAbs <- FALSE}
+# set pseudo absences to zero
+if (any(siteDataWide$Response == -9998)) {
+  pseudoAbs <- TRUE
+} else {
+  pseudoAbs <- FALSE
+}
 siteDataWide$Response[siteDataWide$Response == -9998] <- 0
 
 # set categorical variable to factor
-factorInputVars <- covariatesSheet$CovariateName[which(covariatesSheet$IsCategorical == T & covariatesSheet$CovariateName %in% names(siteDataWide))]
-if(length(factorInputVars)>0){ 
-  for (i in factorInputVars){
-    siteDataWide[,i] <- factor(siteDataWide[,i])
+factorInputVars <- covariatesSheet$CovariateName[which(
+  covariatesSheet$IsCategorical == T &
+    covariatesSheet$CovariateName %in% names(siteDataWide)
+)]
+if (length(factorInputVars) > 0) {
+  for (i in factorInputVars) {
+    siteDataWide[, i] <- factor(siteDataWide[, i])
   }
 }
 
-# identify training and testing sites 
-trainTestDatasets <- split(siteDataWide, f = siteDataWide[,"UseInModelEvaluation"], drop = T)
-rm(siteDataWide); gc()
+# identify training and testing sites
+trainTestDatasets <- split(
+  siteDataWide,
+  f = siteDataWide[, "UseInModelEvaluation"],
+  drop = T
+)
+rm(siteDataWide)
+gc()
 trainingData <- trainTestDatasets$`FALSE`
-if(!validationDataSheet$CrossValidate){trainingData$ModelSelectionSplit <- FALSE}
+if (!validationDataSheet$CrossValidate) {
+  trainingData$ModelSelectionSplit <- FALSE
+}
 testingData <- trainTestDatasets$`TRUE`
-rm(trainTestDatasets); gc()
-if(!is.null(testingData)){testingData$ModelSelectionSplit <- FALSE}
+rm(trainTestDatasets)
+gc()
+if (!is.null(testingData)) {
+  testingData$ModelSelectionSplit <- FALSE
+}
 progressBar()
 
 # Model definitions ------------------------------------------------------------
@@ -145,12 +219,12 @@ out$modType <- modType <- "maxent"
 
 ## Model options
 out$modOptions <- maxentSheet
-out$modOptions$thresholdOptimization <- "Sens=Spec"	# To Do: link to defined Threshold Optimization Method in UI - currently set to default: sensitivity=specificity 
+out$modOptions$thresholdOptimization <- "Sens=Spec" # To Do: link to defined Threshold Optimization Method in UI - currently set to default: sensitivity=specificity
 
-## Model family 
+## Model family
 out$modelFamily <- "binomial"
 
-## Candidate variables 
+## Candidate variables
 out$inputVars <- retainedCovariatesSheet$CovariatesID
 out$factorInputVars <- factorInputVars
 
@@ -158,9 +232,9 @@ out$factorInputVars <- factorInputVars
 out$pseudoAbs <- pseudoAbs
 
 ## Validation options
-out$validationOptions <- validationDataSheet 
+out$validationOptions <- validationDataSheet
 
-## path to temp ssim storage 
+## path to temp ssim storage
 out$tempDir <- ssimTempDir
 
 # Format and save data for use in Maxent -------------------------------------------
@@ -171,72 +245,118 @@ dir.create(file.path(ssimTempDir, "Outputs"))
 
 ## training data
 
-out$swdPath <- swdPath <- file.path(ssimTempDir, "Inputs", "training-swd.csv", fsep = "\\")
+out$swdPath <- swdPath <- file.path(
+  ssimTempDir,
+  "Inputs",
+  "training-swd.csv",
+  fsep = "\\"
+)
 
 trainingData %>%
   mutate(Species = case_when(Response == 1 ~ "species")) %>%
   drop_na(Species) %>%
-  select(-SiteID, -Response, -UseInModelEvaluation, -ModelSelectionSplit, -Weight) %>%
+  select(
+    -SiteID,
+    -Response,
+    -UseInModelEvaluation,
+    -ModelSelectionSplit,
+    -Weight
+  ) %>%
   relocate(Species, .before = X) %>%
   write.csv(swdPath, row.names = F)
 
 out$data$train <- trainingData
 
-if(pseudoAbs){
-  
-  out$backgroundPath <- backgroundPath <- file.path(ssimTempDir, "Inputs", "background-swd.csv", fsep = "\\")
-  
+if (pseudoAbs) {
+  out$backgroundPath <- backgroundPath <- file.path(
+    ssimTempDir,
+    "Inputs",
+    "background-swd.csv",
+    fsep = "\\"
+  )
+
   trainingData %>%
     mutate(Species = case_when(Response != 1 ~ "background")) %>%
     drop_na(Species) %>%
-    select(-SiteID, -Response, -UseInModelEvaluation, -ModelSelectionSplit, -Weight) %>%
+    select(
+      -SiteID,
+      -Response,
+      -UseInModelEvaluation,
+      -ModelSelectionSplit,
+      -Weight
+    ) %>%
     relocate(Species, .before = X) %>%
     write.csv(backgroundPath, row.names = F)
-  }
+}
 
 ## testing data
 
-if(!is.null(testingData)){
-  out$testDataPath <- testDataPath <- file.path(ssimTempDir, "Inputs", "testing-swd.csv", fsep = "\\")
+if (!is.null(testingData)) {
+  out$testDataPath <- testDataPath <- file.path(
+    ssimTempDir,
+    "Inputs",
+    "testing-swd.csv",
+    fsep = "\\"
+  )
   testingData %>%
     mutate(Species = case_when(Response == 1 ~ "species")) %>%
     drop_na(Species) %>%
-    select(-SiteID, -Response, -UseInModelEvaluation, -ModelSelectionSplit, -Weight) %>%
+    select(
+      -SiteID,
+      -Response,
+      -UseInModelEvaluation,
+      -ModelSelectionSplit,
+      -Weight
+    ) %>%
     relocate(Species, .before = X) %>%
     write.csv(testDataPath, row.names = F)
-  
+
   out$data$test <- testingData
-  rm(testingData); gc()
-  }
+  rm(testingData)
+  gc()
+}
 
 out$batchPath <- file.path(ssimTempDir, "Inputs", "runMaxent.bat", fsep = "\\")
 # out$maxJobs <- mulitprocessingSheet$MaximumJobs
 
 # Create output text file ------------------------------------------------------
 
-capture.output(cat("Maxent Results"), file = file.path(ssimTempDir, paste0(modType, "_output.txt"))) 
-on.exit(capture.output(cat("Model Failed\n\n"),file = file.path(ssimTempDir, paste0(modType, "_output.txt")),append=TRUE))  
+capture.output(
+  cat("Maxent Results"),
+  file = file.path(ssimTempDir, paste0(modType, "_output.txt"))
+)
+on.exit(capture.output(
+  cat("Model Failed\n\n"),
+  file = file.path(ssimTempDir, paste0(modType, "_output.txt")),
+  append = TRUE
+))
 
 
 # Review model data ------------------------------------------------------------
 
-if(nrow(out$data$train)/(length(out$inputVars)-1)<10){
-  updateRunLog(paste("\nYou have approximately ", round(nrow(out$data$train)/(ncol(out$data$train)-1),digits=1),
-                     " observations for every predictor\n consider reducing the number of predictors before continuing\n",sep=""))
+if (nrow(out$data$train) / (length(out$inputVars) - 1) < 10) {
+  updateRunLog(paste(
+    "\nYou have approximately ",
+    round(nrow(out$data$train) / (ncol(out$data$train) - 1), digits = 1),
+    " observations for every predictor\n consider reducing the number of predictors before continuing\n",
+    sep = ""
+  ))
 }
 progressBar()
 
 # Fit model --------------------------------------------------------------------
 
-finalMod <- fitModel(dat = NULL, # maxent code pulls in data from csv files built/saved above  
-                     out = out)
+finalMod <- fitModel(
+  dat = NULL, # maxent code pulls in data from csv files built/saved above
+  out = out
+)
 
 # finalMod$trainingData <- trainingData
 # save model to temp storage
 # saveRDS(finalMod, file = paste0(ssimTempDir,"\\Data\\", modType, "_model.rds"))
 # finalMod$trainingData <- NULL
 
-# add relevant model details to out 
+# add relevant model details to out
 out$finalMod <- finalMod
 out$finalVars <- out$inputVars # maxent doesn't drop variables
 out$nVarsFinal <- length(out$finalVars)
@@ -244,52 +364,84 @@ out$nVarsFinal <- length(out$finalVars)
 # load maxent run results
 runReults <- read.csv(file.path(ssimTempDir, "Outputs", "maxentResults.csv"))
 
-modSummary <- data.frame("Variabels" = gsub(".contribution", "", names(runReults)[grep("contribution",names(runReults))]))
-modSummary$Contribution <- t(runReults[grep("contribution",names(runReults))])
+modSummary <- data.frame(
+  "Variabels" = gsub(
+    ".contribution",
+    "",
+    names(runReults)[grep("contribution", names(runReults))]
+  )
+)
+modSummary$Contribution <- t(runReults[grep("contribution", names(runReults))])
 
 updateRunLog("\nSummary of Model:\n")
 coeftbl <- modSummary
 rownames(coeftbl) <- NULL
-updateRunLog(pander::pandoc.table.return(coeftbl, style = "simple", split.tables = 100))
-capture.output(cat("\n\n"), modSummary, file = file.path(ssimTempDir, paste0(modType, "_output.txt")), append=TRUE)
+updateRunLog(pander::pandoc.table.return(
+  coeftbl,
+  style = "simple",
+  split.tables = 100
+))
+capture.output(
+  cat("\n\n"),
+  modSummary,
+  file = file.path(ssimTempDir, paste0(modType, "_output.txt")),
+  append = TRUE
+)
 progressBar()
 
 # Test model predictions -------------------------------------------------------
 
-out$data$train$predicted <- pred.fct(x=out$data$train, mod=finalMod, modType=modType) # alternatively should these be pulled from the samplePredictions.csv output by maxent??
-
-if(validationDataSheet$SplitData){
-  out$data$test$predicted <- pred.fct(x=out$data$test, mod=finalMod, modType=modType)
+out$data$train$predicted <- pred.fct(
+  x = out$data$train,
+  mod = finalMod,
+  modType = modType
+) # alternatively should these be pulled from the samplePredictions.csv output by maxent??
+
+if (validationDataSheet$SplitData) {
+  out$data$test$predicted <- pred.fct(
+    x = out$data$test,
+    mod = finalMod,
+    modType = modType
+  )
 }
 progressBar()
 
 # Evaluate thresholds (for use with binary output) ----------------------------
 
-predOcc <- out$data$train[out$data$train$Response >= 1 , "predicted"]
-predAbs <- out$data$train[out$data$train$Response == 0 , "predicted"]
+predOcc <- out$data$train[out$data$train$Response >= 1, "predicted"]
+predAbs <- out$data$train[out$data$train$Response == 0, "predicted"]
 
 evalOut <- modelEvaluation(predOcc = predOcc, predAbs = predAbs)
 
 finalMod$binThresholds <- thresholds <- evalOut@thresholds
-rm(predOcc, predAbs, evalOut); gc()
+rm(predOcc, predAbs, evalOut)
+gc()
 
-names(thresholds) <- c("Max kappa", "Max sensitivity and specificity", "No omission", 
-                       "Prevalence", "Sensitivity equals specificity")
+names(thresholds) <- c(
+  "Max kappa",
+  "Max sensitivity and specificity",
+  "No omission",
+  "Prevalence",
+  "Sensitivity equals specificity"
+)
 
 updateRunLog("\nThresholds:\n")
-tbl <- round(thresholds, 6) 
-updateRunLog(pander::pandoc.table.return(tbl, style = "simple", split.tables = 100))
+tbl <- round(thresholds, 6)
+updateRunLog(pander::pandoc.table.return(
+  tbl,
+  style = "simple",
+  split.tables = 100
+))
 
 # save model info to temp storage
 finalMod$trainingData <- trainingData
 saveRDS(finalMod, file = file.path(ssimTempDir, paste0(modType, "_model.rds")))
-rm(finalMod); gc()
+rm(finalMod)
+gc()
 
 # Run Cross Validation (if specified) ------------------------------------------
-if(validationDataSheet$CrossValidate){
-  
-  out <- cv.fct(out = out,
-                nfolds = validationDataSheet$NumberOfFolds)
+if (validationDataSheet$CrossValidate) {
+  out <- cv.fct(out = out, nfolds = validationDataSheet$NumberOfFolds)
 }
 progressBar()
 
@@ -297,7 +449,7 @@ progressBar()
 
 ## AUC/ROC - Residual Plots - Variable Importance -  Calibration - Confusion Matrix ##
 
-out <- suppressWarnings(makeModelEvalPlots(out=out))
+out <- suppressWarnings(makeModelEvalPlots(out = out))
 progressBar()
 
 ## Response Curves ##
@@ -310,31 +462,66 @@ progressBar()
 tempFiles <- list.files(ssimTempDir)
 
 # add model Outputs to datasheet
-modelOutputsSheet <- addRow(modelOutputsSheet, 
-                            list(ModelsID = modelsSheet$ModelName[modelsSheet$ModelType == modType],
-                                 ModelRDS = file.path(ssimTempDir, paste0(modType, "_model.rds")),
-                                 ResponseCurves = file.path(ssimTempDir, paste0(modType, "_ResponseCurves.png")),
-                                 TextOutput = file.path(ssimTempDir, paste0(modType, "_output.txt")),
-                                 ResidualSmoothPlot = file.path(ssimTempDir, paste0(modType, "_ResidualSmoothPlot.png")),
-                                 ResidualSmoothRDS = file.path(ssimTempDir, paste0(modType, "_ResidualSmoothFunction.rds")),
-                                 ConfusionMatrix = file.path(ssimTempDir, paste0(modType, "_ConfusionMatrix.png")),
-                                 VariableImportancePlot = file.path(ssimTempDir, paste0(modType, "_VariableImportance.png")),
-                                 VariableImportanceData = file.path(ssimTempDir, paste0(modType, "_VariableImportance.csv")),
-                                 ROCAUCPlot = file.path(ssimTempDir, paste0(modType, "_ROCAUCPlot.png")),
-                                 CalibrationPlot = file.path(ssimTempDir, paste0(modType, "_CalibrationPlot.png"))))
-
-
-if("maxent_StandardResidualPlots.png" %in% tempFiles){ modelOutputsSheet$ResidualsPlot <- file.path(ssimTempDir, paste0(modType, "_StandardResidualPlots.png")) }
-if("maxent_AUCPRPlot.png" %in% tempFiles){ modelOutputsSheet$AUCPRPlot <- file.path(ssimTempDir, paste0(modType, "_AUCPRPlot.png")) } 
+modelOutputsSheet <- addRow(
+  modelOutputsSheet,
+  list(
+    ModelsID = modelsSheet$ModelName[modelsSheet$ModelType == modType],
+    ModelRDS = file.path(ssimTempDir, paste0(modType, "_model.rds")),
+    ResponseCurves = file.path(
+      ssimTempDir,
+      paste0(modType, "_ResponseCurves.png")
+    ),
+    TextOutput = file.path(ssimTempDir, paste0(modType, "_output.txt")),
+    ResidualSmoothPlot = file.path(
+      ssimTempDir,
+      paste0(modType, "_ResidualSmoothPlot.png")
+    ),
+    ResidualSmoothRDS = file.path(
+      ssimTempDir,
+      paste0(modType, "_ResidualSmoothFunction.rds")
+    ),
+    ConfusionMatrix = file.path(
+      ssimTempDir,
+      paste0(modType, "_ConfusionMatrix.png")
+    ),
+    VariableImportancePlot = file.path(
+      ssimTempDir,
+      paste0(modType, "_VariableImportance.png")
+    ),
+    VariableImportanceData = file.path(
+      ssimTempDir,
+      paste0(modType, "_VariableImportance.csv")
+    ),
+    ROCAUCPlot = file.path(ssimTempDir, paste0(modType, "_ROCAUCPlot.png")),
+    CalibrationPlot = file.path(
+      ssimTempDir,
+      paste0(modType, "_CalibrationPlot.png")
+    )
+  )
+)
+
+
+if ("maxent_StandardResidualPlots.png" %in% tempFiles) {
+  modelOutputsSheet$ResidualsPlot <- file.path(
+    ssimTempDir,
+    paste0(modType, "_StandardResidualPlots.png")
+  )
+}
+if ("maxent_AUCPRPlot.png" %in% tempFiles) {
+  modelOutputsSheet$AUCPRPlot <- file.path(
+    ssimTempDir,
+    paste0(modType, "_AUCPRPlot.png")
+  )
+}
 
 # save maxent files to compressed zip
-if(maxentSheet$SaveMaxentFiles){
+if (maxentSheet$SaveMaxentFiles) {
   setwd(out$tempDir)
-  
+
   # create zip file of all maxent specific inputs/outputs
-  zipName <-  paste0("Maxent-",ssimEnvironment()$ScenarioId, ".zip")
+  zipName <- paste0("Maxent-", ssimEnvironment()$ScenarioId, ".zip")
   zip(zipfile = zipName, files = c("Inputs", "Outputs"))
-  
+
   # save zip to model outputs
   modelOutputsSheet$MaxentFiles <- file.path(ssimTempDir, zipName)
 }
@@ -342,4 +529,3 @@ if(maxentSheet$SaveMaxentFiles){
 # save outputs datasheet
 saveDatasheet(myScenario, modelOutputsSheet, "wisdm_OutputModel", append = T)
 progressBar(type = "end")
-

From 319c167053623e49316b6f1c04c9f5ab56cdbb70 Mon Sep 17 00:00:00 2001
From: afilazzola <alex.filazzola@apexrms.com>
Date: Mon, 15 Sep 2025 15:33:38 -0400
Subject: [PATCH 6/6] Fixed typos in variable reduction run log prints

---
 src/6-variable-reduction.R | 333 ++++++++++++++++++++++++-------------
 1 file changed, 218 insertions(+), 115 deletions(-)

diff --git a/src/6-variable-reduction.R b/src/6-variable-reduction.R
index 87c67e4..9e0d662 100644
--- a/src/6-variable-reduction.R
+++ b/src/6-variable-reduction.R
@@ -10,7 +10,7 @@
 
 # source dependencies ----------------------------------------------------------
 
-library(rsyncrosim) # install.packages("C:/GitHub/rsyncrosim", type="source", repos=NULL) 
+library(rsyncrosim) # install.packages("C:/GitHub/rsyncrosim", type="source", repos=NULL)
 library(tidyr)
 library(dplyr)
 library(shiny)
@@ -31,30 +31,49 @@ myProject <- rsyncrosim::project()
 myScenario <- scenario()
 
 # path to ssim directories
-ssimTempDir <- ssimEnvironment()$TransferDirectory 
+ssimTempDir <- ssimEnvironment()$TransferDirectory
 
 # Read in datasheets
-covariatesSheet <- datasheet(myProject, "wisdm_Covariates", optional = T, includeKey = T)
+covariatesSheet <- datasheet(
+  myProject,
+  "wisdm_Covariates",
+  optional = T,
+  includeKey = T
+)
 fieldDataSheet <- datasheet(myScenario, "wisdm_FieldData", optional = T)
-siteDataSheet <- datasheet(myScenario, "wisdm_SiteData", lookupsAsFactors = F) 
-covariateSelectionSheet <- datasheet(myScenario, "wisdm_CovariateSelectionOptions", optional = T)
+siteDataSheet <- datasheet(myScenario, "wisdm_SiteData", lookupsAsFactors = F)
+covariateSelectionSheet <- datasheet(
+  myScenario,
+  "wisdm_CovariateSelectionOptions",
+  optional = T
+)
 retainedCovariatesSheet <- datasheet(myScenario, "wisdm_RetainedCovariates")
-covariateCorrelationSheet <- datasheet(myScenario, "wisdm_OutputCovariateCorrelationMatrix", optional = T) %>% drop_na()
+covariateCorrelationSheet <- datasheet(
+  myScenario,
+  "wisdm_OutputCovariateCorrelationMatrix",
+  optional = T
+) %>%
+  drop_na()
 
 progressBar()
 
 # Set defaults -----------------------------------------------------------------
 
-## Covariate selection 
-if(nrow(covariateSelectionSheet)<1){
-  covariateSelectionSheet <- addRow(covariateSelectionSheet, list(SelectionMethod = "Interactive (Correlation Viewer)"))
+## Covariate selection
+if (nrow(covariateSelectionSheet) < 1) {
+  covariateSelectionSheet <- addRow(
+    covariateSelectionSheet,
+    list(SelectionMethod = "Interactive (Correlation Viewer)")
+  )
 }
 
-if(is.na(covariateSelectionSheet$SelectionMethod)){covariateSelectionSheet$SelectionMethod <- "Interactive (Correlation Viewer)"}
+if (is.na(covariateSelectionSheet$SelectionMethod)) {
+  covariateSelectionSheet$SelectionMethod <- "Interactive (Correlation Viewer)"
+}
 
 ## Retained covariates
 
-if(any(is.na(retainedCovariatesSheet$CovariatesID))){
+if (any(is.na(retainedCovariatesSheet$CovariatesID))) {
   retainedCovariatesSheet <- na.omit(retainedCovariatesSheet)
 }
 
@@ -63,37 +82,52 @@ if(any(is.na(retainedCovariatesSheet$CovariatesID))){
 # merge field and site data
 siteDataWide <- spread(siteDataSheet, key = CovariatesID, value = "Value")
 siteDataWide <- merge(fieldDataSheet, siteDataWide, by = "SiteID")
-siteData <- select(siteDataWide, -c(SiteID, X, Y, UseInModelEvaluation, ModelSelectionSplit, Weight)) # 
-rm(siteDataSheet, fieldDataSheet, siteDataWide); gc()
+siteData <- select(
+  siteDataWide,
+  -c(SiteID, X, Y, UseInModelEvaluation, ModelSelectionSplit, Weight)
+) #
+rm(siteDataSheet, fieldDataSheet, siteDataWide)
+gc()
 
 # identify categorical covariates and drop any with a single level
-if(sum(covariatesSheet$IsCategorical, na.rm = T)>0){
-  factorVars <- covariatesSheet$CovariateName[which(covariatesSheet$IsCategorical == T & covariatesSheet$CovariateName %in% names(siteData))]
-  if(length(factorVars)>0){
+if (sum(covariatesSheet$IsCategorical, na.rm = T) > 0) {
+  factorVars <- covariatesSheet$CovariateName[which(
+    covariatesSheet$IsCategorical == T &
+      covariatesSheet$CovariateName %in% names(siteData)
+  )]
+  if (length(factorVars) > 0) {
     badFactors <- NULL
-    for (i in 1:length(factorVars)){
-      factor.table <- table(siteData[,factorVars[i]])
-      if(length(factor.table)<2){ badFactors <- c(badFactors, factorVars[i]) }
+    for (i in 1:length(factorVars)) {
+      factor.table <- table(siteData[, factorVars[i]])
+      if (length(factor.table) < 2) {
+        badFactors <- c(badFactors, factorVars[i])
+      }
     }
-    if(length(badFactors) > 0){
+    if (length(badFactors) > 0) {
       factorVars <- factorVars[-which(factorVars %in% badFactors)]
-      if(length(factorVars) == 0){ factorVars <- NULL }
-      updateRunLog(paste0("\nThe following categorical response variables were removed from consideration\n",
-                            "because they had only one level: ",paste(badFactors, collapse=","),"\n"))
+      if (length(factorVars) == 0) {
+        factorVars <- NULL
+      }
+      updateRunLog(paste0(
+        "\nThe following categorical response variables were removed from consideration\n",
+        "because they had only one level: ",
+        paste(badFactors, collapse = ","),
+        "\n"
+      ))
     }
-  } else { 
+  } else {
     badFactors <- NULL
-    }
-} else { 
+  }
+} else {
   factorVars <- NULL
   badFactors <- NULL
-  }
+}
 
-# model family 
+# model family
 modelFamily <- "binomial"
 
 # Ignore background data if present
-siteData <- siteData[!siteData$Response == -9999,]
+siteData <- siteData[!siteData$Response == -9999, ]
 
 # update response for pseudo-absence sites
 siteData$Response[siteData$Response == -9998] <- 0
@@ -105,137 +139,206 @@ progressBar()
 
 # Automatic selection (using VIF) ----------------------------------------------
 
-if(covariateSelectionSheet$SelectionMethod == "Automatic (Variance Inflation Factor)"){
-  
-  ## Covariate selection 
-  if(is.na(covariateSelectionSheet$CorrelationThreshold)){covariateSelectionSheet$CorrelationThreshold <- 0.7}
-  if(is.na(covariateSelectionSheet$VIFThreshold)){covariateSelectionSheet$VIFThreshold <- 10}
-  
-  saveDatasheet(myScenario, covariateSelectionSheet, "wisdm_CovariateSelectionOptions")
-  
+if (
+  covariateSelectionSheet$SelectionMethod ==
+    "Automatic (Variance Inflation Factor)"
+) {
+  ## Covariate selection
+  if (is.na(covariateSelectionSheet$CorrelationThreshold)) {
+    covariateSelectionSheet$CorrelationThreshold <- 0.7
+  }
+  if (is.na(covariateSelectionSheet$VIFThreshold)) {
+    covariateSelectionSheet$VIFThreshold <- 10
+  }
+
+  saveDatasheet(
+    myScenario,
+    covariateSelectionSheet,
+    "wisdm_CovariateSelectionOptions"
+  )
+
   source(file.path(packageDir, "usdm-vif.R"))
-  
-  if(length(retainedCovariatesSheet$CovariatesID)>0){
-    colin <- vifstep(x = covData, th = covariateSelectionSheet$VIFThreshold, keep = as.character(retainedCovariatesSheet$CovariatesID))
-  } else{
+
+  if (length(retainedCovariatesSheet$CovariatesID) > 0) {
+    colin <- vifstep(
+      x = covData,
+      th = covariateSelectionSheet$VIFThreshold,
+      keep = as.character(retainedCovariatesSheet$CovariatesID)
+    )
+  } else {
     colin <- vifstep(x = covData, th = covariateSelectionSheet$VIFThreshold)
   }
-  
-  selectedCovariates <-  colin@results$Variables
-  
+
+  selectedCovariates <- colin@results$Variables
+
   dropCovs <- colin@excluded
   allCovs <- colin@variables
-  
-  updateRunLog("\n",length(dropCovs), " of the ", length(allCovs), " input variables were removed from the covartite list due to collinarity:\n\n", paste(dropCovs, collapse = "\n"))
-  updateRunLog("VIFs of remaining varaibles")
-  updateRunLog(pander::pandoc.table.return(colin@results, style = "simple", split.tables = 100))
-  
+
+  updateRunLog(
+    "\n",
+    length(dropCovs),
+    " of the ",
+    length(allCovs),
+    " input variables were removed from the covariate list due to collinearity:\n\n",
+    paste(dropCovs, collapse = "\n")
+  )
+  updateRunLog("VIFs of remaining variables")
+  updateRunLog(pander::pandoc.table.return(
+    colin@results,
+    style = "simple",
+    split.tables = 100
+  ))
+
   corMat <- colin@corMatrix
   corMat[lower.tri(corMat, diag = TRUE)] <- NA
-  corVals <- unique(corMat[corMat > covariateSelectionSheet$CorrelationThreshold])
+  corVals <- unique(corMat[
+    corMat > covariateSelectionSheet$CorrelationThreshold
+  ])
   corVals <- corVals[!is.na(corVals)]
-  
-  if(length(corVals)>0){
-    
+
+  if (length(corVals) > 0) {
     updateRunLog("\nThe following correlated covariates were retained:\n")
-    
-    for(i in corVals){
-      row <- row.names(corMat)[as.vector(which(corMat==i,arr.ind=TRUE))[1]]
-      col <- colnames(corMat)[as.vector(which(corMat==i,arr.ind=TRUE))[2]]
-      updateRunLog(paste0(row, " ~ ", col, ": ", round(i,4)))
+
+    for (i in corVals) {
+      row <- row.names(corMat)[as.vector(which(corMat == i, arr.ind = TRUE))[1]]
+      col <- colnames(corMat)[as.vector(which(corMat == i, arr.ind = TRUE))[2]]
+      updateRunLog(paste0(row, " ~ ", col, ": ", round(i, 4)))
     }
   }
-progressBar()
+  progressBar()
 } # end if "Automatic"
 
 # Interactive Selection (using viewer) ------------------------------------------
 
-if(covariateSelectionSheet$SelectionMethod == "Interactive (Correlation Viewer)"){
-  
-  ## Covariate selection 
-  if(is.na(covariateSelectionSheet$DisplayHighestCorrelations)){covariateSelectionSheet$DisplayHighestCorrelations <- TRUE}
-  if(is.na(covariateSelectionSheet$CorrelationThreshold)){covariateSelectionSheet$CorrelationThreshold <- 0.7}
-  if(is.na(covariateSelectionSheet$NumberOfPlots)){covariateSelectionSheet$NumberOfPlots <- 5}
-  
-  saveDatasheet(myScenario, covariateSelectionSheet, "wisdm_CovariateSelectionOptions")
-  
+if (
+  covariateSelectionSheet$SelectionMethod == "Interactive (Correlation Viewer)"
+) {
+  ## Covariate selection
+  if (is.na(covariateSelectionSheet$DisplayHighestCorrelations)) {
+    covariateSelectionSheet$DisplayHighestCorrelations <- TRUE
+  }
+  if (is.na(covariateSelectionSheet$CorrelationThreshold)) {
+    covariateSelectionSheet$CorrelationThreshold <- 0.7
+  }
+  if (is.na(covariateSelectionSheet$NumberOfPlots)) {
+    covariateSelectionSheet$NumberOfPlots <- 5
+  }
+
+  saveDatasheet(
+    myScenario,
+    covariateSelectionSheet,
+    "wisdm_CovariateSelectionOptions"
+  )
+
   # prep deviance explained data
   devExp <- vector()
-  for(i in (1:ncol(covData))){
-    devExp[i] <- try(my.panel.smooth(x = covData[,i], 
-                                     y = siteData$Response,
-                                     plot.it=FALSE,
-                                     family=modelFamily),silent=TRUE)
+  for (i in (1:ncol(covData))) {
+    devExp[i] <- try(
+      my.panel.smooth(
+        x = covData[, i],
+        y = siteData$Response,
+        plot.it = FALSE,
+        family = modelFamily
+      ),
+      silent = TRUE
+    )
   }
-  devExp <- round(devExp,2)
+  devExp <- round(devExp, 2)
   devInfo <- as.data.frame(devExp)
   devInfo$covs <- names(covData)
   devInfo$covDE <- paste0(devInfo$covs, " (", devInfo$devExp, ")")
   covsDE <- devInfo$covs
   names(covsDE) <- devInfo$covDE
-  
+
   # run pairs explore with all variables -----------------------------------------
-  
+
   options <- covariateSelectionSheet
-  options$NumberOfPlots <- ncol(select(siteData, -Response, -all_of(badFactors)))
-  
-  pairsExplore(inputData = siteData,
-               options = options,
-               selectedCovs = names(select(siteData, -Response, -all_of(badFactors))),
-               factorVars = factorVars,
-               family = modelFamily,
-               outputFile = file.path(ssimTempDir, "InitialCovariateCorrelationMatrix.png"))
-  
+  options$NumberOfPlots <- ncol(select(
+    siteData,
+    -Response,
+    -all_of(badFactors)
+  ))
+
+  pairsExplore(
+    inputData = siteData,
+    options = options,
+    selectedCovs = names(select(siteData, -Response, -all_of(badFactors))),
+    factorVars = factorVars,
+    family = modelFamily,
+    outputFile = file.path(ssimTempDir, "InitialCovariateCorrelationMatrix.png")
+  )
+
   # run shiny app ----------------------------------------------------------------
-  
+
   # inputs
   covData <- select(siteData, -Response, -all_of(badFactors))
   selectedCovariates <- names(covData)
   # covsDE
   options <- covariateSelectionSheet
-  
-  # TO DO: find better way to access default web app 
+
+  # TO DO: find better way to access default web app
   browser.path <- NULL
-  if(file.exists("C:/Program Files/Google/Chrome/Application/chrome.exe")){
-   browser.path <- "C:/Program Files/Google/Chrome/Application/chrome.exe"
-  } else if(file.exists("C:/Program Files(x86)/Google/Chrome/Application/chrome.exe")){
+  if (file.exists("C:/Program Files/Google/Chrome/Application/chrome.exe")) {
+    browser.path <- "C:/Program Files/Google/Chrome/Application/chrome.exe"
+  } else if (
+    file.exists("C:/Program Files(x86)/Google/Chrome/Application/chrome.exe")
+  ) {
     browser.path <- "C:/Program Files(x86)/Google/Chrome/Application/chrome.exe"
-  } else if(file.exists("C:/Program Files/Mozilla Firefox/firefox.exe")){
+  } else if (file.exists("C:/Program Files/Mozilla Firefox/firefox.exe")) {
     browser.path <- "C:/Program Files/Mozilla Firefox/firefox.exe"
-   # } else if(file.exists("C:/Program Files/Internet Explorer/iexplore.exe")){
-   # browser.path <- "C:/Program Files/Internet Explorer/iexplore.exe"
+    # } else if(file.exists("C:/Program Files/Internet Explorer/iexplore.exe")){
+    # browser.path <- "C:/Program Files/Internet Explorer/iexplore.exe"
   }
-  
-  # portable chrome - to large to store on git 
+
+  # portable chrome - to large to store on git
   # browser.path = file.path(packageDir,"Apps/chrome/chrome.exe")
-  
-  if(is.null(browser.path)){
-    runApp(appDir = file.path(packageDir, "06-covariate-correlation-app.R"),
-           launch.browser = TRUE)  
+
+  if (is.null(browser.path)) {
+    runApp(
+      appDir = file.path(packageDir, "06-covariate-correlation-app.R"),
+      launch.browser = TRUE
+    )
   } else {
-    runApp(appDir = file.path(packageDir, "06-covariate-correlation-app.R"),
-         launch.browser = function(shinyurl) {
-           system(paste0("\"", browser.path, "\" --app=", shinyurl, " -incognito"), wait = F)
-          })
+    runApp(
+      appDir = file.path(packageDir, "06-covariate-correlation-app.R"),
+      launch.browser = function(shinyurl) {
+        system(
+          paste0("\"", browser.path, "\" --app=", shinyurl, " -incognito"),
+          wait = F
+        )
+      }
+    )
   }
-  
+
   # save image files
   covariateCorrelationSheet <- addRow(
-    covariateCorrelationSheet, 
-    data.frame(InitialMatrix = file.path(ssimTempDir, 
-                                         "InitialCovariateCorrelationMatrix.png"), 
-               SelectedMatrix = file.path(ssimTempDir, 
-                                          "SelectedCovariateCorrelationMatrix.png")))
-  saveDatasheet(myScenario, covariateCorrelationSheet, 
-                "wisdm_OutputCovariateCorrelationMatrix")
+    covariateCorrelationSheet,
+    data.frame(
+      InitialMatrix = file.path(
+        ssimTempDir,
+        "InitialCovariateCorrelationMatrix.png"
+      ),
+      SelectedMatrix = file.path(
+        ssimTempDir,
+        "SelectedCovariateCorrelationMatrix.png"
+      )
+    )
+  )
+  saveDatasheet(
+    myScenario,
+    covariateCorrelationSheet,
+    "wisdm_OutputCovariateCorrelationMatrix"
+  )
   progressBar()
-} # end if "Interactive" 
+} # end if "Interactive"
 
 # save reduced covariate list --------------------------------------------------
 
-selectedCovariates <- unique(c(as.character(retainedCovariatesSheet$CovariatesID), selectedCovariates))
+selectedCovariates <- unique(c(
+  as.character(retainedCovariatesSheet$CovariatesID),
+  selectedCovariates
+))
 
 retainedCovariatesSheet <- data.frame(CovariatesID = selectedCovariates)
 saveDatasheet(myScenario, retainedCovariatesSheet, "wisdm_RetainedCovariates")
 progressBar(type = "end")
-